Package Gnumed :: Package business :: Module gmLOINC
[frames | no frames]

Source Code for Module Gnumed.business.gmLOINC

  1  # -*- coding: utf8 -*- 
  2  """LOINC handling code. 
  3   
  4  http://loinc.org 
  5   
  6  license: GPL v2 or later 
  7  """ 
  8  #============================================================ 
  9  __version__ = "$Revision: 1.7 $" 
 10  __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>" 
 11   
 12  import sys, codecs, logging, csv 
 13   
 14   
 15  if __name__ == '__main__': 
 16          sys.path.insert(0, '../../') 
 17  from Gnumed.pycommon import gmPG2 
 18  from Gnumed.pycommon import gmTools 
 19   
 20   
 21  _log = logging.getLogger('gm.loinc') 
 22  _log.info(__version__) 
 23   
 # Metadata about the LOINC distribution, used by the functions below.
 # origin_url is stored as the "source" of the data source record
 # written by loinc_import().
 24  origin_url = u'http://loinc.org' 
 # Encoding used by split_LOINCDBTXT() when reading the LOINC source file.
 25  file_encoding = 'latin1'                        # encoding is empirical 
 # split_LOINCDBTXT() treats the line containing this text as the last
 # line of the license part; everything after it goes to the data file.
 26  license_delimiter = u'Clip Here for Data' 
 # get_version() returns whatever follows this prefix on the first line
 # of the license file that starts with it.
 27  version_tag = u'LOINC(R) Database Version' 
 # Long and short name stored in the data source record by loinc_import().
 28  name_long = u'LOINC® (Logical Observation Identifiers Names and Codes)' 
 29  name_short = u'LOINC' 
 30   
 # List of LOINC field names, produced by splitting the string on whitespace.
 # loinc_import() only uses the length of this list, to build one "%s"
 # placeholder per field for the INSERT into ref.loinc_staging.
 # NOTE(review): presumably this mirrors the column order of the LOINC data
 # file and of the staging table -- confirm against the schema.
 31  loinc_fields = u"LOINC_NUM COMPONENT PROPERTY TIME_ASPCT SYSTEM SCALE_TYP METHOD_TYP RELAT_NMS CLASS SOURCE DT_LAST_CH CHNG_TYPE COMMENTS ANSWERLIST STATUS MAP_TO SCOPE NORM_RANGE IPCC_UNITS REFERENCE EXACT_CMP_SY MOLAR_MASS CLASSTYPE FORMULA SPECIES EXMPL_ANSWERS ACSSYM BASE_NAME FINAL NAACCR_ID CODE_TABLE SETROOT PANELELEMENTS SURVEY_QUEST_TEXT SURVEY_QUEST_SRC UNITSREQUIRED SUBMITTED_UNITS RELATEDNAMES2 SHORTNAME ORDER_OBS CDISC_COMMON_TESTS HL7_FIELD_SUBFIELD_ID EXTERNAL_COPYRIGHT_NOTICE EXAMPLE_UNITS INPC_PERCENTAGE LONG_COMMON_NAME".split() 
 32   
 33  #============================================================ 
 34   
 # Hard-coded lists of LOINC codes, grouped by the measurement named in each
 # variable. They are not referenced by any function visible in this module.
 # NOTE(review): presumably codes for glomerular filtration rate, body height
 # and body weight results -- membership of the individual codes is not
 # verified here.
 35  LOINC_gfr_quantity = ['33914-3', '45066-8', '48642-3', '48643-1', '50044-7', '50210-4', '50384-7', '62238-1', '69405-9', '70969-1'] 
 36  LOINC_height = ['3137-7', '3138-5', '8301-4', '8302-2', '8305-5', '8306-3', '8307-1', '8308-9'] 
 37  LOINC_weight = ['18833-4', '29463-7', '3141-9', '3142-7', '8335-2', '8339-4', '8344-4', '8346-9', '8351-9'] 
 38   
 39  #============================================================ 
def loinc2term(loinc=None):
    """Return the term(s) stored in ref.v_coded_terms for a LOINC code.

    The query picks, in this order of preference, the term in the
    language reported by i18n.get_curr_lang(), the term in 'en_EN',
    or the term without any language restriction.

    Args:
        loinc: the LOINC code to look up.

    Returns:
        A list with the first column of every returned row, or an
        empty list if the query yielded NULL (no term found).
    """
    # The outer coalesce() makes the query yield a row even when nothing
    # matches; that row then holds NULL, which is mapped to [] below.
    lookup_sql = u"""
        SELECT coalesce (
            (SELECT term
            FROM ref.v_coded_terms
            WHERE
                coding_system = 'LOINC'
                    AND
                code = %(loinc)s
                    AND
                lang = i18n.get_curr_lang()
            ),
            (SELECT term
            FROM ref.v_coded_terms
            WHERE
                coding_system = 'LOINC'
                    AND
                code = %(loinc)s
                    AND
                lang = 'en_EN'
            ),
            (SELECT term
            FROM ref.v_coded_terms
            WHERE
                coding_system = 'LOINC'
                    AND
                code = %(loinc)s
            )
        )"""
    query = {'cmd': lookup_sql, 'args': {'loinc': loinc}}
    rows, idx = gmPG2.run_ro_queries(queries = [query], get_col_idx = False)

    if rows[0][0] is not None:
        return [ row[0] for row in rows ]

    return []
78 #============================================================
def split_LOINCDBTXT(input_fname=None, data_fname=None, license_fname=None):
    """Split a LOINC source file into a license file and a data file.

    Lines are copied to the license file up to and including the first
    line containing license_delimiter; all following lines are copied
    to the data file. The input is decoded with file_encoding, both
    outputs are written as utf8.

    Args:
        input_fname: name of the LOINC source file to split.
        data_fname: name of the data file to write; a unique name is
            requested from gmTools.get_unique_filename() if None.
        license_fname: name of the license file to write; a unique name
            is requested from gmTools.get_unique_filename() if None.

    Returns:
        The tuple (data_fname, license_fname).
    """
    _log.debug('splitting LOINC source file [%s]', input_fname)

    if license_fname is None:
        license_fname = gmTools.get_unique_filename(prefix = 'loinc_license-', suffix = '.txt')
    _log.debug('LOINC header: %s', license_fname)

    if data_fname is None:
        data_fname = gmTools.get_unique_filename(prefix = 'loinc_data-', suffix = '.csv')
    _log.debug('LOINC data: %s', data_fname)

    # codecs.open() always opens the underlying file in binary mode, so the
    # former 'U' flag never had any effect; plain 'r' is equivalent and is
    # also accepted by Python versions which reject 'U'.
    loinc_file = codecs.open(input_fname, 'r', encoding = file_encoding, errors = 'replace')
    try:
        out_file = codecs.open(license_fname, 'w', encoding = 'utf8', errors = 'replace')
        try:
            for line in loinc_file:
                out_file.write(line)
                if license_delimiter in line:
                    # the delimiter line itself still belongs to the license
                    # part, every later line goes into the data file
                    # (the data file is reopened -- and thereby truncated --
                    # each time a delimiter line is seen)
                    out_file.close()
                    out_file = codecs.open(data_fname, 'w', encoding = 'utf8', errors = 'replace')
        finally:
            out_file.close()
    finally:
        # the input file used to be left open
        loinc_file.close()

    return data_fname, license_fname
107 #============================================================
def map_field_names(data_fname='loinc_data.csv'):
    """Check whether the first line of the LOINC data file looks like a header.

    This is an unfinished stub: the outcome of the header check is not
    acted upon and nothing is returned.

    Args:
        data_fname: name of the utf8 encoded data file to inspect.

    Returns:
        None.
    """
    # codecs.open() always works on a binary file, so 'U' was a no-op here;
    # 'r' behaves the same and is accepted by all Python versions.
    csv_file = codecs.open(data_fname, 'r', 'utf8', 'replace')
    try:
        first_line = csv_file.readline()
    finally:
        # the file used to be left open
        csv_file.close()

    sniffer = csv.Sniffer()
    if sniffer.has_header(first_line):
        # header detected -- mapping it to field names is not implemented
        pass
115 #============================================================
def get_version(license_fname='loinc_license.txt'):
    """Extract the LOINC version from a license file.

    Args:
        license_fname: name of the utf8 encoded license file, such as
            the one written by split_LOINCDBTXT().

    Returns:
        The text following version_tag on the first line starting with
        that tag, stripped of surrounding whitespace, or None if no
        line starts with the tag.
    """
    # 'r' rather than 'rU': codecs.open() opens the file in binary mode
    # anyway, and newer Python versions reject the 'U' flag.
    in_file = codecs.open(license_fname, 'r', encoding = 'utf8', errors = 'replace')
    try:
        for line in in_file:
            if line.startswith(version_tag):
                return line[len(version_tag):].strip()
    finally:
        # runs on return as well as on error, so the handle never leaks
        in_file.close()

    return None
128 #============================================================
def _empty_loinc_staging(conn):
    """Delete all rows from ref.loinc_staging and commit."""
    curs = conn.cursor()
    cmd = u"""DELETE FROM ref.loinc_staging"""
    gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}])
    curs.close()
    conn.commit()
    _log.debug('staging table emptied')

def _load_loinc_staging(conn, data_fname):
    """Insert every line but the first of the data file into ref.loinc_staging and commit."""
    # one "%s" placeholder per entry in loinc_fields
    cmd = u"""INSERT INTO ref.loinc_staging values (%s%%s)""" % (u'%s, ' * (len(loinc_fields) - 1))
    # codecs.open() reads in binary mode regardless, so 'r' equals the former 'rU'
    csv_file = codecs.open(data_fname, 'r', 'utf8', 'replace')
    try:
        loinc_reader = gmTools.unicode_csv_reader(csv_file, delimiter = "\t", quotechar = '"')
        curs = conn.cursor()
        for line_idx, loinc_line in enumerate(loinc_reader):
            if line_idx == 0:
                # the first line is skipped (the column header, presumably)
                continue
            gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': loinc_line}])
        curs.close()
        conn.commit()
    finally:
        # close the data file even if an insert fails
        csv_file.close()
    _log.debug('staging table loaded')

def _upsert_loinc_data_source(conn, license_fname, version, lang):
    """Create or update the ref.data_source row for this LOINC version and return its pk."""
    in_file = codecs.open(license_fname, 'r', encoding = 'utf8', errors = 'replace')
    try:
        # the complete license text becomes the description of the data source
        desc = in_file.read()
    finally:
        in_file.close()
    args = {'ver': version, 'desc': desc, 'url': origin_url, 'name_long': name_long, 'name_short': name_short, 'lang': lang}
    queries = [
        # insert if not existing
        {'args': args, 'cmd': u"""
            INSERT INTO ref.data_source (name_long, name_short, version) SELECT
                %(name_long)s,
                %(name_short)s,
                %(ver)s
            WHERE NOT EXISTS (
                SELECT 1 FROM ref.data_source WHERE
                    name_long = %(name_long)s
                        AND
                    name_short = %(name_short)s
                        AND
                    version = %(ver)s
            )"""
        },
        # update non-unique fields
        {'args': args, 'cmd': u"""
            UPDATE ref.data_source SET
                description = %(desc)s,
                source = %(url)s,
                lang = %(lang)s
            WHERE
                name_long = %(name_long)s
                    AND
                name_short = %(name_short)s
                    AND
                version = %(ver)s
            """
        },
        # retrieve PK of data source
        {'args': args, 'cmd': u"""SELECT pk FROM ref.data_source WHERE name_short = %(name_short)s AND version = %(ver)s"""}
    ]
    curs = conn.cursor()
    rows, _idx = gmPG2.run_rw_queries(link_obj = curs, queries = queries, return_data = True)
    data_src_pk = rows[0][0]
    # no commit here: the caller commits after the transfer queries have run
    curs.close()
    _log.debug('data source record created or updated, pk is #%s', data_src_pk)
    return data_src_pk

def _transfer_loinc_staging(conn, data_src_pk):
    """Copy staged rows into ref.loinc for the given data source and commit."""
    args = {'src_pk': data_src_pk}
    queries = []
    # insert (data source, term, code) rows which do not exist yet; the term
    # is long_common_name or, if that is empty, the ':'-joined name parts
    queries.append ({
        'args': args,
        'cmd': u"""
            INSERT INTO ref.loinc (
                fk_data_source, term, code
            )
            SELECT
                %(src_pk)s,
                coalesce (
                    nullif(long_common_name, ''),
                    (
                        coalesce(nullif(component, '') || ':', '') ||
                        coalesce(nullif(property, '') || ':', '') ||
                        coalesce(nullif(time_aspect, '') || ':', '') ||
                        coalesce(nullif(system, '') || ':', '') ||
                        coalesce(nullif(scale_type, '') || ':', '') ||
                        coalesce(nullif(method_type, '') || ':', '')
                    )
                ),
                nullif(loinc_num, '')
            FROM
                ref.loinc_staging r_ls
            WHERE NOT EXISTS (
                SELECT 1 FROM ref.loinc r_l WHERE
                    r_l.fk_data_source = %(src_pk)s
                        AND
                    r_l.code = nullif(r_ls.loinc_num, '')
                        AND
                    r_l.term = coalesce (
                        nullif(r_ls.long_common_name, ''),
                        (
                            coalesce(nullif(r_ls.component, '') || ':', '') ||
                            coalesce(nullif(r_ls.property, '') || ':', '') ||
                            coalesce(nullif(r_ls.time_aspect, '') || ':', '') ||
                            coalesce(nullif(r_ls.system, '') || ':', '') ||
                            coalesce(nullif(r_ls.scale_type, '') || ':', '') ||
                            coalesce(nullif(r_ls.method_type, '') || ':', '')
                        )
                    )
            )"""
    })
    # fill in the remaining columns of the rows matching on
    # data source, code and term
    queries.append ({
        'args': args,
        'cmd': u"""
            UPDATE ref.loinc SET
                comment = nullif(r_ls.comments, ''),
                component = nullif(r_ls.component, ''),
                property = nullif(r_ls.property, ''),
                time_aspect = nullif(r_ls.time_aspect, ''),
                system = nullif(r_ls.system, ''),
                scale_type = nullif(r_ls.scale_type, ''),
                method_type = nullif(r_ls.method_type, ''),
                related_names_1_old = nullif(r_ls.related_names_1_old, ''),
                grouping_class = nullif(r_ls.class, ''),
                loinc_internal_source = nullif(r_ls.source, ''),
                dt_last_change = nullif(r_ls.dt_last_change, ''),
                change_type = nullif(r_ls.change_type, ''),
                answer_list = nullif(r_ls.answer_list, ''),
                code_status = nullif(r_ls.status, ''),
                maps_to = nullif(r_ls.map_to, ''),
                scope = nullif(r_ls.scope, ''),
                normal_range = nullif(r_ls.normal_range, ''),
                ipcc_units = nullif(r_ls.ipcc_units, ''),
                reference = nullif(r_ls.reference, ''),
                exact_component_synonym = nullif(r_ls.exact_component_synonym, ''),
                molar_mass = nullif(r_ls.molar_mass, ''),
                grouping_class_type = nullif(r_ls.class_type, '')::smallint,
                formula = nullif(r_ls.formula, ''),
                species = nullif(r_ls.species, ''),
                example_answers = nullif(r_ls.example_answers, ''),
                acs_synonyms = nullif(r_ls.acs_synonyms, ''),
                base_name = nullif(r_ls.base_name, ''),
                final = nullif(r_ls.final, ''),
                naa_ccr_id = nullif(r_ls.naa_ccr_id, ''),
                code_table = nullif(r_ls.code_table, ''),
                is_set_root = nullif(r_ls.is_set_root, '')::boolean,
                panel_elements = nullif(r_ls.panel_elements, ''),
                survey_question_text = nullif(r_ls.survey_question_text, ''),
                survey_question_source = nullif(r_ls.survey_question_source, ''),
                units_required = nullif(r_ls.units_required, ''),
                submitted_units = nullif(r_ls.submitted_units, ''),
                related_names_2 = nullif(r_ls.related_names_2, ''),
                short_name = nullif(r_ls.short_name, ''),
                order_obs = nullif(r_ls.order_obs, ''),
                cdisc_common_tests = nullif(r_ls.cdisc_common_tests, ''),
                hl7_field_subfield_id = nullif(r_ls.hl7_field_subfield_id, ''),
                external_copyright_notice = nullif(r_ls.external_copyright_notice, ''),
                example_units = nullif(r_ls.example_units, ''),
                inpc_percentage = nullif(r_ls.inpc_percentage, ''),
                long_common_name = nullif(r_ls.long_common_name, '')
            FROM
                ref.loinc_staging r_ls
            WHERE
                fk_data_source = %(src_pk)s
                    AND
                code = nullif(r_ls.loinc_num, '')
                    AND
                term = coalesce (
                    nullif(r_ls.long_common_name, ''),
                    (
                        coalesce(nullif(r_ls.component, '') || ':', '') ||
                        coalesce(nullif(r_ls.property, '') || ':', '') ||
                        coalesce(nullif(r_ls.time_aspect, '') || ':', '') ||
                        coalesce(nullif(r_ls.system, '') || ':', '') ||
                        coalesce(nullif(r_ls.scale_type, '') || ':', '') ||
                        coalesce(nullif(r_ls.method_type, '') || ':', '')
                    )
                )
            """
    })
    curs = conn.cursor()
    gmPG2.run_rw_queries(link_obj = curs, queries = queries)
    curs.close()
    conn.commit()
    _log.debug('transfer from staging table to real table done')

def loinc_import(data_fname=None, license_fname=None, version=None, conn=None, lang='en_EN'):
    """Import LOINC data into the database via the staging table.

    Steps, in order: empty ref.loinc_staging, load the data file into
    it, create or update the matching ref.data_source row, copy the
    staged rows into ref.loinc, and empty the staging table again.

    Args:
        data_fname: name of the tab separated, utf8 encoded data file.
        license_fname: name of the utf8 encoded license file; its text
            is stored as the description of the data source.
        version: LOINC version; read from the license file by
            get_version() if None.
        conn: database connection; its cursor() and commit() methods
            are used.
        lang: language stored in the data source record.

    Returns:
        True.

    Raises:
        ValueError: if no version was given and none could be detected.
    """
    if version is None:
        version = get_version(license_fname = license_fname)

    if version is None:
        raise ValueError('cannot detect LOINC version')

    _log.debug('importing LOINC version [%s]', version)

    # clean out staging area
    _empty_loinc_staging(conn)

    # import data from csv file into staging table
    _load_loinc_staging(conn, data_fname)

    # create data source record
    data_src_pk = _upsert_loinc_data_source(conn, license_fname, version, lang)

    # import from staging table to real table
    _transfer_loinc_staging(conn, data_src_pk)

    # clean out staging area
    _empty_loinc_staging(conn)

    return True
#============================================================
# main
#------------------------------------------------------------
if __name__ == "__main__":

    # the self-tests only run when invoked as: <script> test [argument]
    if len(sys.argv) < 2:
        sys.exit()

    if sys.argv[1] != 'test':
        sys.exit()

    # NOTE(review): gmLog2 is not referenced below; presumably it is
    # imported for its side effects -- confirm before removing
    from Gnumed.pycommon import gmLog2
    from Gnumed.pycommon import gmI18N

    gmI18N.activate_locale()
#   gmDateTime.init()

    #--------------------------------------------------------
    def _test_argument():
        """Return the argument following 'test' or exit with a usage hint."""
        # this used to be a bare sys.argv[2], which raised IndexError
        # when the script was started with 'test' only
        if len(sys.argv) < 3:
            sys.exit('usage: %s test <LOINC source file | LOINC code>' % sys.argv[0])
        return sys.argv[2]
    #--------------------------------------------------------
    def test_loinc_split():
        """Split the LOINC source file named on the command line."""
        # print(...) with a single argument prints the same text under
        # Python 2 as the former print statement did
        print(split_LOINCDBTXT(input_fname = _test_argument()))
    #--------------------------------------------------------
    def test_loinc_import():
        """Run an import for a fixed version."""
        # NOTE: no connection is passed, so loinc_import() will fail on
        # conn.cursor() unless this call is completed first
        loinc_import(version = '2.26')
    #--------------------------------------------------------
    def test_loinc2term():
        """Look up the term for the LOINC code given on the command line."""
        code = _test_argument()
        term = loinc2term(code)
        print('%s -> %s' % (code, term))
    #--------------------------------------------------------
    test_loinc_split()
    #test_loinc_import()
    #test_loinc2term()

#============================================================