Package Gnumed :: Package business :: Module gmLOINC
[frames | no frames]

Source Code for Module Gnumed.business.gmLOINC

  1  # -*- coding: utf8 -*- 
  2  """LOINC handling code. 
  3   
  4  http://loinc.org 
  5   
  6  license: GPL v2 or later 
  7  """ 
  8  #============================================================ 
  9  __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>" 
 10   
 11  import sys 
 12  import codecs 
 13  import logging 
 14  import csv 
 15  import re as regex 
 16   
 17   
 18  if __name__ == '__main__': 
 19          sys.path.insert(0, '../../') 
 20  from Gnumed.pycommon import gmPG2 
 21  from Gnumed.pycommon import gmTools 
 22  from Gnumed.pycommon import gmMatchProvider 
 23   
 24   
 25  _log = logging.getLogger('gm.loinc') 
 26   
 27   
 28  origin_url = u'http://loinc.org' 
 29  file_encoding = 'latin1'                        # encoding is empirical 
 30  license_delimiter = u'Clip Here for Data' 
 31  version_tag = u'LOINC(R) Database Version' 
 32  name_long = u'LOINC® (Logical Observation Identifiers Names and Codes)' 
 33  name_short = u'LOINC' 
 34   
 35  loinc_fields = u"LOINC_NUM COMPONENT PROPERTY TIME_ASPCT SYSTEM SCALE_TYP METHOD_TYP RELAT_NMS CLASS SOURCE DT_LAST_CH CHNG_TYPE COMMENTS ANSWERLIST STATUS MAP_TO SCOPE NORM_RANGE IPCC_UNITS REFERENCE EXACT_CMP_SY MOLAR_MASS CLASSTYPE FORMULA SPECIES EXMPL_ANSWERS ACSSYM BASE_NAME FINAL NAACCR_ID CODE_TABLE SETROOT PANELELEMENTS SURVEY_QUEST_TEXT SURVEY_QUEST_SRC UNITSREQUIRED SUBMITTED_UNITS RELATEDNAMES2 SHORTNAME ORDER_OBS CDISC_COMMON_TESTS HL7_FIELD_SUBFIELD_ID EXTERNAL_COPYRIGHT_NOTICE EXAMPLE_UNITS INPC_PERCENTAGE LONG_COMMON_NAME".split() 
 36   
 37  #============================================================ 
 38   
 39  LOINC_creatinine_quantity = ['2160-0', '14682-9', '40264-4', '40248-7'] 
 40  LOINC_gfr_quantity = ['33914-3', '45066-8', '48642-3', '48643-1', '50044-7', '50210-4', '50384-7', '62238-1', '69405-9', '70969-1'] 
 41  LOINC_height = ['3137-7', '3138-5', '8301-4', '8302-2', '8305-5', '8306-3', '8307-1', '8308-9'] 
 42  LOINC_weight = ['18833-4', '29463-7', '3141-9', '3142-7', '8335-2', '8339-4', '8344-4', '8346-9', '8351-9'] 
 43   
 44  #============================================================ 
def loinc2term(loinc=None):
    """Look up the term stored for a LOINC code.

    The term is read from ref.v_coded_terms, preferring the
    current i18n language, then 'en_EN', then any language.

    Returns an empty list if no term was found, otherwise the
    list of first-column values of the returned rows.
    """
    # NOTE: the coalesce() makes the query return a row holding
    # NULL (rather than no row) when nothing matches
    cmd = u"""
SELECT coalesce (
    (SELECT term
    FROM ref.v_coded_terms
    WHERE
        coding_system = 'LOINC'
            AND
        code = %(loinc)s
            AND
        lang = i18n.get_curr_lang()
    ),
    (SELECT term
    FROM ref.v_coded_terms
    WHERE
        coding_system = 'LOINC'
            AND
        code = %(loinc)s
            AND
        lang = 'en_EN'
    ),
    (SELECT term
    FROM ref.v_coded_terms
    WHERE
        coding_system = 'LOINC'
            AND
        code = %(loinc)s
    )
)"""
    query = {'cmd': cmd, 'args': {'loinc': loinc}}
    rows, idx = gmPG2.run_ro_queries(queries = [query], get_col_idx = False)

    terms = [ row[0] for row in rows ]
    # a NULL in the first row means: no match
    if terms[0] is None:
        return []
    return terms
83 #============================================================
def split_LOINCDBTXT(input_fname=None, data_fname=None, license_fname=None):
    """Split a raw LOINC file into a license file and a data file.

    All lines up to and including the line containing
    <license_delimiter> are written to the license file, all
    following lines are written to the data file. The input is
    decoded as <file_encoding>, both output files are UTF-8.

    input_fname: name of the raw LOINC file
    data_fname: name of the data file to create, a unique
        file name is generated if None
    license_fname: name of the license file to create, a unique
        file name is generated if None

    Returns the tuple (data_fname, license_fname).
    """
    _log.debug('splitting LOINC source file [%s]', input_fname)

    if license_fname is None:
        license_fname = gmTools.get_unique_filename(prefix = 'loinc_license-', suffix = '.txt')
    _log.debug('LOINC header: %s', license_fname)

    if data_fname is None:
        data_fname = gmTools.get_unique_filename(prefix = 'loinc_data-', suffix = '.csv')
    _log.debug('LOINC data: %s', data_fname)

    # plain 'r': codecs.open() opens files in binary mode whenever an
    # encoding is given, the "U" flag is rejected by open() in Python 3.11+
    loinc_file = codecs.open(input_fname, 'r', encoding = file_encoding, errors = 'replace')
    try:
        out_file = codecs.open(license_fname, 'w', encoding = 'utf8', errors = 'replace')
        try:
            for line in loinc_file:
                if license_delimiter in line:
                    # the delimiter line still belongs to the license,
                    # everything after it goes into the data file
                    out_file.write(line)
                    out_file.close()
                    out_file = codecs.open(data_fname, 'w', encoding = 'utf8', errors = 'replace')
                    continue
                out_file.write(line)
        finally:
            # closes whichever output file is current, also on errors
            out_file.close()
    finally:
        loinc_file.close()

    return data_fname, license_fname
112 #============================================================
def map_field_names(data_fname='loinc_data.csv'):
    """Check the first line of a LOINC data file for a header.

    This is a stub: the first line of <data_fname> is read and
    handed to csv.Sniffer().has_header() but the result is not
    acted upon yet.

    Returns None.
    """
    # plain 'r': codecs.open() opens files in binary mode whenever an
    # encoding is given, the "U" flag is rejected by open() in Python 3.11+
    csv_file = codecs.open(data_fname, 'r', 'utf8', 'replace')
    try:
        first_line = csv_file.readline()
    finally:
        # only the first line is needed, do not keep the file open
        csv_file.close()

    sniffer = csv.Sniffer()
    if sniffer.has_header(first_line):
        pass
120 #============================================================
def get_version(license_fname='loinc_license.txt'):
    """Extract the LOINC version from a license file.

    Searches <license_fname> (read as UTF-8) for the first line
    starting with <version_tag>.

    Returns the remainder of that line with surrounding
    whitespace removed, or None if there is no such line.
    """
    # plain 'r': codecs.open() opens files in binary mode whenever an
    # encoding is given, the "U" flag is rejected by open() in Python 3.11+
    in_file = codecs.open(license_fname, 'r', encoding = 'utf8', errors = 'replace')
    try:
        for line in in_file:
            if line.startswith(version_tag):
                return line[len(version_tag):].strip()
    finally:
        # close the file on every exit path, including read errors
        in_file.close()

    return None
133 #============================================================
def _empty_loinc_staging_table(conn):
    """Delete all rows from ref.loinc_staging and commit."""
    curs = conn.cursor()
    cmd = u"""DELETE FROM ref.loinc_staging"""
    gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}])
    curs.close()
    conn.commit()
    _log.debug('staging table emptied')

#------------------------------------------------------------
def loinc_import(data_fname=None, license_fname=None, version=None, conn=None, lang='en_EN'):
    """Import a LOINC data file into the database.

    Steps:
        - empty the staging table ref.loinc_staging
        - load the rows of <data_fname> into the staging table
        - create or update the ref.data_source record
        - insert new / update existing rows in ref.loinc
        - empty the staging table again

    data_fname: tab-delimited data file, read as UTF-8
    license_fname: license file, its content becomes the
        description of the data source record
    version: LOINC version, detected from <license_fname>
        by get_version() if None
    conn: database connection, must offer .cursor() and .commit()
    lang: language stored with the data source record

    Returns True.

    Raises ValueError if the version cannot be detected.
    """
    if version is None:
        version = get_version(license_fname = license_fname)

    if version is None:
        raise ValueError('cannot detect LOINC version')

    _log.debug('importing LOINC version [%s]', version)

    # clean out staging area
    _empty_loinc_staging_table(conn)

    # import data from csv file into staging table
    # plain 'r': codecs.open() opens files in binary mode whenever an
    # encoding is given, the "U" flag is rejected by open() in Python 3.11+
    csv_file = codecs.open(data_fname, 'r', 'utf8', 'replace')
    try:
        loinc_reader = gmTools.unicode_csv_reader(csv_file, delimiter = "\t", quotechar = '"')
        curs = conn.cursor()
        try:
            # one placeholder per LOINC field
            cmd = u"""INSERT INTO ref.loinc_staging values (%s%%s)""" % (u'%s, ' * (len(loinc_fields) - 1))
            header_skipped = False
            for loinc_line in loinc_reader:
                if not header_skipped:
                    # the first row is not imported (presumably the column header)
                    header_skipped = True
                    continue
                gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': loinc_line}])
        finally:
            curs.close()
        conn.commit()
    finally:
        # do not leak the file handle if an insert fails
        csv_file.close()
    _log.debug('staging table loaded')

    # create data source record
    in_file = codecs.open(license_fname, 'r', encoding = 'utf8', errors = 'replace')
    try:
        desc = in_file.read()
    finally:
        in_file.close()
    args = {'ver': version, 'desc': desc, 'url': origin_url, 'name_long': name_long, 'name_short': name_short, 'lang': lang}
    queries = [
        # insert if not existing
        {'args': args, 'cmd': u"""
            INSERT INTO ref.data_source (name_long, name_short, version) SELECT
                %(name_long)s,
                %(name_short)s,
                %(ver)s
            WHERE NOT EXISTS (
                SELECT 1 FROM ref.data_source WHERE
                    name_long = %(name_long)s
                        AND
                    name_short = %(name_short)s
                        AND
                    version = %(ver)s
            )"""
        },
        # update non-unique fields
        {'args': args, 'cmd': u"""
            UPDATE ref.data_source SET
                description = %(desc)s,
                source = %(url)s,
                lang = %(lang)s
            WHERE
                name_long = %(name_long)s
                    AND
                name_short = %(name_short)s
                    AND
                version = %(ver)s
            """
        },
        # retrieve PK of data source
        {'args': args, 'cmd': u"""SELECT pk FROM ref.data_source WHERE name_short = %(name_short)s AND version = %(ver)s"""}
    ]
    curs = conn.cursor()
    rows, idx = gmPG2.run_rw_queries(link_obj = curs, queries = queries, return_data = True)
    data_src_pk = rows[0][0]
    curs.close()
    _log.debug('data source record created or updated, pk is #%s', data_src_pk)

    # import from staging table to real table
    args = {'src_pk': data_src_pk}
    queries = []
    # 1) insert (data source, term, code) for rows not yet known;
    #    the term is the long common name or else the ":"-joined name parts
    queries.append ({
        'args': args,
        'cmd': u"""
            INSERT INTO ref.loinc (
                fk_data_source, term, code
            )
            SELECT
                %(src_pk)s,
                coalesce (
                    nullif(long_common_name, ''),
                    (
                        coalesce(nullif(component, '') || ':', '') ||
                        coalesce(nullif(property, '') || ':', '') ||
                        coalesce(nullif(time_aspect, '') || ':', '') ||
                        coalesce(nullif(system, '') || ':', '') ||
                        coalesce(nullif(scale_type, '') || ':', '') ||
                        coalesce(nullif(method_type, '') || ':', '')
                    )
                ),
                nullif(loinc_num, '')
            FROM
                ref.loinc_staging r_ls
            WHERE NOT EXISTS (
                SELECT 1 FROM ref.loinc r_l WHERE
                    r_l.fk_data_source = %(src_pk)s
                        AND
                    r_l.code = nullif(r_ls.loinc_num, '')
                        AND
                    r_l.term = coalesce (
                        nullif(r_ls.long_common_name, ''),
                        (
                            coalesce(nullif(r_ls.component, '') || ':', '') ||
                            coalesce(nullif(r_ls.property, '') || ':', '') ||
                            coalesce(nullif(r_ls.time_aspect, '') || ':', '') ||
                            coalesce(nullif(r_ls.system, '') || ':', '') ||
                            coalesce(nullif(r_ls.scale_type, '') || ':', '') ||
                            coalesce(nullif(r_ls.method_type, '') || ':', '')
                        )
                    )
            )"""
    })
    # 2) fill in the remaining columns of the matching rows,
    #    empty strings from the staging table become NULL
    queries.append ({
        'args': args,
        'cmd': u"""
            UPDATE ref.loinc SET
                comment = nullif(r_ls.comments, ''),
                component = nullif(r_ls.component, ''),
                property = nullif(r_ls.property, ''),
                time_aspect = nullif(r_ls.time_aspect, ''),
                system = nullif(r_ls.system, ''),
                scale_type = nullif(r_ls.scale_type, ''),
                method_type = nullif(r_ls.method_type, ''),
                related_names_1_old = nullif(r_ls.related_names_1_old, ''),
                grouping_class = nullif(r_ls.class, ''),
                loinc_internal_source = nullif(r_ls.source, ''),
                dt_last_change = nullif(r_ls.dt_last_change, ''),
                change_type = nullif(r_ls.change_type, ''),
                answer_list = nullif(r_ls.answer_list, ''),
                code_status = nullif(r_ls.status, ''),
                maps_to = nullif(r_ls.map_to, ''),
                scope = nullif(r_ls.scope, ''),
                normal_range = nullif(r_ls.normal_range, ''),
                ipcc_units = nullif(r_ls.ipcc_units, ''),
                reference = nullif(r_ls.reference, ''),
                exact_component_synonym = nullif(r_ls.exact_component_synonym, ''),
                molar_mass = nullif(r_ls.molar_mass, ''),
                grouping_class_type = nullif(r_ls.class_type, '')::smallint,
                formula = nullif(r_ls.formula, ''),
                species = nullif(r_ls.species, ''),
                example_answers = nullif(r_ls.example_answers, ''),
                acs_synonyms = nullif(r_ls.acs_synonyms, ''),
                base_name = nullif(r_ls.base_name, ''),
                final = nullif(r_ls.final, ''),
                naa_ccr_id = nullif(r_ls.naa_ccr_id, ''),
                code_table = nullif(r_ls.code_table, ''),
                is_set_root = nullif(r_ls.is_set_root, '')::boolean,
                panel_elements = nullif(r_ls.panel_elements, ''),
                survey_question_text = nullif(r_ls.survey_question_text, ''),
                survey_question_source = nullif(r_ls.survey_question_source, ''),
                units_required = nullif(r_ls.units_required, ''),
                submitted_units = nullif(r_ls.submitted_units, ''),
                related_names_2 = nullif(r_ls.related_names_2, ''),
                short_name = nullif(r_ls.short_name, ''),
                order_obs = nullif(r_ls.order_obs, ''),
                cdisc_common_tests = nullif(r_ls.cdisc_common_tests, ''),
                hl7_field_subfield_id = nullif(r_ls.hl7_field_subfield_id, ''),
                external_copyright_notice = nullif(r_ls.external_copyright_notice, ''),
                example_units = nullif(r_ls.example_units, ''),
                inpc_percentage = nullif(r_ls.inpc_percentage, ''),
                long_common_name = nullif(r_ls.long_common_name, '')
            FROM
                ref.loinc_staging r_ls
            WHERE
                fk_data_source = %(src_pk)s
                    AND
                code = nullif(r_ls.loinc_num, '')
                    AND
                term = coalesce (
                    nullif(r_ls.long_common_name, ''),
                    (
                        coalesce(nullif(r_ls.component, '') || ':', '') ||
                        coalesce(nullif(r_ls.property, '') || ':', '') ||
                        coalesce(nullif(r_ls.time_aspect, '') || ':', '') ||
                        coalesce(nullif(r_ls.system, '') || ':', '') ||
                        coalesce(nullif(r_ls.scale_type, '') || ':', '') ||
                        coalesce(nullif(r_ls.method_type, '') || ':', '')
                    )
                )
        """
    })
    curs = conn.cursor()
    gmPG2.run_rw_queries(link_obj = curs, queries = queries)
    curs.close()
    # this also commits the data source record created above
    conn.commit()
    _log.debug('transfer from staging table to real table done')

    # clean out staging area
    _empty_loinc_staging_table(conn)

    return True
#============================================================
# SQL fragments for looking up LOINC codes.
#
# Each fragment selects the columns (data, field_label, list_label)
# and contains the placeholder %(fragment_condition)s, which
# cLOINCMatchProvider replaces with the actual match condition.
#
# cLOINCMatchProvider._normal_query currently combines the
# "test type" and "any coded term" fragments only.
#------------------------------------------------------------
# LOINC codes already assigned to test types
_SQL_LOINC_from_test_type = u"""
    -- from test type
    SELECT
        loinc AS data,
        loinc AS field_label,
        (loinc || ': ' || abbrev || ' (' || name || ')') AS list_label
    FROM clin.test_type
    WHERE loinc %(fragment_condition)s
"""

# LOINC coded terms in the current i18n language, matched on code or term
_SQL_LOINC_from_i18n_coded_term = u"""
    -- from coded term, in user language
    SELECT
        code AS data,
        code AS field_label,
        (code || ': ' || term) AS list_label
    FROM ref.v_coded_terms
    WHERE
        coding_system = 'LOINC'
            AND
        lang = i18n.get_curr_lang()
            AND
        (code %(fragment_condition)s
            OR
        term %(fragment_condition)s)
"""

# LOINC coded terms in 'en_EN', matched on code or term
_SQL_LOINC_from_en_EN_coded_term = u"""
    -- from coded term, in English
    SELECT
        code AS data,
        code AS field_label,
        (code || ': ' || term) AS list_label
    FROM ref.v_coded_terms
    WHERE
        coding_system = 'LOINC'
            AND
        lang = 'en_EN'
            AND
        (code %(fragment_condition)s
            OR
        term %(fragment_condition)s)
"""

# LOINC coded terms regardless of language, matched on code or term
_SQL_LOINC_from_any_coded_term = u"""
    -- from coded term, in any language
    SELECT
        code AS data,
        code AS field_label,
        (code || ': ' || term) AS list_label
    FROM ref.v_coded_terms
    WHERE
        coding_system = 'LOINC'
            AND
        (code %(fragment_condition)s
            OR
        term %(fragment_condition)s)
"""

class cLOINCMatchProvider(gmMatchProvider.cMatchProvider_SQL2):
    """Match provider for LOINC codes.

    Candidates are taken from clin.test_type and from the LOINC
    entries in ref.v_coded_terms. At most 75 matches, ordered by
    list_label, are requested per query.

    A fragment which contains no digits and consists of two parts
    separated by whitespace is searched for such that both parts
    must match (INTERSECT of one query per part).
    """

    # digit-free fragment with whitespace somewhere in its interior
    # (re.LOCALE must not be combined with re.UNICODE, Python 3
    # rejects re.LOCALE on str patterns altogether)
    _pattern = regex.compile(r'^\D+\s+\D+$', regex.UNICODE)

    # the language specific coded term fragments (i18n, en_EN)
    # are currently not part of the union
    _normal_query = u"""
        SELECT DISTINCT ON (list_label)
            data,
            field_label,
            list_label
        FROM (
            (%s) UNION ALL (
            %s)
        ) AS all_known_loinc""" % (
        _SQL_LOINC_from_test_type,
        _SQL_LOINC_from_any_coded_term
    )

    # frame for combining the queries for both parts of a two-part fragment
    _intersect_query = u"SELECT * FROM (\n(%s\n) INTERSECT (%s)\n) AS intersected_matches\nORDER BY list_label\nLIMIT 75"
    #--------------------------------------------------------
    def _split_fragment(self, aFragment):
        """Split a two-part fragment at its first run of whitespace.

        Returns the list [first part, remainder] or None if
        <aFragment> is not a two-part fragment.
        """
        if not cLOINCMatchProvider._pattern.match(aFragment):
            return None
        # split on any whitespace, like the pattern does: splitting on a
        # single u' ' fails on tabs and yields empty or space-prefixed
        # parts on doubled or leading spaces
        parts = aFragment.split(None, 1)
        if len(parts) != 2:
            return None
        return parts
    #--------------------------------------------------------
    def getMatchesByPhrase(self, aFragment):
        """Return matches for aFragment at start of phrases."""

        self._queries = [cLOINCMatchProvider._normal_query + u'\nORDER BY list_label\nLIMIT 75']
        return gmMatchProvider.cMatchProvider_SQL2.getMatchesByPhrase(self, aFragment)
    #--------------------------------------------------------
    def getMatchesByWord(self, aFragment):
        """Return matches for aFragment at start of words inside phrases."""

        parts = self._split_fragment(aFragment)
        if parts is not None:
            fragmentA, fragmentB = parts
            # %% keeps the placeholders intact for the argument substitution done later
            query1 = cLOINCMatchProvider._normal_query % {'fragment_condition': u'~* %%(fragmentA)s'}
            self._args['fragmentA'] = u"( %s)|(^%s)" % (fragmentA, fragmentA)
            query2 = cLOINCMatchProvider._normal_query % {'fragment_condition': u'~* %%(fragmentB)s'}
            self._args['fragmentB'] = u"( %s)|(^%s)" % (fragmentB, fragmentB)
            self._queries = [cLOINCMatchProvider._intersect_query % (query1, query2)]
            # the conditions are already part of the query
            return self._find_matches(u'dummy')

        self._queries = [cLOINCMatchProvider._normal_query + u'\nORDER BY list_label\nLIMIT 75']
        return gmMatchProvider.cMatchProvider_SQL2.getMatchesByWord(self, aFragment)
    #--------------------------------------------------------
    def getMatchesBySubstr(self, aFragment):
        """Return matches for aFragment as a true substring."""

        parts = self._split_fragment(aFragment)
        if parts is not None:
            fragmentA, fragmentB = parts
            # %% keeps the placeholders intact for the argument substitution done later
            query1 = cLOINCMatchProvider._normal_query % {'fragment_condition': u"ILIKE %%(fragmentA)s"}
            self._args['fragmentA'] = u'%%%s%%' % fragmentA
            query2 = cLOINCMatchProvider._normal_query % {'fragment_condition': u"ILIKE %%(fragmentB)s"}
            self._args['fragmentB'] = u'%%%s%%' % fragmentB
            self._queries = [cLOINCMatchProvider._intersect_query % (query1, query2)]
            # the conditions are already part of the query
            return self._find_matches(u'dummy')

        self._queries = [cLOINCMatchProvider._normal_query + u'\nORDER BY list_label\nLIMIT 75']
        return gmMatchProvider.cMatchProvider_SQL2.getMatchesBySubstr(self, aFragment)
#============================================================
# main
#------------------------------------------------------------
if __name__ == "__main__":

    # only run when explicitly asked to: <script> test <argument>
    if len(sys.argv) < 2 or sys.argv[1] != 'test':
        sys.exit()

    from Gnumed.pycommon import gmLog2
    from Gnumed.pycommon import gmI18N

    gmI18N.activate_locale()
#    gmDateTime.init()

    #--------------------------------------------------------
    def test_loinc_split():
        # sys.argv[2]: name of the raw LOINC file to split
        result = split_LOINCDBTXT(input_fname = sys.argv[2])
        print(result)
    #--------------------------------------------------------
    def test_loinc_import():
        loinc_import(version = '2.26')
    #--------------------------------------------------------
    def test_loinc2term():
        # sys.argv[2]: LOINC code to look up
        term = loinc2term(sys.argv[2])
        print('%s -> %s' % (sys.argv[2], term))
    #--------------------------------------------------------
    test_loinc_split()
    #test_loinc_import()
    #test_loinc2term()

#============================================================