Package Gnumed :: Package business :: Module gmLOINC
[frames] | no frames]

Source Code for Module Gnumed.business.gmLOINC

  1  # -*- coding: utf-8 -*- 
  2  """LOINC handling code. 
  3   
  4  http://loinc.org 
  5   
  6  license: GPL v2 or later 
  7  """ 
  8  #============================================================ 
  9  __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>" 
 10   
 11  import sys 
 12  import io 
 13  import logging 
 14  import csv 
 15  import re as regex 
 16   
 17   
 18  if __name__ == '__main__': 
 19          sys.path.insert(0, '../../') 
 20  from Gnumed.pycommon import gmPG2 
 21  from Gnumed.pycommon import gmTools 
 22  from Gnumed.pycommon import gmMatchProvider 
 23   
 24   
 25  _log = logging.getLogger('gm.loinc') 
 26   
 27   
 28  origin_url = 'http://loinc.org' 
 29  file_encoding = 'latin1'                        # encoding is empirical 
 30  license_delimiter = 'Clip Here for Data' 
 31  version_tag = 'LOINC(R) Database Version' 
 32  name_long = 'LOINC® (Logical Observation Identifiers Names and Codes)' 
 33  name_short = 'LOINC' 
 34   
 35  loinc_fields = "LOINC_NUM COMPONENT PROPERTY TIME_ASPCT SYSTEM SCALE_TYP METHOD_TYP RELAT_NMS CLASS SOURCE DT_LAST_CH CHNG_TYPE COMMENTS ANSWERLIST STATUS MAP_TO SCOPE NORM_RANGE IPCC_UNITS REFERENCE EXACT_CMP_SY MOLAR_MASS CLASSTYPE FORMULA SPECIES EXMPL_ANSWERS ACSSYM BASE_NAME FINAL NAACCR_ID CODE_TABLE SETROOT PANELELEMENTS SURVEY_QUEST_TEXT SURVEY_QUEST_SRC UNITSREQUIRED SUBMITTED_UNITS RELATEDNAMES2 SHORTNAME ORDER_OBS CDISC_COMMON_TESTS HL7_FIELD_SUBFIELD_ID EXTERNAL_COPYRIGHT_NOTICE EXAMPLE_UNITS INPC_PERCENTAGE LONG_COMMON_NAME".split() 
 36   
 37  #============================================================ 
 38   
 39  LOINC_creatinine_quantity = ['2160-0', '14682-9', '40264-4', '40248-7'] 
 40  LOINC_gfr_quantity = ['33914-3', '45066-8', '48642-3', '48643-1', '50044-7', '50210-4', '50384-7', '62238-1', '69405-9', '70969-1'] 
 41  LOINC_height = ['3137-7', '3138-5', '8301-4', '8302-2', '8305-5', '8306-3', '8307-1', '8308-9'] 
 42  LOINC_weight = ['18833-4', '29463-7', '3141-9', '3142-7', '8335-2', '8339-4', '8344-4', '8346-9', '8351-9'] 
 43  LOINC_rr_quantity = ['8478-0', '8448-3', '8449-1', '8456-6', '8457-4', '8458-2', '55284-4', '50403-5', '50402-7', '45372-0'] 
 44  LOINC_heart_rate_quantity = ['8867-4', '67129-7', '40443-4', '69000-8', '69001-6', '68999-2'] 
 45  LOINC_inr_quantity = ['34714-6', '46418-0', '6301-6', '38875-1'] 
 46   
 47  #============================================================ 
 48  # convenience functions 
 49  #------------------------------------------------------------ 
def format_loinc(loinc):
    """Return a formatted rendering of the ref.loinc record for a code.

    The record is looked up with loinc2data() and passed on to
    gmTools.format_dict_like() with tabular = True, no value
    delimiters and values2ignore = [None, ''].

    Returns None if loinc2data() does not find the code.
    """
    loinc_row = loinc2data(loinc)
    if loinc_row is not None:
        return gmTools.format_dict_like (
            loinc_row,
            tabular = True,
            value_delimiters = None,
            values2ignore = [None, '']
        )
    return None
60 61 #------------------------------------------------------------
def loinc2data(loinc):
    """Fetch the ref.loinc row for a LOINC code.

    Returns the first row of ref.loinc whose "code" equals <loinc>,
    or None if the query yields no rows.
    """
    query = {
        'cmd': 'SELECT * FROM ref.loinc WHERE code = %(loinc)s',
        'args': {'loinc': loinc}
    }
    rows, idx = gmPG2.run_ro_queries(queries = [query], get_col_idx = False)
    return rows[0] if len(rows) > 0 else None
69 70 #------------------------------------------------------------
def loinc2term(loinc=None):
    """Translate a LOINC code into its term(s).

    ref.v_coded_terms is searched for the code, preferring (by way of
    coalesce) a term in the current database language, then one in
    'en_EN', then one in any language.

    Returns a list holding the first column of each result row, or an
    empty list if the first row carries NULL (no term found).
    """
    # NOTE: will return [NULL] on no-match due to the coalesce()
    # NOTE(review): each sub-select is used as a scalar expression, so this
    # presumably relies on at most one term per code/language -- TODO confirm
    SQL = """
        SELECT coalesce (
            (SELECT term
            FROM ref.v_coded_terms
            WHERE
                coding_system = 'LOINC'
                    AND
                code = %(loinc)s
                    AND
                lang = i18n.get_curr_lang()
            ),
            (SELECT term
            FROM ref.v_coded_terms
            WHERE
                coding_system = 'LOINC'
                    AND
                code = %(loinc)s
                    AND
                lang = 'en_EN'
            ),
            (SELECT term
            FROM ref.v_coded_terms
            WHERE
                coding_system = 'LOINC'
                    AND
                code = %(loinc)s
            )
        )"""
    rows, idx = gmPG2.run_ro_queries (
        queries = [{'cmd': SQL, 'args': {'loinc': loinc}}],
        get_col_idx = False
    )

    if rows[0][0] is not None:
        return [ row[0] for row in rows ]

    return []
109 110 #============================================================ 111 # LOINCDBTXT handling 112 #------------------------------------------------------------
def split_LOINCDBTXT(input_fname=None, data_fname=None, license_fname=None):
    """Split a LOINC source file into a license file and a data file.

    All lines up to and including the line containing
    <license_delimiter> are written to the license file, all
    following lines are written to the data file. The input is
    read as <file_encoding>, both outputs are written as utf8.

    input_fname: name of the LOINC source file to split
    data_fname: where to write the data part, a unique
        'loinc_data-*.csv' name is generated if None
    license_fname: where to write the license part, a unique
        'loinc_license-*.txt' name is generated if None

    Returns the tuple (data_fname, license_fname).
    """
    _log.debug('splitting LOINC source file [%s]', input_fname)

    if license_fname is None:
        license_fname = gmTools.get_unique_filename(prefix = 'loinc_license-', suffix = '.txt')
    _log.debug('LOINC header: %s', license_fname)

    if data_fname is None:
        data_fname = gmTools.get_unique_filename(prefix = 'loinc_data-', suffix = '.csv')
    _log.debug('LOINC data: %s', data_fname)

    # "with" makes sure the input file gets closed, too
    with io.open(input_fname, mode = 'rt', encoding = file_encoding, errors = 'replace') as loinc_file:
        out_file = io.open(license_fname, mode = 'wt', encoding = 'utf8', errors = 'replace')
        try:
            for line in loinc_file:
                out_file.write(line)
                if license_delimiter in line:
                    # the delimiter line still belongs to the license,
                    # everything after it goes into the data file
                    out_file.close()
                    out_file = io.open(data_fname, mode = 'wt', encoding = 'utf8', errors = 'replace')
        finally:
            # closes whichever output file is current, also on errors
            # (closing an already closed file is harmless)
            out_file.close()

    return data_fname, license_fname
141 142 #============================================================
def map_field_names(data_fname='loinc_data.csv'):
    """Check whether the first line of a LOINC data file looks like a header.

    This is an unfinished stub: the answer of the header check is
    not acted upon and the function always returns None.

    data_fname: name of the (utf8 encoded) LOINC data file

    Raises whatever io.open() or csv.Sniffer.has_header() raise,
    say, when the file is missing or no delimiter can be detected.
    """
    # only the first line is needed, so do not keep the file open
    with io.open(data_fname, mode = 'rt', encoding = 'utf8', errors = 'replace') as csv_file:
        first_line = csv_file.readline()

    if csv.Sniffer().has_header(first_line):
        # placeholder: nothing is done with the answer as yet
        pass
150 151 #============================================================
def get_version(license_fname='loinc_license.txt'):
    """Extract the LOINC version from a license file.

    license_fname: name of the (utf8 encoded) license file

    Returns whatever follows <version_tag> on the first line starting
    with that tag, stripped of surrounding whitespace, or None if no
    line starts with the tag.
    """
    # "with" closes the file even if reading fails half-way through
    with io.open(license_fname, mode = 'rt', encoding = 'utf8', errors = 'replace') as in_file:
        for line in in_file:
            if line.startswith(version_tag):
                return line[len(version_tag):].strip()
    return None
164 165 #============================================================
def loinc_import(data_fname=None, license_fname=None, version=None, conn=None, lang='en_EN'):
    """Import LOINC data into ref.loinc by way of the staging table.

    Steps:
        1) empty staging.loinc_staging
        2) load all rows but the first one of <data_fname> into the staging table
        3) create or update the ref.data_source record for this LOINC version
        4) add new codes to ref.loinc and update their details from staging
        5) empty staging.loinc_staging again

    data_fname: tab separated, utf8 encoded LOINC data file
    license_fname: utf8 encoded license file, its content becomes the
        description of the data source, it is also used for detecting
        the version if <version> is None
    version: LOINC version, detected by get_version() if None
    conn: database connection on which to run and commit the import
    lang: language to store with the data source record

    Returns True.

    Raises ValueError if the version is neither given nor detectable.
    """
    if version is None:
        version = get_version(license_fname = license_fname)

    if version is None:
        raise ValueError('cannot detect LOINC version')

    _log.debug('importing LOINC version [%s]', version)

    #--------------------------------------------------------
    def _empty_staging_table():
        # needed both before and after the import
        curs = conn.cursor()
        cmd = """DELETE FROM staging.loinc_staging"""
        gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}])
        curs.close()
        conn.commit()
        _log.debug('staging table emptied')
    #--------------------------------------------------------

    # clean out staging area
    _empty_staging_table()

    # import data from csv file into staging table,
    # one placeholder per LOINC field
    cmd = """INSERT INTO staging.loinc_staging values (%s)""" % (', '.join(['%s'] * len(loinc_fields)))
    # "with" makes sure the file is closed even if an INSERT fails
    with io.open(data_fname, mode = 'rt', encoding = 'utf8', errors = 'replace') as csv_file:
        loinc_lines = iter(gmTools.unicode_csv_reader(csv_file, delimiter = "\t", quotechar = '"'))
        curs = conn.cursor()
        # the first line is not imported (presumably the header line)
        next(loinc_lines, None)
        for loinc_line in loinc_lines:
            gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': loinc_line}])
        curs.close()
        conn.commit()
    _log.debug('staging table loaded')

    # create data source record
    with io.open(license_fname, mode = 'rt', encoding = 'utf8', errors = 'replace') as in_file:
        desc = in_file.read()
    args = {'ver': version, 'desc': desc, 'url': origin_url, 'name_long': name_long, 'name_short': name_short, 'lang': lang}
    queries = [
        # insert if not existing
        {'args': args, 'cmd': """
            INSERT INTO ref.data_source (name_long, name_short, version) SELECT
                %(name_long)s,
                %(name_short)s,
                %(ver)s
            WHERE NOT EXISTS (
                SELECT 1 FROM ref.data_source WHERE
                    name_long = %(name_long)s
                        AND
                    name_short = %(name_short)s
                        AND
                    version = %(ver)s
            )"""
        },
        # update non-unique fields
        {'args': args, 'cmd': """
            UPDATE ref.data_source SET
                description = %(desc)s,
                source = %(url)s,
                lang = %(lang)s
            WHERE
                name_long = %(name_long)s
                    AND
                name_short = %(name_short)s
                    AND
                version = %(ver)s
            """
        },
        # retrieve PK of data source
        {'args': args, 'cmd': """SELECT pk FROM ref.data_source WHERE name_short = %(name_short)s AND version = %(ver)s"""}
    ]
    curs = conn.cursor()
    rows, idx = gmPG2.run_rw_queries(link_obj = curs, queries = queries, return_data = True)
    data_src_pk = rows[0][0]
    curs.close()
    _log.debug('data source record created or updated, pk is #%s', data_src_pk)

    # import from staging table to real table
    args = {'src_pk': data_src_pk}
    queries = []
    # 1) add codes not yet known for this data source, the term is the
    #    long common name or else the ":"-joined name parts
    queries.append ({
        'args': args,
        'cmd': """
            INSERT INTO ref.loinc (
                fk_data_source, term, code
            )
            SELECT
                %(src_pk)s,
                coalesce (
                    nullif(long_common_name, ''),
                    (
                        coalesce(nullif(component, '') || ':', '') ||
                        coalesce(nullif(property, '') || ':', '') ||
                        coalesce(nullif(time_aspect, '') || ':', '') ||
                        coalesce(nullif(system, '') || ':', '') ||
                        coalesce(nullif(scale_type, '') || ':', '') ||
                        coalesce(nullif(method_type, '') || ':', '')
                    )
                ),
                nullif(loinc_num, '')
            FROM
                staging.loinc_staging st_ls
            WHERE NOT EXISTS (
                SELECT 1 FROM ref.loinc r_l WHERE
                    r_l.fk_data_source = %(src_pk)s
                        AND
                    r_l.code = nullif(st_ls.loinc_num, '')
                        AND
                    r_l.term = coalesce (
                        nullif(st_ls.long_common_name, ''),
                        (
                            coalesce(nullif(st_ls.component, '') || ':', '') ||
                            coalesce(nullif(st_ls.property, '') || ':', '') ||
                            coalesce(nullif(st_ls.time_aspect, '') || ':', '') ||
                            coalesce(nullif(st_ls.system, '') || ':', '') ||
                            coalesce(nullif(st_ls.scale_type, '') || ':', '') ||
                            coalesce(nullif(st_ls.method_type, '') || ':', '')
                        )
                    )
            )"""
    })
    # 2) update the details of all codes of this data source from staging
    queries.append ({
        'args': args,
        'cmd': """
            UPDATE ref.loinc SET
                comment = nullif(st_ls.comments, ''),
                component = nullif(st_ls.component, ''),
                property = nullif(st_ls.property, ''),
                time_aspect = nullif(st_ls.time_aspect, ''),
                system = nullif(st_ls.system, ''),
                scale_type = nullif(st_ls.scale_type, ''),
                method_type = nullif(st_ls.method_type, ''),
                related_names_1_old = nullif(st_ls.related_names_1_old, ''),
                grouping_class = nullif(st_ls.class, ''),
                loinc_internal_source = nullif(st_ls.source, ''),
                dt_last_change = nullif(st_ls.dt_last_change, ''),
                change_type = nullif(st_ls.change_type, ''),
                answer_list = nullif(st_ls.answer_list, ''),
                code_status = nullif(st_ls.status, ''),
                maps_to = nullif(st_ls.map_to, ''),
                scope = nullif(st_ls.scope, ''),
                normal_range = nullif(st_ls.normal_range, ''),
                ipcc_units = nullif(st_ls.ipcc_units, ''),
                reference = nullif(st_ls.reference, ''),
                exact_component_synonym = nullif(st_ls.exact_component_synonym, ''),
                molar_mass = nullif(st_ls.molar_mass, ''),
                grouping_class_type = nullif(st_ls.class_type, '')::smallint,
                formula = nullif(st_ls.formula, ''),
                species = nullif(st_ls.species, ''),
                example_answers = nullif(st_ls.example_answers, ''),
                acs_synonyms = nullif(st_ls.acs_synonyms, ''),
                base_name = nullif(st_ls.base_name, ''),
                final = nullif(st_ls.final, ''),
                naa_ccr_id = nullif(st_ls.naa_ccr_id, ''),
                code_table = nullif(st_ls.code_table, ''),
                is_set_root = nullif(st_ls.is_set_root, '')::boolean,
                panel_elements = nullif(st_ls.panel_elements, ''),
                survey_question_text = nullif(st_ls.survey_question_text, ''),
                survey_question_source = nullif(st_ls.survey_question_source, ''),
                units_required = nullif(st_ls.units_required, ''),
                submitted_units = nullif(st_ls.submitted_units, ''),
                related_names_2 = nullif(st_ls.related_names_2, ''),
                short_name = nullif(st_ls.short_name, ''),
                order_obs = nullif(st_ls.order_obs, ''),
                cdisc_common_tests = nullif(st_ls.cdisc_common_tests, ''),
                hl7_field_subfield_id = nullif(st_ls.hl7_field_subfield_id, ''),
                external_copyright_notice = nullif(st_ls.external_copyright_notice, ''),
                example_units = nullif(st_ls.example_units, ''),
                inpc_percentage = nullif(st_ls.inpc_percentage, ''),
                long_common_name = nullif(st_ls.long_common_name, '')
            FROM
                staging.loinc_staging st_ls
            WHERE
                fk_data_source = %(src_pk)s
                    AND
                code = nullif(st_ls.loinc_num, '')
                    AND
                term = coalesce (
                    nullif(st_ls.long_common_name, ''),
                    (
                        coalesce(nullif(st_ls.component, '') || ':', '') ||
                        coalesce(nullif(st_ls.property, '') || ':', '') ||
                        coalesce(nullif(st_ls.time_aspect, '') || ':', '') ||
                        coalesce(nullif(st_ls.system, '') || ':', '') ||
                        coalesce(nullif(st_ls.scale_type, '') || ':', '') ||
                        coalesce(nullif(st_ls.method_type, '') || ':', '')
                    )
                )
        """
    })
    curs = conn.cursor()
    gmPG2.run_rw_queries(link_obj = curs, queries = queries)
    curs.close()
    # this also commits the data source record created above
    conn.commit()
    _log.debug('transfer from staging table to real table done')

    # clean out staging area
    _empty_staging_table()

    return True

#============================================================
# SQL building blocks for the LOINC match provider below.
#
# Each fragment selects the columns (data, field_label, list_label).
# %(fragment_condition)s is left in place here, it is to be replaced
# later by the comparison to apply, say, "~* %(fragmentA)s".
#
# Mind that each fragment starts with an SQL "--" comment line, hence
# the line breaks inside the strings matter.

# LOINC codes assigned to test types
_SQL_LOINC_from_test_type = """
-- from test type
SELECT
    loinc AS data,
    loinc AS field_label,
    (loinc || ': ' || abbrev || ' (' || name || ')') AS list_label
FROM clin.test_type
WHERE loinc %(fragment_condition)s
"""

# LOINC coded terms in the current database language, matched on code or term
_SQL_LOINC_from_i18n_coded_term = """
-- from coded term, in user language
SELECT
    code AS data,
    code AS field_label,
    (code || ': ' || term) AS list_label
FROM ref.v_coded_terms
WHERE
    coding_system = 'LOINC'
        AND
    lang = i18n.get_curr_lang()
        AND
    (code %(fragment_condition)s
        OR
    term %(fragment_condition)s)
"""

# LOINC coded terms in language 'en_EN', matched on code or term
_SQL_LOINC_from_en_EN_coded_term = """
-- from coded term, in English
SELECT
    code AS data,
    code AS field_label,
    (code || ': ' || term) AS list_label
FROM ref.v_coded_terms
WHERE
    coding_system = 'LOINC'
        AND
    lang = 'en_EN'
        AND
    (code %(fragment_condition)s
        OR
    term %(fragment_condition)s)
"""

# LOINC coded terms regardless of language, matched on code or term
_SQL_LOINC_from_any_coded_term = """
-- from coded term, in any language
SELECT
    code AS data,
    code AS field_label,
    (code || ': ' || term) AS list_label
FROM ref.v_coded_terms
WHERE
    coding_system = 'LOINC'
        AND
    (code %(fragment_condition)s
        OR
    term %(fragment_condition)s)
"""

#------------------------------------------------------------
class cLOINCMatchProvider(gmMatchProvider.cMatchProvider_SQL2):
    """Match provider suggesting LOINC codes.

    Candidates are taken from the LOINC codes of clin.test_type and
    from the LOINC terms in ref.v_coded_terms (any language). At most
    75 matches, ordered by list label, are asked for.

    When searching by word or by substring, a digit-free fragment
    consisting of two whitespace separated parts is handled specially:
    only candidates matching both parts are returned.
    """

    # digit-free fragment with whitespace somewhere in the middle
    _pattern = regex.compile(r'^\D+\s+\D+$', regex.UNICODE)

    _normal_query = """
        SELECT DISTINCT ON (list_label)
            data,
            field_label,
            list_label
        FROM (
            (%s) UNION ALL (
            %s)
        ) AS all_known_loinc""" % (
        _SQL_LOINC_from_test_type,
        _SQL_LOINC_from_any_coded_term
    )
    # currently unused sources:
    #-- %s) UNION ALL (
    #-- %s) UNION ALL (
    # %
    # _SQL_LOINC_from_i18n_coded_term,
    # _SQL_LOINC_from_en_EN_coded_term,

    #--------------------------------------------------------
    def __split_fragment(self, aFragment):
        """Return [first part, remainder] of a two-part fragment, else None."""
        if cLOINCMatchProvider._pattern.match(aFragment) is None:
            return None
        # _pattern accepts any whitespace, so split on whitespace runs
        # rather than on a single blank: splitting on ' ' used to fail
        # on, say, TAB separated parts and to produce empty parts on
        # leading or doubled blanks
        parts = aFragment.split(None, 1)
        if len(parts) != 2:
            # say, a fragment consisting of whitespace only
            return None
        return parts

    #--------------------------------------------------------
    def __intersect_query(self, condition_A, condition_B):
        """Return SQL for the candidates matching both conditions."""
        query1 = cLOINCMatchProvider._normal_query % {'fragment_condition': condition_A}
        query2 = cLOINCMatchProvider._normal_query % {'fragment_condition': condition_B}
        return "SELECT * FROM (\n(%s\n) INTERSECT (%s)\n) AS intersected_matches\nORDER BY list_label\nLIMIT 75" % (query1, query2)

    #--------------------------------------------------------
    def getMatchesByPhrase(self, aFragment):
        """Return matches for aFragment at start of phrases."""

        self._queries = [cLOINCMatchProvider._normal_query + '\nORDER BY list_label\nLIMIT 75']
        return gmMatchProvider.cMatchProvider_SQL2.getMatchesByPhrase(self, aFragment)

    #--------------------------------------------------------
    def getMatchesByWord(self, aFragment):
        """Return matches for aFragment at start of words inside phrases."""

        parts = self.__split_fragment(aFragment)
        if parts is not None:
            fragmentA, fragmentB = parts
            # NOTE(review): the parts go into the regular expressions
            # unescaped -- TODO confirm whether they need sanitizing
            self._args['fragmentA'] = "( %s)|(^%s)" % (fragmentA, fragmentA)
            self._args['fragmentB'] = "( %s)|(^%s)" % (fragmentB, fragmentB)
            # the doubled "%%" is kept verbatim, presumably it gets
            # unescaped by further %-formatting in the base class -- TODO confirm
            self._queries = [self.__intersect_query('~* %%(fragmentA)s', '~* %%(fragmentB)s')]
            return self._find_matches('dummy')

        self._queries = [cLOINCMatchProvider._normal_query + '\nORDER BY list_label\nLIMIT 75']
        return gmMatchProvider.cMatchProvider_SQL2.getMatchesByWord(self, aFragment)

    #--------------------------------------------------------
    def getMatchesBySubstr(self, aFragment):
        """Return matches for aFragment as a true substring."""

        parts = self.__split_fragment(aFragment)
        if parts is not None:
            fragmentA, fragmentB = parts
            self._args['fragmentA'] = '%%%s%%' % fragmentA
            self._args['fragmentB'] = '%%%s%%' % fragmentB
            # the doubled "%%" is kept verbatim, presumably it gets
            # unescaped by further %-formatting in the base class -- TODO confirm
            self._queries = [self.__intersect_query("ILIKE %%(fragmentA)s", "ILIKE %%(fragmentB)s")]
            return self._find_matches('dummy')

        self._queries = [cLOINCMatchProvider._normal_query + '\nORDER BY list_label\nLIMIT 75']
        return gmMatchProvider.cMatchProvider_SQL2.getMatchesBySubstr(self, aFragment)

#============================================================
# main
#------------------------------------------------------------
if __name__ == "__main__":

    # only run when invoked as "<script> test ..."
    if len(sys.argv) < 2 or sys.argv[1] != 'test':
        sys.exit()

    from Gnumed.pycommon import gmLog2
    from Gnumed.pycommon import gmI18N

    gmI18N.activate_locale()
#   gmDateTime.init()

    #--------------------------------------------------------
    def test_loinc_split():
        # sys.argv[2]: LOINC source file to split
        source_fname = sys.argv[2]
        print(split_LOINCDBTXT(input_fname = source_fname))

    #--------------------------------------------------------
    def test_loinc_import():
        loinc_import(version = '2.26')

    #--------------------------------------------------------
    def test_loinc2term():
        # sys.argv[2]: LOINC code to translate
        print(sys.argv[2], '->', loinc2term(sys.argv[2]))

    #--------------------------------------------------------
    def test_format_loinc():
        # sys.argv[2]: LOINC code to look up and format
        code = sys.argv[2]
        print(code)
        print(format_loinc(code))

    #--------------------------------------------------------
    #test_loinc_split()
    #test_loinc_import()
    #test_loinc2term()
    test_format_loinc()

#============================================================