Package Gnumed :: Package business :: Module gmLOINC
[frames] | no frames]

Source Code for Module Gnumed.business.gmLOINC

  1  # -*- coding: utf8 -*- 
  2  """LOINC handling code. 
  3   
  4  http://loinc.org 
  5   
  6  license: GPL v2 or later 
  7  """ 
  8  #============================================================ 
  9  __version__ = "$Revision: 1.7 $" 
 10  __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>" 
 11   
 12  import sys, codecs, logging, csv 
 13   
 14   
 15  if __name__ == '__main__': 
 16          sys.path.insert(0, '../../') 
 17  from Gnumed.pycommon import gmPG2 
 18  from Gnumed.pycommon import gmTools 
 19   
 20   
 21  _log = logging.getLogger('gm.loinc') 
 22  _log.info(__version__) 
 23   
 24  origin_url = u'http://loinc.org' 
 25  file_encoding = 'latin1'                        # encoding is empirical 
 26  license_delimiter = u'Clip Here for Data' 
 27  version_tag = u'LOINC(R) Database Version' 
 28  name_long = u'LOINC® (Logical Observation Identifiers Names and Codes)' 
 29  name_short = u'LOINC' 
 30   
 31  loinc_fields = u"LOINC_NUM COMPONENT PROPERTY TIME_ASPCT SYSTEM SCALE_TYP METHOD_TYP RELAT_NMS CLASS SOURCE DT_LAST_CH CHNG_TYPE COMMENTS ANSWERLIST STATUS MAP_TO SCOPE NORM_RANGE IPCC_UNITS REFERENCE EXACT_CMP_SY MOLAR_MASS CLASSTYPE FORMULA SPECIES EXMPL_ANSWERS ACSSYM BASE_NAME FINAL NAACCR_ID CODE_TABLE SETROOT PANELELEMENTS SURVEY_QUEST_TEXT SURVEY_QUEST_SRC UNITSREQUIRED SUBMITTED_UNITS RELATEDNAMES2 SHORTNAME ORDER_OBS CDISC_COMMON_TESTS HL7_FIELD_SUBFIELD_ID EXTERNAL_COPYRIGHT_NOTICE EXAMPLE_UNITS INPC_PERCENTAGE LONG_COMMON_NAME".split() 
 32   
 33  #============================================================ 
 34   
 35  LOINC_gfr_quantity = ['33914-3', '45066-8', '48642-3', '48643-1', '50044-7', '50210-4', '50384-7', '62238-1', '69405-9', '70969-1'] 
 36  LOINC_height = ['3137-7', '3138-5', '8301-4', '8302-2', '8305-5', '8306-3', '8307-1', '8308-9'] 
 37  LOINC_weight = ['18833-4', '29463-7', '3141-9', '3142-7', '8335-2', '8339-4', '8344-4', '8346-9', '8351-9'] 
 38   
 39  #============================================================ 
def loinc2term(loinc=None):
    """Look up the term stored for a LOINC code.

    The query tries, in this order: the term in the language reported
    by i18n.get_curr_lang(), the 'en_EN' term, any term for the code.

    loinc: the LOINC code to look up

    Returns a list holding the term, or an empty list on no match.
    """
    # NOTE: because of the coalesce() a no-match comes back
    # as one row holding NULL rather than as no row at all
    query = {
        'cmd': u"""
            SELECT coalesce (
                (SELECT term
                FROM ref.v_coded_terms
                WHERE
                    coding_system = 'LOINC'
                        AND
                    code = %(loinc)s
                        AND
                    lang = i18n.get_curr_lang()
                ),
                (SELECT term
                FROM ref.v_coded_terms
                WHERE
                    coding_system = 'LOINC'
                        AND
                    code = %(loinc)s
                        AND
                    lang = 'en_EN'
                ),
                (SELECT term
                FROM ref.v_coded_terms
                WHERE
                    coding_system = 'LOINC'
                        AND
                    code = %(loinc)s
                )
            )""",
        'args': {'loinc': loinc}
    }
    rows, idx = gmPG2.run_ro_queries(queries = [query], get_col_idx = False)

    first_term = rows[0][0]
    if first_term is None:
        return []

    terms = []
    for row in rows:
        terms.append(row[0])
    return terms
78 #============================================================
def split_LOINCDBTXT(input_fname=None, data_fname=None, license_fname=None):
    """Split a LOINC source file into its license part and its data part.

    All lines up to and including the first line containing
    `license_delimiter` are written to the license file, all
    following lines are written to the data file. The input is
    decoded as `file_encoding`, both outputs are written as UTF-8.

    input_fname: path of the LOINC source file
    data_fname: path of the data file to write,
        a unique filename is generated if None
    license_fname: path of the license file to write,
        a unique filename is generated if None

    Returns the tuple (data_fname, license_fname).
    """
    _log.debug('splitting LOINC source file [%s]', input_fname)

    if license_fname is None:
        license_fname = gmTools.get_unique_filename(prefix = 'loinc_license-', suffix = '.txt')
    _log.debug('LOINC header: %s', license_fname)

    if data_fname is None:
        data_fname = gmTools.get_unique_filename(prefix = 'loinc_data-', suffix = '.csv')
    _log.debug('LOINC data: %s', data_fname)

    loinc_file = codecs.open(input_fname, 'rU', encoding = file_encoding, errors = 'replace')
    try:
        out_file = codecs.open(license_fname, 'w', encoding = 'utf8', errors = 'replace')
        in_data_part = False
        try:
            for line in loinc_file:
                out_file.write(line)
                if in_data_part:
                    continue
                if license_delimiter in line:
                    # the delimiter line itself still belongs to the license,
                    # switch over only once: re-opening the data file on a
                    # later match would truncate what was already written
                    out_file.close()
                    out_file = codecs.open(data_fname, 'w', encoding = 'utf8', errors = 'replace')
                    in_data_part = True
        finally:
            # closing an already closed file is harmless
            out_file.close()
    finally:
        loinc_file.close()

    return data_fname, license_fname
107 #============================================================
def map_field_names(data_fname='loinc_data.csv'):
    """Check the first line of a LOINC data file for a header row.

    Unfinished: the outcome of the header detection is not acted
    upon yet and nothing is returned.

    data_fname: path of the UTF-8 encoded data file
    """
    csv_file = codecs.open(data_fname, 'rU', 'utf8', 'replace')
    try:
        first_line = csv_file.readline()
    finally:
        # only the first line is needed, do not leave the file open
        csv_file.close()

    sniffer = csv.Sniffer()
    if sniffer.has_header(first_line):
        # TODO: presumably the header fields are to be mapped here
        pass
115 #============================================================
def get_version(license_fname='loinc_license.txt', tag=None):
    """Extract the LOINC version from a license file.

    license_fname: path of the UTF-8 encoded license file
    tag: the text a line must start with to be taken as the
        version line, defaults to the module level `version_tag`

    Returns the remainder of the first line starting with the tag,
    stripped of surrounding whitespace, or None if no line matches.
    """
    if tag is None:
        tag = version_tag

    # plain 'r' rather than 'rU': the codecs reader splits lines on any
    # newline convention by itself and the result is stripped anyway,
    # while Python 3.11+ rejects the 'U' flag
    in_file = codecs.open(license_fname, 'r', encoding = 'utf8', errors = 'replace')
    try:
        for line in in_file:
            if line.startswith(tag):
                return line[len(tag):].strip()
    finally:
        # close the file even if reading fails
        in_file.close()

    return None
128 #============================================================
def _empty_loinc_staging(conn):
    """Delete all rows from ref.loinc_staging and commit."""
    curs = conn.cursor()
    cmd = u"""DELETE FROM ref.loinc_staging"""
    gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}])
    curs.close()
    conn.commit()
    _log.debug('staging table emptied')

def loinc_import(data_fname=None, license_fname=None, version=None, conn=None, lang='en_EN'):
    """Import a LOINC data file into the database.

    Steps, each committed on its own:
    - (re)create the ref.data_source record for this LOINC version
    - empty ref.loinc_staging
    - load the data file, minus its first line, into ref.loinc_staging
    - transfer the staging rows into ref.loinc, turning '' into NULL
    - empty ref.loinc_staging again

    data_fname: path of the tab separated, UTF-8 encoded data file,
        it is opened unconditionally so None will not work
    license_fname: path of the UTF-8 encoded license file, its content
        becomes the data source description, it is opened
        unconditionally so None will not work
    version: LOINC version, read from the license file if None
    conn: connection used for the staging and transfer steps,
        conn.cursor() is called unconditionally so None will not work
    lang: language stored with the data source record

    Raises ValueError if no version is given and none can be detected.

    Returns True.
    """
    if version is None:
        version = get_version(license_fname = license_fname)

    if version is None:
        raise ValueError('cannot detect LOINC version')

    _log.debug('importing LOINC version [%s]', version)

    in_file = codecs.open(license_fname, 'rU', encoding = 'utf8', errors = 'replace')
    try:
        desc = in_file.read()
    finally:
        in_file.close()

    args = {'ver': version, 'desc': desc, 'url': origin_url, 'name_long': name_long, 'name_short': name_short, 'lang': lang}

    # create data source record
    queries = [{
        'cmd': u"""DELETE FROM ref.data_source WHERE name_short = %(name_short)s AND version = %(ver)s""",
        'args': args
    }, {
        'cmd': u"""
            INSERT INTO ref.data_source (name_long, name_short, version, description, lang, source) values (
                %(name_long)s,
                %(name_short)s,
                %(ver)s,
                %(desc)s,
                %(lang)s,
                %(url)s
            )""",
        'args': args
    }, {
        'cmd': u"""SELECT pk FROM ref.data_source WHERE name_short = %(name_short)s AND version = %(ver)s""",
        'args': args
    }]
    rows, idx = gmPG2.run_rw_queries(queries = queries, return_data = True)
    data_src_pk = rows[0][0]
    _log.debug('data source record created, pk is #%s', data_src_pk)

    # import data
    csv_file = codecs.open(data_fname, 'rU', 'utf8', 'replace')
    try:
        loinc_reader = gmTools.unicode_csv_reader(csv_file, delimiter = "\t", quotechar = '"')

        # clean out staging area
        _empty_loinc_staging(conn)

        # from file into staging table
        curs = conn.cursor()
        # one placeholder per entry in loinc_fields
        cmd = u"""INSERT INTO ref.loinc_staging values (%s%%s)""" % (u'%s, ' * (len(loinc_fields) - 1))
        header_skipped = False
        for loinc_line in loinc_reader:
            # the first line is never loaded
            if not header_skipped:
                header_skipped = True
                continue
            gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': loinc_line}])
        curs.close()
        conn.commit()
    finally:
        # do not leak the data file if emptying or loading the staging table fails
        csv_file.close()
    _log.debug('staging table loaded')

    # from staging table to real table
    curs = conn.cursor()
    args = {'src_pk': data_src_pk}
    cmd = u"""
        INSERT INTO ref.loinc (
            fk_data_source,

            term,

            code,
            comment,
            component,
            property,
            time_aspect,
            system,
            scale_type,
            method_type,
            related_names_1_old,
            grouping_class,
            loinc_internal_source,
            dt_last_change,
            change_type,
            answer_list,
            code_status,
            maps_to,
            scope,
            normal_range,
            ipcc_units,
            reference,
            exact_component_synonym,
            molar_mass,
            grouping_class_type,
            formula,
            species,
            example_answers,
            acs_synonyms,
            base_name,
            final,
            naa_ccr_id,
            code_table,
            is_set_root,
            panel_elements,
            survey_question_text,
            survey_question_source,
            units_required,
            submitted_units,
            related_names_2,
            short_name,
            order_obs,
            cdisc_common_tests,
            hl7_field_subfield_id,
            external_copyright_notice,
            example_units,
            inpc_percentage,
            long_common_name
        )

        SELECT

            %(src_pk)s,

            coalesce (
                nullif(long_common_name, ''),
                (
                    coalesce(nullif(component, '') || ':', '') ||
                    coalesce(nullif(property, '') || ':', '') ||
                    coalesce(nullif(time_aspect, '') || ':', '') ||
                    coalesce(nullif(system, '') || ':', '') ||
                    coalesce(nullif(scale_type, '') || ':', '') ||
                    coalesce(nullif(method_type, '') || ':', '')
                )
            ),

            nullif(loinc_num, ''),
            nullif(comments, ''),
            nullif(component, ''),
            nullif(property, ''),
            nullif(time_aspect, ''),
            nullif(system, ''),
            nullif(scale_type, ''),
            nullif(method_type, ''),
            nullif(related_names_1_old, ''),
            nullif(class, ''),
            nullif(source, ''),
            nullif(dt_last_change, ''),
            nullif(change_type, ''),
            nullif(answer_list, ''),
            nullif(status, ''),
            nullif(map_to, ''),
            nullif(scope, ''),
            nullif(normal_range, ''),
            nullif(ipcc_units, ''),
            nullif(reference, ''),
            nullif(exact_component_synonym, ''),
            nullif(molar_mass, ''),
            nullif(class_type, '')::smallint,
            nullif(formula, ''),
            nullif(species, ''),
            nullif(example_answers, ''),
            nullif(acs_synonyms, ''),
            nullif(base_name, ''),
            nullif(final, ''),
            nullif(naa_ccr_id, ''),
            nullif(code_table, ''),
            nullif(is_set_root, '')::boolean,
            nullif(panel_elements, ''),
            nullif(survey_question_text, ''),
            nullif(survey_question_source, ''),
            nullif(units_required, ''),
            nullif(submitted_units, ''),
            nullif(related_names_2, ''),
            nullif(short_name, ''),
            nullif(order_obs, ''),
            nullif(cdisc_common_tests, ''),
            nullif(hl7_field_subfield_id, ''),
            nullif(external_copyright_notice, ''),
            nullif(example_units, ''),
            nullif(inpc_percentage, ''),
            nullif(long_common_name, '')

        FROM
            ref.loinc_staging
    """

    gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': args}])

    curs.close()
    conn.commit()
    _log.debug('transfer from staging table to real table done')

    # clean out staging area
    _empty_loinc_staging(conn)

    return True
#============================================================
# main
#------------------------------------------------------------
if __name__ == "__main__":

    # only run when invoked as: <script> test <argument>
    if len(sys.argv) < 2 or sys.argv[1] != 'test':
        sys.exit()

    from Gnumed.pycommon import gmLog2
    from Gnumed.pycommon import gmI18N

    gmI18N.activate_locale()
#    gmDateTime.init()

    #--------------------------------------------------------
    def test_loinc_split():
        # prints the (data_fname, license_fname) tuple
        result = split_LOINCDBTXT(input_fname = sys.argv[2])
        print(result)
    #--------------------------------------------------------
    def test_loinc_import():
        loinc_import(version = '2.26')
    #--------------------------------------------------------
    def test_loinc2term():
        code = sys.argv[2]
        print('%s -> %s' % (code, loinc2term(code)))
    #--------------------------------------------------------
    # enable the test to run by (un)commenting
    test_loinc_split()
    #test_loinc_import()
    #test_loinc2term()

#============================================================