Package Gnumed :: Package business :: Module gmLOINC
[frames] | [no frames]

Source Code for Module Gnumed.business.gmLOINC

  1  # -*- coding: utf8 -*- 
  2  """LOINC handling code. 
  3   
  4  http://loinc.org 
  5   
  6  license: GPL v2 or later 
  7  """ 
  8  #============================================================ 
  9  __version__ = "$Revision: 1.7 $" 
 10  __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>" 
 11   
 12  import sys, codecs, logging, csv 
 13   
 14   
 15  if __name__ == '__main__': 
 16          sys.path.insert(0, '../../') 
 17  from Gnumed.pycommon import gmPG2 
 18  from Gnumed.pycommon import gmTools 
 19   
 20   
 21  _log = logging.getLogger('gm.loinc') 
 22  _log.info(__version__) 
 23   
 24  origin_url = u'http://loinc.org'                # stored as "source" of the ref.data_source row by loinc_import() 
 25  file_encoding = 'latin1'                        # encoding of the LOINC input file as read by split_LOINCDBTXT(); determined empirically 
 26  license_delimiter = u'Clip Here for Data'       # the line containing this text is the last line of the license part 
 27  version_tag = u'LOINC(R) Database Version'      # get_version() takes the remainder of the line starting with this tag 
 28  name_long = u'LOINC® (Logical Observation Identifiers Names and Codes)'     # "name_long" of the ref.data_source row 
 29  name_short = u'LOINC'                           # "name_short" of the ref.data_source row 
 30   
 31  loinc_fields = u"LOINC_NUM COMPONENT PROPERTY TIME_ASPCT SYSTEM SCALE_TYP METHOD_TYP RELAT_NMS CLASS SOURCE DT_LAST_CH CHNG_TYPE COMMENTS ANSWERLIST STATUS MAP_TO SCOPE NORM_RANGE IPCC_UNITS REFERENCE EXACT_CMP_SY MOLAR_MASS CLASSTYPE FORMULA SPECIES EXMPL_ANSWERS ACSSYM BASE_NAME FINAL NAACCR_ID CODE_TABLE SETROOT PANELELEMENTS SURVEY_QUEST_TEXT SURVEY_QUEST_SRC UNITSREQUIRED SUBMITTED_UNITS RELATEDNAMES2 SHORTNAME ORDER_OBS CDISC_COMMON_TESTS HL7_FIELD_SUBFIELD_ID EXTERNAL_COPYRIGHT_NOTICE EXAMPLE_UNITS INPC_PERCENTAGE LONG_COMMON_NAME".split()     # loinc_import() uses the length of this list to size the staging INSERT 
 32   
 33  #============================================================ 
def loinc2term(loinc=None):
    """Look up the term recorded for a LOINC code in ref.v_coded_terms.

    The query prefers a term in the current database language
    (i18n.get_curr_lang()), then one in 'en_EN', then one in any language.

    loinc -- the LOINC code to look up

    Returns a list containing the term, or an empty list if no term matched.
    """
    # NOTE(review): each branch is a scalar subquery; presumably the database
    # rejects the query when a branch matches more than one row - confirm
    # that ref.v_coded_terms cannot hold duplicates per code and language.
    sql = u"""
SELECT coalesce (
    (SELECT term
    FROM ref.v_coded_terms
    WHERE
        coding_system = 'LOINC'
            AND
        code = %(loinc)s
            AND
        lang = i18n.get_curr_lang()
    ),
    (SELECT term
    FROM ref.v_coded_terms
    WHERE
        coding_system = 'LOINC'
            AND
        code = %(loinc)s
            AND
        lang = 'en_EN'
    ),
    (SELECT term
    FROM ref.v_coded_terms
    WHERE
        coding_system = 'LOINC'
            AND
        code = %(loinc)s
    )
)"""
    query = {'cmd': sql, 'args': {'loinc': loinc}}
    rows, _idx = gmPG2.run_ro_queries(queries = [query], get_col_idx = False)

    # the coalesce() always produces a row, which holds NULL on no-match
    if rows[0][0] is not None:
        return [ row[0] for row in rows ]

    return []
72 #============================================================
def split_LOINCDBTXT(input_fname=None, data_fname=None, license_fname=None):
    """Split a LOINC source file into a license file and a data file.

    The input is decoded with the module-level file_encoding and read line
    by line. Every line up to and including the first one containing
    license_delimiter goes into the license file, the lines after it go
    into the data file. Both output files are written as utf8.

    input_fname -- path of the LOINC source file
    data_fname -- path of the data file to write, a unique
        file name is generated if None
    license_fname -- path of the license file to write, a unique
        file name is generated if None

    Returns the tuple (data_fname, license_fname).

    Note that the data file is only created once the delimiter line has
    been seen, so without a delimiter in the input the returned data_fname
    does not exist. A second delimiter line would reopen (and thereby
    truncate) the data file.
    """
    _log.debug('splitting LOINC source file [%s]', input_fname)

    if license_fname is None:
        license_fname = gmTools.get_unique_filename(prefix = 'loinc_license-', suffix = '.txt')
    _log.debug('LOINC header: %s', license_fname)

    if data_fname is None:
        data_fname = gmTools.get_unique_filename(prefix = 'loinc_data-', suffix = '.csv')
    _log.debug('LOINC data: %s', data_fname)

    loinc_file = codecs.open(input_fname, 'rU', encoding = file_encoding, errors = 'replace')
    try:
        out_file = codecs.open(license_fname, 'w', encoding = 'utf8', errors = 'replace')
        try:
            for line in loinc_file:
                out_file.write(line)
                if license_delimiter in line:
                    # the delimiter line still belongs to the license,
                    # everything after it is data
                    out_file.close()
                    out_file = codecs.open(data_fname, 'w', encoding = 'utf8', errors = 'replace')
        finally:
            # closes whichever output is current, also on errors
            out_file.close()
    finally:
        # the input file used to be left open
        loinc_file.close()

    return data_fname, license_fname
101 #============================================================
def map_field_names(data_fname='loinc_data.csv'):
    """Unfinished stub: sniff whether the LOINC data file starts with a header.

    Reads the first line of the utf8 encoded file and hands it to
    csv.Sniffer.has_header(). Nothing is done with the answer yet.

    data_fname -- path of the LOINC data file

    Returns None. Errors from opening the file or from the sniffer (for
    instance csv.Error when it cannot determine a delimiter) are propagated.
    """
    # Plain 'r' rather than 'rU': the line is only sniffed, so newline
    # translation is irrelevant, and Python 3.11+ rejects the 'U' flag.
    csv_file = codecs.open(data_fname, 'r', 'utf8', 'replace')
    try:
        first_line = csv_file.readline()
    finally:
        # the file used to be left open
        csv_file.close()

    sniffer = csv.Sniffer()
    if sniffer.has_header(first_line):
        # TODO: map the header names onto the LOINC fields
        pass
109 #============================================================
def get_version(license_fname='loinc_license.txt'):
    """Extract the LOINC version from a license file.

    Scans the utf8 encoded file for the first line starting with the
    module-level version_tag and takes the remainder of that line,
    stripped of surrounding whitespace, as the version.

    license_fname -- path of the license file, such as the one
        written by split_LOINCDBTXT()

    Returns the version string, or None if no line starts with the tag.
    """
    in_file = codecs.open(license_fname, 'rU', encoding = 'utf8', errors = 'replace')
    try:
        for line in in_file:
            if line.startswith(version_tag):
                return line[len(version_tag):].strip()
    finally:
        # close the file on every exit path, including read errors
        in_file.close()

    return None
122 #============================================================
def _loinc_run_and_commit(conn, queries):
    """Run queries on a fresh cursor of conn, close the cursor, then commit."""
    curs = conn.cursor()
    try:
        gmPG2.run_rw_queries(link_obj = curs, queries = queries)
    finally:
        # do not leave the cursor open when a query fails
        curs.close()
    conn.commit()


def loinc_import(data_fname=None, license_fname=None, version=None, conn=None, lang='en_EN'):
    """Import a LOINC data file into the database.

    Steps, in order:
    1) (re)create the ref.data_source row for this LOINC version, with
       the content of the license file as its description
    2) empty ref.loinc_staging
    3) load all rows of the data file but the first (header) row
       into ref.loinc_staging
    4) copy the staged rows into ref.loinc, turning empty strings into
       NULL and linking them to the data source row
    5) empty ref.loinc_staging again

    data_fname -- path of the tab separated, utf8 encoded LOINC data file
    license_fname -- path of the utf8 encoded license file, it is always
        read for the description, and for the version if none is given
    version -- LOINC version, detected via get_version() if None
    conn -- database connection used for steps 2) to 5), it needs to
        offer cursor() and commit()
    lang -- language recorded with the data source row

    Returns True.
    Raises ValueError if the version is neither given nor detectable.
    """
    if version is None:
        version = get_version(license_fname = license_fname)

    if version is None:
        raise ValueError('cannot detect LOINC version')

    _log.debug('importing LOINC version [%s]', version)

    in_file = codecs.open(license_fname, 'rU', encoding = 'utf8', errors = 'replace')
    try:
        desc = in_file.read()
    finally:
        in_file.close()

    args = {'ver': version, 'desc': desc, 'url': origin_url, 'name_long': name_long, 'name_short': name_short, 'lang': lang}

    # create data source record
    # (runs without link_obj, hence not on conn)
    queries = [{
        'cmd': u"""DELETE FROM ref.data_source WHERE name_short = %(name_short)s AND version = %(ver)s""",
        'args': args
    }, {
        'cmd': u"""
INSERT INTO ref.data_source (name_long, name_short, version, description, lang, source) values (
    %(name_long)s,
    %(name_short)s,
    %(ver)s,
    %(desc)s,
    %(lang)s,
    %(url)s
)""",
        'args': args
    }, {
        'cmd': u"""SELECT pk FROM ref.data_source WHERE name_short = %(name_short)s AND version = %(ver)s""",
        'args': args
    }]
    rows, idx = gmPG2.run_rw_queries(queries = queries, return_data = True)
    data_src_pk = rows[0][0]
    _log.debug('data source record created, pk is #%s', data_src_pk)

    # import data
    csv_file = codecs.open(data_fname, 'rU', 'utf8', 'replace')
    try:
        loinc_reader = gmTools.unicode_csv_reader(csv_file, delimiter = "\t", quotechar = '"')

        # clean out staging area
        _loinc_run_and_commit(conn, [{'cmd': u"""DELETE FROM ref.loinc_staging"""}])
        _log.debug('staging table emptied')

        # from file into staging table, one placeholder per LOINC field
        cmd = u"""INSERT INTO ref.loinc_staging values (%s)""" % u', '.join([u'%s'] * len(loinc_fields))
        curs = conn.cursor()
        try:
            header_skipped = False
            for loinc_line in loinc_reader:
                if not header_skipped:
                    # the first row holds the column names
                    header_skipped = True
                    continue
                gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': loinc_line}])
        finally:
            curs.close()
        conn.commit()
    finally:
        # close the data file even if staging fails
        csv_file.close()
    _log.debug('staging table loaded')

    # from staging table to real table
    args = {'src_pk': data_src_pk}
    cmd = u"""
INSERT INTO ref.loinc (
    fk_data_source,

    term,

    code,
    comment,
    component,
    property,
    time_aspect,
    system,
    scale_type,
    method_type,
    related_names_1_old,
    grouping_class,
    loinc_internal_source,
    dt_last_change,
    change_type,
    answer_list,
    code_status,
    maps_to,
    scope,
    normal_range,
    ipcc_units,
    reference,
    exact_component_synonym,
    molar_mass,
    grouping_class_type,
    formula,
    species,
    example_answers,
    acs_synonyms,
    base_name,
    final,
    naa_ccr_id,
    code_table,
    is_set_root,
    panel_elements,
    survey_question_text,
    survey_question_source,
    units_required,
    submitted_units,
    related_names_2,
    short_name,
    order_obs,
    cdisc_common_tests,
    hl7_field_subfield_id,
    external_copyright_notice,
    example_units,
    inpc_percentage,
    long_common_name
)

SELECT

    %(src_pk)s,

    coalesce (
        nullif(long_common_name, ''),
        (
            coalesce(nullif(component, '') || ':', '') ||
            coalesce(nullif(property, '') || ':', '') ||
            coalesce(nullif(time_aspect, '') || ':', '') ||
            coalesce(nullif(system, '') || ':', '') ||
            coalesce(nullif(scale_type, '') || ':', '') ||
            coalesce(nullif(method_type, '') || ':', '')
        )
    ),

    nullif(loinc_num, ''),
    nullif(comments, ''),
    nullif(component, ''),
    nullif(property, ''),
    nullif(time_aspect, ''),
    nullif(system, ''),
    nullif(scale_type, ''),
    nullif(method_type, ''),
    nullif(related_names_1_old, ''),
    nullif(class, ''),
    nullif(source, ''),
    nullif(dt_last_change, ''),
    nullif(change_type, ''),
    nullif(answer_list, ''),
    nullif(status, ''),
    nullif(map_to, ''),
    nullif(scope, ''),
    nullif(normal_range, ''),
    nullif(ipcc_units, ''),
    nullif(reference, ''),
    nullif(exact_component_synonym, ''),
    nullif(molar_mass, ''),
    nullif(class_type, '')::smallint,
    nullif(formula, ''),
    nullif(species, ''),
    nullif(example_answers, ''),
    nullif(acs_synonyms, ''),
    nullif(base_name, ''),
    nullif(final, ''),
    nullif(naa_ccr_id, ''),
    nullif(code_table, ''),
    nullif(is_set_root, '')::boolean,
    nullif(panel_elements, ''),
    nullif(survey_question_text, ''),
    nullif(survey_question_source, ''),
    nullif(units_required, ''),
    nullif(submitted_units, ''),
    nullif(related_names_2, ''),
    nullif(short_name, ''),
    nullif(order_obs, ''),
    nullif(cdisc_common_tests, ''),
    nullif(hl7_field_subfield_id, ''),
    nullif(external_copyright_notice, ''),
    nullif(example_units, ''),
    nullif(inpc_percentage, ''),
    nullif(long_common_name, '')

FROM
    ref.loinc_staging
"""
    _loinc_run_and_commit(conn, [{'cmd': cmd, 'args': args}])
    _log.debug('transfer from staging table to real table done')

    # clean out staging area
    _loinc_run_and_commit(conn, [{'cmd': u"""DELETE FROM ref.loinc_staging"""}])
    _log.debug('staging table emptied')

    return True
#============================================================
# main
#------------------------------------------------------------
# Manual self-tests, run as: <this file> test <argument>
if __name__ == "__main__":

    if len(sys.argv) < 2:
        sys.exit()

    if sys.argv[1] != 'test':
        sys.exit()

    from Gnumed.pycommon import gmLog2
    from Gnumed.pycommon import gmI18N

    gmI18N.activate_locale()
#    gmDateTime.init()

    #--------------------------------------------------------
    def _test_argument():
        # the split and lookup tests read their input from the second
        # argument, exit with a message rather than an IndexError
        if len(sys.argv) < 3:
            sys.exit('usage: %s test <LOINC file | LOINC code>' % sys.argv[0])
        return sys.argv[2]
    #--------------------------------------------------------
    def test_loinc_split():
        # prints the (data file name, license file name) tuple
        print(split_LOINCDBTXT(input_fname = _test_argument()))
    #--------------------------------------------------------
    def test_loinc_import():
        # NOTE(review): passes neither file names nor a connection, so
        # loinc_import() cannot get past opening the license file
        loinc_import(version = '2.26')
    #--------------------------------------------------------
    def test_loinc2term():
        code = _test_argument()
        term = loinc2term(code)
        print('%s -> %s' % (code, term))
    #--------------------------------------------------------
    test_loinc_split()
    #test_loinc_import()
    #test_loinc2term()

#============================================================