Package Gnumed :: Package business :: Module gmHL7
[frames | no frames]

Source Code for Module Gnumed.business.gmHL7

  1  # -*- coding: utf8 -*- 
  2  """Some HL7 handling.""" 
  3  #============================================================ 
  4  __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>" 
  5  __license__ = "GPL v2 or later" 
  6   
  7   
  8  import sys 
  9  import os 
 10  import codecs 
 11  import logging 
 12  import time 
 13  import datetime as pyDT 
 14  import hl7 as pyhl7 
 15  from xml.etree import ElementTree as pyxml 
 16   
 17   
 18  if __name__ == '__main__': 
 19          sys.path.insert(0, '../../') 
 20  from Gnumed.pycommon import gmTools 
 21  from Gnumed.pycommon import gmBusinessDBObject 
 22  from Gnumed.pycommon import gmPG2 
 23  from Gnumed.pycommon import gmDateTime 
 24  from Gnumed.business import gmPathLab 
 25   
 26   
 27  _log = logging.getLogger('gm.hl7') 
 28   
 29  # constants 
 30  HL7_EOL = u'\r' 
 31   
 32  HL7_SEGMENTS = u'FHS BHS MSH PID PV1 OBX NTE ORC OBR'.split() 
 33   
 34  MSH_sending_lab = 3 
 35   
 36  PID_name = 5 
 37  PID_lastname = 0 
 38  PID_firstname = 1 
 39  PID_middlename = 2 
 40  PID_dob = 7 
 41  PID_gender = 8 
 42   
 43  OBX_type = 3 
 44  OBX_LOINC = 0 
 45  OBX_name = 1 
 46  OBX_value = 5 
 47  OBX_unit = 6 
 48   
 49  #============================================================ 
 50  # class to handle unmatched incoming clinical data 
 51  #------------------------------------------------------------ 
 52  _SQL_get_incoming_data = u"""SELECT * FROM clin.v_incoming_data_unmatched WHERE %s""" 
 53   
class cIncomingData(gmBusinessDBObject.cBusinessDBObject):
    """Represents items of incoming data, say, HL7 snippets.

    Rows are fetched through the view clin.v_incoming_data_unmatched
    and written back to the table clin.incoming_data_unmatched.
    """

    _cmd_fetch_payload = _SQL_get_incoming_data % u"pk_incoming_data_unmatched = %s"
    _cmds_store_payload = [
        u"""UPDATE clin.incoming_data_unmatched SET
                fk_patient_candidates = %(pk_patient_candidates)s,
                fk_identity_disambiguated = %(pk_identity_disambiguated)s,
                fk_provider_disambiguated = %(pk_provider_disambiguated)s,
                request_id = gm.nullify_empty_string(%(request_id)s),
                firstnames = gm.nullify_empty_string(%(firstnames)s),
                lastnames = gm.nullify_empty_string(%(lastnames)s),
                dob = %(dob)s,
                postcode = gm.nullify_empty_string(%(postcode)s),
                other_info = gm.nullify_empty_string(%(other_info)s),
                type = gm.nullify_empty_string(%(data_type)s),
                gender = gm.nullify_empty_string(%(gender)s),
                requestor = gm.nullify_empty_string(%(requestor)s),
                external_data_id = gm.nullify_empty_string(%(external_data_id)s),
                comment = gm.nullify_empty_string(%(comment)s)
            WHERE
                pk = %(pk_incoming_data_unmatched)s
                    AND
                xmin = %(xmin_incoming_data_unmatched)s
            RETURNING
                xmin as xmin_incoming_data_unmatched,
                octet_length(data) as data_size
        """
    ]
    # view columns that can be updated:
    _updatable_fields = [
        u'pk_patient_candidates',
        u'request_id',                  # request ID as found in .data
        u'firstnames',
        u'lastnames',
        u'dob',
        u'postcode',
        u'other_info',                  # other identifying info in .data
        u'data_type',
        u'gender',
        u'requestor',                   # Requestor of data (e.g. who ordered test results) if available in source data.
        u'external_data_id',            # ID of content of .data in external system (e.g. importer) where appropriate
        u'comment',                     # a free text comment on this row, eg. why is it here, error logs etc
        u'pk_identity_disambiguated',
        u'pk_provider_disambiguated'    # The provider the data is relevant to.
    ]
    #--------------------------------------------------------
    def format(self):
        """Return this object rendered through u'%s'."""
        return u'%s' % self
    #--------------------------------------------------------
    def update_data_from_file(self, fname=None):
        """Load the content of a file into the .data column of this row.

        fname: path of the file to load

        Returns False (after logging an error) if fname is None or
        does not name a readable regular file. Otherwise hands the
        file to gmPG2.file2bytea(), refetches the payload and
        returns True.
        """
        # sanity check
        # (None must be caught explicitly: os.access() raises
        # TypeError on it rather than returning False)
        if (fname is None) or not (os.access(fname, os.R_OK) and os.path.isfile(fname)):
            _log.error('[%s] is not a readable file', fname)
            return False

        gmPG2.file2bytea (
            query = u"UPDATE clin.incoming_data_unmatched SET data = %(data)s::bytea WHERE pk = %(pk)s",
            filename = fname,
            args = {'pk': self.pk_obj}
        )

        # must update XMIN now ...
        self.refetch_payload()
        return True
119 120 #------------------------------------------------------------
def get_incoming_data(order_by=None):
    """Return all rows of clin.v_incoming_data_unmatched as cIncomingData instances.

    order_by: optional SQL fragment placed verbatim after
        "ORDER BY"; it is interpolated into the query as is.
    """
    where_clause = u'true'
    if order_by is not None:
        where_clause = u'true ORDER BY %s' % order_by
    query = {'cmd': _SQL_get_incoming_data % where_clause}
    rows, idx = gmPG2.run_ro_queries(queries = [query], get_col_idx = True)
    incoming = []
    for row in rows:
        payload = {'data': row, 'idx': idx, 'pk_field': 'pk_incoming_data_unmatched'}
        incoming.append(cIncomingData(row = payload))
    return incoming
129 130 #------------------------------------------------------------
def create_incoming_data(data_type, filename):
    """Create a row in clin.incoming_data_unmatched from a file.

    data_type: value stored in the "type" column
    filename: file whose content becomes the row's data

    The row is first inserted with placeholder data, then the file
    is loaded via cIncomingData.update_data_from_file().

    Returns the new cIncomingData instance, or None if the file
    could not be loaded (in which case the row is deleted again).
    """
    args = {'typ': data_type}
    cmd = u"""
        INSERT INTO clin.incoming_data_unmatched (type, data)
        VALUES (%(typ)s, 'new data'::bytea)
        RETURNING pk"""
    rows, idx = gmPG2.run_rw_queries(queries = [{'cmd': cmd, 'args': args}], return_data = True, get_col_idx = False)
    pk = rows[0]['pk']
    incoming = cIncomingData(aPK_obj = pk)
    if not incoming.update_data_from_file(fname = filename):
        # the keyword must match the signature of delete_incoming_data(),
        # or else the cleanup itself fails with a TypeError
        delete_incoming_data(pk_incoming_data = pk)
        return None
    return incoming
144 145 #------------------------------------------------------------
def delete_incoming_data(pk_incoming_data=None):
    """Delete the row with the given primary key from clin.incoming_data_unmatched.

    Always returns True.
    """
    query = {
        'cmd': u"DELETE FROM clin.incoming_data_unmatched WHERE pk = %(pk)s",
        'args': {'pk': pk_incoming_data}
    }
    gmPG2.run_rw_queries(queries = [query])
    return True
151 152 #------------------------------------------------------------ 153 154 #============================================================
def fix_HL7_stupidities(filename, encoding='utf8'):
    """Write a cleaned-up copy of an HL7 file and return its name.

    filename: the HL7 file to fix
    encoding: encoding of that file (the copy is written as utf8)

    Line handling:
    - a line ending in BLANK + newline is taken to be a segment
      broken across lines and is held back to be joined with
      what follows
    - blank lines are skipped
    - a line without "|" as its 4th character is the final part
      of a broken segment and is appended to the held back text
    - a line starting with one of HL7_SEGMENTS is written out,
      preceded by any held back text
    - any other line is dropped
    - held back text still pending at the end of the file is
      written out rather than lost

    Every segment written is terminated with HL7_EOL.

    Raises ValueError on a continuation line with nothing held back.
    """
    out_fname = gmTools.get_unique_filename (
        prefix = u'%s-fixed-' % gmTools.fname_stem(filename),
        suffix = '.hl7'
    )
    _log.debug('fixing HL7 [%s] -> [%s]', filename, out_fname)
    hl7_in = codecs.open(filename, 'rb', encoding)
    try:
        hl7_out = codecs.open(out_fname, 'wb', 'utf8')
        try:
            prev_line = None
            for line_idx, line in enumerate(hl7_in, 1):
                # suspicious for non-terminating line ?
                if line.endswith(u' \n'):
                    _log.debug('#%s: suspicious non-terminating line ("...<SPACE>\\n"): [%s...%s]', line_idx, line[:4], line[-7:])
                    if prev_line is None:
                        prev_line = line[:-1]
                    else:
                        prev_line = prev_line + line[:-1]
                    continue

                line = line.strip('\r').strip('\n').strip('\r').strip('\n')

                # nothing to be done for blank lines
                if line == u'':
                    continue

                # final continuation line ?
                # (slicing, as opposed to indexing, is safe on lines
                # shorter than 4 characters)
                if line[3:4] != u'|':
                    if prev_line is None:
                        raise ValueError('line #%s does not start with "<SEGMENT>|" but previous line did not end with BLANK either: [%s]' % (line_idx, line))
                    hl7_out.write(prev_line)
                    prev_line = None
                    hl7_out.write(line + HL7_EOL)
                    continue

                # start of a known segment ?
                if line[:3] in HL7_SEGMENTS:
                    if prev_line is not None:
                        hl7_out.write(prev_line + HL7_EOL)
                        prev_line = None
                    hl7_out.write(line + HL7_EOL)
                    continue

                # NOTE(review): lines starting with an unknown segment
                # ID fall through to here and are dropped - confirm
                # that this is intended

            # do not lose a held back line the file ended on
            if prev_line is not None:
                _log.debug('file ends on a suspicious non-terminating line, writing it as is')
                hl7_out.write(prev_line + HL7_EOL)
        finally:
            hl7_out.close()
    finally:
        hl7_in.close()

    return out_fname
201 #============================================================
def extract_HL7_from_CDATA(filename, xml_path):
    """Concatenate the text of XML nodes into a new HL7 file.

    filename: the XML file to read
    xml_path: path expression passed to ElementTree.findall()
        selecting the nodes holding HL7

    Returns the name of the utf8 encoded file written, or None
    if the XML cannot be parsed or no node matches.
    """
    _log.debug('extracting HL7 from CDATA of <%s> nodes in XML file [%s]', xml_path, filename)

    hl7_xml = pyxml.ElementTree()
    try:
        hl7_xml.parse(filename)
    except pyxml.ParseError:
        _log.exception('cannot parse [%s]' % filename)
        return None
    nodes = hl7_xml.findall(xml_path)
    if len(nodes) == 0:
        _log.debug('no data found')
        return None

    out_fname = gmTools.get_unique_filename(prefix = u'%s-' % gmTools.fname_stem(filename), suffix = '.hl7')
    _log.debug('writing HL7 to [%s]', out_fname)
    hl7_file = codecs.open(out_fname, 'wb', 'utf8')
    try:
        for node in nodes:
            # .text is None on nodes without text content
            if node.text is None:
                continue
            hl7_file.write(node.text)
    finally:
        # close explicitly so the data is on disk when callers
        # open the file by name
        hl7_file.close()

    return out_fname
224 #============================================================
def split_HL7_by_MSH(filename, encoding='utf8'):
    """Split an HL7 file into one file per MSH segment.

    filename: the HL7 file to split
    encoding: encoding of that file (output is written as utf8)

    Empty lines and FHS/BHS lines before the first MSH line are
    skipped, BTS/FTS lines are skipped everywhere. All other lines
    go into the file of the most recent MSH line, each terminated
    with a carriage return.

    Returns the list of file names written, in order.

    Raises ValueError if the first line not skipped is not an
    MSH line.
    """
    _log.debug('splitting [%s]', filename)

    MSH_fnames = []
    MSH_file = None
    hl7_in = codecs.open(filename, 'rb', encoding)
    try:
        idx = 0
        first_line = True
        for line in hl7_in:
            # first line must be MSH
            if first_line:
                # ignore empty / FHS / BHS lines
                if line.strip() == u'':
                    continue
                if line.startswith(u'FHS|'):
                    _log.debug('ignoring FHS')
                    continue
                if line.startswith(u'BHS|'):
                    _log.debug('ignoring BHS')
                    continue
                if not line.startswith(u'MSH|'):
                    raise ValueError('HL7 file <%s> does not start with "MSH" line' % filename)
                first_line = False
            # start new file
            if line.startswith(u'MSH|'):
                if MSH_file is not None:
                    MSH_file.close()
                idx += 1
                # '.hl7' as used by the other functions in this module
                out_fname = gmTools.get_unique_filename(prefix = u'%s-MSH_%s-' % (gmTools.fname_stem(filename), idx), suffix = '.hl7')
                _log.debug('writing message %s to [%s]', idx, out_fname)
                MSH_fnames.append(out_fname)
                MSH_file = codecs.open(out_fname, 'wb', 'utf8')
            # ignore BTS / FTS lines
            if line.startswith(u'BTS|'):
                _log.debug('ignoring BTS')
                continue
            if line.startswith(u'FTS|'):
                _log.debug('ignoring FTS')
                continue
            # else write line to new file
            MSH_file.write(line.strip('\n').strip('\r').strip('\n').strip('\r') + u'\r')
    finally:
        # runs on errors, too, so that no file is left open
        if MSH_file is not None:
            MSH_file.close()
        hl7_in.close()

    return MSH_fnames
274 275 #============================================================
def flatten_MSH_by_PID(filename):
    """Split a single-MSH HL7 file into one file per PID segment.

    Assumes:
        - ONE MSH per file
        - utf8 encoding
        - first non-empty line must be MSH line
        - first non-empty line after MSH must be PID line

    IOW, what's created by split_HL7_by_MSH()

    Each file written starts with a copy of the MSH line followed
    by one PID line and the lines up to the next PID line. Empty
    lines are skipped.

    Returns the list of file names written, in order.

    Raises ValueError if the MSH or the first PID line is not
    found where expected.
    """
    _log.debug('splitting [%s]', filename)

    PID_fnames = []
    PID_file = None
    MSH_in = codecs.open(filename, 'rb', 'utf8')
    try:
        looking_for_MSH = True
        MSH_line = None
        looking_for_first_PID = True
        idx = 0
        for line in MSH_in:
            # ignore empty
            if line.strip() == u'':
                continue

            # first non-empty line must be MSH
            if looking_for_MSH:
                if line.startswith(u'MSH|'):
                    looking_for_MSH = False
                    MSH_line = line.strip('\n').strip('\r').strip('\n').strip('\r') + u'\r'
                    continue
                raise ValueError('HL7 MSH file <%s> does not start with "MSH" line' % filename)

            # first non-empty line after MSH must be PID
            if looking_for_first_PID:
                if not line.startswith(u'PID|'):
                    raise ValueError('HL7 MSH file <%s> does not have "PID" line follow "MSH" line' % filename)
                looking_for_first_PID = False

            # start new file if line is PID
            if line.startswith(u'PID|'):
                if PID_file is not None:
                    PID_file.close()
                idx += 1
                # '.hl7' as used by the other functions in this module
                out_fname = gmTools.get_unique_filename(prefix = u'%s-PID_%s-' % (gmTools.fname_stem(filename), idx), suffix = '.hl7')
                _log.debug('writing message for PID %s to [%s]', idx, out_fname)
                PID_fnames.append(out_fname)
                PID_file = codecs.open(out_fname, 'wb', 'utf8')
                PID_file.write(MSH_line)
            # else write line to new file
            PID_file.write(line.strip('\n').strip('\r').strip('\n').strip('\r') + u'\r')
    finally:
        # runs on errors, too, so that no file is left open
        if PID_file is not None:
            PID_file.close()
        MSH_in.close()

    return PID_fnames
333 334 #============================================================
def split_HL7_by_PID(filename, encoding='utf8'):
    """Split an HL7 file into one file per MSH/PID pair.

    Runs split_HL7_by_MSH() on the file and flatten_MSH_by_PID()
    on each of the resulting files.

    Returns the names of all per-PID files, in order.
    """
    per_PID_fnames = []
    for per_MSH_fname in split_HL7_by_MSH(filename, encoding):
        per_PID_fnames += flatten_MSH_by_PID(per_MSH_fname)
    return per_PID_fnames
342 343 #============================================================
def __find_or_create_lab(hl7_lab):
    """Return the GNUmed test org for a lab named in HL7, creating it if needed.

    A test org matches if its comment contains the tag
    "[HL7 name::...]" built from hl7_lab. A newly created
    test org gets that tag as its comment.

    Raises ValueError if the test org cannot be created.
    """
    tag = u'[HL7 name::%s]' % hl7_lab
    for candidate in gmPathLab.get_test_orgs():
        comment = gmTools.coalesce(candidate['comment'], u'')
        if tag in comment:
            return candidate

    _log.debug('lab not found: %s', hl7_lab)
    new_lab = gmPathLab.create_test_org(name = hl7_lab, comment = tag)
    if new_lab is None:
        raise ValueError('cannot create lab [%s] in GNUmed' % hl7_lab)
    _log.debug('created lab: %s', new_lab)
    return new_lab
355 356 #------------------------------------------------------------
def __find_or_create_test_type(loinc, name, pk_lab, unit):
    """Return the measurement type for a lab/name pair, creating it if needed.

    loinc: LOINC code from HL7, may be None or blank
    name: name of the test type, also used as abbreviation on creation
    pk_lab: primary key of the lab the test type belongs to
    unit: unit to create the test type with

    If a LOINC code is given and the test type has none yet the
    code is stored on the test type. A code differing from the
    one already stored is logged as an error but not changed.
    """
    test_type = gmPathLab.find_measurement_type(lab = pk_lab, name = name)
    if test_type is None:
        _log.debug('test type [%s %s (%s)] not found for lab #%s, creating', name, unit, loinc, pk_lab)
        test_type = gmPathLab.create_measurement_type(lab = pk_lab, abbrev = name, unit = unit, name = name)

    # no LOINC code in HL7: nothing to reconcile
    if (loinc is None) or (loinc.strip() == u''):
        return test_type

    known_loinc = test_type['loinc']
    if known_loinc is None:
        test_type['loinc'] = loinc
        test_type.save()
    elif known_loinc != loinc:
        # a mismatch is logged only, it does not abort the import
        _log.error('LOINC code mismatch between GM (%s) and HL7 (%s) for result type [%s]', known_loinc, loinc, name)

    return test_type
377 378 #------------------------------------------------------------
def import_MSH(filename):
    """Assumes what's produced by flatten_MSH_by_PID().

    Makes sure the sending lab and the test types of all OBX
    segments exist in GNUmed, then prints the patient name
    found in the PID segment.

    At most 5 MB of the file are read.
    """
    _log.debug('importing HL7 from [%s]', filename)

    # read the file
    max_size = 1024 * 1024 * 5      # 5 MB max
    MSH_file = codecs.open(filename, 'rb', 'utf8')
    raw_hl7 = MSH_file.read(max_size)
    HL7 = pyhl7.parse(raw_hl7)
    MSH_file.close()

    # verify lab is in database
    lab_name = HL7.segment('MSH')[MSH_sending_lab][0]
    gm_lab = __find_or_create_lab(lab_name)

    # verify test types are in database
    for OBX in HL7.segments('OBX'):
        test_type = OBX[OBX_type]
        __find_or_create_test_type (
            test_type[OBX_LOINC],
            test_type[OBX_name],
            gm_lab['pk_test_org'],
            OBX[OBX_unit][0]
        )

    # find patient
    name = HL7.segment('PID')[PID_name]
    pat_lname = name[PID_lastname]
    pat_fname = name[PID_firstname]
    pat_mname = name[PID_middlename] if len(name) > 2 else None
    print(" Patient: %s (%s) %s" % (pat_fname, pat_mname, pat_lname))
406 407 #------------------------------------------------------------
def stage_MSH_as_incoming_data(filename, source=None):
    """Assumes what's produced by flatten_MSH_by_PID().

    Stores the file as a new row of unmatched incoming data and
    sets names, date of birth and gender from the PID segment.

    filename: the HL7 file to stage, at most 5 MB of which are parsed
    source: optional label for the origin of the data, it
        becomes part of the data type of the staged row

    Returns the staged cIncomingData instance, or None if
    create_incoming_data() returned None.

    If setting or saving the fields fails the staged row is
    deleted again and the exception is re-raised.
    """
    source_label = gmTools.coalesce(source, u'', u' (%s)')
    _log.debug('staging HL7%s from [%s]', source_label, filename)

    # parse HL7
    MSH_file = codecs.open(filename, 'rb', 'utf8')
    try:
        HL7 = pyhl7.parse(MSH_file.read(1024 * 1024 * 5))   # 5 MB max
    finally:
        MSH_file.close()

    # import file
    inc = create_incoming_data(u'HL7%s' % source_label, filename)
    if inc is None:
        return None

    try:
        # set fields if known
        PID = HL7.segment('PID')
        name = PID[PID_name]
        inc['lastnames'] = gmTools.coalesce(name[PID_lastname], u'')
        inc['firstnames'] = gmTools.coalesce(name[PID_firstname], u'')
        if len(name) > 2:
            middle_name = name[PID_middlename]
            # a missing middle name must neither break the
            # concatenation nor leave a trailing blank
            if middle_name:
                inc['firstnames'] += u' ' + middle_name
        if PID[PID_dob] is not None:
            dob = PID[PID_dob][0]
            # the field may be empty, which means "not known",
            # or carry a time part after YYYYMMDD, of which
            # only the date is used
            if dob:
                tmp = time.strptime(dob[:8], '%Y%m%d')
                inc['dob'] = pyDT.datetime(tmp.tm_year, tmp.tm_mon, tmp.tm_mday, tzinfo = gmDateTime.gmCurrentLocalTimezone)
        if PID[PID_gender] is not None:
            inc['gender'] = PID[PID_gender][0]
        inc['external_data_id'] = filename
        # not set so far:
        #   pk_patient_candidates, request_id, postcode, other_info,
        #   requestor, pk_identity_disambiguated, comment,
        #   pk_provider_disambiguated
        inc.save()
    except:
        # deliberately catches everything: the half-staged row
        # must not stay behind, the error is passed on as is
        delete_incoming_data(pk_incoming_data = inc['pk_incoming_data_unmatched'])
        raise

    return inc
#============================================================
# main
#------------------------------------------------------------
if __name__ == "__main__":

    # only run when invoked as "<script> test ..."
    if (len(sys.argv) < 2) or (sys.argv[1] != 'test'):
        sys.exit()

    from Gnumed.pycommon import gmLog2

    gmDateTime.init()

    # NOTE: the single-argument print(...) calls below produce the
    # same output as the equivalent multi-item print statements

    #-------------------------------------------------------
    def test_import_HL7():
        """Split the file named in argv[2] by PID and import each part."""
        for PID_fname in split_HL7_by_PID(sys.argv[2], encoding='utf8'):
            print(PID_fname)
            import_MSH(PID_fname)
    #-------------------------------------------------------
    def test_xml_extract():
        """Extract HL7 from the XML file named in argv[2], fix it, split it by PID."""
        hl7 = extract_HL7_from_CDATA(sys.argv[2], u'.//Message')
        print("HL7: %s" % (hl7,))
        fixed = fix_HL7_stupidities(hl7)
        print("fixed HL7: %s" % (fixed,))
        PID_fnames = split_HL7_by_PID(fixed, encoding='utf8')
        print("per-PID MSH files:")
        for PID_fname in PID_fnames:
            print("  %s" % (PID_fname,))
    #-------------------------------------------------------
    def test_incoming_data():
        """Print all unmatched incoming data."""
        for incoming in get_incoming_data():
            print(incoming)
    #-------------------------------------------------------
    def test_stage_hl7_from_xml():
        """Extract HL7 from the XML file named in argv[2] and stage it per PID."""
        hl7 = extract_HL7_from_CDATA(sys.argv[2], u'.//Message')
        print("HL7: %s" % (hl7,))
        fixed = fix_HL7_stupidities(hl7)
        print("fixed HL7: %s" % (fixed,))
        PID_fnames = split_HL7_by_PID(fixed, encoding='utf8')
        print("staging per-PID HL7 files:")
        for PID_fname in PID_fnames:
            print(" file: %s" % (PID_fname,))
            staged = stage_MSH_as_incoming_data(PID_fname, source = u'Excelleris')
            print(" %s" % (staged,))
    #-------------------------------------------------------
    def test_stage_hl7():
        """Fix the HL7 file named in argv[2] and stage it per PID."""
        fixed = fix_HL7_stupidities(sys.argv[2])
        print("fixed HL7: %s" % (fixed,))
        PID_fnames = split_HL7_by_PID(fixed, encoding='utf8')
        print("staging per-PID HL7 files:")
        for PID_fname in PID_fnames:
            print(" file: %s" % (PID_fname,))
            staged = stage_MSH_as_incoming_data(PID_fname, source = u'?')
            print(" %s" % (staged,))
    #-------------------------------------------------------
    #test_import_HL7()
    #test_xml_extract()
    #test_incoming_data()
    #test_stage_hl7_from_xml()
    test_stage_hl7()