1
2 """Some HL7 handling."""
3
4 __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>"
5 __license__ = "GPL v2 or later"
6
7
8 import sys
9 import os
10 import codecs
11 import logging
12 import time
13 import datetime as pyDT
14 import hl7 as pyhl7
15 from xml.etree import ElementTree as pyxml
16
17
18 if __name__ == '__main__':
19 sys.path.insert(0, '../../')
20 from Gnumed.pycommon import gmTools
21 from Gnumed.pycommon import gmBusinessDBObject
22 from Gnumed.pycommon import gmPG2
23 from Gnumed.pycommon import gmDateTime
24 from Gnumed.business import gmPathLab
25
26
# Module-wide logger for everything HL7 related.
_log = logging.getLogger('gm.hl7')

# Segment terminator appended to every segment the helpers in this module write.
HL7_EOL = u'\r'

# Segment IDs recognised as the legitimate start of a line when cleaning up
# vendor files (compared against the first three characters of a line).
HL7_SEGMENTS = [u'FHS', u'BHS', u'MSH', u'PID', u'PV1', u'OBX', u'NTE', u'ORC', u'OBR']

# --- field positions inside parsed segments -------------------------------
# MSH: index of the field holding the sending lab
MSH_sending_lab = 3

# PID: index of the name field and of the components inside that field
PID_name = 5
PID_lastname = 0
PID_firstname = 1
PID_middlename = 2
# PID: indexes of the date-of-birth and gender fields
PID_dob = 7
PID_gender = 8

# OBX: index of the test type field and of the components inside that field
OBX_type = 3
OBX_LOINC = 0
OBX_name = 1
# OBX: indexes of the result value and unit fields
OBX_value = 5
OBX_unit = 6

# Base query for unmatched incoming data; the placeholder takes the WHERE
# condition (optionally followed by an ORDER BY clause).
_SQL_get_incoming_data = u"""SELECT * FROM clin.v_incoming_data_unmatched WHERE %s"""
53
# Class-level part of cIncomingData -- the "class" statement itself is not part
# of this excerpt. Instances are created elsewhere in this module via
# cIncomingData(aPK_obj = ...) and cIncomingData(row = ...).
55 """Represents items of incoming data, say, HL7 snippets."""
56
# query selecting a row of clin.v_incoming_data_unmatched by its primary key
57 _cmd_fetch_payload = _SQL_get_incoming_data % u"pk_incoming_data_unmatched = %s"
# UPDATE statement(s) writing the editable fields back to the table:
# - text values are passed through gm.nullify_empty_string() on the database
#   side (presumably turning '' into NULL -- confirm against the schema)
# - the row only matches if its xmin still equals the xmin read earlier
# - the fresh xmin and the size of the stored data are handed back via RETURNING
58 _cmds_store_payload = [
59 u"""UPDATE clin.incoming_data_unmatched SET
60 fk_patient_candidates = %(pk_patient_candidates)s,
61 fk_identity_disambiguated = %(pk_identity_disambiguated)s,
62 fk_provider_disambiguated = %(pk_provider_disambiguated)s,
63 request_id = gm.nullify_empty_string(%(request_id)s),
64 firstnames = gm.nullify_empty_string(%(firstnames)s),
65 lastnames = gm.nullify_empty_string(%(lastnames)s),
66 dob = %(dob)s,
67 postcode = gm.nullify_empty_string(%(postcode)s),
68 other_info = gm.nullify_empty_string(%(other_info)s),
69 type = gm.nullify_empty_string(%(data_type)s),
70 gender = gm.nullify_empty_string(%(gender)s),
71 requestor = gm.nullify_empty_string(%(requestor)s),
72 external_data_id = gm.nullify_empty_string(%(external_data_id)s),
73 comment = gm.nullify_empty_string(%(comment)s)
74 WHERE
75 pk = %(pk_incoming_data_unmatched)s
76 AND
77 xmin = %(xmin_incoming_data_unmatched)s
78 RETURNING
79 xmin as xmin_incoming_data_unmatched,
80 octet_length(data) as data_size
81 """
82 ]
83
# field names matching the placeholders in the SET clause of the UPDATE above
# (presumably the list of attributes the base class allows to be modified)
84 _updatable_fields = [
85 u'pk_patient_candidates',
86 u'request_id',
87 u'firstnames',
88 u'lastnames',
89 u'dob',
90 u'postcode',
91 u'other_info',
92 u'data_type',
93 u'gender',
94 u'requestor',
95 u'external_data_id',
96 u'comment',
97 u'pk_identity_disambiguated',
98 u'pk_provider_disambiguated'
99 ]
100
103
105
# Body of the method invoked elsewhere as update_data_from_file(fname = ...);
# its "def" line is not part of this excerpt.
# Stores the content of the file <fname> in the "data" column of this row.
# Returns False if <fname> is not a readable regular file, True otherwise.
106 if not (os.access(fname, os.R_OK) and os.path.isfile(fname)):
107 _log.error('[%s] is not a readable file' % fname)
108 return False
109
# write the file content as bytea into the row identified by self.pk_obj
110 gmPG2.file2bytea (
111 query = u"UPDATE clin.incoming_data_unmatched SET data = %(data)s::bytea WHERE pk = %(pk)s",
112 filename = fname,
113 args = {'pk': self.pk_obj}
114 )
115
116
# re-read the row after the data column was changed behind the object's back
# (presumably to pick up the new xmin / data_size -- confirm in the base class)
117 self.refetch_payload()
118 return True
119
120
# Body of the helper listing all unmatched incoming data; its "def" line is not
# part of this excerpt, "order_by" is one of its parameters.
# Returns a list of cIncomingData instances, one per row of the view.
122 if order_by is None:
# no sorting requested: the WHERE condition degenerates to "true"
123 order_by = u'true'
124 else:
# piggy-back the ORDER BY clause onto an always-true WHERE condition
# NOTE(review): order_by is interpolated into the SQL text as is, so it must
# only ever come from trusted code, never from user input
125 order_by = u'true ORDER BY %s' % order_by
126 cmd = _SQL_get_incoming_data % order_by
127 rows, idx = gmPG2.run_ro_queries(queries = [{'cmd': cmd}], get_col_idx = True)
# wrap each row, reusing the already fetched data instead of re-querying per row
128 return [ cIncomingData(row = {'data': r, 'idx': idx, 'pk_field': 'pk_incoming_data_unmatched'}) for r in rows ]
129
130
def create_incoming_data(data_type, filename):
    """Create a row of unmatched incoming data and load a file into it.

    A row is first inserted with a placeholder payload, then the content
    of <filename> is stored in it. If storing the file fails the freshly
    created row is removed again.

    NOTE(review): the "def" line was reconstructed from the positional
    call create_incoming_data(<type string>, <filename>) found elsewhere
    in this module -- confirm against the original header.

    Args:
        data_type: value for the "type" column of the new row
        filename: file whose content becomes the "data" of the new row

    Returns:
        The cIncomingData instance for the new row, or None if the file
        could not be loaded.
    """
    args = {'typ': data_type}
    # the bytea column gets dummy content which is overwritten right below
    cmd = u"""
        INSERT INTO clin.incoming_data_unmatched (type, data)
        VALUES (%(typ)s, 'new data'::bytea)
        RETURNING pk"""
    rows, idx = gmPG2.run_rw_queries(queries = [{'cmd': cmd, 'args': args}], return_data = True, get_col_idx = False)
    pk = rows[0]['pk']
    incoming = cIncomingData(aPK_obj = pk)
    if not incoming.update_data_from_file(fname = filename):
        # do not leave the placeholder row behind; the keyword must be
        # "pk_incoming_data" -- that is the name delete_incoming_data()
        # uses for its argument
        delete_incoming_data(pk_incoming_data = pk)
        return None
    return incoming
144
145
def delete_incoming_data(pk_incoming_data=None):
    """Delete one row from clin.incoming_data_unmatched.

    NOTE(review): the "def" line was reconstructed from the keyword call
    delete_incoming_data(pk_incoming_data = ...) found elsewhere in this
    module -- confirm the default value against the original header.

    Args:
        pk_incoming_data: primary key of the row to remove

    Returns:
        Always True (database errors surface as exceptions from gmPG2).
    """
    delete_query = {
        'cmd': u"DELETE FROM clin.incoming_data_unmatched WHERE pk = %(pk)s",
        'args': {'pk': pk_incoming_data}
    }
    gmPG2.run_rw_queries(queries = [delete_query])
    return True
151
152
153
154
156
# Body of the HL7 clean-up helper; its "def" line is not part of this excerpt,
# "filename" and "encoding" are its parameters.
# Copies <filename> (read with <encoding>) into a new utf8 file in which wrapped
# lines are re-joined and every segment is terminated by HL7_EOL.
# Returns the name of the new file; raises ValueError on a continuation line
# which has nothing to continue.
# NOTE(review): indentation is lost in this excerpt, the nesting described in
# the comments below is inferred from the statement order.
157 out_fname = gmTools.get_unique_filename (
158 prefix = u'%s-fixed-' % gmTools.fname_stem(filename),
159 suffix = '.hl7'
160 )
161 _log.debug('fixing HL7 [%s] -> [%s]', filename, out_fname)
# NOTE(review): neither file is closed if an exception is raised inside the loop
162 hl7_in = codecs.open(filename, 'rb', encoding)
163 hl7_out = codecs.open(out_fname, 'wb', 'utf8')
164
165 line_idx = 0
# buffer for a line that was wrapped and needs to be joined with what follows
166 prev_line = None
167 for line in hl7_in:
168 line_idx += 1
169
# a line ending in "<SPACE>\n" is taken to be wrapped: buffer it (minus the
# newline) and join it with the following line(s)
170 if line.endswith(u' \n'):
171 _log.debug('#%s: suspicious non-terminating line ("...<SPACE>\\n"): [%s...%s]', line_idx, line[:4], line[-7:])
172 if prev_line is None:
173 prev_line = line[:-1]
174 else:
175 prev_line = prev_line + line[:-1]
176 continue
177
# remove any mix of trailing/leading CR and LF
178 line = line.strip('\r').strip('\n').strip('\r').strip('\n')
179
180
# no "|" after the first three characters: this is the tail of a wrapped line
# NOTE(review): line[3] raises IndexError for lines shorter than four
# characters, e.g. an empty line
181 if line[3] != u'|':
182 if prev_line is None:
183 raise ValueError('line #%s does not start with "<SEGMENT>|" but previous line did not end with BLANK either: [%s]' % (line_idx, line))
# buffered start + this tail form one segment, terminated once
184 hl7_out.write(prev_line)
185 prev_line = None
186 hl7_out.write(line + HL7_EOL)
187 continue
188
189
# a known segment starts here: flush whatever is still buffered, then write it
# NOTE(review): a line with "|" at index 3 but a segment ID not listed in
# HL7_SEGMENTS matches neither branch and is not written at all
190 if line[:3] in HL7_SEGMENTS:
191 if prev_line is not None:
192 hl7_out.write(prev_line + HL7_EOL)
193 prev_line = None
194 hl7_out.write(line + HL7_EOL)
195 continue
196
# NOTE(review): content still sitting in prev_line when the input ends is never
# written to the output
197 hl7_out.close()
198 hl7_in.close()
199
200 return out_fname
201
203
# Body of the XML extraction helper; its "def" line is not part of this excerpt,
# "filename" and "xml_path" are its parameters.
# Writes the text of all nodes matching <xml_path> in the XML file <filename>
# into a new .hl7 file and returns that file's name. Returns None if the XML
# cannot be parsed or no node matches.
204 _log.debug('extracting HL7 from CDATA of <%s> nodes in XML file [%s]', xml_path, filename)
205
206 hl7_xml = pyxml.ElementTree()
207 try:
208 hl7_xml.parse(filename)
209 except pyxml.ParseError:
210 _log.exception('cannot parse [%s]' % filename)
211 return None
212 nodes = hl7_xml.findall(xml_path)
213 if len(nodes) == 0:
214 _log.debug('no data found')
215 return None
216
217 out_fname = gmTools.get_unique_filename(prefix = u'%s-' % gmTools.fname_stem(filename), suffix = '.hl7')
218 _log.debug('writing HL7 to [%s]', out_fname)
# NOTE(review): hl7_file is never closed explicitly before returning, so the
# caller may see an incompletely flushed file
219 hl7_file = codecs.open(out_fname, 'wb', 'utf8')
# the node texts are written back to back, without any separator
# NOTE(review): node.text is None for an element without text -- presumably
# that makes write() fail; confirm
220 for node in nodes:
221 hl7_file.write(node.text)
222
223 return out_fname
224
226
# Body of the helper splitting an HL7 file into one file per message; its "def"
# line is not part of this excerpt, "filename" and "encoding" are its parameters.
# Every MSH segment starts a new utf8 output file; FHS/BHS/BTS/FTS envelope
# segments are dropped. Returns the list of files written; raises ValueError if
# the first relevant line is not an MSH segment.
# NOTE(review): indentation is lost in this excerpt, the nesting described in
# the comments below is inferred from the statement order.
227 _log.debug('splitting [%s]', filename)
228
229 hl7_in = codecs.open(filename, 'rb', encoding)
230
231 idx = 0
# stays True until the first MSH line has been seen
232 first_line = True
233 MSH_file = None
234 MSH_fnames = []
235 for line in hl7_in:
236
# before the first MSH only empty lines and file/batch headers are tolerated
237 if first_line:
238
239 if line.strip() == u'':
240 continue
241 if line.startswith(u'FHS|'):
242 _log.debug('ignoring FHS')
243 continue
244 if line.startswith(u'BHS|'):
245 _log.debug('ignoring BHS')
246 continue
247 if not line.startswith(u'MSH|'):
248 raise ValueError('HL7 file <%s> does not start with "MSH" line' % filename)
249 first_line = False
250
# a new message begins: close the previous output file and open the next one
251 if line.startswith(u'MSH|'):
252 if MSH_file is not None:
253 MSH_file.close()
254 idx += 1
# NOTE(review): the suffix is 'hl7' here while '.hl7' (with a dot) is used for
# the other generated files in this module -- confirm which one is intended
255 out_fname = gmTools.get_unique_filename(prefix = u'%s-MSH_%s-' % (gmTools.fname_stem(filename), idx), suffix = 'hl7')
256 _log.debug('writing message %s to [%s]', idx, out_fname)
257 MSH_fnames.append(out_fname)
258 MSH_file = codecs.open(out_fname, 'wb', 'utf8')
259
# batch/file trailers are not part of any message
260 if line.startswith(u'BTS|'):
261 _log.debug('ignoring BTS')
262 continue
263 if line.startswith(u'FTS|'):
264 _log.debug('ignoring FTS')
265 continue
266
# normalise the line ending to a single CR
267 MSH_file.write(line.strip('\n').strip('\r').strip('\n').strip('\r') + u'\r')
268
269 if MSH_file is not None:
270 MSH_file.close()
271 hl7_in.close()
272
273 return MSH_fnames
274
275
# Body of the helper splitting a single-MSH file into one file per PID segment;
# its "def" line is not part of this excerpt, "filename" is its parameter.
# Each output file starts with a copy of the MSH line followed by one PID
# segment and everything up to the next PID. Returns the list of files written.
# NOTE(review): indentation is lost in this excerpt, the nesting described in
# the comments below is inferred from the statement order.
# NOTE(review): the text below says content between MSH and PID "is lost", but
# the code raises ValueError if the first non-empty line after MSH is not a PID
# line -- confirm which behaviour is intended.
277 """Assumes:
278 - ONE MSH per file
279 - utf8 encoding
280 - first non-empty line must be MSH line
281
282 - anything between MSH and PID is lost
283
284 IOW, what's created by split_HL7_into_MSH()
285 """
286 _log.debug('splitting [%s]', filename)
287
288 MSH_in = codecs.open(filename, 'rb', 'utf8')
289
290 looking_for_MSH = True
# the MSH line is remembered so it can be repeated at the top of every PID file
291 MSH_line = None
292 looking_for_first_PID = True
293 PID_file = None
294 PID_fnames = []
295 idx = 0
296 for line in MSH_in:
297
298 if line.strip() == u'':
299 continue
300
301
# phase 1: the first non-empty line must be the MSH segment
302 if looking_for_MSH:
303 if line.startswith(u'MSH|'):
304 looking_for_MSH = False
305 MSH_line = line.strip('\n').strip('\r').strip('\n').strip('\r') + u'\r'
306 continue
307 raise ValueError('HL7 MSH file <%s> does not start with "MSH" line' % filename)
308
309
# phase 2: the line right after MSH must be a PID segment
310 if looking_for_first_PID:
311 if not line.startswith(u'PID|'):
312 raise ValueError('HL7 MSH file <%s> does not have "PID" line follow "MSH" line' % filename)
313 looking_for_first_PID = False
314
315
# every PID starts a new output file which begins with the shared MSH line
316 if line.startswith(u'PID|'):
317 if PID_file is not None:
318 PID_file.close()
319 idx += 1
# NOTE(review): the suffix is 'hl7' here while '.hl7' (with a dot) is used for
# other generated files in this module -- confirm which one is intended
320 out_fname = gmTools.get_unique_filename(prefix = u'%s-PID_%s-' % (gmTools.fname_stem(filename), idx), suffix = 'hl7')
321 _log.debug('writing message for PID %s to [%s]', idx, out_fname)
322 PID_fnames.append(out_fname)
323 PID_file = codecs.open(out_fname, 'wb', 'utf8')
324 PID_file.write(MSH_line)
325
# the PID line itself and all following segments go to the current PID file,
# with the line ending normalised to a single CR
326 PID_file.write(line.strip('\n').strip('\r').strip('\n').strip('\r') + u'\r')
327
328 if PID_file is not None:
329 PID_file.close()
330 MSH_in.close()
331
332 return PID_fnames
333
334
342
343
def __find_or_create_lab(hl7_lab):
    """Return the GNUmed test org standing for the HL7 lab <hl7_lab>.

    Labs are recognised by a "[HL7 name::<hl7_lab>]" marker inside their
    comment. If no existing lab carries the marker a new one is created
    with the HL7 name as its name and the marker as its comment.

    NOTE(review): the "def" line was reconstructed from the one-argument
    call found elsewhere in this module -- confirm against the original
    header.

    Args:
        hl7_lab: lab name as found in the HL7 data

    Returns:
        The matching or newly created test org.

    Raises:
        ValueError: the lab does not exist and cannot be created.
    """
    marker = u'[HL7 name::%s]' % hl7_lab
    for candidate in gmPathLab.get_test_orgs():
        # comments may be NULL in the database
        lab_comment = gmTools.coalesce(candidate['comment'], u'')
        if marker in lab_comment:
            return candidate

    _log.debug('lab not found: %s', hl7_lab)
    new_lab = gmPathLab.create_test_org(name = hl7_lab, comment = marker)
    if new_lab is not None:
        _log.debug('created lab: %s', new_lab)
        return new_lab
    raise ValueError('cannot create lab [%s] in GNUmed' % hl7_lab)
355
356
358
def __find_or_create_test_type(loinc, name, pk_lab, unit):
    """Return the measurement type <name> of lab <pk_lab>, creating it if needed.

    When the HL7 data carries a LOINC code and the measurement type has
    none yet, the code is stored on the type. A differing, already stored
    code is left alone and the mismatch is logged as an error.

    NOTE(review): the "def" line, including the parameter order, was
    reconstructed from the positional call (LOINC, name, lab pk, unit)
    found elsewhere in this module -- confirm against the original header.

    Args:
        loinc: LOINC code from the HL7 data, may be None or blank
        name: test name, also used as abbreviation for a new type
        pk_lab: primary key of the lab the test belongs to
        unit: unit for a newly created type

    Returns:
        The existing or newly created measurement type.
    """
    test_type = gmPathLab.find_measurement_type(lab = pk_lab, name = name)
    if test_type is None:
        _log.debug('test type [%s %s (%s)] not found for lab #%s, creating', name, unit, loinc, pk_lab)
        test_type = gmPathLab.create_measurement_type(lab = pk_lab, abbrev = name, unit = unit, name = name)

    # nothing to reconcile without a usable LOINC code
    if loinc is None or loinc.strip() == u'':
        return test_type

    known_loinc = test_type['loinc']
    if known_loinc is None:
        # adopt the code delivered by the lab
        test_type['loinc'] = loinc
        test_type.save()
    elif known_loinc != loinc:
        # keep what GNUmed already knows, but make the conflict visible
        _log.error('LOINC code mismatch between GM (%s) and HL7 (%s) for result type [%s]', known_loinc, loinc, name)
    return test_type
377
378
# Body of the helper importing one single-patient HL7 file; its "def" line is
# not part of this excerpt, "filename" is its parameter.
# Makes sure the sending lab and all test types referenced by OBX segments
# exist in GNUmed, then prints the patient name. No value is returned.
380 """Assumes what's produced by flatten_MSH_by_PID()."""
381
382 _log.debug('importing HL7 from [%s]', filename)
383
384
# read() is capped at 5 * 1024 * 1024, anything beyond that is not parsed
385 MSH_file = codecs.open(filename, 'rb', 'utf8')
386 HL7 = pyhl7.parse(MSH_file.read(1024 * 1024 * 5))
387 MSH_file.close()
388
389
# the lab is looked up (or created) by the first part of the MSH sending-lab field
390 gm_lab = __find_or_create_lab(HL7.segment('MSH')[MSH_sending_lab][0])
391
392
# make sure a test type exists for every result; the result values themselves
# are not stored here
393 for OBX in HL7.segments('OBX'):
394 tt = OBX[OBX_type]
395 unit = OBX[OBX_unit][0]
396 __find_or_create_test_type(tt[OBX_LOINC], tt[OBX_name], gm_lab['pk_test_org'], unit)
397
398
399 name = HL7.segment('PID')[PID_name]
400 pat_lname = name[PID_lastname]
401 pat_fname = name[PID_firstname]
# the middle name is optional in the name field
402 pat_mname = None
403 if len(name) > 2:
404 pat_mname = name[PID_middlename]
# NOTE(review): print statement, i.e. Python 2 only syntax
405 print " Patient: %s (%s) %s" % (pat_fname, pat_mname, pat_lname)
406
407
# Body of the helper staging one single-patient HL7 file as unmatched incoming
# data; its "def" line is not part of this excerpt, "filename" and "source" are
# its parameters.
# Stores the file via create_incoming_data() and fills name, date of birth,
# gender and external_data_id of the new row from the PID segment.
# Returns the saved row, or None if the row could not be created. If anything
# fails after the row was created the row is deleted and the exception re-raised.
# NOTE(review): indentation is lost in this excerpt, the nesting described in
# the comments below is inferred from the statement order.
409 """Assumes what's produced by flatten_MSH_by_PID()."""
410
411 _log.debug('staging HL7%s from [%s]', gmTools.coalesce(source, u'', u' (%s)'), filename)
412
413
# read() is capped at 5 * 1024 * 1024, anything beyond that is not parsed
414 MSH_file = codecs.open(filename, 'rb', 'utf8')
415 HL7 = pyhl7.parse(MSH_file.read(1024 * 1024 * 5))
416 MSH_file.close()
417
418
# the data type of the row is "HL7", with " (<source>)" appended if a source is given
419 inc = create_incoming_data(u'HL7%s' % gmTools.coalesce(source, u'', u' (%s)'), filename)
420 if inc is None:
421 return None
422
423 try:
424
425 PID = HL7.segment('PID')
426 name = PID[PID_name]
427 inc['lastnames'] = gmTools.coalesce(name[PID_lastname], u'')
428 inc['firstnames'] = gmTools.coalesce(name[PID_firstname], u'')
# an optional middle name is appended to the first names
# NOTE(review): unlike first and last name the middle name is not passed through
# coalesce() -- presumably a None value would make the concatenation fail; confirm
429 if len(name) > 2:
430 inc['firstnames'] += u' '
431 inc['firstnames'] += name[PID_middlename]
# only the %Y%m%d form of the date of birth is understood; other formats raise
# ValueError from strptime() and thereby trigger the cleanup below
432 if PID[PID_dob] is not None:
433 tmp = time.strptime(PID[PID_dob][0], '%Y%m%d')
434 inc['dob'] = pyDT.datetime(tmp.tm_year, tmp.tm_mon, tmp.tm_mday, tzinfo = gmDateTime.gmCurrentLocalTimezone)
435 if PID[PID_gender] is not None:
436 inc['gender'] = PID[PID_gender][0]
# remember which file the data came from
437 inc['external_data_id'] = filename
438
439
440
441
442
443
444
445
446 inc.save()
# deliberately catches everything: the staged row is removed again, then the
# original exception is re-raised unchanged
447 except:
448 delete_incoming_data(pk_incoming_data = inc['pk_incoming_data_unmatched'])
449 raise
450
451 return inc
452
453
454
455
# Manual test driver: only does something when the module is run as a script
# with "test" as its first command line argument.
456 if __name__ == "__main__":
457
# no argument at all: exit silently
458 if len(sys.argv) < 2:
459 sys.exit()
460
# any first argument other than "test": exit silently
461 if sys.argv[1] != 'test':
462 sys.exit()
463
# imported for its side effects only (the name is not used below)
464 from Gnumed.pycommon import gmLog2
465
466 gmDateTime.init()
467
468
474
484
488
499
508
509
510
511
512
# NOTE(review): the definition of test_stage_hl7() is not part of this excerpt
513 test_stage_hl7()
514