1
2 """GNUmed LDT importer.
3
4 This script automatically imports German pathology result
5 files in LDT format.
6
7 It relies on patient-to-request-ID mappings to be present
8 in the GNUmed database. It will only import those requests
9 that have a mapping.
10
11 The general theory of operation of automatic import at
12 Hilbert office is as follows:
13
14 - automatically retrieve LDT files from labs
15 - archive them
16 - make them available in a GNUmed private directory
17 - run importer every hour
18 - import those records that have a mapping
19 - make those records available to TurboMed
20 - retain unmapped records until next time around
21
22 copyright: authors
23
24 FIXME: check status on save_payload()s
25 """
26
27 __version__ = "$Revision: 1.34 $"
28 __author__ = "Karsten Hilbert <Karsten.Hilbert@gmx.net>"
29 __license__ = "GPL, details at http://www.gnu.org"
30
31
import glob, os.path, sys, tempfile, fileinput, time, copy, random, shutil


from Gnumed.pycommon import gmLog, gmPG, gmCfg, gmLoginInfo, gmExceptions, gmI18N
from Gnumed.business import gmPathLab, gmXdtMappings, gmPerson, gmPersonSearch

import mx.DateTime as mxDT
39
# Module-wide handles used by everything below: the logger every
# function writes to, and the config file object that is queried via
# _cfg.get(group, option). The script entry point treats a _cfg of
# None as "no config file available".
_log = gmLog.gmDefLog
_cfg = gmCfg.gmDefCfgFile
42
44
# Line types that open a new chunk while a request file is read
# (checked in the request-file import loop).
_chunk_starters = ['8000', '8410']
# Maps 820x line types to the lab request field that receives the
# value returned by the matching line handler. Line types without an
# entry are handled but not stored on the request.
_map_820xline2req_field = {
    '8311': 'lab_request_id',
    '8301': 'lab_rxd_when',
    '8401': 'request_status',
    '8405': 'progress_note'
}
52
def __init__(self, cfg=None):
    """Remember the config source and check that the database is reachable.

    NOTE(review): the ``def`` line is missing from the listing under
    review; the signature is reconstructed from the
    ``cLDTImporter(cfg=_cfg)`` call -- confirm the default value.

    cfg -- config object; import_file() later calls
           cfg.get(group, option) on it

    Raises gmExceptions.ConstructorError if no connection to the
    'historica' service can be obtained.
    """
    self._cfg = cfg

    # The connection is only requested to prove that the backend is
    # reachable; it is handed straight back to the pool.
    pool = gmPG.ConnectionPool()
    conn = pool.GetConnection('historica')
    if conn is None:
        _log.Log(gmLog.lErr, 'cannot connect to database')
        # call form instead of the Python-2-only "raise X, msg" statement
        raise gmExceptions.ConstructorError('cannot connect to database')
    pool.ReleaseConnection('historica')
64
66
def import_file(self, filename):
    """Import all requests contained in one LDT file.

    NOTE(review): the ``def`` line is missing from the listing under
    review; name and parameter are reconstructed from the
    ``importer.import_file(ldt_file)`` call and the body -- confirm.

    The file is checked for readability, its header is verified and
    it is split into per-request files inside a private work
    directory. That directory is created below the configured
    ``work dir base`` or, if none is configured, next to the LDT
    file. Each request file is then imported on its own.

    filename -- path of the LDT file to import

    Returns True if every request found in the file was imported (a
    file without data records counts as success), False otherwise.
    """
    if not os.access(filename, os.R_OK):
        _log.Log(gmLog.lErr, 'cannot access LDT file [%s] for reads' % filename)
        return False
    self.ldt_filename = filename

    if not self.__verify_file_header(self.ldt_filename):
        _log.Log(gmLog.lInfo, 'cannot verify file header on [%s]' % self.ldt_filename)
        return False

    self.work_base = self._cfg.get('import', 'work dir base')
    if self.work_base is None:
        self.work_base = os.path.dirname(self.ldt_filename)
    self.work_base = os.path.abspath(os.path.expanduser(self.work_base))
    if not os.access(self.work_base, os.W_OK):
        _log.Log(gmLog.lErr, 'cannot write to work directory [%s]' % self.work_base)
        return False

    # mkdtemp() creates the directory atomically and readable by the
    # owner only. This replaces mktemp() + mkdir(), which was open to
    # a race, and no longer changes the process-wide tempfile.tempdir.
    self.work_dir = tempfile.mkdtemp(dir=self.work_base)
    try:
        source_files = self.__split_file(self.ldt_filename)
        if source_files is None:
            _log.Log(gmLog.lErr, 'cannot split LDT file [%s]' % self.ldt_filename)
            return False

        pending = source_files['data']
        if not pending:
            _log.Log(gmLog.lData, 'skipping empty LDT file [%s]' % self.ldt_filename)
        # iterate over a copy: imported files are removed from "pending"
        for request_file in list(pending):
            _log.Log(gmLog.lInfo, 'importing request file [%s]' % request_file)
            if self.__import_request_file(request_file):
                pending.remove(request_file)
                # success is informational, not an error
                _log.Log(gmLog.lInfo, 'success importing request file\n')
            else:
                _log.Log(gmLog.lErr, 'cannot import request file [%s]\n' % request_file)

        # whatever is still pending could not be imported
        return not pending
    finally:
        # always remove the work dir, including on the early returns above
        shutil.rmtree(self.work_dir, ignore_errors=True)
121
122
123
125
def _verify_8300(self, a_line, field_data):
    """Verify header line 8300: the sending lab must be known.

    NOTE(review): the ``def`` line is missing from the listing under
    review. The name comes from ``_map_field2verifier`` and the
    arity from the ``verify_line(self, line, line_data)`` call; the
    name of the unused second parameter is a guess -- confirm.

    a_line     -- the raw header line (unused)
    field_data -- lab name, matched against test_org.internal_name

    Returns True and remembers the lab name if the lab exists. Returns
    False if the query fails or the lab is unknown; in the latter
    case a housekeeping to-do is filed as well.
    """
    cmd = "select exists(select pk from test_org where internal_name=%s)"
    status = gmPG.run_ro_query('historica', cmd, None, field_data)
    if status is None:
        _log.Log(gmLog.lErr, 'cannot check for lab existance on [%s]' % field_data)
        return False
    if not status[0][0]:
        _log.Log(gmLog.lErr, 'Unbekanntes Labor [%s]' % field_data)
        # The message has no placeholder, so it must not be %-formatted
        # (that raised TypeError); the lab name is part of the context.
        prob = 'Labor unbekannt. Import abgebrochen.'
        sol = 'Labor ergänzen oder vorhandenes Labor anpassen (test_org.internal_name).'
        ctxt = 'LDT-Datei [%s], Labor [%s]' % (self.ldt_filename, field_data)
        add_todo(problem=prob, solution=sol, context=ctxt)
        return False
    self.__lab_name = field_data
    return True
140
141
142
143
144
# Header line types that are verified before import, mapped to their
# verifier (called as verifier(self, line, line_data)). The header
# check only succeeds if as many lines were verified as there are
# entries in this table.
_map_field2verifier = {
    '8300': _verify_8300
}
149
def __verify_file_header(self, filename):
    """Verify that header is suitable for import.

    This does not verify whether the header is conforming
    to the LDT specs but rather that it is fit for import.

    NOTE(review): the ``def`` line is missing from the listing under
    review; the signature is reconstructed from the call in
    import_file() -- confirm.

    filename -- path of the LDT file to check

    Returns True once the header (record type 8220) is followed by
    another record and every verifier in ``_map_field2verifier``
    accepted a header line. Returns False if a verifier rejects a
    line, too few lines could be verified, or the file ends before
    any record follows the header.
    """
    verifiers = cLDTImporter._map_field2verifier
    verified_lines = 0
    in_header = False
    # fileinput keeps module-global state: close it on every exit path,
    # exceptions included, or the next fileinput.input() call fails.
    try:
        for line in fileinput.input(filename):
            tmp = line.replace('\r', '').replace('\n', '')
            line_type = tmp[3:7]
            line_data = tmp[7:]

            if line_type == '8000':
                if line_data == '8220':
                    # start of header
                    in_header = True
                    continue
                if not in_header:
                    # records preceding the header are ignored
                    continue
                # any other record type ends the header
                if verified_lines == len(verifiers):
                    return True
                _log.Log(gmLog.lErr, 'zuwenige verifizierbare Zeilen im LDT-Datei-Header')
                return False

            if not in_header:
                continue
            try:
                verify_line = verifiers[line_type]
            except KeyError:
                _log.Log(gmLog.lData, 'kein Handler für Zeilentyp [%s] in LDT-Datei-Header' % line_type)
                continue
            if not verify_line(self, line, line_data):
                _log.Log(gmLog.lErr, 'cannot handle LDT file [%s]' % filename)
                return False
            verified_lines += 1
    finally:
        fileinput.close()

    _log.Log(gmLog.lErr, 'LDT file [%s] contains nothing but a header' % filename)
    return False
196
def __split_file(self, filename):
    """Split LDT file.

    Splits LDT files into header (record type 8220), data
    records (8202, etc) and trailer (8221).

    NOTE(review): the ``def`` line is missing from the listing under
    review; the signature is reconstructed from the call in
    import_file() -- confirm.

    filename -- path of the LDT file to split

    Returns a dict. ``'data'`` lists one file per data record;
    ``'header'`` and ``'trailer'`` hold the respective file names if
    such records were found. All files are created in self.work_dir.
    """
    source_files = {'data': []}
    outfile = None      # file receiving the lines of the current record
    outname = None      # current data record file, not yet listed in 'data'
    in_header = False
    in_trailer = False
    try:
        for line in fileinput.input(filename):
            tmp = line.replace('\r', '').replace('\n', '')
            line_type = tmp[3:7]
            line_data = tmp[7:]
            if line_type == '8000':
                if line_data in ('0020', '8220'):
                    # start of header; consecutive header records share one file
                    if not in_header:
                        if outfile is not None:
                            outfile.close()
                        header = os.path.join(self.work_dir, 'header.txt')
                        source_files['header'] = header
                        outfile = open(header, 'w+b')
                        in_header = True
                elif line_data in ('8221', '0021'):
                    # start of trailer
                    in_header = False
                    if outname is not None:
                        source_files['data'].append(outname)
                        # reset so that a second trailer record does
                        # not list the same data file again
                        outname = None
                    if not in_trailer:
                        if outfile is not None:
                            outfile.close()
                        trailer = os.path.join(self.work_dir, 'trailer.txt')
                        source_files['trailer'] = trailer
                        outfile = open(trailer, 'w+b')
                        in_trailer = True
                else:
                    # start of data record
                    in_header = False
                    in_trailer = False
                    if outfile is not None:
                        outfile.close()
                    if outname is not None:
                        source_files['data'].append(outname)
                    # mkstemp() creates the file atomically, unlike mktemp()
                    handle, outname = tempfile.mkstemp(suffix='.txt', dir=self.work_dir)
                    outfile = os.fdopen(handle, 'w+b')
                    if line_data not in ('8202', '8201'):
                        _log.Log(gmLog.lWarn, 'unbekannter Satztyp [%s]' % line_data)

            if outfile is None:
                # lines before the first record have nowhere to go
                continue
            outfile.write(line)

        # file ended without a trailer: do not lose the last record
        if outname is not None:
            source_files['data'].append(outname)
    finally:
        if outfile is not None:
            outfile.close()
        fileinput.close()
    return source_files
255
256
259
262
273
284
308
331
340
383
# Dispatch table for the lines of 8201/8202 request records: maps the
# line type to the transformer that is called as
# handler(self, request_data). None marks line types that are known
# but skipped (the skip is logged only).
__820xline_handler = {
    '0020': None,
    '9105': None,
    '8000': None,
    '8100': None,
    '8310': None,
    '8311': __xform_8311,
    '8301': __xform_8301,
    '8302': __xform_8302,
    '8303': __xform_8303,
    '3100': None,
    '3101': None,
    '3102': None,
    '3103': __xform_3103,
    '3104': None,
    '8401': __xform_8401,
    '8403': None,
    '8405': __xform_8405,
    '8407': __xform_8407,
    '8609': None
}
405
407
def __handle_8202(self, request_data):
    """Import the request-level lines of an 8202 record.

    NOTE(review): the ``def`` line is missing from the listing under
    review. The name comes from ``__chunk8000_handler`` and the
    parameter from the body -- confirm.

    The record must carry a sample ID (line 8310) for which a lab
    request already exists; otherwise a housekeeping to-do is filed
    and the record is rejected.

    request_data -- dict mapping line type to the list of line data

    Returns True on success, False if the record cannot be imported.
    """
    try:
        reqid = request_data['8310'][0]
    # A tuple is required here: "except KeyError, IndexError" caught
    # KeyError only and rebound the name IndexError to the exception.
    except (KeyError, IndexError):
        _log.Log(gmLog.lErr, 'Satz vom Typ [8000:%s] enthält keine Probennummer' % request_data['8000'][0])
        return False

    try:
        self.__request = gmPathLab.cLabRequest(req_id=reqid, lab=self.__lab_name)
    except gmExceptions.ConstructorError:
        prob = 'Kann keine Patientenzuordnung der Probe finden.'
        sol = 'Zuordnung der Probe zu einem Patienten prüfen. Falls doch vorhanden, Systembetreuer verständigen.'
        ctxt = 'Labor [%s], Probe [%s], LDT-Datei [%s]' % (self.__lab_name, reqid, self.ldt_filename)
        add_todo(problem=prob, solution=sol, context=ctxt)
        _log.LogException('cannot get lab request', sys.exc_info(), verbose=0)
        return False

    for line_type in list(request_data):
        try:
            handle_line = cLDTImporter.__820xline_handler[line_type]
        except KeyError:
            _log.LogException('no handler for line [%s:%s] in [8000:8201] record' % (line_type, request_data[line_type]), sys.exc_info(), verbose=0)
            continue

        if handle_line is None:
            # known line type that is deliberately not imported
            try:
                name = gmXdtMappings.xdt_id_map[line_type]
            except KeyError:
                name = '?'
            _log.Log(gmLog.lData, 'skipping [%s] (%s)' % (line_type, name))
            continue

        line_data = handle_line(self, request_data)
        if line_data is False:
            _log.Log(gmLog.lErr, 'handling line [%s] failed' % line_type)
            return False
        try:
            self.__request[cLDTImporter._map_820xline2req_field[line_type]] = line_data
        except KeyError:
            # not every handled line maps to a request field
            pass
    # FIXME (from the module docstring): the save status is not checked
    self.__request.save_payload()
    return True
452
453
def __get_request_from_8201(self, request_data):
    """Find or create the lab request an 8201 record belongs to.

    NOTE(review): the ``def`` line is missing from the listing under
    review; the signature is reconstructed from the call in the 8201
    handler -- confirm.

    An existing request is looked up by sample ID (line 8310). If
    the record also carries patient data (lines 3101, 3102, 3103),
    that data must match the patient the request is linked to. If no
    request exists, the patient data is used to find exactly one
    patient and a new request is created for that patient.

    request_data -- dict mapping line type to the list of line data

    Returns the lab request, or None if none could be found or
    created (details are logged).
    """
    request = None
    try:
        pat_ldt = {
            'lastnames': request_data['3101'][0],
            'firstnames': request_data['3102'][0],
            'dob': gmXdtMappings.xdt_8date2iso(request_data['3103'][0])
        }
    # A tuple is required here: "except KeyError, IndexError" caught
    # KeyError only and rebound the name IndexError to the exception.
    except (KeyError, IndexError):
        pat_ldt = None

    if '8310' in request_data:
        reqid = request_data['8310'][0]
        try:
            request = gmPathLab.cLabRequest(req_id=reqid, lab=self.__lab_name)
        except gmExceptions.ConstructorError:
            _log.LogException('cannot get lab request', sys.exc_info(), verbose=0)

    if request is not None:
        # cross-check the patient if the file names one
        if pat_ldt is not None:
            pat_db = request.get_patient()
            if ((pat_ldt['lastnames'] != pat_db[3]) or
                (pat_ldt['firstnames'] != pat_db[2]) or
                (pat_ldt['dob'] != pat_db[4].strftime('%Y-%m-%d'))):
                _log.Log(gmLog.lErr, 'patient mismatch LDT-Datei <-> Datenbank')
                _log.Log(gmLog.lData, 'Datei: %s' % pat_ldt)
                _log.Log(gmLog.lData, 'DB: %s' % pat_db)
                return None
        return request

    # no request on file: try to create one from the patient data
    if pat_ldt is None:
        _log.Log(gmLog.lErr, 'Satz vom Typ [8000:%s] enthaelt nicht alle Felder [3101, 3102, 3103]' % request_data['8000'][0])
        _log.Log(gmLog.lErr, 'Kann lab_request nicht automatisch erzeugen.')
        return None

    # TODO: must use dto, not search_dict (this reminder used to be a
    # debug print to stdout)
    searcher = gmPersonSearch.cPatientSearcher_SQL()
    pat_ids = searcher.get_patient_ids(search_dict=pat_ldt)
    if len(pat_ids) == 0:
        _log.Log(gmLog.lErr, 'Kann in der Datenbank keinen Patienten fuer %s finden.' % str(pat_ldt))
        return None
    if len(pat_ids) > 1:
        _log.Log(gmLog.lErr, 'Mehrere Patienten fuer %s gefunden: %s' % (str(pat_ldt), str(pat_ids)))
        return None

    try:
        pat = gmPerson.gmPerson(aPKey=pat_ids[0])
    except gmExceptions.ConstructorError:
        _log.LogException('patient error', sys.exc_info())
        return None

    # prefer the IDs supplied by the lab, fall back to a random one
    if '8310' in request_data:
        reqid = request_data['8310'][0]
    elif '8311' in request_data:
        reqid = request_data['8311'][0]
    else:
        reqid = str(random.randrange(sys.maxsize))
    try:
        emr = pat.emr
        request = emr.add_lab_request(lab=self.__lab_name, req_id=reqid)
    finally:
        # release the patient object even if creating the request raises
        pat.cleanup()
    if request is None:
        _log.Log(gmLog.lErr, 'cannot auto-create lab request with [%s:%s]' % (self.__lab_name, reqid))
        return None

    return request
521
def __handle_8201(self, request_data):
    """Import the request-level lines of an 8201 record.

    NOTE(review): the ``def`` line is missing from the listing under
    review. The name comes from ``__chunk8000_handler`` and the
    parameter from the body -- confirm.

    The matching lab request is obtained from
    __get_request_from_8201(); if there is none, a housekeeping
    to-do is filed and the record is rejected. Every line type of
    the record is then passed to its handler, and values that map to
    a request field are stored on the request before it is saved.

    request_data -- dict mapping line type to the list of line data

    Returns True on success, False otherwise.
    """
    self.__request = self.__get_request_from_8201(request_data)
    if self.__request is None:
        prob = 'Kann Labordaten keiner Anforderung zuordnen.'
        sol = 'Zuordnungen ueberpruefen. Systembetreuer verstaendigen. Details im Log.'
        ctxt = 'Labor [%s], LDT-Datei [%s]' % (self.__lab_name, self.ldt_filename)
        add_todo(problem=prob, solution=sol, context=ctxt)
        _log.Log(gmLog.lErr, 'cannot find lab request matching data derived from 8201 record')
        return False

    for ltype in request_data.keys():
        try:
            transform = cLDTImporter.__820xline_handler[ltype]
        except KeyError:
            _log.LogException('no handler for line [%s:%s] in [8000:8201] record' % (ltype, request_data[ltype]), sys.exc_info(), verbose=0)
            continue

        if transform is not None:
            value = transform(self, request_data)
            if value is False:
                _log.Log(gmLog.lErr, 'failed to handle [%s] line' % ltype)
                return False
            try:
                self.__request[cLDTImporter._map_820xline2req_field[ltype]] = value
            except KeyError:
                # handled line without a matching request field
                pass
            continue

        # known line type that is deliberately skipped
        try:
            label = gmXdtMappings.xdt_id_map[ltype]
        except KeyError:
            label = '?'
        _log.Log(gmLog.lData, 'skipping [%s] (%s)' % (ltype, label))

    self.__request.save_payload()
    return True
560
561
# Dispatch table for 8000 chunks, keyed by the record type found in
# the data of the 8000 line. Handlers are called as
# handler(self, chunk); None means the record type is accepted
# without any processing.
__chunk8000_handler = {
    '8220': None,
    '8201': __handle_8201,
    '8202': __handle_8202
}
568
def __handle_8000(self, chunk):
    """Dispatch an 8000 chunk to the handler for its record type.

    NOTE(review): the ``def`` line is missing from the listing under
    review. The name comes from ``__chunk_handler`` and the
    parameter from the body -- confirm.

    chunk -- dict mapping line type to the list of line data; the
             record type is the first value stored under '8000'

    Returns True if the record type needs no processing or its
    handler succeeded. Returns False if there is no handler for the
    record type or the handler failed.
    """
    try:
        handler = cLDTImporter.__chunk8000_handler[chunk['8000'][0]]
    except KeyError:
        _log.Log(gmLog.lErr, 'kein Handler fuer Satztyp [8000:%s]' % chunk['8000'][0])
        return False

    # a None entry marks record types that are accepted as they are
    if handler is None or handler(self, chunk):
        return True

    _log.Log(gmLog.lErr, 'kann Satz vom Typ [8000:%s] nicht importieren' % chunk['8000'][0])
    return False
581
582
593
601
609
613
621
629
637
649
653
661
669
673
677
681
685
697
# Dispatch table for the lines of an 8410 (result) chunk: maps the
# line type to the transformer that is called as
# handler(self, result_data) and signals failure by returning False.
# None marks line types that are skipped silently; a line type missing
# from this table makes the import of the chunk fail.
__8410line_handler = {
    '5001': None,
    '8404': None,
    '8406': None,
    '8410': None,
    '8411': None,
    '8412': None,
    '8418': __xform_8418,
    '8428': __xform_8428,
    '8429': __xform_8429,
    '8430': __xform_8430,
    '8431': __xform_8431,
    '8432': __xform_8432,
    '8433': __xform_8433,
    '8420': __xform_8420,
    '8421': __xform_8421,
    '8480': __xform_8480,
    '8470': __xform_8470,
    '8460': __xform_8460,
    '8461': __xform_8461,
    '8462': __xform_8462,
    '8422': __xform_8422,
    '8490': __xform_8490
}
722
def __handle_8410(self, result_data):
    """Import one 8410 chunk (a single lab result) for the current request.

    NOTE(review): the ``def`` line is missing from the listing under
    review. The name comes from ``__chunk_handler`` and the
    parameter from the body -- confirm.

    A test type is created or retrieved for the result, then the
    result row is created, the line handlers fill in its fields and
    it is saved. Records consisting of nothing but lines 8410, 8411
    and 8412 are treated as billing-only and skipped.

    result_data -- dict mapping line type to the list of line data

    Returns True if the result was imported or deliberately skipped
    (billing-only record, duplicate result), False on any failure.
    """
    if self.__request is None:
        _log.Log(gmLog.lErr, 'Kann Labordaten nicht ohne Zuordnung importieren.')
        return False

    if len(result_data) == 3:
        if ('8410' in result_data and
            '8411' in result_data and
            '8412' in result_data):
            _log.Log(gmLog.lInfo, 'skipping billing-only record')
            return True

    try:
        vnum = '\n'.join(result_data['8420'])
    except KeyError:
        vnum = None

    try:
        valpha = '\n'.join(result_data['8480'])
    except KeyError:
        valpha = None
    if (valpha is None) and (vnum is None):
        valpha = ''

    # The two defaults below were announced in the log but never
    # assigned, so a record lacking 8410 or 8411 died with an
    # UnboundLocalError. The empty string mirrors the default used for
    # the unit. NOTE(review): confirm that create_measurement_type()
    # copes with an empty code/name.
    try:
        vcode = result_data['8410'][0]
    except KeyError:
        _log.Log(gmLog.lWarn, 'adding default test type code')
        vcode = ''

    try:
        vname = result_data['8411'][0]
    except KeyError:
        _log.Log(gmLog.lWarn, 'adding default test type name')
        vname = ''

    try:
        vunit = result_data['8421'][0]
    except KeyError:
        vunit = ''

    status, ttype = gmPathLab.create_measurement_type (
        lab=self.__lab_name,
        code=vcode,
        name=vname,
        unit=vunit
    )
    if status in [False, None]:
        _log.Log(gmLog.lErr, 'cannot create/retrieve test type')
        return False
    if ttype['comment'] in [None, '']:
        # leave a trace of where an uncommented test type came from
        ttype['comment'] = 'created [%s] by [$RCSfile: gmLDTimporter.py,v $ $Revision: 1.34 $] from [%s]' % (time.strftime('%Y-%m-%d %H:%M'), self.ldt_filename)
        ttype.save_payload()

    whenfield = 'lab_rxd_when'
    status, self.__lab_result = gmPathLab.create_lab_result(
        patient_id = self.__request.get_patient()[0],
        when_field = whenfield,
        when = self.__request[whenfield],
        test_type = ttype['pk'],
        val_num = vnum,
        val_alpha = valpha,
        unit = vunit,
        request = self.__request
    )
    if status is False:
        _log.Log(gmLog.lErr, 'cannot create result record')
        _log.Log(gmLog.lInfo, str(result_data)[:500])
        return False

    if status is None:
        _log.Log(gmLog.lData, 'skipping duplicate lab result on import')
        _log.Log(gmLog.lData, 'in file: %s' % str(result_data))
        return True

    # Both values are reset before the line handlers run; presumably
    # the 8420/8480 handlers set them again -- those handlers are not
    # part of this listing, TODO confirm.
    self.__lab_result['val_num'] = None
    self.__lab_result['val_alpha'] = None

    self.__lab_result['reviewed'] = 'false'
    if self.__ref_group_str != '':
        self.__lab_result['ref_group'] = self.__ref_group_str

    for line_type in list(result_data):
        try:
            handle_line = cLDTImporter.__8410line_handler[line_type]
        except KeyError:
            _log.LogException('no handler for line [%s] in [8410] record' % line_type, sys.exc_info(), verbose=0)
            return False

        if handle_line is None:
            continue
        if handle_line(self, result_data) is False:
            _log.Log(gmLog.lErr, 'cannot handle [%s] line' % line_type)
            return False

    if (self.__lab_result['val_alpha'] is None) and (self.__lab_result['val_num'] is None):
        _log.Log(gmLog.lWarn, 'both result fields empty, setting alphanumeric default')
        self.__lab_result['val_alpha'] = ''
    saved, msg = self.__lab_result.save_payload()
    del self.__lab_result
    if not saved:
        _log.Log(gmLog.lErr, 'kann Laborwert (8410) nicht importieren')
        return False
    _log.Log(gmLog.lInfo, 'Laborwert (8410) erfolgreich importiert')
    return True
829
830
# Dispatch table for the chunks of a request file, keyed by the line
# type that started the chunk; handlers are called as
# handler(self, chunk) and return a true value on success.
__chunk_handler = {
    '8000': __handle_8000,
    '8410': __handle_8410
}
835
837
def __import_request_file(self, filename):
    """Import one request file produced by splitting an LDT file.

    NOTE(review): the ``def`` line is missing from the listing under
    review; the signature is reconstructed from the call in
    import_file() -- confirm.

    The file is read line by line. Lines are collected into chunks,
    a new chunk starting at every line whose type is listed in
    ``_chunk_starters``. Each completed chunk is dispatched to its
    handler, including the last one in the file, which used to be
    dropped without ever being handled.

    filename -- path of the request file

    Returns True if all chunks were imported. On the first failing
    chunk the current request (if any) is flagged as partial and
    pending, and False is returned.
    """
    # Advances the random number generator; the value itself is not
    # used. NOTE(review): presumably meant to vary the random request
    # IDs generated elsewhere -- confirm whether this is still needed.
    random.getrandbits(random.randint(1,4))
    self.__ref_group_str = ''
    self.__request = None
    chunk = {}
    chunk_type = None
    # fileinput keeps module-global state, so close it on every exit path
    try:
        for line in fileinput.input(filename):
            tmp = line.replace('\r', '').replace('\n', '')
            line_type = tmp[3:7]
            line_data = tmp[7:]
            if line_type in cLDTImporter._chunk_starters:
                # a new chunk begins: process the one collected so far
                if chunk and not self.__dispatch_chunk(chunk_type, chunk):
                    return False
                chunk = {}
                chunk_type = line_type

            if line_type not in chunk:
                chunk[line_type] = []
            chunk[line_type].append(line_data)

        # the last chunk is not followed by another chunk starter
        if chunk and not self.__dispatch_chunk(chunk_type, chunk):
            return False
    finally:
        fileinput.close()
    return True

def __dispatch_chunk(self, chunk_type, chunk):
    """Hand one chunk to its handler; flag the request as partial on failure."""
    try:
        handle_chunk = cLDTImporter.__chunk_handler[chunk_type]
    except KeyError:
        handle_chunk = None

    if handle_chunk is not None and handle_chunk(self, chunk):
        return True

    # Remember that this request was not imported completely.
    if self.__request is not None:
        self.__request['request_status'] = 'partial'
        self.__request['is_pending'] = 'true'
        self.__request.save_payload()
    if handle_chunk is None:
        _log.Log(gmLog.lErr, 'kein Handler fuer Satztyp [%s] verfuegbar' % chunk_type)
    else:
        _log.Log(gmLog.lErr, 'cannot handle [%s] chunk' % chunk_type)
    return False
879
880
881
882
883
884
885
886
887
888
889
890
892
def run_import():
    """Import every LDT file matching the configured pattern.

    NOTE(review): the ``def`` line is missing from the listing under
    review; the name is taken from the ``profile.run('run_import()')``
    call in the script entry point -- confirm.

    Reads the target repository, the import repository and the file
    name pattern from the module-level config. Every matching file
    is imported; a successfully imported file is copied to the
    target repository and removed from the import directory, while a
    failed file is left in place.

    Returns False if the configuration is incomplete or the target
    repository is not a writable directory. Returns True otherwise,
    even if individual files could not be imported.
    """
    target_dir = _cfg.get('target', 'repository')
    if target_dir is None:
        # this case used to fail without any hint in the log
        _log.Log(gmLog.lErr, 'no target repository found\n')
        return False
    target_dir = os.path.abspath(os.path.expanduser(target_dir))
    if not (os.access(target_dir, os.W_OK) and os.path.isdir(target_dir)):
        _log.Log(gmLog.lErr, 'cannot write to target repository [%s]\n' % target_dir)
        return False

    import_dir = _cfg.get('import', 'repository')
    if import_dir is None:
        _log.Log(gmLog.lErr, 'no import dir found\n')
        return False
    import_dir = os.path.abspath(os.path.expanduser(import_dir))
    filename_pattern = _cfg.get('import', 'file pattern')
    if filename_pattern is None:
        _log.Log(gmLog.lErr, 'no import file name pattern found\n')
        return False
    import_file_pattern = os.path.join(import_dir, filename_pattern)
    files2import = glob.glob(import_file_pattern)

    importer = cLDTImporter(cfg=_cfg)
    for ldt_file in files2import:
        _log.Log(gmLog.lInfo, 'importing LDT file [%s]' % ldt_file)
        if not importer.import_file(ldt_file):
            _log.Log(gmLog.lErr, 'cannot import LDT file\n')
            continue
        _log.Log(gmLog.lInfo, 'success importing LDT file\n')
        try:
            shutil.copy(ldt_file, target_dir)
            os.remove(ldt_file)
        except Exception:
            # pass the exception info like every other LogException() call
            _log.LogException('cannot move [%s] to [%s]\n' % (ldt_file, target_dir), sys.exc_info())
    return True
926
def add_todo(problem, solution, context):
    """File a housekeeping to-do on behalf of this importer.

    problem  -- description of what went wrong
    solution -- suggested remedy
    context  -- where the problem occurred (lab, sample, LDT file)

    The entry is filed under category 'lab' for receiver 'user',
    with the RCS id of this script as the reporter.
    """
    gmPG.add_housekeeping_todo(
        reporter='$RCSfile: gmLDTimporter.py,v $ $Revision: 1.34 $',
        receiver='user',
        problem=problem,
        solution=solution,
        context=context,
        category='lab'
    )
932
933
934
if __name__ == '__main__':
    # Script entry point: connect to the database with the credentials
    # from the config file, run the import under the profiler (stats
    # go to ./profile.log) and exit with 0 on success or the message
    # 'aborting' after an unhandled exception.
    if _cfg is None:
        _log.Log(gmLog.lErr, 'need config file to run')
        sys.exit(1)

    # all login parameters live in the [database] group of the config
    login_args = {}
    for option in ('user', 'password', 'host', 'port', 'database'):
        login_args[option] = _cfg.get('database', option)
    auth_data = gmLoginInfo.LoginInfo(**login_args)
    backend = gmPG.ConnectionPool(login = auth_data)

    exit_status = 0
    try:
        import profile
        profile.run('run_import()', './profile.log')
    except Exception:
        _log.LogException('unhandled exception caught', sys.exc_info(), verbose=1)
        exit_status = 'aborting'
    # the listeners are stopped on both the success and the failure path
    backend.StopListeners()
    sys.exit(exit_status)
959
960
961