1 """GNUmed German XDT parsing objects.
2
3 This encapsulates some of the XDT data into
4 objects for easy access.
5 """
6
7 __version__ = "$Revision: 1.33 $"
8 __author__ = "K.Hilbert, S.Hilbert"
9 __license__ = "GPL"
10
11 import os.path, sys, linecache, io, re as regex, time, datetime as pyDT, logging, io
12
13
14 if __name__ == '__main__':
15 sys.path.insert(0, '../../')
16 from Gnumed.pycommon import gmDateTime, gmTools
17 from Gnumed.business import gmXdtMappings, gmPerson
18
19
20 _log = logging.getLogger('gm.xdt')
21 _log.info(__version__)
22
23
def determine_xdt_encoding(filename=None, default_encoding=None):
    """Find out which character set an xDT file declares for itself.

    The file is scanned line by line. The first line whose field ID
    (characters 3-6) is listed in gmXdtMappings._charset_fields decides
    the result: the single character following the field ID is looked up
    in gmXdtMappings._map_field2charset for that field.

    NOTE(review): the parameter defaults are assumed; the only call visible
    in this module passes ``filename`` by keyword and omits
    ``default_encoding``.

    Args:
        filename: path of the xDT file to inspect.
        default_encoding: value handed back when the file contains no
            charset field at all.

    Returns:
        The encoding mapped from the charset field, or ``default_encoding``
        when no charset field was found.
    """
    file_encoding = None

    # Undecodable bytes are dropped (errors = 'ignore') because the real
    # encoding is not known yet at this point.
    # The context manager closes the file even when the mapping lookup
    # below raises for an unknown charset value.
    with io.open(filename, mode = 'rt', encoding = 'utf8', errors = 'ignore') as xdt_file:
        for line in xdt_file:
            field = line[3:7]
            if field not in gmXdtMappings._charset_fields:
                continue
            _log.debug('found charset field [%s] in <%s>', field, filename)
            val = line[7:8]
            file_encoding = gmXdtMappings._map_field2charset[field][val]
            _log.debug('encoding in file is "%s" (%s)', file_encoding, val)
            # only the first charset field counts
            break

    if file_encoding is None:
        _log.debug('no encoding found in <%s>, assuming [%s]', filename, default_encoding)
        return default_encoding

    return file_encoding
49
51
52 _map_id2name = {
53 '3101': 'lastnames',
54 '3102': 'firstnames',
55 '3103': 'dob',
56 '3110': 'gender',
57 '3106': 'zipurb',
58 '3107': 'street',
59 '3112': 'zip',
60 '3113': 'urb',
61 '8316': 'source'
62 }
63
64 needed_fields = (
65 '3101',
66 '3102'
67 )
68
69 interesting_fields = _map_id2name.keys()
70
71 data = {}
72
73
74 if encoding is None:
75 encoding = determine_xdt_encoding(filename=filename)
76
77 xdt_file = io.open(filename, mode = 'rt', encoding = encoding)
78
79 for line in xdt_file:
80
81
82
83
84
85 line = line.replace('\015','')
86 line = line.replace('\012','')
87
88
89 field = line[3:7]
90
91 if field in interesting_fields:
92 try:
93 already_seen = data[_map_id2name[field]]
94 break
95 except KeyError:
96 data[_map_id2name[field]] = line[7:]
97
98 xdt_file.close()
99
100
101 if len(data) < len(needed_fields):
102 raise ValueError('insufficient patient data in XDT file [%s], found only: %s' % (filename, data))
103
104 from Gnumed.business import gmPerson
105 dto = gmPerson.cDTO_person()
106
107 dto.firstnames = data['firstnames']
108 dto.lastnames = data['lastnames']
109
110
111
112 try:
113 dob = time.strptime(data['dob'], gmTools.coalesce(dob_format, '%d%m%Y'))
114 dto.dob = pyDT.datetime(dob.tm_year, dob.tm_mon, dob.tm_mday, tzinfo = gmDateTime.gmCurrentLocalTimezone)
115 except KeyError:
116 dto.dob = None
117
118 try:
119 dto.gender = gmXdtMappings.map_gender_xdt2gm[data['gender'].lower()]
120 except KeyError:
121 dto.gender = None
122
123 dto.zip = None
124 try:
125 dto.zip = regex.match('\d{5}', data['zipurb']).group()
126 except KeyError: pass
127 try:
128 dto.zip = data['zip']
129 except KeyError: pass
130
131 dto.urb = None
132 try:
133 dto.urb = regex.sub('\d{5} ', '', data['zipurb'])
134 except KeyError: pass
135 try:
136 dto.urb = data['urb']
137 except KeyError: pass
138
139 try:
140 dto.street = data['street']
141 except KeyError:
142 dto.street = None
143
144 try:
145 dto.source = data['source']
146 except KeyError:
147 dto.source = None
148
149 return dto
150
152
153 - def __init__(self, filename=None, encoding=None, override_encoding=False):
175
177
178 if self.__header is not None:
179 return self.__header
180
181 ldt_file = io.open(self.filename, mode = 'rt', encoding = self.encoding)
182 self.__header = []
183 for line in ldt_file:
184 length, field, content = line[:3], line[3:7], line[7:].replace('\015','').replace('\012','')
185
186 if field == '8000':
187 if content in ['8202']:
188 break
189 self.__header.append(line)
190
191 ldt_file.close()
192 return self.__header
193
194 header = property(_get_header, lambda x:x)
195
197
198 if self.__tail is not None:
199 return self.__tail
200
201 ldt_file = io.open(self.filename, mode = 'rt', encoding = self.encoding)
202 self.__tail = []
203 in_tail = False
204 for line in ldt_file:
205 if in_tail:
206 self.__tail.append(line)
207 continue
208
209 length, field, content = line[:3], line[3:7], line[7:].replace('\015','').replace('\012','')
210
211
212 if field == '8000':
213 if content not in ['8221']:
214 continue
215 in_tail = True
216 self.__tail.append(line)
217
218 ldt_file.close()
219 return self.__tail
220
221 tail = property(_get_tail, lambda x:x)
222
224
225 ldt_file = io.open(self.filename, mode = 'rt', encoding = self.encoding)
226 out_file = None
227
228 in_patient = False
229 for line in ldt_file:
230
231 if in_patient:
232 out_file.write(line)
233 continue
234
235 length, field, content = line[:3], line[3:7], line[7:].replace('\015','').replace('\012','')
236
237
238 if field == '8000':
239
240 if content == '8202':
241 in_patient = True
242 if out_file is not None:
243 out_file.write(''.join(self.tail))
244 out_file.close()
245
246 out_file.write(''.join(self.header))
247 else:
248 in_patient = False
249 if out_file is not None:
250 out_file.write(''.join(self.tail))
251 out_file.close()
252
253 if out_file is not None:
254 if not out_file.closed:
255 out_file.write(''.join(self.tail))
256 out_file.close()
257
258 ldt_file.close()
259
260
261
263 pat_ids = []
264 pat_names = []
265 pats = {}
266
267
268 for line in fileinput.input(aFile):
269
270 line = line.replace('\015','')
271 line = line.replace('\012','')
272
273 field = line[3:7]
274
275 if field == '3000':
276 pat_id = line[7:]
277 if pat_id not in pat_ids:
278 pat_ids.append(pat_id)
279 continue
280
281 if field == '3101':
282 pat_name = line [7:]
283 if pat_name not in pat_names:
284 pat_names.append(pat_name)
285 pats[pat_id] = pat_name
286 continue
287 fileinput.close()
288
289 _log.debug("patients found: %s" % len(pat_ids))
290 return pats
291
def get_pat_files(aFile, ID, name, patdir = None, patlst = None):
    """Look up the files registered for one patient.

    The patient is addressed in ``patlst`` by the group "<ID>:<name>";
    the value of that group's "files" option is fetched and returned
    together with ``patdir``.

    Args:
        aFile: not used by this function.
        ID: patient ID, first half of the group name.
        name: patient name, second half of the group name.
        patdir: passed through unchanged into the result.
        patlst: object offering ``get(aGroup=..., anOption=...)``.

    Returns:
        A two-element list ``[patdir, files]``.
    """
    _log.debug("getting files for patient [%s:%s]" % (ID, name))
    group = "%s:%s" % (ID, name)
    pat_files = patlst.get(aGroup = group, anOption = "files")
    _log.debug("%s => %s" % (patdir, pat_files))
    result = [patdir, pat_files]
    return result
297
299 content=[]
300 lineno = []
301
302
303
304 content = []
305 record_start_lines = []
306
307
308 for line in fileinput.input(aFile):
309 strippedline = line.replace('\015','')
310 strippedline = strippedline.replace('\012','')
311
312 if strippedline[3:7] == '8000':
313 record_start_lines.append(fileinput.filelineno())
314
315
316 for aline in record_start_lines:
317
318 line = linecache.getline(aFile,aline+2)
319
320 strippedline = line.replace('\015','')
321 strippedline = strippedline.replace('\012','')
322
323 field = strippedline[3:7]
324
325 if field == '3000':
326 ID = strippedline[7:]
327 line = linecache.getline(aFile,aline+3)
328
329 strippedline = line.replace('\015','')
330 strippedline = strippedline.replace('\012','')
331
332 field = strippedline[3:7]
333 if field == '3101':
334 name = strippedline [7:]
335 startline=aline
336 endline=record_start_lines[record_start_lines.index(aline)+1]
337 _log.debug("reading from%s" %str(startline)+' '+str(endline) )
338 for tmp in range(startline,endline):
339 content.append(linecache.getline(aFile,tmp))
340 _log.debug("reading %s"%tmp )
341 hashes = check_for_previous_records(ID,name,patlst)
342
343 data_hash = md5.new()
344 map(data_hash.update, content)
345 digest = data_hash.hexdigest()
346 if digest not in hashes:
347 pat_dir = cfg.get("xdt-viewer", "export-dir")
348 file = write_xdt_pat_data(content, pat_dir)
349 add_file_to_patlst(ID, name, patlst, file, ahash)
350 content = []
351 else:
352 continue
353
354 fileinput.close()
355 patlst.store()
356 return 1
357
def write_xdt_pat_data(data, aDir):
    """Write the record of one patient to a newly named file in aDir.

    Args:
        data: iterable of text chunks; they are written verbatim, in
            order, with nothing inserted between them.
        aDir: directory in which the file is created.

    Returns:
        The name of the new file as produced by get_rand_fname(aDir).
        NOTE(review): the previous code returned an undefined name
        ``fname``; the bare file name (not the joined path) is returned
        here - confirm this is what callers expect.
    """
    fname = get_rand_fname(aDir)
    # writelines() consumes the iterable eagerly (a bare map() is lazy on
    # Python 3 and would write nothing); "with" closes the file on error.
    with io.open(os.path.join(aDir, fname), mode = "wt", encoding = 'utf8') as pat_file:
        pat_file.writelines(data)
    return fname
369
def check_for_previous_records(ID, name, patlst):
    """Return the hashes of all files already registered for a patient.

    The patient is addressed in ``patlst`` by the group "<ID>:<name>".
    If that group does not exist yet it is created with an empty "files"
    option. Each entry of the "files" option has the form "<file>:<hash>".

    Args:
        ID: patient ID, first half of the group name.
        name: patient name, second half of the group name.
        patlst: object offering ``getGroups()``, ``get()`` and ``set()``.

    Returns:
        list: the hash part of every "files" entry, in stored order.

    Raises:
        ValueError: an entry contains no colon at all.
    """
    anIdentity = "%s:%s" % (ID, name)

    if anIdentity not in patlst.getGroups():
        _log.debug("identity not yet in list" )
        patlst.set(aGroup = anIdentity, anOption = 'files', aValue = [], aComment = '')

    hashes = []
    for file_def in patlst.get(aGroup = anIdentity, anOption = "files"):
        # split on the LAST colon only: the file part may itself contain
        # colons (e.g. a drive letter) while the hash part is the final field
        _fname, ahash = file_def.rsplit(':', 1)
        hashes.append(ahash)

    return hashes
384
def add_file_to_patlst(ID, name, patlst, new_files, ahash):
    """Register new file(s) with their hash under a patient's entry.

    The patient is addressed in ``patlst`` by the group "<ID>:<name>".
    One "<file>:<hash>" entry per new file is appended to the group's
    "files" option, which is then stored back.

    Args:
        ID: patient ID, first half of the group name.
        name: patient name, second half of the group name.
        patlst: object offering ``get()`` and ``set()``.
        new_files: a single file name, or an iterable of file names.
        ahash: hash recorded alongside every new file.
    """
    anIdentity = "%s:%s" % (ID, name)

    # A bare string is ONE file name. Iterating it would register one
    # bogus entry per character.
    if isinstance(new_files, (str, bytes)):
        new_files = [new_files]

    files = patlst.get(aGroup = anIdentity, anOption = "files")
    files.extend("%s:%s" % (new_file, ahash) for new_file in new_files)
    _log.debug("files now there : %s" % files)
    patlst.set(aGroup=anIdentity, anOption="files", aValue = files, aComment="")
392
393
394
if __name__ == "__main__":
    # Manual self-test: dump header and tail of the LDT file given as argv[1].
    from Gnumed.pycommon import gmI18N, gmLog2

    # let everything down to DEBUG through while testing
    root_log = logging.getLogger()
    root_log.setLevel(logging.DEBUG)
    _log = logging.getLogger('gm.xdt')

    gmI18N.activate_locale()
    gmI18N.install_domain()
    gmDateTime.init()

    ldt = cLDTFile(filename = sys.argv[1])
    # each property is read only after its title has been printed,
    # in the order header first, tail second
    for title, section in (("header:", "header"), ("tail:", "tail")):
        print(title)
        for line in getattr(ldt, section):
            print(line.encode('utf8', 'replace'))
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433