Package Gnumed :: Package business :: Module gmXdtObjects
[frames | no frames]

Source Code for Module Gnumed.business.gmXdtObjects

  1  """GNUmed German XDT parsing objects. 
  2   
  3  This encapsulates some of the XDT data into 
  4  objects for easy access. 
  5  """ 
  6  #============================================================== 
  7  __version__ = "$Revision: 1.33 $" 
  8  __author__ = "K.Hilbert, S.Hilbert" 
  9  __license__ = "GPL" 
 10   
 11  import os.path, sys, linecache, io, re as regex, time, datetime as pyDT, logging, io 
 12   
 13   
 14  if __name__ == '__main__': 
 15          sys.path.insert(0, '../../') 
 16  from Gnumed.pycommon import gmDateTime, gmTools 
 17  from Gnumed.business import gmXdtMappings, gmPerson 
 18   
 19   
 20  _log = logging.getLogger('gm.xdt') 
 21  _log.info(__version__) 
 22   
 23  #============================================================== 
class cDTO_xdt_person(gmPerson.cDTO_person):
    """Person data transfer object derived from gmPerson.cDTO_person.

    The only member defined here is store(), which does nothing.
    """

    def store(self):
        """Do nothing and return None."""
        return None
28 #==============================================================
def determine_xdt_encoding(filename=None, default_encoding=None):
    """Return the character set declared inside an xDT file.

    The file is scanned line by line for the first field whose ID
    (columns 3-6 of the line) is listed in gmXdtMappings._charset_fields.
    The single character following the field ID is translated into an
    encoding name via gmXdtMappings._map_field2charset.

    filename: path of the xDT file to inspect
    default_encoding: value returned when the file declares no charset

    Returns the encoding found in the file, or default_encoding.
    Raises KeyError when the charset value found in the file is not
    known to gmXdtMappings._map_field2charset.
    """
    file_encoding = None

    # The real encoding is not known yet, so decode leniently: only the
    # field ID and the one-character charset value are looked at.
    with io.open(filename, mode = 'rt', encoding = 'utf8', errors = 'ignore') as xdt_file:
        for line in xdt_file:
            field = line[3:7]
            if field not in gmXdtMappings._charset_fields:
                continue
            _log.debug('found charset field [%s] in <%s>', field, filename)
            val = line[7:8]
            file_encoding = gmXdtMappings._map_field2charset[field][val]
            _log.debug('encoding in file is "%s" (%s)', file_encoding, val)
            break

    if file_encoding is None:
        _log.debug('no encoding found in <%s>, assuming [%s]', filename, default_encoding)
        return default_encoding

    return file_encoding
49 #==============================================================
def read_person_from_xdt(filename=None, encoding=None, dob_format=None):
    """Read the first patient found in an xDT file into a person DTO.

    filename: path of the xDT file
    encoding: encoding of the file; when None it is looked up in the
        file itself via determine_xdt_encoding()
    dob_format: time.strptime() format of the date of birth field,
        '%d%m%Y' is used when None

    Returns a gmPerson.cDTO_person with firstnames, lastnames, dob,
    gender, zip, urb, street and source set (None where the file does
    not provide a value).

    Raises ValueError when the last name (3101) or first name (3102)
    field is missing, or when the date of birth does not match the
    expected format.
    """
    _map_id2name = {
        '3101': 'lastnames',
        '3102': 'firstnames',
        '3103': 'dob',
        '3110': 'gender',
        '3106': 'zipurb',
        '3107': 'street',
        '3112': 'zip',
        '3113': 'urb',
        '8316': 'source'
    }
    needed_fields = ('3101', '3102')

    # try to find encoding if not given
    if encoding is None:
        encoding = determine_xdt_encoding(filename=filename)

    data = {}
    with io.open(filename, mode = 'rt', encoding = encoding) as xdt_file:
        for line in xdt_file:
            line = line.replace('\015', '').replace('\012', '')
            # xDT line format: aaabbbbcccccccccccCRLF where aaa = length, bbbb = record type, cccc... = content
            field = line[3:7]
            if field not in _map_id2name:
                continue
            name = _map_id2name[field]
            if name in data:
                # a field showing up a second time ends the scan,
                # only the first value of each field is used
                break
            data[name] = line[7:]

    # found enough data ? (check the needed fields themselves, not just how many fields were seen)
    missing = [field for field in needed_fields if _map_id2name[field] not in data]
    if missing:
        raise ValueError('insufficient patient data in XDT file [%s], found only: %s' % (filename, data))

    dto = gmPerson.cDTO_person()
    dto.firstnames = data['firstnames']
    dto.lastnames = data['lastnames']

    # CAVE: different data orders are possible, so configuration may be needed
    # FIXME: detect xDT version and use default from the standard when dob_format is None
    if 'dob' in data:
        dob = time.strptime(data['dob'], gmTools.coalesce(dob_format, '%d%m%Y'))
        dto.dob = pyDT.datetime(dob.tm_year, dob.tm_mon, dob.tm_mday, tzinfo = gmDateTime.gmCurrentLocalTimezone)
    else:
        dto.dob = None

    # KeyError covers both a missing gender field and an unmapped gender value
    try:
        dto.gender = gmXdtMappings.map_gender_xdt2gm[data['gender'].lower()]
    except KeyError:
        dto.gender = None

    # a dedicated zip/urb field takes precedence over the combined zipurb field
    dto.zip = None
    dto.urb = None
    if 'zipurb' in data:
        zip_match = regex.match(r'\d{5}', data['zipurb'])
        if zip_match is not None:
            dto.zip = zip_match.group()
        dto.urb = regex.sub(r'\d{5} ', '', data['zipurb'])
    if 'zip' in data:
        dto.zip = data['zip']
    if 'urb' in data:
        dto.urb = data['urb']

    dto.street = data.get('street')
    dto.source = data.get('source')

    return dto
150 #==============================================================
class cLDTFile(object):
    """Access to the header, the tail and the patient records of an LDT file.

    Lines are interpreted in xDT line format: 3 characters length,
    4 characters field ID, then the content. A field 8000 starts a
    record; content 8202 marks the start of an LG-Bericht (patient
    record), content 8221 marks the start of the tail.
    """

    def __init__(self, filename=None, encoding=None, override_encoding=False):
        """Remember the file and decide which encoding to read it with.

        filename: path of the LDT file
        encoding: encoding to use when the file does not declare one,
            or always when override_encoding is True
        override_encoding: prefer encoding over the one declared in the file

        Raises ValueError when no usable encoding can be determined.
        """
        file_encoding = determine_xdt_encoding(filename=filename)
        if file_encoding is None:
            _log.warning('LDT file <%s> does not specify encoding', filename)
            if encoding is None:
                raise ValueError('no encoding specified in file <%s> or method call' % filename)

        if override_encoding:
            if encoding is None:
                raise ValueError('no encoding specified in method call for overriding encoding in file <%s>' % filename)
            self.encoding = encoding
        elif file_encoding is None:
            self.encoding = encoding
        else:
            self.encoding = file_encoding

        self.filename = filename

        # filled lazily by _get_header() / _get_tail()
        self.__header = None
        self.__tail = None
    #----------------------------------------------------------
    @staticmethod
    def _parse_line(line):
        """Return (field ID, content) of one line, CR/LF removed from the content."""
        return line[3:7], line[7:].replace('\015', '').replace('\012', '')
    #----------------------------------------------------------
    def _get_header(self):
        """Return the list of lines preceding the first LG-Bericht record (cached)."""
        if self.__header is not None:
            return self.__header

        header = []
        with io.open(self.filename, mode = 'rt', encoding = self.encoding) as ldt_file:
            for line in ldt_file:
                field, content = self._parse_line(line)
                # loop until found first LG-Bericht
                if field == '8000' and content == '8202':
                    break
                header.append(line)

        # only cache once the file was read successfully
        self.__header = header
        return self.__header

    # dummy setter as used by the file's convention, the value is computed from the file
    header = property(_get_header, lambda x:x)
    #----------------------------------------------------------
    def _get_tail(self):
        """Return the list of lines from the tail record to the end of the file (cached)."""
        if self.__tail is not None:
            return self.__tail

        tail = []
        in_tail = False
        with io.open(self.filename, mode = 'rt', encoding = self.encoding) as ldt_file:
            for line in ldt_file:
                if not in_tail:
                    field, content = self._parse_line(line)
                    # loop until found tail
                    if field != '8000' or content != '8221':
                        continue
                    in_tail = True
                tail.append(line)

        # only cache once the file was read successfully
        self.__tail = tail
        return self.__tail

    # dummy setter as used by the file's convention, the value is computed from the file
    tail = property(_get_tail, lambda x:x)
    #----------------------------------------------------------
    def split_by_patient(self, dir=None, file=None):
        """Write one LDT file per LG-Bericht record found in this file.

        Each output file consists of the header of this file, the lines
        of one LG-Bericht record (including its record start line) and
        the tail of this file. Lines are written with CRLF line ends as
        per the xDT line format.

        dir: directory to write to, defaults to the directory of this file
        file: file name the output names are derived from, defaults to
            the name of this file; outputs are named <stem>-<NNNN><ext>

        Returns the list of paths written, in record order.
        """
        if dir is None:
            dir = os.path.dirname(os.path.abspath(self.filename))
        if file is None:
            file = os.path.basename(self.filename)
        stem, ext = os.path.splitext(file)

        header = ''.join(self.header)
        tail = ''.join(self.tail)

        written = []
        out_file = None
        try:
            with io.open(self.filename, mode = 'rt', encoding = self.encoding) as ldt_file:
                for line in ldt_file:
                    field, content = self._parse_line(line)

                    # start of record
                    if field == '8000':
                        # any record start ends the record currently being written
                        if out_file is not None:
                            out_file.write(tail)
                            out_file.close()
                            out_file = None
                        # start of LG-Bericht
                        if content == '8202':
                            out_name = os.path.join(dir, '%s-%04d%s' % (stem, len(written) + 1, ext))
                            out_file = io.open(out_name, mode = 'wt', encoding = self.encoding, newline = '\r\n')
                            written.append(out_name)
                            out_file.write(header)

                    if out_file is not None:
                        out_file.write(line)

            # file ended while still inside a record
            if out_file is not None:
                out_file.write(tail)
        finally:
            if out_file is not None:
                out_file.close()

        return written
259 #============================================================== 260 # FIXME: the following *should* get wrapped in class XdtFile ... 261 #--------------------------------------------------------------
def xdt_get_pats(aFile):
    """Return a dict mapping patient IDs to patient names found in an xDT file.

    A line with field 3000 sets the current patient ID, a following
    line with field 3101 provides the name for that ID. The first name
    seen for an ID is kept. Name lines appearing before any ID line are
    ignored.

    aFile: path of the xDT file; it is read with the encoding declared
        in the file, or the platform default when none is declared
    """
    pats = {}
    seen_ids = set()
    pat_id = None

    # xDT line format: aaabbbbcccccccccccCRLF where aaa = length, bbbb = record type, cccc... = content
    with io.open(aFile, mode = 'rt', encoding = determine_xdt_encoding(filename = aFile)) as xdt_file:
        for line in xdt_file:
            # remove trailing CR and/or LF
            line = line.replace('\015', '').replace('\012', '')
            field = line[3:7]
            # patient id
            if field == '3000':
                pat_id = line[7:]
                seen_ids.add(pat_id)
                continue
            # patient name, keyed by ID so that patients sharing a name are all kept
            if field == '3101' and pat_id is not None:
                pats.setdefault(pat_id, line[7:])

    _log.debug("patients found: %s" % len(seen_ids))
    return pats
291 #==============================================================
def get_pat_files(aFile, ID, name, patdir = None, patlst = None):
    """Look up the files recorded for a patient in the patient list.

    The patient list is queried for option "files" of the group
    "<ID>:<name>". aFile is not used.

    Returns the two-element list [patdir, files].
    """
    _log.debug("getting files for patient [%s:%s]" % (ID, name))
    identity = "%s:%s" % (ID, name)
    files = patlst.get(aGroup = identity, anOption = "files")
    _log.debug("%s => %s" % (patdir, files))
    result = [patdir, files]
    return result
297 #==============================================================
def _xdt_field_and_content(line):
    """Return (field ID, content) of one xDT line with CR/LF removed."""
    line = line.replace('\015', '').replace('\012', '')
    return line[3:7], line[7:]


def split_xdt_file(aFile,patlst,cfg):
    """Split an xDT file into one file per patient record not seen before.

    A record starts at a line with field 8000 and runs up to the next
    such line (the last record runs to the end of the file). The
    patient ID (field 3000) is expected on the third line of a record
    and the patient name (field 3101) on the fourth; records not
    matching that layout are skipped.

    For every matching record an MD5 digest of its lines is compared
    against the digests already registered for that patient in patlst.
    New content is written to the directory configured as
    "xdt-viewer" / "export-dir" in cfg and registered in patlst.

    aFile: path of the xDT file to split
    patlst: patient list, stored via patlst.store() at the end
    cfg: configuration providing cfg.get("xdt-viewer", "export-dir")

    Returns 1.
    """
    import hashlib

    # xDT line format: aaabbbbcccccccccccCRLF where aaa = length, bbbb = record type, cccc... = content
    with io.open(aFile, mode = 'rt', encoding = determine_xdt_encoding(filename = aFile)) as xdt_file:
        lines = xdt_file.readlines()

    # find record starts (records start with 8000)
    record_starts = [
        idx for idx, line in enumerate(lines)
        if _xdt_field_and_content(line)[0] == '8000'
    ]
    # a record ends where the next one starts, the last one at end of file
    record_ends = record_starts[1:] + [len(lines)]

    # loop over patient records
    for start, end in zip(record_starts, record_ends):
        record = lines[start:end]
        if len(record) < 4:
            continue
        # third line of the record: patient id
        field, ID = _xdt_field_and_content(record[2])
        if field != '3000':
            continue
        # fourth line of the record: patient name
        field, name = _xdt_field_and_content(record[3])
        if field != '3101':
            _log.debug("no patient name found in record starting at line %s, skipping", start + 1)
            continue

        _log.debug("reading lines %s to %s", start + 1, end)
        hashes = check_for_previous_records(ID, name, patlst)

        # is this new content ?
        # MD5 only serves to detect duplicate content here; update() needs bytes
        data_hash = hashlib.md5()
        for record_line in record:
            data_hash.update(record_line.encode('utf8'))
        digest = data_hash.hexdigest()
        if digest in hashes:
            continue

        pat_dir = cfg.get("xdt-viewer", "export-dir")
        fname = write_xdt_pat_data(record, pat_dir)
        add_file_to_patlst(ID, name, patlst, fname, digest)

    patlst.store()
    return 1
357 #==============================================================
def get_rand_fname(aDir):
    """Return the file name part of a unique file name generated for aDir.

    The name is obtained from gmTools.get_unique_filename() with an
    empty prefix and the current local time (".YYYYMMDD-HHMMSS") as
    suffix; only its last path component is returned.
    """
    timestamp_suffix = time.strftime(".%Y%m%d-%H%M%S", time.localtime())
    unique_name = gmTools.get_unique_filename(prefix='', suffix = timestamp_suffix, tmp_dir=aDir)
    return os.path.basename(unique_name)
362 #==============================================================
def write_xdt_pat_data(data, aDir):
    """Write the record for this patient to a new file.

    data: iterable of text lines making up the record
    aDir: directory in which the file is created

    The file is named by get_rand_fname() and written as UTF-8.

    Returns the name of the new file (without directory).
    """
    fname = get_rand_fname(aDir)
    with io.open(os.path.join(aDir, fname), mode = "wt", encoding = 'utf8') as pat_file:
        # writelines() writes eagerly; map() would only build a lazy iterator
        pat_file.writelines(data)
    return fname
369 #==============================================================
def check_for_previous_records(ID, name, patlst):
    """Return the content hashes already registered for a patient.

    The patient is identified by the group "<ID>:<name>" in patlst. A
    patient not yet listed is added with an empty "files" option.

    Each entry of the "files" option has the form "<file>:<hash>"; the
    hash is whatever follows the last colon, so file names containing
    colons are handled.

    Returns the list of hashes, in the order of the "files" entries.
    Raises ValueError for an entry containing no colon at all.
    """
    anIdentity = "%s:%s" % (ID, name)
    # patient not listed yet
    if anIdentity not in patlst.getGroups():
        _log.debug("identity not yet in list" )
        patlst.set(aGroup = anIdentity, anOption = 'files', aValue = [], aComment = '')

    # file already listed ?
    hashes = []
    for file_def in patlst.get(aGroup = anIdentity, anOption = "files"):
        fname, ahash = file_def.rsplit(':', 1)
        hashes.append(ahash)

    return hashes
384 #==============================================================
def add_file_to_patlst(ID, name, patlst, new_file, ahash):
    """Register one or more files with their content hash for a patient.

    ID, name: identify the patient, the group used is "<ID>:<name>"
    patlst: patient list providing get() and set()
    new_file: a single file name, or an iterable of file names
    ahash: content hash stored with each file as "<file>:<hash>"

    The entries are appended to the "files" option of the patient and
    the option is written back to patlst.
    """
    anIdentity = "%s:%s" % (ID, name)
    files = patlst.get(aGroup = anIdentity, anOption = "files")

    # accept a single name as well as several names
    if isinstance(new_file, str):
        new_files = [new_file]
    else:
        new_files = list(new_file)

    files.extend("%s:%s" % (fname, ahash) for fname in new_files)
    _log.debug("files now there : %s" % files)
    patlst.set(aGroup=anIdentity, anOption="files", aValue = files, aComment="")
#==============================================================
# main
#--------------------------------------------------------------
if __name__ == "__main__":
    from Gnumed.pycommon import gmI18N, gmLog2

    # log everything while running as a script
    logging.getLogger().setLevel(logging.DEBUG)
    _log = logging.getLogger('gm.xdt')

    #from Gnumed.business import gmPerson
    gmI18N.activate_locale()
    gmI18N.install_domain()
    gmDateTime.init()

    # dump header and tail of the LDT file given as first argument
    ldt = cLDTFile(filename = sys.argv[1])
    for label, part in (("header:", 'header'), ("tail:", 'tail')):
        print(label)
        for line in getattr(ldt, part):
            print(line.encode('utf8', 'replace'))

#   # test framework if run by itself
#   patfile = sys.argv[1]
#   dobformat = sys.argv[2]
#   encoding = sys.argv[3]
#   print "reading patient data from xDT file [%s]" % patfile

#   dto = read_person_from_xdt(patfile, dob_format=dobformat, encoding=encoding)
#   print "DTO:", dto
#   print "dto.dob:", dto.dob, type(dto.dob)
#   print "dto.dob.tz:", dto.dob.tzinfo
#   print "dto.zip: %s dto.urb: %s" % (dto.zip, dto.urb)
#   print "dto.street", dto.street
#   searcher = gmPersonSearch.cPatientSearcher_SQL()
#   ident = searcher.get_identities(dto=dto)[0]
#   print ident
##  print ident.get_medical_age()

#==============================================================