Package Gnumed :: Package business :: Module gmPersonSearch
[frames] | no frames]

Source Code for Module Gnumed.business.gmPersonSearch

   1  # -*- coding: utf-8 -*- 
   2  """GNUmed person searching code.""" 
   3  #============================================================ 
   4  __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>" 
   5  __license__ = "GPL" 
   6   
   7  # std lib 
   8  import sys, logging, re as regex 
   9   
  10   
  11  # GNUmed 
  12  if __name__ == '__main__': 
  13          sys.path.insert(0, '../../') 
  14  from Gnumed.pycommon import gmPG2, gmI18N, gmTools, gmDateTime 
  15  from Gnumed.business import gmPerson 
  16  if __name__ == '__main__': 
  17          sys.path.insert(0, '../../') 
  18          gmI18N.activate_locale() 
  19          gmI18N.install_domain() 
  20   
  21   
  22  _log = logging.getLogger('gm.person') 
  23   
  24  #============================================================ 
25 -class cPatientSearcher_SQL:
26 """UI independant i18n aware patient searcher."""
def __init__(self):
    """Select the query generator and open a database connection with a cursor."""
    # the "_de" generator is the only one visible in this class
    self._generate_queries = self._generate_queries_de
    # make a cursor
    connection = gmPG2.get_connection()
    self.conn = connection
    self.curs = connection.cursor()
32 #--------------------------------------------------------
33 - def __del__(self):
34 try: 35 self.curs.close() 36 except Exception: pass 37 try: 38 self.conn.close() 39 except Exception: pass
40 #-------------------------------------------------------- 41 # public API 42 #--------------------------------------------------------
43 - def get_patients(self, search_term = None, a_locale = None, dto = None):
44 identities = self.get_identities(search_term, a_locale, dto) 45 if identities is None: 46 return None 47 return [ gmPerson.cPatient(aPK_obj=ident['pk_identity']) for ident in identities ]
48 49 #--------------------------------------------------------
50 - def get_identities(self, search_term = None, a_locale = None, dto = None):
51 """Get patient identity objects for given parameters. 52 53 - either search term or search dict 54 - dto contains structured data that doesn't need to be parsed (cDTO_person) 55 - dto takes precedence over search_term 56 """ 57 parse_search_term = (dto is None) 58 59 if not parse_search_term: 60 queries = self._generate_queries_from_dto(dto) 61 if queries is None: 62 parse_search_term = True 63 if len(queries) == 0: 64 parse_search_term = True 65 66 if parse_search_term: 67 # temporary change of locale for selecting query generator 68 if a_locale is not None: 69 print("temporary change of locale on patient search not implemented") 70 _log.warning("temporary change of locale on patient search not implemented") 71 # generate queries 72 if search_term is None: 73 raise ValueError('need search term (dto AND search_term are None)') 74 75 queries = self._generate_queries(search_term) 76 77 # anything to do ? 78 if len(queries) == 0: 79 _log.error('query tree empty') 80 _log.error('[%s] [%s] [%s]' % (search_term, a_locale, str(dto))) 81 return None 82 83 # collect IDs here 84 identities = [] 85 # cycle through query list 86 for query in queries: 87 _log.debug("running %s" % query) 88 try: 89 rows, idx = gmPG2.run_ro_queries(queries = [query], get_col_idx=True) 90 except Exception: 91 _log.exception('error running query') 92 continue 93 if len(rows) == 0: 94 continue 95 identities.extend ( 96 [ gmPerson.cPerson(row = {'pk_field': 'pk_identity', 'data': row, 'idx': idx}) for row in rows ] 97 ) 98 99 pks = [] 100 unique_identities = [] 101 for identity in identities: 102 if identity['pk_identity'] in pks: 103 continue 104 pks.append(identity['pk_identity']) 105 unique_identities.append(identity) 106 107 return unique_identities
108 109 #-------------------------------------------------------- 110 # internal helpers 111 #--------------------------------------------------------
112 - def _normalize_soundalikes(self, aString = None, aggressive = False):
113 """Transform some characters into a regex.""" 114 if aString.strip() == '': 115 return aString 116 117 # umlauts 118 normalized = aString.replace('Ä', '(Ä|AE|Ae|A|E)') 119 normalized = normalized.replace('Ö', '(Ö|OE|Oe|O)') 120 normalized = normalized.replace('Ü', '(Ü|UE|Ue|U)') 121 normalized = normalized.replace('ä', '(ä|ae|e|a)') 122 normalized = normalized.replace('ö', '(ö|oe|o)') 123 normalized = normalized.replace('ü', '(ü|ue|u|y)') 124 normalized = normalized.replace('ß', '(ß|sz|ss|s)') 125 126 # common soundalikes 127 # - René, Desiré, Inés ... 128 normalized = normalized.replace('é', '***DUMMY***') 129 normalized = normalized.replace('è', '***DUMMY***') 130 normalized = normalized.replace('***DUMMY***', '(é|e|è|ä|ae)') 131 132 # FIXME: missing i/a/o - but uncommon in German 133 normalized = normalized.replace('v', '***DUMMY***') 134 normalized = normalized.replace('f', '***DUMMY***') 135 normalized = normalized.replace('ph', '***DUMMY***') # now, this is *really* specific for German 136 normalized = normalized.replace('***DUMMY***', '(v|f|ph)') 137 138 # silent characters (Thomas vs Tomas) 139 normalized = normalized.replace('Th','***DUMMY***') 140 normalized = normalized.replace('T', '***DUMMY***') 141 normalized = normalized.replace('***DUMMY***', '(Th|T)') 142 normalized = normalized.replace('th', '***DUMMY***') 143 normalized = normalized.replace('t', '***DUMMY***') 144 normalized = normalized.replace('***DUMMY***', '(th|t)') 145 146 # apostrophes, hyphens et al 147 normalized = normalized.replace('"', '***DUMMY***') 148 normalized = normalized.replace("'", '***DUMMY***') 149 normalized = normalized.replace('`', '***DUMMY***') 150 normalized = normalized.replace('***DUMMY***', """("|'|`|***DUMMY***|\s)*""") 151 normalized = normalized.replace('-', """(-|\s)*""") 152 normalized = normalized.replace('|***DUMMY***|', '|-|') 153 154 if aggressive: 155 pass 156 # some more here 157 158 _log.debug('[%s] -> [%s]' % (aString, normalized)) 159 160 return 
normalized
161 162 #-------------------------------------------------------- 163 # write your own query generator and add it here: 164 # use compile() for speedup 165 # must escape strings before use !! 166 # ORDER BY ! 167 # FIXME: what about "< 40" ? 168 #--------------------------------------------------------
170 """Generate search queries for [ , <alpha> ] search terms.""" 171 if regex.match(",\s*\w+$", raw.strip()) is None: 172 return [] 173 _log.debug("[%s]: a firstname" % raw) 174 tmp = self._normalize_soundalikes(raw.strip(' ,')) 175 cmd = """ 176 SELECT DISTINCT ON (pk_identity) * FROM ( 177 SELECT *, %(match)s AS match_type FROM (( 178 SELECT d_vap.* 179 FROM dem.names, dem.v_active_persons d_vap 180 WHERE dem.names.firstnames ~ %(first)s and d_vap.pk_identity = dem.names.id_identity 181 ) union all ( 182 SELECT d_vap.* 183 FROM dem.names, dem.v_active_persons d_vap 184 WHERE dem.names.firstnames ~ %(first_w_caps)s and d_vap.pk_identity = dem.names.id_identity 185 ) union all ( 186 SELECT d_vap.* 187 FROM dem.names, dem.v_active_persons d_vap 188 WHERE lower(dem.names.firstnames) ~ lower(%(first)s) and d_vap.pk_identity = dem.names.id_identity 189 )) AS super_list ORDER BY lastnames, firstnames, dob 190 ) AS sorted_list""" 191 args = { 192 'match': _('first name'), 193 'first': '^' + tmp, 194 'first_w_caps': '^' + gmTools.capitalize(tmp, mode = gmTools.CAPS_NAMES) 195 } 196 return [{'cmd': cmd, 'args': args}]
197 198 #--------------------------------------------------------
199 - def __queries_for_lastname_with_comma(self, raw):
200 """Generate search queries for [ <alpha> , ] search terms.""" 201 if regex.match("\w+\s*,$", raw) is None: 202 return [] 203 _log.debug("[%s]: a lastname" % raw) 204 tmp = self._normalize_soundalikes(raw.strip(' ,')) 205 cmd = """ 206 SELECT DISTINCT ON (pk_identity) * FROM ( 207 SELECT *, %(match)s AS match_type FROM (( 208 SELECT d_vap.* 209 FROM dem.names, dem.v_active_persons d_vap 210 WHERE dem.names.lastnames ~ %(last)s and d_vap.pk_identity = dem.names.id_identity 211 ) union all ( 212 SELECT d_vap.* 213 FROM dem.names, dem.v_active_persons d_vap 214 WHERE dem.names.lastnames ~ %(last_w_caps)s and d_vap.pk_identity = dem.names.id_identity 215 ) union all ( 216 SELECT d_vap.* 217 FROM dem.names, dem.v_active_persons d_vap 218 WHERE lower(dem.names.lastnames) ~ lower(%(last)s) and d_vap.pk_identity = dem.names.id_identity 219 )) AS super_list ORDER BY lastnames, firstnames, dob 220 ) AS sorted_list""" 221 args = { 222 'match': _('last name'), 223 'last': '^' + tmp, 224 'last_w_caps': '^' + gmTools.capitalize(tmp, mode=gmTools.CAPS_NAMES) 225 } 226 return [{'cmd': cmd, 'args': args}]
227 228 #--------------------------------------------------------
229 - def __queries_for_LASTNAME(self, raw):
230 """Generate search queries for [ <ALPHA> ] search terms.""" 231 if regex.match("\w+$", raw) is None: 232 return [] 233 if raw != raw.upper(): 234 # not all UPPERCASE 235 return [] 236 _log.debug("[%s]: a lastname" % raw) 237 tmp = self._normalize_soundalikes(raw) 238 cmd = """ 239 SELECT DISTINCT ON (pk_identity) * FROM ( 240 SELECT *, %(match)s AS match_type FROM (( 241 SELECT d_vap.* 242 FROM dem.names, dem.v_active_persons d_vap 243 WHERE dem.names.lastnames ~ %(last_w_caps)s and d_vap.pk_identity = dem.names.id_identity 244 ) union all ( 245 SELECT d_vap.* 246 FROM dem.names, dem.v_active_persons d_vap 247 WHERE lower(dem.names.lastnames) ~ lower(%(last)s) and d_vap.pk_identity = dem.names.id_identity 248 )) AS super_list ORDER BY lastnames, firstnames, dob 249 ) AS sorted_list""" 250 args = { 251 'match': _('last name'), 252 'last': '^' + tmp, 253 'last_w_caps': '^' + gmTools.capitalize(tmp.lower(), mode=gmTools.CAPS_NAMES) 254 } 255 return [{'cmd': cmd, 'args': args}]
256 257 #--------------------------------------------------------
258 - def __queries_for_LAST_and_first(self, raw):
259 """Generate search queries for [ <ALPHA> <alpha> ] or [ <alpha> <ALPHA> ] search terms.""" 260 if regex.match("\w+\s+\w+$", raw) is None: 261 return [] 262 if raw == raw.upper(): 263 # ALL caps 264 return [] 265 if raw == raw.lower(): 266 # ALL lowercase 267 return [] 268 parts = [ p for p in regex.split('\s+', raw) ] 269 last = None 270 if parts[0] == parts[0].upper(): 271 last = parts[0] 272 first = parts[1] 273 if parts[1] == parts[1].upper(): 274 last = parts[1] 275 first = parts[0] 276 # found no UPPERCASE 277 if last is None: 278 return [] 279 _log.debug("[%s]: <LASTNAME firstname> or firstname LASTNAME" % raw) 280 last = self._normalize_soundalikes(last) 281 first = self._normalize_soundalikes(first) 282 cmd = """ 283 SELECT DISTINCT ON (pk_identity) * FROM ( 284 SELECT *, %(match)s AS match_type FROM (( 285 SELECT d_vap.* 286 FROM dem.names, dem.v_active_persons d_vap 287 WHERE 288 dem.names.lastnames ~ %(last)s 289 AND 290 dem.names.firstnames ~ %(first)s 291 AND 292 d_vap.pk_identity = dem.names.id_identity 293 ) union all ( 294 SELECT d_vap.* 295 FROM dem.names, dem.v_active_persons d_vap 296 WHERE 297 lower(dem.names.lastnames) ~ lower(%(last)s) 298 AND 299 lower(dem.names.firstnames) ~ lower(%(first)s) 300 AND 301 d_vap.pk_identity = dem.names.id_identity 302 )) AS super_list ORDER BY lastnames, firstnames, dob 303 ) AS sorted_list""" 304 args = { 305 'match': _('LASTNAME and firstname'), 306 'last': '^' + last, 307 'first': '^' + first 308 } 309 return [{'cmd': cmd, 'args': args}]
310 311 #--------------------------------------------------------
312 - def _generate_simple_query(self, raw):
313 """Compose queries if search term seems unambigous.""" 314 queries = [] 315 316 #raw = raw.strip(' ,;') 317 raw = raw.strip() 318 319 # "<digits>" - GNUmed patient PK or DOB 320 if regex.match("^(\s|\t)*\d+(\s|\t)*$", raw): 321 _log.debug("[%s]: a PK or DOB" % raw) 322 queries.append ({ 323 'cmd': "SELECT *, %s::text AS match_type FROM dem.v_active_persons WHERE pk_identity = %s ORDER BY lastnames, firstnames, dob", 324 'args': [_('internal patient ID'), raw] 325 }) 326 if len(raw) > 7: # DOB needs at least 8 digits 327 queries.append ({ 328 'cmd': "SELECT *, %s::text AS match_type FROM dem.v_active_persons WHERE dem.date_trunc_utc('day'::text, dob) = dem.date_trunc_utc('day'::text, %s::timestamp with time zone) ORDER BY lastnames, firstnames, dob", 329 'args': [_('date of birth'), raw.replace(',', '.')] 330 }) 331 queries.append ({ 332 'cmd': """ 333 SELECT vba.*, %s::text AS match_type 334 FROM 335 dem.lnk_identity2ext_id li2ext_id, 336 dem.v_active_persons vba 337 WHERE 338 vba.pk_identity = li2ext_id.id_identity and lower(li2ext_id.external_id) ~* lower(%s) 339 ORDER BY 340 lastnames, firstnames, dob 341 """, 342 'args': [_('external patient ID'), raw] 343 }) 344 return queries 345 346 # "<d igi ts>" - DOB or patient PK 347 if regex.match("^(\d|\s|\t)+$", raw): 348 _log.debug("[%s]: a DOB or PK" % raw) 349 queries.append ({ 350 'cmd': "SELECT *, %s::text AS match_type FROM dem.v_active_persons WHERE dem.date_trunc_utc('day'::text, dob) = dem.date_trunc_utc('day'::text, %s::timestamp with time zone) ORDER BY lastnames, firstnames, dob", 351 'args': [_('date of birth'), raw.replace(',', '.')] 352 }) 353 tmp = raw.replace(' ', '') 354 tmp = tmp.replace('\t', '') 355 queries.append ({ 356 'cmd': "SELECT *, %s::text AS match_type FROM dem.v_active_persons WHERE pk_identity LIKE %s%%", 357 'args': [_('internal patient ID'), tmp] 358 }) 359 return queries 360 361 # "#<di git s>" - GNUmed patient PK 362 if regex.match("^(\s|\t)*#(\d|\s|\t)+$", raw): 363 
_log.debug("[%s]: a PK" % raw) 364 tmp = raw.replace('#', '') 365 tmp = tmp.strip() 366 tmp = tmp.replace(' ', '') 367 tmp = tmp.replace('\t', '') 368 # this seemingly stupid query ensures the PK actually exists 369 queries.append ({ 370 'cmd': "SELECT *, %s::text AS match_type FROM dem.v_active_persons WHERE pk_identity = %s ORDER BY lastnames, firstnames, dob", 371 'args': [_('internal patient ID'), tmp] 372 }) 373 return queries 374 375 # "##<di git s or c-hars>" - external ID" 376 # but might also be an external ID 377 # tmp = raw.replace('#', '') 378 # tmp = tmp.strip() 379 # tmp = tmp.replace(' ', '***DUMMY***') 380 # tmp = tmp.replace('\t', '***DUMMY***') 381 # tmp = tmp.replace('***DUMMY***', '(\s|\t|-|/)*') 382 # queries.append ({ 383 # 'cmd': """ 384 # SELECT vba.*, %s::text AS match_type FROM dem.lnk_identity2ext_id li2ext_id, dem.v_active_persons vba 385 # WHERE vba.pk_identity = li2ext_id.id_identity and lower(li2ext_id.external_id) ~* lower(%s) 386 # ORDER BY lastnames, firstnames, dob""", 387 # 'args': [_('external patient ID'), tmp] 388 # }) 389 390 # "#<di git s or c-hars>" - external ID 391 if regex.match("^(\s|\t)*#.+$", raw): 392 _log.debug("[%s]: an external ID" % raw) 393 tmp = raw.replace('#', '') 394 tmp = tmp.strip() 395 tmp = tmp.replace(' ', '***DUMMY***') 396 tmp = tmp.replace('\t', '***DUMMY***') 397 tmp = tmp.replace('-', '***DUMMY***') 398 tmp = tmp.replace('/', '***DUMMY***') 399 tmp = tmp.replace('***DUMMY***', '(\s|\t|-|/)*') 400 queries.append ({ 401 'cmd': """ 402 SELECT 403 vba.*, 404 %s::text AS match_type 405 FROM 406 dem.lnk_identity2ext_id li2ext_id, 407 dem.v_active_persons vba 408 WHERE 409 vba.pk_identity = li2ext_id.id_identity 410 AND 411 lower(li2ext_id.external_id) ~* lower(%s) 412 ORDER BY 413 lastnames, firstnames, dob""", 414 'args': [_('external patient ID'), tmp] 415 }) 416 return queries 417 418 # digits interspersed with "./-" or blank space - DOB 419 if 
regex.match("^(\s|\t)*\d+(\s|\t|\.|\-|/)*\d+(\s|\t|\.|\-|/)*\d+(\s|\t|\.)*$", raw): 420 _log.debug("[%s]: a DOB" % raw) 421 tmp = raw.strip() 422 while '\t\t' in tmp: tmp = tmp.replace('\t\t', ' ') 423 while ' ' in tmp: tmp = tmp.replace(' ', ' ') 424 # apparently not needed due to PostgreSQL smarts... 425 #tmp = tmp.replace('-', '.') 426 #tmp = tmp.replace('/', '.') 427 queries.append ({ 428 'cmd': "SELECT *, %s AS match_type FROM dem.v_active_persons WHERE dem.date_trunc_utc('day'::text, dob) = dem.date_trunc_utc('day'::text, %s::timestamp with time zone) ORDER BY lastnames, firstnames, dob", 429 'args': [_('date of birth'), tmp.replace(',', '.')] 430 }) 431 return queries 432 433 # " , <alpha>" - first name 434 queries = self.__queries_for_firstname_with_comma(raw) 435 if len(queries) > 0: 436 return queries 437 438 # "<alpha>, " - last name 439 queries = self.__queries_for_lastname_with_comma(raw) 440 if len(queries) > 0: 441 return queries 442 443 # "<ALPHA>" - last name 444 queries = self.__queries_for_LASTNAME(raw) 445 if len(queries) > 0: 446 return queries 447 448 # "<alpha alpha>" - first last or last first, depending on UPPERCASE 449 queries = self.__queries_for_LAST_and_first(raw) 450 if len(queries) > 0: 451 return queries 452 453 # "*|$<...>" - DOB 454 if regex.match("\s*(\*|\$).+$", raw): 455 _log.debug("[%s]: a DOB" % raw) 456 tmp = raw.replace('*', '') 457 tmp = tmp.replace('$', '') 458 queries.append ({ 459 'cmd': "SELECT *, %s AS match_type FROM dem.v_active_persons WHERE dem.date_trunc_utc('day'::text, dob) = dem.date_trunc_utc('day'::text, %s::timestamp with time zone) ORDER BY lastnames, firstnames, dob", 460 'args': [_('date of birth'), tmp.replace(',', '.')] 461 }) 462 return queries 463 464 return []
#--------------------------------------------------------
# generic, locale independent queries
#--------------------------------------------------------
def _generate_queries_from_dto(self, dto = None):
    """Generate generic queries.

    - not locale dependant
    - data -> firstnames, lastnames, dob, gender

    dto: a gmPerson.cDTO_person; firstnames and lastnames are always
        matched for equality, dob and gender only if they are not None

    Returns a list with one query dict ('cmd', 'args'), or None if
    dto is not a gmPerson.cDTO_person.
    """
    _log.debug('_generate_queries_from_dto("%s")', dto)

    if not isinstance(dto, gmPerson.cDTO_person):
        return None

    # the first value fills the match_type placeholder of the SELECT
    vals = [_('name, gender, date of birth')]
    where_snippets = []

    vals.append(dto.firstnames)
    where_snippets.append('firstnames=%s')
    vals.append(dto.lastnames)
    where_snippets.append('lastnames=%s')

    if dto.dob is not None:
        vals.append(dto.dob)
        #where_snippets.append(u"dem.date_trunc_utc('day'::text, dob) = dem.date_trunc_utc('day'::text, %s::timestamp with time zone)")
        where_snippets.append("dem.date_trunc_utc('day'::text, dob) = dem.date_trunc_utc('day'::text, %s)")

    if dto.gender is not None:
        vals.append(dto.gender)
        where_snippets.append('gender=%s')

    # sufficient data ?
    # (cannot trigger while both name snippets are added unconditionally)
    if len(where_snippets) == 0:
        _log.error('invalid search dict structure')
        _log.debug('%s', dto)
        return None

    # %%s survives the string formatting as the %s placeholder for match_type
    cmd = """
        SELECT *, %%s AS match_type FROM dem.v_active_persons
        WHERE pk_identity in (
            SELECT id_identity FROM dem.names WHERE %s
        ) ORDER BY lastnames, firstnames, dob""" % ' and '.join(where_snippets)

    queries = [
        {'cmd': cmd, 'args': vals}
    ]

    # shall we mogrify name parts ? probably not

    return queries
516 #-------------------------------------------------------- 517 # queries for DE 518 #--------------------------------------------------------
519 - def __generate_queries_from_single_major_part(self, part=None):
520 521 # split on whitespace 522 parts_list = regex.split("\s+|\t+", part) 523 # ignore empty parts 524 parts_list = [ p.strip() for p in parts_list if p.strip() != '' ] 525 526 # parse into name/date parts 527 date_count = 0 528 name_parts = [] 529 for part in parts_list: 530 # any digit signifies a date, FIXME: what about "<40" ? 531 if regex.search("\d", part): 532 date_count = date_count + 1 533 date_part = part 534 else: 535 name_parts.append(part) 536 537 # exactly 1 word ? 538 if len(parts_list) == 1: 539 return [] 540 541 # exactly 2 words ? 542 if len(parts_list) == 2: 543 if date_count > 0: 544 # FIXME: either "name date" or "date date" 545 _log.error("don't know how to generate queries for [%s]" % search_term) 546 return [] 547 # no date = "first last" or "last first" 548 queries = [] 549 # assumption: first last 550 queries.append ({ 551 'cmd': "SELECT DISTINCT ON (id_identity) d_vap.*, %s::text AS match_type FROM dem.v_active_persons d_vap, dem.names n WHERE d_vap.pk_identity = n.id_identity and n.firstnames ~ %s AND n.lastnames ~ %s", 552 'args': [_('name: first-last'), '^' + gmTools.capitalize(name_parts[0], mode=gmTools.CAPS_NAMES), '^' + gmTools.capitalize(name_parts[1], mode=gmTools.CAPS_NAMES)] 553 }) 554 queries.append ({ 555 'cmd': "SELECT DISTINCT ON (id_identity) d_vap.*, %s::text AS match_type FROM dem.v_active_persons d_vap, dem.names n WHERE d_vap.pk_identity = n.id_identity and lower(n.firstnames) ~* lower(%s) AND lower(n.lastnames) ~* lower(%s)", 556 'args': [_('name: first-last'), '^' + name_parts[0], '^' + name_parts[1]] 557 }) 558 # assumption: last first 559 queries.append ({ 560 'cmd': "SELECT DISTINCT ON (id_identity) d_vap.*, %s::text AS match_type FROM dem.v_active_persons d_vap, dem.names n WHERE d_vap.pk_identity = n.id_identity and n.firstnames ~ %s AND n.lastnames ~ %s", 561 'args': [_('name: last-first'), '^' + gmTools.capitalize(name_parts[1], mode=gmTools.CAPS_NAMES), '^' + gmTools.capitalize(name_parts[0], 
mode=gmTools.CAPS_NAMES)] 562 }) 563 queries.append ({ 564 'cmd': "SELECT DISTINCT ON (id_identity) d_vap.*, %s::text AS match_type FROM dem.v_active_persons d_vap, dem.names n WHERE d_vap.pk_identity = n.id_identity and lower(n.firstnames) ~* lower(%s) AND lower(n.lastnames) ~* lower(%s)", 565 'args': [_('name: last-first'), '^' + name_parts[1], '^' + name_parts[0]] 566 }) 567 # assumption: last nick 568 queries.append ({ 569 'cmd': "SELECT DISTINCT ON (id_identity) d_vap.*, %s::text AS match_type FROM dem.v_active_persons d_vap, dem.names n WHERE d_vap.pk_identity = n.id_identity and n.preferred ~ %s AND n.lastnames ~ %s", 570 'args': [_('name: last-nick'), '^' + gmTools.capitalize(name_parts[1], mode=gmTools.CAPS_NAMES), '^' + gmTools.capitalize(name_parts[0], mode=gmTools.CAPS_NAMES)] 571 }) 572 queries.append ({ 573 'cmd': "SELECT DISTINCT ON (id_identity) d_vap.*, %s::text AS match_type FROM dem.v_active_persons d_vap, dem.names n WHERE d_vap.pk_identity = n.id_identity and lower(n.preferred) ~* lower(%s) AND lower(n.lastnames) ~* lower(%s)", 574 'args': [_('name: last-nick'), '^' + name_parts[1], '^' + name_parts[0]] 575 }) 576 # name parts anywhere inside name - third order query ... 577 queries.append ({ 578 'cmd': """SELECT DISTINCT ON (id_identity) 579 d_vap.*, 580 %s::text AS match_type 581 FROM 582 dem.v_active_persons d_vap, 583 dem.names n 584 WHERE 585 d_vap.pk_identity = n.id_identity 586 AND 587 -- name_parts[0] 588 lower(n.firstnames || ' ' || n.lastnames) ~* lower(%s) 589 AND 590 -- name_parts[1] 591 lower(n.firstnames || ' ' || n.lastnames) ~* lower(%s)""", 592 'args': [_('name'), name_parts[0], name_parts[1]] 593 }) 594 return queries 595 596 # exactly 3 words ? 
597 if len(parts_list) == 3: 598 if date_count != 1: 599 # FIXME: "name name name" or "name date date" 600 return [] 601 602 # special case: 3 words, exactly 1 of them a date, no ",;" 603 # assumption: first, last, dob - first order 604 queries.append ({ 605 'cmd': "SELECT DISTINCT ON (id_identity) d_vap.*, %s::text AS match_type FROM dem.v_active_persons d_vap, dem.names n WHERE d_vap.pk_identity = n.id_identity and n.firstnames ~ %s AND n.lastnames ~ %s AND dem.date_trunc_utc('day'::text, dob) = dem.date_trunc_utc('day'::text, %s::timestamp with time zone)", 606 'args': [_('names: first-last, date of birth'), '^' + gmTools.capitalize(name_parts[0], mode=gmTools.CAPS_NAMES), '^' + gmTools.capitalize(name_parts[1], mode=gmTools.CAPS_NAMES), date_part.replace(',', '.')] 607 }) 608 queries.append ({ 609 'cmd': "SELECT DISTINCT ON (id_identity) d_vap.*, %s::text AS match_type FROM dem.v_active_persons d_vap, dem.names n WHERE d_vap.pk_identity = n.id_identity and lower(n.firstnames) ~* lower(%s) AND lower(n.lastnames) ~* lower(%s) AND dem.date_trunc_utc('day'::text, dob) = dem.date_trunc_utc('day'::text, %s::timestamp with time zone)", 610 'args': [_('names: first-last, date of birth'), '^' + name_parts[0], '^' + name_parts[1], date_part.replace(',', '.')] 611 }) 612 # assumption: last, first, dob - second order query 613 queries.append ({ 614 'cmd': "SELECT DISTINCT ON (id_identity) d_vap.*, %s::text AS match_type FROM dem.v_active_persons d_vap, dem.names n WHERE d_vap.pk_identity = n.id_identity and n.firstnames ~ %s AND n.lastnames ~ %s AND dem.date_trunc_utc('day'::text, dob) = dem.date_trunc_utc('day'::text, %s::timestamp with time zone)", 615 'args': [_('names: last-first, date of birth'), '^' + gmTools.capitalize(name_parts[1], mode=gmTools.CAPS_NAMES), '^' + gmTools.capitalize(name_parts[0], mode=gmTools.CAPS_NAMES), date_part.replace(',', '.')] 616 }) 617 queries.append ({ 618 'cmd': "SELECT DISTINCT ON (id_identity) d_vap.*, %s::text AS match_type FROM 
dem.v_active_persons d_vap, dem.names n WHERE d_vap.pk_identity = n.id_identity and lower(n.firstnames) ~* lower(%s) AND lower(n.lastnames) ~* lower(%s) AND dem.date_trunc_utc('day'::text, dob) = dem.date_trunc_utc('day'::text, %s::timestamp with time zone)", 619 'args': [_('names: last-first, dob'), '^' + name_parts[1], '^' + name_parts[0], date_part.replace(',', '.')] 620 }) 621 # name parts anywhere in name - third order query ... 622 queries.append ({ 623 'cmd': """SELECT DISTINCT ON (id_identity) 624 d_vap.*, 625 %s::text AS match_type 626 FROM 627 dem.v_active_persons d_vap, 628 dem.names n 629 WHERE 630 d_vap.pk_identity = n.id_identity 631 AND 632 lower(n.firstnames || ' ' || n.lastnames) ~* lower(%s) 633 AND 634 lower(n.firstnames || ' ' || n.lastnames) ~* lower(%s) 635 AND 636 dem.date_trunc_utc('day'::text, dob) = dem.date_trunc_utc('day'::text, %s::timestamp with time zone) 637 """, 638 'args': [_('name, date of birth'), name_parts[0], name_parts[1], date_part.replace(',', '.')] 639 }) 640 return queries 641 642 return []
643 644 #--------------------------------------------------------
645 - def _generate_queries_de(self, search_term=None):
646 647 if search_term is None: 648 return [] 649 650 # check to see if we get away with a simple query ... 651 queries = self._generate_simple_query(search_term) 652 if len(queries) > 0: 653 _log.debug('[%s]: search term with a simple, unambigous structure' % search_term) 654 return queries 655 656 # no we don't 657 _log.debug('[%s]: not a search term with a simple, unambigous structure' % search_term) 658 659 search_term = search_term.strip().strip(',').strip(';').strip() 660 normalized = self._normalize_soundalikes(search_term) 661 662 queries = [] 663 664 # "<CHARS>" - single name part 665 # yes, I know, this is culture specific (did you read the docs ?) 666 if regex.match("^(\s|\t)*[a-zäöüßéáúóçøA-ZÄÖÜÇØ]+(\s|\t)*$", search_term): 667 _log.debug("[%s]: a single name part", search_term) 668 # there's no intermediate whitespace due to the regex 669 cmd = """ 670 SELECT DISTINCT ON (pk_identity) * FROM ( 671 SELECT * FROM (( 672 -- lastname 673 SELECT d_vap.*, %s::text AS match_type FROM dem.v_active_persons d_vap, dem.names n 674 WHERE d_vap.pk_identity = n.id_identity and lower(n.lastnames) ~* lower(%s) 675 ) union all ( 676 -- firstname 677 SELECT d_vap.*, %s::text AS match_type FROM dem.v_active_persons d_vap, dem.names n 678 WHERE d_vap.pk_identity = n.id_identity and lower(n.firstnames) ~* lower(%s) 679 ) union all ( 680 -- nickname 681 SELECT d_vap.*, %s::text AS match_type FROM dem.v_active_persons d_vap, dem.names n 682 WHERE d_vap.pk_identity = n.id_identity and lower(n.preferred) ~* lower(%s) 683 ) union all ( 684 -- anywhere in name 685 SELECT 686 d_vap.*, 687 %s::text AS match_type 688 FROM 689 dem.v_active_persons d_vap, 690 dem.names n 691 WHERE 692 d_vap.pk_identity = n.id_identity 693 AND 694 lower(n.firstnames || ' ' || n.lastnames || ' ' || coalesce(n.preferred, '')) ~* lower(%s) 695 )) AS super_list ORDER BY lastnames, firstnames, dob 696 ) AS sorted_list 697 """ 698 tmp = normalized.strip() 699 args = [] 700 args.append(_('lastname')) 701 
args.append('^' + tmp) 702 args.append(_('firstname')) 703 args.append('^' + tmp) 704 args.append(_('nickname')) 705 args.append('^' + tmp) 706 args.append(_('any name part')) 707 args.append(tmp) 708 709 queries.append ({ 710 'cmd': cmd, 711 'args': args 712 }) 713 return queries 714 715 # try to split on (major) part separators 716 major_parts = regex.split(',|;', normalized) 717 718 # ignore empty parts 719 major_parts = [ p.strip() for p in major_parts if p.strip() != '' ] 720 721 # only one "major" part ? (i.e. no ",;" ?) 722 if len(major_parts) == 1: 723 _log.debug('[%s]: only one non-empty part after splitting by , or ; ("major" part)', normalized) 724 queries = self.__generate_queries_from_single_major_part(part = normalized) 725 if len(queries) > 0: 726 return queries 727 return self._generate_dumb_brute_query(search_term) 728 729 # more than one major part (separated by ';,') 730 # this else is not needed 731 else: 732 _log.debug('[%s]: more than one non-empty part after splitting by , or ; ("major" parts)', normalized) 733 # parse into name and date parts 734 date_parts = [] 735 name_parts = [] 736 name_count = 0 737 for part in major_parts: 738 if part.strip() == '': 739 continue 740 # any digits ? 741 if regex.search("\d+", part): 742 # FIXME: parse out whitespace *not* adjacent to a *word* 743 date_parts.append(part) 744 else: 745 tmp = part.strip() 746 tmp = regex.split("\s+|\t+", tmp) 747 name_count = name_count + len(tmp) 748 name_parts.append(tmp) 749 750 _log.debug('found %s character (name) parts and %s number (date ?) 
parts', len(name_parts), len(date_parts)) 751 752 where_parts = [] 753 # first, handle name parts 754 # special case: "<date(s)>, <name> <name>, <date(s)>" 755 if (len(name_parts) == 1) and (name_count == 2): 756 # usually "first last" 757 where_parts.append ({ 758 'conditions': "firstnames ~ %s and lastnames ~ %s", 759 'args': [_('names: first last'), '^' + gmTools.capitalize(name_parts[0][0], mode=gmTools.CAPS_NAMES), '^' + gmTools.capitalize(name_parts[0][1], mode=gmTools.CAPS_NAMES)] 760 }) 761 where_parts.append ({ 762 'conditions': "lower(firstnames) ~* lower(%s) and lower(lastnames) ~* lower(%s)", 763 'args': [_('names: first last'), '^' + name_parts[0][0], '^' + name_parts[0][1]] 764 }) 765 # but sometimes "last first"" 766 where_parts.append ({ 767 'conditions': "firstnames ~ %s and lastnames ~ %s", 768 'args': [_('names: last, first'), '^' + gmTools.capitalize(name_parts[0][1], mode=gmTools.CAPS_NAMES), '^' + gmTools.capitalize(name_parts[0][0], mode=gmTools.CAPS_NAMES)] 769 }) 770 where_parts.append ({ 771 'conditions': "lower(firstnames) ~* lower(%s) and lower(lastnames) ~* lower(%s)", 772 'args': [_('names: last, first'), '^' + name_parts[0][1], '^' + name_parts[0][0]] 773 }) 774 # or even substrings anywhere in name 775 where_parts.append ({ 776 'conditions': "lower(firstnames || ' ' || lastnames) ~* lower(%s) OR lower(firstnames || ' ' || lastnames) ~* lower(%s)", 777 'args': [_('name'), name_parts[0][0], name_parts[0][1]] 778 }) 779 780 # special case: "<date(s)>, <name(s)>, <name(s)>, <date(s)>" 781 elif len(name_parts) == 2: 782 # usually "last, first" 783 where_parts.append ({ 784 'conditions': "firstnames ~ %s AND lastnames ~ %s", 785 'args': [_('name: last, first'), '^' + ' '.join(map(gmTools.capitalize, name_parts[1])), '^' + ' '.join(map(gmTools.capitalize, name_parts[0]))] 786 }) 787 where_parts.append ({ 788 'conditions': "lower(firstnames) ~* lower(%s) AND lower(lastnames) ~* lower(%s)", 789 'args': [_('name: last, first'), '^' + ' 
'.join(name_parts[1]), '^' + ' '.join(name_parts[0])] 790 }) 791 # but sometimes "first, last" 792 where_parts.append ({ 793 'conditions': "firstnames ~ %s AND lastnames ~ %s", 794 'args': [_('name: last, first'), '^' + ' '.join(map(gmTools.capitalize, name_parts[0])), '^' + ' '.join(map(gmTools.capitalize, name_parts[1]))] 795 }) 796 where_parts.append ({ 797 'conditions': "lower(firstnames) ~* lower(%s) AND lower(lastnames) ~* lower(%s)", 798 'args': [_('name: last, first'), '^' + ' '.join(name_parts[0]), '^' + ' '.join(name_parts[1])] 799 }) 800 # and sometimes "last, nick" 801 where_parts.append ({ 802 'conditions': "preferred ~ %s AND lastnames ~ %s", 803 'args': [_('name: last, first'), '^' + ' '.join(map(gmTools.capitalize, name_parts[1])), '^' + ' '.join(map(gmTools.capitalize, name_parts[0]))] 804 }) 805 where_parts.append ({ 806 'conditions': "lower(preferred) ~* lower(%s) AND lower(lastnames) ~* lower(%s)", 807 'args': [_('name: last, first'), '^' + ' '.join(name_parts[1]), '^' + ' '.join(name_parts[0])] 808 }) 809 810 # or even substrings anywhere in name 811 where_parts.append ({ 812 'conditions': "lower(firstnames || ' ' || lastnames) ~* lower(%s) AND lower(firstnames || ' ' || lastnames) ~* lower(%s)", 813 'args': [_('name'), ' '.join(name_parts[0]), ' '.join(name_parts[1])] 814 }) 815 816 # big trouble - arbitrary number of names 817 else: 818 # FIXME: deep magic, not sure of rationale ... 819 if len(name_parts) == 1: 820 for part in name_parts[0]: 821 where_parts.append ({ 822 'conditions': "lower(firstnames || ' ' || lastnames) ~* lower(%s)", 823 'args': [_('name'), part] 824 }) 825 else: 826 tmp = [] 827 for part in name_parts: 828 tmp.append(' '.join(part)) 829 for part in tmp: 830 where_parts.append ({ 831 'conditions': "lower(firstnames || ' ' || lastnames) ~* lower(%s)", 832 'args': [_('name'), part] 833 }) 834 835 # secondly handle date parts 836 # FIXME: this needs a considerable smart-up ! 
837 if len(date_parts) == 1: 838 if len(where_parts) == 0: 839 where_parts.append ({ 840 'conditions': "dem.date_trunc_utc('day'::text, dob) = dem.date_trunc_utc('day'::text, %s::timestamp with time zone)", 841 'args': [_('date of birth'), date_parts[0].replace(',', '.')] 842 }) 843 if len(where_parts) > 0: 844 where_parts[0]['conditions'] += " AND dem.date_trunc_utc('day'::text, dob) = dem.date_trunc_utc('day'::text, %s::timestamp with time zone)" 845 where_parts[0]['args'].append(date_parts[0].replace(',', '.')) 846 where_parts[0]['args'][0] += ', ' + _('date of birth') 847 if len(where_parts) > 1: 848 where_parts[1]['conditions'] += " AND dem.date_trunc_utc('day'::text, dob) = dem.date_trunc_utc('day'::text, %s::timestamp with time zone)" 849 where_parts[1]['args'].append(date_parts[0].replace(',', '.')) 850 where_parts[1]['args'][0] += ', ' + _('date of birth') 851 elif len(date_parts) > 1: 852 if len(where_parts) == 0: 853 where_parts.append ({ 854 'conditions': "dem.date_trunc_utc('day'::text, dob) = dem.date_trunc_utc('day'::text, %s::timestamp with time zone) AND dem.date_trunc_utc('day'::text, dem.identity.deceased) = dem.date_trunc_utc('day'::text, %s::timestamp with time zone)", 855 'args': [_('date of birth/death'), date_parts[0].replace(',', '.'), date_parts[1].replace(',', '.')] 856 }) 857 if len(where_parts) > 0: 858 where_parts[0]['conditions'] += " AND dem.date_trunc_utc('day'::text, dob) = dem.date_trunc_utc('day'::text, %s::timestamp with time zone) AND dem.date_trunc_utc('day'::text, dem.identity.deceased) = dem.date_trunc_utc('day'::text, %s::timestamp with time zone)", 859 where_parts[0]['args'].append(date_parts[0].replace(',', '.'), date_parts[1].replace(',', '.')) 860 where_parts[0]['args'][0] += ', ' + _('date of birth/death') 861 if len(where_parts) > 1: 862 where_parts[1]['conditions'] += " AND dem.date_trunc_utc('day'::text, dob) = dem.date_trunc_utc('day'::text, %s::timestamp with time zone) AND dem.date_trunc_utc('day'::text, 
dem.identity.deceased) = dem.date_trunc_utc('day'::text, %s::timestamp with time zone)", 863 where_parts[1]['args'].append(date_parts[0].replace(',', '.'), date_parts[1].replace(',', '.')) 864 where_parts[1]['args'][0] += ', ' + _('date of birth/death') 865 866 # and finally generate the queries ... 867 for where_part in where_parts: 868 queries.append ({ 869 'cmd': "SELECT *, %%s::text AS match_type FROM dem.v_active_persons WHERE %s" % where_part['conditions'], 870 'args': where_part['args'] 871 }) 872 return queries 873 874 return []
875 #--------------------------------------------------------
876 - def _generate_dumb_brute_query(self, search_term=''):
877 878 _log.debug('_generate_dumb_brute_query("%s")' % search_term) 879 880 where_clause = '' 881 args = [] 882 # FIXME: split on more than just ' ' 883 for arg in search_term.strip().split(): 884 where_clause += " AND lower(coalesce(d_vap.title, '') || ' ' || d_vap.firstnames || ' ' || d_vap.lastnames) ~* lower(%s)" 885 args.append(arg) 886 887 query = """ 888 SELECT DISTINCT ON (pk_identity) * FROM ( 889 SELECT 890 d_vap.*, 891 '%s'::text AS match_type 892 FROM 893 dem.v_active_persons d_vap, 894 dem.names n 895 WHERE 896 d_vap.pk_identity = n.id_identity 897 %s 898 ORDER BY 899 lastnames, 900 firstnames, 901 dob 902 ) AS ordered_list""" % (_('full name'), where_clause) 903 904 return ({'cmd': query, 'args': args})
905 906 #============================================================
def ask_for_patient():
    """Text mode UI function to ask for a patient.

    Keeps prompting for a search term until the term matches exactly
    one patient.

    Returns:
        The single matching patient, or None if the prompt yielded
        'exit', 'quit', 'bye' or None.
    """
    searcher = cPatientSearcher_SQL()
    # NOTE(review): presumably gmTools.prompted_input() returns None on
    # blank input -- confirm against its implementation
    cancel_inputs = ['exit', 'quit', 'bye', None]

    while True:
        term = gmTools.prompted_input(prompt = "\nEnter person search term or leave blank to exit")
        if term in cancel_inputs:
            print("user cancelled patient search")
            return None

        matches = searcher.get_patients(search_term = term)

        # get_patients() may return None as well as an empty list
        if not matches:
            print("No patient matches the search term.")
            print("")
            continue

        if len(matches) == 1:
            return matches[0]

        # ambiguous result: list the candidates and ask again
        print("Several patients match the search term:")
        print("")
        for candidate in matches:
            print(candidate)
            print("")
        print("Please refine the search term so it matches one patient only.")
#============================================================
# main/testing
#============================================================
if __name__ == '__main__':

    # the self tests only run when explicitly requested: "<script> test"
    if len(sys.argv) < 2 or sys.argv[1] != 'test':
        sys.exit()

    import datetime
    gmDateTime.init()

    #--------------------------------------------------------
    def _print_queries(queries):
        """Print match type, SQL and arguments of each generated query."""
        for q in queries:
            print(" match on:", q['args'][0])
            print(" query :", q['cmd'])
            print(" args :", q['args'])

    #--------------------------------------------------------
    def test_search_by_dto():
        """Search for patients matching a hard-coded person DTO and print the result."""
        dto = gmPerson.cDTO_person()
        dto.firstnames = 'Sigrid'
        dto.lastnames = 'Kiesewetter'
        dto.gender = 'female'
        dto.dob = datetime.datetime(1939, 6, 24, 23, 0, 0, 0, gmDateTime.gmCurrentLocalTimezone)
        print(dto)

        searcher = cPatientSearcher_SQL()
        pats = searcher.get_patients(dto = dto)
        print(pats)

    #--------------------------------------------------------
    def test_patient_search_queries():
        """Interactively print the output of the individual query generators."""
        searcher = cPatientSearcher_SQL()

        print("testing _normalize_soundalikes()")
        print("--------------------------------")
        # FIXME: support Ähler -> Äler and Dähler -> Däler
        data = ['Krüger', 'Krueger', 'Kruger', 'Überle', 'Böger', 'Boger', 'Öder', 'Ähler', 'Däler', 'Großer', 'müller', 'Özdemir', 'özdemir']
        for name in data:
            print('%s: %s' % (name, searcher._normalize_soundalikes(name)))

        input('press [ENTER] to continue')
        print("============")

        print("testing _generate_queries_from_dto()")
        print("------------------------------------")
        # cDTO_person lives in gmPerson and "pyDT" is not imported in this
        # module, hence the qualified name and the stdlib datetime module
        dto = gmPerson.cDTO_person()
        dto.gender = 'm'
        dto.lastnames = 'Kirk'
        dto.firstnames = 'James'
        dto.dob = datetime.datetime.now(tz=gmDateTime.gmCurrentLocalTimezone)
        q = searcher._generate_queries_from_dto(dto)[0]
        print("dto:", dto)
        print(" match on:", q['args'][0])
        print(" query:", q['cmd'])

        input('press [ENTER] to continue')
        print("============")

        print("testing _generate_queries_de()")
        print("------------------------------")
        for term in ['Kirk, James', 'müller', 'özdemir', 'Özdemir']:
            _print_queries(searcher._generate_queries_de(term))
            input('press [ENTER] to continue')
            print("============")

        print("testing _generate_dumb_brute_query()")
        print("------------------------------------")
        q = searcher._generate_dumb_brute_query('Kirk, James Tiberius')
        print(" match on:", q['args'][0])
        print(" args:", q['args'])
        print(" query:", q['cmd'])

        input('press [ENTER] to continue')

    #--------------------------------------------------------
    def test_ask_for_patient():
        """Repeatedly ask for a patient and print some of its data until the user cancels."""
        while True:
            myPatient = ask_for_patient()
            if myPatient is None:
                break
            print("ID ", myPatient.ID)
            print("names ", myPatient.get_names())
            print("addresses:", myPatient.get_addresses(address_type='home'))
            print("recent birthday:", myPatient.dob_in_range())
            myPatient.export_as_gdt(filename='apw.gdt', encoding = 'cp850')
#            docs = myPatient.get_document_folder()
#            print "docs ", docs
#            emr = myPatient.emr
#            print "EMR ", emr

    #--------------------------------------------------------
    def test_generate_simple_query():
        """Print the queries _generate_simple_query() creates for sample search fragments."""
        searcher = cPatientSearcher_SQL()
        print("testing _generate_simple_query()")
        print("----------------------------")
        data = [
            '51234', '1 134 153', '#13 41 34', '#3-AFY322.4', '22-04-1906', '1235/32/3525',
            ', tiberiu',    # firstname
            'KIRK',         # lastname
            'kirk,',        # lastname
            'KIR tib',      # LAST first
            'Tib KI'        # first LAST
        ]
        for fragment in data:
            print("fragment:", fragment)
            qs = searcher._generate_simple_query(fragment)
            for q in qs:
                print('')
                print(" match on:", q['args'])
                print(" query :", q['cmd'])
                input('press [ENTER] to continue')
                print("============")

    #--------------------------------------------------------
    test_generate_simple_query()
    #test_patient_search_queries()
    #test_ask_for_patient()
    #test_search_by_dto()

#============================================================