Package Gnumed :: Package business :: Module gmPersonSearch
[frames | no frames]

Source Code for Module Gnumed.business.gmPersonSearch

   1  # -*- coding: utf-8 -*- 
   2  """GNUmed person searching code.""" 
   3  #============================================================ 
   4  __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>" 
   5  __license__ = "GPL" 
   6   
   7  # std lib 
   8  import sys, logging, re as regex 
   9   
  10   
  11  # GNUmed 
  12  if __name__ == '__main__': 
  13          sys.path.insert(0, '../../') 
  14  from Gnumed.pycommon import gmPG2, gmI18N, gmTools, gmDateTime 
  15  from Gnumed.business import gmPerson 
  16  if __name__ == '__main__': 
  17          sys.path.insert(0, '../../') 
  18          gmI18N.activate_locale() 
  19          gmI18N.install_domain() 
  20   
  21   
  22  _log = logging.getLogger('gm.person') 
  23   
  24  #============================================================ 
class cPatientSearcher_SQL:
    """UI independent, i18n aware patient searcher.

    Turns either a free-text search term or a structured person DTO into
    a list of query definitions (dicts with the keys 'cmd' and 'args')
    and runs them through gmPG2.run_ro_queries().
    """

    # SQL snippets shared by several query generators; each takes one placeholder
    _SQL_DOB_MATCH = "dem.date_trunc_utc('day'::text, dob) = dem.date_trunc_utc('day'::text, %s::timestamp with time zone)"
    # NOTE(review): references dem.identity although the generated queries only
    # select FROM dem.v_active_persons - kept as found, confirm against the schema
    _SQL_DECEASED_MATCH = "dem.date_trunc_utc('day'::text, dem.identity.deceased) = dem.date_trunc_utc('day'::text, %s::timestamp with time zone)"
    # common head of the "two name parts" queries, a WHERE condition must follow
    _SQL_NAME_JOIN = "SELECT DISTINCT ON (id_identity) d_vap.*, %s::text AS match_type FROM dem.v_active_persons d_vap, dem.names n WHERE d_vap.pk_identity = n.id_identity and "

    def __init__(self):
        """Select the query generator and open a connection plus cursor."""
        self._generate_queries = self._generate_queries_de
        # make a cursor
        self.conn = gmPG2.get_connection()
        self.curs = self.conn.cursor()

    #--------------------------------------------------------
    def __del__(self):
        """Close cursor and connection, ignoring any failure to do so."""
        for attr_name in ('curs', 'conn'):
            try:
                getattr(self, attr_name).close()
            except Exception:
                # best effort only: the attribute may never have been set
                # (failed __init__) or the resource may already be closed
                pass

    #--------------------------------------------------------
    # public API
    #--------------------------------------------------------
    def get_patients(self, search_term=None, a_locale=None, dto=None):
        """Return gmPerson.cPatient instances for the matching identities.

        Takes the same arguments as get_identities().

        Returns None if get_identities() returns None, else a list
        (possibly empty) with one cPatient per identity found.
        """
        identities = self.get_identities(search_term, a_locale, dto)
        if identities is None:
            return None
        return [gmPerson.cPatient(aPK_obj=ident['pk_identity']) for ident in identities]

    #--------------------------------------------------------
    def get_identities(self, search_term=None, a_locale=None, dto=None):
        """Get patient identity objects for given parameters.

        - either search term or dto
        - dto contains structured data that doesn't need to be parsed (cDTO_person)
        - dto takes precedence over search_term; the search term is only
          parsed when no queries could be generated from the dto
        - a_locale is not implemented, passing it only triggers a warning

        Returns None if no queries could be generated, else a list of
        gmPerson.cPerson instances, unique by 'pk_identity', in the
        order in which the queries returned them.

        Raises ValueError if the search term is needed but is None.
        """
        queries = None
        if dto is not None:
            queries = self._generate_queries_from_dto(dto)

        # None (unusable dto) and [] both mean: fall back to the search term
        if not queries:
            # temporary change of locale for selecting query generator
            if a_locale is not None:
                print("temporary change of locale on patient search not implemented")
                _log.warning("temporary change of locale on patient search not implemented")
            # generate queries
            if search_term is None:
                raise ValueError('need search term (dto AND search_term are None)')
            queries = self._generate_queries(search_term)

        # anything to do ?
        if len(queries) == 0:
            _log.error('query tree empty')
            _log.error('[%s] [%s] [%s]' % (search_term, a_locale, str(dto)))
            return None

        # collect identities here
        identities = []
        for query in queries:
            _log.debug("running %s" % query)
            try:
                rows, idx = gmPG2.run_ro_queries(queries=[query], get_col_idx=True)
            except Exception:
                # a failing query must not prevent the remaining ones from running
                _log.exception('error running query')
                continue
            if len(rows) == 0:
                continue
            identities.extend(
                [gmPerson.cPerson(row={'pk_field': 'pk_identity', 'data': row, 'idx': idx}) for row in rows]
            )

        # several queries may have found the same person: keep the first hit only
        seen_pks = set()
        unique_identities = []
        for identity in identities:
            pk = identity['pk_identity']
            if pk in seen_pks:
                continue
            seen_pks.add(pk)
            unique_identities.append(identity)

        return unique_identities

    #--------------------------------------------------------
    # internal helpers
    #--------------------------------------------------------
    def _normalize_soundalikes(self, aString=None, aggressive=False):
        """Transform some characters into a regex.

        Characters which are commonly spelled in several ways (umlauts,
        accents, v/f/ph, t/th, quotes, hyphens) are replaced by a regex
        alternation covering those spellings. The order of the
        replacements matters: '***DUMMY***' serves as a placeholder so
        that a replacement does not get replaced again by a later step.

        Empty or whitespace-only strings are returned unchanged.
        aggressive is currently without effect.
        """
        if aString.strip() == '':
            return aString

        # umlauts
        normalized = aString.replace('Ä', '(Ä|AE|Ae|A|E)')
        normalized = normalized.replace('Ö', '(Ö|OE|Oe|O)')
        normalized = normalized.replace('Ü', '(Ü|UE|Ue|U)')
        normalized = normalized.replace('ä', '(ä|ae|e|a)')
        normalized = normalized.replace('ö', '(ö|oe|o)')
        normalized = normalized.replace('ü', '(ü|ue|u|y)')
        normalized = normalized.replace('ß', '(ß|sz|ss|s)')

        # common soundalikes
        # - René, Desiré, Inés ...
        normalized = normalized.replace('é', '***DUMMY***')
        normalized = normalized.replace('è', '***DUMMY***')
        normalized = normalized.replace('***DUMMY***', '(é|e|è|ä|ae)')

        # FIXME: missing i/a/o - but uncommon in German
        normalized = normalized.replace('v', '***DUMMY***')
        normalized = normalized.replace('f', '***DUMMY***')
        normalized = normalized.replace('ph', '***DUMMY***')  # now, this is *really* specific for German
        normalized = normalized.replace('***DUMMY***', '(v|f|ph)')

        # silent characters (Thomas vs Tomas)
        normalized = normalized.replace('Th', '***DUMMY***')
        normalized = normalized.replace('T', '***DUMMY***')
        normalized = normalized.replace('***DUMMY***', '(Th|T)')
        normalized = normalized.replace('th', '***DUMMY***')
        normalized = normalized.replace('t', '***DUMMY***')
        normalized = normalized.replace('***DUMMY***', '(th|t)')

        # apostrophes, hyphens et al
        normalized = normalized.replace('"', '***DUMMY***')
        normalized = normalized.replace("'", '***DUMMY***')
        normalized = normalized.replace('`', '***DUMMY***')
        # the inner placeholder stands for "-" and protects it from the next step
        normalized = normalized.replace('***DUMMY***', r"""("|'|`|***DUMMY***|\s)*""")
        normalized = normalized.replace('-', r"""(-|\s)*""")
        normalized = normalized.replace('|***DUMMY***|', '|-|')

        if aggressive:
            pass
            # some more here

        _log.debug('[%s] -> [%s]' % (aString, normalized))

        return normalized

    #--------------------------------------------------------
    # write your own query generator and add it here:
    # use compile() for speedup
    # must escape strings before use !!
    # ORDER BY !
    # FIXME: what about "< 40" ?
    #--------------------------------------------------------
    def __queries_for_firstname_with_comma(self, raw):
        """Generate search queries for [ , <alpha> ] search terms.

        Returns [] if raw does not have that shape, else a list with
        one query matching the start of the first name.
        """
        if regex.match(r",\s*\w+$", raw.strip()) is None:
            return []
        _log.debug("[%s]: a firstname" % raw)
        tmp = self._normalize_soundalikes(raw.strip(' ,'))
        cmd = """
            SELECT DISTINCT ON (pk_identity) * FROM (
                SELECT *, %(match)s AS match_type FROM ((
                    SELECT d_vap.*
                    FROM dem.names, dem.v_active_persons d_vap
                    WHERE dem.names.firstnames ~ %(first)s and d_vap.pk_identity = dem.names.id_identity
                ) union all (
                    SELECT d_vap.*
                    FROM dem.names, dem.v_active_persons d_vap
                    WHERE dem.names.firstnames ~ %(first_w_caps)s and d_vap.pk_identity = dem.names.id_identity
                ) union all (
                    SELECT d_vap.*
                    FROM dem.names, dem.v_active_persons d_vap
                    WHERE lower(dem.names.firstnames) ~ lower(%(first)s) and d_vap.pk_identity = dem.names.id_identity
                )) AS super_list ORDER BY lastnames, firstnames, dob
            ) AS sorted_list"""
        args = {
            'match': _('first name'),
            'first': '^' + tmp,
            'first_w_caps': '^' + gmTools.capitalize(tmp, mode=gmTools.CAPS_NAMES)
        }
        return [{'cmd': cmd, 'args': args}]

    #--------------------------------------------------------
    def __queries_for_lastname_with_comma(self, raw):
        """Generate search queries for [ <alpha> , ] search terms.

        Returns [] if raw does not have that shape, else a list with
        one query matching the start of the last name.
        """
        if regex.match(r"\w+\s*,$", raw) is None:
            return []
        _log.debug("[%s]: a lastname" % raw)
        tmp = self._normalize_soundalikes(raw.strip(' ,'))
        cmd = """
            SELECT DISTINCT ON (pk_identity) * FROM (
                SELECT *, %(match)s AS match_type FROM ((
                    SELECT d_vap.*
                    FROM dem.names, dem.v_active_persons d_vap
                    WHERE dem.names.lastnames ~ %(last)s and d_vap.pk_identity = dem.names.id_identity
                ) union all (
                    SELECT d_vap.*
                    FROM dem.names, dem.v_active_persons d_vap
                    WHERE dem.names.lastnames ~ %(last_w_caps)s and d_vap.pk_identity = dem.names.id_identity
                ) union all (
                    SELECT d_vap.*
                    FROM dem.names, dem.v_active_persons d_vap
                    WHERE lower(dem.names.lastnames) ~ lower(%(last)s) and d_vap.pk_identity = dem.names.id_identity
                )) AS super_list ORDER BY lastnames, firstnames, dob
            ) AS sorted_list"""
        args = {
            'match': _('last name'),
            'last': '^' + tmp,
            'last_w_caps': '^' + gmTools.capitalize(tmp, mode=gmTools.CAPS_NAMES)
        }
        return [{'cmd': cmd, 'args': args}]

    #--------------------------------------------------------
    def __queries_for_LASTNAME(self, raw):
        """Generate search queries for [ <ALPHA> ] search terms.

        Returns [] unless raw is a single word which equals its own
        upper-cased form, else a list with one last name query.
        """
        if regex.match(r"\w+$", raw) is None:
            return []
        if raw != raw.upper():
            # not all UPPERCASE
            return []
        _log.debug("[%s]: a lastname" % raw)
        tmp = self._normalize_soundalikes(raw)
        cmd = """
            SELECT DISTINCT ON (pk_identity) * FROM (
                SELECT *, %(match)s AS match_type FROM ((
                    SELECT d_vap.*
                    FROM dem.names, dem.v_active_persons d_vap
                    WHERE dem.names.lastnames ~ %(last_w_caps)s and d_vap.pk_identity = dem.names.id_identity
                ) union all (
                    SELECT d_vap.*
                    FROM dem.names, dem.v_active_persons d_vap
                    WHERE lower(dem.names.lastnames) ~ lower(%(last)s) and d_vap.pk_identity = dem.names.id_identity
                )) AS super_list ORDER BY lastnames, firstnames, dob
            ) AS sorted_list"""
        args = {
            'match': _('last name'),
            'last': '^' + tmp,
            'last_w_caps': '^' + gmTools.capitalize(tmp.lower(), mode=gmTools.CAPS_NAMES)
        }
        return [{'cmd': cmd, 'args': args}]

    #--------------------------------------------------------
    def __queries_for_LAST_and_first(self, raw):
        """Generate search queries for [ <ALPHA> <alpha> ] or [ <alpha> <ALPHA> ] search terms.

        The word which equals its own upper-cased form is taken to be
        the last name (the second word wins if both qualify).

        Returns [] if raw is not two words, is entirely upper or
        entirely lower case, or contains no UPPERCASE word.
        """
        if regex.match(r"\w+\s+\w+$", raw) is None:
            return []
        if raw == raw.upper():
            # ALL caps
            return []
        if raw == raw.lower():
            # ALL lowercase
            return []
        parts = regex.split(r'\s+', raw)
        last = None
        if parts[0] == parts[0].upper():
            last = parts[0]
            first = parts[1]
        if parts[1] == parts[1].upper():
            last = parts[1]
            first = parts[0]
        # found no UPPERCASE
        if last is None:
            return []
        _log.debug("[%s]: <LASTNAME firstname> or firstname LASTNAME" % raw)
        last = self._normalize_soundalikes(last)
        first = self._normalize_soundalikes(first)
        cmd = """
            SELECT DISTINCT ON (pk_identity) * FROM (
                SELECT *, %(match)s AS match_type FROM ((
                    SELECT d_vap.*
                    FROM dem.names, dem.v_active_persons d_vap
                    WHERE
                        dem.names.lastnames ~ %(last)s
                            AND
                        dem.names.firstnames ~ %(first)s
                            AND
                        d_vap.pk_identity = dem.names.id_identity
                ) union all (
                    SELECT d_vap.*
                    FROM dem.names, dem.v_active_persons d_vap
                    WHERE
                        lower(dem.names.lastnames) ~ lower(%(last)s)
                            AND
                        lower(dem.names.firstnames) ~ lower(%(first)s)
                            AND
                        d_vap.pk_identity = dem.names.id_identity
                )) AS super_list ORDER BY lastnames, firstnames, dob
            ) AS sorted_list"""
        args = {
            'match': _('LASTNAME and firstname'),
            'last': '^' + last,
            'first': '^' + first
        }
        return [{'cmd': cmd, 'args': args}]

    #--------------------------------------------------------
    def _generate_simple_query(self, raw):
        """Compose queries if search term seems unambiguous.

        The (stripped) term is tested against a fixed sequence of
        shapes; the first shape that matches determines the queries.

        Returns a list of query dicts, [] if no shape matched.
        """
        queries = []

        raw = raw.strip()

        order_by = " ORDER BY lastnames, firstnames, dob"
        pk_cmd = "SELECT *, %s::text AS match_type FROM dem.v_active_persons WHERE pk_identity = %s" + order_by
        dob_cmd_typed = "SELECT *, %s::text AS match_type FROM dem.v_active_persons WHERE " + self._SQL_DOB_MATCH + order_by
        dob_cmd = "SELECT *, %s AS match_type FROM dem.v_active_persons WHERE " + self._SQL_DOB_MATCH + order_by
        ext_id_cmd = """
            SELECT
                vba.*,
                %s::text AS match_type
            FROM
                dem.lnk_identity2ext_id li2ext_id,
                dem.v_active_persons vba
            WHERE
                vba.pk_identity = li2ext_id.id_identity
                    AND
                lower(li2ext_id.external_id) ~* lower(%s)
            ORDER BY
                lastnames, firstnames, dob"""

        def fuzzy_external_id(term, separators):
            # turn each separator into a regex accepting any run of separators;
            # the placeholder is needed because the regex itself contains a
            # TAB and a "-" which must not be replaced again
            pattern = term.replace('#', '').strip()
            for separator in separators:
                pattern = pattern.replace(separator, '***DUMMY***')
            return pattern.replace('***DUMMY***', '(\\s|\t|-|/)*')

        # "<digits>" - GNUmed patient PK or DOB
        if regex.match(r"^(\s|\t)*\d+(\s|\t)*$", raw):
            _log.debug("[%s]: a PK or DOB" % raw)
            queries.append({'cmd': pk_cmd, 'args': [_('internal patient ID'), raw]})
            if len(raw) > 7:  # DOB needs at least 8 digits
                queries.append({'cmd': dob_cmd_typed, 'args': [_('date of birth'), raw.replace(',', '.')]})
            queries.append({'cmd': ext_id_cmd, 'args': [_('external patient ID'), raw]})
            return queries

        # "<d igi ts>" - DOB or patient PK
        if regex.match(r"^(\d|\s|\t)+$", raw):
            _log.debug("[%s]: a DOB or PK" % raw)
            queries.append({'cmd': dob_cmd_typed, 'args': [_('date of birth'), raw.replace(',', '.')]})
            tmp = raw.replace(' ', '')
            tmp = tmp.replace('\t', '')
            # the wildcard must be part of the argument (a "%" following the
            # placeholder ends up outside the quoted value) and the PK must be
            # cast to text for LIKE to apply
            queries.append({
                'cmd': "SELECT *, %s::text AS match_type FROM dem.v_active_persons WHERE pk_identity::text LIKE %s",
                'args': [_('internal patient ID'), tmp + '%']
            })
            return queries

        # "#<di git s>" - GNUmed patient PK
        if regex.match(r"^(\s|\t)*#(\d|\s|\t)+$", raw):
            _log.debug("[%s]: a PK or external ID" % raw)
            tmp = raw.replace('#', '')
            tmp = tmp.strip()
            tmp = tmp.replace(' ', '')
            tmp = tmp.replace('\t', '')
            # this seemingly stupid query ensures the PK actually exists
            queries.append({'cmd': pk_cmd, 'args': [_('internal patient ID'), tmp]})
            # but might also be an external ID
            queries.append({
                'cmd': ext_id_cmd,
                'args': [_('external patient ID'), fuzzy_external_id(raw, (' ', '\t'))]
            })
            return queries

        # "#<di/git s or c-hars>" - external ID
        if regex.match(r"^(\s|\t)*#.+$", raw):
            _log.debug("[%s]: an external ID" % raw)
            queries.append({
                'cmd': ext_id_cmd,
                'args': [_('external patient ID'), fuzzy_external_id(raw, (' ', '\t', '-', '/'))]
            })
            return queries

        # digits interspersed with "./-" or blank space - DOB
        if regex.match(r"^(\s|\t)*\d+(\s|\t|\.|\-|/)*\d+(\s|\t|\.|\-|/)*\d+(\s|\t|\.)*$", raw):
            _log.debug("[%s]: a DOB" % raw)
            tmp = raw.strip()
            while '\t\t' in tmp:
                tmp = tmp.replace('\t\t', ' ')
            # collapse runs of blanks into a single blank
            while '  ' in tmp:
                tmp = tmp.replace('  ', ' ')
            # replacing "-" and "/" by "." is apparently not needed
            # due to PostgreSQL smarts...
            queries.append({'cmd': dob_cmd, 'args': [_('date of birth'), tmp.replace(',', '.')]})
            return queries

        # " , <alpha>" - first name
        queries = self.__queries_for_firstname_with_comma(raw)
        if len(queries) > 0:
            return queries

        # "<alpha>, " - last name
        queries = self.__queries_for_lastname_with_comma(raw)
        if len(queries) > 0:
            return queries

        # "<ALPHA>" - last name
        queries = self.__queries_for_LASTNAME(raw)
        if len(queries) > 0:
            return queries

        # "<alpha alpha>" - first last or last first, depending on UPPERCASE
        queries = self.__queries_for_LAST_and_first(raw)
        if len(queries) > 0:
            return queries

        # "*|$<...>" - DOB
        if regex.match(r"\s*(\*|\$).+$", raw):
            _log.debug("[%s]: a DOB" % raw)
            tmp = raw.replace('*', '')
            tmp = tmp.replace('$', '')
            queries.append({'cmd': dob_cmd, 'args': [_('date of birth'), tmp.replace(',', '.')]})
            return queries

        return []

    #--------------------------------------------------------
    # generic, locale independent queries
    #--------------------------------------------------------
    def _generate_queries_from_dto(self, dto=None):
        """Generate generic queries.

        - not locale dependent
        - data -> firstnames, lastnames, dob, gender

        Returns None if dto is not a gmPerson.cDTO_person, else a list
        with one query. dob and gender are only searched for if they
        are not None.
        """
        _log.debug('_generate_queries_from_dto("%s")' % dto)

        if not isinstance(dto, gmPerson.cDTO_person):
            return None

        vals = [_('name, gender, date of birth')]
        where_snippets = []

        vals.append(dto.firstnames)
        where_snippets.append('firstnames=%s')
        vals.append(dto.lastnames)
        where_snippets.append('lastnames=%s')

        if dto.dob is not None:
            vals.append(dto.dob)
            where_snippets.append("dem.date_trunc_utc('day'::text, dob) = dem.date_trunc_utc('day'::text, %s)")

        if dto.gender is not None:
            vals.append(dto.gender)
            where_snippets.append('gender=%s')

        # sufficient data ?
        if len(where_snippets) == 0:
            _log.error('invalid search dict structure')
            _log.debug(dto)
            return None

        # %%s survives the formatting as the placeholder for the match type
        cmd = """
            SELECT *, %%s AS match_type FROM dem.v_active_persons
            WHERE pk_identity in (
                SELECT id_identity FROM dem.names WHERE %s
            ) ORDER BY lastnames, firstnames, dob""" % ' and '.join(where_snippets)

        queries = [
            {'cmd': cmd, 'args': vals}
        ]

        # shall we mogrify name parts ? probably not

        return queries

    #--------------------------------------------------------
    # queries for DE
    #--------------------------------------------------------
    def __generate_queries_from_single_major_part(self, part=None):
        """Generate queries for a search term without "," or ";".

        The term is split on whitespace. Words containing a digit count
        as dates, all others as name parts. Queries are generated for

        - two name parts
        - two name parts plus one date (in any order)

        Returns [] for any other combination.
        """
        # split on whitespace, ignore empty parts
        words = [w.strip() for w in regex.split(r"\s+|\t+", part) if w.strip() != '']

        # parse into name/date parts
        date_count = 0
        date_part = None
        name_parts = []
        for word in words:
            # any digit signifies a date, FIXME: what about "<40" ?
            if regex.search(r"\d", word):
                date_count = date_count + 1
                date_part = word
            else:
                name_parts.append(word)

        def caps(name):
            return gmTools.capitalize(name, mode=gmTools.CAPS_NAMES)

        # exactly 1 word ?
        if len(words) == 1:
            return []

        # exactly 2 words ?
        if len(words) == 2:
            if date_count > 0:
                # FIXME: either "name date" or "date date"
                _log.error("don't know how to generate queries for [%s]" % part)
                return []
            # no date = "first last" or "last first"
            names_exact = self._SQL_NAME_JOIN + "n.firstnames ~ %s AND n.lastnames ~ %s"
            names_nocase = self._SQL_NAME_JOIN + "lower(n.firstnames) ~* lower(%s) AND lower(n.lastnames) ~* lower(%s)"
            nick_exact = self._SQL_NAME_JOIN + "n.preferred ~ %s AND n.lastnames ~ %s"
            nick_nocase = self._SQL_NAME_JOIN + "lower(n.preferred) ~* lower(%s) AND lower(n.lastnames) ~* lower(%s)"
            queries = []
            # assumption: first last
            queries.append({
                'cmd': names_exact,
                'args': [_('name: first-last'), '^' + caps(name_parts[0]), '^' + caps(name_parts[1])]
            })
            queries.append({
                'cmd': names_nocase,
                'args': [_('name: first-last'), '^' + name_parts[0], '^' + name_parts[1]]
            })
            # assumption: last first
            queries.append({
                'cmd': names_exact,
                'args': [_('name: last-first'), '^' + caps(name_parts[1]), '^' + caps(name_parts[0])]
            })
            queries.append({
                'cmd': names_nocase,
                'args': [_('name: last-first'), '^' + name_parts[1], '^' + name_parts[0]]
            })
            # assumption: last nick
            queries.append({
                'cmd': nick_exact,
                'args': [_('name: last-nick'), '^' + caps(name_parts[1]), '^' + caps(name_parts[0])]
            })
            queries.append({
                'cmd': nick_nocase,
                'args': [_('name: last-nick'), '^' + name_parts[1], '^' + name_parts[0]]
            })
            # name parts anywhere inside name - third order query ...
            queries.append({
                'cmd': """SELECT DISTINCT ON (id_identity)
                    d_vap.*,
                    %s::text AS match_type
                FROM
                    dem.v_active_persons d_vap,
                    dem.names n
                WHERE
                    d_vap.pk_identity = n.id_identity
                        AND
                    -- name_parts[0]
                    lower(n.firstnames || ' ' || n.lastnames) ~* lower(%s)
                        AND
                    -- name_parts[1]
                    lower(n.firstnames || ' ' || n.lastnames) ~* lower(%s)""",
                'args': [_('name'), name_parts[0], name_parts[1]]
            })
            return queries

        # exactly 3 words ?
        if len(words) == 3:
            if date_count != 1:
                # FIXME: "name name name" or "name date date"
                return []

            # special case: 3 words, exactly 1 of them a date, no ",;"
            dob = date_part.replace(',', '.')
            names_dob_exact = self._SQL_NAME_JOIN + "n.firstnames ~ %s AND n.lastnames ~ %s AND " + self._SQL_DOB_MATCH
            names_dob_nocase = self._SQL_NAME_JOIN + "lower(n.firstnames) ~* lower(%s) AND lower(n.lastnames) ~* lower(%s) AND " + self._SQL_DOB_MATCH
            queries = []
            # assumption: first, last, dob - first order
            queries.append({
                'cmd': names_dob_exact,
                'args': [_('names: first-last, date of birth'), '^' + caps(name_parts[0]), '^' + caps(name_parts[1]), dob]
            })
            queries.append({
                'cmd': names_dob_nocase,
                'args': [_('names: first-last, date of birth'), '^' + name_parts[0], '^' + name_parts[1], dob]
            })
            # assumption: last, first, dob - second order query
            queries.append({
                'cmd': names_dob_exact,
                'args': [_('names: last-first, date of birth'), '^' + caps(name_parts[1]), '^' + caps(name_parts[0]), dob]
            })
            queries.append({
                'cmd': names_dob_nocase,
                'args': [_('names: last-first, dob'), '^' + name_parts[1], '^' + name_parts[0], dob]
            })
            # name parts anywhere in name - third order query ...
            queries.append({
                'cmd': """SELECT DISTINCT ON (id_identity)
                    d_vap.*,
                    %s::text AS match_type
                FROM
                    dem.v_active_persons d_vap,
                    dem.names n
                WHERE
                    d_vap.pk_identity = n.id_identity
                        AND
                    lower(n.firstnames || ' ' || n.lastnames) ~* lower(%s)
                        AND
                    lower(n.firstnames || ' ' || n.lastnames) ~* lower(%s)
                        AND
                    dem.date_trunc_utc('day'::text, dob) = dem.date_trunc_utc('day'::text, %s::timestamp with time zone)
                """,
                'args': [_('name, date of birth'), name_parts[0], name_parts[1], dob]
            })
            return queries

        return []

    #--------------------------------------------------------
    def _generate_queries_de(self, search_term=None):
        """Generate queries for a free-text search term.

        First tries _generate_simple_query(). If that yields nothing
        the term is run through _normalize_soundalikes() and analysed
        as a single name, as one "major" part, or as several major
        parts separated by "," or ";".

        Returns a list of query dicts, [] if search_term is None or
        nothing could be generated.
        """
        if search_term is None:
            return []

        # check to see if we get away with a simple query ...
        queries = self._generate_simple_query(search_term)
        if len(queries) > 0:
            _log.debug('[%s]: search term with a simple, unambigous structure' % search_term)
            return queries

        # no we don't
        _log.debug('[%s]: not a search term with a simple, unambigous structure' % search_term)

        search_term = search_term.strip().strip(',').strip(';').strip()
        normalized = self._normalize_soundalikes(search_term)

        queries = []

        # "<CHARS>" - single name part
        # yes, I know, this is culture specific (did you read the docs ?)
        if regex.match(r"^(\s|\t)*[a-zäöüßéáúóçøA-ZÄÖÜÇØ]+(\s|\t)*$", search_term):
            _log.debug("[%s]: a single name part", search_term)
            # there's no intermediate whitespace due to the regex
            cmd = """
                SELECT DISTINCT ON (pk_identity) * FROM (
                    SELECT * FROM ((
                        -- lastname
                        SELECT d_vap.*, %s::text AS match_type FROM dem.v_active_persons d_vap, dem.names n
                        WHERE d_vap.pk_identity = n.id_identity and lower(n.lastnames) ~* lower(%s)
                    ) union all (
                        -- firstname
                        SELECT d_vap.*, %s::text AS match_type FROM dem.v_active_persons d_vap, dem.names n
                        WHERE d_vap.pk_identity = n.id_identity and lower(n.firstnames) ~* lower(%s)
                    ) union all (
                        -- nickname
                        SELECT d_vap.*, %s::text AS match_type FROM dem.v_active_persons d_vap, dem.names n
                        WHERE d_vap.pk_identity = n.id_identity and lower(n.preferred) ~* lower(%s)
                    ) union all (
                        -- anywhere in name
                        SELECT
                            d_vap.*,
                            %s::text AS match_type
                        FROM
                            dem.v_active_persons d_vap,
                            dem.names n
                        WHERE
                            d_vap.pk_identity = n.id_identity
                                AND
                            lower(n.firstnames || ' ' || n.lastnames || ' ' || coalesce(n.preferred, '')) ~* lower(%s)
                    )) AS super_list ORDER BY lastnames, firstnames, dob
                ) AS sorted_list
            """
            tmp = normalized.strip()
            args = [
                _('lastname'), '^' + tmp,
                _('firstname'), '^' + tmp,
                _('nickname'), '^' + tmp,
                _('any name part'), tmp
            ]
            queries.append({'cmd': cmd, 'args': args})
            return queries

        # try to split on (major) part separators, ignore empty parts
        major_parts = [p.strip() for p in regex.split(',|;', normalized) if p.strip() != '']

        # only one "major" part ? (i.e. no ",;" ?)
        if len(major_parts) == 1:
            _log.debug('[%s]: only one non-empty part after splitting by , or ; ("major" part)', normalized)
            queries = self.__generate_queries_from_single_major_part(part=normalized)
            if len(queries) > 0:
                return queries
            # NOTE(review): _generate_dumb_brute_query() is not visible in this
            # part of the class - presumably defined further down, confirm
            return self._generate_dumb_brute_query(search_term)

        # more than one major part (separated by ';,')
        _log.debug('[%s]: more than one non-empty part after splitting by , or ; ("major" parts)', normalized)
        # parse into name and date parts
        date_parts = []
        name_parts = []     # one list of words per major name part
        name_count = 0      # total number of words over all name parts
        for part in major_parts:
            if part.strip() == '':
                continue
            # any digits ?
            if regex.search(r"\d+", part):
                # FIXME: parse out whitespace *not* adjacent to a *word*
                date_parts.append(part)
            else:
                words = regex.split(r"\s+|\t+", part.strip())
                name_count = name_count + len(words)
                name_parts.append(words)

        _log.debug('found %s character (name) parts and %s number (date ?) parts', len(name_parts), len(date_parts))

        def caps(name):
            return gmTools.capitalize(name, mode=gmTools.CAPS_NAMES)

        def caps_join(words):
            return ' '.join(map(gmTools.capitalize, words))

        anywhere_in_name = "lower(firstnames || ' ' || lastnames) ~* lower(%s)"

        where_parts = []
        # first, handle name parts
        # special case: "<date(s)>, <name> <name>, <date(s)>"
        if (len(name_parts) == 1) and (name_count == 2):
            word_1, word_2 = name_parts[0]
            # usually "first last"
            where_parts.append({
                'conditions': "firstnames ~ %s and lastnames ~ %s",
                'args': [_('names: first last'), '^' + caps(word_1), '^' + caps(word_2)]
            })
            where_parts.append({
                'conditions': "lower(firstnames) ~* lower(%s) and lower(lastnames) ~* lower(%s)",
                'args': [_('names: first last'), '^' + word_1, '^' + word_2]
            })
            # but sometimes "last first""
            where_parts.append({
                'conditions': "firstnames ~ %s and lastnames ~ %s",
                'args': [_('names: last, first'), '^' + caps(word_2), '^' + caps(word_1)]
            })
            where_parts.append({
                'conditions': "lower(firstnames) ~* lower(%s) and lower(lastnames) ~* lower(%s)",
                'args': [_('names: last, first'), '^' + word_2, '^' + word_1]
            })
            # or even substrings anywhere in name
            where_parts.append({
                'conditions': anywhere_in_name + " OR " + anywhere_in_name,
                'args': [_('name'), word_1, word_2]
            })

        # special case: "<date(s)>, <name(s)>, <name(s)>, <date(s)>"
        elif len(name_parts) == 2:
            # usually "last, first"
            where_parts.append({
                'conditions': "firstnames ~ %s AND lastnames ~ %s",
                'args': [_('name: last, first'), '^' + caps_join(name_parts[1]), '^' + caps_join(name_parts[0])]
            })
            where_parts.append({
                'conditions': "lower(firstnames) ~* lower(%s) AND lower(lastnames) ~* lower(%s)",
                'args': [_('name: last, first'), '^' + ' '.join(name_parts[1]), '^' + ' '.join(name_parts[0])]
            })
            # but sometimes "first, last"
            where_parts.append({
                'conditions': "firstnames ~ %s AND lastnames ~ %s",
                'args': [_('name: last, first'), '^' + caps_join(name_parts[0]), '^' + caps_join(name_parts[1])]
            })
            where_parts.append({
                'conditions': "lower(firstnames) ~* lower(%s) AND lower(lastnames) ~* lower(%s)",
                'args': [_('name: last, first'), '^' + ' '.join(name_parts[0]), '^' + ' '.join(name_parts[1])]
            })
            # and sometimes "last, nick"
            where_parts.append({
                'conditions': "preferred ~ %s AND lastnames ~ %s",
                'args': [_('name: last, first'), '^' + caps_join(name_parts[1]), '^' + caps_join(name_parts[0])]
            })
            where_parts.append({
                'conditions': "lower(preferred) ~* lower(%s) AND lower(lastnames) ~* lower(%s)",
                'args': [_('name: last, first'), '^' + ' '.join(name_parts[1]), '^' + ' '.join(name_parts[0])]
            })
            # or even substrings anywhere in name
            where_parts.append({
                'conditions': anywhere_in_name + " AND " + anywhere_in_name,
                'args': [_('name'), ' '.join(name_parts[0]), ' '.join(name_parts[1])]
            })

        # big trouble - arbitrary number of names
        else:
            # FIXME: deep magic, not sure of rationale ...
            if len(name_parts) == 1:
                fragments = name_parts[0]
            else:
                fragments = [' '.join(words) for words in name_parts]
            for fragment in fragments:
                where_parts.append({
                    'conditions': anywhere_in_name,
                    'args': [_('name'), fragment]
                })

        # secondly handle date parts
        # FIXME: this needs a considerable smart-up !
        date_conditions = None
        if len(date_parts) == 1:
            date_conditions = self._SQL_DOB_MATCH
            date_args = [date_parts[0].replace(',', '.')]
            date_label = _('date of birth')
        elif len(date_parts) > 1:
            # only the first two dates are used: birth and death
            date_conditions = self._SQL_DOB_MATCH + " AND " + self._SQL_DECEASED_MATCH
            date_args = [date_parts[0].replace(',', '.'), date_parts[1].replace(',', '.')]
            date_label = _('date of birth/death')

        if date_conditions is not None:
            if len(where_parts) == 0:
                # no names at all: search by date(s) only
                where_parts.append({
                    'conditions': date_conditions,
                    'args': [date_label] + date_args
                })
            else:
                # only the first two name conditions get narrowed down by date
                for where_part in where_parts[:2]:
                    where_part['conditions'] += " AND " + date_conditions
                    where_part['args'].extend(date_args)
                    where_part['args'][0] += ', ' + date_label

        # and finally generate the queries ...
        for where_part in where_parts:
            queries.append({
                'cmd': "SELECT *, %%s::text AS match_type FROM dem.v_active_persons WHERE %s" % where_part['conditions'],
                'args': where_part['args']
            })
        return queries
873 #--------------------------------------------------------
def _generate_dumb_brute_query(self, search_term=''):
    """Build a catch-all query matching all fragments of a search term.

    *search_term* is split on whitespace. Each resulting fragment
    becomes one case-insensitive regular expression condition
    (PostgreSQL ``~*``) against "title firstnames lastnames" of
    dem.v_active_persons, and all conditions are ANDed together.
    An empty or whitespace-only term produces no name condition,
    so the query is then not restricted by name at all.

    Returns:
        A dict with two keys:
        'cmd': the SQL command containing %s placeholders
        'args': list of arguments for the placeholders, the
            translated match type label first, followed by the
            search fragments in their original order
    """
    _log.debug('_generate_dumb_brute_query("%s")', search_term)

    # FIXME: split on more than just whitespace
    fragments = search_term.strip().split()

    name_condition = " AND lower(coalesce(d_vap.title, '') || ' ' || d_vap.firstnames || ' ' || d_vap.lastnames) ~* lower(%s)"
    where_clause = name_condition * len(fragments)

    # The match type label is handed over as a query argument rather
    # than being pasted into the SQL text: a translation containing
    # a quote or a percent sign can then neither break the statement
    # nor confuse placeholder substitution. This also puts the label
    # into args[0], just like the other query generators do.
    args = [_('full name')] + fragments

    # NOTE(review): dem.names is joined but none of its columns is
    # used here, the outer DISTINCT ON collapses the resulting
    # duplicate rows - confirm whether the join is still needed.
    query = """
SELECT DISTINCT ON (pk_identity) * FROM (
    SELECT
        d_vap.*,
        %%s::text AS match_type
    FROM
        dem.v_active_persons d_vap,
        dem.names n
    WHERE
        d_vap.pk_identity = n.id_identity
        %s
    ORDER BY
        lastnames,
        firstnames,
        dob
) AS ordered_list""" % where_clause

    return {'cmd': query, 'args': args}
903 904 #============================================================
def ask_for_patient():
    """Text mode UI function to ask for patient.

    Keeps prompting for a search term until that term matches
    exactly one patient, which is then returned.

    Returns None when the user cancels, that is when the input is
    'exit', 'quit' or 'bye', or when gmTools.prompted_input() hands
    back None (presumably on blank input, as the prompt suggests -
    confirm against gmTools).
    """
    searcher = cPatientSearcher_SQL()
    cancel_inputs = ('exit', 'quit', 'bye', None)

    while True:
        term = gmTools.prompted_input(prompt = "\nEnter person search term or leave blank to exit")
        if term in cancel_inputs:
            print("user cancelled patient search")
            return None

        matches = searcher.get_patients(search_term = term)

        # get_patients() may return None as well as an empty list
        if not matches:
            print("No patient matches the search term.")
            print("")
            continue

        if len(matches) == 1:
            return matches[0]

        # ambiguous result: list the candidates and ask again
        print("Several patients match the search term:")
        print("")
        for candidate in matches:
            print(candidate)
            print("")
        print("Please refine the search term so it matches one patient only.")
#============================================================
# main/testing
#============================================================
if __name__ == '__main__':

    # the manual tests only run when invoked as "<script> test"
    if len(sys.argv) < 2 or sys.argv[1] != 'test':
        sys.exit()

    import datetime
    gmDateTime.init()

    #--------------------------------------------------------
    def _print_queries(queries):
        """Print match type, SQL and arguments of each query dict."""
        for q in queries:
            print(" match on:", q['args'][0])
            print(" query :", q['cmd'])
            print(" args :", q['args'])

    #--------------------------------------------------------
    def test_search_by_dto():
        """Search for patients by way of a structured cDTO_person."""
        dto = gmPerson.cDTO_person()
        dto.firstnames = 'Sigrid'
        dto.lastnames = 'Kiesewetter'
        dto.gender = 'female'
        # dto.dob = datetime.datetime.now(tz=gmDateTime.gmCurrentLocalTimezone)
        dto.dob = datetime.datetime(1939,6,24,23,0,0,0,gmDateTime.gmCurrentLocalTimezone)
        print(dto)

        searcher = cPatientSearcher_SQL()
        pats = searcher.get_patients(dto = dto)
        print(pats)

    #--------------------------------------------------------
    def test_patient_search_queries():
        """Interactively show what the query generators produce."""
        searcher = cPatientSearcher_SQL()

        print("testing _normalize_soundalikes()")
        print("--------------------------------")
        # FIXME: support Ähler -> Äler and Dähler -> Däler
        data = ['Krüger', 'Krueger', 'Kruger', 'Überle', 'Böger', 'Boger', 'Öder', 'Ähler', 'Däler', 'Großer', 'müller', 'Özdemir', 'özdemir']
        for name in data:
            print('%s: %s' % (name, searcher._normalize_soundalikes(name)))

        input('press [ENTER] to continue')
        print("============")

        print("testing _generate_queries_from_dto()")
        print("------------------------------------")
        # cDTO_person lives in gmPerson and the datetime module is
        # imported above: the formerly used bare "cDTO_person" and
        # "pyDT" names are not defined in this module
        dto = gmPerson.cDTO_person()
        dto.gender = 'm'
        dto.lastnames = 'Kirk'
        dto.firstnames = 'James'
        dto.dob = datetime.datetime.now(tz=gmDateTime.gmCurrentLocalTimezone)
        q = searcher._generate_queries_from_dto(dto)[0]
        print("dto:", dto)
        print(" match on:", q['args'][0])
        print(" query:", q['cmd'])

        input('press [ENTER] to continue')
        print("============")

        print("testing _generate_queries_de()")
        print("------------------------------")
        for term in ['Kirk, James', 'müller', 'özdemir', 'Özdemir']:
            _print_queries(searcher._generate_queries_de(term))
            input('press [ENTER] to continue')
            print("============")

        print("testing _generate_dumb_brute_query()")
        print("------------------------------------")
        q = searcher._generate_dumb_brute_query('Kirk, James Tiberius')
        print(" match on:", q['args'][0])
        print(" args:", q['args'])
        print(" query:", q['cmd'])

        input('press [ENTER] to continue')

    #--------------------------------------------------------
    def test_ask_for_patient():
        """Keep asking for patients and show some of their data."""
        while True:
            myPatient = ask_for_patient()
            if myPatient is None:
                break
            print("ID ", myPatient.ID)
            print("names ", myPatient.get_names())
            print("addresses:", myPatient.get_addresses(address_type='home'))
            print("recent birthday:", myPatient.dob_in_range())
            myPatient.export_as_gdt(filename='apw.gdt', encoding = 'cp850')
            # docs = myPatient.get_document_folder()
            # print("docs ", docs)
            # emr = myPatient.emr
            # print("EMR ", emr)

    #--------------------------------------------------------
    def test_generate_simple_query():
        """Show the queries generated for a set of sample fragments."""
        searcher = cPatientSearcher_SQL()
        print("testing _generate_simple_query()")
        print("----------------------------")
        data = [
            '51234', '1 134 153', '#13 41 34', '#3-AFY322.4', '22-04-1906', '1235/32/3525',
            ', tiberiu',    # firstname
            'KIRK',         # lastname
            'kirk,',        # lastname
            'KIR tib',      # LAST first
            'Tib KI'        # first LAST
        ]
        for fragment in data:
            print("fragment:", fragment)
            qs = searcher._generate_simple_query(fragment)
            for q in qs:
                print('')
                print(" match on:", q['args'])
                print(" query :", q['cmd'])
            input('press [ENTER] to continue')
            print("============")

    #--------------------------------------------------------
    test_generate_simple_query()
    #test_patient_search_queries()
    #test_ask_for_patient()
    #test_search_by_dto()

#============================================================