1 """Base classes for match providers.
2
3 They are used by business objects to give
4 phrasewheels the ability to guess phrases.
5
6 Copyright (C) GNUMed developers
7 license: GPL v2 or later
8 """
9 __version__ = "$Revision: 1.34 $"
10 __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>, I.Haywood <ihaywood@gnu.org>, S.J.Tan <sjtan@bigpond.com>"
11
12
13 import re as regex, logging
14
15
16
17 from Gnumed.pycommon import gmPG2
18
19
20 _log = logging.getLogger('gm.ui')
21 _log.info(__version__)
22
23
24
25
# Regex matching runs of characters to be ignored in a fragment
# (presumably the default for the "ignored_chars" setting -- confirm
# against the provider constructor).
# The double quote must live INSIDE the character class: when it is
# appended after the closing "]+" it becomes a mandatory literal suffix
# and a run of ignored characters only matches if a '"' follows it.
default_ignored_chars = r"""[?!.'"(){}\[\]<>~#*$%^_]+"""

# Regex matching runs of characters which separate words in a fragment
# (presumably the default for the "word_separators" setting -- confirm
# against the provider constructor).
default_word_separators = '[- \t=+&:@]+'
32
34 """Base class for match providing objects.
35
36 Match sources might be:
37 - database tables
38 - flat files
39 - previous input
40 - config files
41 - in-memory list created on the fly
42 """
43 print_queries = False
44
52
53
54
56 """Return matches according to aFragment and matching thresholds.
57
58 FIXME: design decision: we dont worry about data source changes
59 during the lifetime of a MatchProvider
60 FIXME: append _("*get all items*") on truncation
61 """
62
63 if aFragment is None:
64 raise ValueError, 'Cannot find matches without a fragment.'
65
66
67 if aFragment == u'*':
68 return self.getAllMatches()
69
70
71 tmpFragment = aFragment.lower()
72
73 if self.__ignored_chars is not None:
74 tmpFragment = self.__ignored_chars.sub('', tmpFragment)
75
76 if self.__word_separators is not None:
77 tmpFragment = u' '.join(self.__word_separators.split(tmpFragment))
78
79 lngFragment = len(tmpFragment)
80
81
82 if lngFragment >= self.__threshold_substring:
83 return self.getMatchesBySubstr(tmpFragment)
84 elif lngFragment >= self.__threshold_word:
85 return self.getMatchesByWord(tmpFragment)
86 elif lngFragment >= self.__threshold_phrase:
87 return self.getMatchesByPhrase(tmpFragment)
88 else:
89 return (False, [])
90
92 raise NotImplementedError
93
95 raise NotImplementedError
96
98 raise NotImplementedError
99
101 raise NotImplementedError
102
105
106
107
def setThresholds(self, aPhrase = 1, aWord = 3, aSubstring = 5):
    """Set match location thresholds.

    The fragment passed to getMatches() must contain at least this
    many characters before it triggers a match search at:

    1) phrase_start - start of phrase (first word)
    2) word_start - start of any word within phrase
    3) in_word - _inside_ any word within phrase

    aPhrase, aWord and aSubstring are the minimum fragment lengths
    for 1), 2) and 3), respectively. They must satisfy
    aPhrase <= aWord <= aSubstring.

    Returns True if the thresholds were stored. Returns False, after
    logging an error and leaving the current thresholds untouched,
    if the requested ordering does not make sense.
    """
    # sanity checks: a more permissive kind of match must not kick in
    # before a stricter one
    if aSubstring < aWord:
        _log.error(
            'Setting substring threshold (%s) lower than word-start threshold (%s) does not make sense. Retaining original thresholds (%s:%s, respectively).',
            aSubstring, aWord, self.__threshold_substring, self.__threshold_word
        )
        return False
    if aWord < aPhrase:
        # report the two values actually being compared here (word vs phrase)
        _log.error(
            'Setting word-start threshold (%s) lower than phrase-start threshold (%s) does not make sense. Retaining original thresholds (%s:%s, respectively).',
            aWord, aPhrase, self.__threshold_word, self.__threshold_phrase
        )
        return False

    # now actually reassign thresholds
    self.__threshold_phrase = aPhrase
    self.__threshold_word = aWord
    self.__threshold_substring = aSubstring

    return True
131
137
139 if self.__word_separators is None:
140 return None
141 return self.__word_separators.pattern
142
143 word_separators = property(_get_word_separators, _set_word_separators)
144
150
152 if self.__ignored_chars is None:
153 return None
154 return self.__ignored_chars.pattern
155
156 ignored_chars = property(_get_ignored_chars, _set_ignored_chars)
157
def set_context(self, context=None, val=None):
    """Store a context value which matching code may take into account.

    Whether the value is actually used is up to the matching
    implementation. Which names and values make sense depends
    on what is being matched.

    <context> -- the *placeholder* key *inside* the context
                 definition, not the context *definition* key
    <val> -- the value to remember under that placeholder

    Returns False (storing nothing) if no <context> was given,
    True otherwise.
    """
    have_context = context is not None
    if have_context:
        self._context_vals[context] = val
    return have_context
172
def unset_context(self, context=None):
    """Forget the value stored for <context>, if there is one.

    A <context> for which nothing is stored is silently ignored.
    """
    # discard the popped value: this method has no return value
    self._context_vals.pop(context, None)
178
179
180
182 """Match provider where all possible options can be held
183 in a reasonably sized, pre-allocated list.
184 """
186 """aSeq must be a list of dicts. Each dict must have the keys (data, label, weight)
187 """
188 if not type(aSeq) in [type(None), type([]), type(())]:
189 _log.error('fixed list match provider argument must be a list/tuple of dicts/None')
190 raise TypeError('fixed list match provider argument must be a list/tuple of dicts/None')
191
192 self.__items = aSeq
193 cMatchProvider.__init__(self)
194
195
196
197
198
199
200
201
203 """Return matches for aFragment at start of phrases."""
204 matches = []
205
206 for item in self.__items:
207
208 if item['list_label'].lower().startswith(aFragment.lower()):
209 matches.append(item)
210
211 if len(matches) == 0:
212 return (False, [])
213
214 matches.sort(self.__cmp_items)
215 return (True, matches)
216
218 """Return matches for aFragment at start of words inside phrases."""
219 matches = []
220
221 for item in self.__items:
222 item_label = item['list_label'].lower()
223 fragment_pos = item_label.find(aFragment.lower())
224
225 if fragment_pos == 0:
226 matches.append(item)
227
228 elif fragment_pos > 0:
229
230 if item_label[fragment_pos-1] == u' ':
231 matches.append(item)
232
233 if len(matches) == 0:
234 return (False, [])
235
236 matches.sort(self.__cmp_items)
237 return (True, matches)
238
240 """Return matches for aFragment as a true substring."""
241 matches = []
242
243 for item in self.__items:
244 if item['list_label'].lower().find(aFragment.lower()) != -1:
245 matches.append(item)
246
247 if len(matches) == 0:
248 return (False, [])
249
250 matches.sort(self.__cmp_items)
251 return (True, matches)
252
254 """Return all items."""
255 matches = self.__items
256
257 if len(matches) == 0:
258 return (False, [])
259
260 matches.sort(self.__cmp_items)
261 return (True, matches)
262
264 """items must be a list of dicts. Each dict must have the keys (data, list_label, weight)"""
265 self.__items = items
266
268 """Compare items based on weight."""
269 if item1['weight'] == item2['weight']:
270 return 0
271
272
273 if item1['weight'] < item2['weight']:
274 return 1
275 if item1['weight'] > item2['weight']:
276 return -1
277
279 """Match provider which searches matches
280 in the results of a function call.
281 """
def __init__(self, get_candidates = None):
    """Set up a provider which matches against the results of a callable.

    get_candidates -- callable invoked without arguments to retrieve
        the match candidates; the original note says it must return
        a list of strings, but matching code in this file reads
        candidate['list_label'], so dicts appear to be expected
        -- TODO confirm

    Raises ValueError (after logging an error) if no callable is given.
    """
    if get_candidates is None:
        problem = 'must define function to retrieve match candidates list'
        _log.error(problem)
        raise ValueError(problem)

    self._get_candidates = get_candidates
    cMatchProvider.__init__(self)
290
291
292
293
294
295
296
297
299 """Return matches for aFragment at start of phrases."""
300 matches = []
301 candidates = self._get_candidates()
302
303 for candidate in candidates:
304
305 if aFragment.startswith(candidate['list_label'].lower()):
306 matches.append(candidate)
307
308 if len(matches) == 0:
309 return (False, [])
310
311 matches.sort(self.__cmp_candidates)
312 return (True, matches)
313
315 """Return matches for aFragment at start of words inside phrases."""
316 matches = []
317 candidates = self._get_candidates()
318
319 for candidate in candidates:
320 pos = candidate['list_label'].lower().find(aFragment)
321
322
323
324
325 if (pos == 0) or (candidate['list_label'][pos-1] == u' '):
326 matches.append(candidate)
327
328 if len(matches) == 0:
329 return (False, [])
330
331 matches.sort(self.__cmp_candidates)
332 return (True, matches)
333
335 """Return matches for aFragment as a true substring."""
336 matches = []
337 candidates = self._get_candidates()
338
339 for candidate in candidates:
340 if candidate['list_label'].lower().find(aFragment) != -1:
341
342 matches.append(candidate)
343
344 if len(matches) == 0:
345 return (False, [])
346
347 matches.sort(self.__cmp_candidates)
348 return (True, matches)
349
351 """Return all candidates."""
352 return self._get_candidates()
353
355 """naive ordering"""
356 return 0
357
358
359
360
361
362
363
364
366 """Match provider which searches matches
367 in possibly several database tables.
368
369 queries:
370 - a list of unicode strings
371 - each string is a query
372 - each string must contain: "... WHERE <column> %(fragment_condition)s ..."
373 - each string can contain in the where clause: "... %(<ctxt_key1>)s ..."
374 - each query must return (data, list_label, field_label)
375
376 context definitions to be used in the queries, example:
377 {'ctxt_key1': {'where_part': 'AND country = %(country)s', 'placeholder': 'country'}}
378
379 client code using .set_context() must use the 'placeholder':
380 <phrasewheel>/<match provider>.set_context('country', 'Germany')
381
382 _SQL_data2match:
383 SQL to retrieve a match by, say, primary key
384 wherein the only keyword argument is 'pk'
385 """
def __init__(self, queries = None, context = None):
    """Set up a provider which matches by running SQL queries.

    queries -- either a list of queries, which is used as is, or
        anything else, which gets wrapped into a one-element list
    context -- dict of context definitions, defaults to an empty dict
    """
    cMatchProvider.__init__(self)

    # only a genuine list is taken as "several queries"
    self._queries = queries if type(queries) is list else [queries]
    self._context = {} if context is None else context

    # arguments handed to the database along with each query
    self._args = {}

    # no data -> match query until one gets assigned
    self._SQL_data2match = None
403
404
405
406
407
408
409
410
412 """Return matches for aFragment at start of phrases."""
413
414 fragment_condition = u"ILIKE %(fragment)s"
415 self._args['fragment'] = u"%s%%" % aFragment
416
417 return self._find_matches(fragment_condition)
418
420 """Return matches for aFragment at start of words inside phrases."""
421
422 fragment_condition = u"~* %(fragment)s"
423 aFragment = gmPG2.sanitize_pg_regex(expression = aFragment, escape_all = False)
424 self._args['fragment'] = u"( %s)|(^%s)" % (aFragment, aFragment)
425
426 return self._find_matches(fragment_condition)
427
429 """Return matches for aFragment as a true substring."""
430
431 fragment_condition = u"ILIKE %(fragment)s"
432 self._args['fragment'] = u"%%%s%%" % aFragment
433
434 return self._find_matches(fragment_condition)
435
439
441 if self._SQL_data2match is None:
442 return None
443
444 query = {'cmd': self._SQL_data2match, 'args': {'pk': data}}
445 try:
446 rows, idx = gmPG2.run_ro_queries(queries = [query], get_col_idx = False)
447 except:
448 _log.exception('[%s]: error running _SQL_data2match, dropping query', self.__class__.__name__)
449 self._SQL_data2match = None
450 return None
451
452
453 if len(rows) == 1:
454 return rows[0]
455
456 _log.error('[%s]: 0 or >1 rows found by running _SQL_data2match, ambiguous, ignoring', self.__class__.__name__)
457 return None
458
460 if self.print_queries:
461 print "----------------------"
462 matches = []
463 for query in self._queries:
464 where_fragments = {'fragment_condition': fragment_condition}
465
466 for context_key, context_def in self._context.items():
467 try:
468 placeholder = context_def['placeholder']
469 where_part = context_def['where_part']
470 self._args[placeholder] = self._context_vals[placeholder]
471
472 where_fragments[context_key] = where_part
473 if self.print_queries:
474 print "ctxt ph:", placeholder
475 print "ctxt where:", where_part
476 print "ctxt val:", self._context_vals[placeholder]
477 except KeyError:
478
479 where_fragments[context_key] = u''
480 if self.print_queries:
481 print "invalid ctxt key:", context_key
482
483 cmd = query % where_fragments
484
485 if self.print_queries:
486 print "class:", self.__class__.__name__
487 print "ctxt:", self._context_vals
488 print "args:", self._args
489 print "query:", cmd
490
491 try:
492 rows, idx = gmPG2.run_ro_queries(queries = [{'cmd': cmd, 'args': self._args}], get_col_idx = False)
493 except:
494 _log.exception('[%s]: error running match provider SQL, dropping query', self.__class__.__name__)
495 idx = self._queries.index(query)
496 del self._queries[idx]
497 break
498
499
500 if len(rows) == 0:
501 continue
502
503 for row in rows:
504 match = {'weight': 0}
505
506 try:
507 match['data'] = row['data']
508 except KeyError:
509 match['data'] = row[0]
510
511 try:
512 match['list_label'] = row['list_label']
513 except KeyError:
514 match['list_label'] = row[1]
515
516
517 try:
518 match['field_label'] = row['field_label']
519
520 except KeyError:
521
522 try:
523 match['field_label'] = row[2]
524
525 except IndexError:
526 match['field_label'] = match['list_label']
527
528
529
530
531
532
533 matches.append(match)
534
535 return (True, matches)
536
537
538 return (False, [])
539
540 if __name__ == '__main__':
541 pass
542
543
544