1
2 """ATC handling code.
3
4 http://who.no
5
6 There is no DDD handling because DDD explicitly
7 does not carry clinical meaning.
8
9 license: GPL v2 or later
10 """
11
12 __version__ = "$Revision: 1.7 $"
13 __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>"
14
15 import sys, codecs, logging, csv, re as regex, os.path
16
17
18 if __name__ == '__main__':
19 sys.path.insert(0, '../../')
20 from Gnumed.pycommon import gmPG2, gmTools, gmCfg2
21
22
# module-level logger; the module revision is written to the log once at import time
23 _log = logging.getLogger('gm.atc')
24 _log.info(__version__)
25
# shared config object; the ATC import code further down registers its
# config file on it under the source name 'atc' and removes it again afterwards
26 _cfg = gmCfg2.gmCfgData()
27
29
30 _log.debug('substance <%s>, ATC <%s>', substance, atc)
31
# an empty or whitespace-only ATC is treated like no ATC at all
32 if atc is not None:
33 if atc.strip() == u'':
34 atc = None
35
# no ATC given: try to derive one via an exact (non-fuzzy) lookup of the
# substance name; only an unambiguous hit is accepted, otherwise nothing is
# written and the (still None) ATC is returned
36 if atc is None:
37 atcs = text2atc(text = substance, fuzzy = False)
38 if len(atcs) == 0:
39 _log.debug(u'no ATC found, aborting')
40 return atc
41 if len(atcs) > 1:
42 _log.debug(u'non-unique ATC mapping, aborting')
43 return atc
# first column of the single matching row is the ATC code
44 atc = atcs[0][0].strip()
45
# set the ATC on consumable substances and branded drugs whose description
# equals the substance name (case-insensitively) and which do not carry an
# ATC code yet; existing codes are never overwritten
46 args = {'atc': atc, 'term': substance.strip()}
47 queries = [
48 {'cmd': u"UPDATE ref.consumable_substance SET atc_code = %(atc)s WHERE lower(description) = lower(%(term)s) AND atc_code IS NULL",
49 'args': args},
50 {'cmd': u"UPDATE ref.branded_drug SET atc_code = %(atc)s WHERE lower(description) = lower(%(term)s) AND atc_code IS NULL",
51 'args': args}
52 ]
53 gmPG2.run_rw_queries(queries = queries)
54
# the ATC that was used for linking (either the one passed in or the derived one)
55 return atc
56
def text2atc(text=None, fuzzy=False):
    """Look up the ATC code(s) known for a substance or drug name.

    The term is searched for in ref.v_atc (column "term") as well as in
    ref.consumable_substance and ref.branded_drug (column "description").
    Entries which do not carry an ATC code are ignored.

    text -- the name to look up, surrounding whitespace is ignored;
            None yields an empty result without touching the database
    fuzzy -- if true, match the term case-insensitively anywhere inside
             the stored name (ILIKE '%term%'), otherwise require a
             case-insensitive exact match

    Returns the rows delivered by gmPG2.run_ro_queries(), one row per
    distinct ATC code, ordered by ATC code. Each row holds atc_code,
    is_group_code and pk_data_source; the latter two are only filled
    for rows originating from ref.v_atc.
    """
    # the declared default (None) used to raise AttributeError on
    # .strip(); nothing to look up simply means no matches
    if text is None:
        return []

    text = text.strip()

    if fuzzy:
        # substring match; note that "%" and "_" inside the term act
        # as ILIKE wildcards since they are not escaped
        args = {'term': u'%%%s%%' % text}
        cmd = u"""
            SELECT DISTINCT ON (atc_code) *
            FROM (
                SELECT atc as atc_code, is_group_code, pk_data_source
                FROM ref.v_atc
                WHERE term ilike %(term)s AND atc IS NOT NULL
                    UNION
                SELECT atc_code, null, null
                FROM ref.consumable_substance
                WHERE description ilike %(term)s AND atc_code IS NOT NULL
                    UNION
                SELECT atc_code, null, null
                FROM ref.branded_drug
                WHERE description ilike %(term)s AND atc_code IS NOT NULL
            ) as tmp
            ORDER BY atc_code
        """
    else:
        # exact match, compared in lower case on both sides
        args = {'term': text.lower()}
        cmd = u"""
            SELECT DISTINCT ON (atc_code) *
            FROM (
                SELECT atc as atc_code, is_group_code, pk_data_source
                FROM ref.v_atc
                WHERE lower(term) = lower(%(term)s) AND atc IS NOT NULL
                    UNION
                SELECT atc_code, null, null
                FROM ref.consumable_substance
                WHERE lower(description) = lower(%(term)s) AND atc_code IS NOT NULL
                    UNION
                SELECT atc_code, null, null
                FROM ref.branded_drug
                WHERE lower(description) = lower(%(term)s) AND atc_code IS NOT NULL
            ) as tmp
            ORDER BY atc_code
        """

    # the column index is not requested (get_col_idx = False) and not used
    rows, _idx = gmPG2.run_ro_queries(queries = [{'cmd': cmd, 'args': args}], get_col_idx = False)

    _log.debug(u'term: %s => ATCs: %s (fuzzy: %s)', text, rows, fuzzy)

    return rows
105
106
# fetch every row of ref.v_atc, sorted as requested
# NOTE(review): order_by is spliced into the SQL text verbatim rather than
# passed as a query argument, so it must only ever come from trusted code
108 cmd = u'SELECT * FROM ref.v_atc ORDER BY %s' % order_by
109 rows, idx = gmPG2.run_ro_queries(queries = [{'cmd': cmd}], get_col_idx = False)
110 return rows
111
112
114
115
# --- step 1: read the import metadata from the config file ---
# the config file is registered temporarily under the source name 'atc'
116 _cfg.add_file_source(source = 'atc', file = cfg_fname, encoding = 'utf8')
117
# the data file name is taken from the config and resolved relative to the
# directory the config file lives in
118 data_fname = os.path.join (
119 os.path.dirname(cfg_fname),
120 _cfg.get(group = 'atc', option = 'data file', source_order = [('atc', 'return')])
121 )
122 version = _cfg.get(group = 'atc', option = 'version', source_order = [('atc', 'return')])
123 lang = _cfg.get(group = 'atc', option = 'language', source_order = [('atc', 'return')])
124 desc = _cfg.get(group = 'atc', option = 'description', source_order = [('atc', 'return')])
125 url = _cfg.get(group = 'atc', option = 'url', source_order = [('atc', 'return')])
126 name_long = _cfg.get(group = 'atc', option = 'long name', source_order = [('atc', 'return')])
127 name_short = _cfg.get(group = 'atc', option = 'short name', source_order = [('atc', 'return')])
128
# all options have been read, the temporary config source is no longer needed
129 _cfg.remove_source(source = 'atc')
130
131 _log.debug('importing ATC version [%s] (%s) from [%s]', version, lang, data_fname)
132
133 args = {'ver': version, 'desc': desc, 'url': url, 'name_long': name_long, 'name_short': name_short, 'lang': lang}
134
135
# --- step 2: (re)create the data source record ---
# any existing record for the same short name and version is deleted first,
# then a fresh one is inserted and its primary key is read back
136 queries = [
137 {
138 'cmd': u"""delete from ref.data_source where name_short = %(name_short)s and version = %(ver)s""",
139 'args': args
140 }, {
141 'cmd': u"""
142 insert into ref.data_source (name_long, name_short, version, description, lang, source) values (
143 %(name_long)s,
144 %(name_short)s,
145 %(ver)s,
146 %(desc)s,
147 %(lang)s,
148 %(url)s
149 )""",
150 'args': args
151 }, {
152 'cmd': u"""select pk from ref.data_source where name_short = %(name_short)s and version = %(ver)s""",
153 'args': args
154 }
155 ]
156 rows, idx = gmPG2.run_rw_queries(queries = queries, return_data = True)
157 data_src_pk = rows[0][0]
158 _log.debug('ATC data source record created, pk is #%s', data_src_pk)
159
160
# --- step 3: open the data file ---
# decoded as UTF-8 with undecodable bytes replaced rather than raising
# NOTE(review): the file is only closed after the loading loop succeeded,
# an exception in between leaves it open
161 csv_file = codecs.open(data_fname, 'rU', 'utf8', 'replace')
162 atc_reader = gmTools.unicode_csv_reader(csv_file, delimiter = ",", quotechar = '"')
163
164
# --- step 4: empty the staging table (committed on its own) ---
165 curs = conn.cursor()
166 cmd = u"""delete from ref.atc_staging"""
167 gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}])
168 curs.close()
169 conn.commit()
170 _log.debug('ATC staging table emptied')
171
172
# --- step 5: load the data file into the staging table ---
173 curs = conn.cursor()
# positional insert; the values passed below are ATC code, name, unit,
# administration route and comment - presumably the column order of
# ref.atc_staging, verify against the schema
174 cmd = u"""insert into ref.atc_staging values (%s, %s, %s, %s, %s)"""
# despite its name this flag means "first row has already been skipped"
175 first = False
176 for atc_line in atc_reader:
177
# skip the very first row delivered by the reader (presumably the header line)
178 if not first:
179 first = True
180 continue
181
182
# skip rows in which the first five fields are all empty
183 if atc_line[0] + atc_line[1] + atc_line[2] + atc_line[3] + atc_line[4] == u'':
184 continue
185
186 comment = u''
187 unit = u''
188 adro = u''
189
190
# The fifth field is taken apart at whitespace if it looks like
#   <number> <1-2 character unit> <route[,route...]> [<free text>]
# where <number> is up to three digits, optionally followed by a comma and
# up to three more digits. The number itself ends up in tmp and is
# discarded (presumably the DDD amount, which this module deliberately
# does not handle). Anything not matching is kept verbatim as comment.
# NOTE(review): the patterns rely on backslash escapes inside non-raw strings
# number with comma, no trailing free text
191 if regex.match('\d{,3},\d{,3}\s.{1,2}\s.(,.)*$', atc_line[4]):
192 tmp, unit, adro = regex.split('\s', atc_line[4])
193
# number with comma, trailing free text (split at most 3 times so the text stays intact)
194 elif regex.match('\d{,3},\d{,3}\s.{1,2}\s.(,.)*\s.+$', atc_line[4]):
195 tmp, unit, adro, comment = regex.split('\s', atc_line[4], 3)
196
# number without comma, no trailing free text
197 elif regex.match('\d{,3}\s.{1,2}\s.(,.)*$', atc_line[4]):
198 tmp, unit, adro = regex.split('\s', atc_line[4])
199
# number without comma, trailing free text
200 elif regex.match('\d{,3}\s.{1,2}\s.(,.)*\s.+$', atc_line[4]):
201 tmp, unit, adro, comment = regex.split('\s', atc_line[4], 3)
202
203 else:
204 comment = atc_line[4]
205
# only fields 0 (ATC code) and 2 (name) of the CSV row are stored directly,
# fields 1 and 3 are not used
206 args = [
207 atc_line[0].strip(),
208 atc_line[2],
209 unit,
210 adro,
211 comment
212 ]
213
214 gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': args}])
215
216 curs.close()
217 conn.commit()
218 csv_file.close()
219 _log.debug('ATC staging table loaded')
220
221
# --- step 6: copy the staged rows into the real ATC table ---
# every row is tagged with the data source created above; empty strings in
# comment, unit and route are turned into NULL
222 curs = conn.cursor()
223 args = {'src_pk': data_src_pk}
224 cmd = u"""
225 insert into ref.atc (
226 fk_data_source,
227 code,
228 term,
229 comment,
230 unit,
231 administration_route
232 ) select
233 %(src_pk)s,
234 atc,
235 name,
236 nullif(comment, ''),
237 nullif(unit, ''),
238 nullif(adro, '')
239
240 from
241 ref.atc_staging
242 """
243
244 gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': args}])
245
246 curs.close()
247 conn.commit()
248 _log.debug('transfer from ATC staging table to real ATC table done')
249
250
# --- step 7: clean up the staging table again ---
251 curs = conn.cursor()
252 cmd = u"""delete from ref.atc_staging"""
253 gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}])
254 curs.close()
255 conn.commit()
256 _log.debug('ATC staging table emptied')
257
# reaching this point means every step above ran without raising
258 return True
259
260
261
# self-test entry point: does nothing unless the module is run as a script
# with 'test' as its first command line argument
262 if __name__ == "__main__":
263
264 if len(sys.argv) == 1:
265 sys.exit()
266
267 if sys.argv[1] != 'test':
268 sys.exit()
269
# imported only for the self-test
270 from Gnumed.pycommon import gmLog2
271 from Gnumed.pycommon import gmI18N
272
273 gmI18N.activate_locale()
274
275
276
279
# look up the term given as second command line argument,
# first with an exact match, then with a fuzzy match
281 print 'searching ATC code for:', sys.argv[2]
282 print ' ', text2atc(sys.argv[2])
283 print ' ', text2atc(sys.argv[2], True)
284
289
290
291
# the test that is currently enabled
292 test_get_reference_atcs()
293
294
295