1
2 """ATC handling code.
3
4 http://who.no
5
6 There is no DDD handling because DDD explicitly
7 does not carry clinical meaning.
8 """
9
10 __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>"
11 __license__ = "GPL v2 or later"
12
13 import sys
14 import io
15 import logging
16 import csv
17 import os.path
18 import re as regex
19
20
21 if __name__ == '__main__':
22 sys.path.insert(0, '../../')
23 from Gnumed.pycommon import gmPG2
24 from Gnumed.pycommon import gmTools
25 from Gnumed.pycommon import gmCfg2
26
27
# Module-level logger; all messages from this module go to the 'gm.atc' logger.
28 _log = logging.getLogger('gm.atc')
# Shared configuration object; further down in this file a file source named
# 'atc' is attached to it, read from, and removed again.
29 _cfg = gmCfg2.gmCfgData()
30
31
# ATC codes kept as named constants (going by the names: nicotine and ethanol).
32 ATC_NICOTINE = 'N07BA01'
33 ATC_ETHANOL = 'V03AB16'
34
35
37
# NOTE(review): the `def` line of this function is not part of this listing.
# `substance`, `atc` and `link_obj` are used below without being assigned, so
# they are presumably its parameters -- confirm against the full file.
#
# What the visible body does: store an ATC code on those ref.substance and
# ref.drug_product rows whose description equals `substance` (compared
# case-insensitively) and which have no ATC code yet. It returns the code that
# was used, or None when no code was given and none could be determined.
38 _log.debug('substance <%s>, ATC <%s>', substance, atc)
39
# A blank / whitespace-only code is treated the same as "no code given".
40 if atc is not None:
41 if atc.strip() == '':
42 atc = None
43
# No code given: look one up by exact (non-fuzzy) name match. Only a unique
# hit is accepted; zero or several candidates abort and return `atc`, which
# is still None at that point.
44 if atc is None:
45 atcs = text2atc(text = substance, fuzzy = False, link_obj = link_obj)
46 if len(atcs) == 0:
47 _log.debug('no ATC found, aborting')
48 return atc
49 if len(atcs) > 1:
50 _log.debug('non-unique ATC mapping, aborting')
51 return atc
# First column of the single result row is the ATC code.
52 atc = atcs[0][0].strip()
53
# Both UPDATEs only touch rows whose ATC column IS NULL, so codes which are
# already stored are never overwritten.
54 args = {'atc': atc, 'term': substance.strip()}
55 queries = [
56 {'cmd': "UPDATE ref.substance SET atc = %(atc)s WHERE lower(description) = lower(%(term)s) AND atc IS NULL",
57 'args': args},
58 {'cmd': "UPDATE ref.drug_product SET atc_code = %(atc)s WHERE lower(description) = lower(%(term)s) AND atc_code IS NULL",
59 'args': args}
60 ]
61 gmPG2.run_rw_queries(link_obj = link_obj, queries = queries)
62
63 return atc
64
65
def text2atc(text=None, fuzzy=False, link_obj=None):
    """Look up the ATC code(s) known for a substance or product name.

    Three places are searched and their results merged: the term column of
    ref.v_atc, the description column of ref.substance and the description
    column of ref.drug_product. Only entries which actually carry an ATC
    code are considered, and each code is reported once.

    Args:
        text: the name to look for; surrounding whitespace is ignored.
            None yields an empty result instead of raising.
        fuzzy: if true, match *text* as a case-insensitive substring
            (ILIKE '%text%'); otherwise require a case-insensitive exact
            match.
        link_obj: passed through unchanged to gmPG2.run_ro_queries().

    Returns:
        The rows delivered by gmPG2.run_ro_queries(), ordered by code. Each
        row has the columns atc_code, is_group_code and pk_data_source; the
        latter two are NULL for hits from ref.substance and
        ref.drug_product.
    """
    # The declared default is None, which used to blow up on .strip();
    # without a name there is nothing to look up.
    if text is None:
        return []

    text = text.strip()

    # The exact and the fuzzy query differ only in how the name column is
    # compared, so both are generated from a single template.
    if fuzzy:
        args = {'term': '%%%s%%' % text}
        term_match = 'term ILIKE %(term)s'
        desc_match = 'description ILIKE %(term)s'
    else:
        args = {'term': text.lower()}
        term_match = 'lower(term) = lower(%(term)s)'
        desc_match = 'lower(description) = lower(%(term)s)'

    cmd = """
        SELECT DISTINCT ON (atc_code) *
        FROM (
            SELECT atc AS atc_code, is_group_code, pk_data_source
            FROM ref.v_atc
            WHERE {term_match} AND atc IS NOT NULL
                UNION
            SELECT atc AS atc_code, null, null
            FROM ref.substance
            WHERE {desc_match} AND atc IS NOT NULL
                UNION
            SELECT atc_code, null, null
            FROM ref.drug_product
            WHERE {desc_match} AND atc_code IS NOT NULL
        ) AS tmp
        ORDER BY atc_code
    """.format(term_match=term_match, desc_match=desc_match)

    # run_ro_queries() hands back (rows, column index); the index is unused.
    rows, _ = gmPG2.run_ro_queries(
        link_obj=link_obj,
        queries=[{'cmd': cmd, 'args': args}],
        get_col_idx=False
    )

    _log.debug('term: %s => ATCs: %s (fuzzy: %s)', text, rows, fuzzy)

    return rows
114
115
# NOTE(review): the `def` line of this function is not part of this listing;
# `substance` is presumably its parameter -- confirm against the full file.
#
# What the visible body does: report whether ref.atc holds a row whose term
# equals `substance` (compared case-insensitively via lower()).
117 args = {'term': substance}
118 cmd = 'SELECT EXISTS (SELECT 1 FROM ref.atc WHERE lower(term) = lower(%(term)s))'
119 rows, idx = gmPG2.run_ro_queries(queries = [{'cmd': cmd, 'args': args}], get_col_idx = False)
# The query yields one row with one column: the result of SELECT EXISTS.
120 return rows[0][0]
121
122
# NOTE(review): the `def` line of this function is not part of this listing;
# `order_by` is presumably its parameter -- confirm against the full file.
#
# What the visible body does: return all rows of ref.v_atc, sorted by the
# ORDER BY expression held in `order_by`.
#
# NOTE(review): `order_by` is spliced into the SQL text with `%` rather than
# passed as a query argument, so it must only ever come from trusted code,
# never from user input.
124 cmd = 'SELECT * FROM ref.v_atc ORDER BY %s' % order_by
125 rows, idx = gmPG2.run_ro_queries(queries = [{'cmd': cmd}], get_col_idx = False)
126 return rows
127
128
130
131
# NOTE(review): the `def` line of this function is not part of this listing.
# `cfg_fname` and `conn` are used below without being assigned, so they are
# presumably its parameters -- confirm against the full file.
#
# What the visible body does, in order:
#   1. read import metadata from the config file `cfg_fname`
#   2. look up, and refresh or create, the matching ref.data_source row
#   3. empty ref.atc_staging and load the CSV data file into it
#   4. transfer the staged rows into ref.atc
#   5. empty ref.atc_staging again and return True

# -- 1. metadata ----------------------------------------------------------
# The config file is attached as source 'atc' only for the duration of the
# lookups below and removed again afterwards.
132 _cfg.add_file_source(source = 'atc', file = cfg_fname, encoding = 'utf8')
133
# The data file name from the config is joined onto the directory of the
# config file itself.
134 data_fname = os.path.join (
135 os.path.dirname(cfg_fname),
136 _cfg.get(group = 'atc', option = 'data file', source_order = [('atc', 'return')])
137 )
138 version = _cfg.get(group = 'atc', option = 'version', source_order = [('atc', 'return')])
139 lang = _cfg.get(group = 'atc', option = 'language', source_order = [('atc', 'return')])
140 desc = _cfg.get(group = 'atc', option = 'description', source_order = [('atc', 'return')])
141 url = _cfg.get(group = 'atc', option = 'url', source_order = [('atc', 'return')])
142 name_long = _cfg.get(group = 'atc', option = 'long name', source_order = [('atc', 'return')])
143 name_short = _cfg.get(group = 'atc', option = 'short name', source_order = [('atc', 'return')])
144
145 _cfg.remove_source(source = 'atc')
146
147 _log.debug('importing ATC version [%s] (%s) from [%s]', version, lang, data_fname)
148
149 args = {'ver': version, 'desc': desc, 'url': url, 'name_long': name_long, 'name_short': name_short, 'lang': lang}
150
151
# -- 2. data source record ------------------------------------------------
# A data source is identified by (name_short, version). If it exists its
# other fields are refreshed, otherwise it is inserted; either way
# data_src_pk ends up holding its primary key.
152 cmd = u"select pk from ref.data_source where name_short = %(name_short)s and version = %(ver)s"
153 rows, idx = gmPG2.run_ro_queries(queries = [{'cmd': cmd, 'args': args}])
154 if len(rows) > 0:
155 data_src_pk = rows[0][0]
156 _log.debug('ATC data source record existed, pk is #%s, refreshing fields', data_src_pk)
157
158 args['pk'] = data_src_pk
159 cmd = u"""UPDATE ref.data_source SET
160 name_long = %(name_long)s,
161 description = %(desc)s,
162 lang = %(lang)s,
163 source = %(url)s
164 WHERE
165 pk = %(pk)s
166 """
167 rows, idx = gmPG2.run_rw_queries(queries = [{'cmd': cmd, 'args': args}])
168 else:
169 _log.debug('ATC data source record not found, creating')
170
171 cmd = u"""insert into ref.data_source (name_long, name_short, version, description, lang, source) values (
172 %(name_long)s,
173 %(name_short)s,
174 %(ver)s,
175 %(desc)s,
176 %(lang)s,
177 %(url)s
178 ) returning pk"""
179 rows, idx = gmPG2.run_rw_queries(queries = [{'cmd': cmd, 'args': args}], return_data = True)
180 data_src_pk = rows[0][0]
181 _log.debug('ATC data source record created, pk is #%s', data_src_pk)
182
183
# -- 3. load staging table ------------------------------------------------
# Undecodable bytes in the data file are replaced rather than raising.
# NOTE(review): csv_file is closed only after the load loop has completed;
# an exception raised while loading leaves the file open.
184 csv_file = io.open(data_fname, mode = 'rt', encoding = 'utf8', errors = 'replace')
185 atc_reader = gmTools.unicode_csv_reader(csv_file, delimiter = ",", quotechar = '"')
186
187
# Start from an empty staging table; the delete is committed on its own.
188 curs = conn.cursor()
189 cmd = """delete from ref.atc_staging"""
190 gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}])
191 curs.close()
192 conn.commit()
193 _log.debug('ATC staging table emptied')
194
195
196 curs = conn.cursor()
# Values are inserted positionally, in the order built in `args` below:
# code, name, unit, administration route, comment.
197 cmd = """insert into ref.atc_staging values (%s, %s, %s, %s, %s)"""
# `first` records whether the first row of the file has been skipped yet
# (presumably that row is the column header line).
198 first = False
199 for atc_line in atc_reader:
200
201 if not first:
202 first = True
203 continue
204
205
# Skip rows whose first five fields are all empty.
206 if atc_line[0] + atc_line[1] + atc_line[2] + atc_line[3] + atc_line[4] == '':
207 continue
208
209 comment = ''
210 unit = ''
211 adro = ''
212
213
# Field 4 is matched against four patterns in turn: a number with or without
# a decimal comma, followed by a unit and an administration route, optionally
# followed by free text. On a match it is split on whitespace; the leading
# number (`tmp`) is not used afterwards. If nothing matches, the whole field
# is kept as the comment.
# NOTE(review): the patterns use `\d` / `\s` inside plain (non-raw) string
# literals; recent Python versions warn about such escapes -- consider r'...'.
214 if regex.match('\d{,3},\d{,3}\s.{1,2}\s.(,.)*$', atc_line[4]):
215 tmp, unit, adro = regex.split('\s', atc_line[4])
216
217 elif regex.match('\d{,3},\d{,3}\s.{1,2}\s.(,.)*\s.+$', atc_line[4]):
218 tmp, unit, adro, comment = regex.split('\s', atc_line[4], 3)
219
220 elif regex.match('\d{,3}\s.{1,2}\s.(,.)*$', atc_line[4]):
221 tmp, unit, adro = regex.split('\s', atc_line[4])
222
223 elif regex.match('\d{,3}\s.{1,2}\s.(,.)*\s.+$', atc_line[4]):
224 tmp, unit, adro, comment = regex.split('\s', atc_line[4], 3)
225
226 else:
227 comment = atc_line[4]
228
# Only fields 0 and 2 of the CSV row are stored directly; fields 1 and 3 are
# not written to the staging table.
229 args = [
230 atc_line[0].strip(),
231 atc_line[2],
232 unit,
233 adro,
234 comment
235 ]
236
237 gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': args}])
238
# All staged rows are committed together once the whole file has been read.
239 curs.close()
240 conn.commit()
241 csv_file.close()
242 _log.debug('ATC staging table loaded')
243
244
# -- 4. staging -> ref.atc ------------------------------------------------
245 args = {'src_pk': data_src_pk}
246 queries = []
247
# First query: insert staged codes which do not yet exist in ref.atc for this
# data source; empty comment / route strings are stored as NULL.
248 cmd = u"""
249 insert into ref.atc (
250 fk_data_source,
251 code,
252 term,
253 comment,
254 administration_route
255 ) select
256 %(src_pk)s,
257 atc,
258 name,
259 nullif(comment, ''),
260 nullif(adro, '')
261 FROM
262 ref.atc_staging
263 WHERE
264 not exists (
265 select 1 FROM ref.atc WHERE fk_data_source = %(src_pk)s AND code = ref.atc_staging.atc
266 )
267 """
268 queries.append({'cmd': cmd, 'args': args})
269
# Second query: refresh rows of this data source from the staged data.
# NOTE(review): the WHERE clause of this UPDATE restricts by fk_data_source
# only; nothing relates a given ref.atc row to a particular r_as row (e.g.
# ref.atc.code = r_as.atc). Confirm whether that is intended.
270 cmd = u"""
271 UPDATE ref.atc SET
272 code = r_as.atc,
273 term = r_as.name,
274 comment = nullif(r_as.comment, ''),
275 administration_route = nullif(r_as.adro, '')
276 FROM
277 (SELECT atc, name, comment, adro FROM ref.atc_staging) AS r_as
278 WHERE
279 fk_data_source = %(src_pk)s
280 """
281 queries.append({'cmd': cmd, 'args': args})
# Both queries are run on one cursor and committed together.
282 curs = conn.cursor()
283 gmPG2.run_rw_queries(link_obj = curs, queries = queries)
284 curs.close()
285 conn.commit()
286 _log.debug('transfer from ATC staging table to real ATC table done')
287
288
# -- 5. clean up ----------------------------------------------------------
289 curs = conn.cursor()
290 cmd = """delete from ref.atc_staging"""
291 gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}])
292 curs.close()
293 conn.commit()
294 _log.debug('ATC staging table emptied')
295
296 return True
297
298
299
300
# Manual test entry point: does nothing unless the module is run as a script
# with 'test' as its first command line argument.
301 if __name__ == "__main__":
302
# No arguments at all: exit silently.
303 if len(sys.argv) == 1:
304 sys.exit()
305
# Anything other than 'test' as first argument: exit silently.
306 if sys.argv[1] != 'test':
307 sys.exit()
308
# Imported here so that they are loaded for test runs only.
309 from Gnumed.pycommon import gmLog2
310 from Gnumed.pycommon import gmI18N
311
312 gmI18N.activate_locale()
313
314
315
318
# NOTE(review): the embedded line numbers jump here (315 -> 318 -> 320), so
# lines are missing from this listing; the three print() calls below are
# presumably the body of a test function whose `def` line is not shown.
# They print the exact and then the fuzzy text2atc() result for the second
# command line argument.
320 print('searching ATC code for:', sys.argv[2])
321 print(' ', text2atc(sys.argv[2]))
322 print(' ', text2atc(sys.argv[2], True))
323
328
329
330
# NOTE(review): test_get_reference_atcs is not defined anywhere in the visible
# listing (embedded numbers jump 323 -> 328); presumably it is one of the
# missing lines.
331 test_get_reference_atcs()
332
333
334