Package Gnumed :: Package business :: Module gmATC
[frames | no frames]

Source Code for Module Gnumed.business.gmATC

  1  # -*- coding: utf-8 -*- 
  2  """ATC handling code. 
  3   
  4  http://who.no 
  5   
  6  There is no DDD handling because DDD explicitly 
  7  does not carry clinical meaning. 
  8  """ 
  9  #============================================================ 
 10  __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>" 
 11  __license__ = "GPL v2 or later" 
 12   
 13  import sys 
 14  import io 
 15  import logging 
 16  import csv 
 17  import os.path 
 18  import re as regex 
 19   
 20   
 21  if __name__ == '__main__': 
 22          sys.path.insert(0, '../../') 
 23  from Gnumed.pycommon import gmPG2 
 24  from Gnumed.pycommon import gmTools 
 25  from Gnumed.pycommon import gmCfg2 
 26   
 27   
 28  _log = logging.getLogger('gm.atc') 
 29  _cfg = gmCfg2.gmCfgData() 
 30   
 31   
 32  ATC_NICOTINE = 'N07BA01' 
 33  ATC_ETHANOL  = 'V03AB16' 
 34   
 35  #============================================================ 
def propagate_atc(substance=None, atc=None, link_obj=None):
    """Store an ATC code on substances/drug products lacking one.

    Args:
        substance: name to match, case-insensitively, against the
            ``description`` column of ``ref.substance`` and
            ``ref.drug_product``.
        atc: ATC code to store. ``None`` or a blank string triggers an
            exact (non-fuzzy) lookup of *substance* via ``text2atc()``.
        link_obj: passed through unchanged to ``text2atc()`` and
            ``gmPG2.run_rw_queries()``.

    Returns:
        The ATC code that was written, or ``None`` when no code was given
        and the lookup found either nothing or more than one candidate
        (in which case nothing is written).
    """
    _log.debug('substance <%s>, ATC <%s>', substance, atc)

    # a whitespace-only code counts as "no code given"
    if atc is not None and atc.strip() == '':
        atc = None

    if atc is None:
        candidates = text2atc(text = substance, fuzzy = False, link_obj = link_obj)
        candidate_count = len(candidates)
        if candidate_count == 0:
            _log.debug('no ATC found, aborting')
            return None
        if candidate_count > 1:
            _log.debug('non-unique ATC mapping, aborting')
            return None
        # exactly one hit: first column of the only row is the code
        atc = candidates[0][0].strip()

    args = {'atc': atc, 'term': substance.strip()}
    # only rows which do not carry an ATC yet are touched
    statements = [
        "UPDATE ref.substance SET atc = %(atc)s WHERE lower(description) = lower(%(term)s) AND atc IS NULL",
        "UPDATE ref.drug_product SET atc_code = %(atc)s WHERE lower(description) = lower(%(term)s) AND atc_code IS NULL"
    ]
    queries = [ {'cmd': statement, 'args': args} for statement in statements ]
    gmPG2.run_rw_queries(link_obj = link_obj, queries = queries)

    return atc
64 65 #============================================================
def text2atc(text=None, fuzzy=False, link_obj=None):
    """Look up the ATC code(s) known for a term.

    The term is searched in ``ref.v_atc`` (column ``term``) as well as in
    the ``description`` columns of ``ref.substance`` and
    ``ref.drug_product``; only entries carrying an ATC code are considered.

    Args:
        text: the term to look for; surrounding whitespace is stripped.
        fuzzy: if true, match substrings case-insensitively (``ILIKE``
            with the term wrapped in ``%``), otherwise require a
            case-insensitive exact match.
        link_obj: passed through to ``gmPG2.run_ro_queries()``.

    Returns:
        The rows returned by ``gmPG2.run_ro_queries()``: one row per
        distinct ATC code, ordered by code, with the columns
        ``atc_code``, ``is_group_code`` and ``pk_data_source``.
    """
    text = text.strip()

    # default: exact, case-insensitive match
    args = {'term': text.lower()}
    cmd = """
        SELECT DISTINCT ON (atc_code) *
        FROM (
            SELECT atc as atc_code, is_group_code, pk_data_source
            FROM ref.v_atc
            WHERE lower(term) = lower(%(term)s) AND atc IS NOT NULL
                UNION
            SELECT atc as atc_code, null, null
            FROM ref.substance
            WHERE lower(description) = lower(%(term)s) AND atc IS NOT NULL
                UNION
            SELECT atc_code, null, null
            FROM ref.drug_product
            WHERE lower(description) = lower(%(term)s) AND atc_code IS NOT NULL
        ) as tmp
        ORDER BY atc_code
    """

    if fuzzy:
        # substring search: wrap the term into SQL wildcards
        args = {'term': '%%%s%%' % text}
        cmd = """
        SELECT DISTINCT ON (atc_code) *
        FROM (
            SELECT atc as atc_code, is_group_code, pk_data_source
            FROM ref.v_atc
            WHERE term ilike %(term)s AND atc IS NOT NULL
                UNION
            SELECT atc as atc_code, null, null
            FROM ref.substance
            WHERE description ilike %(term)s AND atc IS NOT NULL
                UNION
            SELECT atc_code, null, null
            FROM ref.drug_product
            WHERE description ilike %(term)s AND atc_code IS NOT NULL
        ) as tmp
        ORDER BY atc_code
    """

    query = {'cmd': cmd, 'args': args}
    rows, _col_idx = gmPG2.run_ro_queries(link_obj = link_obj, queries = [query], get_col_idx = False)

    _log.debug('term: %s => ATCs: %s (fuzzy: %s)', text, rows, fuzzy)

    return rows
114 115 #============================================================
def exists_as_atc(substance):
    """Tell whether a term is listed in the ATC reference table.

    Args:
        substance: the term to compare, case-insensitively, against the
            ``term`` column of ``ref.atc``.

    Returns:
        The first column of the first row of the ``SELECT EXISTS (...)``
        query result.
    """
    query = {
        'cmd': 'SELECT EXISTS (SELECT 1 FROM ref.atc WHERE lower(term) = lower(%(term)s))',
        'args': {'term': substance}
    }
    rows, _col_idx = gmPG2.run_ro_queries(queries = [query], get_col_idx = False)
    first_row = rows[0]
    return first_row[0]
121 122 #============================================================
def get_reference_atcs(order_by='atc, term, lang'):
    """Return all rows of the ``ref.v_atc`` view.

    Args:
        order_by: text placed verbatim after ``ORDER BY`` in the query.

    Returns:
        The rows returned by ``gmPG2.run_ro_queries()``.
    """
    # NOTE(review): order_by is interpolated into the SQL text as-is, so
    # it must never be derived from untrusted input.
    query = {'cmd': 'SELECT * FROM ref.v_atc ORDER BY %s' % order_by}
    rows, _col_idx = gmPG2.run_ro_queries(queries = [query], get_col_idx = False)
    return rows
127 128 #============================================================
# Layouts of the 5th CSV column recognised by atc_import(), tried in this
# order. Each entry pairs a compiled pattern with the number of
# whitespace-separated fields the value is split into.
_ATC_DOSE_LAYOUTS = (
    # "1,1 mg O,P,R,..."
    (regex.compile(r'\d{,3},\d{,3}\s.{1,2}\s.(,.)*$'), 3),
    # "1,1 mg O,P,R bezogen auf ..."
    (regex.compile(r'\d{,3},\d{,3}\s.{1,2}\s.(,.)*\s.+$'), 4),
    # "20 mg O"
    (regex.compile(r'\d{,3}\s.{1,2}\s.(,.)*$'), 3),
    # "20 mg O bezogen auf ..."
    (regex.compile(r'\d{,3}\s.{1,2}\s.(,.)*\s.+$'), 4),
)
_ATC_FIELD_SEPARATOR = regex.compile(r'\s')


def _split_atc_dose_info(dose_info):
    """Split the 5th CSV column into (unit, adro, comment).

    Values matching none of the known layouts (say, "Standarddosis:
    1 Tablette oder 30 ml Mixtur") are returned entirely as the comment.
    """
    for layout, field_count in _ATC_DOSE_LAYOUTS:
        if layout.match(dose_info) is None:
            continue
        if field_count == 3:
            _amount, unit, adro = _ATC_FIELD_SEPARATOR.split(dose_info)
            return unit, adro, ''
        # everything after the third separator is the free-text comment
        _amount, unit, adro, comment = _ATC_FIELD_SEPARATOR.split(dose_info, maxsplit = 3)
        return unit, adro, comment
    return '', '', dose_info


def _empty_atc_staging(conn):
    """Delete all rows from ref.atc_staging and commit."""
    curs = conn.cursor()
    gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': """delete from ref.atc_staging"""}])
    curs.close()
    conn.commit()
    _log.debug('ATC staging table emptied')


def atc_import(cfg_fname=None, conn=None):
    """Import an ATC data file into ref.atc.

    The config file *cfg_fname* is read for the group ``atc`` with the
    options ``data file``, ``version``, ``language``, ``description``,
    ``url``, ``long name`` and ``short name``. The data file is a
    comma-separated, double-quote-quoted, UTF-8 encoded file which must
    live in the same directory as the config file. Its first row is
    skipped as a header.

    Steps:
        1. (re)create the matching ``ref.data_source`` record,
        2. empty ``ref.atc_staging``,
        3. load the data file into ``ref.atc_staging``,
        4. copy the staged rows into ``ref.atc``,
        5. empty ``ref.atc_staging`` again.

    Args:
        cfg_fname: path of the config file describing the ATC data.
        conn: connection used for the staging/import steps; it must
            provide ``cursor()`` and ``commit()``. It is effectively
            required despite the ``None`` default.

    Returns:
        True

    Raises:
        IndexError: if a non-blank data row has fewer than five columns.
        ValueError: if the 5th column matches a known layout but does not
            split into the expected number of fields.
    """
    # read meta data
    _cfg.add_file_source(source = 'atc', file = cfg_fname, encoding = 'utf8')

    def _option(name):
        return _cfg.get(group = 'atc', option = name, source_order = [('atc', 'return')])

    # must be in same dir as conf file
    data_fname = os.path.join(os.path.dirname(cfg_fname), _option('data file'))
    version = _option('version')
    lang = _option('language')
    desc = _option('description')
    url = _option('url')
    name_long = _option('long name')
    name_short = _option('short name')

    _cfg.remove_source(source = 'atc')

    _log.debug('importing ATC version [%s] (%s) from [%s]', version, lang, data_fname)

    args = {'ver': version, 'desc': desc, 'url': url, 'name_long': name_long, 'name_short': name_short, 'lang': lang}

    # create data source record (replacing any previous one for this version)
    queries = [
        {
            'cmd': """delete from ref.data_source where name_short = %(name_short)s and version = %(ver)s""",
            'args': args
        }, {
            'cmd': """
        insert into ref.data_source (name_long, name_short, version, description, lang, source) values (
            %(name_long)s,
            %(name_short)s,
            %(ver)s,
            %(desc)s,
            %(lang)s,
            %(url)s
        )""",
            'args': args
        }, {
            'cmd': """select pk from ref.data_source where name_short = %(name_short)s and version = %(ver)s""",
            'args': args
        }
    ]
    rows, _col_idx = gmPG2.run_rw_queries(queries = queries, return_data = True)
    data_src_pk = rows[0][0]
    _log.debug('ATC data source record created, pk is #%s', data_src_pk)

    # import data; the context manager closes the file even if loading fails
    with io.open(data_fname, mode = 'rt', encoding = 'utf8', errors = 'replace') as csv_file:
        atc_reader = gmTools.unicode_csv_reader(csv_file, delimiter = ",", quotechar = '"')

        # clean out staging area
        _empty_atc_staging(conn)

        # from file into staging table
        curs = conn.cursor()
        cmd = """insert into ref.atc_staging values (%s, %s, %s, %s, %s)"""
        for row_idx, atc_line in enumerate(atc_reader):
            # skip first (header) row
            if row_idx == 0:
                continue

            # skip blanks, including empty or short rows without any content
            if not any(atc_line[:5]):
                continue

            unit, adro, comment = _split_atc_dose_info(atc_line[4])

            row_args = [
                atc_line[0].strip(),
                atc_line[2],
                unit,
                adro,
                comment
            ]
            gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': row_args}])

        curs.close()
        conn.commit()
    _log.debug('ATC staging table loaded')

    # from staging table to real table
    curs = conn.cursor()
    args = {'src_pk': data_src_pk}
    cmd = """
        insert into ref.atc (
            fk_data_source,
            code,
            term,
            comment,
            unit,
            administration_route
        ) select
            %(src_pk)s,
            atc,
            name,
            nullif(comment, ''),
            nullif(unit, ''),
            nullif(adro, '')

        from
            ref.atc_staging
    """

    gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': args}])

    curs.close()
    conn.commit()
    _log.debug('transfer from ATC staging table to real ATC table done')

    # clean out staging area
    _empty_atc_staging(conn)

    return True
#============================================================
# main
#------------------------------------------------------------
if __name__ == "__main__":

    # the self-tests only run when the first argument is "test"
    if len(sys.argv) == 1 or sys.argv[1] != 'test':
        sys.exit()

    from Gnumed.pycommon import gmLog2
    from Gnumed.pycommon import gmI18N

    gmI18N.activate_locale()
#    gmDateTime.init()

    #--------------------------------------------------------
    def test_atc_import():
        """Import the ATC data described by the config file given as 2nd argument."""
        rw_conn = gmPG2.get_connection(readonly = False)
        atc_import(cfg_fname = sys.argv[2], conn = rw_conn)
    #--------------------------------------------------------
    def test_text2atc():
        """Print the exact and the fuzzy ATC matches for the 2nd argument."""
        term = sys.argv[2]
        print('searching ATC code for:', term)
        print(' ', text2atc(term))
        print(' ', text2atc(term, True))
    #--------------------------------------------------------
    def test_get_reference_atcs():
        """Print every row returned by get_reference_atcs()."""
        print("reference_of_atc_codes:")
        for atc_row in get_reference_atcs():
            print(atc_row)
    #--------------------------------------------------------
    #test_atc_import()
    #test_text2atc()
    test_get_reference_atcs()

#============================================================