Package Gnumed :: Package business :: Module gmATC
[frames] | no frames]

Source Code for Module Gnumed.business.gmATC

  1  # -*- coding: utf-8 -*- 
  2  """ATC handling code. 
  3   
  4  http://who.no 
  5   
  6  There is no DDD handling because DDD explicitly 
  7  does not carry clinical meaning. 
  8  """ 
  9  #============================================================ 
 10  __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>" 
 11  __license__ = "GPL v2 or later" 
 12   
 13  import sys 
 14  import io 
 15  import logging 
 16  import csv 
 17  import os.path 
 18  import re as regex 
 19   
 20   
 21  if __name__ == '__main__': 
 22          sys.path.insert(0, '../../') 
 23  from Gnumed.pycommon import gmPG2 
 24  from Gnumed.pycommon import gmTools 
 25  from Gnumed.pycommon import gmCfg2 
 26   
 27   
 28  _log = logging.getLogger('gm.atc') 
 29  _cfg = gmCfg2.gmCfgData() 
 30   
 31   
 32  ATC_NICOTINE = 'N07BA01' 
 33  ATC_ETHANOL  = 'V03AB16' 
 34   
 35  #============================================================ 
def propagate_atc(substance=None, atc=None, link_obj=None):
    """Store an ATC code on substance and drug product rows that lack one.

    When *atc* is None or consists of whitespace only, the code is
    looked up from *substance* by an exact (non-fuzzy) text2atc()
    search. Nothing is written unless that search yields exactly
    one row.

    Args:
        substance: name compared case-insensitively against the
            "description" column of ref.substance and ref.drug_product.
        atc: the ATC code to store, or None/blank to have it looked up.
        link_obj: handed through to text2atc() and gmPG2.run_rw_queries().

    Returns:
        The ATC code that was written, or None if the lookup found
        no code or more than one code.
    """
    _log.debug('substance <%s>, ATC <%s>', substance, atc)

    # a whitespace-only code counts as "no code given"
    if atc is not None and atc.strip() == '':
        atc = None

    if atc is None:
        candidates = text2atc(text = substance, fuzzy = False, link_obj = link_obj)
        found = len(candidates)
        if found != 1:
            if found == 0:
                _log.debug('no ATC found, aborting')
            else:
                _log.debug('non-unique ATC mapping, aborting')
            return atc
        # first column of the single hit is the ATC code
        atc = candidates[0][0].strip()

    args = {'atc': atc, 'term': substance.strip()}
    # only rows which do not yet carry a code are touched
    update_substance = {
        'cmd': "UPDATE ref.substance SET atc = %(atc)s WHERE lower(description) = lower(%(term)s) AND atc IS NULL",
        'args': args
    }
    update_product = {
        'cmd': "UPDATE ref.drug_product SET atc_code = %(atc)s WHERE lower(description) = lower(%(term)s) AND atc_code IS NULL",
        'args': args
    }
    gmPG2.run_rw_queries(link_obj = link_obj, queries = [update_substance, update_product])

    return atc
64 65 #============================================================
def text2atc(text=None, fuzzy=False, link_obj=None):
    """Look up the ATC codes recorded for a piece of text.

    The search covers ref.v_atc (column "term") as well as
    ref.substance and ref.drug_product (column "description").

    Args:
        text: the search term; surrounding whitespace is stripped.
        fuzzy: if true, match case-insensitive substrings (ILIKE
            '%text%'), otherwise require case-insensitive equality.
        link_obj: handed through to gmPG2.run_ro_queries().

    Returns:
        The rows delivered by the query, one per distinct ATC code,
        with columns atc_code, is_group_code, pk_data_source.
    """
    text = text.strip()

    if not fuzzy:
        args = {'term': text.lower()}
        cmd = """
            SELECT DISTINCT ON (atc_code) *
            FROM (
                SELECT atc as atc_code, is_group_code, pk_data_source
                FROM ref.v_atc
                WHERE lower(term) = lower(%(term)s) AND atc IS NOT NULL
                    UNION
                SELECT atc as atc_code, null, null
                FROM ref.substance
                WHERE lower(description) = lower(%(term)s) AND atc IS NOT NULL
                    UNION
                SELECT atc_code, null, null
                FROM ref.drug_product
                WHERE lower(description) = lower(%(term)s) AND atc_code IS NOT NULL
            ) as tmp
            ORDER BY atc_code
        """
    else:
        # wrap the term in SQL wildcards for substring matching
        args = {'term': '%%%s%%' % text}
        cmd = """
            SELECT DISTINCT ON (atc_code) *
            FROM (
                SELECT atc as atc_code, is_group_code, pk_data_source
                FROM ref.v_atc
                WHERE term ilike %(term)s AND atc IS NOT NULL
                    UNION
                SELECT atc as atc_code, null, null
                FROM ref.substance
                WHERE description ilike %(term)s AND atc IS NOT NULL
                    UNION
                SELECT atc_code, null, null
                FROM ref.drug_product
                WHERE description ilike %(term)s AND atc_code IS NOT NULL
            ) as tmp
            ORDER BY atc_code
        """

    query = {'cmd': cmd, 'args': args}
    rows, _idx = gmPG2.run_ro_queries(link_obj = link_obj, queries = [query], get_col_idx = False)

    _log.debug('term: %s => ATCs: %s (fuzzy: %s)', text, rows, fuzzy)

    return rows
114 115 #============================================================
def exists_as_atc(substance):
    """Tell whether ref.atc holds a term equal to *substance*, ignoring case.

    Returns:
        The first column of the first row of the SELECT EXISTS query.
    """
    query = {
        'cmd': 'SELECT EXISTS (SELECT 1 FROM ref.atc WHERE lower(term) = lower(%(term)s))',
        'args': {'term': substance}
    }
    rows, _idx = gmPG2.run_ro_queries(queries = [query], get_col_idx = False)
    return rows[0][0]
121 122 #============================================================
def get_reference_atcs(order_by='atc, term, lang'):
    """Return all rows of ref.v_atc.

    Args:
        order_by: text placed verbatim after "ORDER BY" in the query.
            It is interpolated into the SQL rather than passed as a
            query argument, so it must never come from untrusted input.

    Returns:
        The rows delivered by the query.
    """
    query = {'cmd': 'SELECT * FROM ref.v_atc ORDER BY %s' % order_by}
    rows, _idx = gmPG2.run_ro_queries(queries = [query], get_col_idx = False)
    return rows
127 128 #============================================================
# Known layouts of the fifth CSV column, tried in this order. The flag tells
# whether free text follows the administration routes. Raw strings keep the
# regex escapes (\d, \s) from being read as (invalid) string escapes.
_ATC_DETAIL_LAYOUTS = (
    # "1,1 mg O,P,R,..."
    (regex.compile(r'\d{,3},\d{,3}\s.{1,2}\s.(,.)*$'), False),
    # "1,1 mg O,P,R bezogen auf ..." ("bezogen auf" = "relative to")
    (regex.compile(r'\d{,3},\d{,3}\s.{1,2}\s.(,.)*\s.+$'), True),
    # "20 mg O"
    (regex.compile(r'\d{,3}\s.{1,2}\s.(,.)*$'), False),
    # "20 mg O bezogen auf ..."
    (regex.compile(r'\d{,3}\s.{1,2}\s.(,.)*\s.+$'), True),
)


def _split_atc_details(details):
    """Split the fifth CSV column into (unit, adro, comment).

    Text matching none of the known layouts, such as
    "Standarddosis: 1 Tablette oder 30 ml Mixtur", is returned
    entirely as the comment. The leading amount is discarded.
    """
    for layout, has_comment in _ATC_DETAIL_LAYOUTS:
        if layout.match(details) is None:
            continue
        if has_comment:
            # at most 3 splits so the trailing free text stays in one piece
            _amount, unit, adro, comment = regex.split(r'\s', details, maxsplit = 3)
            return unit, adro, comment
        _amount, unit, adro = regex.split(r'\s', details)
        return unit, adro, ''
    return '', '', details


def _run_and_commit(conn, queries):
    """Run *queries* in a fresh cursor on *conn*, close it, then commit."""
    curs = conn.cursor()
    try:
        gmPG2.run_rw_queries(link_obj = curs, queries = queries)
    finally:
        # release the cursor even when a query fails
        curs.close()
    conn.commit()


def atc_import(cfg_fname=None, conn=None):
    """Import an ATC code list into ref.atc.

    The config file is read for group "atc" with the options
    "data file", "version", "language", "description", "url",
    "long name" and "short name". The data file is a CSV file
    which must live in the same directory as the config file.
    Its first row is skipped. Of each further row, column 0 (code),
    column 2 (name) and column 4 (details) are loaded into
    ref.atc_staging and from there transferred into ref.atc.
    The staging table is emptied before and after the import.

    The ref.data_source row is looked up, updated or created
    without passing a link_obj, that is, not explicitly via *conn*.

    Args:
        cfg_fname: path of the config file describing the data file.
        conn: connection whose cursors are used for staging and transfer;
            it is committed after each step.

    Returns:
        True
    """
    # read meta data
    _cfg.add_file_source(source = 'atc', file = cfg_fname, encoding = 'utf8')

    def _option(name):
        return _cfg.get(group = 'atc', option = name, source_order = [('atc', 'return')])

    # must be in same dir as conf file
    data_fname = os.path.join(os.path.dirname(cfg_fname), _option('data file'))
    version = _option('version')
    lang = _option('language')
    desc = _option('description')
    url = _option('url')
    name_long = _option('long name')
    name_short = _option('short name')

    _cfg.remove_source(source = 'atc')

    _log.debug('importing ATC version [%s] (%s) from [%s]', version, lang, data_fname)

    args = {'ver': version, 'desc': desc, 'url': url, 'name_long': name_long, 'name_short': name_short, 'lang': lang}

    # find or create data source record
    cmd = "select pk from ref.data_source where name_short = %(name_short)s and version = %(ver)s"
    rows, idx = gmPG2.run_ro_queries(queries = [{'cmd': cmd, 'args': args}])
    if len(rows) > 0:
        data_src_pk = rows[0][0]
        _log.debug('ATC data source record existed, pk is #%s, refreshing fields', data_src_pk)
        # exists - update
        args['pk'] = data_src_pk
        cmd = """UPDATE ref.data_source SET
                name_long = %(name_long)s,
                description = %(desc)s,
                lang = %(lang)s,
                source = %(url)s
            WHERE
                pk = %(pk)s
            """
        gmPG2.run_rw_queries(queries = [{'cmd': cmd, 'args': args}])
    else:
        _log.debug('ATC data source record not found, creating')
        # create
        cmd = """insert into ref.data_source (name_long, name_short, version, description, lang, source) values (
                %(name_long)s,
                %(name_short)s,
                %(ver)s,
                %(desc)s,
                %(lang)s,
                %(url)s
            ) returning pk"""
        rows, idx = gmPG2.run_rw_queries(queries = [{'cmd': cmd, 'args': args}], return_data = True)
        data_src_pk = rows[0][0]
        _log.debug('ATC data source record created, pk is #%s', data_src_pk)

    # import data; the file is opened before the staging table is
    # touched and is closed even if loading fails half-way
    with io.open(data_fname, mode = 'rt', encoding = 'utf8', errors = 'replace') as csv_file:
        atc_reader = gmTools.unicode_csv_reader(csv_file, delimiter = ",", quotechar = '"')

        # clean out staging area
        _run_and_commit(conn, [{'cmd': """delete from ref.atc_staging"""}])
        _log.debug('ATC staging table emptied')

        # from file into staging table
        cmd = """insert into ref.atc_staging values (%s, %s, %s, %s, %s)"""
        curs = conn.cursor()
        try:
            header_skipped = False
            for atc_line in atc_reader:
                # skip first
                if not header_skipped:
                    header_skipped = True
                    continue

                # skip blanks, including rows shorter than five columns
                if ''.join(atc_line[:5]) == '':
                    continue

                unit, adro, comment = _split_atc_details(atc_line[4])
                args = [
                    atc_line[0].strip(),
                    atc_line[2],
                    unit,
                    adro,
                    comment
                ]
                gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': args}])
        finally:
            curs.close()
        conn.commit()
    _log.debug('ATC staging table loaded')

    # from staging table to real table
    args = {'src_pk': data_src_pk}
    queries = []
    # transfer new records
    cmd = """
        insert into ref.atc (
            fk_data_source,
            code,
            term,
            comment,
            administration_route
        ) select
            %(src_pk)s,
            atc,
            name,
            nullif(comment, ''),
            nullif(adro, '')
        FROM
            ref.atc_staging
        WHERE
            not exists (
                select 1 FROM ref.atc WHERE fk_data_source = %(src_pk)s AND code = ref.atc_staging.atc
            )
    """
    queries.append({'cmd': cmd, 'args': args})
    # update records so pre-existing ones are refreshed; each row must be
    # paired with the staging row carrying the same code - without that
    # join condition every row of this data source would be overwritten
    # from one arbitrary staging row
    cmd = """
        UPDATE ref.atc SET
            term = r_as.name,
            comment = nullif(r_as.comment, ''),
            administration_route = nullif(r_as.adro, '')
        FROM
            (SELECT atc, name, comment, adro FROM ref.atc_staging) AS r_as
        WHERE
            fk_data_source = %(src_pk)s
                AND
            code = r_as.atc
    """
    queries.append({'cmd': cmd, 'args': args})
    _run_and_commit(conn, queries)
    _log.debug('transfer from ATC staging table to real ATC table done')

    # clean out staging area
    _run_and_commit(conn, [{'cmd': """delete from ref.atc_staging"""}])
    _log.debug('ATC staging table emptied')

    return True

#============================================================
# main
#------------------------------------------------------------
if __name__ == "__main__":

    # manual test harness, only runs as: <script> test [<argument>]
    if len(sys.argv) == 1 or sys.argv[1] != 'test':
        sys.exit()

    from Gnumed.pycommon import gmLog2
    from Gnumed.pycommon import gmI18N

    gmI18N.activate_locale()
#    gmDateTime.init()

    #--------------------------------------------------------
    def test_atc_import():
        """Import the ATC data described by the config file named in argv[2]."""
        rw_conn = gmPG2.get_connection(readonly = False)
        atc_import(cfg_fname = sys.argv[2], conn = rw_conn)

    #--------------------------------------------------------
    def test_text2atc():
        """Print the exact and the fuzzy ATC matches for argv[2]."""
        term = sys.argv[2]
        print('searching ATC code for:', term)
        for fuzzy in (False, True):
            print(' ', text2atc(term, fuzzy))

    #--------------------------------------------------------
    def test_get_reference_atcs():
        """Print every row returned by get_reference_atcs()."""
        print("reference_of_atc_codes:")
        for row in get_reference_atcs():
            print(row)

    #--------------------------------------------------------
    #test_atc_import()
    #test_text2atc()
    test_get_reference_atcs()

#============================================================