1
2 """LOINC handling code.
3
4 http://loinc.org
5
6 license: GPL v2 or later
7 """
8
9 __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>"
10
11 import sys
12 import codecs
13 import logging
14 import csv
15 import re as regex
16
17
18 if __name__ == '__main__':
19 sys.path.insert(0, '../../')
20 from Gnumed.pycommon import gmPG2
21 from Gnumed.pycommon import gmTools
22 from Gnumed.pycommon import gmMatchProvider
23
24
# Logger shared by all LOINC handling code in this module.
_log = logging.getLogger('gm.loinc')


# Provenance of the data and layout of the distributed LOINC file:
# the file is decoded as file_encoding, the line containing
# license_delimiter separates license text from data, and the line
# starting with version_tag carries the release version.
origin_url = u'http://loinc.org'
file_encoding = 'latin1'
license_delimiter = u'Clip Here for Data'
version_tag = u'LOINC(R) Database Version'
name_long = u'LOINC® (Logical Observation Identifiers Names and Codes)'
name_short = u'LOINC'

# Column names of the LOINC table; the number of entries determines
# how many placeholders the staging-table INSERT is built with.
loinc_fields = (
    u"LOINC_NUM COMPONENT PROPERTY TIME_ASPCT SYSTEM SCALE_TYP METHOD_TYP "
    u"RELAT_NMS CLASS SOURCE DT_LAST_CH CHNG_TYPE COMMENTS ANSWERLIST STATUS "
    u"MAP_TO SCOPE NORM_RANGE IPCC_UNITS REFERENCE EXACT_CMP_SY MOLAR_MASS "
    u"CLASSTYPE FORMULA SPECIES EXMPL_ANSWERS ACSSYM BASE_NAME FINAL "
    u"NAACCR_ID CODE_TABLE SETROOT PANELELEMENTS SURVEY_QUEST_TEXT "
    u"SURVEY_QUEST_SRC UNITSREQUIRED SUBMITTED_UNITS RELATEDNAMES2 SHORTNAME "
    u"ORDER_OBS CDISC_COMMON_TESTS HL7_FIELD_SUBFIELD_ID "
    u"EXTERNAL_COPYRIGHT_NOTICE EXAMPLE_UNITS INPC_PERCENTAGE LONG_COMMON_NAME"
).split()


# Hard-coded groups of LOINC codes; going by the names they collect the
# codes for creatinine, GFR, body height and body weight results
# (NOTE(review): code meanings not verifiable from this file).
LOINC_creatinine_quantity = [
    '2160-0', '14682-9', '40264-4', '40248-7',
]
LOINC_gfr_quantity = [
    '33914-3', '45066-8', '48642-3', '48643-1', '50044-7',
    '50210-4', '50384-7', '62238-1', '69405-9', '70969-1',
]
LOINC_height = [
    '3137-7', '3138-5', '8301-4', '8302-2',
    '8305-5', '8306-3', '8307-1', '8308-9',
]
LOINC_weight = [
    '18833-4', '29463-7', '3141-9', '3142-7', '8335-2',
    '8339-4', '8344-4', '8346-9', '8351-9',
]
43
44
46
47
48 cmd = u"""
49 SELECT coalesce (
50 (SELECT term
51 FROM ref.v_coded_terms
52 WHERE
53 coding_system = 'LOINC'
54 AND
55 code = %(loinc)s
56 AND
57 lang = i18n.get_curr_lang()
58 ),
59 (SELECT term
60 FROM ref.v_coded_terms
61 WHERE
62 coding_system = 'LOINC'
63 AND
64 code = %(loinc)s
65 AND
66 lang = 'en_EN'
67 ),
68 (SELECT term
69 FROM ref.v_coded_terms
70 WHERE
71 coding_system = 'LOINC'
72 AND
73 code = %(loinc)s
74 )
75 )"""
76 args = {'loinc': loinc}
77 rows, idx = gmPG2.run_ro_queries(queries = [{'cmd': cmd, 'args': args}], get_col_idx = False)
78
79 if rows[0][0] is None:
80 return []
81
82 return [ r[0] for r in rows ]
83
85
86 _log.debug('splitting LOINC source file [%s]', input_fname)
87
88 if license_fname is None:
89 license_fname = gmTools.get_unique_filename(prefix = 'loinc_license-', suffix = '.txt')
90 _log.debug('LOINC header: %s', license_fname)
91
92 if data_fname is None:
93 data_fname = gmTools.get_unique_filename(prefix = 'loinc_data-', suffix = '.csv')
94 _log.debug('LOINC data: %s', data_fname)
95
96 loinc_file = codecs.open(input_fname, 'rU', encoding = file_encoding, errors = 'replace')
97 out_file = codecs.open(license_fname, 'w', encoding = 'utf8', errors = 'replace')
98
99 for line in loinc_file:
100
101 if license_delimiter in line:
102 out_file.write(line)
103 out_file.close()
104 out_file = codecs.open(data_fname, 'w', encoding = 'utf8', errors = 'replace')
105 continue
106
107 out_file.write(line)
108
109 out_file.close()
110
111 return data_fname, license_fname
112
114
115 csv_file = codecs.open(data_fname, 'rU', 'utf8', 'replace')
116 first_line = csv_file.readline()
117 sniffer = csv.Sniffer()
118 if sniffer.has_header(first_line):
119 pass
120
122
123 in_file = codecs.open(license_fname, 'rU', encoding = 'utf8', errors = 'replace')
124
125 version = None
126 for line in in_file:
127 if line.startswith(version_tag):
128 version = line[len(version_tag):].strip()
129 break
130
131 in_file.close()
132 return version
133
def loinc_import(data_fname=None, license_fname=None, version=None, conn=None, lang='en_EN'):
    """Import LOINC data into the database via the staging table.

    Steps, each committed on `conn` unless noted otherwise:

    1. empty ref.loinc_staging
    2. load every data row of `data_fname` (all but the first line)
       into ref.loinc_staging
    3. create or update the matching ref.data_source row (not committed
       separately, it goes out with the commit of step 4)
    4. insert missing codes into ref.loinc and update their details
       from the staging table
    5. empty ref.loinc_staging again

    Args:
        data_fname: tab-delimited, '"'-quoted, utf8 encoded data file
            whose first line is skipped as the header.
        license_fname: utf8 encoded license text; stored as description
            of the data source and used for version detection when
            `version` is None.
        version: LOINC version string; detected via get_version() from
            `license_fname` if None.
        conn: database connection; it is used unconditionally
            (conn.cursor() / conn.commit()), so None will not work
            despite being the default.
        lang: language stored with the data source record.

    Returns:
        True

    Raises:
        ValueError: if no version was passed in and none can be detected.
    """
    if version is None:
        version = get_version(license_fname = license_fname)

    if version is None:
        raise ValueError('cannot detect LOINC version')

    _log.debug('importing LOINC version [%s]', version)

    # step 1: start out with an empty staging table
    curs = conn.cursor()
    try:
        cmd = u"""DELETE FROM ref.loinc_staging"""
        gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}])
    finally:
        curs.close()
    conn.commit()
    _log.debug('staging table emptied')

    # step 2: load the data file into the staging table,
    # one positional placeholder per LOINC column
    cmd = u"""INSERT INTO ref.loinc_staging values (%s)""" % u', '.join([u'%s'] * len(loinc_fields))
    # "with" makes sure the file gets closed even if an INSERT fails
    with codecs.open(data_fname, 'rU', 'utf8', 'replace') as csv_file:
        loinc_reader = gmTools.unicode_csv_reader(csv_file, delimiter = "\t", quotechar = '"')
        curs = conn.cursor()
        try:
            for line_idx, loinc_line in enumerate(loinc_reader):
                # the first line holds the column names, not data
                if line_idx == 0:
                    continue
                gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': loinc_line}])
        finally:
            curs.close()
        conn.commit()
    _log.debug('staging table loaded')

    # step 3: create or update the data source record
    with codecs.open(license_fname, 'rU', encoding = 'utf8', errors = 'replace') as in_file:
        desc = in_file.read()
    args = {'ver': version, 'desc': desc, 'url': origin_url, 'name_long': name_long, 'name_short': name_short, 'lang': lang}
    queries = [
        # insert if not existing
        {'args': args, 'cmd': u"""
            INSERT INTO ref.data_source (name_long, name_short, version) SELECT
                %(name_long)s,
                %(name_short)s,
                %(ver)s
            WHERE NOT EXISTS (
                SELECT 1 FROM ref.data_source WHERE
                    name_long = %(name_long)s
                        AND
                    name_short = %(name_short)s
                        AND
                    version = %(ver)s
            )"""
        },
        # update non-unique fields
        {'args': args, 'cmd': u"""
            UPDATE ref.data_source SET
                description = %(desc)s,
                source = %(url)s,
                lang = %(lang)s
            WHERE
                name_long = %(name_long)s
                    AND
                name_short = %(name_short)s
                    AND
                version = %(ver)s
            """
        },
        # retrieve PK of data source
        {'args': args, 'cmd': u"""SELECT pk FROM ref.data_source WHERE name_short = %(name_short)s AND version = %(ver)s"""}
    ]
    curs = conn.cursor()
    try:
        rows, idx = gmPG2.run_rw_queries(link_obj = curs, queries = queries, return_data = True)
        data_src_pk = rows[0][0]
    finally:
        curs.close()
    _log.debug('data source record created or updated, pk is #%s', data_src_pk)

    # step 4: transfer from staging table to the real table
    args = {'src_pk': data_src_pk}
    queries = []
    # insert codes not yet known for this data source; the term is the
    # long common name or else the ":"-joined name parts
    queries.append ({
        'args': args,
        'cmd': u"""
            INSERT INTO ref.loinc (
                fk_data_source, term, code
            )
            SELECT
                %(src_pk)s,
                coalesce (
                    nullif(long_common_name, ''),
                    (
                        coalesce(nullif(component, '') || ':', '') ||
                        coalesce(nullif(property, '') || ':', '') ||
                        coalesce(nullif(time_aspect, '') || ':', '') ||
                        coalesce(nullif(system, '') || ':', '') ||
                        coalesce(nullif(scale_type, '') || ':', '') ||
                        coalesce(nullif(method_type, '') || ':', '')
                    )
                ),
                nullif(loinc_num, '')
            FROM
                ref.loinc_staging r_ls
            WHERE NOT EXISTS (
                SELECT 1 FROM ref.loinc r_l WHERE
                    r_l.fk_data_source = %(src_pk)s
                        AND
                    r_l.code = nullif(r_ls.loinc_num, '')
                        AND
                    r_l.term = coalesce (
                        nullif(r_ls.long_common_name, ''),
                        (
                            coalesce(nullif(r_ls.component, '') || ':', '') ||
                            coalesce(nullif(r_ls.property, '') || ':', '') ||
                            coalesce(nullif(r_ls.time_aspect, '') || ':', '') ||
                            coalesce(nullif(r_ls.system, '') || ':', '') ||
                            coalesce(nullif(r_ls.scale_type, '') || ':', '') ||
                            coalesce(nullif(r_ls.method_type, '') || ':', '')
                        )
                    )
            )"""
    })
    # fill in / refresh the detail columns, mapping empty strings to NULL
    queries.append ({
        'args': args,
        'cmd': u"""
            UPDATE ref.loinc SET
                comment = nullif(r_ls.comments, ''),
                component = nullif(r_ls.component, ''),
                property = nullif(r_ls.property, ''),
                time_aspect = nullif(r_ls.time_aspect, ''),
                system = nullif(r_ls.system, ''),
                scale_type = nullif(r_ls.scale_type, ''),
                method_type = nullif(r_ls.method_type, ''),
                related_names_1_old = nullif(r_ls.related_names_1_old, ''),
                grouping_class = nullif(r_ls.class, ''),
                loinc_internal_source = nullif(r_ls.source, ''),
                dt_last_change = nullif(r_ls.dt_last_change, ''),
                change_type = nullif(r_ls.change_type, ''),
                answer_list = nullif(r_ls.answer_list, ''),
                code_status = nullif(r_ls.status, ''),
                maps_to = nullif(r_ls.map_to, ''),
                scope = nullif(r_ls.scope, ''),
                normal_range = nullif(r_ls.normal_range, ''),
                ipcc_units = nullif(r_ls.ipcc_units, ''),
                reference = nullif(r_ls.reference, ''),
                exact_component_synonym = nullif(r_ls.exact_component_synonym, ''),
                molar_mass = nullif(r_ls.molar_mass, ''),
                grouping_class_type = nullif(r_ls.class_type, '')::smallint,
                formula = nullif(r_ls.formula, ''),
                species = nullif(r_ls.species, ''),
                example_answers = nullif(r_ls.example_answers, ''),
                acs_synonyms = nullif(r_ls.acs_synonyms, ''),
                base_name = nullif(r_ls.base_name, ''),
                final = nullif(r_ls.final, ''),
                naa_ccr_id = nullif(r_ls.naa_ccr_id, ''),
                code_table = nullif(r_ls.code_table, ''),
                is_set_root = nullif(r_ls.is_set_root, '')::boolean,
                panel_elements = nullif(r_ls.panel_elements, ''),
                survey_question_text = nullif(r_ls.survey_question_text, ''),
                survey_question_source = nullif(r_ls.survey_question_source, ''),
                units_required = nullif(r_ls.units_required, ''),
                submitted_units = nullif(r_ls.submitted_units, ''),
                related_names_2 = nullif(r_ls.related_names_2, ''),
                short_name = nullif(r_ls.short_name, ''),
                order_obs = nullif(r_ls.order_obs, ''),
                cdisc_common_tests = nullif(r_ls.cdisc_common_tests, ''),
                hl7_field_subfield_id = nullif(r_ls.hl7_field_subfield_id, ''),
                external_copyright_notice = nullif(r_ls.external_copyright_notice, ''),
                example_units = nullif(r_ls.example_units, ''),
                inpc_percentage = nullif(r_ls.inpc_percentage, ''),
                long_common_name = nullif(r_ls.long_common_name, '')
            FROM
                ref.loinc_staging r_ls
            WHERE
                fk_data_source = %(src_pk)s
                    AND
                code = nullif(r_ls.loinc_num, '')
                    AND
                term = coalesce (
                    nullif(r_ls.long_common_name, ''),
                    (
                        coalesce(nullif(r_ls.component, '') || ':', '') ||
                        coalesce(nullif(r_ls.property, '') || ':', '') ||
                        coalesce(nullif(r_ls.time_aspect, '') || ':', '') ||
                        coalesce(nullif(r_ls.system, '') || ':', '') ||
                        coalesce(nullif(r_ls.scale_type, '') || ':', '') ||
                        coalesce(nullif(r_ls.method_type, '') || ':', '')
                    )
                )
        """
    })
    curs = conn.cursor()
    try:
        gmPG2.run_rw_queries(link_obj = curs, queries = queries)
    finally:
        curs.close()
    conn.commit()
    _log.debug('transfer from staging table to real table done')

    # step 5: leave the staging table empty behind us
    curs = conn.cursor()
    try:
        cmd = u"""DELETE FROM ref.loinc_staging"""
        gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}])
    finally:
        curs.close()
    conn.commit()
    _log.debug('staging table emptied')

    return True
340
341
# SQL templates for looking up LOINC codes by a typed fragment. Each
# SELECT yields the columns "data", "field_label" and "list_label".
# %(fragment_condition)s is a placeholder for the comparison applied to
# the searched column(s); what fills it in is not visible in this part
# of the file -- presumably a match provider (gmMatchProvider is
# imported above), TODO confirm.

# candidates taken from the test types already defined in clin.test_type
_SQL_LOINC_from_test_type = u"""
-- from test type
SELECT
loinc AS data,
loinc AS field_label,
(loinc || ': ' || abbrev || ' (' || name || ')') AS list_label
FROM clin.test_type
WHERE loinc %(fragment_condition)s
"""

# candidates from ref.v_coded_terms in the language returned by
# i18n.get_curr_lang(), matching on either code or term
_SQL_LOINC_from_i18n_coded_term = u"""
-- from coded term, in user language
SELECT
code AS data,
code AS field_label,
(code || ': ' || term) AS list_label
FROM ref.v_coded_terms
WHERE
coding_system = 'LOINC'
AND
lang = i18n.get_curr_lang()
AND
(code %(fragment_condition)s
OR
term %(fragment_condition)s)
"""

# same as above but restricted to lang = 'en_EN'
_SQL_LOINC_from_en_EN_coded_term = u"""
-- from coded term, in English
SELECT
code AS data,
code AS field_label,
(code || ': ' || term) AS list_label
FROM ref.v_coded_terms
WHERE
coding_system = 'LOINC'
AND
lang = 'en_EN'
AND
(code %(fragment_condition)s
OR
term %(fragment_condition)s)
"""

# same as above but without any restriction on the language
_SQL_LOINC_from_any_coded_term = u"""
-- from coded term, in any language
SELECT
code AS data,
code AS field_label,
(code || ': ' || term) AS list_label
FROM ref.v_coded_terms
WHERE
coding_system = 'LOINC'
AND
(code %(fragment_condition)s
OR
term %(fragment_condition)s)
"""
400
458
459
460
# Manual test driver: does nothing unless the first command line
# argument is "test".
if __name__ == "__main__":

    if len(sys.argv) < 2:
        sys.exit()

    if sys.argv[1] != 'test':
        sys.exit()

    # NOTE(review): gmLog2 is not referenced below -- presumably imported
    # for its side effects (logging setup), confirm
    from Gnumed.pycommon import gmLog2
    from Gnumed.pycommon import gmI18N

    gmI18N.activate_locale()





    # look up the term for the LOINC code given as second argument;
    # NOTE(review): sys.argv[2] is accessed without a length check
    term = loinc2term(sys.argv[2])
    print sys.argv[2], '->', term

    # NOTE(review): test_loinc_split() is not defined in the visible part
    # of this file -- verify it exists before relying on this call
    test_loinc_split()
487
488
489
490
491