1
2 """LOINC handling code.
3
4 http://loinc.org
5
6 license: GPL v2 or later
7 """
8
9 __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>"
10
11 import sys
12 import io
13 import logging
14 import csv
15 import re as regex
16
17
18 if __name__ == '__main__':
19 sys.path.insert(0, '../../')
20 from Gnumed.pycommon import gmPG2
21 from Gnumed.pycommon import gmTools
22 from Gnumed.pycommon import gmMatchProvider
23
24
# Module-level logger for everything LOINC related.
_log = logging.getLogger('gm.loinc')


# --- metadata describing the LOINC distribution ---------------------------
origin_url = 'http://loinc.org'
# encoding the upstream LOINC source file is read with
file_encoding = 'latin1'
# line marker separating the license header from the data part
license_delimiter = 'Clip Here for Data'
# prefix of the line in the license header which carries the version
version_tag = 'LOINC(R) Database Version'
name_long = 'LOINC® (Logical Observation Identifiers Names and Codes)'
name_short = 'LOINC'

# Column names of the LOINC data file, in file order.
loinc_fields = [
    'LOINC_NUM',
    'COMPONENT',
    'PROPERTY',
    'TIME_ASPCT',
    'SYSTEM',
    'SCALE_TYP',
    'METHOD_TYP',
    'RELAT_NMS',
    'CLASS',
    'SOURCE',
    'DT_LAST_CH',
    'CHNG_TYPE',
    'COMMENTS',
    'ANSWERLIST',
    'STATUS',
    'MAP_TO',
    'SCOPE',
    'NORM_RANGE',
    'IPCC_UNITS',
    'REFERENCE',
    'EXACT_CMP_SY',
    'MOLAR_MASS',
    'CLASSTYPE',
    'FORMULA',
    'SPECIES',
    'EXMPL_ANSWERS',
    'ACSSYM',
    'BASE_NAME',
    'FINAL',
    'NAACCR_ID',
    'CODE_TABLE',
    'SETROOT',
    'PANELELEMENTS',
    'SURVEY_QUEST_TEXT',
    'SURVEY_QUEST_SRC',
    'UNITSREQUIRED',
    'SUBMITTED_UNITS',
    'RELATEDNAMES2',
    'SHORTNAME',
    'ORDER_OBS',
    'CDISC_COMMON_TESTS',
    'HL7_FIELD_SUBFIELD_ID',
    'EXTERNAL_COPYRIGHT_NOTICE',
    'EXAMPLE_UNITS',
    'INPC_PERCENTAGE',
    'LONG_COMMON_NAME',
]


# --- groups of LOINC codes belonging to the same kind of measurement -----
LOINC_creatinine_quantity = [
    '2160-0',
    '14682-9',
    '40264-4',
    '40248-7',
]
LOINC_gfr_quantity = [
    '33914-3',
    '45066-8',
    '48642-3',
    '48643-1',
    '50044-7',
    '50210-4',
    '50384-7',
    '62238-1',
    '69405-9',
    '70969-1',
]
LOINC_height = [
    '3137-7',
    '3138-5',
    '8301-4',
    '8302-2',
    '8305-5',
    '8306-3',
    '8307-1',
    '8308-9',
]
LOINC_weight = [
    '18833-4',
    '29463-7',
    '3141-9',
    '3142-7',
    '8335-2',
    '8339-4',
    '8344-4',
    '8346-9',
    '8351-9',
]
LOINC_rr_quantity = [
    '8478-0',
    '8448-3',
    '8449-1',
    '8456-6',
    '8457-4',
    '8458-2',
    '55284-4',
    '50403-5',
    '50402-7',
    '45372-0',
]
LOINC_heart_rate_quantity = [
    '8867-4',
    '67129-7',
    '40443-4',
    '69000-8',
    '69001-6',
    '68999-2',
]
LOINC_inr_quantity = [
    '34714-6',
    '46418-0',
    '6301-6',
    '38875-1',
]
46
47
48
49
60
61
# NOTE(review): the "def" header of this body is not visible in this listing;
# the body reads a variable named "loinc", presumably the function parameter.
#
# Look up a LOINC code in ref.loinc via a read-only query.
# Returns None when no row matches, otherwise the first matching row.
63 cmd = 'SELECT * FROM ref.loinc WHERE code = %(loinc)s'
64 args = {'loinc': loinc}
65 rows, idx = gmPG2.run_ro_queries(queries = [{'cmd': cmd, 'args': args}], get_col_idx = False)
# no match -> signal "unknown code" with None rather than an empty result
66 if len(rows) == 0:
67 return None
68 return rows[0]
69
70
72
73
# NOTE(review): the "def" header of this body is not visible in this listing;
# the body reads a variable named "loinc", presumably the function parameter.
#
# Resolve a LOINC code to its term(s) from ref.v_coded_terms.
# The SQL coalesce() picks the first non-NULL of:
#   1) the term in the current user language (i18n.get_curr_lang()),
#   2) the term in 'en_EN',
#   3) the term in any language.
# Returns an empty list when no term was found, otherwise a list holding the
# first column of each result row.
74 cmd = """
75 SELECT coalesce (
76 (SELECT term
77 FROM ref.v_coded_terms
78 WHERE
79 coding_system = 'LOINC'
80 AND
81 code = %(loinc)s
82 AND
83 lang = i18n.get_curr_lang()
84 ),
85 (SELECT term
86 FROM ref.v_coded_terms
87 WHERE
88 coding_system = 'LOINC'
89 AND
90 code = %(loinc)s
91 AND
92 lang = 'en_EN'
93 ),
94 (SELECT term
95 FROM ref.v_coded_terms
96 WHERE
97 coding_system = 'LOINC'
98 AND
99 code = %(loinc)s
100 )
101 )"""
102 args = {'loinc': loinc}
103 rows, idx = gmPG2.run_ro_queries(queries = [{'cmd': cmd, 'args': args}], get_col_idx = False)
104
# coalesce() yielded NULL: none of the three lookups found a term
105 if rows[0][0] is None:
106 return []
107
108 return [ r[0] for r in rows ]
109
110
111
112
114
# NOTE(review): the "def" header of this body is not visible in this listing;
# the body reads "input_fname", "data_fname" and "license_fname", presumably
# the function parameters.
#
# Split a LOINC source file into a license/header file and a data file.
# Every line up to and including the one containing license_delimiter goes to
# the license file, all following lines go to the data file.
# Returns the tuple (data_fname, license_fname).
115 _log.debug('splitting LOINC source file [%s]', input_fname)
116
# fall back to generated unique file names when the caller gave none
117 if license_fname is None:
118 license_fname = gmTools.get_unique_filename(prefix = 'loinc_license-', suffix = '.txt')
119 _log.debug('LOINC header: %s', license_fname)
120
121 if data_fname is None:
122 data_fname = gmTools.get_unique_filename(prefix = 'loinc_data-', suffix = '.csv')
123 _log.debug('LOINC data: %s', data_fname)
124
# input is decoded with file_encoding, both outputs are written as utf8
# NOTE(review): loinc_file is never closed explicitly in the visible code
125 loinc_file = io.open(input_fname, mode = 'rt', encoding = file_encoding, errors = 'replace')
126 out_file = io.open(license_fname, mode = 'wt', encoding = 'utf8', errors = 'replace')
127
128 for line in loinc_file:
129
# the delimiter line itself still belongs to the license part,
# after it the output is switched over to the data file
130 if license_delimiter in line:
131 out_file.write(line)
132 out_file.close()
133 out_file = io.open(data_fname, mode = 'wt', encoding = 'utf8', errors = 'replace')
134 continue
135
136 out_file.write(line)
137
138 out_file.close()
139
140 return data_fname, license_fname
141
142
144
# NOTE(review): the "def" header of this body is not visible in this listing;
# the body reads "data_fname", presumably the function parameter.
#
# Read the first line of the data file and ask csv.Sniffer whether it looks
# like a header. The result is not acted upon (the branch is a no-op).
# NOTE(review): csv_file is not closed in the visible code.
145 csv_file = io.open(data_fname, mode = 'rt', encoding = 'utf8', errors = 'replace')
146 first_line = csv_file.readline()
147 sniffer = csv.Sniffer()
148 if sniffer.has_header(first_line):
149 pass
150
151
153
# NOTE(review): the "def" header of this body is not visible in this listing;
# the body reads "license_fname", presumably the function parameter.
#
# Extract the LOINC version from the license file.
# Scans for the first line starting with version_tag and returns the rest of
# that line with surrounding whitespace stripped; returns None when no line
# starts with the tag.
154 in_file = io.open(license_fname, mode = 'rt', encoding = 'utf8', errors = 'replace')
155
156 version = None
157 for line in in_file:
158 if line.startswith(version_tag):
# everything after the tag is the version string
159 version = line[len(version_tag):].strip()
160 break
161
162 in_file.close()
163 return version
164
165
def loinc_import(data_fname=None, license_fname=None, version=None, conn=None, lang='en_EN'):
    """Import LOINC data into the database.

    The data file is loaded into staging.loinc_staging first, a matching
    ref.data_source row is created or updated, and the staged rows are
    then transferred into ref.loinc. The staging table is emptied before
    and after the import.

    Args:
        data_fname: name of the tab-delimited, utf8-encoded LOINC data
            file; its first row is treated as a header and skipped.
        license_fname: name of the utf8-encoded license file; its content
            is stored as the data source description and it is used for
            version detection when *version* is None.
        version: LOINC version string; detected via get_version() if None.
        conn: database connection offering .cursor() and .commit();
            must be given despite the None default.
        lang: language tag stored with the data source record.

    Returns:
        True after the import has completed.

    Raises:
        ValueError: if no version was given and none can be detected.
    """
    if version is None:
        version = get_version(license_fname = license_fname)

    if version is None:
        raise ValueError('cannot detect LOINC version')

    _log.debug('importing LOINC version [%s]', version)

    # clean out staging area
    _empty_loinc_staging(conn)

    # import data from csv file into staging table;
    # one "%s" placeholder per LOINC field (Python-level formatting here,
    # all other "%(...)s" markers below are query parameters)
    cmd = """INSERT INTO staging.loinc_staging values (%s%%s)""" % ('%s, ' * (len(loinc_fields) - 1))
    # "with" guarantees the data file is closed even if an INSERT fails
    with io.open(data_fname, mode = 'rt', encoding = 'utf8', errors = 'replace') as csv_file:
        loinc_reader = gmTools.unicode_csv_reader(csv_file, delimiter = "\t", quotechar = '"')
        curs = conn.cursor()
        for row_idx, loinc_line in enumerate(loinc_reader):
            # the first row holds the column names, not data
            if row_idx == 0:
                continue
            gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': loinc_line}])
        curs.close()
        conn.commit()
    _log.debug('staging table loaded')

    # create data source record
    with io.open(license_fname, mode = 'rt', encoding = 'utf8', errors = 'replace') as in_file:
        desc = in_file.read()
    args = {'ver': version, 'desc': desc, 'url': origin_url, 'name_long': name_long, 'name_short': name_short, 'lang': lang}
    queries = [
        # insert if not existing
        {'args': args, 'cmd': """
            INSERT INTO ref.data_source (name_long, name_short, version) SELECT
                %(name_long)s,
                %(name_short)s,
                %(ver)s
            WHERE NOT EXISTS (
                SELECT 1 FROM ref.data_source WHERE
                    name_long = %(name_long)s
                        AND
                    name_short = %(name_short)s
                        AND
                    version = %(ver)s
            )"""
        },
        # update non-unique fields
        {'args': args, 'cmd': """
            UPDATE ref.data_source SET
                description = %(desc)s,
                source = %(url)s,
                lang = %(lang)s
            WHERE
                name_long = %(name_long)s
                    AND
                name_short = %(name_short)s
                    AND
                version = %(ver)s
            """
        },
        # retrieve PK of data source
        {'args': args, 'cmd': """SELECT pk FROM ref.data_source WHERE name_short = %(name_short)s AND version = %(ver)s"""}
    ]
    curs = conn.cursor()
    rows, _idx = gmPG2.run_rw_queries(link_obj = curs, queries = queries, return_data = True)
    data_src_pk = rows[0][0]
    curs.close()
    _log.debug('data source record created or updated, pk is #%s', data_src_pk)

    # import from staging table to real table
    args = {'src_pk': data_src_pk}
    queries = []
    # insert those (code, term) pairs which do not exist yet for this source;
    # the term falls back to a ":"-joined name built from the parts when
    # long_common_name is empty
    queries.append ({
        'args': args,
        'cmd': """
            INSERT INTO ref.loinc (
                fk_data_source, term, code
            )
            SELECT
                %(src_pk)s,
                coalesce (
                    nullif(long_common_name, ''),
                    (
                        coalesce(nullif(component, '') || ':', '') ||
                        coalesce(nullif(property, '') || ':', '') ||
                        coalesce(nullif(time_aspect, '') || ':', '') ||
                        coalesce(nullif(system, '') || ':', '') ||
                        coalesce(nullif(scale_type, '') || ':', '') ||
                        coalesce(nullif(method_type, '') || ':', '')
                    )
                ),
                nullif(loinc_num, '')
            FROM
                staging.loinc_staging st_ls
            WHERE NOT EXISTS (
                SELECT 1 FROM ref.loinc r_l WHERE
                    r_l.fk_data_source = %(src_pk)s
                        AND
                    r_l.code = nullif(st_ls.loinc_num, '')
                        AND
                    r_l.term = coalesce (
                        nullif(st_ls.long_common_name, ''),
                        (
                            coalesce(nullif(st_ls.component, '') || ':', '') ||
                            coalesce(nullif(st_ls.property, '') || ':', '') ||
                            coalesce(nullif(st_ls.time_aspect, '') || ':', '') ||
                            coalesce(nullif(st_ls.system, '') || ':', '') ||
                            coalesce(nullif(st_ls.scale_type, '') || ':', '') ||
                            coalesce(nullif(st_ls.method_type, '') || ':', '')
                        )
                    )
            )"""
    })
    # fill in the detail columns of the matching ref.loinc rows,
    # empty strings from the staging table become NULL
    queries.append ({
        'args': args,
        'cmd': """
            UPDATE ref.loinc SET
                comment = nullif(st_ls.comments, ''),
                component = nullif(st_ls.component, ''),
                property = nullif(st_ls.property, ''),
                time_aspect = nullif(st_ls.time_aspect, ''),
                system = nullif(st_ls.system, ''),
                scale_type = nullif(st_ls.scale_type, ''),
                method_type = nullif(st_ls.method_type, ''),
                related_names_1_old = nullif(st_ls.related_names_1_old, ''),
                grouping_class = nullif(st_ls.class, ''),
                loinc_internal_source = nullif(st_ls.source, ''),
                dt_last_change = nullif(st_ls.dt_last_change, ''),
                change_type = nullif(st_ls.change_type, ''),
                answer_list = nullif(st_ls.answer_list, ''),
                code_status = nullif(st_ls.status, ''),
                maps_to = nullif(st_ls.map_to, ''),
                scope = nullif(st_ls.scope, ''),
                normal_range = nullif(st_ls.normal_range, ''),
                ipcc_units = nullif(st_ls.ipcc_units, ''),
                reference = nullif(st_ls.reference, ''),
                exact_component_synonym = nullif(st_ls.exact_component_synonym, ''),
                molar_mass = nullif(st_ls.molar_mass, ''),
                grouping_class_type = nullif(st_ls.class_type, '')::smallint,
                formula = nullif(st_ls.formula, ''),
                species = nullif(st_ls.species, ''),
                example_answers = nullif(st_ls.example_answers, ''),
                acs_synonyms = nullif(st_ls.acs_synonyms, ''),
                base_name = nullif(st_ls.base_name, ''),
                final = nullif(st_ls.final, ''),
                naa_ccr_id = nullif(st_ls.naa_ccr_id, ''),
                code_table = nullif(st_ls.code_table, ''),
                is_set_root = nullif(st_ls.is_set_root, '')::boolean,
                panel_elements = nullif(st_ls.panel_elements, ''),
                survey_question_text = nullif(st_ls.survey_question_text, ''),
                survey_question_source = nullif(st_ls.survey_question_source, ''),
                units_required = nullif(st_ls.units_required, ''),
                submitted_units = nullif(st_ls.submitted_units, ''),
                related_names_2 = nullif(st_ls.related_names_2, ''),
                short_name = nullif(st_ls.short_name, ''),
                order_obs = nullif(st_ls.order_obs, ''),
                cdisc_common_tests = nullif(st_ls.cdisc_common_tests, ''),
                hl7_field_subfield_id = nullif(st_ls.hl7_field_subfield_id, ''),
                external_copyright_notice = nullif(st_ls.external_copyright_notice, ''),
                example_units = nullif(st_ls.example_units, ''),
                inpc_percentage = nullif(st_ls.inpc_percentage, ''),
                long_common_name = nullif(st_ls.long_common_name, '')
            FROM
                staging.loinc_staging st_ls
            WHERE
                fk_data_source = %(src_pk)s
                    AND
                code = nullif(st_ls.loinc_num, '')
                    AND
                term = coalesce (
                    nullif(st_ls.long_common_name, ''),
                    (
                        coalesce(nullif(st_ls.component, '') || ':', '') ||
                        coalesce(nullif(st_ls.property, '') || ':', '') ||
                        coalesce(nullif(st_ls.time_aspect, '') || ':', '') ||
                        coalesce(nullif(st_ls.system, '') || ':', '') ||
                        coalesce(nullif(st_ls.scale_type, '') || ':', '') ||
                        coalesce(nullif(st_ls.method_type, '') || ':', '')
                    )
                )
        """
    })
    curs = conn.cursor()
    gmPG2.run_rw_queries(link_obj = curs, queries = queries)
    curs.close()
    # this commit also covers the data source queries run above
    conn.commit()
    _log.debug('transfer from staging table to real table done')

    # clean out staging area
    _empty_loinc_staging(conn)

    return True


def _empty_loinc_staging(conn):
    """Delete all rows from staging.loinc_staging and commit."""
    curs = conn.cursor()
    cmd = """DELETE FROM staging.loinc_staging"""
    gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}])
    curs.close()
    conn.commit()
    _log.debug('staging table emptied')
372
373
# SQL templates for looking up LOINC codes by a fragment. Each one selects
# three columns (data, field_label, list_label) and contains the marker
# %(fragment_condition)s in its WHERE clause.
# NOTE(review): presumably the marker is substituted by the consumer of these
# templates (gmMatchProvider is imported above) - confirm against the caller.

# Candidate codes taken from the LOINC column of clin.test_type.
374 _SQL_LOINC_from_test_type = """
375 -- from test type
376 SELECT
377 loinc AS data,
378 loinc AS field_label,
379 (loinc || ': ' || abbrev || ' (' || name || ')') AS list_label
380 FROM clin.test_type
381 WHERE loinc %(fragment_condition)s
382 """
383
# Candidate codes from ref.v_coded_terms restricted to the current user
# language; the condition is applied to both the code and the term.
384 _SQL_LOINC_from_i18n_coded_term = """
385 -- from coded term, in user language
386 SELECT
387 code AS data,
388 code AS field_label,
389 (code || ': ' || term) AS list_label
390 FROM ref.v_coded_terms
391 WHERE
392 coding_system = 'LOINC'
393 AND
394 lang = i18n.get_curr_lang()
395 AND
396 (code %(fragment_condition)s
397 OR
398 term %(fragment_condition)s)
399 """
400
# Same lookup restricted to terms with lang = 'en_EN'.
401 _SQL_LOINC_from_en_EN_coded_term = """
402 -- from coded term, in English
403 SELECT
404 code AS data,
405 code AS field_label,
406 (code || ': ' || term) AS list_label
407 FROM ref.v_coded_terms
408 WHERE
409 coding_system = 'LOINC'
410 AND
411 lang = 'en_EN'
412 AND
413 (code %(fragment_condition)s
414 OR
415 term %(fragment_condition)s)
416 """
417
# Same lookup without any restriction on the language.
418 _SQL_LOINC_from_any_coded_term = """
419 -- from coded term, in any language
420 SELECT
421 code AS data,
422 code AS field_label,
423 (code || ': ' || term) AS list_label
424 FROM ref.v_coded_terms
425 WHERE
426 coding_system = 'LOINC'
427 AND
428 (code %(fragment_condition)s
429 OR
430 term %(fragment_condition)s)
431 """
432
433
494
495
496
497
# Self-test entry point: only proceeds when the module is run as a script
# with "test" as the first command line argument.
# NOTE(review): several lines of this section (apparently the "def" headers
# of the test functions) are not visible in this listing.
498 if __name__ == "__main__":
499
# no argument at all -> nothing to do
500 if len(sys.argv) < 2:
501 sys.exit()
502
# anything but "test" as first argument -> nothing to do
503 if sys.argv[1] != 'test':
504 sys.exit()
505
506 from Gnumed.pycommon import gmLog2
507 from Gnumed.pycommon import gmI18N
508
509 gmI18N.activate_locale()
510
511
512
515
518
# look up and print the term(s) for the LOINC code given as second argument
520 term = loinc2term(sys.argv[2])
521 print(sys.argv[2], '->', term)
522
523
528
529
530
531
532
# NOTE(review): test_format_loinc is not defined in the visible part of the file
533 test_format_loinc()
534
535
536