1
2 """LOINC handling code.
3
4 http://loinc.org
5
6 license: GPL v2 or later
7 """
8
9 __version__ = "$Revision: 1.7 $"
10 __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>"
11
12 import sys, codecs, logging, csv
13
14
15 if __name__ == '__main__':
16 sys.path.insert(0, '../../')
17 from Gnumed.pycommon import gmPG2
18 from Gnumed.pycommon import gmTools
19
20
# Module-level logger; the module revision string is written to the log
# once, when this module is imported.
_log = logging.getLogger('gm.loinc')
_log.info(__version__)
23
# Metadata stored with the imported data source.
origin_url = u'http://loinc.org'
# Encoding used when reading the LOINC source file.
file_encoding = 'latin1'
# A source line containing this text ends the license part of the file.
license_delimiter = u'Clip Here for Data'
# Prefix of the license-file line that carries the version string.
version_tag = u'LOINC(R) Database Version'
name_long = u'LOINC® (Logical Observation Identifiers Names and Codes)'
name_short = u'LOINC'

# Column names of a LOINC data row; the import code uses the number of
# entries to build the placeholder list for the staging-table INSERT.
loinc_fields = [
    u'LOINC_NUM',
    u'COMPONENT',
    u'PROPERTY',
    u'TIME_ASPCT',
    u'SYSTEM',
    u'SCALE_TYP',
    u'METHOD_TYP',
    u'RELAT_NMS',
    u'CLASS',
    u'SOURCE',
    u'DT_LAST_CH',
    u'CHNG_TYPE',
    u'COMMENTS',
    u'ANSWERLIST',
    u'STATUS',
    u'MAP_TO',
    u'SCOPE',
    u'NORM_RANGE',
    u'IPCC_UNITS',
    u'REFERENCE',
    u'EXACT_CMP_SY',
    u'MOLAR_MASS',
    u'CLASSTYPE',
    u'FORMULA',
    u'SPECIES',
    u'EXMPL_ANSWERS',
    u'ACSSYM',
    u'BASE_NAME',
    u'FINAL',
    u'NAACCR_ID',
    u'CODE_TABLE',
    u'SETROOT',
    u'PANELELEMENTS',
    u'SURVEY_QUEST_TEXT',
    u'SURVEY_QUEST_SRC',
    u'UNITSREQUIRED',
    u'SUBMITTED_UNITS',
    u'RELATEDNAMES2',
    u'SHORTNAME',
    u'ORDER_OBS',
    u'CDISC_COMMON_TESTS',
    u'HL7_FIELD_SUBFIELD_ID',
    u'EXTERNAL_COPYRIGHT_NOTICE',
    u'EXAMPLE_UNITS',
    u'INPC_PERCENTAGE',
    u'LONG_COMMON_NAME',
]
32
33
34
# Hard-coded lists of LOINC codes. Judging by the variable names they
# identify GFR, body height and body weight results - the codes
# themselves are not verified anywhere in this module.
LOINC_gfr_quantity = [
    '33914-3',
    '45066-8',
    '48642-3',
    '48643-1',
    '50044-7',
    '50210-4',
    '50384-7',
    '62238-1',
    '69405-9',
    '70969-1',
]
LOINC_height = [
    '3137-7',
    '3138-5',
    '8301-4',
    '8302-2',
    '8305-5',
    '8306-3',
    '8307-1',
    '8308-9',
]
LOINC_weight = [
    '18833-4',
    '29463-7',
    '3141-9',
    '3142-7',
    '8335-2',
    '8339-4',
    '8344-4',
    '8346-9',
    '8351-9',
]
38
39
41
42
# NOTE(review): the "def" line of this function is missing from the text
# under review. Name and parameter were restored from the call
# loinc2term(sys.argv[2]) in the self-test section and from the use of
# "loinc" in the body; the default value is an assumption - confirm.
def loinc2term(loinc=None):
    """Look up the term stored for a LOINC code.

    The query prefers a term in the language reported by
    i18n.get_curr_lang(), then one in 'en_EN', then one in any language.

    loinc: the LOINC code to look up

    Returns a list with the term found, or an empty list if the
    database holds no term for this code.
    """
    # Each coalesce() argument is a scalar subquery and must not yield
    # more than one row, otherwise PostgreSQL aborts the whole query.
    # LIMIT 1 guards against several matching terms (say, the same code
    # imported from more than one data source or language).
    cmd = u"""
        SELECT coalesce (
            (SELECT term
            FROM ref.v_coded_terms
            WHERE
                coding_system = 'LOINC'
                    AND
                code = %(loinc)s
                    AND
                lang = i18n.get_curr_lang()
            LIMIT 1
            ),
            (SELECT term
            FROM ref.v_coded_terms
            WHERE
                coding_system = 'LOINC'
                    AND
                code = %(loinc)s
                    AND
                lang = 'en_EN'
            LIMIT 1
            ),
            (SELECT term
            FROM ref.v_coded_terms
            WHERE
                coding_system = 'LOINC'
                    AND
                code = %(loinc)s
            LIMIT 1
            )
        )"""
    args = {'loinc': loinc}
    rows, idx = gmPG2.run_ro_queries(queries = [{'cmd': cmd, 'args': args}], get_col_idx = False)

    # coalesce() yields NULL when none of the three lookups matched
    if rows[0][0] is None:
        return []

    return [ r[0] for r in rows ]
78
80
# NOTE(review): the "def" line of this function is missing from the text
# under review. The parameter names come from the body; the function
# name and the parameter order are NOT visible here and were filled in
# as an assumption - confirm against the original module before merging.
def split_LOINCDBTXT(input_fname=None, data_fname=None, license_fname=None):
    """Split a LOINC source file into a license file and a data file.

    Lines up to and including the one containing the license delimiter
    are written to the license file, all following lines to the data
    file. The input is read using the module's file_encoding, both
    output files are written as utf8.

    input_fname: name of the LOINC source file to split
    data_fname: name of the data file to write, a unique name is
        generated if None
    license_fname: name of the license file to write, a unique name is
        generated if None

    Returns the tuple (data_fname, license_fname).
    """
    _log.debug('splitting LOINC source file [%s]', input_fname)

    if license_fname is None:
        license_fname = gmTools.get_unique_filename(prefix = 'loinc_license-', suffix = '.txt')
    _log.debug('LOINC header: %s', license_fname)

    if data_fname is None:
        data_fname = gmTools.get_unique_filename(prefix = 'loinc_data-', suffix = '.csv')
    _log.debug('LOINC data: %s', data_fname)

    loinc_file = codecs.open(input_fname, 'rU', encoding = file_encoding, errors = 'replace')
    try:
        out_file = codecs.open(license_fname, 'w', encoding = 'utf8', errors = 'replace')
        try:
            for line in loinc_file:
                if license_delimiter in line:
                    # the delimiter line still belongs to the license
                    # part, everything after it goes into the data file
                    out_file.write(line)
                    out_file.close()
                    out_file = codecs.open(data_fname, 'w', encoding = 'utf8', errors = 'replace')
                    continue
                out_file.write(line)
        finally:
            # closes whichever output file is current, also on errors
            out_file.close()
    finally:
        # the input file was previously never closed
        loinc_file.close()

    return data_fname, license_fname
107
109
# NOTE(review): the "def" line of this function is missing from the text
# under review. Only the parameter name "data_fname" is visible in the
# body; the function name and the default value were filled in as an
# assumption - confirm against the original module before merging.
def map_field_names(data_fname='loinc_data.csv'):
    """Sniff whether a LOINC data file starts with a header line.

    This is a stub: the outcome of the check is not used for anything
    yet and nothing is returned.

    data_fname: name of the (utf8 encoded) LOINC data file
    """
    csv_file = codecs.open(data_fname, 'rU', 'utf8', 'replace')
    try:
        first_line = csv_file.readline()
    finally:
        # the file was previously left open
        csv_file.close()

    sniffer = csv.Sniffer()
    if sniffer.has_header(first_line):
        pass
115
117
# NOTE(review): the "def" line of this function is missing from the text
# under review. Name and keyword were restored from the call
# get_version(license_fname = license_fname) in loinc_import(); the
# default value is an assumption - confirm.
def get_version(license_fname='loinc_license.txt'):
    """Extract the LOINC version from a license file.

    Scans the file for the first line starting with the module's
    version_tag and returns the remainder of that line with surrounding
    whitespace removed.

    license_fname: name of the (utf8 encoded) license file

    Returns the version string, or None if no line starts with the tag.
    """
    in_file = codecs.open(license_fname, 'rU', encoding = 'utf8', errors = 'replace')
    try:
        version = None
        for line in in_file:
            if line.startswith(version_tag):
                version = line[len(version_tag):].strip()
                break
    finally:
        # release the file handle even if reading fails
        in_file.close()

    return version
128
def _empty_loinc_staging(conn):
    """Delete all rows from ref.loinc_staging and commit."""
    curs = conn.cursor()
    try:
        cmd = u"""DELETE FROM ref.loinc_staging"""
        gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}])
    finally:
        curs.close()
    conn.commit()
    _log.debug('staging table emptied')


def loinc_import(data_fname=None, license_fname=None, version=None, conn=None, lang='en_EN'):
    """Import a LOINC data file into the database.

    Steps:
        1) empty the staging table
        2) load the data file into the staging table
        3) create or update the data source record
        4) copy the rows from the staging table into ref.loinc
        5) empty the staging table again

    data_fname: name of the tab separated, utf8 encoded LOINC data file
    license_fname: name of the utf8 encoded license file, its content is
        stored as the description of the data source
    version: LOINC version, read from the license file if None
    conn: database connection to run the import on
    lang: language stored with the data source record

    Returns True.
    Raises ValueError if no version was given and none can be detected.
    """
    if version is None:
        version = get_version(license_fname = license_fname)

    if version is None:
        raise ValueError('cannot detect LOINC version')

    _log.debug('importing LOINC version [%s]', version)

    # 1) start from an empty staging table
    _empty_loinc_staging(conn)

    # 2) load the data file into the staging table, one INSERT per row
    # with one placeholder per entry in loinc_fields
    cmd = u"""INSERT INTO ref.loinc_staging values (%s%%s)""" % (u'%s, ' * (len(loinc_fields) - 1))
    csv_file = codecs.open(data_fname, 'rU', 'utf8', 'replace')
    try:
        loinc_reader = gmTools.unicode_csv_reader(csv_file, delimiter = "\t", quotechar = '"')
        curs = conn.cursor()
        try:
            header_skipped = False
            for loinc_line in loinc_reader:
                # the first row is not imported (presumably it holds
                # the column names)
                if not header_skipped:
                    header_skipped = True
                    continue
                gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': loinc_line}])
        finally:
            curs.close()
        conn.commit()
    finally:
        csv_file.close()
    _log.debug('staging table loaded')

    # 3) create the data source record if needed, update it, get its pk
    in_file = codecs.open(license_fname, 'rU', encoding = 'utf8', errors = 'replace')
    try:
        desc = in_file.read()
    finally:
        in_file.close()
    args = {'ver': version, 'desc': desc, 'url': origin_url, 'name_long': name_long, 'name_short': name_short, 'lang': lang}
    queries = [
        # insert if not existing
        {'args': args, 'cmd': u"""
            INSERT INTO ref.data_source (name_long, name_short, version) SELECT
                %(name_long)s,
                %(name_short)s,
                %(ver)s
            WHERE NOT EXISTS (
                SELECT 1 FROM ref.data_source WHERE
                    name_long = %(name_long)s
                        AND
                    name_short = %(name_short)s
                        AND
                    version = %(ver)s
            )"""
        },
        # update the non-unique fields
        {'args': args, 'cmd': u"""
            UPDATE ref.data_source SET
                description = %(desc)s,
                source = %(url)s,
                lang = %(lang)s
            WHERE
                name_long = %(name_long)s
                    AND
                name_short = %(name_short)s
                    AND
                version = %(ver)s
            """
        },
        # retrieve the primary key of the data source
        {'args': args, 'cmd': u"""SELECT pk FROM ref.data_source WHERE name_short = %(name_short)s AND version = %(ver)s"""}
    ]
    curs = conn.cursor()
    try:
        rows, idx = gmPG2.run_rw_queries(link_obj = curs, queries = queries, return_data = True)
        data_src_pk = rows[0][0]
    finally:
        curs.close()
    # not committed here, the commit after step 4 covers these queries
    _log.debug('data source record created or updated, pk is #%s', data_src_pk)

    # 4) transfer from the staging table into the real table
    args = {'src_pk': data_src_pk}
    queries = [
        # insert the (data source, code, term) rows not yet existing,
        # the term falls back to the colon separated parts if there
        # is no long common name
        {'args': args, 'cmd': u"""
            INSERT INTO ref.loinc (
                fk_data_source, term, code
            )
            SELECT
                %(src_pk)s,
                coalesce (
                    nullif(long_common_name, ''),
                    (
                        coalesce(nullif(component, '') || ':', '') ||
                        coalesce(nullif(property, '') || ':', '') ||
                        coalesce(nullif(time_aspect, '') || ':', '') ||
                        coalesce(nullif(system, '') || ':', '') ||
                        coalesce(nullif(scale_type, '') || ':', '') ||
                        coalesce(nullif(method_type, '') || ':', '')
                    )
                ),
                nullif(loinc_num, '')
            FROM
                ref.loinc_staging r_ls
            WHERE NOT EXISTS (
                SELECT 1 FROM ref.loinc r_l WHERE
                    r_l.fk_data_source = %(src_pk)s
                        AND
                    r_l.code = nullif(r_ls.loinc_num, '')
                        AND
                    r_l.term = coalesce (
                        nullif(r_ls.long_common_name, ''),
                        (
                            coalesce(nullif(r_ls.component, '') || ':', '') ||
                            coalesce(nullif(r_ls.property, '') || ':', '') ||
                            coalesce(nullif(r_ls.time_aspect, '') || ':', '') ||
                            coalesce(nullif(r_ls.system, '') || ':', '') ||
                            coalesce(nullif(r_ls.scale_type, '') || ':', '') ||
                            coalesce(nullif(r_ls.method_type, '') || ':', '')
                        )
                    )
            )"""
        },
        # fill in the remaining columns of the matching rows,
        # empty strings are stored as NULL
        {'args': args, 'cmd': u"""
            UPDATE ref.loinc SET
                comment = nullif(r_ls.comments, ''),
                component = nullif(r_ls.component, ''),
                property = nullif(r_ls.property, ''),
                time_aspect = nullif(r_ls.time_aspect, ''),
                system = nullif(r_ls.system, ''),
                scale_type = nullif(r_ls.scale_type, ''),
                method_type = nullif(r_ls.method_type, ''),
                related_names_1_old = nullif(r_ls.related_names_1_old, ''),
                grouping_class = nullif(r_ls.class, ''),
                loinc_internal_source = nullif(r_ls.source, ''),
                dt_last_change = nullif(r_ls.dt_last_change, ''),
                change_type = nullif(r_ls.change_type, ''),
                answer_list = nullif(r_ls.answer_list, ''),
                code_status = nullif(r_ls.status, ''),
                maps_to = nullif(r_ls.map_to, ''),
                scope = nullif(r_ls.scope, ''),
                normal_range = nullif(r_ls.normal_range, ''),
                ipcc_units = nullif(r_ls.ipcc_units, ''),
                reference = nullif(r_ls.reference, ''),
                exact_component_synonym = nullif(r_ls.exact_component_synonym, ''),
                molar_mass = nullif(r_ls.molar_mass, ''),
                grouping_class_type = nullif(r_ls.class_type, '')::smallint,
                formula = nullif(r_ls.formula, ''),
                species = nullif(r_ls.species, ''),
                example_answers = nullif(r_ls.example_answers, ''),
                acs_synonyms = nullif(r_ls.acs_synonyms, ''),
                base_name = nullif(r_ls.base_name, ''),
                final = nullif(r_ls.final, ''),
                naa_ccr_id = nullif(r_ls.naa_ccr_id, ''),
                code_table = nullif(r_ls.code_table, ''),
                is_set_root = nullif(r_ls.is_set_root, '')::boolean,
                panel_elements = nullif(r_ls.panel_elements, ''),
                survey_question_text = nullif(r_ls.survey_question_text, ''),
                survey_question_source = nullif(r_ls.survey_question_source, ''),
                units_required = nullif(r_ls.units_required, ''),
                submitted_units = nullif(r_ls.submitted_units, ''),
                related_names_2 = nullif(r_ls.related_names_2, ''),
                short_name = nullif(r_ls.short_name, ''),
                order_obs = nullif(r_ls.order_obs, ''),
                cdisc_common_tests = nullif(r_ls.cdisc_common_tests, ''),
                hl7_field_subfield_id = nullif(r_ls.hl7_field_subfield_id, ''),
                external_copyright_notice = nullif(r_ls.external_copyright_notice, ''),
                example_units = nullif(r_ls.example_units, ''),
                inpc_percentage = nullif(r_ls.inpc_percentage, ''),
                long_common_name = nullif(r_ls.long_common_name, '')
            FROM
                ref.loinc_staging r_ls
            WHERE
                fk_data_source = %(src_pk)s
                    AND
                code = nullif(r_ls.loinc_num, '')
                    AND
                term = coalesce (
                    nullif(r_ls.long_common_name, ''),
                    (
                        coalesce(nullif(r_ls.component, '') || ':', '') ||
                        coalesce(nullif(r_ls.property, '') || ':', '') ||
                        coalesce(nullif(r_ls.time_aspect, '') || ':', '') ||
                        coalesce(nullif(r_ls.system, '') || ':', '') ||
                        coalesce(nullif(r_ls.scale_type, '') || ':', '') ||
                        coalesce(nullif(r_ls.method_type, '') || ':', '')
                    )
                )
            """
        }
    ]
    curs = conn.cursor()
    try:
        gmPG2.run_rw_queries(link_obj = curs, queries = queries)
    finally:
        curs.close()
    conn.commit()
    _log.debug('transfer from staging table to real table done')

    # 5) leave an empty staging table behind
    _empty_loinc_staging(conn)

    return True
335
336
337
# Self-test section: only does anything when the module is run as a
# script with "test" as its first command line argument.
if __name__ == "__main__":

    # silently quit unless called as "<script> test ..."
    if len(sys.argv) < 2:
        sys.exit()

    if sys.argv[1] != 'test':
        sys.exit()

    from Gnumed.pycommon import gmLog2
    from Gnumed.pycommon import gmI18N

    gmI18N.activate_locale()

    # NOTE(review): the definitions of the test helpers (including
    # test_loinc_split(), called below) are not visible in the text under
    # review and the original indentation is lost - the statements below
    # presumably belong to such helpers; restore them from the original
    # module.

    # look up the term for the LOINC code given as second argument
    term = loinc2term(sys.argv[2])
    print sys.argv[2], '->', term

    test_loinc_split()
364
365
366
367
368