1
2 """LOINC handling code.
3
4 http://loinc.org
5
6 license: GPL v2 or later
7 """
8
9 __version__ = "$Revision: 1.7 $"
10 __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>"
11
12 import sys, codecs, logging, csv
13
14
15 if __name__ == '__main__':
16 sys.path.insert(0, '../../')
17 from Gnumed.pycommon import gmPG2
18 from Gnumed.pycommon import gmTools
19
20
21 _log = logging.getLogger('gm.loinc')
22 _log.info(__version__)
23
# Stored as the "source" of the LOINC data source record on import.
origin_url = u'http://loinc.org'

# Encoding used when reading the LOINC source file.
file_encoding = 'latin1'

# A line containing this text ends the license part of the LOINC source
# file; everything after it is written to the data file.
license_delimiter = u'Clip Here for Data'

# The license line starting with this tag carries the version string.
version_tag = u'LOINC(R) Database Version'

# Long and short name under which the data source is registered.
name_long = u'LOINC® (Logical Observation Identifiers Names and Codes)'
name_short = u'LOINC'

# Field names of one LOINC record; the import code uses the number of
# entries to size the placeholder list of the staging table INSERT.
# NOTE(review): presumably in the column order of the LOINC data file -- confirm.
loinc_fields = (
	u"LOINC_NUM COMPONENT PROPERTY TIME_ASPCT SYSTEM SCALE_TYP METHOD_TYP "
	u"RELAT_NMS CLASS SOURCE DT_LAST_CH CHNG_TYPE COMMENTS ANSWERLIST STATUS "
	u"MAP_TO SCOPE NORM_RANGE IPCC_UNITS REFERENCE EXACT_CMP_SY MOLAR_MASS "
	u"CLASSTYPE FORMULA SPECIES EXMPL_ANSWERS ACSSYM BASE_NAME FINAL NAACCR_ID "
	u"CODE_TABLE SETROOT PANELELEMENTS SURVEY_QUEST_TEXT SURVEY_QUEST_SRC "
	u"UNITSREQUIRED SUBMITTED_UNITS RELATEDNAMES2 SHORTNAME ORDER_OBS "
	u"CDISC_COMMON_TESTS HL7_FIELD_SUBFIELD_ID EXTERNAL_COPYRIGHT_NOTICE "
	u"EXAMPLE_UNITS INPC_PERCENTAGE LONG_COMMON_NAME"
).split()


# Predefined groups of LOINC codes.
# NOTE(review): judging by the names these denote GFR, body height and
# body weight measurements -- confirm against the LOINC database.
LOINC_gfr_quantity = [
	'33914-3', '45066-8', '48642-3', '48643-1', '50044-7',
	'50210-4', '50384-7', '62238-1', '69405-9', '70969-1',
]
LOINC_height = [
	'3137-7', '3138-5', '8301-4', '8302-2',
	'8305-5', '8306-3', '8307-1', '8308-9',
]
LOINC_weight = [
	'18833-4', '29463-7', '3141-9', '3142-7', '8335-2',
	'8339-4', '8344-4', '8346-9', '8351-9',
]
38
39
41
42
def loinc2term(loinc=None):
	"""Look up the term stored for a LOINC code.

	NOTE(review): the "def" line is not part of the visible source. Name
	and parameter were reconstructed from the call loinc2term(sys.argv[2])
	and from the use of "loinc" in the body -- confirm the signature.

	The query takes the first non-NULL result of three lookups in
	ref.v_coded_terms (coding system 'LOINC', code = <loinc>):

		1) the term in the language returned by i18n.get_curr_lang()
		2) the term in language 'en_EN'
		3) the term regardless of language

	loinc: the LOINC code to look up

	Returns an empty list if the query yields NULL, else a list holding
	the first column of each returned row.
	"""
	query = u"""
		SELECT coalesce (
			(SELECT term
			FROM ref.v_coded_terms
			WHERE
				coding_system = 'LOINC'
					AND
				code = %(loinc)s
					AND
				lang = i18n.get_curr_lang()
			),
			(SELECT term
			FROM ref.v_coded_terms
			WHERE
				coding_system = 'LOINC'
					AND
				code = %(loinc)s
					AND
				lang = 'en_EN'
			),
			(SELECT term
			FROM ref.v_coded_terms
			WHERE
				coding_system = 'LOINC'
					AND
				code = %(loinc)s
			)
		)"""
	rows, idx = gmPG2.run_ro_queries (
		queries = [{'cmd': query, 'args': {'loinc': loinc}}],
		get_col_idx = False
	)

	# coalesce() over three empty lookups yields a single NULL
	found = rows[0][0] is not None
	if not found:
		return []

	return [ row[0] for row in rows ]
78
80
81 _log.debug('splitting LOINC source file [%s]', input_fname)
82
83 if license_fname is None:
84 license_fname = gmTools.get_unique_filename(prefix = 'loinc_license-', suffix = '.txt')
85 _log.debug('LOINC header: %s', license_fname)
86
87 if data_fname is None:
88 data_fname = gmTools.get_unique_filename(prefix = 'loinc_data-', suffix = '.csv')
89 _log.debug('LOINC data: %s', data_fname)
90
91 loinc_file = codecs.open(input_fname, 'rU', encoding = file_encoding, errors = 'replace')
92 out_file = codecs.open(license_fname, 'w', encoding = 'utf8', errors = 'replace')
93
94 for line in loinc_file:
95
96 if license_delimiter in line:
97 out_file.write(line)
98 out_file.close()
99 out_file = codecs.open(data_fname, 'w', encoding = 'utf8', errors = 'replace')
100 continue
101
102 out_file.write(line)
103
104 out_file.close()
105
106 return data_fname, license_fname
107
109
110 csv_file = codecs.open(data_fname, 'rU', 'utf8', 'replace')
111 first_line = csv_file.readline()
112 sniffer = csv.Sniffer()
113 if sniffer.has_header(first_line):
114 pass
115
117
def get_version(license_fname=None):
	"""Extract the LOINC version string from a license file.

	NOTE(review): the "def" line is not part of the visible source. Name
	and parameter were reconstructed from the call
	get_version(license_fname = license_fname) -- confirm the signature.

	license_fname: name of the UTF-8 encoded license file to scan

	Returns the stripped remainder of the first line which starts with
	the module level <version_tag>, or None if there is no such line.
	"""
	in_file = codecs.open(license_fname, 'rU', encoding = 'utf8', errors = 'replace')
	# try/finally so the file gets closed even if reading it fails
	try:
		for line in in_file:
			if line.startswith(version_tag):
				return line[len(version_tag):].strip()
	finally:
		in_file.close()

	return None
128
def _loinc_register_data_source(args):
	"""(Re)create the ref.data_source row for this LOINC version, return its pk."""
	queries = [{
		'cmd': u"""DELETE FROM ref.data_source WHERE name_short = %(name_short)s AND version = %(ver)s""",
		'args': args
	}, {
		'cmd': u"""
			INSERT INTO ref.data_source (name_long, name_short, version, description, lang, source) values (
				%(name_long)s,
				%(name_short)s,
				%(ver)s,
				%(desc)s,
				%(lang)s,
				%(url)s
			)""",
		'args': args
	}, {
		'cmd': u"""SELECT pk FROM ref.data_source WHERE name_short = %(name_short)s AND version = %(ver)s""",
		'args': args
	}]
	rows, idx = gmPG2.run_rw_queries(queries = queries, return_data = True)
	return rows[0][0]


def _loinc_empty_staging(conn):
	"""Delete all rows from ref.loinc_staging and commit on <conn>."""
	curs = conn.cursor()
	try:
		gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': u"""DELETE FROM ref.loinc_staging"""}])
	finally:
		curs.close()
	conn.commit()


def _loinc_fill_staging(conn, loinc_reader):
	"""Insert every row but the first from <loinc_reader> into ref.loinc_staging and commit."""
	# one positional placeholder per LOINC field
	placeholders = u', '.join([u'%s'] * len(loinc_fields))
	cmd = u"""INSERT INTO ref.loinc_staging values (%s)""" % placeholders
	curs = conn.cursor()
	try:
		header_skipped = False
		for loinc_line in loinc_reader:
			# the first row is skipped rather than imported (header line)
			if not header_skipped:
				header_skipped = True
				continue
			gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': loinc_line}])
	finally:
		curs.close()
	conn.commit()


def _loinc_transfer_staging(conn, data_src_pk):
	"""Copy ref.loinc_staging into ref.loinc, linked to data source <data_src_pk>, and commit."""
	# empty strings in the staging table become NULL in the real table,
	# <term> falls back to the colon-joined name parts if there is
	# no long common name
	cmd = u"""
		INSERT INTO ref.loinc (
			fk_data_source,

			term,

			code,
			comment,
			component,
			property,
			time_aspect,
			system,
			scale_type,
			method_type,
			related_names_1_old,
			grouping_class,
			loinc_internal_source,
			dt_last_change,
			change_type,
			answer_list,
			code_status,
			maps_to,
			scope,
			normal_range,
			ipcc_units,
			reference,
			exact_component_synonym,
			molar_mass,
			grouping_class_type,
			formula,
			species,
			example_answers,
			acs_synonyms,
			base_name,
			final,
			naa_ccr_id,
			code_table,
			is_set_root,
			panel_elements,
			survey_question_text,
			survey_question_source,
			units_required,
			submitted_units,
			related_names_2,
			short_name,
			order_obs,
			cdisc_common_tests,
			hl7_field_subfield_id,
			external_copyright_notice,
			example_units,
			inpc_percentage,
			long_common_name
		)

		SELECT

			%(src_pk)s,

			coalesce (
				nullif(long_common_name, ''),
				(
					coalesce(nullif(component, '') || ':', '') ||
					coalesce(nullif(property, '') || ':', '') ||
					coalesce(nullif(time_aspect, '') || ':', '') ||
					coalesce(nullif(system, '') || ':', '') ||
					coalesce(nullif(scale_type, '') || ':', '') ||
					coalesce(nullif(method_type, '') || ':', '')
				)
			),

			nullif(loinc_num, ''),
			nullif(comments, ''),
			nullif(component, ''),
			nullif(property, ''),
			nullif(time_aspect, ''),
			nullif(system, ''),
			nullif(scale_type, ''),
			nullif(method_type, ''),
			nullif(related_names_1_old, ''),
			nullif(class, ''),
			nullif(source, ''),
			nullif(dt_last_change, ''),
			nullif(change_type, ''),
			nullif(answer_list, ''),
			nullif(status, ''),
			nullif(map_to, ''),
			nullif(scope, ''),
			nullif(normal_range, ''),
			nullif(ipcc_units, ''),
			nullif(reference, ''),
			nullif(exact_component_synonym, ''),
			nullif(molar_mass, ''),
			nullif(class_type, '')::smallint,
			nullif(formula, ''),
			nullif(species, ''),
			nullif(example_answers, ''),
			nullif(acs_synonyms, ''),
			nullif(base_name, ''),
			nullif(final, ''),
			nullif(naa_ccr_id, ''),
			nullif(code_table, ''),
			nullif(is_set_root, '')::boolean,
			nullif(panel_elements, ''),
			nullif(survey_question_text, ''),
			nullif(survey_question_source, ''),
			nullif(units_required, ''),
			nullif(submitted_units, ''),
			nullif(related_names_2, ''),
			nullif(short_name, ''),
			nullif(order_obs, ''),
			nullif(cdisc_common_tests, ''),
			nullif(hl7_field_subfield_id, ''),
			nullif(external_copyright_notice, ''),
			nullif(example_units, ''),
			nullif(inpc_percentage, ''),
			nullif(long_common_name, '')

		FROM
			ref.loinc_staging
	"""
	curs = conn.cursor()
	try:
		gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': {'src_pk': data_src_pk}}])
	finally:
		curs.close()
	conn.commit()


def loinc_import(data_fname=None, license_fname=None, version=None, conn=None, lang='en_EN'):
	"""Import LOINC data into the database.

	Steps:

		1) (re)create the ref.data_source row for this LOINC version,
		   using the content of the license file as its description
		2) empty ref.loinc_staging and load the data file into it,
		   skipping the first row
		3) copy the staging rows into ref.loinc
		4) empty ref.loinc_staging again

	data_fname: UTF-8 encoded, tab separated LOINC data file
	license_fname: UTF-8 encoded LOINC license file
	version: LOINC version, read from the license file if None
	conn: database connection used for staging and transfer, it is
		committed after each step
	lang: language stored with the data source record

	Returns True.

	Raises ValueError if the version cannot be detected or if no
	connection was passed in.
	"""
	if version is None:
		version = get_version(license_fname = license_fname)

	if version is None:
		raise ValueError('cannot detect LOINC version')

	# fail before touching the database rather than after the data
	# source record has already been replaced
	if conn is None:
		raise ValueError('cannot import LOINC without a database connection')

	_log.debug('importing LOINC version [%s]', version)

	# the full license text becomes the data source description
	in_file = codecs.open(license_fname, 'rU', encoding = 'utf8', errors = 'replace')
	try:
		desc = in_file.read()
	finally:
		in_file.close()

	args = {'ver': version, 'desc': desc, 'url': origin_url, 'name_long': name_long, 'name_short': name_short, 'lang': lang}
	data_src_pk = _loinc_register_data_source(args)
	_log.debug('data source record created, pk is #%s', data_src_pk)

	# the data file is opened before the staging table is emptied so
	# that an unreadable file leaves the staging table alone
	csv_file = codecs.open(data_fname, 'rU', 'utf8', 'replace')
	try:
		loinc_reader = gmTools.unicode_csv_reader(csv_file, delimiter = "\t", quotechar = '"')

		_loinc_empty_staging(conn)
		_log.debug('staging table emptied')

		_loinc_fill_staging(conn, loinc_reader)
	finally:
		csv_file.close()
	_log.debug('staging table loaded')

	_loinc_transfer_staging(conn, data_src_pk)
	_log.debug('transfer from staging table to real table done')

	_loinc_empty_staging(conn)
	_log.debug('staging table emptied')

	return True
333
334
335
336 if __name__ == "__main__":
337
338 if len(sys.argv) < 2:
339 sys.exit()
340
341 if sys.argv[1] != 'test':
342 sys.exit()
343
344 from Gnumed.pycommon import gmLog2
345 from Gnumed.pycommon import gmI18N
346
347 gmI18N.activate_locale()
348
349
350
353
356
358 term = loinc2term(sys.argv[2])
359 print sys.argv[2], '->', term
360
361 test_loinc_split()
362
363
364
365
366