1
2 """LOINC handling code.
3
4 http://loinc.org
5
6 license: GPL v2 or later
7 """
8
9 __version__ = "$Revision: 1.7 $"
10 __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>"
11
12 import sys, codecs, logging, csv
13
14
15 if __name__ == '__main__':
16 sys.path.insert(0, '../../')
17 from Gnumed.pycommon import gmPG2
18 from Gnumed.pycommon import gmTools
19
20
# Module-level logger, registered under the name 'gm.loinc'.
21 _log = logging.getLogger('gm.loinc')
# Write the module revision to the log once, at import time.
22 _log.info(__version__)
23
# Home of the LOINC distribution; loinc_import() stores it as the
# "source" of the ref.data_source record.
origin_url = u'http://loinc.org'

# Encoding in which the raw LOINC source file is read.
file_encoding = 'latin1'

# A line of the raw source file containing this marker ends the license
# header; the lines following it are written to the data file.
license_delimiter = u'Clip Here for Data'

# Prefix of the license line announcing the release; the remainder of
# that line is taken as the version string.
version_tag = u'LOINC(R) Database Version'

# Names under which the data source is registered in ref.data_source.
name_long = u'LOINC® (Logical Observation Identifiers Names and Codes)'
name_short = u'LOINC'

# Field names of one LOINC data row. Within this module only the number
# of entries is used: it sizes the placeholder list of the INSERT into
# ref.loinc_staging.
loinc_fields = (
    u"LOINC_NUM COMPONENT PROPERTY TIME_ASPCT SYSTEM SCALE_TYP METHOD_TYP "
    u"RELAT_NMS CLASS SOURCE DT_LAST_CH CHNG_TYPE COMMENTS ANSWERLIST "
    u"STATUS MAP_TO SCOPE NORM_RANGE IPCC_UNITS REFERENCE EXACT_CMP_SY "
    u"MOLAR_MASS CLASSTYPE FORMULA SPECIES EXMPL_ANSWERS ACSSYM BASE_NAME "
    u"FINAL NAACCR_ID CODE_TABLE SETROOT PANELELEMENTS SURVEY_QUEST_TEXT "
    u"SURVEY_QUEST_SRC UNITSREQUIRED SUBMITTED_UNITS RELATEDNAMES2 "
    u"SHORTNAME ORDER_OBS CDISC_COMMON_TESTS HL7_FIELD_SUBFIELD_ID "
    u"EXTERNAL_COPYRIGHT_NOTICE EXAMPLE_UNITS INPC_PERCENTAGE "
    u"LONG_COMMON_NAME"
).split()
32
33
35
36
def loinc2term(loinc=None):
    """Return the term stored for a LOINC code.

    The lookup runs against ref.v_coded_terms and takes the first hit of:
    1) the term in the language returned by i18n.get_curr_lang()
    2) the term in 'en_EN'
    3) the term regardless of language

    loinc -- the LOINC code to look up

    Returns a list holding the term, or an empty list if no term
    is known for the code.
    """
    cmd = u"""
        SELECT coalesce (
            (SELECT term
             FROM ref.v_coded_terms
             WHERE
                coding_system = 'LOINC'
                    AND
                code = %(loinc)s
                    AND
                lang = i18n.get_curr_lang()
            ),
            (SELECT term
             FROM ref.v_coded_terms
             WHERE
                coding_system = 'LOINC'
                    AND
                code = %(loinc)s
                    AND
                lang = 'en_EN'
            ),
            (SELECT term
             FROM ref.v_coded_terms
             WHERE
                coding_system = 'LOINC'
                    AND
                code = %(loinc)s
            )
        )"""
    args = {'loinc': loinc}
    rows, idx = gmPG2.run_ro_queries(queries = [{'cmd': cmd, 'args': args}], get_col_idx = False)

    # coalesce() yields NULL when none of the sub-selects found a term;
    # also be safe against an empty result set
    if not rows or rows[0][0] is None:
        return []

    return [ r[0] for r in rows ]
72
74
# NOTE(review): the "def" line of this function is missing from this listing.
# The body reads input_fname, data_fname and license_fname, which are
# presumably its parameters -- confirm against the complete file.
#
# Splits a LOINC source file in two: lines up to and including the one
# containing license_delimiter are written to the license file, the lines
# after it to the data file. Both output files are written as UTF-8.
# Returns the tuple (data_fname, license_fname).
75 _log.debug('splitting LOINC source file [%s]', input_fname)
76
# no license file name given: obtain a unique one from gmTools
77 if license_fname is None:
78 license_fname = gmTools.get_unique_filename(prefix = 'loinc_license-', suffix = '.txt')
79 _log.debug('LOINC header: %s', license_fname)
80
# no data file name given: obtain a unique one from gmTools
81 if data_fname is None:
82 data_fname = gmTools.get_unique_filename(prefix = 'loinc_data-', suffix = '.csv')
83 _log.debug('LOINC data: %s', data_fname)
84
# the source is decoded with file_encoding, undecodable bytes get replaced
# NOTE(review): loinc_file is never closed in this body
85 loinc_file = codecs.open(input_fname, 'rU', encoding = file_encoding, errors = 'replace')
# output starts out going to the license file
86 out_file = codecs.open(license_fname, 'w', encoding = 'utf8', errors = 'replace')
87
88 for line in loinc_file:
89
# the delimiter line itself still belongs to the license file,
# after it the output is switched over to the data file
# NOTE(review): a later line containing the delimiter again would
# re-open the data file in 'w' mode and thereby truncate it
90 if license_delimiter in line:
91 out_file.write(line)
92 out_file.close()
93 out_file = codecs.open(data_fname, 'w', encoding = 'utf8', errors = 'replace')
94 continue
95
96 out_file.write(line)
97
98 out_file.close()
99
100 return data_fname, license_fname
101
103
# NOTE(review): the "def" line of this function is missing from this listing;
# data_fname is presumably its parameter -- confirm against the complete file.
#
# Reads the first line of the UTF-8 data file and asks csv.Sniffer whether
# it looks like a header row. The answer is not acted upon (the branch is
# just "pass"), nothing is returned and csv_file is not closed here.
104 csv_file = codecs.open(data_fname, 'rU', 'utf8', 'replace')
105 first_line = csv_file.readline()
106 sniffer = csv.Sniffer()
107 if sniffer.has_header(first_line):
108 pass
109
111
def get_version(license_fname=None):
    """Extract the LOINC version from a LOINC license file.

    license_fname -- name of the license file, read as UTF-8 with
        undecodable bytes replaced

    Returns what follows version_tag on the first line starting with
    that tag, stripped of surrounding whitespace, or None if no line
    starts with the tag.
    """
    in_file = codecs.open(license_fname, 'rU', encoding = 'utf8', errors = 'replace')
    try:
        for line in in_file:
            if line.startswith(version_tag):
                return line[len(version_tag):].strip()
    finally:
        # release the file handle even if reading fails half-way
        in_file.close()

    return None
122
def loinc_import(data_fname=None, license_fname=None, version=None, conn=None, lang='en_EN'):
    """Import a LOINC release into the database.

    Steps:
    1) (re)create the ref.data_source record for this LOINC version
    2) empty ref.loinc_staging and load the data file into it
    3) copy the staged rows into ref.loinc, storing empty strings as NULL
    4) empty ref.loinc_staging again

    data_fname -- tab-separated LOINC data file, read as UTF-8;
        its first line is skipped
    license_fname -- LOINC license file, read as UTF-8; its content
        becomes the description of the data source record
    version -- LOINC version; if None it is taken from the license
        file by get_version()
    conn -- database connection used for staging and import, must
        offer cursor() and commit()
    lang -- language stored with the data source record

    Returns True.

    Raises ValueError if no connection was passed in or if the
    version cannot be detected.
    """
    # fail before touching the database rather than half-way through
    if conn is None:
        raise ValueError('cannot import LOINC without a database connection')

    if version is None:
        version = get_version(license_fname = license_fname)

    if version is None:
        raise ValueError('cannot detect LOINC version')

    _log.debug('importing LOINC version [%s]', version)

    in_file = codecs.open(license_fname, 'rU', encoding = 'utf8', errors = 'replace')
    try:
        desc = in_file.read()
    finally:
        in_file.close()

    args = {'ver': version, 'desc': desc, 'url': origin_url, 'name_long': name_long, 'name_short': name_short, 'lang': lang}

    # create data source record, replacing any previous one for this version
    queries = [{
        'cmd': u"""DELETE FROM ref.data_source WHERE name_short = %(name_short)s AND version = %(ver)s""",
        'args': args
    }, {
        'cmd': u"""
            INSERT INTO ref.data_source (name_long, name_short, version, description, lang, source) values (
                %(name_long)s,
                %(name_short)s,
                %(ver)s,
                %(desc)s,
                %(lang)s,
                %(url)s
            )""",
        'args': args
    }, {
        'cmd': u"""SELECT pk FROM ref.data_source WHERE name_short = %(name_short)s AND version = %(ver)s""",
        'args': args
    }]
    rows, idx = gmPG2.run_rw_queries(queries = queries, return_data = True)
    data_src_pk = rows[0][0]
    _log.debug('data source record created, pk is #%s', data_src_pk)

    empty_staging = {'cmd': u"""DELETE FROM ref.loinc_staging"""}

    csv_file = codecs.open(data_fname, 'rU', 'utf8', 'replace')
    try:
        loinc_reader = gmTools.unicode_csv_reader(csv_file, delimiter = "\t", quotechar = '"')

        # clean out staging area
        _run_and_commit(conn, [empty_staging])
        _log.debug('staging table emptied')

        # load the data file into the staging table:
        # one placeholder per LOINC field, one INSERT per data line,
        # the first line of the file is not loaded
        cmd = u"""INSERT INTO ref.loinc_staging values (%s)""" % u', '.join([u'%s'] * len(loinc_fields))
        inserts = (
            {'cmd': cmd, 'args': loinc_line}
            for line_no, loinc_line in enumerate(loinc_reader)
            if line_no > 0
        )
        _run_and_commit(conn, inserts)
    finally:
        csv_file.close()
    _log.debug('staging table loaded')

    # transfer from staging table to real table
    args = {'src_pk': data_src_pk}
    cmd = u"""
        INSERT INTO ref.loinc (
            fk_data_source,

            term,

            code,
            comment,
            component,
            property,
            time_aspect,
            system,
            scale_type,
            method_type,
            related_names_1_old,
            grouping_class,
            loinc_internal_source,
            dt_last_change,
            change_type,
            answer_list,
            code_status,
            maps_to,
            scope,
            normal_range,
            ipcc_units,
            reference,
            exact_component_synonym,
            molar_mass,
            grouping_class_type,
            formula,
            species,
            example_answers,
            acs_synonyms,
            base_name,
            final,
            naa_ccr_id,
            code_table,
            is_set_root,
            panel_elements,
            survey_question_text,
            survey_question_source,
            units_required,
            submitted_units,
            related_names_2,
            short_name,
            order_obs,
            cdisc_common_tests,
            hl7_field_subfield_id,
            external_copyright_notice,
            example_units,
            inpc_percentage,
            long_common_name
        )

        SELECT

            %(src_pk)s,

            coalesce (
                nullif(long_common_name, ''),
                (
                    coalesce(nullif(component, '') || ':', '') ||
                    coalesce(nullif(property, '') || ':', '') ||
                    coalesce(nullif(time_aspect, '') || ':', '') ||
                    coalesce(nullif(system, '') || ':', '') ||
                    coalesce(nullif(scale_type, '') || ':', '') ||
                    coalesce(nullif(method_type, '') || ':', '')
                )
            ),

            nullif(loinc_num, ''),
            nullif(comments, ''),
            nullif(component, ''),
            nullif(property, ''),
            nullif(time_aspect, ''),
            nullif(system, ''),
            nullif(scale_type, ''),
            nullif(method_type, ''),
            nullif(related_names_1_old, ''),
            nullif(class, ''),
            nullif(source, ''),
            nullif(dt_last_change, ''),
            nullif(change_type, ''),
            nullif(answer_list, ''),
            nullif(status, ''),
            nullif(map_to, ''),
            nullif(scope, ''),
            nullif(normal_range, ''),
            nullif(ipcc_units, ''),
            nullif(reference, ''),
            nullif(exact_component_synonym, ''),
            nullif(molar_mass, ''),
            nullif(class_type, '')::smallint,
            nullif(formula, ''),
            nullif(species, ''),
            nullif(example_answers, ''),
            nullif(acs_synonyms, ''),
            nullif(base_name, ''),
            nullif(final, ''),
            nullif(naa_ccr_id, ''),
            nullif(code_table, ''),
            nullif(is_set_root, '')::boolean,
            nullif(panel_elements, ''),
            nullif(survey_question_text, ''),
            nullif(survey_question_source, ''),
            nullif(units_required, ''),
            nullif(submitted_units, ''),
            nullif(related_names_2, ''),
            nullif(short_name, ''),
            nullif(order_obs, ''),
            nullif(cdisc_common_tests, ''),
            nullif(hl7_field_subfield_id, ''),
            nullif(external_copyright_notice, ''),
            nullif(example_units, ''),
            nullif(inpc_percentage, ''),
            nullif(long_common_name, '')

        FROM
            ref.loinc_staging
        """
    _run_and_commit(conn, [{'cmd': cmd, 'args': args}])
    _log.debug('transfer from staging table to real table done')

    # clean out staging area
    _run_and_commit(conn, [empty_staging])
    _log.debug('staging table emptied')

    return True


def _run_and_commit(conn, queries):
    """Run the query dicts one by one on a fresh cursor of conn, then commit.

    The cursor is closed even if a query fails; in that case nothing is
    committed here and the exception propagates to the caller.
    """
    curs = conn.cursor()
    try:
        for query in queries:
            gmPG2.run_rw_queries(link_obj = curs, queries = [query])
    finally:
        curs.close()
    conn.commit()
327
328
329
# Manual self-test: only runs when the module is executed as a script
# and 'test' is given as the first command line argument.
330 if __name__ == "__main__":
331
# no argument at all: exit without doing anything
332 if len(sys.argv) < 2:
333 sys.exit()
334
# first argument is not 'test': exit without doing anything
335 if sys.argv[1] != 'test':
336 sys.exit()
337
338 from Gnumed.pycommon import gmLog2
339 from Gnumed.pycommon import gmI18N
340
341 gmI18N.activate_locale()
342
343
344
347
350
# NOTE(review): the embedded line numbers skip 345-346, 348-349 and 351;
# the "def" lines of the test helpers -- including test_loinc_split(),
# which is called below but not defined anywhere in this listing --
# appear to be missing. Confirm against the complete file.
# Looks up the LOINC code given as second command line argument and
# prints the result (Python 2 print statement).
352 term = loinc2term(sys.argv[2])
353 print sys.argv[2], '->', term
354
355 test_loinc_split()
356
357
358
359
360