1 __doc__ = """GNUmed client internationalization/localization.
2
All i18n/l10n issues should be handled through this module.
4
5 Theory of operation:
6
7 To activate proper locale settings and translation services you need to
8
9 - import this module
10 - call activate_locale()
11 - call install_domain()
12
13 The translating method gettext.gettext() will then be
14 installed into the global (!) namespace as _(). Your own
15 modules thus need not do _anything_ (not even import gmI18N)
16 to have _() available to them for translating strings. You
17 need to make sure, however, that gmI18N is imported in your
18 main module before any of the modules using it. In order to
19 resolve circular references involving modules that
20 absolutely _have_ to be imported before this module you can
21 explicitly import gmI18N into them at the very beginning.
22
23 The text domain (i.e. the name of the message catalog file)
24 is derived from the name of the main executing script unless
25 explicitly passed to install_domain(). The language you
26 want to translate to is derived from environment variables
27 by the locale system unless explicitly passed to
28 install_domain().
29
30 This module searches for message catalog files in 3 main locations:
31
32 - standard POSIX places (/usr/share/locale/ ...)
33 - below "${YOURAPPNAME_DIR}/po/"
34 - below "<directory of binary of your app>/../po/"
35
36 For DOS/Windows I don't know of standard places so probably
37 only the last option will work. I don't know a thing about
38 classic Mac behaviour. New Macs are POSIX, of course.
39
40 It will then try to install candidates and *verify* whether
41 the translation works by checking for the translation of a
42 tag within itself (this is similar to the self-compiling
43 compiler inserting a backdoor into its self-compiled
44 copies).
45
46 If none of this works it will fall back to making _() a noop.
47
48 @copyright: authors
49 """
50
51 __author__ = "H. Herb <hherb@gnumed.net>, I. Haywood <i.haywood@ugrad.unimelb.edu.au>, K. Hilbert <Karsten.Hilbert@gmx.net>"
52 __license__ = "GPL v2 or later (details at http://www.gnu.org)"
53
54
55
56 import sys
57 import os.path
58 import os
59 import locale
60 import gettext
61 import logging
62 import codecs
63 import builtins
64 import re as regex
65
66
# Until a message catalog gets installed _() must already exist in the
# global (builtins) namespace; it simply hands back its argument.
builtins._ = lambda x: x

_log = logging.getLogger('gm.i18n')

# Set by activate_locale(); the empty string means "not activated yet".
system_locale = ''
# presumably filled by __split_locale_into_levels() - not visible here, verify
system_locale_level = {}

# The raw gettext translator; replaced once a working catalog was found.
_translate_via_gettext = lambda x: x
# Matches named substitution keys such as "%(name)s".
_substitutes_regex = regex.compile(r'%\(.+?\)s')

# Probe string: a catalog only counts as working if it translates this tag.
__orig_tag__ = 'Translate this or i18n into <en_EN> will not work properly !'
83
84
85
86
106
107
def __log_locale_settings(message=None):
    """Write a snapshot of the current locale configuration to the log.

    This is purely diagnostic: locale state is queried and logged, nothing
    is changed and nothing is returned.

    Args:
        message: optional headline logged (at debug level) before the
            snapshot itself.
    """
    plain_categories = 'LC_CTYPE LC_COLLATE LC_TIME LC_MONETARY LC_MESSAGES LC_NUMERIC'.split()

    # Not every platform defines every LC_* constant, so collect what exists.
    setlocale_categories = {}
    for name in ['LC_ALL'] + plain_categories:
        try:
            setlocale_categories[name] = getattr(locale, name)
        except AttributeError:
            _log.warning('this OS does not have locale.%s', name)

    # LC_ALL is only queried through setlocale(), never through getlocale().
    getlocale_categories = {
        name: value
        for name, value in setlocale_categories.items()
        if name != 'LC_ALL'
    }

    if message is not None:
        _log.debug(message)

    _log.debug('current locale settings:')
    _log.debug('locale.getlocale(): %s', str(locale.getlocale()))
    for name, category in getlocale_categories.items():
        _log.debug('locale.getlocale(%s): %s', name, locale.getlocale(category))

    # setlocale() without a second argument only queries the current setting.
    for name, category in setlocale_categories.items():
        _log.debug('(locale.setlocale(%s): %s)', name, locale.setlocale(category))

    try:
        _log.debug('locale.getdefaultlocale() - default (user) locale: %s', str(locale.getdefaultlocale()))
    except ValueError:
        _log.exception('the OS locale setup seems faulty')

    _log.debug('encoding sanity check (also check "locale.nl_langinfo(CODESET)" below):')
    pref_loc_enc = locale.getpreferredencoding(do_setlocale=False)
    loc_enc = locale.getlocale()[1]
    py_str_enc = sys.getdefaultencoding()
    sys_fs_enc = sys.getfilesystemencoding()
    _log.debug('sys.getdefaultencoding(): [%s]', py_str_enc)
    _log.debug('locale.getpreferredencoding(): [%s]', pref_loc_enc)
    _log.debug('locale.getlocale()[1]: [%s]', loc_enc)
    _log.debug('sys.getfilesystemencoding(): [%s]', sys_fs_enc)

    # Compare the encodings case-insensitively and ignoring dashes
    # so that "utf-8" and "UTF8" count as the same thing.
    if loc_enc is None:
        loc_enc_compare = None
    else:
        loc_enc = loc_enc.upper()
        loc_enc_compare = loc_enc.replace('-', '')
    if pref_loc_enc.upper().replace('-', '') != loc_enc_compare:
        _log.warning('encoding suggested by locale (%s) does not match encoding currently set in locale (%s)', pref_loc_enc, loc_enc)
        _log.warning('this might lead to encoding errors')

    for enc in [pref_loc_enc, loc_enc, py_str_enc, sys_fs_enc]:
        if enc is None:
            continue
        try:
            codecs.lookup(enc)
        except LookupError:
            _log.warning('<codecs> module can NOT handle encoding [%s]', enc)
        else:
            _log.debug('<codecs> module CAN handle encoding [%s]', enc)
    _log.debug('on Linux you can determine a likely candidate for the encoding by running "locale charmap"')

    _log.debug('locale related environment variables (${LANG} is typically used):')
    for var in 'LANGUAGE LC_ALL LC_CTYPE LANG'.split():
        if var in os.environ:
            _log.debug('${%s}=%s', var, os.environ[var])
        else:
            _log.debug('${%s} not set', var)

    _log.debug('database of locale conventions:')
    for key, value in locale.localeconv().items():
        _log.debug('locale.localeconv(%s): %s', key, value)

    nl_langinfo_categories = {}
    for name in 'CODESET D_T_FMT D_FMT T_FMT T_FMT_AMPM RADIXCHAR THOUSEP YESEXPR NOEXPR CRNCYSTR ERA ERA_D_T_FMT ERA_D_FMT ALT_DIGITS'.split():
        try:
            nl_langinfo_categories[name] = getattr(locale, name)
        except AttributeError:
            _log.warning('this OS does not support nl_langinfo category locale.%s', name)
    # One guard around the whole loop: the first failure ends the dump.
    try:
        for name, category in nl_langinfo_categories.items():
            _log.debug('locale.nl_langinfo(%s): %s', name, locale.nl_langinfo(category))
    except Exception:
        _log.exception('this OS does not support nl_langinfo')

    _log.debug('gmI18N.get_encoding(): %s', get_encoding())
201
202
def _translate_safely(term):
    """This wraps _().

    It protects against translation errors which would break string
    interpolation later on:

    - a different number of "%s" in original and translation
    - "%(key)s" placeholders in the translation which the original
      does not contain

    Args:
        term: the string to translate.

    Returns:
        The translation, or <term> itself (untranslated) if the
        translation fails one of the checks above.
    """
    translation = _translate_via_gettext(term)

    problem = None
    if translation.count('%s') != term.count('%s'):
        problem = 'count("%s") mismatch, returning untranslated string'
    else:
        keys_in_original = set(_substitutes_regex.findall(term))
        keys_in_translation = set(_substitutes_regex.findall(translation))
        # the translation may drop keys but must not invent new ones
        if not keys_in_translation.issubset(keys_in_original):
            problem = '"%(...)s" keys in translation not a subset of keys in original, returning untranslated string'

    if problem is None:
        return translation

    # Logged without arguments so the "%" sequences in <problem>
    # are emitted literally rather than interpolated.
    _log.error(problem)
    _log.error('original : %s', term)
    _log.error('translation: %s', translation)
    return term
227
228
229
230
def activate_locale():
    """Get system locale from environment.

    Activates the user-default locale unless locale.getlocale() already
    reports one, stores the resulting locale name in the module level
    <system_locale> ("en_EN" if it is still unset or "C") and then calls
    __split_locale_into_levels().

    Returns:
        True, always; failures while activating the locale are logged
        rather than raised.
    """
    global system_locale

    __log_locale_settings('unmodified startup locale settings (should be [C])')

    # NOTE(review): locale.getlocale() inspects LC_CTYPE only. If the
    # interpreter already set LC_CTYPE at startup the setlocale() call below
    # is presumably skipped and the other categories stay at "C" - confirm.
    loc = None
    try:
        loc = locale.getlocale()[0]
        if loc is None:
            loc = locale.setlocale(locale.LC_ALL, '')
            _log.debug("activating user-default locale with <locale.setlocale(locale.LC_ALL, '')> returns: [%s]", loc)
        else:
            _log.info('user-default locale already activated')
        # re-read: setlocale() returns a raw locale string, getlocale() the parsed name
        loc = locale.getlocale()[0]
    except AttributeError:
        _log.exception('Windows does not support locale.LC_ALL')
    except Exception:
        _log.exception('error activating user-default locale')

    __log_locale_settings('locale settings after activating user-default locale')

    if loc in [None, 'C']:
        _log.error('the current system locale is still [None] or [C], assuming [en_EN]')
        system_locale = "en_EN"
    else:
        system_locale = loc

    __split_locale_into_levels()

    return True
266
267
def install_domain(domain=None, language=None, prefer_local_catalog=False):
    """Install a text domain suitable for the main script.

    Several candidate languages are tried in turn by pointing ${LANG} at
    them and asking __install_domain() to find a working catalog:

    1. the explicitly requested <language> (if any)
    2. the unmodified user default (${LANG} as found at call time)
    3. the language reported by locale.getlocale()
    4. the language reported by locale.getdefaultlocale()

    Args:
        domain: name of the message catalog; derived from the name of
            the running script (sys.argv[0]) if None.
        language: language to translate into; None leaves the choice
            to the environment.
        prefer_local_catalog: search the catalog directories next to the
            binary before the system-wide ones.

    Returns:
        True, always. If no candidate yields a working catalog a
        gettext.NullTranslations() is installed as fallback.

    Side effects:
        ${LANG} in os.environ is left at whatever value was tried last.
    """
    if domain is None:
        _log.info('domain not specified, deriving from script name')
        domain = os.path.splitext(os.path.basename(sys.argv[0]))[0]
    _log.info('text domain is [%s]', domain)

    _log.debug('searching message catalog file for system locale [%s]', system_locale)
    _log.debug('checking process environment:')
    for env_var in ['LANGUAGE', 'LC_ALL', 'LC_MESSAGES', 'LANG']:
        value = os.getenv(env_var)
        # test the *value*, the variable name itself is never None
        if value is None:
            _log.debug(' ${%s} not set', env_var)
        else:
            _log.debug(' ${%s} = [%s]', env_var, value)

    # None stands for "whatever the environment says"
    lang_candidates = [language]
    if language is not None:
        _log.info('explicit request for target language [%s]', language)
        # should the explicit language fail fall back to the user default
        lang_candidates.append(None)

    current_lang = locale.getlocale()[0]
    if current_lang not in lang_candidates:
        lang_candidates.append(current_lang)

    try:
        default_lang = locale.getdefaultlocale()[0]
    except ValueError:
        # a faulty OS locale setup must not prevent installing a catalog
        _log.exception('the OS locale setup seems faulty')
        default_lang = None
    if default_lang not in lang_candidates:
        lang_candidates.append(default_lang)

    _log.debug('languages to try for translation: %s (None: implicit system default)', lang_candidates)
    initial_lang = os.getenv('LANG')
    _log.info('initial ${LANG} setting: %s', initial_lang)

    for lang_candidate in lang_candidates:
        # start each attempt from the state found at call time
        _log.debug('resetting ${LANG} to initial user default [%s]', initial_lang)
        if initial_lang is None:
            # ${LANG} may already be absent: pop() instead of del avoids a KeyError
            os.environ.pop('LANG', None)
            lang2log = '$LANG=<>'
        else:
            os.environ['LANG'] = initial_lang
            lang2log = '$LANG(default)=%s' % initial_lang

        if lang_candidate is not None:
            _log.info('explicitely overriding system locale language [%s] by setting ${LANG} to [%s]', initial_lang, lang_candidate)
            os.environ['LANG'] = lang_candidate
            lang2log = '$LANG(explicit)=%s' % lang_candidate

        # <lang2log> is only used for log output by __install_domain()
        if __install_domain(domain = domain, prefer_local_catalog = prefer_local_catalog, language = lang2log):
            return True

    _log.warning("falling back to NullTranslations() class")
    dummy = gettext.NullTranslations()
    dummy.install()
    return True
329
330
def __install_domain(domain, prefer_local_catalog, language='?'):
    """Search the known directories for a working catalog of <domain>.

    Candidate base directories, in search order:

    - "<dir of binary>/../po" and "<dir of binary>/po" - first if
      <prefer_local_catalog> is true, last otherwise
    - on POSIX: the directory gettext has bound to <domain>
    - "${<SCRIPTNAME>_DIR}/po" if that environment variable is set

    A candidate counts as working if, after gettext.install(), _()
    translates the probe string __orig_tag__ into something else. On
    success the raw translator is kept in _translate_via_gettext and
    builtins._ is pointed at _translate_safely().

    Args:
        domain: name of the message catalog.
        prefer_local_catalog: whether to try the directories near the
            binary first.
        language: only used in log messages.

    Returns:
        True if a working catalog got installed, False otherwise.
    """
    global _translate_via_gettext

    binary_dir = os.path.dirname(sys.argv[0])
    po_dir_above_binary = os.path.abspath(os.path.join(binary_dir, '..', 'po'))
    po_dir_at_binary = os.path.abspath(os.path.join(binary_dir, 'po'))

    candidate_PO_dirs = []

    if prefer_local_catalog:
        _log.debug('prioritizing local message catalog')
        _log.debug('looking one level above binary install directory: %s', po_dir_above_binary)
        candidate_PO_dirs.append(po_dir_above_binary)
        _log.debug('looking in binary install directory: %s', po_dir_at_binary)
        candidate_PO_dirs.append(po_dir_at_binary)

    if os.name == 'posix':
        _log.debug('system is POSIX, looking in standard locations (see Python Manual)')
        # without a directory argument this returns the current binding
        candidate_PO_dirs.append(gettext.bindtextdomain(domain))
    else:
        _log.debug('No use looking in standard POSIX locations - not a POSIX system.')

    env_key = "%s_DIR" % os.path.splitext(os.path.basename(sys.argv[0]))[0].upper()
    _log.debug('looking at ${%s}', env_key)
    if env_key in os.environ:
        loc_dir = os.path.abspath(os.path.join(os.environ[env_key], 'po'))
        _log.debug('${%s} = "%s" -> [%s]', env_key, os.environ[env_key], loc_dir)
        candidate_PO_dirs.append(loc_dir)
    else:
        _log.info("${%s} not set", env_key)

    if not prefer_local_catalog:
        _log.debug('looking above binary install directory [%s]', po_dir_above_binary)
        candidate_PO_dirs.append(po_dir_above_binary)
        _log.debug('looking in binary install directory [%s]', po_dir_at_binary)
        candidate_PO_dirs.append(po_dir_at_binary)

    for candidate_PO_dir in candidate_PO_dirs:
        _log.debug('trying with (base=%s, %s, domain=%s)', candidate_PO_dir, language, domain)
        _log.debug(' -> %s.mo', os.path.join(candidate_PO_dir, language, domain))
        if not os.path.exists(candidate_PO_dir):
            continue
        try:
            gettext.install(domain, candidate_PO_dir)
        except Exception:
            _log.exception('installing text domain [%s] failed from [%s]', domain, candidate_PO_dir)
            continue
        # _ resolves to builtins._ which gettext.install() has just replaced
        translated_tag = _(__orig_tag__)
        if translated_tag == __orig_tag__:
            _log.debug('does not translate: [%s] => [%s]', __orig_tag__, translated_tag)
            continue
        _log.debug('found msg catalog: [%s] => [%s]', __orig_tag__, translated_tag)
        # keep the raw translator and expose the checking wrapper as _()
        _translate_via_gettext = builtins._
        builtins._ = _translate_safely
        return True

    return False
403
404
_encoding_mismatch_already_logged = False
_current_encoding = None


def get_encoding():
    """Try to get a sane encoding.

    On MacOSX locale.setlocale(locale.LC_ALL, '') does not
    have the desired effect, so that locale.getlocale()[1]
    still returns None. So in that case try to fallback to
    locale.getpreferredencoding().

    Sources, in order of preference:

    <sys.getdefaultencoding()>
        - what Python itself uses to convert string <-> unicode
          when no other encoding was specified
        - only used if it is not 'ascii'
    <locale.getlocale()[1]>
        - what the current locale is *actually* using
          as the encoding for text conversion
    <locale.getpreferredencoding()>
        - what the current locale would *recommend* using
          as the encoding for text conversion

    The first two results are cached in the module, the last one is
    looked up afresh on every call.

    NOTE(review): Python 3 presumably always reports 'utf-8' as default
    encoding such that the locale is never consulted - confirm whether
    that is intended.

    Returns:
        The name of the encoding as a string.
    """
    global _current_encoding, _encoding_mismatch_already_logged

    if _current_encoding is not None:
        return _current_encoding

    enc = sys.getdefaultencoding()
    if enc != 'ascii':
        _current_encoding = enc
        return _current_encoding

    enc = locale.getlocale()[1]
    if enc is not None:
        _current_encoding = enc
        return _current_encoding

    # log this only once, the function may get called a lot
    if not _encoding_mismatch_already_logged:
        _log.debug('*actual* encoding of locale is None, using encoding *recommended* by locale')
        _encoding_mismatch_already_logged = True

    return locale.getpreferredencoding(do_setlocale=False)
448
449
450
451
if __name__ == "__main__":

    # Self-test, only run when invoked as: <this script> test [<text domain>]
    if len(sys.argv) < 2 or sys.argv[1] != 'test':
        sys.exit()

    logging.basicConfig(level = logging.DEBUG)

    def test_strcoll():
        """Print how locale.strcoll() orders a few sample string pairs."""
        candidates = [
            ('\u270d', '\u270d'),
            ('4', '\u270d' + '4'),
            ('4.4', '\u270d' + '4.4'),
            ('44', '\u270d' + '44'),
            ('4', '\u270d' + '9'),
            ('4', '\u270d' + '2'),
        ]
        for left, right in candidates:
            print(left, '<vs>', right, '=', locale.strcoll(left, right))

    print("======================================================================")
    print("GNUmed i18n")
    print("")
    print("authors:", __author__)
    print("license:", __license__)
    print("======================================================================")

    activate_locale()
    print("system locale: ", system_locale, "; levels:", system_locale_level)
    print("likely encoding:", get_encoding())

    # an optional second argument names the text domain to install
    if len(sys.argv) > 2:
        install_domain(domain = sys.argv[2])
    else:
        install_domain()

    test_strcoll()

    # The literal must stay identical to __orig_tag__.
    # NOTE(review): presumably kept so that message extraction tools pick
    # up the probe string for the catalogs - confirm before removing.
    tmp = _('Translate this or i18n into <en_EN> will not work properly !')
509
510
511