Package Gnumed :: Package pycommon :: Module gmI18N
[frames] | no frames]

Source Code for Module Gnumed.pycommon.gmI18N

  1  __doc__ = """GNUmed client internationalization/localization. 
  2   
  3  All i18n/l10n issues should be handled through this module. 
  4   
  5  Theory of operation: 
  6   
  7  To activate proper locale settings and translation services you need to 
  8   
  9  - import this module 
 10  - call activate_locale() 
 11  - call install_domain() 
 12   
 13  The translating method gettext.gettext() will then be 
 14  installed into the global (!) namespace as _(). Your own 
 15  modules thus need not do _anything_ (not even import gmI18N) 
 16  to have _() available to them for translating strings. You 
 17  need to make sure, however, that gmI18N is imported in your 
 18  main module before any of the modules using it. In order to 
 19  resolve circular references involving modules that 
 20  absolutely _have_ to be imported before this module you can 
 21  explicitly import gmI18N into them at the very beginning. 
 22   
 23  The text domain (i.e. the name of the message catalog file) 
 24  is derived from the name of the main executing script unless 
 25  explicitly passed to install_domain(). The language you 
 26  want to translate to is derived from environment variables 
 27  by the locale system unless explicitly passed to 
 28  install_domain(). 
 29   
 30  This module searches for message catalog files in 3 main locations: 
 31   
 32   - standard POSIX places (/usr/share/locale/ ...) 
 33   - below "${YOURAPPNAME_DIR}/po/" 
 34   - below "<directory of binary of your app>/../po/" 
 35   
 36  For DOS/Windows I don't know of standard places so probably 
 37  only the last option will work. I don't know a thing about 
 38  classic Mac behaviour. New Macs are POSIX, of course. 
 39   
 40  It will then try to install candidates and *verify* whether 
 41  the translation works by checking for the translation of a 
 42  tag within itself (this is similar to the self-compiling 
 43  compiler inserting a backdoor into its self-compiled 
 44  copies). 
 45   
 46  If none of this works it will fall back to making _() a noop. 
 47   
 48  @copyright: authors 
 49  """ 
 50  #=========================================================================== 
 51  __author__ = "H. Herb <hherb@gnumed.net>, I. Haywood <i.haywood@ugrad.unimelb.edu.au>, K. Hilbert <Karsten.Hilbert@gmx.net>" 
 52  __license__ = "GPL v2 or later (details at http://www.gnu.org)" 
 53   
 54   
 55  # stdlib 
 56  import sys 
 57  import os.path 
 58  import os 
 59  import locale 
 60  import gettext 
 61  import logging 
 62  import codecs 
 63  import builtins 
 64  import re as regex 
 65   
 66   
# Until install_domain() succeeds _() is an identity function so that
# modules calling _() early still work.
builtins._ = lambda x:x

# Logger shared by everything in this module.
_log = logging.getLogger('gm.i18n')

# Locale in effect, set by activate_locale().
system_locale = ''
# Filled by __split_locale_into_levels() with the keys
# 'full', 'country' and 'language'.
system_locale_level = {}

# The raw gettext function; __install_domain() stores it here before
# it replaces builtins._ with _translate_safely().
_translate_via_gettext = lambda x:x
# Finds named placeholders such as "%(name)s"; used by _translate_safely().
_substitutes_regex = regex.compile(r'%\(.+?\)s')
 76   
 77  # *************************************************************************** 
 78  # *************************************************************************** 
 79  # The following line is needed to check for successful 
 80  # installation of the desired message catalog. 
 81  # -- do not remove or change this line -------------------------------------- 
 82  __orig_tag__ = 'Translate this or i18n into <en_EN> will not work properly !' 
 83  # *************************************************************************** 
 84  # *************************************************************************** 
 85   
 86  #=========================================================================== 
def __split_locale_into_levels():
    """Split the system locale into full, country and language levels.

    Reads the module-level <system_locale> and fills the module-level
    dict <system_locale_level> in place with the keys:

    - 'full': the unmodified locale string
    - 'country': everything before the first '@', ':' or '.'
    - 'language': the 'country' level cut off at the first '_'

    We have observed the following formats in the wild:

    - de_DE@euro
    - ec_CA.UTF-8
    - en_US:en
    - German_Germany.1252
    """
    _log.debug('splitting canonical locale [%s] into levels', system_locale)

    # the dict is only mutated, never rebound, so no <global> is needed
    system_locale_level['full'] = system_locale

    # trim '@<variant>', ':<fallback>' and '.<encoding>' parts
    # - raw string: '\.' inside a plain literal is an invalid escape sequence
    # - maxsplit by keyword: passing it positionally is deprecated
    country_level = regex.split(r'@|:|\.', system_locale, maxsplit=1)[0]
    system_locale_level['country'] = country_level

    # trim the '_<COUNTRY>' part, too
    # - start from the country level so that a locale lacking a country
    #   (say, 'de@euro' or 'C.UTF-8') does not carry its variant/encoding
    #   suffix into the language
    system_locale_level['language'] = country_level.split('_', 1)[0]

    _log.debug('system locale levels: %s', system_locale_level)

#---------------------------------------------------------------------------
def __log_locale_settings(message=None):
    """Log a detailed snapshot of the current locale configuration.

    Everything goes to the module logger and nothing is returned.
    locale.setlocale() is only called without a new locale, that is,
    in its query form.

    Args:
        message: optional headline logged before the snapshot
    """
    # not every OS defines every category constant
    setlocale_categories = {}
    for category in 'LC_ALL LC_CTYPE LC_COLLATE LC_TIME LC_MONETARY LC_MESSAGES LC_NUMERIC'.split():
        try:
            setlocale_categories[category] = getattr(locale, category)
        except AttributeError:
            _log.warning('this OS does not have locale.%s', category)

    # LC_ALL is not queried via getlocale()
    # NOTE(review): presumably because getlocale() cannot report a
    # composite LC_ALL setting - confirm
    getlocale_categories = {
        category: getattr(locale, category)
        for category in 'LC_CTYPE LC_COLLATE LC_TIME LC_MONETARY LC_MESSAGES LC_NUMERIC'.split()
        if hasattr(locale, category)
    }

    if message is not None:
        _log.debug(message)

    _log.debug('current locale settings:')
    _log.debug('locale.getlocale(): %s', locale.getlocale())
    for category, constant in getlocale_categories.items():
        _log.debug('locale.getlocale(%s): %s', category, locale.getlocale(constant))

    for category, constant in setlocale_categories.items():
        _log.debug('(locale.setlocale(%s): %s)', category, locale.setlocale(constant))

    try:
        _log.debug('locale.getdefaultlocale() - default (user) locale: %s', locale.getdefaultlocale())
    except ValueError:
        _log.exception('the OS locale setup seems faulty')
    except AttributeError:
        # getdefaultlocale() is deprecated, do not let its removal
        # break what is merely diagnostic logging
        _log.warning('this Python does not have locale.getdefaultlocale()')

    _log.debug('encoding sanity check (also check "locale.nl_langinfo(CODESET)" below):')
    pref_loc_enc = locale.getpreferredencoding(do_setlocale=False)
    loc_enc = locale.getlocale()[1]
    py_str_enc = sys.getdefaultencoding()
    sys_fs_enc = sys.getfilesystemencoding()
    _log.debug('sys.getdefaultencoding(): [%s]', py_str_enc)
    _log.debug('locale.getpreferredencoding(): [%s]', pref_loc_enc)
    _log.debug('locale.getlocale()[1]: [%s]', loc_enc)
    _log.debug('sys.getfilesystemencoding(): [%s]', sys_fs_enc)
    # compare case-insensitively and regardless of dashes ("utf-8" vs "UTF8")
    if loc_enc is not None:
        loc_enc = loc_enc.upper()
        loc_enc_compare = loc_enc.replace('-', '')
    else:
        loc_enc_compare = loc_enc
    if pref_loc_enc.upper().replace('-', '') != loc_enc_compare:
        _log.warning('encoding suggested by locale (%s) does not match encoding currently set in locale (%s)', pref_loc_enc, loc_enc)
        _log.warning('this might lead to encoding errors')
    for enc in [pref_loc_enc, loc_enc, py_str_enc, sys_fs_enc]:
        if enc is None:
            continue
        try:
            codecs.lookup(enc)
            _log.debug('<codecs> module CAN handle encoding [%s]', enc)
        except LookupError:
            _log.warning('<codecs> module can NOT handle encoding [%s]', enc)
    _log.debug('on Linux you can determine a likely candidate for the encoding by running "locale charmap"')

    _log.debug('locale related environment variables (${LANG} is typically used):')
    for var in 'LANGUAGE LC_ALL LC_CTYPE LANG'.split():
        try:
            _log.debug('${%s}=%s', var, os.environ[var])
        except KeyError:
            _log.debug('${%s} not set', var)

    _log.debug('database of locale conventions:')
    # values are already text (or numbers/lists) under Python 3, so
    # they need no decoding with the locale encoding before logging
    for key, value in locale.localeconv().items():
        _log.debug('locale.localeconv(%s): %s', key, value)

    nl_langinfo_categories = {}
    for category in 'CODESET D_T_FMT D_FMT T_FMT T_FMT_AMPM RADIXCHAR THOUSEP YESEXPR NOEXPR CRNCYSTR ERA ERA_D_T_FMT ERA_D_FMT ALT_DIGITS'.split():
        try:
            nl_langinfo_categories[category] = getattr(locale, category)
        except AttributeError:
            _log.warning('this OS does not support nl_langinfo category locale.%s', category)
    try:
        for category, constant in nl_langinfo_categories.items():
            _log.debug('locale.nl_langinfo(%s): %s', category, locale.nl_langinfo(constant))
    except Exception:
        # best effort only: this is diagnostic output
        _log.exception('this OS does not support nl_langinfo')

    _log.debug('gmI18N.get_encoding(): %s', get_encoding())

#---------------------------------------------------------------------------
def _translate_safely(term):
    """Translate <term> but refuse translations with broken placeholders.

    The translation found by the gettext function is discarded, and
    <term> itself returned, if

    - the number of "%s" differs between original and translation, or
    - the translation uses a "%(...)s" key the original does not have.

    Either problem is logged as an error.
    """
    candidate = _translate_via_gettext(term)

    problem = None
    if term.count('%s') != candidate.count('%s'):
        problem = 'count("%s") mismatch, returning untranslated string'
    else:
        keys_in_term = set(_substitutes_regex.findall(term))
        keys_in_candidate = set(_substitutes_regex.findall(candidate))
        # any key left over is one the original cannot supply
        if keys_in_candidate - keys_in_term:
            problem = '"%(...)s" keys in translation not a subset of keys in original, returning untranslated string'

    if problem is None:
        return candidate

    _log.error(problem)
    _log.error('original : %s', term)
    _log.error('translation: %s', candidate)
    return term

#---------------------------------------------------------------------------
# external API
#---------------------------------------------------------------------------
def activate_locale():
    """Activate the user's default locale and remember it in this module.

    The locale settings are logged before and after the attempt.
    The resulting locale is stored in the module-level <system_locale>
    ("en_EN" is assumed if the locale is still None or "C") and then
    split into levels by __split_locale_into_levels().

    Returns:
        Always True.
    """
    global system_locale

    __log_locale_settings('unmodified startup locale settings (should be [C])')

    active_locale = None
    try:
        # only switch if no locale has been activated so far
        active_locale = locale.getlocale()[0]
        if active_locale is None:
            active_locale = locale.setlocale(locale.LC_ALL, '')
            _log.debug("activating user-default locale with <locale.setlocale(locale.LC_ALL, '')> returns: [%s]", active_locale)
        else:
            _log.info('user-default locale already activated')
        # re-read what is in effect now
        # NOTE(review): placement after (not inside) the if/else was
        # reconstructed from whitespace-flattened source - confirm
        active_locale = locale.getlocale()[0]
    except AttributeError:
        _log.exception('Windows does not support locale.LC_ALL')
    except Exception:
        _log.exception('error activating user-default locale')

    __log_locale_settings('locale settings after activating user-default locale')

    # no usable locale found ? -> assume en_EN
    if active_locale is None or active_locale == 'C':
        _log.error('the current system locale is still [None] or [C], assuming [en_EN]')
        system_locale = "en_EN"
    else:
        system_locale = active_locale

    __split_locale_into_levels()

    return True

#---------------------------------------------------------------------------
def install_domain(domain=None, language=None, prefer_local_catalog=False):
    """Install a text domain suitable for the main script.

    Several candidate languages are tried in turn by pointing ${LANG}
    at each of them and asking __install_domain() for a working
    message catalog.

    Args:
        domain: name of the message catalog, derived from the name of
            the running script (sys.argv[0]) if None
        language: explicit target language to try first, None means
            the user's default as found by the locale system
        prefer_local_catalog: passed on to __install_domain()

    Returns:
        Always True: if no candidate yields a working catalog a
        gettext.NullTranslations() is installed instead.
    """
    # text domain directly specified ?
    if domain is None:
        _log.info('domain not specified, deriving from script name')
        # get text domain from name of script
        domain = os.path.splitext(os.path.basename(sys.argv[0]))[0]
    _log.info('text domain is [%s]', domain)

    # http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap08.html
    _log.debug('searching message catalog file for system locale [%s]', system_locale)
    _log.debug('checking process environment:')
    for env_var in ['LANGUAGE', 'LC_ALL', 'LC_MESSAGES', 'LANG']:
        env_value = os.getenv(env_var)
        # test the value: the variable *name* is never None
        if env_value is None:
            _log.debug(' ${%s} not set', env_var)
        else:
            _log.debug(' ${%s} = [%s]', env_var, env_value)

    # language codes to try
    # - first: explicit language, or None = unadulterated user default
    lang_candidates = [language]
    if language is not None:
        _log.info('explicit request for target language [%s]', language)
        # - next: user default, should the explicit language fail
        lang_candidates.append(None)
    # - next: locale.getlocale()[0], if different
    #   (this can be strange on, say, Windows: Hungarian_Hungary)
    current_lang = locale.getlocale()[0]
    if current_lang not in lang_candidates:
        lang_candidates.append(current_lang)
    # - next: locale.get*default*locale()[0], if different
    try:
        default_lang = locale.getdefaultlocale()[0]
    except (AttributeError, ValueError):
        # deprecated in newer Pythons, and known to raise ValueError
        # on a faulty OS locale setup - just do without this candidate
        _log.exception('cannot determine default locale, not adding it as a candidate')
        default_lang = None
    if default_lang not in lang_candidates:
        lang_candidates.append(default_lang)
    _log.debug('languages to try for translation: %s (None: implicit system default)', lang_candidates)

    initial_lang = os.getenv('LANG')
    _log.info('initial ${LANG} setting: %s', initial_lang)
    for lang_candidate in lang_candidates:
        # setup baseline
        _log.debug('resetting ${LANG} to initial user default [%s]', initial_lang)
        if initial_lang is None:
            # ${LANG} is absent on the first pass (that is why
            # <initial_lang> is None), so a plain <del> raises KeyError
            os.environ.pop('LANG', None)
            lang2log = '$LANG=<>'
        else:
            os.environ['LANG'] = initial_lang
            lang2log = '$LANG(default)=%s' % initial_lang
        # setup candidate language
        if lang_candidate is not None:
            _log.info('explicitely overriding system locale language [%s] by setting ${LANG} to [%s]', initial_lang, lang_candidate)
            os.environ['LANG'] = lang_candidate
            lang2log = '$LANG(explicit)=%s' % lang_candidate
        if __install_domain(domain = domain, prefer_local_catalog = prefer_local_catalog, language = lang2log):
            return True

    # install a dummy translation class
    _log.warning("falling back to NullTranslations() class")
    # this shouldn't fail
    dummy = gettext.NullTranslations()
    dummy.install()
    return True

#---------------------------------------------------------------------------
def __install_domain(domain, prefer_local_catalog, language='?'):
    """Find and install a message catalog which actually translates.

    Candidate base directories are collected in this order:

    - "po/" one level above and "po/" next to the running script,
      if <prefer_local_catalog> is true
    - what gettext.bindtextdomain() reports, on POSIX systems only
    - "po/" below the directory named by ${<SCRIPTNAME>_DIR}, if set
    - the two script-relative "po/" directories, if
      <prefer_local_catalog> is false

    Each existing candidate is installed via gettext.install() and
    accepted only if it translates the tag string of this module. On
    success builtins._ becomes _translate_safely(), which wraps the
    gettext function just installed.

    Args:
        domain: name of the message catalog
        prefer_local_catalog: whether to search near the script first
        language: used for logging only

    Returns:
        True if a working catalog was installed, False otherwise.
    """
    global _translate_via_gettext

    def script_relative_po_dirs():
        # ("<script dir>/../po", "<script dir>/po"), the former being the
        # last resort for inferior operating systems such as DOS/Windows
        script_dir = os.path.dirname(sys.argv[0])
        return (
            os.path.abspath(os.path.join(script_dir, '..', 'po')),
            os.path.abspath(os.path.join(script_dir, 'po'))
        )

    search_path = []

    # - locally, first
    if prefer_local_catalog:
        _log.debug('prioritizing local message catalog')
        above_script, beside_script = script_relative_po_dirs()
        _log.debug('looking one level above binary install directory: %s', above_script)
        search_path.append(above_script)
        _log.debug('looking in binary install directory: %s', beside_script)
        search_path.append(beside_script)

    # - standard places
    if os.name == 'posix':
        _log.debug('system is POSIX, looking in standard locations (see Python Manual)')
        # if this is reported to segfault/fail/except on some
        # systems we may have to assume "sys.prefix/share/locale/"
        search_path.append(gettext.bindtextdomain(domain))
    else:
        _log.debug('No use looking in standard POSIX locations - not a POSIX system.')

    # - $(<script-name>_DIR)/
    env_key = "%s_DIR" % os.path.splitext(os.path.basename(sys.argv[0]))[0].upper()
    _log.debug('looking at ${%s}', env_key)
    env_value = os.environ.get(env_key)
    if env_value is None:
        _log.info("${%s} not set", env_key)
    else:
        env_po_dir = os.path.abspath(os.path.join(env_value, 'po'))
        _log.debug('${%s} = "%s" -> [%s]', env_key, env_value, env_po_dir)
        search_path.append(env_po_dir)

    # - locally, last
    if not prefer_local_catalog:
        above_script, beside_script = script_relative_po_dirs()
        _log.debug('looking above binary install directory [%s]', above_script)
        search_path.append(above_script)
        _log.debug('looking in binary install directory [%s]', beside_script)
        search_path.append(beside_script)

    # now try to actually install it
    for po_dir in search_path:
        _log.debug('trying with (base=%s, %s, domain=%s)', po_dir, language, domain)
        _log.debug(' -> %s.mo', os.path.join(po_dir, language, domain))
        if not os.path.exists(po_dir):
            continue
        try:
            gettext.install(domain, po_dir)
        except Exception:
            _log.exception('installing text domain [%s] failed from [%s]', domain, po_dir)
            continue
        # does it translate ?
        translated_tag = _(__orig_tag__)
        if translated_tag == __orig_tag__:
            _log.debug('does not translate: [%s] => [%s]', __orig_tag__, translated_tag)
            continue
        _log.debug('found msg catalog: [%s] => [%s]', __orig_tag__, translated_tag)
        # keep the raw gettext function, expose the safe wrapper
        _translate_via_gettext = builtins._
        builtins._ = _translate_safely
        return True

    return False

#===========================================================================
# state of get_encoding():
# - whether falling back to the *recommended* encoding was logged already
_encoding_mismatch_already_logged = False
# - the encoding detected by an earlier call, if any
_current_encoding = None

def get_encoding():
    """Try to get a sane encoding.

    On MacOSX locale.setlocale(locale.LC_ALL, '') does not have the
    desired effect, so that locale.getlocale()[1] still returns None.
    In that case fall back to locale.getpreferredencoding().

    The following are consulted, in this order:

    <sys.getdefaultencoding()>
    - what Python itself uses for implicit text conversion
    - used unless it is "ascii"
    - NOTE(review): Python 3 reports "utf-8" here, so the
      sources below are presumably never reached - confirm
      whether that is intended
    <locale.getlocale()[1]>
    - what the current locale is *actually* using
      as the encoding for text conversion
    <locale.getpreferredencoding()>
    - what the current locale would *recommend* using
      as the encoding for text conversion

    A result from either of the first two sources is cached for
    later calls, the last resort is looked up afresh on every call.
    """
    global _current_encoding
    global _encoding_mismatch_already_logged

    if _current_encoding is None:
        detected = sys.getdefaultencoding()
        if detected == 'ascii':
            detected = locale.getlocale()[1]
        if detected is None:
            # mention the fallback once only
            if not _encoding_mismatch_already_logged:
                _log.debug('*actual* encoding of locale is None, using encoding *recommended* by locale')
                _encoding_mismatch_already_logged = True
            return locale.getpreferredencoding(do_setlocale=False)
        _current_encoding = detected

    return _current_encoding

#===========================================================================
# Main
#---------------------------------------------------------------------------
if __name__ == "__main__":

    # self-test, run as: <this script> test [<text domain>]
    if sys.argv[1:2] != ['test']:
        sys.exit()

    logging.basicConfig(level = logging.DEBUG)

    #----------------------------------------------------------------------
    def test_strcoll():
        """Print what locale.strcoll() makes of a few pairs of strings."""
        pairs = [
            ('\u270d', '\u270d'),
            ('4', '\u270d' + '4'),
            ('4.4', '\u270d' + '4.4'),
            ('44', '\u270d' + '44'),
            ('4', '\u270d' + '9'),
            ('4', '\u270d' + '2'),
        ]
        for left, right in pairs:
            print(left, '<vs>', right, '=', locale.strcoll(left, right))

    #----------------------------------------------------------------------
    print("======================================================================")
    print("GNUmed i18n")
    print("")
    print("authors:", __author__)
    print("license:", __license__)
    print("======================================================================")

    activate_locale()
    print("system locale: ", system_locale, "; levels:", system_locale_level)
    print("likely encoding:", get_encoding())

    # the text domain may be given as second argument
    install_domain(domain = sys.argv[2] if len(sys.argv) > 2 else None)

    test_strcoll()

    # NOTE(review): nesting of the following line inside the __main__
    # block was reconstructed from whitespace-flattened source - confirm
    # ********************************************************************* #
    # == do not remove this line ========================================== #
    # it is needed to check for successful installation of                  #
    # the desired message catalog                                           #
    # ********************************************************************* #
    tmp = _('Translate this or i18n into <en_EN> will not work properly !') #
    # ********************************************************************* #
    # ********************************************************************* #