Package Gnumed :: Package pycommon :: Module gmI18N
[frames | no frames]

Source Code for Module Gnumed.pycommon.gmI18N

  1  __doc__ = """GNUmed client internationalization/localization. 
  2   
  3  All i18n/l10n issues should be handled through this module. 
  4   
  5  Theory of operation: 
  6   
  7  To activate proper locale settings and translation services you need to 
  8   
  9  - import this module 
 10  - call activate_locale() 
 11  - call install_domain() 
 12   
 13  The translating method gettext.gettext() will then be 
 14  installed into the global (!) namespace as _(). Your own 
 15  modules thus need not do _anything_ (not even import gmI18N) 
 16  to have _() available to them for translating strings. You 
 17  need to make sure, however, that gmI18N is imported in your 
 18  main module before any of the modules using it. In order to 
 19  resolve circular references involving modules that 
 20  absolutely _have_ to be imported before this module you can 
 21  explicitly import gmI18N into them at the very beginning. 
 22   
 23  The text domain (i.e. the name of the message catalog file) 
 24  is derived from the name of the main executing script unless 
 25  explicitly passed to install_domain(). The language you 
 26  want to translate to is derived from environment variables 
 27  by the locale system unless explicitly passed to 
 28  install_domain(). 
 29   
 30  This module searches for message catalog files in 3 main locations: 
 31   
 32   - standard POSIX places (/usr/share/locale/ ...) 
 33   - below "${YOURAPPNAME_DIR}/po/" 
 34   - below "<directory of binary of your app>/../po/" 
 35   
 36  For DOS/Windows I don't know of standard places so probably 
 37  only the last option will work. I don't know a thing about 
 38  classic Mac behaviour. New Macs are POSIX, of course. 
 39   
 40  It will then try to install candidates and *verify* whether 
 41  the translation works by checking for the translation of a 
 42  tag within itself (this is similar to the self-compiling 
 43  compiler inserting a backdoor into its self-compiled 
 44  copies). 
 45   
 46  If none of this works it will fall back to making _() a noop. 
 47   
 48  @copyright: authors 
 49  """ 
 50  #=========================================================================== 
 51  __author__ = "H. Herb <hherb@gnumed.net>, I. Haywood <i.haywood@ugrad.unimelb.edu.au>, K. Hilbert <Karsten.Hilbert@gmx.net>" 
 52  __license__ = "GPL v2 or later (details at http://www.gnu.org)" 
 53   
 54   
 55  # stdlib 
 56  import sys 
 57  import os.path 
 58  import os 
 59  import locale 
 60  import gettext 
 61  import logging 
 62  import codecs 
 63  import builtins 
 64  import re as regex 
 65   
 66   
# Make a no-op _() available in the builtins namespace right away so that
# modules calling _() work even before a message catalog is installed.
builtins._ = lambda x:x

# module-level logger used by every function in this module
_log = logging.getLogger('gm.i18n')

# locale name as determined by activate_locale()
system_locale = ''
# filled by __split_locale_into_levels() with the keys 'full', 'country' and 'language'
system_locale_level = {}

# the unprotected translation callable wrapped by _translate_protected(),
# identity until __install_domain() finds a working message catalog
_translate_original = lambda x:x
# matches named "%(key)s" placeholders in translatable strings
_substitutes_regex = regex.compile(r'%\(.+?\)s')
 76   
 77  # *************************************************************************** 
 78  # *************************************************************************** 
 79  # The following line is needed to check for successful 
 80  # installation of the desired message catalog. 
 81  # -- do not remove or change this line -------------------------------------- 
 82  __orig_tag__ = 'Translate this or i18n into <en_EN> will not work properly !' 
 83  # *************************************************************************** 
 84  # *************************************************************************** 
 85   
 86  #=========================================================================== 
def __split_locale_into_levels():
    """Split the system locale into full, country and language levels.

    Reads the module-level <system_locale> and stores the result in the
    module-level dict <system_locale_level> under these keys:

    - 'full': the locale string unchanged
    - 'country': everything before the first '@', ':' or '.'
    - 'language': the part of 'country' before the first '_'

    We have observed the following formats in the wild:

    - de_DE@euro
    - ec_CA.UTF-8
    - en_US:en
    - German_Germany.1252
    """
    _log.debug('splitting canonical locale [%s] into levels', system_locale)

    system_locale_level['full'] = system_locale
    # trim '@<variant>', ':<fallback>' and '.<encoding>' parts
    # (raw string, as '\.' is not a valid escape in a plain string literal)
    country_level = regex.split(r'@|:|\.', system_locale, maxsplit=1)[0]
    system_locale_level['country'] = country_level
    # trim the '_<COUNTRY>' part, starting from the already trimmed value
    # so a locale without a country (say, "de@euro") still yields "de"
    system_locale_level['language'] = country_level.split('_', 1)[0]

    _log.debug('system locale levels: %s', system_locale_level)
106 107 #---------------------------------------------------------------------------
def __log_locale_settings(message=None):
    """Log a detailed snapshot of the current locale related settings.

    This is a purely diagnostic helper: it only writes to the module
    logger and returns nothing.

    Args:
        message: optional headline logged (at debug level) ahead of the details
    """
    # LC_* constants which this OS lacks raise AttributeError on lookup
    _setlocale_categories = {}
    for category in 'LC_ALL LC_CTYPE LC_COLLATE LC_TIME LC_MONETARY LC_MESSAGES LC_NUMERIC'.split():
        try:
            _setlocale_categories[category] = getattr(locale, category)
        except AttributeError:
            _log.warning('this OS does not have locale.%s', category)

    # same categories minus LC_ALL, missing ones were already reported above
    _getlocale_categories = {}
    for category in 'LC_CTYPE LC_COLLATE LC_TIME LC_MONETARY LC_MESSAGES LC_NUMERIC'.split():
        try:
            _getlocale_categories[category] = getattr(locale, category)
        except AttributeError:
            pass

    if message is not None:
        _log.debug(message)

    _log.debug('current locale settings:')
    _log.debug('locale.getlocale(): %s', str(locale.getlocale()))
    for category, lc_constant in _getlocale_categories.items():
        _log.debug('locale.getlocale(%s): %s', category, str(locale.getlocale(lc_constant)))

    # setlocale() without a second argument only queries the current setting
    for category, lc_constant in _setlocale_categories.items():
        _log.debug('(locale.setlocale(%s): %s)', category, locale.setlocale(lc_constant))

    try:
        _log.debug('locale.getdefaultlocale() - default (user) locale: %s', str(locale.getdefaultlocale()))
    except ValueError:
        _log.exception('the OS locale setup seems faulty')

    _log.debug('encoding sanity check (also check "locale.nl_langinfo(CODESET)" below):')
    pref_loc_enc = locale.getpreferredencoding(do_setlocale=False)
    loc_enc = locale.getlocale()[1]
    py_str_enc = sys.getdefaultencoding()
    sys_fs_enc = sys.getfilesystemencoding()
    _log.debug('sys.getdefaultencoding(): [%s]', py_str_enc)
    _log.debug('locale.getpreferredencoding(): [%s]', pref_loc_enc)
    _log.debug('locale.getlocale()[1]: [%s]', loc_enc)
    _log.debug('sys.getfilesystemencoding(): [%s]', sys_fs_enc)
    # compare case-insensitively and ignoring dashes ("utf-8" vs "UTF8")
    if loc_enc is not None:
        loc_enc = loc_enc.upper()
        loc_enc_compare = loc_enc.replace('-', '')
    else:
        loc_enc_compare = loc_enc
    if pref_loc_enc.upper().replace('-', '') != loc_enc_compare:
        _log.warning('encoding suggested by locale (%s) does not match encoding currently set in locale (%s)', pref_loc_enc, loc_enc)
        _log.warning('this might lead to encoding errors')
    for enc in [pref_loc_enc, loc_enc, py_str_enc, sys_fs_enc]:
        if enc is None:
            continue
        try:
            codecs.lookup(enc)
        except LookupError:
            _log.warning('<codecs> module can NOT handle encoding [%s]', enc)
        else:
            _log.debug('<codecs> module CAN handle encoding [%s]', enc)
    _log.debug('on Linux you can determine a likely candidate for the encoding by running "locale charmap"')

    _log.debug('locale related environment variables (${LANG} is typically used):')
    for var in 'LANGUAGE LC_ALL LC_CTYPE LANG'.split():
        try:
            _log.debug('${%s}=%s', var, os.environ[var])
        except KeyError:
            _log.debug('${%s} not set', var)

    _log.debug('database of locale conventions:')
    for key, value in locale.localeconv().items():
        _log.debug('locale.localeconv(%s): %s', key, value)

    _nl_langinfo_categories = {}
    for category in 'CODESET D_T_FMT D_FMT T_FMT T_FMT_AMPM RADIXCHAR THOUSEP YESEXPR NOEXPR CRNCYSTR ERA ERA_D_T_FMT ERA_D_FMT ALT_DIGITS'.split():
        try:
            _nl_langinfo_categories[category] = getattr(locale, category)
        except AttributeError:
            _log.warning('this OS does not support nl_langinfo category locale.%s', category)
    # best effort: any failure while querying nl_langinfo() is logged, never raised
    try:
        for category, item in _nl_langinfo_categories.items():
            _log.debug('locale.nl_langinfo(%s): %s', category, locale.nl_langinfo(item))
    except Exception:
        _log.exception('this OS does not support nl_langinfo')

    _log.debug('gmI18N.get_encoding(): %s', get_encoding())
201 202 #---------------------------------------------------------------------------
def _translate_protected(term):
    """Translate <term>, falling back to <term> itself on a broken translation.

    This wraps _(). A translation is rejected (and logged) when

    - it contains a different number of "%s" than the original, or
    - it uses "%(key)s" placeholders which the original does not have.

    Returns:
        The translation if it passed both checks, else the untranslated <term>.
    """
    candidate = _translate_original(term)

    problem = None
    if candidate.count('%s') != term.count('%s'):
        # different number of %s substitutes
        problem = 'count("%s") mismatch, returning untranslated string'
    else:
        keys_in_term = set(_substitutes_regex.findall(term))
        keys_in_candidate = set(_substitutes_regex.findall(candidate))
        if not (keys_in_candidate <= keys_in_term):
            problem = '"%(...)s" keys in translation not a subset of keys in original, returning untranslated string'

    if problem is None:
        return candidate

    _log.error(problem)
    _log.error('original : %s', term)
    _log.error('translation: %s', candidate)
    return term
227 228 #--------------------------------------------------------------------------- 229 # external API 230 #---------------------------------------------------------------------------
def activate_locale():
    """Get system locale from environment.

    Activates the user-default locale via locale.setlocale(locale.LC_ALL, '')
    unless locale.getlocale() already reports a locale, stores the result
    in the module-level <system_locale> (assuming "en_EN" if none could be
    determined) and derives <system_locale_level> from it.

    Failures to activate the locale are logged, not raised.

    Returns:
        Always True.
    """
    global system_locale

    __log_locale_settings('unmodified startup locale settings (should be [C])')

    # activate user-preferred locale
    loc = None
    try:
        # check whether already set
        # NOTE(review): getlocale() only looks at LC_CTYPE - confirm that
        # a preset LC_CTYPE really means all categories are activated
        loc = locale.getlocale()[0]
        if loc is None:
            loc = locale.setlocale(locale.LC_ALL, '')
            _log.debug("activating user-default locale with <locale.setlocale(locale.LC_ALL, '')> returns: [%s]", loc)
        else:
            _log.info('user-default locale already activated')
        loc = locale.getlocale()[0]
    except AttributeError:
        _log.exception('Windows does not support locale.LC_ALL')
    except Exception:
        # includes locale.Error, deliberately lets KeyboardInterrupt/SystemExit pass
        _log.exception('error activating user-default locale')

    __log_locale_settings('locale settings after activating user-default locale')

    # did we find any locale setting ? assume en_EN if not
    if loc in [None, 'C']:
        _log.error('the current system locale is still [None] or [C], assuming [en_EN]')
        system_locale = "en_EN"
    else:
        system_locale = loc

    # generate system locale levels
    __split_locale_into_levels()

    return True
266 267 #---------------------------------------------------------------------------
def install_domain(domain=None, language=None, prefer_local_catalog=False):
    """Install a text domain suitable for the main script.

    Tries a list of language candidates in turn by pointing ${LANG} at
    each of them and attempting to install a message catalog. If no
    candidate yields a working catalog, a gettext.NullTranslations()
    instance is installed instead.

    Args:
        domain: name of the message catalog, derived from the name of
            the running script (sys.argv[0]) if None
        language: explicit target language, None means the unadulterated
            default language of the user
        prefer_local_catalog: passed on to the catalog search, makes the
            "po" directories near the script take precedence

    Returns:
        Always True.
    """
    # text domain directly specified ?
    if domain is None:
        _log.info('domain not specified, deriving from script name')
        # get text domain from name of script
        domain = os.path.splitext(os.path.basename(sys.argv[0]))[0]
    _log.info('text domain is [%s]', domain)

    # http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap08.html
    _log.debug('searching message catalog file for system locale [%s]', system_locale)

    _log.debug('checking process environment:')
    for env_var in ['LANGUAGE', 'LC_ALL', 'LC_MESSAGES', 'LANG']:
        tmp = os.getenv(env_var)
        # test the *value*, the variable name itself is never None
        if tmp is None:
            _log.debug(' ${%s} not set', env_var)
        else:
            _log.debug(' ${%s} = [%s]', env_var, tmp)

    # language codes to try
    # first: explicit language or default system language
    # language=None: unadulterated default language for user (locale.getlocale()[0] value)
    # language != None: explicit language setting as passed in by the caller
    lang_candidates = [language]
    if language is not None:
        _log.info('explicit request for target language [%s]', language)
        # next: try default language for user if explicit language fails
        lang_candidates.append(None)

    # next try locale.getlocale()[0], if different (this can be strange on, say, Windows: Hungarian_Hungary)
    current_lang = locale.getlocale()[0]
    if current_lang not in lang_candidates:
        lang_candidates.append(current_lang)

    # next try locale.get*default*locale()[0], if different
    try:
        default_lang = locale.getdefaultlocale()[0]
    except ValueError:
        _log.exception('the OS locale setup seems faulty')
        default_lang = None		# None is always among the candidates already
    if default_lang not in lang_candidates:
        lang_candidates.append(default_lang)

    _log.debug('languages to try for translation: %s (None: implicit system default)', lang_candidates)
    initial_lang = os.getenv('LANG')
    _log.info('initial ${LANG} setting: %s', initial_lang)

    # loop over language candidates
    for lang_candidate in lang_candidates:
        # setup baseline
        _log.debug('resetting ${LANG} to initial user default [%s]', initial_lang)
        if initial_lang is None:
            # ${LANG} may legitimately be absent (it was unset to begin with),
            # so remove it without failing
            os.environ.pop('LANG', None)
            lang2log = '$LANG=<>'
        else:
            os.environ['LANG'] = initial_lang
            lang2log = '$LANG(default)=%s' % initial_lang
        # setup candidate language
        if lang_candidate is not None:
            _log.info('explicitely overriding system locale language [%s] by setting ${LANG} to [%s]', initial_lang, lang_candidate)
            os.environ['LANG'] = lang_candidate
            lang2log = '$LANG(explicit)=%s' % lang_candidate

        if __install_domain(domain = domain, prefer_local_catalog = prefer_local_catalog, language = lang2log):
            return True

    # install a dummy translation class
    _log.warning("falling back to NullTranslations() class")
    # this shouldn't fail
    dummy = gettext.NullTranslations()
    dummy.install()
    return True
337 338 #---------------------------------------------------------------------------
def __install_domain(domain, prefer_local_catalog, language='?'):
    """Search for a message catalog of <domain> and install the first working one.

    Candidate base directories are, in this order:

    - the local "po" directories (next to / one level above the script),
      if <prefer_local_catalog> is true
    - the standard location as reported by gettext.bindtextdomain() (POSIX only)
    - "${<SCRIPTNAME>_DIR}/po", if that environment variable is set
    - the local "po" directories, if <prefer_local_catalog> is false

    A candidate counts as working if, after gettext.install(), _() actually
    translates the module's verification tag. On success the installed _()
    is remembered in <_translate_original> and builtins._ is replaced by
    the checking wrapper _translate_protected().

    Args:
        domain: name of the message catalog
        prefer_local_catalog: whether to try the local "po" directories first
        language: only used for logging

    Returns:
        True if a working message catalog was installed, else False.
    """
    global _translate_original

    # one level above path to binary:
    # last resort for inferior operating systems such as DOS/Windows
    # strip one directory level
    # this is a rather neat trick :-)
    script_dir = os.path.dirname(sys.argv[0])
    dir_above_binary = os.path.abspath(os.path.join(script_dir, '..', 'po'))
    # in path to binary
    dir_of_binary = os.path.abspath(os.path.join(script_dir, 'po'))

    # search for message catalog
    candidate_PO_dirs = []

    # - locally
    if prefer_local_catalog:
        _log.debug('prioritizing local message catalog')
        _log.debug('looking one level above binary install directory: %s', dir_above_binary)
        candidate_PO_dirs.append(dir_above_binary)
        _log.debug('looking in binary install directory: %s', dir_of_binary)
        candidate_PO_dirs.append(dir_of_binary)

    # - standard places
    if os.name == 'posix':
        _log.debug('system is POSIX, looking in standard locations (see Python Manual)')
        # if this is reported to segfault/fail/except on some
        # systems we may have to assume "sys.prefix/share/locale/"
        candidate_PO_dirs.append(gettext.bindtextdomain(domain))
    else:
        _log.debug('No use looking in standard POSIX locations - not a POSIX system.')

    # - $(<script-name>_DIR)/
    env_key = "%s_DIR" % os.path.splitext(os.path.basename(sys.argv[0]))[0].upper()
    _log.debug('looking at ${%s}', env_key)
    if env_key in os.environ:
        loc_dir = os.path.abspath(os.path.join(os.environ[env_key], 'po'))
        _log.debug('${%s} = "%s" -> [%s]', env_key, os.environ[env_key], loc_dir)
        candidate_PO_dirs.append(loc_dir)
    else:
        _log.info("${%s} not set", env_key)

    # - locally
    if not prefer_local_catalog:
        _log.debug('looking above binary install directory [%s]', dir_above_binary)
        candidate_PO_dirs.append(dir_above_binary)
        _log.debug('looking in binary install directory [%s]', dir_of_binary)
        candidate_PO_dirs.append(dir_of_binary)

    # now try to actually install it
    for candidate_PO_dir in candidate_PO_dirs:
        _log.debug('trying with (base=%s, %s, domain=%s)', candidate_PO_dir, language, domain)
        _log.debug(' -> %s.mo', os.path.join(candidate_PO_dir, language, domain))
        if not os.path.exists(candidate_PO_dir):
            continue
        try:
            gettext.install(domain, candidate_PO_dir)
        except Exception:
            _log.exception('installing text domain [%s] failed from [%s]', domain, candidate_PO_dir)
            continue
        # does it translate ?
        translated_tag = _(__orig_tag__)
        if translated_tag == __orig_tag__:
            _log.debug('does not translate: [%s] => [%s]', __orig_tag__, translated_tag)
            continue
        _log.debug('found msg catalog: [%s] => [%s]', __orig_tag__, translated_tag)
        # keep the real _() around and route all translations through the checking wrapper
        _translate_original = builtins._
        builtins._ = _translate_protected
        return True

    return False

#===========================================================================
# whether the "locale encoding is None" situation has been logged already
_encoding_mismatch_already_logged = False
# encoding remembered by get_encoding() once it has been determined
_current_encoding = None

def get_encoding():
    """Try to get a sane encoding.

    On MacOSX locale.setlocale(locale.LC_ALL, '') does not
    have the desired effect, so that locale.getlocale()[1]
    still returns None. So in that case try to fallback to
    locale.getpreferredencoding().

    Sources consulted, in order:

    <sys.getdefaultencoding()>
        - what Python itself uses to convert string <-> unicode
          when no other encoding was specified
        - used (and remembered) unless it is 'ascii'
    <locale.getlocale()[1]>
        - what the current locale is *actually* using
          as the encoding for text conversion
        - used (and remembered) unless it is None
    <locale.getpreferredencoding()>
        - what the current locale would *recommend* using
          as the encoding for text conversion
        - last resort, not remembered
    """
    global _current_encoding, _encoding_mismatch_already_logged

    if _current_encoding is None:
        python_default = sys.getdefaultencoding()
        if python_default != 'ascii':
            _current_encoding = python_default
        else:
            # may well be None again, in which case nothing gets remembered
            _current_encoding = locale.getlocale()[1]

    if _current_encoding is not None:
        return _current_encoding

    # report the fallback once only
    if not _encoding_mismatch_already_logged:
        _log.debug('*actual* encoding of locale is None, using encoding *recommended* by locale')
        _encoding_mismatch_already_logged = True

    return locale.getpreferredencoding(do_setlocale=False)

#===========================================================================
# Main
#---------------------------------------------------------------------------
if __name__ == "__main__":

    # the self-test only runs when explicitly asked for: "<script> test [<domain>]"
    if len(sys.argv) == 1 or sys.argv[1] != 'test':
        sys.exit()

    logging.basicConfig(level = logging.DEBUG)
    #----------------------------------------------------------------------
    def test_strcoll():
        """Print the locale.strcoll() result for a few pairs of strings."""
        candidates = [
#            (u'a', u'a'),
#            (u'a', u'b'),
#            (u'1', u'1'),
#            (u'1', u'2'),
#            (u'A', u'A'),
#            (u'a', u'A'),
            ('\u270d', '\u270d'),
            ('4', '\u270d' + '4'),
            ('4.4', '\u270d' + '4.4'),
            ('44', '\u270d' + '44'),
            ('4', '\u270d' + '9'),
            ('4', '\u270d' + '2'),
#            (u'9', u'\u270d' + u'9'),
#            (u'9', u'\u270d' + u'4'),
        ]
        for left, right in candidates:
            print(left, '<vs>', right, '=', locale.strcoll(left, right))
#            print(right, u'<vs>', left, '=', locale.strcoll(right, left))

    #----------------------------------------------------------------------
    banner = "======================================================================"
    print(banner)
    print("GNUmed i18n")
    print("")
    print("authors:", __author__)
    print("license:", __license__)
    print(banner)

    activate_locale()
    print("system locale: ", system_locale, "; levels:", system_locale_level)
    print("likely encoding:", get_encoding())

    # an optional second argument names the text domain to install
    install_domain(domain = sys.argv[2] if len(sys.argv) > 2 else None)

    test_strcoll()

    # ********************************************************************* #
    # == do not remove this line ========================================== #
    # it is needed to check for successful installation of                  #
    # the desired message catalog                                           #
    # ********************************************************************* #
    tmp = _('Translate this or i18n into <en_EN> will not work properly !')	#
    # ********************************************************************* #
    # ********************************************************************* #