Package Gnumed :: Package pycommon :: Module gmPG2

Source Code for Module Gnumed.pycommon.gmPG2

   1  """GNUmed PostgreSQL connection handling. 
   2   
   3  TODO: iterator/generator batch fetching: 
   4          - http://groups-beta.google.com/group/comp.lang.python/msg/7ff516d7d9387dad 
   5          - search Google for "Geneator/Iterator Nesting Problem - Any Ideas? 2.4" 
   6   
   7  winner: 
   8  def resultset_functional_batchgenerator(cursor, size=100): 
   9          for results in iter(lambda: cursor.fetchmany(size), []): 
  10                  for rec in results: 
  11                          yield rec 
  12  """ 
# =======================================================================
__author__  = "K.Hilbert <Karsten.Hilbert@gmx.net>"
__license__ = 'GPL v2 or later (details at http://www.gnu.org)'


# stdlib
import time
import sys
import os
import stat
import codecs
import logging
import datetime as pydt
import re as regex
import threading
import hashlib
import shutil


# GNUmed
if __name__ == '__main__':
	sys.path.insert(0, '../../')
from Gnumed.pycommon import gmLoginInfo
from Gnumed.pycommon import gmExceptions
from Gnumed.pycommon import gmDateTime
from Gnumed.pycommon import gmI18N
from Gnumed.pycommon import gmLog2
from Gnumed.pycommon import gmTools
from Gnumed.pycommon import gmConnectionPool
from Gnumed.pycommon.gmTools import prompted_input, u_replacement_character, format_dict_like


_log = logging.getLogger('gm.db')


# 3rd party
try:
	import psycopg2 as dbapi
except ImportError:
	_log.exception("Python database adapter psycopg2 not found.")
	print("CRITICAL ERROR: Cannot find module psycopg2 for connecting to the database server.")
	raise

import psycopg2.errorcodes as sql_error_codes

PG_ERROR_EXCEPTION = dbapi.Error

# =======================================================================
default_database = 'gnumed_v22'

postgresql_version_string = None

# =======================================================================
# global data
# =======================================================================

known_schema_hashes = {
	0: 'not released, testing only',
	2: 'b09d50d7ed3f91ddf4c4ddb8ea507720',
	3: 'e73718eaf230d8f1d2d01afa8462e176',
	4: '4428ccf2e54c289136819e701bb095ea',
	5: '7e7b093af57aea48c288e76632a382e5',	# ... old (v1) style hashes
	6: '90e2026ac2efd236da9c8608b8685b2d',	# new (v2) style hashes ...
	7: '6c9f6d3981483f8e9433df99d1947b27',
	8: '89b13a7af83337c3aad153b717e52360',
	9: '641a9b2be3c378ffc2bb2f0b1c9f051d',
	10: '7ef42a8fb2bd929a2cdd0c63864b4e8a',
	11: '03042ae24f3f92877d986fb0a6184d76',
	12: '06183a6616db62257e22814007a8ed07',
	13: 'fab7c1ae408a6530c47f9b5111a0841e',
	14: 'e170d543f067d1ea60bfe9076b1560cf',
	15: '70012ff960b77ecdff4981c94b5b55b6',
	16: '0bcf44ca22c479b52976e5eda1de8161',
	17: '161428ee97a00e3bf56168c3a15b7b50',
	18: 'a0f9efcabdecfb4ddb6d8c0b69c02092',
	#19: '419e5225259c53dd36ad80d82066ff02',	# 19.0 only
	#19: '9765373098b03fb208332498f34cd4b5',	# until 19.11
	19: '57f009a159f55f77525cc0291e0c8b60',		# starting with 19.12
	20: 'baed1901ed4c2f272b56c8cb2c6d88e8',
	21: 'e6a51a89dd22b75b61ead8f7083f251f',
	22: 'bf45f01327fb5feb2f5d3c06ba4a6792'
}

map_schema_hash2version = {
	'b09d50d7ed3f91ddf4c4ddb8ea507720': 2,
	'e73718eaf230d8f1d2d01afa8462e176': 3,
	'4428ccf2e54c289136819e701bb095ea': 4,
	'7e7b093af57aea48c288e76632a382e5': 5,
	'90e2026ac2efd236da9c8608b8685b2d': 6,
	'6c9f6d3981483f8e9433df99d1947b27': 7,
	'89b13a7af83337c3aad153b717e52360': 8,
	'641a9b2be3c378ffc2bb2f0b1c9f051d': 9,
	'7ef42a8fb2bd929a2cdd0c63864b4e8a': 10,
	'03042ae24f3f92877d986fb0a6184d76': 11,
	'06183a6616db62257e22814007a8ed07': 12,
	'fab7c1ae408a6530c47f9b5111a0841e': 13,
	'e170d543f067d1ea60bfe9076b1560cf': 14,
	'70012ff960b77ecdff4981c94b5b55b6': 15,
	'0bcf44ca22c479b52976e5eda1de8161': 16,
	'161428ee97a00e3bf56168c3a15b7b50': 17,
	'a0f9efcabdecfb4ddb6d8c0b69c02092': 18,
	#'419e5225259c53dd36ad80d82066ff02': 19,	# 19.0 only
	#'9765373098b03fb208332498f34cd4b5': 19,	# until 19.11
	'57f009a159f55f77525cc0291e0c8b60': 19,		# starting with 19.12
	'baed1901ed4c2f272b56c8cb2c6d88e8': 20,
	'e6a51a89dd22b75b61ead8f7083f251f': 21,
	'bf45f01327fb5feb2f5d3c06ba4a6792': 22
}

map_client_branch2required_db_version = {
	'GIT tree': 0,
	'0.3': 9,
	'0.4': 10,
	'0.5': 11,
	'0.6': 12,
	'0.7': 13,
	'0.8': 14,
	'0.9': 15,
	'1.0': 16,		# intentional duplicate with 1.1
	'1.1': 16,
	'1.2': 17,
	'1.3': 18,
	'1.4': 19,
	'1.5': 20,
	'1.6': 21,
	'1.7': 22,
	'1.8': 22		# yes, SAME as 1.7, no DB change
}

# get columns and data types for a given table
query_table_col_defs = """select
	cols.column_name,
	cols.udt_name
from
	information_schema.columns cols
where
	cols.table_schema = %s
		and
	cols.table_name = %s
order by
	cols.ordinal_position"""

query_table_attributes = """select
	cols.column_name
from
	information_schema.columns cols
where
	cols.table_schema = %s
		and
	cols.table_name = %s
order by
	cols.ordinal_position"""

# only works for single-column FKs but that's fine
# needs gm-dbo, any-doc won't work
SQL_foreign_key_name = """SELECT
	fk_tbl.*,
	(SELECT nspname FROM pg_catalog.pg_namespace WHERE oid = fk_tbl.connamespace) AS constraint_schema,
	fk_tbl.conname AS constraint_name,
	(SELECT nspname FROM pg_catalog.pg_namespace WHERE oid = (SELECT relnamespace FROM pg_class WHERE oid = fk_tbl.conrelid)) AS source_schema,
	(SELECT relname FROM pg_catalog.pg_class WHERE oid = fk_tbl.conrelid) AS source_table,
	(SELECT attname FROM pg_catalog.pg_attribute WHERE attnum = fk_tbl.conkey[1] AND attrelid = (%(src_schema)s || '.' || %(src_tbl)s)::regclass) AS source_column,
	(SELECT nspname FROM pg_catalog.pg_namespace WHERE oid = (SELECT relnamespace FROM pg_class WHERE oid = fk_tbl.confrelid)) AS target_schema,
	(SELECT relname FROM pg_catalog.pg_class WHERE oid = fk_tbl.confrelid) AS target_table,
	(SELECT attname FROM pg_catalog.pg_attribute WHERE attnum = fk_tbl.confkey[1] AND attrelid = (%(target_schema)s || '.' || %(target_tbl)s)::regclass) AS target_column
FROM
	pg_catalog.pg_constraint fk_tbl
WHERE
	fk_tbl.contype = 'f'
		AND
	fk_tbl.conrelid = (%(src_schema)s || '.' || %(src_tbl)s)::regclass
		AND
	fk_tbl.conkey[1] = (
		SELECT
			col_tbl1.attnum
		FROM
			pg_catalog.pg_attribute col_tbl1
		WHERE
			col_tbl1.attname = %(src_col)s
				AND
			col_tbl1.attrelid = (%(src_schema)s || '.' || %(src_tbl)s)::regclass
	)
		AND
	fk_tbl.confrelid = (%(target_schema)s || '.' || %(target_tbl)s)::regclass
		AND
	fk_tbl.confkey[1] = (
		SELECT
			col_tbl2.attnum
		FROM
			pg_catalog.pg_attribute col_tbl2
		WHERE
			col_tbl2.attname = %(target_col)s
				AND
			col_tbl2.attrelid = (%(target_schema)s || '.' || %(target_tbl)s)::regclass
	)
"""

SQL_get_index_name = """
SELECT
	(SELECT nspname FROM pg_namespace WHERE pg_namespace.oid = pg_class.relnamespace)
		AS index_schema,
	pg_class.relname
		AS index_name
FROM
	pg_class
WHERE
	pg_class.oid IN (
		SELECT
			indexrelid
		FROM
			pg_index
		WHERE
			pg_index.indrelid = %(idx_tbl)s::regclass
				AND
			pg_index.indnatts = 1		-- only one column in index
				AND
			pg_index.indkey[0] IN (
				SELECT
					pg_attribute.attnum
				FROM
					pg_attribute
				WHERE
					pg_attribute.attrelid = %(idx_tbl)s::regclass
						AND
					pg_attribute.attname = %(idx_col)s
			)
	)
"""

SQL_get_pk_col_def = """
SELECT
	pg_attribute.attname
		AS pk_col,
	format_type(pg_attribute.atttypid, pg_attribute.atttypmod)
		AS pk_type
FROM pg_index, pg_class, pg_attribute, pg_namespace
WHERE
	pg_class.oid = %(table)s::regclass
		AND
	indrelid = pg_class.oid
		AND
--	nspname = %%(schema)s
--		AND
	pg_class.relnamespace = pg_namespace.oid
		AND
	pg_attribute.attrelid = pg_class.oid
		AND
	pg_attribute.attnum = any(pg_index.indkey)
		AND
	indisprimary
"""
# =======================================================================
# login API
# =======================================================================
def __request_login_params_tui():
	"""Text mode request of database login parameters"""
	import getpass
	login = gmLoginInfo.LoginInfo()

	print("\nPlease enter the required login parameters:")
	try:
		login.host = prompted_input(prompt = "host ('' = non-TCP/IP)", default = '')
		login.database = prompted_input(prompt = "database", default = default_database)
		login.user = prompted_input(prompt = "user name", default = '')
		tmp = 'password for "%s" (not shown): ' % login.user
		login.password = getpass.getpass(tmp)
		gmLog2.add_word2hide(login.password)
		login.port = prompted_input(prompt = "port", default = 5432)
	except KeyboardInterrupt:
		_log.warning("user cancelled text mode login dialog")
		print("user cancelled text mode login dialog")
		raise gmExceptions.ConnectionError(_("Cannot connect to database without login information!"))

	creds = gmConnectionPool.cPGCredentials()
	creds.database = login.database
	creds.host = login.host
	creds.port = login.port
	creds.user = login.user
	creds.password = login.password

	return login, creds

#---------------------------------------------------
def __request_login_params_gui_wx():
	"""GUI (wx) input request for database login parameters.

	Returns gmLoginInfo.LoginInfo object
	"""
	import wx
	# OK, wxPython was already loaded. But has the main Application instance
	# been initialized yet ? if not, the exception will kick us out
	if wx.GetApp() is None:
		raise AssertionError(_("The wxPython GUI framework hasn't been initialized yet!"))

	# Let's launch the login dialog.
	# If wx was not initialized / no main App loop, an exception should be raised anyway.
	import gmAuthWidgets
	dlg = gmAuthWidgets.cLoginDialog(None, -1)
	dlg.ShowModal()
	login = dlg.panel.GetLoginInfo()
	dlg.DestroyLater()
	# if user cancelled or something else went wrong, raise an exception
	if login is None:
		raise gmExceptions.ConnectionError(_("Can't connect to database without login information!"))

	gmLog2.add_word2hide(login.password)
	creds = gmConnectionPool.cPGCredentials()
	creds.database = login.database
	creds.host = login.host
	creds.port = login.port
	creds.user = login.user
	creds.password = login.password
	return login, creds

#---------------------------------------------------
def request_login_params():
	"""Request login parameters for database connection."""
	# Are we inside X ?
	# If we aren't, wxGTK will crash hard at the C level with "can't open Display".
	if 'DISPLAY' in os.environ:
		# try wxPython GUI
		try:
			return __request_login_params_gui_wx()
		except Exception:
			pass

	# well, either we are on the console or
	# wxPython does not work, use text mode
	return __request_login_params_tui()

# =======================================================================
# metadata API
# =======================================================================
def database_schema_compatible(link_obj=None, version=None, verbose=True):
	expected_hash = known_schema_hashes[version]
	if version == 0:
		args = {'ver': 9999}
	else:
		args = {'ver': version}
	rows, idx = run_ro_queries (
		link_obj = link_obj,
		queries = [{
			'cmd': 'select md5(gm.concat_table_structure(%(ver)s::integer)) as md5',
			'args': args
		}]
	)
	if rows[0]['md5'] != expected_hash:
		_log.error('database schema version mismatch')
		_log.error('expected: %s (%s)' % (version, expected_hash))
		_log.error('detected: %s (%s)' % (get_schema_version(link_obj = link_obj), rows[0]['md5']))
		if verbose:
			_log.debug('schema dump follows:')
			for line in get_schema_structure(link_obj = link_obj).split():
				_log.debug(line)
			_log.debug('schema revision history dump follows:')
			for line in get_schema_revision_history(link_obj = link_obj):
				_log.debug(' - '.join(line))
		return False

	_log.info('detected schema version [%s], hash [%s]' % (map_schema_hash2version[rows[0]['md5']], rows[0]['md5']))
	return True
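
#------------------------------------------------------------------------
# Editor's sketch, not original code: a typical startup check pairing the
# branch-to-schema map with database_schema_compatible().
def __example_check_schema_compatibility():
	required_version = map_client_branch2required_db_version['1.8']
	if not database_schema_compatible(version = required_version):
		raise gmExceptions.ConnectionError('database schema does not match this client')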

#------------------------------------------------------------------------
def get_schema_version(link_obj=None):
	rows, idx = run_ro_queries(link_obj = link_obj, queries = [{'cmd': 'select md5(gm.concat_table_structure()) as md5'}])
	try:
		return map_schema_hash2version[rows[0]['md5']]
	except KeyError:
		return 'unknown database schema version, MD5 hash is [%s]' % rows[0]['md5']

#------------------------------------------------------------------------
def get_schema_structure(link_obj=None):
	rows, idx = run_ro_queries(link_obj = link_obj, queries = [{'cmd': 'select gm.concat_table_structure()'}])
	return rows[0][0]

#------------------------------------------------------------------------
def get_schema_hash(link_obj=None):
	rows, idx = run_ro_queries(link_obj = link_obj, queries = [{'cmd': 'select md5(gm.concat_table_structure()) as md5'}])
	return rows[0]['md5']

#------------------------------------------------------------------------
def get_schema_revision_history(link_obj=None):
	if table_exists(link_obj = link_obj, schema = 'gm', table = 'schema_revision'):
		cmd = """
			SELECT
				imported::text,
				version,
				filename
			FROM gm.schema_revision
			ORDER BY imported"""
	elif table_exists(link_obj = link_obj, schema = 'public', table = 'gm_schema_revision'):
		cmd = """
			SELECT
				imported::text,
				version,
				filename
			FROM public.gm_schema_revision
			ORDER BY imported"""
	else:
		return []

	rows, idx = run_ro_queries(link_obj = link_obj, queries = [{'cmd': cmd}])
	return rows

#------------------------------------------------------------------------
def get_current_user():
	rows, idx = run_ro_queries(queries = [{'cmd': 'select CURRENT_USER'}])
	return rows[0][0]

#------------------------------------------------------------------------
def get_foreign_keys2column(schema='public', table=None, column=None, link_obj=None):
	"""Get the foreign keys pointing to schema.table.column.

	Does not properly work with multi-column FKs.
	GNUmed doesn't use any, however.
	"""
	args = {
		'schema': schema,
		'tbl': table,
		'col': column
	}
	cmd = """
SELECT
	%(schema)s AS referenced_schema,
	%(tbl)s AS referenced_table,
	%(col)s AS referenced_column,
	pgc.confkey AS referenced_column_list,
	pgc.conrelid::regclass AS referencing_table,
	pgc.conkey AS referencing_column_list,
	(select attname from pg_attribute where attnum = pgc.conkey[1] and attrelid = pgc.conrelid) AS referencing_column
FROM
	pg_constraint pgc
WHERE
	pgc.contype = 'f'
		AND
	pgc.confrelid = (
		select oid from pg_class where relname = %(tbl)s and relnamespace = (
			select oid from pg_namespace where nspname = %(schema)s
		)
	) and
	(
		select attnum
		from pg_attribute
		where
			attrelid = (select oid from pg_class where relname = %(tbl)s and relnamespace = (
				select oid from pg_namespace where nspname = %(schema)s
			))
				and
			attname = %(col)s
	) = any(pgc.confkey)
"""
	rows, idx = run_ro_queries (
		link_obj = link_obj,
		queries = [
			{'cmd': cmd, 'args': args}
		]
	)
	return rows

#------------------------------------------------------------------------
def get_index_name(indexed_table=None, indexed_column=None, link_obj=None):
	args = {
		'idx_tbl': indexed_table,
		'idx_col': indexed_column
	}
	rows, idx = run_ro_queries (
		link_obj = link_obj,
		queries = [{'cmd': SQL_get_index_name, 'args': args}],
		get_col_idx = False
	)
	return rows

#------------------------------------------------------------------------
def get_foreign_key_names(src_schema=None, src_table=None, src_column=None, target_schema=None, target_table=None, target_column=None, link_obj=None):
	args = {
		'src_schema': src_schema,
		'src_tbl': src_table,
		'src_col': src_column,
		'target_schema': target_schema,
		'target_tbl': target_table,
		'target_col': target_column
	}
	rows, idx = run_ro_queries (
		link_obj = link_obj,
		queries = [{'cmd': SQL_foreign_key_name, 'args': args}],
		get_col_idx = False
	)
	return rows

#------------------------------------------------------------------------
def get_child_tables(schema='public', table=None, link_obj=None):
	"""Return child tables of <table>."""
	cmd = """
select
	pgn.nspname as namespace,
	pgc.relname as table
from
	pg_namespace pgn,
	pg_class pgc
where
	pgc.relnamespace = pgn.oid
		and
	pgc.oid in (
		select inhrelid from pg_inherits where inhparent = (
			select oid from pg_class where
				relnamespace = (select oid from pg_namespace where nspname = %(schema)s)
					and
				relname = %(table)s
		)
	)"""
	rows, idx = run_ro_queries(link_obj = link_obj, queries = [{'cmd': cmd, 'args': {'schema': schema, 'table': table}}])
	return rows

#------------------------------------------------------------------------
def schema_exists(link_obj=None, schema='gm'):
	cmd = """SELECT EXISTS (SELECT 1 FROM pg_namespace WHERE nspname = %(schema)s)"""
	args = {'schema': schema}
	rows, idx = run_ro_queries(link_obj = link_obj, queries = [{'cmd': cmd, 'args': args}])
	return rows[0][0]

#------------------------------------------------------------------------
def table_exists(link_obj=None, schema=None, table=None):
	"""Returns false, true."""
	cmd = """
select exists (
	select 1 from information_schema.tables
	where
		table_schema = %s and
		table_name = %s and
		table_type = 'BASE TABLE'
)"""
	rows, idx = run_ro_queries(link_obj = link_obj, queries = [{'cmd': cmd, 'args': (schema, table)}])
	return rows[0][0]

#------------------------------------------------------------------------
def function_exists(link_obj=None, schema=None, function=None):
	cmd = """
SELECT EXISTS (
	SELECT 1 FROM pg_proc
	WHERE proname = %(func)s AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = %(schema)s)
)
"""
	args = {
		'func': function,
		'schema': schema
	}
	rows, idx = run_ro_queries(link_obj = link_obj, queries = [{'cmd': cmd, 'args': args}])
	return rows[0][0]

#------------------------------------------------------------------------
def get_col_indices(cursor=None):
	if cursor.description is None:
		_log.error('no result description available: unused cursor or last query did not select rows')
		return None

	col_indices = {}
	col_index = 0
	for col_desc in cursor.description:
		col_name = col_desc[0]
		# a query like "select 1,2;" will return two columns of the same name !
		# hence adjust to that, note, however, that dict-style access won't work
		# on results of such queries ...
		if col_name in col_indices:
			col_name = '%s_%s' % (col_name, col_index)
		col_indices[col_name] = col_index
		col_index += 1

	return col_indices
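
#------------------------------------------------------------------------
# Editor's sketch, not original code: as per the comment above, duplicate
# column names are disambiguated by appending the column index.
def __example_col_indices(conn):
	curs = conn.cursor()
	curs.execute('select 1, 2')	# both columns come back named "?column?"
	print(get_col_indices(curs))	# {'?column?': 0, '?column?_1': 1}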

#------------------------------------------------------------------------
def get_col_defs(link_obj=None, schema='public', table=None):
	rows, idx = run_ro_queries(link_obj = link_obj, queries = [{'cmd': query_table_col_defs, 'args': (schema, table)}])
	col_names = []
	col_type = {}
	for row in rows:
		col_names.append(row[0])
		# map array types
		if row[1].startswith('_'):
			col_type[row[0]] = row[1][1:] + '[]'
		else:
			col_type[row[0]] = row[1]
	col_defs = []
	col_defs.append(col_names)
	col_defs.append(col_type)
	return col_defs

#------------------------------------------------------------------------
def get_col_names(link_obj=None, schema='public', table=None):
	"""Return column attributes of table"""
	rows, idx = run_ro_queries(link_obj = link_obj, queries = [{'cmd': query_table_attributes, 'args': (schema, table)}])
	cols = []
	for row in rows:
		cols.append(row[0])
	return cols

#------------------------------------------------------------------------
# i18n functions
#------------------------------------------------------------------------
def export_translations_from_database(filename=None):
	tx_file = open(filename, mode = 'wt', encoding = 'utf8')
	tx_file.write('-- GNUmed database string translations exported %s\n' % gmDateTime.pydt_now_here().strftime('%Y-%m-%d %H:%M'))
	tx_file.write('-- - contains translations for each of [%s]\n' % ', '.join(get_translation_languages()))
	tx_file.write('-- - user database language is set to [%s]\n\n' % get_current_user_language())
	tx_file.write('-- Please email this file to <gnumed-devel@gnu.org>.\n')
	tx_file.write('-- ----------------------------------------------------------------------------------------------\n\n')
	tx_file.write('set default_transaction_read_only to off;\n\n')
	tx_file.write("set client_encoding to 'utf-8';\n\n")
	tx_file.write('\\unset ON_ERROR_STOP\n\n')

	cmd = 'SELECT lang, orig, trans FROM i18n.translations ORDER BY lang, orig'
	rows, idx = run_ro_queries(queries = [{'cmd': cmd}], get_col_idx = False)
	for row in rows:
		line = "select i18n.upd_tx(E'%s', E'%s', E'%s');\n" % (
			row['lang'].replace("'", "\\'"),
			row['orig'].replace("'", "\\'"),
			row['trans'].replace("'", "\\'")
		)
		tx_file.write(line)
	tx_file.write('\n')

	tx_file.write('\\set ON_ERROR_STOP 1\n')
	tx_file.close()

	return True

#------------------------------------------------------------------------
def delete_translation_from_database(link_obj=None, language=None, original=None):
	cmd = 'DELETE FROM i18n.translations WHERE lang = %(lang)s AND orig = %(orig)s'
	args = {'lang': language, 'orig': original}
	run_rw_queries(link_obj = link_obj, queries = [{'cmd': cmd, 'args': args}], return_data = False, end_tx = True)
	return True

#------------------------------------------------------------------------
def update_translation_in_database(language=None, original=None, translation=None, link_obj=None):
	if language is None:
		cmd = 'SELECT i18n.upd_tx(%(orig)s, %(trans)s)'
	else:
		cmd = 'SELECT i18n.upd_tx(%(lang)s, %(orig)s, %(trans)s)'
	args = {'lang': language, 'orig': original, 'trans': translation}
	run_rw_queries(queries = [{'cmd': cmd, 'args': args}], return_data = False, link_obj = link_obj)
	return args

#------------------------------------------------------------------------
def get_translation_languages():
	rows, idx = run_ro_queries (
		queries = [{'cmd': 'select distinct lang from i18n.translations'}]
	)
	return [ r[0] for r in rows ]

#------------------------------------------------------------------------
def get_database_translations(language=None, order_by=None):
	args = {'lang': language}
	_log.debug('language [%s]', language)

	if order_by is None:
		order_by = 'ORDER BY lang, orig'
	else:
		order_by = 'ORDER BY %s' % order_by

	if language is None:
		cmd = """
		SELECT DISTINCT ON (orig, lang)
			lang, orig, trans
		FROM ((
			-- strings stored as translation keys whether translated or not
			SELECT
				NULL as lang,
				ik.orig,
				NULL AS trans
			FROM
				i18n.keys ik
		) UNION ALL (
			-- already translated strings
			SELECT
				it.lang,
				it.orig,
				it.trans
			FROM
				i18n.translations it
		)) as translatable_strings
		%s""" % order_by
	else:
		cmd = """
		SELECT DISTINCT ON (orig, lang)
			lang, orig, trans
		FROM ((
			-- strings stored as translation keys whether translated or not
			SELECT
				%%(lang)s as lang,
				ik.orig,
				i18n._(ik.orig, %%(lang)s) AS trans
			FROM
				i18n.keys ik
		) UNION ALL (
			-- already translated strings
			SELECT
				%%(lang)s as lang,
				it.orig,
				i18n._(it.orig, %%(lang)s) AS trans
			FROM
				i18n.translations it
		)) AS translatable_strings
		%s""" % order_by

	rows, idx = run_ro_queries(queries = [{'cmd': cmd, 'args': args}], get_col_idx = False)
	if rows is None:
		_log.error('no translatable strings found')
	else:
		_log.debug('%s translatable strings found', len(rows))
	return rows

#------------------------------------------------------------------------
def get_current_user_language():
	cmd = 'select i18n.get_curr_lang()'
	rows, idx = run_ro_queries(queries = [{'cmd': cmd}])
	return rows[0][0]

#------------------------------------------------------------------------
def set_user_language(user=None, language=None):
	"""Set the user language in the database.

	user = None: current db user
	language = None: unset
	"""
	_log.info('setting database language for user [%s] to [%s]', user, language)
	args = {'usr': user, 'lang': language}
	if language is None:
		if user is None:
			queries = [{'cmd': 'select i18n.unset_curr_lang()'}]
		else:
			queries = [{'cmd': 'select i18n.unset_curr_lang(%(usr)s)', 'args': args}]
		queries.append({'cmd': 'select True'})
	else:
		if user is None:
			queries = [{'cmd': 'select i18n.set_curr_lang(%(lang)s)', 'args': args}]
		else:
			queries = [{'cmd': 'select i18n.set_curr_lang(%(lang)s, %(usr)s)', 'args': args}]
	rows, idx = run_rw_queries(queries = queries, return_data = True)
	if not rows[0][0]:
		_log.error('cannot set database language to [%s] for user [%s]', language, user)
	return rows[0][0]

#------------------------------------------------------------------------
def force_user_language(language=None):
	"""Set the user language in the database.

	- regardless of whether there is any translation available
	- only for the current user
	"""
	_log.info('forcing database language for current db user to [%s]', language)
	run_rw_queries(queries = [{
		'cmd': 'select i18n.force_curr_lang(%(lang)s)',
		'args': {'lang': language}
	}])

# =======================================================================
# query runners and helpers
# =======================================================================
def send_maintenance_notification():
	cmd = 'notify "db_maintenance_warning"'
	run_rw_queries(queries = [{'cmd': cmd}], return_data = False)

#------------------------------------------------------------------------
def send_maintenance_shutdown():
	cmd = 'notify "db_maintenance_disconnect"'
	run_rw_queries(queries = [{'cmd': cmd}], return_data = False)

#------------------------------------------------------------------------
def is_pg_interval(candidate=None):
	cmd = 'SELECT %(candidate)s::interval'
	try:
		rows, idx = run_ro_queries(queries = [{'cmd': cmd, 'args': {'candidate': candidate}}])
		return True
	except Exception:
		cmd = 'SELECT %(candidate)s::text::interval'
		try:
			rows, idx = run_ro_queries(queries = [{'cmd': cmd, 'args': {'candidate': candidate}}])
			return True
		except Exception:
			return False

#------------------------------------------------------------------------
def lock_row(link_obj=None, table=None, pk=None, exclusive=False):
	"""Uses pg_try_advisory_lock(_shared).

	- locks stack upon each other and need one unlock per lock
	- same connection:
		- all locks succeed
	- different connections:
		- shared + shared succeed
		- shared + exclusive fail
	"""
	_log.debug('locking row: [%s] [%s] (exclusive: %s)', table, pk, exclusive)
	if exclusive:
		cmd = """SELECT pg_try_advisory_lock('%s'::regclass::oid::int, %s)""" % (table, pk)
	else:
		cmd = """SELECT pg_try_advisory_lock_shared('%s'::regclass::oid::int, %s)""" % (table, pk)
	rows, idx = run_rw_queries(link_obj = link_obj, queries = [{'cmd': cmd}], get_col_idx = False, return_data = True)
	if rows[0][0]:
		return True

	_log.warning('cannot lock row: [%s] [%s] (exclusive: %s)', table, pk, exclusive)
	return False

#------------------------------------------------------------------------
def unlock_row(link_obj=None, table=None, pk=None, exclusive=False):
	"""Uses pg_advisory_unlock(_shared).

	- each lock needs one unlock
	"""
	_log.debug('trying to unlock row: [%s] [%s] (exclusive: %s)', table, pk, exclusive)
	if exclusive:
		cmd = "SELECT pg_advisory_unlock('%s'::regclass::oid::int, %s)" % (table, pk)
	else:
		cmd = "SELECT pg_advisory_unlock_shared('%s'::regclass::oid::int, %s)" % (table, pk)
	rows, idx = run_rw_queries(link_obj = link_obj, queries = [{'cmd': cmd}], get_col_idx = False, return_data = True)
	if rows[0][0]:
		return True

	_log.warning('cannot unlock row: [%s] [%s] (exclusive: %s)', table, pk, exclusive)
	return False

#------------------------------------------------------------------------
def row_is_locked(table=None, pk=None):
	"""Looks at pg_locks.

	- does not take into account locks other than 'advisory', however
	"""
	cmd = """SELECT EXISTS (
		SELECT 1 FROM pg_locks WHERE
			classid = '%s'::regclass::oid::int
				AND
			objid = %s
				AND
			locktype = 'advisory'
	)""" % (table, pk)
	rows, idx = run_ro_queries(queries = [{'cmd': cmd}], get_col_idx = False)
	if rows[0][0]:
		_log.debug('row is locked: [%s] [%s]', table, pk)
		return True

	_log.debug('row is NOT locked: [%s] [%s]', table, pk)
	return False

#------------------------------------------------------------------------
# BYTEA cache handling
#------------------------------------------------------------------------
def __generate_cached_filename(cache_key_data):
	md5 = hashlib.md5()
	md5.update(('%s' % cache_key_data).encode('utf8'))
	return os.path.join(gmTools.gmPaths().bytea_cache_dir, md5.hexdigest())

#------------------------------------------------------------------------
def __store_file_in_cache(filename, cache_key_data):
	cached_name = __generate_cached_filename(cache_key_data)
	_log.debug('[%s] -> [%s] -> [%s]', filename, cache_key_data, cached_name)
	if not gmTools.remove_file(cached_name, log_error = True, force = True):
		_log.error('cannot remove existing file [%s] for key [%s] from cache', filename, cached_name)
		return None

	PERMS_owner_only = 0o0660
	try:
		shutil.copyfile(filename, cached_name, follow_symlinks = True)
		os.chmod(cached_name, PERMS_owner_only)
	except shutil.SameFileError:
		_log.exception('file seems to exist in cache, despite having checked and possibly removed it just before')
		# don't use that file, it is unsafe, it might have come from
		# a race being exploited to make us use the wrong data, this
		# then constitutes a DOS attack against the cache but that's
		# far less problematic than using the wrong data for care
		return None
	except OSError:
		_log.exception('cannot copy file into cache: [%s] -> [%s]', filename, cached_name)
		return None
	except PermissionError:
		_log.exception('cannot set cache file [%s] permissions to [%s]', cached_name, stat.filemode(PERMS_owner_only))
		return None

	return cached_name

#------------------------------------------------------------------------
def __get_filename_in_cache(cache_key_data=None, data_size=None):
	"""Calculate and verify filename in cache given cache key details."""
	cached_name = __generate_cached_filename(cache_key_data)
	# (local name chosen so as not to shadow the "stat" module imported above)
	try:
		stat_result = os.stat(cached_name)
	except FileNotFoundError:
		return None

	_log.debug('cache hit: [%s] -> [%s] (%s)', cache_key_data, cached_name, stat_result)
	if os.path.islink(cached_name) or (not os.path.isfile(cached_name)):
		_log.error('object in cache is not a regular file: %s', cached_name)
		_log.error('possibly an attack, removing')
		if gmTools.remove_file(cached_name, log_error = True):
			return None
		raise Exception('cannot delete suspicious object in cache dir: %s' % cached_name)

	if stat_result.st_size == data_size:
		return cached_name

	_log.debug('size in cache [%s] <> expected size [%s], removing cached file', stat_result.st_size, data_size)
	if gmTools.remove_file(cached_name, log_error = True):
		return None
	raise Exception('cannot remove suspicious object from cache dir: %s' % cached_name)

#------------------------------------------------------------------------
def __get_file_from_cache(filename, cache_key_data=None, data_size=None, link2cached=True):
	"""Get file from cache if available."""
	cached_filename = __get_filename_in_cache(cache_key_data = cache_key_data, data_size = data_size)
	if cached_filename is None:
		return False

	if link2cached:
		try:
			# (hard)link as desired name, quite a few security
			# and archival tools refuse to handle softlinks
			os.link(cached_filename, filename)
			_log.debug('hardlinked [%s] as [%s]', cached_filename, filename)
			return True
		except Exception:
			pass
		_log.debug('cannot hardlink to cache, trying copy-from-cache')
	try:
		shutil.copyfile(cached_filename, filename, follow_symlinks = True)
		return True
	except shutil.SameFileError:
		# flaky - might be same name but different content
		pass
	except OSError:
		_log.exception('cannot copy cached file [%s] into [%s]', cached_filename, filename)
	# if cache fails entirely -> fall through to new file
	_log.debug('downloading new copy of file, despite found in cache')
	return False

#------------------------------------------------------------------------
def bytea2file(data_query=None, filename=None, chunk_size=0, data_size=None, data_size_query=None, conn=None, link2cached=True):
	if data_size == 0:
		open(filename, 'wb').close()
		return True

	if data_size is None:
		rows, idx = run_ro_queries(link_obj = conn, queries = [data_size_query])
		data_size = rows[0][0]
		if data_size == 0:
			open(filename, 'wb').close()
			return True

	if data_size is None:
		return False

	if conn is None:
		conn = gmConnectionPool.gmConnectionPool().get_connection()
	cache_key_data = '%s::%s' % (conn.dsn, data_query)
	found_in_cache = __get_file_from_cache(filename, cache_key_data = cache_key_data, data_size = data_size, link2cached = link2cached)
	if found_in_cache:
		# FIXME: start thread checking cache staleness on file
		return True

	outfile = open(filename, 'wb')
	result = bytea2file_object (
		data_query = data_query,
		file_obj = outfile,
		chunk_size = chunk_size,
		data_size = data_size,
		data_size_query = data_size_query,
		conn = conn
	)
	outfile.close()
	__store_file_in_cache(filename, cache_key_data)
	return result

#------------------------------------------------------------------------
def bytea2file_object(data_query=None, file_obj=None, chunk_size=0, data_size=None, data_size_query=None, conn=None):
	"""Store data from a bytea field into a file.

	<data_query>
	- dict {'cmd': ..., 'args': ...}
	- 'cmd' must be a string containing "... substring(data from %(start)s for %(size)s) ..."
	- 'args' must be a dict
	- must return one row with one field of type bytea
	<file_obj>
	- must be a file-like Python object
	<data_size>
	- integer of the total size of the expected data, or None
	<data_size_query>
	- dict {'cmd': ..., 'args': ...}
	- must return one row with one field with the octet_length() of the data field
	- used only when <data_size> is None
	"""
	if data_size == 0:
		return True

	# If the client sets an encoding other than the default we
	# will receive encoding-parsed data which isn't the binary
	# content we want. Hence we need to get our own connection.
	# It must be a read-write one so that we don't affect the
	# encoding for other users of the shared read-only
	# connections.
	# Actually, encodings shouldn't be applied to binary data
	# (eg. bytea types) in the first place but that is only
	# reported to be fixed > v7.4.
	# Further tests reveal that at least on PG 8.0 this bug still
	# manifests itself.
	if conn is None:
		conn = get_raw_connection(readonly = True)

	if data_size is None:
		rows, idx = run_ro_queries(link_obj = conn, queries = [data_size_query])
		data_size = rows[0][0]
	if data_size in [None, 0]:
		conn.rollback()
		return True

	max_chunk_size = 1024 * 1024 * 20	# 20 MB, works for typical CR DICOMs
	if chunk_size == 0:
		chunk_size = min(data_size, max_chunk_size)
	_log.debug('expecting %s bytes of BYTEA data in chunks of %s bytes', data_size, chunk_size)

	# Windoze sucks: it can't transfer objects of arbitrary size,
	# anyways, we need to split the transfer,
	# however, only possible if postgres >= 7.2
	needed_chunks, remainder = divmod(data_size, chunk_size)
	_log.debug('%s chunk(s), %s byte(s) remainder', needed_chunks, remainder)

	# retrieve chunks, skipped if data size < chunk size,
	# does this not carry the danger of cutting up multi-byte escape sequences ?
	# no, since bytea is binary,
	# yes, since in bytea there are *some* escaped values, still
	# no, since those are only escaped during *transfer*, not on-disk, hence
	# only complete escape sequences are put on the wire
	for chunk_id in range(needed_chunks):
		chunk_start = (chunk_id * chunk_size) + 1
		data_query['args']['start'] = chunk_start
		data_query['args']['size'] = chunk_size
		try:
			rows, idx = run_ro_queries(link_obj = conn, queries = [data_query])
		except Exception:
			_log.error('cannot retrieve chunk [%s/%s], size [%s], try decreasing chunk size' % (chunk_id + 1, needed_chunks, chunk_size))
			conn.rollback()
			raise
		# it would be a fatal error to see more than one result as ids are supposed to be unique
		file_obj.write(rows[0][0])

	# retrieve remainder
	if remainder > 0:
		chunk_start = (needed_chunks * chunk_size) + 1
		data_query['args']['start'] = chunk_start
		data_query['args']['size'] = remainder
		try:
			rows, idx = run_ro_queries(link_obj = conn, queries = [data_query])
		except Exception:
			_log.error('cannot retrieve remaining [%s] bytes' % remainder)
			conn.rollback()
			raise
		# it would be a fatal error to see more than one result as ids are supposed to be unique
		file_obj.write(rows[0][0])

	conn.rollback()
	return True
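
#------------------------------------------------------------------------
# Editor's sketch, not original code: a <data_query>/<data_size_query> pair
# satisfying the contract documented above; table and column names are
# illustrative only.
def __example_bytea_retrieval(filename, pk_of_row):
	data_query = {
		'cmd': 'SELECT substring(data from %(start)s for %(size)s) FROM blobs.doc_obj WHERE pk = %(pk)s',
		'args': {'pk': pk_of_row}
	}
	data_size_query = {
		'cmd': 'SELECT octet_length(data) FROM blobs.doc_obj WHERE pk = %(pk)s',
		'args': {'pk': pk_of_row}
	}
	return bytea2file (
		data_query = data_query,
		filename = filename,
		data_size_query = data_size_query
	)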

#------------------------------------------------------------------------
def file2bytea(query=None, filename=None, args=None, conn=None, file_md5=None):
	"""Store data from a file into a bytea field.

	The query must:
	- contain a format spec identifying the row (eg a primary key)
	  matching <args> if it is an UPDATE
	- contain a format spec " <field> = %(data)s::bytea"

	The query CAN return the MD5 of the inserted data:
		RETURNING md5(<field>) AS md5
	in which case the returned hash will be compared to the md5 of the file.
	"""
	retry_delay = 100	# milliseconds
	attempt = 0
	max_attempts = 3
	infile = None
	while attempt < max_attempts:
		attempt += 1
		try:
			infile = open(filename, "rb")
			break		# opened successfully, stop retrying
		except (BlockingIOError, FileNotFoundError, PermissionError):
			_log.exception('#%s: cannot open [%s]', attempt, filename)
			_log.error('retrying after %sms', retry_delay)
			infile = None
			time.sleep(retry_delay / 1000)
	if infile is None:
		return False

	data_as_byte_string = infile.read()
	infile.close()
	if args is None:
		args = {}
	# really still needed for BYTEA input ?
	args['data'] = memoryview(data_as_byte_string)
	del data_as_byte_string
	# insert the data
	if conn is None:
		conn = get_raw_connection(readonly = False)
		close_conn = True
	else:
		close_conn = False
	rows, idx = run_rw_queries(link_obj = conn, queries = [{'cmd': query, 'args': args}], end_tx = False, return_data = (file_md5 is not None))
	success_status = True
	if file_md5 is None:
		conn.commit()
	else:
		db_md5 = rows[0]['md5']
		if file_md5 != db_md5:
			conn.rollback()
			success_status = False
			_log.error('MD5 sums of data file and database BYTEA field do not match: [file::%s] <> [DB::%s]', file_md5, db_md5)
		else:
			conn.commit()
			_log.debug('MD5 sums of data file and database BYTEA field match: [file::%s] = [DB::%s]', file_md5, db_md5)
	if close_conn:
		conn.close()
	return success_status

#------------------------------------------------------------------------
def file2lo(filename=None, conn=None, file_md5=None):
	# 1 GB limit unless 64 bit Python build ...
	file_size = os.path.getsize(filename)
	if file_size > (1024 * 1024) * 1024:
		_log.debug('file size of [%s] > 1 GB, supposedly not supported by psycopg2 large objects (but seems to work anyway ?)', file_size)
		# return -1

	if conn is None:
		conn = get_raw_connection(readonly = False)
		close_conn = conn.close
	else:
		close_conn = __noop
	_log.debug('[%s] -> large object', filename)

	# insert the data
	lo = conn.lobject(0, 'w', 0, filename)
	lo_oid = lo.oid
	lo.close()
	_log.debug('large object OID: %s', lo_oid)

	# verify
	if file_md5 is None:
		conn.commit()
		close_conn()
		return lo_oid

	cmd = 'SELECT md5(lo_get(%(loid)s::oid))'
	args = {'loid': lo_oid}
	rows, idx = run_ro_queries(link_obj = conn, queries = [{'cmd': cmd, 'args': args}])
	db_md5 = rows[0][0]
	if file_md5 == db_md5:
		conn.commit()
		close_conn()
		_log.debug('MD5 sums of data file and database large object match: [file::%s] = [DB::%s]', file_md5, db_md5)
		return lo_oid

	conn.rollback()
	close_conn()
	_log.error('MD5 sums of data file and database large object [%s] do not match: [file::%s] <> [DB::%s]', lo_oid, file_md5, db_md5)
	return -1

#------------------------------------------------------------------------
def file2bytea_lo(filename=None, conn=None, file_md5=None):
	# 1 GB limit unless 64 bit Python build ...
	file_size = os.path.getsize(filename)
	if file_size > (1024 * 1024) * 1024:
		_log.debug('file size of [%s] > 1 GB, supposedly not supported by psycopg2 large objects (but seems to work anyway ?)', file_size)
		# return -1

	if conn is None:
		conn = get_raw_connection(readonly = False)
		close_conn = conn.close
	else:
		close_conn = __noop
	_log.debug('[%s] -> large object', filename)

	# insert the data
	lo = conn.lobject(0, 'w', 0, filename)
	lo_oid = lo.oid
	lo.close()
	_log.debug('large object OID: %s', lo_oid)

	# verify
	if file_md5 is None:
		conn.commit()
		close_conn()
		return lo_oid

	cmd = 'SELECT md5(lo_get(%(loid)s::oid))'
	args = {'loid': lo_oid}
	rows, idx = run_ro_queries(link_obj = conn, queries = [{'cmd': cmd, 'args': args}])
	db_md5 = rows[0][0]
	if file_md5 == db_md5:
		conn.commit()
		close_conn()
		_log.debug('MD5 sums of data file and database large object match: [file::%s] = [DB::%s]', file_md5, db_md5)
		return lo_oid

	conn.rollback()
	close_conn()
	_log.error('MD5 sums of data file and database large object [%s] do not match: [file::%s] <> [DB::%s]', lo_oid, file_md5, db_md5)
	return -1

#------------------------------------------------------------------------
def file2bytea_copy_from(table=None, columns=None, filename=None, conn=None, md5_query=None, file_md5=None):
	# md5_query: dict {'cmd': ..., 'args': ...}

	# UNTESTED

	chunk_size = 32 * (1024 * 1024)
	_log.debug('[%s] (%s bytes) --(%s bytes)-> %s(%s)', filename, os.path.getsize(filename), chunk_size, table, columns)
	if conn is None:
		conn = get_raw_connection(readonly = False)
		close_conn = conn.close
	else:
		close_conn = __noop
	curs = conn.cursor()
	# write
	infile = open(filename, "rb")
	curs.copy_from(infile, table, size = chunk_size, columns = columns)
	infile.close()
	curs.close()
	if None in [file_md5, md5_query]:
		conn.commit()
		close_conn()
		return True

	# verify
	rows, idx = run_ro_queries(link_obj = conn, queries = [md5_query])
	db_md5 = rows[0][0]
	if file_md5 == db_md5:
		conn.commit()
		close_conn()
		_log.debug('MD5 sums of data file and database BYTEA field match: [file::%s] = [DB::%s]', file_md5, db_md5)
		return True

	close_conn()
	_log.error('MD5 sums of data file and database BYTEA field do not match: [file::%s] <> [DB::%s]', file_md5, db_md5)
	return False

#------------------------------------------------------------------------
def file2bytea_overlay(query=None, args=None, filename=None, conn=None, md5_query=None, file_md5=None):
	"""Store data from a file into a bytea field.

	The query must:
	- 'cmd' must be in unicode
	- 'cmd' must contain a format spec identifying the row (eg
	  a primary key) matching <args> if it is an UPDATE
	- 'cmd' must contain "... SET ... <some_bytea_field> = OVERLAY(some_bytea_field PLACING %(data)s::bytea FROM %(start)s FOR %(size)s) ..."
	- 'args' must be a dict matching 'cmd'

	The query CAN return the MD5 of the inserted data:
		RETURNING md5(<field>) AS md5
	in which case it will be compared to the md5 of the file.

	UPDATE
		the_table
	SET
		bytea_field = OVERLAY (
			coalesce(bytea_field, ''::bytea)
			PLACING
				%(data)s::bytea
			FROM
				%(start)s
			FOR
				%(size)s
		)
	WHERE
		primary_key = pk_value

	SELECT md5(bytea_field) FROM the_table WHERE primary_key = pk_value
	"""
	chunk_size = 32 * (1024 * 1024)
	file_size = os.path.getsize(filename)
	if file_size <= chunk_size:
		chunk_size = file_size
	needed_chunks, remainder = divmod(file_size, chunk_size)
	_log.debug('file data: %s bytes, chunks: %s, chunk size: %s bytes, remainder: %s bytes', file_size, needed_chunks, chunk_size, remainder)

	if conn is None:
		conn = get_raw_connection(readonly = False)
		close_conn = conn.close
	else:
		close_conn = __noop

	infile = open(filename, "rb")
	# write chunks
	for chunk_id in range(needed_chunks):
		chunk_start = (chunk_id * chunk_size) + 1
		args['start'] = chunk_start
		args['size'] = chunk_size
		data_as_byte_string = infile.read(chunk_size)
		# really still needed for BYTEA input ?
		args['data'] = memoryview(data_as_byte_string)
		del data_as_byte_string
		try:
			rows, idx = run_rw_queries(link_obj = conn, queries = [{'cmd': query, 'args': args}], end_tx = False, return_data = False)
		except Exception:
			_log.exception('cannot write chunk [%s/%s] of size [%s], try decreasing chunk size', chunk_id + 1, needed_chunks, chunk_size)
			conn.rollback()
			close_conn()
			infile.close()
			raise
	# write remainder
	if remainder > 0:
		chunk_start = (needed_chunks * chunk_size) + 1
		args['start'] = chunk_start
		args['size'] = remainder
		data_as_byte_string = infile.read(remainder)
		# really still needed for BYTEA input ?
		args['data'] = memoryview(data_as_byte_string)
		del data_as_byte_string
		try:
			rows, idx = run_rw_queries(link_obj = conn, queries = [{'cmd': query, 'args': args}], end_tx = False, return_data = False)
		except Exception:
			_log.error('cannot write remaining [%s] bytes' % remainder)
			conn.rollback()
			close_conn()
			infile.close()
			raise
	infile.close()
	if None in [file_md5, md5_query]:
		conn.commit()
		close_conn()
		return True

	# verify
	rows, idx = run_ro_queries(link_obj = conn, queries = [{'cmd': md5_query, 'args': args}])
	db_md5 = rows[0][0]
	if file_md5 == db_md5:
		conn.commit()
		close_conn()
		_log.debug('MD5 sums of data file and database BYTEA field match: [file::%s] = [DB::%s]', file_md5, db_md5)
		return True

	close_conn()
	_log.error('MD5 sums of data file and database BYTEA field do not match: [file::%s] <> [DB::%s]', file_md5, db_md5)
	return False

#------------------------------------------------------------------------
def run_sql_script(sql_script, conn=None):
	if conn is None:
		conn = get_connection(readonly = False)

	from Gnumed.pycommon import gmPsql
	psql = gmPsql.Psql(conn)

	if psql.run(sql_script) == 0:
		query = {
			'cmd': 'select gm.log_script_insertion(%(name)s, %(ver)s)',
			'args': {'name': sql_script, 'ver': 'current'}
		}
		run_rw_queries(link_obj = conn, queries = [query])
		conn.commit()
		return True

	_log.error('error running sql script: %s', sql_script)
	return False

#------------------------------------------------------------------------
def sanitize_pg_regex(expression=None, escape_all=False):
	"""Escape input for use in a PostgreSQL regular expression.

	If a fragment comes from user input and is to be used
	as a regular expression we need to make sure it doesn't
	contain invalid regex patterns such as unbalanced ('s.

	<escape_all>
	True: try to escape *all* metacharacters
	False: only escape those which render the regex invalid
	"""
	return expression.replace (
		'(', '\\('
	).replace (
		')', '\\)'
	).replace (
		'[', '\\['
	).replace (
		'+', '\\+'
	).replace (
		'.', '\\.'
	).replace (
		'*', '\\*'
	).replace (
		'?', '\\?'
	)
	#']', '\\]',	# not needed
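
#------------------------------------------------------------------------
# Editor's sketch, not original code: feeding user input into a regex match
# after sanitizing it; table and column are illustrative.
def __example_sanitized_regex_search(fragment):
	cmd = 'SELECT description FROM clin.episode WHERE description ~* %(frag)s'
	args = {'frag': sanitize_pg_regex(expression = fragment)}
	rows, idx = run_ro_queries(queries = [{'cmd': cmd, 'args': args}])
	return rows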
def run_ro_queries(link_obj=None, queries=None, verbose=False, return_data=True, get_col_idx=False):
    """Run read-only queries.

    <queries> must be a list of dicts:
        [
            {'cmd': <string>, 'args': <dict> or <tuple>},
            {...},
            ...
        ]
    """
    if isinstance(link_obj, dbapi._psycopg.cursor):
        curs = link_obj
        curs_close = lambda *x: None
        tx_rollback = lambda *x: None
        readonly_rollback_just_in_case = lambda *x: None
    elif isinstance(link_obj, dbapi._psycopg.connection):
        curs = link_obj.cursor()
        curs_close = curs.close
        tx_rollback = link_obj.rollback
        if link_obj.autocommit is True:        # readonly connection ?
            readonly_rollback_just_in_case = link_obj.rollback
        else:
            # do not rollback readonly queries on passed-in readwrite
            # connections just in case because they may have already
            # seen fully legitimate write action which would get lost
            readonly_rollback_just_in_case = lambda *x: None
    elif link_obj is None:
        conn = get_connection(readonly = True, verbose = verbose)
        curs = conn.cursor()
        curs_close = curs.close
        tx_rollback = conn.rollback
        readonly_rollback_just_in_case = conn.rollback
    else:
        raise ValueError('link_obj must be cursor, connection or None but not [%s]' % link_obj)

    if verbose:
        _log.debug('cursor: %s', curs)

    for query in queries:
        try:
            args = query['args']
        except KeyError:
            args = None
        try:
            curs.execute(query['cmd'], args)
            if verbose:
                gmConnectionPool.log_cursor_state(curs)
        except PG_ERROR_EXCEPTION as pg_exc:
            _log.error('query failed in RO connection')
            gmConnectionPool.log_pg_exception_details(pg_exc)
            try:
                curs_close()
            except PG_ERROR_EXCEPTION as pg_exc2:
                _log.exception('cannot close cursor')
                gmConnectionPool.log_pg_exception_details(pg_exc2)
            try:
                tx_rollback()        # rollback so the ABORT state isn't preserved in pooled conns
            except PG_ERROR_EXCEPTION as pg_exc2:
                _log.exception('cannot rollback transaction')
                gmConnectionPool.log_pg_exception_details(pg_exc2)
            if pg_exc.pgcode == sql_error_codes.INSUFFICIENT_PRIVILEGE:
                details = 'Query: [%s]' % curs.query.strip().strip('\n')
                if curs.statusmessage != '':
                    details = 'Status: %s\n%s' % (
                        curs.statusmessage.strip().strip('\n'),
                        details
                    )
                if pg_exc.pgerror is None:
                    msg = '[%s]' % pg_exc.pgcode
                else:
                    msg = '[%s]: %s' % (pg_exc.pgcode, pg_exc.pgerror)
                raise gmExceptions.AccessDenied (
                    msg,
                    source = 'PostgreSQL',
                    code = pg_exc.pgcode,
                    details = details
                )
            raise
        except Exception:
            _log.exception('error during query run in RO connection')
            gmConnectionPool.log_cursor_state(curs)
            try:
                curs_close()
            except PG_ERROR_EXCEPTION as pg_exc:
                _log.exception('cannot close cursor')
                gmConnectionPool.log_pg_exception_details(pg_exc)
            try:
                tx_rollback()        # rollback so the ABORT state isn't preserved in pooled conns
            except PG_ERROR_EXCEPTION as pg_exc:
                _log.exception('cannot rollback transaction')
                gmConnectionPool.log_pg_exception_details(pg_exc)
            raise

    data = None
    col_idx = None
    if return_data:
        data = curs.fetchall()
        if verbose:
            _log.debug('last query returned [%s (%s)] rows', curs.rowcount, len(data))
            _log.debug('cursor description: %s', curs.description)
        if get_col_idx:
            col_idx = get_col_indices(curs)

    curs_close()
    # rollback just in case, so we can see data committed meanwhile
    # if the link object had been passed in and thus might be part
    # of a long-running read-only transaction
    readonly_rollback_just_in_case()
    return (data, col_idx)
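
# A minimal usage sketch, assuming the connection pool has already been
# primed with valid credentials (the way the tests at the bottom of this
# module do via request_login_params()):
#
#   rows, idx = run_ro_queries (
#       queries = [{'cmd': 'SELECT version()'}],
#       get_col_idx = True
#   )
#   print(rows[0][idx['version']])
#
# With link_obj = None a pooled read-only connection is used and rolled
# back afterwards; pass in a connection or cursor to join an existing
# transaction instead.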
#------------------------------------------------------------------------
def run_rw_queries(link_obj=None, queries=None, end_tx=False, return_data=None, get_col_idx=False, verbose=False):
    """Convenience function for running a transaction
    that is supposed to get committed.

    <link_obj>
        can be either:
        - a cursor
        - a connection

    <queries>
        is a list of dicts [{'cmd': <string>, 'args': <dict> or <tuple>}, ...]
        to be executed as a single transaction; the last
        query may usefully return rows (such as a
        "SELECT currval('some_sequence')" statement)

    <end_tx>
        - controls whether the transaction is finalized (eg.
          COMMITted/ROLLed BACK) or not, this allows the
          call to run_rw_queries() to be part of a framing
          transaction
        - if link_obj is a *connection* then <end_tx> will
          default to False unless it is explicitly set to
          True, which is taken to mean "yes, you do have full
          control over the transaction", in which case the
          transaction is properly finalized
        - if link_obj is a *cursor* we CANNOT finalize the
          transaction because we would need the connection for that
        - if link_obj is *None* <end_tx> will, of course, always be True

    <return_data>
        - if true, the returned data will include the rows
          the last query selected
        - if false, None is returned instead

    <get_col_idx>
        - if true, the returned data will include a dictionary
          mapping field names to column positions
        - if false, None is returned instead

    method result:
        - returns a tuple (data, idx)
        - <data>:
            * (None, None) if the last query did not return rows
            * ("fetchall() result", <index>) if the last query returned any rows
            * for <index> see <get_col_idx>
    """
    if isinstance(link_obj, dbapi._psycopg.cursor):
        conn_close = lambda *x: None
        conn_commit = lambda *x: None
        tx_rollback = lambda *x: None
        curs = link_obj
        curs_close = lambda *x: None
        notices_accessor = curs.connection
    elif isinstance(link_obj, dbapi._psycopg.connection):
        conn_close = lambda *x: None
        if end_tx:
            conn_commit = link_obj.commit
            tx_rollback = link_obj.rollback
        else:
            conn_commit = lambda *x: None
            tx_rollback = lambda *x: None
        curs = link_obj.cursor()
        curs_close = curs.close
        notices_accessor = link_obj
    elif link_obj is None:
        conn = get_connection(readonly = False)
        conn_close = conn.close
        conn_commit = conn.commit
        tx_rollback = conn.rollback
        curs = conn.cursor()
        curs_close = curs.close
        notices_accessor = conn
    else:
        raise ValueError('link_obj must be cursor, connection or None but not [%s]' % link_obj)

    for query in queries:
        try:
            args = query['args']
        except KeyError:
            args = None
        try:
            curs.execute(query['cmd'], args)
            if verbose:
                gmConnectionPool.log_cursor_state(curs)
            for notice in notices_accessor.notices:
                _log.debug(notice.replace('\n', '/'))
            del notices_accessor.notices[:]
        # DB related exceptions
        except dbapi.Error as pg_exc:
            _log.error('query failed in RW connection')
            gmConnectionPool.log_pg_exception_details(pg_exc)
            for notice in notices_accessor.notices:
                _log.debug(notice.replace('\n', '/'))
            del notices_accessor.notices[:]
            try:
                curs_close()
            except PG_ERROR_EXCEPTION as pg_exc2:
                _log.exception('cannot close cursor')
                gmConnectionPool.log_pg_exception_details(pg_exc2)
            try:
                tx_rollback()        # rollback so the ABORT state isn't preserved in pooled conns
            except PG_ERROR_EXCEPTION as pg_exc2:
                _log.exception('cannot rollback transaction')
                gmConnectionPool.log_pg_exception_details(pg_exc2)
            # privilege problem
            if pg_exc.pgcode == sql_error_codes.INSUFFICIENT_PRIVILEGE:
                details = 'Query: [%s]' % curs.query.strip().strip('\n')
                if curs.statusmessage != '':
                    details = 'Status: %s\n%s' % (
                        curs.statusmessage.strip().strip('\n'),
                        details
                    )
                if pg_exc.pgerror is None:
                    msg = '[%s]' % pg_exc.pgcode
                else:
                    msg = '[%s]: %s' % (pg_exc.pgcode, pg_exc.pgerror)
                try:
                    curs_close()
                    tx_rollback()        # just for good measure
                    conn_close()
                except dbapi.InterfaceError:
                    _log.exception('cannot cleanup')
                raise gmExceptions.AccessDenied (
                    msg,
                    source = 'PostgreSQL',
                    code = pg_exc.pgcode,
                    details = details
                )
            # other problem
            gmLog2.log_stack_trace()
            try:
                curs_close()
                tx_rollback()        # just for good measure
                conn_close()
            except dbapi.InterfaceError:
                _log.exception('cannot cleanup')
            raise
        # other exception
        except Exception:
            _log.exception('error running query in RW connection')
            gmConnectionPool.log_cursor_state(curs)
            for notice in notices_accessor.notices:
                _log.debug(notice.replace('\n', '/'))
            del notices_accessor.notices[:]
            gmLog2.log_stack_trace()
            try:
                curs_close()
            except PG_ERROR_EXCEPTION as pg_exc:
                _log.exception('cannot close cursor')
                gmConnectionPool.log_pg_exception_details(pg_exc)
            try:
                tx_rollback()        # rollback so the ABORT state isn't preserved in pooled conns
                conn_close()
            except PG_ERROR_EXCEPTION as pg_exc:
                _log.exception('cannot rollback transaction')
                gmConnectionPool.log_pg_exception_details(pg_exc)
            raise

    data = None
    col_idx = None
    if return_data:
        try:
            data = curs.fetchall()
        except Exception:
            _log.exception('error fetching data from RW query')
            gmLog2.log_stack_trace()
            try:
                curs_close()
                tx_rollback()
                conn_close()
            except dbapi.InterfaceError:
                _log.exception('cannot cleanup')
                raise
            raise
        if get_col_idx:
            col_idx = get_col_indices(curs)

    curs_close()
    conn_commit()
    conn_close()

    return (data, col_idx)
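
# A minimal usage sketch, assuming a writable database and a primed
# connection pool; with an explicit connection, end_tx = True hands
# full transaction control to this call:
#
#   conn = get_connection(readonly = False)
#   data, idx = run_rw_queries (
#       link_obj = conn,
#       queries = [
#           {'cmd': 'CREATE TEMP TABLE tmp_example (val integer)'},
#           {'cmd': 'INSERT INTO tmp_example (val) VALUES (%(v)s) RETURNING val', 'args': {'v': 1}}
#       ],
#       end_tx = True,        # we own the transaction, so it gets COMMITted
#       return_data = True
#   )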
#------------------------------------------------------------------------
def run_insert(link_obj=None, schema=None, table=None, values=None, returning=None, end_tx=False, get_col_idx=False, verbose=False):
    """Generates and runs SQL for an INSERT query.

    values: dict of values keyed by the field to insert them into
    """
    if schema is None:
        schema = 'public'

    fields = values.keys()        # that way val_snippets and fields really should end up in the same order
    val_snippets = []
    for field in fields:
        val_snippets.append('%%(%s)s' % field)

    if returning is None:
        returning = ''
        return_data = False
    else:
        returning = '\n\tRETURNING\n\t\t%s' % ', '.join(returning)
        return_data = True

    cmd = """\nINSERT INTO %s.%s (
        %s
    ) VALUES (
        %s
    )%s""" % (
        schema,
        table,
        ',\n\t\t'.join(fields),
        ',\n\t\t'.join(val_snippets),
        returning
    )

    _log.debug('running SQL: >>>%s<<<', cmd)

    return run_rw_queries (
        link_obj = link_obj,
        queries = [{'cmd': cmd, 'args': values}],
        end_tx = end_tx,
        return_data = return_data,
        get_col_idx = get_col_idx,
        verbose = verbose
    )
# =======================================================================
# connection handling API
# -----------------------------------------------------------------------
def get_raw_connection(dsn=None, verbose=False, readonly=True, connection_name=None, autocommit=False):
    """Get a raw, unadorned connection.

    - this will not set any parameters such as encoding, timezone, datestyle
    - the only requirement is a valid DSN
    - hence it can be used for "service" connections
      for verifying encodings etc

    Note that <dsn> is currently not forwarded: the pool derives
    the DSN from its stored credentials.
    """
    return gmConnectionPool.gmConnectionPool().get_raw_connection (
        readonly = readonly,
        verbose = verbose,
        connection_name = connection_name,
        autocommit = autocommit
    )
# =======================================================================
def get_connection(dsn=None, readonly=True, verbose=False, pooled=True, connection_name=None, autocommit=False):
    # note: <dsn> and <pooled> are accepted for backwards compatibility
    # but are currently not forwarded to the pool
    return gmConnectionPool.gmConnectionPool().get_connection (
        readonly = readonly,
        verbose = verbose,
        connection_name = connection_name,
        autocommit = autocommit
    )
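
# Typical use, assuming primed pool credentials:
#
#   conn_ro = get_connection()                    # read-only by default
#   conn_rw = get_connection(readonly = False)    # writable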
#-----------------------------------------------------------------------
def discard_pooled_connection_of_thread():
    gmConnectionPool.gmConnectionPool().discard_pooled_connection_of_thread()
#-----------------------------------------------------------------------
def shutdown():
    gmConnectionPool.gmConnectionPool().shutdown()
# ======================================================================
# internal helpers
#-----------------------------------------------------------------------
def __noop():
    pass
#-----------------------------------------------------------------------
def log_database_access(action=None):
    run_insert (
        schema = 'gm',
        table = 'access_log',
        values = {'user_action': action},
        end_tx = True
    )
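
# Example, assuming the gm.access_log audit table exists in the
# connected database:
#
#   log_database_access(action = 'logged in')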
#-----------------------------------------------------------------------
def sanity_check_time_skew(tolerance=60):
    """Check that server time and local time are within
    the given tolerance of each other.

    tolerance: seconds
    """
    _log.debug('maximum skew tolerance (seconds): %s', tolerance)

    cmd = "SELECT now() at time zone 'UTC'"
    conn = get_raw_connection(readonly = True)
    curs = conn.cursor()

    start = time.time()
    rows, idx = run_ro_queries(link_obj = curs, queries = [{'cmd': cmd}])
    end = time.time()
    client_now_as_utc = pydt.datetime.utcnow()

    curs.close()
    conn.commit()

    server_now_as_utc = rows[0][0]
    query_duration = end - start
    _log.info('server "now" (UTC): %s', server_now_as_utc)
    _log.info('client "now" (UTC): %s', client_now_as_utc)
    _log.debug('wire roundtrip (seconds): %s', query_duration)

    if query_duration > tolerance:
        _log.error('useless to check client/server time skew, wire roundtrip > tolerance')
        return False

    if server_now_as_utc > client_now_as_utc:
        real_skew = server_now_as_utc - client_now_as_utc
    else:
        real_skew = client_now_as_utc - server_now_as_utc

    _log.debug('client/server time skew: %s', real_skew)

    if real_skew > pydt.timedelta(seconds = tolerance):
        _log.error('client/server time skew > tolerance')
        return False

    return True
#-----------------------------------------------------------------------
def sanity_check_database_settings():
    """Checks database settings.

    returns (status, message)
    status:
        0: no problem
        1: non-fatal problem
        2: fatal problem
    """
    _log.debug('checking database settings')

    conn = get_connection()

    # - version string
    global postgresql_version_string
    if postgresql_version_string is None:
        curs = conn.cursor()
        curs.execute('SELECT version()')
        postgresql_version_string = curs.fetchone()['version']
        curs.close()
        _log.info('PostgreSQL version (string): "%s"' % postgresql_version_string)

    options2check = {
        # setting: [expected value(s), risk, fatal?]
        'allow_system_table_mods': [['off'], 'system breakage', False],
        'check_function_bodies': [['on'], 'suboptimal error detection', False],
        'datestyle': [['ISO'], 'faulty timestamp parsing', True],
        'default_transaction_isolation': [['read committed'], 'faulty database reads', True],
        'default_transaction_read_only': [['on'], 'accidental database writes', False],
        'fsync': [['on'], 'data loss/corruption', True],
        'full_page_writes': [['on'], 'data loss/corruption', False],
        'lc_messages': [['C'], 'suboptimal error detection', False],
        'password_encryption': [['on', 'md5', 'scram-sha-256'], 'breach of confidentiality', False],
        #'regex_flavor': [['advanced'], 'query breakage', False],    # 9.0 doesn't support this anymore, default is "advanced" anyway
        'synchronous_commit': [['on'], 'data loss/corruption', False],
        'sql_inheritance': [['on'], 'query breakage, data loss/corruption', True],    # IF returned (<PG10): better be ON, if NOT returned (PG10): hardwired
        'ignore_checksum_failure': [['off'], 'data loss/corruption', False],    # starting with PG 9.3
        'track_commit_timestamp': [['on'], 'suboptimal auditing', False]        # starting with PG 9.3
    }

    from Gnumed.pycommon import gmCfg2
    _cfg = gmCfg2.gmCfgData()
    if _cfg.get(option = 'hipaa'):
        options2check['log_connections'] = [['on'], 'non-compliance with HIPAA', True]
        options2check['log_disconnections'] = [['on'], 'non-compliance with HIPAA', True]
    else:
        options2check['log_connections'] = [['on'], 'non-compliance with HIPAA', None]
        options2check['log_disconnections'] = [['on'], 'non-compliance with HIPAA', None]

    cmd = "SELECT name, setting from pg_settings where name in %(settings)s"
    rows, idx = run_ro_queries (
        link_obj = conn,
        queries = [{'cmd': cmd, 'args': {'settings': tuple(options2check.keys())}}],
        get_col_idx = False
    )

    found_error = False
    found_problem = False
    msg = []
    for row in rows:
        option = row['name']
        value_found = row['setting']
        values_expected = options2check[option][0]
        risk = options2check[option][1]
        fatal_setting = options2check[option][2]
        if value_found not in values_expected:
            if fatal_setting is True:
                found_error = True
            elif fatal_setting is False:
                found_problem = True
            elif fatal_setting is None:
                pass
            else:
                _log.error(options2check[option])
                raise ValueError('invalid database configuration sanity check')
            msg.append(_(' option [%s]: %s') % (option, value_found))
            msg.append(_(' risk: %s') % risk)
            _log.warning('PG option [%s] set to [%s], expected %s, risk: <%s>' % (option, value_found, values_expected, risk))

    if found_error:
        return 2, '\n'.join(msg)

    if found_problem:
        return 1, '\n'.join(msg)

    return 0, ''
#=======================================================================
# main
#-----------------------------------------------------------------------
log_pg_settings = gmConnectionPool.log_pg_settings
log_pg_exception_details = gmConnectionPool.log_pg_exception_details

exception_is_connection_loss = gmConnectionPool.exception_is_connection_loss

cAuthenticationError = gmConnectionPool.cAuthenticationError

#-----------------------------------------------------------------------
if __name__ == "__main__":

    if len(sys.argv) < 2:
        sys.exit()

    if sys.argv[1] != 'test':
        sys.exit()

    from Gnumed.pycommon.gmTools import file2md5

    logging.basicConfig(level = logging.DEBUG)

    #--------------------------------------------------------------------
    def test_file2bytea():
        login, creds = request_login_params()
        pool = gmConnectionPool.gmConnectionPool()
        pool.credentials = creds
        run_rw_queries(queries = [
            {'cmd': 'drop table if exists test_bytea'},
            {'cmd': 'create table test_bytea (data bytea)'}
        ])
        try:
            #file2bytea(query = 'insert into test_bytea values (%(data)s::bytea) returning md5(data) as md5', filename = sys.argv[2], file_md5 = file2md5(sys.argv[2], True))
            file2bytea(query = 'insert into test_bytea values (%(data)s::bytea)', filename = sys.argv[2])
        except Exception:
            _log.exception('error')

        run_rw_queries(queries = [
            {'cmd': 'drop table test_bytea'}
        ])

    #--------------------------------------------------------------------
    def test_file2bytea_lo():
        login, creds = request_login_params()
        pool = gmConnectionPool.gmConnectionPool()
        pool.credentials = creds

        lo_oid = file2bytea_lo (
            filename = sys.argv[2]
            #, file_md5 = file2md5(sys.argv[2], True)
        )
        print(lo_oid)
#        if lo_oid != -1:
#            run_rw_queries(queries = [
#                {'cmd': u'select lo_unlink(%(loid)s::oid)', 'args': {'loid': lo_oid}}
#            ])

    #--------------------------------------------------------------------
    def test_file2bytea_copy_from():
        login, creds = request_login_params()
        pool = gmConnectionPool.gmConnectionPool()
        pool.credentials = creds

        run_rw_queries(queries = [
            {'cmd': 'drop table if exists test_bytea'},
            {'cmd': 'create table test_bytea (pk serial primary key, data bytea)'},
            {'cmd': "insert into test_bytea (data) values (NULL::bytea)"}
        ])

        md5_query = {
            'cmd': 'select md5(data) AS md5 FROM test_bytea WHERE pk = %(pk)s',
            'args': {'pk': 1}
        }

        file2bytea_copy_from (
            table = 'test_bytea',
            columns = ['data'],
            filename = sys.argv[2],
            md5_query = md5_query,
            file_md5 = file2md5(sys.argv[2], True)
        )

        run_rw_queries(queries = [
            {'cmd': 'drop table if exists test_bytea'}
        ])

    #--------------------------------------------------------------------
    def test_file2bytea_overlay():
        login, creds = request_login_params()
        pool = gmConnectionPool.gmConnectionPool()
        pool.credentials = creds

        run_rw_queries(queries = [
            {'cmd': 'drop table if exists test_bytea'},
            {'cmd': 'create table test_bytea (pk serial primary key, data bytea)'},
            {'cmd': "insert into test_bytea (data) values (NULL::bytea)"}
        ])

        cmd = """
            update test_bytea
            set data = overlay (
                coalesce(data, ''::bytea)
                placing %(data)s::bytea
                from %(start)s
                for %(size)s
            )
            where
                pk > %(pk)s
        """
        md5_cmd = 'select md5(data) from test_bytea'
        args = {'pk': 0}
        file2bytea_overlay (
            query = cmd,
            args = args,
            filename = sys.argv[2],
            conn = None,
            md5_query = md5_cmd,
            file_md5 = file2md5(sys.argv[2], True)
        )

        run_rw_queries(queries = [
            {'cmd': 'drop table test_bytea'}
        ])

    #--------------------------------------------------------------------
    def test_get_connection():
        print("testing get_connection()")

        login, creds = request_login_params()
        pool = gmConnectionPool.gmConnectionPool()
        pool.credentials = creds

        print('')
        dsn = 'foo'
        try:
            conn = get_connection(dsn = dsn)
        except dbapi.ProgrammingError:
            print("1) SUCCESS: get_connection(%s) failed as expected" % dsn)
            t, v = sys.exc_info()[:2]
            print(' ', t)
            print(' ', v)

        print('')
        dsn = 'dbname=gnumed_v22'
        try:
            conn = get_connection(dsn = dsn)
            print("2) ERROR: get_connection() did not fail")
        except cAuthenticationError:
            print("2) SUCCESS: get_connection(%s) failed as expected" % dsn)
            t, v = sys.exc_info()[:2]
            print(' ', t)
            print(' ', v)

        print('')
        dsn = 'dbname=gnumed_v22 user=abc'
        try:
            conn = get_connection(dsn = dsn)
            print("3) ERROR: get_connection() did not fail")
        except cAuthenticationError:
            print("3) SUCCESS: get_connection(%s) failed as expected" % dsn)
            t, v = sys.exc_info()[:2]
            print(' ', t)
            print(' ', v)

        print('')
        dsn = 'dbname=gnumed_v22 user=any-doc password=abc'
        try:
            conn = get_connection(dsn = dsn)
            print("4) ERROR: get_connection() did not fail")
        except cAuthenticationError:
            print("4) SUCCESS: get_connection(%s) failed as expected" % dsn)
            t, v = sys.exc_info()[:2]
            print(' ', t)
            print(' ', v)

        print('')
        dsn = 'dbname=gnumed_v22 user=any-doc password=any-doc'
        conn = get_connection(dsn = dsn, readonly = True)
        print('5) SUCCESS: get_connection(ro)')

        dsn = 'dbname=gnumed_v22 user=any-doc password=any-doc'
        conn = get_connection(dsn = dsn, readonly = False, verbose = True)
        print('6) SUCCESS: get_connection(rw)')

        print('')
        dsn = 'dbname=gnumed_v22 user=any-doc'
        try:
            conn = get_connection(dsn = dsn)
            print("7) SUCCESS:", dsn)
            print('pid:', conn.get_backend_pid())
        except cAuthenticationError:
            print("7) SUCCESS: get_connection(%s) failed" % dsn)
            t, v = sys.exc_info()[:2]
            print(' ', t)
            print(' ', v)

        try:
            curs = conn.cursor()
            input('hit enter to run query')
            curs.execute('selec 1')        # deliberately faulty SQL
        except Exception as exc:
            print('ERROR')
            _log.exception('exception occurred')
            gmConnectionPool.log_pg_exception_details(exc)
            if gmConnectionPool.exception_is_connection_loss(exc):
                _log.error('lost connection')

    #--------------------------------------------------------------------
    def test_ro_queries():
        login, creds = request_login_params()
        pool = gmConnectionPool.gmConnectionPool()
        pool.credentials = creds

        print("testing run_ro_queries()")

        dsn = 'dbname=gnumed_v22 user=any-doc password=any-doc'
        conn = get_connection(dsn, readonly = True)

        data, idx = run_ro_queries(link_obj = conn, queries = [{'cmd': 'SELECT version()'}], return_data = True, get_col_idx = True, verbose = True)
        print(data)
        print(idx)
        data, idx = run_ro_queries(link_obj = conn, queries = [{'cmd': 'SELECT 1'}], return_data = True, get_col_idx = True)
        print(data)
        print(idx)

        curs = conn.cursor()

        data, idx = run_ro_queries(link_obj = curs, queries = [{'cmd': 'SELECT version()'}], return_data = True, get_col_idx = True, verbose = True)
        print(data)
        print(idx)

        data, idx = run_ro_queries(link_obj = curs, queries = [{'cmd': 'SELECT 1'}], return_data = True, get_col_idx = True, verbose = True)
        print(data)
        print(idx)

        try:
            data, idx = run_ro_queries(link_obj = curs, queries = [{'cmd': 'selec 1'}], return_data = True, get_col_idx = True, verbose = True)
            print(data)
            print(idx)
        except dbapi.ProgrammingError:        # psycopg2 itself is only imported as "dbapi" above
            print('SUCCESS: run_ro_queries("selec 1") failed as expected')
            t, v = sys.exc_info()[:2]
            print(' ', t)
            print(' ', v)

        curs.close()

    #--------------------------------------------------------------------
    def test_connection_pool():
        login, creds = request_login_params()
        pool = gmConnectionPool.gmConnectionPool()
        pool.credentials = creds
        print(pool)
        print(pool.get_connection())
        print(pool.get_connection())
        print(pool.get_connection())
        print(type(pool.get_connection()))

    #--------------------------------------------------------------------
    def test_list_args():
        login, creds = request_login_params()
        pool = gmConnectionPool.gmConnectionPool()
        pool.credentials = creds
        conn = get_connection('', readonly = True)
        curs = conn.cursor()
        curs.execute('SELECT * from clin.clin_narrative where narrative = %s', ['a'])

    #--------------------------------------------------------------------
    def test_sanitize_pg_regex():
        tests = [
            ['(', '\\('],
            ['[', '\\['],
            [')', '\\)']
        ]
        for test in tests:
            result = sanitize_pg_regex(test[0])
            if result != test[1]:
                print('ERROR: sanitize_pg_regex(%s) returned "%s", expected "%s"' % (test[0], result, test[1]))

    #--------------------------------------------------------------------
    def test_is_pg_interval():
        login, creds = request_login_params()
        pool = gmConnectionPool.gmConnectionPool()
        pool.credentials = creds
        status = True
        tests = [
            [None, True],        # None == NULL == succeeds !
            [1, True],
            ['1', True],
            ['abc', False]
        ]

        if not is_pg_interval():
            print('ERROR: is_pg_interval() returned "False", expected "True"')
            status = False

        for test in tests:
            result = is_pg_interval(test[0])
            if result != test[1]:
                print('ERROR: is_pg_interval(%s) returned "%s", expected "%s"' % (test[0], result, test[1]))
                status = False

        return status

    #--------------------------------------------------------------------
    def test_sanity_check_time_skew():
        login, creds = request_login_params()
        pool = gmConnectionPool.gmConnectionPool()
        pool.credentials = creds
        sanity_check_time_skew()

    #--------------------------------------------------------------------
    def test_get_foreign_key_details():
        login, creds = request_login_params()
        pool = gmConnectionPool.gmConnectionPool()
        pool.credentials = creds
        schema = 'clin'
        table = 'episode'
        col = 'pk'
        print('column %s.%s.%s is referenced by:' % (schema, table, col))
        for row in get_foreign_keys2column (
            schema = schema,
            table = table,
            column = col
        ):
            print(' <- %s.%s' % (
                row['referencing_table'],
                row['referencing_column']
            ))

    #--------------------------------------------------------------------
    def test_set_user_language():
        login, creds = request_login_params()
        pool = gmConnectionPool.gmConnectionPool()
        pool.credentials = creds
        # (user, language, expected result)
        tests = [
            # current user
            [None, 'de_DE', True],
            [None, 'lang_w/o_tx', False],
            [None, None, True],
            # valid user
            ['any-doc', 'de_DE', True],
            ['any-doc', 'lang_w/o_tx', False],
            ['any-doc', None, True],
            # invalid user
            ['invalid user', 'de_DE', None],
            ['invalid user', 'lang_w/o_tx', False],        # language checking happens before user checking
            ['invalid user', None, True]
        ]
        for test in tests:
            print('testing: %s' % test)
            try:
                result = set_user_language(user = test[0], language = test[1])
                if result != test[2]:
                    print("test:", test)
                    print("result:", result, "expected:", test[2])
            except dbapi.IntegrityError as e:        # psycopg2 is only imported as "dbapi" above
                print(e)
                if test[2] is None:
                    continue
                print("test:", test)
                print("expected exception")
                print("result:", e)

    #--------------------------------------------------------------------
    def test_get_schema_revision_history():
        login, creds = request_login_params()
        pool = gmConnectionPool.gmConnectionPool()
        pool.credentials = creds
        for line in get_schema_revision_history():
            print(' - '.join(line))

    #--------------------------------------------------------------------
    def test_run_query():
        login, creds = request_login_params()
        pool = gmConnectionPool.gmConnectionPool()
        pool.credentials = creds
        gmDateTime.init()
        args = {'dt': gmDateTime.pydt_max_here()}
        cmd = "SELECT %(dt)s"

        #cmd = "SELECT 'infinity'::timestamp with time zone"

        cmd = """
            SELECT to_timestamp (foofoo,'YYMMDD.HH24MI') FROM (
                SELECT REGEXP_REPLACE (
                    't1.130729.0902.tif',            -- string
                    E'(.1)\.([0-9\.]+)(\.tif)',      -- pattern
                    E'\\\\2'                         -- replacement
                ) AS foofoo
            ) AS foo"""
        rows, idx = run_ro_queries(queries = [{'cmd': cmd, 'args': args}], get_col_idx = False)
        print(rows)
        print(rows[0])
        print(rows[0][0])

    #--------------------------------------------------------------------
    def test_schema_exists():
        login, creds = request_login_params()
        pool = gmConnectionPool.gmConnectionPool()
        pool.credentials = creds
        print(schema_exists())

    #--------------------------------------------------------------------
    def test_row_locks():
        login, creds = request_login_params()
        pool = gmConnectionPool.gmConnectionPool()
        pool.credentials = creds

        row_is_locked(table = 'dem.identity', pk = 12)

        print("1st connection:")
        print(" locked:", row_is_locked(table = 'dem.identity', pk = 12))
        print(" 1st shared lock succeeded:", lock_row(table = 'dem.identity', pk = 12, exclusive = False))
        print(" locked:", row_is_locked(table = 'dem.identity', pk = 12))

        print(" 2nd shared lock should succeed:", lock_row(table = 'dem.identity', pk = 12, exclusive = False))
        print(" `-> unlock succeeded:", unlock_row(table = 'dem.identity', pk = 12, exclusive = False))
        print(" locked:", row_is_locked(table = 'dem.identity', pk = 12))
        print(" exclusive lock should succeed:", lock_row(table = 'dem.identity', pk = 12, exclusive = True))
        print(" `-> unlock succeeded:", unlock_row(table = 'dem.identity', pk = 12, exclusive = True))
        print(" locked:", row_is_locked(table = 'dem.identity', pk = 12))

        print("2nd connection:")
        conn = get_raw_connection(readonly = True)
        print(" shared lock should succeed:", lock_row(link_obj = conn, table = 'dem.identity', pk = 12, exclusive = False))
        print(" `-> unlock succeeded:", unlock_row(link_obj = conn, table = 'dem.identity', pk = 12, exclusive = False))
        print(" locked:", row_is_locked(table = 'dem.identity', pk = 12))
        print(" exclusive lock succeeded ?", lock_row(link_obj = conn, table = 'dem.identity', pk = 12, exclusive = True), "(should fail)")
        print(" locked:", row_is_locked(table = 'dem.identity', pk = 12))

        print("1st connection:")
        print(" unlock succeeded:", unlock_row(table = 'dem.identity', pk = 12, exclusive = False))
        print(" locked:", row_is_locked(table = 'dem.identity', pk = 12))

        print("2nd connection:")
        print(" exclusive lock should succeed", lock_row(link_obj = conn, table = 'dem.identity', pk = 12, exclusive = True))
        print(" locked:", row_is_locked(table = 'dem.identity', pk = 12))
        print(" shared lock should succeed:", lock_row(link_obj = conn, table = 'dem.identity', pk = 12, exclusive = False))
        print(" `-> unlock succeeded:", unlock_row(link_obj = conn, table = 'dem.identity', pk = 12, exclusive = False))
        print(" locked:", row_is_locked(table = 'dem.identity', pk = 12))
        print(" unlock succeeded:", unlock_row(link_obj = conn, table = 'dem.identity', pk = 12, exclusive = False))
        print(" locked:", row_is_locked(table = 'dem.identity', pk = 12))

        conn.close()

    #--------------------------------------------------------------------
    def test_get_foreign_key_names():
        login, creds = request_login_params()
        pool = gmConnectionPool.gmConnectionPool()
        pool.credentials = creds
        print(get_foreign_key_names (
            src_schema = 'clin',
            src_table = 'vaccination',
            src_column = 'fk_episode',
            target_schema = 'clin',
            target_table = 'episode',
            target_column = 'pk'
        ))
        print(get_foreign_key_names (
            src_schema = 'dem',
            src_table = 'names',
            src_column = 'id_identity',
            target_schema = 'dem',
            target_table = 'identity',
            target_column = 'pk'
        ))

    #--------------------------------------------------------------------
    def test_get_index_name():
        login, creds = request_login_params()
        pool = gmConnectionPool.gmConnectionPool()
        pool.credentials = creds
        print(get_index_name(indexed_table = 'clin.vaccination', indexed_column = 'fk_episode'))

    #--------------------------------------------------------------------
    def test_faulty_SQL():
        login, creds = request_login_params()
        pool = gmConnectionPool.gmConnectionPool()
        pool.credentials = creds
        conn = get_connection()
        run_rw_queries(queries = [{'cmd': 'SELEC 1'}])        # deliberately faulty SQL

    #--------------------------------------------------------------------
    def test_log_settings():
        login, creds = request_login_params()
        pool = gmConnectionPool.gmConnectionPool()
        pool.credentials = creds
        conn = get_connection()
        gmConnectionPool.log_pg_settings(curs = conn.cursor())
    #--------------------------------------------------------------------
    # run tests

    # legacy:
    #test_connection_pool()

    # tested:
    #test_file2bytea_lo()
    #test_file2bytea_copy_from()    # not fully implemented
    #test_file2bytea_overlay()
    #test_file2bytea()
    #test_exceptions()
    #test_get_connection()
    #test_ro_queries()
    #test_list_args()
    #test_sanitize_pg_regex()
    #test_is_pg_interval()
    #test_sanity_check_time_skew()
    #test_get_foreign_key_details()
    #test_get_index_name()
    #test_set_user_language()
    #test_get_schema_revision_history()
    test_run_query()
    #test_schema_exists()
    #test_get_foreign_key_names()
    #test_row_locks()
    #test_faulty_SQL()
    test_log_settings()

# ======================================================================