2018-05-31 02:34:15 +02:00
|
|
|
from psycopg2.extensions import cursor
|
2019-02-02 23:53:55 +01:00
|
|
|
from typing import List, TypeVar
|
2018-05-31 02:34:15 +02:00
|
|
|
|
2014-02-26 21:50:36 +01:00
|
|
|
import time
|
|
|
|
|
2018-05-31 02:34:15 +02:00
|
|
|
# Type variable used to annotate cursor arguments: it is bound to psycopg2's
# ``cursor`` class, so it accepts that class or any subclass of it.
CursorObj = TypeVar('CursorObj', bound=cursor)
|
|
|
|
|
2018-05-10 19:13:36 +02:00
|
|
|
def create_index_if_not_exist(index_name: str, table_name: str, column_string: str,
                              where_clause: str) -> str:
    """Return SQL that creates an index only when no relation has its name.

    The statement is an anonymous ``DO`` block that looks ``index_name`` up
    in ``pg_class`` and issues ``CREATE INDEX`` only when no row matches.
    Nothing is executed here; the caller runs the returned string.

    All arguments are interpolated into the SQL verbatim, with no quoting
    or escaping, so they must come from trusted code (never user input).

    Args:
        index_name: Name of the index; used both for the existence check
            and in ``CREATE INDEX``.
        table_name: Table the index is created on.
        column_string: Text placed between the parentheses after the table
            name (the indexed column list).
        where_clause: Text placed verbatim after the column list, before
            the terminating ``;`` (presumably a complete ``WHERE ...``
            clause for a partial index, or an empty string).

    Returns:
        The SQL statement as a string.
    """
    #
    # FUTURE TODO: When we no longer need to support postgres 9.3 for Trusty,
    #              we can use "IF NOT EXISTS", which is part of postgres 9.5
    #              (and which already is supported on Xenial systems).
    stmt = '''
        DO $$
        BEGIN
            IF NOT EXISTS (
                SELECT 1
                FROM pg_class
                where relname = '%s'
                ) THEN
                    CREATE INDEX
                    %s
                    ON %s (%s)
                    %s;
            END IF;
        END$$;
        ''' % (index_name, index_name, table_name, column_string, where_clause)
    return stmt
|
2018-05-31 02:34:15 +02:00
|
|
|
|
|
|
|
|
|
|
|
def do_batch_update(cursor: CursorObj,
                    table: str,
                    cols: List[str],
                    vals: List[str],
                    batch_size: int=10000,
                    sleep: float=0.1,
                    escape: bool=True) -> None:  # nocoverage
    """Set the given columns on every row of ``table``, one id range at a time.

    The id span ``[MIN(id), MAX(id)]`` is walked in half-open windows of
    ``batch_size`` ids; each window is updated with a single ``UPDATE`` and
    followed by a pause of ``sleep`` seconds.  When the walk passes the
    known maximum, ``MAX(id)`` is fetched again so rows inserted in the
    meantime are covered as well.  Progress is reported with ``print``.

    ``table`` and ``cols`` are interpolated into the SQL text directly, so
    they must come from trusted code.

    Args:
        cursor: Database cursor providing ``execute`` and ``fetchall``.
        table: Name of the table to update; it must have an ``id`` column.
        cols: Column names to assign.
        vals: Values for ``cols``, in the same order.
        batch_size: Width of each id window; must be positive.
        sleep: Seconds to wait after each batch.
        escape: When true, ``vals`` are handed to ``cursor.execute`` as query
            parameters.  When false, they are substituted into the SQL text
            as-is, so the caller is responsible for any quoting.

    Raises:
        ValueError: If ``batch_size`` is not positive (the id window would
            never advance and the loop would not terminate).
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    # The string substitution below is complicated by our need to
    # support multiple postgres versions.
    assignments = ', '.join('%s = %%s' % (col,) for col in cols)
    stmt = '''
        UPDATE %s
        SET %s
        WHERE id >= %%s AND id < %%s
    ''' % (table, assignments)

    cursor.execute("SELECT MIN(id), MAX(id) FROM %s" % (table,))
    (min_id, max_id) = cursor.fetchall()[0]
    if min_id is None:
        # MIN(id) is NULL: there are no rows to update.
        return

    print("\n Range of rows to update: [%s, %s]" % (min_id, max_id))
    while min_id <= max_id:
        lower = min_id
        upper = min_id + batch_size
        print(' Updating range [%s,%s)' % (lower, upper))
        params = list(vals) + [lower, upper]
        if escape:
            # Passed positionally: the name of this parameter differs between
            # cursor implementations (psycopg2 calls it ``vars``).
            cursor.execute(stmt, params)
        else:
            cursor.execute(stmt % tuple(params))

        min_id = upper
        time.sleep(sleep)

        # Once we've finished, check if any new rows were inserted to the table
        if min_id > max_id:
            cursor.execute("SELECT MAX(id) FROM %s" % (table,))
            max_id = cursor.fetchall()[0][0]
            if max_id is None:
                # The table was emptied while we were running; nothing is
                # left to update and comparing against None would fail.
                break

    print(" Finishing...", end='')
|