2020-06-11 00:54:34 +02:00
|
|
|
import time
|
2019-02-02 23:53:55 +01:00
|
|
|
from typing import List, TypeVar
|
2018-05-31 02:34:15 +02:00
|
|
|
|
2020-06-11 00:54:34 +02:00
|
|
|
from psycopg2.extensions import cursor
|
|
|
|
from psycopg2.sql import SQL, Composable, Identifier
|
2014-02-26 21:50:36 +01:00
|
|
|
|
2018-05-31 02:34:15 +02:00
|
|
|
# Type variable bounded by psycopg2's ``cursor`` class: parameters annotated
# with it accept ``cursor`` itself or any subclass of it.
CursorObj = TypeVar('CursorObj', bound=cursor)
|
|
|
|
|
|
|
|
|
|
|
|
def do_batch_update(cursor: CursorObj,
                    table: str,
                    assignments: List[Composable],
                    batch_size: int = 10000,
                    sleep: float = 0.1) -> None:  # nocoverage
    """Run ``UPDATE <table> SET <assignments>`` over the table in id batches.

    The id range ``[MIN(id), MAX(id)]`` of ``table`` is walked in half-open
    windows ``[lower, lower + batch_size)``; one UPDATE statement is executed
    per window, followed by a pause of ``sleep`` seconds.  When the walk
    passes the last known maximum id, ``MAX(id)`` is queried again so rows
    inserted in the meantime are covered as well.  Progress is reported with
    ``print``.

    Args:
        cursor: Cursor used to execute every statement.
        table: Name of the table to update; it is quoted as an SQL
            identifier.  The table is expected to have a numeric ``id``
            column, since the batching filters and does arithmetic on it.
        assignments: Composable ``SET`` fragments; they are joined with
            ``", "`` to form the SET clause.
        batch_size: Width of each id window.  Must be at least 1.
        sleep: Seconds to pause after each batch.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 (the id window would
            never advance and the loop would not terminate).
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    # The table name and SET clause are composed with psycopg2.sql objects;
    # the id bounds stay as %s placeholders and are bound on each execute().
    stmt = SQL('''
        UPDATE {}
        SET {}
        WHERE id >= %s AND id < %s
    ''').format(
        Identifier(table),
        SQL(', ').join(assignments),
    )

    cursor.execute(SQL("SELECT MIN(id), MAX(id) FROM {}").format(Identifier(table)))
    (min_id, max_id) = cursor.fetchone()
    if min_id is None:
        # MIN(id) is NULL: there are no rows to update.
        return

    print(f"\n Range of rows to update: [{min_id}, {max_id}]")
    while min_id <= max_id:
        lower = min_id
        upper = min_id + batch_size
        print(f' Updating range [{lower},{upper})')
        cursor.execute(stmt, [lower, upper])

        min_id = upper
        time.sleep(sleep)

        # Once we've finished, check if any new rows were inserted to the table
        if min_id > max_id:
            cursor.execute(SQL("SELECT MAX(id) FROM {}").format(Identifier(table)))
            (latest_max_id,) = cursor.fetchone()
            if latest_max_id is None:
                # The table was emptied while we were working; comparing
                # against None would raise TypeError, so stop here.
                break
            max_id = latest_max_id

    print(" Finishing...", end='')
|