migrate: Add do_batch_update method for running batch updates.

* Refactor pgroonga_0002 migration to use new method.
This commit is contained in:
Sampriti Panda 2018-05-31 06:04:15 +05:30 committed by Tim Abbott
parent eddcec5e86
commit e95f972e24
3 changed files with 50 additions and 11 deletions

View File

@ -3,19 +3,13 @@ from django.db import models, migrations, connection
from django.contrib.postgres import operations
from django.db.backends.postgresql_psycopg2.schema import DatabaseSchemaEditor
from django.db.migrations.state import StateApps
from zerver.lib.migrate import do_batch_update
# Data-migration step: sets zerver_message.search_pgroonga to
# escape_html(subject) || ' ' || rendered_content.
# NOTE(review): this is a rendered commit diff whose +/- markers and
# indentation are not visible, so the statements below appear to mix the
# pre-change implementation (manual id-batched UPDATE loop) with the
# post-change one (the do_batch_update call) -- confirm against the actual
# migration file before relying on this listing.
def rebuild_pgroonga_index(apps: StateApps, schema_editor: DatabaseSchemaEditor) -> None:
# Number of message ids placed in each "WHERE id IN (...)" list by the loop below.
BATCH_SIZE = 10000
# Look up the Message model through the app registry handed to the migration.
Message = apps.get_model("zerver", "Message")
message_ids = Message.objects.values_list('id', flat=True)
with connection.cursor() as cursor:
# Loop form: one UPDATE statement per slice of BATCH_SIZE ids.
for i in range(0, len(message_ids), BATCH_SIZE):
batch_ids = ', '.join(str(id) for id in message_ids[i:i+BATCH_SIZE])
cursor.execute("UPDATE zerver_message SET "
"search_pgroonga = "
"escape_html(subject) || ' ' || rendered_content "
"WHERE id IN (%s)" % (batch_ids,))
# Helper form: the value is a SQL expression, not a literal; escape=False is
# presumably passed so it is spliced into the statement rather than bound as
# a quoted parameter -- verify against do_batch_update.
do_batch_update(cursor, 'zerver_message', ['search_pgroonga'],
["escape_html(subject) || ' ' || rendered_content"],
escape=False, batch_size=10000)
class Migration(migrations.Migration):
atomic = False

View File

@ -449,6 +449,7 @@ def build_custom_checkers(by_lang):
'zerver/migrations/0041_create_attachments_for_old_messages.py',
'zerver/migrations/0060_move_avatars_to_be_uid_based.py',
'zerver/migrations/0104_fix_unreads.py',
'pgroonga/migrations/0002_html_escape_subject.py',
]),
'description': "Don't import models or other code in migrations; see docs/subsystems/schema-migrations.md",
},

View File

@ -1,8 +1,12 @@
from typing import Any, Callable, Dict, List, Tuple
from django.db.models.query import QuerySet
from psycopg2.extensions import cursor
from typing import Any, Callable, Dict, List, Tuple, TypeVar
import re
import time
CursorObj = TypeVar('CursorObj', bound=cursor)
def create_index_if_not_exist(index_name: str, table_name: str, column_string: str,
where_clause: str) -> str:
#
@ -25,3 +29,43 @@ def create_index_if_not_exist(index_name: str, table_name: str, column_string: s
END$$;
''' % (index_name, index_name, table_name, column_string, where_clause)
return stmt
def do_batch_update(cursor: CursorObj,
                    table: str,
                    cols: List[str],
                    vals: List[str],
                    batch_size: int=10000,
                    sleep: float=0.1,
                    escape: bool=True) -> None:  # nocoverage
    """Set `cols` to `vals` on every row of `table`, one id range at a time.

    The table is walked in half-open id windows [lower, lower + batch_size),
    starting at MIN(id).  After each window the function sleeps for `sleep`
    seconds.  Whenever the walk passes the known MAX(id), MAX(id) is queried
    again so rows inserted in the meantime are covered too.

    Args:
        cursor: Database cursor used to run the statements.
        table: Table name; interpolated directly into the SQL text.
        cols: Column names to assign; interpolated directly into the SQL text.
        vals: One value per entry of `cols`.  With `escape=True` they are
            handed to the driver as bound parameters; with `escape=False`
            they are interpolated into the SQL text as-is (e.g. to pass a
            SQL expression).
        batch_size: Width of each id window; must be at least 1.
        sleep: Seconds to pause after each window.
        escape: Whether `vals` are bound as parameters (True) or spliced
            into the statement text (False).

    Raises:
        ValueError: If `batch_size` is less than 1, or `cols` and `vals`
            differ in length.

    Because `table`, `cols` and (with `escape=False`) `vals` become part of
    the SQL text, they must never come from untrusted input.
    """
    # A non-positive window would never advance and the loop would not end.
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")
    # Each column needs exactly one value placeholder in the SET clause.
    if len(cols) != len(vals):
        raise ValueError("cols and vals must have the same length")

    # The first formatting pass fills in the table and column names and
    # leaves one '%s' per value plus the two '%%s' -> '%s' id bounds.
    stmt = (
        "\n"
        "UPDATE %s\n"
        "SET (%s) = (%s)\n"
        "WHERE id >= %%s AND id < %%s\n"
    ) % (table, ', '.join(cols), ', '.join(['%s'] * len(cols)))

    cursor.execute("SELECT MIN(id), MAX(id) FROM %s" % (table,))
    (min_id, max_id) = cursor.fetchall()[0]
    if min_id is None:
        # Empty table: nothing to update.
        return

    print("\n Range of rows to update: [%s, %s]" % (min_id, max_id))
    while min_id <= max_id:
        lower = min_id
        upper = min_id + batch_size
        print(' Updating range [%s,%s)' % (lower, upper))
        params = list(vals) + [lower, upper]
        if escape:
            # Passed positionally: psycopg2 names this argument `vars` while
            # Django's cursor wrapper names it `params`.
            cursor.execute(stmt, params)
        else:
            cursor.execute(stmt % tuple(params))

        min_id = upper
        time.sleep(sleep)

        # Once we've finished, check if any new rows were inserted to the table
        if min_id > max_id:
            cursor.execute("SELECT MAX(id) FROM %s" % (table,))
            max_id = cursor.fetchall()[0][0]
            if max_id is None:
                # Every row was deleted while we were running; comparing an
                # int against None would raise, and there is nothing left.
                break

    print(" Finishing...", end='')