migrations: Remove disallowed characters from topics.

Following b3c58f454f, we want to clean up
old topics that may contain disallowed characters. The Message table
is large, so we work in batches, making sure that each topic fetch and
each UPDATE query covers no more than BATCH_SIZE Message rows.
This commit is contained in:
Mateusz Mandera 2021-12-09 13:22:00 +01:00 committed by Tim Abbott
parent 1d54b383bd
commit 93e18fe289
1 changed files with 67 additions and 0 deletions

View File

@ -0,0 +1,67 @@
import unicodedata
from django.db import connection, migrations, models
from django.db.backends.postgresql.schema import DatabaseSchemaEditor
from django.db.migrations.state import StateApps
def fix_topics(apps: StateApps, schema_editor: DatabaseSchemaEditor) -> None:
    """Remove disallowed characters from the topics of existing messages.

    The ``zerver_message`` table is walked in id windows of ``BATCH_SIZE``
    rows, i.e. ``(lower_bound, lower_bound + BATCH_SIZE]``.  For every
    distinct ``subject`` found in a window, characters whose Unicode general
    category is ``Cc``, ``Cs`` or ``Cn`` are dropped.  When that changes the
    topic, the rows in the same window holding the old topic are rewritten;
    a topic that becomes empty is replaced with ``"(no topic)"``.

    Progress and the final number of updated rows are printed to stdout.

    Args:
        apps: Historical app registry used to look up the ``Message`` model.
        schema_editor: Supplied by the migration framework; not used here.
    """
    BATCH_SIZE = 10000
    # Unicode general categories that are stripped from topics
    # (control, surrogate and unassigned code points).
    disallowed_categories = ["Cc", "Cs", "Cn"]

    Message = apps.get_model("zerver", "Message")
    max_id = Message.objects.aggregate(models.Max("id"))["id__max"]
    if max_id is None:
        # An empty Message table leaves nothing to clean up.
        return

    messages_updated = 0
    print("")
    for lower_bound in range(0, max_id, BATCH_SIZE):
        upper_bound = lower_bound + BATCH_SIZE
        print(f"Processed {lower_bound} / {max_id}")

        with connection.cursor() as cursor:
            cursor.execute(
                "SELECT DISTINCT subject FROM zerver_message WHERE id > %s AND id <= %s",
                [lower_bound, upper_bound],
            )
            # Materialize the topics first: the same cursor is reused for
            # the UPDATE statements below.
            distinct_topics = [row[0] for row in cursor.fetchall()]

            for original_topic in distinct_topics:
                cleaned_topic = "".join(
                    char
                    for char in original_topic
                    if unicodedata.category(char) not in disallowed_categories
                )
                if cleaned_topic == original_topic:
                    continue

                # Stream messages should never end up with an empty topic,
                # so fall back to the "(no topic)" placeholder.
                replacement = cleaned_topic if cleaned_topic != "" else "(no topic)"

                cursor.execute(
                    "UPDATE zerver_message SET subject = %s WHERE subject = %s AND id > %s AND id <= %s",
                    [replacement, original_topic, lower_bound, upper_bound],
                )
                messages_updated += cursor.rowcount

    if messages_updated > 0:
        print(f"Fixed invalid topics for {messages_updated} messages.")
class Migration(migrations.Migration):
    """Data migration that runs ``fix_topics`` over the existing messages."""

    # Opt out of wrapping the whole migration in a single transaction;
    # fix_topics issues its SELECT/UPDATE queries one id batch at a time.
    atomic = False

    # Must be applied after this earlier zerver migration.
    dependencies = [
        ("zerver", "0370_realm_enable_spectator_access"),
    ]

    operations = [
        # Reversing is a no-op: fix_topics does not record the original
        # topics, so the removed characters cannot be restored.
        migrations.RunPython(fix_topics, reverse_code=migrations.RunPython.noop),
    ]