zulip/zerver/migrations/0257_fix_has_link_attribute.py

97 lines
3.4 KiB
Python

# Generated by Django 1.11.24 on 2019-10-07 05:25
import time
import lxml
from django.db import migrations
from django.db.backends.postgresql.schema import DatabaseSchemaEditor
from django.db.migrations.state import StateApps
# Width of each message-id range that fix_has_link hands to
# process_batch. Both ends of a range are inclusive, so a full batch
# spans BATCH_SIZE + 1 consecutive ids.
BATCH_SIZE = 1000
def process_batch(apps: StateApps, id_start: int, id_end: int, last_id: int) -> None:
    """Recompute has_link, has_image and has_attachment for a range of messages.

    Every Message whose id lies in [id_start, id_end] (both inclusive) is
    visited in ascending id order. A row is only written back when at
    least one of the three recomputed flags differs from the stored value.

    Args:
        apps: Historical app registry used to look up the Message model.
        id_start: Lowest message id to process (inclusive).
        id_end: Highest message id to process (inclusive).
        last_id: Highest message id overall; only used in progress output.
    """
    # A plain `import lxml` does not load the `lxml.html` subpackage, so
    # `lxml.html.fromstring` would only work if some other module had
    # already imported it. Import it explicitly so this migration does
    # not depend on import side effects elsewhere.
    import lxml.html

    Message = apps.get_model("zerver", "Message")
    for message in Message.objects.filter(id__gte=id_start, id__lte=id_end).order_by("id"):
        if message.rendered_content == "":
            # There have been bugs in the past that made it possible
            # for a message to have "" as its rendered_content; we
            # need to skip those because lxml won't process them.
            #
            # They should safely already have the correct state
            # has_link=has_image=has_attachment=False.
            continue

        if message.id % 1000 == 0:
            print(f"Processed {message.id} / {last_id}")

        # Because we maintain the Attachment table, this should be as
        # simple as just checking whether there are any Attachment
        # objects associated with this message.
        has_attachment = message.attachment_set.exists()

        # For has_link and has_image, we need to parse the messages.
        lxml_obj = lxml.html.fromstring(message.rendered_content)

        # Links are simple -- look for any <a> element in the message.
        has_link = bool(lxml_obj.xpath("//a"))

        # has_image refers to inline image previews, so we just check
        # for the relevant CSS class.
        has_image = bool(lxml_obj.find_class("message_inline_image"))

        if (
            message.has_link == has_link
            and message.has_attachment == has_attachment
            and message.has_image == has_image
        ):
            # No need to spend time with the database if there aren't changes.
            continue

        message.has_image = has_image
        message.has_link = has_link
        message.has_attachment = has_attachment
        message.save(update_fields=["has_link", "has_attachment", "has_image"])
def fix_has_link(apps: StateApps, schema_editor: DatabaseSchemaEditor) -> None:
    """Recompute the has_link/has_image/has_attachment flags on all messages.

    Walks the message id space from 0 up to the current highest id in
    inclusive ranges derived from BATCH_SIZE, delegating each range to
    process_batch and pausing briefly between full batches.

    Args:
        apps: Historical app registry passed in by RunPython.
        schema_editor: Schema editor passed in by RunPython; unused here.
    """
    Message = apps.get_model("zerver", "Message")
    if not Message.objects.exists():
        # Nothing to do, and Message.objects.latest() will crash.
        return

    # This migration logic assumes that either the server is not
    # running, or that it's being run after the logic to correct how
    # `has_link` and friends are set for new messages has been
    # deployed.
    last_id = Message.objects.latest("id").id

    id_range_lower_bound = 0
    id_range_upper_bound = 0 + BATCH_SIZE
    while id_range_upper_bound <= last_id:
        process_batch(apps, id_range_lower_bound, id_range_upper_bound, last_id)

        # process_batch treats both bounds as inclusive, so the next
        # range starts one past the previous upper bound.
        id_range_lower_bound = id_range_upper_bound + 1
        id_range_upper_bound = id_range_lower_bound + BATCH_SIZE
        time.sleep(0.1)

    # Handle the final, partial batch. The comparison must be `>=`:
    # when the loop exits with id_range_lower_bound == last_id there is
    # still exactly one message (the newest one) left to process, and a
    # strict `>` would silently skip it.
    if last_id >= id_range_lower_bound:
        process_batch(apps, id_range_lower_bound, last_id, last_id)
class Migration(migrations.Migration):
    """Data migration that runs fix_has_link; the reverse direction is a no-op."""

    operations = [
        migrations.RunPython(
            fix_has_link,
            reverse_code=migrations.RunPython.noop,
            elidable=True,
        ),
    ]

    dependencies = [("zerver", "0256_userprofile_stream_set_recipient_column_values")]

    # Per Django's Migration.atomic option, do not wrap this migration
    # in a single transaction.
    atomic = False