2019-10-07 07:55:10 +02:00
|
|
|
# Generated by Django 1.11.24 on 2019-10-07 05:25
|
|
|
|
|
2020-01-14 21:59:46 +01:00
|
|
|
import time
|
2022-02-19 03:03:08 +01:00
|
|
|
from typing import cast
|
2020-01-14 21:59:46 +01:00
|
|
|
|
|
|
|
import lxml
|
2019-10-07 07:55:10 +02:00
|
|
|
from django.db import migrations
|
2023-03-04 01:40:40 +01:00
|
|
|
from django.db.backends.base.schema import BaseDatabaseSchemaEditor
|
2019-10-07 07:55:10 +02:00
|
|
|
from django.db.migrations.state import StateApps
|
|
|
|
|
2019-12-12 23:45:53 +01:00
|
|
|
# Step between the lower and upper message-ID bounds of each batch handed to
# process_batch by fix_has_link. The bounds are inclusive on both ends, so a
# full batch spans BATCH_SIZE + 1 consecutive IDs.
BATCH_SIZE = 1000
|
2019-10-07 07:55:10 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2019-12-12 23:45:53 +01:00
|
|
|
def process_batch(apps: StateApps, id_start: int, id_end: int, last_id: int) -> None:
    """Recompute has_link / has_image / has_attachment for a range of messages.

    Every Message whose id lies in the inclusive range [id_start, id_end] is
    inspected in ascending id order, and is saved only when at least one of
    the three flags differs from the freshly computed value.

    Args:
        apps: Historical app registry used to look up the Message model.
        id_start: Lowest message id to process (inclusive).
        id_end: Highest message id to process (inclusive).
        last_id: Highest message id overall; only used in progress output.
    """
    # `import lxml` on its own does not load the `lxml.html` submodule, so
    # `lxml.html.fromstring` would only work if some other module happened to
    # import it first. Import it explicitly to avoid relying on that.
    import lxml.html

    Message = apps.get_model("zerver", "Message")
    for message in Message.objects.filter(id__gte=id_start, id__lte=id_end).order_by("id"):
        if message.rendered_content in ["", None]:
            # There have been bugs in the past that made it possible
            # for a message to have "" or None as its rendered_content; we
            # need to skip those because lxml won't process them.
            #
            # They should safely already have the correct state
            # has_link=has_image=has_attachment=False.
            continue

        if message.id % 1000 == 0:
            print(f"Processed {message.id} / {last_id}")

        # Because we maintain the Attachment table, this should be as
        # simple as just checking if there's any Attachment
        # objects associated with this message.
        has_attachment = message.attachment_set.exists()

        # For has_link and has_image, we need to parse the messages.
        # Links are simple -- look for a link in the message.
        lxml_obj = lxml.html.fromstring(message.rendered_content)
        # Yield True per match rather than testing the elements themselves:
        # we only care whether at least one <a> exists.
        has_link = any(True for _ in lxml_obj.iter("a"))

        # has_image refers to inline image previews, so we just check
        # for the relevant CSS class.
        has_image = any(
            True for _ in cast(lxml.html.HtmlMixin, lxml_obj).find_class("message_inline_image")
        )

        if (
            message.has_link == has_link
            and message.has_attachment == has_attachment
            and message.has_image == has_image
        ):
            # No need to spend time with the database if there aren't changes.
            continue

        message.has_image = has_image
        message.has_link = has_link
        message.has_attachment = has_attachment
        message.save(update_fields=["has_link", "has_attachment", "has_image"])
|
2019-10-07 07:55:10 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2022-05-27 23:33:51 +02:00
|
|
|
def fix_has_link(apps: StateApps, schema_editor: BaseDatabaseSchemaEditor) -> None:
    """Walk every message in ID order and repair its has_* flags in batches.

    The ID space from 0 up to the newest message ID is split into consecutive
    inclusive ranges, each handed to process_batch. A short sleep follows
    every full batch; the trailing partial batch is processed without one.

    Args:
        apps: Historical app registry used to look up the Message model.
        schema_editor: Supplied by the RunPython operation; unused here.
    """
    Message = apps.get_model("zerver", "Message")
    if not Message.objects.exists():
        # Nothing to do, and Message.objects.latest() will crash.
        return

    # This migration logic assumes that either the server is not
    # running, or that it's being run after the logic to correct how
    # `has_link` and friends are set for new messages has been
    # deployed.
    last_id = Message.objects.latest("id").id

    id_range_lower_bound = 0
    id_range_upper_bound = 0 + BATCH_SIZE
    while id_range_upper_bound <= last_id:
        process_batch(apps, id_range_lower_bound, id_range_upper_bound, last_id)

        # process_batch treats both bounds as inclusive, so the next range
        # starts one past the previous upper bound.
        id_range_lower_bound = id_range_upper_bound + 1
        id_range_upper_bound = id_range_lower_bound + BATCH_SIZE
        time.sleep(0.1)

    # Use >= rather than >: when last_id is exactly the next lower bound
    # (e.g. last_id == BATCH_SIZE + 1), a strict comparison would skip the
    # newest message entirely.
    if last_id >= id_range_lower_bound:
        # Copy for the last batch.
        process_batch(apps, id_range_lower_bound, last_id, last_id)
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2019-10-07 07:55:10 +02:00
|
|
|
class Migration(migrations.Migration):
    """Data migration that runs fix_has_link over the Message table.

    Reversing the migration is a no-op.
    """

    # The work is done in many small batches, so the migration is marked
    # as non-atomic.
    atomic = False

    dependencies = [
        ("zerver", "0256_userprofile_stream_set_recipient_column_values"),
    ]

    operations = [
        migrations.RunPython(
            code=fix_has_link,
            reverse_code=migrations.RunPython.noop,
            elidable=True,
        ),
    ]
|