migrations: Fix migration 0576 local backfill path and repeat.

Unfortunately, because this migration has already been run for many
installations, we need to ship another copy of the migration.

It should be a noop when repeated.
This commit is contained in:
Tim Abbott 2024-10-30 09:58:57 -07:00
parent 9b0e26e954
commit 66537c8bf8
3 changed files with 145 additions and 1 deletions

View File

@ -77,7 +77,8 @@ def backfill_imageattachment(apps: StateApps, schema_editor: BaseDatabaseSchemaE
source: pyvips.Source = pyvips.SourceCustom()
source.on_read(partial(s3_read, metadata["Body"]))
else:
attachment_path = os.path.join(settings.LOCAL_UPLOADS_DIR, attachment.path_id)
assert settings.LOCAL_FILES_DIR is not None
attachment_path = os.path.join(settings.LOCAL_FILES_DIR, attachment.path_id)
if not os.path.exists(attachment_path):
print(f"{attachment.path_id}: Missing!")
continue

View File

@ -0,0 +1,131 @@
# Duplicate of database migration 0576, because the original used the
# wrong path for servers using the local file upload backend, and
# many servers had already upgraded to 9.2 where it was backported.
import os
from functools import reduce
from operator import or_
import boto3
import pyvips
from botocore.client import Config
from botocore.exceptions import ClientError
from botocore.response import StreamingBody
from django.conf import settings
from django.db import migrations
from django.db.backends.base.schema import BaseDatabaseSchemaEditor
from django.db.migrations.state import StateApps
from django.db.models import Exists, OuterRef, Q
from zerver.lib.partial import partial
def backfill_imageattachment(apps: StateApps, schema_editor: BaseDatabaseSchemaEditor) -> None:
ImageAttachment = apps.get_model("zerver", "ImageAttachment")
Attachment = apps.get_model("zerver", "Attachment")
if settings.LOCAL_UPLOADS_DIR is None:
upload_bucket = boto3.resource(
"s3",
aws_access_key_id=settings.S3_KEY,
aws_secret_access_key=settings.S3_SECRET_KEY,
region_name=settings.S3_REGION,
endpoint_url=settings.S3_ENDPOINT_URL,
config=Config(
signature_version=None,
s3={"addressing_style": settings.S3_ADDRESSING_STYLE},
),
).Bucket(settings.S3_AUTH_UPLOADS_BUCKET)
# Historical attachments do not have a mime_type value, so we used
# to rely on the file extension. We replicate that when
# backfilling. This is the value from zerver.lib.markdown:
IMAGE_EXTENSIONS = [".bmp", ".gif", ".jpe", ".jpeg", ".jpg", ".png", ".webp"]
extension_limits = Q()
extension_limits = reduce(
or_,
[Q(file_name__endswith=extension) for extension in IMAGE_EXTENSIONS],
extension_limits,
)
min_id: int | None = 0
while True:
attachments = (
Attachment.objects.alias(
has_imageattachment=Exists(
ImageAttachment.objects.filter(path_id=OuterRef("path_id"))
)
)
.filter(extension_limits, has_imageattachment=False, id__gt=min_id)
.order_by("id")
)[:1000]
min_id = None
for attachment in attachments:
min_id = attachment.id
if settings.LOCAL_UPLOADS_DIR is None:
try:
metadata = upload_bucket.Object(attachment.path_id).get()
except ClientError:
print(f"{attachment.path_id}: Missing!")
continue
def s3_read(streamingbody: StreamingBody, size: int) -> bytes:
return streamingbody.read(amt=size)
# We use the streaming body to only pull down as much
# of the image as we need to examine the headers --
# generally about 40k
source: pyvips.Source = pyvips.SourceCustom()
source.on_read(partial(s3_read, metadata["Body"]))
else:
assert settings.LOCAL_FILES_DIR is not None
attachment_path = os.path.join(settings.LOCAL_FILES_DIR, attachment.path_id)
if not os.path.exists(attachment_path):
print(f"{attachment.path_id}: Missing!")
continue
source = pyvips.Source.new_from_file(attachment_path)
try:
image = pyvips.Image.new_from_source(source, "", access="sequential")
# "original_width_px" and "original_height_px" here are
# _as rendered_, after applying the orientation
# information which the image may contain.
if (
"orientation" in image.get_fields()
and image.get("orientation") >= 5
and image.get("orientation") <= 8
):
(width, height) = (image.height, image.width)
else:
(width, height) = (image.width, image.height)
ImageAttachment.objects.create(
realm_id=attachment.realm_id,
path_id=attachment.path_id,
original_width_px=width,
original_height_px=height,
frames=image.get_n_pages(),
thumbnail_metadata=[],
)
except pyvips.Error:
pass
if min_id is None:
break
class Migration(migrations.Migration):
atomic = False
dependencies = [
# Because this will be backported to 9.x, we only depend on the last migration in 9.x
("zerver", "0576_backfill_imageattachment"),
]
operations = [
migrations.RunPython(
backfill_imageattachment, reverse_code=migrations.RunPython.noop, elidable=True
)
]

View File

@ -0,0 +1,12 @@
# Generated by Django 5.0.9 on 2024-10-30 18:35
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
("zerver", "0621_remove_realm_edit_topic_policy"),
("zerver", "0622_backfill_imageattachment_again"),
]
operations = []