mirror of https://github.com/zulip/zulip.git
thumbnail: Backfill ImageAttachment rows.
We previously used the file extension to determine if we should
attempt to inline an image. After b42863be4b, we rely on the
existence of ImageAttachment rows to determine if something is an
image which can be viewed inline. This means that messages
containing files uploaded before that commit, when (re-)rendered, will
be judged as not having inline'able images.
Backfill all of the ImageAttachment rows for image-like file
extensions. We are careful to only download the bytes that we need in
the image headers, to minimize bandwidth from S3 in the event that the
S3 backend is in use. We do _not_ produce thumbnails for the images
during this migration; see the subsequent commit.
Because this migration will be backported to 9.x, it is marked as only
depending on the last migration in `9.x`, with a subsequent merge
migration into the tip of `main`.
This commit is contained in:
parent
d796deddf8
commit
df91cdf333
|
@ -54,6 +54,7 @@ rules:
|
||||||
|
|
||||||
- id: dont-import-models-in-migrations
|
- id: dont-import-models-in-migrations
|
||||||
patterns:
|
patterns:
|
||||||
|
- pattern-not: from zerver.lib.partial import partial
|
||||||
- pattern-not: from zerver.lib.mime_types import $X
|
- pattern-not: from zerver.lib.mime_types import $X
|
||||||
- pattern-not: from zerver.lib.redis_utils import get_redis_client
|
- pattern-not: from zerver.lib.redis_utils import get_redis_client
|
||||||
- pattern-not: from zerver.lib.utils import generate_api_key
|
- pattern-not: from zerver.lib.utils import generate_api_key
|
||||||
|
|
|
@ -0,0 +1,126 @@
|
||||||
|
import os
|
||||||
|
from functools import reduce
|
||||||
|
from operator import or_
|
||||||
|
|
||||||
|
import boto3
|
||||||
|
import pyvips
|
||||||
|
from botocore.client import Config
|
||||||
|
from botocore.exceptions import ClientError
|
||||||
|
from botocore.response import StreamingBody
|
||||||
|
from django.conf import settings
|
||||||
|
from django.db import migrations
|
||||||
|
from django.db.backends.base.schema import BaseDatabaseSchemaEditor
|
||||||
|
from django.db.migrations.state import StateApps
|
||||||
|
from django.db.models import Exists, OuterRef, Q
|
||||||
|
|
||||||
|
from zerver.lib.partial import partial
|
||||||
|
|
||||||
|
|
||||||
|
def backfill_imageattachment(apps: StateApps, schema_editor: BaseDatabaseSchemaEditor) -> None:
    """Create ImageAttachment rows for image-like Attachments that lack one.

    Walks Attachment rows in ascending id order, in batches of 1000,
    selecting those whose file name ends in an image extension and
    which have no ImageAttachment with the same path_id.  For each one,
    the image headers are parsed with pyvips (streamed from S3 when
    settings.LOCAL_UPLOADS_DIR is None, otherwise read from local disk)
    to record the as-rendered dimensions and the frame count.  No
    thumbnails are generated here; thumbnail_metadata is stored empty.

    Files which are missing, or which pyvips cannot parse, are reported
    on stdout and skipped; they never abort the migration.
    """
    ImageAttachment = apps.get_model("zerver", "ImageAttachment")
    Attachment = apps.get_model("zerver", "Attachment")

    if settings.LOCAL_UPLOADS_DIR is None:
        upload_bucket = boto3.resource(
            "s3",
            aws_access_key_id=settings.S3_KEY,
            aws_secret_access_key=settings.S3_SECRET_KEY,
            region_name=settings.S3_REGION,
            endpoint_url=settings.S3_ENDPOINT_URL,
            config=Config(
                signature_version=None,
                s3={"addressing_style": settings.S3_ADDRESSING_STYLE},
            ),
        ).Bucket(settings.S3_AUTH_UPLOADS_BUCKET)

    def s3_read(streamingbody: StreamingBody, size: int) -> bytes:
        # pyvips read callback: hand back at most `size` more bytes of
        # the S3 object.  Defined once, rather than once per attachment.
        return streamingbody.read(amt=size)

    # Historical attachments do not have a mime_type value, so we used
    # to rely on the file extension.  We replicate that when
    # backfilling.  This is the value from zerver.lib.markdown:
    IMAGE_EXTENSIONS = [".bmp", ".gif", ".jpe", ".jpeg", ".jpg", ".png", ".webp"]

    # Match case-insensitively, so that uploads such as "PHOTO.JPG" are
    # backfilled as well.  Anything which turns out not to be an image
    # is rejected by pyvips below, so a broader filter only costs a
    # header read.
    extension_limits = reduce(
        or_,
        (Q(file_name__iendswith=extension) for extension in IMAGE_EXTENSIONS),
    )

    min_id = 0
    while True:
        attachments = (
            Attachment.objects.alias(
                has_imageattachment=Exists(
                    ImageAttachment.objects.filter(path_id=OuterRef("path_id"))
                )
            )
            .filter(extension_limits, has_imageattachment=False, id__gt=min_id)
            .order_by("id")
        )[:1000]

        # Highest id seen in this batch; stays None when the batch is
        # empty, which is how we detect that we are done.
        batch_max_id: int | None = None
        for attachment in attachments:
            batch_max_id = attachment.id

            s3_body: StreamingBody | None = None
            if settings.LOCAL_UPLOADS_DIR is None:
                try:
                    metadata = upload_bucket.Object(attachment.path_id).get()
                except ClientError:
                    print(f"{attachment.path_id}: Missing!")
                    continue
                s3_body = metadata["Body"]

                # We use the streaming body to only pull down as much
                # of the image as we need to examine the headers --
                # generally about 40k
                source: pyvips.Source = pyvips.SourceCustom()
                source.on_read(partial(s3_read, s3_body))
            else:
                # NOTE(review): this assumes path_id is relative to
                # LOCAL_UPLOADS_DIR itself; confirm the local upload
                # backend does not keep files in a subdirectory.
                attachment_path = os.path.join(settings.LOCAL_UPLOADS_DIR, attachment.path_id)
                if not os.path.exists(attachment_path):
                    print(f"{attachment.path_id}: Missing!")
                    continue
                source = pyvips.Source.new_from_file(attachment_path)

            try:
                image = pyvips.Image.new_from_source(source, "", access="sequential")

                # "original_width_px" and "original_height_px" here are
                # _as rendered_, after applying the orientation
                # information which the image may contain.
                if "orientation" in image.get_fields() and 5 <= image.get("orientation") <= 8:
                    (width, height) = (image.height, image.width)
                else:
                    (width, height) = (image.width, image.height)
                frames = image.get_n_pages()
            except pyvips.Error as e:
                # Not everything with an image-like extension is an
                # image; report it like the missing files, and move on.
                print(f"{attachment.path_id}: Not a readable image, skipping: {e}")
                continue
            finally:
                # We only read the headers, so the S3 response is left
                # partially consumed; close it explicitly so that its
                # connection is released rather than leaked.
                if s3_body is not None:
                    s3_body.close()

            ImageAttachment.objects.create(
                realm_id=attachment.realm_id,
                path_id=attachment.path_id,
                original_width_px=width,
                original_height_px=height,
                frames=frames,
                thumbnail_metadata=[],
            )

        if batch_max_id is None:
            break
        min_id = batch_max_id
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Data migration which backfills ImageAttachment rows for existing uploads."""

    # The backfill is not run as one atomic migration.
    atomic = False

    # Because this will be backported to 9.x, we only depend on the last migration in 9.x
    dependencies = [
        ("zerver", "0558_realmuserdefault_web_animate_image_previews_and_more"),
    ]

    operations = [
        migrations.RunPython(
            code=backfill_imageattachment,
            # Reversing is a no-op.
            reverse_code=migrations.RunPython.noop,
            elidable=True,
        ),
    ]
|
|
@ -0,0 +1,10 @@
|
||||||
|
from django.db import migrations
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Merge migration joining the 0575 and 0576 branches of the zerver migration graph."""

    dependencies = [
        ("zerver", "0575_alter_directmessagegroup_group_size"),
        ("zerver", "0576_backfill_imageattachment"),
    ]

    # Nothing to execute; this migration only exists to depend on both parents.
    operations = []
|
Loading…
Reference in New Issue