migrations: Derive emoji content-type from the bytes.

This commit is contained in:
Alex Vandiver 2024-07-17 16:14:17 +00:00 committed by Tim Abbott
parent 8271c2b083
commit ab6b6639e6
2 changed files with 45 additions and 20 deletions

View File

@ -54,7 +54,7 @@ rules:
- id: dont-import-models-in-migrations
patterns:
- pattern-not: from zerver.lib.mime_types import guess_type
- pattern-not: from zerver.lib.mime_types import $X
- pattern-not: from zerver.lib.redis_utils import get_redis_client
- pattern-not: from zerver.lib.utils import generate_api_key
- pattern-not: from zerver.models.linkifiers import filter_pattern_validator

View File

@ -7,6 +7,7 @@ from typing import Any
import boto3
import botocore
import magic
import pyvips
from botocore.client import Config
from django.conf import settings
@ -14,7 +15,7 @@ from django.db import migrations
from django.db.backends.base.schema import BaseDatabaseSchemaEditor
from django.db.migrations.state import StateApps
from zerver.lib.mime_types import guess_extension, guess_type
from zerver.lib.mime_types import guess_extension
# From zerver.lib.thumbnail
DEFAULT_EMOJI_SIZE = 64
@ -142,31 +143,50 @@ def thumbnail_local_emoji(apps: StateApps) -> None:
if total_processed % 100 == 0:
print(f"Processed {total_processed} custom emoji")
try:
old_file_name = emoji.file_name
content_type = guess_type(old_file_name)[0]
try:
base_path = os.path.join(
settings.LOCAL_AVATARS_DIR, str(emoji.realm_id), "emoji/images"
)
copy_from_path = f"{base_path}/{old_file_name}.original"
if not os.path.exists(copy_from_path) and os.path.exists(
f"{base_path}/{old_file_name}"
):
# Imports currently don't write ".original" files, so check without that
copy_from_path = f"{base_path}/{old_file_name}"
if not os.path.exists(copy_from_path):
raise SkipImageError("Failed to read .original file: Does not exist")
with open(copy_from_path, "rb") as fh:
original_bytes = fh.read()
# We used to accept any bytes which pillow could
# thumbnail, with any filename, and would use the
# guessed-from-filename content-type when serving the
# emoji. Examine the bytes of the image to verify that it
# is an image of reasonable type, and then derive the real
# filename extension (which we will still use for deriving
# content-type at serving time) from that. This ensures
# that the contents are a valid image, and that we put the
# right content-type on it when served -- the filename
# used for the initial upload becomes completely
# irrelevant.
content_type = magic.from_buffer(original_bytes[:1024], mime=True)
if content_type not in VALID_EMOJI_CONTENT_TYPE:
raise SkipImageError(f"Invalid content-type: {content_type}")
new_file_name = get_emoji_file_name(content_type, emoji.id)
if old_file_name == new_file_name:
continue
base_path = os.path.join(
settings.LOCAL_AVATARS_DIR, str(emoji.realm_id), "emoji/images"
)
print(f"{base_path}/{old_file_name} -> {base_path}/{new_file_name}")
try:
if os.path.exists(f"{base_path}/{new_file_name}.original"):
os.unlink(f"{base_path}/{new_file_name}.original")
from_file = f"{base_path}/{old_file_name}.original"
if not os.path.exists(from_file) and os.path.exists(f"{base_path}/{old_file_name}"):
# Imports currently don't write ".original" files, so check without that
from_file = f"{base_path}/{old_file_name}"
os.link(from_file, f"{base_path}/{new_file_name}.original")
with open(f"{base_path}/{new_file_name}.original", "rb") as fh:
original_bytes = fh.read()
os.link(copy_from_path, f"{base_path}/{new_file_name}.original")
except OSError as e:
raise SkipImageError(f"Failed to read original file: {e}")
raise SkipImageError(f"Failed to update .original file: {e}")
animated, still = resize_emoji(original_bytes, new_file_name)
try:
@ -230,7 +250,6 @@ def thumbnail_s3(apps: StateApps) -> None:
try:
old_data = avatar_bucket.Object(copy_from_path).get()
original_bytes = old_data["Body"].read()
content_type = old_data["ContentType"]
except botocore.exceptions.ClientError:
# Imports currently don't write ".original" files, so check without that
try:
@ -239,10 +258,16 @@ def thumbnail_s3(apps: StateApps) -> None:
except botocore.exceptions.ClientError as e:
raise SkipImageError(f"Failed to read .original file: {e}")
original_bytes = old_data["Body"].read()
# They also may have uploaded as "application/octet-stream", so guess the
# content-type from the filename. If we can't guess, then we'll hit the
# SkipImageError case right below this.
content_type = guess_type(old_file_name)[0] or "application/octet-stream"
# We used to accept any bytes which pillow could
# thumbnail, with any filename, and would store the
# guessed-from-filename content-type in S3, to be used
# when serving the emoji. Examine the bytes of the image
# to verify that it is an image of reasonable type, and
# then both store that content-type in S3 (for later
# serving), as well as using it to derive the right
# filename extension (for clarity).
content_type = magic.from_buffer(original_bytes[:1024], mime=True)
if content_type not in VALID_EMOJI_CONTENT_TYPE:
raise SkipImageError(f"Invalid content-type: {content_type}")