migrations: Derive emoji content-type from the bytes.

This commit is contained in:
Alex Vandiver 2024-07-17 16:14:17 +00:00 committed by Tim Abbott
parent 8271c2b083
commit ab6b6639e6
2 changed files with 45 additions and 20 deletions

View File

@ -54,7 +54,7 @@ rules:
- id: dont-import-models-in-migrations - id: dont-import-models-in-migrations
patterns: patterns:
- pattern-not: from zerver.lib.mime_types import guess_type - pattern-not: from zerver.lib.mime_types import $X
- pattern-not: from zerver.lib.redis_utils import get_redis_client - pattern-not: from zerver.lib.redis_utils import get_redis_client
- pattern-not: from zerver.lib.utils import generate_api_key - pattern-not: from zerver.lib.utils import generate_api_key
- pattern-not: from zerver.models.linkifiers import filter_pattern_validator - pattern-not: from zerver.models.linkifiers import filter_pattern_validator

View File

@ -7,6 +7,7 @@ from typing import Any
import boto3 import boto3
import botocore import botocore
import magic
import pyvips import pyvips
from botocore.client import Config from botocore.client import Config
from django.conf import settings from django.conf import settings
@ -14,7 +15,7 @@ from django.db import migrations
from django.db.backends.base.schema import BaseDatabaseSchemaEditor from django.db.backends.base.schema import BaseDatabaseSchemaEditor
from django.db.migrations.state import StateApps from django.db.migrations.state import StateApps
from zerver.lib.mime_types import guess_extension, guess_type from zerver.lib.mime_types import guess_extension
# From zerver.lib.thumbnail # From zerver.lib.thumbnail
DEFAULT_EMOJI_SIZE = 64 DEFAULT_EMOJI_SIZE = 64
@ -142,31 +143,50 @@ def thumbnail_local_emoji(apps: StateApps) -> None:
if total_processed % 100 == 0: if total_processed % 100 == 0:
print(f"Processed {total_processed} custom emoji") print(f"Processed {total_processed} custom emoji")
old_file_name = emoji.file_name
try: try:
old_file_name = emoji.file_name base_path = os.path.join(
content_type = guess_type(old_file_name)[0] settings.LOCAL_AVATARS_DIR, str(emoji.realm_id), "emoji/images"
)
copy_from_path = f"{base_path}/{old_file_name}.original"
if not os.path.exists(copy_from_path) and os.path.exists(
f"{base_path}/{old_file_name}"
):
# Imports currently don't write ".original" files, so check without that
copy_from_path = f"{base_path}/{old_file_name}"
if not os.path.exists(copy_from_path):
raise SkipImageError("Failed to read .original file: Does not exist")
with open(copy_from_path, "rb") as fh:
original_bytes = fh.read()
# We used to accept any bytes which pillow could
# thumbnail, with any filename, and would use the
# guessed-from-filename content-type when serving the
# emoji. Examine the bytes of the image to verify that it
# is an image of reasonable type, and then derive the real
# filename extension (which we will still use for deriving
# content-type at serving time) from that. This ensures
# that the contents are a valid image, and that we put the
# right content-type on it when served -- the filename
# used for the initial upload becomes completely
# irrelevant.
content_type = magic.from_buffer(original_bytes[:1024], mime=True)
if content_type not in VALID_EMOJI_CONTENT_TYPE: if content_type not in VALID_EMOJI_CONTENT_TYPE:
raise SkipImageError(f"Invalid content-type: {content_type}") raise SkipImageError(f"Invalid content-type: {content_type}")
new_file_name = get_emoji_file_name(content_type, emoji.id) new_file_name = get_emoji_file_name(content_type, emoji.id)
if old_file_name == new_file_name: if old_file_name == new_file_name:
continue continue
base_path = os.path.join(
settings.LOCAL_AVATARS_DIR, str(emoji.realm_id), "emoji/images"
)
print(f"{base_path}/{old_file_name} -> {base_path}/{new_file_name}") print(f"{base_path}/{old_file_name} -> {base_path}/{new_file_name}")
try: try:
if os.path.exists(f"{base_path}/{new_file_name}.original"): if os.path.exists(f"{base_path}/{new_file_name}.original"):
os.unlink(f"{base_path}/{new_file_name}.original") os.unlink(f"{base_path}/{new_file_name}.original")
from_file = f"{base_path}/{old_file_name}.original" os.link(copy_from_path, f"{base_path}/{new_file_name}.original")
if not os.path.exists(from_file) and os.path.exists(f"{base_path}/{old_file_name}"):
# Imports currently don't write ".original" files, so check without that
from_file = f"{base_path}/{old_file_name}"
os.link(from_file, f"{base_path}/{new_file_name}.original")
with open(f"{base_path}/{new_file_name}.original", "rb") as fh:
original_bytes = fh.read()
except OSError as e: except OSError as e:
raise SkipImageError(f"Failed to read original file: {e}") raise SkipImageError(f"Failed to update .original file: {e}")
animated, still = resize_emoji(original_bytes, new_file_name) animated, still = resize_emoji(original_bytes, new_file_name)
try: try:
@ -230,7 +250,6 @@ def thumbnail_s3(apps: StateApps) -> None:
try: try:
old_data = avatar_bucket.Object(copy_from_path).get() old_data = avatar_bucket.Object(copy_from_path).get()
original_bytes = old_data["Body"].read() original_bytes = old_data["Body"].read()
content_type = old_data["ContentType"]
except botocore.exceptions.ClientError: except botocore.exceptions.ClientError:
# Imports currently don't write ".original" files, so check without that # Imports currently don't write ".original" files, so check without that
try: try:
@ -239,10 +258,16 @@ def thumbnail_s3(apps: StateApps) -> None:
except botocore.exceptions.ClientError as e: except botocore.exceptions.ClientError as e:
raise SkipImageError(f"Failed to read .original file: {e}") raise SkipImageError(f"Failed to read .original file: {e}")
original_bytes = old_data["Body"].read() original_bytes = old_data["Body"].read()
# They also may have uploaded as "application/octet-stream", so guess the
# content-type from the filename. If we can't guess, then we'll hit the # We used to accept any bytes which pillow could
# SkipImageError case right below this. # thumbnail, with any filename, and would store the
content_type = guess_type(old_file_name)[0] or "application/octet-stream" # guessed-from-filename content-type in S3, to be used
# when serving the emoji. Examine the bytes of the image
# to verify that it is an image of reasonable type, and
# then both store that content-type in S3 (for later
# serving), as well as using it to derive the right
# filename extension (for clarity).
content_type = magic.from_buffer(original_bytes[:1024], mime=True)
if content_type not in VALID_EMOJI_CONTENT_TYPE: if content_type not in VALID_EMOJI_CONTENT_TYPE:
raise SkipImageError(f"Invalid content-type: {content_type}") raise SkipImageError(f"Invalid content-type: {content_type}")