# File-viewer header (not part of the module):
#   zulip/zerver/migrations/0544_copy_avatar_images.py
#   226 lines, 8.2 KiB, Python

import contextlib
import hashlib
import os
from typing import Any
import boto3
import pyvips
from botocore.client import Config
from botocore.exceptions import ClientError
from django.conf import settings
from django.db import migrations
from django.db.backends.base.schema import BaseDatabaseSchemaEditor
from django.db.migrations.state import StateApps
from django.db.models import QuerySet
from django.utils.timezone import now as timezone_now
# Largest width * height (in pixels) that resize_avatar() will thumbnail;
# anything bigger is rejected and reported as a failed resize.
IMAGE_BOMB_TOTAL_PIXELS = 90_000_000

# Size passed to resize_avatar() for the "<hash>.png" thumbnail.
DEFAULT_AVATAR_SIZE = 100

# Size passed to resize_avatar() for the "<hash>-medium.png" thumbnail.
MEDIUM_AVATAR_SIZE = 500
def resize_avatar(
    image_data: bytes | pyvips.Image,
    size: int,
) -> bytes | None:
    """Render ``image_data`` as a PNG thumbnail, or return None on failure.

    The image is first opened only to read its dimensions; if
    width * height exceeds IMAGE_BOMB_TOTAL_PIXELS the function gives up
    before doing any thumbnailing.  Otherwise pyvips produces a thumbnail
    with both target dimensions set to ``size``, cropped around the centre,
    and the result is encoded as PNG.

    Returns:
        The PNG bytes, or None if the image is too large or pyvips raises
        ``pyvips.Error`` at any point.

    NOTE(review): both pyvips entry points used here are the ``*_buffer``
    variants, and every caller in this file passes raw bytes -- the
    ``pyvips.Image`` half of the annotation looks unused; confirm.
    """
    try:
        probe = pyvips.Image.new_from_buffer(image_data, "")
        pixel_count = probe.width * probe.height
        if pixel_count > IMAGE_BOMB_TOTAL_PIXELS:
            return None

        thumbnail = pyvips.Image.thumbnail_buffer(
            image_data,
            size,
            height=size,
            crop=pyvips.Interesting.CENTRE,
        )
        return thumbnail.write_to_buffer(".png")
    except pyvips.Error:
        return None
def new_hash(user_profile: Any) -> str:
    """Return the versioned avatar hash for ``user_profile``.

    The hashed key is ``"<id>:<avatar_version>:<AVATAR_SALT>"``; the result
    is the first 40 hex characters of its SHA-256 digest.
    """
    key_parts = [
        str(user_profile.id),
        str(user_profile.avatar_version),
        settings.AVATAR_SALT,
    ]
    digest = hashlib.sha256(":".join(key_parts).encode())
    return digest.hexdigest()[:40]
def old_hash(user_profile: Any) -> str:
    """Return the legacy, unversioned avatar hash for ``user_profile``.

    This is the full SHA-1 hex digest of the user id immediately followed
    by ``settings.AVATAR_SALT``.  The migration uses it to locate the
    existing ``.original`` file/object that is being copied.
    """
    salted_id = "".join((str(user_profile.id), settings.AVATAR_SALT))
    digest = hashlib.sha1(salted_id.encode())
    return digest.hexdigest()
def do_remove_avatar(user_profile: Any, apps: StateApps) -> None:
    """Switch ``user_profile`` back to Gravatar and record an audit-log row.

    Sets ``avatar_source`` to "G", increments ``avatar_version``, saves just
    those two fields, and then creates a ``RealmAuditLog`` entry (looked up
    through ``apps``) describing the change, with no acting user.
    """
    gravatar = "G"  # UserProfile.AVATAR_FROM_GRAVATAR

    user_profile.avatar_source = gravatar
    user_profile.avatar_version += 1
    user_profile.save(update_fields=["avatar_source", "avatar_version"])

    # Historical model from the migration state, not the live class.
    audit_log_model = apps.get_model("zerver", "RealmAuditLog")
    audit_fields = {
        "realm_id": user_profile.realm_id,
        "modified_user_id": user_profile.id,
        "event_type": 123,  # RealmAuditLog.USER_AVATAR_SOURCE_CHANGED,
        "extra_data": {"avatar_source": gravatar},
        "event_time": timezone_now(),
        "acting_user": None,
    }
    audit_log_model.objects.create(**audit_fields)
class SkipImageError(Exception):
    """Signals that one user's avatar could not be migrated.

    The affected user rides along on the exception as ``.user`` so that the
    handler can name them in its message and revert them to Gravatar.
    """

    def __init__(self, message: str, user: Any) -> None:
        self.user = user
        super().__init__(message)
# Just the image types from zerver.lib.upload.INLINE_MIME_TYPES.  When an
# original's stored ContentType is not in this list, the S3 copy is made
# with ContentDisposition "attachment".
INLINE_IMAGE_MIME_TYPES: list[str] = [
    "image/apng",
    "image/avif",
    "image/gif",
    "image/jpeg",
    "image/png",
    "image/webp",
    # To avoid cross-site scripting attacks, DO NOT add types such
    # as image/svg+xml.
]
def thumbnail_s3_avatars(users: QuerySet[Any], apps: StateApps) -> None:
    """Copy each user's S3 avatar to its versioned key and upload thumbnails.

    For every user the object ``<realm_id>/<old_hash>.original`` is copied
    to ``<realm_id>/<new_hash>.original`` and two PNG thumbnails,
    ``<new_hash>.png`` and ``<new_hash>-medium.png``, are uploaded next to
    it.  A user whose original cannot be fetched or resized is reverted to
    Gravatar through do_remove_avatar().

    The function may be re-run after an interruption: a user is skipped
    only when their ``-medium.png`` object already exists.  That object is
    the last one written, so its presence means the whole set is there;
    this mirrors the check in thumbnail_local_avatars().
    """
    avatar_bucket = boto3.resource(
        "s3",
        aws_access_key_id=settings.S3_KEY,
        aws_secret_access_key=settings.S3_SECRET_KEY,
        region_name=settings.S3_REGION,
        endpoint_url=settings.S3_ENDPOINT_URL,
        config=Config(
            signature_version=None,
            s3={"addressing_style": settings.S3_ADDRESSING_STYLE},
        ),
    ).Bucket(settings.S3_AVATAR_BUCKET)

    cache_control = "public, max-age=31536000, immutable"
    total_users = len(users)

    for total_processed, user in enumerate(users):
        try:
            old_base = os.path.join(str(user.realm_id), old_hash(user))
            new_base = os.path.join(str(user.realm_id), new_hash(user))

            if total_processed % 100 == 0:
                print(f"Processing {total_processed}/{total_users} user avatars")

            with contextlib.suppress(ClientError):
                # Check if we've already fully uploaded this one; if so,
                # continue.  We probe the medium thumbnail because it is
                # written last: probing ".original" (written first) would
                # make an interrupted run look complete and leave the user
                # without thumbnails forever.  Any ClientError from load()
                # is treated as "not migrated yet".
                avatar_bucket.Object(new_base + "-medium.png").load()
                continue

            try:
                old_data = avatar_bucket.Object(old_base + ".original").get()
                metadata = old_data["Metadata"]
                metadata["avatar_version"] = str(user.avatar_version)
                original_bytes = old_data["Body"].read()
            except ClientError as err:
                # NOTE: every ClientError here, not only a missing key,
                # ends in the Gravatar fallback below.
                raise SkipImageError(f"Failed to fetch {old_base}", user) from err

            # INLINE_IMAGE_MIME_TYPES changing (e.g. adding
            # "image/avif") means this may not match the old
            # content-disposition.
            inline_type = old_data["ContentType"] in INLINE_IMAGE_MIME_TYPES
            extra_params = {}
            if not inline_type:
                extra_params["ContentDisposition"] = "attachment"

            # Server-side copy; re-running it simply overwrites the same key.
            avatar_bucket.Object(new_base + ".original").copy_from(
                CopySource=f"{settings.S3_AVATAR_BUCKET}/{old_base}.original",
                MetadataDirective="REPLACE",
                Metadata=metadata,
                ContentType=old_data["ContentType"],
                CacheControl=cache_control,
                **extra_params,  # type: ignore[arg-type] # The dynamic kwargs here confuse mypy.
            )

            small = resize_avatar(original_bytes, DEFAULT_AVATAR_SIZE)
            if small is None:
                raise SkipImageError(f"Failed to resize {old_base}", user)
            avatar_bucket.Object(new_base + ".png").put(
                Metadata=metadata,
                ContentType="image/png",
                CacheControl=cache_control,
                Body=small,
            )

            medium = resize_avatar(original_bytes, MEDIUM_AVATAR_SIZE)
            if medium is None:
                raise SkipImageError(f"Failed to medium resize {old_base}", user)
            # Keep this upload last: its presence is the completion marker
            # checked at the top of the loop.
            avatar_bucket.Object(new_base + "-medium.png").put(
                Metadata=metadata,
                ContentType="image/png",
                CacheControl=cache_control,
                Body=medium,
            )
        except SkipImageError as e:
            print(f"{e!s} for {e.user}; reverting to gravatar")
            do_remove_avatar(e.user, apps)
def thumbnail_local_avatars(users: QuerySet[Any], apps: StateApps) -> None:
    """Hard-link each user's local avatar to its versioned name and thumbnail it.

    Under ``settings.LOCAL_AVATARS_DIR/<realm_id>/`` the file
    ``<old_hash>.original`` is hard-linked as ``<new_hash>.original`` and
    two PNG thumbnails, ``<new_hash>.png`` and ``<new_hash>-medium.png``,
    are written beside it.  A user whose original cannot be linked, read or
    resized is reverted to Gravatar through do_remove_avatar().

    The function may be re-run after an interruption: a user is skipped
    only when ``<new_hash>-medium.png`` exists.  That file is written last
    and is moved into place with os.replace(), so it never exists in a
    half-written state if the process is interrupted mid-write.
    """
    # Narrows the Optional for the type checker; the caller has already
    # checked this.
    assert settings.LOCAL_AVATARS_DIR is not None

    total_users = len(users)

    for total_processed, user in enumerate(users):
        try:
            old_base = os.path.join(settings.LOCAL_AVATARS_DIR, str(user.realm_id), old_hash(user))
            new_base = os.path.join(settings.LOCAL_AVATARS_DIR, str(user.realm_id), new_hash(user))

            if total_processed % 100 == 0:
                print(f"Processing {total_processed}/{total_users} user avatars")

            medium_path = new_base + "-medium.png"
            if os.path.exists(medium_path):
                # This user's avatar has already been migrated.
                continue

            with contextlib.suppress(FileNotFoundError):
                # Remove the hard link, if present from a previous failed run.
                os.remove(new_base + ".original")

            # We hardlink, rather than copying, so we don't take any extra space.
            try:
                os.link(old_base + ".original", new_base + ".original")
                with open(old_base + ".original", "rb") as f:
                    original_bytes = f.read()
            except OSError as err:
                raise SkipImageError(f"Failed to read {old_base}", user) from err

            small = resize_avatar(original_bytes, DEFAULT_AVATAR_SIZE)
            if small is None:
                raise SkipImageError(f"Failed to resize {old_base}", user)
            with open(new_base + ".png", "wb") as f:
                f.write(small)

            medium = resize_avatar(original_bytes, MEDIUM_AVATAR_SIZE)
            if medium is None:
                raise SkipImageError(f"Failed to medium resize {old_base}", user)
            # Write to a scratch name and rename into place.  The medium
            # thumbnail is the completion marker checked above, so a
            # truncated file under its final name would make a later run
            # skip this user.  A leftover scratch file is simply
            # overwritten on the next attempt.
            scratch_path = medium_path + ".tmp"
            with open(scratch_path, "wb") as f:
                f.write(medium)
            os.replace(scratch_path, medium_path)
        except SkipImageError as e:
            print(f"{e!s} for {e.user}; reverting to gravatar")
            do_remove_avatar(e.user, apps)
def thumbnail_avatars(apps: StateApps, schema_editor: BaseDatabaseSchemaEditor) -> None:
    """RunPython entry point: migrate the avatars of all users with uploads.

    Selects users whose ``avatar_source`` is "U", ordered by id and loading
    only the id, realm_id and avatar_version columns, then hands them to the
    local-disk or S3 implementation depending on whether
    ``settings.LOCAL_AVATARS_DIR`` is set.  ``schema_editor`` is unused.
    """
    UserProfile = apps.get_model("zerver", "UserProfile")
    uploaded = UserProfile.objects.filter(avatar_source="U")
    users = uploaded.only("id", "realm_id", "avatar_version").order_by("id")

    if settings.LOCAL_AVATARS_DIR is None:
        thumbnail_s3_avatars(users, apps)
        return
    thumbnail_local_avatars(users, apps)
class Migration(migrations.Migration):
    """Data-only migration that runs thumbnail_avatars(); no schema changes."""

    # Django's switch for a non-atomic migration: the operations are not
    # wrapped in one transaction.
    atomic = False
    # NOTE(review): Django documents `elidable` as a RunPython/RunSQL
    # option (set below as well); confirm whether anything reads this
    # class-level attribute.
    elidable = True

    dependencies = [
        ("zerver", "0543_preregistrationuser_notify_referrer_on_join"),
    ]

    operations = [
        migrations.RunPython(thumbnail_avatars, elidable=True),
    ]