From e1a9473bd67fc217ae00ad49099b90c18ad93fee Mon Sep 17 00:00:00 2001 From: Alex Vandiver Date: Thu, 11 Jul 2024 13:40:26 +0000 Subject: [PATCH] thumbnail: Log and revert to gravatar on migration failure. This is preferable to leaving the user with a broken avatar image. --- zerver/migrations/0544_copy_avatar_images.py | 205 +++++++++++-------- 1 file changed, 115 insertions(+), 90 deletions(-) diff --git a/zerver/migrations/0544_copy_avatar_images.py b/zerver/migrations/0544_copy_avatar_images.py index 54e766b3a4..bfdbb4a8b3 100644 --- a/zerver/migrations/0544_copy_avatar_images.py +++ b/zerver/migrations/0544_copy_avatar_images.py @@ -12,6 +12,7 @@ from django.db import migrations from django.db.backends.base.schema import BaseDatabaseSchemaEditor from django.db.migrations.state import StateApps from django.db.models import QuerySet +from django.utils.timezone import now as timezone_now IMAGE_BOMB_TOTAL_PIXELS = 90000000 DEFAULT_AVATAR_SIZE = 100 @@ -49,6 +50,28 @@ def old_hash(user_profile: Any) -> str: return hashlib.sha1(user_key.encode()).hexdigest() +def do_remove_avatar(user_profile: Any, apps: StateApps) -> None: + avatar_source = "G" # UserProfile.AVATAR_FROM_GRAVATAR + user_profile.avatar_source = avatar_source + user_profile.avatar_version += 1 + user_profile.save(update_fields=["avatar_source", "avatar_version"]) + RealmAuditLog = apps.get_model("zerver", "RealmAuditLog") + RealmAuditLog.objects.create( + realm_id=user_profile.realm_id, + modified_user_id=user_profile.id, + event_type=123, # RealmAuditLog.USER_AVATAR_SOURCE_CHANGED, + extra_data={"avatar_source": avatar_source}, + event_time=timezone_now(), + acting_user=None, + ) + + +class SkipImageError(Exception): + def __init__(self, message: str, user: Any) -> None: + super().__init__(message) + self.user = user + + # Just the image types from zerver.lib.upload.INLINE_MIME_TYPES INLINE_IMAGE_MIME_TYPES = [ "image/apng", @@ -62,7 +85,7 @@ INLINE_IMAGE_MIME_TYPES = [ ] -def thumbnail_s3_avatars(users: QuerySet[Any]) -> None: +def thumbnail_s3_avatars(users: QuerySet[Any], apps: StateApps) -> None: avatar_bucket = boto3.resource( "s3", aws_access_key_id=settings.S3_KEY, @@ -75,105 +98,107 @@ def thumbnail_s3_avatars(users: QuerySet[Any]) -> None: ), ).Bucket(settings.S3_AVATAR_BUCKET) for total_processed, user in enumerate(users): - old_base = os.path.join(str(user.realm_id), old_hash(user)) - new_base = os.path.join(str(user.realm_id), new_hash(user)) - - if total_processed % 100 == 0: - print(f"Processing {total_processed}/{len(users)} user avatars") - - with contextlib.suppress(ClientError): - # Check if we've already uploaded this one; if so, continue. - avatar_bucket.Object(new_base + ".original").load() - continue - try: - old_data = avatar_bucket.Object(old_base + ".original").get() - metadata = old_data["Metadata"] - metadata["avatar_version"] = str(user.avatar_version) - original_bytes = old_data["Body"].read() - except ClientError: - print(f"Failed to fetch {old_base}") - continue + old_base = os.path.join(str(user.realm_id), old_hash(user)) + new_base = os.path.join(str(user.realm_id), new_hash(user)) - # INLINE_IMAGE_MIME_TYPES changing (e.g. adding - # "image/avif") means this may not match the old - # content-disposition. - inline_type = old_data["ContentType"] in INLINE_IMAGE_MIME_TYPES - extra_params = {} - if not inline_type: - extra_params["ContentDisposition"] = "attachment" + if total_processed % 100 == 0: + print(f"Processing {total_processed}/{len(users)} user avatars") - avatar_bucket.Object(new_base + ".original").copy_from( - CopySource=f"{settings.S3_AVATAR_BUCKET}/{old_base}.original", - MetadataDirective="REPLACE", - Metadata=metadata, - ContentType=old_data["ContentType"], - CacheControl="public, max-age=31536000, immutable", - **extra_params, # type: ignore[arg-type] # The dynamic kwargs here confuse mypy. - ) + with contextlib.suppress(ClientError): + # Check if we've already uploaded this one; if so, continue. + avatar_bucket.Object(new_base + ".original").load() + continue - small = resize_avatar(original_bytes, DEFAULT_AVATAR_SIZE) - if small is None: - print(f"Failed to resize {old_base}") - continue - avatar_bucket.Object(new_base + ".png").put( - Metadata=metadata, - ContentType="image/png", - CacheControl="public, max-age=31536000, immutable", - Body=small, - ) - medium = resize_avatar(original_bytes, MEDIUM_AVATAR_SIZE) - if medium is None: - print(f"Failed to medium resize {old_base}") - continue - avatar_bucket.Object(new_base + "-medium.png").put( - Metadata=metadata, - ContentType="image/png", - CacheControl="public, max-age=31536000, immutable", - Body=medium, - ) + try: + old_data = avatar_bucket.Object(old_base + ".original").get() + metadata = old_data["Metadata"] + metadata["avatar_version"] = str(user.avatar_version) + original_bytes = old_data["Body"].read() + except ClientError: + raise SkipImageError(f"Failed to fetch {old_base}", user) + + # INLINE_IMAGE_MIME_TYPES changing (e.g. adding + # "image/avif") means this may not match the old + # content-disposition. + inline_type = old_data["ContentType"] in INLINE_IMAGE_MIME_TYPES + extra_params = {} + if not inline_type: + extra_params["ContentDisposition"] = "attachment" + + avatar_bucket.Object(new_base + ".original").copy_from( + CopySource=f"{settings.S3_AVATAR_BUCKET}/{old_base}.original", + MetadataDirective="REPLACE", + Metadata=metadata, + ContentType=old_data["ContentType"], + CacheControl="public, max-age=31536000, immutable", + **extra_params, # type: ignore[arg-type] # The dynamic kwargs here confuse mypy. + ) + + small = resize_avatar(original_bytes, DEFAULT_AVATAR_SIZE) + if small is None: + raise SkipImageError(f"Failed to resize {old_base}", user) + avatar_bucket.Object(new_base + ".png").put( + Metadata=metadata, + ContentType="image/png", + CacheControl="public, max-age=31536000, immutable", + Body=small, + ) + medium = resize_avatar(original_bytes, MEDIUM_AVATAR_SIZE) + if medium is None: + raise SkipImageError(f"Failed to medium resize {old_base}", user) + avatar_bucket.Object(new_base + "-medium.png").put( + Metadata=metadata, + ContentType="image/png", + CacheControl="public, max-age=31536000, immutable", + Body=medium, + ) + except SkipImageError as e: + print(f"{e!s} for {e.user}; reverting to gravatar") + do_remove_avatar(e.user, apps) -def thumbnail_local_avatars(users: QuerySet[Any]) -> None: +def thumbnail_local_avatars(users: QuerySet[Any], apps: StateApps) -> None: total_processed = 0 assert settings.LOCAL_AVATARS_DIR is not None for total_processed, user in enumerate(users): - old_base = os.path.join(settings.LOCAL_AVATARS_DIR, str(user.realm_id), old_hash(user)) - new_base = os.path.join(settings.LOCAL_AVATARS_DIR, str(user.realm_id), new_hash(user)) - - if total_processed % 100 == 0: - print(f"Processing {total_processed}/{len(users)} user avatars") - - if os.path.exists(new_base + "-medium.png"): - # This user's avatar has already been migrated. - continue - - with contextlib.suppress(FileNotFoundError): - # Remove the hard link, if present from a previous failed run. - os.remove(new_base + ".original") - - # We hardlink, rather than copying, so we don't take any extra space. try: - os.link(old_base + ".original", new_base + ".original") - with open(old_base + ".original", "rb") as f: - original_bytes = f.read() - except OSError: - print(f"Failed to read {old_base}.original") - raise + old_base = os.path.join(settings.LOCAL_AVATARS_DIR, str(user.realm_id), old_hash(user)) + new_base = os.path.join(settings.LOCAL_AVATARS_DIR, str(user.realm_id), new_hash(user)) - small = resize_avatar(original_bytes, DEFAULT_AVATAR_SIZE) - if small is None: - print(f"Failed to resize {old_base}") - continue - with open(new_base + ".png", "wb") as f: - f.write(small) + if total_processed % 100 == 0: + print(f"Processing {total_processed}/{len(users)} user avatars") - medium = resize_avatar(original_bytes, MEDIUM_AVATAR_SIZE) - if medium is None: - print(f"Failed to medium resize {old_base}") - continue - with open(new_base + "-medium.png", "wb") as f: - f.write(medium) + if os.path.exists(new_base + "-medium.png"): + # This user's avatar has already been migrated. + continue + + with contextlib.suppress(FileNotFoundError): + # Remove the hard link, if present from a previous failed run. + os.remove(new_base + ".original") + + # We hardlink, rather than copying, so we don't take any extra space. + try: + os.link(old_base + ".original", new_base + ".original") + with open(old_base + ".original", "rb") as f: + original_bytes = f.read() + except OSError: + raise SkipImageError(f"Failed to read {old_base}", user) + + small = resize_avatar(original_bytes, DEFAULT_AVATAR_SIZE) + if small is None: + raise SkipImageError(f"Failed to resize {old_base}", user) + with open(new_base + ".png", "wb") as f: + f.write(small) + + medium = resize_avatar(original_bytes, MEDIUM_AVATAR_SIZE) + if medium is None: + raise SkipImageError(f"Failed to medium resize {old_base}", user) + with open(new_base + "-medium.png", "wb") as f: + f.write(medium) + except SkipImageError as e: + print(f"{e!s} for {e.user}; reverting to gravatar") + do_remove_avatar(e.user, apps) def thumbnail_avatars(apps: StateApps, schema_editor: BaseDatabaseSchemaEditor) -> None: @@ -184,9 +209,9 @@ def thumbnail_avatars(apps: StateApps, schema_editor: BaseDatabaseSchemaEditor) .order_by("id") ) if settings.LOCAL_AVATARS_DIR is not None: - thumbnail_local_avatars(users) + thumbnail_local_avatars(users, apps) else: - thumbnail_s3_avatars(users) + thumbnail_s3_avatars(users, apps) class Migration(migrations.Migration):