avatars: Encode version into the filename.

Hash the salt, user-id, and now avatar version into the filename.
This allows the URL contents to be immutable, and thus to be marked as
immutable and cacheable.  Since avatars are served unauthenticated,
hashing with a server-side salt makes the current and past avatars not
enumerable.

This requires plumbing the current (or future) avatar version through
various parts of the upload process.

Since this already requires a full migration of current avatars, also
take the opportunity to fix the missing `.png` on S3 uploads (#12852).

We switch from SHA-1 to SHA-256, but truncate it such that avatar URL
data does not substantially increase in size.

Fixes: #12852.
This commit is contained in:
Alex Vandiver 2024-06-13 12:57:18 +00:00 committed by Tim Abbott
parent feca9939bb
commit e29a455b2d
21 changed files with 288 additions and 107 deletions

View File

@ -16,9 +16,9 @@ def zerver.lib.thumbnail.generate_thumbnail_url(
# filesystem operations. # filesystem operations.
def zerver.lib.upload.sanitize_name(value) -> Sanitize: ... def zerver.lib.upload.sanitize_name(value) -> Sanitize: ...
# This function accepts two integers and then concatenates them into a path # This function accepts three integers and then concatenates them into a path
# segment. The result should be safe for use in filesystem and other operations. # segment. The result should be safe for use in filesystem and other operations.
def zerver.lib.avatar_hash.user_avatar_path_from_ids(user_profile_id, realm_id) -> Sanitize: ... def zerver.lib.avatar_hash.user_avatar_base_path_from_ids(user_profile_id, version, realm_id) -> Sanitize: ...
# This function creates a list of 'UserMessageLite' objects, which contain only # This function creates a list of 'UserMessageLite' objects, which contain only
# integral IDs and flags. These should safe for use with SQL and other # integral IDs and flags. These should safe for use with SQL and other

View File

@ -397,8 +397,9 @@ def do_change_avatar_fields(
def do_delete_avatar_image(user: UserProfile, *, acting_user: Optional[UserProfile]) -> None: def do_delete_avatar_image(user: UserProfile, *, acting_user: Optional[UserProfile]) -> None:
old_version = user.avatar_version
do_change_avatar_fields(user, UserProfile.AVATAR_FROM_GRAVATAR, acting_user=acting_user) do_change_avatar_fields(user, UserProfile.AVATAR_FROM_GRAVATAR, acting_user=acting_user)
delete_avatar_image(user) delete_avatar_image(user, old_version)
def update_scheduled_email_notifications_time( def update_scheduled_email_notifications_time(

View File

@ -27,7 +27,7 @@ from django.utils.timezone import now as timezone_now
from typing_extensions import TypeAlias from typing_extensions import TypeAlias
from zerver.data_import.sequencer import NEXT_ID from zerver.data_import.sequencer import NEXT_ID
from zerver.lib.avatar_hash import user_avatar_path_from_ids from zerver.lib.avatar_hash import user_avatar_base_path_from_ids
from zerver.lib.partial import partial from zerver.lib.partial import partial
from zerver.lib.stream_color import STREAM_ASSIGNMENT_COLORS as STREAM_COLORS from zerver.lib.stream_color import STREAM_ASSIGNMENT_COLORS as STREAM_COLORS
from zerver.models import ( from zerver.models import (
@ -148,6 +148,7 @@ def build_avatar(
path=avatar_url, # Save original avatar URL here, which is downloaded later path=avatar_url, # Save original avatar URL here, which is downloaded later
realm_id=realm_id, realm_id=realm_id,
content_type=None, content_type=None,
avatar_version=1,
user_profile_id=zulip_user_id, user_profile_id=zulip_user_id,
last_modified=timestamp, last_modified=timestamp,
user_profile_email=email, user_profile_email=email,
@ -594,7 +595,9 @@ def process_avatars(
avatar_original_list = [] avatar_original_list = []
avatar_upload_list = [] avatar_upload_list = []
for avatar in avatar_list: for avatar in avatar_list:
avatar_hash = user_avatar_path_from_ids(avatar["user_profile_id"], realm_id) avatar_hash = user_avatar_base_path_from_ids(
avatar["user_profile_id"], avatar["avatar_version"], realm_id
)
avatar_url = avatar["path"] avatar_url = avatar["path"]
avatar_original = dict(avatar) avatar_original = dict(avatar)

View File

@ -6,8 +6,8 @@ from django.contrib.staticfiles.storage import staticfiles_storage
from zerver.lib.avatar_hash import ( from zerver.lib.avatar_hash import (
gravatar_hash, gravatar_hash,
user_avatar_base_path_from_ids,
user_avatar_content_hash, user_avatar_content_hash,
user_avatar_path_from_ids,
) )
from zerver.lib.thumbnail import MEDIUM_AVATAR_SIZE from zerver.lib.thumbnail import MEDIUM_AVATAR_SIZE
from zerver.lib.upload import get_avatar_url from zerver.lib.upload import get_avatar_url
@ -55,13 +55,16 @@ def get_avatar_field(
computing them on the server (mostly to save bandwidth). computing them on the server (mostly to save bandwidth).
""" """
if client_gravatar:
""" """
If our client knows how to calculate gravatar hashes, we If our client knows how to calculate gravatar hashes, we
will return None and let the client compute the gravatar will return None and let the client compute the gravatar
url. url.
""" """
if settings.ENABLE_GRAVATAR and avatar_source == UserProfile.AVATAR_FROM_GRAVATAR: if (
client_gravatar
and settings.ENABLE_GRAVATAR
and avatar_source == UserProfile.AVATAR_FROM_GRAVATAR
):
return None return None
""" """
@ -69,14 +72,11 @@ def get_avatar_field(
either user-uploaded or a gravatar, and then we'll add version either user-uploaded or a gravatar, and then we'll add version
info to try to avoid stale caches. info to try to avoid stale caches.
""" """
url = _get_unversioned_avatar_url( if avatar_source == "U":
user_profile_id=user_id, hash_key = user_avatar_base_path_from_ids(user_id, avatar_version, realm_id)
avatar_source=avatar_source, return get_avatar_url(hash_key, medium=medium)
realm_id=realm_id,
email=email, return get_gravatar_url(email=email, avatar_version=avatar_version, medium=medium)
medium=medium,
)
return append_url_query_string(url, f"version={avatar_version:d}")
def get_gravatar_url(email: str, avatar_version: int, medium: bool = False) -> str: def get_gravatar_url(email: str, avatar_version: int, medium: bool = False) -> str:
@ -95,20 +95,6 @@ def _get_unversioned_gravatar_url(email: str, medium: bool) -> str:
return staticfiles_storage.url("images/default-avatar.png") return staticfiles_storage.url("images/default-avatar.png")
def _get_unversioned_avatar_url(
user_profile_id: int,
avatar_source: str,
realm_id: int,
email: Optional[str] = None,
medium: bool = False,
) -> str:
if avatar_source == "U":
hash_key = user_avatar_path_from_ids(user_profile_id, realm_id)
return get_avatar_url(hash_key, medium=medium)
assert email is not None
return _get_unversioned_gravatar_url(email, medium)
def absolute_avatar_url(user_profile: UserProfile) -> str: def absolute_avatar_url(user_profile: UserProfile) -> str:
""" """
Absolute URLs are used to simplify logic for applications that Absolute URLs are used to simplify logic for applications that

View File

@ -15,25 +15,25 @@ def gravatar_hash(email: str) -> str:
return hashlib.md5(email.lower().encode()).hexdigest() return hashlib.md5(email.lower().encode()).hexdigest()
def user_avatar_hash(uid: str) -> str: def user_avatar_hash(uid: str, version: str) -> str:
# WARNING: If this method is changed, you may need to do a migration # WARNING: If this method is changed, you may need to do a migration
# similar to zerver/migrations/0060_move_avatars_to_be_uid_based.py . # similar to zerver/migrations/0060_move_avatars_to_be_uid_based.py .
# The salt probably doesn't serve any purpose now. In the past we # The salt prevents unauthenticated clients from enumerating the
# used a hash of the email address, not the user ID, and we salted # avatars of all users.
# it in order to make the hashing scheme different from Gravatar's. user_key = uid + ":" + version + ":" + settings.AVATAR_SALT
user_key = uid + settings.AVATAR_SALT return hashlib.sha256(user_key.encode()).hexdigest()[:40]
return hashlib.sha1(user_key.encode()).hexdigest()
def user_avatar_path(user_profile: UserProfile) -> str: def user_avatar_path(user_profile: UserProfile, future: bool = False) -> str:
# WARNING: If this method is changed, you may need to do a migration # 'future' is if this is for the current avatar version, of the next one.
# similar to zerver/migrations/0060_move_avatars_to_be_uid_based.py . return user_avatar_base_path_from_ids(
return user_avatar_path_from_ids(user_profile.id, user_profile.realm_id) user_profile.id, user_profile.avatar_version + (1 if future else 0), user_profile.realm_id
)
def user_avatar_path_from_ids(user_profile_id: int, realm_id: int) -> str: def user_avatar_base_path_from_ids(user_profile_id: int, version: int, realm_id: int) -> str:
user_id_hash = user_avatar_hash(str(user_profile_id)) user_id_hash = user_avatar_hash(str(user_profile_id), str(version))
return f"{realm_id}/{user_id_hash}" return f"{realm_id}/{user_id_hash}"

View File

@ -29,7 +29,7 @@ from typing_extensions import TypeAlias
import zerver.lib.upload import zerver.lib.upload
from analytics.models import RealmCount, StreamCount, UserCount from analytics.models import RealmCount, StreamCount, UserCount
from scripts.lib.zulip_tools import overwrite_symlink from scripts.lib.zulip_tools import overwrite_symlink
from zerver.lib.avatar_hash import user_avatar_path_from_ids from zerver.lib.avatar_hash import user_avatar_base_path_from_ids
from zerver.lib.pysa import mark_sanitized from zerver.lib.pysa import mark_sanitized
from zerver.lib.upload.s3 import get_bucket from zerver.lib.upload.s3 import get_bucket
from zerver.models import ( from zerver.models import (
@ -1543,8 +1543,10 @@ def export_uploads_and_avatars(
) )
avatar_hash_values = set() avatar_hash_values = set()
for user_id in user_ids: for avatar_user in users:
avatar_path = user_avatar_path_from_ids(user_id, realm.id) avatar_path = user_avatar_base_path_from_ids(
avatar_user.id, avatar_user.avatar_version, realm.id
)
avatar_hash_values.add(avatar_path) avatar_hash_values.add(avatar_path)
avatar_hash_values.add(avatar_path + ".original") avatar_hash_values.add(avatar_path + ".original")
@ -1624,6 +1626,9 @@ def _get_exported_s3_record(
else: else:
raise Exception("Missing realm_id") raise Exception("Missing realm_id")
if "avatar_version" in record:
record["avatar_version"] = int(record["avatar_version"])
return record return record
@ -1782,7 +1787,7 @@ def export_avatars_from_local(
if user.avatar_source == UserProfile.AVATAR_FROM_GRAVATAR: if user.avatar_source == UserProfile.AVATAR_FROM_GRAVATAR:
continue continue
avatar_path = user_avatar_path_from_ids(user.id, realm.id) avatar_path = user_avatar_base_path_from_ids(user.id, user.avatar_version, realm.id)
wildcard = os.path.join(local_dir, avatar_path + ".*") wildcard = os.path.join(local_dir, avatar_path + ".*")
for local_path in glob.glob(wildcard): for local_path in glob.glob(wildcard):
@ -1800,6 +1805,7 @@ def export_avatars_from_local(
realm_id=realm.id, realm_id=realm.id,
user_profile_id=user.id, user_profile_id=user.id,
user_profile_email=user.email, user_profile_email=user.email,
avatar_version=user.avatar_version,
s3_path=fn, s3_path=fn,
path=fn, path=fn,
size=stat.st_size, size=stat.st_size,

View File

@ -21,7 +21,7 @@ from analytics.models import RealmCount, StreamCount, UserCount
from zerver.actions.create_realm import set_default_for_realm_permission_group_settings from zerver.actions.create_realm import set_default_for_realm_permission_group_settings
from zerver.actions.realm_settings import do_change_realm_plan_type from zerver.actions.realm_settings import do_change_realm_plan_type
from zerver.actions.user_settings import do_change_avatar_fields from zerver.actions.user_settings import do_change_avatar_fields
from zerver.lib.avatar_hash import user_avatar_path_from_ids from zerver.lib.avatar_hash import user_avatar_base_path_from_ids
from zerver.lib.bulk_create import bulk_set_users_or_streams_recipient_fields from zerver.lib.bulk_create import bulk_set_users_or_streams_recipient_fields
from zerver.lib.export import DATE_FIELDS, Field, Path, Record, TableData, TableName from zerver.lib.export import DATE_FIELDS, Field, Path, Record, TableData, TableName
from zerver.lib.markdown import markdown_convert from zerver.lib.markdown import markdown_convert
@ -797,7 +797,9 @@ def process_avatars(record: Dict[str, Any]) -> None:
return None return None
user_profile = get_user_profile_by_id(record["user_profile_id"]) user_profile = get_user_profile_by_id(record["user_profile_id"])
if settings.LOCAL_AVATARS_DIR is not None: if settings.LOCAL_AVATARS_DIR is not None:
avatar_path = user_avatar_path_from_ids(user_profile.id, record["realm_id"]) avatar_path = user_avatar_base_path_from_ids(
user_profile.id, user_profile.avatar_version, record["realm_id"]
)
medium_file_path = os.path.join(settings.LOCAL_AVATARS_DIR, avatar_path) + "-medium.png" medium_file_path = os.path.join(settings.LOCAL_AVATARS_DIR, avatar_path) + "-medium.png"
if os.path.exists(medium_file_path): if os.path.exists(medium_file_path):
# We remove the image here primarily to deal with # We remove the image here primarily to deal with
@ -864,7 +866,9 @@ def import_uploads(
if processing_avatars: if processing_avatars:
# For avatars, we need to rehash the user ID with the # For avatars, we need to rehash the user ID with the
# new server's avatar salt # new server's avatar salt
relative_path = user_avatar_path_from_ids(record["user_profile_id"], record["realm_id"]) relative_path = user_avatar_base_path_from_ids(
record["user_profile_id"], record["avatar_version"], record["realm_id"]
)
if record["s3_path"].endswith(".original"): if record["s3_path"].endswith(".original"):
relative_path += ".original" relative_path += ".original"
else: else:

View File

@ -246,7 +246,7 @@ def avatar_disk_path(
avatar_disk_path = os.path.join( avatar_disk_path = os.path.join(
settings.LOCAL_AVATARS_DIR, settings.LOCAL_AVATARS_DIR,
avatar_url_path.split("/")[-2], avatar_url_path.split("/")[-2],
avatar_url_path.split("/")[-1].split("?")[0], avatar_url_path.split("/")[-1],
) )
if original: if original:
return avatar_disk_path.replace(".png", ".original") return avatar_disk_path.replace(".png", ".original")

View File

@ -30,7 +30,7 @@ def _transfer_avatar_to_s3(user: UserProfile) -> None:
file_path = os.path.join(settings.LOCAL_AVATARS_DIR, avatar_path) file_path = os.path.join(settings.LOCAL_AVATARS_DIR, avatar_path)
try: try:
with open(file_path + ".original", "rb") as f: with open(file_path + ".original", "rb") as f:
upload_avatar_image(f, user, backend=s3backend) upload_avatar_image(f, user, backend=s3backend, future=False)
logging.info("Uploaded avatar for %s in realm %s", user.id, user.realm.name) logging.info("Uploaded avatar for %s in realm %s", user.id, user.realm.name)
except FileNotFoundError: except FileNotFoundError:
pass pass
@ -66,7 +66,7 @@ def _transfer_message_files_to_s3(attachment: Attachment) -> None:
guessed_type, guessed_type,
attachment.owner, attachment.owner,
f.read(), f.read(),
settings.S3_UPLOADS_STORAGE_CLASS, storage_class=settings.S3_UPLOADS_STORAGE_CLASS,
) )
logging.info("Uploaded message file in path %s", file_path) logging.info("Uploaded message file in path %s", file_path)
except FileNotFoundError: # nocoverage except FileNotFoundError: # nocoverage

View File

@ -11,7 +11,7 @@ from django.conf import settings
from django.core.files.uploadedfile import UploadedFile from django.core.files.uploadedfile import UploadedFile
from django.utils.translation import gettext as _ from django.utils.translation import gettext as _
from zerver.lib.avatar_hash import user_avatar_path from zerver.lib.avatar_hash import user_avatar_base_path_from_ids, user_avatar_path
from zerver.lib.exceptions import ErrorCode, JsonableError from zerver.lib.exceptions import ErrorCode, JsonableError
from zerver.lib.mime_types import guess_type from zerver.lib.mime_types import guess_type
from zerver.lib.outgoing_http import OutgoingSession from zerver.lib.outgoing_http import OutgoingSession
@ -208,6 +208,7 @@ def write_avatar_images(
*, *,
content_type: Optional[str], content_type: Optional[str],
backend: Optional[ZulipUploadBackend] = None, backend: Optional[ZulipUploadBackend] = None,
future: bool = True,
) -> None: ) -> None:
if backend is None: if backend is None:
backend = upload_backend backend = upload_backend
@ -216,6 +217,7 @@ def write_avatar_images(
user_profile=user_profile, user_profile=user_profile,
image_data=image_data, image_data=image_data,
content_type=content_type, content_type=content_type,
future=future,
) )
backend.upload_single_avatar_image( backend.upload_single_avatar_image(
@ -223,6 +225,7 @@ def write_avatar_images(
user_profile=user_profile, user_profile=user_profile,
image_data=resize_avatar(image_data), image_data=resize_avatar(image_data),
content_type="image/png", content_type="image/png",
future=future,
) )
backend.upload_single_avatar_image( backend.upload_single_avatar_image(
@ -230,6 +233,7 @@ def write_avatar_images(
user_profile=user_profile, user_profile=user_profile,
image_data=resize_avatar(image_data, MEDIUM_AVATAR_SIZE), image_data=resize_avatar(image_data, MEDIUM_AVATAR_SIZE),
content_type="image/png", content_type="image/png",
future=future,
) )
@ -238,23 +242,31 @@ def upload_avatar_image(
user_profile: UserProfile, user_profile: UserProfile,
content_type: Optional[str] = None, content_type: Optional[str] = None,
backend: Optional[ZulipUploadBackend] = None, backend: Optional[ZulipUploadBackend] = None,
future: bool = True,
) -> None: ) -> None:
if content_type is None: if content_type is None:
content_type = guess_type(user_file.name)[0] content_type = guess_type(user_file.name)[0]
file_path = user_avatar_path(user_profile) file_path = user_avatar_path(user_profile, future=future)
image_data = user_file.read() image_data = user_file.read()
write_avatar_images( write_avatar_images(
file_path, user_profile, image_data, content_type=content_type, backend=backend file_path,
user_profile,
image_data,
content_type=content_type,
backend=backend,
future=future,
) )
def copy_avatar(source_profile: UserProfile, target_profile: UserProfile) -> None: def copy_avatar(source_profile: UserProfile, target_profile: UserProfile) -> None:
source_file_path = user_avatar_path(source_profile) source_file_path = user_avatar_path(source_profile, future=False)
target_file_path = user_avatar_path(target_profile) target_file_path = user_avatar_path(target_profile, future=True)
image_data, content_type = upload_backend.get_avatar_contents(source_file_path) image_data, content_type = upload_backend.get_avatar_contents(source_file_path)
write_avatar_images(target_file_path, target_profile, image_data, content_type=content_type) write_avatar_images(
target_file_path, target_profile, image_data, content_type=content_type, future=True
)
def ensure_avatar_image(user_profile: UserProfile, medium: bool = False) -> None: def ensure_avatar_image(user_profile: UserProfile, medium: bool = False) -> None:
@ -282,11 +294,12 @@ def ensure_avatar_image(user_profile: UserProfile, medium: bool = False) -> None
user_profile=user_profile, user_profile=user_profile,
image_data=resized_avatar, image_data=resized_avatar,
content_type="image/png", content_type="image/png",
future=False,
) )
def delete_avatar_image(user_profile: UserProfile) -> None: def delete_avatar_image(user_profile: UserProfile, avatar_version: int) -> None:
path_id = user_avatar_path(user_profile) path_id = user_avatar_base_path_from_ids(user_profile.id, avatar_version, user_profile.realm_id)
upload_backend.delete_avatar_image(path_id) upload_backend.delete_avatar_image(path_id)

View File

@ -78,6 +78,7 @@ class ZulipUploadBackend:
user_profile: UserProfile, user_profile: UserProfile,
image_data: bytes, image_data: bytes,
content_type: Optional[str], content_type: Optional[str],
future: bool = True,
) -> None: ) -> None:
raise NotImplementedError raise NotImplementedError

View File

@ -125,6 +125,7 @@ class LocalUploadBackend(ZulipUploadBackend):
user_profile: UserProfile, user_profile: UserProfile,
image_data: bytes, image_data: bytes,
content_type: Optional[str], content_type: Optional[str],
future: bool = True,
) -> None: ) -> None:
write_local_file("avatars", file_path, image_data) write_local_file("avatars", file_path, image_data)

View File

@ -2,7 +2,7 @@ import logging
import os import os
import secrets import secrets
from datetime import datetime from datetime import datetime
from typing import IO, Any, BinaryIO, Callable, Iterator, List, Literal, Optional, Tuple from typing import IO, Any, BinaryIO, Callable, Dict, Iterator, List, Literal, Optional, Tuple
from urllib.parse import urljoin, urlsplit, urlunsplit from urllib.parse import urljoin, urlsplit, urlunsplit
import boto3 import boto3
@ -66,6 +66,7 @@ def upload_image_to_s3(
content_type: Optional[str], content_type: Optional[str],
user_profile: UserProfile, user_profile: UserProfile,
contents: bytes, contents: bytes,
*,
storage_class: Literal[ storage_class: Literal[
"GLACIER_IR", "GLACIER_IR",
"INTELLIGENT_TIERING", "INTELLIGENT_TIERING",
@ -75,12 +76,15 @@ def upload_image_to_s3(
"STANDARD_IA", "STANDARD_IA",
] = "STANDARD", ] = "STANDARD",
cache_control: Optional[str] = None, cache_control: Optional[str] = None,
extra_metadata: Optional[Dict[str, str]] = None,
) -> None: ) -> None:
key = bucket.Object(file_name) key = bucket.Object(file_name)
metadata = { metadata = {
"user_profile_id": str(user_profile.id), "user_profile_id": str(user_profile.id),
"realm_id": str(user_profile.realm_id), "realm_id": str(user_profile.realm_id),
} }
if extra_metadata is not None:
metadata.update(extra_metadata)
extras = {} extras = {}
if content_type is None: if content_type is None:
@ -219,7 +223,7 @@ class S3UploadBackend(ZulipUploadBackend):
content_type, content_type,
user_profile, user_profile,
file_data, file_data,
settings.S3_UPLOADS_STORAGE_CLASS, storage_class=settings.S3_UPLOADS_STORAGE_CLASS,
) )
@override @override
@ -251,15 +255,6 @@ class S3UploadBackend(ZulipUploadBackend):
item["LastModified"], item["LastModified"],
) )
@override
def get_avatar_path(self, hash_key: str, medium: bool = False) -> str:
# BUG: The else case should be f"{hash_key}.png".
# See #12852 for details on this bug and how to migrate it.
if medium:
return f"{hash_key}-medium.png"
else:
return hash_key
@override @override
def get_avatar_url(self, hash_key: str, medium: bool = False) -> str: def get_avatar_url(self, hash_key: str, medium: bool = False) -> str:
return self.get_public_upload_url(self.get_avatar_path(hash_key, medium)) return self.get_public_upload_url(self.get_avatar_path(hash_key, medium))
@ -279,13 +274,17 @@ class S3UploadBackend(ZulipUploadBackend):
user_profile: UserProfile, user_profile: UserProfile,
image_data: bytes, image_data: bytes,
content_type: Optional[str], content_type: Optional[str],
future: bool = True,
) -> None: ) -> None:
extra_metadata = {"avatar_version": str(user_profile.avatar_version + (1 if future else 0))}
upload_image_to_s3( upload_image_to_s3(
self.avatar_bucket, self.avatar_bucket,
file_path, file_path,
content_type, content_type,
user_profile, user_profile,
image_data, image_data,
extra_metadata=extra_metadata,
cache_control="public, max-age=31536000, immutable",
) )
@override @override

View File

@ -0,0 +1,172 @@
import contextlib
import hashlib
import os
from typing import Any, Optional, Union
import boto3
import pyvips
from botocore.client import Config
from django.conf import settings
from django.db import migrations
from django.db.backends.base.schema import BaseDatabaseSchemaEditor
from django.db.migrations.state import StateApps
from django.db.models import QuerySet
IMAGE_BOMB_TOTAL_PIXELS = 90000000
DEFAULT_AVATAR_SIZE = 100
MEDIUM_AVATAR_SIZE = 500
def resize_avatar(
image_data: Union[bytes, pyvips.Image],
size: int,
) -> Optional[bytes]:
try:
source_image = pyvips.Image.new_from_buffer(image_data, "")
if source_image.width * source_image.height > IMAGE_BOMB_TOTAL_PIXELS:
return None
return pyvips.Image.thumbnail_buffer(
image_data,
size,
height=size,
crop=pyvips.Interesting.CENTRE,
).write_to_buffer(".png")
except pyvips.Error:
return None
def new_hash(user_profile: Any) -> str:
user_key = (
str(user_profile.id) + ":" + str(user_profile.avatar_version) + ":" + settings.AVATAR_SALT
)
return hashlib.sha256(user_key.encode()).hexdigest()[:40]
def old_hash(user_profile: Any) -> str:
user_key = str(user_profile.id) + settings.AVATAR_SALT
return hashlib.sha1(user_key.encode()).hexdigest()
def thumbnail_s3_avatars(users: QuerySet[Any]) -> None:
avatar_bucket = boto3.resource(
"s3",
aws_access_key_id=settings.S3_KEY,
aws_secret_access_key=settings.S3_SECRET_KEY,
region_name=settings.S3_REGION,
endpoint_url=settings.S3_ENDPOINT_URL,
config=Config(
signature_version=None,
s3={"addressing_style": settings.S3_ADDRESSING_STYLE},
),
).Bucket(settings.S3_AVATAR_BUCKET)
for user in users:
old_base = os.path.join(str(user.realm_id), old_hash(user))
new_base = os.path.join(str(user.realm_id), new_hash(user))
try:
old_data = avatar_bucket.Object(old_base + ".original").get()
metadata = old_data["Metadata"]
metadata["avatar_version"] = str(user.avatar_version)
original_bytes = old_data["Body"].read()
except Exception:
print(f"Failed to fetch {old_base}")
avatar_bucket.Object(new_base + ".original").copy_from(
CopySource=f"{settings.S3_AVATAR_BUCKET}/{old_base}.original",
MetadataDirective="REPLACE",
Metadata=metadata,
ContentDisposition=old_data["ContentDisposition"],
ContentType=old_data["ContentType"],
CacheControl="public, max-age=31536000, immutable",
)
small = resize_avatar(original_bytes, DEFAULT_AVATAR_SIZE)
if small is None:
print(f"Failed to resize {old_base}")
continue
avatar_bucket.Object(new_base + ".png").put(
Metadata=metadata,
ContentDisposition=old_data["ContentDisposition"],
ContentType=old_data["ContentType"],
CacheControl="public, max-age=31536000, immutable",
Body=small,
)
medium = resize_avatar(original_bytes, MEDIUM_AVATAR_SIZE)
if medium is None:
print(f"Failed to medium resize {old_base}")
continue
avatar_bucket.Object(new_base + "-medium.png").put(
Metadata=metadata,
ContentDisposition=old_data["ContentDisposition"],
ContentType=old_data["ContentType"],
CacheControl="public, max-age=31536000, immutable",
Body=medium,
)
def thumbnail_local_avatars(users: QuerySet[Any]) -> None:
total_processed = 0
assert settings.LOCAL_AVATARS_DIR is not None
for user in users:
old_base = os.path.join(settings.LOCAL_AVATARS_DIR, str(user.realm_id), old_hash(user))
new_base = os.path.join(settings.LOCAL_AVATARS_DIR, str(user.realm_id), new_hash(user))
if os.path.exists(new_base + "-medium.png"):
# This user's avatar has already been migrated.
continue
with contextlib.suppress(Exception):
# Remove the hard link, if present from a previous failed run.
os.remove(new_base + ".original")
# We hardlink, rather than copying, so we don't take any extra space.
try:
os.link(old_base + ".original", new_base + ".original")
with open(old_base + ".original", "rb") as f:
original_bytes = f.read()
except Exception:
print(f"Failed to read {old_base}.original")
raise
small = resize_avatar(original_bytes, DEFAULT_AVATAR_SIZE)
if small is None:
print(f"Failed to resize {old_base}")
continue
with open(new_base + ".png", "wb") as f:
f.write(small)
medium = resize_avatar(original_bytes, MEDIUM_AVATAR_SIZE)
if medium is None:
print(f"Failed to medium resize {old_base}")
continue
with open(new_base + "-medium.png", "wb") as f:
f.write(medium)
total_processed += 1
if total_processed % 100 == 0:
print(f"Processed {total_processed}/{len(users)} user avatars")
def thumbnail_avatars(apps: StateApps, schema_editor: BaseDatabaseSchemaEditor) -> None:
UserProfile = apps.get_model("zerver", "UserProfile")
users = (
UserProfile.objects.filter(avatar_source="U")
.only("id", "realm_id", "avatar_version")
.order_by("id")
)
if settings.LOCAL_AVATARS_DIR is not None:
thumbnail_local_avatars(users)
else:
thumbnail_s3_avatars(users)
class Migration(migrations.Migration):
atomic = False
elidable = True
dependencies = [
("zerver", "0543_preregistrationuser_notify_referrer_on_join"),
]
operations = [migrations.RunPython(thumbnail_avatars, elidable=True)]

View File

@ -175,7 +175,7 @@ class ExportFile(ZulipTestCase):
) )
with get_test_image_file("img.png") as img_file: with get_test_image_file("img.png") as img_file:
upload_avatar_image(img_file, user_profile) upload_avatar_image(img_file, user_profile, future=False)
user_profile.avatar_source = "U" user_profile.avatar_source = "U"
user_profile.save() user_profile.save()

View File

@ -49,7 +49,7 @@ class TransferUploadsToS3Test(ZulipTestCase):
transfer_avatars_to_s3(1) transfer_avatars_to_s3(1)
path_id = user_avatar_path(user) path_id = user_avatar_path(user)
image_key = bucket.Object(path_id) image_key = bucket.Object(path_id + ".png")
original_image_key = bucket.Object(path_id + ".original") original_image_key = bucket.Object(path_id + ".original")
medium_image_key = bucket.Object(path_id + "-medium.png") medium_image_key = bucket.Object(path_id + "-medium.png")

View File

@ -948,7 +948,7 @@ class AvatarTest(UploadSerializeMixin, ZulipTestCase):
self.assertEqual( self.assertEqual(
url, url,
"/user_avatars/5/fc2b9f1a81f4508a4df2d95451a2a77e0524ca0e-medium.png?version=2", "/user_avatars/5/ff062b0fee41738b38c4312bb33bdf3fe2aad463-medium.png",
) )
url = get_avatar_field( url = get_avatar_field(
@ -998,7 +998,7 @@ class AvatarTest(UploadSerializeMixin, ZulipTestCase):
with self.settings(S3_AVATAR_BUCKET="bucket"): with self.settings(S3_AVATAR_BUCKET="bucket"):
backend = S3UploadBackend() backend = S3UploadBackend()
self.assertEqual( self.assertEqual(
backend.get_avatar_url("hash", False), "https://bucket.s3.amazonaws.com/hash" backend.get_avatar_url("hash", False), "https://bucket.s3.amazonaws.com/hash.png"
) )
self.assertEqual( self.assertEqual(
backend.get_avatar_url("hash", True), backend.get_avatar_url("hash", True),
@ -1104,11 +1104,11 @@ class AvatarTest(UploadSerializeMixin, ZulipTestCase):
cordelia.save() cordelia.save()
response = self.client_get("/avatar/cordelia@zulip.com", {"foo": "bar"}) response = self.client_get("/avatar/cordelia@zulip.com", {"foo": "bar"})
redirect_url = response["Location"] redirect_url = response["Location"]
self.assertTrue(redirect_url.endswith(str(avatar_url(cordelia)) + "&foo=bar")) self.assertTrue(redirect_url.endswith(str(avatar_url(cordelia)) + "?foo=bar"))
response = self.client_get(f"/avatar/{cordelia.id}", {"foo": "bar"}) response = self.client_get(f"/avatar/{cordelia.id}", {"foo": "bar"})
redirect_url = response["Location"] redirect_url = response["Location"]
self.assertTrue(redirect_url.endswith(str(avatar_url(cordelia)) + "&foo=bar")) self.assertTrue(redirect_url.endswith(str(avatar_url(cordelia)) + "?foo=bar"))
response = self.client_get("/avatar/") response = self.client_get("/avatar/")
self.assertEqual(response.status_code, 404) self.assertEqual(response.status_code, 404)
@ -1119,11 +1119,11 @@ class AvatarTest(UploadSerializeMixin, ZulipTestCase):
# Test /avatar/<email_or_id> endpoint with HTTP basic auth. # Test /avatar/<email_or_id> endpoint with HTTP basic auth.
response = self.api_get(hamlet, "/avatar/cordelia@zulip.com", {"foo": "bar"}) response = self.api_get(hamlet, "/avatar/cordelia@zulip.com", {"foo": "bar"})
redirect_url = response["Location"] redirect_url = response["Location"]
self.assertTrue(redirect_url.endswith(str(avatar_url(cordelia)) + "&foo=bar")) self.assertTrue(redirect_url.endswith(str(avatar_url(cordelia)) + "?foo=bar"))
response = self.api_get(hamlet, f"/avatar/{cordelia.id}", {"foo": "bar"}) response = self.api_get(hamlet, f"/avatar/{cordelia.id}", {"foo": "bar"})
redirect_url = response["Location"] redirect_url = response["Location"]
self.assertTrue(redirect_url.endswith(str(avatar_url(cordelia)) + "&foo=bar")) self.assertTrue(redirect_url.endswith(str(avatar_url(cordelia)) + "?foo=bar"))
# Test cross_realm_bot avatar access using email. # Test cross_realm_bot avatar access using email.
response = self.api_get(hamlet, "/avatar/welcome-bot@zulip.com", {"foo": "bar"}) response = self.api_get(hamlet, "/avatar/welcome-bot@zulip.com", {"foo": "bar"})
@ -1179,11 +1179,11 @@ class AvatarTest(UploadSerializeMixin, ZulipTestCase):
cordelia.save() cordelia.save()
response = self.client_get("/avatar/cordelia@zulip.com/medium", {"foo": "bar"}) response = self.client_get("/avatar/cordelia@zulip.com/medium", {"foo": "bar"})
redirect_url = response["Location"] redirect_url = response["Location"]
self.assertTrue(redirect_url.endswith(str(avatar_url(cordelia, True)) + "&foo=bar")) self.assertTrue(redirect_url.endswith(str(avatar_url(cordelia, True)) + "?foo=bar"))
response = self.client_get(f"/avatar/{cordelia.id}/medium", {"foo": "bar"}) response = self.client_get(f"/avatar/{cordelia.id}/medium", {"foo": "bar"})
redirect_url = response["Location"] redirect_url = response["Location"]
self.assertTrue(redirect_url.endswith(str(avatar_url(cordelia, True)) + "&foo=bar")) self.assertTrue(redirect_url.endswith(str(avatar_url(cordelia, True)) + "?foo=bar"))
self.logout() self.logout()
@ -1191,11 +1191,11 @@ class AvatarTest(UploadSerializeMixin, ZulipTestCase):
# Test /avatar/<email_or_id>/medium endpoint with HTTP basic auth. # Test /avatar/<email_or_id>/medium endpoint with HTTP basic auth.
response = self.api_get(hamlet, "/avatar/cordelia@zulip.com/medium", {"foo": "bar"}) response = self.api_get(hamlet, "/avatar/cordelia@zulip.com/medium", {"foo": "bar"})
redirect_url = response["Location"] redirect_url = response["Location"]
self.assertTrue(redirect_url.endswith(str(avatar_url(cordelia, True)) + "&foo=bar")) self.assertTrue(redirect_url.endswith(str(avatar_url(cordelia, True)) + "?foo=bar"))
response = self.api_get(hamlet, f"/avatar/{cordelia.id}/medium", {"foo": "bar"}) response = self.api_get(hamlet, f"/avatar/{cordelia.id}/medium", {"foo": "bar"})
redirect_url = response["Location"] redirect_url = response["Location"]
self.assertTrue(redirect_url.endswith(str(avatar_url(cordelia, True)) + "&foo=bar")) self.assertTrue(redirect_url.endswith(str(avatar_url(cordelia, True)) + "?foo=bar"))
# Without spectators enabled, no unauthenticated access. # Without spectators enabled, no unauthenticated access.
response = self.client_get("/avatar/cordelia@zulip.com/medium", {"foo": "bar"}) response = self.client_get("/avatar/cordelia@zulip.com/medium", {"foo": "bar"})

View File

@ -283,7 +283,7 @@ class S3Test(ZulipTestCase):
medium_path_id = path_id + "-medium.png" medium_path_id = path_id + "-medium.png"
with get_test_image_file("img.png") as image_file: with get_test_image_file("img.png") as image_file:
zerver.lib.upload.upload_avatar_image(image_file, user_profile) zerver.lib.upload.upload_avatar_image(image_file, user_profile, future=False)
test_image_data = read_test_image_file("img.png") test_image_data = read_test_image_file("img.png")
test_medium_image_data = resize_avatar(test_image_data, MEDIUM_AVATAR_SIZE) test_medium_image_data = resize_avatar(test_image_data, MEDIUM_AVATAR_SIZE)
@ -319,9 +319,9 @@ class S3Test(ZulipTestCase):
target_path_id = user_avatar_path(target_user_profile) target_path_id = user_avatar_path(target_user_profile)
self.assertNotEqual(source_path_id, target_path_id) self.assertNotEqual(source_path_id, target_path_id)
source_image_key = bucket.Object(source_path_id) source_image_key = bucket.Object(source_path_id + ".png")
target_image_key = bucket.Object(target_path_id) target_image_key = bucket.Object(target_path_id + ".png")
self.assertEqual(target_image_key.key, target_path_id) self.assertEqual(target_image_key.key, target_path_id + ".png")
self.assertEqual(source_image_key.content_type, target_image_key.content_type) self.assertEqual(source_image_key.content_type, target_image_key.content_type)
source_image_data = source_image_key.get()["Body"].read() source_image_data = source_image_key.get()["Body"].read()
target_image_data = target_image_key.get()["Body"].read() target_image_data = target_image_key.get()["Body"].read()
@ -354,13 +354,12 @@ class S3Test(ZulipTestCase):
user_profile = self.example_user("hamlet") user_profile = self.example_user("hamlet")
base_file_path = user_avatar_path(user_profile) base_file_path = user_avatar_path(user_profile)
# Bug: This should have + ".png", but the implementation is wrong. file_path = base_file_path + ".png"
file_path = base_file_path
original_file_path = base_file_path + ".original" original_file_path = base_file_path + ".original"
medium_file_path = base_file_path + "-medium.png" medium_file_path = base_file_path + "-medium.png"
with get_test_image_file("img.png") as image_file: with get_test_image_file("img.png") as image_file:
zerver.lib.upload.upload_avatar_image(image_file, user_profile) zerver.lib.upload.upload_avatar_image(image_file, user_profile, future=False)
key = bucket.Object(original_file_path) key = bucket.Object(original_file_path)
image_data = key.get()["Body"].read() image_data = key.get()["Body"].read()
@ -385,9 +384,10 @@ class S3Test(ZulipTestCase):
user = self.example_user("hamlet") user = self.example_user("hamlet")
avatar_path_id = user_avatar_path(user) avatar_base_path = user_avatar_path(user)
avatar_original_image_path_id = avatar_path_id + ".original" avatar_path_id = avatar_base_path + ".png"
avatar_medium_path_id = avatar_path_id + "-medium.png" avatar_original_image_path_id = avatar_base_path + ".original"
avatar_medium_path_id = avatar_base_path + "-medium.png"
self.assertEqual(user.avatar_source, UserProfile.AVATAR_FROM_USER) self.assertEqual(user.avatar_source, UserProfile.AVATAR_FROM_USER)
self.assertIsNotNone(bucket.Object(avatar_path_id)) self.assertIsNotNone(bucket.Object(avatar_path_id))

View File

@ -1310,7 +1310,7 @@ class UserProfileTest(ZulipTestCase):
# Upload cordelia's avatar # Upload cordelia's avatar
with get_test_image_file("img.png") as image_file: with get_test_image_file("img.png") as image_file:
upload_avatar_image(image_file, cordelia) upload_avatar_image(image_file, cordelia, future=False)
OnboardingStep.objects.filter(user=cordelia).delete() OnboardingStep.objects.filter(user=cordelia).delete()
OnboardingStep.objects.filter(user=iago).delete() OnboardingStep.objects.filter(user=iago).delete()

View File

@ -286,8 +286,6 @@ def serve_local_avatar_unauthed(request: HttpRequest, path: str) -> HttpResponse
# backend; however, there is no reason to not serve the # backend; however, there is no reason to not serve the
# redirect to S3 where the content lives. # redirect to S3 where the content lives.
url = get_public_upload_root_url() + path url = get_public_upload_root_url() + path
if request.GET.urlencode():
url += "?" + request.GET.urlencode()
return redirect(url, permanent=True) return redirect(url, permanent=True)
local_path = os.path.join(settings.LOCAL_AVATARS_DIR, path) local_path = os.path.join(settings.LOCAL_AVATARS_DIR, path)
@ -300,10 +298,7 @@ def serve_local_avatar_unauthed(request: HttpRequest, path: str) -> HttpResponse
else: else:
response = internal_nginx_redirect(quote(f"/internal/local/user_avatars/{path}")) response = internal_nginx_redirect(quote(f"/internal/local/user_avatars/{path}"))
# We do _not_ mark the contents as immutable for caching purposes, patch_cache_control(response, max_age=31536000, public=True, immutable=True)
# since the path for avatar images is hashed only by their user-id
# and a salt, and as such are reused when a user's avatar is
# updated.
return response return response

View File

@ -554,7 +554,7 @@ def add_bot_backend(
[user_file] = request.FILES.values() [user_file] = request.FILES.values()
assert isinstance(user_file, UploadedFile) assert isinstance(user_file, UploadedFile)
assert user_file.size is not None assert user_file.size is not None
upload_avatar_image(user_file, bot_profile) upload_avatar_image(user_file, bot_profile, future=False)
if bot_type in (UserProfile.OUTGOING_WEBHOOK_BOT, UserProfile.EMBEDDED_BOT): if bot_type in (UserProfile.OUTGOING_WEBHOOK_BOT, UserProfile.EMBEDDED_BOT):
assert isinstance(service_name, str) assert isinstance(service_name, str)