import: Export and import .original emoji files correctly.

The export tool was only exporting the already-thumbnailed emoji file,
omitting the original one. Now we make sure to export the .original file
too, like we do for avatars, and make the import tool process it
directly, to thumbnail it directly and generate a still in the case of
animated emojis.

Otherwise, the imported realm wouldn't have the <emoji>.png.original
file that we generally expect to have accessible, and stills for
animated emojis were completely missing.
This commit is contained in:
Mateusz Mandera 2024-07-23 21:55:01 +02:00 committed by Tim Abbott
parent 2a1da859ea
commit 5476340b52
3 changed files with 135 additions and 23 deletions

View File

@ -1591,7 +1591,11 @@ def export_uploads_and_avatars(
valid_hashes=avatar_hash_values, valid_hashes=avatar_hash_values,
) )
emoji_paths = {get_emoji_path(realm_emoji) for realm_emoji in realm_emojis} emoji_paths = set()
for realm_emoji in realm_emojis:
emoji_path = get_emoji_path(realm_emoji)
emoji_paths.add(emoji_path)
emoji_paths.add(emoji_path + ".original")
export_files_from_s3( export_files_from_s3(
realm, realm,
@ -1632,7 +1636,11 @@ def _get_exported_s3_record(
record.update(key.metadata) record.update(key.metadata)
if processing_emoji: if processing_emoji:
record["file_name"] = os.path.basename(key.key) file_name = os.path.basename(key.key)
# Both the main emoji file and the .original version should have the same
# file_name value in the record, as they reference the same emoji.
file_name = file_name.removesuffix(".original")
record["file_name"] = file_name
if "user_profile_id" in record: if "user_profile_id" in record:
user_profile = get_user_profile_by_id(int(record["user_profile_id"])) user_profile = get_user_profile_by_id(int(record["user_profile_id"]))
@ -1879,13 +1887,23 @@ def export_emoji_from_local(
realm: Realm, local_dir: Path, output_dir: Path, realm_emojis: list[RealmEmoji] realm: Realm, local_dir: Path, output_dir: Path, realm_emojis: list[RealmEmoji]
) -> None: ) -> None:
records = [] records = []
for count, realm_emoji in enumerate(realm_emojis, 1):
emoji_path = get_emoji_path(realm_emoji) realm_emoji_helper_tuples: list[tuple[RealmEmoji, str]] = []
for realm_emoji in realm_emojis:
realm_emoji_path = get_emoji_path(realm_emoji)
# Use 'mark_sanitized' to work around false positive caused by Pysa # Use 'mark_sanitized' to work around false positive caused by Pysa
# thinking that 'realm' (and thus 'attachment' and 'attachment.path_id') # thinking that 'realm' (and thus 'attachment' and 'attachment.path_id')
# are user controlled # are user controlled
emoji_path = mark_sanitized(emoji_path) realm_emoji_path = mark_sanitized(realm_emoji_path)
realm_emoji_path_original = realm_emoji_path + ".original"
realm_emoji_helper_tuples.append((realm_emoji, realm_emoji_path))
realm_emoji_helper_tuples.append((realm_emoji, realm_emoji_path_original))
for count, realm_emoji_helper_tuple in enumerate(realm_emoji_helper_tuples, 1):
realm_emoji_object, emoji_path = realm_emoji_helper_tuple
local_path = os.path.join(local_dir, emoji_path) local_path = os.path.join(local_dir, emoji_path)
output_path = os.path.join(output_dir, emoji_path) output_path = os.path.join(output_dir, emoji_path)
@ -1893,7 +1911,7 @@ def export_emoji_from_local(
os.makedirs(os.path.dirname(output_path), exist_ok=True) os.makedirs(os.path.dirname(output_path), exist_ok=True)
shutil.copy2(local_path, output_path) shutil.copy2(local_path, output_path)
# Realm emoji author is optional. # Realm emoji author is optional.
author = realm_emoji.author author = realm_emoji_object.author
author_id = None author_id = None
if author: if author:
author_id = author.id author_id = author.id
@ -1902,9 +1920,9 @@ def export_emoji_from_local(
author=author_id, author=author_id,
path=emoji_path, path=emoji_path,
s3_path=emoji_path, s3_path=emoji_path,
file_name=realm_emoji.file_name, file_name=realm_emoji_object.file_name,
name=realm_emoji.name, name=realm_emoji_object.name,
deactivated=realm_emoji.deactivated, deactivated=realm_emoji_object.deactivated,
) )
records.append(record) records.append(record)

View File

@ -28,13 +28,14 @@ from zerver.lib.markdown import markdown_convert
from zerver.lib.markdown import version as markdown_version from zerver.lib.markdown import version as markdown_version
from zerver.lib.message import get_last_message_id from zerver.lib.message import get_last_message_id
from zerver.lib.mime_types import guess_type from zerver.lib.mime_types import guess_type
from zerver.lib.partial import partial
from zerver.lib.push_notifications import sends_notifications_directly from zerver.lib.push_notifications import sends_notifications_directly
from zerver.lib.remote_server import maybe_enqueue_audit_log_upload from zerver.lib.remote_server import maybe_enqueue_audit_log_upload
from zerver.lib.server_initialization import create_internal_realm, server_initialized from zerver.lib.server_initialization import create_internal_realm, server_initialized
from zerver.lib.streams import render_stream_description from zerver.lib.streams import render_stream_description
from zerver.lib.thumbnail import BadImageError from zerver.lib.thumbnail import BadImageError
from zerver.lib.timestamp import datetime_to_timestamp from zerver.lib.timestamp import datetime_to_timestamp
from zerver.lib.upload import ensure_avatar_image, sanitize_name, upload_backend from zerver.lib.upload import ensure_avatar_image, sanitize_name, upload_backend, upload_emoji_image
from zerver.lib.upload.s3 import get_bucket from zerver.lib.upload.s3 import get_bucket
from zerver.lib.user_counts import realm_user_count_by_role from zerver.lib.user_counts import realm_user_count_by_role
from zerver.lib.user_groups import create_system_user_groups_for_realm from zerver.lib.user_groups import create_system_user_groups_for_realm
@ -823,6 +824,41 @@ def process_avatars(record: dict[str, Any]) -> None:
do_change_avatar_fields(user_profile, UserProfile.AVATAR_FROM_GRAVATAR, acting_user=None) do_change_avatar_fields(user_profile, UserProfile.AVATAR_FROM_GRAVATAR, acting_user=None)
def process_emojis(
import_dir: str, default_user_profile_id: int | None, record: dict[str, Any]
) -> None:
if not record["s3_path"].endswith(".original"):
return
if "author_id" in record and record["author_id"] is not None:
user_profile = get_user_profile_by_id(record["author_id"])
else:
assert default_user_profile_id is not None
user_profile = get_user_profile_by_id(default_user_profile_id)
# file_name has the proper file extension without the
# .original suffix.
# application/octet-stream will be rejected by upload_emoji_image,
# but it's easier to use it here as the sensible default value
# and let upload_emoji_image figure out the exact error; or handle
# the file somehow anyway if it's ever changed to do that.
content_type = guess_type(record["file_name"])[0] or "application/octet-stream"
emoji_import_data_file_dath = os.path.join(import_dir, record["path"])
with open(emoji_import_data_file_dath, "rb") as f:
try:
# This will overwrite the files that got copied to the appropriate paths
# for emojis (whether in S3 or in the local uploads dir), ensuring to
# thumbnail them and generate stills for animated emojis.
upload_emoji_image(f, record["file_name"], user_profile, content_type)
except BadImageError:
logging.warning(
"Could not thumbnail emoji image %s; ignoring",
record["s3_path"],
)
# TODO:: should we delete the RealmEmoji object, or keep it with the files
# that did get copied; even though they do generate this error?
def import_uploads( def import_uploads(
realm: Realm, realm: Realm,
import_dir: Path, import_dir: Path,
@ -855,6 +891,16 @@ def import_uploads(
re_map_foreign_keys_internal( re_map_foreign_keys_internal(
records, "records", "user_profile_id", related_table="user_profile", id_field=True records, "records", "user_profile_id", related_table="user_profile", id_field=True
) )
if processing_emojis and records and "author" in records[0]:
# This condition only guarantees author field appears in the generated
# records. Potentially the value of it might be None though. In that
# case, this will be ignored by the remap below.
# Any code further down the codepath that wants to use the author value
# needs to be mindful of it potentially being None and use a fallback
# value, most likely default_user_profile_id being the right choice.
re_map_foreign_keys_internal(
records, "records", "author", related_table="user_profile", id_field=False
)
s3_uploads = settings.LOCAL_UPLOADS_DIR is None s3_uploads = settings.LOCAL_UPLOADS_DIR is None
@ -881,6 +927,8 @@ def import_uploads(
relative_path = RealmEmoji.PATH_ID_TEMPLATE.format( relative_path = RealmEmoji.PATH_ID_TEMPLATE.format(
realm_id=record["realm_id"], emoji_file_name=record["file_name"] realm_id=record["realm_id"], emoji_file_name=record["file_name"]
) )
if record["s3_path"].endswith(".original"):
relative_path += ".original"
record["last_modified"] = timestamp record["last_modified"] = timestamp
elif processing_realm_icons: elif processing_realm_icons:
icon_name = os.path.basename(record["path"]) icon_name = os.path.basename(record["path"])
@ -948,15 +996,25 @@ def import_uploads(
if count % 1000 == 0: if count % 1000 == 0:
logging.info("Processed %s/%s uploads", count, len(records)) logging.info("Processed %s/%s uploads", count, len(records))
if processing_avatars: if processing_avatars or processing_emojis:
if processing_avatars:
process_func = process_avatars
else:
assert processing_emojis
process_func = partial(
process_emojis,
import_dir,
default_user_profile_id,
)
# Ensure that we have medium-size avatar images for every # Ensure that we have medium-size avatar images for every
# avatar. TODO: This implementation is hacky, both in that it # avatar and properly thumbnailed emojis with stills (for animated emoji).
# TODO: This implementation is hacky, both in that it
# does get_user_profile_by_id for each user, and in that it # does get_user_profile_by_id for each user, and in that it
# might be better to require the export to just have these. # might be better to require the export to just have these.
if processes == 1: if processes == 1:
for record in records: for record in records:
process_avatars(record) process_func(record)
else: else:
connection.close() connection.close()
_cache = cache._cache # type: ignore[attr-defined] # not in stubs _cache = cache._cache # type: ignore[attr-defined] # not in stubs
@ -964,7 +1022,7 @@ def import_uploads(
_cache.disconnect_all() _cache.disconnect_all()
with ProcessPoolExecutor(max_workers=processes) as executor: with ProcessPoolExecutor(max_workers=processes) as executor:
for future in as_completed( for future in as_completed(
executor.submit(process_avatars, record) for record in records executor.submit(process_func, record) for record in records
): ):
future.result() future.result()

View File

@ -54,6 +54,7 @@ from zerver.lib.test_helpers import (
read_test_image_file, read_test_image_file,
use_s3_backend, use_s3_backend,
) )
from zerver.lib.thumbnail import BadImageError
from zerver.lib.upload import claim_attachment, upload_avatar_image, upload_message_attachment from zerver.lib.upload import claim_attachment, upload_avatar_image, upload_message_attachment
from zerver.lib.utils import assert_is_not_none from zerver.lib.utils import assert_is_not_none
from zerver.models import ( from zerver.models import (
@ -251,16 +252,27 @@ class ExportFile(ZulipTestCase):
emoji_path = f"{realm.id}/emoji/images/{file_name}" emoji_path = f"{realm.id}/emoji/images/{file_name}"
emoji_dir = export_fn(f"emoji/{realm.id}/emoji/images") emoji_dir = export_fn(f"emoji/{realm.id}/emoji/images")
self.assertEqual(os.listdir(emoji_dir), [file_name]) self.assertEqual(set(os.listdir(emoji_dir)), {file_name, file_name + ".original"})
(record1, record2) = read_json("emoji/records.json")
# The return order is not guaranteed, so sort it so that we can reliably
# know which record is for the .original file and which for the actual emoji.
record, record_original = sorted(
(record1, record2), key=lambda r: r["path"].endswith(".original")
)
(record,) = read_json("emoji/records.json")
self.assertEqual(record["file_name"], file_name) self.assertEqual(record["file_name"], file_name)
self.assertEqual(record["path"], emoji_path) self.assertEqual(record["path"], emoji_path)
self.assertEqual(record["s3_path"], emoji_path) self.assertEqual(record["s3_path"], emoji_path)
self.assertEqual(record_original["file_name"], file_name)
self.assertEqual(record_original["path"], emoji_path + ".original")
self.assertEqual(record_original["s3_path"], emoji_path + ".original")
if is_s3: if is_s3:
self.assertEqual(record["realm_id"], realm.id) self.assertEqual(record["realm_id"], realm.id)
self.assertEqual(record["user_profile_id"], user.id) self.assertEqual(record["user_profile_id"], user.id)
self.assertEqual(record_original["realm_id"], realm.id)
self.assertEqual(record_original["user_profile_id"], user.id)
def verify_realm_logo_and_icon(self) -> None: def verify_realm_logo_and_icon(self) -> None:
records = read_json("realm_icons/records.json") records = read_json("realm_icons/records.json")
@ -1622,6 +1634,24 @@ class RealmImportExportTest(ExportFile):
new_realm.id, [realm["id"] for realm in json.loads(m.call_args_list[1][0][2]["realms"])] new_realm.id, [realm["id"] for realm in json.loads(m.call_args_list[1][0][2]["realms"])]
) )
def test_import_emoji_error(self) -> None:
user = self.example_user("hamlet")
realm = user.realm
self.upload_files_for_user(user)
self.upload_files_for_realm(user)
self.export_realm_and_create_auditlog(realm)
with (
self.settings(BILLING_ENABLED=False),
self.assertLogs(level="WARNING") as mock_log,
patch("zerver.lib.import_realm.upload_emoji_image", side_effect=BadImageError("test")),
):
do_import_realm(get_output_dir(), "test-zulip")
self.assert_length(mock_log.output, 1)
self.assertIn("Could not thumbnail emoji image", mock_log.output[0])
def test_import_files_from_local(self) -> None: def test_import_files_from_local(self) -> None:
user = self.example_user("hamlet") user = self.example_user("hamlet")
realm = user.realm realm = user.realm
@ -1646,6 +1676,9 @@ class RealmImportExportTest(ExportFile):
attachment_file_path = os.path.join(settings.LOCAL_FILES_DIR, uploaded_file.path_id) attachment_file_path = os.path.join(settings.LOCAL_FILES_DIR, uploaded_file.path_id)
self.assertTrue(os.path.isfile(attachment_file_path)) self.assertTrue(os.path.isfile(attachment_file_path))
test_image_data = read_test_image_file("img.png")
self.assertIsNotNone(test_image_data)
# Test emojis # Test emojis
realm_emoji = RealmEmoji.objects.get(realm=imported_realm) realm_emoji = RealmEmoji.objects.get(realm=imported_realm)
emoji_path = RealmEmoji.PATH_ID_TEMPLATE.format( emoji_path = RealmEmoji.PATH_ID_TEMPLATE.format(
@ -1653,6 +1686,8 @@ class RealmImportExportTest(ExportFile):
emoji_file_name=realm_emoji.file_name, emoji_file_name=realm_emoji.file_name,
) )
emoji_file_path = os.path.join(settings.LOCAL_AVATARS_DIR, emoji_path) emoji_file_path = os.path.join(settings.LOCAL_AVATARS_DIR, emoji_path)
with open(emoji_file_path + ".original", "rb") as f:
self.assertEqual(f.read(), test_image_data)
self.assertTrue(os.path.isfile(emoji_file_path)) self.assertTrue(os.path.isfile(emoji_file_path))
# Test avatars # Test avatars
@ -1665,9 +1700,6 @@ class RealmImportExportTest(ExportFile):
upload_path = upload.upload_backend.realm_avatar_and_logo_path(imported_realm) upload_path = upload.upload_backend.realm_avatar_and_logo_path(imported_realm)
full_upload_path = os.path.join(settings.LOCAL_AVATARS_DIR, upload_path) full_upload_path = os.path.join(settings.LOCAL_AVATARS_DIR, upload_path)
test_image_data = read_test_image_file("img.png")
self.assertIsNotNone(test_image_data)
with open(os.path.join(full_upload_path, "icon.original"), "rb") as f: with open(os.path.join(full_upload_path, "icon.original"), "rb") as f:
self.assertEqual(f.read(), test_image_data) self.assertEqual(f.read(), test_image_data)
self.assertTrue(os.path.isfile(os.path.join(full_upload_path, "icon.png"))) self.assertTrue(os.path.isfile(os.path.join(full_upload_path, "icon.png")))
@ -1715,9 +1747,13 @@ class RealmImportExportTest(ExportFile):
realm_id=imported_realm.id, realm_id=imported_realm.id,
emoji_file_name=realm_emoji.file_name, emoji_file_name=realm_emoji.file_name,
) )
emoji_key = avatar_bucket.Object(emoji_path) resized_emoji_key = avatar_bucket.Object(emoji_path)
self.assertIsNotNone(emoji_key.get()["Body"].read()) self.assertIsNotNone(resized_emoji_key.get()["Body"].read())
self.assertEqual(emoji_key.key, emoji_path) self.assertEqual(resized_emoji_key.key, emoji_path)
original_emoji_path_id = emoji_path + ".original"
original_emoji_key = avatar_bucket.Object(original_emoji_path_id)
self.assertEqual(original_emoji_key.get()["Body"].read(), test_image_data)
self.assertEqual(original_emoji_key.key, original_emoji_path_id)
# Test avatars # Test avatars
user_profile = UserProfile.objects.get(full_name=user.full_name, realm=imported_realm) user_profile = UserProfile.objects.get(full_name=user.full_name, realm=imported_realm)