From 5476340b520a9b67008069dd95ed0dba48309120 Mon Sep 17 00:00:00 2001 From: Mateusz Mandera Date: Tue, 23 Jul 2024 21:55:01 +0200 Subject: [PATCH] import: Export and import .original emoji files correctly. The export tool was only exporting the already-thumbnailed emoji file, omitting the original one. Now we make sure to export the .original file too, like we do for avatars, and make the import tool process it directly, to thumbnail it directly and generate a still in the case of animated emojis. Otherwise, the imported realm wouldn't have the .png.original file that we generally expect to have accessible, and stills for animated emojis were completely missing. --- zerver/lib/export.py | 36 +++++++++++---- zerver/lib/import_realm.py | 70 +++++++++++++++++++++++++++--- zerver/tests/test_import_export.py | 52 ++++++++++++++++++---- 3 files changed, 135 insertions(+), 23 deletions(-) diff --git a/zerver/lib/export.py b/zerver/lib/export.py index dd4a9188c6..497feb286f 100644 --- a/zerver/lib/export.py +++ b/zerver/lib/export.py @@ -1591,7 +1591,11 @@ def export_uploads_and_avatars( valid_hashes=avatar_hash_values, ) - emoji_paths = {get_emoji_path(realm_emoji) for realm_emoji in realm_emojis} + emoji_paths = set() + for realm_emoji in realm_emojis: + emoji_path = get_emoji_path(realm_emoji) + emoji_paths.add(emoji_path) + emoji_paths.add(emoji_path + ".original") export_files_from_s3( realm, @@ -1632,7 +1636,11 @@ def _get_exported_s3_record( record.update(key.metadata) if processing_emoji: - record["file_name"] = os.path.basename(key.key) + file_name = os.path.basename(key.key) + # Both the main emoji file and the .original version should have the same + # file_name value in the record, as they reference the same emoji. + file_name = file_name.removesuffix(".original") + record["file_name"] = file_name if "user_profile_id" in record: user_profile = get_user_profile_by_id(int(record["user_profile_id"])) @@ -1879,13 +1887,23 @@ def export_emoji_from_local( realm: Realm, local_dir: Path, output_dir: Path, realm_emojis: list[RealmEmoji] ) -> None: records = [] - for count, realm_emoji in enumerate(realm_emojis, 1): - emoji_path = get_emoji_path(realm_emoji) + + realm_emoji_helper_tuples: list[tuple[RealmEmoji, str]] = [] + for realm_emoji in realm_emojis: + realm_emoji_path = get_emoji_path(realm_emoji) # Use 'mark_sanitized' to work around false positive caused by Pysa # thinking that 'realm' (and thus 'attachment' and 'attachment.path_id') # are user controlled - emoji_path = mark_sanitized(emoji_path) + realm_emoji_path = mark_sanitized(realm_emoji_path) + + realm_emoji_path_original = realm_emoji_path + ".original" + + realm_emoji_helper_tuples.append((realm_emoji, realm_emoji_path)) + realm_emoji_helper_tuples.append((realm_emoji, realm_emoji_path_original)) + + for count, realm_emoji_helper_tuple in enumerate(realm_emoji_helper_tuples, 1): + realm_emoji_object, emoji_path = realm_emoji_helper_tuple local_path = os.path.join(local_dir, emoji_path) output_path = os.path.join(output_dir, emoji_path) @@ -1893,7 +1911,7 @@ def export_emoji_from_local( os.makedirs(os.path.dirname(output_path), exist_ok=True) shutil.copy2(local_path, output_path) # Realm emoji author is optional. - author = realm_emoji.author + author = realm_emoji_object.author author_id = None if author: author_id = author.id @@ -1902,9 +1920,9 @@ def export_emoji_from_local( author=author_id, path=emoji_path, s3_path=emoji_path, - file_name=realm_emoji.file_name, - name=realm_emoji.name, - deactivated=realm_emoji.deactivated, + file_name=realm_emoji_object.file_name, + name=realm_emoji_object.name, + deactivated=realm_emoji_object.deactivated, ) records.append(record) diff --git a/zerver/lib/import_realm.py b/zerver/lib/import_realm.py index 62d257e111..df0523c255 100644 --- a/zerver/lib/import_realm.py +++ b/zerver/lib/import_realm.py @@ -28,13 +28,14 @@ from zerver.lib.markdown import markdown_convert from zerver.lib.markdown import version as markdown_version from zerver.lib.message import get_last_message_id from zerver.lib.mime_types import guess_type +from zerver.lib.partial import partial from zerver.lib.push_notifications import sends_notifications_directly from zerver.lib.remote_server import maybe_enqueue_audit_log_upload from zerver.lib.server_initialization import create_internal_realm, server_initialized from zerver.lib.streams import render_stream_description from zerver.lib.thumbnail import BadImageError from zerver.lib.timestamp import datetime_to_timestamp -from zerver.lib.upload import ensure_avatar_image, sanitize_name, upload_backend +from zerver.lib.upload import ensure_avatar_image, sanitize_name, upload_backend, upload_emoji_image from zerver.lib.upload.s3 import get_bucket from zerver.lib.user_counts import realm_user_count_by_role from zerver.lib.user_groups import create_system_user_groups_for_realm @@ -823,6 +824,41 @@ def process_avatars(record: dict[str, Any]) -> None: do_change_avatar_fields(user_profile, UserProfile.AVATAR_FROM_GRAVATAR, acting_user=None) +def process_emojis( + import_dir: str, default_user_profile_id: int | None, record: dict[str, Any] +) -> None: + if not record["s3_path"].endswith(".original"): + return + + if "author_id" in record and record["author_id"] is not None: + user_profile = get_user_profile_by_id(record["author_id"]) + else: + assert default_user_profile_id is not None + user_profile = get_user_profile_by_id(default_user_profile_id) + + # file_name has the proper file extension without the + # .original suffix. + # application/octet-stream will be rejected by upload_emoji_image, + # but it's easier to use it here as the sensible default value + # and let upload_emoji_image figure out the exact error; or handle + # the file somehow anyway if it's ever changed to do that. + content_type = guess_type(record["file_name"])[0] or "application/octet-stream" + emoji_import_data_file_dath = os.path.join(import_dir, record["path"]) + with open(emoji_import_data_file_dath, "rb") as f: + try: + # This will overwrite the files that got copied to the appropriate paths + # for emojis (whether in S3 or in the local uploads dir), ensuring to + # thumbnail them and generate stills for animated emojis. + upload_emoji_image(f, record["file_name"], user_profile, content_type) + except BadImageError: + logging.warning( + "Could not thumbnail emoji image %s; ignoring", + record["s3_path"], + ) + # TODO:: should we delete the RealmEmoji object, or keep it with the files + # that did get copied; even though they do generate this error? + + def import_uploads( realm: Realm, import_dir: Path, @@ -855,6 +891,16 @@ def import_uploads( re_map_foreign_keys_internal( records, "records", "user_profile_id", related_table="user_profile", id_field=True ) + if processing_emojis and records and "author" in records[0]: + # This condition only guarantees author field appears in the generated + # records. Potentially the value of it might be None though. In that + # case, this will be ignored by the remap below. + # Any code further down the codepath that wants to use the author value + # needs to be mindful of it potentially being None and use a fallback + # value, most likely default_user_profile_id being the right choice. + re_map_foreign_keys_internal( + records, "records", "author", related_table="user_profile", id_field=False + ) s3_uploads = settings.LOCAL_UPLOADS_DIR is None @@ -881,6 +927,8 @@ def import_uploads( relative_path = RealmEmoji.PATH_ID_TEMPLATE.format( realm_id=record["realm_id"], emoji_file_name=record["file_name"] ) + if record["s3_path"].endswith(".original"): + relative_path += ".original" record["last_modified"] = timestamp elif processing_realm_icons: icon_name = os.path.basename(record["path"]) @@ -948,15 +996,25 @@ def import_uploads( if count % 1000 == 0: logging.info("Processed %s/%s uploads", count, len(records)) - if processing_avatars: + if processing_avatars or processing_emojis: + if processing_avatars: + process_func = process_avatars + else: + assert processing_emojis + process_func = partial( + process_emojis, + import_dir, + default_user_profile_id, + ) + # Ensure that we have medium-size avatar images for every - # avatar. TODO: This implementation is hacky, both in that it + # avatar and properly thumbnailed emojis with stills (for animated emoji). + # TODO: This implementation is hacky, both in that it # does get_user_profile_by_id for each user, and in that it # might be better to require the export to just have these. - if processes == 1: for record in records: - process_avatars(record) + process_func(record) else: connection.close() _cache = cache._cache # type: ignore[attr-defined] # not in stubs @@ -964,7 +1022,7 @@ def import_uploads( _cache.disconnect_all() with ProcessPoolExecutor(max_workers=processes) as executor: for future in as_completed( - executor.submit(process_avatars, record) for record in records + executor.submit(process_func, record) for record in records ): future.result() diff --git a/zerver/tests/test_import_export.py b/zerver/tests/test_import_export.py index decc45dd59..bff6901b31 100644 --- a/zerver/tests/test_import_export.py +++ b/zerver/tests/test_import_export.py @@ -54,6 +54,7 @@ from zerver.lib.test_helpers import ( read_test_image_file, use_s3_backend, ) +from zerver.lib.thumbnail import BadImageError from zerver.lib.upload import claim_attachment, upload_avatar_image, upload_message_attachment from zerver.lib.utils import assert_is_not_none from zerver.models import ( @@ -251,16 +252,27 @@ class ExportFile(ZulipTestCase): emoji_path = f"{realm.id}/emoji/images/{file_name}" emoji_dir = export_fn(f"emoji/{realm.id}/emoji/images") - self.assertEqual(os.listdir(emoji_dir), [file_name]) + self.assertEqual(set(os.listdir(emoji_dir)), {file_name, file_name + ".original"}) + + (record1, record2) = read_json("emoji/records.json") + # The return order is not guaranteed, so sort it so that we can reliably + # know which record is for the .original file and which for the actual emoji. + record, record_original = sorted( + (record1, record2), key=lambda r: r["path"].endswith(".original") + ) - (record,) = read_json("emoji/records.json") self.assertEqual(record["file_name"], file_name) self.assertEqual(record["path"], emoji_path) self.assertEqual(record["s3_path"], emoji_path) + self.assertEqual(record_original["file_name"], file_name) + self.assertEqual(record_original["path"], emoji_path + ".original") + self.assertEqual(record_original["s3_path"], emoji_path + ".original") if is_s3: self.assertEqual(record["realm_id"], realm.id) self.assertEqual(record["user_profile_id"], user.id) + self.assertEqual(record_original["realm_id"], realm.id) + self.assertEqual(record_original["user_profile_id"], user.id) def verify_realm_logo_and_icon(self) -> None: records = read_json("realm_icons/records.json") @@ -1622,6 +1634,24 @@ class RealmImportExportTest(ExportFile): new_realm.id, [realm["id"] for realm in json.loads(m.call_args_list[1][0][2]["realms"])] ) + def test_import_emoji_error(self) -> None: + user = self.example_user("hamlet") + realm = user.realm + + self.upload_files_for_user(user) + self.upload_files_for_realm(user) + + self.export_realm_and_create_auditlog(realm) + + with ( + self.settings(BILLING_ENABLED=False), + self.assertLogs(level="WARNING") as mock_log, + patch("zerver.lib.import_realm.upload_emoji_image", side_effect=BadImageError("test")), + ): + do_import_realm(get_output_dir(), "test-zulip") + self.assert_length(mock_log.output, 1) + self.assertIn("Could not thumbnail emoji image", mock_log.output[0]) + def test_import_files_from_local(self) -> None: user = self.example_user("hamlet") realm = user.realm @@ -1646,6 +1676,9 @@ class RealmImportExportTest(ExportFile): attachment_file_path = os.path.join(settings.LOCAL_FILES_DIR, uploaded_file.path_id) self.assertTrue(os.path.isfile(attachment_file_path)) + test_image_data = read_test_image_file("img.png") + self.assertIsNotNone(test_image_data) + # Test emojis realm_emoji = RealmEmoji.objects.get(realm=imported_realm) emoji_path = RealmEmoji.PATH_ID_TEMPLATE.format( @@ -1653,6 +1686,8 @@ class RealmImportExportTest(ExportFile): emoji_file_name=realm_emoji.file_name, ) emoji_file_path = os.path.join(settings.LOCAL_AVATARS_DIR, emoji_path) + with open(emoji_file_path + ".original", "rb") as f: + self.assertEqual(f.read(), test_image_data) self.assertTrue(os.path.isfile(emoji_file_path)) # Test avatars @@ -1665,9 +1700,6 @@ class RealmImportExportTest(ExportFile): upload_path = upload.upload_backend.realm_avatar_and_logo_path(imported_realm) full_upload_path = os.path.join(settings.LOCAL_AVATARS_DIR, upload_path) - test_image_data = read_test_image_file("img.png") - self.assertIsNotNone(test_image_data) - with open(os.path.join(full_upload_path, "icon.original"), "rb") as f: self.assertEqual(f.read(), test_image_data) self.assertTrue(os.path.isfile(os.path.join(full_upload_path, "icon.png"))) @@ -1715,9 +1747,13 @@ class RealmImportExportTest(ExportFile): realm_id=imported_realm.id, emoji_file_name=realm_emoji.file_name, ) - emoji_key = avatar_bucket.Object(emoji_path) - self.assertIsNotNone(emoji_key.get()["Body"].read()) - self.assertEqual(emoji_key.key, emoji_path) + resized_emoji_key = avatar_bucket.Object(emoji_path) + self.assertIsNotNone(resized_emoji_key.get()["Body"].read()) + self.assertEqual(resized_emoji_key.key, emoji_path) + original_emoji_path_id = emoji_path + ".original" + original_emoji_key = avatar_bucket.Object(original_emoji_path_id) + self.assertEqual(original_emoji_key.get()["Body"].read(), test_image_data) + self.assertEqual(original_emoji_key.key, original_emoji_path_id) # Test avatars user_profile = UserProfile.objects.get(full_name=user.full_name, realm=imported_realm)