import: Export and import .original emoji files correctly.

The export tool was only exporting the already-thumbnailed emoji file, omitting the original one. Now we make sure to export the .original file too, like we do for avatars, and make the import tool process it directly, to thumbnail it directly and generate a still in the case of animated emojis. Otherwise, the imported realm wouldn't have the <emoji>.png.original file that we generally expect to have accessible, and stills for animated emojis were completely missing.
2024-07-23 21:55:01 +02:00 · 2024-07-23 21:55:01 +02:00 · 5476340b52
parent 2a1da859ea
commit 5476340b52
3 changed files with 135 additions and 23 deletions
--- a/zerver/lib/export.py
+++ b/zerver/lib/export.py
@ -1591,7 +1591,11 @@ def export_uploads_and_avatars(
            valid_hashes=avatar_hash_values,
        )

-        emoji_paths = {get_emoji_path(realm_emoji) for realm_emoji in realm_emojis}
+        emoji_paths = set()
+        for realm_emoji in realm_emojis:
+            emoji_path = get_emoji_path(realm_emoji)
+            emoji_paths.add(emoji_path)
+            emoji_paths.add(emoji_path + ".original")

        export_files_from_s3(
            realm,
@ -1632,7 +1636,11 @@ def _get_exported_s3_record(
    record.update(key.metadata)

    if processing_emoji:
-        record["file_name"] = os.path.basename(key.key)
+        file_name = os.path.basename(key.key)
+        # Both the main emoji file and the .original version should have the same
+        # file_name value in the record, as they reference the same emoji.
+        file_name = file_name.removesuffix(".original")
+        record["file_name"] = file_name

    if "user_profile_id" in record:
        user_profile = get_user_profile_by_id(int(record["user_profile_id"]))
@ -1879,13 +1887,23 @@ def export_emoji_from_local(
    realm: Realm, local_dir: Path, output_dir: Path, realm_emojis: list[RealmEmoji]
 ) -> None:
    records = []
-    for count, realm_emoji in enumerate(realm_emojis, 1):
-        emoji_path = get_emoji_path(realm_emoji)
+
+    realm_emoji_helper_tuples: list[tuple[RealmEmoji, str]] = []
+    for realm_emoji in realm_emojis:
+        realm_emoji_path = get_emoji_path(realm_emoji)

        # Use 'mark_sanitized' to work around false positive caused by Pysa
        # thinking that 'realm' (and thus 'attachment' and 'attachment.path_id')
        # are user controlled
-        emoji_path = mark_sanitized(emoji_path)
+        realm_emoji_path = mark_sanitized(realm_emoji_path)
+
+        realm_emoji_path_original = realm_emoji_path + ".original"
+
+        realm_emoji_helper_tuples.append((realm_emoji, realm_emoji_path))
+        realm_emoji_helper_tuples.append((realm_emoji, realm_emoji_path_original))
+
+    for count, realm_emoji_helper_tuple in enumerate(realm_emoji_helper_tuples, 1):
+        realm_emoji_object, emoji_path = realm_emoji_helper_tuple

        local_path = os.path.join(local_dir, emoji_path)
        output_path = os.path.join(output_dir, emoji_path)
@ -1893,7 +1911,7 @@ def export_emoji_from_local(
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        shutil.copy2(local_path, output_path)
        # Realm emoji author is optional.
-        author = realm_emoji.author
+        author = realm_emoji_object.author
        author_id = None
        if author:
            author_id = author.id
@ -1902,9 +1920,9 @@ def export_emoji_from_local(
            author=author_id,
            path=emoji_path,
            s3_path=emoji_path,
-            file_name=realm_emoji.file_name,
-            name=realm_emoji.name,
-            deactivated=realm_emoji.deactivated,
+            file_name=realm_emoji_object.file_name,
+            name=realm_emoji_object.name,
+            deactivated=realm_emoji_object.deactivated,
        )
        records.append(record)

--- a/zerver/lib/import_realm.py
+++ b/zerver/lib/import_realm.py
@ -28,13 +28,14 @@ from zerver.lib.markdown import markdown_convert
 from zerver.lib.markdown import version as markdown_version
 from zerver.lib.message import get_last_message_id
 from zerver.lib.mime_types import guess_type
+from zerver.lib.partial import partial
 from zerver.lib.push_notifications import sends_notifications_directly
 from zerver.lib.remote_server import maybe_enqueue_audit_log_upload
 from zerver.lib.server_initialization import create_internal_realm, server_initialized
 from zerver.lib.streams import render_stream_description
 from zerver.lib.thumbnail import BadImageError
 from zerver.lib.timestamp import datetime_to_timestamp
-from zerver.lib.upload import ensure_avatar_image, sanitize_name, upload_backend
+from zerver.lib.upload import ensure_avatar_image, sanitize_name, upload_backend, upload_emoji_image
 from zerver.lib.upload.s3 import get_bucket
 from zerver.lib.user_counts import realm_user_count_by_role
 from zerver.lib.user_groups import create_system_user_groups_for_realm
@ -823,6 +824,41 @@ def process_avatars(record: dict[str, Any]) -> None:
        do_change_avatar_fields(user_profile, UserProfile.AVATAR_FROM_GRAVATAR, acting_user=None)


+def process_emojis(
+    import_dir: str, default_user_profile_id: int | None, record: dict[str, Any]
+) -> None:
+    if not record["s3_path"].endswith(".original"):
+        return
+
+    if "author_id" in record and record["author_id"] is not None:
+        user_profile = get_user_profile_by_id(record["author_id"])
+    else:
+        assert default_user_profile_id is not None
+        user_profile = get_user_profile_by_id(default_user_profile_id)
+
+    # file_name has the proper file extension without the
+    # .original suffix.
+    # application/octet-stream will be rejected by upload_emoji_image,
+    # but it's easier to use it here as the sensible default value
+    # and let upload_emoji_image figure out the exact error; or handle
+    # the file somehow anyway if it's ever changed to do that.
+    content_type = guess_type(record["file_name"])[0] or "application/octet-stream"
+    emoji_import_data_file_dath = os.path.join(import_dir, record["path"])
+    with open(emoji_import_data_file_dath, "rb") as f:
+        try:
+            # This will overwrite the files that got copied to the appropriate paths
+            # for emojis (whether in S3 or in the local uploads dir), ensuring to
+            # thumbnail them and generate stills for animated emojis.
+            upload_emoji_image(f, record["file_name"], user_profile, content_type)
+        except BadImageError:
+            logging.warning(
+                "Could not thumbnail emoji image %s; ignoring",
+                record["s3_path"],
+            )
+            # TODO:: should we delete the RealmEmoji object, or keep it with the files
+            # that did get copied; even though they do generate this error?
+
+
 def import_uploads(
    realm: Realm,
    import_dir: Path,
@ -855,6 +891,16 @@ def import_uploads(
        re_map_foreign_keys_internal(
            records, "records", "user_profile_id", related_table="user_profile", id_field=True
        )
+    if processing_emojis and records and "author" in records[0]:
+        # This condition only guarantees author field appears in the generated
+        # records. Potentially the value of it might be None though. In that
+        # case, this will be ignored by the remap below.
+        # Any code further down the codepath that wants to use the author value
+        # needs to be mindful of it potentially being None and use a fallback
+        # value, most likely default_user_profile_id being the right choice.
+        re_map_foreign_keys_internal(
+            records, "records", "author", related_table="user_profile", id_field=False
+        )

    s3_uploads = settings.LOCAL_UPLOADS_DIR is None

@ -881,6 +927,8 @@ def import_uploads(
            relative_path = RealmEmoji.PATH_ID_TEMPLATE.format(
                realm_id=record["realm_id"], emoji_file_name=record["file_name"]
            )
+            if record["s3_path"].endswith(".original"):
+                relative_path += ".original"
            record["last_modified"] = timestamp
        elif processing_realm_icons:
            icon_name = os.path.basename(record["path"])
@ -948,15 +996,25 @@ def import_uploads(
        if count % 1000 == 0:
            logging.info("Processed %s/%s uploads", count, len(records))

-    if processing_avatars:
+    if processing_avatars or processing_emojis:
+        if processing_avatars:
+            process_func = process_avatars
+        else:
+            assert processing_emojis
+            process_func = partial(
+                process_emojis,
+                import_dir,
+                default_user_profile_id,
+            )
+
        # Ensure that we have medium-size avatar images for every
-        # avatar.  TODO: This implementation is hacky, both in that it
+        # avatar and properly thumbnailed emojis with stills (for animated emoji).
+        # TODO: This implementation is hacky, both in that it
        # does get_user_profile_by_id for each user, and in that it
        # might be better to require the export to just have these.
-
        if processes == 1:
            for record in records:
-                process_avatars(record)
+                process_func(record)
        else:
            connection.close()
            _cache = cache._cache  # type: ignore[attr-defined] # not in stubs
@ -964,7 +1022,7 @@ def import_uploads(
            _cache.disconnect_all()
            with ProcessPoolExecutor(max_workers=processes) as executor:
                for future in as_completed(
-                    executor.submit(process_avatars, record) for record in records
+                    executor.submit(process_func, record) for record in records
                ):
                    future.result()

--- a/zerver/tests/test_import_export.py
+++ b/zerver/tests/test_import_export.py
@ -54,6 +54,7 @@ from zerver.lib.test_helpers import (
    read_test_image_file,
    use_s3_backend,
 )
+from zerver.lib.thumbnail import BadImageError
 from zerver.lib.upload import claim_attachment, upload_avatar_image, upload_message_attachment
 from zerver.lib.utils import assert_is_not_none
 from zerver.models import (
@ -251,16 +252,27 @@ class ExportFile(ZulipTestCase):

        emoji_path = f"{realm.id}/emoji/images/{file_name}"
        emoji_dir = export_fn(f"emoji/{realm.id}/emoji/images")
-        self.assertEqual(os.listdir(emoji_dir), [file_name])
+        self.assertEqual(set(os.listdir(emoji_dir)), {file_name, file_name + ".original"})
+
+        (record1, record2) = read_json("emoji/records.json")
+        # The return order is not guaranteed, so sort it so that we can reliably
+        # know which record is for the .original file and which for the actual emoji.
+        record, record_original = sorted(
+            (record1, record2), key=lambda r: r["path"].endswith(".original")
+        )

-        (record,) = read_json("emoji/records.json")
        self.assertEqual(record["file_name"], file_name)
        self.assertEqual(record["path"], emoji_path)
        self.assertEqual(record["s3_path"], emoji_path)
+        self.assertEqual(record_original["file_name"], file_name)
+        self.assertEqual(record_original["path"], emoji_path + ".original")
+        self.assertEqual(record_original["s3_path"], emoji_path + ".original")

        if is_s3:
            self.assertEqual(record["realm_id"], realm.id)
            self.assertEqual(record["user_profile_id"], user.id)
+            self.assertEqual(record_original["realm_id"], realm.id)
+            self.assertEqual(record_original["user_profile_id"], user.id)

    def verify_realm_logo_and_icon(self) -> None:
        records = read_json("realm_icons/records.json")
@ -1622,6 +1634,24 @@ class RealmImportExportTest(ExportFile):
            new_realm.id, [realm["id"] for realm in json.loads(m.call_args_list[1][0][2]["realms"])]
        )

+    def test_import_emoji_error(self) -> None:
+        user = self.example_user("hamlet")
+        realm = user.realm
+
+        self.upload_files_for_user(user)
+        self.upload_files_for_realm(user)
+
+        self.export_realm_and_create_auditlog(realm)
+
+        with (
+            self.settings(BILLING_ENABLED=False),
+            self.assertLogs(level="WARNING") as mock_log,
+            patch("zerver.lib.import_realm.upload_emoji_image", side_effect=BadImageError("test")),
+        ):
+            do_import_realm(get_output_dir(), "test-zulip")
+        self.assert_length(mock_log.output, 1)
+        self.assertIn("Could not thumbnail emoji image", mock_log.output[0])
+
    def test_import_files_from_local(self) -> None:
        user = self.example_user("hamlet")
        realm = user.realm
@ -1646,6 +1676,9 @@ class RealmImportExportTest(ExportFile):
        attachment_file_path = os.path.join(settings.LOCAL_FILES_DIR, uploaded_file.path_id)
        self.assertTrue(os.path.isfile(attachment_file_path))

+        test_image_data = read_test_image_file("img.png")
+        self.assertIsNotNone(test_image_data)
+
        # Test emojis
        realm_emoji = RealmEmoji.objects.get(realm=imported_realm)
        emoji_path = RealmEmoji.PATH_ID_TEMPLATE.format(
@ -1653,6 +1686,8 @@ class RealmImportExportTest(ExportFile):
            emoji_file_name=realm_emoji.file_name,
        )
        emoji_file_path = os.path.join(settings.LOCAL_AVATARS_DIR, emoji_path)
+        with open(emoji_file_path + ".original", "rb") as f:
+            self.assertEqual(f.read(), test_image_data)
        self.assertTrue(os.path.isfile(emoji_file_path))

        # Test avatars
@ -1665,9 +1700,6 @@ class RealmImportExportTest(ExportFile):
        upload_path = upload.upload_backend.realm_avatar_and_logo_path(imported_realm)
        full_upload_path = os.path.join(settings.LOCAL_AVATARS_DIR, upload_path)

-        test_image_data = read_test_image_file("img.png")
-        self.assertIsNotNone(test_image_data)
-
        with open(os.path.join(full_upload_path, "icon.original"), "rb") as f:
            self.assertEqual(f.read(), test_image_data)
        self.assertTrue(os.path.isfile(os.path.join(full_upload_path, "icon.png")))
@ -1715,9 +1747,13 @@ class RealmImportExportTest(ExportFile):
            realm_id=imported_realm.id,
            emoji_file_name=realm_emoji.file_name,
        )
-        emoji_key = avatar_bucket.Object(emoji_path)
-        self.assertIsNotNone(emoji_key.get()["Body"].read())
-        self.assertEqual(emoji_key.key, emoji_path)
+        resized_emoji_key = avatar_bucket.Object(emoji_path)
+        self.assertIsNotNone(resized_emoji_key.get()["Body"].read())
+        self.assertEqual(resized_emoji_key.key, emoji_path)
+        original_emoji_path_id = emoji_path + ".original"
+        original_emoji_key = avatar_bucket.Object(original_emoji_path_id)
+        self.assertEqual(original_emoji_key.get()["Body"].read(), test_image_data)
+        self.assertEqual(original_emoji_key.key, original_emoji_path_id)

        # Test avatars
        user_profile = UserProfile.objects.get(full_name=user.full_name, realm=imported_realm)