diff --git a/zerver/lib/export.py b/zerver/lib/export.py index dd4a9188c6..497feb286f 100644 --- a/zerver/lib/export.py +++ b/zerver/lib/export.py @@ -1591,7 +1591,11 @@ def export_uploads_and_avatars( valid_hashes=avatar_hash_values, ) - emoji_paths = {get_emoji_path(realm_emoji) for realm_emoji in realm_emojis} + emoji_paths = set() + for realm_emoji in realm_emojis: + emoji_path = get_emoji_path(realm_emoji) + emoji_paths.add(emoji_path) + emoji_paths.add(emoji_path + ".original") export_files_from_s3( realm, @@ -1632,7 +1636,11 @@ def _get_exported_s3_record( record.update(key.metadata) if processing_emoji: - record["file_name"] = os.path.basename(key.key) + file_name = os.path.basename(key.key) + # Both the main emoji file and the .original version should have the same + # file_name value in the record, as they reference the same emoji. + file_name = file_name.removesuffix(".original") + record["file_name"] = file_name if "user_profile_id" in record: user_profile = get_user_profile_by_id(int(record["user_profile_id"])) @@ -1879,13 +1887,23 @@ def export_emoji_from_local( realm: Realm, local_dir: Path, output_dir: Path, realm_emojis: list[RealmEmoji] ) -> None: records = [] - for count, realm_emoji in enumerate(realm_emojis, 1): - emoji_path = get_emoji_path(realm_emoji) + + realm_emoji_helper_tuples: list[tuple[RealmEmoji, str]] = [] + for realm_emoji in realm_emojis: + realm_emoji_path = get_emoji_path(realm_emoji) # Use 'mark_sanitized' to work around false positive caused by Pysa # thinking that 'realm' (and thus 'attachment' and 'attachment.path_id') # are user controlled - emoji_path = mark_sanitized(emoji_path) + realm_emoji_path = mark_sanitized(realm_emoji_path) + + realm_emoji_path_original = realm_emoji_path + ".original" + + realm_emoji_helper_tuples.append((realm_emoji, realm_emoji_path)) + realm_emoji_helper_tuples.append((realm_emoji, realm_emoji_path_original)) + + for count, realm_emoji_helper_tuple in enumerate(realm_emoji_helper_tuples, 1): + realm_emoji_object, emoji_path = realm_emoji_helper_tuple local_path = os.path.join(local_dir, emoji_path) output_path = os.path.join(output_dir, emoji_path) @@ -1893,7 +1911,7 @@ def export_emoji_from_local( os.makedirs(os.path.dirname(output_path), exist_ok=True) shutil.copy2(local_path, output_path) # Realm emoji author is optional. - author = realm_emoji.author + author = realm_emoji_object.author author_id = None if author: author_id = author.id @@ -1902,9 +1920,9 @@ def export_emoji_from_local( author=author_id, path=emoji_path, s3_path=emoji_path, - file_name=realm_emoji.file_name, - name=realm_emoji.name, - deactivated=realm_emoji.deactivated, + file_name=realm_emoji_object.file_name, + name=realm_emoji_object.name, + deactivated=realm_emoji_object.deactivated, ) records.append(record) diff --git a/zerver/lib/import_realm.py b/zerver/lib/import_realm.py index 62d257e111..df0523c255 100644 --- a/zerver/lib/import_realm.py +++ b/zerver/lib/import_realm.py @@ -28,13 +28,14 @@ from zerver.lib.markdown import markdown_convert from zerver.lib.markdown import version as markdown_version from zerver.lib.message import get_last_message_id from zerver.lib.mime_types import guess_type +from zerver.lib.partial import partial from zerver.lib.push_notifications import sends_notifications_directly from zerver.lib.remote_server import maybe_enqueue_audit_log_upload from zerver.lib.server_initialization import create_internal_realm, server_initialized from zerver.lib.streams import render_stream_description from zerver.lib.thumbnail import BadImageError from zerver.lib.timestamp import datetime_to_timestamp -from zerver.lib.upload import ensure_avatar_image, sanitize_name, upload_backend +from zerver.lib.upload import ensure_avatar_image, sanitize_name, upload_backend, upload_emoji_image from zerver.lib.upload.s3 import get_bucket from zerver.lib.user_counts import realm_user_count_by_role from zerver.lib.user_groups import create_system_user_groups_for_realm @@ -823,6 +824,41 @@ def process_avatars(record: dict[str, Any]) -> None: do_change_avatar_fields(user_profile, UserProfile.AVATAR_FROM_GRAVATAR, acting_user=None) +def process_emojis( + import_dir: str, default_user_profile_id: int | None, record: dict[str, Any] +) -> None: + if not record["s3_path"].endswith(".original"): + return + + if "author_id" in record and record["author_id"] is not None: + user_profile = get_user_profile_by_id(record["author_id"]) + else: + assert default_user_profile_id is not None + user_profile = get_user_profile_by_id(default_user_profile_id) + + # file_name has the proper file extension without the + # .original suffix. + # application/octet-stream will be rejected by upload_emoji_image, + # but it's easier to use it here as the sensible default value + # and let upload_emoji_image figure out the exact error; or handle + # the file somehow anyway if it's ever changed to do that. + content_type = guess_type(record["file_name"])[0] or "application/octet-stream" + emoji_import_data_file_dath = os.path.join(import_dir, record["path"]) + with open(emoji_import_data_file_dath, "rb") as f: + try: + # This will overwrite the files that got copied to the appropriate paths + # for emojis (whether in S3 or in the local uploads dir), ensuring to + # thumbnail them and generate stills for animated emojis. + upload_emoji_image(f, record["file_name"], user_profile, content_type) + except BadImageError: + logging.warning( + "Could not thumbnail emoji image %s; ignoring", + record["s3_path"], + ) + # TODO:: should we delete the RealmEmoji object, or keep it with the files + # that did get copied; even though they do generate this error? + + def import_uploads( realm: Realm, import_dir: Path, @@ -855,6 +891,16 @@ def import_uploads( re_map_foreign_keys_internal( records, "records", "user_profile_id", related_table="user_profile", id_field=True ) + if processing_emojis and records and "author" in records[0]: + # This condition only guarantees author field appears in the generated + # records. Potentially the value of it might be None though. In that + # case, this will be ignored by the remap below. + # Any code further down the codepath that wants to use the author value + # needs to be mindful of it potentially being None and use a fallback + # value, most likely default_user_profile_id being the right choice. + re_map_foreign_keys_internal( + records, "records", "author", related_table="user_profile", id_field=False + ) s3_uploads = settings.LOCAL_UPLOADS_DIR is None @@ -881,6 +927,8 @@ def import_uploads( relative_path = RealmEmoji.PATH_ID_TEMPLATE.format( realm_id=record["realm_id"], emoji_file_name=record["file_name"] ) + if record["s3_path"].endswith(".original"): + relative_path += ".original" record["last_modified"] = timestamp elif processing_realm_icons: icon_name = os.path.basename(record["path"]) @@ -948,15 +996,25 @@ def import_uploads( if count % 1000 == 0: logging.info("Processed %s/%s uploads", count, len(records)) - if processing_avatars: + if processing_avatars or processing_emojis: + if processing_avatars: + process_func = process_avatars + else: + assert processing_emojis + process_func = partial( + process_emojis, + import_dir, + default_user_profile_id, + ) + # Ensure that we have medium-size avatar images for every - # avatar. TODO: This implementation is hacky, both in that it + # avatar and properly thumbnailed emojis with stills (for animated emoji). + # TODO: This implementation is hacky, both in that it # does get_user_profile_by_id for each user, and in that it # might be better to require the export to just have these. - if processes == 1: for record in records: - process_avatars(record) + process_func(record) else: connection.close() _cache = cache._cache # type: ignore[attr-defined] # not in stubs @@ -964,7 +1022,7 @@ def import_uploads( _cache.disconnect_all() with ProcessPoolExecutor(max_workers=processes) as executor: for future in as_completed( - executor.submit(process_avatars, record) for record in records + executor.submit(process_func, record) for record in records ): future.result() diff --git a/zerver/tests/test_import_export.py b/zerver/tests/test_import_export.py index decc45dd59..bff6901b31 100644 --- a/zerver/tests/test_import_export.py +++ b/zerver/tests/test_import_export.py @@ -54,6 +54,7 @@ from zerver.lib.test_helpers import ( read_test_image_file, use_s3_backend, ) +from zerver.lib.thumbnail import BadImageError from zerver.lib.upload import claim_attachment, upload_avatar_image, upload_message_attachment from zerver.lib.utils import assert_is_not_none from zerver.models import ( @@ -251,16 +252,27 @@ class ExportFile(ZulipTestCase): emoji_path = f"{realm.id}/emoji/images/{file_name}" emoji_dir = export_fn(f"emoji/{realm.id}/emoji/images") - self.assertEqual(os.listdir(emoji_dir), [file_name]) + self.assertEqual(set(os.listdir(emoji_dir)), {file_name, file_name + ".original"}) + + (record1, record2) = read_json("emoji/records.json") + # The return order is not guaranteed, so sort it so that we can reliably + # know which record is for the .original file and which for the actual emoji. + record, record_original = sorted( + (record1, record2), key=lambda r: r["path"].endswith(".original") + ) - (record,) = read_json("emoji/records.json") self.assertEqual(record["file_name"], file_name) self.assertEqual(record["path"], emoji_path) self.assertEqual(record["s3_path"], emoji_path) + self.assertEqual(record_original["file_name"], file_name) + self.assertEqual(record_original["path"], emoji_path + ".original") + self.assertEqual(record_original["s3_path"], emoji_path + ".original") if is_s3: self.assertEqual(record["realm_id"], realm.id) self.assertEqual(record["user_profile_id"], user.id) + self.assertEqual(record_original["realm_id"], realm.id) + self.assertEqual(record_original["user_profile_id"], user.id) def verify_realm_logo_and_icon(self) -> None: records = read_json("realm_icons/records.json") @@ -1622,6 +1634,24 @@ class RealmImportExportTest(ExportFile): new_realm.id, [realm["id"] for realm in json.loads(m.call_args_list[1][0][2]["realms"])] ) + def test_import_emoji_error(self) -> None: + user = self.example_user("hamlet") + realm = user.realm + + self.upload_files_for_user(user) + self.upload_files_for_realm(user) + + self.export_realm_and_create_auditlog(realm) + + with ( + self.settings(BILLING_ENABLED=False), + self.assertLogs(level="WARNING") as mock_log, + patch("zerver.lib.import_realm.upload_emoji_image", side_effect=BadImageError("test")), + ): + do_import_realm(get_output_dir(), "test-zulip") + self.assert_length(mock_log.output, 1) + self.assertIn("Could not thumbnail emoji image", mock_log.output[0]) + def test_import_files_from_local(self) -> None: user = self.example_user("hamlet") realm = user.realm @@ -1646,6 +1676,9 @@ class RealmImportExportTest(ExportFile): attachment_file_path = os.path.join(settings.LOCAL_FILES_DIR, uploaded_file.path_id) self.assertTrue(os.path.isfile(attachment_file_path)) + test_image_data = read_test_image_file("img.png") + self.assertIsNotNone(test_image_data) + # Test emojis realm_emoji = RealmEmoji.objects.get(realm=imported_realm) emoji_path = RealmEmoji.PATH_ID_TEMPLATE.format( @@ -1653,6 +1686,8 @@ class RealmImportExportTest(ExportFile): emoji_file_name=realm_emoji.file_name, ) emoji_file_path = os.path.join(settings.LOCAL_AVATARS_DIR, emoji_path) + with open(emoji_file_path + ".original", "rb") as f: + self.assertEqual(f.read(), test_image_data) self.assertTrue(os.path.isfile(emoji_file_path)) # Test avatars @@ -1665,9 +1700,6 @@ class RealmImportExportTest(ExportFile): upload_path = upload.upload_backend.realm_avatar_and_logo_path(imported_realm) full_upload_path = os.path.join(settings.LOCAL_AVATARS_DIR, upload_path) - test_image_data = read_test_image_file("img.png") - self.assertIsNotNone(test_image_data) - with open(os.path.join(full_upload_path, "icon.original"), "rb") as f: self.assertEqual(f.read(), test_image_data) self.assertTrue(os.path.isfile(os.path.join(full_upload_path, "icon.png"))) @@ -1715,9 +1747,13 @@ class RealmImportExportTest(ExportFile): realm_id=imported_realm.id, emoji_file_name=realm_emoji.file_name, ) - emoji_key = avatar_bucket.Object(emoji_path) - self.assertIsNotNone(emoji_key.get()["Body"].read()) - self.assertEqual(emoji_key.key, emoji_path) + resized_emoji_key = avatar_bucket.Object(emoji_path) + self.assertIsNotNone(resized_emoji_key.get()["Body"].read()) + self.assertEqual(resized_emoji_key.key, emoji_path) + original_emoji_path_id = emoji_path + ".original" + original_emoji_key = avatar_bucket.Object(original_emoji_path_id) + self.assertEqual(original_emoji_key.get()["Body"].read(), test_image_data) + self.assertEqual(original_emoji_key.key, original_emoji_path_id) # Test avatars user_profile = UserProfile.objects.get(full_name=user.full_name, realm=imported_realm)