From 355f05ffbc2a77614650bbc5a484c22e481859ff Mon Sep 17 00:00:00 2001 From: Mateusz Mandera Date: Thu, 30 May 2024 02:57:56 +0200 Subject: [PATCH] import_realm: Import message.edit_history correctly. Fixes #26369. There are two important fixes to make to the dicts in edit_history: 1. Update the user_id so that it points to the imported sender. 2. Apply fix_message_rendered_content to the prev_rendered_content data to fix up mentions and other such syntax. --- zerver/lib/import_realm.py | 47 ++++++++++++++++++++++++----- zerver/tests/test_import_export.py | 48 ++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+), 8 deletions(-) diff --git a/zerver/lib/import_realm.py b/zerver/lib/import_realm.py index 5e9c619be7..e7c304fc8a 100644 --- a/zerver/lib/import_realm.py +++ b/zerver/lib/import_realm.py @@ -324,13 +324,17 @@ def fix_customprofilefield(data: TableData) -> None: def fix_message_rendered_content( - realm: Realm, sender_map: Dict[int, Record], messages: List[Record] + realm: Realm, + sender_map: Dict[int, Record], + messages: List[Record], + content_key: str = "content", + rendered_content_key: str = "rendered_content", ) -> None: """ This function sets the rendered_content of the messages we're importing. """ for message in messages: - if message["rendered_content"] is not None: + if message[rendered_content_key] is not None: # For Zulip->Zulip imports, we use the original rendered # Markdown; this avoids issues where e.g. a mention can no # longer render properly because a user has changed their @@ -339,7 +343,7 @@ def fix_message_rendered_content( # However, we still need to update the data-user-id and # similar values stored on mentions, stream mentions, and # similar syntax in the rendered HTML. - soup = BeautifulSoup(message["rendered_content"], "html.parser") + soup = BeautifulSoup(message[rendered_content_key], "html.parser") user_mentions = soup.findAll("span", {"class": "user-mention"}) if len(user_mentions) != 0: @@ -356,7 +360,7 @@ def fix_message_rendered_content( old_user_id = int(mention["data-user-id"]) if old_user_id in user_id_map: mention["data-user-id"] = str(user_id_map[old_user_id]) - message["rendered_content"] = str(soup) + message[rendered_content_key] = str(soup) stream_mentions = soup.findAll("a", {"class": "stream"}) if len(stream_mentions) != 0: @@ -365,7 +369,7 @@ def fix_message_rendered_content( old_stream_id = int(mention["data-stream-id"]) if old_stream_id in stream_id_map: mention["data-stream-id"] = str(stream_id_map[old_stream_id]) - message["rendered_content"] = str(soup) + message[rendered_content_key] = str(soup) user_group_mentions = soup.findAll("span", {"class": "user-group-mention"}) if len(user_group_mentions) != 0: @@ -374,11 +378,11 @@ def fix_message_rendered_content( old_user_group_id = int(mention["data-user-group-id"]) if old_user_group_id in user_group_id_map: mention["data-user-group-id"] = str(user_group_id_map[old_user_group_id]) - message["rendered_content"] = str(soup) + message[rendered_content_key] = str(soup) continue try: - content = message["content"] + content = message[content_key] sender_id = message["sender_id"] sender = sender_map[sender_id] @@ -398,7 +402,7 @@ def fix_message_rendered_content( translate_emoticons=translate_emoticons, ).rendered_content - message["rendered_content"] = rendered_content + message[rendered_content_key] = rendered_content if "scheduled_timestamp" not in message: # This logic runs also for ScheduledMessage, which doesn't use # the rendered_content_version field. @@ -414,6 +418,30 @@ def fix_message_rendered_content( ) +def fix_message_edit_history( + realm: Realm, sender_map: Dict[int, Record], messages: List[Record] +) -> None: + user_id_map = ID_MAP["user_profile"] + for message in messages: + edit_history_json = message.get("edit_history") + if not edit_history_json: + continue + + edit_history = orjson.loads(edit_history_json) + for edit_history_message_dict in edit_history: + edit_history_message_dict["user_id"] = user_id_map[edit_history_message_dict["user_id"]] + + fix_message_rendered_content( + realm, + sender_map, + messages=edit_history, + content_key="prev_content", + rendered_content_key="prev_rendered_content", + ) + + message["edit_history"] = orjson.dumps(edit_history).decode() + + def current_table_ids(data: TableData, table: TableName) -> List[int]: """ Returns the ids present in the current table @@ -1682,6 +1710,9 @@ def import_message_data(realm: Realm, sender_map: Dict[int, Record], import_dir: ) logging.info("Successfully rendered Markdown for message batch") + fix_message_edit_history( + realm=realm, sender_map=sender_map, messages=data["zerver_message"] + ) # A LOT HAPPENS HERE. # This is where we actually import the message data. bulk_import_model(data, Message) diff --git a/zerver/tests/test_import_export.py b/zerver/tests/test_import_export.py index 701c926d90..c348ef6f17 100644 --- a/zerver/tests/test_import_export.py +++ b/zerver/tests/test_import_export.py @@ -1086,6 +1086,54 @@ class RealmImportExportTest(ExportFile): Message.objects.filter(realm=imported_realm).count(), ) + def test_import_message_edit_history(self) -> None: + realm = get_realm("zulip") + iago = self.example_user("iago") + hamlet = self.example_user("hamlet") + user_mention_message = f"@**King Hamlet|{hamlet.id}** Hello" + + self.login_user(iago) + message_id = self.send_stream_message( + self.example_user("iago"), "Verona", user_mention_message + ) + + new_content = "new content" + result = self.client_patch( + f"/json/messages/{message_id}", + { + "content": new_content, + }, + ) + self.assert_json_success(result) + + self.export_realm_and_create_auditlog(realm) + with self.settings(BILLING_ENABLED=False), self.assertLogs(level="INFO"): + do_import_realm(get_output_dir(), "test-zulip") + imported_realm = Realm.objects.get(string_id="test-zulip") + + imported_message = Message.objects.filter(realm=imported_realm).latest("id") + imported_hamlet_id = UserProfile.objects.get( + delivery_email=hamlet.delivery_email, realm=imported_realm + ).id + imported_iago_id = UserProfile.objects.get( + delivery_email=iago.delivery_email, realm=imported_realm + ).id + + edit_history_json = imported_message.edit_history + assert edit_history_json is not None + edit_history = orjson.loads(edit_history_json) + self.assert_length(edit_history, 1) + + prev_version_of_message = edit_history[0] + # Ensure the "user_id" (of the sender) was updated correctly + # to the imported id in the data. + self.assertEqual(prev_version_of_message["user_id"], imported_iago_id) + + # The mention metadata in the rendered content should be updated. + self.assertIn( + f'data-user-id="{imported_hamlet_id}"', prev_version_of_message["prev_rendered_content"] + ) + def get_realm_getters(self) -> List[Callable[[Realm], object]]: names = set() getters: List[Callable[[Realm], object]] = []