From 63cf42ce858b88b2d34d8ab6dd60d5018f2a7812 Mon Sep 17 00:00:00 2001 From: PieterCK Date: Fri, 26 Jul 2024 17:31:59 +0700 Subject: [PATCH] import_realm: Fix near-links in imported messages. Because we rewrite the message ID and channel ID during the import, messages containing near-links will be broken because they still point to the old object IDs. This commit fixes channel links, topic links, group links, dm links and message links (private & public) in the rendered content of imported messages by remapping the relevant IDs. Fixes #31100. --- zerver/lib/import_realm.py | 69 +++++++++++++ zerver/tests/test_import_export.py | 153 +++++++++++++++++++++++++++++ 2 files changed, 222 insertions(+) diff --git a/zerver/lib/import_realm.py b/zerver/lib/import_realm.py index 41e0f7782f..adf0088551 100644 --- a/zerver/lib/import_realm.py +++ b/zerver/lib/import_realm.py @@ -50,6 +50,7 @@ from zerver.lib.thumbnail import THUMBNAIL_ACCEPT_IMAGE_TYPES, BadImageError, ma from zerver.lib.timestamp import datetime_to_timestamp from zerver.lib.upload import ensure_avatar_image, sanitize_name, upload_backend, upload_emoji_image from zerver.lib.upload.s3 import get_bucket +from zerver.lib.url_decoding import NearLinkHandler from zerver.lib.user_counts import realm_user_count_by_role from zerver.lib.user_groups import create_system_user_groups_for_realm from zerver.lib.user_message import UserMessageLite, bulk_insert_ums @@ -416,6 +417,11 @@ def fix_message_rendered_content( if old_user_group_id in user_group_id_map: mention["data-user-group-id"] = str(user_group_id_map[old_user_group_id]) message[rendered_content_key] = str(soup) + + message[rendered_content_key] = fix_near_links_in_messages( + message[rendered_content_key] + ) + continue try: @@ -438,6 +444,9 @@ def fix_message_rendered_content( sent_by_bot=sent_by_bot, translate_emoticons=translate_emoticons, ).rendered_content + # Near-links fix has to be done here for third-party platforms + # import because those don't have 
any rendered content yet. + rendered_content = fix_near_links_in_messages(rendered_content) message[rendered_content_key] = rendered_content if "scheduled_timestamp" not in message: @@ -455,6 +464,66 @@ def fix_message_rendered_content( ) +def remap_near_link_recipient_encoding(fragments: list[str]) -> None: + section, recipient_encoding = fragments[1], fragments[2] + old_recipient_id, recipient_name = recipient_encoding.split("-", maxsplit=1) + + id_map = {"channel": ID_MAP["stream"], "dm": ID_MAP["user_profile"]}.get(section) + if id_map is None: + return + old_id_list = map(int, old_recipient_id.split(",")) + new_id_list = [str(id_map.get(old_id, old_id)) for old_id in old_id_list] + + new_recipient_id = ",".join(new_id_list) + fragments[2] = f"{new_recipient_id}-{recipient_name}" + + +def remap_near_link_message_id(fragments: list[str]) -> None: + message_id_map = ID_MAP["message"] + old_id = fragments[-1] + new_id = message_id_map.get(int(old_id), old_id) + fragments[-1] = str(new_id) + + +def fix_near_links_in_messages(rendered_content: str) -> str: + soup = BeautifulSoup(rendered_content, "html.parser") + near_link_prefixes = ["/#narrow/channel", "/#narrow/stream", "/#narrow/dm"] + + # Include legacy links without the leading "/", which were + # used in older versions. + near_link_prefixes += [prefix.removeprefix("/") for prefix in near_link_prefixes] + + near_links = soup.find_all( + lambda tag: tag.name == "a" + and tag.has_attr("href") + and (tag.get("href").startswith(tuple(near_link_prefixes))) + ) + + if near_links == []: + return rendered_content + + for link in near_links: + url = link["href"] + try: + near_link_instance = NearLinkHandler(url) + except AssertionError: + # NearLinkHandler does additional checks to make sure + # the URL is a near link. In this case it's probably + # not a near link (or a faulty one). 
+ continue + + fragments = near_link_instance.get_near_link_fragment_parts() + remap_near_link_recipient_encoding(fragments) + + if fragments[-2] == "near" and len(fragments) >= 5: + remap_near_link_message_id(fragments) + + near_link_instance.patch_near_link_fragment_parts(fragments) + link["href"] = near_link_instance.get_url() + + return str(soup) + + def fix_message_edit_history( realm: Realm, sender_map: dict[int, Record], messages: list[Record] ) -> None: diff --git a/zerver/tests/test_import_export.py b/zerver/tests/test_import_export.py index 4f3df78009..5d30cb7f1c 100644 --- a/zerver/tests/test_import_export.py +++ b/zerver/tests/test_import_export.py @@ -52,6 +52,7 @@ from zerver.lib.export import ( export_usermessages_batch, ) from zerver.lib.import_realm import do_import_realm, get_incoming_message_ids +from zerver.lib.message_cache import MessageDict from zerver.lib.streams import create_stream_if_needed from zerver.lib.test_classes import ZulipTestCase from zerver.lib.test_helpers import ( @@ -65,6 +66,7 @@ from zerver.lib.test_helpers import ( ) from zerver.lib.thumbnail import BadImageError from zerver.lib.upload import claim_attachment, upload_avatar_image, upload_message_attachment +from zerver.lib.url_encoding import encode_stream, near_message_url from zerver.lib.utils import assert_is_not_none from zerver.models import ( AlertWord, @@ -413,6 +415,15 @@ class RealmImportExportTest(ExportFile): ) self.export_realm(original_realm, export_type, exportable_user_ids) + def get_message_near_link( + self, message_id: int, realm: Realm, relative_link: bool = False + ) -> str: + wide_message_dict = MessageDict.wide_dict(Message.objects.get(id=message_id), realm.id) + url = near_message_url(realm, wide_message_dict) + if relative_link: + return url.removeprefix(realm.url) + return url + def test_export_files_from_local(self) -> None: user = self.example_user("hamlet") realm = user.realm @@ -1326,6 +1337,148 @@ class RealmImportExportTest(ExportFile): 
f'data-user-id="{imported_hamlet_id}"', prev_version_of_message["prev_rendered_content"] ) + def test_import_public_messages_with_near_link(self) -> None: + original_realm = Realm.objects.get(string_id="zulip") + + denmark_channel = get_stream("Denmark", original_realm) + encoded_channel = encode_stream(denmark_channel.id, denmark_channel.name) + channel_link_message = ( + f"[channel near link](http://zulip.testserver/#narrow/channel/{encoded_channel})" + ) + self.send_stream_message(self.example_user("iago"), "Denmark", channel_link_message) + + topic_link_message = f"[topic near link](http://zulip.testserver/#narrow/channel/{encoded_channel}/topic/test)" + self.send_stream_message(self.example_user("hamlet"), "Denmark", topic_link_message) + + near_link_target_message = "near link!" + near_link_target_message_id = self.send_stream_message( + self.example_user("othello"), "Denmark", near_link_target_message + ) + + quote_and_reply_message = f"[message near link](http://zulip.testserver/#narrow/channel/{encoded_channel}/topic/test/near/{near_link_target_message_id})" + self.send_stream_message(self.example_user("othello"), "Denmark", quote_and_reply_message) + + self.export_realm_and_create_auditlog(original_realm) + + with self.settings(BILLING_ENABLED=False), self.assertLogs(level="INFO"): + do_import_realm(get_output_dir(), "test-zulip") + + imported_realm = Realm.objects.get(string_id="test-zulip") + imported_denmark_channel = Stream.objects.get(name="Denmark", realm=imported_realm) + encoded_imported_channel = encode_stream( + imported_denmark_channel.id, imported_denmark_channel.name + ) + + imported_channel_link_message = Message.objects.get( + content=channel_link_message, sender__realm=imported_realm + ) + + self.assertEqual( + imported_channel_link_message.rendered_content, + f'

channel near link

', + ) + imported_topic_link_message = Message.objects.get( + content=topic_link_message, sender__realm=imported_realm + ) + self.assertEqual( + imported_topic_link_message.rendered_content, + f'

topic near link

', + ) + imported_quote_and_reply_message = Message.objects.get( + content=quote_and_reply_message, sender__realm=imported_realm + ) + imported_near_link_target_message = Message.objects.get( + content=near_link_target_message, sender__realm=imported_realm + ) + self.assertEqual( + imported_quote_and_reply_message.rendered_content, + f'

message near link

', + ) + + def test_import_private_messages_with_near_link(self) -> None: + original_realm = Realm.objects.get(string_id="zulip") + + # The first scenario happens in a group message between iago + # , hamlet and ZOE. + # --- + # iago : "test content" + # ZOE : "Iago [said](http://zulip.testserver/#narrow/dm/7,10,11-pm/near/257): test content" + # --- + iago_message_id = self.send_group_direct_message( + self.example_user("iago"), [self.example_user("hamlet"), self.example_user("ZOE")] + ) + iago_message_near_link = self.get_message_near_link(iago_message_id, original_realm) + + zoe_message_context = f"Iago [said]({iago_message_near_link}): test content" + self.send_group_direct_message( + self.example_user("ZOE"), + [self.example_user("hamlet"), self.example_user("iago")], + zoe_message_context, + ) + + # The second scenario happens in a direct message between + # iago and hamlet. + # --- + # hamlet : "test content" + # iago : "Hamlet [said](http://zulip.testserver/#narrow/dm/10,11-pm/near/259): test content" + # --- + hamlet_dm_id = self.send_personal_message( + self.example_user("hamlet"), self.example_user("iago") + ) + hamlet_dm_near_link = self.get_message_near_link(hamlet_dm_id, original_realm) + + iago_dm_context = f"Hamlet [said]({hamlet_dm_near_link}): test content" + self.send_personal_message( + self.example_user("iago"), self.example_user("hamlet"), iago_dm_context + ) + + consented_user_ids = ["iago", "hamlet", "ZOE"] + for user_id in consented_user_ids: + do_change_user_setting( + self.example_user(user_id), "allow_private_data_export", True, acting_user=None + ) + + self.export_realm_and_create_auditlog(original_realm) + + with self.settings(BILLING_ENABLED=False), self.assertLogs(level="INFO"): + do_import_realm(get_output_dir(), "test-zulip") + + imported_realm = Realm.objects.get(string_id="test-zulip") + + # Fetch imported messages and validate the remapped near links + imported_iago_message_id = Message.objects.get( + content="test content", + 
recipient__type=Recipient.DIRECT_MESSAGE_GROUP, + sender__realm=imported_realm, + ).id + imported_zoe_message = Message.objects.get( + content=zoe_message_context, + recipient__type=Recipient.DIRECT_MESSAGE_GROUP, + sender__realm=imported_realm, + ) + + imported_hamlet_dm_id = Message.objects.get( + content="test content", recipient__type=Recipient.PERSONAL, sender__realm=imported_realm + ).id + imported_iago_message = Message.objects.get( + content=iago_dm_context, + recipient__type=Recipient.PERSONAL, + sender__realm=imported_realm, + ) + + expected_iago_message_near_link = self.get_message_near_link( + imported_iago_message_id, imported_realm, True + ) + expected_hamlet_dm_near_link = self.get_message_near_link( + imported_hamlet_dm_id, imported_realm, True + ) + + assert imported_zoe_message.rendered_content is not None + assert imported_iago_message.rendered_content is not None + + self.assertIn(expected_iago_message_near_link, imported_zoe_message.rendered_content) + self.assertIn(expected_hamlet_dm_near_link, imported_iago_message.rendered_content) + def get_realm_getters(self) -> list[Callable[[Realm], object]]: names = set() getters: list[Callable[[Realm], object]] = []