import_realm: Fix near-links in imported messages.

Because we rewrite the message ID and channel ID during the import,
messages containing near-links will be broken because they still point
to the old object IDs.

This commit fixes channel links, topic links, group links, DM links and
message links (private & public) in the rendered content of imported
messages by remapping the relevant IDs.

Fixes #31100.
This commit is contained in:
PieterCK 2024-07-26 17:31:59 +07:00
parent 5b98a20c0c
commit 63cf42ce85
2 changed files with 222 additions and 0 deletions

View File

@ -50,6 +50,7 @@ from zerver.lib.thumbnail import THUMBNAIL_ACCEPT_IMAGE_TYPES, BadImageError, ma
from zerver.lib.timestamp import datetime_to_timestamp from zerver.lib.timestamp import datetime_to_timestamp
from zerver.lib.upload import ensure_avatar_image, sanitize_name, upload_backend, upload_emoji_image from zerver.lib.upload import ensure_avatar_image, sanitize_name, upload_backend, upload_emoji_image
from zerver.lib.upload.s3 import get_bucket from zerver.lib.upload.s3 import get_bucket
from zerver.lib.url_decoding import NearLinkHandler
from zerver.lib.user_counts import realm_user_count_by_role from zerver.lib.user_counts import realm_user_count_by_role
from zerver.lib.user_groups import create_system_user_groups_for_realm from zerver.lib.user_groups import create_system_user_groups_for_realm
from zerver.lib.user_message import UserMessageLite, bulk_insert_ums from zerver.lib.user_message import UserMessageLite, bulk_insert_ums
@ -416,6 +417,11 @@ def fix_message_rendered_content(
if old_user_group_id in user_group_id_map: if old_user_group_id in user_group_id_map:
mention["data-user-group-id"] = str(user_group_id_map[old_user_group_id]) mention["data-user-group-id"] = str(user_group_id_map[old_user_group_id])
message[rendered_content_key] = str(soup) message[rendered_content_key] = str(soup)
message[rendered_content_key] = fix_near_links_in_messages(
message[rendered_content_key]
)
continue continue
try: try:
@ -438,6 +444,9 @@ def fix_message_rendered_content(
sent_by_bot=sent_by_bot, sent_by_bot=sent_by_bot,
translate_emoticons=translate_emoticons, translate_emoticons=translate_emoticons,
).rendered_content ).rendered_content
# Near-links fix has to be done here for third-party platform
# imports because those don't have any rendered content yet.
rendered_content = fix_near_links_in_messages(rendered_content)
message[rendered_content_key] = rendered_content message[rendered_content_key] = rendered_content
if "scheduled_timestamp" not in message: if "scheduled_timestamp" not in message:
@ -455,6 +464,66 @@ def fix_message_rendered_content(
) )
def remap_near_link_recipient_encoding(fragments: list[str]) -> None:
    """Rewrite the recipient IDs of a near-link's fragment parts, in place.

    ``fragments[1]`` is read as the narrow section and ``fragments[2]`` as
    the recipient encoding ``"<id>[,<id>...]-<name>"``. Every old ID is
    looked up in the import ID map for that section (``ID_MAP["stream"]``
    for channel links, ``ID_MAP["user_profile"]`` for DM links); IDs that
    are missing from the map are kept as they are.

    ``fragments`` is left untouched when it has fewer than three parts,
    when the section is not a channel/DM section, or when the recipient
    encoding does not start with a comma-separated list of integers
    followed by "-". Such links carry no ID that could be remapped, so
    they must not abort the import.
    """
    if len(fragments) < 3:
        return
    section, recipient_encoding = fragments[1], fragments[2]

    # NOTE(review): "stream" is listed because the caller also selects
    # legacy "/#narrow/stream" links; if NearLinkHandler already rewrites
    # that section to "channel", the extra entry is simply never hit.
    id_map_key = {"channel": "stream", "stream": "stream", "dm": "user_profile"}.get(section)
    if id_map_key is None:
        return

    old_ids_part, separator, recipient_name = recipient_encoding.partition("-")
    if not separator:
        # No "<ids>-<name>" structure, so there is no ID to remap.
        return
    try:
        old_ids = [int(old_id) for old_id in old_ids_part.split(",")]
    except ValueError:
        # The part before "-" is not a list of numeric IDs.
        return

    id_map = ID_MAP[id_map_key]
    new_recipient_id = ",".join(str(id_map.get(old_id, old_id)) for old_id in old_ids)
    fragments[2] = f"{new_recipient_id}-{recipient_name}"
def remap_near_link_message_id(fragments: list[str]) -> None:
    """Rewrite the trailing message ID of a near-link's fragment parts, in place.

    The last element of ``fragments`` is read as the old message ID and
    replaced by its entry in ``ID_MAP["message"]``. An ID that is missing
    from the map is kept as it is.

    ``fragments`` is left untouched when it is empty or when its last
    element is not an integer, so a malformed link cannot abort the import.
    """
    if not fragments:
        return
    old_id = fragments[-1]
    try:
        old_message_id = int(old_id)
    except ValueError:
        # Not a numeric message ID; nothing to remap.
        return
    fragments[-1] = str(ID_MAP["message"].get(old_message_id, old_id))
def fix_near_links_in_messages(rendered_content: str) -> str:
    """Remap the object IDs inside near-links of a rendered message.

    Every ``<a>`` tag whose ``href`` starts with one of the narrow prefixes
    below is parsed with ``NearLinkHandler``; its recipient IDs and, for
    ``.../near/<id>`` links, its message ID are remapped and the ``href``
    is rewritten.

    Returns ``rendered_content`` itself when no such link is present, and
    the re-serialized document otherwise.
    """
    soup = BeautifulSoup(rendered_content, "html.parser")

    near_link_prefixes = ["/#narrow/channel", "/#narrow/stream", "/#narrow/dm"]
    # Include legacy links without the leading "/", which were
    # used in older versions.
    near_link_prefixes += [prefix.removeprefix("/") for prefix in near_link_prefixes]
    # str.startswith wants a tuple; build it once, not once per tag.
    prefix_options = tuple(near_link_prefixes)

    near_links = soup.find_all(
        lambda tag: tag.name == "a"
        and tag.has_attr("href")
        and tag.get("href").startswith(prefix_options)
    )
    if not near_links:
        # Hand back the original string so untouched messages are not
        # re-serialized by the parser.
        return rendered_content

    for link in near_links:
        url = link["href"]
        try:
            near_link_instance = NearLinkHandler(url)
        except AssertionError:
            # NearLinkHandler does additional checks to make sure
            # the URL is a near link. In this case it's probably
            # not a near link (or a faulty one).
            continue

        fragments = near_link_instance.get_near_link_fragment_parts()
        if len(fragments) < 3:
            # No recipient part (fragments[2]) is present, so there is
            # nothing to remap in this link.
            continue

        remap_near_link_recipient_encoding(fragments)
        # Check the length first so that fragments[-2] is only read on
        # links long enough to carry a ".../near/<message id>" suffix.
        if len(fragments) >= 5 and fragments[-2] == "near":
            remap_near_link_message_id(fragments)

        near_link_instance.patch_near_link_fragment_parts(fragments)
        link["href"] = near_link_instance.get_url()

    return str(soup)
def fix_message_edit_history( def fix_message_edit_history(
realm: Realm, sender_map: dict[int, Record], messages: list[Record] realm: Realm, sender_map: dict[int, Record], messages: list[Record]
) -> None: ) -> None:

View File

@ -52,6 +52,7 @@ from zerver.lib.export import (
export_usermessages_batch, export_usermessages_batch,
) )
from zerver.lib.import_realm import do_import_realm, get_incoming_message_ids from zerver.lib.import_realm import do_import_realm, get_incoming_message_ids
from zerver.lib.message_cache import MessageDict
from zerver.lib.streams import create_stream_if_needed from zerver.lib.streams import create_stream_if_needed
from zerver.lib.test_classes import ZulipTestCase from zerver.lib.test_classes import ZulipTestCase
from zerver.lib.test_helpers import ( from zerver.lib.test_helpers import (
@ -65,6 +66,7 @@ from zerver.lib.test_helpers import (
) )
from zerver.lib.thumbnail import BadImageError from zerver.lib.thumbnail import BadImageError
from zerver.lib.upload import claim_attachment, upload_avatar_image, upload_message_attachment from zerver.lib.upload import claim_attachment, upload_avatar_image, upload_message_attachment
from zerver.lib.url_encoding import encode_stream, near_message_url
from zerver.lib.utils import assert_is_not_none from zerver.lib.utils import assert_is_not_none
from zerver.models import ( from zerver.models import (
AlertWord, AlertWord,
@ -413,6 +415,15 @@ class RealmImportExportTest(ExportFile):
) )
self.export_realm(original_realm, export_type, exportable_user_ids) self.export_realm(original_realm, export_type, exportable_user_ids)
def get_message_near_link(
    self, message_id: int, realm: Realm, relative_link: bool = False
) -> str:
    """Return the near-link URL of the message with ID ``message_id``.

    The URL is built by ``near_message_url`` from the message's wide
    dict. With ``relative_link=True`` the leading ``realm.url`` is
    stripped from the result.
    """
    message = Message.objects.get(id=message_id)
    near_link = near_message_url(realm, MessageDict.wide_dict(message, realm.id))
    return near_link.removeprefix(realm.url) if relative_link else near_link
def test_export_files_from_local(self) -> None: def test_export_files_from_local(self) -> None:
user = self.example_user("hamlet") user = self.example_user("hamlet")
realm = user.realm realm = user.realm
@ -1326,6 +1337,148 @@ class RealmImportExportTest(ExportFile):
f'data-user-id="{imported_hamlet_id}"', prev_version_of_message["prev_rendered_content"] f'data-user-id="{imported_hamlet_id}"', prev_version_of_message["prev_rendered_content"]
) )
def test_import_public_messages_with_near_link(self) -> None:
    """Channel, topic and message near-links sent to a channel must
    point at the imported realm's IDs after an export/import round trip.
    """
    source_realm = Realm.objects.get(string_id="zulip")
    denmark = get_stream("Denmark", source_realm)
    encoded_channel = encode_stream(denmark.id, denmark.name)

    # One message per kind of near-link, all sent to Denmark.
    channel_link_message = (
        f"[channel near link](http://zulip.testserver/#narrow/channel/{encoded_channel})"
    )
    self.send_stream_message(self.example_user("iago"), "Denmark", channel_link_message)

    topic_link_message = f"[topic near link](http://zulip.testserver/#narrow/channel/{encoded_channel}/topic/test)"
    self.send_stream_message(self.example_user("hamlet"), "Denmark", topic_link_message)

    # The message-link case needs an existing message to point at.
    near_link_target_message = "near link!"
    target_message_id = self.send_stream_message(
        self.example_user("othello"), "Denmark", near_link_target_message
    )
    quote_and_reply_message = f"[message near link](http://zulip.testserver/#narrow/channel/{encoded_channel}/topic/test/near/{target_message_id})"
    self.send_stream_message(self.example_user("othello"), "Denmark", quote_and_reply_message)

    self.export_realm_and_create_auditlog(source_realm)
    with self.settings(BILLING_ENABLED=False), self.assertLogs(level="INFO"):
        do_import_realm(get_output_dir(), "test-zulip")

    imported_realm = Realm.objects.get(string_id="test-zulip")
    imported_denmark = Stream.objects.get(name="Denmark", realm=imported_realm)
    encoded_imported_channel = encode_stream(imported_denmark.id, imported_denmark.name)

    def imported_message(content: str) -> Message:
        # Imported messages are located by their (unchanged) raw content.
        return Message.objects.get(content=content, sender__realm=imported_realm)

    self.assertEqual(
        imported_message(channel_link_message).rendered_content,
        f'<p><a href="/#narrow/channel/{encoded_imported_channel}">channel near link</a></p>',
    )

    self.assertEqual(
        imported_message(topic_link_message).rendered_content,
        f'<p><a href="/#narrow/channel/{encoded_imported_channel}/topic/test">topic near link</a></p>',
    )

    imported_quote_and_reply = imported_message(quote_and_reply_message)
    imported_target = imported_message(near_link_target_message)
    self.assertEqual(
        imported_quote_and_reply.rendered_content,
        f'<p><a href="/#narrow/channel/{encoded_imported_channel}/topic/test/near/{imported_target.id}">message near link</a></p>',
    )
def test_import_private_messages_with_near_link(self) -> None:
    """Message near-links inside group and 1:1 direct messages must
    point at the imported realm's IDs after an export/import round trip.
    """
    source_realm = Realm.objects.get(string_id="zulip")

    # Scenario 1: a group direct message between iago, hamlet and ZOE.
    # ---
    # iago : "test content"
    # ZOE  : "Iago [said](<near link to iago's message>): test content"
    # ---
    group_target_id = self.send_group_direct_message(
        self.example_user("iago"), [self.example_user("hamlet"), self.example_user("ZOE")]
    )
    group_target_link = self.get_message_near_link(group_target_id, source_realm)
    zoe_message_context = f"Iago [said]({group_target_link}): test content"
    self.send_group_direct_message(
        self.example_user("ZOE"),
        [self.example_user("hamlet"), self.example_user("iago")],
        zoe_message_context,
    )

    # Scenario 2: a direct message between iago and hamlet.
    # ---
    # hamlet : "test content"
    # iago   : "Hamlet [said](<near link to hamlet's message>): test content"
    # ---
    dm_target_id = self.send_personal_message(
        self.example_user("hamlet"), self.example_user("iago")
    )
    dm_target_link = self.get_message_near_link(dm_target_id, source_realm)
    iago_dm_context = f"Hamlet [said]({dm_target_link}): test content"
    self.send_personal_message(
        self.example_user("iago"), self.example_user("hamlet"), iago_dm_context
    )

    # All three participants opt in to private data export.
    for user_name in ("iago", "hamlet", "ZOE"):
        do_change_user_setting(
            self.example_user(user_name), "allow_private_data_export", True, acting_user=None
        )

    self.export_realm_and_create_auditlog(source_realm)
    with self.settings(BILLING_ENABLED=False), self.assertLogs(level="INFO"):
        do_import_realm(get_output_dir(), "test-zulip")

    imported_realm = Realm.objects.get(string_id="test-zulip")

    def imported_message(content: str, recipient_type: int) -> Message:
        # Imported messages are located by raw content and recipient type.
        return Message.objects.get(
            content=content,
            recipient__type=recipient_type,
            sender__realm=imported_realm,
        )

    # Fetch imported messages and validate the remapped near links.
    imported_group_target_id = imported_message(
        "test content", Recipient.DIRECT_MESSAGE_GROUP
    ).id
    imported_zoe_message = imported_message(
        zoe_message_context, Recipient.DIRECT_MESSAGE_GROUP
    )
    imported_dm_target_id = imported_message("test content", Recipient.PERSONAL).id
    imported_iago_message = imported_message(iago_dm_context, Recipient.PERSONAL)

    expected_group_link = self.get_message_near_link(
        imported_group_target_id, imported_realm, True
    )
    expected_dm_link = self.get_message_near_link(
        imported_dm_target_id, imported_realm, True
    )

    assert imported_zoe_message.rendered_content is not None
    assert imported_iago_message.rendered_content is not None
    self.assertIn(expected_group_link, imported_zoe_message.rendered_content)
    self.assertIn(expected_dm_link, imported_iago_message.rendered_content)
def get_realm_getters(self) -> list[Callable[[Realm], object]]: def get_realm_getters(self) -> list[Callable[[Realm], object]]:
names = set() names = set()
getters: list[Callable[[Realm], object]] = [] getters: list[Callable[[Realm], object]] = []