import_realm: Import message.edit_history correctly.

Fixes #26369.

There are two important fixes to make to the dicts in edit_history:
1. Update the user_id so that it points to the imported user who made the edit.
2. Apply fix_message_rendered_content to the prev_rendered_content data
to fix up mentions and other such syntax.
This commit is contained in:
Mateusz Mandera 2024-05-30 02:57:56 +02:00 committed by Tim Abbott
parent 878d46ea49
commit 355f05ffbc
2 changed files with 87 additions and 8 deletions

View File

@ -324,13 +324,17 @@ def fix_customprofilefield(data: TableData) -> None:
def fix_message_rendered_content( def fix_message_rendered_content(
realm: Realm, sender_map: Dict[int, Record], messages: List[Record] realm: Realm,
sender_map: Dict[int, Record],
messages: List[Record],
content_key: str = "content",
rendered_content_key: str = "rendered_content",
) -> None: ) -> None:
""" """
This function sets the rendered_content of the messages we're importing. This function sets the rendered_content of the messages we're importing.
""" """
for message in messages: for message in messages:
if message["rendered_content"] is not None: if message[rendered_content_key] is not None:
# For Zulip->Zulip imports, we use the original rendered # For Zulip->Zulip imports, we use the original rendered
# Markdown; this avoids issues where e.g. a mention can no # Markdown; this avoids issues where e.g. a mention can no
# longer render properly because a user has changed their # longer render properly because a user has changed their
@ -339,7 +343,7 @@ def fix_message_rendered_content(
# However, we still need to update the data-user-id and # However, we still need to update the data-user-id and
# similar values stored on mentions, stream mentions, and # similar values stored on mentions, stream mentions, and
# similar syntax in the rendered HTML. # similar syntax in the rendered HTML.
soup = BeautifulSoup(message["rendered_content"], "html.parser") soup = BeautifulSoup(message[rendered_content_key], "html.parser")
user_mentions = soup.findAll("span", {"class": "user-mention"}) user_mentions = soup.findAll("span", {"class": "user-mention"})
if len(user_mentions) != 0: if len(user_mentions) != 0:
@ -356,7 +360,7 @@ def fix_message_rendered_content(
old_user_id = int(mention["data-user-id"]) old_user_id = int(mention["data-user-id"])
if old_user_id in user_id_map: if old_user_id in user_id_map:
mention["data-user-id"] = str(user_id_map[old_user_id]) mention["data-user-id"] = str(user_id_map[old_user_id])
message["rendered_content"] = str(soup) message[rendered_content_key] = str(soup)
stream_mentions = soup.findAll("a", {"class": "stream"}) stream_mentions = soup.findAll("a", {"class": "stream"})
if len(stream_mentions) != 0: if len(stream_mentions) != 0:
@ -365,7 +369,7 @@ def fix_message_rendered_content(
old_stream_id = int(mention["data-stream-id"]) old_stream_id = int(mention["data-stream-id"])
if old_stream_id in stream_id_map: if old_stream_id in stream_id_map:
mention["data-stream-id"] = str(stream_id_map[old_stream_id]) mention["data-stream-id"] = str(stream_id_map[old_stream_id])
message["rendered_content"] = str(soup) message[rendered_content_key] = str(soup)
user_group_mentions = soup.findAll("span", {"class": "user-group-mention"}) user_group_mentions = soup.findAll("span", {"class": "user-group-mention"})
if len(user_group_mentions) != 0: if len(user_group_mentions) != 0:
@ -374,11 +378,11 @@ def fix_message_rendered_content(
old_user_group_id = int(mention["data-user-group-id"]) old_user_group_id = int(mention["data-user-group-id"])
if old_user_group_id in user_group_id_map: if old_user_group_id in user_group_id_map:
mention["data-user-group-id"] = str(user_group_id_map[old_user_group_id]) mention["data-user-group-id"] = str(user_group_id_map[old_user_group_id])
message["rendered_content"] = str(soup) message[rendered_content_key] = str(soup)
continue continue
try: try:
content = message["content"] content = message[content_key]
sender_id = message["sender_id"] sender_id = message["sender_id"]
sender = sender_map[sender_id] sender = sender_map[sender_id]
@ -398,7 +402,7 @@ def fix_message_rendered_content(
translate_emoticons=translate_emoticons, translate_emoticons=translate_emoticons,
).rendered_content ).rendered_content
message["rendered_content"] = rendered_content message[rendered_content_key] = rendered_content
if "scheduled_timestamp" not in message: if "scheduled_timestamp" not in message:
# This logic runs also for ScheduledMessage, which doesn't use # This logic runs also for ScheduledMessage, which doesn't use
# the rendered_content_version field. # the rendered_content_version field.
@ -414,6 +418,30 @@ def fix_message_rendered_content(
) )
def fix_message_edit_history(
    realm: Realm, sender_map: Dict[int, Record], messages: List[Record]
) -> None:
    """
    Rewrite the JSON-encoded "edit_history" of each imported message in place.

    For every entry in a message's edit history, this:

    1. Remaps "user_id" through ID_MAP["user_profile"] so that it refers
       to the imported user.
    2. Runs fix_message_rendered_content on the entry, using its
       "prev_content" / "prev_rendered_content" fields, to fix up
       mentions and similar syntax in the old rendered HTML.

    Messages whose "edit_history" is missing or empty are left untouched.

    Raises KeyError if an entry carries a non-null "user_id" that is not
    present in ID_MAP["user_profile"].
    """
    user_id_map = ID_MAP["user_profile"]
    content_key = "prev_content"
    rendered_content_key = "prev_rendered_content"

    for message in messages:
        edit_history_json = message.get("edit_history")
        if not edit_history_json:
            continue

        edit_history = orjson.loads(edit_history_json)
        for entry in edit_history:
            # NOTE(review): "user_id" is presumably null (or absent) on
            # very old edit history entries; there is nothing to remap
            # then, so leave the entry as-is instead of raising KeyError.
            old_user_id = entry.get("user_id")
            if old_user_id is not None:
                entry["user_id"] = user_id_map[old_user_id]

        # fix_message_rendered_content indexes every record with
        # rendered_content_key unconditionally, so only hand it the
        # entries that actually carry that key (presumably topic-only
        # edits don't). The dicts are shared with edit_history, so the
        # in-place updates are picked up by the dump below.
        entries_with_content = [
            entry for entry in edit_history if rendered_content_key in entry
        ]
        fix_message_rendered_content(
            realm,
            sender_map,
            messages=entries_with_content,
            content_key=content_key,
            rendered_content_key=rendered_content_key,
        )

        message["edit_history"] = orjson.dumps(edit_history).decode()
def current_table_ids(data: TableData, table: TableName) -> List[int]: def current_table_ids(data: TableData, table: TableName) -> List[int]:
""" """
Returns the ids present in the current table Returns the ids present in the current table
@ -1682,6 +1710,9 @@ def import_message_data(realm: Realm, sender_map: Dict[int, Record], import_dir:
) )
logging.info("Successfully rendered Markdown for message batch") logging.info("Successfully rendered Markdown for message batch")
fix_message_edit_history(
realm=realm, sender_map=sender_map, messages=data["zerver_message"]
)
# A LOT HAPPENS HERE. # A LOT HAPPENS HERE.
# This is where we actually import the message data. # This is where we actually import the message data.
bulk_import_model(data, Message) bulk_import_model(data, Message)

View File

@ -1086,6 +1086,54 @@ class RealmImportExportTest(ExportFile):
Message.objects.filter(realm=imported_realm).count(), Message.objects.filter(realm=imported_realm).count(),
) )
def test_import_message_edit_history(self) -> None:
    """
    Export and re-import a realm containing an edited message and check
    that the imported edit_history entry refers to the imported user ids,
    both in "user_id" and in the mention markup of the previous rendering.
    """
    iago = self.example_user("iago")
    hamlet = self.example_user("hamlet")
    source_realm = get_realm("zulip")

    # Send a message mentioning hamlet, then edit it so that the original
    # rendering ends up in the message's edit history.
    self.login_user(iago)
    original_content = f"@**King Hamlet|{hamlet.id}** Hello"
    message_id = self.send_stream_message(
        self.example_user("iago"), "Verona", original_content
    )
    response = self.client_patch(
        f"/json/messages/{message_id}",
        {"content": "new content"},
    )
    self.assert_json_success(response)

    # Round-trip the realm through export + import.
    self.export_realm_and_create_auditlog(source_realm)
    with self.settings(BILLING_ENABLED=False), self.assertLogs(level="INFO"):
        do_import_realm(get_output_dir(), "test-zulip")

    imported_realm = Realm.objects.get(string_id="test-zulip")
    imported_message = Message.objects.filter(realm=imported_realm).latest("id")

    imported_hamlet_id = UserProfile.objects.get(
        delivery_email=hamlet.delivery_email, realm=imported_realm
    ).id
    imported_iago_id = UserProfile.objects.get(
        delivery_email=iago.delivery_email, realm=imported_realm
    ).id

    raw_edit_history = imported_message.edit_history
    assert raw_edit_history is not None
    history_entries = orjson.loads(raw_edit_history)
    self.assert_length(history_entries, 1)
    previous_version = history_entries[0]

    # The "user_id" recorded on the entry must now be the imported id
    # of iago, who made the edit.
    self.assertEqual(previous_version["user_id"], imported_iago_id)

    # The mention in the old rendered content must carry hamlet's
    # imported id.
    self.assertIn(
        f'data-user-id="{imported_hamlet_id}"',
        previous_version["prev_rendered_content"],
    )
def get_realm_getters(self) -> List[Callable[[Realm], object]]: def get_realm_getters(self) -> List[Callable[[Realm], object]]:
names = set() names = set()
getters: List[Callable[[Realm], object]] = [] getters: List[Callable[[Realm], object]] = []