scheduled_message: Make export/import work.

Closes #25130 by addressing the import/export part of it.
This commit is contained in:
Mateusz Mandera 2023-04-29 20:45:22 +02:00 committed by Tim Abbott
parent 780ef71891
commit 0abf60fd01
3 changed files with 175 additions and 43 deletions

View File

@ -54,6 +54,7 @@ from zerver.models import (
RealmPlayground,
RealmUserDefault,
Recipient,
ScheduledMessage,
Service,
Stream,
Subscription,
@ -205,7 +206,6 @@ NON_EXPORTED_TABLES = {
# sense to export, but is relatively low value.
"zerver_scheduledemail",
"zerver_scheduledemail_users",
"zerver_scheduledmessage",
# These tables are related to a user's 2FA authentication
# configuration, which will need to be set up again on the new
# server.
@ -223,8 +223,6 @@ NON_EXPORTED_TABLES = {
"zerver_archivedreaction",
"zerver_archivedsubmessage",
"zerver_archivetransaction",
# We don't export this until export of ScheduledMessage in general is implemented.
"zerver_attachment_scheduled_messages",
# Social auth tables are not needed post-export, since we don't
# use any of this state outside of a direct authentication flow.
"social_auth_association",
@ -252,6 +250,7 @@ IMPLICIT_TABLES = {
# ManyToMany relationships are exported implicitly when importing
# the parent table.
"zerver_attachment_messages",
"zerver_attachment_scheduled_messages",
}
ATTACHMENT_TABLES = {
@ -292,6 +291,7 @@ DATE_FIELDS: Dict[TableName, List[Field]] = {
"zerver_muteduser": ["date_muted"],
"zerver_realmauditlog": ["event_time"],
"zerver_realm": ["date_created"],
"zerver_scheduledmessage": ["scheduled_timestamp"],
"zerver_stream": ["date_created"],
"zerver_useractivityinterval": ["start", "end"],
"zerver_useractivity": ["last_visit"],
@ -691,6 +691,12 @@ def get_realm_config() -> Config:
include_rows="realm_id__in",
)
Config(
custom_tables=["zerver_scheduledmessage"],
virtual_parent=realm_config,
custom_fetch=custom_fetch_scheduled_messages,
)
Config(
table="zerver_defaultstream",
model=DefaultStream,
@ -1047,10 +1053,13 @@ def custom_fetch_user_profile_cross_realm(response: TableData, context: Context)
def fetch_attachment_data(
response: TableData, realm_id: int, message_ids: Set[int]
response: TableData, realm_id: int, message_ids: Set[int], scheduled_message_ids: Set[int]
) -> List[Attachment]:
attachments = list(
Attachment.objects.filter(realm_id=realm_id, messages__in=message_ids).distinct()
Attachment.objects.filter(
Q(messages__in=message_ids) | Q(scheduled_messages__in=scheduled_message_ids),
realm_id=realm_id,
).distinct()
)
response["zerver_attachment"] = make_raw(attachments)
floatify_datetime_fields(response, "zerver_attachment")
@ -1060,11 +1069,18 @@ def fetch_attachment_data(
quite ALL messages for the realm. So, we need to
clean up our attachment data to have correct
values for response['zerver_attachment'][<n>]['messages'].
Same reasoning applies to scheduled_messages.
"""
for row in response["zerver_attachment"]:
filtered_message_ids = set(row["messages"]).intersection(message_ids)
row["messages"] = sorted(filtered_message_ids)
filtered_scheduled_message_ids = set(row["scheduled_messages"]).intersection(
scheduled_message_ids
)
row["scheduled_messages"] = sorted(filtered_scheduled_message_ids)
return attachments
@ -1123,6 +1139,19 @@ def custom_fetch_huddle_objects(response: TableData, context: Context) -> None:
response["zerver_huddle"] = make_raw(Huddle.objects.filter(id__in=huddle_ids))
def custom_fetch_scheduled_messages(response: TableData, context: Context) -> None:
    """Fetch only the ScheduledMessage rows this export is permitted to include.

    The allowed id set is computed up front (see
    get_exportable_scheduled_message_ids) and passed down via the export
    context, so this just filters the realm's scheduled messages to that set.
    """
    allowed_ids = context["exportable_scheduled_message_ids"]
    scheduled_messages = ScheduledMessage.objects.filter(
        realm=context["realm"], id__in=allowed_ids
    )
    response["zerver_scheduledmessage"] = make_raw(list(scheduled_messages))
def fetch_usermessages(
realm: Realm,
message_ids: Set[int],
@ -1827,6 +1856,28 @@ def do_write_stats_file_for_realm_export(output_dir: Path) -> None:
f.write("\n")
def get_exportable_scheduled_message_ids(
    realm: Realm, public_only: bool = False, consent_message_id: Optional[int] = None
) -> Set[int]:
    """
    Scheduled messages are private to the sender, so which ones we export depends on the
    public/consent/full export mode.
    """
    # Public-data exports include nobody's scheduled messages.
    if public_only:
        return set()

    scheduled_messages = ScheduledMessage.objects.filter(realm=realm)
    if consent_message_id:
        # Consent-based export: keep only messages whose sender consented.
        consenting_user_ids = get_consented_user_ids(consent_message_id)
        scheduled_messages = scheduled_messages.filter(sender_id__in=consenting_user_ids)

    return set(scheduled_messages.values_list("id", flat=True))
def do_export_realm(
realm: Realm,
output_dir: Path,
@ -1848,12 +1899,20 @@ def do_export_realm(
create_soft_link(source=output_dir, in_progress=True)
exportable_scheduled_message_ids = get_exportable_scheduled_message_ids(
realm, public_only, consent_message_id
)
logging.info("Exporting data from get_realm_config()...")
export_from_config(
response=response,
config=realm_config,
seed_object=realm,
context=dict(realm=realm, exportable_user_ids=exportable_user_ids),
context=dict(
realm=realm,
exportable_user_ids=exportable_user_ids,
exportable_scheduled_message_ids=exportable_scheduled_message_ids,
),
)
logging.info("...DONE with get_realm_config() data")
@ -1892,7 +1951,10 @@ def do_export_realm(
# zerver_attachment
attachments = export_attachment_table(
realm=realm, output_dir=output_dir, message_ids=message_ids
realm=realm,
output_dir=output_dir,
message_ids=message_ids,
scheduled_message_ids=exportable_scheduled_message_ids,
)
logging.info("Exporting uploaded files and avatars")
@ -1921,11 +1983,14 @@ def do_export_realm(
def export_attachment_table(
realm: Realm, output_dir: Path, message_ids: Set[int]
realm: Realm, output_dir: Path, message_ids: Set[int], scheduled_message_ids: Set[int]
) -> List[Attachment]:
response: TableData = {}
attachments = fetch_attachment_data(
response=response, realm_id=realm.id, message_ids=message_ids
response=response,
realm_id=realm.id,
message_ids=message_ids,
scheduled_message_ids=scheduled_message_ids,
)
output_file = os.path.join(output_dir, "attachment.json")
write_table_data(output_file=output_file, data=response)

View File

@ -58,6 +58,7 @@ from zerver.models import (
RealmPlayground,
RealmUserDefault,
Recipient,
ScheduledMessage,
Service,
Stream,
Subscription,
@ -136,6 +137,7 @@ ID_MAP: Dict[str, Dict[int, int]] = {
"analytics_streamcount": {},
"analytics_usercount": {},
"realmuserdefault": {},
"scheduledmessage": {},
}
id_map_to_list: Dict[str, Dict[int, List[int]]] = {
@ -371,7 +373,10 @@ def fix_message_rendered_content(
).rendered_content
message["rendered_content"] = rendered_content
message["rendered_content_version"] = markdown_version
if "scheduled_timestamp" not in message:
# This logic runs also for ScheduledMessage, which doesn't use
# the rendered_content_version field.
message["rendered_content_version"] = markdown_version
except Exception:
# This generally happens with two possible causes:
# * rendering Markdown throwing an uncaught exception
@ -1312,6 +1317,27 @@ def do_import_realm(import_dir: Path, subdomain: str, processes: int = 1) -> Rea
sender_map = {user["id"]: user for user in data["zerver_userprofile"]}
if "zerver_scheduledmessage" in data:
fix_datetime_fields(data, "zerver_scheduledmessage")
re_map_foreign_keys(data, "zerver_scheduledmessage", "sender", related_table="user_profile")
re_map_foreign_keys(data, "zerver_scheduledmessage", "recipient", related_table="recipient")
re_map_foreign_keys(
data, "zerver_scheduledmessage", "sending_client", related_table="client"
)
re_map_foreign_keys(data, "zerver_scheduledmessage", "stream", related_table="stream")
re_map_foreign_keys(data, "zerver_scheduledmessage", "realm", related_table="realm")
fix_upload_links(data, "zerver_scheduledmessage")
fix_message_rendered_content(
realm=realm,
sender_map=sender_map,
messages=data["zerver_scheduledmessage"],
)
update_model_ids(ScheduledMessage, data, "scheduledmessage")
bulk_import_model(data, ScheduledMessage)
# Import zerver_message and zerver_usermessage
import_message_data(realm=realm, sender_map=sender_map, import_dir=import_dir)
@ -1524,11 +1550,7 @@ def import_attachments(data: TableData) -> None:
parent_model = Attachment
parent_db_table_name = "zerver_attachment"
parent_singular = "attachment"
child_singular = "message"
child_plural = "messages"
m2m_table_name = "zerver_attachment_messages"
parent_id = "attachment_id"
child_id = "message_id"
update_model_ids(parent_model, data, "attachment")
# We don't bulk_import_model yet, because we need to first compute
@ -1538,27 +1560,41 @@ def import_attachments(data: TableData) -> None:
# We do this in a slightly convoluted way to anticipate
# a future where we may need to call re_map_foreign_keys.
m2m_rows: List[Record] = []
for parent_row in data[parent_db_table_name]:
for fk_id in parent_row[child_plural]:
m2m_row: Record = {}
m2m_row[parent_singular] = parent_row["id"]
m2m_row[child_singular] = ID_MAP["message"][fk_id]
m2m_rows.append(m2m_row)
def format_m2m_data(
child_singular: str, child_plural: str, m2m_table_name: str, child_id: str
) -> Tuple[str, List[Record], str]:
m2m_rows: List[Record] = []
for parent_row in data[parent_db_table_name]:
for fk_id in parent_row[child_plural]:
m2m_row: Record = {}
m2m_row[parent_singular] = parent_row["id"]
# child_singular will generally match the model name (e.g. Message, ScheduledMessage)
# after lowercasing, and that's what we enter as ID_MAP keys, so this should be
# a reasonable assumption to make.
m2m_row[child_singular] = ID_MAP[child_singular][fk_id]
m2m_rows.append(m2m_row)
# TODO: Import of scheduled messages is not implemented yet.
if "scheduled_messages" in parent_row:
del parent_row["scheduled_messages"]
# Create our table data for insert.
m2m_data: TableData = {m2m_table_name: m2m_rows}
convert_to_id_fields(m2m_data, m2m_table_name, parent_singular)
convert_to_id_fields(m2m_data, m2m_table_name, child_singular)
m2m_rows = m2m_data[m2m_table_name]
# Create our table data for insert.
m2m_data: TableData = {m2m_table_name: m2m_rows}
convert_to_id_fields(m2m_data, m2m_table_name, parent_singular)
convert_to_id_fields(m2m_data, m2m_table_name, child_singular)
m2m_rows = m2m_data[m2m_table_name]
# Next, delete out our child data from the parent rows.
for parent_row in data[parent_db_table_name]:
del parent_row[child_plural]
# Next, delete out our child data from the parent rows.
for parent_row in data[parent_db_table_name]:
del parent_row[child_plural]
return m2m_table_name, m2m_rows, child_id
messages_m2m_tuple = format_m2m_data(
"message", "messages", "zerver_attachment_messages", "message_id"
)
scheduled_messages_m2m_tuple = format_m2m_data(
"scheduledmessage",
"scheduled_messages",
"zerver_attachment_scheduled_messages",
"scheduledmessage_id",
)
# Update 'path_id' for the attachments
for attachment in data[parent_db_table_name]:
@ -1571,19 +1607,23 @@ def import_attachments(data: TableData) -> None:
# TODO: Do this the kosher Django way. We may find a
# better way to do this in Django 1.9 particularly.
with connection.cursor() as cursor:
sql_template = SQL(
for m2m_table_name, m2m_rows, child_id in [
messages_m2m_tuple,
scheduled_messages_m2m_tuple,
]:
sql_template = SQL(
"""
INSERT INTO {m2m_table_name} ({parent_id}, {child_id}) VALUES %s
"""
INSERT INTO {m2m_table_name} ({parent_id}, {child_id}) VALUES %s
"""
).format(
m2m_table_name=Identifier(m2m_table_name),
parent_id=Identifier(parent_id),
child_id=Identifier(child_id),
)
tups = [(row[parent_id], row[child_id]) for row in m2m_rows]
execute_values(cursor.cursor, sql_template, tups)
).format(
m2m_table_name=Identifier(m2m_table_name),
parent_id=Identifier(parent_id),
child_id=Identifier(child_id),
)
tups = [(row[parent_id], row[child_id]) for row in m2m_rows]
execute_values(cursor.cursor, sql_template, tups)
logging.info("Successfully imported M2M table %s", m2m_table_name)
logging.info("Successfully imported M2M table %s", m2m_table_name)
def import_analytics_data(realm: Realm, import_dir: Path) -> None:

View File

@ -27,6 +27,7 @@ from zerver.actions.realm_settings import (
do_change_realm_plan_type,
do_set_realm_authentication_methods,
)
from zerver.actions.scheduled_messages import check_schedule_message
from zerver.actions.user_activity import do_update_user_activity, do_update_user_activity_interval
from zerver.actions.user_status import do_update_user_status
from zerver.actions.user_topics import do_set_user_topic_visibility_policy
@ -66,6 +67,7 @@ from zerver.models import (
RealmEmoji,
RealmUserDefault,
Recipient,
ScheduledMessage,
Stream,
Subscription,
UserGroup,
@ -790,6 +792,22 @@ class RealmImportExportTest(ExportFile):
sample_user, client, timezone_now(), UserPresence.LEGACY_STATUS_ACTIVE_INT
)
# Set up scheduled messages.
ScheduledMessage.objects.filter(realm=original_realm).delete()
check_schedule_message(
sender=hamlet,
client=get_client("website"),
recipient_type_name="stream",
message_to=[Stream.objects.get(name="Denmark", realm=original_realm).id],
topic_name="test-import",
message_content="test message",
scheduled_message_id=None,
deliver_at=timezone_now() + datetime.timedelta(days=365),
realm=original_realm,
)
original_scheduled_message = ScheduledMessage.objects.filter(realm=original_realm).last()
assert original_scheduled_message is not None
# send Cordelia to the islands
do_update_user_status(
user_profile=cordelia,
@ -940,6 +958,15 @@ class RealmImportExportTest(ExportFile):
Recipient.objects.get(type=Recipient.HUDDLE, type_id=huddle_object.id).id,
)
self.assertEqual(ScheduledMessage.objects.filter(realm=imported_realm).count(), 1)
imported_scheduled_message = ScheduledMessage.objects.first()
assert imported_scheduled_message is not None
self.assertEqual(imported_scheduled_message.content, original_scheduled_message.content)
self.assertEqual(
imported_scheduled_message.scheduled_timestamp,
original_scheduled_message.scheduled_timestamp,
)
for user_profile in UserProfile.objects.filter(realm=imported_realm):
# Check that all Subscriptions have the correct is_user_active set.
self.assertEqual(