2019-02-28 21:19:47 +01:00
|
|
|
# This is the main code for the `./manage.py export` data export tool.
|
|
|
|
# User docs: https://zulip.readthedocs.io/en/latest/production/export-and-import.html
|
|
|
|
#
|
|
|
|
# Most developers will interact with this primarily when they add a
|
|
|
|
# new table to the schema, in which case they likely need to (1) add
|
|
|
|
# it to the lists in `ALL_ZULIP_TABLES` and similar data structures and
|
|
|
|
# (2) if it doesn't belong in NON_EXPORTED_TABLES, add a Config object for
|
|
|
|
# it to get_realm_config.
|
2016-04-05 00:27:37 +02:00
|
|
|
import datetime
|
2020-06-11 00:54:34 +02:00
|
|
|
import glob
|
|
|
|
import logging
|
|
|
|
import os
|
|
|
|
import shutil
|
|
|
|
import subprocess
|
|
|
|
import tempfile
|
2022-04-13 21:18:41 +02:00
|
|
|
from functools import lru_cache
|
2022-06-23 20:07:19 +02:00
|
|
|
from typing import Any, Callable, Dict, Iterable, List, Mapping, Optional, Set, Tuple, TypedDict
|
2020-06-11 00:54:34 +02:00
|
|
|
|
2020-08-07 01:09:47 +02:00
|
|
|
import orjson
|
2018-05-31 19:13:56 +02:00
|
|
|
from django.apps import apps
|
2016-04-05 00:27:37 +02:00
|
|
|
from django.conf import settings
|
2022-07-01 18:25:59 +02:00
|
|
|
from django.db.models import Exists, OuterRef, Q
|
2016-04-05 00:27:37 +02:00
|
|
|
from django.forms.models import model_to_dict
|
2017-04-15 04:08:10 +02:00
|
|
|
from django.utils.timezone import is_naive as timezone_is_naive
|
2021-08-10 02:11:16 +02:00
|
|
|
from mypy_boto3_s3.service_resource import Object
|
2020-06-11 00:54:34 +02:00
|
|
|
|
|
|
|
import zerver.lib.upload
|
|
|
|
from analytics.models import RealmCount, StreamCount, UserCount
|
2018-07-18 23:50:16 +02:00
|
|
|
from scripts.lib.zulip_tools import overwrite_symlink
|
2018-04-23 23:28:27 +02:00
|
|
|
from zerver.lib.avatar_hash import user_avatar_path_from_ids
|
2019-12-20 00:00:45 +01:00
|
|
|
from zerver.lib.pysa import mark_sanitized
|
2020-10-26 22:10:53 +01:00
|
|
|
from zerver.lib.upload import get_bucket
|
2020-06-11 00:54:34 +02:00
|
|
|
from zerver.models import (
|
2020-07-16 15:38:56 +02:00
|
|
|
AlertWord,
|
2020-06-11 00:54:34 +02:00
|
|
|
Attachment,
|
|
|
|
BotConfigData,
|
|
|
|
BotStorageData,
|
|
|
|
Client,
|
|
|
|
CustomProfileField,
|
|
|
|
CustomProfileFieldValue,
|
|
|
|
DefaultStream,
|
2021-09-29 02:46:57 +02:00
|
|
|
GroupGroupMembership,
|
2020-06-11 00:54:34 +02:00
|
|
|
Huddle,
|
|
|
|
Message,
|
2021-12-05 13:41:11 +01:00
|
|
|
MutedUser,
|
2020-06-11 00:54:34 +02:00
|
|
|
Reaction,
|
|
|
|
Realm,
|
|
|
|
RealmAuditLog,
|
|
|
|
RealmDomain,
|
|
|
|
RealmEmoji,
|
|
|
|
RealmFilter,
|
2021-02-14 12:07:09 +01:00
|
|
|
RealmPlayground,
|
2021-06-01 12:55:44 +02:00
|
|
|
RealmUserDefault,
|
2020-06-11 00:54:34 +02:00
|
|
|
Recipient,
|
|
|
|
Service,
|
|
|
|
Stream,
|
|
|
|
Subscription,
|
|
|
|
UserActivity,
|
|
|
|
UserActivityInterval,
|
|
|
|
UserGroup,
|
|
|
|
UserGroupMembership,
|
|
|
|
UserHotspot,
|
|
|
|
UserMessage,
|
|
|
|
UserPresence,
|
|
|
|
UserProfile,
|
2021-12-05 13:42:04 +01:00
|
|
|
UserStatus,
|
2021-07-23 15:26:02 +02:00
|
|
|
UserTopic,
|
2021-07-26 17:17:10 +02:00
|
|
|
get_realm,
|
2020-06-11 00:54:34 +02:00
|
|
|
get_system_bot,
|
|
|
|
get_user_profile_by_id,
|
|
|
|
)
|
2016-08-09 04:31:26 +02:00
|
|
|
|
|
|
|
# Custom mypy types follow:

# A single exported row, keyed by column name.
Record = Dict[str, Any]
# Database table name, e.g. "zerver_message".
TableName = str
# All rows collected so far, grouped by the table they belong to.
TableData = Dict[TableName, List[Record]]
# Name of a column within a table.
Field = str
# Filesystem path, kept as a plain string.
Path = str
Context = Dict[str, Any]
FilterArgs = Dict[str, Any]
# A (table, field) pair.
IdSource = Tuple[TableName, Field]
# Predicate applied to a Record.
SourceFilter = Callable[[Record], bool]
# Callable taking the accumulated TableData and a Context; it returns nothing.
CustomFetch = Callable[[TableData, Context], None]
|
2016-08-09 04:31:26 +02:00
|
|
|
|
2021-12-09 13:47:04 +01:00
|
|
|
|
|
|
|
class MessagePartial(TypedDict):
    """Typed dictionary describing one message partial.

    NOTE(review): presumably this is the payload written directly with
    write_data_to_file for message partials -- confirm against the caller.
    """

    # Message rows, one Record per row.
    zerver_message: List[Record]
    # Integer ids referring to zerver_userprofile rows.
    zerver_userprofile_ids: List[int]
    realm_id: int
|
|
|
|
|
2016-04-05 00:27:37 +02:00
|
|
|
|
2018-06-18 16:24:01 +02:00
|
|
|
# NOTE(review): presumably the number of message rows handled per batch
# when paginating the message tables -- confirm against the code that uses it.
MESSAGE_BATCH_CHUNK_SIZE = 1000
|
|
|
|
|
2018-05-31 19:34:54 +02:00
|
|
|
# Every database table the export tool knows about. sanity_check_output
# asserts that this set matches the tables declared by the Django models.
ALL_ZULIP_TABLES = {
    "analytics_fillstate",
    "analytics_installationcount",
    "analytics_realmcount",
    "analytics_streamcount",
    "analytics_usercount",
    "otp_static_staticdevice",
    "otp_static_statictoken",
    "otp_totp_totpdevice",
    "social_auth_association",
    "social_auth_code",
    "social_auth_nonce",
    "social_auth_partial",
    "social_auth_usersocialauth",
    "two_factor_phonedevice",
    "zerver_alertword",
    "zerver_archivedattachment",
    "zerver_archivedattachment_messages",
    "zerver_archivedmessage",
    "zerver_archivedusermessage",
    "zerver_attachment",
    "zerver_attachment_messages",
    "zerver_archivedreaction",
    "zerver_archivedsubmessage",
    "zerver_archivetransaction",
    "zerver_botconfigdata",
    "zerver_botstoragedata",
    "zerver_client",
    "zerver_customprofilefield",
    "zerver_customprofilefieldvalue",
    "zerver_defaultstream",
    "zerver_defaultstreamgroup",
    "zerver_defaultstreamgroup_streams",
    "zerver_draft",
    "zerver_emailchangestatus",
    "zerver_groupgroupmembership",
    "zerver_huddle",
    "zerver_message",
    "zerver_missedmessageemailaddress",
    "zerver_multiuseinvite",
    "zerver_multiuseinvite_streams",
    "zerver_preregistrationuser",
    "zerver_preregistrationuser_streams",
    "zerver_pushdevicetoken",
    "zerver_reaction",
    "zerver_realm",
    "zerver_realmauditlog",
    "zerver_realmdomain",
    "zerver_realmemoji",
    "zerver_realmfilter",
    "zerver_realmplayground",
    "zerver_realmreactivationstatus",
    "zerver_realmuserdefault",
    "zerver_recipient",
    "zerver_scheduledemail",
    "zerver_scheduledemail_users",
    "zerver_scheduledmessage",
    "zerver_scheduledmessagenotificationemail",
    "zerver_service",
    "zerver_stream",
    "zerver_submessage",
    "zerver_subscription",
    "zerver_useractivity",
    "zerver_useractivityinterval",
    "zerver_usergroup",
    "zerver_usergroupmembership",
    "zerver_userhotspot",
    "zerver_usermessage",
    "zerver_userpresence",
    "zerver_userprofile",
    "zerver_userprofile_groups",
    "zerver_userprofile_user_permissions",
    "zerver_userstatus",
    "zerver_usertopic",
    "zerver_muteduser",
}
|
2016-08-11 20:27:26 +02:00
|
|
|
|
2019-02-28 21:19:47 +01:00
|
|
|
# This set contains those database tables that we expect to not be
# included in the export.  This tool does validation to ensure that
# every table in the database is either exported or listed here, to
# ensure we never accidentally fail to export a table.
NON_EXPORTED_TABLES = {
    # These invitation/confirmation flow tables don't make sense to
    # export, since invitation links will be broken by the server URL
    # change anyway:
    "zerver_emailchangestatus",
    "zerver_multiuseinvite",
    "zerver_multiuseinvite_streams",
    "zerver_preregistrationuser",
    "zerver_preregistrationuser_streams",
    "zerver_realmreactivationstatus",
    # Missed message addresses are low value to export since
    # missed-message email addresses include the server's hostname and
    # expire after a few days.
    "zerver_missedmessageemailaddress",
    # Scheduled message notification email data is for internal use by the server.
    "zerver_scheduledmessagenotificationemail",
    # When switching servers, clients will need to re-log in and
    # reregister for push notifications anyway.
    "zerver_pushdevicetoken",
    # We don't use these generated Django tables
    "zerver_userprofile_groups",
    "zerver_userprofile_user_permissions",
    # These are used for scheduling future activity; it could make
    # sense to export, but is relatively low value.
    "zerver_scheduledemail",
    "zerver_scheduledemail_users",
    "zerver_scheduledmessage",
    # These tables are related to a user's 2FA authentication
    # configuration, which will need to be set up again on the new
    # server.
    "two_factor_phonedevice",
    "otp_static_staticdevice",
    "otp_static_statictoken",
    "otp_totp_totpdevice",
    # These archive tables should not be exported (they are to support
    # restoring content accidentally deleted due to software bugs in
    # the retention policy feature)
    "zerver_archivedmessage",
    "zerver_archivedusermessage",
    "zerver_archivedattachment",
    "zerver_archivedattachment_messages",
    "zerver_archivedreaction",
    "zerver_archivedsubmessage",
    "zerver_archivetransaction",
    # Social auth tables are not needed post-export, since we don't
    # use any of this state outside of a direct authentication flow.
    "social_auth_association",
    "social_auth_code",
    "social_auth_nonce",
    "social_auth_partial",
    "social_auth_usersocialauth",
    # We will likely never want to migrate this table, since it's a
    # total of all the realmcount values on the server.  Might need to
    # recompute it after a fillstate import.
    "analytics_installationcount",
    # Fillstate will require some cleverness to do the right partial export.
    "analytics_fillstate",
    # These are for unfinished features; we'll want to add them to the
    # export before they reach full production status.
    "zerver_defaultstreamgroup",
    "zerver_defaultstreamgroup_streams",
    "zerver_submessage",
    # Drafts don't need to be exported as they are supposed to be more ephemeral.
    "zerver_draft",
    # For any tables listed below here, it's a bug that they are not present in the export.
}
|
|
|
|
|
|
|
|
IMPLICIT_TABLES = {
    # ManyToMany relationships are exported implicitly when importing
    # the parent table.
    "zerver_attachment_messages",
}

ATTACHMENT_TABLES = {
    "zerver_attachment",
}

MESSAGE_TABLES = {
    # message tables get special treatment, because they're by far our
    # largest tables and need to be paginated.
    "zerver_message",
    "zerver_usermessage",
    # zerver_reaction belongs here, since it's added late because it
    # has a foreign key into the Message table.
    "zerver_reaction",
}

# These get their own file as analytics data can be quite large and
# would otherwise make realm.json unpleasant to manually inspect
ANALYTICS_TABLES = {
    "analytics_realmcount",
    "analytics_streamcount",
    "analytics_usercount",
}
|
|
|
|
|
2019-02-28 21:19:47 +01:00
|
|
|
# This data structure lists all the Django DateTimeField fields in the
# data model.  These are converted to floats during the export process
# via floatify_datetime_fields, and back during the import process.
#
# TODO: This data structure could likely eventually be replaced by
# inspecting the corresponding Django models
DATE_FIELDS: Dict[TableName, List[Field]] = {
    "analytics_installationcount": ["end_time"],
    "analytics_realmcount": ["end_time"],
    "analytics_streamcount": ["end_time"],
    "analytics_usercount": ["end_time"],
    "zerver_attachment": ["create_time"],
    "zerver_message": ["last_edit_time", "date_sent"],
    "zerver_muteduser": ["date_muted"],
    "zerver_realmauditlog": ["event_time"],
    "zerver_realm": ["date_created"],
    "zerver_stream": ["date_created"],
    "zerver_useractivityinterval": ["start", "end"],
    "zerver_useractivity": ["last_visit"],
    "zerver_userhotspot": ["timestamp"],
    "zerver_userpresence": ["timestamp"],
    "zerver_userprofile": ["date_joined", "last_login", "last_reminder"],
    "zerver_userprofile_mirrordummy": ["date_joined", "last_login", "last_reminder"],
    "zerver_userstatus": ["timestamp"],
    "zerver_usertopic": ["last_updated"],
}
|
2016-08-09 16:35:43 +02:00
|
|
|
|
2020-08-07 05:36:32 +02:00
|
|
|
# Fields that listify_bithandler_fields converts to plain lists,
# keyed by the table they belong to.
BITHANDLER_FIELDS: Dict[TableName, List[Field]] = {
    "zerver_realm": ["authentication_methods"],
}
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def sanity_check_output(data: TableData) -> None:
    """Check the table declarations against the database and against `data`.

    Asserts that ALL_ZULIP_TABLES matches the set of tables declared by
    the Django models of the relevant apps, and that each of the
    special-purpose table sets is a subset of ALL_ZULIP_TABLES.  Then
    logs a warning for every table that is expected in the main export
    but has no entry in `data`.

    Raises AssertionError when the declarations are out of sync.
    """
    # First, we verify that the export tool has a declared
    # configuration for every table declared in the `models.py` files.
    app_labels = [
        "analytics",
        "django_otp",
        "otp_static",
        "otp_totp",
        "phonenumber",
        "social_django",
        "two_factor",
        "zerver",
    ]
    all_tables_db = {
        model._meta.db_table
        for app_label in app_labels
        for model in apps.get_app_config(app_label).get_models(include_auto_created=True)
    }

    # These assertion statements will fire when we add a new database
    # table that is not included in Zulip's data exports.  Generally,
    # you can add your new table to `ALL_ZULIP_TABLES` and
    # `NON_EXPORTED_TABLES` during early work on a new feature so that
    # CI passes.
    #
    # We'll want to make sure we handle it for exports before
    # releasing the new feature, but doing so correctly requires some
    # expertise on this export system.
    error_message = f"""
    It appears you've added a new database table, but haven't yet
    registered it in ALL_ZULIP_TABLES and the related declarations
    in {__file__} for what to include in data exports.
"""

    assert ALL_ZULIP_TABLES == all_tables_db, error_message
    assert NON_EXPORTED_TABLES.issubset(ALL_ZULIP_TABLES), error_message
    assert IMPLICIT_TABLES.issubset(ALL_ZULIP_TABLES), error_message
    assert ATTACHMENT_TABLES.issubset(ALL_ZULIP_TABLES), error_message
    assert ANALYTICS_TABLES.issubset(ALL_ZULIP_TABLES), error_message

    # Whatever remains after removing the specially-handled groups
    # should have an entry in `data`.
    tables = set(ALL_ZULIP_TABLES)
    tables -= NON_EXPORTED_TABLES
    tables -= IMPLICIT_TABLES
    tables -= MESSAGE_TABLES
    tables -= ATTACHMENT_TABLES
    tables -= ANALYTICS_TABLES

    for table in tables:
        if table not in data:
            logging.warning("??? NO DATA EXPORTED FOR TABLE %s!!!", table)
|
2016-08-09 16:35:43 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def write_data_to_file(output_file: Path, data: Any) -> None:
    """
    IMPORTANT: You generally don't want to call this directly.

    Instead use one of the higher level helpers:

        write_table_data
        write_records_json_file

    The one place we call this directly is for message partials.
    """
    with open(output_file, "wb") as f:
        # Because we don't pass a default handler, OPT_PASSTHROUGH_DATETIME
        # actually causes orjson to raise a TypeError on datetime objects. This
        # is what we want, because it helps us check that we correctly
        # post-processed them to serialize to UNIX timestamps rather than ISO
        # 8601 strings for historical reasons.
        f.write(orjson.dumps(data, option=orjson.OPT_INDENT_2 | orjson.OPT_PASSTHROUGH_DATETIME))
    logging.info("Finished writing %s", output_file)
|
2016-08-11 16:08:13 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2021-12-08 15:01:28 +01:00
|
|
|
def write_table_data(output_file: str, data: Dict[str, Any]) -> None:
    """Sort every table in `data` by row id, then write it to `output_file`.

    Each value of `data` is sorted in place, so the caller's lists are
    reordered.  `output_file` must end in ".json"; otherwise an
    AssertionError is raised (after the sorting has already happened).
    """
    # We sort by ids mostly so that humans can quickly do diffs
    # on two export jobs to see what changed (either due to new
    # data arriving or new code being deployed).
    for table in data.values():
        table.sort(key=lambda row: row["id"])

    assert output_file.endswith(".json")

    write_data_to_file(output_file, data)
|
|
|
|
|
|
|
|
|
2021-12-08 13:58:11 +01:00
|
|
|
def write_records_json_file(output_dir: str, records: List[Dict[str, Any]]) -> None:
    """Write `records`, sorted by their "path" key, to <output_dir>/records.json.

    `records` is sorted in place, and every element must have a "path" key.
    """
    # We want a somewhat deterministic sorting order here.  All of our
    # versions of records.json include a "path" field in each element,
    # even though there's some variation among avatars/emoji/realm_icons/uploads
    # in other fields that get written.
    #
    # The sorting order of paths isn't entirely sensical to humans,
    # because they include ids and even some random numbers,
    # but if you export the same realm twice, you should get identical results.
    records.sort(key=lambda record: record["path"])

    output_file = os.path.join(output_dir, "records.json")
    with open(output_file, "wb") as f:
        # For legacy reasons we allow datetime objects here, unlike
        # write_data_to_file.
        f.write(orjson.dumps(records, option=orjson.OPT_INDENT_2))
    logging.info("Finished writing %s", output_file)
|
2021-12-08 13:58:11 +01:00
|
|
|
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
def make_raw(query: Any, exclude: Optional[List[Field]] = None) -> List[Record]:
    """
    Takes a Django query and returns a JSONable list
    of dictionaries corresponding to the database rows.

    Fields named in `exclude` are passed through to model_to_dict and
    are also skipped when many-to-many values are converted to ids.
    """
    rows = []
    for instance in query:
        data = model_to_dict(instance, exclude=exclude)
        # In Django 1.11.5, model_to_dict evaluates the QuerySet of
        # many-to-many field to give us a list of instances. We require
        # a list of primary keys, so we get the primary keys from the
        # instances below.
        for field in instance._meta.many_to_many:
            if exclude is not None and field.name in exclude:
                continue
            value = data[field.name]
            data[field.name] = [row.id for row in value]

        rows.append(data)

    return rows
|
2016-08-09 17:30:52 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def floatify_datetime_fields(data: TableData, table: TableName) -> None:
    """Replace datetime values in `data[table]` with float timestamps, in place.

    The fields to convert are those listed for `table` in DATE_FIELDS.
    None values are left untouched.  Every other value must be a
    timezone-aware datetime; otherwise an AssertionError is raised.
    """
    for item in data[table]:
        for field in DATE_FIELDS[table]:
            dt = item[field]
            if dt is None:
                continue
            assert isinstance(dt, datetime.datetime)
            assert not timezone_is_naive(dt)
            item[field] = dt.timestamp()
|
2016-04-05 00:27:37 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2020-08-07 05:36:32 +02:00
|
|
|
def listify_bithandler_fields(data: TableData, table: TableName) -> None:
    """Convert the fields listed in BITHANDLER_FIELDS[table] to lists, in place.

    Each such field of every row in `data[table]` is replaced by
    `list(value)`.
    """
    for item in data[table]:
        for field in BITHANDLER_FIELDS[table]:
            item[field] = list(item[field])
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2017-11-05 11:37:41 +01:00
|
|
|
class Config:
    """A Config object configures a single table for exporting (and,
    maybe some day, importing as well).  This configuration defines
    what process needs to be followed to correctly extract the set of
    objects to export.

    You should never mutate Config objects as part of the export;
    instead use the data to determine how you populate other
    data structures.

    There are parent/children relationships between Config objects.
    The parent should be instantiated first.  The child will
    append itself to the parent's list of children.

    Constructor arguments:

    * table: key under which the rows are stored in the export
      response.  Required unless custom_tables is given.
    * model: the model class whose `objects` manager is queried.
    * normal_parent: parent Config whose already-exported row ids are
      used (via include_rows) to select this table's rows.  Also
      stored as `self.parent`.
    * virtual_parent: parent Config used only to order this Config
      among the parent's children; `self.parent` stays None.  Mutually
      exclusive with normal_parent.
    * filter_args: extra keyword filters merged into the query.
    * custom_fetch: callable invoked as custom_fetch(response, context)
      to populate the response itself.  Its __name__ must start with
      "custom_fetch_", and it cannot be combined with normal_parent.
    * custom_tables: tables that custom_fetch is expected to populate.
    * concat_and_destroy: temporary tables whose rows are concatenated
      into `table` and then removed from the response.
    * id_source: (table, field) pair naming previously exported rows
      whose `field` values are the ids to fetch.  Requires a
      virtual_parent whose table equals id_source[0].
    * source_filter: predicate applied to the id_source rows before
      their ids are collected.
    * include_rows: filter keyword used with normal_parent; it must
      end in "_id__in".
    * use_all: export every row of the model.
    * is_seeded: the single row is supplied by the caller rather than
      queried.  A Config without any parent must be seeded.
    * exclude: field names to leave out of the exported rows.

    Any inconsistent combination raises AssertionError.
    """

    def __init__(
        self,
        table: Optional[str] = None,
        model: Optional[Any] = None,
        normal_parent: Optional["Config"] = None,
        virtual_parent: Optional["Config"] = None,
        filter_args: Optional[FilterArgs] = None,
        custom_fetch: Optional[CustomFetch] = None,
        custom_tables: Optional[List[TableName]] = None,
        concat_and_destroy: Optional[List[TableName]] = None,
        id_source: Optional[IdSource] = None,
        source_filter: Optional[SourceFilter] = None,
        include_rows: Optional[Field] = None,
        use_all: bool = False,
        is_seeded: bool = False,
        exclude: Optional[List[Field]] = None,
    ) -> None:
        assert table or custom_tables
        self.table = table
        self.model = model
        self.normal_parent = normal_parent
        self.virtual_parent = virtual_parent
        self.filter_args = filter_args
        self.include_rows = include_rows
        self.use_all = use_all
        self.is_seeded = is_seeded
        self.exclude = exclude
        self.custom_fetch = custom_fetch
        self.custom_tables = custom_tables
        self.concat_and_destroy = concat_and_destroy
        self.id_source = id_source
        self.source_filter = source_filter
        self.children: List[Config] = []

        if self.include_rows:
            assert self.include_rows.endswith("_id__in")

        if self.custom_fetch:
            # enforce a naming convention
            assert self.custom_fetch.__name__.startswith("custom_fetch_")
            if self.normal_parent is not None:
                raise AssertionError(
                    """
                    If you have a custom fetcher, then specify
                    your parent as a virtual_parent.
                    """
                )

        # Only a normal parent counts as "the" parent; a virtual parent
        # merely determines where we sit in the traversal order.
        self.parent: Optional[Config] = normal_parent

        if virtual_parent is not None and normal_parent is not None:
            raise AssertionError(
                """
                If you specify a normal_parent, please
                do not create a virtual_parent.
                """
            )

        if normal_parent is not None:
            normal_parent.children.append(self)
        elif virtual_parent is not None:
            virtual_parent.children.append(self)
        elif not is_seeded:
            # is_seeded is a bool (default False), so it has to be
            # tested for falsiness; comparing it to None would never
            # match and the check below would be dead code.
            raise AssertionError(
                """
                You must specify a parent if you are
                not using is_seeded.
                """
            )

        if self.id_source is not None:
            if self.virtual_parent is None:
                raise AssertionError(
                    """
                    You must specify a virtual_parent if you are
                    using id_source."""
                )
            if self.id_source[0] != self.virtual_parent.table:
                raise AssertionError(
                    f"""
                    Configuration error.  To populate {self.table}, you
                    want data from {self.id_source[0]}, but that differs from
                    the table name of your virtual parent ({self.virtual_parent.table}),
                    which suggests you may not have set up
                    the ordering correctly.  You may simply
                    need to assign a virtual_parent, or there
                    may be deeper issues going on."""
                )
|
2016-08-11 21:13:02 +02:00
|
|
|
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
def export_from_config(
    response: TableData,
    config: Config,
    seed_object: Optional[Any] = None,
    context: Optional[Context] = None,
) -> None:
    """Export the table(s) described by `config`, then recurse into its children.

    `response` is filled in place.  The rows are obtained by the first
    of these strategies that applies to the config:

    * is_seeded: the single row is `seed_object`.
    * custom_fetch: the callable is invoked with (response, context)
      and must populate every table in config.custom_tables.
    * concat_and_destroy: the listed temporary tables are concatenated
      into response[config.table] and deleted from the response.
    * use_all: every row of config.model.
    * normal_parent: rows of config.model matching
      {config.include_rows: <ids of the parent's exported rows>},
      further narrowed by config.filter_args.
    * id_source: rows of config.model whose id appears in the named
      field of a previously exported table (optionally pre-filtered
      with config.source_filter), narrowed by config.filter_args.

    Fetched model rows are converted with make_raw (honoring
    config.exclude).  Afterwards every exported table gets its
    DATE_FIELDS floatified and its BITHANDLER_FIELDS listified.

    `context` is passed through unchanged to custom fetchers and to the
    recursive calls for child configs; an empty dict is used if omitted.

    Raises AssertionError if a custom fetch leaves one of its custom
    tables unpopulated, or if the config lacks a value that the chosen
    strategy requires.
    """
    table = config.table
    parent = config.parent
    model = config.model

    if context is None:
        context = {}

    if config.custom_tables:
        exported_tables = config.custom_tables
    else:
        assert (
            table is not None
        ), """
        You must specify config.custom_tables if you
        are not specifying config.table"""
        exported_tables = [table]

    for t in exported_tables:
        logging.info("Exporting via export_from_config: %s", t)

    rows = None
    if config.is_seeded:
        rows = [seed_object]

    elif config.custom_fetch:
        config.custom_fetch(
            response,
            context,
        )
        if config.custom_tables:
            for custom_table in config.custom_tables:
                if custom_table not in response:
                    raise AssertionError(f"Custom fetch failed to populate {custom_table}")

    elif config.concat_and_destroy:
        # When we concat_and_destroy, we are working with
        # temporary "tables" that are lists of records that
        # should already be ready to export.
        data: List[Record] = []
        for temp_table in config.concat_and_destroy:
            data += response[temp_table]
            del response[temp_table]
            logging.info("Deleted temporary %s", temp_table)
        assert table is not None
        response[table] = data

    elif config.use_all:
        assert model is not None
        query = model.objects.all()
        rows = list(query)

    elif config.normal_parent:
        # In this mode, our current model is figuratively Article,
        # and normal_parent is figuratively Blog, and
        # now we just need to get all the articles
        # contained by the blogs.
        assert parent is not None
        assert parent.table is not None
        assert config.include_rows is not None
        parent_ids = [r["id"] for r in response[parent.table]]
        filter_params: Dict[str, Any] = {config.include_rows: parent_ids}
        if config.filter_args is not None:
            filter_params.update(config.filter_args)
        assert model is not None
        try:
            query = model.objects.filter(**filter_params)
        except Exception:
            # Emit the offending configuration before re-raising, so
            # that a misconfigured Config is easy to track down.
            print(
                f"""
                Something about your Config seems to make it difficult
                to construct a query.

                table: {table}
                parent: {parent.table}

                filter_params: {filter_params}
                """
            )
            raise

        rows = list(query)

    elif config.id_source:
        # In this mode, we are the figurative Blog, and we now
        # need to look at the current response to get all the
        # blog ids from the Article rows we fetched previously.
        assert model is not None
        # This will be a tuple of the form ('zerver_article', 'blog').
        (child_table, field) = config.id_source
        child_rows = response[child_table]
        if config.source_filter:
            child_rows = [r for r in child_rows if config.source_filter(r)]
        lookup_ids = [r[field] for r in child_rows]
        filter_params = dict(id__in=lookup_ids)
        if config.filter_args:
            filter_params.update(config.filter_args)
        query = model.objects.filter(**filter_params)
        rows = list(query)

    if rows is not None:
        assert table is not None  # Hint for mypy
        response[table] = make_raw(rows, exclude=config.exclude)

    # Post-process rows.  Both conversions are keyed on the table
    # currently being visited, so that every exported table (including
    # each custom table) is handled exactly once.
    for t in exported_tables:
        if t in DATE_FIELDS:
            floatify_datetime_fields(response, t)
        if t in BITHANDLER_FIELDS:
            listify_bithandler_fields(response, t)

    # Now walk our children.  It's extremely important to respect
    # the order of children here.
    for child_config in config.children:
        export_from_config(
            response=response,
            config=child_config,
            context=context,
        )
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def get_realm_config() -> Config:
    """Build and return the root Config for a full single-realm export.

    The returned Config describes the realm row itself; every other
    Config created here registers itself as a child of an earlier one,
    so the order of the constructor calls below is the order in which
    export_from_config will later visit the tables.  Do not reorder
    them casually.
    """
    # The realm row is provided by the caller as the seed object.
    realm_config = Config(
        is_seeded=True,
        table="zerver_realm",
    )

    # Tables whose rows point straight at the realm.
    for realm_table, realm_model in (
        ("zerver_defaultstream", DefaultStream),
        ("zerver_customprofilefield", CustomProfileField),
        ("zerver_realmauditlog", RealmAuditLog),
        ("zerver_realmemoji", RealmEmoji),
        ("zerver_realmdomain", RealmDomain),
        ("zerver_realmfilter", RealmFilter),
        ("zerver_realmplayground", RealmPlayground),
    ):
        Config(
            normal_parent=realm_config,
            include_rows="realm_id__in",
            table=realm_table,
            model=realm_model,
        )

    # Client rows are exported wholesale rather than filtered by realm.
    Config(
        virtual_parent=realm_config,
        table="zerver_client",
        model=Client,
        use_all=True,
    )

    Config(
        normal_parent=realm_config,
        include_rows="realm_id__in",
        table="zerver_realmuserdefault",
        model=RealmUserDefault,
    )

    user_profile_config = Config(
        virtual_parent=realm_config,
        custom_fetch=custom_fetch_user_profile,
        custom_tables=[
            "zerver_userprofile",
            "zerver_userprofile_mirrordummy",
        ],
        # set table for children who treat us as normal parent
        table="zerver_userprofile",
    )

    user_groups_config = Config(
        normal_parent=realm_config,
        include_rows="realm_id__in",
        table="zerver_usergroup",
        model=UserGroup,
        exclude=["direct_members", "direct_subgroups"],
    )

    Config(
        normal_parent=user_groups_config,
        include_rows="user_group_id__in",
        table="zerver_usergroupmembership",
        model=UserGroupMembership,
    )

    Config(
        normal_parent=user_groups_config,
        include_rows="supergroup_id__in",
        table="zerver_groupgroupmembership",
        model=GroupGroupMembership,
    )

    Config(
        virtual_parent=user_profile_config,
        custom_fetch=custom_fetch_user_profile_cross_realm,
        custom_tables=[
            "zerver_userprofile_crossrealm",
        ],
    )

    # Bot-related tables, each keyed by a user profile id column.
    for bot_table, bot_model, bot_key in (
        ("zerver_service", Service, "user_profile_id__in"),
        ("zerver_botstoragedata", BotStorageData, "bot_profile_id__in"),
        ("zerver_botconfigdata", BotConfigData, "bot_profile_id__in"),
    ):
        Config(
            normal_parent=user_profile_config,
            include_rows=bot_key,
            table=bot_table,
            model=bot_model,
        )

    # Some of these tables are intermediate "tables" that we
    # create only for the export. Think of them as similar to views.

    # Personal recipients: each user's personal subscription first, then
    # the Recipient rows those subscriptions refer to.
    user_subscription_config = Config(
        normal_parent=user_profile_config,
        include_rows="user_profile_id__in",
        filter_args={"recipient__type": Recipient.PERSONAL},
        table="_user_subscription",
        model=Subscription,
    )

    Config(
        virtual_parent=user_subscription_config,
        id_source=("_user_subscription", "recipient"),
        table="_user_recipient",
        model=Recipient,
    )

    # Streams: the streams of the realm, their Recipient rows, and then
    # the subscriptions attached to those recipients.
    stream_config = Config(
        normal_parent=realm_config,
        include_rows="realm_id__in",
        table="zerver_stream",
        model=Stream,
        exclude=["email_token"],
    )

    stream_recipient_config = Config(
        normal_parent=stream_config,
        include_rows="type_id__in",
        filter_args={"type": Recipient.STREAM},
        table="_stream_recipient",
        model=Recipient,
    )

    Config(
        normal_parent=stream_recipient_config,
        include_rows="recipient_id__in",
        table="_stream_subscription",
        model=Subscription,
    )

    # Huddles are gathered by a custom fetcher that fills all three tables.
    Config(
        virtual_parent=user_profile_config,
        custom_fetch=custom_fetch_huddle_objects,
        custom_tables=[
            "_huddle_recipient",
            "_huddle_subscription",
            "zerver_huddle",
        ],
    )

    # Now build permanent tables from our temp tables.
    Config(
        virtual_parent=realm_config,
        table="zerver_recipient",
        concat_and_destroy=[
            "_user_recipient",
            "_stream_recipient",
            "_huddle_recipient",
        ],
    )

    Config(
        virtual_parent=realm_config,
        table="zerver_subscription",
        concat_and_destroy=[
            "_user_subscription",
            "_stream_subscription",
            "_huddle_subscription",
        ],
    )

    add_user_profile_child_configs(user_profile_config)

    return realm_config
|
2016-08-11 02:39:21 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2021-11-24 19:23:54 +01:00
|
|
|
def add_user_profile_child_configs(user_profile_config: Config) -> None:
    """
    Declare one Config, with user_profile_config as its normal_parent,
    for every table that is keyed by user and whose rows are fetched
    the same way for a whole-realm export and a single-user export.

    Tables that need different fetching logic for realms vs. single
    users do not belong here; each caller should keep its own short
    Config for those (sharing code deeper in the stack if useful).

    As of now, we do NOT include bot tables like Service.
    """

    # (table name, model class, filter on the parent's ids).  Note that
    # UserHotspot filters on user_id rather than user_profile_id.
    user_keyed_tables = [
        ("zerver_alertword", AlertWord, "user_profile_id__in"),
        ("zerver_customprofilefieldvalue", CustomProfileFieldValue, "user_profile_id__in"),
        ("zerver_muteduser", MutedUser, "user_profile_id__in"),
        ("zerver_useractivity", UserActivity, "user_profile_id__in"),
        ("zerver_useractivityinterval", UserActivityInterval, "user_profile_id__in"),
        ("zerver_userhotspot", UserHotspot, "user_id__in"),
        ("zerver_userpresence", UserPresence, "user_profile_id__in"),
        ("zerver_userstatus", UserStatus, "user_profile_id__in"),
        ("zerver_usertopic", UserTopic, "user_profile_id__in"),
    ]

    for table_name, model_class, parent_filter in user_keyed_tables:
        Config(
            table=table_name,
            model=model_class,
            normal_parent=user_profile_config,
            include_rows=parent_filter,
        )
|
|
|
|
|
|
|
|
|
2022-02-23 20:27:39 +01:00
|
|
|
# UserProfile fields that we exclude, with the reason for each:
EXCLUDED_USER_PROFILE_FIELDS = [
    "api_key",  # a secret
    "password",  # a secret
    "uuid",  # unlikely to be useful if the domain changes
]
|
|
|
|
|
|
|
|
|
2021-12-08 20:16:36 +01:00
|
|
|
def custom_fetch_user_profile(response: TableData, context: Context) -> None:
    """Fetch every UserProfile of context["realm"] (without the fields in
    EXCLUDED_USER_PROFILE_FIELDS) and split the rows between
    response["zerver_userprofile"] and
    response["zerver_userprofile_mirrordummy"].

    When context["exportable_user_ids"] is not None, users outside that
    collection are rewritten as inactive mirror dummies before the split.
    """
    realm = context["realm"]
    allowed_user_ids = context["exportable_user_ids"]

    realm_users = UserProfile.objects.filter(realm_id=realm.id)
    user_rows = make_raw(list(realm_users), exclude=EXCLUDED_USER_PROFILE_FIELDS)

    real_user_rows: List[Record] = []
    mirror_dummy_rows: List[Record] = []

    for user_row in user_rows:
        if allowed_user_ids is not None and user_row["id"] not in allowed_user_ids:
            # Convert non-exportable users to
            # inactive is_mirror_dummy users.
            user_row["is_mirror_dummy"] = True
            user_row["is_active"] = False
        elif allowed_user_ids is not None:
            # An explicitly exportable user must be a real account.
            assert not user_row["is_mirror_dummy"]

        destination = mirror_dummy_rows if user_row["is_mirror_dummy"] else real_user_rows
        destination.append(user_row)

    response["zerver_userprofile"] = real_user_rows
    response["zerver_userprofile_mirrordummy"] = mirror_dummy_rows
|
2016-08-14 22:49:46 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2021-12-08 20:16:36 +01:00
|
|
|
def custom_fetch_user_profile_cross_realm(response: TableData, context: Context) -> None:
    """Populate response["zerver_userprofile_crossrealm"] with one entry per
    supported system bot: its default zulip.com email, its user id and the
    id of its personal Recipient.

    The list is left empty when the realm being exported is the system
    bot realm itself.
    """
    realm = context["realm"]
    response["zerver_userprofile_crossrealm"] = []

    if realm.string_id == settings.SYSTEM_BOT_REALM:
        return

    # Only these bots are exported, and they are exported under their
    # default email rather than the one built from INTERNAL_BOT_DOMAIN.
    default_email_by_bot_name = {
        "NOTIFICATION_BOT": "notification-bot@zulip.com",
        "EMAIL_GATEWAY_BOT": "emailgateway@zulip.com",
        "WELCOME_BOT": "welcome-bot@zulip.com",
    }

    system_bot_realm = get_realm(settings.SYSTEM_BOT_REALM)
    for bot_settings in settings.INTERNAL_BOTS:
        default_email = default_email_by_bot_name.get(bot_settings["var_name"])
        if default_email is None:
            continue

        local_email = bot_settings["email_template"] % (settings.INTERNAL_BOT_DOMAIN,)
        system_bot_id = get_system_bot(local_email, system_bot_realm.id).id
        personal_recipient = Recipient.objects.get(type_id=system_bot_id, type=Recipient.PERSONAL)

        response["zerver_userprofile_crossrealm"].append(
            {
                "email": default_email,
                "id": system_bot_id,
                "recipient_id": personal_recipient.id,
            }
        )
|
|
|
|
|
2016-08-11 02:39:21 +02:00
|
|
|
|
2022-07-07 01:47:20 +02:00
|
|
|
def fetch_attachment_data(
    response: TableData, realm_id: int, message_ids: Set[int]
) -> List[Attachment]:
    """Store under response["zerver_attachment"] the raw rows of the realm's
    attachments that are linked to any message in message_ids, and return
    the corresponding Attachment objects.
    """
    matching_attachments = Attachment.objects.filter(
        realm_id=realm_id, messages__in=message_ids
    ).distinct()
    attachments = list(matching_attachments)

    response["zerver_attachment"] = make_raw(attachments)
    floatify_datetime_fields(response, "zerver_attachment")

    # We usually export most messages for the realm, but not
    # quite ALL messages for the realm.  So, we need to
    # clean up our attachment data to have correct
    # values for response['zerver_attachment'][<n>]['messages'].
    for attachment_row in response["zerver_attachment"]:
        attachment_row["messages"] = sorted(
            set(attachment_row["messages"]).intersection(message_ids)
        )

    return attachments
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2016-08-13 15:29:24 +02:00
|
|
|
|
2021-12-08 21:32:07 +01:00
|
|
|
def custom_fetch_realm_audit_logs_for_user(response: TableData, context: Context) -> None:
    """Store under response["zerver_realmauditlog"] every RealmAuditLog row
    in which context["user"] is either the modified user or the acting user.

    Being expansive here is deliberate: we want both the events that
    changed the user and the events the user caused (e.g. changing the
    settings for a stream).
    """
    user = context["user"]
    involves_user = Q(modified_user_id=user.id) | Q(acting_user_id=user.id)
    matching_entries = RealmAuditLog.objects.filter(involves_user)
    response["zerver_realmauditlog"] = make_raw(list(matching_entries))
|
|
|
|
|
|
|
|
|
2018-05-26 18:25:50 +02:00
|
|
|
def fetch_reaction_data(response: TableData, message_ids: Set[int]) -> None:
    """Store under response["zerver_reaction"] the raw rows of all Reaction
    objects attached to the given messages.
    """
    reactions = Reaction.objects.filter(message_id__in=list(message_ids))
    reaction_rows = make_raw(list(reactions))
    response["zerver_reaction"] = reaction_rows
|
2018-05-26 18:25:50 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2021-12-08 20:16:36 +01:00
|
|
|
def custom_fetch_huddle_objects(response: TableData, context: Context) -> None:
    """Export the huddles whose members all belong to context["realm"].

    Fills response["_huddle_recipient"], response["_huddle_subscription"]
    and response["zerver_huddle"].  Only the subscriptions of users listed
    in response["zerver_userprofile"] are considered.
    """
    realm = context["realm"]
    exported_user_ids = {row["id"] for row in response["zerver_userprofile"]}

    # Step 1: every huddle subscription held by one of our users.
    candidate_subs = Subscription.objects.select_related("recipient").filter(
        recipient__type=Recipient.HUDDLE, user_profile__in=exported_user_ids
    )
    candidate_recipient_ids = {sub.recipient_id for sub in candidate_subs}

    # Step 2: find the huddles that have at least one subscriber from
    # another realm.  In almost every case the other realm will be
    # zulip.com.
    all_member_subs = Subscription.objects.select_related().filter(
        recipient__in=candidate_recipient_ids
    )
    cross_realm_recipient_ids = {
        member_sub.recipient_id
        for member_sub in all_member_subs
        if member_sub.user_profile.realm != realm
    }

    # Step 3: keep only the huddles that are entirely within the realm.
    #
    # This is important for ensuring that the User objects needed
    # to import it on the other end exist (since we're only
    # exporting the users from this realm), at the cost of losing
    # some of these cross-realm messages.
    in_realm_subs = [
        sub for sub in candidate_subs if sub.recipient_id not in cross_realm_recipient_ids
    ]
    in_realm_recipient_ids = {sub.recipient_id for sub in in_realm_subs}
    in_realm_huddle_ids = {sub.recipient.type_id for sub in in_realm_subs}

    subscription_rows = make_raw(in_realm_subs)
    recipient_rows = make_raw(Recipient.objects.filter(id__in=in_realm_recipient_ids))

    response["_huddle_recipient"] = recipient_rows
    response["_huddle_subscription"] = subscription_rows
    response["zerver_huddle"] = make_raw(Huddle.objects.filter(id__in=in_realm_huddle_ids))
|
2016-08-11 01:21:53 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def fetch_usermessages(
    realm: Realm,
    message_ids: Set[int],
    user_profile_ids: Set[int],
    message_filename: Path,
    consent_message_id: Optional[int] = None,
) -> List[Record]:
    """Return the UserMessage rows, as dicts, for the given messages.

    Only rows belonging to users of `realm` that are also in
    user_profile_ids are returned; when consent_message_id is given, the
    users are further restricted to those returned by
    get_consented_user_ids.  In each dict the "flags" entry is replaced by
    a "flags_mask" entry holding `flags.mask`.

    message_filename is only used in the log message.
    """
    # UserMessage export security rule: You can export UserMessages
    # for the messages you exported for the users in your realm.
    realm_rows_for_messages = UserMessage.objects.filter(
        user_profile__realm=realm, message_id__in=message_ids
    )

    allowed_user_ids = user_profile_ids
    if consent_message_id is not None:
        allowed_user_ids = user_profile_ids & get_consented_user_ids(consent_message_id)

    exported_rows = []
    for row in realm_rows_for_messages:
        if row.user_profile_id in allowed_user_ids:
            row_dict = model_to_dict(row)
            row_dict["flags_mask"] = row.flags.mask
            del row_dict["flags"]
            exported_rows.append(row_dict)

    logging.info("Fetched UserMessages for %s", message_filename)
    return exported_rows
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def export_usermessages_batch(
    input_path: Path, output_path: Path, consent_message_id: Optional[int] = None
) -> None:
    """Turn one batch file of Message rows into a finished export shard.

    This is one step of the parallel export system (it is called by
    the export_usermessage_batch management command).  It reads the
    messages stored at input_path, fetches the matching UserMessage
    rows, writes both tables to output_path and finally deletes
    input_path.

    input_path must end in ".partial" or ".locked" and output_path in
    ".json".  The ".partial" file names are built in
    write_message_partials."""
    assert input_path.endswith((".partial", ".locked"))
    assert output_path.endswith(".json")

    with open(input_path, "rb") as partial_file:
        partial: MessagePartial = orjson.loads(partial_file.read())

    message_rows = partial["zerver_message"]
    message_ids = {message_row["id"] for message_row in message_rows}
    user_profile_ids = set(partial["zerver_userprofile_ids"])
    realm = Realm.objects.get(id=partial["realm_id"])

    usermessage_rows = fetch_usermessages(
        realm, message_ids, user_profile_ids, output_path, consent_message_id
    )

    shard: TableData = {
        "zerver_message": message_rows,
        "zerver_usermessage": usermessage_rows,
    }
    write_table_data(output_path, shard)

    # The input file has been fully processed; remove it.
    os.unlink(input_path)
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def export_partial_message_files(
    realm: Realm,
    response: TableData,
    chunk_size: int = MESSAGE_BATCH_CHUNK_SIZE,
    output_dir: Optional[Path] = None,
    public_only: bool = False,
    consent_message_id: Optional[int] = None,
) -> Set[int]:
    """Work out which messages belong in the export and hand their IDs,
    split into chunks, to write_message_partials.

    realm: the realm being exported.
    response: already-fetched table data; the "zerver_userprofile",
        "zerver_userprofile_mirrordummy", "zerver_userprofile_crossrealm"
        and "zerver_recipient" tables are read from it.
    chunk_size: passed to chunkify as the chunk size for the sorted
        message IDs.
    output_dir: directory passed on to write_message_partials; a fresh
        temporary directory is created when this is None.
    public_only: only export messages sent to the realm's streams that
        are not invite-only.
    consent_message_id: when given (and public_only is false), the
        export is limited to what the users returned by
        get_consented_user_ids can access.

    Returns the set of all message IDs selected for export.
    """
    if output_dir is None:
        output_dir = tempfile.mkdtemp(prefix="zulip-export")

    def get_ids(records: Iterable[Mapping[str, Any]]) -> Set[int]:
        return {x["id"] for x in records}

    # Basic security rule: You can export everything either...
    #   - sent by someone in your exportable_user_ids
    #        OR
    #   - received by someone in your exportable_user_ids (which
    #     equates to a recipient object we are exporting)
    #
    # TODO: In theory, you should be able to export messages in
    # cross-realm PM threads; currently, this only exports cross-realm
    # messages received by your realm that were sent by Zulip system
    # bots (e.g. emailgateway, notification-bot).

    # Here, "we" and "us" refers to the inner circle of users who
    # were specified as being allowed to be exported.  "Them"
    # refers to other users.
    user_ids_for_us = get_ids(
        response["zerver_userprofile"],
    )
    ids_of_our_possible_senders = get_ids(
        response["zerver_userprofile"]
        + response["zerver_userprofile_mirrordummy"]
        + response["zerver_userprofile_crossrealm"]
    )

    consented_user_ids: Set[int] = set()
    if consent_message_id is not None:
        consented_user_ids = get_consented_user_ids(consent_message_id)

    if public_only:
        recipient_streams = Stream.objects.filter(realm=realm, invite_only=False)
        recipient_ids = Recipient.objects.filter(
            type=Recipient.STREAM, type_id__in=recipient_streams
        ).values_list("id", flat=True)
        recipient_ids_for_us = get_ids(response["zerver_recipient"]) & set(recipient_ids)
    elif consent_message_id is not None:
        public_streams = Stream.objects.filter(realm=realm, invite_only=False)
        public_stream_recipient_ids = Recipient.objects.filter(
            type=Recipient.STREAM, type_id__in=public_streams
        ).values_list("id", flat=True)

        streams_with_protected_history_recipient_ids = Stream.objects.filter(
            realm=realm, history_public_to_subscribers=False
        ).values_list("recipient_id", flat=True)

        consented_recipient_ids = Subscription.objects.filter(
            user_profile_id__in=consented_user_ids
        ).values_list("recipient_id", flat=True)

        # `-` binds tighter than `|`, so the protected-history streams are
        # removed from the consented recipients only; the parentheses just
        # make that existing grouping explicit.
        recipient_ids_set = set(public_stream_recipient_ids) | (
            set(consented_recipient_ids) - set(streams_with_protected_history_recipient_ids)
        )
        recipient_ids_for_us = get_ids(response["zerver_recipient"]) & recipient_ids_set
    else:
        recipient_ids_for_us = get_ids(response["zerver_recipient"])
        # For a full export, we have implicit consent for all users in the export.
        consented_user_ids = user_ids_for_us

    # We capture most messages here: Messages that were sent by
    # anyone in the export and received by any of the users who we
    # have consent to export.  For the public stream export, these
    # are the only messages we need (the messages those streams
    # received).
    messages_we_received = Message.objects.filter(
        sender__in=ids_of_our_possible_senders,
        recipient__in=recipient_ids_for_us,
    )
    message_queries = [messages_we_received]

    if not public_only:
        if consent_message_id is not None:
            # Export with member consent requires some careful handling to make sure
            # we only include messages that a consenting user can access.
            has_usermessage_expression = Exists(
                UserMessage.objects.filter(
                    user_profile_id__in=consented_user_ids, message_id=OuterRef("id")
                )
            )
            messages_we_received_in_protected_history_streams = Message.objects.annotate(
                has_usermessage=has_usermessage_expression
            ).filter(
                sender__in=ids_of_our_possible_senders,
                recipient_id__in=(
                    set(consented_recipient_ids) & set(streams_with_protected_history_recipient_ids)
                ),
                has_usermessage=True,
            )

            message_queries.append(messages_we_received_in_protected_history_streams)

        # The above query is missing some messages that consenting
        # users have access to, namely, PMs sent by one of the users
        # in our export to another user (since the only subscriber to
        # a Recipient object for Recipient.PERSONAL is the recipient,
        # not the sender).  The `consented_user_ids` list has
        # precisely those users whose Recipient.PERSONAL recipient ID
        # was already present in recipient_ids_for_us above.
        ids_of_non_exported_possible_recipients = ids_of_our_possible_senders - consented_user_ids

        recipients_for_them = Recipient.objects.filter(
            type=Recipient.PERSONAL, type_id__in=ids_of_non_exported_possible_recipients
        ).values("id")
        recipient_ids_for_them = get_ids(recipients_for_them)

        messages_we_sent_to_them = Message.objects.filter(
            sender__in=consented_user_ids,
            recipient__in=recipient_ids_for_them,
        )

        message_queries.append(messages_we_sent_to_them)

    all_message_ids: Set[int] = set()

    for message_query in message_queries:
        message_ids = set(get_id_list_gently_from_database(base_query=message_query, id_field="id"))

        # We expect our queries to be disjoint, although this assertion
        # isn't strictly necessary if you don't mind a little bit of
        # overhead.
        assert message_ids.isdisjoint(all_message_ids)

        all_message_ids |= message_ids

    # Honor the caller's chunk_size; it defaults to MESSAGE_BATCH_CHUNK_SIZE.
    message_id_chunks = chunkify(sorted(all_message_ids), chunk_size=chunk_size)

    write_message_partials(
        realm=realm,
        message_id_chunks=message_id_chunks,
        output_dir=output_dir,
        user_profile_ids=user_ids_for_us,
    )

    return all_message_ids
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2021-12-09 19:50:43 +01:00
|
|
|
def write_message_partials(
    *,
    realm: Realm,
    message_id_chunks: List[List[int]],
    output_dir: Path,
    user_profile_ids: Set[int],
) -> None:
    """Write one ``messages-NNNNNN.json.partial`` shard per chunk of message ids.

    Each shard contains the raw rows for the messages in its chunk (after
    being passed through floatify_datetime_fields), the given user profile
    ids, and the id of ``realm``.  Shards are numbered from 1 in chunk order.
    """
    for shard_number, chunk_ids in enumerate(message_id_chunks, start=1):
        chunk_query = Message.objects.filter(id__in=chunk_ids).order_by("id")
        rows = make_raw(chunk_query)

        # Figure out the name of our shard file.
        shard_path = os.path.join(output_dir, f"messages-{shard_number:06}.json") + ".partial"
        logging.info("Fetched messages for %s", shard_path)

        # Clean up our messages.
        table_data: TableData = {"zerver_message": rows}
        floatify_datetime_fields(table_data, "zerver_message")

        # The .partial file carries the message rows plus the list of
        # user_profile_ids to search for and the realm id.
        partial: MessagePartial = dict(
            zerver_message=table_data["zerver_message"],
            zerver_userprofile_ids=list(user_profile_ids),
            realm_id=realm.id,
        )

        # And write the data.
        write_data_to_file(shard_path, partial)
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2021-12-07 15:24:41 +01:00
|
|
|
def export_uploads_and_avatars(
    realm: Realm,
    *,
    attachments: Optional[List[Attachment]] = None,
    user: Optional[UserProfile],
    output_dir: Path,
) -> None:
    """Export uploads, avatars, emoji and (for full exports) realm icons.

    When ``user`` is None this is a whole-realm export: ``attachments`` must
    be supplied by the caller, all of the realm's users and emoji are
    covered, system bots are handled, and realm icons are exported too.
    Otherwise only the files owned/authored by ``user`` are exported and
    ``attachments`` is recomputed from that user's Attachment rows.

    Files come from the local uploads directory when
    settings.LOCAL_UPLOADS_DIR is set, and from S3 otherwise.
    """
    uploads_output_dir = os.path.join(output_dir, "uploads")
    avatars_output_dir = os.path.join(output_dir, "avatars")
    realm_icons_output_dir = os.path.join(output_dir, "realm_icons")
    emoji_output_dir = os.path.join(output_dir, "emoji")

    output_dirs = [uploads_output_dir, avatars_output_dir, emoji_output_dir]
    # Avoid creating realm_icons_output_dir for single user exports
    if user is None:
        output_dirs.append(realm_icons_output_dir)

    for dir_path in output_dirs:
        # exist_ok avoids the check-then-create race of testing
        # os.path.exists first.
        os.makedirs(dir_path, exist_ok=True)

    if user is None:
        handle_system_bots = True
        users = list(UserProfile.objects.filter(realm=realm))
        assert attachments is not None
        realm_emojis = list(RealmEmoji.objects.filter(realm_id=realm.id))
    else:
        handle_system_bots = False
        users = [user]
        attachments = list(Attachment.objects.filter(owner_id=user.id))
        realm_emojis = list(RealmEmoji.objects.filter(author_id=user.id))

    if settings.LOCAL_UPLOADS_DIR:
        # Small installations and developers will usually just store files locally.
        export_uploads_from_local(
            realm,
            local_dir=os.path.join(settings.LOCAL_UPLOADS_DIR, "files"),
            output_dir=uploads_output_dir,
            attachments=attachments,
        )
        export_avatars_from_local(
            realm,
            local_dir=os.path.join(settings.LOCAL_UPLOADS_DIR, "avatars"),
            output_dir=avatars_output_dir,
            users=users,
            handle_system_bots=handle_system_bots,
        )
        export_emoji_from_local(
            realm,
            local_dir=os.path.join(settings.LOCAL_UPLOADS_DIR, "avatars"),
            output_dir=emoji_output_dir,
            realm_emojis=realm_emojis,
        )

        if user is None:
            export_realm_icons(
                realm,
                local_dir=settings.LOCAL_UPLOADS_DIR,
                output_dir=realm_icons_output_dir,
            )
    else:
        user_ids = {user.id for user in users}

        # Some bigger installations will have their data stored on S3.

        path_ids = {attachment.path_id for attachment in attachments}

        export_files_from_s3(
            realm,
            handle_system_bots=handle_system_bots,
            flavor="upload",
            bucket_name=settings.S3_AUTH_UPLOADS_BUCKET,
            object_prefix=f"{realm.id}/",
            output_dir=uploads_output_dir,
            user_ids=user_ids,
            valid_hashes=path_ids,
        )

        # Each user may have both a processed avatar and the ".original"
        # upload stored next to it.
        avatar_hash_values = set()
        for user_id in user_ids:
            avatar_path = user_avatar_path_from_ids(user_id, realm.id)
            avatar_hash_values.add(avatar_path)
            avatar_hash_values.add(avatar_path + ".original")

        export_files_from_s3(
            realm,
            handle_system_bots=handle_system_bots,
            flavor="avatar",
            bucket_name=settings.S3_AVATAR_BUCKET,
            object_prefix=f"{realm.id}/",
            output_dir=avatars_output_dir,
            user_ids=user_ids,
            valid_hashes=avatar_hash_values,
        )

        emoji_paths = {get_emoji_path(realm_emoji) for realm_emoji in realm_emojis}

        export_files_from_s3(
            realm,
            handle_system_bots=handle_system_bots,
            flavor="emoji",
            bucket_name=settings.S3_AVATAR_BUCKET,
            object_prefix=f"{realm.id}/emoji/images/",
            output_dir=emoji_output_dir,
            user_ids=user_ids,
            valid_hashes=emoji_paths,
        )

        if user is None:
            # Realm icons/logos are not filtered by path (valid_hashes=None).
            export_files_from_s3(
                realm,
                handle_system_bots=handle_system_bots,
                flavor="realm_icon_or_logo",
                bucket_name=settings.S3_AVATAR_BUCKET,
                object_prefix=f"{realm.id}/realm/",
                output_dir=realm_icons_output_dir,
                user_ids=user_ids,
                valid_hashes=None,
            )
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
|
2018-12-07 17:18:09 +01:00
|
|
|
def _get_exported_s3_record(
    bucket_name: str, key: Object, processing_emoji: bool
) -> Dict[str, Any]:
    """Build the export record for one S3 object.

    Helper function for export_files_from_s3.  The record starts from the
    object's basic attributes, is overlaid with the object's metadata, and
    has ``user_profile_id`` / ``realm_id`` normalized to ints.

    Raises Exception if no realm_id can be determined for the object.
    """
    record: Dict[str, Any] = {
        "s3_path": key.key,
        "bucket": bucket_name,
        "size": key.content_length,
        "last_modified": key.last_modified,
        "content_type": key.content_type,
        "md5": key.e_tag,
    }
    record.update(key.metadata)

    if processing_emoji:
        record["file_name"] = os.path.basename(key.key)

    # There are some rare cases in which 'user_profile_id' may not be present
    # in S3 metadata. Eg: Exporting an organization which was created
    # initially from a local export won't have the "user_profile_id" metadata
    # set for realm_icons and realm_logos.  Such records are left as they are.
    if "user_profile_id" in record:
        # Fix the record ids: metadata values are turned into ints.
        user_profile_id = int(record["user_profile_id"])
        user_profile = get_user_profile_by_id(user_profile_id)
        record["user_profile_email"] = user_profile.email
        record["user_profile_id"] = user_profile_id

        # A few early avatars don't have 'realm_id' on the object; fix their metadata
        record.setdefault("realm_id", user_profile.realm_id)

    if "realm_id" not in record:
        raise Exception("Missing realm_id")
    record["realm_id"] = int(record["realm_id"])

    return record
|
2018-12-07 17:26:48 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def _save_s3_object_to_file(
    key: Object,
    output_dir: str,
    processing_uploads: bool,
) -> None:
    """Download one S3 object to ``output_dir/<key.key>``.

    Helper function for export_files_from_s3.

    Raises AssertionError if an upload key does not consist of exactly
    three "/"-separated fields, or if the destination path contains "../".
    """
    if processing_uploads:
        # Upload keys are expected to have exactly three path components.
        fields = key.key.split("/")
        if len(fields) != 3:
            raise AssertionError(f"Suspicious key with invalid format {key.key}")

    # The destination is the same for every flavor of file.
    filename = os.path.join(output_dir, key.key)

    if "../" in filename:
        raise AssertionError(f"Suspicious file with invalid format {filename}")

    # Use 'mark_sanitized' to cause Pysa to ignore the flow of user controlled
    # data into the filesystem sink, because we've already prevented directory
    # traversal with our assertion above.
    dirname = mark_sanitized(os.path.dirname(filename))

    # exist_ok avoids the race between checking for the directory and
    # creating it, and matches the other exporters in this file.
    os.makedirs(dirname, exist_ok=True)
    key.download_file(Filename=filename)
|
2018-12-07 17:26:48 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def export_files_from_s3(
    realm: Realm,
    handle_system_bots: bool,
    flavor: str,
    bucket_name: str,
    object_prefix: str,
    output_dir: Path,
    user_ids: Set[int],
    valid_hashes: Optional[Set[str]],
) -> None:
    """Download the matching objects of an S3 bucket and write their records.

    Objects under ``object_prefix`` are exported when their key is in
    ``valid_hashes`` (if that is not None) and their ``user_profile_id``
    metadata is in ``user_ids``.  When ``handle_system_bots`` is set and an
    email gateway bot is configured, that bot's id is accepted as well.
    The caller's ``user_ids`` set is not modified.

    Raises AssertionError when an object lacks ``realm_id`` or
    ``user_profile_id`` metadata, or when its ``realm_id`` differs from
    ``realm`` and it was not uploaded by the email gateway bot.
    """
    processing_uploads = flavor == "upload"
    processing_emoji = flavor == "emoji"

    bucket = get_bucket(bucket_name)
    records = []

    logging.info("Downloading %s files from %s", flavor, bucket_name)

    email_gateway_bot: Optional[UserProfile] = None

    if handle_system_bots and settings.EMAIL_GATEWAY_BOT is not None:
        internal_realm = get_realm(settings.SYSTEM_BOT_REALM)
        email_gateway_bot = get_system_bot(settings.EMAIL_GATEWAY_BOT, internal_realm.id)
        # Build a new set instead of calling .add() so that the set passed
        # in by the caller (which is reused across calls) is left untouched.
        user_ids = user_ids | {email_gateway_bot.id}

    # For very old realms we may not have proper metadata. If you really need
    # an export to bypass these checks, flip the following flag.
    checking_metadata = True

    count = 0
    for bkey in bucket.objects.filter(Prefix=object_prefix):
        # The object summary already carries the key; no need to build an
        # Object just to read it.
        if valid_hashes is not None and bkey.key not in valid_hashes:
            continue

        key = bucket.Object(bkey.key)

        if checking_metadata:
            if "realm_id" not in key.metadata:
                raise AssertionError(f"Missing realm_id in key metadata: {key.metadata}")

            if "user_profile_id" not in key.metadata:
                raise AssertionError(f"Missing user_profile_id in key metadata: {key.metadata}")

            if int(key.metadata["user_profile_id"]) not in user_ids:
                continue

            # This can happen if an email address has moved realms
            if key.metadata["realm_id"] != str(realm.id):
                if email_gateway_bot is None or key.metadata["user_profile_id"] != str(
                    email_gateway_bot.id
                ):
                    raise AssertionError(
                        f"Key metadata problem: {key.key} / {key.metadata} / {realm.id}"
                    )
                # Email gateway bot sends messages, potentially including attachments, cross-realm.
                print(f"File uploaded by email gateway bot: {key.key} / {key.metadata}")

        record = _get_exported_s3_record(bucket_name, key, processing_emoji)

        record["path"] = key.key
        _save_s3_object_to_file(key, output_dir, processing_uploads)

        records.append(record)
        count += 1

        if count % 100 == 0:
            logging.info("Finished %s", count)

    write_records_json_file(output_dir, records)
|
2016-04-05 00:27:37 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2021-12-07 12:36:02 +01:00
|
|
|
def export_uploads_from_local(
    realm: Realm, local_dir: Path, output_dir: Path, attachments: List[Attachment]
) -> None:
    """Copy the given attachments from local_dir to output_dir.

    One record per copied file is collected and handed to
    write_records_json_file together with output_dir.
    """
    records = []
    for copied, attachment in enumerate(attachments, start=1):
        # Use 'mark_sanitized' to work around false positive caused by Pysa
        # thinking that 'realm' (and thus 'attachment' and 'attachment.path_id')
        # are user controlled
        path_id = mark_sanitized(attachment.path_id)

        source_path = os.path.join(local_dir, path_id)
        target_path = os.path.join(output_dir, path_id)

        os.makedirs(os.path.dirname(target_path), exist_ok=True)
        shutil.copy2(source_path, target_path)

        file_stat = os.stat(source_path)
        owner = attachment.owner
        records.append(
            {
                "realm_id": attachment.realm_id,
                "user_profile_id": owner.id,
                "user_profile_email": owner.email,
                "s3_path": path_id,
                "path": path_id,
                "size": file_stat.st_size,
                "last_modified": file_stat.st_mtime,
                "content_type": None,
            }
        )

        # Log progress every 100 files.
        if copied % 100 == 0:
            logging.info("Finished %s", copied)

    write_records_json_file(output_dir, records)
|
2016-04-05 00:27:37 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2021-12-07 11:54:28 +01:00
|
|
|
def export_avatars_from_local(
    realm: Realm,
    local_dir: Path,
    output_dir: Path,
    users: List[UserProfile],
    handle_system_bots: bool,
) -> None:
    """Copy locally stored avatar files for ``users`` into output_dir.

    Users whose avatar comes from Gravatar are skipped.  When
    ``handle_system_bots`` is set, the notification, email gateway and
    welcome bots are processed in addition to ``users``; the caller's list
    is not modified.  One record per copied file is collected and handed to
    write_records_json_file together with output_dir.
    """
    count = 0
    records = []

    if handle_system_bots:
        internal_realm = get_realm(settings.SYSTEM_BOT_REALM)
        # Build a new list rather than extending `users` in place, so the
        # list object passed in by the caller is left untouched.
        users = [
            *users,
            get_system_bot(settings.NOTIFICATION_BOT, internal_realm.id),
            get_system_bot(settings.EMAIL_GATEWAY_BOT, internal_realm.id),
            get_system_bot(settings.WELCOME_BOT, internal_realm.id),
        ]

    for user in users:
        if user.avatar_source == UserProfile.AVATAR_FROM_GRAVATAR:
            continue

        avatar_path = user_avatar_path_from_ids(user.id, realm.id)
        # Match every file stored under the avatar path with any extension.
        wildcard = os.path.join(local_dir, avatar_path + ".*")

        for local_path in glob.glob(wildcard):
            logging.info(
                "Copying avatar file for user %s from %s",
                user.email,
                local_path,
            )
            fn = os.path.relpath(local_path, local_dir)
            output_path = os.path.join(output_dir, fn)
            os.makedirs(str(os.path.dirname(output_path)), exist_ok=True)
            shutil.copy2(str(local_path), str(output_path))
            stat = os.stat(local_path)
            record = dict(
                realm_id=realm.id,
                user_profile_id=user.id,
                user_profile_email=user.email,
                s3_path=fn,
                path=fn,
                size=stat.st_size,
                last_modified=stat.st_mtime,
                content_type=None,
            )
            records.append(record)

            count += 1

            # Log progress every 100 files.
            if count % 100 == 0:
                logging.info("Finished %s", count)

    write_records_json_file(output_dir, records)
|
2019-07-19 19:15:23 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2019-07-19 19:15:23 +02:00
|
|
|
def export_realm_icons(realm: Realm, local_dir: Path, output_dir: Path) -> None:
    """Copy every file in the realm's avatar-and-logo directory to output_dir.

    Files are placed under ``<output_dir>/<realm.id>/`` and one record per
    file is handed to write_records_json_file together with output_dir.
    """
    realm_dir = zerver.lib.upload.upload_backend.realm_avatar_and_logo_path(realm)
    pattern = os.path.join(local_dir, realm_dir, "*")

    records = []
    for source_path in glob.glob(pattern):
        # Only the file name is kept; the exported path is <realm id>/<file name>.
        relative_path = os.path.join(str(realm.id), os.path.basename(source_path))
        target_path = os.path.join(output_dir, relative_path)

        os.makedirs(str(os.path.dirname(target_path)), exist_ok=True)
        shutil.copy2(str(source_path), str(target_path))

        records.append(
            {
                "realm_id": realm.id,
                "path": relative_path,
                "s3_path": relative_path,
            }
        )

    write_records_json_file(output_dir, records)
|
2016-08-09 02:19:29 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2021-12-07 14:01:57 +01:00
|
|
|
def get_emoji_path(realm_emoji: RealmEmoji) -> str:
    """Return the storage path of ``realm_emoji``.

    The path is RealmEmoji.PATH_ID_TEMPLATE filled in with the emoji's
    realm id and file name.
    """
    template_fields = {
        "realm_id": realm_emoji.realm_id,
        "emoji_file_name": realm_emoji.file_name,
    }
    return RealmEmoji.PATH_ID_TEMPLATE.format(**template_fields)
|
|
|
|
|
|
|
|
|
2021-12-07 14:08:46 +01:00
|
|
|
def export_emoji_from_local(
    realm: Realm, local_dir: Path, output_dir: Path, realm_emojis: List[RealmEmoji]
) -> None:
    """Copy the given realm emoji files from local_dir to output_dir.

    One record per copied file is collected and handed to
    write_records_json_file together with output_dir.
    """
    records = []
    for copied, realm_emoji in enumerate(realm_emojis, start=1):
        # Use 'mark_sanitized' to work around a false positive caused by Pysa
        # treating the emoji path as user controlled.
        emoji_path = mark_sanitized(get_emoji_path(realm_emoji))

        source_path = os.path.join(local_dir, emoji_path)
        target_path = os.path.join(output_dir, emoji_path)

        os.makedirs(os.path.dirname(target_path), exist_ok=True)
        shutil.copy2(source_path, target_path)

        # Realm emoji author is optional.
        author = realm_emoji.author
        author_id = author.id if author else None

        records.append(
            {
                "realm_id": realm.id,
                "author": author_id,
                "path": emoji_path,
                "s3_path": emoji_path,
                "file_name": realm_emoji.file_name,
                "name": realm_emoji.name,
                "deactivated": realm_emoji.deactivated,
            }
        )

        # Log progress every 100 files.
        if copied % 100 == 0:
            logging.info("Finished %s", copied)

    write_records_json_file(output_dir, records)
|
2018-05-26 21:18:54 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def do_write_stats_file_for_realm_export(output_dir: Path) -> None:
    """Write ``stats.txt`` summarizing the JSON files of a realm export.

    For realm.json, attachment.json, analytics.json and every
    messages-*.json file, the length of each top-level key's value is
    listed; for the avatars and uploads records.json files, the number of
    records is listed.
    """

    def load_json(path: str) -> Any:
        # Read and parse one JSON file from the export directory.
        with open(path, "rb") as json_file:
            return orjson.loads(json_file.read())

    stats_file = os.path.join(output_dir, "stats.txt")
    table_files = sorted(
        [
            os.path.join(output_dir, "analytics.json"),
            os.path.join(output_dir, "attachment.json"),
            *glob.glob(os.path.join(output_dir, "messages-*.json")),
            os.path.join(output_dir, "realm.json"),
        ]
    )
    records_files = [
        os.path.join(output_dir, "avatars/records.json"),
        os.path.join(output_dir, "uploads/records.json"),
    ]

    logging.info("Writing stats file: %s\n", stats_file)
    with open(stats_file, "w") as stats:
        # Table files: one line per top-level key, headed by the file's base name.
        for table_file in table_files:
            stats.write(os.path.basename(table_file) + "\n")
            tables = load_json(table_file)
            for table_name in sorted(tables):
                stats.write(f"{len(tables[table_name]):5} {table_name}\n")
            stats.write("\n")

        # Records files: a single count, headed by the file's full path.
        for records_file in records_files:
            stats.write(records_file + "\n")
            records = load_json(records_file)
            stats.write(f"{len(records):5} records\n")
            stats.write("\n")
|
2016-04-05 00:27:37 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def do_export_realm(
|
|
|
|
realm: Realm,
|
|
|
|
output_dir: Path,
|
|
|
|
threads: int,
|
|
|
|
exportable_user_ids: Optional[Set[int]] = None,
|
|
|
|
public_only: bool = False,
|
|
|
|
consent_message_id: Optional[int] = None,
|
|
|
|
) -> str:
|
python: Convert assignment type annotations to Python 3.6 style.
This commit was split by tabbott; this piece covers the vast majority
of files in Zulip, but excludes scripts/, tools/, and puppet/ to help
ensure we at least show the right error messages for Xenial systems.
We can likely further refine the remaining pieces with some testing.
Generated by com2ann, with whitespace fixes and various manual fixes
for runtime issues:
- invoiced_through: Optional[LicenseLedger] = models.ForeignKey(
+ invoiced_through: Optional["LicenseLedger"] = models.ForeignKey(
-_apns_client: Optional[APNsClient] = None
+_apns_client: Optional["APNsClient"] = None
- notifications_stream: Optional[Stream] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)
- signup_notifications_stream: Optional[Stream] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)
+ notifications_stream: Optional["Stream"] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)
+ signup_notifications_stream: Optional["Stream"] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)
- author: Optional[UserProfile] = models.ForeignKey('UserProfile', blank=True, null=True, on_delete=CASCADE)
+ author: Optional["UserProfile"] = models.ForeignKey('UserProfile', blank=True, null=True, on_delete=CASCADE)
- bot_owner: Optional[UserProfile] = models.ForeignKey('self', null=True, on_delete=models.SET_NULL)
+ bot_owner: Optional["UserProfile"] = models.ForeignKey('self', null=True, on_delete=models.SET_NULL)
- default_sending_stream: Optional[Stream] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)
- default_events_register_stream: Optional[Stream] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)
+ default_sending_stream: Optional["Stream"] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)
+ default_events_register_stream: Optional["Stream"] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)
-descriptors_by_handler_id: Dict[int, ClientDescriptor] = {}
+descriptors_by_handler_id: Dict[int, "ClientDescriptor"] = {}
-worker_classes: Dict[str, Type[QueueProcessingWorker]] = {}
-queues: Dict[str, Dict[str, Type[QueueProcessingWorker]]] = {}
+worker_classes: Dict[str, Type["QueueProcessingWorker"]] = {}
+queues: Dict[str, Dict[str, Type["QueueProcessingWorker"]]] = {}
-AUTH_LDAP_REVERSE_EMAIL_SEARCH: Optional[LDAPSearch] = None
+AUTH_LDAP_REVERSE_EMAIL_SEARCH: Optional["LDAPSearch"] = None
Signed-off-by: Anders Kaseorg <anders@zulipchat.com>
2020-04-22 01:09:50 +02:00
|
|
|
response: TableData = {}
|
2016-04-05 00:27:37 +02:00
|
|
|
|
2016-08-10 02:32:02 +02:00
|
|
|
# We need at least one thread running to export
|
|
|
|
# UserMessage rows. The management command should
|
|
|
|
# enforce this for us.
|
2016-08-13 20:22:23 +02:00
|
|
|
if not settings.TEST_SUITE:
|
|
|
|
assert threads >= 1
|
2016-08-10 02:32:02 +02:00
|
|
|
|
2016-08-10 18:45:39 +02:00
|
|
|
realm_config = get_realm_config()
|
|
|
|
|
2016-08-11 23:59:19 +02:00
|
|
|
create_soft_link(source=output_dir, in_progress=True)
|
|
|
|
|
2016-08-12 20:59:22 +02:00
|
|
|
logging.info("Exporting data from get_realm_config()...")
|
2016-08-10 18:45:39 +02:00
|
|
|
export_from_config(
|
|
|
|
response=response,
|
|
|
|
config=realm_config,
|
|
|
|
seed_object=realm,
|
python: Use trailing commas consistently.
Automatically generated by the following script, based on the output
of lint with flake8-comma:
import re
import sys
last_filename = None
last_row = None
lines = []
for msg in sys.stdin:
m = re.match(
r"\x1b\[35mflake8 \|\x1b\[0m \x1b\[1;31m(.+):(\d+):(\d+): (\w+)", msg
)
if m:
filename, row_str, col_str, err = m.groups()
row, col = int(row_str), int(col_str)
if filename == last_filename:
assert last_row != row
else:
if last_filename is not None:
with open(last_filename, "w") as f:
f.writelines(lines)
with open(filename) as f:
lines = f.readlines()
last_filename = filename
last_row = row
line = lines[row - 1]
if err in ["C812", "C815"]:
lines[row - 1] = line[: col - 1] + "," + line[col - 1 :]
elif err in ["C819"]:
assert line[col - 2] == ","
lines[row - 1] = line[: col - 2] + line[col - 1 :].lstrip(" ")
if last_filename is not None:
with open(last_filename, "w") as f:
f.writelines(lines)
Signed-off-by: Anders Kaseorg <anders@zulipchat.com>
2020-04-10 05:23:40 +02:00
|
|
|
context=dict(realm=realm, exportable_user_ids=exportable_user_ids),
|
2016-08-10 18:45:39 +02:00
|
|
|
)
|
2021-02-12 08:20:45 +01:00
|
|
|
logging.info("...DONE with get_realm_config() data")
|
2016-08-10 20:57:35 +02:00
|
|
|
|
2016-08-11 20:27:26 +02:00
|
|
|
sanity_check_output(response)
|
|
|
|
|
2016-08-11 15:26:47 +02:00
|
|
|
# We (sort of) export zerver_message rows here. We write
|
|
|
|
# them to .partial files that are subsequently fleshed out
|
|
|
|
# by parallel processes to add in zerver_usermessage data.
|
|
|
|
# This is for performance reasons, of course. Some installations
|
|
|
|
# have millions of messages.
|
|
|
|
logging.info("Exporting .partial files messages")
|
2021-02-12 08:19:30 +01:00
|
|
|
message_ids = export_partial_message_files(
|
|
|
|
realm,
|
|
|
|
response,
|
|
|
|
output_dir=output_dir,
|
|
|
|
public_only=public_only,
|
|
|
|
consent_message_id=consent_message_id,
|
|
|
|
)
|
2021-02-12 08:20:45 +01:00
|
|
|
logging.info("%d messages were exported", len(message_ids))
|
2016-08-10 02:32:02 +02:00
|
|
|
|
2018-05-26 18:25:50 +02:00
|
|
|
# zerver_reaction
|
python: Convert assignment type annotations to Python 3.6 style.
This commit was split by tabbott; this piece covers the vast majority
of files in Zulip, but excludes scripts/, tools/, and puppet/ to help
ensure we at least show the right error messages for Xenial systems.
We can likely further refine the remaining pieces with some testing.
Generated by com2ann, with whitespace fixes and various manual fixes
for runtime issues:
- invoiced_through: Optional[LicenseLedger] = models.ForeignKey(
+ invoiced_through: Optional["LicenseLedger"] = models.ForeignKey(
-_apns_client: Optional[APNsClient] = None
+_apns_client: Optional["APNsClient"] = None
- notifications_stream: Optional[Stream] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)
- signup_notifications_stream: Optional[Stream] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)
+ notifications_stream: Optional["Stream"] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)
+ signup_notifications_stream: Optional["Stream"] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)
- author: Optional[UserProfile] = models.ForeignKey('UserProfile', blank=True, null=True, on_delete=CASCADE)
+ author: Optional["UserProfile"] = models.ForeignKey('UserProfile', blank=True, null=True, on_delete=CASCADE)
- bot_owner: Optional[UserProfile] = models.ForeignKey('self', null=True, on_delete=models.SET_NULL)
+ bot_owner: Optional["UserProfile"] = models.ForeignKey('self', null=True, on_delete=models.SET_NULL)
- default_sending_stream: Optional[Stream] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)
- default_events_register_stream: Optional[Stream] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)
+ default_sending_stream: Optional["Stream"] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)
+ default_events_register_stream: Optional["Stream"] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)
-descriptors_by_handler_id: Dict[int, ClientDescriptor] = {}
+descriptors_by_handler_id: Dict[int, "ClientDescriptor"] = {}
-worker_classes: Dict[str, Type[QueueProcessingWorker]] = {}
-queues: Dict[str, Dict[str, Type[QueueProcessingWorker]]] = {}
+worker_classes: Dict[str, Type["QueueProcessingWorker"]] = {}
+queues: Dict[str, Dict[str, Type["QueueProcessingWorker"]]] = {}
-AUTH_LDAP_REVERSE_EMAIL_SEARCH: Optional[LDAPSearch] = None
+AUTH_LDAP_REVERSE_EMAIL_SEARCH: Optional["LDAPSearch"] = None
Signed-off-by: Anders Kaseorg <anders@zulipchat.com>
2020-04-22 01:09:50 +02:00
|
|
|
zerver_reaction: TableData = {}
|
2018-05-26 18:25:50 +02:00
|
|
|
fetch_reaction_data(response=zerver_reaction, message_ids=message_ids)
|
|
|
|
response.update(zerver_reaction)
|
|
|
|
|
|
|
|
# Write realm data
|
|
|
|
export_file = os.path.join(output_dir, "realm.json")
|
2021-12-08 15:01:28 +01:00
|
|
|
write_table_data(output_file=export_file, data=response)
|
2018-05-26 18:25:50 +02:00
|
|
|
|
2019-01-30 08:54:29 +01:00
|
|
|
# Write analytics data
|
|
|
|
export_analytics_tables(realm=realm, output_dir=output_dir)
|
|
|
|
|
2016-08-13 03:33:19 +02:00
|
|
|
# zerver_attachment
|
2022-07-07 01:47:20 +02:00
|
|
|
attachments = export_attachment_table(
|
|
|
|
realm=realm, output_dir=output_dir, message_ids=message_ids
|
|
|
|
)
|
|
|
|
|
|
|
|
logging.info("Exporting uploaded files and avatars")
|
|
|
|
export_uploads_and_avatars(realm, attachments=attachments, user=None, output_dir=output_dir)
|
2016-08-13 03:33:19 +02:00
|
|
|
|
2016-08-11 15:43:58 +02:00
|
|
|
# Start parallel jobs to export the UserMessage objects.
|
2021-02-12 08:19:30 +01:00
|
|
|
launch_user_message_subprocesses(
|
|
|
|
threads=threads, output_dir=output_dir, consent_message_id=consent_message_id
|
|
|
|
)
|
2016-08-11 15:43:58 +02:00
|
|
|
|
2020-05-02 08:44:14 +02:00
|
|
|
logging.info("Finished exporting %s", realm.string_id)
|
2016-08-11 23:59:19 +02:00
|
|
|
create_soft_link(source=output_dir, in_progress=False)
|
|
|
|
|
2019-06-21 20:30:00 +02:00
|
|
|
do_write_stats_file_for_realm_export(output_dir)
|
|
|
|
|
2021-02-12 08:20:45 +01:00
|
|
|
tarball_path = output_dir.rstrip("/") + ".tar.gz"
|
2021-12-17 04:11:46 +01:00
|
|
|
subprocess.check_call(
|
|
|
|
[
|
|
|
|
"tar",
|
|
|
|
f"-czf{tarball_path}",
|
|
|
|
f"-C{os.path.dirname(output_dir)}",
|
|
|
|
os.path.basename(output_dir),
|
|
|
|
]
|
|
|
|
)
|
2019-06-21 20:30:00 +02:00
|
|
|
return tarball_path
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2022-07-07 01:47:20 +02:00
|
|
|
def export_attachment_table(
    realm: Realm, output_dir: Path, message_ids: Set[int]
) -> List[Attachment]:
    """Write the realm's attachment table to ``attachment.json``.

    The table data is collected by ``fetch_attachment_data`` for the given
    realm id and message ids, then written to ``output_dir``.  Whatever
    ``fetch_attachment_data`` returns is handed back to the caller
    unchanged.
    """
    table_data: TableData = {}
    exported_attachments = fetch_attachment_data(
        response=table_data, realm_id=realm.id, message_ids=message_ids
    )

    attachment_json = os.path.join(output_dir, "attachment.json")
    write_table_data(output_file=attachment_json, data=table_data)

    return exported_attachments
|
2016-08-13 03:33:19 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def create_soft_link(source: Path, in_progress: bool = True) -> None:
    """Update the well-known export symlinks for ``source``.

    With ``in_progress=True`` the "export-in-progress" link is overwritten.
    With ``in_progress=False`` that link is removed (if it exists), the
    "export-most-recent" link is overwritten instead, and its location is
    logged.

    In development the links live under ``DEPLOY_ROOT/var``; otherwise they
    live directly in ``/home/zulip``.
    """
    if settings.DEVELOPMENT:
        link_dir = os.path.join(settings.DEPLOY_ROOT, "var")
        in_progress_link = os.path.join(link_dir, "export-in-progress")
        done_link = os.path.join(link_dir, "export-most-recent")
    else:
        in_progress_link = "/home/zulip/export-in-progress"
        done_link = "/home/zulip/export-most-recent"

    if in_progress:
        overwrite_symlink(source, in_progress_link)
        return

    try:
        os.remove(in_progress_link)
    except FileNotFoundError:
        # There is nothing to clean up if the in-progress link is absent.
        pass

    overwrite_symlink(source, done_link)
    logging.info("See %s for output files", done_link)
|
2016-08-11 23:59:19 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def launch_user_message_subprocesses(
    threads: int, output_dir: Path, consent_message_id: Optional[int] = None
) -> None:
    """Run ``manage.py export_usermessage_batch`` once per shard, in parallel.

    One subprocess is started for each shard id in ``range(threads)``; each
    is given ``--path=<output_dir>`` and ``--thread=<shard_id>``, plus
    ``--consent-message-id`` when ``consent_message_id`` is not ``None``.
    The function blocks until every launched subprocess has exited and
    prints each shard's raw wait status as it finishes.
    """
    logging.info("Launching %d PARALLEL subprocesses to export UserMessage rows", threads)

    manage_py = os.path.join(settings.DEPLOY_ROOT, "manage.py")

    # Maps the pid of each running subprocess to the shard it is exporting.
    pids: Dict[int, int] = {}

    for shard_id in range(threads):
        arguments = [
            manage_py,
            "export_usermessage_batch",
            f"--path={output_dir}",
            f"--thread={shard_id}",
        ]
        if consent_message_id is not None:
            arguments.append(f"--consent-message-id={consent_message_id}")

        process = subprocess.Popen(arguments)
        pids[process.pid] = shard_id

    while pids:
        pid, status = os.wait()
        shard = pids.pop(pid, None)
        if shard is None:
            # os.wait() reaps *any* child of this process, not only the
            # ones started above; skip children we did not launch instead
            # of failing with a KeyError.
            continue
        print(f"Shard {shard} finished, status {status}")
|
2016-08-10 02:32:02 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def do_export_user(user_profile: UserProfile, output_dir: Path) -> None:
    """Export one user's data into ``output_dir``.

    Writes ``user.json`` with the tables collected by
    ``export_single_user``, then exports the user's messages (including
    the messages referenced by the exported reaction rows) and finally the
    user's uploads and avatars.
    """
    user_tables: TableData = {}
    export_single_user(user_profile, user_tables)

    user_json = os.path.join(output_dir, "user.json")
    write_table_data(output_file=user_json, data=user_tables)

    # Ids of the messages referenced by the exported reaction rows.
    reaction_message_ids: Set[int] = set()
    for reaction_row in user_tables["zerver_reaction"]:
        reaction_message_ids.add(reaction_row["message"])

    logging.info("Exporting messages")
    export_messages_single_user(
        user_profile,
        output_dir=output_dir,
        reaction_message_ids=reaction_message_ids,
    )

    logging.info("Exporting images")
    export_uploads_and_avatars(user_profile.realm, user=user_profile, output_dir=output_dir)
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def export_single_user(user_profile: UserProfile, response: TableData) -> None:
    """Run the single-user export config, seeded with ``user_profile``.

    ``response`` is passed to ``export_from_config`` together with the
    config from ``get_single_user_config`` and a context holding the user.
    """
    single_user_config = get_single_user_config()
    export_context = {"user": user_profile}
    export_from_config(
        response=response,
        config=single_user_config,
        seed_object=user_profile,
        context=export_context,
    )
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def get_single_user_config() -> Config:
    """Build the Config tree used for a single-user export.

    This defines the limited set of data to export when exporting all data
    that a single Zulip user has access to in an organization.  The seeded
    ``zerver_userprofile`` config is the root and is what gets returned;
    the other configs are linked to it through their ``normal_parent`` /
    ``virtual_parent`` arguments (presumably Config registers each child
    on its parent -- confirm against the Config class).
    """

    def is_stream_recipient(recipient_row: Dict[str, Any]) -> bool:
        # Only recipient rows for streams have a type_id that is a stream id.
        return recipient_row["type"] == Recipient.STREAM

    # zerver_userprofile
    root_config = Config(
        table="zerver_userprofile",
        is_seeded=True,
        exclude=EXCLUDED_USER_PROFILE_FIELDS,
    )

    # zerver_subscription
    subscriptions = Config(
        table="zerver_subscription",
        model=Subscription,
        normal_parent=root_config,
        include_rows="user_profile_id__in",
    )

    # zerver_recipient
    recipients = Config(
        table="zerver_recipient",
        model=Recipient,
        virtual_parent=subscriptions,
        id_source=("zerver_subscription", "recipient"),
    )

    # zerver_stream
    #
    # TODO: We currently export the existence of private streams, but
    # not their message history, in the "export with partial member
    # consent" code path.  This is consistent with our documented policy,
    # since that data is available to the organization administrator
    # who initiated the export, but unnecessary and potentially
    # confusing; it'd be better to just skip those streams from the
    # export (which would require more complex export logic for the
    # subscription/recipient/stream tables to exclude private streams
    # with no consenting subscribers).
    Config(
        table="zerver_stream",
        model=Stream,
        virtual_parent=recipients,
        id_source=("zerver_recipient", "type_id"),
        source_filter=is_stream_recipient,
        exclude=["email_token"],
    )

    # analytics_usercount
    Config(
        table="analytics_usercount",
        model=UserCount,
        normal_parent=root_config,
        include_rows="user_id__in",
    )

    # zerver_realmauditlog
    Config(
        table="zerver_realmauditlog",
        model=RealmAuditLog,
        virtual_parent=root_config,
        # See the docstring for why we use a custom fetch here.
        custom_fetch=custom_fetch_realm_audit_logs_for_user,
    )

    # zerver_reaction
    Config(
        table="zerver_reaction",
        model=Reaction,
        normal_parent=root_config,
        include_rows="user_profile_id__in",
    )

    add_user_profile_child_configs(root_config)

    return root_config
|
2016-04-05 00:27:37 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2021-12-09 19:12:13 +01:00
|
|
|
def get_id_list_gently_from_database(*, base_query: Any, id_field: str) -> List[int]:
    """Fetch every ``id_field`` value of ``base_query`` in modest batches.

    Use this function if you need a HUGE number of ids from the database,
    and you don't mind a few extra trips.  Particularly for exports, we
    don't really care about a little extra time to finish the export--the
    much bigger concern is that we don't want to overload our database all
    at once, nor do we want to keep a whole bunch of Django objects around
    in memory.

    So our general process is to call this function first, and then we
    call chunkify to break our ids into small chunks for "fat query"
    batches.

    Even if you are not working at huge scale, this function can also be
    used for the convenience of its API.

    The ids are returned in ascending order.  Paging starts from -1 and
    always asks for values strictly greater than the last one seen.
    """
    assert id_field == "id" or id_field.endswith("_id")

    batch_size = 10000  # we are just getting ints

    def fetch_batch_after(lower_bound: int) -> List[int]:
        # One round trip: the next `batch_size` ids above `lower_bound`.
        lookup = {f"{id_field}__gt": lower_bound}
        return list(
            base_query.values_list(id_field, flat=True)
            .filter(**lookup)
            .order_by(id_field)[:batch_size]
        )

    collected_ids: List[int] = []
    batch = fetch_batch_after(-1)
    while len(batch) != 0:
        collected_ids += batch
        # Resume right after the largest id fetched so far.
        batch = fetch_batch_after(batch[-1])

    return collected_ids
|
|
|
|
|
|
|
|
|
2021-12-09 16:17:58 +01:00
|
|
|
def chunkify(lst: List[int], chunk_size: int) -> List[List[int]]:
    """Split ``lst`` into consecutive chunks of at most ``chunk_size`` items.

    Every chunk except possibly the last has exactly ``chunk_size``
    elements; an empty ``lst`` yields an empty list.

    >>> chunkify([1, 2, 3, 4, 5], 2)
    [[1, 2], [3, 4], [5]]

    Raises:
        ValueError: if ``chunk_size`` is not a positive integer.  A size
            of zero or below cannot partition the list and would otherwise
            silently drop elements.
    """
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")

    return [lst[start : start + chunk_size] for start in range(0, len(lst), chunk_size)]
|
|
|
|
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
def export_messages_single_user(
    user_profile: UserProfile, *, output_dir: Path, reaction_message_ids: Set[int]
) -> None:
    """Write the user's messages to ``messages-NNNNNN.json`` files.

    Candidate message ids are the union of: messages sent by the user,
    messages addressed to a PERSONAL or HUDDLE recipient the user is
    subscribed to, and ``reaction_message_ids``.  The candidates are
    processed in sorted batches of ``MESSAGE_BATCH_CHUNK_SIZE``; for each
    batch, the messages for which the user has a UserMessage row are
    written to one numbered file (numbering starts at 1) in ``output_dir``.
    """

    @lru_cache(maxsize=None)
    def get_recipient(recipient_id: int) -> str:
        # Human-readable recipient label: the stream name for stream
        # recipients, otherwise the comma-separated full names of the
        # recipient's subscribers.
        recipient = Recipient.objects.get(id=recipient_id)

        if recipient.type == Recipient.STREAM:
            stream_row = Stream.objects.values("name").get(id=recipient.type_id)
            return stream_row["name"]

        subscriber_names = (
            UserProfile.objects.filter(
                subscription__recipient_id=recipient.id,
            )
            .order_by("full_name")
            .values_list("full_name", flat=True)
        )
        return ", ".join(subscriber_names)

    sent_by_user = Message.objects.filter(sender=user_profile)

    direct_subscriptions = Subscription.objects.filter(
        user_profile=user_profile, recipient__type__in=[Recipient.PERSONAL, Recipient.HUDDLE]
    )
    direct_recipient_ids = [subscription.recipient_id for subscription in direct_subscriptions]
    sent_to_user = Message.objects.filter(recipient_id__in=direct_recipient_ids)

    # Find all message ids that pertain to us.
    all_message_ids: Set[int] = set()
    for message_query in (sent_by_user, sent_to_user):
        all_message_ids.update(
            get_id_list_gently_from_database(base_query=message_query, id_field="id")
        )
    all_message_ids |= reaction_message_ids

    id_chunks = chunkify(sorted(all_message_ids), MESSAGE_BATCH_CHUNK_SIZE)
    for dump_file_id, message_id_chunk in enumerate(id_chunks, start=1):
        user_messages = list(
            UserMessage.objects.select_related("message", "message__sending_client")
            .filter(user_profile=user_profile, message_id__in=message_id_chunk)
            .order_by("message_id")
        )

        message_rows = []
        for user_message in user_messages:
            message = user_message.message
            row = model_to_dict(message)
            row["flags"] = user_message.flags_list()
            row["flags_mask"] = user_message.flags.mask
            # Add a few nice, human-readable details
            row["sending_client_name"] = message.sending_client.name
            row["recipient_name"] = get_recipient(message.recipient_id)
            message_rows.append(row)

        message_filename = os.path.join(output_dir, f"messages-{dump_file_id:06}.json")
        logging.info("Fetched messages for %s", message_filename)

        output = {"zerver_message": message_rows}
        floatify_datetime_fields(output, "zerver_message")

        write_table_data(message_filename, output)
|
2019-01-30 08:54:29 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2019-01-30 08:54:29 +01:00
|
|
|
def export_analytics_tables(realm: Realm, output_dir: Path) -> None:
    """Write the realm's analytics tables to ``analytics.json``.

    The tables are collected by running the config from
    ``get_analytics_config`` with ``realm`` as the seed object.
    """
    analytics_tables: TableData = {}

    logging.info("Fetching analytics table data")
    export_from_config(
        response=analytics_tables,
        config=get_analytics_config(),
        seed_object=realm,
    )

    # The seeding logic results in a duplicate zerver_realm object
    # being included in the analytics data.  We don't want it, as that
    # data is already in `realm.json`, so we just delete it here
    # before writing to disk.
    del analytics_tables["zerver_realm"]

    analytics_json = os.path.join(output_dir, "analytics.json")
    write_table_data(output_file=analytics_json, data=analytics_tables)
|
2019-01-30 08:54:29 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2019-01-30 08:54:29 +01:00
|
|
|
def get_analytics_config() -> Config:
    """Build the Config tree for ``analytics.json`` in a full-realm export.

    The root is a seeded ``zerver_realm`` config.  Each analytics count
    table is declared with that root as its ``normal_parent`` and is
    selected by ``realm_id__in`` (presumably Config registers each child
    on its parent -- confirm against the Config class).
    """
    realm_root = Config(
        table="zerver_realm",
        is_seeded=True,
    )

    # Declared in this order: realm counts, user counts, stream counts.
    count_tables = (
        ("analytics_realmcount", RealmCount),
        ("analytics_usercount", UserCount),
        ("analytics_streamcount", StreamCount),
    )
    for table_name, count_model in count_tables:
        Config(
            table=table_name,
            model=count_model,
            normal_parent=realm_root,
            include_rows="realm_id__in",
        )

    return realm_root
|
2019-03-25 22:18:28 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2019-05-10 14:28:38 +02:00
|
|
|
def get_consented_user_ids(consent_message_id: int) -> Set[int]:
    """Return the users who reacted to the consent message with "outbox".

    A user counts as consenting when a Reaction row exists on
    ``consent_message_id`` with reaction type ``unicode_emoji`` and emoji
    code ``1f4e4``.
    """
    consent_reactions = Reaction.objects.filter(
        message_id=consent_message_id,
        reaction_type="unicode_emoji",
        # outbox = 1f4e4
        emoji_code="1f4e4",
    )
    consenting_users = consent_reactions.values_list("user_profile", flat=True)
    return set(consenting_users)
|
|
|
|
|
|
|
|
|
|
|
|
def export_realm_wrapper(
    realm: Realm,
    output_dir: str,
    threads: int,
    upload: bool,
    public_only: bool,
    percent_callback: Optional[Callable[[Any], None]] = None,
    consent_message_id: Optional[int] = None,
) -> Optional[str]:
    """Export a realm to a tarball and optionally upload it.

    Runs ``do_export_realm``, removes ``output_dir`` afterwards, and
    prints the tarball location.  When ``upload`` is false the tarball is
    left on disk and ``None`` is returned.  Otherwise the tarball is
    handed to the configured upload backend (``percent_callback`` is
    forwarded to it), the local tarball is deleted, and the URL returned
    by the backend is returned.
    """
    archive_path = do_export_realm(
        realm=realm,
        output_dir=output_dir,
        threads=threads,
        public_only=public_only,
        consent_message_id=consent_message_id,
    )
    # The working directory is removed once the tarball has been produced.
    shutil.rmtree(output_dir)
    print(f"Tarball written to {archive_path}")

    if not upload:
        return None

    # We upload to the `avatars` bucket because that's world-readable
    # without additional configuration.  We'll likely want to change
    # that in the future.
    print("Uploading export tarball...")
    backend = zerver.lib.upload.upload_backend
    download_url = backend.upload_export_tarball(
        realm, archive_path, percent_callback=percent_callback
    )
    print(f"\nUploaded to {download_url}")

    # The local copy is removed only after the upload call has returned.
    os.remove(archive_path)
    print(f"Successfully deleted the tarball at {archive_path}")
    return download_url
|
2019-06-23 22:57:14 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2019-06-24 02:51:13 +02:00
|
|
|
def get_realm_exports_serialized(user: UserProfile) -> List[Dict[str, Any]]:
    """Return the REALM_EXPORTED audit log rows of ``user``'s realm as dicts.

    Each dict carries ``id``, ``export_time``, ``acting_user_id``,
    ``export_url``, ``deleted_timestamp``, ``failed_timestamp`` and
    ``pending``.  The result is sorted by ``id``.
    """
    export_rows = RealmAuditLog.objects.filter(
        realm=user.realm, event_type=RealmAuditLog.REALM_EXPORTED
    )

    serialized_by_id = {}
    for row in export_rows:
        requester = row.acting_user

        # A row that has no extra_data yet is reported as pending.
        is_pending = row.extra_data is None
        tarball_url = None
        deleted_at = None
        failed_at = None

        if not is_pending:
            details = orjson.loads(row.extra_data)
            deleted_at = details.get("deleted_timestamp")
            failed_at = details.get("failed_timestamp")
            tarball_path = details.get("export_path")

            # A URL is only produced for exports that recorded a path and
            # have not been marked as deleted.
            if tarball_path and not deleted_at:
                tarball_url = zerver.lib.upload.upload_backend.get_export_tarball_url(
                    user.realm, tarball_path
                )

        assert requester is not None
        serialized_by_id[row.id] = {
            "id": row.id,
            "export_time": row.event_time.timestamp(),
            "acting_user_id": requester.id,
            "export_url": tarball_url,
            "deleted_timestamp": deleted_at,
            "failed_timestamp": failed_at,
            "pending": is_pending,
        }

    return sorted(serialized_by_id.values(), key=lambda entry: entry["id"])
|