export_realm: Add RealmExport model.

Previously, the key data related to realm exports was stored in
RealmAuditLog. This commit adds a separate RealmExport table to
store that data.

It also includes code to migrate the relevant existing data from
RealmAuditLog to RealmExport.

Fixes part of #31201.
Prakhar Pratyush authored 2024-09-26 15:48:55 +05:30, committed by Tim Abbott
parent 5d9eb4e358
commit 07dcee36b2
12 changed files with 426 additions and 215 deletions
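
For reviewers, a minimal sketch of the lifecycle the new table encodes. The
constants and fields come from the RealmExport model added in this commit;
the wrapper function itself is illustrative only, not code from the diff:

    from django.utils.timezone import now as timezone_now

    from zerver.models import Realm, RealmExport

    def sketch_export_lifecycle(realm: Realm) -> RealmExport:
        # A request creates a row in the REQUESTED state.
        row = RealmExport.objects.create(
            realm=realm,
            type=RealmExport.EXPORT_PUBLIC,
            status=RealmExport.REQUESTED,
            date_requested=timezone_now(),
        )
        # The worker then moves it to STARTED, and finally to SUCCEEDED or
        # FAILED; deleting the tarball later moves it to DELETED. Each
        # transition fills in the matching date_* column.
        row.status = RealmExport.STARTED
        row.date_started = timezone_now()
        row.save(update_fields=["status", "date_started"])
        return row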


@@ -2,7 +2,7 @@ from django.utils.timezone import now as timezone_now
 from zerver.lib.export import get_realm_exports_serialized
 from zerver.lib.upload import delete_export_tarball
-from zerver.models import Realm, RealmAuditLog
+from zerver.models import Realm, RealmExport
 from zerver.tornado.django_api import send_event_on_commit
@@ -11,15 +11,13 @@ def notify_realm_export(realm: Realm) -> None:
     send_event_on_commit(realm, event, realm.get_human_admin_users().values_list("id", flat=True))


-def do_delete_realm_export(export: RealmAuditLog) -> None:
-    export_data = export.extra_data
-    export_path = export_data.get("export_path")
-    if export_path:
-        # Allow removal even if the export failed.
-        delete_export_tarball(export_path)
-    export_data.update(deleted_timestamp=timezone_now().timestamp())
-    export.extra_data = export_data
-    export.save(update_fields=["extra_data"])
-    notify_realm_export(export.realm)
+def do_delete_realm_export(export_row: RealmExport) -> None:
+    export_path = export_row.export_path
+    assert export_path is not None
+    delete_export_tarball(export_path)
+    export_row.status = RealmExport.DELETED
+    export_row.date_deleted = timezone_now()
+    export_row.save(update_fields=["status", "date_deleted"])
+    notify_realm_export(export_row.realm)
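
A hedged usage sketch of the new deletion helper above (module path assumed
from Zulip's layout; export_id would come from the API request). Callers are
expected to check status first, since the helper now asserts export_path is set:

    from zerver.actions.realm_export import do_delete_realm_export
    from zerver.models import RealmExport

    export_row = RealmExport.objects.get(id=export_id)
    if export_row.status == RealmExport.SUCCEEDED:
        # Marks status=DELETED and records date_deleted.
        do_delete_realm_export(export_row)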


@@ -7,6 +7,7 @@
 # (2) if it doesn't belong in EXCLUDED_TABLES, add a Config object for
 # it to get_realm_config.
 import glob
+import hashlib
 import logging
 import os
 import shutil
@@ -17,6 +18,7 @@ from contextlib import suppress
 from datetime import datetime
 from functools import cache
 from typing import TYPE_CHECKING, Any, Optional, TypeAlias, TypedDict
+from urllib.parse import urlsplit

 import orjson
 from django.apps import apps
@@ -24,12 +26,14 @@ from django.conf import settings
 from django.db.models import Exists, OuterRef, Q
 from django.forms.models import model_to_dict
 from django.utils.timezone import is_naive as timezone_is_naive
+from django.utils.timezone import now as timezone_now

 import zerver.lib.upload
 from analytics.models import RealmCount, StreamCount, UserCount
 from scripts.lib.zulip_tools import overwrite_symlink
 from zerver.lib.avatar_hash import user_avatar_base_path_from_ids
 from zerver.lib.pysa import mark_sanitized
+from zerver.lib.timestamp import datetime_to_timestamp
 from zerver.lib.upload.s3 import get_bucket
 from zerver.models import (
     AlertWord,
@@ -53,6 +57,7 @@ from zerver.models import (
     RealmAuthenticationMethod,
     RealmDomain,
     RealmEmoji,
+    RealmExport,
     RealmFilter,
     RealmPlayground,
     RealmUserDefault,
@@ -73,7 +78,7 @@ from zerver.models import (
 )
 from zerver.models.presence import PresenceSequence
 from zerver.models.realm_audit_logs import AuditLogEventType
-from zerver.models.realms import EXPORT_FULL_WITH_CONSENT, EXPORT_PUBLIC, get_realm
+from zerver.models.realms import get_realm
 from zerver.models.users import get_system_bot, get_user_profile_by_id

 if TYPE_CHECKING:
@@ -158,6 +163,7 @@ ALL_ZULIP_TABLES = {
     "zerver_realmauthenticationmethod",
     "zerver_realmdomain",
     "zerver_realmemoji",
+    "zerver_realmexport",
     "zerver_realmfilter",
     "zerver_realmplayground",
     "zerver_realmreactivationstatus",
@@ -302,6 +308,13 @@ DATE_FIELDS: dict[TableName, list[Field]] = {
     "zerver_muteduser": ["date_muted"],
     "zerver_realmauditlog": ["event_time"],
     "zerver_realm": ["date_created"],
+    "zerver_realmexport": [
+        "date_requested",
+        "date_started",
+        "date_succeeded",
+        "date_failed",
+        "date_deleted",
+    ],
     "zerver_scheduledmessage": ["scheduled_timestamp"],
     "zerver_stream": ["date_created"],
     "zerver_namedusergroup": ["date_created"],
@@ -748,6 +761,13 @@ def get_realm_config() -> Config:
         include_rows="realm_id__in",
     )

+    Config(
+        table="zerver_realmexport",
+        model=RealmExport,
+        normal_parent=realm_config,
+        include_rows="realm_id__in",
+    )
+
     Config(
         table="zerver_realmfilter",
         model=RealmFilter,
@@ -1327,16 +1347,16 @@ def export_partial_message_files(
     )

     consented_user_ids: set[int] = set()
-    if export_type == EXPORT_FULL_WITH_CONSENT:
+    if export_type == RealmExport.EXPORT_FULL_WITH_CONSENT:
         consented_user_ids = get_consented_user_ids(realm)

-    if export_type == EXPORT_PUBLIC:
+    if export_type == RealmExport.EXPORT_PUBLIC:
         recipient_streams = Stream.objects.filter(realm=realm, invite_only=False)
         recipient_ids = Recipient.objects.filter(
             type=Recipient.STREAM, type_id__in=recipient_streams
         ).values_list("id", flat=True)
         recipient_ids_for_us = get_ids(response["zerver_recipient"]) & set(recipient_ids)
-    elif export_type == EXPORT_FULL_WITH_CONSENT:
+    elif export_type == RealmExport.EXPORT_FULL_WITH_CONSENT:
         public_streams = Stream.objects.filter(realm=realm, invite_only=False)
         public_stream_recipient_ids = Recipient.objects.filter(
             type=Recipient.STREAM, type_id__in=public_streams
@@ -1359,7 +1379,7 @@ def export_partial_message_files(
         # For a full export, we have implicit consent for all users in the export.
         consented_user_ids = user_ids_for_us

-    if export_type == EXPORT_PUBLIC:
+    if export_type == RealmExport.EXPORT_PUBLIC:
         messages_we_received = Message.objects.filter(
             # Uses index: zerver_message_realm_sender_recipient
             realm_id=realm.id,
@@ -1385,7 +1405,7 @@ def export_partial_message_files(
         )
         message_queries.append(messages_we_received)

-    if export_type == EXPORT_FULL_WITH_CONSENT:
+    if export_type == RealmExport.EXPORT_FULL_WITH_CONSENT:
         # Export with member consent requires some careful handling to make sure
         # we only include messages that a consenting user can access.
         has_usermessage_expression = Exists(
@@ -1936,7 +1956,7 @@ def export_emoji_from_local(
     write_records_json_file(output_dir, records)


-def do_write_stats_file_for_realm_export(output_dir: Path) -> None:
+def do_write_stats_file_for_realm_export(output_dir: Path) -> dict[str, int | dict[str, int]]:
     stats_file = os.path.join(output_dir, "stats.json")
     realm_file = os.path.join(output_dir, "realm.json")
     attachment_file = os.path.join(output_dir, "attachment.json")
@@ -1962,6 +1982,8 @@ def do_write_stats_file_for_realm_export(output_dir: Path) -> None:
     with open(stats_file, "wb") as f:
         f.write(orjson.dumps(stats, option=orjson.OPT_INDENT_2))

+    return stats
+

 def get_exportable_scheduled_message_ids(realm: Realm, export_type: int) -> set[int]:
     """
@@ -1969,10 +1991,10 @@ get_exportable_scheduled_message_ids(realm: Realm, export_type: int) -> set[
     public/consent/full export mode.
     """
-    if export_type == EXPORT_PUBLIC:
+    if export_type == RealmExport.EXPORT_PUBLIC:
         return set()

-    if export_type == EXPORT_FULL_WITH_CONSENT:
+    if export_type == RealmExport.EXPORT_FULL_WITH_CONSENT:
         sender_ids = get_consented_user_ids(realm)
         return set(
             ScheduledMessage.objects.filter(sender_id__in=sender_ids, realm=realm).values_list(
@@ -1990,7 +2012,7 @@ def do_export_realm(
     export_type: int,
     exportable_user_ids: set[int] | None = None,
     export_as_active: bool | None = None,
-) -> str:
+) -> tuple[str, dict[str, int | dict[str, int]]]:
     response: TableData = {}

     # We need at least one thread running to export
@@ -2065,13 +2087,13 @@ def do_export_realm(
         launch_user_message_subprocesses(
             threads=threads,
             output_dir=output_dir,
-            export_full_with_consent=export_type == EXPORT_FULL_WITH_CONSENT,
+            export_full_with_consent=export_type == RealmExport.EXPORT_FULL_WITH_CONSENT,
         )

     logging.info("Finished exporting %s", realm.string_id)
     create_soft_link(source=output_dir, in_progress=False)

-    do_write_stats_file_for_realm_export(output_dir)
+    stats = do_write_stats_file_for_realm_export(output_dir)

     logging.info("Compressing tarball...")
     tarball_path = output_dir.rstrip("/") + ".tar.gz"
@@ -2083,7 +2105,7 @@ def do_export_realm(
             os.path.basename(output_dir),
         ]
     )
-    return tarball_path
+    return tarball_path, stats


 def export_attachment_table(
@@ -2441,35 +2463,97 @@ def get_consented_user_ids(realm: Realm) -> set[int]:


 def export_realm_wrapper(
-    realm: Realm,
+    export_row: RealmExport,
     output_dir: str,
     threads: int,
     upload: bool,
-    export_type: int,
     percent_callback: Callable[[Any], None] | None = None,
     export_as_active: bool | None = None,
 ) -> str | None:
-    tarball_path = do_export_realm(
-        realm=realm,
-        output_dir=output_dir,
-        threads=threads,
-        export_type=export_type,
-        export_as_active=export_as_active,
-    )
-    shutil.rmtree(output_dir)
-    print(f"Tarball written to {tarball_path}")
-    if not upload:
-        return None
+    try:
+        export_row.status = RealmExport.STARTED
+        export_row.date_started = timezone_now()
+        export_row.save(update_fields=["status", "date_started"])
+
+        tarball_path, stats = do_export_realm(
+            realm=export_row.realm,
+            output_dir=output_dir,
+            threads=threads,
+            export_type=export_row.type,
+            export_as_active=export_as_active,
+        )
+
+        RealmAuditLog.objects.create(
+            acting_user=export_row.acting_user,
+            realm=export_row.realm,
+            event_type=AuditLogEventType.REALM_EXPORTED,
+            event_time=timezone_now(),
+            extra_data={"realm_export_id": export_row.id},
+        )
+
+        shutil.rmtree(output_dir)
+        print(f"Tarball written to {tarball_path}")
+
+        print("Calculating SHA-256 checksum of tarball...")
+
+        # TODO: We can directly use 'hashlib.file_digest'. (New in Python 3.11)
+        sha256_hash = hashlib.sha256()
+        with open(tarball_path, "rb") as f:
+            buf = bytearray(2**18)
+            view = memoryview(buf)
+            while True:
+                size = f.readinto(buf)
+                if size == 0:
+                    break
+                sha256_hash.update(view[:size])
+
+        export_row.sha256sum_hex = sha256_hash.hexdigest()
+        export_row.tarball_size_bytes = os.path.getsize(tarball_path)
+        export_row.status = RealmExport.SUCCEEDED
+        export_row.date_succeeded = timezone_now()
+        export_row.stats = stats
+
+        print(f"SHA-256 checksum is {export_row.sha256sum_hex}")
+
+        if not upload:
+            export_row.save(
+                update_fields=[
+                    "sha256sum_hex",
+                    "tarball_size_bytes",
+                    "status",
+                    "date_succeeded",
+                    "stats",
+                ]
+            )
+            return None

-    print("Uploading export tarball...")
-    public_url = zerver.lib.upload.upload_backend.upload_export_tarball(
-        realm, tarball_path, percent_callback=percent_callback
-    )
-    print(f"\nUploaded to {public_url}")
+        print("Uploading export tarball...")
+        public_url = zerver.lib.upload.upload_backend.upload_export_tarball(
+            export_row.realm, tarball_path, percent_callback=percent_callback
+        )
+        print(f"\nUploaded to {public_url}")

-    os.remove(tarball_path)
-    print(f"Successfully deleted the tarball at {tarball_path}")
-    return public_url
+        # Update the export_path field now that the export is complete.
+        export_row.export_path = urlsplit(public_url).path
+        export_row.save(
+            update_fields=[
+                "sha256sum_hex",
+                "tarball_size_bytes",
+                "status",
+                "date_succeeded",
+                "stats",
+                "export_path",
+            ]
+        )
+
+        os.remove(tarball_path)
+        print(f"Successfully deleted the tarball at {tarball_path}")
+        return public_url
+    except Exception:
+        export_row.status = RealmExport.FAILED
+        export_row.date_failed = timezone_now()
+        export_row.save(update_fields=["status", "date_failed"])
+        raise


 def get_realm_exports_serialized(realm: Realm) -> list[dict[str, Any]]:
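
The TODO in the checksum block above points at hashlib.file_digest(); a
minimal sketch of the drop-in it suggests, once Python 3.11 can be assumed:

    import hashlib

    with open(tarball_path, "rb") as f:
        sha256sum_hex = hashlib.file_digest(f, "sha256").hexdigest()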
@@ -2479,31 +2563,26 @@ get_realm_exports_serialized(realm: Realm) -> list[dict[str, Any]]:
     # TODO: We should return those via the API as well, with an
     # appropriate way to express for who issued them; this requires an
     # API change.
-    all_exports = RealmAuditLog.objects.filter(
-        realm=realm, event_type=AuditLogEventType.REALM_EXPORTED
-    ).exclude(acting_user=None)
+    all_exports = RealmExport.objects.filter(realm=realm).exclude(acting_user=None)
     exports_dict = {}
     for export in all_exports:
         export_url = None
-        deleted_timestamp = None
-        failed_timestamp = None
-        acting_user = export.acting_user
-
-        export_data = export.extra_data
-
-        deleted_timestamp = export_data.get("deleted_timestamp")
-        failed_timestamp = export_data.get("failed_timestamp")
-        export_path = export_data.get("export_path")
-
-        pending = deleted_timestamp is None and failed_timestamp is None and export_path is None
-        if export_path is not None and not deleted_timestamp:
+        export_path = export.export_path
+        pending = export.status in [RealmExport.REQUESTED, RealmExport.STARTED]
+
+        if export.status == RealmExport.SUCCEEDED:
+            assert export_path is not None
             export_url = zerver.lib.upload.upload_backend.get_export_tarball_url(realm, export_path)
+
+        deleted_timestamp = (
+            datetime_to_timestamp(export.date_deleted) if export.date_deleted else None
+        )
+        failed_timestamp = datetime_to_timestamp(export.date_failed) if export.date_failed else None
+        acting_user = export.acting_user
         assert acting_user is not None
         exports_dict[export.id] = dict(
             id=export.id,
-            export_time=export.event_time.timestamp(),
+            export_time=datetime_to_timestamp(export.date_requested),
             acting_user_id=acting_user.id,
             export_url=export_url,
             deleted_timestamp=deleted_timestamp,


@@ -11,13 +11,7 @@ from typing_extensions import override
 from zerver.actions.realm_settings import do_deactivate_realm
 from zerver.lib.export import export_realm_wrapper
 from zerver.lib.management import ZulipBaseCommand
-from zerver.models import RealmAuditLog
-from zerver.models.realm_audit_logs import AuditLogEventType
-from zerver.models.realms import (
-    EXPORT_FULL_WITH_CONSENT,
-    EXPORT_FULL_WITHOUT_CONSENT,
-    EXPORT_PUBLIC,
-)
+from zerver.models import RealmExport


 class Command(ZulipBaseCommand):
@@ -169,26 +163,27 @@ class Command(ZulipBaseCommand):
         def percent_callback(bytes_transferred: Any) -> None:
             print(end=".", flush=True)

-        RealmAuditLog.objects.create(
-            acting_user=None,
+        if public_only:
+            export_type = RealmExport.EXPORT_PUBLIC
+        elif export_full_with_consent:
+            export_type = RealmExport.EXPORT_FULL_WITH_CONSENT
+        else:
+            export_type = RealmExport.EXPORT_FULL_WITHOUT_CONSENT
+
+        export_row = RealmExport.objects.create(
             realm=realm,
-            event_type=AuditLogEventType.REALM_EXPORTED,
-            event_time=timezone_now(),
+            type=export_type,
+            acting_user=None,
+            status=RealmExport.REQUESTED,
+            date_requested=timezone_now(),
         )

         # Allows us to trigger exports separately from command line argument parsing
-        if public_only:
-            export_type = EXPORT_PUBLIC
-        elif export_full_with_consent:
-            export_type = EXPORT_FULL_WITH_CONSENT
-        else:
-            export_type = EXPORT_FULL_WITHOUT_CONSENT
         export_realm_wrapper(
-            realm=realm,
+            export_row=export_row,
             output_dir=output_dir,
             threads=num_threads,
             upload=options["upload"],
-            export_type=export_type,
             percent_callback=percent_callback,
             export_as_active=True if options["deactivate_realm"] else None,
         )
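
A usage sketch mirroring the call exercised by the management-command test
later in this commit; each run now records a RealmExport row with
status=REQUESTED before the export starts:

    from django.core.management import call_command

    call_command("export", "-r=zulip", "--export-full-with-consent")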


@@ -0,0 +1,120 @@
+# Generated by Django 5.0.9 on 2024-10-04 09:57
+
+from datetime import datetime, timezone
+
+import django.db.models.deletion
+from django.conf import settings
+from django.db import migrations, models
+from django.db.backends.base.schema import BaseDatabaseSchemaEditor
+from django.db.migrations.state import StateApps
+
+
+def timestamp_to_datetime(timestamp: float | None) -> datetime | None:
+    if timestamp is None:
+        return None
+    return datetime.fromtimestamp(float(timestamp), tz=timezone.utc)
+
+
+def datetime_to_timestamp(dt: datetime) -> int:
+    return int(dt.timestamp())
+
+
+def backfill_realm_export(apps: StateApps, schema_editor: BaseDatabaseSchemaEditor) -> None:
+    REALM_EXPORTED = 206
+
+    REQUESTED = 1
+    STARTED = 2
+    SUCCEEDED = 3
+    FAILED = 4
+    DELETED = 5
+
+    EXPORT_PUBLIC = 1
+
+    RealmAuditLog = apps.get_model("zerver", "RealmAuditLog")
+    RealmExport = apps.get_model("zerver", "RealmExport")
+
+    for audit_log in RealmAuditLog.objects.filter(event_type=REALM_EXPORTED).order_by("id"):
+        if audit_log.acting_user is None:
+            # This audit_log is for an export made through shell.
+            # We don't have enough data to determine if the export was
+            # successful or failed.
+            continue
+
+        extra_data = audit_log.extra_data
+        started_timestamp = extra_data.get("started_timestamp")
+        failed_timestamp = extra_data.get("failed_timestamp")
+        deleted_timestamp = extra_data.get("deleted_timestamp")
+        export_path = extra_data.get("export_path")
+
+        status = REQUESTED
+        if deleted_timestamp is not None:
+            status = DELETED
+        elif failed_timestamp is not None:
+            status = FAILED
+        elif export_path is not None:
+            status = SUCCEEDED
+        elif started_timestamp is not None:
+            status = STARTED
+
+        # We don't have historical data to set `date_succeeded`.
+        RealmExport.objects.create(
+            realm=audit_log.realm,
+            acting_user=audit_log.acting_user,
+            type=EXPORT_PUBLIC,
+            status=status,
+            date_requested=audit_log.event_time,
+            date_started=timestamp_to_datetime(started_timestamp),
+            date_failed=timestamp_to_datetime(failed_timestamp),
+            date_deleted=timestamp_to_datetime(deleted_timestamp),
+            export_path=export_path,
+        )
+
+
+def reverse_code(apps: StateApps, schema_editor: BaseDatabaseSchemaEditor) -> None:
+    RealmExport = apps.get_model("zerver", "RealmExport")
+    RealmExport.objects.all().delete()
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("zerver", "0594_remove_realm_user_group_edit_policy"),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name="RealmExport",
+            fields=[
+                (
+                    "id",
+                    models.BigAutoField(
+                        auto_created=True, primary_key=True, serialize=False, verbose_name="ID"
+                    ),
+                ),
+                ("type", models.PositiveSmallIntegerField(default=1)),
+                ("status", models.PositiveSmallIntegerField(default=1)),
+                ("date_requested", models.DateTimeField()),
+                ("date_started", models.DateTimeField(default=None, null=True)),
+                ("date_succeeded", models.DateTimeField(default=None, null=True)),
+                ("date_failed", models.DateTimeField(default=None, null=True)),
+                ("date_deleted", models.DateTimeField(default=None, null=True)),
+                ("export_path", models.TextField(default=None, null=True)),
+                ("sha256sum_hex", models.CharField(default=None, max_length=64, null=True)),
+                ("tarball_size_bytes", models.PositiveIntegerField(default=None, null=True)),
+                ("stats", models.JSONField(default=None, null=True)),
+                (
+                    "acting_user",
+                    models.ForeignKey(
+                        null=True,
+                        on_delete=django.db.models.deletion.SET_NULL,
+                        to=settings.AUTH_USER_MODEL,
+                    ),
+                ),
+                (
+                    "realm",
+                    models.ForeignKey(
+                        on_delete=django.db.models.deletion.CASCADE, to="zerver.realm"
+                    ),
+                ),
+            ],
+        ),
+        migrations.RunPython(backfill_realm_export, reverse_code=reverse_code, elidable=True),
+    ]
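
A worked example of the status resolution in backfill_realm_export above, as
a standalone mirror of its elif chain (the sample extra_data is assumed; real
values come from old RealmAuditLog rows):

    def resolve_status(extra_data: dict) -> int:
        # Same precedence as the migration: deleted > failed > succeeded > started.
        REQUESTED, STARTED, SUCCEEDED, FAILED, DELETED = 1, 2, 3, 4, 5
        if extra_data.get("deleted_timestamp") is not None:
            return DELETED
        if extra_data.get("failed_timestamp") is not None:
            return FAILED
        if extra_data.get("export_path") is not None:
            return SUCCEEDED
        if extra_data.get("started_timestamp") is not None:
            return STARTED
        return REQUESTED

    # An old row that recorded an export_path but no failure/deletion backfills
    # as SUCCEEDED; date_succeeded stays NULL, as the migration comment notes.
    assert resolve_status({"started_timestamp": 1.7e9, "export_path": "/a.tar.gz"}) == 3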


@@ -49,6 +49,7 @@ from zerver.models.realm_playgrounds import RealmPlayground as RealmPlayground
 from zerver.models.realms import Realm as Realm
 from zerver.models.realms import RealmAuthenticationMethod as RealmAuthenticationMethod
 from zerver.models.realms import RealmDomain as RealmDomain
+from zerver.models.realms import RealmExport as RealmExport
 from zerver.models.recipients import DirectMessageGroup as DirectMessageGroup
 from zerver.models.recipients import Recipient as Recipient
 from zerver.models.saved_snippets import SavedSnippet as SavedSnippet


@@ -1313,12 +1313,36 @@ def get_fake_email_domain(realm_host: str) -> str:
     return settings.FAKE_EMAIL_DOMAIN


-# TODO: Move this into a new ReamExport model.
-EXPORT_PUBLIC = 1
-EXPORT_FULL_WITH_CONSENT = 2
-EXPORT_FULL_WITHOUT_CONSENT = 3
-EXPORT_TYPES = [
-    EXPORT_PUBLIC,
-    EXPORT_FULL_WITH_CONSENT,
-    EXPORT_FULL_WITHOUT_CONSENT,
-]
+class RealmExport(models.Model):
+    """Every data export is recorded in this table."""
+
+    realm = models.ForeignKey(Realm, on_delete=CASCADE)
+
+    EXPORT_PUBLIC = 1
+    EXPORT_FULL_WITH_CONSENT = 2
+    EXPORT_FULL_WITHOUT_CONSENT = 3
+    EXPORT_TYPES = [
+        EXPORT_PUBLIC,
+        EXPORT_FULL_WITH_CONSENT,
+        EXPORT_FULL_WITHOUT_CONSENT,
+    ]
+    type = models.PositiveSmallIntegerField(default=EXPORT_PUBLIC)
+
+    REQUESTED = 1
+    STARTED = 2
+    SUCCEEDED = 3
+    FAILED = 4
+    DELETED = 5
+    status = models.PositiveSmallIntegerField(default=REQUESTED)
+
+    date_requested = models.DateTimeField()
+    date_started = models.DateTimeField(default=None, null=True)
+    date_succeeded = models.DateTimeField(default=None, null=True)
+    date_failed = models.DateTimeField(default=None, null=True)
+    date_deleted = models.DateTimeField(default=None, null=True)
+
+    acting_user = models.ForeignKey("UserProfile", null=True, on_delete=models.SET_NULL)
+    export_path = models.TextField(default=None, null=True)
+    sha256sum_hex = models.CharField(default=None, null=True, max_length=64)
+    tarball_size_bytes = models.PositiveIntegerField(default=None, null=True)
+    stats = models.JSONField(default=None, null=True)
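
With dedicated columns, status checks and rate limiting become direct queries
instead of RealmAuditLog event_type filters. A sketch, shaped like the
rate-limit query in the views change later in this commit (the helper name is
illustrative):

    from datetime import timedelta

    from django.utils.timezone import now as timezone_now

    from zerver.models import Realm, RealmExport

    def recent_export_count(realm: Realm) -> int:
        # Count exports requested in the last week for this realm.
        week_ago = timezone_now() - timedelta(days=7)
        return RealmExport.objects.filter(
            realm=realm, date_requested__gte=week_ago
        ).count()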


@@ -238,6 +238,7 @@ from zerver.models import (
     Realm,
     RealmAuditLog,
     RealmDomain,
+    RealmExport,
     RealmFilter,
     RealmPlayground,
     RealmUserDefault,
@@ -3378,7 +3379,7 @@ class NormalActionsTest(BaseAction):
         with mock.patch(
             "zerver.lib.export.do_export_realm",
-            return_value=create_dummy_file("test-export.tar.gz"),
+            return_value=(create_dummy_file("test-export.tar.gz"), dict()),
         ):
             with (
                 stdout_suppressed(),
@@ -3407,13 +3408,11 @@ class NormalActionsTest(BaseAction):
         )

         # Now we check the deletion of the export.
-        audit_log_entry = RealmAuditLog.objects.filter(
-            event_type=AuditLogEventType.REALM_EXPORTED
-        ).first()
-        assert audit_log_entry is not None
-        audit_log_entry_id = audit_log_entry.id
+        export_row = RealmExport.objects.first()
+        assert export_row is not None
+        export_row_id = export_row.id
         with self.verify_action(state_change_expected=False, num_events=1) as events:
-            self.client_delete(f"/json/export/realm/{audit_log_entry_id}")
+            self.client_delete(f"/json/export/realm/{export_row_id}")

         check_realm_export(
             "events[0]",


@@ -76,6 +76,7 @@ from zerver.models import (
     Realm,
     RealmAuditLog,
     RealmEmoji,
+    RealmExport,
     RealmUserDefault,
     Recipient,
     ScheduledMessage,
@@ -94,12 +95,7 @@ from zerver.models.clients import get_client
 from zerver.models.groups import SystemGroups
 from zerver.models.presence import PresenceSequence
 from zerver.models.realm_audit_logs import AuditLogEventType
-from zerver.models.realms import (
-    EXPORT_FULL_WITH_CONSENT,
-    EXPORT_FULL_WITHOUT_CONSENT,
-    EXPORT_PUBLIC,
-    get_realm,
-)
+from zerver.models.realms import get_realm
 from zerver.models.recipients import get_direct_message_group_hash
 from zerver.models.streams import get_active_streams, get_stream
 from zerver.models.users import get_system_bot, get_user_by_delivery_email
@@ -364,13 +360,13 @@ class RealmImportExportTest(ExportFile):
         export_usermessages_batch(
             input_path=os.path.join(output_dir, "messages-000001.json.partial"),
             output_path=os.path.join(output_dir, "messages-000001.json"),
-            export_full_with_consent=export_type == EXPORT_FULL_WITH_CONSENT,
+            export_full_with_consent=export_type == RealmExport.EXPORT_FULL_WITH_CONSENT,
         )

     def export_realm_and_create_auditlog(
         self,
         original_realm: Realm,
-        export_type: int = EXPORT_FULL_WITHOUT_CONSENT,
+        export_type: int = RealmExport.EXPORT_FULL_WITHOUT_CONSENT,
         exportable_user_ids: set[int] | None = None,
     ) -> None:
         RealmAuditLog.objects.create(
@@ -413,7 +409,7 @@ class RealmImportExportTest(ExportFile):
             is_message_realm_public=True,
         )

-        self.export_realm_and_create_auditlog(realm, export_type=EXPORT_PUBLIC)
+        self.export_realm_and_create_auditlog(realm, export_type=RealmExport.EXPORT_PUBLIC)

         # The attachment row shouldn't have been exported:
         self.assertEqual(read_json("attachment.json")["zerver_attachment"], [])
@@ -662,7 +658,9 @@ class RealmImportExportTest(ExportFile):
             self.example_user("hamlet"), "allow_private_data_export", True, acting_user=None
         )

-        self.export_realm_and_create_auditlog(realm, export_type=EXPORT_FULL_WITH_CONSENT)
+        self.export_realm_and_create_auditlog(
+            realm, export_type=RealmExport.EXPORT_FULL_WITH_CONSENT
+        )

         data = read_json("realm.json")
@@ -1003,7 +1001,7 @@ class RealmImportExportTest(ExportFile):
         for f in getters:
             snapshots[f.__name__] = f(original_realm)

-        self.export_realm(original_realm, export_type=EXPORT_FULL_WITHOUT_CONSENT)
+        self.export_realm(original_realm, export_type=RealmExport.EXPORT_FULL_WITHOUT_CONSENT)

         with self.settings(BILLING_ENABLED=False), self.assertLogs(level="INFO"):
             do_import_realm(get_output_dir(), "test-zulip")


@@ -20,7 +20,7 @@ from zerver.lib.management import ZulipBaseCommand, check_config
 from zerver.lib.test_classes import ZulipTestCase
 from zerver.lib.test_helpers import most_recent_message, stdout_suppressed
 from zerver.models import Realm, Recipient, UserProfile
-from zerver.models.realms import EXPORT_FULL_WITH_CONSENT, get_realm
+from zerver.models.realms import get_realm
 from zerver.models.streams import get_stream
 from zerver.models.users import get_user_profile_by_email
@@ -515,7 +515,6 @@ class TestExport(ZulipTestCase):
     COMMAND_NAME = "export"

     def test_command_to_export_full_with_consent(self) -> None:
-        realm = get_realm("zulip")
         do_change_user_setting(
             self.example_user("iago"), "allow_private_data_export", True, acting_user=None
         )
@@ -529,8 +528,7 @@ class TestExport(ZulipTestCase):
         ):
             call_command(self.COMMAND_NAME, "-r=zulip", "--export-full-with-consent")
         m.assert_called_once_with(
-            realm=realm,
-            export_type=EXPORT_FULL_WITH_CONSENT,
+            export_row=mock.ANY,
             threads=mock.ANY,
             output_dir=mock.ANY,
             percent_callback=mock.ANY,


@@ -18,9 +18,7 @@ from zerver.lib.test_helpers import (
     stdout_suppressed,
     use_s3_backend,
 )
-from zerver.models import Realm, RealmAuditLog, UserProfile
-from zerver.models.realm_audit_logs import AuditLogEventType
-from zerver.models.realms import EXPORT_PUBLIC
+from zerver.models import Realm, RealmExport, UserProfile
 from zerver.views.realm_export import export_realm
@@ -48,7 +46,9 @@ class RealmExportTest(ZulipTestCase):
         tarball_path = create_dummy_file("test-export.tar.gz")

         # Test the export logic.
-        with patch("zerver.lib.export.do_export_realm", return_value=tarball_path) as mock_export:
+        with patch(
+            "zerver.lib.export.do_export_realm", return_value=(tarball_path, dict())
+        ) as mock_export:
             with (
                 self.settings(LOCAL_UPLOADS_DIR=None),
                 stdout_suppressed(),
@@ -61,19 +61,19 @@ class RealmExportTest(ZulipTestCase):
         self.assertFalse(os.path.exists(tarball_path))
         args = mock_export.call_args_list[0][1]
         self.assertEqual(args["realm"], admin.realm)
-        self.assertEqual(args["export_type"], EXPORT_PUBLIC)
+        self.assertEqual(args["export_type"], RealmExport.EXPORT_PUBLIC)
         self.assertTrue(os.path.basename(args["output_dir"]).startswith("zulip-export-"))
         self.assertEqual(args["threads"], 6)

         # Get the entry and test that iago initiated it.
-        audit_log_entry = RealmAuditLog.objects.filter(
-            event_type=AuditLogEventType.REALM_EXPORTED
-        ).first()
-        assert audit_log_entry is not None
-        self.assertEqual(audit_log_entry.acting_user_id, admin.id)
+        export_row = RealmExport.objects.first()
+        assert export_row is not None
+        self.assertEqual(export_row.acting_user_id, admin.id)
+        self.assertEqual(export_row.status, RealmExport.SUCCEEDED)

         # Test that the file is hosted, and the contents are as expected.
-        export_path = audit_log_entry.extra_data["export_path"]
+        export_path = export_row.export_path
+        assert export_path is not None
         assert export_path.startswith("/")
         path_id = export_path.removeprefix("/")
         self.assertEqual(bucket.Object(path_id).get()["Body"].read(), b"zulip!")
@@ -83,7 +83,7 @@ class RealmExportTest(ZulipTestCase):
         # Test that the export we have is the export we created.
         export_dict = response_dict["exports"]
-        self.assertEqual(export_dict[0]["id"], audit_log_entry.id)
+        self.assertEqual(export_dict[0]["id"], export_row.id)
         parsed_url = urlsplit(export_dict[0]["export_url"])
         self.assertEqual(
             parsed_url._replace(query="").geturl(),
@@ -92,22 +92,20 @@ class RealmExportTest(ZulipTestCase):
         self.assertEqual(export_dict[0]["acting_user_id"], admin.id)
         self.assert_length(
             export_dict,
-            RealmAuditLog.objects.filter(
-                realm=admin.realm, event_type=AuditLogEventType.REALM_EXPORTED
-            ).count(),
+            RealmExport.objects.filter(realm=admin.realm).count(),
         )

         # Finally, delete the file.
-        result = self.client_delete(f"/json/export/realm/{audit_log_entry.id}")
+        result = self.client_delete(f"/json/export/realm/{export_row.id}")
         self.assert_json_success(result)
         with self.assertRaises(botocore.exceptions.ClientError):
             bucket.Object(path_id).load()

-        # Try to delete an export with a `deleted_timestamp` key.
-        audit_log_entry.refresh_from_db()
-        export_data = audit_log_entry.extra_data
-        self.assertIn("deleted_timestamp", export_data)
-        result = self.client_delete(f"/json/export/realm/{audit_log_entry.id}")
+        # Try to delete an export with a `DELETED` status.
+        export_row.refresh_from_db()
+        self.assertEqual(export_row.status, RealmExport.DELETED)
+        self.assertIsNotNone(export_row.date_deleted)
+        result = self.client_delete(f"/json/export/realm/{export_row.id}")
         self.assert_json_error(result, "Export already deleted")

         # Now try to delete a non-existent export.
@@ -127,9 +125,9 @@ class RealmExportTest(ZulipTestCase):
             export_type: int,
             exportable_user_ids: set[int] | None = None,
             export_as_active: bool | None = None,
-        ) -> str:
+        ) -> tuple[str, dict[str, int | dict[str, int]]]:
             self.assertEqual(realm, admin.realm)
-            self.assertEqual(export_type, EXPORT_PUBLIC)
+            self.assertEqual(export_type, RealmExport.EXPORT_PUBLIC)
             self.assertTrue(os.path.basename(output_dir).startswith("zulip-export-"))
             self.assertEqual(threads, 6)
@@ -149,7 +147,7 @@ class RealmExportTest(ZulipTestCase):
             result = self.client_delete(f"/json/export/realm/{id}")
             self.assert_json_error(result, "Export still in progress")

-            return tarball_path
+            return tarball_path, dict()

         with patch(
             "zerver.lib.export.do_export_realm", side_effect=fake_export_realm
@@ -166,15 +164,15 @@ class RealmExportTest(ZulipTestCase):
         self.assertFalse(os.path.exists(tarball_path))

         # Get the entry and test that iago initiated it.
-        audit_log_entry = RealmAuditLog.objects.filter(
-            event_type=AuditLogEventType.REALM_EXPORTED
-        ).first()
-        assert audit_log_entry is not None
-        self.assertEqual(audit_log_entry.id, data["id"])
-        self.assertEqual(audit_log_entry.acting_user_id, admin.id)
+        export_row = RealmExport.objects.first()
+        assert export_row is not None
+        self.assertEqual(export_row.id, data["id"])
+        self.assertEqual(export_row.acting_user_id, admin.id)
+        self.assertEqual(export_row.status, RealmExport.SUCCEEDED)

         # Test that the file is hosted, and the contents are as expected.
-        export_path = audit_log_entry.extra_data.get("export_path")
+        export_path = export_row.export_path
         assert export_path is not None
         response = self.client_get(export_path)
         self.assertEqual(response.status_code, 200)
         self.assertEqual(response.getvalue(), b"zulip!")
@@ -184,27 +182,22 @@ class RealmExportTest(ZulipTestCase):
         # Test that the export we have is the export we created.
         export_dict = response_dict["exports"]
-        self.assertEqual(export_dict[0]["id"], audit_log_entry.id)
+        self.assertEqual(export_dict[0]["id"], export_row.id)
         self.assertEqual(export_dict[0]["export_url"], admin.realm.url + export_path)
         self.assertEqual(export_dict[0]["acting_user_id"], admin.id)
-        self.assert_length(
-            export_dict,
-            RealmAuditLog.objects.filter(
-                realm=admin.realm, event_type=AuditLogEventType.REALM_EXPORTED
-            ).count(),
-        )
+        self.assert_length(export_dict, RealmExport.objects.filter(realm=admin.realm).count())

         # Finally, delete the file.
-        result = self.client_delete(f"/json/export/realm/{audit_log_entry.id}")
+        result = self.client_delete(f"/json/export/realm/{export_row.id}")
         self.assert_json_success(result)
         response = self.client_get(export_path)
         self.assertEqual(response.status_code, 404)

-        # Try to delete an export with a `deleted_timestamp` key.
-        audit_log_entry.refresh_from_db()
-        export_data = audit_log_entry.extra_data
-        self.assertIn("deleted_timestamp", export_data)
-        result = self.client_delete(f"/json/export/realm/{audit_log_entry.id}")
+        # Try to delete an export with a `DELETED` status.
+        export_row.refresh_from_db()
+        self.assertEqual(export_row.status, RealmExport.DELETED)
+        self.assertIsNotNone(export_row.date_deleted)
+        result = self.client_delete(f"/json/export/realm/{export_row.id}")
         self.assert_json_error(result, "Export already deleted")

         # Now try to delete a non-existent export.
@@ -244,6 +237,9 @@ class RealmExportTest(ZulipTestCase):
         self.assertIsNotNone(export_dict[0]["failed_timestamp"])
         self.assertEqual(export_dict[0]["acting_user_id"], admin.id)

+        export_row = RealmExport.objects.get(id=export_id)
+        self.assertEqual(export_row.status, RealmExport.FAILED)
+
         # Check that we can't delete it
         result = self.client_delete(f"/json/export/realm/{export_id}")
         self.assert_json_error(result, "Export failed, nothing to delete")
@@ -258,10 +254,8 @@ class RealmExportTest(ZulipTestCase):
             "deferred_work",
             {
                 "type": "realm_export",
-                "time": 42,
-                "realm_id": admin.realm.id,
                 "user_profile_id": admin.id,
-                "id": export_id,
+                "realm_export_id": export_id,
             },
         )
         mock_export.assert_not_called()
@@ -279,18 +273,19 @@ class RealmExportTest(ZulipTestCase):
         admin = self.example_user("iago")
         self.login_user(admin)

-        current_log = RealmAuditLog.objects.filter(event_type=AuditLogEventType.REALM_EXPORTED)
-        self.assert_length(current_log, 0)
+        export_rows = RealmExport.objects.all()
+        self.assert_length(export_rows, 0)

         exports = [
-            RealmAuditLog(
+            RealmExport(
                 realm=admin.realm,
-                event_type=AuditLogEventType.REALM_EXPORTED,
-                event_time=timezone_now(),
+                type=RealmExport.EXPORT_PUBLIC,
+                date_requested=timezone_now(),
+                acting_user=admin,
             )
             for i in range(5)
         ]
-        RealmAuditLog.objects.bulk_create(exports)
+        RealmExport.objects.bulk_create(exports)

         with self.assertRaises(JsonableError) as error:
             export_realm(HostRequestMock(), admin)


@@ -13,16 +13,13 @@ from zerver.lib.exceptions import JsonableError
 from zerver.lib.export import get_realm_exports_serialized
 from zerver.lib.queue import queue_json_publish
 from zerver.lib.response import json_success
-from zerver.models import RealmAuditLog, UserProfile
-from zerver.models.realm_audit_logs import AuditLogEventType
+from zerver.models import RealmExport, UserProfile


 @transaction.atomic(durable=True)
 @require_realm_admin
 def export_realm(request: HttpRequest, user: UserProfile) -> HttpResponse:
     # Currently only supports public-data-only exports.
-    event_type = AuditLogEventType.REALM_EXPORTED
-    event_time = timezone_now()
     realm = user.realm
     EXPORT_LIMIT = 5
@@ -37,9 +34,9 @@ def export_realm(request: HttpRequest, user: UserProfile) -> HttpResponse:
     # Filter based upon the number of events that have occurred in the delta
     # If we are at the limit, the incoming request is rejected
-    event_time_delta = event_time - timedelta(days=7)
-    limit_check = RealmAuditLog.objects.filter(
-        realm=realm, event_type=event_type, event_time__gte=event_time_delta
+    event_time_delta = timezone_now() - timedelta(days=7)
+    limit_check = RealmExport.objects.filter(
+        realm=realm, date_requested__gte=event_time_delta
     ).count()
     if limit_check >= EXPORT_LIMIT:
         raise JsonableError(_("Exceeded rate limit."))
@@ -68,8 +65,12 @@ def export_realm(request: HttpRequest, user: UserProfile) -> HttpResponse:
         )
     )

-    row = RealmAuditLog.objects.create(
-        realm=realm, event_type=event_type, event_time=event_time, acting_user=user
+    row = RealmExport.objects.create(
+        realm=realm,
+        type=RealmExport.EXPORT_PUBLIC,
+        acting_user=user,
+        status=RealmExport.REQUESTED,
+        date_requested=timezone_now(),
     )

     # Allow for UI updates on a pending export
@@ -79,10 +80,8 @@ def export_realm(request: HttpRequest, user: UserProfile) -> HttpResponse:
     # killing the process after 60s
     event = {
         "type": "realm_export",
-        "time": event_time,
-        "realm_id": realm.id,
         "user_profile_id": user.id,
-        "id": row.id,
+        "realm_export_id": row.id,
     }
     transaction.on_commit(lambda: queue_json_publish("deferred_work", event))
     return json_success(request, data={"id": row.id})
@@ -97,20 +96,17 @@ def get_realm_exports(request: HttpRequest, user: UserProfile) -> HttpResponse:
 @require_realm_admin
 def delete_realm_export(request: HttpRequest, user: UserProfile, export_id: int) -> HttpResponse:
     try:
-        audit_log_entry = RealmAuditLog.objects.get(
-            id=export_id, realm=user.realm, event_type=AuditLogEventType.REALM_EXPORTED
-        )
-    except RealmAuditLog.DoesNotExist:
+        export_row = RealmExport.objects.get(id=export_id)
+    except RealmExport.DoesNotExist:
         raise JsonableError(_("Invalid data export ID"))

-    export_data = audit_log_entry.extra_data
-    if export_data.get("deleted_timestamp") is not None:
+    if export_row.status == RealmExport.DELETED:
         raise JsonableError(_("Export already deleted"))
-    if export_data.get("export_path") is None:
-        if export_data.get("failed_timestamp") is not None:
-            raise JsonableError(_("Export failed, nothing to delete"))
+    if export_row.status == RealmExport.FAILED:
+        raise JsonableError(_("Export failed, nothing to delete"))
+    if export_row.status in [RealmExport.REQUESTED, RealmExport.STARTED]:
         raise JsonableError(_("Export still in progress"))
-    do_delete_realm_export(audit_log_entry)
+
+    do_delete_realm_export(export_row)
     return json_success(request)
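
For reference, the slimmed-down deferred_work payload built in export_realm
above; the time and realm_id keys are gone, and the worker now derives the
realm from user_profile_id:

    event = {
        "type": "realm_export",
        "user_profile_id": user.id,
        "realm_export_id": row.id,
    }
    queue_json_publish("deferred_work", event)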


@@ -3,7 +3,6 @@ import logging
 import tempfile
 import time
 from typing import Any
-from urllib.parse import urlsplit

 from django.conf import settings
 from django.db import transaction
@@ -25,8 +24,7 @@ from zerver.lib.remote_server import (
 )
 from zerver.lib.soft_deactivation import reactivate_user_if_soft_deactivated
 from zerver.lib.upload import handle_reupload_emojis_event
-from zerver.models import Message, Realm, RealmAuditLog, Stream, UserMessage
-from zerver.models.realms import EXPORT_PUBLIC
+from zerver.models import Message, Realm, RealmAuditLog, RealmExport, Stream, UserMessage
 from zerver.models.users import get_system_bot, get_user_profile_by_id
 from zerver.worker.base import QueueProcessingWorker, assign_queue
@@ -126,60 +124,70 @@ class DeferredWorker(QueueProcessingWorker):
                 retry_event(self.queue_name, event, failure_processor)
         elif event["type"] == "realm_export":
-            realm = Realm.objects.get(id=event["realm_id"])
             output_dir = tempfile.mkdtemp(prefix="zulip-export-")
-            export_event = RealmAuditLog.objects.get(id=event["id"])
             user_profile = get_user_profile_by_id(event["user_profile_id"])
+            realm = user_profile.realm

-            extra_data = export_event.extra_data
-            if extra_data.get("started_timestamp") is not None:
+            export_event = None
+            if "realm_export_id" in event:
+                export_row = RealmExport.objects.get(id=event["realm_export_id"])
+            else:
+                # Handle existing events in the queue before we switched to RealmExport model.
+                export_event = RealmAuditLog.objects.get(id=event["id"])
+                extra_data = export_event.extra_data
+                if extra_data.get("export_row_id") is not None:
+                    export_row = RealmExport.objects.get(id=extra_data["export_row_id"])
+                else:
+                    export_row = RealmExport.objects.create(
+                        realm=realm,
+                        type=RealmExport.EXPORT_PUBLIC,
+                        acting_user=user_profile,
+                        status=RealmExport.REQUESTED,
+                        date_requested=event["time"],
+                    )
+                    export_event.extra_data = {"export_row_id": export_row.id}
+                    export_event.save(update_fields=["extra_data"])
+
+            if export_row.status != RealmExport.REQUESTED:
                 logger.error(
                     "Marking export for realm %s as failed due to retry -- possible OOM during export?",
                     realm.string_id,
                 )
-                extra_data["failed_timestamp"] = timezone_now().timestamp()
-                export_event.extra_data = extra_data
-                export_event.save(update_fields=["extra_data"])
+                export_row.status = RealmExport.FAILED
+                export_row.date_failed = timezone_now()
+                export_row.save(update_fields=["status", "date_failed"])
                 notify_realm_export(realm)
                 return

-            extra_data["started_timestamp"] = timezone_now().timestamp()
-            export_event.extra_data = extra_data
-            export_event.save(update_fields=["extra_data"])
-
             logger.info(
                 "Starting realm export for realm %s into %s, initiated by user_profile_id %s",
                 realm.string_id,
                 output_dir,
-                event["user_profile_id"],
+                user_profile.id,
             )

             try:
-                public_url = export_realm_wrapper(
-                    realm=realm,
+                export_realm_wrapper(
+                    export_row=export_row,
                     output_dir=output_dir,
                     threads=1 if self.threaded else 6,
                     upload=True,
-                    export_type=EXPORT_PUBLIC,
                 )
             except Exception:
-                extra_data["failed_timestamp"] = timezone_now().timestamp()
-                export_event.extra_data = extra_data
-                export_event.save(update_fields=["extra_data"])
                 logging.exception(
                     "Data export for %s failed after %s",
-                    user_profile.realm.string_id,
+                    realm.string_id,
                     time.time() - start,
                     stack_info=True,
                 )
                 notify_realm_export(realm)
                 return
-            assert public_url is not None

-            # Update the extra_data field now that the export is complete.
-            extra_data["export_path"] = urlsplit(public_url).path
-            export_event.extra_data = extra_data
-            export_event.save(update_fields=["extra_data"])
+            # We create RealmAuditLog entry in 'export_realm_wrapper'.
+            # Delete the old entry created before we switched to RealmExport model.
+            if export_event:
+                export_event.delete()

             # Send a direct message notification letting the user who
             # triggered the export know the export finished.
@@ -198,7 +206,7 @@ class DeferredWorker(QueueProcessingWorker):
             notify_realm_export(realm)
             logging.info(
                 "Completed data export for %s in %s",
-                user_profile.realm.string_id,
+                realm.string_id,
                 time.time() - start,
             )
         elif event["type"] == "reupload_realm_emoji":