2020-06-11 00:54:34 +02:00
|
|
|
import os
|
2023-05-16 18:18:32 +02:00
|
|
|
from typing import Optional, Set
|
2020-05-26 07:16:25 +02:00
|
|
|
from unittest.mock import patch
|
2019-03-27 00:57:33 +01:00
|
|
|
|
2020-06-11 00:54:34 +02:00
|
|
|
import botocore.exceptions
|
2020-08-07 01:09:47 +02:00
|
|
|
import orjson
|
2019-06-22 01:25:53 +02:00
|
|
|
from django.conf import settings
|
2020-06-11 00:54:34 +02:00
|
|
|
from django.utils.timezone import now as timezone_now
|
2019-03-27 00:57:33 +01:00
|
|
|
|
2020-06-11 00:54:34 +02:00
|
|
|
from analytics.models import RealmCount
|
2019-03-27 00:57:33 +01:00
|
|
|
from zerver.lib.exceptions import JsonableError
|
2023-05-16 18:19:06 +02:00
|
|
|
from zerver.lib.queue import queue_json_publish
|
2020-06-11 00:54:34 +02:00
|
|
|
from zerver.lib.test_classes import ZulipTestCase
|
|
|
|
from zerver.lib.test_helpers import (
|
2021-07-26 17:13:43 +02:00
|
|
|
HostRequestMock,
|
2020-06-11 00:54:34 +02:00
|
|
|
create_dummy_file,
|
|
|
|
create_s3_buckets,
|
|
|
|
stdout_suppressed,
|
|
|
|
use_s3_backend,
|
|
|
|
)
|
2023-05-16 18:18:32 +02:00
|
|
|
from zerver.models import Realm, RealmAuditLog
|
2019-06-24 02:51:13 +02:00
|
|
|
from zerver.views.realm_export import export_realm
|
2019-06-22 01:25:53 +02:00
|
|
|
|
|
|
|
|
2019-03-27 00:57:33 +01:00
|
|
|
class RealmExportTest(ZulipTestCase):
|
2019-08-11 21:56:05 +02:00
|
|
|
"""
|
|
|
|
API endpoint testing covers the full end-to-end flow
|
|
|
|
from both the S3 and local uploads perspective.
|
|
|
|
|
|
|
|
`test_endpoint_s3` and `test_endpoint_local_uploads` follow
|
|
|
|
an identical pattern, which is documented in both test
|
|
|
|
functions.
|
|
|
|
"""
|
|
|
|
|
2019-03-27 00:57:33 +01:00
|
|
|
def test_export_as_not_admin(self) -> None:
    """A non-administrator must not be able to start a realm export;
    calling the view function directly should raise JsonableError."""
    hamlet = self.example_user("hamlet")
    self.login_user(hamlet)
    with self.assertRaises(JsonableError):
        export_realm(HostRequestMock(), hamlet)
|
2019-03-27 00:57:33 +01:00
|
|
|
|
|
|
|
@use_s3_backend
def test_endpoint_s3(self) -> None:
    """End-to-end test of the realm-export API against the S3 backend.

    Flow: trigger an export via POST, verify the mocked export call's
    arguments and audit-log entry, verify the tarball is hosted in the
    S3 bucket, list exports via GET, then exercise the DELETE endpoint
    (successful delete, double delete, and invalid ID).
    """
    admin = self.example_user("iago")
    self.login_user(admin)
    bucket = create_s3_buckets(settings.S3_AVATAR_BUCKET)[0]
    # Dummy tarball standing in for the real export output (contents "zulip!").
    tarball_path = create_dummy_file("test-export.tar.gz")

    # Test the export logic.
    with patch("zerver.lib.export.do_export_realm", return_value=tarball_path) as mock_export:
        with self.settings(LOCAL_UPLOADS_DIR=None), stdout_suppressed(), self.assertLogs(
            level="INFO"
        ) as info_logs:
            # The export job is queued via on_commit, so execute those
            # callbacks for the work to actually run in this test.
            with self.captureOnCommitCallbacks(execute=True):
                result = self.client_post("/json/export/realm")
        self.assertTrue("INFO:root:Completed data export for zulip in " in info_logs.output[0])
    self.assert_json_success(result)
    # The local tarball is removed after being uploaded to S3.
    self.assertFalse(os.path.exists(tarball_path))
    # Inspect the keyword arguments do_export_realm was invoked with.
    args = mock_export.call_args_list[0][1]
    self.assertEqual(args["realm"], admin.realm)
    self.assertEqual(args["public_only"], True)
    self.assertTrue(os.path.basename(args["output_dir"]).startswith("zulip-export-"))
    self.assertEqual(args["threads"], 6)

    # Get the entry and test that iago initiated it.
    audit_log_entry = RealmAuditLog.objects.filter(
        event_type=RealmAuditLog.REALM_EXPORTED
    ).first()
    assert audit_log_entry is not None
    self.assertEqual(audit_log_entry.acting_user_id, admin.id)

    # Test that the file is hosted, and the contents are as expected.
    extra_data = audit_log_entry.extra_data
    assert extra_data is not None
    export_path = orjson.loads(extra_data)["export_path"]
    assert export_path.startswith("/")
    # Strip the leading "/" to obtain the S3 object key.
    path_id = export_path[1:]
    self.assertEqual(bucket.Object(path_id).get()["Body"].read(), b"zulip!")

    result = self.client_get("/json/export/realm")
    response_dict = self.assert_json_success(result)

    # Test that the export we have is the export we created.
    export_dict = response_dict["exports"]
    self.assertEqual(export_dict[0]["id"], audit_log_entry.id)
    self.assertEqual(
        export_dict[0]["export_url"],
        "https://test-avatar-bucket.s3.amazonaws.com" + export_path,
    )
    self.assertEqual(export_dict[0]["acting_user_id"], admin.id)
    self.assert_length(
        export_dict,
        RealmAuditLog.objects.filter(
            realm=admin.realm, event_type=RealmAuditLog.REALM_EXPORTED
        ).count(),
    )

    # Finally, delete the file.
    result = self.client_delete(f"/json/export/realm/{audit_log_entry.id}")
    self.assert_json_success(result)
    # The S3 object must be gone; load() raises ClientError for a missing key.
    with self.assertRaises(botocore.exceptions.ClientError):
        bucket.Object(path_id).load()

    # Try to delete an export with a `deleted_timestamp` key.
    audit_log_entry.refresh_from_db()
    extra_data = audit_log_entry.extra_data
    assert extra_data is not None
    export_data = orjson.loads(extra_data)
    self.assertIn("deleted_timestamp", export_data)
    result = self.client_delete(f"/json/export/realm/{audit_log_entry.id}")
    self.assert_json_error(result, "Export already deleted")

    # Now try to delete a non-existent export.
    result = self.client_delete("/json/export/realm/0")
    self.assert_json_error(result, "Invalid data export ID")
|
|
|
|
|
2019-03-27 00:57:33 +01:00
|
|
|
def test_endpoint_local_uploads(self) -> None:
    """End-to-end test of the realm-export API with local uploads.

    Mirrors test_endpoint_s3: trigger an export via POST, verify the
    export call's arguments (via a side-effect fake that also checks the
    in-progress state), verify the file is served over HTTP, then
    exercise the DELETE endpoint (successful delete, double delete, and
    invalid ID).
    """
    admin = self.example_user("iago")
    self.login_user(admin)
    # Dummy tarball standing in for the real export output (contents "zulip!").
    tarball_path = create_dummy_file("test-export.tar.gz")

    # Test the export logic.
    def fake_export_realm(
        realm: Realm,
        output_dir: str,
        threads: int,
        exportable_user_ids: Optional[Set[int]] = None,
        public_only: bool = False,
        consent_message_id: Optional[int] = None,
        export_as_active: Optional[bool] = None,
    ) -> str:
        # Verify the arguments the endpoint passes to do_export_realm.
        self.assertEqual(realm, admin.realm)
        self.assertEqual(public_only, True)
        self.assertTrue(os.path.basename(output_dir).startswith("zulip-export-"))
        self.assertEqual(threads, 6)

        # Check that the export shows up as in progress
        result = self.client_get("/json/export/realm")
        response_dict = self.assert_json_success(result)
        export_dict = response_dict["exports"]
        self.assert_length(export_dict, 1)
        # Renamed from `id` to avoid shadowing the builtin.
        export_id = export_dict[0]["id"]
        self.assertEqual(export_dict[0]["pending"], True)
        self.assertIsNone(export_dict[0]["export_url"])
        self.assertIsNone(export_dict[0]["deleted_timestamp"])
        self.assertIsNone(export_dict[0]["failed_timestamp"])
        self.assertEqual(export_dict[0]["acting_user_id"], admin.id)

        # While the export is in progress, we can't delete it
        result = self.client_delete(f"/json/export/realm/{export_id}")
        self.assert_json_error(result, "Export still in progress")

        return tarball_path

    with patch(
        "zerver.lib.export.do_export_realm", side_effect=fake_export_realm
    ) as mock_export:
        with stdout_suppressed(), self.assertLogs(level="INFO") as info_logs:
            # The export job is queued via on_commit, so execute those
            # callbacks for the work to actually run in this test.
            with self.captureOnCommitCallbacks(execute=True):
                result = self.client_post("/json/export/realm")
        self.assertTrue("INFO:root:Completed data export for zulip in " in info_logs.output[0])
    mock_export.assert_called_once()
    data = self.assert_json_success(result)
    # The local tarball is removed after being moved into the uploads store.
    self.assertFalse(os.path.exists(tarball_path))

    # Get the entry and test that iago initiated it.
    audit_log_entry = RealmAuditLog.objects.filter(
        event_type=RealmAuditLog.REALM_EXPORTED
    ).first()
    assert audit_log_entry is not None
    self.assertEqual(audit_log_entry.id, data["id"])
    self.assertEqual(audit_log_entry.acting_user_id, admin.id)

    # Test that the file is hosted, and the contents are as expected.
    extra_data = audit_log_entry.extra_data
    assert extra_data is not None
    export_path = orjson.loads(extra_data).get("export_path")
    response = self.client_get(export_path)
    self.assertEqual(response.status_code, 200)
    self.assertEqual(response.getvalue(), b"zulip!")

    result = self.client_get("/json/export/realm")
    response_dict = self.assert_json_success(result)

    # Test that the export we have is the export we created.
    export_dict = response_dict["exports"]
    self.assertEqual(export_dict[0]["id"], audit_log_entry.id)
    self.assertEqual(export_dict[0]["export_url"], admin.realm.uri + export_path)
    self.assertEqual(export_dict[0]["acting_user_id"], admin.id)
    self.assert_length(
        export_dict,
        RealmAuditLog.objects.filter(
            realm=admin.realm, event_type=RealmAuditLog.REALM_EXPORTED
        ).count(),
    )

    # Finally, delete the file.
    result = self.client_delete(f"/json/export/realm/{audit_log_entry.id}")
    self.assert_json_success(result)
    # The export file should no longer be served.
    response = self.client_get(export_path)
    self.assertEqual(response.status_code, 404)

    # Try to delete an export with a `deleted_timestamp` key.
    audit_log_entry.refresh_from_db()
    extra_data = audit_log_entry.extra_data
    assert extra_data is not None
    export_data = orjson.loads(extra_data)
    self.assertIn("deleted_timestamp", export_data)
    result = self.client_delete(f"/json/export/realm/{audit_log_entry.id}")
    self.assert_json_error(result, "Export already deleted")

    # Now try to delete a non-existent export.
    result = self.client_delete("/json/export/realm/0")
    self.assert_json_error(result, "Invalid data export ID")
|
|
|
|
|
2023-05-16 19:35:41 +02:00
|
|
|
def test_export_failure(self) -> None:
    """Test the failure path: do_export_realm raising an exception.

    Verifies the failure is logged and recorded (failed_timestamp), that
    a failed export cannot be deleted, and that the deferred_work queue
    worker refuses to retry the same export id.
    """
    admin = self.example_user("iago")
    self.login_user(admin)

    with patch(
        "zerver.lib.export.do_export_realm", side_effect=Exception("failure")
    ) as mock_export:
        with stdout_suppressed(), self.assertLogs(level="INFO") as info_logs:
            # The export job is queued via on_commit, so execute those
            # callbacks for the work to actually run in this test.
            with self.captureOnCommitCallbacks(execute=True):
                result = self.client_post("/json/export/realm")
        self.assertTrue(
            info_logs.output[0].startswith("ERROR:root:Data export for zulip failed after ")
        )
    mock_export.assert_called_once()
    # This is a success because the failure is swallowed in the queue worker
    data = self.assert_json_success(result)
    export_id = data["id"]

    # Check that the export shows up as failed
    result = self.client_get("/json/export/realm")
    response_dict = self.assert_json_success(result)
    export_dict = response_dict["exports"]
    self.assert_length(export_dict, 1)
    self.assertEqual(export_dict[0]["id"], export_id)
    self.assertEqual(export_dict[0]["pending"], False)
    self.assertIsNone(export_dict[0]["export_url"])
    self.assertIsNone(export_dict[0]["deleted_timestamp"])
    self.assertIsNotNone(export_dict[0]["failed_timestamp"])
    self.assertEqual(export_dict[0]["acting_user_id"], admin.id)

    # Check that we can't delete it
    result = self.client_delete(f"/json/export/realm/{export_id}")
    self.assert_json_error(result, "Export failed, nothing to delete")

    # If the queue worker sees the same export-id again, it aborts
    # instead of retrying
    with patch("zerver.lib.export.do_export_realm") as mock_export:
        with self.assertLogs(level="INFO") as info_logs:
            # Re-publish an event with the already-failed export id,
            # simulating a queue retry (e.g. after a worker crash).
            queue_json_publish(
                "deferred_work",
                {
                    "type": "realm_export",
                    "time": 42,
                    "realm_id": admin.realm.id,
                    "user_profile_id": admin.id,
                    "id": export_id,
                },
            )
        mock_export.assert_not_called()
        self.assertEqual(
            info_logs.output,
            [
                (
                    "ERROR:zerver.worker.queue_processors:Marking export for realm zulip "
                    "as failed due to retry -- possible OOM during export?"
                )
            ],
        )
|
|
|
|
|
2019-03-27 00:57:33 +01:00
|
|
|
def test_realm_export_rate_limited(self) -> None:
    """Exceeding the per-realm export rate limit raises JsonableError.

    Seeds enough REALM_EXPORTED audit-log rows to exhaust the limit,
    then checks that a further export attempt is rejected.
    """
    admin = self.example_user("iago")
    self.login_user(admin)

    # Sanity check: no exports recorded yet.
    current_log = RealmAuditLog.objects.filter(event_type=RealmAuditLog.REALM_EXPORTED)
    self.assert_length(current_log, 0)

    # Previously a manual append loop with an unused index variable;
    # a comprehension is the idiomatic form (behavior unchanged).
    exports = [
        RealmAuditLog(
            realm=admin.realm,
            event_type=RealmAuditLog.REALM_EXPORTED,
            event_time=timezone_now(),
        )
        for _ in range(5)
    ]
    RealmAuditLog.objects.bulk_create(exports)

    with self.assertRaises(JsonableError) as error:
        export_realm(HostRequestMock(), admin)
    self.assertEqual(str(error.exception), "Exceeded rate limit.")
|
2019-08-13 04:10:09 +02:00
|
|
|
|
|
|
|
def test_upload_and_message_limit(self) -> None:
    """Realms over the upload-space or message-count limits are refused
    a self-service export and told to request a manual one."""
    admin = self.example_user("iago")
    self.login_user(admin)
    realm_count = RealmCount.objects.create(
        realm_id=admin.realm.id,
        end_time=timezone_now(),
        value=0,
        property="messages_sent:message_type:day",
        subgroup="public_stream",
    )
    # Both limit checks produce the same user-facing error.
    expected_error = f"Please request a manual export from {settings.ZULIP_ADMINISTRATOR}."

    # Space limit is set as 10 GiB
    with patch(
        "zerver.models.Realm.currently_used_upload_space_bytes",
        return_value=11 * 1024 * 1024 * 1024,
    ):
        result = self.client_post("/json/export/realm")
    self.assert_json_error(result, expected_error)

    # Message limit is set as 250000
    realm_count.value = 250001
    realm_count.save(update_fields=["value"])
    result = self.client_post("/json/export/realm")
    self.assert_json_error(result, expected_error)
|