import os
from unittest.mock import patch
from urllib.parse import urlsplit

import botocore.exceptions
from django.conf import settings
from django.utils.timezone import now as timezone_now

from analytics.models import RealmCount
from zerver.actions.user_settings import do_change_user_setting
from zerver.lib.exceptions import JsonableError
from zerver.lib.queue import queue_json_publish
from zerver.lib.test_classes import ZulipTestCase
from zerver.lib.test_helpers import (
    HostRequestMock,
    create_dummy_file,
    create_s3_buckets,
    stdout_suppressed,
    use_s3_backend,
)
from zerver.models import Realm, RealmExport, UserProfile
from zerver.views.realm_export import export_realm


class RealmExportTest(ZulipTestCase):
    """
    API endpoint testing covers the full end-to-end flow
    from both the S3 and local uploads perspective.

    `test_endpoint_s3` and `test_endpoint_local_uploads` follow
    an identical pattern, which is documented in both test
    functions.
    """

    def test_export_as_not_admin(self) -> None:
        """Calling `export_realm` as hamlet raises `JsonableError`."""
        user = self.example_user("hamlet")
        self.login_user(user)
        with self.assertRaises(JsonableError):
            export_realm(HostRequestMock(), user)

    @use_s3_backend
    def test_endpoint_s3(self) -> None:
        """Create, list and delete an export with the S3 backend enabled."""
        admin = self.example_user("iago")
        self.login_user(admin)
        bucket = create_s3_buckets(settings.S3_EXPORT_BUCKET)[0]
        tarball_path = create_dummy_file("test-export.tar.gz")

        # Test the export logic.
        with (
            patch(
                "zerver.lib.export.do_export_realm", return_value=(tarball_path, {})
            ) as mock_export,
            self.settings(LOCAL_UPLOADS_DIR=None),
            stdout_suppressed(),
            self.assertLogs(level="INFO") as info_logs,
            self.captureOnCommitCallbacks(execute=True),
        ):
            result = self.client_post("/json/export/realm")
        self.assertIn("INFO:root:Completed data export for zulip in ", info_logs.output[0])
        self.assert_json_success(result)
        self.assertFalse(os.path.exists(tarball_path))

        # Inspect the keyword arguments of the first call to the mocked exporter.
        export_kwargs = mock_export.call_args_list[0].kwargs
        self.assertEqual(export_kwargs["realm"], admin.realm)
        self.assertEqual(export_kwargs["export_type"], RealmExport.EXPORT_PUBLIC)
        self.assertTrue(
            os.path.basename(export_kwargs["output_dir"]).startswith("zulip-export-")
        )
        self.assertEqual(export_kwargs["threads"], 6)

        # Get the entry and test that iago initiated it.
        export_row = RealmExport.objects.first()
        assert export_row is not None
        self.assertEqual(export_row.acting_user_id, admin.id)
        self.assertEqual(export_row.status, RealmExport.SUCCEEDED)

        # Test that the file is hosted, and the contents are as expected.
        export_path = export_row.export_path
        assert export_path is not None
        assert export_path.startswith("/")
        path_id = export_path.removeprefix("/")
        self.assertEqual(bucket.Object(path_id).get()["Body"].read(), b"zulip!")

        result = self.client_get("/json/export/realm")
        response_dict = self.assert_json_success(result)

        # Test that the export we have is the export we created.
        export_dict = response_dict["exports"]
        self.assertEqual(export_dict[0]["id"], export_row.id)
        # Compare the URL with its query string stripped.
        parsed_url = urlsplit(export_dict[0]["export_url"])
        self.assertEqual(
            parsed_url._replace(query="").geturl(),
            "https://test-export-bucket.s3.amazonaws.com" + export_path,
        )
        self.assertEqual(export_dict[0]["acting_user_id"], admin.id)
        self.assert_length(
            export_dict,
            RealmExport.objects.filter(realm=admin.realm).count(),
        )

        # Finally, delete the file.
        result = self.client_delete(f"/json/export/realm/{export_row.id}")
        self.assert_json_success(result)
        with self.assertRaises(botocore.exceptions.ClientError):
            bucket.Object(path_id).load()

        # Try to delete an export with a `DELETED` status.
        export_row.refresh_from_db()
        self.assertEqual(export_row.status, RealmExport.DELETED)
        self.assertIsNotNone(export_row.date_deleted)
        result = self.client_delete(f"/json/export/realm/{export_row.id}")
        self.assert_json_error(result, "Export already deleted")

        # Now try to delete a non-existent export.
        result = self.client_delete("/json/export/realm/0")
        self.assert_json_error(result, "Invalid data export ID")

    def test_endpoint_local_uploads(self) -> None:
        """Create, list and delete an export without the S3 backend decorator.

        The patched exporter also checks, while it is running, that the
        export is listed as pending and cannot be deleted.
        """
        admin = self.example_user("iago")
        self.login_user(admin)
        tarball_path = create_dummy_file("test-export.tar.gz")

        # Test the export logic.
        def fake_export_realm(
            realm: Realm,
            output_dir: str,
            threads: int,
            export_type: int,
            exportable_user_ids: set[int] | None = None,
            export_as_active: bool | None = None,
        ) -> tuple[str, dict[str, int | dict[str, int]]]:
            self.assertEqual(realm, admin.realm)
            self.assertEqual(export_type, RealmExport.EXPORT_PUBLIC)
            self.assertTrue(os.path.basename(output_dir).startswith("zulip-export-"))
            self.assertEqual(threads, 6)

            # Check that the export shows up as in progress
            result = self.client_get("/json/export/realm")
            response_dict = self.assert_json_success(result)
            export_dict = response_dict["exports"]
            self.assert_length(export_dict, 1)
            pending_export_id = export_dict[0]["id"]
            self.assertEqual(export_dict[0]["pending"], True)
            self.assertIsNone(export_dict[0]["export_url"])
            self.assertIsNone(export_dict[0]["deleted_timestamp"])
            self.assertIsNone(export_dict[0]["failed_timestamp"])
            self.assertEqual(export_dict[0]["acting_user_id"], admin.id)

            # While the export is in progress, we can't delete it
            result = self.client_delete(f"/json/export/realm/{pending_export_id}")
            self.assert_json_error(result, "Export still in progress")

            return tarball_path, {}

        with (
            patch(
                "zerver.lib.export.do_export_realm", side_effect=fake_export_realm
            ) as mock_export,
            stdout_suppressed(),
            self.assertLogs(level="INFO") as info_logs,
            self.captureOnCommitCallbacks(execute=True),
        ):
            result = self.client_post("/json/export/realm")
        self.assertIn("INFO:root:Completed data export for zulip in ", info_logs.output[0])
        mock_export.assert_called_once()
        data = self.assert_json_success(result)
        self.assertFalse(os.path.exists(tarball_path))

        # Get the entry and test that iago initiated it.
        export_row = RealmExport.objects.first()
        assert export_row is not None
        self.assertEqual(export_row.id, data["id"])
        self.assertEqual(export_row.acting_user_id, admin.id)
        self.assertEqual(export_row.status, RealmExport.SUCCEEDED)

        # Test that the file is hosted, and the contents are as expected.
        export_path = export_row.export_path
        assert export_path is not None
        response = self.client_get(export_path)
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.getvalue(), b"zulip!")

        result = self.client_get("/json/export/realm")
        response_dict = self.assert_json_success(result)

        # Test that the export we have is the export we created.
        export_dict = response_dict["exports"]
        self.assertEqual(export_dict[0]["id"], export_row.id)
        self.assertEqual(export_dict[0]["export_url"], admin.realm.url + export_path)
        self.assertEqual(export_dict[0]["acting_user_id"], admin.id)
        self.assert_length(export_dict, RealmExport.objects.filter(realm=admin.realm).count())

        # Finally, delete the file.
        result = self.client_delete(f"/json/export/realm/{export_row.id}")
        self.assert_json_success(result)
        response = self.client_get(export_path)
        self.assertEqual(response.status_code, 404)

        # Try to delete an export with a `DELETED` status.
        export_row.refresh_from_db()
        self.assertEqual(export_row.status, RealmExport.DELETED)
        self.assertIsNotNone(export_row.date_deleted)
        result = self.client_delete(f"/json/export/realm/{export_row.id}")
        self.assert_json_error(result, "Export already deleted")

        # Now try to delete a non-existent export.
        result = self.client_delete("/json/export/realm/0")
        self.assert_json_error(result, "Invalid data export ID")

    def test_export_failure(self) -> None:
        """A failing export is listed as failed, can't be deleted, and isn't retried."""
        admin = self.example_user("iago")
        self.login_user(admin)

        with (
            patch(
                "zerver.lib.export.do_export_realm", side_effect=Exception("failure")
            ) as mock_export,
            stdout_suppressed(),
            self.assertLogs(level="INFO") as info_logs,
            self.captureOnCommitCallbacks(execute=True),
        ):
            result = self.client_post("/json/export/realm")
        self.assertTrue(
            info_logs.output[0].startswith("ERROR:root:Data export for zulip failed after ")
        )
        mock_export.assert_called_once()
        # This is a success because the failure is swallowed in the queue worker
        data = self.assert_json_success(result)
        export_id = data["id"]

        # Check that the export shows up as failed
        result = self.client_get("/json/export/realm")
        response_dict = self.assert_json_success(result)
        export_dict = response_dict["exports"]
        self.assert_length(export_dict, 1)
        self.assertEqual(export_dict[0]["id"], export_id)
        self.assertEqual(export_dict[0]["pending"], False)
        self.assertIsNone(export_dict[0]["export_url"])
        self.assertIsNone(export_dict[0]["deleted_timestamp"])
        self.assertIsNotNone(export_dict[0]["failed_timestamp"])
        self.assertEqual(export_dict[0]["acting_user_id"], admin.id)

        export_row = RealmExport.objects.get(id=export_id)
        self.assertEqual(export_row.status, RealmExport.FAILED)

        # Check that we can't delete it
        result = self.client_delete(f"/json/export/realm/{export_id}")
        self.assert_json_error(result, "Export failed, nothing to delete")

        # If the queue worker sees the same export-id again, it aborts
        # instead of retrying
        with (
            patch("zerver.lib.export.do_export_realm") as mock_export,
            self.assertLogs(level="INFO") as info_logs,
        ):
            queue_json_publish(
                "deferred_work",
                {
                    "type": "realm_export",
                    "user_profile_id": admin.id,
                    "realm_export_id": export_id,
                },
            )
        mock_export.assert_not_called()
        self.assertEqual(
            info_logs.output,
            [
                (
                    "ERROR:zerver.worker.deferred_work:Marking export for realm zulip "
                    "as failed due to retry -- possible OOM during export?"
                )
            ],
        )

    def test_realm_export_rate_limited(self) -> None:
        """With five export rows already present, `export_realm` is rate limited."""
        admin = self.example_user("iago")
        self.login_user(admin)

        export_rows = RealmExport.objects.all()
        self.assert_length(export_rows, 0)

        exports = [
            RealmExport(
                realm=admin.realm,
                type=RealmExport.EXPORT_PUBLIC,
                date_requested=timezone_now(),
                acting_user=admin,
            )
            for _ in range(5)
        ]
        RealmExport.objects.bulk_create(exports)

        with self.assertRaises(JsonableError) as error:
            export_realm(HostRequestMock(), admin)
        self.assertEqual(str(error.exception), "Exceeded rate limit.")

    def test_upload_and_message_limit(self) -> None:
        """Exceeding the upload-space or message limit yields the manual-export error."""
        admin = self.example_user("iago")
        self.login_user(admin)
        realm_count = RealmCount.objects.create(
            realm_id=admin.realm.id,
            end_time=timezone_now(),
            value=0,
            property="messages_sent:message_type:day",
            subgroup="public_stream",
        )

        # Space limit is set as 10 GiB
        with patch(
            "zerver.models.Realm.currently_used_upload_space_bytes",
            return_value=11 * 1024 * 1024 * 1024,
        ):
            result = self.client_post("/json/export/realm")
        self.assert_json_error(
            result,
            f"Please request a manual export from {settings.ZULIP_ADMINISTRATOR}.",
        )

        # Message limit is set as 250000
        realm_count.value = 250001
        realm_count.save(update_fields=["value"])
        result = self.client_post("/json/export/realm")
        self.assert_json_error(
            result,
            f"Please request a manual export from {settings.ZULIP_ADMINISTRATOR}.",
        )

    def test_get_users_export_consents(self) -> None:
        """The consents endpoint reports consent only for users who enabled it."""
        admin = self.example_user("iago")
        self.login_user(admin)

        # By default, export consent is set to False.
        self.assertFalse(
            UserProfile.objects.filter(
                realm=admin.realm, is_active=True, is_bot=False, allow_private_data_export=True
            ).exists()
        )

        # Hamlet and Aaron consented to export their private data.
        hamlet = self.example_user("hamlet")
        aaron = self.example_user("aaron")
        for user in [hamlet, aaron]:
            do_change_user_setting(user, "allow_private_data_export", True, acting_user=None)

        # Verify export consents of users.
        result = self.client_get("/json/export/realm/consents")
        response_dict = self.assert_json_success(result)
        export_consents = response_dict["export_consents"]
        consenting_user_ids = {hamlet.id, aaron.id}
        for export_consent in export_consents:
            if export_consent["user_id"] in consenting_user_ids:
                self.assertTrue(export_consent["consented"])
            else:
                self.assertFalse(export_consent["consented"])