public_export: Add backend API endpoint for triggering export.

An endpoint was created in zerver/views.  Basic rate-limiting was
implemented using RealmAuditLog.  The idea here is to simply log each
export event as a realm_exported event.  The number of events
occurring in the time delta is checked to ensure that the weekly
limit is not exceeded.

The event is published to the 'deferred_work' queue processor to
prevent the export process from being killed after 60s.

Upon completion of the export, the realm admin who triggered it is notified.
This commit is contained in:
Wyatt Hoodes 2019-03-26 13:57:33 -10:00 committed by Tim Abbott
parent 8b5d2e9631
commit d4715f23d7
9 changed files with 199 additions and 2 deletions

View File

@ -5594,3 +5594,9 @@ def get_zoom_video_call_url(realm: Realm) -> str:
return ''
return response['join_url']
def notify_export_completed(user_profile: UserProfile, public_url: str) -> None:
    """Send a ``realm_exported`` event carrying the export's public URL.

    The event is sent via ``send_event`` for ``user_profile.realm``, with
    ``user_profile.id`` as the only entry in the recipient list.
    """
    # In the future, we may want to send this event to all realm admins.
    recipient_ids = [user_profile.id]
    payload = {
        'type': 'realm_exported',
        'public_url': public_url,
    }
    send_event(user_profile.realm, payload, recipient_ids)

View File

@ -794,6 +794,8 @@ def apply_event(state: Dict[str, Any],
user_status.pop(user_id, None)
state['user_status'] = user_status
elif event['type'] == 'realm_exported':
pass
else:
raise AssertionError("Unexpected event type %s" % (event['type'],))

View File

@ -2399,6 +2399,7 @@ class RealmAuditLog(models.Model):
REALM_SCRUBBED = 'realm_scrubbed'
REALM_PLAN_TYPE_CHANGED = 'realm_plan_type_changed'
REALM_LOGO_CHANGED = 'realm_logo_changed'
REALM_EXPORTED = 'realm_exported'
SUBSCRIPTION_CREATED = 'subscription_created'
SUBSCRIPTION_ACTIVATED = 'subscription_activated'

View File

@ -92,6 +92,7 @@ from zerver.lib.actions import (
get_typing_user_profiles,
log_event,
lookup_default_stream_groups,
notify_export_completed,
notify_realm_custom_profile_fields,
check_add_user_group,
do_update_user_group_name,
@ -2607,6 +2608,22 @@ class EventsRegisterTest(ZulipTestCase):
error = schema_checker('events[0]', events[0])
self.assert_on_error(error)
def test_public_export_notify_admins(self) -> None:
    # Verifies the shape of the event produced by notify_export_completed:
    # a 'realm_exported' event with a string 'public_url' field.
    #
    # Traditionally, we'd be testing the endpoint, but that
    # requires somewhat annoying mocking setup for what to do with
    # the export tarball.
    def announce_export() -> None:
        notify_export_completed(self.user_profile,
                                "http://localhost:9991/path/to/export.tar.gz")

    expected_fields = [
        ('type', equals('realm_exported')),
        ('public_url', check_string),
    ]
    schema_checker = self.check_events_dict(expected_fields)

    events = self.do_test(announce_export, state_change_expected=False)
    first_event_error = schema_checker('events[0]', events[0])
    self.assert_on_error(first_event_error)
class FetchInitialStateDataTest(ZulipTestCase):
# Non-admin users don't have access to all bots
def test_realm_bots_non_admin(self) -> None:

View File

@ -0,0 +1,95 @@
from mock import patch
from django.test import override_settings
from django.conf import settings
from django.utils.timezone import now as timezone_now
from zerver.lib.test_classes import ZulipTestCase
from zerver.lib.exceptions import JsonableError
from zerver.lib.test_helpers import use_s3_backend, create_s3_buckets
from zerver.views.public_export import public_only_realm_export
from zerver.models import RealmAuditLog
# TODO: Mock export_realm_wrapper to test for s3 or local
class RealmExportTest(ZulipTestCase):
    # Tests for the public-only realm export endpoint
    # (POST /json/export/realm) and its RealmAuditLog-based rate limit.

    def setUp(self) -> None:
        super().setUp()
        # TODO: Just inline these 2 lines of basic code in the
        # individual test functions, since that's our standard style
        # in Zulip's unit tests
        self.admin = self.example_user('iago')
        self.login(self.admin.email)

    def test_export_as_not_admin(self) -> None:
        # A non-admin calling the view must be rejected with JsonableError.
        user = self.example_user('hamlet')
        self.login(user.email)
        with self.assertRaises(JsonableError):
            public_only_realm_export(self.client_post, user)

    @use_s3_backend
    def test_endpoint_s3(self) -> None:
        create_s3_buckets(
            settings.S3_AUTH_UPLOADS_BUCKET,
            settings.S3_AVATAR_BUCKET)

        with patch('zerver.views.public_export.queue_json_publish') as mock_publish:
            result = self.client_post('/json/export/realm')

        # Check the response and the call count before unpacking the call
        # arguments, so a failed request is reported as such rather than
        # as an IndexError on an empty call list.
        self.assert_json_success(result)
        mock_publish.assert_called_once()
        worker, event = mock_publish.call_args[0]
        self.assertEqual(worker, 'deferred_work')
        # Compare against the logged-in admin rather than hard-coded
        # database IDs.
        self.assertEqual(event['realm_id'], self.admin.realm.id)
        self.assertEqual(event['user_profile_id'], self.admin.id)
        self.assertEqual(event['type'], 'realm_exported')

        with patch('zerver.lib.export.do_export_realm') as mock_export:
            result = self.client_post('/json/export/realm')
            export_kwargs = mock_export.call_args_list[0][1]
            # TODO: Clean up the way we do the mocking here; we will
            # want to mock do_export_realm in a way that captures its
            # arguments but doesn't lead to (silent) error spam from
            # do_write_stats_file_for_realm_export.
            #
            # Probably setting a `side_effect` makes sense?
        self.assert_json_success(result)
        self.assertEqual(export_kwargs['realm'], self.admin.realm)
        self.assertTrue(export_kwargs['public_only'])
        self.assertTrue(export_kwargs['output_dir'].startswith('/tmp/zulip-export-'))
        self.assertEqual(export_kwargs['threads'], 6)

    @override_settings(LOCAL_UPLOADS_DIR='/var/uploads')
    def test_endpoint_local_uploads(self) -> None:
        with patch('zerver.lib.export.do_export_realm'):
            with patch('zerver.views.public_export.queue_json_publish') as mock_publish:
                result = self.client_post('/json/export/realm')

        self.assert_json_success(result)
        mock_publish.assert_called_once()
        worker, event = mock_publish.call_args[0]
        self.assertEqual(worker, 'deferred_work')
        self.assertEqual(event['realm_id'], self.admin.realm.id)
        self.assertEqual(event['user_profile_id'], self.admin.id)
        self.assertEqual(event['type'], 'realm_exported')

        # Rest of test should match the previous test, but we're
        # blocked on support for public export in LOCAL_UPLOADS_DIR
        # backend.

    def test_realm_export_rate_limited(self) -> None:
        current_log = RealmAuditLog.objects.filter(
            event_type=RealmAuditLog.REALM_EXPORTED)
        self.assertEqual(current_log.count(), 0)

        # Pre-populate the audit log with as many exports as the view
        # allows per week (5), so the next request must be rejected.
        exports = [
            RealmAuditLog(realm=self.admin.realm,
                          event_type=RealmAuditLog.REALM_EXPORTED,
                          event_time=timezone_now())
            for _ in range(5)
        ]
        RealmAuditLog.objects.bulk_create(exports)

        result = public_only_realm_export(self.client_post, self.admin)
        self.assert_json_error(result, 'Exceeded rate limit.')

View File

@ -69,6 +69,7 @@ class PublicURLTest(ZulipTestCase):
"/json/users/me/pointer",
"/json/users/me/subscriptions",
"/api/v1/users/me/subscriptions",
"/json/export/realm",
],
400: ["/api/v1/external/github",
"/api/v1/fetch_api_key",

View File

@ -0,0 +1,39 @@
from datetime import timedelta
from django.utils.timezone import now as timezone_now
from django.utils.translation import ugettext as _
from django.http import HttpResponse, HttpRequest
from zerver.decorator import require_realm_admin
from zerver.models import RealmAuditLog, UserProfile
from zerver.lib.queue import queue_json_publish
from zerver.lib.response import json_error, json_success
@require_realm_admin
def public_only_realm_export(request: HttpRequest, user: UserProfile) -> HttpResponse:
    """Queue a public-only export of the requesting user's realm.

    Every accepted request is recorded as a ``REALM_EXPORTED`` row in
    ``RealmAuditLog``; those rows double as the rate-limit counter.  The
    request is rejected once 5 such rows exist for the realm with an
    ``event_time`` in the preceding 7 days.

    Returns ``json_success()`` after the export event has been published
    to the 'deferred_work' queue and logged, or a ``json_error`` response
    with 'Exceeded rate limit.' when the limit has been reached.
    """
    event_type = RealmAuditLog.REALM_EXPORTED
    event_time = timezone_now()
    realm = user.realm
    max_exports_in_window = 5
    window_start = event_time - timedelta(days=7)

    # Count the exports already logged inside the window.  If we are at
    # the limit, the incoming request is rejected.  Using .count() lets
    # the database do the counting instead of fetching every matching row
    # just to take its length.
    recent_export_count = RealmAuditLog.objects.filter(
        realm=realm,
        event_type=event_type,
        event_time__gte=window_start).count()
    if recent_export_count >= max_exports_in_window:
        return json_error(_('Exceeded rate limit.'))

    # Using the deferred_work queue processor to avoid killing the process after 60s
    event = {'type': event_type,
             'time': event_time,
             'realm_id': realm.id,
             'user_profile_id': user.id}
    queue_json_publish('deferred_work', event)

    # Log the export so it counts toward the rate limit for later requests.
    RealmAuditLog.objects.create(realm=realm,
                                 event_type=event_type,
                                 event_time=event_time)
    return json_success()

View File

@ -3,6 +3,7 @@ from typing import Any, Callable, Dict, List, Mapping, Optional, cast, TypeVar,
import copy
import signal
import tempfile
from functools import wraps
from threading import Timer
@ -27,7 +28,7 @@ from zerver.lib.push_notifications import handle_push_notification, handle_remov
initialize_push_notifications
from zerver.lib.actions import do_send_confirmation_email, \
do_update_user_activity, do_update_user_activity_interval, do_update_user_presence, \
internal_send_message, \
internal_send_message, internal_send_private_message, notify_export_completed, \
render_incoming_message, do_update_embedded_data, do_mark_stream_messages_as_read
from zerver.lib.url_preview import preview as url_preview
from zerver.lib.digest import handle_digest_email
@ -46,6 +47,7 @@ from zerver.models import get_bot_services
from zulip_bots.lib import extract_query_without_mention
from zerver.lib.bot_lib import EmbeddedBotHandler, get_bot_handler, EmbeddedBotQuitException
from zerver.lib.exceptions import RateLimited
from zerver.lib.export import export_realm_wrapper
import os
import sys
@ -609,3 +611,32 @@ class DeferredWorker(QueueProcessingWorker):
(stream, recipient, sub) = access_stream_by_id(user_profile, stream_id,
require_active=False)
do_mark_stream_messages_as_read(user_profile, client, stream)
elif event['type'] == 'realm_exported':
realm = Realm.objects.get(id=event['realm_id'])
output_dir = tempfile.mkdtemp(prefix="zulip-export-")
# TODO: Add support for the LOCAL_UPLOADS_DIR uploads
# backend in export_realm_wrapper so we don't need this assertion.
assert settings.LOCAL_UPLOADS_DIR is None
public_url = export_realm_wrapper(realm=realm, output_dir=output_dir,
upload_to_s3=True, threads=6, public_only=True,
delete_after_upload=True)
assert public_url is not None
# Send a private message notification letting the user who
# triggered the export know the export finished.
user_profile = get_user_profile_by_id(event['user_profile_id'])
content = "Your data export is complete and has been uploaded here:\n\n%s" % (
public_url,)
internal_send_private_message(
realm=user_profile.realm,
sender=get_system_bot(settings.NOTIFICATION_BOT),
recipient_user=user_profile,
content=content
)
# For future frontend use, also notify administrator
# clients that the export happened, including sending the
# url.
notify_export_completed(user_profile, public_url)

View File

@ -36,6 +36,7 @@ import zerver.views.streams
import zerver.views.realm
import zerver.views.digest
from zerver.context_processors import latest_info_context
import zerver.views.public_export
from zerver.lib.rest import rest_dispatch
@ -381,7 +382,11 @@ v1_api_and_json_patterns = [
# Used to generate a Zoom video call URL
url(r'^calls/create$', rest_dispatch,
{'GET': 'zerver.views.video_calls.get_zoom_url'})
{'GET': 'zerver.views.video_calls.get_zoom_url'}),
# Used for public-only realm exporting
url(r'^export/realm', rest_dispatch,
{'POST': 'zerver.views.public_export.public_only_realm_export'}),
]
# These views serve pages (HTML). As such, their internationalization