public_export: Add backend API endpoint for triggering export.

An endpoint was created in zerver/views.  Basic rate-limiting was
implemented using RealmAuditLog: each export request is logged as a
realm_exported event, and the number of such events for the realm in
the past 7 days is checked to ensure that the weekly limit (5 exports)
is not exceeded.

The event is published to the 'deferred_work' queue processor to
prevent the export process from being killed after 60s.

Upon completion of the export, the admin who triggered it is notified.
This commit is contained in:
Wyatt Hoodes 2019-03-26 13:57:33 -10:00 committed by Tim Abbott
parent 8b5d2e9631
commit d4715f23d7
9 changed files with 199 additions and 2 deletions

View File

@ -5594,3 +5594,9 @@ def get_zoom_video_call_url(realm: Realm) -> str:
return '' return ''
return response['join_url'] return response['join_url']
def notify_export_completed(user_profile: UserProfile, public_url: str) -> None:
    """Send a 'realm_exported' event carrying the export's public URL.

    The event is sent via send_event for the user's realm and is
    addressed only to the given user.
    """
    # In the future, we may want to send this event to all realm admins.
    recipient_ids = [user_profile.id]
    event = {
        'type': 'realm_exported',
        'public_url': public_url,
    }
    send_event(user_profile.realm, event, recipient_ids)

View File

@ -794,6 +794,8 @@ def apply_event(state: Dict[str, Any],
user_status.pop(user_id, None) user_status.pop(user_id, None)
state['user_status'] = user_status state['user_status'] = user_status
elif event['type'] == 'realm_exported':
pass
else: else:
raise AssertionError("Unexpected event type %s" % (event['type'],)) raise AssertionError("Unexpected event type %s" % (event['type'],))

View File

@ -2399,6 +2399,7 @@ class RealmAuditLog(models.Model):
REALM_SCRUBBED = 'realm_scrubbed' REALM_SCRUBBED = 'realm_scrubbed'
REALM_PLAN_TYPE_CHANGED = 'realm_plan_type_changed' REALM_PLAN_TYPE_CHANGED = 'realm_plan_type_changed'
REALM_LOGO_CHANGED = 'realm_logo_changed' REALM_LOGO_CHANGED = 'realm_logo_changed'
REALM_EXPORTED = 'realm_exported'
SUBSCRIPTION_CREATED = 'subscription_created' SUBSCRIPTION_CREATED = 'subscription_created'
SUBSCRIPTION_ACTIVATED = 'subscription_activated' SUBSCRIPTION_ACTIVATED = 'subscription_activated'

View File

@ -92,6 +92,7 @@ from zerver.lib.actions import (
get_typing_user_profiles, get_typing_user_profiles,
log_event, log_event,
lookup_default_stream_groups, lookup_default_stream_groups,
notify_export_completed,
notify_realm_custom_profile_fields, notify_realm_custom_profile_fields,
check_add_user_group, check_add_user_group,
do_update_user_group_name, do_update_user_group_name,
@ -2607,6 +2608,22 @@ class EventsRegisterTest(ZulipTestCase):
error = schema_checker('events[0]', events[0]) error = schema_checker('events[0]', events[0])
self.assert_on_error(error) self.assert_on_error(error)
def test_public_export_notify_admins(self) -> None:
    # Checks the shape of the event produced by notify_export_completed:
    # a 'realm_exported' event with a string 'public_url' field.
    expected_fields = [
        ('type', equals('realm_exported')),
        ('public_url', check_string),
    ]
    schema_checker = self.check_events_dict(expected_fields)

    export_url = "http://localhost:9991/path/to/export.tar.gz"

    def send_notification() -> None:
        notify_export_completed(self.user_profile, export_url)

    # Traditionally, we'd be testing the endpoint, but that
    # requires somewhat annoying mocking setup for what to do with
    # the export tarball.
    events = self.do_test(send_notification, state_change_expected=False)

    error = schema_checker('events[0]', events[0])
    self.assert_on_error(error)
class FetchInitialStateDataTest(ZulipTestCase): class FetchInitialStateDataTest(ZulipTestCase):
# Non-admin users don't have access to all bots # Non-admin users don't have access to all bots
def test_realm_bots_non_admin(self) -> None: def test_realm_bots_non_admin(self) -> None:

View File

@ -0,0 +1,95 @@
from mock import patch
from django.test import override_settings
from django.conf import settings
from django.utils.timezone import now as timezone_now
from zerver.lib.test_classes import ZulipTestCase
from zerver.lib.exceptions import JsonableError
from zerver.lib.test_helpers import use_s3_backend, create_s3_buckets
from zerver.views.public_export import public_only_realm_export
from zerver.models import RealmAuditLog
# TODO: Mock export_realm_wrapper to test for s3 or local
class RealmExportTest(ZulipTestCase):
    """Tests for the public-only realm export endpoint (/json/export/realm)."""

    def setUp(self) -> None:
        # TODO: Just inline these 2 lines of basic code in the
        # individual test functions, since that's our standard style
        # in Zulip's unit tests
        super().setUp()
        self.admin = self.example_user('iago')
        self.login(self.admin.email)

    def test_export_as_not_admin(self) -> None:
        user = self.example_user('hamlet')
        self.login(user.email)
        # The view is called directly here; self.client_post merely fills
        # the `request` positional argument of the view.
        with self.assertRaises(JsonableError):
            public_only_realm_export(self.client_post, user)

    @use_s3_backend
    def test_endpoint_s3(self) -> None:
        create_s3_buckets(
            settings.S3_AUTH_UPLOADS_BUCKET,
            settings.S3_AVATAR_BUCKET)

        with patch('zerver.views.public_export.queue_json_publish') as mock_publish:
            result = self.client_post('/json/export/realm')

        # Check the response and the call count before indexing into the
        # recorded call, so a failure is reported as a clear assertion
        # error rather than an IndexError.
        self.assert_json_success(result)
        mock_publish.assert_called_once()
        queue_data = mock_publish.call_args_list[0][0]
        worker = queue_data[0]
        event = queue_data[1]

        # Compare against the logged-in admin rather than hard-coded
        # database ids, which depend on how the test fixtures were built.
        self.assertEqual(worker, 'deferred_work')
        self.assertEqual(event['realm_id'], self.admin.realm.id)
        self.assertEqual(event['user_profile_id'], self.admin.id)
        self.assertEqual(event['type'], 'realm_exported')

        with patch('zerver.lib.export.do_export_realm') as mock_export:
            result = self.client_post('/json/export/realm')

        # TODO: Clean up the way we do the mocking here; we will
        # want to mock do_export_realm in a way that captures its
        # arguments but doesn't lead to (silent) error spam from
        # do_write_stats_file_for_realm_export.
        #
        # Probably setting a `side_effect` makes sense?
        self.assert_json_success(result)
        # Keyword arguments of the first recorded do_export_realm call.
        args = mock_export.call_args_list[0][1]
        self.assertEqual(args['realm'], self.admin.realm)
        self.assertEqual(args['public_only'], True)
        self.assertTrue(args['output_dir'].startswith('/tmp/zulip-export-'))
        self.assertEqual(args['threads'], 6)

    @override_settings(LOCAL_UPLOADS_DIR='/var/uploads')
    def test_endpoint_local_uploads(self) -> None:
        with patch('zerver.lib.export.do_export_realm'):
            with patch('zerver.views.public_export.queue_json_publish') as mock_publish:
                result = self.client_post('/json/export/realm')

        self.assert_json_success(result)
        mock_publish.assert_called_once()
        queue_data = mock_publish.call_args_list[0][0]
        worker = queue_data[0]
        event = queue_data[1]

        self.assertEqual(worker, 'deferred_work')
        self.assertEqual(event['realm_id'], self.admin.realm.id)
        self.assertEqual(event['user_profile_id'], self.admin.id)
        self.assertEqual(event['type'], 'realm_exported')

        # Rest of test should match the previous test, but we're
        # blocked on support for public export in LOCAL_UPLOADS_DIR
        # backend.

    def test_realm_export_rate_limited(self) -> None:
        current_log = RealmAuditLog.objects.filter(
            event_type=RealmAuditLog.REALM_EXPORTED)
        # Let the database do the counting instead of loading the rows.
        self.assertEqual(current_log.count(), 0)

        # Pre-populate 5 export events, which is the limit enforced by
        # the view, so that the next request is rejected.
        exports = [
            RealmAuditLog(realm=self.admin.realm,
                          event_type=RealmAuditLog.REALM_EXPORTED,
                          event_time=timezone_now())
            for _ in range(5)
        ]
        RealmAuditLog.objects.bulk_create(exports)

        result = public_only_realm_export(self.client_post, self.admin)
        self.assert_json_error(result, 'Exceeded rate limit.')

View File

@ -69,6 +69,7 @@ class PublicURLTest(ZulipTestCase):
"/json/users/me/pointer", "/json/users/me/pointer",
"/json/users/me/subscriptions", "/json/users/me/subscriptions",
"/api/v1/users/me/subscriptions", "/api/v1/users/me/subscriptions",
"/json/export/realm",
], ],
400: ["/api/v1/external/github", 400: ["/api/v1/external/github",
"/api/v1/fetch_api_key", "/api/v1/fetch_api_key",

View File

@ -0,0 +1,39 @@
from datetime import timedelta
from django.utils.timezone import now as timezone_now
from django.utils.translation import ugettext as _
from django.http import HttpResponse, HttpRequest
from zerver.decorator import require_realm_admin
from zerver.models import RealmAuditLog, UserProfile
from zerver.lib.queue import queue_json_publish
from zerver.lib.response import json_error, json_success
@require_realm_admin
def public_only_realm_export(request: HttpRequest, user: UserProfile) -> HttpResponse:
    """Queue a public-only export of the requesting admin's realm.

    Rate limiting is based on RealmAuditLog: if the realm already has
    5 or more REALM_EXPORTED entries within the last 7 days, the
    request is rejected with a JSON error.  Otherwise a 'realm_exported'
    event is published to the 'deferred_work' queue and a new
    REALM_EXPORTED audit log entry is recorded for the realm.

    Returns json_success() when the export was queued, or a json_error
    response when the rate limit has been reached.
    """
    event_type = RealmAuditLog.REALM_EXPORTED
    event_time = timezone_now()
    realm = user.realm
    time_delta_limit = 5
    event_time_delta = event_time - timedelta(days=7)

    # Filter based upon the number of events that have occurred in the delta
    # If we are at the limit, the incoming request is rejected
    #
    # .count() asks the database for the number of rows rather than
    # fetching and instantiating every matching audit log entry.
    recent_export_count = RealmAuditLog.objects.filter(
        realm=realm,
        event_type=event_type,
        event_time__gte=event_time_delta).count()
    if recent_export_count >= time_delta_limit:
        return json_error(_('Exceeded rate limit.'))

    # Using the deferred_work queue processor to avoid killing the process after 60s
    event = {'type': event_type,
             'time': event_time,
             'realm_id': realm.id,
             'user_profile_id': user.id}
    queue_json_publish('deferred_work', event)

    # Record the export only after it has been queued; this entry is
    # what the rate-limit check above counts on later requests.
    RealmAuditLog.objects.create(realm=realm,
                                 event_type=event_type,
                                 event_time=event_time)
    return json_success()

View File

@ -3,6 +3,7 @@ from typing import Any, Callable, Dict, List, Mapping, Optional, cast, TypeVar,
import copy import copy
import signal import signal
import tempfile
from functools import wraps from functools import wraps
from threading import Timer from threading import Timer
@ -27,7 +28,7 @@ from zerver.lib.push_notifications import handle_push_notification, handle_remov
initialize_push_notifications initialize_push_notifications
from zerver.lib.actions import do_send_confirmation_email, \ from zerver.lib.actions import do_send_confirmation_email, \
do_update_user_activity, do_update_user_activity_interval, do_update_user_presence, \ do_update_user_activity, do_update_user_activity_interval, do_update_user_presence, \
internal_send_message, \ internal_send_message, internal_send_private_message, notify_export_completed, \
render_incoming_message, do_update_embedded_data, do_mark_stream_messages_as_read render_incoming_message, do_update_embedded_data, do_mark_stream_messages_as_read
from zerver.lib.url_preview import preview as url_preview from zerver.lib.url_preview import preview as url_preview
from zerver.lib.digest import handle_digest_email from zerver.lib.digest import handle_digest_email
@ -46,6 +47,7 @@ from zerver.models import get_bot_services
from zulip_bots.lib import extract_query_without_mention from zulip_bots.lib import extract_query_without_mention
from zerver.lib.bot_lib import EmbeddedBotHandler, get_bot_handler, EmbeddedBotQuitException from zerver.lib.bot_lib import EmbeddedBotHandler, get_bot_handler, EmbeddedBotQuitException
from zerver.lib.exceptions import RateLimited from zerver.lib.exceptions import RateLimited
from zerver.lib.export import export_realm_wrapper
import os import os
import sys import sys
@ -609,3 +611,32 @@ class DeferredWorker(QueueProcessingWorker):
(stream, recipient, sub) = access_stream_by_id(user_profile, stream_id, (stream, recipient, sub) = access_stream_by_id(user_profile, stream_id,
require_active=False) require_active=False)
do_mark_stream_messages_as_read(user_profile, client, stream) do_mark_stream_messages_as_read(user_profile, client, stream)
elif event['type'] == 'realm_exported':
realm = Realm.objects.get(id=event['realm_id'])
output_dir = tempfile.mkdtemp(prefix="zulip-export-")
# TODO: Add support for the LOCAL_UPLOADS_DIR uploads
# backend in export_realm_wrapper so we don't need this assertion.
assert settings.LOCAL_UPLOADS_DIR is None
public_url = export_realm_wrapper(realm=realm, output_dir=output_dir,
upload_to_s3=True, threads=6, public_only=True,
delete_after_upload=True)
assert public_url is not None
# Send a private message notification letting the user who
# triggered the export know the export finished.
user_profile = get_user_profile_by_id(event['user_profile_id'])
content = "Your data export is complete and has been uploaded here:\n\n%s" % (
public_url,)
internal_send_private_message(
realm=user_profile.realm,
sender=get_system_bot(settings.NOTIFICATION_BOT),
recipient_user=user_profile,
content=content
)
# For future frontend use, also notify administrator
# clients that the export happened, including sending the
# url.
notify_export_completed(user_profile, public_url)

View File

@ -36,6 +36,7 @@ import zerver.views.streams
import zerver.views.realm import zerver.views.realm
import zerver.views.digest import zerver.views.digest
from zerver.context_processors import latest_info_context from zerver.context_processors import latest_info_context
import zerver.views.public_export
from zerver.lib.rest import rest_dispatch from zerver.lib.rest import rest_dispatch
@ -381,7 +382,11 @@ v1_api_and_json_patterns = [
# Used to generate a Zoom video call URL # Used to generate a Zoom video call URL
url(r'^calls/create$', rest_dispatch, url(r'^calls/create$', rest_dispatch,
{'GET': 'zerver.views.video_calls.get_zoom_url'}) {'GET': 'zerver.views.video_calls.get_zoom_url'}),
# Used for public-only realm exporting
url(r'^export/realm', rest_dispatch,
{'POST': 'zerver.views.public_export.public_only_realm_export'}),
] ]
# These views serve pages (HTML). As such, their internationalization # These views serve pages (HTML). As such, their internationalization