mirror of https://github.com/zulip/zulip.git
realm_export: Improve estimate of data export size.
As suggested by the new comments, the cost for a Zulip data export scales with messages actually included in the export, so an organization with 1M private messages but only 50K public-stream messages should not be modeled the same as one with 1M public-stream messages for the purpose of the limits here. Also improve the comments and variable names more generally.
This commit is contained in:
parent
155540c0e3
commit
76bcb96414
|
@ -205,9 +205,9 @@ class RealmExportTest(ZulipTestCase):
|
||||||
realm_count = RealmCount.objects.create(
|
realm_count = RealmCount.objects.create(
|
||||||
realm_id=admin.realm.id,
|
realm_id=admin.realm.id,
|
||||||
end_time=timezone_now(),
|
end_time=timezone_now(),
|
||||||
subgroup=1,
|
|
||||||
value=0,
|
value=0,
|
||||||
property="messages_sent:client:day",
|
property="messages_sent:message_type:day",
|
||||||
|
subgroup="public_stream",
|
||||||
)
|
)
|
||||||
|
|
||||||
# Space limit is set as 10 GiB
|
# Space limit is set as 10 GiB
|
||||||
|
|
|
@ -24,10 +24,13 @@ def export_realm(request: HttpRequest, user: UserProfile) -> HttpResponse:
|
||||||
event_time = timezone_now()
|
event_time = timezone_now()
|
||||||
realm = user.realm
|
realm = user.realm
|
||||||
EXPORT_LIMIT = 5
|
EXPORT_LIMIT = 5
|
||||||
# Conservative limit on the size of message history in
|
|
||||||
# organizations being exported; this exists to protect Zulip
|
# Exporting organizations with a huge amount of history can
|
||||||
# against a possible unmonitored accidental DoS caused by trying
|
# potentially consume a lot of disk or otherwise have accidental
|
||||||
# to export an organization with huge history.
|
# DoS risk; for that reason, we require large exports to be done
|
||||||
|
# manually on the command line.
|
||||||
|
#
|
||||||
|
# It's very possible that higher limits would be completely safe.
|
||||||
MAX_MESSAGE_HISTORY = 250000
|
MAX_MESSAGE_HISTORY = 250000
|
||||||
MAX_UPLOAD_QUOTA = 10 * 1024 * 1024 * 1024
|
MAX_UPLOAD_QUOTA = 10 * 1024 * 1024 * 1024
|
||||||
|
|
||||||
|
@ -40,14 +43,22 @@ def export_realm(request: HttpRequest, user: UserProfile) -> HttpResponse:
|
||||||
if len(limit_check) >= EXPORT_LIMIT:
|
if len(limit_check) >= EXPORT_LIMIT:
|
||||||
raise JsonableError(_("Exceeded rate limit."))
|
raise JsonableError(_("Exceeded rate limit."))
|
||||||
|
|
||||||
total_messages = sum(
|
# The RealmCount analytics table lets us efficiently get an
|
||||||
|
# estimate for the number of public stream messages in an
|
||||||
|
# organization. It won't match the actual number of messages in
|
||||||
|
# the export, because this measures the number of messages that
|
||||||
|
# went to a public stream at the time they were sent. Thus,
|
||||||
|
# messages that were deleted or moved between streams will be
|
||||||
|
# treated differently for this check vs. in the export code.
|
||||||
|
exportable_messages_estimate = sum(
|
||||||
realm_count.value
|
realm_count.value
|
||||||
for realm_count in RealmCount.objects.filter(
|
for realm_count in RealmCount.objects.filter(
|
||||||
realm=user.realm, property="messages_sent:client:day"
|
realm=realm, property="messages_sent:message_type:day", subgroup="public_stream"
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
if (
|
if (
|
||||||
total_messages > MAX_MESSAGE_HISTORY
|
exportable_messages_estimate > MAX_MESSAGE_HISTORY
|
||||||
or user.realm.currently_used_upload_space_bytes() > MAX_UPLOAD_QUOTA
|
or user.realm.currently_used_upload_space_bytes() > MAX_UPLOAD_QUOTA
|
||||||
):
|
):
|
||||||
raise JsonableError(
|
raise JsonableError(
|
||||||
|
|
Loading…
Reference in New Issue