mirror of https://github.com/zulip/zulip.git
analytics: Store realm disk space used as a CountStat.
Fixes #29632. The issue description explains this well: We currently recalculate `currently_used_upload_space_bytes` on every file upload, by dint of calling `flush_used_upload_space_cache` on save/delete, and then immediately calling `user_profile.realm.currently_used_upload_space_bytes()` in `notify_attachment_update`. Since this walks the Attachment table, recalculating it can take seconds in large realms. Switch this to using a CountStat, so that we don't need to walk significant chunks of the Attachment table when we upload an attachment. This will also give us a historical daily graph of usage.
This commit is contained in:
parent
4c4a443002
commit
9406bfbc0a
|
@ -485,6 +485,40 @@ def sql_data_collector(
|
|||
return DataCollector(output_table, pull_function)
|
||||
|
||||
|
||||
def count_upload_space_used_by_realm_query(realm: Optional[Realm]) -> QueryFn:
    """Build the query function for the per-realm upload space CountStat.

    The returned callable accepts the format kwargs supplied by the caller
    and yields an INSERT ... SELECT into analytics_realmcount.  The SELECT
    groups zerver_attachment rows by realm and stores, per group, the summed
    size of the attachments created strictly before %(time_end)s.

    When `realm` is None the statement covers every realm; otherwise an
    extra condition restricts it to `realm.id`.
    """
    # Optional realm filter; it ends in "AND" because it is spliced in
    # directly ahead of the create_time condition of the WHERE clause.
    realm_clause: Composable = (
        SQL("")
        if realm is None
        else SQL("zerver_attachment.realm_id = {} AND").format(Literal(realm.id))
    )

    # Note: This query currently has to go through the entire table,
    # summing all the sizes of attachments for every realm. This can be improved
    # by having a query which looks at the latest CountStat for each realm,
    # and sums it with only the new attachments.
    # There'd be additional complexity added by the fact that attachments can
    # also be deleted. Partially this can be accounted for by subtracting
    # ArchivedAttachment sizes, but there's still the issue of attachments
    # which can be directly deleted via the API.
    query_template = SQL(
        """
            INSERT INTO analytics_realmcount (realm_id, property, end_time, value)
            SELECT
                zerver_attachment.realm_id,
                %(property)s,
                %(time_end)s,
                COALESCE(SUM(zerver_attachment.size), 0)
            FROM
                zerver_attachment
            WHERE
                {realm_clause}
                zerver_attachment.create_time < %(time_end)s
            GROUP BY
                zerver_attachment.realm_id
        """
    )

    return lambda kwargs: query_template.format(**kwargs, realm_clause=realm_clause)
|
||||
|
||||
|
||||
def do_pull_minutes_active(
|
||||
property: str, start_time: datetime, end_time: datetime, realm: Optional[Realm] = None
|
||||
) -> int:
|
||||
|
@ -863,6 +897,11 @@ def get_count_stats(realm: Optional[Realm] = None) -> Dict[str, CountStat]:
|
|||
CountStat.DAY,
|
||||
interval=TIMEDELTA_MAX,
|
||||
),
|
||||
CountStat(
|
||||
"upload_quota_used_bytes::day",
|
||||
sql_data_collector(RealmCount, count_upload_space_used_by_realm_query(realm), None),
|
||||
CountStat.DAY,
|
||||
),
|
||||
# Messages read stats. messages_read::hour is the total
|
||||
# number of messages read, whereas
|
||||
# messages_read_interactions::hour tries to count the total
|
||||
|
|
|
@ -76,6 +76,7 @@ from zerver.models import (
|
|||
)
|
||||
from zerver.models.clients import get_client
|
||||
from zerver.models.groups import SystemGroups
|
||||
from zerver.models.messages import Attachment
|
||||
from zerver.models.scheduled_jobs import NotificationTriggers
|
||||
from zerver.models.users import get_user, is_cross_realm_bot_email
|
||||
from zilencer.models import (
|
||||
|
@ -190,6 +191,18 @@ class AnalyticsTestCase(ZulipTestCase):
|
|||
kwargs[key] = kwargs.get(key, value)
|
||||
return Message.objects.create(**kwargs)
|
||||
|
||||
def create_attachment(
    self, user_profile: UserProfile, filename: str, size: int, create_time: datetime
) -> Attachment:
    """Create and return an Attachment row for use in analytics tests.

    The attachment is owned by `user_profile`, placed in that user's realm,
    and given a synthetic path_id derived from `filename`, along with the
    requested `size` and `create_time`.
    """
    # Placeholder storage path; only the filename part varies.
    path_id = f"foo/bar/{filename}"
    return Attachment.objects.create(
        owner=user_profile,
        realm=user_profile.realm,
        file_name=filename,
        path_id=path_id,
        size=size,
        create_time=create_time,
    )
|
||||
|
||||
# kwargs should only ever be a UserProfile or Stream.
|
||||
def assert_table_count(
|
||||
self,
|
||||
|
@ -546,6 +559,41 @@ class TestCountStats(AnalyticsTestCase):
|
|||
self.assertTableState(UserCount, [], [])
|
||||
self.assertTableState(StreamCount, [], [])
|
||||
|
||||
def test_upload_quota_used_bytes(self) -> None:
    """Exercise the upload_quota_used_bytes::day CountStat.

    Verifies that attachment sizes are summed per realm, and that a
    later run of the stat reflects an attachment deleted in between.
    """
    stat = COUNT_STATS["upload_quota_used_bytes::day"]
    self.current_property = stat.property

    first_user = self.create_user()
    second_user = self.create_user()
    other_realm_user = self.create_user(realm=self.second_realm)

    # Two attachments in the default realm (100 + 200 bytes) and one
    # 10-byte attachment in the second realm.
    uploaded_at = self.TIME_LAST_HOUR
    self.create_attachment(first_user, "file1", 100, uploaded_at)
    deletable_attachment = self.create_attachment(second_user, "file2", 200, uploaded_at)
    self.create_attachment(other_realm_user, "file3", 10, uploaded_at)

    first_run = self.TIME_ZERO
    do_fill_count_stat_at_hour(stat, first_run)

    expected_first_run = [
        [300, None, self.default_realm],
        [10, None, self.second_realm],
    ]
    self.assertTableState(RealmCount, ["value", "subgroup", "realm"], expected_first_run)

    # Delete an attachment and run the CountStat job again the next day.
    deletable_attachment.delete()
    second_run = self.TIME_ZERO + self.DAY
    do_fill_count_stat_at_hour(stat, second_run)

    # The first day's rows are retained; the second day's default-realm
    # row no longer includes the deleted 200-byte attachment.
    expected_both_runs = [
        [300, None, self.default_realm, self.TIME_ZERO],
        [10, None, self.second_realm, self.TIME_ZERO],
        [100, None, self.default_realm, self.TIME_ZERO + self.DAY],
        [10, None, self.second_realm, self.TIME_ZERO + self.DAY],
    ]
    self.assertTableState(
        RealmCount,
        ["value", "subgroup", "realm", "end_time"],
        expected_both_runs,
    )
|
||||
|
||||
def test_active_users_by_is_bot_for_realm_constraint(self) -> None:
|
||||
# For single Realm
|
||||
|
||||
|
|
|
@ -870,12 +870,26 @@ class Realm(models.Model): # type: ignore[django-manager-missing] # django-stub
|
|||
lambda realm: get_realm_used_upload_space_cache_key(realm.id), timeout=3600 * 24 * 7
|
||||
)
|
||||
def currently_used_upload_space_bytes(realm) -> int:  # noqa: N805
    """Return the number of bytes of upload space this realm is using.

    Rather than summing every Attachment row of the realm, this starts
    from the most recent "upload_quota_used_bytes::day" RealmCount row and
    adds only the sizes of attachments created at or after that row's
    end_time.  If no such row exists yet, it starts from zero at
    installation_epoch(), which amounts to summing all attachments created
    since then.

    Note that this only ever adds to the recorded value: attachments
    deleted after the latest recorded stat are not subtracted here.
    """
    # Imported locally, as in the original block (presumably to avoid
    # import cycles between the models modules -- TODO confirm).
    from analytics.models import RealmCount, installation_epoch
    from zerver.models import Attachment

    try:
        latest_count_stat = RealmCount.objects.filter(
            realm=realm, property="upload_quota_used_bytes::day"
        ).latest("end_time")
    except RealmCount.DoesNotExist:
        # No stat has been recorded for this realm yet.
        last_recorded_used_space = 0
        last_recorded_date = installation_epoch()
    else:
        last_recorded_used_space = latest_count_stat.value
        last_recorded_date = latest_count_stat.end_time

    # Inclusive lower bound: presumably the recorded stat covers only
    # attachments created strictly before its end_time -- verify against
    # the CountStat query.
    newly_used_space = Attachment.objects.filter(
        realm=realm, create_time__gte=last_recorded_date
    ).aggregate(Sum("size"))["size__sum"]

    # Sum() over an empty queryset aggregates to None rather than 0.
    if newly_used_space is None:
        return last_recorded_used_space
    return last_recorded_used_space + newly_used_space
|
||||
|
||||
def ensure_not_on_limited_plan(self) -> None:
|
||||
if self.plan_type == Realm.PLAN_TYPE_LIMITED:
|
||||
|
|
|
@ -9,12 +9,14 @@ from urllib.parse import quote
|
|||
|
||||
import orjson
|
||||
from django.conf import settings
|
||||
from django.utils.timezone import now as timezone_now
|
||||
from PIL import Image
|
||||
from typing_extensions import override
|
||||
from urllib3 import encode_multipart_formdata
|
||||
from urllib3.fields import RequestField
|
||||
|
||||
import zerver.lib.upload
|
||||
from analytics.models import RealmCount
|
||||
from zerver.actions.create_realm import do_create_realm
|
||||
from zerver.actions.message_send import internal_send_private_message
|
||||
from zerver.actions.realm_icon import do_change_icon_source
|
||||
|
@ -23,7 +25,7 @@ from zerver.actions.realm_settings import do_change_realm_plan_type, do_set_real
|
|||
from zerver.actions.user_settings import do_delete_avatar_image
|
||||
from zerver.lib.attachments import validate_attachment_request
|
||||
from zerver.lib.avatar import avatar_url, get_avatar_field
|
||||
from zerver.lib.cache import cache_get, get_realm_used_upload_space_cache_key
|
||||
from zerver.lib.cache import cache_delete, cache_get, get_realm_used_upload_space_cache_key
|
||||
from zerver.lib.create_user import copy_default_settings
|
||||
from zerver.lib.initial_password import initial_password
|
||||
from zerver.lib.realm_icon import realm_icon_url
|
||||
|
@ -1844,6 +1846,22 @@ class UploadSpaceTests(UploadSerializeMixin, ZulipTestCase):
|
|||
self.assertEqual(None, cache_get(get_realm_used_upload_space_cache_key(self.realm.id)))
|
||||
self.assert_length(data2, self.realm.currently_used_upload_space_bytes())
|
||||
|
||||
now = timezone_now()
|
||||
RealmCount.objects.create(
|
||||
realm=self.realm,
|
||||
property="upload_quota_used_bytes::day",
|
||||
end_time=now,
|
||||
value=len(data2),
|
||||
)
|
||||
# Purge the cache since we want to actually execute the function.
|
||||
cache_delete(get_realm_used_upload_space_cache_key(self.realm.id))
|
||||
|
||||
self.assert_length(data2, self.realm.currently_used_upload_space_bytes())
|
||||
|
||||
data3 = b"even-more-data!"
|
||||
upload_message_attachment("dummy3.txt", len(data3), "text/plain", data3, self.user_profile)
|
||||
self.assertEqual(len(data2) + len(data3), self.realm.currently_used_upload_space_bytes())
|
||||
|
||||
|
||||
class DecompressionBombTests(ZulipTestCase):
|
||||
@override
|
||||
|
|
Loading…
Reference in New Issue