From 235a3476391b5bded3743d13a3046f6a5076e972 Mon Sep 17 00:00:00 2001 From: Vishnu KS Date: Tue, 22 Dec 2020 22:39:34 +0530 Subject: [PATCH] analytics: Move last_successful_fill to CountStat. This is a prep commit. Currenty we only pass CountStat.property to last_successful_fill function. But it needs access to CountStat.time_increment as well. We can pass the entire CountStat object to the function as a workaround. But making last_successful_fill a property of CountStat seems to be much more cleaner. --- analytics/lib/counts.py | 11 ++- .../commands/check_analytics_state.py | 4 +- analytics/models.py | 8 -- analytics/tests/test_counts.py | 92 ++++++++++++------- analytics/tests/test_views.py | 15 +-- analytics/views.py | 3 +- 6 files changed, 70 insertions(+), 63 deletions(-) diff --git a/analytics/lib/counts.py b/analytics/lib/counts.py index 663bec6740..3ba64d6f2c 100644 --- a/analytics/lib/counts.py +++ b/analytics/lib/counts.py @@ -17,7 +17,6 @@ from analytics.models import ( StreamCount, UserCount, installation_epoch, - last_successful_fill, ) from zerver.lib.logging_util import log_to_file from zerver.lib.timestamp import ceiling_to_day, ceiling_to_hour, floor_to_hour, verify_UTC @@ -68,6 +67,14 @@ class CountStat: def __str__(self) -> str: return f"" + def last_successful_fill(self) -> Optional[datetime]: + fillstate = FillState.objects.filter(property=self.property).first() + if fillstate is None: + return None + if fillstate.state == FillState.DONE: + return fillstate.end_time + return fillstate.end_time - timedelta(hours=1) + class LoggingCountStat(CountStat): def __init__(self, property: str, output_table: Type[BaseCount], frequency: str) -> None: CountStat.__init__(self, property, DataCollector(output_table, None), frequency) @@ -121,7 +128,7 @@ def process_count_stat(stat: CountStat, fill_to_time: datetime, if isinstance(stat, DependentCountStat): for dependency in stat.dependencies: - dependency_fill_time = last_successful_fill(dependency) + dependency_fill_time = COUNT_STATS[dependency].last_successful_fill() if dependency_fill_time is None: logger.warning("DependentCountStat %s run before dependency %s.", stat.property, dependency) diff --git a/analytics/management/commands/check_analytics_state.py b/analytics/management/commands/check_analytics_state.py index 5d66e05f99..48576cda19 100644 --- a/analytics/management/commands/check_analytics_state.py +++ b/analytics/management/commands/check_analytics_state.py @@ -7,7 +7,7 @@ from django.core.management.base import BaseCommand from django.utils.timezone import now as timezone_now from analytics.lib.counts import COUNT_STATS, CountStat -from analytics.models import installation_epoch, last_successful_fill +from analytics.models import installation_epoch from zerver.lib.timestamp import TimezoneNotUTCException, floor_to_day, floor_to_hour, verify_UTC from zerver.models import Realm @@ -42,7 +42,7 @@ class Command(BaseCommand): warning_unfilled_properties = [] critical_unfilled_properties = [] for property, stat in COUNT_STATS.items(): - last_fill = last_successful_fill(property) + last_fill = stat.last_successful_fill() if last_fill is None: last_fill = installation_epoch() try: diff --git a/analytics/models.py b/analytics/models.py index 20bcc8b6a6..be9e87cbad 100644 --- a/analytics/models.py +++ b/analytics/models.py @@ -26,14 +26,6 @@ def installation_epoch() -> datetime.datetime: earliest_realm_creation = Realm.objects.aggregate(models.Min('date_created'))['date_created__min'] return floor_to_day(earliest_realm_creation) -def last_successful_fill(property: str) -> Optional[datetime.datetime]: - fillstate = FillState.objects.filter(property=property).first() - if fillstate is None: - return None - if fillstate.state == FillState.DONE: - return fillstate.end_time - return fillstate.end_time - datetime.timedelta(hours=1) - class BaseCount(models.Model): # Note: When inheriting from BaseCount, you may want to rearrange # the order of the columns in the migration to make sure they diff --git a/analytics/tests/test_counts.py b/analytics/tests/test_counts.py index a12cfcb482..18ae2554c1 100644 --- a/analytics/tests/test_counts.py +++ b/analytics/tests/test_counts.py @@ -322,35 +322,7 @@ class TestProcessCountStat(AnalyticsTestCase): stat3 = DependentCountStat('stat3', sql_data_collector(RealmCount, query, None), CountStat.HOUR, dependencies=['stat1', 'stat2']) - hour = [installation_epoch() + i*self.HOUR for i in range(5)] - # test when one dependency has been run, and the other hasn't - process_count_stat(stat1, hour[2]) - process_count_stat(stat3, hour[1]) - self.assertTableState(InstallationCount, ['property', 'end_time'], - [['stat1', hour[1]], ['stat1', hour[2]]]) - self.assertFillStateEquals(stat3, hour[0]) - - # test that we don't fill past the fill_to_time argument, even if - # dependencies have later last_successful_fill - process_count_stat(stat2, hour[3]) - process_count_stat(stat3, hour[1]) - self.assertTableState(InstallationCount, ['property', 'end_time'], - [['stat1', hour[1]], ['stat1', hour[2]], - ['stat2', hour[1]], ['stat2', hour[2]], ['stat2', hour[3]], - ['stat3', hour[1]]]) - self.assertFillStateEquals(stat3, hour[1]) - - # test that we don't fill past the dependency last_successful_fill times, - # even if fill_to_time is later - process_count_stat(stat3, hour[4]) - self.assertTableState(InstallationCount, ['property', 'end_time'], - [['stat1', hour[1]], ['stat1', hour[2]], - ['stat2', hour[1]], ['stat2', hour[2]], ['stat2', hour[3]], - ['stat3', hour[1]], ['stat3', hour[2]]]) - self.assertFillStateEquals(stat3, hour[2]) - - # test daily dependent stat with hourly dependencies query = lambda kwargs: SQL(""" INSERT INTO analytics_realmcount (realm_id, value, property, end_time) VALUES ({default_realm_id}, 1, {property}, %(time_end)s) @@ -361,13 +333,51 @@ class TestProcessCountStat(AnalyticsTestCase): stat4 = DependentCountStat('stat4', sql_data_collector(RealmCount, query, None), CountStat.DAY, dependencies=['stat1', 'stat2']) - hour24 = installation_epoch() + 24*self.HOUR - hour25 = installation_epoch() + 25*self.HOUR - process_count_stat(stat1, hour25) - process_count_stat(stat2, hour25) - process_count_stat(stat4, hour25) - self.assertEqual(InstallationCount.objects.filter(property='stat4').count(), 1) - self.assertFillStateEquals(stat4, hour24) + + dummy_count_stats = { + "stat1": stat1, + "stat2": stat2, + "stat3": stat3, + "stat4": stat4, + + } + with mock.patch("analytics.lib.counts.COUNT_STATS", dummy_count_stats): + hour = [installation_epoch() + i*self.HOUR for i in range(5)] + + # test when one dependency has been run, and the other hasn't + process_count_stat(stat1, hour[2]) + process_count_stat(stat3, hour[1]) + self.assertTableState(InstallationCount, ['property', 'end_time'], + [['stat1', hour[1]], ['stat1', hour[2]]]) + self.assertFillStateEquals(stat3, hour[0]) + + # test that we don't fill past the fill_to_time argument, even if + # dependencies have later last_successful_fill + process_count_stat(stat2, hour[3]) + process_count_stat(stat3, hour[1]) + self.assertTableState(InstallationCount, ['property', 'end_time'], + [['stat1', hour[1]], ['stat1', hour[2]], + ['stat2', hour[1]], ['stat2', hour[2]], ['stat2', hour[3]], + ['stat3', hour[1]]]) + self.assertFillStateEquals(stat3, hour[1]) + + # test that we don't fill past the dependency last_successful_fill times, + # even if fill_to_time is later + process_count_stat(stat3, hour[4]) + self.assertTableState(InstallationCount, ['property', 'end_time'], + [['stat1', hour[1]], ['stat1', hour[2]], + ['stat2', hour[1]], ['stat2', hour[2]], ['stat2', hour[3]], + ['stat3', hour[1]], ['stat3', hour[2]]]) + self.assertFillStateEquals(stat3, hour[2]) + + # test daily dependent stat with hourly dependencies + hour24 = installation_epoch() + 24*self.HOUR + hour25 = installation_epoch() + 25*self.HOUR + process_count_stat(stat1, hour25) + process_count_stat(stat2, hour25) + process_count_stat(stat4, hour25) + self.assertEqual(InstallationCount.objects.filter(property='stat4').count(), 1) + self.assertFillStateEquals(stat4, hour24) class TestCountStats(AnalyticsTestCase): def setUp(self) -> None: @@ -995,6 +1005,18 @@ class TestCountStats(AnalyticsTestCase): self.assertTableState(InstallationCount, ['value'], []) self.assertTableState(StreamCount, [], []) + def test_last_successful_fill(self) -> None: + self.assertIsNone(COUNT_STATS["messages_sent:is_bot:hour"].last_successful_fill()) + + a_time = datetime(2016, 3, 14, 19, tzinfo=timezone.utc) + one_hour_before = datetime(2016, 3, 14, 18, tzinfo=timezone.utc) + fillstate = FillState.objects.create(property=COUNT_STATS["messages_sent:is_bot:hour"].property, + end_time=a_time, state=FillState.DONE) + self.assertEqual(COUNT_STATS["messages_sent:is_bot:hour"].last_successful_fill(), a_time) + fillstate.state = FillState.STARTED + fillstate.save() + self.assertEqual(COUNT_STATS["messages_sent:is_bot:hour"].last_successful_fill(), one_hour_before) + class TestDoAggregateToSummaryTable(AnalyticsTestCase): # do_aggregate_to_summary_table is mostly tested by the end to end # nature of the tests in TestCountStats. But want to highlight one diff --git a/analytics/tests/test_views.py b/analytics/tests/test_views.py index 367045e71e..c3b9e0a5d3 100644 --- a/analytics/tests/test_views.py +++ b/analytics/tests/test_views.py @@ -8,7 +8,7 @@ from django.utils.timezone import now as timezone_now from analytics.lib.counts import COUNT_STATS, CountStat from analytics.lib.time_utils import time_range -from analytics.models import FillState, RealmCount, UserCount, last_successful_fill +from analytics.models import FillState, RealmCount, UserCount from analytics.views import rewrite_client_arrays, sort_by_totals, sort_client_labels from corporate.lib.stripe import add_months, update_sponsorship_status from corporate.models import Customer, CustomerPlan, LicenseLedger, get_customer_by_realm @@ -805,19 +805,6 @@ class TestSupportEndpoint(ZulipTestCase): m.assert_not_called() class TestGetChartDataHelpers(ZulipTestCase): - # last_successful_fill is in analytics/models.py, but get_chart_data is - # the only function that uses it at the moment - def test_last_successful_fill(self) -> None: - self.assertIsNone(last_successful_fill('non-existant')) - a_time = datetime(2016, 3, 14, 19, tzinfo=timezone.utc) - one_hour_before = datetime(2016, 3, 14, 18, tzinfo=timezone.utc) - fillstate = FillState.objects.create(property='property', end_time=a_time, - state=FillState.DONE) - self.assertEqual(last_successful_fill('property'), a_time) - fillstate.state = FillState.STARTED - fillstate.save() - self.assertEqual(last_successful_fill('property'), one_hour_before) - def test_sort_by_totals(self) -> None: empty: List[int] = [] value_arrays = {'c': [0, 1], 'a': [9], 'b': [1, 1, 1], 'd': empty} diff --git a/analytics/views.py b/analytics/views.py index 849d2e4271..c5d4684d8f 100644 --- a/analytics/views.py +++ b/analytics/views.py @@ -35,7 +35,6 @@ from analytics.models import ( StreamCount, UserCount, installation_epoch, - last_successful_fill, ) from confirmation.models import Confirmation, _properties, confirmation_url from confirmation.settings import STATUS_ACTIVE @@ -308,7 +307,7 @@ def get_chart_data(request: HttpRequest, user_profile: UserProfile, chart_name: else: start = realm.date_created if end is None: - end = max(last_successful_fill(stat.property) or + end = max(stat.last_successful_fill() or datetime.min.replace(tzinfo=timezone.utc) for stat in stats) if start > end and (timezone_now() - start > MAX_TIME_FOR_FULL_ANALYTICS_GENERATION):