analytics: Move last_successful_fill to CountStat.

This is a prep commit. Currenty we only pass CountStat.property
to last_successful_fill function. But it needs access to
CountStat.time_increment as well. We can pass the entire CountStat
object to the function as a workaround. But making last_successful_fill
a property of CountStat seems to be much more cleaner.
This commit is contained in:
Vishnu KS 2020-12-22 22:39:34 +05:30 committed by Tim Abbott
parent 189e9a2759
commit 235a347639
6 changed files with 70 additions and 63 deletions

View File

@ -17,7 +17,6 @@ from analytics.models import (
StreamCount,
UserCount,
installation_epoch,
last_successful_fill,
)
from zerver.lib.logging_util import log_to_file
from zerver.lib.timestamp import ceiling_to_day, ceiling_to_hour, floor_to_hour, verify_UTC
@ -68,6 +67,14 @@ class CountStat:
def __str__(self) -> str:
return f"<CountStat: {self.property}>"
def last_successful_fill(self) -> Optional[datetime]:
fillstate = FillState.objects.filter(property=self.property).first()
if fillstate is None:
return None
if fillstate.state == FillState.DONE:
return fillstate.end_time
return fillstate.end_time - timedelta(hours=1)
class LoggingCountStat(CountStat):
def __init__(self, property: str, output_table: Type[BaseCount], frequency: str) -> None:
CountStat.__init__(self, property, DataCollector(output_table, None), frequency)
@ -121,7 +128,7 @@ def process_count_stat(stat: CountStat, fill_to_time: datetime,
if isinstance(stat, DependentCountStat):
for dependency in stat.dependencies:
dependency_fill_time = last_successful_fill(dependency)
dependency_fill_time = COUNT_STATS[dependency].last_successful_fill()
if dependency_fill_time is None:
logger.warning("DependentCountStat %s run before dependency %s.",
stat.property, dependency)

View File

@ -7,7 +7,7 @@ from django.core.management.base import BaseCommand
from django.utils.timezone import now as timezone_now
from analytics.lib.counts import COUNT_STATS, CountStat
from analytics.models import installation_epoch, last_successful_fill
from analytics.models import installation_epoch
from zerver.lib.timestamp import TimezoneNotUTCException, floor_to_day, floor_to_hour, verify_UTC
from zerver.models import Realm
@ -42,7 +42,7 @@ class Command(BaseCommand):
warning_unfilled_properties = []
critical_unfilled_properties = []
for property, stat in COUNT_STATS.items():
last_fill = last_successful_fill(property)
last_fill = stat.last_successful_fill()
if last_fill is None:
last_fill = installation_epoch()
try:

View File

@ -26,14 +26,6 @@ def installation_epoch() -> datetime.datetime:
earliest_realm_creation = Realm.objects.aggregate(models.Min('date_created'))['date_created__min']
return floor_to_day(earliest_realm_creation)
def last_successful_fill(property: str) -> Optional[datetime.datetime]:
fillstate = FillState.objects.filter(property=property).first()
if fillstate is None:
return None
if fillstate.state == FillState.DONE:
return fillstate.end_time
return fillstate.end_time - datetime.timedelta(hours=1)
class BaseCount(models.Model):
# Note: When inheriting from BaseCount, you may want to rearrange
# the order of the columns in the migration to make sure they

View File

@ -322,35 +322,7 @@ class TestProcessCountStat(AnalyticsTestCase):
stat3 = DependentCountStat('stat3', sql_data_collector(RealmCount, query, None),
CountStat.HOUR,
dependencies=['stat1', 'stat2'])
hour = [installation_epoch() + i*self.HOUR for i in range(5)]
# test when one dependency has been run, and the other hasn't
process_count_stat(stat1, hour[2])
process_count_stat(stat3, hour[1])
self.assertTableState(InstallationCount, ['property', 'end_time'],
[['stat1', hour[1]], ['stat1', hour[2]]])
self.assertFillStateEquals(stat3, hour[0])
# test that we don't fill past the fill_to_time argument, even if
# dependencies have later last_successful_fill
process_count_stat(stat2, hour[3])
process_count_stat(stat3, hour[1])
self.assertTableState(InstallationCount, ['property', 'end_time'],
[['stat1', hour[1]], ['stat1', hour[2]],
['stat2', hour[1]], ['stat2', hour[2]], ['stat2', hour[3]],
['stat3', hour[1]]])
self.assertFillStateEquals(stat3, hour[1])
# test that we don't fill past the dependency last_successful_fill times,
# even if fill_to_time is later
process_count_stat(stat3, hour[4])
self.assertTableState(InstallationCount, ['property', 'end_time'],
[['stat1', hour[1]], ['stat1', hour[2]],
['stat2', hour[1]], ['stat2', hour[2]], ['stat2', hour[3]],
['stat3', hour[1]], ['stat3', hour[2]]])
self.assertFillStateEquals(stat3, hour[2])
# test daily dependent stat with hourly dependencies
query = lambda kwargs: SQL("""
INSERT INTO analytics_realmcount (realm_id, value, property, end_time)
VALUES ({default_realm_id}, 1, {property}, %(time_end)s)
@ -361,13 +333,51 @@ class TestProcessCountStat(AnalyticsTestCase):
stat4 = DependentCountStat('stat4', sql_data_collector(RealmCount, query, None),
CountStat.DAY,
dependencies=['stat1', 'stat2'])
hour24 = installation_epoch() + 24*self.HOUR
hour25 = installation_epoch() + 25*self.HOUR
process_count_stat(stat1, hour25)
process_count_stat(stat2, hour25)
process_count_stat(stat4, hour25)
self.assertEqual(InstallationCount.objects.filter(property='stat4').count(), 1)
self.assertFillStateEquals(stat4, hour24)
dummy_count_stats = {
"stat1": stat1,
"stat2": stat2,
"stat3": stat3,
"stat4": stat4,
}
with mock.patch("analytics.lib.counts.COUNT_STATS", dummy_count_stats):
hour = [installation_epoch() + i*self.HOUR for i in range(5)]
# test when one dependency has been run, and the other hasn't
process_count_stat(stat1, hour[2])
process_count_stat(stat3, hour[1])
self.assertTableState(InstallationCount, ['property', 'end_time'],
[['stat1', hour[1]], ['stat1', hour[2]]])
self.assertFillStateEquals(stat3, hour[0])
# test that we don't fill past the fill_to_time argument, even if
# dependencies have later last_successful_fill
process_count_stat(stat2, hour[3])
process_count_stat(stat3, hour[1])
self.assertTableState(InstallationCount, ['property', 'end_time'],
[['stat1', hour[1]], ['stat1', hour[2]],
['stat2', hour[1]], ['stat2', hour[2]], ['stat2', hour[3]],
['stat3', hour[1]]])
self.assertFillStateEquals(stat3, hour[1])
# test that we don't fill past the dependency last_successful_fill times,
# even if fill_to_time is later
process_count_stat(stat3, hour[4])
self.assertTableState(InstallationCount, ['property', 'end_time'],
[['stat1', hour[1]], ['stat1', hour[2]],
['stat2', hour[1]], ['stat2', hour[2]], ['stat2', hour[3]],
['stat3', hour[1]], ['stat3', hour[2]]])
self.assertFillStateEquals(stat3, hour[2])
# test daily dependent stat with hourly dependencies
hour24 = installation_epoch() + 24*self.HOUR
hour25 = installation_epoch() + 25*self.HOUR
process_count_stat(stat1, hour25)
process_count_stat(stat2, hour25)
process_count_stat(stat4, hour25)
self.assertEqual(InstallationCount.objects.filter(property='stat4').count(), 1)
self.assertFillStateEquals(stat4, hour24)
class TestCountStats(AnalyticsTestCase):
def setUp(self) -> None:
@ -995,6 +1005,18 @@ class TestCountStats(AnalyticsTestCase):
self.assertTableState(InstallationCount, ['value'], [])
self.assertTableState(StreamCount, [], [])
def test_last_successful_fill(self) -> None:
self.assertIsNone(COUNT_STATS["messages_sent:is_bot:hour"].last_successful_fill())
a_time = datetime(2016, 3, 14, 19, tzinfo=timezone.utc)
one_hour_before = datetime(2016, 3, 14, 18, tzinfo=timezone.utc)
fillstate = FillState.objects.create(property=COUNT_STATS["messages_sent:is_bot:hour"].property,
end_time=a_time, state=FillState.DONE)
self.assertEqual(COUNT_STATS["messages_sent:is_bot:hour"].last_successful_fill(), a_time)
fillstate.state = FillState.STARTED
fillstate.save()
self.assertEqual(COUNT_STATS["messages_sent:is_bot:hour"].last_successful_fill(), one_hour_before)
class TestDoAggregateToSummaryTable(AnalyticsTestCase):
# do_aggregate_to_summary_table is mostly tested by the end to end
# nature of the tests in TestCountStats. But want to highlight one

View File

@ -8,7 +8,7 @@ from django.utils.timezone import now as timezone_now
from analytics.lib.counts import COUNT_STATS, CountStat
from analytics.lib.time_utils import time_range
from analytics.models import FillState, RealmCount, UserCount, last_successful_fill
from analytics.models import FillState, RealmCount, UserCount
from analytics.views import rewrite_client_arrays, sort_by_totals, sort_client_labels
from corporate.lib.stripe import add_months, update_sponsorship_status
from corporate.models import Customer, CustomerPlan, LicenseLedger, get_customer_by_realm
@ -805,19 +805,6 @@ class TestSupportEndpoint(ZulipTestCase):
m.assert_not_called()
class TestGetChartDataHelpers(ZulipTestCase):
# last_successful_fill is in analytics/models.py, but get_chart_data is
# the only function that uses it at the moment
def test_last_successful_fill(self) -> None:
self.assertIsNone(last_successful_fill('non-existant'))
a_time = datetime(2016, 3, 14, 19, tzinfo=timezone.utc)
one_hour_before = datetime(2016, 3, 14, 18, tzinfo=timezone.utc)
fillstate = FillState.objects.create(property='property', end_time=a_time,
state=FillState.DONE)
self.assertEqual(last_successful_fill('property'), a_time)
fillstate.state = FillState.STARTED
fillstate.save()
self.assertEqual(last_successful_fill('property'), one_hour_before)
def test_sort_by_totals(self) -> None:
empty: List[int] = []
value_arrays = {'c': [0, 1], 'a': [9], 'b': [1, 1, 1], 'd': empty}

View File

@ -35,7 +35,6 @@ from analytics.models import (
StreamCount,
UserCount,
installation_epoch,
last_successful_fill,
)
from confirmation.models import Confirmation, _properties, confirmation_url
from confirmation.settings import STATUS_ACTIVE
@ -308,7 +307,7 @@ def get_chart_data(request: HttpRequest, user_profile: UserProfile, chart_name:
else:
start = realm.date_created
if end is None:
end = max(last_successful_fill(stat.property) or
end = max(stat.last_successful_fill() or
datetime.min.replace(tzinfo=timezone.utc) for stat in stats)
if start > end and (timezone_now() - start > MAX_TIME_FOR_FULL_ANALYTICS_GENERATION):