mirror of https://github.com/zulip/zulip.git
analytics: Move last_successful_fill to CountStat.
This is a prep commit. Currenty we only pass CountStat.property to last_successful_fill function. But it needs access to CountStat.time_increment as well. We can pass the entire CountStat object to the function as a workaround. But making last_successful_fill a property of CountStat seems to be much more cleaner.
This commit is contained in:
parent
189e9a2759
commit
235a347639
|
@ -17,7 +17,6 @@ from analytics.models import (
|
|||
StreamCount,
|
||||
UserCount,
|
||||
installation_epoch,
|
||||
last_successful_fill,
|
||||
)
|
||||
from zerver.lib.logging_util import log_to_file
|
||||
from zerver.lib.timestamp import ceiling_to_day, ceiling_to_hour, floor_to_hour, verify_UTC
|
||||
|
@ -68,6 +67,14 @@ class CountStat:
|
|||
def __str__(self) -> str:
|
||||
return f"<CountStat: {self.property}>"
|
||||
|
||||
def last_successful_fill(self) -> Optional[datetime]:
|
||||
fillstate = FillState.objects.filter(property=self.property).first()
|
||||
if fillstate is None:
|
||||
return None
|
||||
if fillstate.state == FillState.DONE:
|
||||
return fillstate.end_time
|
||||
return fillstate.end_time - timedelta(hours=1)
|
||||
|
||||
class LoggingCountStat(CountStat):
|
||||
def __init__(self, property: str, output_table: Type[BaseCount], frequency: str) -> None:
|
||||
CountStat.__init__(self, property, DataCollector(output_table, None), frequency)
|
||||
|
@ -121,7 +128,7 @@ def process_count_stat(stat: CountStat, fill_to_time: datetime,
|
|||
|
||||
if isinstance(stat, DependentCountStat):
|
||||
for dependency in stat.dependencies:
|
||||
dependency_fill_time = last_successful_fill(dependency)
|
||||
dependency_fill_time = COUNT_STATS[dependency].last_successful_fill()
|
||||
if dependency_fill_time is None:
|
||||
logger.warning("DependentCountStat %s run before dependency %s.",
|
||||
stat.property, dependency)
|
||||
|
|
|
@ -7,7 +7,7 @@ from django.core.management.base import BaseCommand
|
|||
from django.utils.timezone import now as timezone_now
|
||||
|
||||
from analytics.lib.counts import COUNT_STATS, CountStat
|
||||
from analytics.models import installation_epoch, last_successful_fill
|
||||
from analytics.models import installation_epoch
|
||||
from zerver.lib.timestamp import TimezoneNotUTCException, floor_to_day, floor_to_hour, verify_UTC
|
||||
from zerver.models import Realm
|
||||
|
||||
|
@ -42,7 +42,7 @@ class Command(BaseCommand):
|
|||
warning_unfilled_properties = []
|
||||
critical_unfilled_properties = []
|
||||
for property, stat in COUNT_STATS.items():
|
||||
last_fill = last_successful_fill(property)
|
||||
last_fill = stat.last_successful_fill()
|
||||
if last_fill is None:
|
||||
last_fill = installation_epoch()
|
||||
try:
|
||||
|
|
|
@ -26,14 +26,6 @@ def installation_epoch() -> datetime.datetime:
|
|||
earliest_realm_creation = Realm.objects.aggregate(models.Min('date_created'))['date_created__min']
|
||||
return floor_to_day(earliest_realm_creation)
|
||||
|
||||
def last_successful_fill(property: str) -> Optional[datetime.datetime]:
|
||||
fillstate = FillState.objects.filter(property=property).first()
|
||||
if fillstate is None:
|
||||
return None
|
||||
if fillstate.state == FillState.DONE:
|
||||
return fillstate.end_time
|
||||
return fillstate.end_time - datetime.timedelta(hours=1)
|
||||
|
||||
class BaseCount(models.Model):
|
||||
# Note: When inheriting from BaseCount, you may want to rearrange
|
||||
# the order of the columns in the migration to make sure they
|
||||
|
|
|
@ -322,35 +322,7 @@ class TestProcessCountStat(AnalyticsTestCase):
|
|||
stat3 = DependentCountStat('stat3', sql_data_collector(RealmCount, query, None),
|
||||
CountStat.HOUR,
|
||||
dependencies=['stat1', 'stat2'])
|
||||
hour = [installation_epoch() + i*self.HOUR for i in range(5)]
|
||||
|
||||
# test when one dependency has been run, and the other hasn't
|
||||
process_count_stat(stat1, hour[2])
|
||||
process_count_stat(stat3, hour[1])
|
||||
self.assertTableState(InstallationCount, ['property', 'end_time'],
|
||||
[['stat1', hour[1]], ['stat1', hour[2]]])
|
||||
self.assertFillStateEquals(stat3, hour[0])
|
||||
|
||||
# test that we don't fill past the fill_to_time argument, even if
|
||||
# dependencies have later last_successful_fill
|
||||
process_count_stat(stat2, hour[3])
|
||||
process_count_stat(stat3, hour[1])
|
||||
self.assertTableState(InstallationCount, ['property', 'end_time'],
|
||||
[['stat1', hour[1]], ['stat1', hour[2]],
|
||||
['stat2', hour[1]], ['stat2', hour[2]], ['stat2', hour[3]],
|
||||
['stat3', hour[1]]])
|
||||
self.assertFillStateEquals(stat3, hour[1])
|
||||
|
||||
# test that we don't fill past the dependency last_successful_fill times,
|
||||
# even if fill_to_time is later
|
||||
process_count_stat(stat3, hour[4])
|
||||
self.assertTableState(InstallationCount, ['property', 'end_time'],
|
||||
[['stat1', hour[1]], ['stat1', hour[2]],
|
||||
['stat2', hour[1]], ['stat2', hour[2]], ['stat2', hour[3]],
|
||||
['stat3', hour[1]], ['stat3', hour[2]]])
|
||||
self.assertFillStateEquals(stat3, hour[2])
|
||||
|
||||
# test daily dependent stat with hourly dependencies
|
||||
query = lambda kwargs: SQL("""
|
||||
INSERT INTO analytics_realmcount (realm_id, value, property, end_time)
|
||||
VALUES ({default_realm_id}, 1, {property}, %(time_end)s)
|
||||
|
@ -361,13 +333,51 @@ class TestProcessCountStat(AnalyticsTestCase):
|
|||
stat4 = DependentCountStat('stat4', sql_data_collector(RealmCount, query, None),
|
||||
CountStat.DAY,
|
||||
dependencies=['stat1', 'stat2'])
|
||||
hour24 = installation_epoch() + 24*self.HOUR
|
||||
hour25 = installation_epoch() + 25*self.HOUR
|
||||
process_count_stat(stat1, hour25)
|
||||
process_count_stat(stat2, hour25)
|
||||
process_count_stat(stat4, hour25)
|
||||
self.assertEqual(InstallationCount.objects.filter(property='stat4').count(), 1)
|
||||
self.assertFillStateEquals(stat4, hour24)
|
||||
|
||||
dummy_count_stats = {
|
||||
"stat1": stat1,
|
||||
"stat2": stat2,
|
||||
"stat3": stat3,
|
||||
"stat4": stat4,
|
||||
|
||||
}
|
||||
with mock.patch("analytics.lib.counts.COUNT_STATS", dummy_count_stats):
|
||||
hour = [installation_epoch() + i*self.HOUR for i in range(5)]
|
||||
|
||||
# test when one dependency has been run, and the other hasn't
|
||||
process_count_stat(stat1, hour[2])
|
||||
process_count_stat(stat3, hour[1])
|
||||
self.assertTableState(InstallationCount, ['property', 'end_time'],
|
||||
[['stat1', hour[1]], ['stat1', hour[2]]])
|
||||
self.assertFillStateEquals(stat3, hour[0])
|
||||
|
||||
# test that we don't fill past the fill_to_time argument, even if
|
||||
# dependencies have later last_successful_fill
|
||||
process_count_stat(stat2, hour[3])
|
||||
process_count_stat(stat3, hour[1])
|
||||
self.assertTableState(InstallationCount, ['property', 'end_time'],
|
||||
[['stat1', hour[1]], ['stat1', hour[2]],
|
||||
['stat2', hour[1]], ['stat2', hour[2]], ['stat2', hour[3]],
|
||||
['stat3', hour[1]]])
|
||||
self.assertFillStateEquals(stat3, hour[1])
|
||||
|
||||
# test that we don't fill past the dependency last_successful_fill times,
|
||||
# even if fill_to_time is later
|
||||
process_count_stat(stat3, hour[4])
|
||||
self.assertTableState(InstallationCount, ['property', 'end_time'],
|
||||
[['stat1', hour[1]], ['stat1', hour[2]],
|
||||
['stat2', hour[1]], ['stat2', hour[2]], ['stat2', hour[3]],
|
||||
['stat3', hour[1]], ['stat3', hour[2]]])
|
||||
self.assertFillStateEquals(stat3, hour[2])
|
||||
|
||||
# test daily dependent stat with hourly dependencies
|
||||
hour24 = installation_epoch() + 24*self.HOUR
|
||||
hour25 = installation_epoch() + 25*self.HOUR
|
||||
process_count_stat(stat1, hour25)
|
||||
process_count_stat(stat2, hour25)
|
||||
process_count_stat(stat4, hour25)
|
||||
self.assertEqual(InstallationCount.objects.filter(property='stat4').count(), 1)
|
||||
self.assertFillStateEquals(stat4, hour24)
|
||||
|
||||
class TestCountStats(AnalyticsTestCase):
|
||||
def setUp(self) -> None:
|
||||
|
@ -995,6 +1005,18 @@ class TestCountStats(AnalyticsTestCase):
|
|||
self.assertTableState(InstallationCount, ['value'], [])
|
||||
self.assertTableState(StreamCount, [], [])
|
||||
|
||||
def test_last_successful_fill(self) -> None:
|
||||
self.assertIsNone(COUNT_STATS["messages_sent:is_bot:hour"].last_successful_fill())
|
||||
|
||||
a_time = datetime(2016, 3, 14, 19, tzinfo=timezone.utc)
|
||||
one_hour_before = datetime(2016, 3, 14, 18, tzinfo=timezone.utc)
|
||||
fillstate = FillState.objects.create(property=COUNT_STATS["messages_sent:is_bot:hour"].property,
|
||||
end_time=a_time, state=FillState.DONE)
|
||||
self.assertEqual(COUNT_STATS["messages_sent:is_bot:hour"].last_successful_fill(), a_time)
|
||||
fillstate.state = FillState.STARTED
|
||||
fillstate.save()
|
||||
self.assertEqual(COUNT_STATS["messages_sent:is_bot:hour"].last_successful_fill(), one_hour_before)
|
||||
|
||||
class TestDoAggregateToSummaryTable(AnalyticsTestCase):
|
||||
# do_aggregate_to_summary_table is mostly tested by the end to end
|
||||
# nature of the tests in TestCountStats. But want to highlight one
|
||||
|
|
|
@ -8,7 +8,7 @@ from django.utils.timezone import now as timezone_now
|
|||
|
||||
from analytics.lib.counts import COUNT_STATS, CountStat
|
||||
from analytics.lib.time_utils import time_range
|
||||
from analytics.models import FillState, RealmCount, UserCount, last_successful_fill
|
||||
from analytics.models import FillState, RealmCount, UserCount
|
||||
from analytics.views import rewrite_client_arrays, sort_by_totals, sort_client_labels
|
||||
from corporate.lib.stripe import add_months, update_sponsorship_status
|
||||
from corporate.models import Customer, CustomerPlan, LicenseLedger, get_customer_by_realm
|
||||
|
@ -805,19 +805,6 @@ class TestSupportEndpoint(ZulipTestCase):
|
|||
m.assert_not_called()
|
||||
|
||||
class TestGetChartDataHelpers(ZulipTestCase):
|
||||
# last_successful_fill is in analytics/models.py, but get_chart_data is
|
||||
# the only function that uses it at the moment
|
||||
def test_last_successful_fill(self) -> None:
|
||||
self.assertIsNone(last_successful_fill('non-existant'))
|
||||
a_time = datetime(2016, 3, 14, 19, tzinfo=timezone.utc)
|
||||
one_hour_before = datetime(2016, 3, 14, 18, tzinfo=timezone.utc)
|
||||
fillstate = FillState.objects.create(property='property', end_time=a_time,
|
||||
state=FillState.DONE)
|
||||
self.assertEqual(last_successful_fill('property'), a_time)
|
||||
fillstate.state = FillState.STARTED
|
||||
fillstate.save()
|
||||
self.assertEqual(last_successful_fill('property'), one_hour_before)
|
||||
|
||||
def test_sort_by_totals(self) -> None:
|
||||
empty: List[int] = []
|
||||
value_arrays = {'c': [0, 1], 'a': [9], 'b': [1, 1, 1], 'd': empty}
|
||||
|
|
|
@ -35,7 +35,6 @@ from analytics.models import (
|
|||
StreamCount,
|
||||
UserCount,
|
||||
installation_epoch,
|
||||
last_successful_fill,
|
||||
)
|
||||
from confirmation.models import Confirmation, _properties, confirmation_url
|
||||
from confirmation.settings import STATUS_ACTIVE
|
||||
|
@ -308,7 +307,7 @@ def get_chart_data(request: HttpRequest, user_profile: UserProfile, chart_name:
|
|||
else:
|
||||
start = realm.date_created
|
||||
if end is None:
|
||||
end = max(last_successful_fill(stat.property) or
|
||||
end = max(stat.last_successful_fill() or
|
||||
datetime.min.replace(tzinfo=timezone.utc) for stat in stats)
|
||||
|
||||
if start > end and (timezone_now() - start > MAX_TIME_FOR_FULL_ANALYTICS_GENERATION):
|
||||
|
|
Loading…
Reference in New Issue