mirror of https://github.com/zulip/zulip.git
analytics: Change generate_time_series_data argument from length to days.
Previously, this function seemed ambivalent about whether it was generating a series of abstract data points or a series of data points that would correspond to times. Switch firmly to the latter, so e.g. if the frequency changes, so will the length of the output sequence.
This commit is contained in:
parent
3712fda30d
commit
680e7f75e1
|
@ -11,14 +11,15 @@ from random import gauss, random, seed
|
|||
|
||||
from six.moves import range, zip
|
||||
|
||||
def generate_time_series_data(length, business_hours_base, non_business_hours_base,
|
||||
def generate_time_series_data(days=100, business_hours_base=10, non_business_hours_base=10,
|
||||
growth=1, autocorrelation=0, spikiness=1, holiday_rate=0,
|
||||
frequency=CountStat.HOUR, is_gauge=False):
|
||||
frequency=CountStat.DAY, is_gauge=False):
|
||||
# type: (int, float, float, float, float, float, float, str, bool) -> List[int]
|
||||
"""
|
||||
Generate semi-realistic looking time series data for testing analytics graphs.
|
||||
|
||||
length -- Number of data points returned.
|
||||
days -- Number of days of data. Is the number of data points generated if
|
||||
frequency is CountStat.DAY.
|
||||
business_hours_base -- Average value during a business hour (or day) at beginning of
|
||||
time series, if frequency is CountStat.HOUR (CountStat.DAY, respectively).
|
||||
non_business_hours_base -- The above, for non-business hours/days.
|
||||
|
@ -28,21 +29,29 @@ def generate_time_series_data(length, business_hours_base, non_business_hours_ba
|
|||
function of the previous point.
|
||||
spikiness -- 0 means no randomness (other than holiday_rate), higher values increase
|
||||
the variance.
|
||||
holiday_rate -- Fraction of points randomly set to 0.
|
||||
holiday_rate -- Fraction of days randomly set to 0, largely for testing how we handle 0s.
|
||||
frequency -- Should be CountStat.HOUR or CountStat.DAY.
|
||||
is_gauge -- If True, return partial sum of the series.
|
||||
"""
|
||||
if length < 2:
|
||||
raise ValueError("length must be at least 2")
|
||||
if frequency == CountStat.HOUR:
|
||||
length = days*24
|
||||
seasonality = [non_business_hours_base] * 24 * 7
|
||||
for day in range(5):
|
||||
for hour in range(8):
|
||||
seasonality[24*day + hour] = business_hours_base
|
||||
holidays = []
|
||||
for i in range(days):
|
||||
holidays.extend([random() < holiday_rate] * 24)
|
||||
elif frequency == CountStat.DAY:
|
||||
seasonality = [business_hours_base]*5 + [non_business_hours_base]*2
|
||||
length = days
|
||||
seasonality = [8*business_hours_base + 16*non_business_hours_base] * 5 + \
|
||||
[24*non_business_hours_base] * 2
|
||||
holidays = [random() < holiday_rate for i in range(days)]
|
||||
else:
|
||||
raise ValueError("Unknown frequency: %s" % (frequency,))
|
||||
if length < 2:
|
||||
raise ValueError("Must be generating at least 2 data points. "
|
||||
"Currently generating %s" % (length,))
|
||||
growth_base = growth ** (1. / (length-1))
|
||||
values_no_noise = [seasonality[i % len(seasonality)] * (growth_base**i) for i in range(length)]
|
||||
|
||||
|
@ -51,8 +60,8 @@ def generate_time_series_data(length, business_hours_base, non_business_hours_ba
|
|||
for i in range(1, length):
|
||||
noise_scalars.append(noise_scalars[-1]*autocorrelation + gauss(0, 1)*(1-autocorrelation))
|
||||
|
||||
values = [0 if random() < holiday_rate else int(v + sqrt(v)*noise_scalar*spikiness)
|
||||
for v, noise_scalar in zip(values_no_noise, noise_scalars)]
|
||||
values = [0 if holiday else int(v + sqrt(v)*noise_scalar*spikiness)
|
||||
for v, noise_scalar, holiday in zip(values_no_noise, noise_scalars, holidays)]
|
||||
if is_gauge:
|
||||
for i in range(1, length):
|
||||
values[i] = values[i-1] + values[i]
|
||||
|
|
|
@ -48,9 +48,11 @@ class Command(BaseCommand):
|
|||
stat = COUNT_STATS['active_users:is_bot']
|
||||
if not RealmCount.objects.filter(property=stat.property).exists():
|
||||
last_end_time = floor_to_day(timezone.now())
|
||||
human_data = generate_time_series_data(100, 30, 10, growth=5, autocorrelation=.5,
|
||||
human_data = generate_time_series_data(days=100, business_hours_base=30,
|
||||
non_business_hours_base=10, growth=5, autocorrelation=.5,
|
||||
spikiness=3, frequency=CountStat.DAY)
|
||||
bot_data = generate_time_series_data(100, 20, 20, growth=3, frequency=CountStat.DAY)
|
||||
bot_data = generate_time_series_data(days=100, business_hours_base=20,
|
||||
non_business_hours_base=20, growth=3, frequency=CountStat.DAY)
|
||||
bulk_create_realmcount(stat.property, 'false', last_end_time,
|
||||
stat.frequency, stat.interval, human_data, realm)
|
||||
bulk_create_realmcount(stat.property, 'true', last_end_time,
|
||||
|
|
Loading…
Reference in New Issue