mirror of https://github.com/zulip/zulip.git
analytics: Fix missing unique constraint when subgroup is null.
Replaced unique_together with UniqueConstraint in models that covered nullable fields as in unique_together database indexes don't work where subgroup=None. So added conditional unique index handling invalid duplicate Count data. Added 0015_clear_duplicate_counts migration to handle existing data that violates the constraints. Also corrected a test case in test_counts.py which didn't clear its state properly and thus was accidentally taking advantage of this database schema bug.
This commit is contained in:
parent
dcc92de205
commit
f299f31340
|
@ -0,0 +1,53 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
from django.db import migrations
|
||||
from django.db.backends.postgresql_psycopg2.schema import DatabaseSchemaEditor
|
||||
from django.db.migrations.state import StateApps
|
||||
from django.db.models import Count, Sum, Q
|
||||
|
||||
def clear_duplicate_counts(apps: StateApps, schema_editor: DatabaseSchemaEditor) -> None:
|
||||
"""This is a preparatory migration for our Analytics tables.
|
||||
|
||||
The backstory is that Django's unique_together indexes do not properly
|
||||
handle the subgroup=None corner case (allowing duplicate rows that have a
|
||||
subgroup of None), which meant that in race conditions, rather than updating
|
||||
an existing row for the property/realm/time with subgroup=None, Django would
|
||||
create a duplicate row.
|
||||
|
||||
In the next migration, we'll add a proper constraint to fix this bug, but
|
||||
we need to fix any existing problematic rows before we can add that constraint.
|
||||
|
||||
We fix this in an appropriate fashion for each type of CountStat object; mainly
|
||||
this means deleting the extra rows, but for LoggingCountStat objects, we need to
|
||||
additionally combine the sums.
|
||||
"""
|
||||
RealmCount = apps.get_model('analytics', 'RealmCount')
|
||||
|
||||
realm_counts = RealmCount.objects.filter(subgroup=None).values(
|
||||
'realm_id', 'property', 'end_time').annotate(
|
||||
Count('id'), Sum('value')).filter(id__count__gt=1)
|
||||
|
||||
for realm_count in realm_counts:
|
||||
realm_count.pop('id__count')
|
||||
total_value = realm_count.pop('value__sum')
|
||||
duplicate_counts = list(RealmCount.objects.filter(**realm_count))
|
||||
first_count = duplicate_counts[0]
|
||||
if realm_count['property'] in ["invites_sent::day", "active_users_log:is_bot:day"]:
|
||||
# For LoggingCountStat objects, the right fix is to combine the totals;
|
||||
# for other CountStat objects, we expect the duplicates to have the same value.
|
||||
# And so all we need to do is delete them.
|
||||
first_count.value = total_value
|
||||
first_count.save()
|
||||
to_cleanup = duplicate_counts[1:]
|
||||
for duplicate_count in to_cleanup:
|
||||
duplicate_count.delete()
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('analytics', '0014_remove_fillstate_last_modified'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.RunPython(clear_duplicate_counts,
|
||||
reverse_code=migrations.RunPython.noop),
|
||||
]
|
|
@ -0,0 +1,61 @@
|
|||
# Generated by Django 2.2.10 on 2020-02-29 19:40
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('analytics', '0015_clear_duplicate_counts'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterUniqueTogether(
|
||||
name='installationcount',
|
||||
unique_together=set(),
|
||||
),
|
||||
migrations.AlterUniqueTogether(
|
||||
name='realmcount',
|
||||
unique_together=set(),
|
||||
),
|
||||
migrations.AlterUniqueTogether(
|
||||
name='streamcount',
|
||||
unique_together=set(),
|
||||
),
|
||||
migrations.AlterUniqueTogether(
|
||||
name='usercount',
|
||||
unique_together=set(),
|
||||
),
|
||||
migrations.AddConstraint(
|
||||
model_name='installationcount',
|
||||
constraint=models.UniqueConstraint(condition=models.Q(subgroup__isnull=False), fields=('property', 'subgroup', 'end_time'), name='unique_installation_count'),
|
||||
),
|
||||
migrations.AddConstraint(
|
||||
model_name='installationcount',
|
||||
constraint=models.UniqueConstraint(condition=models.Q(subgroup__isnull=True), fields=('property', 'end_time'), name='unique_installation_count_null_subgroup'),
|
||||
),
|
||||
migrations.AddConstraint(
|
||||
model_name='realmcount',
|
||||
constraint=models.UniqueConstraint(condition=models.Q(subgroup__isnull=False), fields=('realm', 'property', 'subgroup', 'end_time'), name='unique_realm_count'),
|
||||
),
|
||||
migrations.AddConstraint(
|
||||
model_name='realmcount',
|
||||
constraint=models.UniqueConstraint(condition=models.Q(subgroup__isnull=True), fields=('realm', 'property', 'end_time'), name='unique_realm_count_null_subgroup'),
|
||||
),
|
||||
migrations.AddConstraint(
|
||||
model_name='streamcount',
|
||||
constraint=models.UniqueConstraint(condition=models.Q(subgroup__isnull=False), fields=('stream', 'property', 'subgroup', 'end_time'), name='unique_stream_count'),
|
||||
),
|
||||
migrations.AddConstraint(
|
||||
model_name='streamcount',
|
||||
constraint=models.UniqueConstraint(condition=models.Q(subgroup__isnull=True), fields=('stream', 'property', 'end_time'), name='unique_stream_count_null_subgroup'),
|
||||
),
|
||||
migrations.AddConstraint(
|
||||
model_name='usercount',
|
||||
constraint=models.UniqueConstraint(condition=models.Q(subgroup__isnull=False), fields=('user', 'property', 'subgroup', 'end_time'), name='unique_user_count'),
|
||||
),
|
||||
migrations.AddConstraint(
|
||||
model_name='usercount',
|
||||
constraint=models.UniqueConstraint(condition=models.Q(subgroup__isnull=True), fields=('user', 'property', 'end_time'), name='unique_user_count_null_subgroup'),
|
||||
),
|
||||
]
|
|
@ -2,6 +2,7 @@ import datetime
|
|||
from typing import Optional
|
||||
|
||||
from django.db import models
|
||||
from django.db.models import Q, UniqueConstraint
|
||||
|
||||
from zerver.lib.timestamp import floor_to_day
|
||||
from zerver.models import Realm, Stream, UserProfile
|
||||
|
@ -47,7 +48,17 @@ class BaseCount(models.Model):
|
|||
class InstallationCount(BaseCount):
|
||||
|
||||
class Meta:
|
||||
unique_together = ("property", "subgroup", "end_time")
|
||||
# Handles invalid duplicate InstallationCount data
|
||||
constraints = [
|
||||
UniqueConstraint(
|
||||
fields=["property", "subgroup", "end_time"],
|
||||
condition=Q(subgroup__isnull=False),
|
||||
name='unique_installation_count'),
|
||||
UniqueConstraint(
|
||||
fields=["property", "end_time"],
|
||||
condition=Q(subgroup__isnull=True),
|
||||
name='unique_installation_count_null_subgroup')
|
||||
]
|
||||
|
||||
def __str__(self) -> str:
|
||||
return "<InstallationCount: %s %s %s>" % (self.property, self.subgroup, self.value)
|
||||
|
@ -56,7 +67,17 @@ class RealmCount(BaseCount):
|
|||
realm = models.ForeignKey(Realm, on_delete=models.CASCADE)
|
||||
|
||||
class Meta:
|
||||
unique_together = ("realm", "property", "subgroup", "end_time")
|
||||
# Handles invalid duplicate RealmCount data
|
||||
constraints = [
|
||||
UniqueConstraint(
|
||||
fields=["realm", "property", "subgroup", "end_time"],
|
||||
condition=Q(subgroup__isnull=False),
|
||||
name='unique_realm_count'),
|
||||
UniqueConstraint(
|
||||
fields=["realm", "property", "end_time"],
|
||||
condition=Q(subgroup__isnull=True),
|
||||
name='unique_realm_count_null_subgroup')
|
||||
]
|
||||
index_together = ["property", "end_time"]
|
||||
|
||||
def __str__(self) -> str:
|
||||
|
@ -67,7 +88,17 @@ class UserCount(BaseCount):
|
|||
realm = models.ForeignKey(Realm, on_delete=models.CASCADE)
|
||||
|
||||
class Meta:
|
||||
unique_together = ("user", "property", "subgroup", "end_time")
|
||||
# Handles invalid duplicate UserCount data
|
||||
constraints = [
|
||||
UniqueConstraint(
|
||||
fields=["user", "property", "subgroup", "end_time"],
|
||||
condition=Q(subgroup__isnull=False),
|
||||
name='unique_user_count'),
|
||||
UniqueConstraint(
|
||||
fields=["user", "property", "end_time"],
|
||||
condition=Q(subgroup__isnull=True),
|
||||
name='unique_user_count_null_subgroup')
|
||||
]
|
||||
# This index dramatically improves the performance of
|
||||
# aggregating from users to realms
|
||||
index_together = ["property", "realm", "end_time"]
|
||||
|
@ -80,7 +111,17 @@ class StreamCount(BaseCount):
|
|||
realm = models.ForeignKey(Realm, on_delete=models.CASCADE)
|
||||
|
||||
class Meta:
|
||||
unique_together = ("stream", "property", "subgroup", "end_time")
|
||||
# Handles invalid duplicate StreamCount data
|
||||
constraints = [
|
||||
UniqueConstraint(
|
||||
fields=["stream", "property", "subgroup", "end_time"],
|
||||
condition=Q(subgroup__isnull=False),
|
||||
name='unique_stream_count'),
|
||||
UniqueConstraint(
|
||||
fields=["stream", "property", "end_time"],
|
||||
condition=Q(subgroup__isnull=True),
|
||||
name='unique_stream_count_null_subgroup')
|
||||
]
|
||||
# This index dramatically improves the performance of
|
||||
# aggregating from streams to realms
|
||||
index_together = ["property", "realm", "end_time"]
|
||||
|
|
|
@ -1359,6 +1359,7 @@ class TestRealmActiveHumans(AnalyticsTestCase):
|
|||
self.create_user(realm=third_realm)
|
||||
|
||||
RealmCount.objects.all().delete()
|
||||
InstallationCount.objects.all().delete()
|
||||
for i in [-1, 0, 1]:
|
||||
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO + i*self.DAY)
|
||||
self.assertTableState(RealmCount, ['value', 'realm', 'end_time'],
|
||||
|
|
Loading…
Reference in New Issue