migration: Add `extra_data_json` for audit log models.

Note that we use the DjangoJSONEncoder so that we have built-in
support for serializing Decimal and datetime values.
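For example (a quick illustration of standard DjangoJSONEncoder
behavior, not code from this commit), both types are encoded as
strings:

    import datetime
    import json
    from decimal import Decimal

    from django.core.serializers.json import DjangoJSONEncoder

    json.dumps(
        {"price": Decimal("8.50"), "when": datetime.datetime(2022, 9, 30, 20, 25)},
        cls=DjangoJSONEncoder,
    )
    # -> '{"price": "8.50", "when": "2022-09-30T20:25:00"}'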

During this intermediate state, the migration that creates the
extra_data_json field has been run, and we prepare for running the
backfill migration that populates extra_data_json from extra_data.

This change implements double-write, which is important for keeping
the two extra_data fields consistent. For most extra_data usage, this
is handled by the overridden `save` method on `AbstractRealmAuditLog`,
where we generate extra_data_json from extra_data using either
orjson.loads or ast.literal_eval.
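As a rough sketch of that conversion (the real logic is the `save`
override added in this commit), the two legacy extra_data formats are
handled as follows:

    import ast

    import orjson

    # extra_data written via str(dict) uses Python literal syntax.
    ast.literal_eval("{'old_value': 10, 'new_value': 20}")
    # -> {'old_value': 10, 'new_value': 20}

    # extra_data written via orjson.dumps(...).decode() is plain JSON.
    orjson.loads('{"old_value": 10, "new_value": 20}')
    # -> {'old_value': 10, 'new_value': 20}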

While backfilling ensures that old realm audit log entries have
extra_data_json populated, double-write ensures that any newly
generated entries also have extra_data_json set, so that we can later
safely rename extra_data_json to extra_data while preserving the
non-nullable invariant.

For completeness, we additionally set RealmAuditLog.NEW_VALUE for the
USER_FULL_NAME_CHANGED event. This cannot be handled by the overridden
`save`, because the legacy extra_data for that event type is the old
name as a bare string, not a stringified dict or JSON.

This addresses: https://github.com/zulip/zulip/pull/23116#discussion_r1040277795

Note that extra_data_json is not read anywhere yet, so the test cases
do not need to switch to checking extra_data_json; that happens later,
after we rename extra_data_json to extra_data.

Double-write for the remote server audit logs is special, because we
only receive the serialized extra_data from an external source.
Luckily, for audit logs of event types in SYNC_BILLING_EVENTS, none of
the payloads carry extra_data that is not generated with orjson.dumps.
This can be verified by looking at:

`git grep -A 6 -E "event_type=.*(USER_CREATED|USER_ACTIVATED|USER_DEACTIVATED|USER_REACTIVATED|USER_ROLE_CHANGED|REALM_DEACTIVATED|REALM_REACTIVATED)"`

Therefore, we just need to populate extra_data_json with an
orjson.loads call after a None-check.
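A minimal sketch of that bouncer-side handling (simplified from the
remote_server_post_analytics change in this commit; here `row` stands
for one realmauditlog row from the POSTed payload, and the real code
also keeps the original string around to double-write extra_data):

    import orjson

    extra_data_json = {}
    if row["extra_data"] is not None:
        # Safe because extra_data for SYNC_BILLING_EVENTS event types
        # is always produced by orjson.dumps.
        extra_data_json = orjson.loads(row["extra_data"])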

Co-authored-by: Zixuan James Li <p359101898@gmail.com>

View File

@@ -654,6 +654,7 @@ def do_change_remote_server_plan_type(remote_server: RemoteZulipServer, plan_typ
server=remote_server,
event_time=timezone_now(),
extra_data=str({"old_value": old_value, "new_value": plan_type}),
extra_data_json={"old_value": old_value, "new_value": plan_type},
)
@@ -738,6 +739,8 @@ def process_initial_upgrade(
event_time=billing_cycle_anchor,
event_type=RealmAuditLog.CUSTOMER_PLAN_CREATED,
extra_data=orjson.dumps(plan_params, default=decimal_to_float).decode(),
# Note that DjangoJSONEncoder has builtin support for parsing Decimal
extra_data_json=plan_params,
)
if not free_trial:
@@ -974,6 +977,7 @@ def attach_discount_to_realm(
event_type=RealmAuditLog.REALM_DISCOUNT_CHANGED,
event_time=timezone_now(),
extra_data=str({"old_discount": old_discount, "new_discount": discount}),
extra_data_json={"old_discount": old_discount, "new_discount": discount},
)
@@ -989,6 +993,7 @@ def update_sponsorship_status(
event_type=RealmAuditLog.REALM_SPONSORSHIP_PENDING_STATUS_CHANGED,
event_time=timezone_now(),
extra_data=str({"sponsorship_pending": sponsorship_pending}),
extra_data_json={"sponsorship_pending": sponsorship_pending},
)
@@ -1243,4 +1248,5 @@ def update_billing_method_of_current_plan(
event_type=RealmAuditLog.REALM_BILLING_METHOD_CHANGED,
event_time=timezone_now(),
extra_data=str({"charge_automatically": charge_automatically}),
extra_data_json={"charge_automatically": charge_automatically},
)

View File

@@ -48,4 +48,4 @@ API_FEATURE_LEVEL = 185
# historical commits sharing the same major version, in which case a
# minor version bump suffices.
PROVISION_VERSION = (243, 0)
PROVISION_VERSION = (243, 1)

View File

@@ -204,6 +204,7 @@ def do_change_full_name(
event_type=RealmAuditLog.USER_FULL_NAME_CHANGED,
event_time=event_time,
extra_data=old_name,
extra_data_json={RealmAuditLog.OLD_VALUE: old_name, RealmAuditLog.NEW_VALUE: full_name},
)
payload = dict(user_id=user_profile.id, full_name=user_profile.full_name)
send_event(

View File

@@ -125,6 +125,9 @@ REALMAUDITLOG_PUSHED_FIELDS = [
"realm",
"event_time",
"backfilled",
# Note that we don't need to add extra_data_json here because
# the view remote_server_post_analytics populates extra_data_json
# from the provided extra_data.
"extra_data",
"event_type",
]

View File

@@ -0,0 +1,20 @@
# Generated by Django 4.0.7 on 2022-09-30 20:25
import django
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("zerver", "0451_add_userprofile_api_key_index"),
]
operations = [
migrations.AddField(
model_name="realmauditlog",
name="extra_data_json",
field=models.JSONField(
default=dict, encoder=django.core.serializers.json.DjangoJSONEncoder
),
),
]

View File

@@ -1,3 +1,4 @@
import ast
import datetime
import secrets
import time
@@ -10,6 +11,7 @@ from typing import (
Any,
Callable,
Dict,
Iterable,
List,
Optional,
Pattern,
@@ -39,6 +41,7 @@ from django.contrib.contenttypes.fields import GenericRelation
from django.contrib.postgres.indexes import GinIndex
from django.contrib.postgres.search import SearchVectorField
from django.core.exceptions import ValidationError
from django.core.serializers.json import DjangoJSONEncoder
from django.core.validators import MinLengthValidator, RegexValidator, URLValidator, validate_email
from django.db import models, transaction
from django.db.backends.base.base import BaseDatabaseWrapper
@@ -4450,6 +4453,7 @@ class AbstractRealmAuditLog(models.Model):
ROLE_COUNT_BOTS = "12"
extra_data = models.TextField(null=True)
extra_data_json = models.JSONField(default=dict, encoder=DjangoJSONEncoder)
# Event types
USER_CREATED = 101
@@ -4554,6 +4558,29 @@
class Meta:
abstract = True
def save(
self,
force_insert: bool = False,
force_update: bool = False,
using: Optional[str] = None,
update_fields: Optional[Iterable[str]] = None,
) -> None:
# Unless extra_data is set and extra_data_json is unset (defaults to {})
# we do not attempt to auto convert extra_data to extra_data_json
if self.extra_data is not None and not self.extra_data_json:
try:
if self.extra_data.startswith("{'"):
self.extra_data_json = ast.literal_eval(self.extra_data)
else:
self.extra_data_json = orjson.loads(self.extra_data)
except (
Exception
): # nocoverage # This is not intended to happen at all for correctly written code
raise Exception(
"extra_data_json must be explicitly set if extra_data is not str()'d from a dict or JSON-encoded."
)
super().save(force_insert, force_update, using, update_fields)
class RealmAuditLog(AbstractRealmAuditLog):
"""

View File

@@ -900,7 +900,11 @@ class AnalyticsBouncerTest(BouncerTestCase):
self.add_mock_response()
def verify_request_with_overridden_extra_data(
request_extra_data: object, expected_extra_data: object
request_extra_data: object,
*,
expected_extra_data: object = None,
expected_extra_data_json: object = None,
skip_audit_log_check: bool = False,
) -> None:
user = self.example_user("hamlet")
log_entry = RealmAuditLog.objects.create(
@@ -935,27 +939,44 @@
):
send_analytics_to_remote_server()
if skip_audit_log_check:
return
remote_log_entry = RemoteRealmAuditLog.objects.order_by("id").last()
assert remote_log_entry is not None
self.assertEqual(str(remote_log_entry.server.uuid), self.server_uuid)
self.assertEqual(remote_log_entry.remote_id, log_entry.id)
self.assertEqual(remote_log_entry.event_time, self.TIME_ZERO)
self.assertEqual(remote_log_entry.extra_data, expected_extra_data)
self.assertEqual(remote_log_entry.extra_data_json, expected_extra_data_json)
# Pre-migration extra_data
verify_request_with_overridden_extra_data(
request_extra_data=orjson.dumps({"fake_data": 42}).decode(),
expected_extra_data=orjson.dumps({"fake_data": 42}).decode(),
expected_extra_data_json={"fake_data": 42},
)
verify_request_with_overridden_extra_data(
request_extra_data=None, expected_extra_data=None, expected_extra_data_json={}
)
verify_request_with_overridden_extra_data(request_extra_data=None, expected_extra_data=None)
# Post-migration extra_data
verify_request_with_overridden_extra_data(
request_extra_data={"fake_data": 42},
expected_extra_data=orjson.dumps({"fake_data": 42}).decode(),
expected_extra_data_json={"fake_data": 42},
)
verify_request_with_overridden_extra_data(
request_extra_data={}, expected_extra_data=orjson.dumps({}).decode()
request_extra_data={},
expected_extra_data=orjson.dumps({}).decode(),
expected_extra_data_json={},
)
# Invalid extra_data
with self.assertLogs(level="WARNING") as m:
verify_request_with_overridden_extra_data(
request_extra_data="{malformedjson:",
skip_audit_log_check=True,
)
self.assertIn("Malformed audit log data", m.output[0])
class PushNotificationTest(BouncerTestCase):

View File

@@ -0,0 +1,27 @@
# Generated by Django 4.0.7 on 2022-09-30 20:25
import django
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("zilencer", "0025_alter_remotepushdevicetoken_user_id_drop_index"),
]
operations = [
migrations.AddField(
model_name="remoterealmauditlog",
name="extra_data_json",
field=models.JSONField(
default=dict, encoder=django.core.serializers.json.DjangoJSONEncoder
),
),
migrations.AddField(
model_name="remotezulipserverauditlog",
name="extra_data_json",
field=models.JSONField(
default=dict, encoder=django.core.serializers.json.DjangoJSONEncoder
),
),
]

View File

@@ -481,14 +481,27 @@ def remote_server_post_analytics(
if realmauditlog_rows is not None:
remote_realm_audit_logs = []
for row in realmauditlog_rows:
extra_data = {}
extra_data_str = None
# Remote servers that do support JSONField will pass extra_data
# as a dict. Otherwise, extra_data will be either a string or None.
if isinstance(row["extra_data"], dict):
if isinstance(row["extra_data"], str):
# A valid "extra_data" as a str, if present, should always be generated from
# orjson.dumps because the POSTed analytics data for RealmAuditLog is restricted
# to event types in SYNC_BILLING_EVENTS.
# For these event types, we don't create extra_data that requires special
# handling to fit into the JSONField.
try:
extra_data = orjson.loads(row["extra_data"])
except orjson.JSONDecodeError:
raise JsonableError(_("Malformed audit log data"))
extra_data_str = row["extra_data"]
elif row["extra_data"] is not None:
assert isinstance(row["extra_data"], dict)
extra_data = row["extra_data"]
# This is guaranteed to succeed because row["extra_data"] would be parsed
# from JSON with our json validator if it is a dict.
extra_data = orjson.dumps(row["extra_data"]).decode()
else:
extra_data = row["extra_data"]
extra_data_str = orjson.dumps(row["extra_data"]).decode()
remote_realm_audit_logs.append(
RemoteRealmAuditLog(
realm_id=row["realm"],
@@ -498,7 +511,8 @@ def remote_server_post_analytics(
row["event_time"], tz=datetime.timezone.utc
),
backfilled=row["backfilled"],
extra_data=extra_data,
extra_data=extra_data_str,
extra_data_json=extra_data,
event_type=row["event_type"],
)
)