# These are tests for Zulip's database migrations. System documented at:
# https://zulip.readthedocs.io/en/latest/subsystems/schema-migrations.html
#
# You can also read
# https://www.caktusgroup.com/blog/2016/02/02/writing-unit-tests-django-migrations/
# to get a tutorial on the framework that inspired this feature.
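#
# In outline (a sketch of the pattern from the tutorial above, which
# Zulip's MigrationsTestCase follows): each test case declares
# migrate_from and migrate_to; the harness migrates the test database
# back to migrate_from, calls setUpBeforeMigration(apps) with the
# historical app registry, and then applies migrate_to before the
# test_* methods run.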
from decimal import Decimal
from typing import Optional

import orjson
from django.db.migrations.state import StateApps
from django.utils.timezone import now as timezone_now

from zerver.lib.test_classes import MigrationsTestCase
from zerver.lib.test_helpers import use_db_models

# Important note: These tests are very expensive, and details of
# Django's database transaction model mean it does not work well to
# have many migrations tested in this file at once; so we usually
# delete the old migration tests when adding a new one, so this file
# always has a single migration test in it as an example.
#
# The error you get with multiple similar tests doing migrations on
# the same table is this (table name may vary):
#
# django.db.utils.OperationalError: cannot ALTER TABLE
# "zerver_subscription" because it has pending trigger events
#
# As a result, we generally mark these tests as skipped once they have
# been tested for a migration being merged.
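#
# As an illustration only (the exact decorator and message are up to
# the author), a merged migration's test could be disabled like this:
#
#   from unittest import skip
#
#   @skip("0460 has been merged; kept only as an example.")
#   class SomeOldMigrationTest(MigrationsTestCase):
#       ...
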
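# These values mirror attributes of zerver.models.RealmAuditLog (the
# event type codes and the extra_data dict keys). They are duplicated
# here because migration tests work with historical models, which do
# not carry the class attributes defined on the current model.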
USER_ACTIVATED = 102
USER_FULL_NAME_CHANGED = 124
REALM_DISCOUNT_CHANGED = 209
OLD_VALUE = "1"
NEW_VALUE = "2"


class RealmAuditLogExtraData(MigrationsTestCase):
migrate_from = "0459_remove_invalid_characters_from_user_group_name"
migrate_to = "0460_backfill_realmauditlog_extradata_to_json_field"
full_name_change_log_id: Optional[int] = None
valid_json_log_id: Optional[int] = None
str_json_log_id: Optional[int] = None
    # BATCH_SIZE is defined as 5000 in
    # backfill_realmauditlog_extradata_to_json_field; DATA_SIZE is set
    # just above two full batches (5000 + 5000 + 5 = 10005) so that the
    # test exercises the batching logic, including a final partial batch.
DATA_SIZE = 10005
expected_console_output = """Audit log entry 50003 with event type REALM_DISCOUNT_CHANGED is skipped.
The data consistency needs to be manually checked.
Discount data to remove after the upcoming JSONField migration:
{'old_discount': Decimal('25.0000'), 'new_discount': Decimal('50')}
Discount data to keep after the upcoming JSONField migration:
{}
Audit log entry 50004 with event type REALM_DISCOUNT_CHANGED is skipped.
The data consistency needs to be manually checked.
Discount data to remove after the upcoming JSONField migration:
{'old_discount': Decimal('25.0000'), 'new_discount': Decimal('50')}
Discount data to keep after the upcoming JSONField migration:
{'new_discount': '50', 'old_discount': '25.0000'}
Audit log entry with id 50001 has extra_data_json been inconsistently overwritten.
The old value is:
{"corrupted":"foo"}
The new value is:
{"key":"value"}
Audit log entry with id 50002 has extra_data_json been inconsistently overwritten.
The old value is:
{"corrupted":"bar"}
The new value is:
{"key":"value"}
"""
@use_db_models
def setUpBeforeMigration(self, apps: StateApps) -> None:
Realm = apps.get_model("zerver", "Realm")
RealmAuditLog = apps.get_model("zerver", "RealmAuditLog")
event_time = timezone_now()
realm = Realm.objects.get(string_id="zulip")
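        # For USER_FULL_NAME_CHANGED, extra_data holds a plain string
        # rather than JSON or a Python-literal dict, so the migration
        # special-cases this event type.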
full_name_change_log = RealmAuditLog(
realm=realm,
event_type=USER_FULL_NAME_CHANGED,
event_time=event_time,
extra_data="foo",
)
new_full_name_change_log = RealmAuditLog(
realm=realm,
event_type=USER_FULL_NAME_CHANGED,
event_time=event_time,
extra_data="foo",
extra_data_json={OLD_VALUE: "foo", NEW_VALUE: "bar"},
)
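        # An entry whose extra_data is already valid JSON.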
valid_json_log = RealmAuditLog(
realm=realm,
event_type=USER_ACTIVATED,
event_time=event_time,
extra_data=orjson.dumps({"key": "value"}).decode(),
)
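        # str() on a dict produces single-quoted, non-JSON output; the
        # migration falls back to ast.literal_eval for such entries.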
str_json_log = RealmAuditLog(
realm=realm,
event_type=USER_ACTIVATED,
event_time=event_time,
extra_data=str({"key": "value"}),
)
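        # An entry already marked inconsistent by a previous run of the
        # migration; it should be skipped rather than have the
        # inconsistent_old_extra_data* keys nested again.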
self.backfilled_inconsistent_log_id = RealmAuditLog.objects.create(
realm=realm,
event_type=USER_ACTIVATED,
event_time=event_time,
extra_data=orjson.dumps({"key": "baz"}).decode(),
extra_data_json={
"key": "baz",
"inconsistent_old_extra_data": orjson.dumps({"key": "baz"}).decode(),
"inconsistent_old_extra_data_json": {"key": "value corrupted"},
},
).id
        # The following audit log entries have preset ids because the
        # expected console output that asserts them is defined before
        # the test case runs, with these ids baked into it.
inconsistent_json_log = RealmAuditLog(
id=50001,
realm=realm,
event_type=USER_ACTIVATED,
event_time=event_time,
extra_data=orjson.dumps({"key": "value"}).decode(),
extra_data_json={"corrupted": "foo"},
)
inconsistent_str_json_log = RealmAuditLog(
id=50002,
realm=realm,
event_type=USER_ACTIVATED,
event_time=event_time,
extra_data=str({"key": "value"}),
extra_data_json={"corrupted": "bar"},
)
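        # REALM_DISCOUNT_CHANGED entries contain Decimal values, which
        # are neither valid JSON nor Python literals accepted by
        # ast.literal_eval, so the migration skips them and logs a
        # warning instead of backfilling.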
self.old_decimal_log_id = RealmAuditLog.objects.create(
id=50003,
realm=realm,
event_type=REALM_DISCOUNT_CHANGED,
event_time=event_time,
extra_data=str({"old_discount": Decimal("25.0000"), "new_discount": Decimal("50")}),
).id
self.new_decimal_log_id = RealmAuditLog.objects.create(
id=50004,
realm=realm,
event_type=REALM_DISCOUNT_CHANGED,
event_time=event_time,
extra_data=str({"old_discount": Decimal("25.0000"), "new_discount": Decimal("50")}),
extra_data_json={"old_discount": Decimal("25.0000"), "new_discount": Decimal("50")},
).id
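        # bulk_create bypasses the model's overridden save method, so
        # extra_data_json is not auto-populated for these entries; that
        # leaves them for the migration itself to backfill.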
RealmAuditLog.objects.bulk_create(
[
full_name_change_log,
new_full_name_change_log,
valid_json_log,
str_json_log,
inconsistent_json_log,
inconsistent_str_json_log,
]
)
self.full_name_change_log_id = full_name_change_log.id
self.new_full_name_change_log_id = new_full_name_change_log.id
self.valid_json_log_id = valid_json_log.id
self.str_json_log_id = str_json_log.id
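        # Create enough well-formed entries to span multiple batches of
        # the backfill (see DATA_SIZE above).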
other_logs = []
for i in range(self.DATA_SIZE):
other_logs.append(
RealmAuditLog(
realm=realm,
event_type=USER_ACTIVATED,
event_time=event_time,
extra_data=orjson.dumps({"data": i}).decode(),
)
)
self.other_logs_id = [
audit_log.id for audit_log in RealmAuditLog.objects.bulk_create(other_logs)
]
        # As of now, the migration has not run, so no new audit log
        # entry should have extra_data_json populated except the five
        # entries created above with explicit non-default values.
self.assert_length(
RealmAuditLog.objects.filter(
event_time__gte=event_time,
).exclude(
extra_data_json={},
),
5,
        )

    def test_realmaudit_log_extra_data_to_json(self) -> None:
RealmAuditLog = self.apps.get_model("zerver", "RealmAuditLog")
self.assertIsNotNone(self.full_name_change_log_id)
self.assertIsNotNone(self.valid_json_log_id)
self.assertIsNotNone(self.str_json_log_id)
full_name_change_log = RealmAuditLog.objects.filter(id=self.full_name_change_log_id).first()
new_full_name_change_log = RealmAuditLog.objects.filter(
id=self.new_full_name_change_log_id
).first()
valid_json_log = RealmAuditLog.objects.filter(id=self.valid_json_log_id).first()
str_json_log = RealmAuditLog.objects.filter(id=self.str_json_log_id).first()
self.assertIsNotNone(full_name_change_log)
self.assertEqual(full_name_change_log.extra_data_json, {"1": "foo", "2": None})
self.assertIsNotNone(new_full_name_change_log)
self.assertEqual(new_full_name_change_log.extra_data_json, {"1": "foo", "2": "bar"})
self.assertIsNotNone(valid_json_log)
self.assertEqual(valid_json_log.extra_data_json, {"key": "value"})
self.assertIsNotNone(str_json_log)
self.assertEqual(str_json_log.extra_data_json, {"key": "value"})
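        # Every bulk-created entry should have been backfilled,
        # confirming that all batches were processed.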
other_logs = RealmAuditLog.objects.filter(id__in=self.other_logs_id).order_by("id")
self.assertIsNotNone(other_logs)
self.assert_length(other_logs, self.DATA_SIZE)
for index, audit_log in enumerate(other_logs):
self.assertEqual(audit_log.extra_data_json, {"data": index})
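        # Entries flagged as inconsistent end up with the value derived
        # from extra_data, plus the pre-migration state preserved under
        # the inconsistent_old_extra_data* keys.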
inconsistent_json_log = RealmAuditLog.objects.get(
extra_data_json__inconsistent_old_extra_data=orjson.dumps({"key": "value"}).decode()
)
self.assertIsNotNone(inconsistent_json_log)
self.assertEqual(inconsistent_json_log.id, 50001)
self.assertEqual(
inconsistent_json_log.extra_data_json["inconsistent_old_extra_data_json"],
{"corrupted": "foo"},
)
inconsistent_str_json_log = RealmAuditLog.objects.get(
extra_data_json__inconsistent_old_extra_data=str({"key": "value"})
)
self.assertIsNotNone(inconsistent_str_json_log)
self.assertEqual(inconsistent_str_json_log.id, 50002)
self.assertEqual(
inconsistent_str_json_log.extra_data_json["inconsistent_old_extra_data_json"],
{"corrupted": "bar"},
)
backfilled_inconsistent_log = RealmAuditLog.objects.get(
id=self.backfilled_inconsistent_log_id
)
self.assertIsNotNone(backfilled_inconsistent_log)
self.assertEqual(
backfilled_inconsistent_log.extra_data_json,
{
"key": "baz",
"inconsistent_old_extra_data": orjson.dumps({"key": "baz"}).decode(),
"inconsistent_old_extra_data_json": {"key": "value corrupted"},
},
)