migrations: Backfill url_template from url format string.

This is implemented by replacing all matches of "%(var_name)s" in a URL
format string with "{var_name}". Since we want to ensure that the
templates aren't broken after this migration, a RuntimeError is raised
to let the maintainer know that a linkifier cannot be converted
automatically if its converted template does not pass the
uri_template.validate check.

Also, we need to unescape "%%", which is used to represent "%" in the old
format string syntax, and to percent-encode "{" and "}", which are part
of the URL template syntax.

Signed-off-by: Zixuan James Li <p359101898@gmail.com>
This commit is contained in:
Zixuan James Li 2022-10-05 15:18:45 -04:00 committed by Tim Abbott
parent a19c80df43
commit ab53e8d3e6
3 changed files with 124 additions and 383 deletions

View File

@ -48,4 +48,4 @@ API_FEATURE_LEVEL = 175
# historical commits sharing the same major version, in which case a
# minor version bump suffices.
PROVISION_VERSION = (233, 2)
PROVISION_VERSION = (233, 3)

View File

@ -0,0 +1,48 @@
# Generated by Django 4.0.7 on 2022-10-02 20:50
import re
import uri_template
from django.db import migrations
from django.db.backends.base.schema import BaseDatabaseSchemaEditor
from django.db.migrations.state import StateApps
def transform_to_url_template_syntax(
    apps: StateApps, schema_editor: BaseDatabaseSchemaEditor
) -> None:
    """Backfill ``url_template`` on every RealmFilter from ``url_format_string``.

    Each old-style format string is converted in three steps:

    1. Literal "{" and "}" are percent-encoded ("%7B" / "%7D") so they are
       not mistaken for URL template expressions.
    2. Every "%(name)s" placeholder becomes "{name}".
    3. Every "%%" (an escaped percent sign in the old syntax) becomes "%".

    Args:
        apps: Historical app registry used to look up the RealmFilter model.
        schema_editor: Unused; present because RunPython passes it.

    Raises:
        RuntimeError: If a converted template is rejected by
            ``uri_template.validate``. The message names the offending
            format string so the maintainer can fix that linkifier by hand.
    """
    linkifier_model = apps.get_model("zerver", "RealmFilter")
    # Materialize once so the instances mutated in the loop below are
    # exactly the ones handed to bulk_update.
    linkifiers = list(linkifier_model.objects.all())

    # Matches anything of the form %(variable_name)s.  Group 1 captures any
    # run of "%%" pairs directly in front of the placeholder, and the
    # lookbehind keeps the match from starting in the middle of such a run,
    # so an escaped "%%(name)s" is left alone rather than treated as a
    # variable.
    var_pattern = re.compile(r"(?<!%)((?:%%)*)%\(([a-zA-Z0-9_-]+)\)s")
    escape_table = str.maketrans(
        {
            "{": "%7B",
            "}": "%7D",
        }
    )

    for linkifier in linkifiers:
        # Escape literal braces first, before any "{name}" is introduced.
        converted_template = linkifier.url_format_string.translate(escape_table)
        # Replace format string variables with the RFC 6570 URI Template syntax
        converted_template = var_pattern.sub(r"\1{\2}", converted_template).replace("%%", "%")
        if not uri_template.validate(converted_template):
            # Report the original format string (not the regex object) so the
            # failing linkifier can actually be identified.
            raise RuntimeError(
                f'Failed to convert url format "{linkifier.url_format_string}". The converted template "{converted_template}" is invalid.'
            )
        linkifier.url_template = converted_template

    linkifier_model.objects.bulk_update(linkifiers, fields=["url_template"])
class Migration(migrations.Migration):
    # Data-only migration: runs after the url_template column has been added
    # and fills it in from the existing url_format_string values.
    dependencies = [("zerver", "0440_realmfilter_url_template")]

    # Reversing is a no-op: the backfilled values are simply left in place.
    operations = [
        migrations.RunPython(
            code=transform_to_url_template_syntax,
            elidable=True,
            reverse_code=migrations.RunPython.noop,
        ),
    ]

View File

@ -4,16 +4,11 @@
# You can also read
# https://www.caktusgroup.com/blog/2016/02/02/writing-unit-tests-django-migrations/
# to get a tutorial on the framework that inspired this feature.
from typing import Optional
from unittest import skip
import orjson
from django.db.migrations.state import StateApps
from django.utils.timezone import now as timezone_now
from zerver.lib.test_classes import MigrationsTestCase
from zerver.lib.test_helpers import use_db_models
from zerver.models import get_stream
# Important note: These tests are very expensive, and details of
# Django's database transaction model mean it does not super work to
@ -31,390 +26,88 @@ from zerver.models import get_stream
# been tested for a migration being merged.
@skip("Fails because newer migrations have since been merged.") # nocoverage # skipped
class MessageEditHistoryLegacyFormats(MigrationsTestCase):
migrate_from = "0376_set_realmemoji_author_and_reupload_realmemoji"
migrate_to = "0377_message_edit_history_format"
msg_id: Optional[int] = None
class LinkifierURLFormatString(MigrationsTestCase):
migrate_from = "0440_realmfilter_url_template"
migrate_to = "0441_backfill_realmfilter_url_template"
@use_db_models
def setUpBeforeMigration(self, apps: StateApps) -> None:
Recipient = apps.get_model("zerver", "Recipient")
Message = apps.get_model("zerver", "Message")
RealmFilter = apps.get_model("zerver", "RealmFilter")
iago = self.example_user("iago")
stream_name = "Denmark"
denmark = get_stream(stream_name, iago.realm)
denmark_recipient = Recipient.objects.get(type=2, type_id=denmark.id)
self.msg_id = Message.objects.create(
recipient_id=denmark_recipient.id,
subject="topic 4",
sender_id=iago.id,
sending_client_id=1,
content="current message text",
date_sent=timezone_now(),
).id
# topic edits contain only "prev_subject" field.
# stream edits contain only "prev_stream" field.
msg = Message.objects.filter(id=self.msg_id).first()
msg.edit_history = orjson.dumps(
[
{
"user_id": 11,
"timestamp": 1644405050,
"prev_stream": 3,
"prev_subject": "topic 3",
},
{"user_id": 11, "timestamp": 1644405040, "prev_stream": 2},
{
"user_id": 11,
"timestamp": 1644405030,
"prev_content": "test content and topic edit",
"prev_rendered_content": "<p>test content and topic edit</p>",
"prev_rendered_content_version": 1,
"prev_subject": "topic 2",
},
{"user_id": 11, "timestamp": 1644405020, "prev_subject": "topic 1"},
{
"user_id": 11,
"timestamp": 1644405010,
"prev_content": "test content only edit",
"prev_rendered_content": "<p>test content only edit</p>",
"prev_rendered_content_version": 1,
},
urls = [
"http://example.com/",
"https://example.com/",
"https://user:password@example.com/",
"https://example.com/@user/thing",
"https://example.com/!path",
"https://example.com/foo.bar",
"https://example.com/foo[bar]",
"https://example.com/{foo}",
"https://example.com/{foo}{bars}",
"https://example.com/{foo}/and/{bar}",
"https://example.com/?foo={foo}",
"https://example.com/%ab",
"https://example.com/%ba",
"https://example.com/%21",
"https://example.com/words%20with%20spaces",
"https://example.com/back%20to%20{back}",
"https://example.com/encoded%2fwith%2fletters",
"https://example.com/encoded%2Fwith%2Fupper%2Fcase%2Fletters",
"https://example.com/%%",
"https://example.com/%%(",
"https://example.com/%%()",
"https://example.com/%%(foo",
"https://example.com/%%(foo)",
"https://example.com/%%(foo)s",
"https://example.com/%(foo)s",
"https://example.com/%(foo)s%(bar)s",
]
).decode()
msg.save(update_fields=["edit_history"])
self.linkifier_ids = []
def test_message_legacy_edit_history_format(self) -> None:
Message = self.apps.get_model("zerver", "Message")
Recipient = self.apps.get_model("zerver", "Recipient")
iago = self.example_user("iago")
stream_name = "Denmark"
denmark = get_stream(stream_name, iago.realm)
msg = Message.objects.filter(id=self.msg_id).first()
msg_stream_id = Recipient.objects.get(id=msg.recipient_id).type_id
new_edit_history = orjson.loads(msg.edit_history)
self.assert_length(new_edit_history, 5)
# stream and topic edit entry
self.assertFalse("prev_subject" in new_edit_history[0])
self.assertEqual(new_edit_history[0]["prev_topic"], "topic 3")
self.assertEqual(new_edit_history[0]["topic"], msg.subject)
self.assertEqual(new_edit_history[0]["prev_stream"], 3)
self.assertEqual(new_edit_history[0]["stream"], msg_stream_id)
self.assertEqual(new_edit_history[0]["stream"], denmark.id)
self.assertEqual(
set(new_edit_history[0].keys()),
{"timestamp", "prev_topic", "topic", "prev_stream", "stream", "user_id"},
)
# stream only edit entry
self.assertEqual(new_edit_history[1]["prev_stream"], 2)
self.assertEqual(new_edit_history[1]["stream"], 3)
self.assertEqual(
set(new_edit_history[1].keys()), {"timestamp", "prev_stream", "stream", "user_id"}
)
# topic and content edit entry
self.assertFalse("prev_subject" in new_edit_history[2])
self.assertEqual(new_edit_history[2]["prev_topic"], "topic 2")
self.assertEqual(new_edit_history[2]["topic"], "topic 3")
self.assertEqual(new_edit_history[2]["prev_content"], "test content and topic edit")
self.assertEqual(
new_edit_history[2]["prev_rendered_content"], "<p>test content and topic edit</p>"
)
self.assertEqual(new_edit_history[2]["prev_rendered_content_version"], 1)
self.assertEqual(
set(new_edit_history[2].keys()),
{
"timestamp",
"prev_topic",
"topic",
"prev_content",
"prev_rendered_content",
"prev_rendered_content_version",
"user_id",
},
)
# topic only edit entry
self.assertFalse("prev_subject" in new_edit_history[3])
self.assertEqual(new_edit_history[3]["prev_topic"], "topic 1")
self.assertEqual(new_edit_history[3]["topic"], "topic 2")
self.assertEqual(
set(new_edit_history[3].keys()), {"timestamp", "prev_topic", "topic", "user_id"}
)
# content only edit entry - not retested because never changes
self.assertEqual(new_edit_history[4]["prev_content"], "test content only edit")
self.assertEqual(
new_edit_history[4]["prev_rendered_content"], "<p>test content only edit</p>"
)
self.assertEqual(new_edit_history[4]["prev_rendered_content_version"], 1)
self.assertEqual(
set(new_edit_history[4].keys()),
{
"timestamp",
"prev_content",
"prev_rendered_content",
"prev_rendered_content_version",
"user_id",
},
)
@skip("Fails because newer migrations have since been merged.") # nocoverage # skipped
class MessageEditHistoryModernFormats(MigrationsTestCase):
migrate_from = "0376_set_realmemoji_author_and_reupload_realmemoji"
migrate_to = "0377_message_edit_history_format"
msg_id: Optional[int] = None
@use_db_models
def setUpBeforeMigration(self, apps: StateApps) -> None:
Recipient = apps.get_model("zerver", "Recipient")
Message = apps.get_model("zerver", "Message")
iago = self.example_user("iago")
stream_name = "Denmark"
denmark = get_stream(stream_name, iago.realm)
denmark_recipient = Recipient.objects.get(type=2, type_id=denmark.id)
self.msg_id = Message.objects.create(
recipient_id=denmark_recipient.id,
subject="topic 4",
sender_id=iago.id,
sending_client_id=1,
content="current message text",
date_sent=timezone_now(),
for index, url in enumerate(urls):
self.linkifier_ids.append(
RealmFilter.objects.create(
realm=iago.realm,
pattern=f"dummy{index}",
url_format_string=url,
).id
)
msg = Message.objects.filter(id=self.msg_id).first()
msg_stream_id = Recipient.objects.get(id=msg.recipient_id).type_id
def test_converted_url_templates(self) -> None:
RealmFilter = self.apps.get_model("zerver", "RealmFilter")
# topic edits contain "topic" and "prev_topic" fields.
# stream edits contain "stream" and "prev_stream" fields.
msg.edit_history = orjson.dumps(
[
{
"user_id": 11,
"timestamp": 1644405050,
"stream": msg_stream_id,
"prev_stream": 3,
"topic": msg.subject,
"prev_topic": "topic 3",
},
{"user_id": 11, "timestamp": 1644405040, "prev_stream": 2, "stream": 3},
{
"user_id": 11,
"timestamp": 1644405030,
"prev_content": "test content and topic edit",
"prev_rendered_content": "<p>test content and topic edit</p>",
"prev_rendered_content_version": 1,
"prev_topic": "topic 2",
"topic": "topic 3",
},
{
"user_id": 11,
"timestamp": 1644405020,
"prev_topic": "topic 1",
"topic": "topic 2",
},
expected_urls = [
"http://example.com/",
"https://example.com/",
"https://user:password@example.com/",
"https://example.com/@user/thing",
"https://example.com/!path",
"https://example.com/foo.bar",
"https://example.com/foo[bar]",
"https://example.com/%7Bfoo%7D",
"https://example.com/%7Bfoo%7D%7Bbars%7D",
"https://example.com/%7Bfoo%7D/and/%7Bbar%7D",
"https://example.com/?foo=%7Bfoo%7D",
"https://example.com/%ab",
"https://example.com/%ba",
"https://example.com/%21",
"https://example.com/words%20with%20spaces",
"https://example.com/back%20to%20%7Bback%7D",
"https://example.com/encoded%2fwith%2fletters",
"https://example.com/encoded%2Fwith%2Fupper%2Fcase%2Fletters",
"https://example.com/%",
"https://example.com/%(",
"https://example.com/%()",
"https://example.com/%(foo",
"https://example.com/%(foo)",
"https://example.com/%(foo)s",
"https://example.com/{foo}",
"https://example.com/{foo}{bar}",
]
).decode()
msg.save(update_fields=["edit_history"])
def test_message_modern_edit_history_format(self) -> None:
Message = self.apps.get_model("zerver", "Message")
Recipient = self.apps.get_model("zerver", "Recipient")
iago = self.example_user("iago")
stream_name = "Denmark"
denmark = get_stream(stream_name, iago.realm)
msg = Message.objects.filter(id=self.msg_id).first()
msg_stream_id = Recipient.objects.get(id=msg.recipient_id).type_id
new_edit_history = orjson.loads(msg.edit_history)
self.assert_length(new_edit_history, 4)
# stream and topic edit entry
self.assertEqual(new_edit_history[0]["prev_topic"], "topic 3")
self.assertEqual(new_edit_history[0]["topic"], msg.subject)
self.assertEqual(new_edit_history[0]["prev_stream"], 3)
self.assertEqual(new_edit_history[0]["stream"], msg_stream_id)
self.assertEqual(new_edit_history[0]["stream"], denmark.id)
self.assertEqual(
set(new_edit_history[0].keys()),
{"timestamp", "prev_topic", "topic", "prev_stream", "stream", "user_id"},
)
# stream only edit entry
self.assertEqual(new_edit_history[1]["prev_stream"], 2)
self.assertEqual(new_edit_history[1]["stream"], 3)
self.assertEqual(
set(new_edit_history[1].keys()), {"timestamp", "prev_stream", "stream", "user_id"}
)
# topic and content edit entry
self.assertEqual(new_edit_history[2]["prev_topic"], "topic 2")
self.assertEqual(new_edit_history[2]["topic"], "topic 3")
self.assertEqual(new_edit_history[2]["prev_content"], "test content and topic edit")
self.assertEqual(
new_edit_history[2]["prev_rendered_content"], "<p>test content and topic edit</p>"
)
self.assertEqual(new_edit_history[2]["prev_rendered_content_version"], 1)
self.assertEqual(
set(new_edit_history[2].keys()),
{
"timestamp",
"prev_topic",
"topic",
"prev_content",
"prev_rendered_content",
"prev_rendered_content_version",
"user_id",
},
)
# topic only edit entry
self.assertEqual(new_edit_history[3]["prev_topic"], "topic 1")
self.assertEqual(new_edit_history[3]["topic"], "topic 2")
self.assertEqual(
set(new_edit_history[3].keys()), {"timestamp", "prev_topic", "topic", "user_id"}
)
@skip("Fails because newer migrations have since been merged.") # nocoverage # skipped
class MessageEditHistoryIntermediateFormats(MigrationsTestCase):
migrate_from = "0376_set_realmemoji_author_and_reupload_realmemoji"
migrate_to = "0377_message_edit_history_format"
msg_id: Optional[int] = None
@use_db_models
def setUpBeforeMigration(self, apps: StateApps) -> None:
Recipient = apps.get_model("zerver", "Recipient")
Message = apps.get_model("zerver", "Message")
iago = self.example_user("iago")
stream_name = "Denmark"
denmark = get_stream(stream_name, iago.realm)
denmark_recipient = Recipient.objects.get(type=2, type_id=denmark.id)
self.msg_id = Message.objects.create(
recipient_id=denmark_recipient.id,
subject="topic 4",
sender_id=iago.id,
sending_client_id=1,
content="current message text",
date_sent=timezone_now(),
).id
msg = Message.objects.filter(id=self.msg_id).first()
msg_stream_id = Recipient.objects.get(id=msg.recipient_id).type_id
# topic edits contain "prev_subject", "topic" and "prev_topic" fields.
# stream edits contain "stream" and "prev_stream" fields.
msg.edit_history = orjson.dumps(
[
{
"user_id": 11,
"timestamp": 1644405050,
"stream": msg_stream_id,
"prev_stream": 3,
"topic": msg.subject,
"prev_topic": "topic 3",
"prev_subject": "topic 3",
},
{"user_id": 11, "timestamp": 1644405040, "prev_stream": 2, "stream": 3},
{
"user_id": 11,
"timestamp": 1644405030,
"prev_content": "test content and topic edit",
"prev_rendered_content": "<p>test content and topic edit</p>",
"prev_rendered_content_version": 1,
"prev_topic": "topic 2",
"prev_subject": "topic 2",
"topic": "topic 3",
},
{
"user_id": 11,
"timestamp": 1644405020,
"prev_topic": "topic 1",
"prev_subject": "topic 1",
"topic": "topic 2",
},
]
).decode()
msg.save(update_fields=["edit_history"])
def test_message_temporary_edit_history_format(self) -> None:
Message = self.apps.get_model("zerver", "Message")
Recipient = self.apps.get_model("zerver", "Recipient")
iago = self.example_user("iago")
stream_name = "Denmark"
denmark = get_stream(stream_name, iago.realm)
msg = Message.objects.filter(id=self.msg_id).first()
msg_stream_id = Recipient.objects.get(id=msg.recipient_id).type_id
new_edit_history = orjson.loads(msg.edit_history)
self.assert_length(new_edit_history, 4)
# stream and topic edit entry
self.assertFalse("prev_subject" in new_edit_history[0])
self.assertEqual(new_edit_history[0]["prev_topic"], "topic 3")
self.assertEqual(new_edit_history[0]["topic"], msg.subject)
self.assertEqual(new_edit_history[0]["prev_stream"], 3)
self.assertEqual(new_edit_history[0]["stream"], msg_stream_id)
self.assertEqual(new_edit_history[0]["stream"], denmark.id)
self.assertEqual(
set(new_edit_history[0].keys()),
{"timestamp", "prev_topic", "topic", "prev_stream", "stream", "user_id"},
)
# stream only edit entry
self.assertEqual(new_edit_history[1]["prev_stream"], 2)
self.assertEqual(new_edit_history[1]["stream"], 3)
self.assertEqual(
set(new_edit_history[1].keys()), {"timestamp", "prev_stream", "stream", "user_id"}
)
# topic and content edit entry
self.assertFalse("prev_subject" in new_edit_history[2])
self.assertEqual(new_edit_history[2]["prev_topic"], "topic 2")
self.assertEqual(new_edit_history[2]["topic"], "topic 3")
self.assertEqual(new_edit_history[2]["prev_content"], "test content and topic edit")
self.assertEqual(
new_edit_history[2]["prev_rendered_content"], "<p>test content and topic edit</p>"
)
self.assertEqual(new_edit_history[2]["prev_rendered_content_version"], 1)
self.assertEqual(
set(new_edit_history[2].keys()),
{
"timestamp",
"prev_topic",
"topic",
"prev_content",
"prev_rendered_content",
"prev_rendered_content_version",
"user_id",
},
)
# topic only edit entry
self.assertFalse("prev_subject" in new_edit_history[3])
self.assertEqual(new_edit_history[3]["prev_topic"], "topic 1")
self.assertEqual(new_edit_history[3]["topic"], "topic 2")
self.assertEqual(
set(new_edit_history[3].keys()), {"timestamp", "prev_topic", "topic", "user_id"}
)
for linkifier_id, expected in zip(self.linkifier_ids, expected_urls):
linkifier = RealmFilter.objects.filter(id=linkifier_id).first()
self.assertIsNotNone(linkifier)
self.assertEqual(linkifier.url_template, expected)