migrations: Backfill url_template from url format string.

This is implemented by replacing all matches of "%(var_name)s" in a URL
format string with "{var_name}". Since we want to ensure that the
templates aren't broken after this migration, a RuntimeError is raised
if a converted template does not pass the uri_template.validate check,
to let the maintainer know that the linkifier cannot be converted
automatically.

Also, we need to unescape "%%", which is used to represent a literal "%"
in the old format string syntax, and percent-encode "{" and "}" (as "%7B"
and "%7D"), which are part of the URL template syntax.

Signed-off-by: Zixuan James Li <p359101898@gmail.com>
This commit is contained in:
Zixuan James Li 2022-10-05 15:18:45 -04:00 committed by Tim Abbott
parent a19c80df43
commit ab53e8d3e6
3 changed files with 124 additions and 383 deletions

View File

@ -48,4 +48,4 @@ API_FEATURE_LEVEL = 175
# historical commits sharing the same major version, in which case a # historical commits sharing the same major version, in which case a
# minor version bump suffices. # minor version bump suffices.
PROVISION_VERSION = (233, 2) PROVISION_VERSION = (233, 3)

View File

@ -0,0 +1,48 @@
# Generated by Django 4.0.7 on 2022-10-02 20:50
import re
import uri_template
from django.db import migrations
from django.db.backends.base.schema import BaseDatabaseSchemaEditor
from django.db.migrations.state import StateApps
def transform_to_url_template_syntax(
    apps: StateApps, schema_editor: BaseDatabaseSchemaEditor
) -> None:
    """Backfill ``url_template`` on every RealmFilter from ``url_format_string``.

    Each legacy format string is converted in three steps:

    1. Literal "{" and "}" are percent-encoded ("%7B" / "%7D") so they are
       not mistaken for URL template expressions.
    2. Every "%(name)s" placeholder becomes "{name}".
    3. Every "%%" (an escaped percent sign in the old syntax) becomes "%".

    Args:
        apps: Historical app registry used to look up the RealmFilter model.
        schema_editor: Unused; present because RunPython passes it.

    Raises:
        RuntimeError: If a converted template fails ``uri_template.validate``.
            Nothing is written to the database in that case, because the
            bulk update only happens after every row has been converted.
    """
    linkifier_model = apps.get_model("zerver", "RealmFilter")
    # Materialize the rows once so the instances mutated in the loop are the
    # very same objects that are handed to bulk_update below.
    linkifiers = list(linkifier_model.objects.all())

    # Matches anything of the form %(variable_name)s.  The lookbehind plus
    # the captured run of "%%" pairs make sure the "%" that opens the
    # placeholder is not itself half of an escaped "%%".
    var_pattern = re.compile(r"(?<!%)((?:%%)*)%\(([a-zA-Z0-9_-]+)\)s")
    escape_table = str.maketrans(
        {
            "{": "%7B",
            "}": "%7D",
        }
    )

    for linkifier in linkifiers:
        # Braces must be encoded before placeholders are rewritten, otherwise
        # the freshly inserted "{name}" expressions would be encoded too.
        converted_template = linkifier.url_format_string.translate(escape_table)
        # Replace format string variables with the RFC 6570 URI Template syntax
        converted_template = var_pattern.sub(r"\1{\2}", converted_template).replace("%%", "%")
        if not uri_template.validate(converted_template):
            # Report the original format string (not the regex) so the
            # maintainer can identify which linkifier needs manual fixing.
            raise RuntimeError(
                f'Failed to convert url format "{linkifier.url_format_string}". The converted template "{converted_template}" is invalid.'
            )
        linkifier.url_template = converted_template

    linkifier_model.objects.bulk_update(linkifiers, fields=["url_template"])
class Migration(migrations.Migration):
    """Data migration that runs the url_format_string -> url_template backfill."""

    # Must run after the migration named 0440_realmfilter_url_template.
    dependencies = [("zerver", "0440_realmfilter_url_template")]

    # Reversing this migration is a no-op: the backfilled values are simply
    # left in place.
    operations = [
        migrations.RunPython(
            code=transform_to_url_template_syntax,
            reverse_code=migrations.RunPython.noop,
            elidable=True,
        ),
    ]

View File

@ -4,16 +4,11 @@
# You can also read # You can also read
# https://www.caktusgroup.com/blog/2016/02/02/writing-unit-tests-django-migrations/ # https://www.caktusgroup.com/blog/2016/02/02/writing-unit-tests-django-migrations/
# to get a tutorial on the framework that inspired this feature. # to get a tutorial on the framework that inspired this feature.
from typing import Optional
from unittest import skip
import orjson
from django.db.migrations.state import StateApps from django.db.migrations.state import StateApps
from django.utils.timezone import now as timezone_now
from zerver.lib.test_classes import MigrationsTestCase from zerver.lib.test_classes import MigrationsTestCase
from zerver.lib.test_helpers import use_db_models from zerver.lib.test_helpers import use_db_models
from zerver.models import get_stream
# Important note: These tests are very expensive, and details of # Important note: These tests are very expensive, and details of
# Django's database transaction model mean it does not super work to # Django's database transaction model mean it does not super work to
@ -31,390 +26,88 @@ from zerver.models import get_stream
# been tested for a migration being merged. # been tested for a migration being merged.
@skip("Fails because newer migrations have since been merged.") # nocoverage # skipped class LinkifierURLFormatString(MigrationsTestCase):
class MessageEditHistoryLegacyFormats(MigrationsTestCase): migrate_from = "0440_realmfilter_url_template"
migrate_from = "0376_set_realmemoji_author_and_reupload_realmemoji" migrate_to = "0441_backfill_realmfilter_url_template"
migrate_to = "0377_message_edit_history_format"
msg_id: Optional[int] = None
@use_db_models @use_db_models
def setUpBeforeMigration(self, apps: StateApps) -> None: def setUpBeforeMigration(self, apps: StateApps) -> None:
Recipient = apps.get_model("zerver", "Recipient") RealmFilter = apps.get_model("zerver", "RealmFilter")
Message = apps.get_model("zerver", "Message")
iago = self.example_user("iago") iago = self.example_user("iago")
stream_name = "Denmark"
denmark = get_stream(stream_name, iago.realm)
denmark_recipient = Recipient.objects.get(type=2, type_id=denmark.id)
self.msg_id = Message.objects.create( urls = [
recipient_id=denmark_recipient.id, "http://example.com/",
subject="topic 4", "https://example.com/",
sender_id=iago.id, "https://user:password@example.com/",
sending_client_id=1, "https://example.com/@user/thing",
content="current message text", "https://example.com/!path",
date_sent=timezone_now(), "https://example.com/foo.bar",
).id "https://example.com/foo[bar]",
"https://example.com/{foo}",
"https://example.com/{foo}{bars}",
"https://example.com/{foo}/and/{bar}",
"https://example.com/?foo={foo}",
"https://example.com/%ab",
"https://example.com/%ba",
"https://example.com/%21",
"https://example.com/words%20with%20spaces",
"https://example.com/back%20to%20{back}",
"https://example.com/encoded%2fwith%2fletters",
"https://example.com/encoded%2Fwith%2Fupper%2Fcase%2Fletters",
"https://example.com/%%",
"https://example.com/%%(",
"https://example.com/%%()",
"https://example.com/%%(foo",
"https://example.com/%%(foo)",
"https://example.com/%%(foo)s",
"https://example.com/%(foo)s",
"https://example.com/%(foo)s%(bar)s",
]
self.linkifier_ids = []
# topic edits contain only "prev_subject" field. for index, url in enumerate(urls):
# stream edits contain only "prev_stream" field. self.linkifier_ids.append(
msg = Message.objects.filter(id=self.msg_id).first() RealmFilter.objects.create(
msg.edit_history = orjson.dumps( realm=iago.realm,
[ pattern=f"dummy{index}",
{ url_format_string=url,
"user_id": 11, ).id
"timestamp": 1644405050, )
"prev_stream": 3,
"prev_subject": "topic 3",
},
{"user_id": 11, "timestamp": 1644405040, "prev_stream": 2},
{
"user_id": 11,
"timestamp": 1644405030,
"prev_content": "test content and topic edit",
"prev_rendered_content": "<p>test content and topic edit</p>",
"prev_rendered_content_version": 1,
"prev_subject": "topic 2",
},
{"user_id": 11, "timestamp": 1644405020, "prev_subject": "topic 1"},
{
"user_id": 11,
"timestamp": 1644405010,
"prev_content": "test content only edit",
"prev_rendered_content": "<p>test content only edit</p>",
"prev_rendered_content_version": 1,
},
]
).decode()
msg.save(update_fields=["edit_history"])
def test_message_legacy_edit_history_format(self) -> None: def test_converted_url_templates(self) -> None:
Message = self.apps.get_model("zerver", "Message") RealmFilter = self.apps.get_model("zerver", "RealmFilter")
Recipient = self.apps.get_model("zerver", "Recipient")
iago = self.example_user("iago") expected_urls = [
stream_name = "Denmark" "http://example.com/",
denmark = get_stream(stream_name, iago.realm) "https://example.com/",
"https://user:password@example.com/",
"https://example.com/@user/thing",
"https://example.com/!path",
"https://example.com/foo.bar",
"https://example.com/foo[bar]",
"https://example.com/%7Bfoo%7D",
"https://example.com/%7Bfoo%7D%7Bbars%7D",
"https://example.com/%7Bfoo%7D/and/%7Bbar%7D",
"https://example.com/?foo=%7Bfoo%7D",
"https://example.com/%ab",
"https://example.com/%ba",
"https://example.com/%21",
"https://example.com/words%20with%20spaces",
"https://example.com/back%20to%20%7Bback%7D",
"https://example.com/encoded%2fwith%2fletters",
"https://example.com/encoded%2Fwith%2Fupper%2Fcase%2Fletters",
"https://example.com/%",
"https://example.com/%(",
"https://example.com/%()",
"https://example.com/%(foo",
"https://example.com/%(foo)",
"https://example.com/%(foo)s",
"https://example.com/{foo}",
"https://example.com/{foo}{bar}",
]
msg = Message.objects.filter(id=self.msg_id).first() for linkifier_id, expected in zip(self.linkifier_ids, expected_urls):
msg_stream_id = Recipient.objects.get(id=msg.recipient_id).type_id linkifier = RealmFilter.objects.filter(id=linkifier_id).first()
new_edit_history = orjson.loads(msg.edit_history) self.assertIsNotNone(linkifier)
self.assertEqual(linkifier.url_template, expected)
self.assert_length(new_edit_history, 5)
# stream and topic edit entry
self.assertFalse("prev_subject" in new_edit_history[0])
self.assertEqual(new_edit_history[0]["prev_topic"], "topic 3")
self.assertEqual(new_edit_history[0]["topic"], msg.subject)
self.assertEqual(new_edit_history[0]["prev_stream"], 3)
self.assertEqual(new_edit_history[0]["stream"], msg_stream_id)
self.assertEqual(new_edit_history[0]["stream"], denmark.id)
self.assertEqual(
set(new_edit_history[0].keys()),
{"timestamp", "prev_topic", "topic", "prev_stream", "stream", "user_id"},
)
# stream only edit entry
self.assertEqual(new_edit_history[1]["prev_stream"], 2)
self.assertEqual(new_edit_history[1]["stream"], 3)
self.assertEqual(
set(new_edit_history[1].keys()), {"timestamp", "prev_stream", "stream", "user_id"}
)
# topic and content edit entry
self.assertFalse("prev_subject" in new_edit_history[2])
self.assertEqual(new_edit_history[2]["prev_topic"], "topic 2")
self.assertEqual(new_edit_history[2]["topic"], "topic 3")
self.assertEqual(new_edit_history[2]["prev_content"], "test content and topic edit")
self.assertEqual(
new_edit_history[2]["prev_rendered_content"], "<p>test content and topic edit</p>"
)
self.assertEqual(new_edit_history[2]["prev_rendered_content_version"], 1)
self.assertEqual(
set(new_edit_history[2].keys()),
{
"timestamp",
"prev_topic",
"topic",
"prev_content",
"prev_rendered_content",
"prev_rendered_content_version",
"user_id",
},
)
# topic only edit entry
self.assertFalse("prev_subject" in new_edit_history[3])
self.assertEqual(new_edit_history[3]["prev_topic"], "topic 1")
self.assertEqual(new_edit_history[3]["topic"], "topic 2")
self.assertEqual(
set(new_edit_history[3].keys()), {"timestamp", "prev_topic", "topic", "user_id"}
)
# content only edit entry - not retested because never changes
self.assertEqual(new_edit_history[4]["prev_content"], "test content only edit")
self.assertEqual(
new_edit_history[4]["prev_rendered_content"], "<p>test content only edit</p>"
)
self.assertEqual(new_edit_history[4]["prev_rendered_content_version"], 1)
self.assertEqual(
set(new_edit_history[4].keys()),
{
"timestamp",
"prev_content",
"prev_rendered_content",
"prev_rendered_content_version",
"user_id",
},
)
@skip("Fails because newer migrations have since been merged.")  # nocoverage # skipped
class MessageEditHistoryModernFormats(MigrationsTestCase):
    """Migrates 0376 -> 0377 with edit history that already uses the
    topic/prev_topic and stream/prev_stream keys, and checks that every
    entry still carries the same values and exactly the same key set.
    """

    migrate_from = "0376_set_realmemoji_author_and_reupload_realmemoji"
    migrate_to = "0377_message_edit_history_format"

    msg_id: Optional[int] = None

    @use_db_models
    def setUpBeforeMigration(self, apps: StateApps) -> None:
        """Create one message in Denmark and give it a four-entry edit history."""
        recipient_model = apps.get_model("zerver", "Recipient")
        message_model = apps.get_model("zerver", "Message")

        iago = self.example_user("iago")
        denmark = get_stream("Denmark", iago.realm)
        stream_recipient = recipient_model.objects.get(type=2, type_id=denmark.id)

        created = message_model.objects.create(
            recipient_id=stream_recipient.id,
            subject="topic 4",
            sender_id=iago.id,
            sending_client_id=1,
            content="current message text",
            date_sent=timezone_now(),
        )
        self.msg_id = created.id

        message = message_model.objects.filter(id=self.msg_id).first()
        current_stream_id = recipient_model.objects.get(id=message.recipient_id).type_id

        # Topic edits carry both "topic" and "prev_topic";
        # stream edits carry both "stream" and "prev_stream".
        history = [
            {
                "user_id": 11,
                "timestamp": 1644405050,
                "stream": current_stream_id,
                "prev_stream": 3,
                "topic": message.subject,
                "prev_topic": "topic 3",
            },
            {"user_id": 11, "timestamp": 1644405040, "prev_stream": 2, "stream": 3},
            {
                "user_id": 11,
                "timestamp": 1644405030,
                "prev_content": "test content and topic edit",
                "prev_rendered_content": "<p>test content and topic edit</p>",
                "prev_rendered_content_version": 1,
                "prev_topic": "topic 2",
                "topic": "topic 3",
            },
            {
                "user_id": 11,
                "timestamp": 1644405020,
                "prev_topic": "topic 1",
                "topic": "topic 2",
            },
        ]
        message.edit_history = orjson.dumps(history).decode()
        message.save(update_fields=["edit_history"])

    def test_message_modern_edit_history_format(self) -> None:
        """Check each migrated entry's values and its exact set of keys."""
        message_model = self.apps.get_model("zerver", "Message")
        recipient_model = self.apps.get_model("zerver", "Recipient")

        iago = self.example_user("iago")
        denmark = get_stream("Denmark", iago.realm)

        message = message_model.objects.filter(id=self.msg_id).first()
        current_stream_id = recipient_model.objects.get(id=message.recipient_id).type_id
        new_edit_history = orjson.loads(message.edit_history)

        self.assert_length(new_edit_history, 4)
        stream_and_topic, stream_only, topic_and_content, topic_only = new_edit_history

        # Entry 0: stream and topic were edited together.
        self.assertEqual(stream_and_topic["prev_topic"], "topic 3")
        self.assertEqual(stream_and_topic["topic"], message.subject)
        self.assertEqual(stream_and_topic["prev_stream"], 3)
        self.assertEqual(stream_and_topic["stream"], current_stream_id)
        self.assertEqual(stream_and_topic["stream"], denmark.id)
        self.assertEqual(
            set(stream_and_topic.keys()),
            {"timestamp", "prev_topic", "topic", "prev_stream", "stream", "user_id"},
        )

        # Entry 1: only the stream was edited.
        self.assertEqual(stream_only["prev_stream"], 2)
        self.assertEqual(stream_only["stream"], 3)
        self.assertEqual(
            set(stream_only.keys()),
            {"timestamp", "prev_stream", "stream", "user_id"},
        )

        # Entry 2: topic and content were edited together.
        self.assertEqual(topic_and_content["prev_topic"], "topic 2")
        self.assertEqual(topic_and_content["topic"], "topic 3")
        self.assertEqual(topic_and_content["prev_content"], "test content and topic edit")
        self.assertEqual(
            topic_and_content["prev_rendered_content"],
            "<p>test content and topic edit</p>",
        )
        self.assertEqual(topic_and_content["prev_rendered_content_version"], 1)
        self.assertEqual(
            set(topic_and_content.keys()),
            {
                "timestamp",
                "prev_topic",
                "topic",
                "prev_content",
                "prev_rendered_content",
                "prev_rendered_content_version",
                "user_id",
            },
        )

        # Entry 3: only the topic was edited.
        self.assertEqual(topic_only["prev_topic"], "topic 1")
        self.assertEqual(topic_only["topic"], "topic 2")
        self.assertEqual(
            set(topic_only.keys()),
            {"timestamp", "prev_topic", "topic", "user_id"},
        )
@skip("Fails because newer migrations have since been merged.")  # nocoverage # skipped
class MessageEditHistoryIntermediateFormats(MigrationsTestCase):
    """Migrates 0376 -> 0377 with edit history whose topic edits carry
    "prev_subject" alongside "topic"/"prev_topic", and checks that
    "prev_subject" is gone afterwards while the other values remain.
    """

    migrate_from = "0376_set_realmemoji_author_and_reupload_realmemoji"
    migrate_to = "0377_message_edit_history_format"

    msg_id: Optional[int] = None

    @use_db_models
    def setUpBeforeMigration(self, apps: StateApps) -> None:
        """Create one message in Denmark and give it a four-entry edit history."""
        recipient_model = apps.get_model("zerver", "Recipient")
        message_model = apps.get_model("zerver", "Message")

        iago = self.example_user("iago")
        denmark = get_stream("Denmark", iago.realm)
        stream_recipient = recipient_model.objects.get(type=2, type_id=denmark.id)

        created = message_model.objects.create(
            recipient_id=stream_recipient.id,
            subject="topic 4",
            sender_id=iago.id,
            sending_client_id=1,
            content="current message text",
            date_sent=timezone_now(),
        )
        self.msg_id = created.id

        message = message_model.objects.filter(id=self.msg_id).first()
        current_stream_id = recipient_model.objects.get(id=message.recipient_id).type_id

        # Topic edits carry "prev_subject", "topic" and "prev_topic";
        # stream edits carry "stream" and "prev_stream".
        history = [
            {
                "user_id": 11,
                "timestamp": 1644405050,
                "stream": current_stream_id,
                "prev_stream": 3,
                "topic": message.subject,
                "prev_topic": "topic 3",
                "prev_subject": "topic 3",
            },
            {"user_id": 11, "timestamp": 1644405040, "prev_stream": 2, "stream": 3},
            {
                "user_id": 11,
                "timestamp": 1644405030,
                "prev_content": "test content and topic edit",
                "prev_rendered_content": "<p>test content and topic edit</p>",
                "prev_rendered_content_version": 1,
                "prev_topic": "topic 2",
                "prev_subject": "topic 2",
                "topic": "topic 3",
            },
            {
                "user_id": 11,
                "timestamp": 1644405020,
                "prev_topic": "topic 1",
                "prev_subject": "topic 1",
                "topic": "topic 2",
            },
        ]
        message.edit_history = orjson.dumps(history).decode()
        message.save(update_fields=["edit_history"])

    def test_message_temporary_edit_history_format(self) -> None:
        """Check each migrated entry's values, key set, and lack of "prev_subject"."""
        message_model = self.apps.get_model("zerver", "Message")
        recipient_model = self.apps.get_model("zerver", "Recipient")

        iago = self.example_user("iago")
        denmark = get_stream("Denmark", iago.realm)

        message = message_model.objects.filter(id=self.msg_id).first()
        current_stream_id = recipient_model.objects.get(id=message.recipient_id).type_id
        new_edit_history = orjson.loads(message.edit_history)

        self.assert_length(new_edit_history, 4)
        stream_and_topic, stream_only, topic_and_content, topic_only = new_edit_history

        # Entry 0: stream and topic were edited together.
        self.assertFalse("prev_subject" in stream_and_topic)
        self.assertEqual(stream_and_topic["prev_topic"], "topic 3")
        self.assertEqual(stream_and_topic["topic"], message.subject)
        self.assertEqual(stream_and_topic["prev_stream"], 3)
        self.assertEqual(stream_and_topic["stream"], current_stream_id)
        self.assertEqual(stream_and_topic["stream"], denmark.id)
        self.assertEqual(
            set(stream_and_topic.keys()),
            {"timestamp", "prev_topic", "topic", "prev_stream", "stream", "user_id"},
        )

        # Entry 1: only the stream was edited.
        self.assertEqual(stream_only["prev_stream"], 2)
        self.assertEqual(stream_only["stream"], 3)
        self.assertEqual(
            set(stream_only.keys()),
            {"timestamp", "prev_stream", "stream", "user_id"},
        )

        # Entry 2: topic and content were edited together.
        self.assertFalse("prev_subject" in topic_and_content)
        self.assertEqual(topic_and_content["prev_topic"], "topic 2")
        self.assertEqual(topic_and_content["topic"], "topic 3")
        self.assertEqual(topic_and_content["prev_content"], "test content and topic edit")
        self.assertEqual(
            topic_and_content["prev_rendered_content"],
            "<p>test content and topic edit</p>",
        )
        self.assertEqual(topic_and_content["prev_rendered_content_version"], 1)
        self.assertEqual(
            set(topic_and_content.keys()),
            {
                "timestamp",
                "prev_topic",
                "topic",
                "prev_content",
                "prev_rendered_content",
                "prev_rendered_content_version",
                "user_id",
            },
        )

        # Entry 3: only the topic was edited.
        self.assertFalse("prev_subject" in topic_only)
        self.assertEqual(topic_only["prev_topic"], "topic 1")
        self.assertEqual(topic_only["topic"], "topic 2")
        self.assertEqual(
            set(topic_only.keys()),
            {"timestamp", "prev_topic", "topic", "user_id"},
        )