CVE-2021-41115: Use re2 for user-supplied linkifier patterns.

Zulip attempts to validate that the regular expressions that admins
enter for linkifiers are well-formatted, and only contain a specific
subset of regex grammar.  The process of checking these
properties (via a regex!) can cause denial-of-service via
backtracking.

Furthermore, this validation itself does not prevent the creation of
linkifiers which themselves cause denial-of-service when they are
executed.  As the validator accepts literally anything inside of a
`(?P<word>...)` block, any quadratic backtracking expression can be
hidden therein.

Switch user-provided linkifier patterns to be matched in the Markdown
processor by the `re2` library, which is guaranteed constant-time.
This somewhat limits the possible features of the regular
expression (notably, look-head and -behind, and back-references);
however, these features had never been advertised as working in the
context of linkifiers.

A migration removes any existing linkifiers which would not function
under re2, after printing them for posterity during the upgrade; they
are unlikely to be common, and are impossible to fix automatically.

The denial-of-service in the linkifier validator was discovered by
@erik-krogh and @yoff, as GHSL-2021-118.
This commit is contained in:
Alex Vandiver 2021-09-28 23:27:54 +00:00
parent 9f2df658f0
commit db934be064
7 changed files with 118 additions and 45 deletions

View File

@ -42,7 +42,7 @@ async function test_delete_linkifier(page: Page): Promise<void> {
async function test_add_invalid_linkifier_pattern(page: Page): Promise<void> {
await page.waitForSelector(".admin-linkifier-form", {visible: true});
await common.fill_form(page, "form.admin-linkifier-form", {
pattern: "a$",
pattern: "(foo",
url_format_string: "https://trac.example.com/ticket/%(id)s",
});
await page.click("form.admin-linkifier-form button.button");
@ -50,7 +50,7 @@ async function test_add_invalid_linkifier_pattern(page: Page): Promise<void> {
await page.waitForSelector("div#admin-linkifier-status", {visible: true});
assert.strictEqual(
await common.get_text_from_selector(page, "div#admin-linkifier-status"),
"Failed: Invalid linkifier pattern. Valid characters are [ a-zA-Z_#=/:+!-].",
"Failed: Bad regular expression: missing ): (foo",
);
}
@ -83,8 +83,8 @@ async function test_edit_invalid_linkifier(page: Page): Promise<void> {
await page.click(".linkifier_row .edit");
await page.waitForFunction(() => document.activeElement?.id === "dialog_widget_modal");
await common.fill_form(page, "form.linkifier-edit-form", {
pattern: "####",
url_format_string: "####",
pattern: "#(?P<id>d????)",
url_format_string: "????",
});
await page.click(".dialog_submit_button");
@ -96,7 +96,7 @@ async function test_edit_invalid_linkifier(page: Page): Promise<void> {
);
assert.strictEqual(
edit_linkifier_pattern_status,
"Failed: Invalid linkifier pattern. Valid characters are [ a-zA-Z_#=/:+!-].",
"Failed: Bad regular expression: bad repetition operator: ????",
);
const edit_linkifier_format_status_selector = "div#edit-linkifier-format-status";

View File

@ -38,6 +38,7 @@ import markdown.inlinepatterns
import markdown.postprocessors
import markdown.treeprocessors
import markdown.util
import re2
import requests
from django.conf import settings
from markdown.blockparser import BlockParser
@ -1769,7 +1770,9 @@ class MarkdownListPreprocessor(markdown.preprocessors.Preprocessor):
# Name for the outer capture group we use to separate whitespace and
# other delimiters from the actual content. This value won't be an
# option in user-entered capture groups.
BEFORE_CAPTURE_GROUP = "linkifier_before_match"
OUTER_CAPTURE_GROUP = "linkifier_actual_match"
AFTER_CAPTURE_GROUP = "linkifier_after_match"
def prepare_linkifier_pattern(source: str) -> str:
@ -1777,31 +1780,45 @@ def prepare_linkifier_pattern(source: str) -> str:
whitespace, or opening delimiters, won't match if there are word
characters directly after, and saves what was matched as
OUTER_CAPTURE_GROUP."""
return fr"""(?<![^\s'"\(,:<])(?P<{OUTER_CAPTURE_GROUP}>{source})(?!\w)"""
return fr"""(?P<{BEFORE_CAPTURE_GROUP}>^|\s|['"\(,:<])(?P<{OUTER_CAPTURE_GROUP}>{source})(?P<{AFTER_CAPTURE_GROUP}>$|[^\pL\pN])"""
# Given a regular expression pattern, linkifies groups that match it
# using the provided format string to construct the URL.
class LinkifierPattern(markdown.inlinepatterns.Pattern):
class LinkifierPattern(CompiledInlineProcessor):
"""Applied a given linkifier to the input"""
def __init__(
self,
source_pattern: str,
format_string: str,
md: Optional[markdown.Markdown] = None,
md: markdown.Markdown,
) -> None:
self.pattern = prepare_linkifier_pattern(source_pattern)
self.format_string = format_string
super().__init__(self.pattern, md)
# Do not write errors to stderr (this still raises exceptions)
options = re2.Options()
options.log_errors = False
def handleMatch(self, m: Match[str]) -> Union[Element, str]:
compiled_re2 = re2.compile(prepare_linkifier_pattern(source_pattern), options=options)
self.format_string = format_string
super().__init__(compiled_re2, md)
def handleMatch( # type: ignore[override] # supertype incompatible with supersupertype
self, m: Match[str], data: str
) -> Union[Tuple[Element, int, int], Tuple[None, None, None]]:
db_data = self.md.zulip_db_data
return url_to_a(
url = url_to_a(
db_data,
self.format_string % m.groupdict(),
markdown.util.AtomicString(m.group(OUTER_CAPTURE_GROUP)),
)
if isinstance(url, str):
return None, None, None
return (
url,
m.start(2),
m.end(2),
)
class UserMentionPattern(CompiledInlineProcessor):
@ -2314,11 +2331,19 @@ def topic_links(linkifiers_key: int, topic_name: str) -> List[Dict[str, str]]:
matches: List[Dict[str, Union[str, int]]] = []
linkifiers = linkifiers_for_realm(linkifiers_key)
options = re2.Options()
options.log_errors = False
for linkifier in linkifiers:
raw_pattern = linkifier["pattern"]
url_format_string = linkifier["url_format"]
pattern = prepare_linkifier_pattern(raw_pattern)
for m in re.finditer(pattern, topic_name):
try:
pattern = re2.compile(prepare_linkifier_pattern(raw_pattern), options=options)
except re2.error:
# An invalid regex shouldn't be possible here, and logging
# here on an invalid regex would spam the logs with every
# message sent; simply move on.
continue
for m in pattern.finditer(topic_name):
match_details = m.groupdict()
match_text = match_details[OUTER_CAPTURE_GROUP]
# We format the linkifier's url string using the matched text.

View File

@ -0,0 +1,39 @@
import re2
from django.db import migrations
from django.db.backends.postgresql.schema import DatabaseSchemaEditor
from django.db.migrations.state import StateApps
def delete_re2_invalid(apps: StateApps, schema_editor: DatabaseSchemaEditor) -> None:
options = re2.Options()
options.log_errors = False
RealmFilter = apps.get_model("zerver", "RealmFilter")
found_errors = False
for linkifier in RealmFilter.objects.all():
try:
re2.compile(linkifier.pattern, options=options)
except re2.error:
if not found_errors:
print()
found_errors = True
print(
f"Deleting linkifier {linkifier.id} in realm {linkifier.realm.string_id} which is not compatible with new re2 engine:"
)
print(f" {linkifier.pattern} -> {linkifier.url_format_string}")
linkifier.delete()
class Migration(migrations.Migration):
dependencies = [
("zerver", "0325_alter_realmplayground_unique_together"),
]
operations = [
migrations.RunPython(
delete_re2_invalid,
reverse_code=migrations.RunPython.noop,
elidable=True,
)
]

View File

@ -0,0 +1,15 @@
# Generated by Django 3.2.7 on 2021-10-04 17:49
from typing import Any, List
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
("zerver", "0358_split_create_stream_policy"),
("zerver", "0359_re2_linkifiers"),
]
operations: List[Any] = []

View File

@ -20,6 +20,7 @@ from typing import (
import django.contrib.auth
import orjson
import re2
from bitfield import BitField
from bitfield.types import BitHandler
from django.conf import settings
@ -1084,21 +1085,21 @@ post_delete.connect(flush_realm_emoji, sender=RealmEmoji)
def filter_pattern_validator(value: str) -> Pattern[str]:
regex = re.compile(r"^(?:(?:[\w\-#_= /:]*|[+]|[!])(\(\?P<\w+>.+\)))+$")
error_msg = _("Invalid linkifier pattern. Valid characters are {}.").format(
"[ a-zA-Z_#=/:+!-]",
)
if not regex.match(str(value)):
raise ValidationError(error_msg)
try:
pattern = re.compile(value)
except re.error:
# Regex is invalid
raise ValidationError(error_msg)
# Do not write errors to stderr (this still raises exceptions)
options = re2.Options()
options.log_errors = False
return pattern
regex = re2.compile(value, options=options)
except re2.error as e:
if len(e.args) >= 1:
if isinstance(e.args[0], str): # nocoverage
raise ValidationError(_("Bad regular expression: {}").format(e.args[0]))
if isinstance(e.args[0], bytes):
raise ValidationError(_("Bad regular expression: {}").format(e.args[0].decode()))
raise ValidationError(_("Unknown regular expression error")) # nocoverage
return regex
def filter_format_validator(value: str) -> None:

View File

@ -1406,26 +1406,25 @@ class MarkdownTest(ZulipTestCase):
url_format_string = r"https://trac.example.com/ticket/%(id)s"
linkifier_1 = RealmFilter(
realm=realm,
pattern=r"(?P<id>ABC\-[0-9]+)(?![A-Z0-9-])",
pattern=r"(?P<id>ABC\-[0-9]+)",
url_format_string=url_format_string,
)
linkifier_1.save()
self.assertEqual(
linkifier_1.__str__(),
r"<RealmFilter(zulip): (?P<id>ABC\-[0-9]+)(?![A-Z0-9-])"
" https://trac.example.com/ticket/%(id)s>",
r"<RealmFilter(zulip): (?P<id>ABC\-[0-9]+) https://trac.example.com/ticket/%(id)s>",
)
url_format_string = r"https://other-trac.example.com/ticket/%(id)s"
linkifier_2 = RealmFilter(
realm=realm,
pattern=r"(?P<id>[A-Z][A-Z0-9]*\-[0-9]+)(?![A-Z0-9-])",
pattern=r"(?P<id>[A-Z][A-Z0-9]*\-[0-9]+)",
url_format_string=url_format_string,
)
linkifier_2.save()
self.assertEqual(
linkifier_2.__str__(),
r"<RealmFilter(zulip): (?P<id>[A-Z][A-Z0-9]*\-[0-9]+)(?![A-Z0-9-])"
r"<RealmFilter(zulip): (?P<id>[A-Z][A-Z0-9]*\-[0-9]+)"
" https://other-trac.example.com/ticket/%(id)s>",
)

View File

@ -27,17 +27,13 @@ class RealmFilterTest(ZulipTestCase):
result = self.client_post("/json/realm/filters", info=data)
self.assert_json_error(result, "This field cannot be blank.")
data["pattern"] = "$a"
data["pattern"] = "(foo"
result = self.client_post("/json/realm/filters", info=data)
self.assert_json_error(
result, "Invalid linkifier pattern. Valid characters are [ a-zA-Z_#=/:+!-]."
)
self.assert_json_error(result, "Bad regular expression: missing ): (foo")
data["pattern"] = r"ZUL-(?P<id>\d++)"
data["pattern"] = r"ZUL-(?P<id>\d????)"
result = self.client_post("/json/realm/filters", info=data)
self.assert_json_error(
result, "Invalid linkifier pattern. Valid characters are [ a-zA-Z_#=/:+!-]."
)
self.assert_json_error(result, "Bad regular expression: bad repetition operator: ????")
data["pattern"] = r"ZUL-(?P<id>\d+)"
data["url_format_string"] = "$fgfg"
@ -189,13 +185,11 @@ class RealmFilterTest(ZulipTestCase):
)
data = {
"pattern": r"ZUL-(?P<id>\d++)",
"pattern": r"ZUL-(?P<id>\d????)",
"url_format_string": "https://realm.com/my_realm_filter/%(id)s",
}
result = self.client_patch(f"/json/realm/filters/{linkifier_id}", info=data)
self.assert_json_error(
result, "Invalid linkifier pattern. Valid characters are [ a-zA-Z_#=/:+!-]."
)
self.assert_json_error(result, "Bad regular expression: bad repetition operator: ????")
data["pattern"] = r"ZUL-(?P<id>\d+)"
data["url_format_string"] = "$fgfg"