CVE-2021-41115: Use re2 for user-supplied linkifier patterns.

Zulip attempts to validate that the regular expressions that admins
enter for linkifiers are well-formatted, and only contain a specific
subset of regex grammar.  The process of checking these
properties (via a regex!) can cause denial-of-service via
backtracking.

Furthermore, this validation itself does not prevent the creation of
linkifiers which themselves cause denial-of-service when they are
executed.  As the validator accepts literally anything inside of a
`(?P<word>...)` block, any quadratic backtracking expression can be
hidden therein.

Switch user-provided linkifier patterns to be matched in the Markdown
processor by the `re2` library, which is guaranteed constant-time.
This somewhat limits the possible features of the regular
expression (notably, look-head and -behind, and back-references);
however, these features had never been advertised as working in the
context of linkifiers.

A migration removes any existing linkifiers which would not function
under re2, after printing them for posterity during the upgrade; they
are unlikely to be common, and are impossible to fix automatically.

The denial-of-service in the linkifier validator was discovered by
@erik-krogh and @yoff, as GHSL-2021-118.
This commit is contained in:
Alex Vandiver 2021-09-28 23:27:54 +00:00
parent 9f2df658f0
commit db934be064
7 changed files with 118 additions and 45 deletions

View File

@ -42,7 +42,7 @@ async function test_delete_linkifier(page: Page): Promise<void> {
async function test_add_invalid_linkifier_pattern(page: Page): Promise<void> { async function test_add_invalid_linkifier_pattern(page: Page): Promise<void> {
await page.waitForSelector(".admin-linkifier-form", {visible: true}); await page.waitForSelector(".admin-linkifier-form", {visible: true});
await common.fill_form(page, "form.admin-linkifier-form", { await common.fill_form(page, "form.admin-linkifier-form", {
pattern: "a$", pattern: "(foo",
url_format_string: "https://trac.example.com/ticket/%(id)s", url_format_string: "https://trac.example.com/ticket/%(id)s",
}); });
await page.click("form.admin-linkifier-form button.button"); await page.click("form.admin-linkifier-form button.button");
@ -50,7 +50,7 @@ async function test_add_invalid_linkifier_pattern(page: Page): Promise<void> {
await page.waitForSelector("div#admin-linkifier-status", {visible: true}); await page.waitForSelector("div#admin-linkifier-status", {visible: true});
assert.strictEqual( assert.strictEqual(
await common.get_text_from_selector(page, "div#admin-linkifier-status"), await common.get_text_from_selector(page, "div#admin-linkifier-status"),
"Failed: Invalid linkifier pattern. Valid characters are [ a-zA-Z_#=/:+!-].", "Failed: Bad regular expression: missing ): (foo",
); );
} }
@ -83,8 +83,8 @@ async function test_edit_invalid_linkifier(page: Page): Promise<void> {
await page.click(".linkifier_row .edit"); await page.click(".linkifier_row .edit");
await page.waitForFunction(() => document.activeElement?.id === "dialog_widget_modal"); await page.waitForFunction(() => document.activeElement?.id === "dialog_widget_modal");
await common.fill_form(page, "form.linkifier-edit-form", { await common.fill_form(page, "form.linkifier-edit-form", {
pattern: "####", pattern: "#(?P<id>d????)",
url_format_string: "####", url_format_string: "????",
}); });
await page.click(".dialog_submit_button"); await page.click(".dialog_submit_button");
@ -96,7 +96,7 @@ async function test_edit_invalid_linkifier(page: Page): Promise<void> {
); );
assert.strictEqual( assert.strictEqual(
edit_linkifier_pattern_status, edit_linkifier_pattern_status,
"Failed: Invalid linkifier pattern. Valid characters are [ a-zA-Z_#=/:+!-].", "Failed: Bad regular expression: bad repetition operator: ????",
); );
const edit_linkifier_format_status_selector = "div#edit-linkifier-format-status"; const edit_linkifier_format_status_selector = "div#edit-linkifier-format-status";

View File

@ -38,6 +38,7 @@ import markdown.inlinepatterns
import markdown.postprocessors import markdown.postprocessors
import markdown.treeprocessors import markdown.treeprocessors
import markdown.util import markdown.util
import re2
import requests import requests
from django.conf import settings from django.conf import settings
from markdown.blockparser import BlockParser from markdown.blockparser import BlockParser
@ -1769,7 +1770,9 @@ class MarkdownListPreprocessor(markdown.preprocessors.Preprocessor):
# Name for the outer capture group we use to separate whitespace and # Name for the outer capture group we use to separate whitespace and
# other delimiters from the actual content. This value won't be an # other delimiters from the actual content. This value won't be an
# option in user-entered capture groups. # option in user-entered capture groups.
BEFORE_CAPTURE_GROUP = "linkifier_before_match"
OUTER_CAPTURE_GROUP = "linkifier_actual_match" OUTER_CAPTURE_GROUP = "linkifier_actual_match"
AFTER_CAPTURE_GROUP = "linkifier_after_match"
def prepare_linkifier_pattern(source: str) -> str: def prepare_linkifier_pattern(source: str) -> str:
@ -1777,31 +1780,45 @@ def prepare_linkifier_pattern(source: str) -> str:
whitespace, or opening delimiters, won't match if there are word whitespace, or opening delimiters, won't match if there are word
characters directly after, and saves what was matched as characters directly after, and saves what was matched as
OUTER_CAPTURE_GROUP.""" OUTER_CAPTURE_GROUP."""
return fr"""(?<![^\s'"\(,:<])(?P<{OUTER_CAPTURE_GROUP}>{source})(?!\w)""" return fr"""(?P<{BEFORE_CAPTURE_GROUP}>^|\s|['"\(,:<])(?P<{OUTER_CAPTURE_GROUP}>{source})(?P<{AFTER_CAPTURE_GROUP}>$|[^\pL\pN])"""
# Given a regular expression pattern, linkifies groups that match it # Given a regular expression pattern, linkifies groups that match it
# using the provided format string to construct the URL. # using the provided format string to construct the URL.
class LinkifierPattern(markdown.inlinepatterns.Pattern): class LinkifierPattern(CompiledInlineProcessor):
"""Applied a given linkifier to the input""" """Applied a given linkifier to the input"""
def __init__( def __init__(
self, self,
source_pattern: str, source_pattern: str,
format_string: str, format_string: str,
md: Optional[markdown.Markdown] = None, md: markdown.Markdown,
) -> None: ) -> None:
self.pattern = prepare_linkifier_pattern(source_pattern) # Do not write errors to stderr (this still raises exceptions)
self.format_string = format_string options = re2.Options()
super().__init__(self.pattern, md) options.log_errors = False
def handleMatch(self, m: Match[str]) -> Union[Element, str]: compiled_re2 = re2.compile(prepare_linkifier_pattern(source_pattern), options=options)
self.format_string = format_string
super().__init__(compiled_re2, md)
def handleMatch( # type: ignore[override] # supertype incompatible with supersupertype
self, m: Match[str], data: str
) -> Union[Tuple[Element, int, int], Tuple[None, None, None]]:
db_data = self.md.zulip_db_data db_data = self.md.zulip_db_data
return url_to_a( url = url_to_a(
db_data, db_data,
self.format_string % m.groupdict(), self.format_string % m.groupdict(),
markdown.util.AtomicString(m.group(OUTER_CAPTURE_GROUP)), markdown.util.AtomicString(m.group(OUTER_CAPTURE_GROUP)),
) )
if isinstance(url, str):
return None, None, None
return (
url,
m.start(2),
m.end(2),
)
class UserMentionPattern(CompiledInlineProcessor): class UserMentionPattern(CompiledInlineProcessor):
@ -2314,11 +2331,19 @@ def topic_links(linkifiers_key: int, topic_name: str) -> List[Dict[str, str]]:
matches: List[Dict[str, Union[str, int]]] = [] matches: List[Dict[str, Union[str, int]]] = []
linkifiers = linkifiers_for_realm(linkifiers_key) linkifiers = linkifiers_for_realm(linkifiers_key)
options = re2.Options()
options.log_errors = False
for linkifier in linkifiers: for linkifier in linkifiers:
raw_pattern = linkifier["pattern"] raw_pattern = linkifier["pattern"]
url_format_string = linkifier["url_format"] url_format_string = linkifier["url_format"]
pattern = prepare_linkifier_pattern(raw_pattern) try:
for m in re.finditer(pattern, topic_name): pattern = re2.compile(prepare_linkifier_pattern(raw_pattern), options=options)
except re2.error:
# An invalid regex shouldn't be possible here, and logging
# here on an invalid regex would spam the logs with every
# message sent; simply move on.
continue
for m in pattern.finditer(topic_name):
match_details = m.groupdict() match_details = m.groupdict()
match_text = match_details[OUTER_CAPTURE_GROUP] match_text = match_details[OUTER_CAPTURE_GROUP]
# We format the linkifier's url string using the matched text. # We format the linkifier's url string using the matched text.

View File

@ -0,0 +1,39 @@
import re2
from django.db import migrations
from django.db.backends.postgresql.schema import DatabaseSchemaEditor
from django.db.migrations.state import StateApps
def delete_re2_invalid(apps: StateApps, schema_editor: DatabaseSchemaEditor) -> None:
options = re2.Options()
options.log_errors = False
RealmFilter = apps.get_model("zerver", "RealmFilter")
found_errors = False
for linkifier in RealmFilter.objects.all():
try:
re2.compile(linkifier.pattern, options=options)
except re2.error:
if not found_errors:
print()
found_errors = True
print(
f"Deleting linkifier {linkifier.id} in realm {linkifier.realm.string_id} which is not compatible with new re2 engine:"
)
print(f" {linkifier.pattern} -> {linkifier.url_format_string}")
linkifier.delete()
class Migration(migrations.Migration):
dependencies = [
("zerver", "0325_alter_realmplayground_unique_together"),
]
operations = [
migrations.RunPython(
delete_re2_invalid,
reverse_code=migrations.RunPython.noop,
elidable=True,
)
]

View File

@ -0,0 +1,15 @@
# Generated by Django 3.2.7 on 2021-10-04 17:49
from typing import Any, List
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
("zerver", "0358_split_create_stream_policy"),
("zerver", "0359_re2_linkifiers"),
]
operations: List[Any] = []

View File

@ -20,6 +20,7 @@ from typing import (
import django.contrib.auth import django.contrib.auth
import orjson import orjson
import re2
from bitfield import BitField from bitfield import BitField
from bitfield.types import BitHandler from bitfield.types import BitHandler
from django.conf import settings from django.conf import settings
@ -1084,21 +1085,21 @@ post_delete.connect(flush_realm_emoji, sender=RealmEmoji)
def filter_pattern_validator(value: str) -> Pattern[str]: def filter_pattern_validator(value: str) -> Pattern[str]:
regex = re.compile(r"^(?:(?:[\w\-#_= /:]*|[+]|[!])(\(\?P<\w+>.+\)))+$")
error_msg = _("Invalid linkifier pattern. Valid characters are {}.").format(
"[ a-zA-Z_#=/:+!-]",
)
if not regex.match(str(value)):
raise ValidationError(error_msg)
try: try:
pattern = re.compile(value) # Do not write errors to stderr (this still raises exceptions)
except re.error: options = re2.Options()
# Regex is invalid options.log_errors = False
raise ValidationError(error_msg)
return pattern regex = re2.compile(value, options=options)
except re2.error as e:
if len(e.args) >= 1:
if isinstance(e.args[0], str): # nocoverage
raise ValidationError(_("Bad regular expression: {}").format(e.args[0]))
if isinstance(e.args[0], bytes):
raise ValidationError(_("Bad regular expression: {}").format(e.args[0].decode()))
raise ValidationError(_("Unknown regular expression error")) # nocoverage
return regex
def filter_format_validator(value: str) -> None: def filter_format_validator(value: str) -> None:

View File

@ -1406,26 +1406,25 @@ class MarkdownTest(ZulipTestCase):
url_format_string = r"https://trac.example.com/ticket/%(id)s" url_format_string = r"https://trac.example.com/ticket/%(id)s"
linkifier_1 = RealmFilter( linkifier_1 = RealmFilter(
realm=realm, realm=realm,
pattern=r"(?P<id>ABC\-[0-9]+)(?![A-Z0-9-])", pattern=r"(?P<id>ABC\-[0-9]+)",
url_format_string=url_format_string, url_format_string=url_format_string,
) )
linkifier_1.save() linkifier_1.save()
self.assertEqual( self.assertEqual(
linkifier_1.__str__(), linkifier_1.__str__(),
r"<RealmFilter(zulip): (?P<id>ABC\-[0-9]+)(?![A-Z0-9-])" r"<RealmFilter(zulip): (?P<id>ABC\-[0-9]+) https://trac.example.com/ticket/%(id)s>",
" https://trac.example.com/ticket/%(id)s>",
) )
url_format_string = r"https://other-trac.example.com/ticket/%(id)s" url_format_string = r"https://other-trac.example.com/ticket/%(id)s"
linkifier_2 = RealmFilter( linkifier_2 = RealmFilter(
realm=realm, realm=realm,
pattern=r"(?P<id>[A-Z][A-Z0-9]*\-[0-9]+)(?![A-Z0-9-])", pattern=r"(?P<id>[A-Z][A-Z0-9]*\-[0-9]+)",
url_format_string=url_format_string, url_format_string=url_format_string,
) )
linkifier_2.save() linkifier_2.save()
self.assertEqual( self.assertEqual(
linkifier_2.__str__(), linkifier_2.__str__(),
r"<RealmFilter(zulip): (?P<id>[A-Z][A-Z0-9]*\-[0-9]+)(?![A-Z0-9-])" r"<RealmFilter(zulip): (?P<id>[A-Z][A-Z0-9]*\-[0-9]+)"
" https://other-trac.example.com/ticket/%(id)s>", " https://other-trac.example.com/ticket/%(id)s>",
) )

View File

@ -27,17 +27,13 @@ class RealmFilterTest(ZulipTestCase):
result = self.client_post("/json/realm/filters", info=data) result = self.client_post("/json/realm/filters", info=data)
self.assert_json_error(result, "This field cannot be blank.") self.assert_json_error(result, "This field cannot be blank.")
data["pattern"] = "$a" data["pattern"] = "(foo"
result = self.client_post("/json/realm/filters", info=data) result = self.client_post("/json/realm/filters", info=data)
self.assert_json_error( self.assert_json_error(result, "Bad regular expression: missing ): (foo")
result, "Invalid linkifier pattern. Valid characters are [ a-zA-Z_#=/:+!-]."
)
data["pattern"] = r"ZUL-(?P<id>\d++)" data["pattern"] = r"ZUL-(?P<id>\d????)"
result = self.client_post("/json/realm/filters", info=data) result = self.client_post("/json/realm/filters", info=data)
self.assert_json_error( self.assert_json_error(result, "Bad regular expression: bad repetition operator: ????")
result, "Invalid linkifier pattern. Valid characters are [ a-zA-Z_#=/:+!-]."
)
data["pattern"] = r"ZUL-(?P<id>\d+)" data["pattern"] = r"ZUL-(?P<id>\d+)"
data["url_format_string"] = "$fgfg" data["url_format_string"] = "$fgfg"
@ -189,13 +185,11 @@ class RealmFilterTest(ZulipTestCase):
) )
data = { data = {
"pattern": r"ZUL-(?P<id>\d++)", "pattern": r"ZUL-(?P<id>\d????)",
"url_format_string": "https://realm.com/my_realm_filter/%(id)s", "url_format_string": "https://realm.com/my_realm_filter/%(id)s",
} }
result = self.client_patch(f"/json/realm/filters/{linkifier_id}", info=data) result = self.client_patch(f"/json/realm/filters/{linkifier_id}", info=data)
self.assert_json_error( self.assert_json_error(result, "Bad regular expression: bad repetition operator: ????")
result, "Invalid linkifier pattern. Valid characters are [ a-zA-Z_#=/:+!-]."
)
data["pattern"] = r"ZUL-(?P<id>\d+)" data["pattern"] = r"ZUL-(?P<id>\d+)"
data["url_format_string"] = "$fgfg" data["url_format_string"] = "$fgfg"