openapi: Use more carefully-constructed regexes for markdown extension.

This removes a false-positive ReDoS report; it is a false positive
because the input is always checked-in code.  It also incidentally
refactors the regexes to be more explicit about the values they
expect, and removes unnecessary capturing groups.

It also removes support for an optional parenthesized status code
for fixtures, unnecessary since 981e4f8946, as well as for
optional key-value language options, unnecessary since
a2be9a0e2d.

Thank you to @erik-krogh and @yoff for bringing this to our attention.
This commit is contained in:
Alex Vandiver 2021-09-29 22:10:12 +00:00 committed by Tim Abbott
parent 206168ed28
commit f1c61fbea1
3 changed files with 38 additions and 96 deletions

View File

@ -10,7 +10,7 @@ import json
import re
import shlex
from textwrap import dedent
from typing import Any, Dict, List, Mapping, Match, Optional, Pattern, Tuple
from typing import Any, Dict, List, Mapping, Match, Optional, Pattern
import markdown
from django.conf import settings
@ -31,15 +31,32 @@ from zerver.openapi.openapi import (
openapi_spec,
)
# Building blocks interpolated into the macro regexes defined after them.
#
# Endpoint names look like "/users/{user_id}:get": a path made of lowercase
# letters, "_", "/", "-" and "{...}" placeholders, then ":" and a lowercase
# method name.  The "-" is placed last in the character class so that it is a
# literal hyphen; written as "/-{" it would instead form a range running from
# "/" to "{", which admits digits, uppercase letters and punctuation such as
# "?" and "@" while rejecting "-" itself.
API_ENDPOINT_NAME = r"/[a-z_/{}-]+:[a-z]+"
# Language identifier given in parentheses after the macro name, e.g. "curl".
API_LANGUAGE = r"\w+"
# Bare alternation: always interpolate it inside a group, e.g. ({API_KEY_TYPE}).
API_KEY_TYPE = r"fixture|example"
MACRO_REGEXP = re.compile(
r"\{generate_code_example(\(\s*(.+?)\s*\))*\|\s*(.+?)\s*\|\s*(.+?)\s*(\(\s*(.+)\s*\))?\}"
rf"""
{{
generate_code_example
(?: \( \s* ({API_LANGUAGE}) \s* \) )?
\|
\s* ({API_ENDPOINT_NAME}) \s*
\|
\s* ({API_KEY_TYPE}) \s*
}}
""",
re.VERBOSE,
)
PYTHON_EXAMPLE_REGEX = re.compile(r"\# \{code_example\|\s*(start|end)\s*\}")
JS_EXAMPLE_REGEX = re.compile(r"\/\/ \{code_example\|\s*(start|end)\s*\}")
MACRO_REGEXP_DESC = re.compile(rf"{{generate_api_description\(\s*({API_ENDPOINT_NAME})\s*\)}}")
MACRO_REGEXP_TITLE = re.compile(rf"{{generate_api_title\(\s*({API_ENDPOINT_NAME})\s*\)}}")
MACRO_REGEXP_RESPONSE_DESC = re.compile(
rf"{{generate_response_description\(\s*({API_ENDPOINT_NAME})\s*\)}}"
)
MACRO_REGEXP_PARAMETER_DESC = re.compile(
rf"{{generate_parameter_description\(\s*({API_ENDPOINT_NAME})\s*\)}}"
)
PYTHON_EXAMPLE_REGEX = re.compile(r"\# \{code_example\|\s*(.+?)\s*\}")
JS_EXAMPLE_REGEX = re.compile(r"\/\/ \{code_example\|\s*(.+?)\s*\}")
MACRO_REGEXP_DESC = re.compile(r"\{generate_api_description(\(\s*(.+?)\s*\))}")
MACRO_REGEXP_TITLE = re.compile(r"\{generate_api_title(\(\s*(.+?)\s*\))}")
MACRO_REGEXP_RESPONSE_DESC = re.compile(r"\{generate_response_description(\(\s*(.+?)\s*\))}")
MACRO_REGEXP_PARAMETER_DESC = re.compile(r"\{generate_parameter_description(\(\s*(.+?)\s*\))}")
PYTHON_CLIENT_CONFIG = """
#!/usr/bin/env python3
@ -87,25 +104,6 @@ DEFAULT_EXAMPLE = {
ADMIN_CONFIG_LANGUAGES = ["python", "javascript"]
def parse_language_and_options(input_str: Optional[str]) -> Tuple[str, Dict[str, Any]]:
    """Split a macro argument such as ``curl, exclude=['a', "b"]`` into its parts.

    Args:
        input_str: The text found inside the macro's parentheses: a language
            name, optionally followed by comma-separated ``key=value`` options.
            ``None`` or an empty string means no language was given.

    Returns:
        A ``(language, options)`` tuple.  ``language`` is ``""`` when the input
        is empty.  ``options`` maps each key to its value decoded as JSON
        after single quotes have been replaced with double quotes; it is empty
        when no options are present.

    Raises:
        AssertionError: If the input does not start with a word character.
        json.JSONDecodeError: If an option value is not valid JSON once its
            quotes are normalized (for example an unquoted bare word).
    """
    if not input_str:
        return ("", {})

    language_and_options = re.match(
        r"(?P<language>\w+)(,\s*(?P<options>[\"\'\w\d\[\],= ]+))?", input_str
    )
    assert language_and_options is not None

    language = language_and_options.group("language")
    assert language is not None

    raw_options = language_and_options.group("options")
    if not raw_options:
        return (language, {})

    # A value is either a run of word/quote characters or a bracketed list.
    kwargs_pattern = re.compile(r"(?P<key>\w+)\s*=\s*(?P<value>[\'\"\w\d]+|\[[\'\",\w\d ]+\])")
    options = {
        # The option syntax accepts either quote style; JSON only accepts
        # double quotes, so normalize before decoding.
        m.group("key"): json.loads(m.group("value").replace("'", '"'))
        for m in kwargs_pattern.finditer(raw_options)
    }
    return (language, options)
def extract_code_example(
source: List[str], snippet: List[Any], example_regex: Pattern[str]
) -> List[Any]:
@ -363,6 +361,7 @@ def generate_curl_example(
def render_curl_example(
function: str,
api_url: str,
admin_config: bool = False,
) -> List[str]:
"""A simple wrapper around generate_curl_example."""
parts = function.split(":")
@ -475,7 +474,7 @@ class BasePreprocessor(Preprocessor):
return lines
def generate_text(self, match: Match[str]) -> List[str]:
function = match.group(2)
function = match.group(1)
text = self.render(function)
return text
@ -488,21 +487,22 @@ class APICodeExamplesPreprocessor(BasePreprocessor):
super().__init__(MACRO_REGEXP, md, config)
def generate_text(self, match: Match[str]) -> List[str]:
language, options = parse_language_and_options(match.group(2))
function = match.group(3)
key = match.group(4)
language = match.group(1) or ""
function = match.group(2)
key = match.group(3)
if self.api_url is None:
raise AssertionError("Cannot render curl API examples without API URL set.")
options["api_url"] = self.api_url
if key == "fixture":
text = self.render(function)
elif key == "example":
path, method = function.rsplit(":", 1)
if language in ADMIN_CONFIG_LANGUAGES and check_requires_administrator(path, method):
text = SUPPORTED_LANGUAGES[language]["render"](function, admin_config=True)
else:
text = SUPPORTED_LANGUAGES[language]["render"](function, **options)
admin_config = language in ADMIN_CONFIG_LANGUAGES and check_requires_administrator(
path, method
)
text = SUPPORTED_LANGUAGES[language]["render"](
function, api_url=self.api_url, admin_config=admin_config
)
return text
def render(self, function: str) -> List[str]:

View File

@ -54,7 +54,7 @@ def test_generated_curl_examples_for_success(client: Client) -> None:
f = open(file_name)
for line in f:
# A typical example from the Markdown source looks like this:
# {generate_code_example(curl, ...}
# {generate_code_example(curl)|...|...}
if line.startswith("{generate_code_example(curl"):
curl_commands_to_test.append(line)
else:

View File

@ -14,11 +14,7 @@ from zerver.lib.request import _REQ, arguments_map
from zerver.lib.rest import rest_dispatch
from zerver.lib.test_classes import ZulipTestCase
from zerver.lib.utils import assert_is_not_none
from zerver.openapi.markdown_extension import (
generate_curl_example,
parse_language_and_options,
render_curl_example,
)
from zerver.openapi.markdown_extension import generate_curl_example, render_curl_example
from zerver.openapi.openapi import (
OPENAPI_SPEC_PATH,
OpenAPISpec,
@ -623,60 +619,6 @@ so maybe we shouldn't include it in pending_endpoints.
self.check_for_non_existant_openapi_endpoints()
class ModifyExampleGenerationTestCase(ZulipTestCase):
    """Checks how parse_language_and_options() splits a language name from its key=value options."""

    def test_no_mod_argument(self) -> None:
        # A bare language name yields an empty options dict.
        res = parse_language_and_options("python")
        self.assertEqual(res, ("python", {}))

    def test_single_simple_mod_argument(self) -> None:
        # One scalar option: a number, then a string in either quote style.
        res = parse_language_and_options("curl, mod=1")
        self.assertEqual(res, ("curl", {"mod": 1}))

        res = parse_language_and_options("curl, mod='somevalue'")
        self.assertEqual(res, ("curl", {"mod": "somevalue"}))

        res = parse_language_and_options('curl, mod="somevalue"')
        self.assertEqual(res, ("curl", {"mod": "somevalue"}))

    def test_multiple_simple_mod_argument(self) -> None:
        # Several scalar options separated by commas.
        res = parse_language_and_options("curl, mod1=1, mod2='a'")
        self.assertEqual(res, ("curl", {"mod1": 1, "mod2": "a"}))

        res = parse_language_and_options("curl, mod1=\"asdf\", mod2='thing', mod3=3")
        self.assertEqual(res, ("curl", {"mod1": "asdf", "mod2": "thing", "mod3": 3}))

    def test_single_list_mod_argument(self) -> None:
        # One list-valued option, with single, double and mixed quoting.
        res = parse_language_and_options("curl, exclude=['param1', 'param2']")
        self.assertEqual(res, ("curl", {"exclude": ["param1", "param2"]}))

        res = parse_language_and_options('curl, exclude=["param1", "param2"]')
        self.assertEqual(res, ("curl", {"exclude": ["param1", "param2"]}))

        res = parse_language_and_options("curl, exclude=['param1', \"param2\"]")
        self.assertEqual(res, ("curl", {"exclude": ["param1", "param2"]}))

    def test_multiple_list_mod_argument(self) -> None:
        # Two list-valued options in one argument string.
        res = parse_language_and_options("curl, exclude=['param1', \"param2\"], special=['param3']")
        self.assertEqual(res, ("curl", {"exclude": ["param1", "param2"], "special": ["param3"]}))

    def test_multiple_mixed_mod_arguments(self) -> None:
        # Lists and scalars mixed, including a list holding both an int and a string.
        res = parse_language_and_options(
            'curl, exclude=["asdf", \'sdfg\'], other_key=\'asdf\', more_things="asdf", another_list=[1, "2"]'
        )
        self.assertEqual(
            res,
            (
                "curl",
                {
                    "exclude": ["asdf", "sdfg"],
                    "other_key": "asdf",
                    "more_things": "asdf",
                    "another_list": [1, "2"],
                },
            ),
        )
class TestCurlExampleGeneration(ZulipTestCase):
spec_mock_without_examples = {