Patch: tokenize text, whitespace and template variables in tools/lib.

template_parser.tokenize() now emits "template_var", "newline", "indent",
"whitespace" and "text" tokens instead of skipping unrecognised characters,
and recognises "{{~#" / "{{~/" handlebars tags.  validate() skips the new
token kinds; pretty_print ignores "template_var" / "text" tokens when
computing tag continuation offsets.

Review changes folded into the "+" lines (hunk headers adjusted to match):
  * tokenize(): an empty get_text() result now calls advance(1) before
    `continue`; previously a whitespace-only run such as tabs or "\r" left
    state.i unchanged and the loop never terminated.
  * get_template_var(): bounds-check text[end] so an unclosed variable at
    the end of the input raises TokenizationException, not IndexError.
  * pretty_print_html(): only look at tokens[-2] when it exists.

NOTE(review): this patch was rebuilt from a flattened copy.  Indentation is
inferred from Python block structure, and the spans that had been stripped
(`looking_at("</")` and the <p ...> markup in the test) are inferred from
hunk line counts and the test's assertions -- confirm against the tree
before applying.  The `index` blob ids are those of the original patch.

diff --git a/tools/lib/pretty_print.py b/tools/lib/pretty_print.py
index 85bbeeabea..c25a9b7d27 100644
--- a/tools/lib/pretty_print.py
+++ b/tools/lib/pretty_print.py
@@ -45,11 +45,12 @@ def else_token(token: Token) -> bool:
 
 
 def pop_unused_tokens(tokens: List[Token], row: int) -> bool:
+    was_closed = False
     while tokens and tokens[-1].line <= row:
         token = tokens.pop()
         if close_token(token):
-            return True
-    return False
+            was_closed = True
+    return was_closed
 
 
 def indent_pref(row: int, tokens: List[Token], line: str) -> str:
@@ -146,10 +147,20 @@ def pretty_print_html(html: str) -> str:
                 next_offset = open_offsets.pop()
             return tag_continuation_offset
 
+        while tokens and tokens[-1].line < row:
+            token = tokens.pop()
+
         offset = next_offset
         if tokens:
             token = tokens[-1]
-            if token.line == row and token.line_span > 1:
+            # Look past an indent token only when another token exists.
+            if token.kind == "indent" and len(tokens) > 1:
+                token = tokens[-2]
+            if (
+                token.line == row
+                and token.line_span > 1
+                and token.kind not in ("template_var", "text")
+            ):
                 if token.kind in ("django_comment", "handlebar_comment", "html_comment"):
                     tag_continuation_offset = offset
                 else:
diff --git a/tools/lib/template_parser.py b/tools/lib/template_parser.py
index 722959ccaf..e564188c64 100644
--- a/tools/lib/template_parser.py
+++ b/tools/lib/template_parser.py
@@ -68,13 +68,16 @@ def tokenize(text: str) -> List[Token]:
         return looking_at("</")
 
     def looking_at_handlebars_start() -> bool:
-        return looking_at("{{#") or looking_at("{{^")
+        return looking_at("{{#") or looking_at("{{^") or looking_at("{{~#")
 
     def looking_at_handlebars_else() -> bool:
         return looking_at("{{else")
 
+    def looking_at_template_var() -> bool:
+        return looking_at("{")
+
     def looking_at_handlebars_end() -> bool:
-        return looking_at("{{/")
+        return looking_at("{{/") or looking_at("{{~/")
 
     def looking_at_django_start() -> bool:
         return looking_at("{% ")
@@ -92,8 +95,11 @@ def tokenize(text: str) -> List[Token]:
         # This function detects tag like {%- if foo -%}...{% endif %}
         return looking_at("{%-") and not looking_at("{%- end")
 
+    def looking_at_whitespace() -> bool:
+        return looking_at("\n") or looking_at(" ")
+
     state = TokenizerState()
-    tokens = []
+    tokens: List[Token] = []
 
     while state.i < len(text):
         try:
@@ -142,13 +148,13 @@ def tokenize(text: str) -> List[Token]:
                 kind = "handlebars_else"
             elif looking_at_handlebars_start():
                 s = get_handlebars_tag(text, state.i)
-                tag = s[3:-2].split()[0]
+                tag = s[3:-2].split()[0].strip("#")
                 if tag.startswith("*"):
                     tag = tag[1:]
                 kind = "handlebars_start"
             elif looking_at_handlebars_end():
                 s = get_handlebars_tag(text, state.i)
-                tag = s[3:-2]
+                tag = s[3:-2].strip("/#~")
                 kind = "handlebars_end"
             elif looking_at_django_else():
                 s = get_django_tag(text, state.i)
@@ -174,15 +180,40 @@ def tokenize(text: str) -> List[Token]:
                 s = get_django_tag(text, state.i, stripped=True)
                 tag = s[3:-3].split()[0]
                 kind = "jinja2_whitespace_stripped_type2_start"
+            elif looking_at_template_var():
+                # order is important here
+                s = get_template_var(text, state.i)
+                tag = "var"
+                kind = "template_var"
+            elif looking_at("\n"):
+                s = "\n"
+                tag = "newline"
+                kind = "newline"
+            elif looking_at(" "):
+                s = get_spaces(text, state.i)
+                tag = ""
+                if not tokens or tokens[-1].kind == "newline":
+                    kind = "indent"
+                else:
+                    kind = "whitespace"
+            elif text[state.i] in "{<":
+                snippet = text[state.i :][:15]
+                raise AssertionError(f"tool cannot parse {snippet}")
             else:
-                advance(1)
-                continue
+                s = get_text(text, state.i)
+                if s == "":
+                    # Whitespace-only run (tab, "\r", ...): step past one
+                    # character, otherwise this loop never makes progress.
+                    advance(1)
+                    continue
+                tag = ""
+                kind = "text"
         except TokenizationException as e:
             raise FormattedException(
                 f'''{e.message} at line {state.line} col {state.col}:"{e.line_content}"''',
             )
 
-        line_span = len(s.split("\n"))
+        line_span = len(s.strip("\n").split("\n"))
         token = Token(
             kind=kind,
             s=s,
@@ -359,8 +390,13 @@ def validate(fn: Optional[str] = None, text: Optional[str] = None) -> None:
             "django_comment",
             "handlebar_comment",
             "handlebars_singleton",
+            "indent",
+            "template_var",
             "html_comment",
             "html_doctype",
+            "newline",
+            "text",
+            "whitespace",
         ):
             continue
 
@@ -471,6 +507,22 @@ def get_handlebars_tag(text: str, i: int) -> str:
     return s
 
 
+def get_spaces(text: str, i: int) -> str:
+    s = ""
+    while i < len(text) and text[i] in " ":
+        s += text[i]
+        i += 1
+    return s
+
+
+def get_text(text: str, i: int) -> str:
+    s = ""
+    while i < len(text) and text[i] not in "{<":
+        s += text[i]
+        i += 1
+    return s.strip()
+
+
 def get_django_tag(text: str, i: int, stripped: bool = False) -> str:
     end = i + 2
     if stripped:
@@ -528,6 +580,22 @@ def get_handlebar_comment(text: str, i: int) -> str:
     raise TokenizationException("Unclosed comment", text[i:unclosed_end])
 
 
+def get_template_var(text: str, i: int) -> str:
+    end = i + 3
+    unclosed_end = 0
+    while end <= len(text):
+        if text[end - 1] == "}":
+            if end < len(text) and text[end] == "}":
+                end += 1
+            return text[i:end]
+        # Guard the look-ahead: at the last character text[end] would
+        # raise IndexError instead of the intended "Unclosed var" error.
+        if not unclosed_end and end < len(text) and text[end] == "<":
+            unclosed_end = end
+        end += 1
+    raise TokenizationException("Unclosed var", text[i:unclosed_end])
+
+
 def get_django_comment(text: str, i: int) -> str:
     end = i + 4
     unclosed_end = 0
diff --git a/tools/tests/test_html_branches.py b/tools/tests/test_html_branches.py
index 7398a2e256..a40314b344 100644
--- a/tools/tests/test_html_branches.py
+++ b/tools/tests/test_html_branches.py
@@ -17,17 +17,16 @@ TEST_TEMPLATES_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "t
 
 class TestHtmlBranches(unittest.TestCase):
     def test_get_tag_info(self) -> None:
-        html = """
-            <p class="test1 test2" id="test">foo</p>
-        """
+        html = """<p class="test1 test2" id="test">foo</p>"""
 
-        start_tag, end_tag = tools.lib.template_parser.tokenize(html)
+        start_tag, text, end_tag = tools.lib.template_parser.tokenize(html)
 
         start_tag_info = get_tag_info(start_tag)
         end_tag_info = get_tag_info(end_tag)
 
         self.assertEqual(start_tag_info.text(), "p.test1.test2#test")
         self.assertEqual(end_tag_info.text(), "p")
+        self.assertEqual(text.s, "foo")
 
     def test_html_tag_tree(self) -> None:
         html = """