From fb574431cb25589f8b09158077fa37c28c6f07a6 Mon Sep 17 00:00:00 2001
From: Steve Howell <showell@zulip.com>
Date: Thu, 2 Dec 2021 16:10:42 +0000
Subject: [PATCH] check-templates: Rewrite pretty_print (again).

It now does everything based on the tokens, rather
than walking the lines and trying to match up tokens
to lines.
---
 templates/zerver/accounts_send_confirm.html   |   2 +-
 templates/zerver/config_error.html            |  15 +-
 templates/zerver/development/email_log.html   |   4 +-
 .../emails/confirm_new_email.source.html      |   2 +-
 templates/zerver/features.html                |   2 +-
 templates/zerver/pricing_model.html           |   2 +-
 tools/lib/pretty_print.py                     | 292 +++++++-----------
 tools/lib/template_parser.py                  |   8 +
 tools/tests/test_pretty_print.py              |   5 +-
 9 files changed, 135 insertions(+), 197 deletions(-)
diff --git a/templates/zerver/accounts_send_confirm.html b/templates/zerver/accounts_send_confirm.html
index d07425644a..81fdfde2c1 100644
--- a/templates/zerver/accounts_send_confirm.html
+++ b/templates/zerver/accounts_send_confirm.html
@@ -19,7 +19,7 @@ page can be easily identified in it's respective JavaScript file -->
                 {% include 'zerver/dev_env_email_access_details.html' %}
 
                 <p>{% trans %}Still no email? We can <a href="#" id="resend_email_link">resend it</a>.{% endtrans %}
-                <i class="grey">({{ _("Just in case, take a look at your Spam folder.") }})</i></p>
+                    <i class="grey">({{ _("Just in case, take a look at your Spam folder.") }})</i></p>
                 {% if realm_creation %}
                 <form class="resend_confirm" action="/new/" method="post" style="position: absolute;">
                     {{ csrf_input }}
diff --git a/templates/zerver/config_error.html b/templates/zerver/config_error.html
index 3dac1bacce..5d563479fe 100644
--- a/templates/zerver/config_error.html
+++ b/templates/zerver/config_error.html
@@ -31,14 +31,13 @@
                         <p>
                             You may also want to test your email configuration,
                             as described in the
-                            <a href="https://zulip.readthedocs.io/en/latest/production/email.html">
-                            Production installation docs</a>.
+                            <a href="https://zulip.readthedocs.io/en/latest/production/email.html">Production installation docs</a>.
                         </p>
                         {% else %}
                         <p>
                             Please have a look at our
-                            <a target="_blank" rel="noopener noreferrer" href="https://zulip.readthedocs.io/en/latest/subsystems/email.html#development-and-testing">
-                            setup guide</a> for forwarding emails sent in development
+                            <a target="_blank" rel="noopener noreferrer" href="https://zulip.readthedocs.io/en/latest/subsystems/email.html#development-and-testing"> setup guide</a>
+                            for forwarding emails sent in development
                             environment to an email account.
                         </p>
                         {% endif %}
@@ -51,8 +50,8 @@
                     {% if has_markdown_file %}
                         {% if development_environment %}
                         {{ render_markdown_path('zerver/'+social_backend_name+'-error.md',
-                        {"root_domain_uri": root_domain_uri, "settings_path": secrets_path, "secrets_path": secrets_path,
-                        "client_id_key_name": "social_auth_" + social_backend_name + "_key"}) }}
+                          {"root_domain_uri": root_domain_uri, "settings_path": secrets_path, "secrets_path": secrets_path,
+                          "client_id_key_name": "social_auth_" + social_backend_name + "_key"}) }}
                         <p>
                             For more information, have a look at
                             the <a href="https://zulip.readthedocs.io/en/latest/development/authentication.html#{{ social_backend_name }}">authentication
@@ -60,8 +59,8 @@
                         </p>
                         {% else %}
                         {{ render_markdown_path('zerver/'+social_backend_name+'-error.md',
-                        {"root_domain_uri": root_domain_uri, "settings_path": settings_path, "secrets_path": secrets_path,
-                        "client_id_key_name": "SOCIAL_AUTH_" + social_backend_name.upper() + "_KEY"}) }}
+                          {"root_domain_uri": root_domain_uri, "settings_path": settings_path, "secrets_path": secrets_path,
+                          "client_id_key_name": "SOCIAL_AUTH_" + social_backend_name.upper() + "_KEY"}) }}
                         <p>
                             For more information, have a look at
                             our <a href="https://zulip.readthedocs.io/en/latest/production/authentication-methods.html">authentication
diff --git a/templates/zerver/development/email_log.html b/templates/zerver/development/email_log.html
index 949c09ff74..efab740851 100644
--- a/templates/zerver/development/email_log.html
+++ b/templates/zerver/development/email_log.html
@@ -49,8 +49,8 @@
                     <br />
                     <div class="alert alert-info">
                         You must set up SMTP as described
-                        <a target="_blank" rel="noopener noreferrer" href="https://zulip.readthedocs.io/en/latest/subsystems/email.html#development-and-testing">
-                        here</a> first before enabling this.
+                        <a target="_blank" rel="noopener noreferrer" href="https://zulip.readthedocs.io/en/latest/subsystems/email.html#development-and-testing"> here</a>
+                        first before enabling this.
                     </div>
                 </form>
             </div>
diff --git a/templates/zerver/emails/confirm_new_email.source.html b/templates/zerver/emails/confirm_new_email.source.html
index 4f055da92f..6b6bcd188d 100644
--- a/templates/zerver/emails/confirm_new_email.source.html
+++ b/templates/zerver/emails/confirm_new_email.source.html
@@ -8,7 +8,7 @@
 <p>{% trans %}Hi,{% endtrans %}</p>
 
 <p>{% trans realm_uri=macros.link_tag(realm_uri), old_email=macros.email_tag(old_email), new_email=macros.email_tag(new_email) %}We received a request to change the email address for the Zulip account on {{ realm_uri }} from {{ old_email }} to {{ new_email }}. To confirm this change, please click below:{% endtrans %}
-<a class="button" href="{{ activate_url }}">{{_('Confirm email change') }}</a></p>
+    <a class="button" href="{{ activate_url }}">{{_('Confirm email change') }}</a></p>
 
 <p>{% trans support_email=macros.email_tag(support_email) %}If you did not request this change, please contact us immediately at {{ support_email }}.{% endtrans %}</p>
 {% endblock %}
diff --git a/templates/zerver/features.html b/templates/zerver/features.html
index 1b366ad904..23cdf9de82 100644
--- a/templates/zerver/features.html
+++ b/templates/zerver/features.html
@@ -105,7 +105,7 @@
                 text editor.  Anything you can do with a mouse, you
                 can do even faster from the keyboard.
                 <a class="cta" href="/help/keyboard-shortcuts" target="_blank" rel="noopener noreferrer">
-                Learn more about keyboard shortcuts.</a>
+                    Learn more about keyboard shortcuts.</a>
             </p>
         </div>
         <img class="image" src="/static/images/landing-page/love-keyboard-shortcuts.svg" alt="" />
diff --git a/templates/zerver/pricing_model.html b/templates/zerver/pricing_model.html
index 548ff3e550..3a052fe8c1 100644
--- a/templates/zerver/pricing_model.html
+++ b/templates/zerver/pricing_model.html
@@ -124,7 +124,7 @@
                             <li><a href="https://zulip.readthedocs.io/en/stable/production/authentication-methods.html#synchronizing-data">LDAP/Active Directory sync</a></li>
                             <li>Advanced <a href="/help/roles-and-permissions">roles</a> and <a href="/help/stream-permissions">permissions</a></li>
                             <li>Easy <a href="https://zulip.readthedocs.io/en/stable/production/install.html">installation</a>
-                            and <a href="https://zulip.readthedocs.io/en/stable/production/upgrade-or-modify.html">maintenance</a></li>
+                                and <a href="https://zulip.readthedocs.io/en/stable/production/upgrade-or-modify.html">maintenance</a></li>
                         </ul>
                     </div>
                     <div class="bottom">
diff --git a/tools/lib/pretty_print.py b/tools/lib/pretty_print.py
index 436437e36d..cd743efc3c 100644
--- a/tools/lib/pretty_print.py
+++ b/tools/lib/pretty_print.py
@@ -1,208 +1,138 @@
 import subprocess
-from typing import List, Optional, Set
+from typing import List, Optional
 
 from zulint.printer import BOLDRED, CYAN, ENDC, GREEN
 
-from .template_parser import Token, is_django_block_tag
+from .template_parser import Token
 
 
-def requires_indent(line: str) -> bool:
-    line = line.lstrip()
-    return line.startswith("<")
+def shift_indents_to_the_next_tokens(tokens: List[Token]) -> None:
+    """
+    During the parsing/validation phase, it's useful to have separate
+    tokens for "indent" chunks, but during pretty printing, we like
+    to attach an `.indent` field to the substantive node, whether
+    it's an HTML tag or template directive or whatever.
+    """
+    tokens[0].indent = ""
+
+    for i, token in enumerate(tokens[:-1]):
+        next_token = tokens[i + 1]
+
+        if token.kind == "indent":
+            next_token.indent = token.s
+            token.new_s = ""
+
+        if token.kind == "newline" and next_token.kind != "indent":
+            next_token.indent = ""
 
 
-def open_token(token: Token) -> bool:
-    if token.kind in (
-        "handlebars_start",
-        "html_start",
-    ):
-        return True
-
-    if token.kind in (
-        "django_start",
-        "jinja2_whitespace_stripped_start",
-        "jinja2_whitespace_stripped_type2_start",
-    ):
-        return is_django_block_tag(token.tag)
-
-    return False
+def token_allows_children_to_skip_indents(token: Token) -> bool:
+    # For legacy reasons we don't always indent blocks.
+    return token.kind in ("django_start", "handlebars_start") or token.tag == "a"
 
 
-def close_token(token: Token) -> bool:
-    return token.kind in (
-        "django_end",
-        "handlebars_end",
-        "html_end",
-        "jinja2_whitespace_stripped_end",
-    )
+def adjust_block_indentation(tokens: List[Token], fn: str) -> None:
+    start_token: Optional[Token] = None
 
+    for token in tokens:
+        if token.kind in ("indent", "whitespace", "newline"):
+            continue
 
-def else_token(token: Token) -> bool:
-    return token.kind in (
-        "django_else",
-        "handlebars_else",
-    )
+        if token.tag in ("code", "pre"):
+            continue
 
+        # Everything below handles substantive tokens only (not whitespace kinds, and not <code>/<pre> tags).
 
-def pop_unused_tokens(tokens: List[Token], row: int) -> bool:
-    was_closed = False
-    while tokens and tokens[-1].line <= row:
-        token = tokens.pop()
-        if close_token(token):
-            was_closed = True
-    return was_closed
+        if token.tag == "else":
+            assert token.start_token
+            if token.indent is not None:
+                token.indent = token.start_token.indent
+            continue
 
+        if start_token and token.indent is not None:
+            if not start_token.indent_is_final and token.indent == start_token.orig_indent:
+                if token_allows_children_to_skip_indents(start_token):
+                    start_token.child_indent = start_token.indent
+            start_token.indent_is_final = True
 
-def indent_pref(row: int, tokens: List[Token], line: str) -> str:
-    opens = 0
-    closes = 0
-    is_else = False
-
-    while tokens and tokens[-1].line == row:
-        token = tokens.pop()
-        if open_token(token):
-            opens += 1
-        elif close_token(token):
-            closes += 1
-        elif else_token(token):
-            is_else = True
-
-    if is_else:
-        if opens and closes:
-            return "neutral"
-        return "else"
-
-    i = opens - closes
-    if i == 0:
-        return "neutral"
-    elif i == 1:
-        return "open"
-    elif i == -1:
-        return "close"
-    else:
-        print(i, opens, closes)
-        raise Exception(f"too many tokens on row {row}")
-
-
-def indent_level(s: str) -> int:
-    return len(s) - len(s.lstrip())
-
-
-def same_indent(s1: str, s2: str) -> bool:
-    return indent_level(s1) == indent_level(s2)
-
-
-def next_non_blank_line(lines: List[str], i: int) -> str:
-    next_line = ""
-    for j in range(i + 1, len(lines)):
-        next_line = lines[j]
-        if next_line.strip() != "":
-            break
-    return next_line
-
-
-def get_exempted_lines(tokens: List[Token]) -> Set[int]:
-    exempted = set()
-    for code_tag in ("code", "pre", "script"):
-        for token in tokens:
-            if token.kind == "html_start" and token.tag == code_tag:
-                start: Optional[int] = token.line
-
-            if token.kind == "html_end" and token.tag == code_tag:
-                # The pretty printer expects well-formed HTML, even
-                # if it's strangely formatted, so we expect start
-                # to be None.
-                assert start is not None
-
-                # We leave code blocks completely alone, including
-                # the start and end tags.
-                for i in range(start, token.line + 1):
-                    exempted.add(i)
-                    start = None
-    return exempted
-
-
-def pretty_print_html(html: str, tokens: List[Token]) -> str:
-    exempted_lines = get_exempted_lines(tokens)
-
-    tokens.reverse()
-    lines = html.split("\n")
-
-    open_offsets: List[str] = []
-    formatted_lines = []
-    next_offset: str = ""
-    tag_end_row: Optional[int] = None
-    tag_continuation_offset = ""
-
-    def line_offset(row: int, line: str, next_line: str) -> Optional[str]:
-        nonlocal next_offset
-        nonlocal tag_end_row
-        nonlocal tag_continuation_offset
-
-        if tag_end_row and row < tag_end_row:
-            was_closed = pop_unused_tokens(tokens, row)
-            if was_closed:
-                next_offset = open_offsets.pop()
-            return tag_continuation_offset
-
-        while tokens and tokens[-1].line < row:
-            token = tokens.pop()
-
-        offset = next_offset
-        if tokens:
-            token = tokens[-1]
-            if token.kind == "indent":
-                token = tokens[-2]
-            if (
-                token.line == row
-                and token.line_span > 1
-                and token.kind not in ("template_var", "text")
-            ):
-                if token.kind in ("django_comment", "handlebar_comment", "html_comment"):
-                    tag_continuation_offset = offset
+        # Detect start token by its having an end token
+        if token.end_token:
+            if token.indent is not None:
+                token.orig_indent = token.indent
+                if start_token:
+                    assert start_token.child_indent is not None
+                    token.indent = start_token.child_indent
                 else:
-                    tag_continuation_offset = offset + "  "
-                tag_end_row = row + token.line_span
+                    token.indent = ""
+                token.child_indent = token.indent + "    "
+            token.parent_token = start_token
+            start_token = token
+            continue
 
-        pref = indent_pref(row, tokens, line)
-        if pref == "open":
-            if same_indent(line, next_line) and not requires_indent(line):
-                next_offset = offset
-            else:
-                next_offset = offset + " " * 4
-            open_offsets.append(offset)
-        elif pref == "else":
-            offset = open_offsets[-1]
-            if same_indent(line, next_line):
-                next_offset = offset
-            else:
-                next_offset = offset + " " * 4
-        elif pref == "close":
-            offset = open_offsets.pop()
-            next_offset = offset
-        return offset
+        # Detect end token by its having a start token
+        if token.start_token:
+            if start_token != token.start_token:
+                raise AssertionError(
+                    f"""
+                    {token.kind} was unexpected in {token.s}
+                    in row {token.line} of {fn}
+                    """
+                )
 
-    def adjusted_line(row: int, line: str, next_line: str) -> str:
-        if line.strip() == "":
-            return ""
+            if token.indent is not None:
+                token.indent = start_token.indent
+            start_token = start_token.parent_token
+            continue
 
-        offset = line_offset(row, line, next_line)
+        if token.indent is None:
+            continue
 
-        if row in exempted_lines:
-            return line.rstrip()
+        if start_token is None:
+            token.indent = ""
+            continue
 
-        if offset is None:
-            return line.rstrip()
+        if start_token.child_indent is not None:
+            token.indent = start_token.child_indent
 
-        return offset + line.strip()
 
-    for i, line in enumerate(lines):
-        # We use 1-based indexing for both rows and columns.
-        next_line = next_non_blank_line(lines, i)
-        row = i + 1
-        formatted_lines.append(adjusted_line(row, line, next_line))
+def fix_indents_for_multi_line_tags(tokens: List[Token]) -> None:
+    for token in tokens:
+        if token.kind == "code":
+            continue
 
-    return "\n".join(formatted_lines)
+        if token.line_span == 1 or token.indent is None:
+            continue
+
+        if token.kind in ("django_comment", "handlebar_comment", "html_comment", "text"):
+            continue_indent = token.indent
+        else:
+            continue_indent = token.indent + "  "
+
+        frags = token.new_s.split("\n")
+
+        def fix(frag: str) -> str:
+            frag = frag.strip()
+            return continue_indent + frag if frag else ""
+
+        token.new_s = frags[0] + "\n" + "\n".join(fix(frag) for frag in frags[1:])
+
+
+def apply_token_indents(tokens: List[Token]) -> None:
+    for token in tokens:
+        if token.indent:
+            token.new_s = token.indent + token.new_s
+
+
+def pretty_print_html(tokens: List[Token], fn: str) -> str:
+    for token in tokens:
+        token.new_s = token.s
+
+    shift_indents_to_the_next_tokens(tokens)
+    adjust_block_indentation(tokens, fn)
+    fix_indents_for_multi_line_tags(tokens)
+    apply_token_indents(tokens)
+
+    return "".join(token.new_s for token in tokens)
 
 
 def numbered_lines(s: str) -> str:
@@ -212,7 +142,7 @@ def numbered_lines(s: str) -> str:
 def validate_indent_html(fn: str, tokens: List[Token], fix: bool) -> bool:
     with open(fn) as f:
         html = f.read()
-    phtml = pretty_print_html(html, tokens)
+    phtml = pretty_print_html(tokens, fn)
     if not html.split("\n") == phtml.split("\n"):
         if fix:
             print(GREEN + f"Automatically fixing indentation for {fn}" + ENDC)
diff --git a/tools/lib/template_parser.py b/tools/lib/template_parser.py
index 38b6aeaf09..3b13ce0d53 100644
--- a/tools/lib/template_parser.py
+++ b/tools/lib/template_parser.py
@@ -39,6 +39,14 @@ class Token:
         self.start_token: Optional[Token] = None
         self.end_token: Optional[Token] = None
 
+        # These get set during the pretty-print phase.
+        self.new_s = ""
+        self.indent: Optional[str] = None
+        self.orig_indent: Optional[str] = None
+        self.child_indent: Optional[str] = None
+        self.indent_is_final = False
+        self.parent_token: Optional[Token] = None
+
 
 def tokenize(text: str) -> List[Token]:
     in_code_block = False
diff --git a/tools/tests/test_pretty_print.py b/tools/tests/test_pretty_print.py
index 497c9c021a..b6cd95dc09 100644
--- a/tools/tests/test_pretty_print.py
+++ b/tools/tests/test_pretty_print.py
@@ -281,8 +281,9 @@ GOOD_HTML11 = """
 
 
 def pretty_print(html: str) -> str:
-    tokens = validate(fn=None, text=html)
-    return pretty_print_html(html, tokens)
+    fn = "<test str>"
+    tokens = validate(fn=fn, text=html)
+    return pretty_print_html(tokens, fn=fn)
 
 
 class TestPrettyPrinter(unittest.TestCase):