From fb574431cb25589f8b09158077fa37c28c6f07a6 Mon Sep 17 00:00:00 2001
From: Steve Howell
Date: Thu, 2 Dec 2021 16:10:42 +0000
Subject: [PATCH] check-templates: Rewrite pretty_print (again).
It now does everything based on the tokens, rather
than walking the lines and trying to match up tokens
to lines.
---
templates/zerver/accounts_send_confirm.html | 2 +-
templates/zerver/config_error.html | 15 +-
templates/zerver/development/email_log.html | 4 +-
.../emails/confirm_new_email.source.html | 2 +-
templates/zerver/features.html | 2 +-
templates/zerver/pricing_model.html | 2 +-
tools/lib/pretty_print.py | 292 +++++++-----------
tools/lib/template_parser.py | 8 +
tools/tests/test_pretty_print.py | 5 +-
9 files changed, 135 insertions(+), 197 deletions(-)
diff --git a/templates/zerver/accounts_send_confirm.html b/templates/zerver/accounts_send_confirm.html
index d07425644a..81fdfde2c1 100644
--- a/templates/zerver/accounts_send_confirm.html
+++ b/templates/zerver/accounts_send_confirm.html
@@ -19,7 +19,7 @@ page can be easily identified in it's respective JavaScript file -->
{% include 'zerver/dev_env_email_access_details.html' %}
{% trans %}Still no email? We can resend it.{% endtrans %}
- ({{ _("Just in case, take a look at your Spam folder.") }})
+ ({{ _("Just in case, take a look at your Spam folder.") }})
{% if realm_creation %}
diff --git a/templates/zerver/emails/confirm_new_email.source.html b/templates/zerver/emails/confirm_new_email.source.html
index 4f055da92f..6b6bcd188d 100644
--- a/templates/zerver/emails/confirm_new_email.source.html
+++ b/templates/zerver/emails/confirm_new_email.source.html
@@ -8,7 +8,7 @@
{% trans %}Hi,{% endtrans %}
{% trans realm_uri=macros.link_tag(realm_uri), old_email=macros.email_tag(old_email), new_email=macros.email_tag(new_email) %}We received a request to change the email address for the Zulip account on {{ realm_uri }} from {{ old_email }} to {{ new_email }}. To confirm this change, please click below:{% endtrans %}
-{{_('Confirm email change') }}
+ {{_('Confirm email change') }}
{% trans support_email=macros.email_tag(support_email) %}If you did not request this change, please contact us immediately at {{ support_email }}.{% endtrans %}
{% endblock %}
diff --git a/templates/zerver/features.html b/templates/zerver/features.html
index 1b366ad904..23cdf9de82 100644
--- a/templates/zerver/features.html
+++ b/templates/zerver/features.html
@@ -105,7 +105,7 @@
text editor. Anything you can do with a mouse, you
can do even faster from the keyboard.
- Learn more about keyboard shortcuts.
+ Learn more about keyboard shortcuts.
diff --git a/templates/zerver/pricing_model.html b/templates/zerver/pricing_model.html
index 548ff3e550..3a052fe8c1 100644
--- a/templates/zerver/pricing_model.html
+++ b/templates/zerver/pricing_model.html
@@ -124,7 +124,7 @@
LDAP/Active Directory sync
Advanced roles and permissions
Easy installation
- and maintenance
+ and maintenance
diff --git a/tools/lib/pretty_print.py b/tools/lib/pretty_print.py
index 436437e36d..cd743efc3c 100644
--- a/tools/lib/pretty_print.py
+++ b/tools/lib/pretty_print.py
@@ -1,208 +1,138 @@
import subprocess
-from typing import List, Optional, Set
+from typing import List, Optional
from zulint.printer import BOLDRED, CYAN, ENDC, GREEN
-from .template_parser import Token, is_django_block_tag
+from .template_parser import Token
-def requires_indent(line: str) -> bool:
- line = line.lstrip()
- return line.startswith("<")
+def shift_indents_to_the_next_tokens(tokens: List[Token]) -> None:
+ """
+ During the parsing/validation phase, it's useful to have separate
+ tokens for "indent" chunks, but during pretty printing, we like
+ to attach an `.indent` field to the substantive node, whether
+ it's an HTML tag or template directive or whatever.
+ """
+ tokens[0].indent = ""
+
+ for i, token in enumerate(tokens[:-1]):
+ next_token = tokens[i + 1]
+
+ if token.kind == "indent":
+ next_token.indent = token.s
+ token.new_s = ""
+
+ if token.kind == "newline" and next_token.kind != "indent":
+ next_token.indent = ""
-def open_token(token: Token) -> bool:
- if token.kind in (
- "handlebars_start",
- "html_start",
- ):
- return True
-
- if token.kind in (
- "django_start",
- "jinja2_whitespace_stripped_start",
- "jinja2_whitespace_stripped_type2_start",
- ):
- return is_django_block_tag(token.tag)
-
- return False
+def token_allows_children_to_skip_indents(token: Token) -> bool:
+ # For legacy reasons we don't always indent blocks.
+ return token.kind in ("django_start", "handlebars_start") or token.tag == "a"
-def close_token(token: Token) -> bool:
- return token.kind in (
- "django_end",
- "handlebars_end",
- "html_end",
- "jinja2_whitespace_stripped_end",
- )
+def adjust_block_indentation(tokens: List[Token], fn: str) -> None:
+ start_token: Optional[Token] = None
+ for token in tokens:
+ if token.kind in ("indent", "whitespace", "newline"):
+ continue
-def else_token(token: Token) -> bool:
- return token.kind in (
- "django_else",
- "handlebars_else",
- )
+ if token.tag in ("code", "pre"):
+ continue
+ # print(token.line, repr(start_token.indent) if start_token else "?", repr(token.indent), token.s, token.end_token and "start", token.start_token and "end")
-def pop_unused_tokens(tokens: List[Token], row: int) -> bool:
- was_closed = False
- while tokens and tokens[-1].line <= row:
- token = tokens.pop()
- if close_token(token):
- was_closed = True
- return was_closed
+ if token.tag == "else":
+ assert token.start_token
+ if token.indent is not None:
+ token.indent = token.start_token.indent
+ continue
+ if start_token and token.indent is not None:
+ if not start_token.indent_is_final and token.indent == start_token.orig_indent:
+ if token_allows_children_to_skip_indents(start_token):
+ start_token.child_indent = start_token.indent
+ start_token.indent_is_final = True
-def indent_pref(row: int, tokens: List[Token], line: str) -> str:
- opens = 0
- closes = 0
- is_else = False
-
- while tokens and tokens[-1].line == row:
- token = tokens.pop()
- if open_token(token):
- opens += 1
- elif close_token(token):
- closes += 1
- elif else_token(token):
- is_else = True
-
- if is_else:
- if opens and closes:
- return "neutral"
- return "else"
-
- i = opens - closes
- if i == 0:
- return "neutral"
- elif i == 1:
- return "open"
- elif i == -1:
- return "close"
- else:
- print(i, opens, closes)
- raise Exception(f"too many tokens on row {row}")
-
-
-def indent_level(s: str) -> int:
- return len(s) - len(s.lstrip())
-
-
-def same_indent(s1: str, s2: str) -> bool:
- return indent_level(s1) == indent_level(s2)
-
-
-def next_non_blank_line(lines: List[str], i: int) -> str:
- next_line = ""
- for j in range(i + 1, len(lines)):
- next_line = lines[j]
- if next_line.strip() != "":
- break
- return next_line
-
-
-def get_exempted_lines(tokens: List[Token]) -> Set[int]:
- exempted = set()
- for code_tag in ("code", "pre", "script"):
- for token in tokens:
- if token.kind == "html_start" and token.tag == code_tag:
- start: Optional[int] = token.line
-
- if token.kind == "html_end" and token.tag == code_tag:
- # The pretty printer expects well-formed HTML, even
- # if it's strangely formatted, so we expect start
- # to be None.
- assert start is not None
-
- # We leave code blocks completely alone, including
- # the start and end tags.
- for i in range(start, token.line + 1):
- exempted.add(i)
- start = None
- return exempted
-
-
-def pretty_print_html(html: str, tokens: List[Token]) -> str:
- exempted_lines = get_exempted_lines(tokens)
-
- tokens.reverse()
- lines = html.split("\n")
-
- open_offsets: List[str] = []
- formatted_lines = []
- next_offset: str = ""
- tag_end_row: Optional[int] = None
- tag_continuation_offset = ""
-
- def line_offset(row: int, line: str, next_line: str) -> Optional[str]:
- nonlocal next_offset
- nonlocal tag_end_row
- nonlocal tag_continuation_offset
-
- if tag_end_row and row < tag_end_row:
- was_closed = pop_unused_tokens(tokens, row)
- if was_closed:
- next_offset = open_offsets.pop()
- return tag_continuation_offset
-
- while tokens and tokens[-1].line < row:
- token = tokens.pop()
-
- offset = next_offset
- if tokens:
- token = tokens[-1]
- if token.kind == "indent":
- token = tokens[-2]
- if (
- token.line == row
- and token.line_span > 1
- and token.kind not in ("template_var", "text")
- ):
- if token.kind in ("django_comment", "handlebar_comment", "html_comment"):
- tag_continuation_offset = offset
+ # Detect start token by its having a end token
+ if token.end_token:
+ if token.indent is not None:
+ token.orig_indent = token.indent
+ if start_token:
+ assert start_token.child_indent is not None
+ token.indent = start_token.child_indent
else:
- tag_continuation_offset = offset + " "
- tag_end_row = row + token.line_span
+ token.indent = ""
+ token.child_indent = token.indent + " "
+ token.parent_token = start_token
+ start_token = token
+ continue
- pref = indent_pref(row, tokens, line)
- if pref == "open":
- if same_indent(line, next_line) and not requires_indent(line):
- next_offset = offset
- else:
- next_offset = offset + " " * 4
- open_offsets.append(offset)
- elif pref == "else":
- offset = open_offsets[-1]
- if same_indent(line, next_line):
- next_offset = offset
- else:
- next_offset = offset + " " * 4
- elif pref == "close":
- offset = open_offsets.pop()
- next_offset = offset
- return offset
+ # Detect end token by its having a start token
+ if token.start_token:
+ if start_token != token.start_token:
+ raise AssertionError(
+ f"""
+ {token.kind} was unexpected in {token.s}
+ in row {token.line} of {fn}
+ """
+ )
- def adjusted_line(row: int, line: str, next_line: str) -> str:
- if line.strip() == "":
- return ""
+ if token.indent is not None:
+ token.indent = start_token.indent
+ start_token = start_token.parent_token
+ continue
- offset = line_offset(row, line, next_line)
+ if token.indent is None:
+ continue
- if row in exempted_lines:
- return line.rstrip()
+ if start_token is None:
+ token.indent = ""
+ continue
- if offset is None:
- return line.rstrip()
+ if start_token.child_indent is not None:
+ token.indent = start_token.child_indent
- return offset + line.strip()
- for i, line in enumerate(lines):
- # We use 1-based indexing for both rows and columns.
- next_line = next_non_blank_line(lines, i)
- row = i + 1
- formatted_lines.append(adjusted_line(row, line, next_line))
+def fix_indents_for_multi_line_tags(tokens: List[Token]) -> None:
+ for token in tokens:
+ if token.kind == "code":
+ continue
- return "\n".join(formatted_lines)
+ if token.line_span == 1 or token.indent is None:
+ continue
+
+ if token.kind in ("django_comment", "handlebar_comment", "html_comment", "text"):
+ continue_indent = token.indent
+ else:
+ continue_indent = token.indent + " "
+
+ frags = token.new_s.split("\n")
+
+ def fix(frag: str) -> str:
+ frag = frag.strip()
+ return continue_indent + frag if frag else ""
+
+ token.new_s = frags[0] + "\n" + "\n".join(fix(frag) for frag in frags[1:])
+
+
+def apply_token_indents(tokens: List[Token]) -> None:
+ for token in tokens:
+ if token.indent:
+ token.new_s = token.indent + token.new_s
+
+
+def pretty_print_html(tokens: List[Token], fn: str) -> str:
+ for token in tokens:
+ token.new_s = token.s
+
+ shift_indents_to_the_next_tokens(tokens)
+ adjust_block_indentation(tokens, fn)
+ fix_indents_for_multi_line_tags(tokens)
+ apply_token_indents(tokens)
+
+ return "".join(token.new_s for token in tokens)
def numbered_lines(s: str) -> str:
@@ -212,7 +142,7 @@ def numbered_lines(s: str) -> str:
def validate_indent_html(fn: str, tokens: List[Token], fix: bool) -> bool:
with open(fn) as f:
html = f.read()
- phtml = pretty_print_html(html, tokens)
+ phtml = pretty_print_html(tokens, fn)
if not html.split("\n") == phtml.split("\n"):
if fix:
print(GREEN + f"Automatically fixing indentation for {fn}" + ENDC)
diff --git a/tools/lib/template_parser.py b/tools/lib/template_parser.py
index 38b6aeaf09..3b13ce0d53 100644
--- a/tools/lib/template_parser.py
+++ b/tools/lib/template_parser.py
@@ -39,6 +39,14 @@ class Token:
self.start_token: Optional[Token] = None
self.end_token: Optional[Token] = None
+ # These get set during the pretty-print phase.
+ self.new_s = ""
+ self.indent: Optional[str] = None
+ self.orig_indent: Optional[str] = None
+ self.child_indent: Optional[str] = None
+ self.indent_is_final = False
+ self.parent_token: Optional[Token] = None
+
def tokenize(text: str) -> List[Token]:
in_code_block = False
diff --git a/tools/tests/test_pretty_print.py b/tools/tests/test_pretty_print.py
index 497c9c021a..b6cd95dc09 100644
--- a/tools/tests/test_pretty_print.py
+++ b/tools/tests/test_pretty_print.py
@@ -281,8 +281,9 @@ GOOD_HTML11 = """
def pretty_print(html: str) -> str:
- tokens = validate(fn=None, text=html)
- return pretty_print_html(html, tokens)
+ fn = ""
+ tokens = validate(fn=fn, text=html)
+ return pretty_print_html(tokens, fn=fn)
class TestPrettyPrinter(unittest.TestCase):