check-templates: Avoid duplicate tokenizing step.

Now we only tokenize the file once, and we pass
**validated** tokens to the pretty printer.

There are a few reasons for this:

    * It obviously saves a lot of extra computation
      just in terms of tokenization.

    * It allows our validator to add fields
      to the Token objects that help the pretty
      printer.

I also removed/tweaked a lot of legacy tests for
pretty_print.py that were exercising bizarrely
formatted HTML that we now simply ban during the
validation phase.
This commit is contained in:
Steve Howell 2021-12-02 12:19:19 +00:00 committed by Tim Abbott
parent 0decfa8da0
commit c0d72ba236
4 changed files with 68 additions and 236 deletions

View File

@ -115,10 +115,8 @@ def check_html_templates(templates: Iterable[str], all_dups: bool, fix: bool) ->
sys.exit(1)
for fn in templates:
validate(fn)
for fn in templates:
if not validate_indent_html(fn, fix):
tokens = validate(fn)
if not validate_indent_html(fn, tokens, fix):
sys.exit(1)
@ -127,10 +125,8 @@ def check_handlebar_templates(templates: Iterable[str], fix: bool) -> None:
templates = [fn for fn in templates if fn.endswith(".hbs")]
for fn in templates:
validate(fn)
for fn in templates:
if not validate_indent_html(fn, fix):
tokens = validate(fn)
if not validate_indent_html(fn, tokens, fix):
sys.exit(1)

View File

@ -3,7 +3,7 @@ from typing import List, Optional, Set
from zulint.printer import ENDC, GREEN
from .template_parser import Token, is_django_block_tag, tokenize
from .template_parser import Token, is_django_block_tag
def requires_indent(line: str) -> bool:
@ -122,9 +122,7 @@ def get_exempted_lines(tokens: List[Token]) -> Set[int]:
return exempted
def pretty_print_html(html: str) -> str:
tokens = tokenize(html)
def pretty_print_html(html: str, tokens: List[Token]) -> str:
exempted_lines = get_exempted_lines(tokens)
tokens.reverse()
@ -207,10 +205,10 @@ def pretty_print_html(html: str) -> str:
return "\n".join(formatted_lines)
def validate_indent_html(fn: str, fix: bool) -> bool:
def validate_indent_html(fn: str, tokens: List[Token], fix: bool) -> bool:
with open(fn) as f:
html = f.read()
phtml = pretty_print_html(html)
phtml = pretty_print_html(html, tokens)
if not html.split("\n") == phtml.split("\n"):
if fix:
print(GREEN + f"Automatically fixing indentation for {fn}" + ENDC)

View File

@ -316,7 +316,7 @@ def tag_flavor(token: Token) -> Optional[str]:
raise AssertionError(f"tools programmer neglected to handle {kind} tokens")
def validate(fn: Optional[str] = None, text: Optional[str] = None) -> None:
def validate(fn: Optional[str] = None, text: Optional[str] = None) -> List[Token]:
assert fn or text
if fn is None:
@ -445,6 +445,8 @@ def validate(fn: Optional[str] = None, text: Optional[str] = None) -> None:
ensure_matching_indentation(fn, tokens, lines)
return tokens
def ensure_matching_indentation(fn: str, tokens: List[Token], lines: List[str]) -> None:
for token in tokens:

View File

@ -1,6 +1,7 @@
import unittest
from tools.lib.pretty_print import pretty_print_html
from tools.lib.template_parser import validate
# Note that GOOD_HTML isn't necessarily beautiful HTML. Apart
# from adjusting indentation, we mostly leave things alone to
@ -10,8 +11,6 @@ BAD_HTML = """
<!-- test -->
<!DOCTYPE html>
<html>
<!-- test -->
<head>
@ -20,19 +19,20 @@ BAD_HTML = """
<link rel="stylesheet" href="style.css" />
</head>
<body>
<div><p>Hello<br />world!</p></div>
<p>Goodbye<!-- test -->world!</p>
<table>
<tr>
<td>5</td>
</tr>
</table>
<div><p>Hello<br />world!</p></div>
<p>Goodbye<!-- test -->world!</p>
<table>
<tr>
<td>5</td>
</tr>
</table>
<pre>
print 'hello world'
</pre>
<div class = "foo"
<div class = "foo"
id = "bar"
role = "whatever">{{ bla }}</div>
role = "whatever">{{ bla }}
</div>
</body>
</html>
<!-- test -->
@ -42,8 +42,6 @@ GOOD_HTML = """
<!-- test -->
<!DOCTYPE html>
<html>
<!-- test -->
<head>
@ -64,7 +62,8 @@ GOOD_HTML = """
</pre>
<div class = "foo"
id = "bar"
role = "whatever">{{ bla }}</div>
role = "whatever">{{ bla }}
</div>
</body>
</html>
<!-- test -->
@ -72,9 +71,9 @@ GOOD_HTML = """
BAD_HTML1 = """
<html>
<body>
foobarfoobarfoo<b>bar</b>
</body>
<body>
foobarfoobarfoo<b>bar</b>
</body>
</html>
"""
@ -88,11 +87,11 @@ GOOD_HTML1 = """
BAD_HTML2 = """
<html>
<body>
<body>
{{# foobar area}}
foobarfoobarfoo<b>bar</b>
{{/ foobar area}}
</body>
{{/ foobar}}
</body>
</html>
"""
@ -101,7 +100,7 @@ GOOD_HTML2 = """
<body>
{{# foobar area}}
foobarfoobarfoo<b>bar</b>
{{/ foobar area}}
{{/ foobar}}
</body>
</html>
"""
@ -110,9 +109,9 @@ GOOD_HTML2 = """
BAD_HTML4 = """
<div>
foo
<p>hello</p>
bar
foo
<p>hello</p>
bar
</div>
"""
@ -126,13 +125,13 @@ GOOD_HTML4 = """
BAD_HTML5 = """
<div>
foo
{{#if foobar}}
hello
{{else}}
bye
{{/if}}
bar
foo
{{#if foobar}}
hello
{{else}}
bye
{{/if}}
bar
</div>
"""
@ -150,7 +149,7 @@ GOOD_HTML5 = """
BAD_HTML6 = """
<div>
<p> <strong> <span class = "whatever">foobar </span> </strong></p>
<p> <strong> <span class = "whatever">foobar </span> </strong></p>
</div>
"""
@ -178,14 +177,14 @@ GOOD_HTML7 = """
BAD_HTML8 = """
{{#each test}}
{{#with this}}
{{#if foobar}}
<div class="anything">{{{test}}}</div>
{{/if}}
{{#if foobar2}}
{{> teststuff}}
{{/if}}
{{/with}}
{{#with this}}
{{#if foobar}}
<div class="anything">{{{test}}}</div>
{{/if}}
{{#if foobar2}}
{{> teststuff}}
{{/if}}
{{/with}}
{{/each}}
"""
@ -262,11 +261,11 @@ GOOD_HTML10 = """
BAD_HTML11 = """
<div class="test1">
<div class="test2">
<div class="test2">
foobar
<div class="test2">
<div class="test2">
</div>
</div>
</div>
</div>
"""
@ -280,167 +279,10 @@ GOOD_HTML11 = """
</div>
"""
BAD_HTML12 = """
<div class="test1">
<pre>
<div class="test2">
foobar
<div class="test2">
</div>
</div>
</pre>
</div>
"""
GOOD_HTML12 = """
<div class="test1">
<pre>
<div class="test2">
foobar
<div class="test2">
</div>
</div>
</pre>
</div>
"""
BAD_HTML13 = """
<div>
{{#if this.code}}
<div>&nbsp:{{this.name}}:</div>
{{else}}
{{#if this.is_realm_emoji}}
<img src="{{this.url}}" class="emoji" />
{{else}}
<br />
{{/if}}
{{/if}}
<div>{{this.count}}</div>
</div>
"""
GOOD_HTML13 = """
<div>
{{#if this.code}}
<div>&nbsp:{{this.name}}:</div>
{{else}}
{{#if this.is_realm_emoji}}
<img src="{{this.url}}" class="emoji" />
{{else}}
<br />
{{/if}}
{{/if}}
<div>{{this.count}}</div>
</div>
"""
BAD_HTML14 = """
<div>
{{#if this.code}}
<pre>Here goes some cool code.</pre>
{{else}}
<div>
content of first div
<div>
content of second div.
</div>
</div>
{{/if}}
</div>
"""
GOOD_HTML14 = """
<div>
{{#if this.code}}
<pre>Here goes some cool code.</pre>
{{else}}
<div>
content of first div
<div>
content of second div.
</div>
</div>
{{/if}}
</div>
"""
BAD_HTML15 = """
<div>
<img alt=":thumbs_up:"
class="emoji"
src="/path/to/png"
title=":thumbs_up:"/>
<img alt=":thumbs_up:"
class="emoji"
src="/path/to/png"
title=":thumbs_up:"/>
<img alt=":thumbs_up:"
title=":thumbs_up:"/>
</div>
"""
GOOD_HTML15 = """
<div>
<img alt=":thumbs_up:"
class="emoji"
src="/path/to/png"
title=":thumbs_up:"/>
<img alt=":thumbs_up:"
class="emoji"
src="/path/to/png"
title=":thumbs_up:"/>
<img alt=":thumbs_up:"
title=":thumbs_up:"/>
</div>
"""
BAD_HTML16 = """
<div>
{{> settings_checkbox
setting_name="realm_name_in_notifications"
is_checked=user_settings.realm_name_in_notifications
label=settings_label.realm_name_in_notifications}}
</div>
"""
GOOD_HTML16 = """
<div>
{{> settings_checkbox
setting_name="realm_name_in_notifications"
is_checked=user_settings.realm_name_in_notifications
label=settings_label.realm_name_in_notifications}}
</div>
"""
BAD_HTML17 = """
<div>
<button type="button"
class="btn btn-primary btn-small">{{t "Yes" }}</button>
<button type="button"
id="confirm_btn"
class="btn btn-primary btn-small">{{t "Yes" }}</button>
<div class = "foo"
id = "bar"
role = "whatever">
{{ bla }}
</div>
</div>
"""
GOOD_HTML17 = """
<div>
<button type="button"
class="btn btn-primary btn-small">{{t "Yes" }}</button>
<button type="button"
id="confirm_btn"
class="btn btn-primary btn-small">{{t "Yes" }}</button>
<div class = "foo"
id = "bar"
role = "whatever">
{{ bla }}
</div>
</div>
"""
def pretty_print(html: str) -> str:
tokens = validate(fn=None, text=html)
return pretty_print_html(html, tokens)
class TestPrettyPrinter(unittest.TestCase):
@ -448,21 +290,15 @@ class TestPrettyPrinter(unittest.TestCase):
self.assertEqual(a.split("\n"), b.split("\n"))
def test_pretty_print(self) -> None:
self.compare(pretty_print_html(GOOD_HTML), GOOD_HTML)
self.compare(pretty_print_html(BAD_HTML), GOOD_HTML)
self.compare(pretty_print_html(BAD_HTML1), GOOD_HTML1)
self.compare(pretty_print_html(BAD_HTML2), GOOD_HTML2)
self.compare(pretty_print_html(BAD_HTML4), GOOD_HTML4)
self.compare(pretty_print_html(BAD_HTML5), GOOD_HTML5)
self.compare(pretty_print_html(BAD_HTML6), GOOD_HTML6)
self.compare(pretty_print_html(BAD_HTML7), GOOD_HTML7)
self.compare(pretty_print_html(BAD_HTML8), GOOD_HTML8)
self.compare(pretty_print_html(BAD_HTML9), GOOD_HTML9)
self.compare(pretty_print_html(BAD_HTML10), GOOD_HTML10)
self.compare(pretty_print_html(BAD_HTML11), GOOD_HTML11)
self.compare(pretty_print_html(BAD_HTML12), GOOD_HTML12)
self.compare(pretty_print_html(BAD_HTML13), GOOD_HTML13)
self.compare(pretty_print_html(BAD_HTML14), GOOD_HTML14)
self.compare(pretty_print_html(BAD_HTML15), GOOD_HTML15)
self.compare(pretty_print_html(BAD_HTML16), GOOD_HTML16)
self.compare(pretty_print_html(BAD_HTML17), GOOD_HTML17)
self.compare(pretty_print(GOOD_HTML), GOOD_HTML)
self.compare(pretty_print(BAD_HTML), GOOD_HTML)
self.compare(pretty_print(BAD_HTML1), GOOD_HTML1)
self.compare(pretty_print(BAD_HTML2), GOOD_HTML2)
self.compare(pretty_print(BAD_HTML4), GOOD_HTML4)
self.compare(pretty_print(BAD_HTML5), GOOD_HTML5)
self.compare(pretty_print(BAD_HTML6), GOOD_HTML6)
self.compare(pretty_print(BAD_HTML7), GOOD_HTML7)
self.compare(pretty_print(BAD_HTML8), GOOD_HTML8)
self.compare(pretty_print(BAD_HTML9), GOOD_HTML9)
self.compare(pretty_print(BAD_HTML10), GOOD_HTML10)
self.compare(pretty_print(BAD_HTML11), GOOD_HTML11)