tools: Create HTML pretty printer.

In this commit we extend the work being done by @showell in PR#1778
to develop a tool to pretty-print HTML and our handlebars templates
in order to enforce our style convention of 4-space indentation in
templates.

This commit introduces the following changes:
* Fix Py3 compatibility.
* Add the ability to prettify cases where HTML tags do not start
  a line, and add test cases for it.
* Add the ability to lint handlebars tags, and add test cases for it.
* Add {{else}} as a special case of indentation.
* Add general test cases for the new tool.

@showell helped me throughout and reviewed this commit.

Fixes #1778
This commit is contained in:
adnrs96 2017-02-15 10:09:42 +05:30 committed by showell
parent 10e220f516
commit e84cf7b6f1
2 changed files with 324 additions and 0 deletions

119
tools/lib/pretty_print.py Normal file
View File

@ -0,0 +1,119 @@
from __future__ import absolute_import
from __future__ import print_function

from typing import Any, Dict, List

from six.moves import range

from .template_parser import (
    tokenize,
    Token,
)
def _first_col(line):
    # type: (str) -> int
    """Return the 1-based column of the first non-whitespace character."""
    return len(line) - len(line.lstrip()) + 1


def pretty_print_html(html, num_spaces=4):
    # type: (str, int) -> str
    """Return `html` with each line's leading indentation adjusted.

    The text is tokenized with `tokenize`; every start token that begins
    on a later line than the enclosing start token opens an indentation
    block.  When the matching end token is seen, an offset (positive:
    spaces to prepend, negative: characters to drop from the front) is
    recorded for every line of the block that does not already have one.
    Only leading whitespace is changed; line breaks are left alone and
    whitespace-only lines are emitted as empty lines.

    html: the template text to re-indent.
    num_spaces: number of spaces per indentation level.
    """
    # We use 1-based indexing for both rows and columns.
    tokens = tokenize(html)
    lines = html.split('\n')

    # We will keep a stack of "start" tags so that we know
    # when HTML ranges end.  Note that some start tags won't
    # be blocks from an indentation standpoint.
    stack = []  # type: List[Dict[str, Any]]

    # Seed our stack with a pseudo entry to make depth calculations
    # easier.
    stack.append(dict(
        block=False,
        depth=-1,
        line=-1,
        token_kind='html_start',
        extra_indent=0))

    # Our main job is to figure out offsets that we use to nudge lines
    # over by.
    offsets = {}  # type: Dict[int, int]

    # Loop through our start/end tokens, and calculate offsets.  As
    # we proceed, we will push/pop info dictionaries on/off a stack.
    for token in tokens:
        if token.kind in ('html_start', 'handlebars_start'):
            parent = stack[-1]
            # A start tag should only cause a new indent if we
            # are on a new line.
            is_block = token.line > parent['line']

            if is_block:
                if token.kind == 'handlebars_start' and parent['token_kind'] == 'handlebars_start':
                    # A handlebars block nested directly inside another
                    # handlebars block pushes the outer one a level deeper.
                    parent['depth'] = parent['depth'] + 1
                new_depth = parent['depth'] + 1
                extra_indent = parent['extra_indent']
                adjustment = _first_col(lines[token.line - 1])
                offset = (1 + extra_indent + new_depth * num_spaces) - adjustment
                info = dict(
                    block=True,
                    depth=new_depth,
                    actual_depth=new_depth,
                    line=token.line,
                    token_kind=token.kind,
                    offset=offset,
                    # Non-zero when the tag does not start its line: the
                    # distance from the line's first character to the tag.
                    extra_indent=token.col - adjustment + extra_indent
                )
                if token.kind == 'handlebars_start':
                    info['depth'] = new_depth - 1
            else:
                # Non-block entries inherit the enclosing indentation state.
                # Without these keys, a start tag on a later line whose
                # parent is a still-open non-block tag raised KeyError when
                # reading stack[-1]['depth'].
                info = dict(
                    block=False,
                    depth=parent['depth'],
                    line=token.line,
                    token_kind=token.kind,
                    extra_indent=parent['extra_indent'],
                )
            stack.append(info)

        elif token.kind in ('html_end', 'handlebars_end'):
            info = stack.pop()
            if not info['block']:
                continue

            # We are at the end of an indentation block.  We
            # assume the whole block was formatted ok before, just
            # possibly at an indentation that we don't like, so we
            # nudge over all lines in the block by the same offset.
            start_line = info['line']
            end_line = token.line
            offsets[start_line] = info['offset']
            offsets[end_line] = info['offset']
            is_pre = token.tag == 'pre'

            for line_num in range(start_line + 1, end_line):
                # Be careful not to override offsets that happened
                # deeper in the HTML within our block.
                if line_num in offsets:
                    continue
                if is_pre:
                    # Lines inside <pre> move rigidly with the tag itself.
                    offsets[line_num] = info['offset']
                    continue
                line = lines[line_num - 1]
                new_depth = info['depth'] + 1
                if line.lstrip().startswith('{{else}}'):
                    # {{else}} lines up with the block's own start tag.
                    new_depth = info['actual_depth']
                target_col = 1 + info['extra_indent'] + new_depth * num_spaces
                offsets[line_num] = target_col - _first_col(line)

    # Now that we have all of our offsets calculated, we can just
    # join all our lines together, fixing up offsets as needed.
    formatted_lines = []
    for row, line in enumerate(lines, 1):
        if line.strip() == '':
            formatted_lines.append('')
            continue
        offset = offsets.get(row, 0)
        pretty_line = line
        if offset > 0:
            pretty_line = (' ' * offset) + line
        elif offset < 0:
            pretty_line = line[-offset:]
            # Sanity check: we must only ever have removed whitespace.
            assert line.strip() == pretty_line.strip()
        formatted_lines.append(pretty_line)

    return '\n'.join(formatted_lines)

View File

@ -0,0 +1,205 @@
from __future__ import absolute_import
from __future__ import print_function
import unittest
from tools.lib.pretty_print import pretty_print_html
# Note that GOOD_HTML isn't necessarily beautiful HTML. Apart
# from adjusting indentation, we mostly leave things alone to
# respect whatever line-wrapping styles were in place before.
#
# BAD_HTML is the input handed to pretty_print_html in
# test_pretty_print. It is a whole document containing comments, a
# doctype, self-closing tags, a table, a <pre> block and a start tag
# whose attributes wrap over several lines.
BAD_HTML = """
<!-- test -->
<!DOCTYPE html>
<html>
<!-- test -->
<head>
<title>Test</title>
<meta charset="utf-8" />
<link rel="stylesheet" href="style.css" />
</head>
<body>
<div><p>Hello<br />world!</p></div>
<p>Goodbye<!-- test -->world!</p>
<table>
<tr>
<td>5</td>
</tr>
</table>
<pre>
print 'hello world'
</pre>
<div class = "foo"
id = "bar"
role = "whatever">{{ bla }}</div>
</body>
</html>
<!-- test -->
"""
# GOOD_HTML is the expected result for BAD_HTML. test_pretty_print also
# feeds GOOD_HTML itself through pretty_print_html and expects it back
# unchanged.
GOOD_HTML = """
<!-- test -->
<!DOCTYPE html>
<html>
<!-- test -->
<head>
<title>Test</title>
<meta charset="utf-8" />
<link rel="stylesheet" href="style.css" />
</head>
<body>
<div><p>Hello<br />world!</p></div>
<p>Goodbye<!-- test -->world!</p>
<table>
<tr>
<td>5</td>
</tr>
</table>
<pre>
print 'hello world'
</pre>
<div class = "foo"
id = "bar"
role = "whatever">{{ bla }}</div>
</body>
</html>
<!-- test -->
"""
# Case 1: an inline <b> tag that does not start its line (plain text
# precedes it). BAD_HTML1 is the input; GOOD_HTML1 is the expected
# output of pretty_print_html.
BAD_HTML1 = """
<html>
<body>
foobarfoobarfoo<b>bar</b>
</body>
</html>
"""
GOOD_HTML1 = """
<html>
<body>
foobarfoobarfoo<b>bar</b>
</body>
</html>
"""
# Case 2: a handlebars block ({{# ...}} / {{/ ...}}) wrapping a line
# that contains a mid-line <b> tag. BAD_HTML2 is the input; GOOD_HTML2
# is the expected output of pretty_print_html.
BAD_HTML2 = """
<html>
<body>
{{# foobar area}}
foobarfoobarfoo<b>bar</b>
{{/ foobar area}}
</body>
</html>
"""
GOOD_HTML2 = """
<html>
<body>
{{# foobar area}}
foobarfoobarfoo<b>bar</b>
{{/ foobar area}}
</body>
</html>
"""
# Case 3: a <blockquote> start tag that appears mid-line (after plain
# text) and is closed several lines later, all inside a handlebars
# block. BAD_HTML3 is the input; GOOD_HTML3 is the expected output of
# pretty_print_html.
BAD_HTML3 = """
<html>
<body>
{{# foobar area}}
foobarfoobar<blockquote>
<p>
FOOBAR
</p>
</blockquote>
{{/ foobar area}}
</body>
</html>
"""
GOOD_HTML3 = """
<html>
<body>
{{# foobar area}}
foobarfoobar<blockquote>
<p>
FOOBAR
</p>
</blockquote>
{{/ foobar area}}
</body>
</html>
"""
# Case 4: plain text lines before and after a nested <p> element
# inside a <div>. BAD_HTML4 is the input; GOOD_HTML4 is the expected
# output of pretty_print_html.
BAD_HTML4 = """
<div>
foo
<p>hello</p>
bar
</div>
"""
GOOD_HTML4 = """
<div>
foo
<p>hello</p>
bar
</div>
"""
# Case 5: a handlebars {{#if}} / {{else}} / {{/if}} construct inside a
# <div>, exercising the special handling of {{else}} lines. BAD_HTML5
# is the input; GOOD_HTML5 is the expected output of pretty_print_html.
BAD_HTML5 = """
<div>
foo
{{#if foobar}}
hello
{{else}}
bye
{{/if}}
bar
</div>
"""
GOOD_HTML5 = """
<div>
foo
{{#if foobar}}
hello
{{else}}
bye
{{/if}}
bar
</div>
"""
# Case 6: several start tags opened (and closed) on the same line,
# separated by spaces. BAD_HTML6 is the input; GOOD_HTML6 is the
# expected output of pretty_print_html.
BAD_HTML6 = """
<div>
<p> <strong> <span class = "whatever">foobar </span> </strong></p>
</div>
"""
GOOD_HTML6 = """
<div>
<p> <strong> <span class = "whatever">foobar </span> </strong></p>
</div>
"""
class TestPrettyPrinter(unittest.TestCase):
    """Checks pretty_print_html against the fixture pairs in this module."""

    def compare(self, a, b):
        # type: (str, str) -> None
        """Assert that `a` and `b` are equal, comparing them as line lists.

        Splitting on newlines first makes unittest report the mismatch
        as a list difference rather than as two long strings.
        """
        self.assertEqual(a.split('\n'), b.split('\n'))

    def test_pretty_print(self):
        # type: () -> None
        """Run every (input, expected) fixture pair through the printer."""
        # The first pair checks that already-formatted input comes back
        # unchanged; the rest check that each input is reformatted.
        cases = [
            (GOOD_HTML, GOOD_HTML),
            (BAD_HTML, GOOD_HTML),
            (BAD_HTML1, GOOD_HTML1),
            (BAD_HTML2, GOOD_HTML2),
            (BAD_HTML3, GOOD_HTML3),
            (BAD_HTML4, GOOD_HTML4),
            (BAD_HTML5, GOOD_HTML5),
            (BAD_HTML6, GOOD_HTML6),
        ]
        for source, expected in cases:
            self.compare(pretty_print_html(source), expected)