tools: Create HTML pretty printer.

In this commit we extend the work done by @showell in PR #1778 to develop a tool that pretty-prints HTML and our Handlebars templates, in order to enforce our style convention of 4-space indentation in templates. This commit introduces the following changes: * Fix Py3 compatibility. * Add the ability to prettify HTML when a tag is not at the start of a line, and add test cases for it. * Add the ability to lint Handlebars tags, and add test cases for it. * Add {{else}} as a special case of indentation. * Add general test cases for the new tool. @showell helped me throughout and reviewed this commit. Fixes #1778.
2017-02-15 10:09:42 +05:30 · 2017-02-15 10:09:42 +05:30 · e84cf7b6f1
parent 10e220f516
commit e84cf7b6f1
2 changed files with 324 additions and 0 deletions
--- a/tools/lib/pretty_print.py
+++ b/tools/lib/pretty_print.py
@ -0,0 +1,119 @@
+from __future__ import absolute_import
+from __future__ import print_function
+
+from typing import Any, Dict, List
+
+from .template_parser import (
+    tokenize,
+    Token,
+)
+from six.moves import range
+
+def pretty_print_html(html, num_spaces=4):
+    # type: (str, int) -> str
+    """Return `html` with the leading whitespace of its lines normalized.
+
+    The text is tokenized with `tokenize`.  Each HTML or handlebars start
+    tag that sits on a later line than the entry on top of the stack
+    opens an indentation "block"; when an end tag pops that entry, the
+    lines of the block are shifted left or right.  The target column is
+    derived from the nesting depth times `num_spaces`, plus any extra
+    indent inherited from the enclosing entry.  Only leading
+    whitespace is changed, and whitespace-only lines become empty strings.
+
+    Raises AssertionError if shifting a line to the left would remove
+    anything other than whitespace.
+    """
+    # We use 1-based indexing for both rows and columns.
+    tokens = tokenize(html)
+    lines = html.split('\n')
+
+    # We will keep a stack of "start" tags so that we know
+    # when HTML ranges end.  Note that some start tags won't
+    # be blocks from an indentation standpoint.
+    stack = []  # type: List[Dict[str, Any]]
+
+    # Seed our stack with a pseudo entry to make depth calculations
+    # easier.
+    info = dict(
+        block=False,
+        depth=-1,
+        line=-1,
+        token_kind='html_start',
+        extra_indent=0)
+    stack.append(info)
+
+    # Our main job is to figure out offsets that we use to nudge lines
+    # over by.
+    offsets = {}  # type: Dict[int, int]
+
+    # Loop through our start/end tokens, and calculate offsets.  As
+    # we proceed, we will push/pop info dictionaries on/off a stack.
+    for token in tokens:
+
+        if token.kind in ('html_start', 'handlebars_start'):
+            # A start tag should only cause a new indent if it is on a
+            # later line than the entry currently on top of the stack.
+            is_block = token.line > stack[-1]['line']
+
+            if is_block:
+                if token.kind == 'handlebars_start' and stack[-1]['token_kind'] == 'handlebars_start':
+                    # A handlebars block nested directly in another
+                    # handlebars entry bumps the depth of its parent.
+                    stack[-1]['depth'] += 1
+                new_depth = stack[-1]['depth'] + 1
+                extra_indent = stack[-1]['extra_indent']
+                line = lines[token.line - 1]
+                # 1-based column of the first non-whitespace character.
+                adjustment = len(line)-len(line.lstrip()) + 1
+                offset = (1 + extra_indent + new_depth * num_spaces) - adjustment
+                info = dict(
+                    block=True,
+                    depth=new_depth,
+                    actual_depth=new_depth,
+                    line=token.line,
+                    token_kind=token.kind,
+                    offset=offset,
+                    # Non-zero when the tag opens after other content on
+                    # its line; nested lines are pushed over by this much.
+                    extra_indent=token.col - adjustment + extra_indent
+                )
+                if token.kind == 'handlebars_start':
+                    info.update(dict(depth=new_depth - 1))
+            else:
+                # This start tag is not on a later line than its parent,
+                # so it does not open an indentation block of its own.
+                # It still inherits the parent's indentation state, so
+                # that a block tag nested inside it on a later line can
+                # compute its depth instead of failing with a KeyError.
+                parent = stack[-1]
+                info = dict(
+                    block=False,
+                    depth=parent['depth'],
+                    actual_depth=parent['depth'],
+                    line=token.line,
+                    token_kind=token.kind,
+                    extra_indent=parent['extra_indent']
+                )
+            stack.append(info)
+        elif token.kind in ('html_end', 'handlebars_end'):
+            info = stack.pop()
+            if info['block']:
+                # We are at the end of an indentation block.  We
+                # assume the whole block was formatted ok before, just
+                # possibly at an indentation that we don't like, so we
+                # nudge over all lines in the block by the same offset.
+                start_line = info['line']
+                end_line = token.line
+                offsets[start_line] = info['offset']
+                offsets[end_line] = info['offset']
+                if token.tag != 'pre':
+                    for line_num in range(start_line + 1, end_line):
+                        # Be careful not to override offsets that happened
+                        # deeper in the HTML within our block.
+                        if line_num not in offsets:
+                            line = lines[line_num - 1]
+                            new_depth = info['depth'] + 1
+                            if line.lstrip().startswith('{{else}}'):
+                                # {{else}} lines use the depth at which
+                                # the enclosing block tag was placed.
+                                new_depth = info['actual_depth']
+                            extra_indent = info['extra_indent']
+                            adjustment = len(line)-len(line.lstrip()) + 1
+                            offset = (1 + extra_indent + new_depth * num_spaces) - adjustment
+                            offsets[line_num] = offset
+                else:
+                    # Lines inside <pre> keep their indentation relative
+                    # to the tag: they move by the tag's own offset.
+                    for line_num in range(start_line + 1, end_line):
+                        if line_num not in offsets:
+                            offsets[line_num] = info['offset']
+
+    # Now that we have all of our offsets calculated, we can just
+    # join all our lines together, fixing up offsets as needed.
+    formatted_lines = []
+    for row, line in enumerate(lines, 1):
+        offset = offsets.get(row, 0)
+        pretty_line = line
+        if line.strip() == '':
+            pretty_line = ''
+        else:
+            if offset > 0:
+                pretty_line = (' ' * offset) + pretty_line
+            elif offset < 0:
+                pretty_line = pretty_line[-1 * offset:]
+                # Sanity check: we must only ever trim whitespace.
+                assert line.strip() == pretty_line.strip()
+        formatted_lines.append(pretty_line)
+
+    return '\n'.join(formatted_lines)
--- a/tools/tests/test_pretty_print.py
+++ b/tools/tests/test_pretty_print.py
@ -0,0 +1,205 @@
+from __future__ import absolute_import
+from __future__ import print_function
+
+import unittest
+
+from tools.lib.pretty_print import pretty_print_html
+
+# Note that GOOD_HTML isn't necessarily beautiful HTML.  Apart
+# from adjusting indentation, we mostly leave things alone to
+# respect whatever line-wrapping styles were in place before.
+
+# Full-document fixture.  BAD_HTML has inconsistent indentation (children
+# of <body>, table rows, a <pre> block and a multi-line <div> start tag);
+# GOOD_HTML is the output expected from pretty_print_html(BAD_HTML), and
+# is itself expected to pass through pretty_print_html unchanged.
+BAD_HTML = """
+<!-- test -->
+<!DOCTYPE html>
+
+
+
+<html>
+    <!-- test -->
+    <head>
+        <title>Test</title>
+        <meta charset="utf-8" />
+        <link rel="stylesheet" href="style.css" />
+    </head>
+    <body>
+      <div><p>Hello<br />world!</p></div>
+        <p>Goodbye<!-- test -->world!</p>
+        <table>
+           <tr>
+                       <td>5</td>
+           </tr>
+        </table>
+    <pre>
+            print 'hello world'
+    </pre>
+         <div class = "foo"
+              id = "bar"
+              role = "whatever">{{ bla }}</div>
+    </body>
+</html>
+<!-- test -->
+"""
+
+GOOD_HTML = """
+<!-- test -->
+<!DOCTYPE html>
+
+
+
+<html>
+    <!-- test -->
+    <head>
+        <title>Test</title>
+        <meta charset="utf-8" />
+        <link rel="stylesheet" href="style.css" />
+    </head>
+    <body>
+        <div><p>Hello<br />world!</p></div>
+        <p>Goodbye<!-- test -->world!</p>
+        <table>
+            <tr>
+                <td>5</td>
+            </tr>
+        </table>
+        <pre>
+                print 'hello world'
+        </pre>
+        <div class = "foo"
+            id = "bar"
+             role = "whatever">{{ bla }}</div>
+    </body>
+</html>
+<!-- test -->
+"""
+
+# A line of text followed by an inline <b> tag that does not start its
+# line; the 2-space indentation is expected to become 4-space.
+BAD_HTML1 = """
+<html>
+  <body>
+    foobarfoobarfoo<b>bar</b>
+  </body>
+</html>
+"""
+
+GOOD_HTML1 = """
+<html>
+    <body>
+        foobarfoobarfoo<b>bar</b>
+    </body>
+</html>
+"""
+
+# A handlebars block inside <body>; in the expected output the line
+# between the handlebars tags sits at the same indentation as the tags.
+BAD_HTML2 = """
+<html>
+  <body>
+    {{# foobar area}}
+    foobarfoobarfoo<b>bar</b>
+    {{/ foobar area}}
+  </body>
+</html>
+"""
+
+GOOD_HTML2 = """
+<html>
+    <body>
+        {{# foobar area}}
+        foobarfoobarfoo<b>bar</b>
+        {{/ foobar area}}
+    </body>
+</html>
+"""
+
+# A <blockquote> that opens after text on its line; in the expected
+# output its closing tag lines up with the column where it opened, and
+# the lines nested inside it are indented relative to that column.
+BAD_HTML3 = """
+<html>
+  <body>
+    {{# foobar area}}
+    foobarfoobar<blockquote>
+    <p>
+        FOOBAR
+    </p>
+                </blockquote>
+    {{/ foobar area}}
+  </body>
+</html>
+"""
+
+GOOD_HTML3 = """
+<html>
+    <body>
+        {{# foobar area}}
+        foobarfoobar<blockquote>
+                        <p>
+                            FOOBAR
+                        </p>
+                    </blockquote>
+        {{/ foobar area}}
+    </body>
+</html>
+"""
+
+# Plain text lines mixed with a tag inside a <div>; all three child
+# lines are expected at the same 4-space indentation.
+BAD_HTML4 = """
+<div>
+  foo
+  <p>hello</p>
+  bar
+</div>
+"""
+
+GOOD_HTML4 = """
+<div>
+    foo
+    <p>hello</p>
+    bar
+</div>
+"""
+
+# An {{#if}} / {{else}} / {{/if}} block; in the expected output the
+# {{else}} line sits at the same indentation as {{#if}} and {{/if}}.
+BAD_HTML5 = """
+<div>
+  foo
+  {{#if foobar}}
+  hello
+  {{else}}
+  bye
+  {{/if}}
+  bar
+</div>
+"""
+
+GOOD_HTML5 = """
+<div>
+    foo
+    {{#if foobar}}
+    hello
+    {{else}}
+    bye
+    {{/if}}
+    bar
+</div>
+"""
+
+# Several start tags that open and close on a single line; only the
+# indentation of that line is expected to change.
+BAD_HTML6 = """
+<div>
+  <p> <strong> <span class = "whatever">foobar </span> </strong></p>
+</div>
+"""
+
+GOOD_HTML6 = """
+<div>
+    <p> <strong> <span class = "whatever">foobar </span> </strong></p>
+</div>
+"""
+class TestPrettyPrinter(unittest.TestCase):
+    def compare(self, a, b):
+        # type: (str, str) -> None
+        # Compare the two strings as lists of lines.
+        actual_lines = a.split('\n')
+        expected_lines = b.split('\n')
+        self.assertEqual(actual_lines, expected_lines)
+
+    def test_pretty_print(self):
+        # type: () -> None
+        # Each pair is (input, expected output).  The first pair checks
+        # that already-good HTML passes through unchanged.
+        cases = [
+            (GOOD_HTML, GOOD_HTML),
+            (BAD_HTML, GOOD_HTML),
+            (BAD_HTML1, GOOD_HTML1),
+            (BAD_HTML2, GOOD_HTML2),
+            (BAD_HTML3, GOOD_HTML3),
+            (BAD_HTML4, GOOD_HTML4),
+            (BAD_HTML5, GOOD_HTML5),
+            (BAD_HTML6, GOOD_HTML6),
+        ]
+        for raw_html, expected_html in cases:
+            self.compare(pretty_print_html(raw_html), expected_html)