Lint for duplicate ids in templates.

In this commit we enhance our current template linter to detect duplicate ids and report them during lint checks. html_branches.py was topped up with a new function build_id_dict for the purpose. Also the get_tag_info function in same file was updated to parse ids and classes more robustly in cases of template variables. split_for_id_and_class function was added to serve this purpose. Unit tests for both the functions were created under tests/test_html_branches. Also a directory under tests called test_template_data was created to hold templates for testing under newly created functionality. check_templates was modified to print to console any duplicates detected. showell reviewed my commit and helped me out. Fixes #2950.
2017-01-06 19:41:15 +05:30 · 2017-01-06 19:41:15 +05:30 · 153ad18807
parent 196cf4367b
commit 153ad18807
5 changed files with 172 additions and 6 deletions
--- a/tools/check-templates
+++ b/tools/check-templates
@ -2,8 +2,10 @@
 from __future__ import absolute_import
 from __future__ import print_function
 from lib.template_parser import validate
 from lib.html_branches import build_id_dict
 import argparse
 import sys
 import logging
 from six.moves import filter
 try:
@ -15,12 +17,20 @@ except ImportError as e:
    print("If you are using Vagrant, you can `vagrant ssh` to enter the Vagrant guest.")
    sys.exit(1)
 EXCLUDED_FILES = [
    ## Test data Files for testing modules in tests
    "tools/tests/test_template_data",
 ]
 def check_our_files():
    # type: () -> None
    parser = argparse.ArgumentParser()
    parser.add_argument('-m', '--modified',
                        action='store_true', default=False,
                        help='only check modified files')
    parser.add_argument('--all-dups',
                        action="store_true", default=False,
                        help='Run lint tool to detect duplicate ids on ignored files as well')
    args = parser.parse_args()
    by_lang = cast(
@ -28,13 +38,13 @@ def check_our_files():
        lister.list_files(
            modified_only=args.modified,
            ftypes=['handlebars', 'html'],
-            group_by_ftype=True))
+            group_by_ftype=True, exclude=EXCLUDED_FILES))
    check_handlebar_templates(by_lang['handlebars'], args.modified)
-    check_html_templates(by_lang['html'], args.modified)
+    check_html_templates(by_lang['html'], args.modified, args.all_dups)
-def check_html_templates(templates, modified_only):
+def check_html_templates(templates, modified_only, all_dups):
-    # type: (Iterable[str], bool) -> None
+    # type: (Iterable[str], bool, bool) -> None
    # Our files with .html extensions are usually for Django, but we also
    # have a few static .html files.
    # The file base.html has a bit of funny HTML that we can't parse here yet.
@ -43,11 +53,50 @@ def check_html_templates(templates, modified_only):
    # The casperjs files use HTML5 (whereas Zulip prefers XHTML), and
    # there are also cases where Casper deliberately uses invalid HTML,
    # so we exclude them from our linter.
    logging.basicConfig(format='%(levelname)s:%(message)s')
    templates = filter(
-        lambda fn: ('base.html' not in fn) and ('casperjs' not in fn),
+        lambda fn: (('base.html' not in fn) and ('casperjs' not in fn)),
        templates)
    templates = sorted(list(templates))
    template_id_dict = build_id_dict(templates)
    # TODO: Clean up these cases of duplicate ids in the code
    IGNORE_IDS = [
        'api-example-tabs',
        'errors',
        'email',
        'registration',
        'pw_strength',
        'id_password',
        'top_navbar',
        'id_email',
        'id_terms',
        'send_confirm',
        'register',
    ]
    bad_ids_dict = {ids: fns for ids, fns in template_id_dict.items()
                    if (not ids in IGNORE_IDS) and len(fns) > 1}
    if all_dups:
        ignorable_ids_dict = {ids: fns for ids, fns in template_id_dict.items()
                              if ids in IGNORE_IDS and len(fns) > 1}
        for ids, fns in ignorable_ids_dict.items():
            logging.warning("Duplicate ID(s) detected :Id '" + ids
                            + "' present at following files:")
            for fn in fns:
                print(fn)
    for ids, fns in bad_ids_dict.items():
        logging.error("Duplicate ID(s) detected :Id '" + ids
                      + "' present at following files:")
        for fn in fns:
            print(fn)
    if list(bad_ids_dict.keys()):
        print('Exiting--please clean up all duplicates before running this again.')
        sys.exit(1)
    if not modified_only:
        assert len(templates) >= 10 # sanity check that we are actually doing work
    for fn in templates:
--- a/tools/lib/html_branches.py
+++ b/tools/lib/html_branches.py
@ -4,6 +4,7 @@ from __future__ import print_function
 from typing import Optional
 import re
 from collections import defaultdict
 from .template_parser import (
    tokenize,
@ -109,11 +110,38 @@ def get_tag_info(token):
        m = re.search(regex, s)
        if m:
            for g in m.groups():
-                lst += g.split()
+                lst += split_for_id_and_class(g)
    return TagInfo(tag=tag, classes=classes, ids=ids, token=token)
 def split_for_id_and_class(element):
    # type: (str) -> List[str]
    # Here we split a given string which is expected to contain id or class
    # attributes from HTML tags. This also takes care of template variables
    # in string during splitting process. For eg. 'red black {{ a|b|c }}'
    # is split as ['red', 'black', '{{ a|b|c }}']
    outside_braces = True # type: bool
    lst = []
    s = ''
    for ch in element:
        if ch == '{':
            outside_braces = False
        if ch == '}':
            outside_braces = True
        if ch == ' ' and outside_braces:
            if not s == '':
                lst.append(s)
            s = ''
        else:
            s += ch
    if not s == '':
        lst.append(s)
    return lst
 def html_branches(text, fn=None):
    # type: (str, str) -> List[HtmlTreeBranch]
@ -163,3 +191,21 @@ def html_tag_tree(text):
            stack.pop()
    return top_level
 def build_id_dict(templates):
    # type: (List[str]) -> (Dict[str,List[str]])
    template_id_dict = defaultdict(list) # type: (Dict[str,List[str]])
    for fn in templates:
        text = open(fn).read()
        list_tags = tokenize(text)
        for tag in list_tags:
            info = get_tag_info(tag)
            for ids in info.ids:
                template_id_dict[ids].append("Line " + str(info.token.line) + ":" + fn)
    return template_id_dict
--- a/tools/tests/test_html_branches.py
+++ b/tools/tests/test_html_branches.py
@ -2,6 +2,7 @@ from __future__ import absolute_import
 from __future__ import print_function
 import unittest
 import os
 import tools.lib.template_parser
@ -9,6 +10,8 @@ from tools.lib.html_branches import (
    get_tag_info,
    html_branches,
    html_tag_tree,
    build_id_dict,
    split_for_id_and_class,
 )
@ -100,3 +103,37 @@ class TestHtmlBranches(unittest.TestCase):
        self.assertEqual(branches[0].staircase_text(), '\n    html\n        head\n            title\n')
        self.assertEqual(branches[1].staircase_text(), '\n    html\n        body\n            p\n                br\n')
        self.assertEqual(branches[2].staircase_text(), '\n    html\n        body\n            p\n')
    def test_build_id_dict(self):
        # type: () -> None
        TEST_TEMPLATES_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_template_data")
        templates = ["test_template1.html", "test_template2.html"]
        templates = [os.path.join(TEST_TEMPLATES_DIR, fn) for fn in templates]
        template_id_dict = build_id_dict(templates)
        self.assertEqual(list(template_id_dict.keys()), ['hello_{{ message }}', 'intro', 'below_navbar'])
        self.assertEqual(template_id_dict['hello_{{ message }}'], [
                         'Line 12:/srv/zulip/tools/tests/test_template_data/test_template1.html',
                         'Line 12:/srv/zulip/tools/tests/test_template_data/test_template2.html'])
        self.assertEqual(template_id_dict['intro'], [
                         'Line 10:/srv/zulip/tools/tests/test_template_data/test_template1.html',
                         'Line 11:/srv/zulip/tools/tests/test_template_data/test_template1.html',
                         'Line 11:/srv/zulip/tools/tests/test_template_data/test_template2.html'])
        self.assertEqual(template_id_dict['below_navbar'], ['Line 10:/srv/zulip/tools/tests/test_template_data/test_template2.html'])
    def test_split_for_id_and_class(self):
        # type: () -> None
        id1 = "{{ red|blue }}"
        id2 = "search_box_{{ page }}"
        class1 = "chat_box message"
        class2 = "stream_{{ topic }}"
        class3 = "foo {{ a|b|c }} bar"
        self.assertEqual(split_for_id_and_class(id1), ['{{ red|blue }}'])
        self.assertEqual(split_for_id_and_class(id2), ['search_box_{{ page }}'])
        self.assertEqual(split_for_id_and_class(class1), ['chat_box', 'message'])
        self.assertEqual(split_for_id_and_class(class2), ['stream_{{ topic }}'])
        self.assertEqual(split_for_id_and_class(class3), ['foo', '{{ a|b|c }}', 'bar'])
--- a/tools/tests/test_template_data/test_template1.html
+++ b/tools/tests/test_template_data/test_template1.html
@ -0,0 +1,17 @@
 <!-- test -->
      <!DOCTYPE html>
      <html>
      <head>
          <title>Test</title>
          <meta charset="utf-8" />
          <link rel="stylesheet" href="style.css" />
      </head>
      <body>
        <div id="intro">
          <p id="intro">
            <span id="hello_{{ message }}">Hello World!!</span>
            This is a test file for checking correct working of duplicate id detection module.
          </p>
        </div>
      </body>
      </html>
--- a/tools/tests/test_template_data/test_template2.html
+++ b/tools/tests/test_template_data/test_template2.html
@ -0,0 +1,17 @@
 <!-- test -->
      <!DOCTYPE html>
      <html>
      <head>
          <title>Test</title>
          <meta charset="utf-8" />
          <link rel="stylesheet" href="style.css" />
      </head>
      <body>
        <div id="below_navbar">
          <p id="intro">
            <span id="hello_{{ message }}">Hello World!!</span>
            This is a test file for checking correct working of duplicate id detection module.
          </p>
        </div>
      </body>
      </html>