Lint for duplicate ids in templates.

In this commit we enhance our current template linter to detect
duplicate ids and report them during lint checks. html_branches.py
was topped up with a new function build_id_dict for the purpose.

Also the get_tag_info function in same file was updated to parse
ids and classes more robustly in cases of template variables.
split_for_id_and_class function was added to serve this purpose.

Unit tests for both the functions were created under
tests/test_html_branches. Also a directory under tests called
test_template_data was created to hold templates for testing under
newly created functionality.

check_templates was modified to print to console any duplicates
detected.
showell reviewed my commit and helped me out.

Fixes #2950.
This commit is contained in:
adnrs96 2017-01-06 19:41:15 +05:30 committed by showell
parent 196cf4367b
commit 153ad18807
5 changed files with 172 additions and 6 deletions

View File

@ -2,8 +2,10 @@
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import print_function from __future__ import print_function
from lib.template_parser import validate from lib.template_parser import validate
from lib.html_branches import build_id_dict
import argparse import argparse
import sys import sys
import logging
from six.moves import filter from six.moves import filter
try: try:
@ -15,12 +17,20 @@ except ImportError as e:
print("If you are using Vagrant, you can `vagrant ssh` to enter the Vagrant guest.") print("If you are using Vagrant, you can `vagrant ssh` to enter the Vagrant guest.")
sys.exit(1) sys.exit(1)
EXCLUDED_FILES = [
## Test data Files for testing modules in tests
"tools/tests/test_template_data",
]
def check_our_files(): def check_our_files():
# type: () -> None # type: () -> None
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('-m', '--modified', parser.add_argument('-m', '--modified',
action='store_true', default=False, action='store_true', default=False,
help='only check modified files') help='only check modified files')
parser.add_argument('--all-dups',
action="store_true", default=False,
help='Run lint tool to detect duplicate ids on ignored files as well')
args = parser.parse_args() args = parser.parse_args()
by_lang = cast( by_lang = cast(
@ -28,13 +38,13 @@ def check_our_files():
lister.list_files( lister.list_files(
modified_only=args.modified, modified_only=args.modified,
ftypes=['handlebars', 'html'], ftypes=['handlebars', 'html'],
group_by_ftype=True)) group_by_ftype=True, exclude=EXCLUDED_FILES))
check_handlebar_templates(by_lang['handlebars'], args.modified) check_handlebar_templates(by_lang['handlebars'], args.modified)
check_html_templates(by_lang['html'], args.modified) check_html_templates(by_lang['html'], args.modified, args.all_dups)
def check_html_templates(templates, modified_only): def check_html_templates(templates, modified_only, all_dups):
# type: (Iterable[str], bool) -> None # type: (Iterable[str], bool, bool) -> None
# Our files with .html extensions are usually for Django, but we also # Our files with .html extensions are usually for Django, but we also
# have a few static .html files. # have a few static .html files.
# The file base.html has a bit of funny HTML that we can't parse here yet. # The file base.html has a bit of funny HTML that we can't parse here yet.
@ -43,11 +53,50 @@ def check_html_templates(templates, modified_only):
# The casperjs files use HTML5 (whereas Zulip prefers XHTML), and # The casperjs files use HTML5 (whereas Zulip prefers XHTML), and
# there are also cases where Casper deliberately uses invalid HTML, # there are also cases where Casper deliberately uses invalid HTML,
# so we exclude them from our linter. # so we exclude them from our linter.
logging.basicConfig(format='%(levelname)s:%(message)s')
templates = filter( templates = filter(
lambda fn: ('base.html' not in fn) and ('casperjs' not in fn), lambda fn: (('base.html' not in fn) and ('casperjs' not in fn)),
templates) templates)
templates = sorted(list(templates)) templates = sorted(list(templates))
template_id_dict = build_id_dict(templates)
# TODO: Clean up these cases of duplicate ids in the code
IGNORE_IDS = [
'api-example-tabs',
'errors',
'email',
'registration',
'pw_strength',
'id_password',
'top_navbar',
'id_email',
'id_terms',
'send_confirm',
'register',
]
bad_ids_dict = {ids: fns for ids, fns in template_id_dict.items()
if (not ids in IGNORE_IDS) and len(fns) > 1}
if all_dups:
ignorable_ids_dict = {ids: fns for ids, fns in template_id_dict.items()
if ids in IGNORE_IDS and len(fns) > 1}
for ids, fns in ignorable_ids_dict.items():
logging.warning("Duplicate ID(s) detected :Id '" + ids
+ "' present at following files:")
for fn in fns:
print(fn)
for ids, fns in bad_ids_dict.items():
logging.error("Duplicate ID(s) detected :Id '" + ids
+ "' present at following files:")
for fn in fns:
print(fn)
if list(bad_ids_dict.keys()):
print('Exiting--please clean up all duplicates before running this again.')
sys.exit(1)
if not modified_only: if not modified_only:
assert len(templates) >= 10 # sanity check that we are actually doing work assert len(templates) >= 10 # sanity check that we are actually doing work
for fn in templates: for fn in templates:

View File

@ -4,6 +4,7 @@ from __future__ import print_function
from typing import Optional from typing import Optional
import re import re
from collections import defaultdict
from .template_parser import ( from .template_parser import (
tokenize, tokenize,
@ -109,11 +110,38 @@ def get_tag_info(token):
m = re.search(regex, s) m = re.search(regex, s)
if m: if m:
for g in m.groups(): for g in m.groups():
lst += g.split() lst += split_for_id_and_class(g)
return TagInfo(tag=tag, classes=classes, ids=ids, token=token) return TagInfo(tag=tag, classes=classes, ids=ids, token=token)
def split_for_id_and_class(element):
# type: (str) -> List[str]
# Here we split a given string which is expected to contain id or class
# attributes from HTML tags. This also takes care of template variables
# in string during splitting process. For eg. 'red black {{ a|b|c }}'
# is split as ['red', 'black', '{{ a|b|c }}']
outside_braces = True # type: bool
lst = []
s = ''
for ch in element:
if ch == '{':
outside_braces = False
if ch == '}':
outside_braces = True
if ch == ' ' and outside_braces:
if not s == '':
lst.append(s)
s = ''
else:
s += ch
if not s == '':
lst.append(s)
return lst
def html_branches(text, fn=None): def html_branches(text, fn=None):
# type: (str, str) -> List[HtmlTreeBranch] # type: (str, str) -> List[HtmlTreeBranch]
@ -163,3 +191,21 @@ def html_tag_tree(text):
stack.pop() stack.pop()
return top_level return top_level
def build_id_dict(templates):
# type: (List[str]) -> (Dict[str,List[str]])
template_id_dict = defaultdict(list) # type: (Dict[str,List[str]])
for fn in templates:
text = open(fn).read()
list_tags = tokenize(text)
for tag in list_tags:
info = get_tag_info(tag)
for ids in info.ids:
template_id_dict[ids].append("Line " + str(info.token.line) + ":" + fn)
return template_id_dict

View File

@ -2,6 +2,7 @@ from __future__ import absolute_import
from __future__ import print_function from __future__ import print_function
import unittest import unittest
import os
import tools.lib.template_parser import tools.lib.template_parser
@ -9,6 +10,8 @@ from tools.lib.html_branches import (
get_tag_info, get_tag_info,
html_branches, html_branches,
html_tag_tree, html_tag_tree,
build_id_dict,
split_for_id_and_class,
) )
@ -100,3 +103,37 @@ class TestHtmlBranches(unittest.TestCase):
self.assertEqual(branches[0].staircase_text(), '\n html\n head\n title\n') self.assertEqual(branches[0].staircase_text(), '\n html\n head\n title\n')
self.assertEqual(branches[1].staircase_text(), '\n html\n body\n p\n br\n') self.assertEqual(branches[1].staircase_text(), '\n html\n body\n p\n br\n')
self.assertEqual(branches[2].staircase_text(), '\n html\n body\n p\n') self.assertEqual(branches[2].staircase_text(), '\n html\n body\n p\n')
def test_build_id_dict(self):
# type: () -> None
TEST_TEMPLATES_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_template_data")
templates = ["test_template1.html", "test_template2.html"]
templates = [os.path.join(TEST_TEMPLATES_DIR, fn) for fn in templates]
template_id_dict = build_id_dict(templates)
self.assertEqual(list(template_id_dict.keys()), ['hello_{{ message }}', 'intro', 'below_navbar'])
self.assertEqual(template_id_dict['hello_{{ message }}'], [
'Line 12:/srv/zulip/tools/tests/test_template_data/test_template1.html',
'Line 12:/srv/zulip/tools/tests/test_template_data/test_template2.html'])
self.assertEqual(template_id_dict['intro'], [
'Line 10:/srv/zulip/tools/tests/test_template_data/test_template1.html',
'Line 11:/srv/zulip/tools/tests/test_template_data/test_template1.html',
'Line 11:/srv/zulip/tools/tests/test_template_data/test_template2.html'])
self.assertEqual(template_id_dict['below_navbar'], ['Line 10:/srv/zulip/tools/tests/test_template_data/test_template2.html'])
def test_split_for_id_and_class(self):
# type: () -> None
id1 = "{{ red|blue }}"
id2 = "search_box_{{ page }}"
class1 = "chat_box message"
class2 = "stream_{{ topic }}"
class3 = "foo {{ a|b|c }} bar"
self.assertEqual(split_for_id_and_class(id1), ['{{ red|blue }}'])
self.assertEqual(split_for_id_and_class(id2), ['search_box_{{ page }}'])
self.assertEqual(split_for_id_and_class(class1), ['chat_box', 'message'])
self.assertEqual(split_for_id_and_class(class2), ['stream_{{ topic }}'])
self.assertEqual(split_for_id_and_class(class3), ['foo', '{{ a|b|c }}', 'bar'])

View File

@ -0,0 +1,17 @@
<!-- test -->
<!DOCTYPE html>
<html>
<head>
<title>Test</title>
<meta charset="utf-8" />
<link rel="stylesheet" href="style.css" />
</head>
<body>
<div id="intro">
<p id="intro">
<span id="hello_{{ message }}">Hello World!!</span>
This is a test file for checking correct working of duplicate id detection module.
</p>
</div>
</body>
</html>

View File

@ -0,0 +1,17 @@
<!-- test -->
<!DOCTYPE html>
<html>
<head>
<title>Test</title>
<meta charset="utf-8" />
<link rel="stylesheet" href="style.css" />
</head>
<body>
<div id="below_navbar">
<p id="intro">
<span id="hello_{{ message }}">Hello World!!</span>
This is a test file for checking correct working of duplicate id detection module.
</p>
</div>
</body>
</html>