zulip/tools/check-templates

#!/usr/bin/env python
from __future__ import absolute_import
from __future__ import print_function
import optparse
import os
import sys
import subprocess
from six.moves import filter
from six.moves import map
from six.moves import range
# These imports can fail when the script is run outside the environment it
# expects; in that case print a hint and exit with status 1 instead of
# surfacing a bare ImportError traceback.
try:
    import lister
    from typing import cast, Callable, Dict, Iterable, List
except ImportError as e:
    print("ImportError: {}".format(e))
    print("You need to run the Zulip linters inside a Zulip dev environment.")
    print("If you are using Vagrant, you can `vagrant ssh` to enter the Vagrant guest.")
    sys.exit(1)

class TokenizerState(object):
    """Mutable cursor used while scanning a template's text.

    A fresh instance points at the very first character of the text.
    """

    def __init__(self):
        # type: () -> None
        # Human-readable position of the cursor; both values are 1-based.
        self.line = 1
        self.col = 1
        # 0-based offset of the cursor into the text being scanned.
        self.i = 0

class Token(object):
    """Plain record describing one tag found in a template.

    Attributes:
        kind: category label of the tag (e.g. 'html_start', 'django_end').
        s:    the raw text of the tag as it appears in the template.
        tag:  the tag's name, extracted from ``s``.
        line: line number at which the tag starts.
        col:  column number at which the tag starts.
    """

    def __init__(self, kind, s, tag, line, col):
        # Position first, then content; the attributes are independent.
        self.line = line
        self.col = col
        self.kind = kind
        self.tag = tag
        self.s = s

def tokenize(text):
    """Scan template text and return the tags found in it, in order.

    Six kinds of tag are recognized, each yielding one Token:

        html_start        <tag ...>          (anything starting with "<"
                                              but not "</")
        html_end          </tag>
        handlebars_start  {{#tag ...}} or {{^tag ...}}
        handlebars_end    {{/tag}}
        django_start      {% tag ... %}      (anything starting with "{% "
                                              but not "{% end")
        django_end        {% endtag %}

    Every other character is skipped.  Each Token records the line and
    column (both 1-based) at which its tag starts.  Exceptions raised by
    the get_*_tag helpers for unterminated tags propagate to the caller.
    """
    state = TokenizerState()
    tokens = []

    def advance(n):
        # type: (int) -> None
        # Move the cursor forward n characters, keeping line/col in step.
        for _ in range(n):
            state.i += 1
            if text[state.i - 1] == '\n':
                # We just consumed a newline: next char starts a new line.
                state.line += 1
                state.col = 1
            else:
                state.col += 1

    def looking_at(s):
        # type: (str) -> bool
        # True when the text at the cursor begins with s.
        return text.startswith(s, state.i)

    while state.i < len(text):
        start = state.i

        if looking_at("<"):
            s = get_html_tag(text, start)
            if looking_at("</"):
                # "</name>": drop "</" and ">".
                kind, tag = 'html_end', s[2:-1]
            else:
                # "<name attrs>": first word after "<".
                kind, tag = 'html_start', s[1:-1].split()[0]
        elif looking_at("{{#") or looking_at("{{^"):
            s = get_handlebars_tag(text, start)
            kind, tag = 'handlebars_start', s[3:-2].split()[0]
        elif looking_at("{{/"):
            s = get_handlebars_tag(text, start)
            kind, tag = 'handlebars_end', s[3:-2]
        elif looking_at("{% "):
            s = get_django_tag(text, start)
            if looking_at("{% end"):
                # "{% endname %}": drop "{% end" and " %}".
                kind, tag = 'django_end', s[6:-3]
            else:
                kind, tag = 'django_start', s[3:-2].split()[0]
        else:
            # Not the beginning of any tag we track.
            advance(1)
            continue

        # Record the position *before* consuming the tag so that the
        # token points at the tag's first character.
        tokens.append(Token(
            kind=kind,
            s=s,
            tag=tag,
            line=state.line,
            col=state.col,
        ))
        advance(len(s))

    return tokens

def validate(fn, check_indent=True):
    # type: (str, bool) -> None
    """Check that the start and end tags in the template file fn pair up.

    The file is tokenized and every end tag is compared with the most
    recently opened, still-unclosed start tag.

    Args:
        fn: path of the template file to check.
        check_indent: when True, an end tag that sits more than one line
            below its start tag must begin in the same column as the
            start tag.

    Raises:
        Exception: on an end tag with no open start tag, a start/end tag
            name mismatch, bad indentation (when check_indent is set), or
            a start tag that is still open at the end of the file.
    """
    # Use a context manager so the file handle is closed promptly instead
    # of being left for the garbage collector.
    with open(fn) as f:
        text = f.read()
    tokens = tokenize(text)

    class State(object):
        def __init__(self, func):
            # type: (Callable[[Token], None]) -> None
            # Number of start tags currently open.
            self.depth = 0
            # Callback that will handle the next end tag.
            self.matcher = func

    def no_start_tag(token):
        # type: (Token) -> None
        # Bottom-of-stack matcher: reached when an end tag shows up while
        # nothing is open.
        raise Exception('''
            No start tag
            fn: %s
            end tag:
                %s
                line %d, col %d
            ''' % (fn, token.tag, token.line, token.col))

    state = State(no_start_tag)

    def start_tag_matcher(start_token):
        # type: (Token) -> None
        # Open a new nesting level: install a matcher that validates the
        # next end tag against start_token and then restores the matcher
        # that was active before.  The chain of closures acts as a stack.
        state.depth += 1
        start_tag = start_token.tag
        start_line = start_token.line
        start_col = start_token.col

        old_matcher = state.matcher
        def f(end_token):
            # type: (Token) -> None

            end_tag = end_token.tag
            end_line = end_token.line
            end_col = end_token.col

            problem = None
            if start_tag != end_tag:
                problem = 'Mismatched tag.'
            elif check_indent and end_line > start_line + 1 and end_col != start_col:
                # Tags on the same or adjacent lines are exempt from the
                # column check.
                problem = 'Bad indentation.'
            if problem:
                raise Exception('''
                    fn: %s
                    %s
                    start:
                        %s
                        line %d, col %d
                    end tag:
                        %s
                        line %d, col %d
                    ''' % (fn, problem, start_token.s, start_line, start_col, end_tag, end_line, end_col))
            # Matched: pop back to the enclosing level.
            state.matcher = old_matcher
            state.depth -= 1
        state.matcher = f

    for token in tokens:
        kind = token.kind
        tag = token.tag
        s = token.s

        if kind == 'html_start':
            # Comments, self-closing tags and the like never get an end
            # tag, so they do not open a level.
            if not is_special_html_tag(s, tag):
                start_tag_matcher(token)
        elif kind == 'html_end':
            state.matcher(token)

        elif kind == 'handlebars_start':
            start_tag_matcher(token)
        elif kind == 'handlebars_end':
            state.matcher(token)

        elif kind == 'django_start':
            # Only block tags are expected to have a matching end tag.
            if is_django_block_tag(tag):
                start_tag_matcher(token)
        elif kind == 'django_end':
            state.matcher(token)

    # Placeholder end tag used to report a start tag that was never closed.
    null_token = Token(
        kind=None,
        s='(NO TAG)',
        tag='NO TAG',
        line=0,
        col=0,
    )

    if state.depth != 0:
        # Feeding the placeholder to the innermost open matcher makes it
        # raise and report the unclosed start tag.
        state.matcher(null_token)

def is_special_html_tag(s, tag):
    # type: (str, str) -> bool
    """Return True for HTML "start" tags that never get a closing tag.

    s is the raw tag text and tag is its name.  A tag is special when it
    is a comment opener, is self-closing, or is one of a few known names.
    """
    if s.startswith('<!--'):
        # HTML comment.
        return True
    if s.endswith('/>'):
        # Self-closing tag.
        return True
    return tag in ('link', 'meta', '!DOCTYPE')

def is_django_block_tag(tag):
    # type: (str) -> bool
    """Return True when tag is one of the template tag names treated as
    opening a block (i.e. expected to be closed by a matching end tag)."""
    block_tags = (
        'autoescape',
        'block',
        'comment',
        'for',
        'if',
        'ifequal',
        'verbatim',
        'blocktrans',
        'trans',
        'raw',
    )
    return tag in block_tags

def get_handlebars_tag(text, i):
    # type: (str, int) -> str
    """Return the handlebars tag that starts at text[i], braces included.

    The scan starts just past the opening "{{" and stops at the first "}",
    which must be immediately followed by a second "}".

    Raises:
        Exception: 'Tag missing }}' when the tag is not terminated by
            "}}" (this includes text that ends right after a single "}").
    """
    end = i + 2
    while end < len(text) - 1 and text[end] != '}':
        end += 1
    # Compare a slice rather than indexing text[end] and text[end + 1]:
    # slicing past the end of the string yields a short string instead of
    # raising IndexError, so truncated input gets the intended error.
    if text[end:end + 2] != '}}':
        raise Exception('Tag missing }}')
    return text[i:end + 2]

def get_django_tag(text, i):
    # type: (str, int) -> str
    """Return the "{% ... %}" tag that starts at text[i], delimiters included.

    The scan starts just past the opening "{%" and stops at the first "%",
    which must be immediately followed by "}".

    Raises:
        Exception: 'Tag missing %}' when the tag is not terminated by
            "%}" (this includes text that ends right after a lone "%").
    """
    end = i + 2
    while end < len(text) - 1 and text[end] != '%':
        end += 1
    # Compare a slice rather than indexing text[end] and text[end + 1]:
    # slicing past the end of the string yields a short string instead of
    # raising IndexError, so truncated input gets the intended error.
    if text[end:end + 2] != '%}':
        raise Exception('Tag missing %}')
    return text[i:end + 2]

def get_html_tag(text, i):
    # type: (str, int) -> str
    """Return the HTML tag that starts at text[i], through its closing ">".

    A ">" that appears inside a double-quoted attribute value does not end
    the tag.  Only double quotes are tracked.

    Raises:
        Exception: 'Tag missing >' when the text ends before the tag is
            closed.
    """
    inside_quotes = False
    pos = i + 1
    while pos < len(text):
        ch = text[pos]
        if ch == '>' and not inside_quotes:
            # Found the real end of the tag.
            break
        if ch == '"':
            # Each double quote toggles between inside and outside.
            inside_quotes = not inside_quotes
        pos += 1
    if pos == len(text) or text[pos] != '>':
        raise Exception('Tag missing >')
    return text[i:pos + 1]

def check_our_files():
    # type: () -> None
    """Command-line entry point: parse options, list files, run the checks.

    Accepts --modified / -m, which is passed through to lister.list_files
    as modified_only and to the per-language checkers.
    """
    parser = optparse.OptionParser()
    parser.add_option(
        '--modified', '-m',
        action='store_true',
        default=False,
        help='Only check modified files')
    options, _ = parser.parse_args()
    modified_only = options.modified

    # The listing is requested grouped by file type; it is indexed below
    # by the same two type names that are passed in as ftypes.
    files_by_type = cast(
        Dict[str, List[str]],
        lister.list_files(
            modified_only=modified_only,
            ftypes=['handlebars', 'html'],
            group_by_ftype=True))

    check_handlebar_templates(files_by_type['handlebars'], modified_only)
    check_html_templates(files_by_type['html'], modified_only)

def check_handlebar_templates(templates, modified_only):
    # type: (Iterable[str], bool) -> None
    """Validate every file in templates whose name ends in '.handlebars'.

    When modified_only is False, at least 10 such files are expected; this
    is asserted as a sanity check that we are actually doing work.
    Indentation is always checked for these files.
    """
    handlebars_files = [
        path for path in templates
        if path.endswith('.handlebars')
    ]
    # sanity check that we are actually doing work
    assert modified_only or len(handlebars_files) >= 10
    for path in handlebars_files:
        validate(path, check_indent=True)

def check_html_templates(templates, modified_only):
    # type: (Iterable[str], bool) -> None
    """Validate the .html templates, in sorted order.

    Paths containing 'base.html' or 'casperjs' are skipped entirely.
    Files on the known-bad list below are still checked for matching
    tags, but not for indentation.

    When modified_only is False, at least 10 files are expected after
    filtering; this is asserted as a sanity check.
    """
    # Our files with .html extensions are usually for Django, but we also
    # have a few static .html files.
    # The file base.html has a bit of funny HTML that we can't parse here yet.
    #
    # We also have .html files that we vendored from Casper.
    # The casperjs files use HTML5 (whereas Zulip prefers XHTML), and
    # there are also cases where Casper deliberately uses invalid HTML,
    # so we exclude them from our linter.
    html_files = sorted(
        fn for fn in templates
        if ('base.html' not in fn) and ('casperjs' not in fn))

    if not modified_only:
        assert len(html_files) >= 10 # sanity check that we are actually doing work

    # Many of our Django templates have strange indentation.  The
    # indentation errors are often harmless, even stylistically
    # harmless, but they tend to be in files that might be old
    # and might eventually require more scrutiny for things like
    # localization.  See github #1236.
    #
    # Built once, outside the loop, as a set: the contents do not depend
    # on the file being checked and only membership is ever tested.
    bad_files = frozenset([
        'static/html/5xx.html',
        'templates/500.html',
        'templates/confirmation/confirm.html',
        'templates/corporate/mit.html',
        'templates/corporate/privacy.html',
        'templates/corporate/terms-enterprise.html',
        'templates/corporate/zephyr-mirror.html',
        'templates/corporate/zephyr.html',
        'templates/zerver/accounts_home.html',
        'templates/zerver/accounts_send_confirm.html',
        'templates/zerver/api.html',
        'templates/zerver/api_endpoints.html',
        'templates/zerver/apps.html',
        'templates/zerver/create_realm.html',
        'templates/zerver/emails/followup/day1.html',
        'templates/zerver/emails/followup/day2.html',
        'templates/zerver/features.html',
        'templates/zerver/hello.html',
        'templates/zerver/home.html',
        'templates/zerver/integrations.html',
        'templates/zerver/invite_user.html',
        'templates/zerver/left-sidebar.html',
        'templates/zerver/login.html',
        'templates/zerver/markdown_help.html',
        'templates/zerver/register.html',
        'templates/zerver/right-sidebar.html',
        'templates/zerver/search_operators.html',
    ])

    for fn in html_files:
        validate(fn, check_indent=(fn not in bad_files))

# Run the checks only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    check_our_files()