From 2a07b204bf36ed80b5eeab70cd1950d5b19d9a0a Mon Sep 17 00:00:00 2001 From: Steve Howell Date: Wed, 1 Feb 2017 06:31:24 -0800 Subject: [PATCH] css parser: Show line numbers for errors. This is a fairly major overhaul of the CSS parser to support line numbers in error messages. Basically, instead of passing "slices" of tokens around, we pass indexes into the token arrays to all of our sub-parsers, which allows them to have access to previous tokens in certain cases. This is particularly important for errors where stuff is missing (vs. being wrong). In testing this out I found a few more places to catch errors. --- tools/check-css | 8 +- tools/lib/css_parser.py | 184 +++++++++++++++++---------------- tools/tests/test_css_parser.py | 10 ++ 3 files changed, 113 insertions(+), 89 deletions(-) diff --git a/tools/check-css b/tools/check-css index 2347d6c502..98bb8df3a6 100755 --- a/tools/check-css +++ b/tools/check-css @@ -32,8 +32,12 @@ def check_our_files(): try: validate(fn) except CssParserException as e: - print('CssParserException raised while parsing file %s' % (fn,)) - print(e) + msg = ''' + ERROR! Some CSS seems to be misformatted. + {} + See line {} in file {} + '''.format(e.msg, e.token.line, fn) + print(msg) sys.exit(1) if __name__ == '__main__': diff --git a/tools/lib/css_parser.py b/tools/lib/css_parser.py index 353e4de466..3926d097cf 100644 --- a/tools/lib/css_parser.py +++ b/tools/lib/css_parser.py @@ -13,27 +13,36 @@ class Token(object): self.col = col class CssParserException(Exception): - # TODO: Have callers pass in line numbers. - pass + def __init__(self, msg, token): + # type: (str, Token) -> None + self.msg = msg + self.token = token -def find_end_brace(tokens, i): - # type: (List[Token], int) -> int + def __str__(self): + # type: () -> str + return self.msg + +def find_end_brace(tokens, i, end): + # type: (List[Token], int, int) -> int depth = 0 - while i < len(tokens): + while i < end: s = tokens[i].s if s == '{': depth += 1 elif s == '}': if depth == 0: - raise CssParserException('unexpected }') + raise CssParserException('unexpected }', tokens[i]) elif depth == 1: break depth -= 1 i += 1 + else: + raise CssParserException('missing }', tokens[i-1]) + return i -def get_whitespace_and_comments(tokens, i, line=None): - # type: (List[Token], int, int) -> Tuple[int, str] +def get_whitespace_and_comments(tokens, i, end, line=None): + # type: (List[Token], int, int, int) -> Tuple[int, str] def is_fluff_token(token): # type: (Token) -> bool @@ -52,7 +61,7 @@ def get_whitespace_and_comments(tokens, i, line=None): return False text = '' - while (i < len(tokens)) and is_fluff_token(tokens[i]): + while (i < end) and is_fluff_token(tokens[i]): s = tokens[i].s text += s i += 1 @@ -63,27 +72,29 @@ def get_whitespace_and_comments(tokens, i, line=None): ############### Begin parsing here -def parse_sections(tokens): - # type: (List[Token]) -> CssSectionList - i = 0 +def parse_sections(tokens, start, end): + # type: (List[Token], int, int) -> CssSectionList + i = start sections = [] - while i < len(tokens): - start, pre_fluff = get_whitespace_and_comments(tokens, i) + while i < end: + start, pre_fluff = get_whitespace_and_comments(tokens, i, end) - i = find_end_brace(tokens, start) + if start >= end: + raise CssParserException('unexpected empty section', tokens[end-1]) - section_tokens = tokens[start:i+1] - i, post_fluff = get_whitespace_and_comments(tokens, i+1) + i = find_end_brace(tokens, start, end) - if section_tokens: - section = parse_section( - tokens=section_tokens, - 
pre_fluff=pre_fluff, - post_fluff=post_fluff - ) - sections.append(section) - else: - raise CssParserException('unexpected empty section') + section_end = i + 1 + i, post_fluff = get_whitespace_and_comments(tokens, i+1, end) + + section = parse_section( + tokens=tokens, + start=start, + end=section_end, + pre_fluff=pre_fluff, + post_fluff=post_fluff + ) + sections.append(section) section_list = CssSectionList( tokens=tokens, @@ -91,16 +102,15 @@ def parse_sections(tokens): ) return section_list -def parse_section(tokens, pre_fluff, post_fluff): - # type: (List[Token], str, str) -> Union[CssNestedSection, CssSection] - assert not ws(tokens[0].s) - assert tokens[-1].s == '}' # caller should strip trailing fluff +def parse_section(tokens, start, end, pre_fluff, post_fluff): + # type: (List[Token], int, int, str, str) -> Union[CssNestedSection, CssSection] + assert not ws(tokens[start].s) + assert tokens[end-1].s == '}' # caller should strip trailing fluff - first_token = tokens[0].s + first_token = tokens[start].s if first_token in ('@media', '@keyframes') or first_token.startswith('@-'): - i, selector_list = parse_selectors_section(tokens) # not technically selectors - body_tokens = tokens[i+1:-1] - section_list = parse_sections(body_tokens) + i, selector_list = parse_selectors_section(tokens, start, end) # not technically selectors + section_list = parse_sections(tokens, i+1, end-1) nested_section = CssNestedSection( tokens=tokens, selector_list=selector_list, @@ -110,8 +120,8 @@ def parse_section(tokens, pre_fluff, post_fluff): ) return nested_section else: - i, selector_list = parse_selectors_section(tokens) - declaration_block = parse_declaration_block(tokens[i:]) + i, selector_list = parse_selectors_section(tokens, start, end) + declaration_block = parse_declaration_block(tokens, i, end) section = CssSection( tokens=tokens, selector_list=selector_list, @@ -121,35 +131,34 @@ def parse_section(tokens, pre_fluff, post_fluff): ) return section -def parse_selectors_section(tokens): - # type: (List[Token]) -> Tuple[int, CssSelectorList] - start, pre_fluff = get_whitespace_and_comments(tokens, 0) +def parse_selectors_section(tokens, start, end): + # type: (List[Token], int, int) -> Tuple[int, CssSelectorList] + start, pre_fluff = get_whitespace_and_comments(tokens, start, end) assert pre_fluff == '' i = start text = '' - while i < len(tokens) and tokens[i].s != '{': + while i < end and tokens[i].s != '{': s = tokens[i].s text += s i += 1 - selector_list = parse_selectors(tokens[start:i]) + selector_list = parse_selectors(tokens, start, i) return i, selector_list -def parse_selectors(tokens): - # type: (List[Token]) -> CssSelectorList - i = 0 - start = i +def parse_selectors(tokens, start, end): + # type: (List[Token], int, int) -> CssSelectorList + i = start selectors = [] - while i < len(tokens): + while i < end: s = tokens[i].s if s == ',': - selector = parse_selector(tokens[start:i]) + selector = parse_selector(tokens, start, i) selectors.append(selector) i += 1 start = i if s.startswith('/*'): - raise CssParserException('Comments in selector section are not allowed') + raise CssParserException('Comments in selector section are not allowed', tokens[i]) i += 1 - selector = parse_selector(tokens[start:i]) + selector = parse_selector(tokens, start, i) selectors.append(selector) selector_list = CssSelectorList( tokens=tokens, @@ -157,12 +166,12 @@ def parse_selectors(tokens): ) return selector_list -def parse_selector(tokens): - # type: (List[Token]) -> CssSelector - i, pre_fluff = 
get_whitespace_and_comments(tokens, 0) +def parse_selector(tokens, start, end): + # type: (List[Token], int, int) -> CssSelector + i, pre_fluff = get_whitespace_and_comments(tokens, start, end) levels = [] last_i = None - while i < len(tokens): + while i < end: token = tokens[i] i += 1 if not ws(token.s[0]): @@ -170,10 +179,10 @@ def parse_selector(tokens): levels.append(token) if last_i is None: - raise CssParserException('Missing selector') + raise CssParserException('Missing selector', tokens[-1]) assert last_i is not None - start, post_fluff = get_whitespace_and_comments(tokens, last_i) + start, post_fluff = get_whitespace_and_comments(tokens, last_i, end) selector = CssSelector( tokens=tokens, pre_fluff=pre_fluff, @@ -182,21 +191,20 @@ def parse_selector(tokens): ) return selector -def parse_declaration_block(tokens): - # type: (List[Token]) -> CssDeclarationBlock - assert tokens[0].s == '{' # caller should strip leading fluff - assert tokens[-1].s == '}' # caller should strip trailing fluff - tokens = tokens[1:-1] - i = 0 +def parse_declaration_block(tokens, start, end): + # type: (List[Token], int, int) -> CssDeclarationBlock + assert tokens[start].s == '{' # caller should strip leading fluff + assert tokens[end-1].s == '}' # caller should strip trailing fluff + i = start + 1 declarations = [] - while i < len(tokens): + while i < end-1: start = i - i, _ = get_whitespace_and_comments(tokens, i) - while (i < len(tokens)) and (tokens[i].s != ';'): + i, _ = get_whitespace_and_comments(tokens, i, end) + while (i < end) and (tokens[i].s != ';'): i += 1 - if i < len(tokens): - i, _ = get_whitespace_and_comments(tokens, i+1, line=tokens[i].line) - declaration = parse_declaration(tokens[start:i]) + if i < end: + i, _ = get_whitespace_and_comments(tokens, i+1, end, line=tokens[i].line) + declaration = parse_declaration(tokens, start, i) declarations.append(declaration) declaration_block = CssDeclarationBlock( @@ -205,26 +213,25 @@ def parse_declaration_block(tokens): ) return declaration_block -def parse_declaration(tokens): - # type: (List[Token]) -> CssDeclaration - i, pre_fluff = get_whitespace_and_comments(tokens, 0) - try: - css_property = tokens[i].s - except IndexError: - raise CssParserException('Empty declaration') +def parse_declaration(tokens, start, end): + # type: (List[Token], int, int) -> CssDeclaration + i, pre_fluff = get_whitespace_and_comments(tokens, start, end) + if (i >= end) or (tokens[i].s == '}'): + raise CssParserException('Empty declaration or missing semicolon', tokens[i-1]) + + css_property = tokens[i].s if tokens[i+1].s != ':': - # print(css_property) - raise CssParserException('We expect a colon here') + raise CssParserException('We expect a colon here', tokens[i]) i += 2 start = i - while (i < len(tokens)) and (tokens[i].s != ';'): + while (i < end) and (tokens[i].s != ';') and (tokens[i].s != '}'): i += 1 - css_value = parse_value(tokens[start:i]) - semicolon = (i < len(tokens)) and (tokens[i].s == ';') + css_value = parse_value(tokens, start, i) + semicolon = (i < end) and (tokens[i].s == ';') if semicolon: i += 1 - _, post_fluff = get_whitespace_and_comments(tokens, i) + _, post_fluff = get_whitespace_and_comments(tokens, i, end) declaration = CssDeclaration( tokens=tokens, pre_fluff=pre_fluff, @@ -235,11 +242,14 @@ def parse_declaration(tokens): ) return declaration -def parse_value(tokens): - # type: (List[Token]) -> CssValue - i, pre_fluff = get_whitespace_and_comments(tokens, 0) - value = tokens[i] - i, post_fluff = get_whitespace_and_comments(tokens, 
i+1) +def parse_value(tokens, start, end): + # type: (List[Token], int, int) -> CssValue + i, pre_fluff = get_whitespace_and_comments(tokens, start, end) + if i < end: + value = tokens[i] + else: + raise CssParserException('Missing value', tokens[i-1]) + i, post_fluff = get_whitespace_and_comments(tokens, i+1, end) return CssValue( tokens=tokens, value=value, @@ -377,7 +387,7 @@ class CssValue(object): def parse(text): # type: (str) -> CssSectionList tokens = tokenize(text) - section_list = parse_sections(tokens=tokens) + section_list = parse_sections(tokens, 0, len(tokens)) return section_list #### Begin tokenizer section here @@ -469,7 +479,7 @@ def tokenize(text): while (state.i < len(text)) and not looking_at('*/'): state.i += 1 if not looking_at('*/'): - raise CssParserException('unclosed comment') + raise CssParserException('unclosed comment', tokens[-1]) s = text[old_i:state.i+2] state.i = old_i diff --git a/tools/tests/test_css_parser.py b/tools/tests/test_css_parser.py index 51b0a3ce20..3205cf168f 100644 --- a/tools/tests/test_css_parser.py +++ b/tools/tests/test_css_parser.py @@ -193,6 +193,16 @@ class ParserTestSadPath(unittest.TestCase): error = 'Missing selector' self._assert_error(my_css, error) + def test_missing_value(self): + # type: () -> None + my_css = ''' + h1 + { + bottom: + }''' + error = 'Missing value' + self._assert_error(my_css, error) + def test_disallow_comments_in_selectors(self): # type: () -> None my_css = '''
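
For readers skimming the diff, here is a minimal, self-contained sketch of the pattern this patch adopts. It is not the real tools/lib/css_parser.py: Token and CssParserException are trimmed down, and simple_tokenize/expect_value are invented purely for illustration. The point is that every sub-parser now receives (tokens, start, end) indexes into the one shared token list, so when something is *missing* (rather than wrong) the error can blame tokens[i-1], which still knows its original line number:

    from typing import List

    class Token(object):
        def __init__(self, s, line):
            # type: (str, int) -> None
            self.s = s
            self.line = line

    class CssParserException(Exception):
        def __init__(self, msg, token):
            # type: (str, Token) -> None
            self.msg = msg
            self.token = token

    def simple_tokenize(text):
        # type: (str) -> List[Token]
        # Crude whitespace tokenizer, only here to drive the example; the
        # real tokenize() in css_parser.py is far more careful.
        tokens = []
        for line_num, line in enumerate(text.split('\n'), 1):
            for word in line.split():
                tokens.append(Token(word, line_num))
        return tokens

    def expect_value(tokens, i, end):
        # type: (List[Token], int, int) -> Token
        # Because i indexes the full token list (rather than a slice that
        # restarts at 0), tokens[i-1] is a real token with a real line
        # number even when the value itself is missing.
        if i >= end or tokens[i].s == '}':
            raise CssParserException('Missing value', tokens[i - 1])
        return tokens[i]

    tokens = simple_tokenize('h1 {\n    bottom:\n}')
    try:
        expect_value(tokens, 3, len(tokens))  # tokens[3] is '}', so the value is missing
    except CssParserException as e:
        print('%s (see line %d)' % (e.msg, e.token.line))  # Missing value (see line 2)

The same idea drives the new while/else in find_end_brace: if the scan falls off the end of the range without finding the closing brace, it can still report 'missing }' against the last token it saw, and check-css then turns e.token.line into the "See line {} in file {}" message shown at the top of the patch.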