# zulip/tools/lib/css_parser.py

from six.moves import range
from typing import Callable, List, Tuple, Union

####### Helpers

class Token(object):
    """A piece of tokenized CSS text together with a line/column position."""

    def __init__(self, s, line, col):
        # type: (str, int, int) -> None
        # `s` is the raw token text; `line` and `col` are stored unchanged.
        self.s, self.line, self.col = s, line, col

class CssParserException(Exception):
    """Parsing error that carries a message and the token it is reported against."""

    def __init__(self, msg, token):
        # type: (str, Token) -> None
        self.token = token
        self.msg = msg

    def __str__(self):
        # type: () -> str
        # Only the message is rendered; the token stays available as `.token`.
        return self.msg

def find_end_brace(tokens, i, end):
    # type: (List[Token], int, int) -> int
    """Return the index of the '}' that closes the first '{' found from `i`.

    Scans tokens[i:end], tracking brace nesting.  Raises CssParserException
    if a '}' shows up before any '{' has been opened, or if `end` is reached
    without finding the closing brace.
    """
    depth = 0
    pos = i
    while pos < end:
        s = tokens[pos].s
        if s == '{':
            depth += 1
        elif s == '}':
            if depth == 0:
                raise CssParserException('unexpected }', tokens[pos])
            if depth == 1:
                # This brace closes the outermost block we entered.
                return pos
            depth -= 1
        pos += 1

    # Ran out of tokens; report against the last token examined.
    raise CssParserException('missing }', tokens[pos-1])

def get_whitespace_and_comments(tokens, i, end, line=None):
    # type: (List[Token], int, int, int) -> Tuple[int, str]
    """Consume a run of whitespace/comment tokens starting at index `i`.

    Returns (index of the first token not consumed, concatenated text of
    the consumed tokens).  Whitespace tokens are always consumed.  Comment
    tokens are consumed unconditionally when `line` is None; otherwise only
    comments whose token line equals `line` are taken, so that later
    comments get attached to the next actual line of code.
    """
    pieces = []
    while i < end:
        token = tokens[i]
        s = token.s
        if not ws(s[0]):
            if not s.startswith('/*'):
                # Real content: stop here.
                break
            if line is not None and token.line != line:
                # Comment on a different line than the caller asked for.
                break
        pieces.append(s)
        i += 1

    return i, ''.join(pieces)


############### Begin parsing here


def parse_sections(tokens, start, end):
    # type: (List[Token], int, int) -> CssSectionList
    """Parse tokens[start:end] into a CssSectionList.

    Each section spans from its first non-fluff token through its matching
    '}'.  Whitespace/comments before and after each section are passed to
    parse_section as pre_fluff/post_fluff.  Raises CssParserException when
    only fluff remains where a section was expected.
    """
    sections = []
    cursor = start
    while cursor < end:
        section_start, pre_fluff = get_whitespace_and_comments(tokens, cursor, end)

        if section_start >= end:
            raise CssParserException('unexpected empty section', tokens[end-1])

        closing_brace = find_end_brace(tokens, section_start, end)
        section_end = closing_brace + 1

        # Trailing fluff belongs to this section; the next one starts after it.
        cursor, post_fluff = get_whitespace_and_comments(tokens, section_end, end)

        sections.append(parse_section(
            tokens=tokens,
            start=section_start,
            end=section_end,
            pre_fluff=pre_fluff,
            post_fluff=post_fluff
        ))

    return CssSectionList(
        tokens=tokens,
        sections=sections,
    )

def parse_section(tokens, start, end, pre_fluff, post_fluff):
    # type: (List[Token], int, int, str, str) -> Union[CssNestedSection, CssSection]
    """Parse one section occupying tokens[start:end].

    Sections whose first token is '@media', '@keyframes', or starts with
    '@-' are treated as nested: their body is parsed as a list of sections.
    Everything else is parsed as selectors followed by a declaration block.
    """
    assert not ws(tokens[start].s)
    assert tokens[end-1].s == '}'  # caller should strip trailing fluff

    head = tokens[start].s
    is_nested = head in ('@media', '@keyframes') or head.startswith('@-')

    # For nested sections the "selectors" are not technically selectors,
    # but the text before the '{' is parsed the same way.
    brace_index, selector_list = parse_selectors_section(tokens, start, end)

    if not is_nested:
        return CssSection(
            tokens=tokens,
            selector_list=selector_list,
            declaration_block=parse_declaration_block(tokens, brace_index, end),
            pre_fluff=pre_fluff,
            post_fluff=post_fluff,
        )

    # Parse what sits strictly between the outer '{' and '}'.
    return CssNestedSection(
        tokens=tokens,
        selector_list=selector_list,
        section_list=parse_sections(tokens, brace_index+1, end-1),
        pre_fluff=pre_fluff,
        post_fluff=post_fluff,
    )

def parse_selectors_section(tokens, start, end):
    # type: (List[Token], int, int) -> Tuple[int, CssSelectorList]
    """Parse the selector list that precedes a section's '{'.

    Returns (index of the '{' token, or `end` if none is found before
    `end`; the parsed CssSelectorList for the tokens before that index).
    The caller is expected to have stripped leading whitespace/comments.
    """
    start, pre_fluff = get_whitespace_and_comments(tokens, start, end)
    assert pre_fluff == ''
    i = start
    # Advance to the opening brace; everything before it is selector text.
    while i < end and tokens[i].s != '{':
        i += 1
    selector_list = parse_selectors(tokens, start, i)
    return i, selector_list

def parse_selectors(tokens, start, end):
    # type: (List[Token], int, int) -> CssSelectorList
    """Split tokens[start:end] on ',' tokens and parse each piece as a selector.

    Raises CssParserException if any token in the range is a comment.
    Empty pieces (for example two adjacent commas, or a trailing comma) are
    handed to parse_selector as-is.
    """
    i = start
    selectors = []
    while i < end:
        s = tokens[i].s
        if s.startswith('/*'):
            raise CssParserException('Comments in selector section are not allowed', tokens[i])
        if s == ',':
            selectors.append(parse_selector(tokens, start, i))
            # The next selector begins right after the comma.  Advance the
            # cursor exactly once per token so the token following a comma
            # is still inspected on the next iteration.
            start = i + 1
        i += 1
    # The last selector runs to `end`, never beyond it.
    selectors.append(parse_selector(tokens, start, end))
    selector_list = CssSelectorList(
        tokens=tokens,
        selectors=selectors,
    )
    return selector_list

def parse_selector(tokens, start, end):
    # type: (List[Token], int, int) -> CssSelector
    """Parse a single selector from tokens[start:end].

    Every non-whitespace token after the leading fluff becomes one "level"
    of the selector.  Raises CssParserException when there is no such token.
    """
    first, pre_fluff = get_whitespace_and_comments(tokens, start, end)
    levels = []
    after_last = None  # index just past the last non-whitespace token
    for pos in range(first, end):
        token = tokens[pos]
        if ws(token.s[0]):
            continue
        levels.append(token)
        after_last = pos + 1

    if after_last is None:
        # NOTE(review): tokens[-1] is the last token of the whole token
        # list, which is not necessarily close to this selector.
        raise CssParserException('Missing selector', tokens[-1])

    _, post_fluff = get_whitespace_and_comments(tokens, after_last, end)
    return CssSelector(
        tokens=tokens,
        pre_fluff=pre_fluff,
        post_fluff=post_fluff,
        levels=levels,
    )

def parse_declaration_block(tokens, start, end):
    # type: (List[Token], int, int) -> CssDeclarationBlock
    """Parse the '{ ... }' block occupying tokens[start:end].

    The block is cut into declarations at ';' tokens.  Whitespace, and
    comments on the same line as the ';', that follow a semicolon are kept
    with the declaration they trail.
    """
    assert tokens[start].s == '{'  # caller should strip leading fluff
    assert tokens[end-1].s == '}'  # caller should strip trailing fluff
    declarations = []
    closing_brace = end - 1
    cursor = start + 1
    while cursor < closing_brace:
        decl_start = cursor
        cursor, _ = get_whitespace_and_comments(tokens, cursor, end)
        # Find the semicolon ending this declaration (or run to `end`).
        while cursor < end and tokens[cursor].s != ';':
            cursor += 1
        if cursor < end:
            semicolon_line = tokens[cursor].line
            cursor, _ = get_whitespace_and_comments(
                tokens, cursor+1, end, line=semicolon_line)
        declarations.append(parse_declaration(tokens, decl_start, cursor))

    return CssDeclarationBlock(
        tokens=tokens,
        declarations=declarations,
    )

def parse_declaration(tokens, start, end):
    # type: (List[Token], int, int) -> CssDeclaration
    """Parse one 'property: value;' declaration from tokens[start:end].

    Raises CssParserException if nothing but fluff precedes `end` or a '}',
    or if the token right after the property is not ':'.
    """
    i, pre_fluff = get_whitespace_and_comments(tokens, start, end)

    if i >= end or tokens[i].s == '}':
        raise CssParserException('Empty declaration or missing semicolon', tokens[i-1])

    property_token = tokens[i]
    if tokens[i+1].s != ':':
        raise CssParserException('We expect a colon here', property_token)

    # The value covers everything after the colon up to ';' or '}'.
    value_start = i + 2
    value_end = value_start
    while value_end < end and tokens[value_end].s not in (';', '}'):
        value_end += 1
    css_value = parse_value(tokens, value_start, value_end)

    semicolon = (value_end < end) and (tokens[value_end].s == ';')
    after_value = value_end + 1 if semicolon else value_end
    _, post_fluff = get_whitespace_and_comments(tokens, after_value, end)
    return CssDeclaration(
        tokens=tokens,
        pre_fluff=pre_fluff,
        post_fluff=post_fluff,
        css_property=property_token.s,
        css_value=css_value,
        semicolon=semicolon,
    )

def parse_value(tokens, start, end):
    # type: (List[Token], int, int) -> CssValue
    """Parse a declaration value from tokens[start:end].

    The first token after leading fluff is the value; fluff that follows
    it is kept as post_fluff.  Raises CssParserException if the range
    contains nothing but fluff.
    """
    value_index, pre_fluff = get_whitespace_and_comments(tokens, start, end)
    if value_index >= end:
        raise CssParserException('Missing value', tokens[value_index-1])
    value = tokens[value_index]
    _, post_fluff = get_whitespace_and_comments(tokens, value_index+1, end)
    return CssValue(
        tokens=tokens,
        value=value,
        pre_fluff=pre_fluff,
        post_fluff=post_fluff,
    )

def handle_prefluff(pre_fluff, indent=False):
    # type: (str, bool) -> str
    """Re-indent the whitespace/comment text that precedes a piece of CSS.

    Each line is stripped and given a fresh prefix: the base indent (four
    spaces when `indent` is true, otherwise nothing) for a line that opens
    a run of text or starts with '*/', '* ' or is exactly '*', and base
    indent plus three spaces for other continuation lines.  A non-empty
    last line gets a newline after it.  When `indent` is true the result
    additionally ends with the four-space indent for whatever follows.
    """
    raw_lines = pre_fluff.split('\n')
    base_indent = '    ' if indent else ''
    lone_line = len(raw_lines) == 1
    continuation = ''  # extra indent while inside a multi-line comment
    rendered = []
    for index, raw in enumerate(raw_lines):
        stripped = raw.strip()
        if stripped == '':
            # A lone whitespace-only line keeps a single space when indenting.
            prefix = ' ' if (lone_line and indent and raw != '') else ''
        elif index == 0 or not continuation:
            prefix = base_indent
            continuation = '   '
        elif stripped[:2] in ('*/', '* ', '*'):
            prefix = base_indent
            if '*/' in raw:
                continuation = ''
        else:
            prefix = base_indent + continuation
        rendered.append(prefix + stripped)

    if rendered[-1] != '':
        if indent and rendered[-1].strip() == '':
            rendered[-1] = ''
        rendered.append('')
    joined = '\n'.join(rendered)

    if not indent:
        return joined
    if '\n' in joined:
        return joined + '    '
    if joined == '':
        return '    '
    return joined.rstrip() + ' '

def handle_postfluff(post_fluff, indent=False, space_after_first_line=False):
    # type: (str, bool, bool) -> str
    """Re-indent the whitespace/comment text that follows a piece of CSS.

    Each line is stripped and given a fresh prefix.  Lines after the first
    follow the comment rules: base indent (four spaces when `indent` is
    true) for a line opening a run of text or starting with '*/', '* ' or
    being exactly '*', and base indent plus three spaces for other
    continuation lines.  A non-empty first line is pushed onto its own
    indented line when `indent` is true and the text has more than two
    lines; otherwise it gets a single leading space when
    `space_after_first_line` is true.  The tail of the result is then
    normalized (see the final if/elif chain).
    """
    raw_lines = post_fluff.split('\n')
    base_indent = '    ' if indent else ''
    continuation = ''  # extra indent while inside a multi-line comment
    rendered = []
    for index, raw in enumerate(raw_lines):
        stripped = raw.strip()
        prefix = ''
        if stripped and index:
            if not continuation:
                prefix = base_indent
                continuation = '   '
            elif stripped[:2] in ('*/', '* ', '*'):
                prefix = base_indent
                if '*/' in raw:
                    continuation = ''
            else:
                prefix = base_indent + continuation
        elif stripped:
            # Non-empty first line: it always opens a run of text.
            continuation = '   '
            if indent and len(raw_lines) > 2:
                rendered.append('')
                prefix = base_indent
            elif space_after_first_line:
                prefix = ' '
        rendered.append(prefix + stripped)

    last = rendered[-1]
    if len(rendered) == 1 and not space_after_first_line:
        if last.strip() != '':
            rendered.append('')
        elif last != '':
            rendered[-1] = ' '
    elif last.strip() == '':
        rendered[-1] = ''
        if len(rendered) == 1 and indent:
            rendered.append('')
    elif space_after_first_line:
        rendered.append('')
    return '\n'.join(rendered)

#### Begin CSS classes here

class CssSectionList(object):
    """An ordered list of sections; renders as the concatenation of their text."""

    def __init__(self, tokens, sections):
        # type: (List[Token], List[Union[CssNestedSection, CssSection]]) -> None
        self.sections = sections
        self.tokens = tokens

    def text(self):
        # type: () -> str
        rendered = [section.text() for section in self.sections]
        return ''.join(rendered)

class CssNestedSection(object):
    """A section whose body is itself a list of sections."""

    def __init__(self, tokens, selector_list, section_list, pre_fluff, post_fluff):
        # type: (List[Token], CssSelectorList, CssSectionList, str, str) -> None
        self.tokens = tokens
        self.pre_fluff = pre_fluff
        self.post_fluff = post_fluff
        self.selector_list = selector_list
        self.section_list = section_list

    def text(self):
        # type: () -> str
        header = self.pre_fluff + self.selector_list.text() + ' {'
        # Indent every non-blank body line one level; blank (or
        # whitespace-only) lines become empty.
        body_lines = [
            '' if ln.strip() == '' else '    ' + ln
            for ln in self.section_list.text().split('\n')
        ]
        return header + '\n'.join(body_lines) + '}' + self.post_fluff

class CssSection(object):
    """A section made of a selector list and a declaration block."""

    def __init__(self, tokens, selector_list, declaration_block, pre_fluff, post_fluff):
        # type: (List[Token], CssSelectorList, CssDeclarationBlock, str, str) -> None
        self.tokens = tokens
        self.pre_fluff = pre_fluff
        self.post_fluff = post_fluff
        self.selector_list = selector_list
        self.declaration_block = declaration_block

    def text(self):
        # type: () -> str
        # Pieces are listed in output order: normalized leading fluff,
        # selectors, a single space, the block, normalized trailing fluff.
        parts = [
            handle_prefluff(self.pre_fluff),
            self.selector_list.text(),
            ' ',
            self.declaration_block.text(),
            handle_postfluff(self.post_fluff, space_after_first_line=True),
        ]
        return ''.join(parts)

class CssSelectorList(object):
    """The comma-separated selectors of one section."""

    def __init__(self, tokens, selectors):
        # type: (List[Token], List[CssSelector]) -> None
        self.selectors = selectors
        self.tokens = tokens

    def text(self):
        # type: () -> str
        # One selector per line, each but the last followed by a comma.
        separator = ',\n'
        rendered = [selector.text() for selector in self.selectors]
        return separator.join(rendered)

class CssSelector(object):
    """A single selector, stored as the list of its non-whitespace tokens."""

    def __init__(self, tokens, pre_fluff, post_fluff, levels):
        # type: (List[Token],str, str, List[Token]) -> None
        self.tokens = tokens
        self.levels = levels
        self.pre_fluff = pre_fluff
        self.post_fluff = post_fluff

    def text(self):
        # type: () -> str
        # Levels are joined by exactly one space; the stored pre/post fluff
        # is not part of the rendered text.
        pieces = [level.s for level in self.levels]
        return ' '.join(pieces)

class CssDeclarationBlock(object):
    """The brace-delimited list of declarations belonging to a section."""

    def __init__(self, tokens, declarations):
        # type: (List[Token], List[CssDeclaration]) -> None
        self.declarations = declarations
        self.tokens = tokens

    def text(self):
        # type: () -> str
        body = ''.join([declaration.text() for declaration in self.declarations])
        return '{' + body + '}'

class CssDeclaration(object):
    """One 'property: value;' declaration plus its surrounding fluff."""

    def __init__(self, tokens, pre_fluff, post_fluff, css_property, css_value, semicolon):
        # type: (List[Token], str, str, str, CssValue, bool) -> None
        self.tokens = tokens
        self.pre_fluff = pre_fluff
        self.post_fluff = post_fluff
        self.css_property = css_property
        self.css_value = css_value
        self.semicolon = semicolon

    def text(self):
        # type: () -> str
        head = handle_prefluff(self.pre_fluff, True) + self.css_property + ':'
        value_text = self.css_value.text()
        if '\n' in value_text:
            # gradient values can be multi-line; keep their leading layout
            body = value_text.rstrip()
        else:
            body = ' ' + value_text.strip()
        tail = handle_postfluff(self.post_fluff, True, True)
        # A semicolon is always emitted, whatever self.semicolon says.
        return head + body + ';' + tail

class CssValue(object):
    """The value of a declaration: one token plus the fluff around it."""

    def __init__(self, tokens, value, pre_fluff, post_fluff):
        # type: (List[Token], Token, str, str) -> None
        # Keep a reference to the token list, as the other Css* classes do.
        self.tokens = tokens
        self.value = value
        self.pre_fluff = pre_fluff
        self.post_fluff = post_fluff
        # Only whitespace (no comments) may precede the value.
        assert pre_fluff.strip() == ''

    def text(self):
        # type: () -> str
        return self.pre_fluff + self.value.s + self.post_fluff

def parse(text):
    # type: (str) -> CssSectionList
    """Tokenize `text` and parse the full token list into a CssSectionList."""
    tokens = tokenize(text)
    return parse_sections(tokens, 0, len(tokens))

#### Begin tokenizer section here

def ws(c):
    # type: (str) -> bool
    """Return True if `c` occurs in the string of space, tab and newline."""
    whitespace_chars = ' \t\n'
    return c in whitespace_chars

def tokenize(text):
    # type: (str) -> List[Token]
    """Split CSS source `text` into a list of Token objects.

    Each token records the line and column (both starting at 1) at which
    it begins.  The token kinds are: the single characters '{', '}', ';',
    ':' and ','; runs of whitespace; '/* ... */' comments; the literal
    '@media' and the query text that follows it; property names; values;
    and, outside of declaration blocks, selector pieces.

    Raises CssParserException if a comment is opened but never closed.
    """

    class State(object):
        def __init__(self):
            # type: () -> None
            self.i = 0
            self.line = 1
            self.col = 1

    tokens = []
    state = State()

    def add_token(s, state):
        # type: (str, State) -> None
        # Copy the position now; `state` keeps moving afterwards.
        token = Token(s=s, line=state.line, col=state.col)
        tokens.append(token)

    def legal(offset):
        # type: (int) -> bool
        return state.i + offset < len(text)

    def advance(n):
        # type: (int) -> None
        # Step one character at a time so line/col stay accurate.
        for _ in range(n):
            state.i += 1
            if state.i >= 0 and text[state.i - 1] == '\n':
                state.line += 1
                state.col = 1
            else:
                state.col += 1

    def looking_at(s):
        # type: (str) -> bool
        return text[state.i:state.i+len(s)] == s

    def get_field(terminator):
        # type: (Callable[[str], bool]) -> str
        # Read up to (not including) the first character accepted by
        # `terminator`; terminators are ignored while paren_level is non-zero.
        offset = 0
        paren_level = 0
        while legal(offset) and (paren_level or not terminator(text[state.i + offset])):
            c = text[state.i + offset]
            if c == '(':
                paren_level += 1
            elif c == ')':
                paren_level -= 1
            offset += 1
        return text[state.i:state.i+offset]

    in_property = False
    in_value = False
    in_media_line = False
    starting_media_section = False
    while state.i < len(text):
        c = text[state.i]

        if c in '{};:,':
            if c == ':':
                in_property = False
                in_value = True
            elif c == ';':
                in_property = True
                in_value = False
            elif c == '{':
                if starting_media_section:
                    # The brace opening an @media body is followed by
                    # nested sections, not by properties.
                    starting_media_section = False
                else:
                    in_property = True
            elif c == '}':
                in_property = False
            s = c

        elif ws(c):
            terminator = lambda c: not ws(c)
            s = get_field(terminator)

        elif looking_at('/*'):
            # Search from just past the opener so that the '*' of '/*'
            # cannot also serve as the start of the closing '*/'.
            close = text.find('*/', state.i + 2)
            if close == -1:
                # Report against the preceding token when there is one;
                # otherwise build a token at the comment opener so callers
                # still get a CssParserException with a usable position.
                if tokens:
                    culprit = tokens[-1]
                else:
                    culprit = Token(s='/*', line=state.line, col=state.col)
                raise CssParserException('unclosed comment', culprit)
            s = text[state.i:close+2]

        elif looking_at('@media'):
            s = '@media'
            in_media_line = True
            starting_media_section = True

        elif in_media_line:
            # Everything up to the '{' is one token (the media query).
            in_media_line = False
            terminator = lambda c: c == '{'
            s = get_field(terminator)
            s = s.rstrip()

        elif in_property:
            terminator = lambda c: ws(c) or c in ':{'
            s = get_field(terminator)

        elif in_value:
            # A value runs to the next ';' or '}' and is emitted as one
            # token with trailing whitespace removed.
            in_value = False
            in_property = True
            terminator = lambda c: c in ';}'
            s = get_field(terminator)
            s = s.rstrip()

        else:
            terminator = lambda c: ws(c) or c == ','
            s = get_field(terminator)

        add_token(s, state)
        advance(len(s))

    return tokens