# zulip/tools/lib/pretty_print.py

import subprocess
from typing import Any, Dict, List

from zulint.printer import ENDC, GREEN

from .template_parser import is_django_block_tag, tokenize


def pretty_print_html(html: str, num_spaces: int = 4) -> str:
    """Return `html` with each line's leading whitespace re-indented.

    The text is tokenized with `tokenize`, and the start/end tokens are
    walked while maintaining a stack of open tags.  Whenever a block
    closes, an integer offset is recorded for the lines it covers; in a
    final pass each line is shifted right (spaces prepended) or left
    (leading characters removed) by its offset.  Lines containing only
    whitespace are emitted as empty strings.

    Args:
        html: The template source to re-indent.
        num_spaces: Number of spaces used per indentation depth level.

    Returns:
        The re-indented text, with lines joined by '\\n'.

    Raises:
        AssertionError: If removing leading characters from a line would
            change its non-whitespace content.
    """
    # We use 1-based indexing for both rows and columns.
    tokens = tokenize(html)
    lines = html.split('\n')

    # We will keep a stack of "start" tags so that we know
    # when HTML ranges end.  Note that some start tags won't
    # be blocks from an indentation standpoint.
    stack: List[Dict[str, Any]] = []

    # Seed our stack with a pseudo entry to make depth calculations
    # easier.
    info: Dict[str, Any] = dict(
        block=False,
        depth=-1,
        line=-1,
        token_kind='html_start',
        tag='html',
        extra_indent=0,
        ignore_lines=[],
    )
    stack.append(info)

    # Our main job is to figure out offsets that we use to nudge lines
    # over by.
    # Maps a 1-based line number to the signed number of columns that line
    # should move (positive: right, negative: left).
    offsets: Dict[int, int] = {}

    # Loop through our start/end tokens, and calculate offsets.  As
    # we proceed, we will push/pop info dictionaries on/off a stack.
    for token in tokens:

        # Start-like tokens.  While the innermost open tag is `pre`, they
        # are skipped entirely so nothing inside `pre` gets pushed.
        if (
            token.kind
            in (
                'html_start',
                'handlebars_start',
                'handlebars_singleton',
                'html_singleton',
                'django_start',
                'jinja2_whitespace_stripped_type2_start',
                'jinja2_whitespace_stripped_start',
            )
            and stack[-1]['tag'] != 'pre'
        ):
            # An HTML start tag should only cause a new indent if we
            # are on a new line.
            # The listed tags are never pushed, and a `django_start` token
            # is pushed only when `is_django_block_tag` accepts its tag.
            if token.tag not in ('extends', 'include', 'else', 'elif') and (
                is_django_block_tag(token.tag) or token.kind != 'django_start'
            ):
                # A token is a block when it starts on a later line than
                # the entry currently on top of the stack.
                is_block = token.line > stack[-1]['line']

                if is_block:
                    # A handlebars start directly inside a handlebars start
                    # (or a django/jinja2 start directly inside a
                    # django/jinja2 start) whose parent is not yet marked
                    # as indenting: bump the parent's depth by one and mark
                    # it as indenting before computing this token's depth.
                    # NOTE(review): entries built in the non-block branch
                    # below carry no 'indenting' key; if such an entry with
                    # a matching token_kind is on top of the stack here,
                    # the lookup looks like it would raise KeyError --
                    # confirm whether that input is reachable.
                    if (
                        (
                            token.kind == 'handlebars_start'
                            and stack[-1]['token_kind'] == 'handlebars_start'
                        )
                        or (
                            token.kind
                            in {
                                'django_start',
                                'jinja2_whitespace_stripped_type2_start',
                                'jinja2_whitespace_stripped_start',
                            }
                            and stack[-1]['token_kind']
                            in {
                                'django_start',
                                'jinja2_whitespace_stripped_type2_start',
                                'jinja2_whitespace_stripped_start',
                            }
                        )
                    ) and not stack[-1]['indenting']:
                        info = stack.pop()
                        info['depth'] = info['depth'] + 1
                        info['indenting'] = True
                        # Lines before this one that already have offsets
                        # get an extra shift when the parent closes (see
                        # the `adjust_offset_until` check further down).
                        info['adjust_offset_until'] = token.line
                        stack.append(info)
                    new_depth = stack[-1]['depth'] + 1
                    extra_indent = stack[-1]['extra_indent']
                    line = lines[token.line - 1]
                    # 1-based column of the first non-whitespace character
                    # on the token's line.
                    adjustment = len(line) - len(line.lstrip()) + 1
                    # Desired 1-based column minus the current one.
                    offset = (1 + extra_indent + new_depth * num_spaces) - adjustment
                    info = dict(
                        block=True,
                        depth=new_depth,
                        actual_depth=new_depth,
                        line=token.line,
                        tag=token.tag,
                        token_kind=token.kind,
                        line_span=token.line_span,
                        offset=offset,
                        # Distance between the token's column and the first
                        # non-whitespace column of its line, accumulated on
                        # top of the parent's extra indent.
                        extra_indent=token.col - adjustment + extra_indent,
                        extra_indent_prev=extra_indent,
                        adjustment=adjustment,
                        indenting=True,
                        adjust_offset_until=token.line,
                        ignore_lines=[],
                    )
                    # Handlebars and django start blocks begin one depth
                    # lower and not indenting; they are promoted by the
                    # nested-start check above if a same-family start
                    # appears directly inside them.
                    if token.kind in ('handlebars_start', 'django_start'):
                        info.update(dict(depth=new_depth - 1, indenting=False))
                else:
                    # Same line as the enclosing entry: inherit its depth
                    # and extra indent without opening a new block.
                    info = dict(
                        block=False,
                        depth=stack[-1]['depth'],
                        actual_depth=stack[-1]['depth'],
                        line=token.line,
                        tag=token.tag,
                        token_kind=token.kind,
                        extra_indent=stack[-1]['extra_indent'],
                        ignore_lines=[],
                    )
                stack.append(info)
        # End-like tokens.  Inside `pre`, only the closing `pre` tag itself
        # is honored.
        elif (
            token.kind
            in (
                'html_end',
                'handlebars_end',
                'html_singleton_end',
                'django_end',
                'handlebars_singleton_end',
                'jinja2_whitespace_stripped_end',
            )
            and (stack[-1]['tag'] != 'pre' or token.tag == 'pre')
        ):
            info = stack.pop()
            if info['block']:
                # We are at the end of an indentation block.  We
                # assume the whole block was formatted ok before, just
                # possibly at an indentation that we don't like, so we
                # nudge over all lines in the block by the same offset.
                start_line = info['line']
                end_line = token.line
                if token.tag == 'pre':
                    # `pre` lines are left untouched, and are recorded on
                    # the enclosing entry so a later shift skips them.
                    offsets[start_line] = 0
                    offsets[end_line] = 0
                    stack[-1]['ignore_lines'].append(start_line)
                    stack[-1]['ignore_lines'].append(end_line)
                else:
                    offsets[start_line] = info['offset']
                    line = lines[token.line - 1]
                    # 1-based column of the first non-whitespace character
                    # on the end token's line.
                    adjustment = len(line) - len(line.lstrip()) + 1
                    # The end token is the first thing on its line (and is
                    # not an html_singleton_end).
                    if adjustment == token.col and token.kind != 'html_singleton_end':
                        offsets[end_line] = (
                            info['offset']
                            + info['adjustment']
                            - adjustment
                            + info['extra_indent']
                            - info['extra_indent_prev']
                        )
                    # The end token sits on the last line of a start tag
                    # that spans more than one line.
                    elif start_line + info['line_span'] - 1 == end_line and info['line_span'] > 1:
                        offsets[end_line] = (
                            1 + info['extra_indent'] + (info['depth'] + 1) * num_spaces
                        ) - adjustment
                        # We would like singleton tags and tags which spread over
                        # multiple lines to have 2 space indentation.
                        offsets[end_line] -= 2
                    # Otherwise, when the block ends on a different line
                    # than it started, reuse the start line's offset.
                    elif token.line != info['line']:
                        offsets[end_line] = info['offset']
                # Now handle the lines strictly between the start and end
                # lines of the block.
                if token.tag != 'pre' and token.tag != 'script':
                    for line_num in range(start_line + 1, end_line):
                        # Be careful not to override offsets that happened
                        # deeper in the HTML within our block.
                        if line_num not in offsets:
                            line = lines[line_num - 1]
                            new_depth = info['depth'] + 1
                            # else/elif lines use the block's own
                            # `actual_depth` rather than the body depth.
                            if (
                                line.lstrip().startswith('{{else}}')
                                or line.lstrip().startswith('{% else %}')
                                or line.lstrip().startswith('{% elif')
                            ):
                                new_depth = info['actual_depth']
                            extra_indent = info['extra_indent']
                            adjustment = len(line) - len(line.lstrip()) + 1
                            offset = (1 + extra_indent + new_depth * num_spaces) - adjustment
                            # Lines still within the start tag's own span.
                            if line_num <= start_line + info['line_span'] - 1:
                                # We would like singleton tags and tags which spread over
                                # multiple lines to have 2 space indentation.
                                offset -= 2
                            offsets[line_num] = offset
                        # The line already has an offset from a deeper
                        # block.  If this handlebars/django block was
                        # promoted to indenting, push lines before
                        # `adjust_offset_until` one more level, skipping
                        # lines recorded in this block's `ignore_lines`.
                        elif (
                            token.kind in ('handlebars_end', 'django_end')
                            and info['indenting']
                            and line_num < info['adjust_offset_until']
                            and line_num not in info['ignore_lines']
                        ):
                            offsets[line_num] += num_spaces
                elif token.tag != 'pre':
                    # Reached only for `script`: unassigned inner lines all
                    # move by the block's own offset.
                    for line_num in range(start_line + 1, end_line):
                        if line_num not in offsets:
                            offsets[line_num] = info['offset']
                else:
                    # Reached only for `pre`: unassigned inner lines keep
                    # their position and are recorded on the enclosing
                    # entry's `ignore_lines`.
                    for line_num in range(start_line + 1, end_line):
                        if line_num not in offsets:
                            offsets[line_num] = 0
                            stack[-1]['ignore_lines'].append(line_num)

    # Now that we have all of our offsets calculated, we can just
    # join all our lines together, fixing up offsets as needed.
    formatted_lines = []
    for i, line in enumerate(html.split('\n')):
        row = i + 1
        # Lines with no recorded offset are left where they are.
        offset = offsets.get(row, 0)
        pretty_line = line
        if line.strip() == '':
            pretty_line = ''
        else:
            if offset > 0:
                pretty_line = (' ' * offset) + pretty_line
            elif offset < 0:
                # Drop `-offset` leading characters; the assert checks that
                # only whitespace was removed.
                pretty_line = pretty_line[-1 * offset :]
                assert line.strip() == pretty_line.strip()
        formatted_lines.append(pretty_line)

    return '\n'.join(formatted_lines)


def validate_indent_html(fn: str, fix: bool) -> int:
    """Check the indentation of the template file `fn`, optionally fixing it.

    The file is read and run through `pretty_print_html`; the result is
    compared with the file's current contents.

    Args:
        fn: Path of the file to check.
        fix: When true and the file differs from the pretty-printed form,
            overwrite the file with the pretty-printed text.

    Returns:
        1 when the file already matches the pretty-printed output.
        0 when it does not match -- both when the file was rewritten
        (`fix` true) and when only a diff was printed (`fix` false).
    """
    with open(fn) as source:
        current = source.read()
    expected = pretty_print_html(current)

    # Splitting on '\n' is lossless, so comparing the two strings directly
    # is the same as comparing their lists of lines.
    if current == expected:
        return 1

    if not fix:
        report = (
            f'Invalid Indentation detected in file: {fn}\n'
            'Diff for the file against expected indented file:'
        )
        print(report, flush=True)
        # Feed the expected text to `diff` on stdin ('-') so it is compared
        # against the file on disk.
        subprocess.run(['diff', fn, '-'], input=expected, universal_newlines=True)
        print()
        print("This problem can be fixed with the `--fix` option.")
        return 0

    print(GREEN + "Automatically fixing problems..." + ENDC)
    # Rewrite the file in place with the corrected indentation; this path
    # reports 0, the same value as the unfixed-mismatch path.
    with open(fn, 'w') as target:
        target.write(expected)
    return 0