2018-02-15 22:16:40 +01:00
|
|
|
from typing import Callable, List, Optional, Text
|
2016-08-02 00:14:01 +02:00
|
|
|
|
2016-08-31 00:43:08 +02:00
|
|
|
class TemplateParserException(Exception):
    """Error raised when a template cannot be tokenized or validated.

    The human-readable description is stored on ``message`` and is what
    ``str()`` of the exception returns.
    """

    def __init__(self, message):
        # type: (str) -> None
        # Hand the message to Exception as well, so that ``args``, ``repr()``
        # and pickling carry it instead of being empty.
        super().__init__(message)
        self.message = message

    def __str__(self):
        # type: () -> str
        return self.message
|
|
|
|
|
|
|
|
class TokenizationException(Exception):
    """Low-level scanning error raised by the ``get_*`` tag extractors.

    Attributes:
        message: short description of what is wrong.
        line_content: the offending slice of template text, or None when the
            raiser did not supply one.
    """

    def __init__(self, message, line_content=None):
        # type: (str, Optional[str]) -> None
        # Forward the message to Exception so that ``str()``, ``args`` and
        # ``repr()`` are meaningful rather than empty.
        super().__init__(message)
        self.message = message
        self.line_content = line_content
|
2016-08-31 00:43:08 +02:00
|
|
|
|
2017-11-05 11:57:15 +01:00
|
|
|
class TokenizerState:
    """Mutable cursor for a scan over template text.

    ``i`` starts at 0, ``line`` and ``col`` both start at 1.
    """

    def __init__(self) -> None:
        self.i, self.line, self.col = 0, 1, 1
|
|
|
|
|
2017-11-05 11:57:15 +01:00
|
|
|
class Token:
    """Plain record describing one token; every field is stored as given."""

    def __init__(self, kind: str, s: str, tag: str,
                 line: int, col: int, line_span: int) -> None:
        # What was matched.
        self.kind, self.s, self.tag = kind, s, tag
        # Where it was matched and how many lines it covers.
        self.line, self.col, self.line_span = line, col, line_span
|
2016-08-02 00:14:01 +02:00
|
|
|
|
|
|
|
def tokenize(text):
    # type: (str) -> List[Token]
    """Scan template text and return its tags and comments as tokens.

    At each position the scanner tries, in this order: HTML comment,
    Handlebars comment, Django comment, Handlebars partial, HTML tag,
    Handlebars block start/end, Django tag start/end.  Any other character
    is skipped.  Each token records the line and column where it starts.

    Every ``html_singleton`` and ``handlebars_singleton`` token is followed
    by a synthetic ``html_singleton_end`` / ``handlebars_singleton_end``
    token positioned just after it, so consumers such as a pretty printer
    can detect where the singleton ends.

    Raises:
        TemplateParserException: if a construct is malformed (as reported by
            the ``get_*`` extractors) or a start tag has no name.
    """
    state = TokenizerState()
    tokens = []  # type: List[Token]

    def advance(n):
        # type: (int) -> None
        # Step one character at a time so line/col track newlines that occur
        # inside multi-line tokens.
        for _ in range(n):
            state.i += 1
            if text[state.i - 1] == '\n':
                state.line += 1
                state.col = 1
            else:
                state.col += 1

    def looking_at(prefix):
        # type: (str) -> bool
        return text.startswith(prefix, state.i)

    def first_word(body):
        # type: (str) -> str
        # An empty tag body (e.g. "{{#}}" or "<>") has no name; report it as
        # a parser error instead of letting an IndexError escape.
        parts = body.split()
        if not parts:
            raise TemplateParserException("Tag name missing")
        return parts[0]

    while state.i < len(text):
        try:
            if looking_at("<!--"):
                s = get_html_comment(text, state.i)
                tag = s[4:-3]
                kind = 'html_comment'
            elif looking_at("{{!"):
                s = get_handlebar_comment(text, state.i)
                tag = s[3:-2]
                kind = 'handlebar_comment'
            elif looking_at("{#"):
                s = get_django_comment(text, state.i)
                tag = s[2:-2]
                kind = 'django_comment'
            elif looking_at("{{partial"):
                s = get_handlebar_partial(text, state.i)
                tag = s[9:-2]
                kind = 'handlebars_singleton'
            elif looking_at("</"):
                s = get_html_tag(text, state.i)
                tag = s[2:-1]
                kind = 'html_end'
            elif looking_at("<"):
                s = get_html_tag(text, state.i)
                tag = first_word(s[1:-1])
                if is_special_html_tag(s, tag):
                    kind = 'html_special'
                elif is_self_closing_html_tag(s, tag):
                    kind = 'html_singleton'
                else:
                    kind = 'html_start'
            elif looking_at("{{#") or looking_at("{{^"):
                s = get_handlebars_tag(text, state.i)
                tag = first_word(s[3:-2])
                kind = 'handlebars_start'
            elif looking_at("{{/"):
                s = get_handlebars_tag(text, state.i)
                tag = s[3:-2]
                kind = 'handlebars_end'
            elif looking_at("{% end"):
                s = get_django_tag(text, state.i)
                tag = s[6:-3]
                kind = 'django_end'
            elif looking_at("{% "):
                s = get_django_tag(text, state.i)
                tag = first_word(s[3:-2])
                kind = 'django_start'
            else:
                advance(1)
                continue
        except TokenizationException as e:
            raise TemplateParserException('''%s at Line %d Col %d:"%s"''' %
                                          (e.message, state.line, state.col,
                                           e.line_content)) from e

        tokens.append(Token(
            kind=kind,
            s=s,
            tag=tag,
            line=state.line,
            col=state.col,
            line_span=s.count('\n') + 1,
        ))
        advance(len(s))

        if kind in ('html_singleton', 'handlebars_singleton'):
            # Pseudo end token, positioned right after the singleton, so
            # that the end of singleton HTML tags and Handlebars partials
            # is easy to detect (this helps indent multi-line partials).
            tokens.append(Token(
                kind=kind + '_end',
                s='</' + tag + '>',
                tag=tag,
                line=state.line,
                col=state.col,
                line_span=1,
            ))

    return tokens
|
|
|
|
|
2016-08-04 01:44:15 +02:00
|
|
|
def validate(fn=None, text=None, check_indent=True):
    # type: (Optional[str], Optional[str], bool) -> None
    """Check that start and end tags in a template are properly paired.

    Args:
        fn: path of the template; read from disk when ``text`` is None, and
            used in error messages.  Defaults to '<in memory file>' when
            only ``text`` is given.
        text: template source; when None the content of ``fn`` is used.
        check_indent: when True, an end tag that is more than ``max_lines``
            lines below its start tag (3 for ``a`` tags, 1 otherwise) must
            start in the same column as the start tag.

    HTML, Handlebars and Django block tags share one nesting stack: every
    end tag must match the most recently opened start tag (leading/trailing
    ``~`` are ignored when comparing names).

    Raises:
        TemplateParserException: on an end tag without a start tag, a
            mismatched tag, a ``code`` tag closed on the line right after it
            was opened, bad indentation, or a start tag left open at the end.
    """
    # Kept as an assert so callers keep seeing the same AssertionError.
    assert fn or text

    if fn is None:
        fn = '<in memory file>'

    if text is None:
        # Close the file deterministically instead of leaking the handle.
        with open(fn) as template_file:
            text = template_file.read()

    tokens = tokenize(text)

    class State:
        def __init__(self, func):
            # type: (Callable[[Token], None]) -> None
            self.depth = 0
            # Handler for the next end tag; each open start tag wraps the
            # previous handler, which forms the nesting stack.
            self.matcher = func

    def no_start_tag(token):
        # type: (Token) -> None
        raise TemplateParserException('''
            No start tag
            fn: %s
            end tag:
                %s
                line %d, col %d
            ''' % (fn, token.tag, token.line, token.col))

    state = State(no_start_tag)

    def start_tag_matcher(start_token):
        # type: (Token) -> None
        state.depth += 1
        start_tag = start_token.tag.strip('~')
        start_line = start_token.line
        start_col = start_token.col

        old_matcher = state.matcher

        def f(end_token):
            # type: (Token) -> None

            end_tag = end_token.tag.strip('~')
            end_line = end_token.line
            end_col = end_token.col

            if start_tag == 'a':
                max_lines = 3
            else:
                max_lines = 1

            problem = None
            if (start_tag == 'code') and (end_line == start_line + 1):
                problem = 'Code tag is split across two lines.'
            # The checks below may overwrite the 'code' problem above.
            if start_tag != end_tag:
                problem = 'Mismatched tag.'
            elif check_indent and (end_line > start_line + max_lines):
                if end_col != start_col:
                    problem = 'Bad indentation.'
            if problem:
                raise TemplateParserException('''
                    fn: %s
                    %s
                    start:
                        %s
                        line %d, col %d
                    end tag:
                        %s
                        line %d, col %d
                    ''' % (fn, problem, start_token.s, start_line, start_col, end_tag, end_line, end_col))
            # Pop this start tag: restore the handler of the enclosing one.
            state.matcher = old_matcher
            state.depth -= 1
        state.matcher = f

    for token in tokens:
        kind = token.kind
        tag = token.tag

        if kind in ('html_start', 'handlebars_start'):
            start_tag_matcher(token)
        elif kind == 'django_start':
            # Only Django block tags are expected to have an end tag.
            if is_django_block_tag(tag):
                start_tag_matcher(token)
        elif kind in ('html_end', 'handlebars_end', 'django_end'):
            state.matcher(token)

    if state.depth != 0:
        raise TemplateParserException('Missing end tag')
|
2016-08-02 00:14:01 +02:00
|
|
|
|
|
|
|
def is_special_html_tag(s, tag):
    # type: (str, str) -> bool
    """Return True when ``tag`` is 'link', 'meta' or '!DOCTYPE'.

    ``s`` is accepted but not used.
    """
    special_names = ('link', 'meta', '!DOCTYPE')
    return tag in special_names
|
2016-08-02 00:14:01 +02:00
|
|
|
|
2018-02-15 22:16:40 +01:00
|
|
|
def is_self_closing_html_tag(s: Text, tag: Text) -> bool:
    """Return True when the tag needs no separate closing tag.

    That is the case when the tag text ``s`` ends with ``/>`` or when
    ``tag`` is one of the names listed below.
    """
    if s.endswith('/>'):
        return True
    return tag in (
        'area', 'base', 'br', 'col',
        'embed', 'hr', 'img', 'input',
        'param', 'source', 'track', 'wbr',
    )
|
|
|
|
|
2016-08-02 00:14:01 +02:00
|
|
|
def is_django_block_tag(tag):
    # type: (str) -> bool
    """Return True when ``tag`` is one of the Django tag names listed below."""
    block_tag_names = (
        'autoescape', 'block', 'comment', 'for',
        'if', 'ifequal', 'verbatim', 'blocktrans',
        'trans', 'raw', 'with',
    )
    return tag in block_tag_names
|
|
|
|
|
|
|
|
def get_handlebars_tag(text, i):
    # type: (str, int) -> str
    """Return the Handlebars tag that starts at ``text[i]``.

    The tag runs from ``i`` through the first ``}`` found at or after
    ``i + 2``, which must be immediately followed by a second ``}``.

    Raises:
        TokenizationException: if no ``}`` follows, or the first ``}`` is
            not part of ``}}`` (including a lone ``}`` at the very end of
            the text, which previously escaped as an IndexError).
    """
    close = text.find('}', i + 2)
    if close == -1:
        raise TokenizationException('Tag missing "}}"', text[i:])
    # Slicing (unlike indexing) is safe when the '}' is the last character.
    if text[close:close + 2] != '}}':
        raise TokenizationException('Tag missing "}}"', text[i:close + 2])
    return text[i:close + 2]
|
|
|
|
|
|
|
|
def get_django_tag(text, i):
    # type: (str, int) -> str
    """Return the Django tag that starts at ``text[i]``.

    The tag runs from ``i`` through the first ``%`` found at or after
    ``i + 2``, which must be immediately followed by ``}``.

    Raises:
        TokenizationException: if no ``%`` follows, or the first ``%`` is
            not part of ``%}`` (including a lone ``%`` at the very end of
            the text, which previously escaped as an IndexError).
    """
    close = text.find('%', i + 2)
    if close == -1:
        raise TokenizationException('Tag missing "%}"', text[i:])
    # Slicing (unlike indexing) is safe when the '%' is the last character.
    if text[close:close + 2] != '%}':
        raise TokenizationException('Tag missing "%}"', text[i:close + 2])
    return text[i:close + 2]
|
|
|
|
|
|
|
|
def get_html_tag(text, i):
    # type: (str, int) -> str
    """Return the HTML tag that starts at ``text[i]``, through its ``>``.

    A ``>`` only terminates the tag when an even number of double quotes
    has been seen since ``i``, so a ``>`` inside a quoted value is skipped.

    Raises:
        TokenizationException: 'Unbalanced Quotes' if the scan stops with an
            odd number of double quotes, or 'Tag missing ">"' if the text
            ends before a closing ``>``.
    """
    length = len(text)
    quotes_seen = 0
    # Offset of the first '<' met while scanning; 0 means none was seen.
    nested_open = 0
    pos = i + 1

    while pos < length:
        ch = text[pos]
        if ch == '>' and quotes_seen % 2 == 0:
            break
        if ch == '"':
            quotes_seen += 1
        if ch == '<' and not nested_open:
            nested_open = pos
        pos += 1

    if quotes_seen % 2 != 0:
        # Prefer cutting the reported snippet at the next '<' when there is one.
        snippet = text[i:nested_open] if nested_open else text[i:pos + 1]
        raise TokenizationException('Unbalanced Quotes', snippet)

    if pos == length or text[pos] != '>':
        raise TokenizationException('Tag missing ">"', text[i:pos + 1])

    return text[i:pos + 1]
|
2017-02-03 04:26:35 +01:00
|
|
|
|
|
|
|
def get_html_comment(text, i):
    # type: (str, int) -> str
    """Return the HTML comment that starts at ``text[i]``, through ``-->``.

    Raises:
        TokenizationException: 'Unclosed comment' if no ``-->`` follows.
            The reported snippet runs up to the next ``<`` at or after
            ``i + 7``; without one it is the rest of the comment's first
            line (previously this case hit an IndexError or reported '').
    """
    # The terminator may start right after the 4-character "<!--" opener.
    close = text.find('-->', i + 4)
    if close != -1:
        return text[i:close + 3]

    nested_open = text.find('<', i + 7)
    if nested_open != -1:
        snippet = text[i:nested_open]
    else:
        snippet = text[i:].split('\n', 1)[0]
    raise TokenizationException('Unclosed comment', snippet)
|
2017-02-23 18:12:52 +01:00
|
|
|
|
|
|
|
def get_handlebar_comment(text, i):
    # type: (str, int) -> str
    """Return the Handlebars comment starting at ``text[i]``, through ``}}``.

    Raises:
        TokenizationException: 'Unclosed comment' if no ``}}`` follows.
            The reported snippet runs up to the next ``<`` at or after
            ``i + 5``; without one it is the rest of the comment's first
            line (previously this case hit an IndexError or reported '').
    """
    # The terminator may start right after the 3-character "{{!" opener.
    close = text.find('}}', i + 3)
    if close != -1:
        return text[i:close + 2]

    nested_open = text.find('<', i + 5)
    if nested_open != -1:
        snippet = text[i:nested_open]
    else:
        snippet = text[i:].split('\n', 1)[0]
    raise TokenizationException('Unclosed comment', snippet)
|
|
|
|
|
|
|
|
def get_django_comment(text, i):
    # type: (str, int) -> str
    """Return the Django comment that starts at ``text[i]``, through ``#}``.

    Raises:
        TokenizationException: 'Unclosed comment' if no ``#}`` follows.
            The reported snippet runs up to the next ``<`` at or after
            ``i + 4``; without one it is the rest of the comment's first
            line (previously this case hit an IndexError or reported '').
    """
    # The terminator may start right after the 2-character "{#" opener.
    close = text.find('#}', i + 2)
    if close != -1:
        return text[i:close + 2]

    nested_open = text.find('<', i + 4)
    if nested_open != -1:
        snippet = text[i:nested_open]
    else:
        snippet = text[i:].split('\n', 1)[0]
    raise TokenizationException('Unclosed comment', snippet)
|
2018-04-03 10:09:00 +02:00
|
|
|
|
|
|
|
def get_handlebar_partial(text, i):
    # type: (str, int) -> str
    """Return the Handlebars partial starting at ``text[i]``, through ``}}``.

    Raises:
        TokenizationException: 'Unclosed partial' if no ``}}`` follows.
            The reported snippet runs up to the next ``<`` at or after
            ``i + 10``; without one it is the rest of the partial's first
            line (previously this case hit an IndexError or reported '').
    """
    # Earliest offset at which the original scan could match the terminator.
    close = text.find('}}', i + 8)
    if close != -1:
        return text[i:close + 2]

    nested_open = text.find('<', i + 10)
    if nested_open != -1:
        snippet = text[i:nested_open]
    else:
        snippet = text[i:].split('\n', 1)[0]
    raise TokenizationException('Unclosed partial', snippet)
|