# zulip/tools/lib/html_branches.py
#
# 99 lines
# 2.5 KiB
# Python

import re
from collections import defaultdict
from .template_parser import FormattedError, Token, tokenize
class TagInfo:
    """Tag name, classes and ids of one tag, plus the token it came from."""

    def __init__(self, tag: str, classes: list[str], ids: list[str], token: Token) -> None:
        self.tag = tag
        self.classes = classes
        self.ids = ids
        self.token = token
        # Selector-style words: the bare tag name, then every class
        # prefixed with "." and every id prefixed with "#".
        dotted_classes = ["." + name for name in classes]
        hashed_ids = ["#" + name for name in ids]
        self.words = [tag] + dotted_classes + hashed_ids

    def text(self) -> str:
        """Return the tag as one selector-like string, e.g. ``div.a.b#x``."""
        pieces = [self.tag]
        pieces.extend("." + name for name in self.classes)
        pieces.extend("#" + name for name in self.ids)
        return "".join(pieces)
def get_tag_info(token: Token) -> TagInfo:
    """Build a TagInfo from the class and id attributes in a token's text.

    Both double- and single-quoted attribute values are recognized. Each
    pattern is searched once, so only its first match in ``token.s``
    contributes; the captured value is split into individual words with
    split_for_id_and_class().
    """
    source = token.s
    tag = token.tag
    classes: list[str] = []
    ids: list[str] = []

    # Each pattern is paired with the list its captured words belong in.
    attribute_patterns = (
        (classes, ' class="(.*?)"'),
        (classes, " class='(.*?)'"),
        (ids, ' id="(.*?)"'),
        (ids, " id='(.*?)'"),
    )

    for bucket, pattern in attribute_patterns:
        match = re.search(pattern, source)
        if not match:
            continue
        for captured in match.groups():
            bucket.extend(split_for_id_and_class(captured))

    return TagInfo(tag=tag, classes=classes, ids=ids, token=token)
def split_for_id_and_class(element: str) -> list[str]:
    """Split an id or class attribute value into its individual words.

    Words are separated by spaces, except that spaces inside template
    braces do not split, so template variables stay in one piece. For
    example, 'red black {{ a|b|c }}' becomes
    ['red', 'black', '{{ a|b|c }}'].
    """
    words: list[str] = []
    current: list[str] = []
    inside_braces = False

    for char in element:
        # Any "{" enters brace mode and any "}" leaves it; nesting depth
        # is not tracked.
        if char == "{":
            inside_braces = True
        elif char == "}":
            inside_braces = False

        if char != " " or inside_braces:
            current.append(char)
        elif current:
            # A separating space ends the word collected so far.
            words.append("".join(current))
            current = []

    if current:
        words.append("".join(current))
    return words
def build_id_dict(templates: list[str]) -> dict[str, list[str]]:
    """Map every id found in the given template files to where it occurs.

    Args:
        templates: Paths of the template files to scan.

    Returns:
        A defaultdict keyed by id. Each value holds one
        ``"Line <line>:<path>"`` entry per tag carrying that id, in the
        order the files and their tags were scanned.

    Raises:
        Exception: If tokenize() raises FormattedError for a file. The
            message names the file, and the FormattedError is chained as
            the cause.
    """
    template_id_dict: dict[str, list[str]] = defaultdict(list)

    for fn in templates:
        with open(fn) as f:
            text = f.read()

        try:
            list_tags = tokenize(text)
        except FormattedError as e:
            # Chain explicitly so the traceback presents the tokenizer
            # error as the direct cause of this failure.
            raise Exception(f"\nfn: {fn}\n{e}") from e

        for tag in list_tags:
            info = get_tag_info(tag)
            for id_name in info.ids:
                template_id_dict[id_name].append(f"Line {info.token.line}:{fn}")

    return template_id_dict