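# zulip/tools/lib/html_grep.py
#
# (Repository-viewer header captured with the file: 60 lines, 1.7 KiB, Python;
# "Raw / Normal View / History" are the viewer's navigation links.)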

from collections import defaultdict
from typing import Dict, List, Set
from .html_branches import html_branches, HtmlTreeBranch
def show_all_branches(fns: List[str]) -> None:
    '''
    Dump the branches of every file named in fns.

    For each file this prints the filename, then the text() of
    each branch that html_branches() produces for the file's
    contents, then a '---' separator line.
    '''
    for path in fns:
        print(path)
        with open(path) as handle:
            contents = handle.read()
        # html_branches receives the filename through its ``fn`` keyword.
        for html_branch in html_branches(contents, fn=path):
            print(html_branch.text())
        print('---')
# 2017-11-05 11:57:15 +01:00  (revision timestamp left over from the history view)
class Grepper:
    '''
    A Grepper object is optimized to do repeated
    searches of words that can be found in our
    HtmlTreeBranch objects.

    The constructor reads every file once and builds an index
    (word_dict) from each word to the set of branches whose
    ``words`` include it, so each grep() call only has to
    intersect precomputed sets.
    '''

    def __init__(self, fns: List[str]) -> None:
        '''
        Read each file named in fns, collect the branches that
        html_branches() returns for it, and index them by word.
        '''
        all_branches = []  # type: List[HtmlTreeBranch]
        for fn in fns:
            with open(fn) as f:
                text = f.read()
            all_branches.extend(html_branches(text, fn=fn))

        # Maps a word to every branch whose ``words`` contains it.
        self.word_dict = defaultdict(set)  # type: Dict[str, Set[HtmlTreeBranch]]
        for branch in all_branches:
            for word in branch.words:
                self.word_dict[word].add(branch)

        # Used as the result set when a search has no words at all.
        self.all_branches = set(all_branches)

    def grep(self, word_set: Set[str]) -> None:
        '''
        Print every branch that contains all of the words in word_set.

        An empty word_set matches every known branch.  Matches are
        printed in (fn, line) order; for each one we print a
        "<fn> <line>" header, the branch's staircase_text(), and
        then a blank line.
        '''
        if word_set:
            # Look words up with .get() instead of indexing: word_dict
            # is a defaultdict, so indexing an unknown word would insert
            # an empty set and make the index grow on every miss.
            no_branches = set()  # type: Set[HtmlTreeBranch]
            per_word = [self.word_dict.get(word, no_branches) for word in word_set]
            matches = set.intersection(*per_word)
        else:
            matches = self.all_branches

        for branch in sorted(matches, key=lambda branch: (branch.fn, branch.line)):
            print('%s %d' % (branch.fn, branch.line))
            print(branch.staircase_text())
            print('')
def grep(fns: List[str], words: Set[str]) -> None:
    '''
    One-shot search: build a Grepper over the files in fns and
    print the branches matching words.  Callers that search the
    same files repeatedly can keep a Grepper instead, so the
    files are only read once.
    '''
    Grepper(fns).grep(words)