2016-08-02 03:00:12 +02:00
|
|
|
from collections import defaultdict
|
|
|
|
from six.moves import range
|
2017-03-03 19:01:52 +01:00
|
|
|
from typing import Dict, List, Set
|
2016-08-02 03:00:12 +02:00
|
|
|
|
2016-09-11 20:23:29 +02:00
|
|
|
from .html_branches import html_branches, HtmlTreeBranch
|
2016-08-02 03:00:12 +02:00
|
|
|
|
|
|
|
def show_all_branches(fns):
    # type: (List[str]) -> None
    '''
    Print the branches found in each of the given files.

    For every filename in fns this prints the filename, then the
    text() of each branch that html_branches() returns for the
    file's contents, and finally a '---' separator line.
    '''
    for fn in fns:
        print(fn)
        # Read through a context manager so the file handle is
        # closed right away instead of whenever it gets collected.
        with open(fn) as f:
            text = f.read()
        branches = html_branches(text, fn=fn)
        for branch in branches:
            print(branch.text())
        print('---')
|
|
|
|
|
|
|
|
class Grepper(object):
    '''
    A Grepper object is optimized to do repeated
    searches of words that can be found in our
    HtmlTreeBranch objects.

    On construction it parses every file once and builds an
    index from each word to the set of branches containing it,
    so each later search is just a few set intersections.
    '''

    def __init__(self, fns):
        # type: (List[str]) -> None
        '''
        Parse the files named in fns and build the word index.

        Sets self.word_dict (word -> set of branches whose
        `words` include it) and self.all_branches (every branch
        found across all of the files).
        '''
        all_branches = []  # type: List[HtmlTreeBranch]

        for fn in fns:
            # Close each file as soon as it has been read rather
            # than leaking the handle until garbage collection.
            with open(fn) as f:
                text = f.read()
            all_branches += html_branches(text, fn=fn)

        self.word_dict = defaultdict(set)  # type: Dict[str, Set[HtmlTreeBranch]]
        for b in all_branches:
            for word in b.words:
                self.word_dict[word].add(b)

        self.all_branches = set(all_branches)

    def grep(self, word_set):
        # type: (Set[str]) -> None
        '''
        Print every branch that contains all of the words in word_set.

        An empty word_set matches every known branch.  Matches are
        printed in (fn, line) order: a "fn line" header, then the
        branch's staircase_text(), then a blank line.
        '''
        words = list(word_set)  # type: List[str]

        if not words:
            matches = self.all_branches
        else:
            # Look words up with .get() rather than indexing:
            # word_dict is a defaultdict, so indexing an unknown
            # word would insert an empty entry and make the index
            # grow on every failed search.
            no_branches = set()  # type: Set[HtmlTreeBranch]
            matches = self.word_dict.get(words[0], no_branches)
            for word in words[1:]:
                matches = matches & self.word_dict.get(word, no_branches)

        branches = sorted(matches, key=lambda branch: (branch.fn, branch.line))
        for branch in branches:
            print('%s %d' % (branch.fn, branch.line))
            print(branch.staircase_text())
            print('')
|
|
|
|
|
|
|
|
def grep(fns, words):
    # type: (List[str], Set[str]) -> None
    '''
    One-shot search: build a Grepper over the files in fns and
    print the branches that contain every word in words.
    '''
    Grepper(fns).grep(words)
|