log-search: Allow multiple search terms.

This allows AND'ing multiple terms together.
This commit is contained in:
Alex Vandiver 2022-05-05 11:44:47 -07:00 committed by Tim Abbott
parent bd73e7d411
commit 258b658cc0
1 changed file with 81 additions and 58 deletions

View File

@ -8,7 +8,7 @@ import re
import signal import signal
import sys import sys
from enum import Enum, auto from enum import Enum, auto
from typing import Callable, List, TextIO, Tuple from typing import List, Set, TextIO, Tuple
ZULIP_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) ZULIP_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(ZULIP_PATH) sys.path.append(ZULIP_PATH)
@ -20,6 +20,7 @@ setup_path()
os.environ["DJANGO_SETTINGS_MODULE"] = "zproject.settings" os.environ["DJANGO_SETTINGS_MODULE"] = "zproject.settings"
from django.conf import settings from django.conf import settings
from typing_extensions import Protocol
from scripts.lib.zulip_tools import BOLD, CYAN, ENDC, FAIL, GRAY, OKBLUE from scripts.lib.zulip_tools import BOLD, CYAN, ENDC, FAIL, GRAY, OKBLUE
@ -58,7 +59,9 @@ def parser() -> argparse.ArgumentParser:
filtering = parser.add_argument_group("Filtering") filtering = parser.add_argument_group("Filtering")
filtering.add_argument( filtering.add_argument(
"filter", help="IP address, hostname, user-id, path, or status code to search for" "filter_terms",
help="IP address, hostname, user-id, path, or status code to search for; multiple are AND'ed together",
nargs="+",
) )
filtering.add_argument( filtering.add_argument(
"--all-lines", "--all-lines",
@ -161,10 +164,17 @@ class FilterType(Enum):
STATUS = auto() STATUS = auto()
class FilterFunc(Protocol):
    """Structural type for one filter predicate applied to a parsed log line.

    parse_filters() builds these as lambdas of the form
    ``lambda m, t=filter_term: ...``, binding the search term as the default
    value of ``t`` so that each predicate keeps its own term.
    passes_filters() then invokes them with the match object only.
    """

    def __call__(
        self, m: re.Match, t: str = ...  # type: ignore[type-arg] # Requires Python 3.9
    ) -> bool:
        """Return whether the regex match ``m`` satisfies this filter.

        ``t`` is the term to compare against; callers normally leave it at
        its bound default.
        """
        ...
def main() -> None: def main() -> None:
args = parser().parse_args() args = parser().parse_args()
(filter_type, filter_func) = parse_filters(args) (filter_types, filter_funcs) = parse_filters(args)
logfile_names = parse_logfile_names(args) logfile_names = parse_logfile_names(args)
try: try:
@ -173,7 +183,8 @@ def main() -> None:
for logline in logfile: for logline in logfile:
# As a performance optimization, just do a substring # As a performance optimization, just do a substring
# check before we parse the line fully # check before we parse the line fully
if args.filter not in logline.lower(): lowered = logline.lower()
if not all(f in lowered for f in args.filter_terms):
continue continue
if args.nginx: if args.nginx:
@ -185,11 +196,11 @@ def main() -> None:
if args.nginx: if args.nginx:
print(f"! Failed to parse:\n{logline}", file=sys.stderr) print(f"! Failed to parse:\n{logline}", file=sys.stderr)
continue continue
if passes_filters(filter_func, match, args): if passes_filters(filter_funcs, match, args):
print_line( print_line(
match, match,
args, args,
filter_type=filter_type, filter_types=filter_types,
) )
except KeyboardInterrupt: except KeyboardInterrupt:
sys.exit(signal.SIGINT + 128) sys.exit(signal.SIGINT + 128)
@ -228,67 +239,79 @@ def parse_logfile_names(args: argparse.Namespace) -> List[str]:
def parse_filters(
    args: argparse.Namespace,
) -> Tuple[Set[FilterType], List[FilterFunc]]:
    """Classify every search term and build one predicate per term.

    Each entry of ``args.filter_terms`` is matched against a series of
    heuristics (status code, status class such as ``5xx``, user-id, IPv4,
    IPv6, bare hostname, full domain, path).  The first heuristic that
    matches decides the term's FilterType and the predicate used to test a
    parsed log line.

    Side effects on ``args``:
      * ``args.nginx`` is switched on when ``502`` is one of the terms.
      * ``args.all_lines`` is switched on when a path term is given.
      * ``args.filter_terms`` is replaced by the normalized, lowercased
        terms, which main() uses for its cheap substring pre-check against
        the lowercased log line.

    Returns:
        A tuple of (set of FilterTypes in use, list of predicates); a log
        line must satisfy every predicate.

    Raises:
        RuntimeError: a term matches none of the heuristics.
        SystemExit: via ``parser().error()`` when a term cannot be used with
            the selected log format, or when two terms have the same type.
    """
    # The heuristics below are not intended to be precise -- they
    # certainly count things as "IPv4" or "IPv6" addresses that are
    # invalid.  However, we expect the input here to already be
    # reasonably well-formed.
    filter_types: Set[FilterType] = set()
    filter_funcs: List[FilterFunc] = []
    filter_terms: List[str] = []

    # Settle the log format before classifying anything: the hostname and
    # user-id branches below depend on args.nginx, so flipping it midway
    # through the loop would make the result depend on term order.
    if not args.nginx and "502" in args.filter_terms:
        logging.warning("Adding --nginx -- 502's do not appear in Django logs.")
        args.nginx = True

    for filter_term in args.filter_terms:
        # Every lambda binds its term as a default argument; a plain closure
        # would see only the last value of the loop variable.
        if re.match(r"[1-5][0-9][0-9]$", filter_term):
            filter_func = lambda m, t=filter_term: m["code"] == t
            filter_type = FilterType.STATUS

        elif re.match(r"[1-5]xx$", filter_term):
            # Keep only the leading digit, e.g. "5xx" -> "5".
            filter_term = filter_term[0]
            filter_func = lambda m, t=filter_term: m["code"].startswith(t)
            filter_type = FilterType.STATUS

        elif re.match(r"\d+$", filter_term):
            if args.nginx:
                # error() prints usage and exits; it never returns.
                parser().error("Cannot parse user-ids with nginx logs; try without --nginx")
            filter_func = lambda m, t=filter_term: m["user_id"] == t
            filter_type = FilterType.USER_ID

        elif re.match(r"\d{1,3}(\.\d{1,3}){3}$", filter_term):
            filter_func = lambda m, t=filter_term: m["ip"] == t
            filter_type = FilterType.CLIENT_IP

        elif re.match(r"([a-f0-9:]+:+){1,7}[a-f0-9]+$", filter_term):
            filter_func = lambda m, t=filter_term: m["ip"] == t
            filter_type = FilterType.CLIENT_IP

        elif re.match(r"[a-z0-9]([a-z0-9-]*[a-z0-9])?$", filter_term.lower()):
            filter_term = filter_term.lower()
            if args.nginx:
                # With --nginx a bare name is treated as the first label of
                # the hostname.
                filter_func = lambda m, t=filter_term: m["hostname"].startswith(t + ".")
            else:
                filter_func = lambda m, t=filter_term: m["hostname"] == t
            filter_type = FilterType.HOSTNAME

        elif re.match(r"[a-z0-9-]+(\.[a-z0-9-]+)+$", filter_term.lower()) and re.search(
            r"[a-z-]", filter_term.lower()
        ):
            if not args.nginx:
                parser().error("Cannot parse full domains with Python logs; try --nginx")
            filter_term = filter_term.lower()
            filter_func = lambda m, t=filter_term: m["hostname"] == t
            filter_type = FilterType.HOSTNAME

        elif re.match(r"/\S*$", filter_term):
            filter_func = lambda m, t=filter_term: m["path"] == t
            filter_type = FilterType.PATH
            args.all_lines = True

        else:
            raise RuntimeError(
                f"Can't parse {filter_term} as an IP, hostname, user-id, path, or status code."
            )

        if filter_type in filter_types:
            # Terms are AND'ed and each one tests a single field for one
            # value, so two terms of the same type can never both match.
            parser().error("Supplied the same type of value more than once, which cannot match!")
        filter_types.add(filter_type)
        filter_funcs.append(filter_func)
        # The substring pre-check compares against the lowercased log line,
        # so the stored term must be lowercase too (this matters for paths);
        # the predicate above still compares the exact-case value.
        filter_terms.append(filter_term.lower())

    # Push back the modified raw strings, so we can use them for fast substring searches
    args.filter_terms = filter_terms

    return (filter_types, filter_funcs)
def passes_filters( def passes_filters(
string_filter: Callable[[re.Match], bool], # type: ignore[type-arg] # Requires Python 3.9 string_filters: List[FilterFunc],
match: re.Match, # type: ignore[type-arg] # Requires Python 3.9 match: re.Match, # type: ignore[type-arg] # Requires Python 3.9
args: argparse.Namespace, args: argparse.Namespace,
) -> bool: ) -> bool:
if not string_filter(match): if not all(f(match) for f in string_filters):
return False return False
if args.all_lines: if args.all_lines:
@ -318,7 +341,7 @@ def passes_filters(
def print_line( def print_line(
match: re.Match, # type: ignore[type-arg] # Requires Python 3.9 match: re.Match, # type: ignore[type-arg] # Requires Python 3.9
args: argparse.Namespace, args: argparse.Namespace,
filter_type: FilterType, filter_types: Set[FilterType],
) -> None: ) -> None:
if args.full_line: if args.full_line:
print(match.group(0)) print(match.group(0))
@ -350,7 +373,7 @@ def print_line(
indicator = "!" indicator = "!"
color = FAIL color = FAIL
url = f"{BOLD}{match['path']}" url = f"{BOLD}{match['path']}"
if filter_type != FilterType.HOSTNAME: if FilterType.HOSTNAME not in filter_types:
hostname = match["hostname"] hostname = match["hostname"]
if hostname is None: if hostname is None:
hostname = "???." + settings.EXTERNAL_HOST hostname = "???." + settings.EXTERNAL_HOST
@ -370,8 +393,8 @@ def print_line(
parts = [ parts = [
ts, ts,
f"{duration:>5}ms", f"{duration:>5}ms",
f"{user_id:7}" if not args.nginx and filter_type != FilterType.USER_ID else None, f"{user_id:7}" if not args.nginx and FilterType.USER_ID not in filter_types else None,
f"{match['ip']:39}" if filter_type != FilterType.CLIENT_IP else None, f"{match['ip']:39}" if FilterType.CLIENT_IP not in filter_types else None,
indicator + match["code"], indicator + match["code"],
f"{match['method']:6}", f"{match['method']:6}",
url, url,