From 258b658cc04410a8817e2d91d83915c05b90e875 Mon Sep 17 00:00:00 2001 From: Alex Vandiver Date: Thu, 5 May 2022 11:44:47 -0700 Subject: [PATCH] log-search: Allow multiple search terms. This allows AND'ing multiple terms together. --- scripts/log-search | 139 ++++++++++++++++++++++++++------------------- 1 file changed, 81 insertions(+), 58 deletions(-) diff --git a/scripts/log-search b/scripts/log-search index 4d5f903a9c..4fe330da32 100755 --- a/scripts/log-search +++ b/scripts/log-search @@ -8,7 +8,7 @@ import re import signal import sys from enum import Enum, auto -from typing import Callable, List, TextIO, Tuple +from typing import List, Set, TextIO, Tuple ZULIP_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) sys.path.append(ZULIP_PATH) @@ -20,6 +20,7 @@ setup_path() os.environ["DJANGO_SETTINGS_MODULE"] = "zproject.settings" from django.conf import settings +from typing_extensions import Protocol from scripts.lib.zulip_tools import BOLD, CYAN, ENDC, FAIL, GRAY, OKBLUE @@ -58,7 +59,9 @@ def parser() -> argparse.ArgumentParser: filtering = parser.add_argument_group("Filtering") filtering.add_argument( - "filter", help="IP address, hostname, user-id, path, or status code to search for" + "filter_terms", + help="IP address, hostname, user-id, path, or status code to search for; multiple are AND'ed together", + nargs="+", ) filtering.add_argument( "--all-lines", @@ -161,10 +164,17 @@ class FilterType(Enum): STATUS = auto() +class FilterFunc(Protocol): + def __call__( + self, m: re.Match, t: str = ... # type: ignore[type-arg] # Requires Python 3.9 + ) -> bool: + ... 
+ + def main() -> None: args = parser().parse_args() - (filter_type, filter_func) = parse_filters(args) + (filter_types, filter_funcs) = parse_filters(args) logfile_names = parse_logfile_names(args) try: @@ -173,7 +183,8 @@ def main() -> None: for logline in logfile: # As a performance optimization, just do a substring # check before we parse the line fully - if args.filter not in logline.lower(): + lowered = logline.lower() + if not all(f in lowered for f in args.filter_terms): continue if args.nginx: @@ -185,11 +196,11 @@ def main() -> None: if args.nginx: print(f"! Failed to parse:\n{logline}", file=sys.stderr) continue - if passes_filters(filter_func, match, args): + if passes_filters(filter_funcs, match, args): print_line( match, args, - filter_type=filter_type, + filter_types=filter_types, ) except KeyboardInterrupt: sys.exit(signal.SIGINT + 128) @@ -228,67 +239,79 @@ def parse_logfile_names(args: argparse.Namespace) -> List[str]: def parse_filters( args: argparse.Namespace, -) -> Tuple[FilterType, Callable[[re.Match], bool]]: # type: ignore[type-arg] # Requires Python 3.9 +) -> Tuple[Set[FilterType], List[FilterFunc]]: # The heuristics below are not intended to be precise -- they # certainly count things as "IPv4" or "IPv6" addresses that are # invalid. However, we expect the input here to already be # reasonably well-formed. 
- filter = args.filter + filter_types = set() + filter_funcs = [] + filter_terms = [] - if re.match(r"[1-5][0-9][0-9]$", filter): - filter_func = lambda m: m["code"] == filter - filter_type = FilterType.STATUS - if not args.nginx and filter == "502": - logging.warning("Adding --nginx -- 502's do not appear in Django logs.") - args.nginx = True - elif re.match(r"[1-5]xx$", filter): - filter = filter[0] - filter_func = lambda m: m["code"].startswith(filter) - filter_type = FilterType.STATUS - elif re.match(r"\d+$", filter): - if args.nginx: - raise parser().error("Cannot parse user-ids with nginx logs; try without --nginx") - filter_func = lambda m: m["user_id"] == filter - filter_type = FilterType.USER_ID - elif re.match(r"\d{1,3}(\.\d{1,3}){3}$", filter): - filter_func = lambda m: m["ip"] == filter - filter_type = FilterType.CLIENT_IP - elif re.match(r"([a-f0-9:]+:+){1,7}[a-f0-9]+$", filter): - filter_func = lambda m: m["ip"] == filter - filter_type = FilterType.CLIENT_IP - elif re.match(r"[a-z0-9]([a-z0-9-]*[a-z0-9])?$", filter.lower()): - filter = filter.lower() - if args.nginx: - filter_func = lambda m: m["hostname"].startswith(filter + ".") + for filter_term in args.filter_terms: + if re.match(r"[1-5][0-9][0-9]$", filter_term): + filter_func = lambda m, t=filter_term: m["code"] == t + filter_type = FilterType.STATUS + if not args.nginx and filter_term == "502": + logging.warning("Adding --nginx -- 502's do not appear in Django logs.") + args.nginx = True + elif re.match(r"[1-5]xx$", filter_term): + filter_term = filter_term[0] + filter_func = lambda m, t=filter_term: m["code"].startswith(t) + filter_type = FilterType.STATUS + elif re.match(r"\d+$", filter_term): + if args.nginx: + raise parser().error("Cannot parse user-ids with nginx logs; try without --nginx") + filter_func = lambda m, t=filter_term: m["user_id"] == t + filter_type = FilterType.USER_ID + elif re.match(r"\d{1,3}(\.\d{1,3}){3}$", filter_term): + filter_func = lambda m, t=filter_term: m["ip"] == 
t + filter_type = FilterType.CLIENT_IP + elif re.match(r"([a-f0-9:]+:+){1,7}[a-f0-9]+$", filter_term): + filter_func = lambda m, t=filter_term: m["ip"] == t + filter_type = FilterType.CLIENT_IP + elif re.match(r"[a-z0-9]([a-z0-9-]*[a-z0-9])?$", filter_term.lower()): + filter_term = filter_term.lower() + if args.nginx: + filter_func = lambda m, t=filter_term: m["hostname"].startswith(t + ".") + else: + filter_func = lambda m, t=filter_term: m["hostname"] == t + filter_type = FilterType.HOSTNAME + elif re.match(r"[a-z0-9-]+(\.[a-z0-9-]+)+$", filter_term.lower()) and re.search( + r"[a-z-]", filter_term.lower() + ): + if not args.nginx: + raise parser().error("Cannot parse full domains with Python logs; try --nginx") + filter_term = filter_term.lower() + filter_func = lambda m, t=filter_term: m["hostname"] == t + filter_type = FilterType.HOSTNAME + elif re.match(r"/\S*$", filter_term): + filter_func = lambda m, t=filter_term: m["path"] == t + filter_type = FilterType.PATH + args.all_lines = True else: - filter_func = lambda m: m["hostname"] == filter - filter_type = FilterType.HOSTNAME - elif re.match(r"[a-z0-9-]+(\.[a-z0-9-]+)+$", filter.lower()) and re.search( - r"[a-z-]", filter.lower() - ): - if not args.nginx: - raise parser().error("Cannot parse full domains with Python logs; try --nginx") - filter = filter.lower() - filter_func = lambda m: m["hostname"] == filter - filter_type = FilterType.HOSTNAME - elif re.match(r"/\S*$", filter): - filter_func = lambda m: m["path"] == filter - filter_type = FilterType.PATH - args.all_lines = True - else: - raise RuntimeError( - f"Can't parse {filter} as an IP, hostname, user-id, path, or status code." - ) - return (filter_type, filter_func) + raise RuntimeError( + f"Can't parse {filter_term} as an IP, hostname, user-id, path, or status code." 
+ ) + if filter_type in filter_types: + parser().error("Supplied the same type of value more than once, which cannot match!") + filter_types.add(filter_type) + filter_funcs.append(filter_func) + filter_terms.append(filter_term) + + # Push back the modified raw strings, so we can use them for fast substring searches + args.filter_terms = filter_terms + + return (filter_types, filter_funcs) def passes_filters( - string_filter: Callable[[re.Match], bool], # type: ignore[type-arg] # Requires Python 3.9 + string_filters: List[FilterFunc], match: re.Match, # type: ignore[type-arg] # Requires Python 3.9 args: argparse.Namespace, ) -> bool: - if not string_filter(match): + if not all(f(match) for f in string_filters): return False if args.all_lines: @@ -318,7 +341,7 @@ def passes_filters( def print_line( match: re.Match, # type: ignore[type-arg] # Requires Python 3.9 args: argparse.Namespace, - filter_type: FilterType, + filter_types: Set[FilterType], ) -> None: if args.full_line: print(match.group(0)) @@ -350,7 +373,7 @@ def print_line( indicator = "!" color = FAIL url = f"{BOLD}{match['path']}" - if filter_type != FilterType.HOSTNAME: + if FilterType.HOSTNAME not in filter_types: hostname = match["hostname"] if hostname is None: hostname = "???." + settings.EXTERNAL_HOST @@ -370,8 +393,8 @@ def print_line( parts = [ ts, f"{duration:>5}ms", - f"{user_id:7}" if not args.nginx and filter_type != FilterType.USER_ID else None, - f"{match['ip']:39}" if filter_type != FilterType.CLIENT_IP else None, + f"{user_id:7}" if not args.nginx and FilterType.USER_ID not in filter_types else None, + f"{match['ip']:39}" if FilterType.CLIENT_IP not in filter_types else None, indicator + match["code"], f"{match['method']:6}", url,