log-search: Allow multiple search terms.

This allows AND'ing multiple terms together.
This commit is contained in:
Alex Vandiver 2022-05-05 11:44:47 -07:00 committed by Tim Abbott
parent bd73e7d411
commit 258b658cc0
1 changed file with 81 additions and 58 deletions

View File

@ -8,7 +8,7 @@ import re
import signal import signal
import sys import sys
from enum import Enum, auto from enum import Enum, auto
from typing import Callable, List, TextIO, Tuple from typing import List, Set, TextIO, Tuple
ZULIP_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) ZULIP_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(ZULIP_PATH) sys.path.append(ZULIP_PATH)
@ -20,6 +20,7 @@ setup_path()
os.environ["DJANGO_SETTINGS_MODULE"] = "zproject.settings" os.environ["DJANGO_SETTINGS_MODULE"] = "zproject.settings"
from django.conf import settings from django.conf import settings
from typing_extensions import Protocol
from scripts.lib.zulip_tools import BOLD, CYAN, ENDC, FAIL, GRAY, OKBLUE from scripts.lib.zulip_tools import BOLD, CYAN, ENDC, FAIL, GRAY, OKBLUE
@ -58,7 +59,9 @@ def parser() -> argparse.ArgumentParser:
filtering = parser.add_argument_group("Filtering") filtering = parser.add_argument_group("Filtering")
filtering.add_argument( filtering.add_argument(
"filter", help="IP address, hostname, user-id, path, or status code to search for" "filter_terms",
help="IP address, hostname, user-id, path, or status code to search for; multiple are AND'ed together",
nargs="+",
) )
filtering.add_argument( filtering.add_argument(
"--all-lines", "--all-lines",
@ -161,10 +164,17 @@ class FilterType(Enum):
STATUS = auto() STATUS = auto()
class FilterFunc(Protocol):
    """Structural type for a single-term log-line predicate.

    A conforming callable receives the regex match for a parsed log line
    and returns whether that line satisfies one search term.  The second
    parameter, ``t``, has a default so that the callable can be invoked
    with the match alone; a Protocol is used instead of ``Callable``
    because ``Callable`` cannot express a parameter that has a default.
    """

    def __call__(
        self, m: re.Match, t: str = ...  # type: ignore[type-arg] # Requires Python 3.9
    ) -> bool:
        ...
def main() -> None: def main() -> None:
args = parser().parse_args() args = parser().parse_args()
(filter_type, filter_func) = parse_filters(args) (filter_types, filter_funcs) = parse_filters(args)
logfile_names = parse_logfile_names(args) logfile_names = parse_logfile_names(args)
try: try:
@ -173,7 +183,8 @@ def main() -> None:
for logline in logfile: for logline in logfile:
# As a performance optimization, just do a substring # As a performance optimization, just do a substring
# check before we parse the line fully # check before we parse the line fully
if args.filter not in logline.lower(): lowered = logline.lower()
if not all(f in lowered for f in args.filter_terms):
continue continue
if args.nginx: if args.nginx:
@ -185,11 +196,11 @@ def main() -> None:
if args.nginx: if args.nginx:
print(f"! Failed to parse:\n{logline}", file=sys.stderr) print(f"! Failed to parse:\n{logline}", file=sys.stderr)
continue continue
if passes_filters(filter_func, match, args): if passes_filters(filter_funcs, match, args):
print_line( print_line(
match, match,
args, args,
filter_type=filter_type, filter_types=filter_types,
) )
except KeyboardInterrupt: except KeyboardInterrupt:
sys.exit(signal.SIGINT + 128) sys.exit(signal.SIGINT + 128)
@ -228,67 +239,79 @@ def parse_logfile_names(args: argparse.Namespace) -> List[str]:
def parse_filters(
    args: argparse.Namespace,
) -> Tuple[Set[FilterType], List[FilterFunc]]:
    """Classify every search term and build one predicate per term.

    Each entry of ``args.filter_terms`` is matched against a series of
    heuristics (status code, status class, user-id, IPv4, IPv6, bare
    hostname, full domain, path) and turned into a predicate over the
    regex match of a parsed log line.

    Side effects on ``args``:
      * ``args.nginx`` is set to True when "502" is one of the terms.
      * ``args.all_lines`` is set to True when a path term is given.
      * ``args.filter_terms`` is replaced by the normalized, lowercased
        terms, for use as a cheap substring pre-check against a
        lowercased log line.

    Returns:
        A tuple of (set of filter types in use, list of predicates).

    Raises:
        RuntimeError: if a term matches none of the heuristics.
        SystemExit: via ``parser().error()`` when a term is incompatible
            with the selected log format, or when two terms are of the
            same type.
    """
    # The heuristics below are not intended to be precise -- they
    # certainly count things as "IPv4" or "IPv6" addresses that are
    # invalid.  However, we expect the input here to already be
    # reasonably well-formed.
    filter_types: Set[FilterType] = set()
    filter_funcs: List[FilterFunc] = []
    filter_terms: List[str] = []

    # Decide on nginx mode before classifying anything: several branches
    # below depend on args.nginx, so flipping it part-way through the
    # loop would classify earlier terms under the wrong log format.
    if not args.nginx and "502" in args.filter_terms:
        logging.warning("Adding --nginx -- 502's do not appear in Django logs.")
        args.nginx = True

    for filter_term in args.filter_terms:
        # Each lambda binds its term as a default argument, so it keeps
        # its own value rather than the loop variable's final value.
        if re.match(r"[1-5][0-9][0-9]$", filter_term):
            filter_func = lambda m, t=filter_term: m["code"] == t
            filter_type = FilterType.STATUS
        elif re.match(r"[1-5]xx$", filter_term):
            # "4xx" matches any status code starting with "4".
            filter_term = filter_term[0]
            filter_func = lambda m, t=filter_term: m["code"].startswith(t)
            filter_type = FilterType.STATUS
        elif re.match(r"\d+$", filter_term):
            if args.nginx:
                raise parser().error("Cannot parse user-ids with nginx logs; try without --nginx")
            filter_func = lambda m, t=filter_term: m["user_id"] == t
            filter_type = FilterType.USER_ID
        elif re.match(r"\d{1,3}(\.\d{1,3}){3}$", filter_term):
            filter_func = lambda m, t=filter_term: m["ip"] == t
            filter_type = FilterType.CLIENT_IP
        elif re.match(r"([a-f0-9:]+:+){1,7}[a-f0-9]+$", filter_term):
            filter_func = lambda m, t=filter_term: m["ip"] == t
            filter_type = FilterType.CLIENT_IP
        elif re.match(r"[a-z0-9]([a-z0-9-]*[a-z0-9])?$", filter_term.lower()):
            filter_term = filter_term.lower()
            if args.nginx:
                filter_func = lambda m, t=filter_term: m["hostname"].startswith(t + ".")
            else:
                filter_func = lambda m, t=filter_term: m["hostname"] == t
            filter_type = FilterType.HOSTNAME
        elif re.match(r"[a-z0-9-]+(\.[a-z0-9-]+)+$", filter_term.lower()) and re.search(
            r"[a-z-]", filter_term.lower()
        ):
            if not args.nginx:
                raise parser().error("Cannot parse full domains with Python logs; try --nginx")
            filter_term = filter_term.lower()
            filter_func = lambda m, t=filter_term: m["hostname"] == t
            filter_type = FilterType.HOSTNAME
        elif re.match(r"/\S*$", filter_term):
            filter_func = lambda m, t=filter_term: m["path"] == t
            filter_type = FilterType.PATH
            args.all_lines = True
        else:
            raise RuntimeError(
                f"Can't parse {filter_term} as an IP, hostname, user-id, path, or status code."
            )
        if filter_type in filter_types:
            parser().error("Supplied the same type of value more than once, which cannot match!")
        filter_types.add(filter_type)
        filter_funcs.append(filter_func)
        # The substring pre-check runs against a lowercased log line, so
        # the stored term must be lowercased too; otherwise a path that
        # contains an uppercase letter could never pass the pre-check.
        # The predicate above still compares the case-preserving value.
        filter_terms.append(filter_term.lower())

    # Push back the modified raw strings, so we can use them for fast substring searches
    args.filter_terms = filter_terms

    return (filter_types, filter_funcs)
def passes_filters( def passes_filters(
string_filter: Callable[[re.Match], bool], # type: ignore[type-arg] # Requires Python 3.9 string_filters: List[FilterFunc],
match: re.Match, # type: ignore[type-arg] # Requires Python 3.9 match: re.Match, # type: ignore[type-arg] # Requires Python 3.9
args: argparse.Namespace, args: argparse.Namespace,
) -> bool: ) -> bool:
if not string_filter(match): if not all(f(match) for f in string_filters):
return False return False
if args.all_lines: if args.all_lines:
@ -318,7 +341,7 @@ def passes_filters(
def print_line( def print_line(
match: re.Match, # type: ignore[type-arg] # Requires Python 3.9 match: re.Match, # type: ignore[type-arg] # Requires Python 3.9
args: argparse.Namespace, args: argparse.Namespace,
filter_type: FilterType, filter_types: Set[FilterType],
) -> None: ) -> None:
if args.full_line: if args.full_line:
print(match.group(0)) print(match.group(0))
@ -350,7 +373,7 @@ def print_line(
indicator = "!" indicator = "!"
color = FAIL color = FAIL
url = f"{BOLD}{match['path']}" url = f"{BOLD}{match['path']}"
if filter_type != FilterType.HOSTNAME: if FilterType.HOSTNAME not in filter_types:
hostname = match["hostname"] hostname = match["hostname"]
if hostname is None: if hostname is None:
hostname = "???." + settings.EXTERNAL_HOST hostname = "???." + settings.EXTERNAL_HOST
@ -370,8 +393,8 @@ def print_line(
parts = [ parts = [
ts, ts,
f"{duration:>5}ms", f"{duration:>5}ms",
f"{user_id:7}" if not args.nginx and filter_type != FilterType.USER_ID else None, f"{user_id:7}" if not args.nginx and FilterType.USER_ID not in filter_types else None,
f"{match['ip']:39}" if filter_type != FilterType.CLIENT_IP else None, f"{match['ip']:39}" if FilterType.CLIENT_IP not in filter_types else None,
indicator + match["code"], indicator + match["code"],
f"{match['method']:6}", f"{match['method']:6}",
url, url,