diff --git a/scripts/lib/zulip_tools.py b/scripts/lib/zulip_tools.py index b825cfd93c..f5a3a39dd3 100755 --- a/scripts/lib/zulip_tools.py +++ b/scripts/lib/zulip_tools.py @@ -32,6 +32,8 @@ ENDC = "\033[0m" BLACKONYELLOW = "\x1b[0;30;43m" WHITEONRED = "\x1b[0;37;41m" BOLDRED = "\x1B[1;31m" +BOLD = "\x1b[1m" +GRAY = "\x1b[90m" GREEN = "\x1b[32m" YELLOW = "\x1b[33m" diff --git a/scripts/log-search b/scripts/log-search new file mode 100755 index 0000000000..5f60b68c56 --- /dev/null +++ b/scripts/log-search @@ -0,0 +1,260 @@ +#!/usr/bin/env python3 + +import argparse +import gzip +import os +import re +import sys +from enum import Enum, auto +from typing import Callable, TextIO + +ZULIP_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +sys.path.append(ZULIP_PATH) +from scripts.lib.zulip_tools import BOLD, CYAN, ENDC, FAIL, GRAY, OKBLUE + + +def parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + description="Search nginx logfiles for an IP or hostname, ignoring commonly-fetched URLs." + ) + log_selection = parser.add_argument_group("File selection") + log_selection_options = log_selection.add_mutually_exclusive_group() + log_selection_options.add_argument( + "--log-files", + "-n", + help="Number of log files to search", + choices=range(1, 16), + type=int, + ) + log_selection_options.add_argument( + "--all-logs", + "-A", + help="Parse all logfiles, not just most recent", + action="store_true", + ) + log_selection_options.add_argument( + "--min-hours", + "-H", + help="Estimated minimum number of hours; includes previous log file, if estimated less than this", + type=int, + choices=range(0, 24), + default=3, + ) + + filtering = parser.add_argument_group("Filtering") + filtering.add_argument("filter", help="IP address or hostname to search for") + filtering.add_argument( + "--all-lines", + "-L", + help="Show all matching lines; equivalent to -suemtpr", + action="store_true", + ) + filtering.add_argument("--static", "-s", help="Include static file paths", action="store_true") + filtering.add_argument("--uploads", "-u", help="Include file upload paths", action="store_true") + filtering.add_argument("--events", "-e", help="Include event fetch paths", action="store_true") + filtering.add_argument("--messages", "-m", help="Include message paths", action="store_true") + filtering.add_argument( + "--typing", + "-t", + help="Include typing notification path", + action="store_true", + ) + filtering.add_argument("--presence", "-p", help="Include presence paths", action="store_true") + filtering.add_argument( + "--report", "-r", help="Include timing report paths", action="store_true" + ) + + output = parser.add_argument_group("Output") + output.add_argument("--full-line", "-F", help="Show full matching line", action="store_true") + return parser + + +def maybe_gzip(logfile_name: str) -> TextIO: + if logfile_name.endswith(".gz"): + return gzip.open(logfile_name, "rt") + return open(logfile_name, "r") + + +NGINX_LOG_LINE_RE = re.compile( + r""" + (?P \S+ ) \s+ + - \s+ + (?P \S+ ) \s+ + \[ + (?P \d+/\w+/\d+ ) + : + (?P