mirror of https://github.com/zulip/zulip.git
log-search: Add a tool to search nginx logs by IP/hostname.
This is a script to search nginx log files by server hostname or client IP address, printing the matching lines while skipping common, less interesting request lines.
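
Illustrative invocations (hypothetical examples, assuming the new file
lands as scripts/log-search; the flags are those defined by the parser
below):

    ./scripts/log-search 10.2.3.4
    ./scripts/log-search example --all-logs --full-line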
parent d86b7c4ef7
commit ba1237119c
@@ -32,6 +32,8 @@ ENDC = "\033[0m"
BLACKONYELLOW = "\x1b[0;30;43m"
WHITEONRED = "\x1b[0;37;41m"
BOLDRED = "\x1B[1;31m"
BOLD = "\x1b[1m"
GRAY = "\x1b[90m"

GREEN = "\x1b[32m"
YELLOW = "\x1b[33m"
@@ -0,0 +1,260 @@
#!/usr/bin/env python3

import argparse
import gzip
import os
import re
import sys
from enum import Enum, auto
from typing import Callable, TextIO

ZULIP_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(ZULIP_PATH)
from scripts.lib.zulip_tools import BOLD, CYAN, ENDC, FAIL, GRAY, OKBLUE


def parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(
        description="Search nginx logfiles for an IP or hostname, ignoring commonly-fetched URLs."
    )
    log_selection = parser.add_argument_group("File selection")
    log_selection_options = log_selection.add_mutually_exclusive_group()
    log_selection_options.add_argument(
        "--log-files",
        "-n",
        help="Number of log files to search",
        choices=range(1, 16),
        type=int,
    )
    log_selection_options.add_argument(
        "--all-logs",
        "-A",
        help="Parse all logfiles, not just the most recent",
        action="store_true",
    )
    log_selection_options.add_argument(
        "--min-hours",
        "-H",
        help="Estimated minimum number of hours to search; if the current log file "
        "is estimated to cover less than this, the previous log file is also searched",
        type=int,
        choices=range(0, 24),
        default=3,
    )

    filtering = parser.add_argument_group("Filtering")
    filtering.add_argument("filter", help="IP address or hostname to search for")
    filtering.add_argument(
        "--all-lines",
        "-L",
        help="Show all matching lines; equivalent to -suemtpr",
        action="store_true",
    )
    filtering.add_argument("--static", "-s", help="Include static file paths", action="store_true")
    filtering.add_argument("--uploads", "-u", help="Include file upload paths", action="store_true")
    filtering.add_argument("--events", "-e", help="Include event fetch paths", action="store_true")
    filtering.add_argument("--messages", "-m", help="Include message paths", action="store_true")
    filtering.add_argument(
        "--typing",
        "-t",
        help="Include typing notification paths",
        action="store_true",
    )
    filtering.add_argument("--presence", "-p", help="Include presence paths", action="store_true")
    filtering.add_argument(
        "--report", "-r", help="Include timing report paths", action="store_true"
    )

    output = parser.add_argument_group("Output")
    output.add_argument("--full-line", "-F", help="Show the full matching line", action="store_true")
    return parser
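

# Rotated logs older than access.log.1 are stored gzip-compressed (see
# the ".gz" suffix appended in main below), so open them transparently
# as text.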
def maybe_gzip(logfile_name: str) -> TextIO:
    if logfile_name.endswith(".gz"):
        return gzip.open(logfile_name, "rt")
    return open(logfile_name, "r")
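

# Matches the access log format in use here: combined-style fields,
# with the server hostname and request duration appended.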
NGINX_LOG_LINE_RE = re.compile(
    r"""
    (?P<ip> \S+ ) \s+
    - \s+
    (?P<user> \S+ ) \s+
    \[
       (?P<date> \d+/\w+/\d+ )
       :
       (?P<time> \d+:\d+:\d+ )
       \s+ \+0000
    \] \s+
    "
       (?P<method> \S+ )
       \s+
       (?P<path> [^"]+ )
       \s+
       (?P<http_version> HTTP/[^"]+ )
    " \s+
    (?P<code> \d+ ) \s+
    (?P<bytes> \d+ ) \s+
    "(?P<referer> [^"]* )" \s+
    "(?P<user_agent> [^"]* )" \s+
    (?P<hostname> \S+ ) \s+
    (?P<duration> \S+ )
    """,
    re.X,
)


class FilterType(Enum):
    HOSTNAME = auto()
    CLIENT_IP = auto()


def main() -> None:
    args = parser().parse_args()

    logfile_names = ["/var/log/nginx/access.log"]
    if args.all_logs:
        logfile_count = 15
    elif args.log_files is not None:
        logfile_count = args.log_files
    else:
        # Detect if there was a logfile rotation in the last
        # (min-hours)-ish hours, and if so include the previous
        # logfile as well.
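        # For example, with the default --min-hours=3, access.log.1 is
        # also searched whenever access.log is smaller than 3/24 of
        # access.log.1's size.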
        logfile_count = 1
        try:
            current_size = os.path.getsize(logfile_names[0])
            past_size = os.path.getsize(logfile_names[0] + ".1")
            if current_size < (args.min_hours / 24.0) * past_size:
                logfile_count = 2
        except FileNotFoundError:
            pass
    for n in range(1, logfile_count):
        logname = f"/var/log/nginx/access.log.{n}"
        if n > 1:
            logname += ".gz"
        logfile_names.append(logname)

    # The heuristics below are not intended to be precise -- they
    # certainly count things as "IPv4" or "IPv6" addresses that are
    # invalid.  However, we expect the input here to already be
    # reasonably well-formed.
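    # Accepted filter forms: a full IPv4 address, an IPv4 prefix ending
    # in ".", a (loosely-matched) IPv6 address, a bare subdomain label,
    # or a full dotted hostname.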
    filter = args.filter
    if re.match(r"\d{1,3}(\.\d{1,3}){3}$", filter):
        string_filter = lambda m: m["ip"] == filter
        filter_type = FilterType.CLIENT_IP
    elif re.match(r"\d{1,3}\.(\d{1,3}\.){0,2}$", filter):
        string_filter = lambda m: m["ip"].startswith(filter)
        filter_type = FilterType.CLIENT_IP
    elif re.match(r"([a-f0-9:]+:+){1,7}[a-f0-9]+$", filter):
        string_filter = lambda m: m["ip"] == filter
        filter_type = FilterType.CLIENT_IP
    elif re.match(r"[a-z0-9]([a-z0-9-]*[a-z0-9])?$", filter.lower()):
        filter = filter.lower()
        string_filter = lambda m: m["hostname"].startswith(filter + ".")
        filter_type = FilterType.HOSTNAME
    elif re.match(r"[a-z0-9-]+(\.[a-z0-9-]+)+$", filter.lower()) and re.search(
        r"[a-z-]", filter.lower()
    ):
        filter = filter.lower()
        string_filter = lambda m: m["hostname"] == filter
        filter_type = FilterType.HOSTNAME
    else:
        raise RuntimeError(f"Can't parse {filter} as an IP or hostname.")
    assert filter_type is not None

    for logfile_name in reversed(logfile_names):
        with maybe_gzip(logfile_name) as logfile:
            for logline in logfile:
                # As a performance optimization, just do a substring
                # check before we parse the line fully.
                if filter not in logline.lower():
                    continue

                match = NGINX_LOG_LINE_RE.match(logline)
                if match is None:
                    print(f"! Failed to parse:\n{logline}", file=sys.stderr)
                    continue
                if passes_filters(string_filter, match, args):
                    print_line(
                        match,
                        args,
                        filter_type=filter_type,
                    )


def passes_filters(
    string_filter: Callable[[re.Match], bool],  # type: ignore[type-arg]  # Requires Python 3.9
    match: re.Match,  # type: ignore[type-arg]  # Requires Python 3.9
    args: argparse.Namespace,
) -> bool:
    if not string_filter(match):
        return False

    if args.all_lines:
        return True

    path = match["path"]
    if path.startswith("/static/") and not args.static:
        return False
    if path.startswith("/user_uploads/") and not args.uploads:
        return False
    if re.match(r"/(json|api/v1)/events($|\?|/)", path) and not args.events:
        return False
    if path in ("/api/v1/typing", "/json/typing") and not args.typing:
        return False
    if re.match(r"/(json|api/v1)/messages($|\?|/)", path) and not args.messages:
        return False
    if path in ("/api/v1/users/me/presence", "/json/users/me/presence") and not args.presence:
        return False
    if path.startswith(("/api/v1/report/", "/json/report/")) and not args.report:
        return False

    return True


def print_line(
    match: re.Match,  # type: ignore[type-arg]  # Requires Python 3.9
    args: argparse.Namespace,
    filter_type: FilterType,
) -> None:
    if args.full_line:
        print(match.group(0))
        return

    if args.all_logs or (args.log_files is not None and args.log_files > 1):
        ts = match["date"] + ":" + match["time"]
    else:
        ts = match["time"]
    code = int(match["code"])
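    # Mark notable response codes: ":" for 401, "-" for 499 (nginx's
    # "client closed request"), ">" for other 4xx, and "!" for 5xx.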
    indicator = " "
    color = ""
    if code == 401:
        indicator = ":"
        color = CYAN
    elif code == 499:
        indicator = "-"
        color = GRAY
    elif 400 <= code < 500:
        indicator = ">"
        color = OKBLUE
    elif 500 <= code <= 599:
        indicator = "!"
        color = FAIL
    url = f"{BOLD}{match['path']}"
    if filter_type != FilterType.HOSTNAME:
        url = "https://" + match["hostname"] + url

    parts = [
        ts,
        f"{match['ip']:39}" if filter_type != FilterType.CLIENT_IP else None,
        indicator + match["code"],
        f"{match['method']:6}",
        url,
    ]

    print(color + " ".join([p for p in parts if p is not None]) + ENDC)


if __name__ == "__main__":
    main()