zulip/scripts/log-search

#!/usr/bin/env python3
import argparse
import calendar
import gzip
import logging
import os
import re
import signal
import sys
from datetime import date, datetime, timedelta, timezone
from enum import Enum, auto
from re import Match
from typing import TextIO
ZULIP_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(ZULIP_PATH)
from scripts.lib.setup_path import setup_path
setup_path()
os.environ["DJANGO_SETTINGS_MODULE"] = "zproject.settings"
from typing import Protocol
from django.conf import settings
from scripts.lib.zulip_tools import (
    BOLD,
    CYAN,
    ENDC,
    FAIL,
    GRAY,
    OKBLUE,
    get_config,
    get_config_file,
)
def parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(description="Search logfiles, ignoring commonly-fetched URLs.")
    log_selection = parser.add_argument_group("File selection")
    log_selection_options = log_selection.add_mutually_exclusive_group()
    access_log_retention_days = int(
        get_config(get_config_file(), "application_server", "access_log_retention_days", "14")
    )
    log_selection_options.add_argument(
        "--log-files",
        "-n",
        help="Number of log files to search",
        choices=range(1, access_log_retention_days + 2),
        metavar=f"[1-{access_log_retention_days + 1}]",
        type=int,
    )
    log_selection_options.add_argument(
        "--all-logs",
        "-A",
        help="Parse all logfiles, not just most recent",
        action="store_true",
    )
    log_selection_options.add_argument(
        "--min-hours",
        "-H",
        help="Estimated minimum number of hours; includes previous log file, if estimated less than this",
        type=int,
        choices=range(24),
        default=3,
    )
    log_selection.add_argument(
        "--nginx",
        "-N",
        help="Parse from NGINX logs, not server.log",
        action="store_true",
    )

    filtering = parser.add_argument_group("Filtering")
    filtering.add_argument(
        "filter_terms",
        help="IP address, hostname, user-id, HTTP method, path, datetime prefix, or status code to search for; multiple are AND'ed together",
        nargs="+",
    )
    filtering.add_argument(
        "--all-lines",
        "-L",
        help="Show all matching lines; equivalent to -suemtpr",
        action="store_true",
    )
    filtering.add_argument("--static", "-s", help="Include static file paths", action="store_true")
    filtering.add_argument("--uploads", "-u", help="Include file upload paths", action="store_true")
    filtering.add_argument("--avatars", "-a", help="Include avatar paths", action="store_true")
    filtering.add_argument("--events", "-e", help="Include event fetch paths", action="store_true")
    filtering.add_argument("--messages", "-m", help="Include message paths", action="store_true")
    filtering.add_argument(
        "--typing",
        "-t",
        help="Include typing notification path",
        action="store_true",
    )
    filtering.add_argument("--presence", "-p", help="Include presence paths", action="store_true")
    filtering.add_argument(
        "--report", "-r", help="Include Sentry reporting paths", action="store_true"
    )
    filtering.add_argument(
        "--no-other", "-O", help="Exclude paths not explicitly included", action="store_true"
    )
    filtering.add_argument(
        "--client",
        "--user-agent",
        "-C",
        help="Only include requests whose client/user-agent contains this string",
    )

    output = parser.add_argument_group("Output")
    output.add_argument("--full-line", "-F", help="Show full matching line", action="store_true")
    output.add_argument("--timeline", "-T", help="Show start, end, and gaps", action="store_true")
    return parser
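
# A few illustrative invocations (values here are hypothetical; filter terms
# are AND'ed together, as the parser above describes):
#
#   ./scripts/log-search 502                  # recent 502s (auto-switches to nginx logs)
#   ./scripts/log-search 2023-03-05 42        # requests by user-id 42 on a given day
#   ./scripts/log-search examplecorp GET -T   # one realm's GETs, with a timeline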


def maybe_gzip(logfile_name: str) -> TextIO:
    if logfile_name.endswith(".gz"):
        return gzip.open(logfile_name, "rt")
    return open(logfile_name)  # noqa: SIM115


NGINX_LOG_LINE_RE = re.compile(
    r"""
    (?P<ip> \S+ ) \s+
    - \s+
    (?P<user> \S+ ) \s+
    \[
       (?P<date> \d+/\w+/\d+ )
       :
       (?P<time> \d+:\d+:\d+ )
       \s+ [+-]\d+
    \] \s+
    "
      (?P<method> \S+ )
      \s+
      (?P<full_path> (?P<path> [^"?]+ ) (\?[^"]*)? )
      \s+
      (?P<http_version> HTTP/[^"]+ )
    " \s+
    (?P<code> \d+ ) \s+
    (?P<bytes> \d+ ) \s+
    "(?P<referer> [^"]* )" \s+
    "(?P<user_agent> [^"]* )" \s+
    (?P<hostname> \S+ ) \s+
    (?P<duration> \S+ )
    """,
    re.VERBOSE,
)
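
# A hypothetical access-log line in the shape this regex expects (the last
# two fields, hostname and duration, come from Zulip's nginx log format;
# wrapped here for readability -- real log lines are single lines):
#
#   203.0.113.5 - - [05/Mar/2023:12:34:56 +0000] "GET /json/messages?num_before=50 HTTP/2.0"
#       200 4523 "-" "ZulipMobile/27.170" zulip.example.com 0.123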


PYTHON_LOG_LINE_RE = re.compile(
    r"""
    (?P<date> \d+-\d+-\d+ ) \s+
    (?P<time> \d+:\d+:\d+\.\d+ ) \s+
    INFO \s+  # All access log lines are INFO
    (pid:\d+ \s+)?
    \[ (?P<source> zr(:\d+)?) \] \s+
    (?P<ip>
        \d{1,3}(\.\d{1,3}){3}
      | ([a-f0-9:]+:+){1,7}[a-f0-9]*
    ) \s+
    (?P<method> [A-Z]+ ) \s+
    (?P<code> \d+ ) \s+
    (?P<duration> \S+ ) \s+  # This can be "217ms" or "1.7s"
    ( \( [^)]+ \) \s+ )*
    (?P<full_path> (?P<path> /\S* ) ) \s+
    .*  # Multiple extra things can go here
    \(
    (?P<user>
        ( (?P<user_id> \d+ ) | unauth )
        @
        (?P<hostname> \S+ )
      | zulip-server:\S+
      | scim-client:\S+
      | internal
    ) \s+ via \s+ (?P<user_agent> .* )
    \)
    """,
    re.VERBOSE,
)
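
# And a hypothetical server.log line in the shape this regex expects
# (wrapped here for readability -- real log lines are single lines):
#
#   2023-03-05 12:34:56.789 INFO [zr] 203.0.113.5 GET 200 123ms /json/messages
#       (42@examplecorp via ZulipMobile/27.170)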


class FilterType(Enum):
    HOSTNAME = auto()
    CLIENT_IP = auto()
    USER_ID = auto()
    METHOD = auto()
    PATH = auto()
    STATUS = auto()
    DATETIME = auto()


class FilterFunc(Protocol):
    def __call__(self, m: Match[str], t: str = ...) -> bool: ...
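
# The filters built in parse_filters() below are lambdas that capture their
# term through a default argument (t=filter_term), binding the value at
# definition time rather than at call time -- a minimal sketch of the pattern:
#
#   fs = [lambda m, t=term: m["code"] == t for term in ("404", "503")]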


def main() -> None:
    args = parser().parse_args()

    (filter_types, filter_funcs, substr_terms) = parse_filters(args)
    logfile_names = parse_logfile_names(args)
    if args.timeline and args.nginx:
        print("! nginx logs not suggested for timeline, due to imprecision", file=sys.stderr)

    use_color = sys.stdout.isatty()

    lowered_terms = [term.lower() for term in substr_terms]
    try:
        for logfile_name in reversed(logfile_names):
            with maybe_gzip(logfile_name) as logfile:
                for logline in logfile:
                    # As a performance optimization, just do a substring
                    # check before we parse the line fully
                    lowered = logline.lower()
                    if not all(f in lowered for f in lowered_terms):
                        continue

                    if args.nginx:
                        match = NGINX_LOG_LINE_RE.match(logline)
                    else:
                        match = PYTHON_LOG_LINE_RE.match(logline)
                    if match is None:
                        # We expect other types of loglines in the Python logfiles
                        if args.nginx:
                            print(f"! Failed to parse:\n{logline}", file=sys.stderr)
                        continue
                    if passes_filters(filter_funcs, match, args):
                        print_line(
                            match,
                            args,
                            filter_types=filter_types,
                            use_color=use_color,
                        )
    except BrokenPipeError:
        # Python flushes standard streams on exit; redirect remaining output
        # to devnull to avoid another BrokenPipeError at shutdown
        devnull = os.open(os.devnull, os.O_WRONLY)
        os.dup2(devnull, sys.stdout.fileno())
        sys.exit(1)
    except KeyboardInterrupt:
        sys.exit(signal.SIGINT + 128)


def parse_logfile_names(args: argparse.Namespace) -> list[str]:
    if args.nginx:
        base_path = "/var/log/nginx/access.log"
    else:
        base_path = "/var/log/zulip/server.log"

    for term in args.filter_terms:
        date_term = re.match(r"2\d\d\d-\d\d-\d\d", term)
        if not date_term:
            continue
        # They're limiting to a specific day; find the right logfile
        # which is going to have any matches
        rotations = int(
            (datetime.now(tz=timezone.utc).date() - date.fromisoformat(date_term[0]))
            / timedelta(days=1)
        )
        access_log_retention_days = int(
            get_config(get_config_file(), "application_server", "access_log_retention_days", "14")
        )
        if rotations > access_log_retention_days:
            raise RuntimeError(f"Date is too old (more than {access_log_retention_days} days ago)")
        if rotations == 0:
            return [base_path]
        if rotations == 1:
            return [f"{base_path}.1"]
        else:
            return [f"{base_path}.{rotations}.gz"]

    logfile_names = [base_path]
    if args.all_logs:
        logfile_count = 15
    elif args.log_files is not None:
        logfile_count = args.log_files
    else:
        # Detect if there was a logfile rotation in the last
        # (min-hours)-ish hours, and if so include the previous
        # logfile as well.
        logfile_count = 1
        try:
            current_size = os.path.getsize(base_path)
            past_size = os.path.getsize(base_path + ".1")
            if current_size < (args.min_hours / 24.0) * past_size:
                logfile_count = 2
        except FileNotFoundError:
            pass
    for n in range(1, logfile_count):
        logname = f"{base_path}.{n}"
        if n > 1:
            logname += ".gz"
        logfile_names.append(logname)
    return logfile_names
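
# Rotated logs are named like server.log.1 (the most recent rotation,
# uncompressed) and server.log.2.gz, server.log.3.gz, ... (older, gzipped),
# which is why only the n > 1 names get a ".gz" suffix above.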


month_no_to_name_lookup = {f"{k:02d}": v for k, v in enumerate(calendar.month_abbr)}
month_name_to_no_lookup = {v: k for k, v in month_no_to_name_lookup.items()}


def convert_from_nginx_date(date: str) -> str:
    day_of_month, month_abbr, year = date.split("/")
    return f"{year}-{month_name_to_no_lookup[month_abbr]}-{day_of_month}"


def convert_to_nginx_date(date: str) -> str:
    year, month_no, day_of_month = date.split("-")
    return f"{day_of_month}/{month_no_to_name_lookup[month_no]}/{year}"
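
# For example:
#   convert_from_nginx_date("05/Mar/2023") == "2023-03-05"
#   convert_to_nginx_date("2023-03-05") == "05/Mar/2023"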


def parse_filters(
    args: argparse.Namespace,
) -> tuple[set[FilterType], list[FilterFunc], list[str]]:
    # The heuristics below are not intended to be precise -- they
    # certainly count things as "IPv4" or "IPv6" addresses that are
    # invalid.  However, we expect the input here to already be
    # reasonably well-formed.
    filter_types = set()
    filter_funcs = []
    filter_terms = []

    if args.events and not args.nginx:
        logging.warning("Adding --nginx -- /events requests do not appear in Django logs.")
        args.nginx = True

    for filter_term in args.filter_terms:
        if re.match(r"[1-5][0-9][0-9]$", filter_term):
            filter_func = lambda m, t=filter_term: m["code"] == t
            filter_type = FilterType.STATUS
            if not args.nginx and filter_term == "502":
                logging.warning("Adding --nginx -- 502's do not appear in Django logs.")
                args.nginx = True
        elif re.match(r"[1-5]xx$", filter_term):
            filter_term = filter_term[0]
            filter_func = lambda m, t=filter_term: m["code"].startswith(t)
            filter_type = FilterType.STATUS
        elif re.match(r"\d+$", filter_term):
            if args.nginx:
                raise parser().error("Cannot parse user-ids with nginx logs; try without --nginx")
            filter_func = lambda m, t=filter_term: m["user_id"] == t
            filter_type = FilterType.USER_ID
        elif re.match(r"\d{1,3}(\.\d{1,3}){3}$", filter_term):
            filter_func = lambda m, t=filter_term: m["ip"] == t
            filter_type = FilterType.CLIENT_IP
        elif re.match(r"([a-f0-9:]+:+){1,7}[a-f0-9]+$", filter_term):
            filter_func = lambda m, t=filter_term: m["ip"] == t
            filter_type = FilterType.CLIENT_IP
        elif re.match(r"DELETE|GET|HEAD|OPTIONS|PATCH|POST|PUT", filter_term):
            filter_func = lambda m, t=filter_term: m["method"].upper() == t
            filter_type = FilterType.METHOD
        elif re.match(r"(2\d\d\d-\d\d-\d\d)( \d(\d(:(\d(\d(:(\d\d?)?)?)?)?)?)?)?", filter_term):
            if args.nginx:
                datetime_parts = filter_term.split(" ")
                filter_term = ":".join(
                    [convert_to_nginx_date(datetime_parts[0]), *datetime_parts[1:]]
                )
                filter_func = lambda m, t=filter_term: f"{m['date']}:{m['time']}".startswith(t)
            else:
                filter_func = lambda m, t=filter_term: f"{m['date']} {m['time']}".startswith(t)
            filter_type = FilterType.DATETIME
        elif re.match(r"[a-z0-9]([a-z0-9-]*[a-z0-9])?$", filter_term.lower()):
            filter_term = filter_term.lower()
            if args.nginx:
                filter_func = lambda m, t=filter_term: m["hostname"].startswith(t + ".")
            else:
                filter_func = lambda m, t=filter_term: m["hostname"] == t
            filter_type = FilterType.HOSTNAME
        elif re.match(r"[a-z0-9-]+(\.[a-z0-9-]+)+$", filter_term.lower()) and re.search(
            r"[a-z-]", filter_term.lower()
        ):
            if not args.nginx:
                raise parser().error("Cannot parse full domains with Python logs; try --nginx")
            filter_term = filter_term.lower()
            filter_func = lambda m, t=filter_term: m["hostname"] == t
            filter_type = FilterType.HOSTNAME
        elif re.match(r"/\S*$", filter_term):
            filter_func = lambda m, t=filter_term: m["path"] == t
            filter_type = FilterType.PATH
            args.all_lines = True
        else:
            raise RuntimeError(
                f"Can't parse {filter_term} as an IP, hostname, user-id, HTTP method, path, or status code."
            )

        if filter_type in filter_types:
            parser().error("Supplied the same type of value more than once, which cannot match!")
        filter_types.add(filter_type)
        filter_funcs.append(filter_func)
        filter_terms.append(filter_term)

    if args.client:
        filter_funcs.append(lambda m, t=args.client: t in m["user_agent"])
        filter_terms.append(args.client)

    return (filter_types, filter_funcs, filter_terms)
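
# For example, a hypothetical `log-search 42 POST /api/v1/messages` classifies
# "42" as a USER_ID, "POST" as a METHOD, and "/api/v1/messages" as a PATH;
# only lines matching all three filters are printed.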


def passes_filters(
    string_filters: list[FilterFunc],
    match: Match[str],
    args: argparse.Namespace,
) -> bool:
    if not all(f(match) for f in string_filters):
        return False

    if args.all_lines:
        return True

    path = match["path"]
    if path.startswith("/static/"):
        return args.static
    elif path.startswith("/user_uploads/"):
        return args.uploads
    elif path.startswith(("/user_avatars/", "/avatar/")):
        return args.avatars
    elif re.match(r"/(json|api/v1)/events($|\?|/)", path):
        return args.events
    elif path in ("/api/v1/typing", "/json/typing"):
        return args.typing
    elif re.match(r"/(json|api/v1)/messages($|\?|/)", path):
        return args.messages
    elif path in ("/api/v1/users/me/presence", "/json/users/me/presence"):
        return args.presence
    elif path == "/error_tracing":
        return args.report
    else:
        return not args.no_other
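

# Module-level state for --timeline: the end time of the previously printed
# request, so print_line() can show the gap (or overlap) between requests.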
last_match_end: datetime | None = None


def print_line(
    match: Match[str],
    args: argparse.Namespace,
    filter_types: set[FilterType],
    use_color: bool,
) -> None:
    global last_match_end

    if args.full_line:
        print(match.group(0))
        return

    if args.nginx:
        date = convert_from_nginx_date(match["date"])
    else:
        date = match["date"]
    if args.all_logs or (args.log_files is not None and args.log_files > 1):
        ts = date + " " + match["time"]
    else:
        ts = match["time"]

    if match["duration"].endswith("ms"):
        duration_ms = int(match["duration"][:-2])
    else:
        duration_ms = int(float(match["duration"][:-1]) * 1000)

    code = int(match["code"])
    indicator = " "
    color = ""
    if code == 401:
        indicator = ":"
        color = CYAN
    elif code == 499:
        indicator = "-"
        color = GRAY
    elif code >= 400 and code < 499:
        indicator = ">"
        color = OKBLUE
    elif code >= 500 and code <= 599:
        indicator = "!"
        color = FAIL
    if use_color:
        url = f"{BOLD}{match['full_path']}"
    else:
        url = match["full_path"]
        color = ""
    if FilterType.HOSTNAME not in filter_types:
        hostname = match["hostname"]
        if hostname is None:
            hostname = "???." + settings.EXTERNAL_HOST
        elif not args.nginx:
            if hostname != "root":
                hostname += "." + settings.EXTERNAL_HOST
            elif settings.EXTERNAL_HOST == "zulipchat.com":
                hostname = "zulip.com"
            else:
                hostname = settings.EXTERNAL_HOST
        url = "https://" + hostname + url
    user_id = ""
    if not args.nginx and match["user_id"] is not None:
        user_id = match["user_id"] + "@"
    if args.timeline:
        logline_end = datetime.fromisoformat(date + " " + match["time"])
        logline_start = logline_end - timedelta(milliseconds=duration_ms)
        if last_match_end is not None:
            gap_ms = int((logline_start - last_match_end) / timedelta(milliseconds=1))
            if gap_ms > 5000:
                print()
                print(f"========== {int(gap_ms / 1000):>4} second gap ==========")
                print()
            elif gap_ms > 1000:
                print(f"============ {gap_ms:>5}ms gap ============")
            elif gap_ms > 0:
                print(f"------------ {gap_ms:>5}ms gap ------------")
            else:
                print(f"!!!!!!!!!! {abs(gap_ms):>5}ms overlap !!!!!!!!!!")

        if args.all_logs or (args.log_files is not None and args.log_files > 1):
            print(logline_start.isoformat(" ", timespec="milliseconds") + " (start)")
        else:
            print(logline_start.time().isoformat(timespec="milliseconds") + " (start)")
        last_match_end = logline_end

    parts = [
        ts,
        f"{duration_ms:>5}ms",
        f"{user_id:7}" if not args.nginx and FilterType.USER_ID not in filter_types else None,
        f"{match['ip']:39}" if FilterType.CLIENT_IP not in filter_types else None,
        indicator + match["code"],
        f"{match['method']:6}",
        url,
    ]
    print(color + " ".join(p for p in parts if p is not None) + (ENDC if use_color else ""))
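
# With default options, each match prints one line with these columns (per
# the `parts` list above): timestamp, duration, user-id (Python logs only),
# client IP, status code with its indicator character, method, and URL.
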
if __name__ == "__main__":
    main()