mirror of https://github.com/zulip/zulip.git
log-search: Allow multiple search terms.
This allows AND'ing multiple terms together.
This commit is contained in:
parent
bd73e7d411
commit
258b658cc0
|
@ -8,7 +8,7 @@ import re
|
||||||
import signal
|
import signal
|
||||||
import sys
|
import sys
|
||||||
from enum import Enum, auto
|
from enum import Enum, auto
|
||||||
from typing import Callable, List, TextIO, Tuple
|
from typing import List, Set, TextIO, Tuple
|
||||||
|
|
||||||
ZULIP_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
ZULIP_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
sys.path.append(ZULIP_PATH)
|
sys.path.append(ZULIP_PATH)
|
||||||
|
@ -20,6 +20,7 @@ setup_path()
|
||||||
os.environ["DJANGO_SETTINGS_MODULE"] = "zproject.settings"
|
os.environ["DJANGO_SETTINGS_MODULE"] = "zproject.settings"
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
|
from typing_extensions import Protocol
|
||||||
|
|
||||||
from scripts.lib.zulip_tools import BOLD, CYAN, ENDC, FAIL, GRAY, OKBLUE
|
from scripts.lib.zulip_tools import BOLD, CYAN, ENDC, FAIL, GRAY, OKBLUE
|
||||||
|
|
||||||
|
@ -58,7 +59,9 @@ def parser() -> argparse.ArgumentParser:
|
||||||
|
|
||||||
filtering = parser.add_argument_group("Filtering")
|
filtering = parser.add_argument_group("Filtering")
|
||||||
filtering.add_argument(
|
filtering.add_argument(
|
||||||
"filter", help="IP address, hostname, user-id, path, or status code to search for"
|
"filter_terms",
|
||||||
|
help="IP address, hostname, user-id, path, or status code to search for; multiple are AND'ed together",
|
||||||
|
nargs="+",
|
||||||
)
|
)
|
||||||
filtering.add_argument(
|
filtering.add_argument(
|
||||||
"--all-lines",
|
"--all-lines",
|
||||||
|
@ -161,10 +164,17 @@ class FilterType(Enum):
|
||||||
STATUS = auto()
|
STATUS = auto()
|
||||||
|
|
||||||
|
|
||||||
|
class FilterFunc(Protocol):
|
||||||
|
def __call__(
|
||||||
|
self, m: re.Match, t: str = ... # type: ignore[type-arg] # Requires Python 3.9
|
||||||
|
) -> bool:
|
||||||
|
...
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
|
def main() -> None:
|
||||||
args = parser().parse_args()
|
args = parser().parse_args()
|
||||||
|
|
||||||
(filter_type, filter_func) = parse_filters(args)
|
(filter_types, filter_funcs) = parse_filters(args)
|
||||||
logfile_names = parse_logfile_names(args)
|
logfile_names = parse_logfile_names(args)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -173,7 +183,8 @@ def main() -> None:
|
||||||
for logline in logfile:
|
for logline in logfile:
|
||||||
# As a performance optimization, just do a substring
|
# As a performance optimization, just do a substring
|
||||||
# check before we parse the line fully
|
# check before we parse the line fully
|
||||||
if args.filter not in logline.lower():
|
lowered = logline.lower()
|
||||||
|
if not all(f in lowered for f in args.filter_terms):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if args.nginx:
|
if args.nginx:
|
||||||
|
@ -185,11 +196,11 @@ def main() -> None:
|
||||||
if args.nginx:
|
if args.nginx:
|
||||||
print(f"! Failed to parse:\n{logline}", file=sys.stderr)
|
print(f"! Failed to parse:\n{logline}", file=sys.stderr)
|
||||||
continue
|
continue
|
||||||
if passes_filters(filter_func, match, args):
|
if passes_filters(filter_funcs, match, args):
|
||||||
print_line(
|
print_line(
|
||||||
match,
|
match,
|
||||||
args,
|
args,
|
||||||
filter_type=filter_type,
|
filter_types=filter_types,
|
||||||
)
|
)
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
sys.exit(signal.SIGINT + 128)
|
sys.exit(signal.SIGINT + 128)
|
||||||
|
@ -228,67 +239,79 @@ def parse_logfile_names(args: argparse.Namespace) -> List[str]:
|
||||||
|
|
||||||
def parse_filters(
|
def parse_filters(
|
||||||
args: argparse.Namespace,
|
args: argparse.Namespace,
|
||||||
) -> Tuple[FilterType, Callable[[re.Match], bool]]: # type: ignore[type-arg] # Requires Python 3.9
|
) -> Tuple[Set[FilterType], List[FilterFunc]]:
|
||||||
# The heuristics below are not intended to be precise -- they
|
# The heuristics below are not intended to be precise -- they
|
||||||
# certainly count things as "IPv4" or "IPv6" addresses that are
|
# certainly count things as "IPv4" or "IPv6" addresses that are
|
||||||
# invalid. However, we expect the input here to already be
|
# invalid. However, we expect the input here to already be
|
||||||
# reasonably well-formed.
|
# reasonably well-formed.
|
||||||
|
|
||||||
filter = args.filter
|
filter_types = set()
|
||||||
|
filter_funcs = []
|
||||||
|
filter_terms = []
|
||||||
|
|
||||||
if re.match(r"[1-5][0-9][0-9]$", filter):
|
for filter_term in args.filter_terms:
|
||||||
filter_func = lambda m: m["code"] == filter
|
if re.match(r"[1-5][0-9][0-9]$", filter_term):
|
||||||
|
filter_func = lambda m, t=filter_term: m["code"] == t
|
||||||
filter_type = FilterType.STATUS
|
filter_type = FilterType.STATUS
|
||||||
if not args.nginx and filter == "502":
|
if not args.nginx and filter_term == "502":
|
||||||
logging.warning("Adding --nginx -- 502's do not appear in Django logs.")
|
logging.warning("Adding --nginx -- 502's do not appear in Django logs.")
|
||||||
args.nginx = True
|
args.nginx = True
|
||||||
elif re.match(r"[1-5]xx$", filter):
|
elif re.match(r"[1-5]xx$", filter_term):
|
||||||
filter = filter[0]
|
filter_term = filter_term[0]
|
||||||
filter_func = lambda m: m["code"].startswith(filter)
|
filter_func = lambda m, t=filter_term: m["code"].startswith(t)
|
||||||
filter_type = FilterType.STATUS
|
filter_type = FilterType.STATUS
|
||||||
elif re.match(r"\d+$", filter):
|
elif re.match(r"\d+$", filter_term):
|
||||||
if args.nginx:
|
if args.nginx:
|
||||||
raise parser().error("Cannot parse user-ids with nginx logs; try without --nginx")
|
raise parser().error("Cannot parse user-ids with nginx logs; try without --nginx")
|
||||||
filter_func = lambda m: m["user_id"] == filter
|
filter_func = lambda m, t=filter_term: m["user_id"] == t
|
||||||
filter_type = FilterType.USER_ID
|
filter_type = FilterType.USER_ID
|
||||||
elif re.match(r"\d{1,3}(\.\d{1,3}){3}$", filter):
|
elif re.match(r"\d{1,3}(\.\d{1,3}){3}$", filter_term):
|
||||||
filter_func = lambda m: m["ip"] == filter
|
filter_func = lambda m, t=filter_term: m["ip"] == t
|
||||||
filter_type = FilterType.CLIENT_IP
|
filter_type = FilterType.CLIENT_IP
|
||||||
elif re.match(r"([a-f0-9:]+:+){1,7}[a-f0-9]+$", filter):
|
elif re.match(r"([a-f0-9:]+:+){1,7}[a-f0-9]+$", filter_term):
|
||||||
filter_func = lambda m: m["ip"] == filter
|
filter_func = lambda m, t=filter_term: m["ip"] == t
|
||||||
filter_type = FilterType.CLIENT_IP
|
filter_type = FilterType.CLIENT_IP
|
||||||
elif re.match(r"[a-z0-9]([a-z0-9-]*[a-z0-9])?$", filter.lower()):
|
elif re.match(r"[a-z0-9]([a-z0-9-]*[a-z0-9])?$", filter_term.lower()):
|
||||||
filter = filter.lower()
|
filter_term = filter_term.lower()
|
||||||
if args.nginx:
|
if args.nginx:
|
||||||
filter_func = lambda m: m["hostname"].startswith(filter + ".")
|
filter_func = lambda m, t=filter_term: m["hostname"].startswith(t + ".")
|
||||||
else:
|
else:
|
||||||
filter_func = lambda m: m["hostname"] == filter
|
filter_func = lambda m, t=filter_term: m["hostname"] == t
|
||||||
filter_type = FilterType.HOSTNAME
|
filter_type = FilterType.HOSTNAME
|
||||||
elif re.match(r"[a-z0-9-]+(\.[a-z0-9-]+)+$", filter.lower()) and re.search(
|
elif re.match(r"[a-z0-9-]+(\.[a-z0-9-]+)+$", filter_term.lower()) and re.search(
|
||||||
r"[a-z-]", filter.lower()
|
r"[a-z-]", filter_term.lower()
|
||||||
):
|
):
|
||||||
if not args.nginx:
|
if not args.nginx:
|
||||||
raise parser().error("Cannot parse full domains with Python logs; try --nginx")
|
raise parser().error("Cannot parse full domains with Python logs; try --nginx")
|
||||||
filter = filter.lower()
|
filter_term = filter_term.lower()
|
||||||
filter_func = lambda m: m["hostname"] == filter
|
filter_func = lambda m, t=filter_term: m["hostname"] == t
|
||||||
filter_type = FilterType.HOSTNAME
|
filter_type = FilterType.HOSTNAME
|
||||||
elif re.match(r"/\S*$", filter):
|
elif re.match(r"/\S*$", filter_term):
|
||||||
filter_func = lambda m: m["path"] == filter
|
filter_func = lambda m, t=filter_term: m["path"] == t
|
||||||
filter_type = FilterType.PATH
|
filter_type = FilterType.PATH
|
||||||
args.all_lines = True
|
args.all_lines = True
|
||||||
else:
|
else:
|
||||||
raise RuntimeError(
|
raise RuntimeError(
|
||||||
f"Can't parse {filter} as an IP, hostname, user-id, path, or status code."
|
f"Can't parse {filter_term} as an IP, hostname, user-id, path, or status code."
|
||||||
)
|
)
|
||||||
return (filter_type, filter_func)
|
if filter_type in filter_types:
|
||||||
|
parser().error("Supplied the same type of value more than once, which cannot match!")
|
||||||
|
filter_types.add(filter_type)
|
||||||
|
filter_funcs.append(filter_func)
|
||||||
|
filter_terms.append(filter_term)
|
||||||
|
|
||||||
|
# Push back the modified raw strings, so we can use them for fast substring searches
|
||||||
|
args.filter_terms = filter_terms
|
||||||
|
|
||||||
|
return (filter_types, filter_funcs)
|
||||||
|
|
||||||
|
|
||||||
def passes_filters(
|
def passes_filters(
|
||||||
string_filter: Callable[[re.Match], bool], # type: ignore[type-arg] # Requires Python 3.9
|
string_filters: List[FilterFunc],
|
||||||
match: re.Match, # type: ignore[type-arg] # Requires Python 3.9
|
match: re.Match, # type: ignore[type-arg] # Requires Python 3.9
|
||||||
args: argparse.Namespace,
|
args: argparse.Namespace,
|
||||||
) -> bool:
|
) -> bool:
|
||||||
if not string_filter(match):
|
if not all(f(match) for f in string_filters):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if args.all_lines:
|
if args.all_lines:
|
||||||
|
@ -318,7 +341,7 @@ def passes_filters(
|
||||||
def print_line(
|
def print_line(
|
||||||
match: re.Match, # type: ignore[type-arg] # Requires Python 3.9
|
match: re.Match, # type: ignore[type-arg] # Requires Python 3.9
|
||||||
args: argparse.Namespace,
|
args: argparse.Namespace,
|
||||||
filter_type: FilterType,
|
filter_types: Set[FilterType],
|
||||||
) -> None:
|
) -> None:
|
||||||
if args.full_line:
|
if args.full_line:
|
||||||
print(match.group(0))
|
print(match.group(0))
|
||||||
|
@ -350,7 +373,7 @@ def print_line(
|
||||||
indicator = "!"
|
indicator = "!"
|
||||||
color = FAIL
|
color = FAIL
|
||||||
url = f"{BOLD}{match['path']}"
|
url = f"{BOLD}{match['path']}"
|
||||||
if filter_type != FilterType.HOSTNAME:
|
if FilterType.HOSTNAME not in filter_types:
|
||||||
hostname = match["hostname"]
|
hostname = match["hostname"]
|
||||||
if hostname is None:
|
if hostname is None:
|
||||||
hostname = "???." + settings.EXTERNAL_HOST
|
hostname = "???." + settings.EXTERNAL_HOST
|
||||||
|
@ -370,8 +393,8 @@ def print_line(
|
||||||
parts = [
|
parts = [
|
||||||
ts,
|
ts,
|
||||||
f"{duration:>5}ms",
|
f"{duration:>5}ms",
|
||||||
f"{user_id:7}" if not args.nginx and filter_type != FilterType.USER_ID else None,
|
f"{user_id:7}" if not args.nginx and FilterType.USER_ID not in filter_types else None,
|
||||||
f"{match['ip']:39}" if filter_type != FilterType.CLIENT_IP else None,
|
f"{match['ip']:39}" if FilterType.CLIENT_IP not in filter_types else None,
|
||||||
indicator + match["code"],
|
indicator + match["code"],
|
||||||
f"{match['method']:6}",
|
f"{match['method']:6}",
|
||||||
url,
|
url,
|
||||||
|
|
Loading…
Reference in New Issue