2013-03-20 15:31:27 +01:00
|
|
|
import hashlib
|
2016-08-14 18:33:29 +02:00
|
|
|
import heapq
|
|
|
|
import itertools
|
2019-12-16 06:27:34 +01:00
|
|
|
import re
|
2020-09-05 04:02:13 +02:00
|
|
|
import secrets
|
2017-11-05 06:39:22 +01:00
|
|
|
from itertools import zip_longest
|
2020-06-11 00:54:34 +02:00
|
|
|
from time import sleep
|
2020-06-23 08:03:47 +02:00
|
|
|
from typing import Any, Callable, Iterator, List, Optional, Sequence, Set, Tuple, TypeVar
|
2013-08-08 16:50:58 +02:00
|
|
|
|
2013-04-16 22:57:50 +02:00
|
|
|
from django.conf import settings
|
|
|
|
|
2021-02-12 08:20:45 +01:00
|
|
|
# Generic type variable used in the signatures of the generic helpers in
# this module (run_in_batches and assert_is_not_none).
T = TypeVar("T")
|
2016-06-03 18:39:57 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def statsd_key(val: str, clean_periods: bool = False) -> str:
    """Return a cleaned-up copy of `val`.

    Everything from the first ":" onwards is discarded and every "-" is
    turned into "_".  When `clean_periods` is true, every "." is turned
    into "_" as well.
    """
    # partition() hands back the whole string as the head when there is
    # no ":" at all, so no membership test is needed first.
    head, _sep, _tail = val.partition(":")
    cleaned = head.replace("-", "_")
    return cleaned.replace(".", "_") if clean_periods else cleaned
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2017-11-05 11:37:41 +01:00
|
|
|
class StatsDWrapper:
    """Transparently either submit metrics to statsd
    or do nothing without erroring out.

    Only the attribute names "timer", "timing", "incr", "decr" and "gauge"
    are served dynamically; any other missing attribute raises
    AttributeError.
    """

    # Backported support for gauge deltas
    # as our statsd server supports them but supporting
    # pystatsd is not released yet
    def _our_gauge(self, stat: str, value: float, rate: float = 1, delta: bool = False) -> None:
        """Set a gauge value.

        With `delta` true the value is formatted with an explicit sign
        ("+g" format); otherwise it is formatted without one.  The formatted
        value, suffixed with "|g", is passed to the client's `_send`
        together with `stat` and `rate`.
        """
        from django_statsd.clients import statsd

        value_str = f"{value:+g}|g" if delta else f"{value:g}|g"
        statsd._send(stat, value_str, rate)

    def __getattr__(self, name: str) -> Any:
        """Resolve one of the forwarded metric methods.

        Returns a do-nothing callable when settings.STATSD_HOST is the empty
        string; otherwise returns `_our_gauge` for "gauge" and the
        same-named attribute of the django_statsd client for the rest.

        Raises:
            AttributeError: if `name` is not one of the forwarded names.
        """
        if name not in ("timer", "timing", "incr", "decr", "gauge"):
            # Carry the attribute name in the message (the bare exception
            # raised previously gave no hint about what was looked up).
            raise AttributeError(f"{type(self).__name__!r} object has no attribute {name!r}")

        # Hand off to statsd if we have it enabled
        # otherwise do nothing
        if settings.STATSD_HOST == "":
            return lambda *args, **kwargs: None

        # Imported lazily so the client is only needed when statsd is enabled.
        from django_statsd.clients import statsd

        if name == "gauge":
            return self._our_gauge
        return getattr(statsd, name)
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2013-04-16 22:57:50 +02:00
|
|
|
# Module-level wrapper instance; log_statsd_event() in this module sends
# its counter increments through it.
statsd = StatsDWrapper()
|
2013-03-12 17:51:35 +01:00
|
|
|
|
|
|
|
# Runs the callback with slices of all_list of a given batch_size
|
2021-02-12 08:19:30 +01:00
|
|
|
def run_in_batches(
    all_list: Sequence[T],
    batch_size: int,
    callback: Callable[[Sequence[T]], None],
    sleep_time: int = 0,
    logger: Optional[Callable[[str], None]] = None,
) -> None:
    """Call `callback` on consecutive slices of `all_list`.

    Args:
        all_list: the sequence to split; nothing happens if it is empty.
        batch_size: maximum number of items per slice.
        callback: invoked once per slice, in order.
        sleep_time: seconds to sleep between batches (no sleep after the
            last batch).
        logger: if given, called with a progress message before each batch.
    """
    total = len(all_list)
    if total == 0:
        return

    # Ceiling division.  The previous `total // batch_size + 1` produced one
    # batch too many whenever total was an exact multiple of batch_size,
    # which invoked the callback with an empty slice (and slept once more).
    limit = (total + batch_size - 1) // batch_size
    for i in range(limit):
        start = i * batch_size
        end = min(start + batch_size, total)
        batch = all_list[start:end]

        if logger:
            logger(f"Executing {end-start} in batch {i+1} of {limit}")

        callback(batch)

        if i != limit - 1:
            sleep(sleep_time)
|
2013-03-20 15:31:27 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def make_safe_digest(string: str, hash_func: Callable[[bytes], Any] = hashlib.sha1) -> str:
    """
    Hash `string` with `hash_func` (SHA-1 unless overridden) and return
    the hexadecimal digest.
    """
    # The hashlib constructors only accept bytes, so the text is encoded
    # first (str.encode() defaults to UTF-8); this keeps non-ASCII input
    # from failing.
    encoded = string.encode()
    hasher = hash_func(encoded)
    return hasher.hexdigest()
|
2013-04-16 22:57:50 +02:00
|
|
|
|
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def log_statsd_event(name: str) -> None:
    """
    Record a single one-off event by incrementing the statsd counter
    named "events.<name>".

    Intended for marking moments such as a prod deploy or a bankruptcy
    request so they can be shown as vertical lines on generated graphs.

    In graphite, the drawAsInfinite() command renders such an event as
    a vertical line.
    """
    statsd.incr(f"events.{name}")
|
2013-08-08 16:50:58 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-08-01 11:18:37 +02:00
|
|
|
def generate_api_key() -> str:
    """Return a random 32-character key containing no "_" or "-".

    URL-safe random tokens are generated with `secrets`, stripped of the
    two punctuation characters, and accumulated until at least 32
    characters are available; the first 32 are returned.
    """
    collected = ""
    while True:
        # One iteration suffices 99.4992% of the time.
        token = secrets.token_urlsafe(3 * 9)
        collected += "".join(ch for ch in token if ch not in "_-")
        if len(collected) >= 32:
            return collected[:32]
|
2018-08-01 11:18:37 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2019-12-16 06:27:34 +01:00
|
|
|
def has_api_key_format(key: str) -> bool:
    """Return True iff `key` is exactly 32 ASCII letters and/or digits."""
    matched = re.fullmatch(r"([A-Za-z0-9]){32}", key)
    return matched is not None
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2021-07-25 11:20:48 +02:00
|
|
|
def assert_is_not_none(value: Optional[T]) -> T:
    """Return `value` unchanged after asserting that it is not None.

    Lets callers narrow an Optional[T] to T inside an expression.  The
    check is a plain `assert`, so it raises AssertionError on None and is
    skipped entirely when Python runs with assertions disabled (-O).
    """
    assert value is not None
    return value
|
|
|
|
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
def query_chunker(
    queries: List[Any],
    id_collector: Optional[Set[int]] = None,
    chunk_size: int = 1000,
    db_chunk_size: Optional[int] = None,
) -> Iterator[Any]:
    """
    Merge one or more Django queries into a single generator that
    yields lists of up to chunk_size row objects, in ascending id
    order across all of the queries.

    Requirements on the queries:
        - They should be Django filters.
        - They should return Django objects with "id" attributes.
        - They should be disjoint.

    Each query is read in pages of db_chunk_size rows, which defaults
    to chunk_size divided evenly among the queries.

    Every id that is yielded is added to id_collector.  It is used
    here to assert that no id is produced twice; a caller may pass in
    its own (empty) set to receive the full collection of ids as a
    side effect.
    """
    if db_chunk_size is None:
        db_chunk_size = chunk_size // len(queries)

    assert db_chunk_size >= 2
    assert chunk_size >= 2

    if id_collector is None:
        id_collector = set()
    else:
        assert len(id_collector) == 0

    def chunkify(q: Any, i: int) -> Iterator[Tuple[int, int, Any]]:
        # Page through one query in id order, resuming each page just
        # after the last id already seen.  The query's index `i` is the
        # second tuple element so that the merge below never has to
        # compare two row objects when ids tie.
        ordered = q.order_by("id")
        last_seen_id = -1
        while True:
            page = list(ordered.filter(id__gt=last_seen_id)[0:db_chunk_size])
            if not page:
                return
            for row in page:
                yield (row.id, i, row)
            last_seen_id = page[-1].id

    per_query_streams = [chunkify(q, i) for i, q in enumerate(queries)]
    merged_stream = heapq.merge(*per_query_streams)

    while True:
        tup_chunk = list(itertools.islice(merged_stream, 0, chunk_size))
        if not tup_chunk:
            return

        # Do duplicate-id management here.
        chunk_ids = {row_id for row_id, _idx, _row in tup_chunk}
        assert len(chunk_ids) == len(tup_chunk)
        assert chunk_ids.isdisjoint(id_collector)
        id_collector.update(chunk_ids)

        yield [row for _row_id, _idx, row in tup_chunk]
|
2016-07-19 14:35:08 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def process_list_in_batches(
    lst: List[Any], chunk_size: int, process_batch: Callable[[List[Any]], None]
) -> None:
    """Feed `lst` to `process_batch` in consecutive slices of `chunk_size`.

    Slices are taken from the front of the list onwards; processing stops
    at the first empty slice, so an empty list results in no calls.
    """
    position = 0
    batch = lst[position : position + chunk_size]
    while batch:
        process_batch(batch)
        position += chunk_size
        batch = lst[position : position + chunk_size]
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def split_by(array: List[Any], group_size: int, filler: Any) -> List[List[Any]]:
    """
    Break `array` into consecutive lists of length `group_size`, padding
    the final list with `filler` when the elements run out.  Based on the
    grouper recipe in https://docs.python.org/3/library/itertools.html
    """
    # Every slot refers to the *same* iterator, so each tuple produced by
    # zip_longest consumes the next `group_size` elements in order.
    shared_iter = iter(array)
    slots = [shared_iter] * group_size
    return [list(group) for group in zip_longest(*slots, fillvalue=filler)]
|