2024-07-12 02:30:25 +02:00
|
|
|
from collections.abc import Iterable
|
2020-04-27 20:04:38 +02:00
|
|
|
|
2019-02-11 15:19:38 +01:00
|
|
|
import ahocorasick
|
2020-06-11 00:54:34 +02:00
|
|
|
from django.db import transaction
|
|
|
|
|
|
|
|
from zerver.lib.cache import (
|
|
|
|
cache_with_key,
|
|
|
|
realm_alert_words_automaton_cache_key,
|
|
|
|
realm_alert_words_cache_key,
|
|
|
|
)
|
2023-12-15 21:04:40 +01:00
|
|
|
from zerver.models import AlertWord, Realm, UserProfile
|
|
|
|
from zerver.models.alert_words import flush_realm_alert_words
|
2020-06-11 00:54:34 +02:00
|
|
|
|
2013-09-03 22:41:17 +02:00
|
|
|
|
2023-06-08 22:17:05 +02:00
|
|
|
@cache_with_key(lambda realm: realm_alert_words_cache_key(realm.id), timeout=3600 * 24)
def alert_words_in_realm(realm: Realm) -> dict[int, list[str]]:
    """Group the alert words of a realm by the id of the user owning them.

    Only alert words whose owner is active are considered, so a user
    id is present in the result only if that user is active and has at
    least one alert word.  The call is wrapped in `cache_with_key`,
    keyed on the realm id.
    """
    rows = AlertWord.objects.filter(realm=realm, user_profile__is_active=True).values(
        "user_profile_id", "word"
    )

    words_by_user_id: dict[int, list[str]] = {}
    for row in rows:
        # Create the user's list on first sight, then append in one step.
        words_by_user_id.setdefault(row["user_profile_id"], []).append(row["word"])
    return words_by_user_id
|
2013-09-03 22:41:17 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2023-06-08 22:17:05 +02:00
|
|
|
@cache_with_key(lambda realm: realm_alert_words_automaton_cache_key(realm.id), timeout=3600 * 24)
def get_alert_word_automaton(realm: Realm) -> "ahocorasick.Automaton | None":
    """Build an Aho-Corasick automaton over the lowercased alert words of `realm`.

    Every key of the automaton is a lowercased alert word and its value
    is a `(lowercased_word, user_ids)` tuple, where `user_ids` is the
    set of ids of the users who registered that word (in any casing).

    Returns None when the realm has no alert words at all, so callers
    must check for None before using the result.  (The return
    annotation is written as a string so that it is not evaluated at
    definition time.)
    """
    alert_word_automaton = ahocorasick.Automaton()

    for user_id, alert_words in alert_words_in_realm(realm).items():
        for alert_word in alert_words:
            alert_word_lower = alert_word.lower()
            if alert_word_automaton.exists(alert_word_lower):
                # The word is already registered: extend the set of
                # interested users stored alongside it, in place.
                _, user_ids_for_alert_word = alert_word_automaton.get(alert_word_lower)
                user_ids_for_alert_word.add(user_id)
            else:
                alert_word_automaton.add_word(alert_word_lower, (alert_word_lower, {user_id}))

    alert_word_automaton.make_automaton()

    # If the kind is not AHOCORASICK after calling make_automaton, it
    # means there is no key present, and hence we cannot call items on
    # the automaton yet.  To avoid that, we return None for realms
    # without any alert words.
    # https://pyahocorasick.readthedocs.io/en/latest/#make-automaton
    if alert_word_automaton.kind != ahocorasick.AHOCORASICK:
        return None
    return alert_word_automaton
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2024-07-12 02:30:17 +02:00
|
|
|
def user_alert_words(user_profile: UserProfile) -> list[str]:
    """Return the alert words stored for `user_profile`, as a plain list of strings."""
    words = AlertWord.objects.filter(user_profile=user_profile).values_list("word", flat=True)
    return list(words)
|
2013-09-03 22:41:17 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2020-04-27 20:04:38 +02:00
|
|
|
@transaction.atomic
def add_user_alert_words(user_profile: UserProfile, new_words: Iterable[str]) -> list[str]:
    """Add `new_words` to the alert words of `user_profile`.

    Words that the user already has, compared case-insensitively, are
    skipped.  Returns the user's full list of alert words after the
    insertion.
    """
    already_present = {existing.lower() for existing in user_alert_words(user_profile)}

    # Keyed by the lowercased word so that the new words are distinct
    # case-insensitively, while the values keep the original casing.
    # When a word is passed several times with different casing, the
    # last spelling is the one that gets stored.
    words_to_create: dict[str, str] = {
        candidate.lower(): candidate
        for candidate in new_words
        if candidate.lower() not in already_present
    }

    AlertWord.objects.bulk_create(
        AlertWord(user_profile=user_profile, word=word, realm=user_profile.realm)
        for word in words_to_create.values()
    )
    # Django bulk_create operations don't flush caches, so we need to do this ourselves.
    flush_realm_alert_words(user_profile.realm_id)

    return user_alert_words(user_profile)
|
2013-09-11 17:24:27 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2020-04-27 20:04:38 +02:00
|
|
|
@transaction.atomic
def remove_user_alert_words(user_profile: UserProfile, delete_words: Iterable[str]) -> list[str]:
    """Delete each of `delete_words` from the alert words of `user_profile`.

    Matching is case-insensitive (`iexact`).  Returns the user's
    remaining alert words.
    """
    # TODO: Ideally, this would be a single bulk query rather than one
    # case-insensitive delete per word.  We can clean this up if/when
    # PostgreSQL has more native support for case-insensitive fields.
    # If we turn this into a bulk operation, we will need to call
    # flush_realm_alert_words() here.
    own_alert_words = AlertWord.objects.filter(user_profile=user_profile)
    for delete_word in delete_words:
        own_alert_words.filter(word__iexact=delete_word).delete()

    return user_alert_words(user_profile)
|