zulip/zilencer/management/commands/populate_db.py

1149 lines
44 KiB
Python

import itertools
import os
import random
from collections import defaultdict
from datetime import datetime
from typing import Any, Callable, Dict, List, Mapping, Sequence, Tuple
import bmemcached
import orjson
from django.conf import settings
from django.contrib.sessions.models import Session
from django.core.management import call_command
from django.core.management.base import BaseCommand, CommandParser
from django.db import connection
from django.db.models import F
from django.utils.timezone import now as timezone_now
from django.utils.timezone import timedelta as timezone_timedelta
from scripts.lib.zulip_tools import get_or_create_dev_uuid_var_path
from zerver.lib.actions import (
STREAM_ASSIGNMENT_COLORS,
build_message_send_dict,
check_add_realm_emoji,
do_change_user_role,
do_create_realm,
do_send_messages,
do_update_user_custom_profile_data_if_changed,
try_add_realm_custom_profile_field,
try_add_realm_default_custom_profile_field,
)
from zerver.lib.bulk_create import bulk_create_streams
from zerver.lib.cache import cache_set
from zerver.lib.generate_test_data import create_test_data, generate_topics
from zerver.lib.onboarding import create_if_missing_realm_internal_bots
from zerver.lib.push_notifications import logger as push_notifications_logger
from zerver.lib.server_initialization import create_internal_realm, create_users
from zerver.lib.storage import static_path
from zerver.lib.types import ProfileFieldData
from zerver.lib.url_preview.preview import CACHE_NAME as PREVIEW_CACHE_NAME
from zerver.lib.user_groups import create_user_group
from zerver.lib.users import add_service
from zerver.lib.utils import generate_api_key
from zerver.models import (
AlertWord,
Client,
CustomProfileField,
DefaultStream,
Huddle,
Message,
Reaction,
Realm,
RealmAuditLog,
RealmDomain,
Recipient,
Service,
Stream,
Subscription,
UserMessage,
UserPresence,
UserProfile,
get_client,
get_huddle,
get_realm,
get_stream,
get_user,
get_user_by_delivery_email,
get_user_profile_by_id,
)
settings.USING_TORNADO = False
# Disable using memcached caches to avoid 'unsupported pickle
# protocol' errors if `populate_db` is run with a different Python
# from `run-dev.py`.
default_cache = settings.CACHES["default"]
settings.CACHES["default"] = {
"BACKEND": "django.core.cache.backends.locmem.LocMemCache",
}
DEFAULT_EMOJIS = [
("+1", "1f44d"),
("smiley", "1f603"),
("eyes", "1f440"),
("crying_cat_face", "1f63f"),
("arrow_up", "2b06"),
("confetti_ball", "1f38a"),
("hundred_points", "1f4af"),
]
def clear_database() -> None:
# Hacky function only for use inside populate_db. Designed to
# allow running populate_db repeatedly in series to work without
# flushing memcached or clearing the database manually.
# With `zproject.test_settings`, we aren't using real memcached
# and; we only need to flush memcached if we're populating a
# database that would be used with it (i.e. zproject.dev_settings).
if default_cache["BACKEND"] == "django_bmemcached.memcached.BMemcached":
bmemcached.Client(
(default_cache["LOCATION"],),
**default_cache["OPTIONS"],
).flush_all()
model: Any = None # Hack because mypy doesn't know these are model classes
for model in [
Message,
Stream,
UserProfile,
Recipient,
Realm,
Subscription,
Huddle,
UserMessage,
Client,
DefaultStream,
]:
model.objects.all().delete()
Session.objects.all().delete()
# Suppress spammy output from the push notifications logger
push_notifications_logger.disabled = True
def subscribe_users_to_streams(realm: Realm, stream_dict: Dict[str, Dict[str, Any]]) -> None:
subscriptions_to_add = []
event_time = timezone_now()
all_subscription_logs = []
profiles = UserProfile.objects.select_related().filter(realm=realm)
for i, stream_name in enumerate(stream_dict):
stream = Stream.objects.get(name=stream_name, realm=realm)
recipient = Recipient.objects.get(type=Recipient.STREAM, type_id=stream.id)
for profile in profiles:
# Subscribe to some streams.
s = Subscription(
recipient=recipient,
user_profile=profile,
is_user_active=profile.is_active,
color=STREAM_ASSIGNMENT_COLORS[i % len(STREAM_ASSIGNMENT_COLORS)],
)
subscriptions_to_add.append(s)
log = RealmAuditLog(
realm=profile.realm,
modified_user=profile,
modified_stream=stream,
event_last_message_id=0,
event_type=RealmAuditLog.SUBSCRIPTION_CREATED,
event_time=event_time,
)
all_subscription_logs.append(log)
Subscription.objects.bulk_create(subscriptions_to_add)
RealmAuditLog.objects.bulk_create(all_subscription_logs)
def create_alert_words(realm_id: int) -> None:
user_ids = UserProfile.objects.filter(
realm_id=realm_id,
is_bot=False,
is_active=True,
).values_list("id", flat=True)
alert_words = [
"algorithms",
"complexity",
"founded",
"galaxy",
"grammar",
"illustrious",
"natural",
"objective",
"people",
"robotics",
"study",
]
recs: List[AlertWord] = []
for user_id in user_ids:
random.shuffle(alert_words)
for i in range(4):
recs.append(
AlertWord(
realm_id=realm_id,
user_profile_id=user_id,
word=alert_words[i],
)
)
AlertWord.objects.bulk_create(recs)
class Command(BaseCommand):
help = "Populate a test database"
def add_arguments(self, parser: CommandParser) -> None:
parser.add_argument(
"-n", "--num-messages", type=int, default=500, help="The number of messages to create."
)
parser.add_argument(
"-b",
"--batch-size",
type=int,
default=1000,
help="How many messages to process in a single batch",
)
parser.add_argument(
"--extra-users", type=int, default=0, help="The number of extra users to create"
)
parser.add_argument(
"--extra-bots", type=int, default=0, help="The number of extra bots to create"
)
parser.add_argument(
"--extra-streams", type=int, default=0, help="The number of extra streams to create"
)
parser.add_argument("--max-topics", type=int, help="The number of maximum topics to create")
parser.add_argument(
"--huddles",
dest="num_huddles",
type=int,
default=3,
help="The number of huddles to create.",
)
parser.add_argument(
"--personals",
dest="num_personals",
type=int,
default=6,
help="The number of personal pairs to create.",
)
parser.add_argument("--threads", type=int, default=1, help="The number of threads to use.")
parser.add_argument(
"--percent-huddles",
type=float,
default=15,
help="The percent of messages to be huddles.",
)
parser.add_argument(
"--percent-personals",
type=float,
default=15,
help="The percent of messages to be personals.",
)
parser.add_argument(
"--stickyness",
type=float,
default=20,
help="The percent of messages to repeat recent folks.",
)
parser.add_argument(
"--nodelete",
action="store_false",
dest="delete",
help="Whether to delete all the existing messages.",
)
parser.add_argument(
"--test-suite",
action="store_true",
help="Configures populate_db to create a deterministic "
"data set for the backend tests.",
)
def handle(self, **options: Any) -> None:
if options["percent_huddles"] + options["percent_personals"] > 100:
self.stderr.write("Error! More than 100% of messages allocated.\n")
return
# Get consistent data for backend tests.
if options["test_suite"]:
random.seed(0)
with connection.cursor() as cursor:
# Sometimes bugs relating to confusing recipient.id for recipient.type_id
# or <object>.id for <object>.recipient_id remain undiscovered by the test suite
# due to these numbers happening to coincide in such a way that it makes tests
# accidentally pass. By bumping the Recipient.id sequence by a large enough number,
# we can have those ids in a completely different range of values than object ids,
# eliminatng the possibility of such coincidences.
cursor.execute("SELECT setval('zerver_recipient_id_seq', 100)")
# If max_topics is not set, we set it proportional to the
# number of messages.
if options["max_topics"] is None:
options["max_topics"] = 1 + options["num_messages"] // 100
if options["delete"]:
# Start by clearing all the data in our database
clear_database()
# Create our three default realms
# Could in theory be done via zerver.lib.actions.do_create_realm, but
# welcome-bot (needed for do_create_realm) hasn't been created yet
create_internal_realm()
zulip_realm = do_create_realm(
string_id="zulip",
name="Zulip Dev",
emails_restricted_to_domains=False,
email_address_visibility=Realm.EMAIL_ADDRESS_VISIBILITY_ADMINS,
description="The Zulip development environment default organization."
" It's great for testing!",
invite_required=False,
plan_type=Realm.SELF_HOSTED,
org_type=Realm.CORPORATE,
)
RealmDomain.objects.create(realm=zulip_realm, domain="zulip.com")
assert zulip_realm.notifications_stream is not None
zulip_realm.notifications_stream.name = "Verona"
zulip_realm.notifications_stream.description = "A city in Italy"
zulip_realm.notifications_stream.save(update_fields=["name", "description"])
if options["test_suite"]:
mit_realm = do_create_realm(
string_id="zephyr",
name="MIT",
emails_restricted_to_domains=True,
invite_required=False,
plan_type=Realm.SELF_HOSTED,
org_type=Realm.CORPORATE,
)
RealmDomain.objects.create(realm=mit_realm, domain="mit.edu")
lear_realm = do_create_realm(
string_id="lear",
name="Lear & Co.",
emails_restricted_to_domains=False,
invite_required=False,
plan_type=Realm.SELF_HOSTED,
org_type=Realm.CORPORATE,
)
# Default to allowing all members to send mentions in
# large streams for the test suite to keep
# mention-related tests simple.
zulip_realm.wildcard_mention_policy = Realm.WILDCARD_MENTION_POLICY_MEMBERS
zulip_realm.save(update_fields=["wildcard_mention_policy"])
# Create test Users (UserProfiles are automatically created,
# as are subscriptions to the ability to receive personals).
names = [
("Zoe", "ZOE@zulip.com"),
("Othello, the Moor of Venice", "othello@zulip.com"),
("Iago", "iago@zulip.com"),
("Prospero from The Tempest", "prospero@zulip.com"),
("Cordelia, Lear's daughter", "cordelia@zulip.com"),
("King Hamlet", "hamlet@zulip.com"),
("aaron", "AARON@zulip.com"),
("Polonius", "polonius@zulip.com"),
("Desdemona", "desdemona@zulip.com"),
("शिव", "shiva@zulip.com"),
]
# For testing really large batches:
# Create extra users with semi realistic names to make search
# functions somewhat realistic. We'll still create 1000 users
# like Extra222 User for some predicability.
num_names = options["extra_users"]
num_boring_names = 300
for i in range(min(num_names, num_boring_names)):
full_name = f"Extra{i:03} User"
names.append((full_name, f"extrauser{i}@zulip.com"))
if num_names > num_boring_names:
fnames = [
"Amber",
"Arpita",
"Bob",
"Cindy",
"Daniela",
"Dan",
"Dinesh",
"Faye",
"François",
"George",
"Hank",
"Irene",
"James",
"Janice",
"Jenny",
"Jill",
"John",
"Kate",
"Katelyn",
"Kobe",
"Lexi",
"Manish",
"Mark",
"Matt",
"Mayna",
"Michael",
"Pete",
"Peter",
"Phil",
"Phillipa",
"Preston",
"Sally",
"Scott",
"Sandra",
"Steve",
"Stephanie",
"Vera",
]
mnames = ["de", "van", "von", "Shaw", "T."]
lnames = [
"Adams",
"Agarwal",
"Beal",
"Benson",
"Bonita",
"Davis",
"George",
"Harden",
"James",
"Jones",
"Johnson",
"Jordan",
"Lee",
"Leonard",
"Singh",
"Smith",
"Patel",
"Towns",
"Wall",
]
non_ascii_names = [
"Günter",
"أحمد",
"Magnús",
"आशी",
"イツキ",
"语嫣",
"அருண்",
"Александр",
"José",
]
# to imitate emoji insertions in usernames
raw_emojis = ["😎", "😂", "🐱‍👤"]
for i in range(num_boring_names, num_names):
fname = random.choice(fnames) + str(i)
full_name = fname
if random.random() < 0.7:
if random.random() < 0.3:
full_name += " " + random.choice(non_ascii_names)
else:
full_name += " " + random.choice(mnames)
if random.random() < 0.1:
full_name += " {} ".format(random.choice(raw_emojis))
else:
full_name += " " + random.choice(lnames)
email = fname.lower() + "@zulip.com"
names.append((full_name, email))
create_users(zulip_realm, names, tos_version=settings.TOS_VERSION)
iago = get_user_by_delivery_email("iago@zulip.com", zulip_realm)
do_change_user_role(iago, UserProfile.ROLE_REALM_ADMINISTRATOR, acting_user=None)
iago.is_staff = True
iago.save(update_fields=["is_staff"])
desdemona = get_user_by_delivery_email("desdemona@zulip.com", zulip_realm)
do_change_user_role(desdemona, UserProfile.ROLE_REALM_OWNER, acting_user=None)
shiva = get_user_by_delivery_email("shiva@zulip.com", zulip_realm)
do_change_user_role(shiva, UserProfile.ROLE_MODERATOR, acting_user=None)
guest_user = get_user_by_delivery_email("polonius@zulip.com", zulip_realm)
guest_user.role = UserProfile.ROLE_GUEST
guest_user.save(update_fields=["role"])
# These bots are directly referenced from code and thus
# are needed for the test suite.
zulip_realm_bots = [
("Zulip Error Bot", "error-bot@zulip.com"),
("Zulip Default Bot", "default-bot@zulip.com"),
]
for i in range(options["extra_bots"]):
zulip_realm_bots.append((f"Extra Bot {i}", f"extrabot{i}@zulip.com"))
create_users(zulip_realm, zulip_realm_bots, bot_type=UserProfile.DEFAULT_BOT)
zoe = get_user_by_delivery_email("zoe@zulip.com", zulip_realm)
zulip_webhook_bots = [
("Zulip Webhook Bot", "webhook-bot@zulip.com"),
]
# If a stream is not supplied in the webhook URL, the webhook
# will (in some cases) send the notification as a PM to the
# owner of the webhook bot, so bot_owner can't be None
create_users(
zulip_realm,
zulip_webhook_bots,
bot_type=UserProfile.INCOMING_WEBHOOK_BOT,
bot_owner=zoe,
)
aaron = get_user_by_delivery_email("AARON@zulip.com", zulip_realm)
zulip_outgoing_bots = [
("Outgoing Webhook", "outgoing-webhook@zulip.com"),
]
create_users(
zulip_realm,
zulip_outgoing_bots,
bot_type=UserProfile.OUTGOING_WEBHOOK_BOT,
bot_owner=aaron,
)
outgoing_webhook = get_user("outgoing-webhook@zulip.com", zulip_realm)
add_service(
"outgoing-webhook",
user_profile=outgoing_webhook,
interface=Service.GENERIC,
base_url="http://127.0.0.1:5002",
token=generate_api_key(),
)
# Add the realm internal bots to each realm.
create_if_missing_realm_internal_bots()
# Create public streams.
signups_stream = Realm.INITIAL_PRIVATE_STREAM_NAME
stream_list = [
"Verona",
"Denmark",
"Scotland",
"Venice",
"Rome",
signups_stream,
]
stream_dict: Dict[str, Dict[str, Any]] = {
"Denmark": {"description": "A Scandinavian country"},
"Scotland": {"description": "Located in the United Kingdom"},
"Venice": {"description": "A northeastern Italian city"},
"Rome": {"description": "Yet another Italian city", "is_web_public": True},
}
bulk_create_streams(zulip_realm, stream_dict)
recipient_streams: List[int] = [
Stream.objects.get(name=name, realm=zulip_realm).id for name in stream_list
]
# Create subscriptions to streams. The following
# algorithm will give each of the users a different but
# deterministic subset of the streams (given a fixed list
# of users). For the test suite, we have a fixed list of
# subscriptions to make sure test data is consistent
# across platforms.
subscriptions_list: List[Tuple[UserProfile, Recipient]] = []
profiles: Sequence[UserProfile] = (
UserProfile.objects.select_related().filter(is_bot=False).order_by("email")
)
if options["test_suite"]:
subscriptions_map = {
"AARON@zulip.com": ["Verona"],
"cordelia@zulip.com": ["Verona"],
"hamlet@zulip.com": ["Verona", "Denmark", signups_stream],
"iago@zulip.com": [
"Verona",
"Denmark",
"Scotland",
signups_stream,
],
"othello@zulip.com": ["Verona", "Denmark", "Scotland"],
"prospero@zulip.com": ["Verona", "Denmark", "Scotland", "Venice"],
"ZOE@zulip.com": ["Verona", "Denmark", "Scotland", "Venice", "Rome"],
"polonius@zulip.com": ["Verona"],
"desdemona@zulip.com": [
"Verona",
"Denmark",
"Venice",
signups_stream,
],
"shiva@zulip.com": ["Verona", "Denmark", "Scotland"],
}
for profile in profiles:
email = profile.delivery_email
if email not in subscriptions_map:
raise Exception(f"Subscriptions not listed for user {email}")
for stream_name in subscriptions_map[email]:
stream = Stream.objects.get(name=stream_name, realm=zulip_realm)
r = Recipient.objects.get(type=Recipient.STREAM, type_id=stream.id)
subscriptions_list.append((profile, r))
else:
num_streams = len(recipient_streams)
num_users = len(profiles)
for i, profile in enumerate(profiles):
# Subscribe to some streams.
fraction = float(i) / num_users
num_recips = int(num_streams * fraction) + 1
for type_id in recipient_streams[:num_recips]:
r = Recipient.objects.get(type=Recipient.STREAM, type_id=type_id)
subscriptions_list.append((profile, r))
subscriptions_to_add: List[Subscription] = []
event_time = timezone_now()
all_subscription_logs: (List[RealmAuditLog]) = []
i = 0
for profile, recipient in subscriptions_list:
i += 1
color = STREAM_ASSIGNMENT_COLORS[i % len(STREAM_ASSIGNMENT_COLORS)]
s = Subscription(
recipient=recipient,
user_profile=profile,
is_user_active=profile.is_active,
color=color,
)
subscriptions_to_add.append(s)
log = RealmAuditLog(
realm=profile.realm,
modified_user=profile,
modified_stream_id=recipient.type_id,
event_last_message_id=0,
event_type=RealmAuditLog.SUBSCRIPTION_CREATED,
event_time=event_time,
)
all_subscription_logs.append(log)
Subscription.objects.bulk_create(subscriptions_to_add)
RealmAuditLog.objects.bulk_create(all_subscription_logs)
# Create custom profile field data
phone_number = try_add_realm_custom_profile_field(
zulip_realm, "Phone number", CustomProfileField.SHORT_TEXT, hint=""
)
biography = try_add_realm_custom_profile_field(
zulip_realm,
"Biography",
CustomProfileField.LONG_TEXT,
hint="What are you known for?",
)
favorite_food = try_add_realm_custom_profile_field(
zulip_realm,
"Favorite food",
CustomProfileField.SHORT_TEXT,
hint="Or drink, if you'd prefer",
)
field_data: ProfileFieldData = {
"vim": {"text": "Vim", "order": "1"},
"emacs": {"text": "Emacs", "order": "2"},
}
favorite_editor = try_add_realm_custom_profile_field(
zulip_realm, "Favorite editor", CustomProfileField.SELECT, field_data=field_data
)
birthday = try_add_realm_custom_profile_field(
zulip_realm, "Birthday", CustomProfileField.DATE
)
favorite_website = try_add_realm_custom_profile_field(
zulip_realm,
"Favorite website",
CustomProfileField.URL,
hint="Or your personal blog's URL",
)
mentor = try_add_realm_custom_profile_field(
zulip_realm, "Mentor", CustomProfileField.USER
)
github_profile = try_add_realm_default_custom_profile_field(zulip_realm, "github")
# Fill in values for Iago and Hamlet
hamlet = get_user_by_delivery_email("hamlet@zulip.com", zulip_realm)
do_update_user_custom_profile_data_if_changed(
iago,
[
{"id": phone_number.id, "value": "+1-234-567-8901"},
{"id": biography.id, "value": "Betrayer of Othello."},
{"id": favorite_food.id, "value": "Apples"},
{"id": favorite_editor.id, "value": "emacs"},
{"id": birthday.id, "value": "2000-01-01"},
{"id": favorite_website.id, "value": "https://zulip.readthedocs.io/en/latest/"},
{"id": mentor.id, "value": [hamlet.id]},
{"id": github_profile.id, "value": "zulip"},
],
)
do_update_user_custom_profile_data_if_changed(
hamlet,
[
{"id": phone_number.id, "value": "+0-11-23-456-7890"},
{
"id": biography.id,
"value": "I am:\n* The prince of Denmark\n* Nephew to the usurping Claudius",
},
{"id": favorite_food.id, "value": "Dark chocolate"},
{"id": favorite_editor.id, "value": "vim"},
{"id": birthday.id, "value": "1900-01-01"},
{"id": favorite_website.id, "value": "https://blog.zulig.org"},
{"id": mentor.id, "value": [iago.id]},
{"id": github_profile.id, "value": "zulipbot"},
],
)
else:
zulip_realm = get_realm("zulip")
recipient_streams = [
klass.type_id for klass in Recipient.objects.filter(type=Recipient.STREAM)
]
# Extract a list of all users
user_profiles: List[UserProfile] = list(UserProfile.objects.filter(is_bot=False))
# Create a test realm emoji.
IMAGE_FILE_PATH = static_path("images/test-images/checkbox.png")
with open(IMAGE_FILE_PATH, "rb") as fp:
check_add_realm_emoji(zulip_realm, "green_tick", iago, fp)
if not options["test_suite"]:
# Populate users with some bar data
for user in user_profiles:
status: int = UserPresence.ACTIVE
date = timezone_now()
client = get_client("website")
if user.full_name[0] <= "H":
client = get_client("ZulipAndroid")
UserPresence.objects.get_or_create(
user_profile=user,
realm_id=user.realm_id,
client=client,
timestamp=date,
status=status,
)
user_profiles_ids = [user_profile.id for user_profile in user_profiles]
# Create several initial huddles
for i in range(options["num_huddles"]):
get_huddle(random.sample(user_profiles_ids, random.randint(3, 4)))
# Create several initial pairs for personals
personals_pairs = [
random.sample(user_profiles_ids, 2) for i in range(options["num_personals"])
]
create_alert_words(zulip_realm.id)
# Generate a new set of test data.
create_test_data()
# prepopulate the URL preview/embed data for the links present
# in the config.generate_data.json data set. This makes it
# possible for populate_db to run happily without Internet
# access.
with open("zerver/tests/fixtures/docs_url_preview_data.json", "rb") as f:
urls_with_preview_data = orjson.loads(f.read())
for url in urls_with_preview_data:
cache_set(url, urls_with_preview_data[url], PREVIEW_CACHE_NAME)
if options["delete"]:
if options["test_suite"]:
# Create test users; the MIT ones are needed to test
# the Zephyr mirroring codepaths.
testsuite_mit_users = [
("Fred Sipb (MIT)", "sipbtest@mit.edu"),
("Athena Consulting Exchange User (MIT)", "starnine@mit.edu"),
("Esp Classroom (MIT)", "espuser@mit.edu"),
]
create_users(mit_realm, testsuite_mit_users, tos_version=settings.TOS_VERSION)
testsuite_lear_users = [
("King Lear", "king@lear.org"),
("Cordelia, Lear's daughter", "cordelia@zulip.com"),
]
create_users(lear_realm, testsuite_lear_users, tos_version=settings.TOS_VERSION)
if not options["test_suite"]:
# To keep the messages.json fixtures file for the test
# suite fast, don't add these users and subscriptions
# when running populate_db for the test suite
# to imitate emoji insertions in stream names
raw_emojis = ["😎", "😂", "🐱‍👤"]
zulip_stream_dict: Dict[str, Dict[str, Any]] = {
"devel": {"description": "For developing"},
# ビデオゲーム - VideoGames (japanese)
"ビデオゲーム": {
"description": "Share your favorite video games! {}".format(raw_emojis[2])
},
"announce": {
"description": "For announcements",
"stream_post_policy": Stream.STREAM_POST_POLICY_ADMINS,
},
"design": {"description": "For design"},
"support": {"description": "For support"},
"social": {"description": "For socializing"},
"test": {"description": "For testing `code`"},
"errors": {"description": "For errors"},
# 조리법 - Recipes (Korean) , Пельмени - Dumplings (Russian)
"조리법 "
+ raw_emojis[0]: {"description": "Everything cooking, from pasta to Пельмени"},
}
extra_stream_names = [
"802.11a",
"Ad Hoc Network",
"Augmented Reality",
"Cycling",
"DPI",
"FAQ",
"FiFo",
"commits",
"Control panel",
"desktop",
"компьютеры",
"Data security",
"desktop",
"काम",
"discussions",
"Cloud storage",
"GCI",
"Vaporware",
"Recent Trends",
"issues",
"live",
"Health",
"mobile",
"空間",
"provision",
"hidrógeno",
"HR",
"アニメ",
]
# Add stream names and stream descriptions
for i in range(options["extra_streams"]):
extra_stream_name = random.choice(extra_stream_names) + " " + str(i)
# to imitate emoji insertions in stream names
if random.random() <= 0.15:
extra_stream_name += random.choice(raw_emojis)
zulip_stream_dict[extra_stream_name] = {
"description": "Auto-generated extra stream.",
}
bulk_create_streams(zulip_realm, zulip_stream_dict)
# Now that we've created the notifications stream, configure it properly.
zulip_realm.notifications_stream = get_stream("announce", zulip_realm)
zulip_realm.save(update_fields=["notifications_stream"])
# Add a few default streams
for default_stream_name in ["design", "devel", "social", "support"]:
DefaultStream.objects.create(
realm=zulip_realm, stream=get_stream(default_stream_name, zulip_realm)
)
# Now subscribe everyone to these streams
subscribe_users_to_streams(zulip_realm, zulip_stream_dict)
create_user_groups()
if not options["test_suite"]:
# We populate the analytics database here for
# development purpose only
call_command("populate_analytics_db")
threads = options["threads"]
jobs: List[Tuple[int, List[List[int]], Dict[str, Any], Callable[[str], int], int]] = []
for i in range(threads):
count = options["num_messages"] // threads
if i < options["num_messages"] % threads:
count += 1
jobs.append(
(count, personals_pairs, options, self.stdout.write, random.randint(0, 10 ** 10))
)
for job in jobs:
generate_and_send_messages(job)
if options["delete"]:
if not options["test_suite"]:
# These bots are not needed by the test suite
# Also, we don't want interacting with each other
# in dev setup.
internal_zulip_users_nosubs = [
("Zulip Commit Bot", "commit-bot@zulip.com"),
("Zulip Trac Bot", "trac-bot@zulip.com"),
("Zulip Nagios Bot", "nagios-bot@zulip.com"),
]
create_users(
zulip_realm, internal_zulip_users_nosubs, bot_type=UserProfile.DEFAULT_BOT
)
mark_all_messages_as_read()
self.stdout.write("Successfully populated test database.\n")
def mark_all_messages_as_read() -> None:
"""
We want to keep these two flags intact after we
create messages:
has_alert_word
is_private
But we will mark all messages as read to save a step for users.
"""
# Mark all messages as read
UserMessage.objects.all().update(
flags=F("flags").bitor(UserMessage.flags.read),
)
recipient_hash: Dict[int, Recipient] = {}
def get_recipient_by_id(rid: int) -> Recipient:
if rid in recipient_hash:
return recipient_hash[rid]
return Recipient.objects.get(id=rid)
# Create some test messages, including:
# - multiple streams
# - multiple subjects per stream
# - multiple huddles
# - multiple personals converastions
# - multiple messages per subject
# - both single and multi-line content
def generate_and_send_messages(
data: Tuple[int, Sequence[Sequence[int]], Mapping[str, Any], Callable[[str], Any], int]
) -> int:
(tot_messages, personals_pairs, options, output, random_seed) = data
random.seed(random_seed)
with open(
os.path.join(get_or_create_dev_uuid_var_path("test-backend"), "test_messages.json"), "rb"
) as infile:
dialog = orjson.loads(infile.read())
random.shuffle(dialog)
texts = itertools.cycle(dialog)
# We need to filter out streams from the analytics realm as we don't want to generate
# messages to its streams - and they might also have no subscribers, which would break
# our message generation mechanism below.
stream_ids = Stream.objects.filter(realm=get_realm("zulip")).values_list("id", flat=True)
recipient_streams: List[int] = [
recipient.id
for recipient in Recipient.objects.filter(type=Recipient.STREAM, type_id__in=stream_ids)
]
recipient_huddles: List[int] = [h.id for h in Recipient.objects.filter(type=Recipient.HUDDLE)]
huddle_members: Dict[int, List[int]] = {}
for h in recipient_huddles:
huddle_members[h] = [s.user_profile.id for s in Subscription.objects.filter(recipient_id=h)]
# Generate different topics for each stream
possible_topics = {}
for stream_id in recipient_streams:
possible_topics[stream_id] = generate_topics(options["max_topics"])
message_batch_size = options["batch_size"]
num_messages = 0
random_max = 1000000
recipients: Dict[int, Tuple[int, int, Dict[str, Any]]] = {}
messages: List[Message] = []
while num_messages < tot_messages:
saved_data: Dict[str, Any] = {}
message = Message()
message.sending_client = get_client("populate_db")
message.content = next(texts)
randkey = random.randint(1, random_max)
if (
num_messages > 0
and random.randint(1, random_max) * 100.0 / random_max < options["stickyness"]
):
# Use an old recipient
message_type, recipient_id, saved_data = recipients[num_messages - 1]
if message_type == Recipient.PERSONAL:
personals_pair = saved_data["personals_pair"]
random.shuffle(personals_pair)
elif message_type == Recipient.STREAM:
message.subject = saved_data["subject"]
message.recipient = get_recipient_by_id(recipient_id)
elif message_type == Recipient.HUDDLE:
message.recipient = get_recipient_by_id(recipient_id)
elif randkey <= random_max * options["percent_huddles"] / 100.0:
message_type = Recipient.HUDDLE
message.recipient = get_recipient_by_id(random.choice(recipient_huddles))
elif (
randkey
<= random_max * (options["percent_huddles"] + options["percent_personals"]) / 100.0
):
message_type = Recipient.PERSONAL
personals_pair = random.choice(personals_pairs)
random.shuffle(personals_pair)
elif randkey <= random_max * 1.0:
message_type = Recipient.STREAM
message.recipient = get_recipient_by_id(random.choice(recipient_streams))
if message_type == Recipient.HUDDLE:
sender_id = random.choice(huddle_members[message.recipient.id])
message.sender = get_user_profile_by_id(sender_id)
elif message_type == Recipient.PERSONAL:
message.recipient = Recipient.objects.get(
type=Recipient.PERSONAL, type_id=personals_pair[0]
)
message.sender = get_user_profile_by_id(personals_pair[1])
saved_data["personals_pair"] = personals_pair
elif message_type == Recipient.STREAM:
# Pick a random subscriber to the stream
message.sender = random.choice(
Subscription.objects.filter(recipient=message.recipient)
).user_profile
message.subject = random.choice(possible_topics[message.recipient.id])
saved_data["subject"] = message.subject
message.date_sent = choose_date_sent(num_messages, tot_messages, options["threads"])
messages.append(message)
recipients[num_messages] = (message_type, message.recipient.id, saved_data)
num_messages += 1
if (num_messages % message_batch_size) == 0:
# Send the batch and empty the list:
send_messages(messages)
messages = []
if len(messages) > 0:
# If there are unsent messages after exiting the loop, send them:
send_messages(messages)
return tot_messages
def send_messages(messages: List[Message]) -> None:
# We disable USING_RABBITMQ here, so that deferred work is
# executed in do_send_message_messages, rather than being
# queued. This is important, because otherwise, if run-dev.py
# wasn't running when populate_db was run, a developer can end
# up with queued events that reference objects from a previous
# life of the database, which naturally throws exceptions.
settings.USING_RABBITMQ = False
message_dict_list = []
for message in messages:
message_dict = build_message_send_dict(message=message)
message_dict_list.append(message_dict)
do_send_messages(message_dict_list)
bulk_create_reactions(messages)
settings.USING_RABBITMQ = True
def get_message_to_users(message_ids: List[int]) -> Dict[int, List[int]]:
rows = UserMessage.objects.filter(
message_id__in=message_ids,
).values("message_id", "user_profile_id")
result: Dict[int, List[int]] = defaultdict(list)
for row in rows:
result[row["message_id"]].append(row["user_profile_id"])
return result
def bulk_create_reactions(all_messages: List[Message]) -> None:
reactions: List[Reaction] = []
num_messages = int(0.2 * len(all_messages))
messages = random.sample(all_messages, num_messages)
message_ids = [message.id for message in messages]
message_to_users = get_message_to_users(message_ids)
for message_id in message_ids:
msg_user_ids = message_to_users[message_id]
if msg_user_ids:
# Now let between 1 and 7 users react.
#
# Ideally, we'd make exactly 1 reaction more common than
# this algorithm generates.
max_num_users = min(7, len(msg_user_ids))
num_users = random.randrange(1, max_num_users + 1)
user_ids = random.sample(msg_user_ids, num_users)
for user_id in user_ids:
# each user does between 1 and 3 emojis
num_emojis = random.choice([1, 2, 3])
emojis = random.sample(DEFAULT_EMOJIS, num_emojis)
for emoji_name, emoji_code in emojis:
reaction = Reaction(
user_profile_id=user_id,
message_id=message_id,
emoji_name=emoji_name,
emoji_code=emoji_code,
reaction_type=Reaction.UNICODE_EMOJI,
)
reactions.append(reaction)
Reaction.objects.bulk_create(reactions)
def choose_date_sent(num_messages: int, tot_messages: int, threads: int) -> datetime:
# Spoofing time not supported with threading
if threads != 1:
return timezone_now()
# Distrubutes 80% of messages starting from 5 days ago, over a period
# of 3 days. Then, distributes remaining messages over past 24 hours.
amount_in_first_chunk = int(tot_messages * 0.8)
amount_in_second_chunk = tot_messages - amount_in_first_chunk
if num_messages < amount_in_first_chunk:
# Distribute starting from 5 days ago, over a period
# of 3 days:
spoofed_date = timezone_now() - timezone_timedelta(days=5)
interval_size = 3 * 24 * 60 * 60 / amount_in_first_chunk
lower_bound = interval_size * num_messages
upper_bound = interval_size * (num_messages + 1)
else:
# We're in the last 20% of messages, distribute them over the last 24 hours:
spoofed_date = timezone_now() - timezone_timedelta(days=1)
interval_size = 24 * 60 * 60 / amount_in_second_chunk
lower_bound = interval_size * (num_messages - amount_in_first_chunk)
upper_bound = interval_size * (num_messages - amount_in_first_chunk + 1)
offset_seconds = random.uniform(lower_bound, upper_bound)
spoofed_date += timezone_timedelta(seconds=offset_seconds)
return spoofed_date
def create_user_groups() -> None:
zulip = get_realm("zulip")
members = [
get_user_by_delivery_email("cordelia@zulip.com", zulip),
get_user_by_delivery_email("hamlet@zulip.com", zulip),
]
create_user_group("hamletcharacters", members, zulip, description="Characters of Hamlet")