performance: Add get_users_by_delivery_email().

The main purpose of this new function is to allow
us to validate emails in bulk, which we don't do
yet (still setting the stage for that).

This is still a speedup, though, since in our
caller we grab only three fields now.

And other than that, we're essentially doing
the same query for the single-email case, just
outside the loop.
This commit is contained in:
Steve Howell 2020-03-02 17:56:52 +00:00 committed by Tim Abbott
parent b35ffde5fb
commit 220c2a5ff3
2 changed files with 44 additions and 6 deletions

View File

@ -9,14 +9,13 @@ from zerver.lib.name_restrictions import is_disposable_domain
from zerver.models import ( from zerver.models import (
email_to_username, email_to_username,
email_to_domain, email_to_domain,
get_user_by_delivery_email, get_users_by_delivery_email,
is_cross_realm_bot_email, is_cross_realm_bot_email,
DisposableEmailError, DisposableEmailError,
DomainNotAllowedForRealmError, DomainNotAllowedForRealmError,
EmailContainsPlusError, EmailContainsPlusError,
Realm, Realm,
RealmDomain, RealmDomain,
UserProfile,
) )
def validate_disposable(email: str) -> None: def validate_disposable(email: str) -> None:
@ -126,8 +125,25 @@ def get_existing_user_errors(
already exist. There's a bit of fiddly logic related already exist. There's a bit of fiddly logic related
to cross-realm bots and mirror dummies too. to cross-realm bots and mirror dummies too.
''' '''
errors = {} # type: Dict[str, Tuple[str, Optional[str], bool]] errors = {} # type: Dict[str, Tuple[str, Optional[str], bool]]
users = get_users_by_delivery_email(emails, target_realm).only(
'email',
'is_active',
'is_mirror_dummy',
)
'''
A note on casing: We will preserve the casing used by
the user for email in most of this code. The only
exception is when we do existence checks against
the `user_dict` dictionary. (We don't allow two
users in the same realm to have the same effective
delivery email.)
'''
user_dict = {user.email.lower(): user for user in users}
def process_email(email: str) -> None: def process_email(email: str) -> None:
if is_cross_realm_bot_email(email): if is_cross_realm_bot_email(email):
msg = email_reserved_for_system_bots_error(email) msg = email_reserved_for_system_bots_error(email)
@ -136,9 +152,9 @@ def get_existing_user_errors(
errors[email] = (msg, code, deactivated) errors[email] = (msg, code, deactivated)
return return
try: existing_user_profile = user_dict.get(email.lower())
existing_user_profile = get_user_by_delivery_email(email, target_realm)
except UserProfile.DoesNotExist: if existing_user_profile is None:
# HAPPY PATH! Most people invite users that don't exist yet. # HAPPY PATH! Most people invite users that don't exist yet.
return return

View File

@ -3,7 +3,7 @@ from typing import Any, DefaultDict, Dict, List, Set, Tuple, TypeVar, \
from django.db import models from django.db import models
from django.db.models.query import QuerySet from django.db.models.query import QuerySet
from django.db.models import Manager, Sum, CASCADE from django.db.models import Manager, Q, Sum, CASCADE
from django.conf import settings from django.conf import settings
from django.contrib.auth.models import AbstractBaseUser, UserManager, \ from django.contrib.auth.models import AbstractBaseUser, UserManager, \
PermissionsMixin PermissionsMixin
@ -2144,6 +2144,28 @@ def get_user_by_delivery_email(email: str, realm: Realm) -> UserProfile:
return UserProfile.objects.select_related().get( return UserProfile.objects.select_related().get(
delivery_email__iexact=email.strip(), realm=realm) delivery_email__iexact=email.strip(), realm=realm)
def get_users_by_delivery_email(emails: Set[str], realm: Realm) -> QuerySet:
"""This is similar to get_users_by_delivery_email, and
it has the same security caveats. It gets multiple
users and returns a QuerySet, since most callers
will only need two or three fields.
If you are using this to get large UserProfile objects, you are
probably making a mistake, but if you must,
then use `select_related`.
"""
'''
Django doesn't support delivery_email__iexact__in, so
we simply OR all the filters that we'd do for the
one-email case.
'''
email_filter = Q()
for email in emails:
email_filter |= Q(delivery_email__iexact=email.strip())
return UserProfile.objects.filter(realm=realm).filter(email_filter)
@cache_with_key(user_profile_cache_key, timeout=3600*24*7) @cache_with_key(user_profile_cache_key, timeout=3600*24*7)
def get_user(email: str, realm: Realm) -> UserProfile: def get_user(email: str, realm: Realm) -> UserProfile:
"""Fetches the user by its visible-to-other users username (in the """Fetches the user by its visible-to-other users username (in the