From 220c2a5ff391e3de282c7ca6489282e727ffb558 Mon Sep 17 00:00:00 2001 From: Steve Howell Date: Mon, 2 Mar 2020 17:56:52 +0000 Subject: [PATCH] performance: Add get_users_by_delivery_email(). The main purpose of this new function is to allow us to validate emails in bulk, which we don't do yet (still setting the stage for that). This is still a speedup, though, since in our caller we grab only three fields now. And other than that, we're essentially doing the same query for the single-email case, just outside the loop. --- zerver/lib/email_validation.py | 26 +++++++++++++++++++++----- zerver/models.py | 24 +++++++++++++++++++++++- 2 files changed, 44 insertions(+), 6 deletions(-) diff --git a/zerver/lib/email_validation.py b/zerver/lib/email_validation.py index 74d067b0d0..356a3552dd 100644 --- a/zerver/lib/email_validation.py +++ b/zerver/lib/email_validation.py @@ -9,14 +9,13 @@ from zerver.lib.name_restrictions import is_disposable_domain from zerver.models import ( email_to_username, email_to_domain, - get_user_by_delivery_email, + get_users_by_delivery_email, is_cross_realm_bot_email, DisposableEmailError, DomainNotAllowedForRealmError, EmailContainsPlusError, Realm, RealmDomain, - UserProfile, ) def validate_disposable(email: str) -> None: @@ -126,8 +125,25 @@ def get_existing_user_errors( already exist. There's a bit of fiddly logic related to cross-realm bots and mirror dummies too. ''' + errors = {} # type: Dict[str, Tuple[str, Optional[str], bool]] + users = get_users_by_delivery_email(emails, target_realm).only( + 'email', + 'is_active', + 'is_mirror_dummy', + ) + + ''' + A note on casing: We will preserve the casing used by + the user for email in most of this code. The only + exception is when we do existence checks against + the `user_dict` dictionary. (We don't allow two + users in the same realm to have the same effective + delivery email.) + ''' + user_dict = {user.email.lower(): user for user in users} + def process_email(email: str) -> None: if is_cross_realm_bot_email(email): msg = email_reserved_for_system_bots_error(email) @@ -136,9 +152,9 @@ def get_existing_user_errors( errors[email] = (msg, code, deactivated) return - try: - existing_user_profile = get_user_by_delivery_email(email, target_realm) - except UserProfile.DoesNotExist: + existing_user_profile = user_dict.get(email.lower()) + + if existing_user_profile is None: # HAPPY PATH! Most people invite users that don't exist yet. return diff --git a/zerver/models.py b/zerver/models.py index 3537e98a5b..bfaf59a7e1 100644 --- a/zerver/models.py +++ b/zerver/models.py @@ -3,7 +3,7 @@ from typing import Any, DefaultDict, Dict, List, Set, Tuple, TypeVar, \ from django.db import models from django.db.models.query import QuerySet -from django.db.models import Manager, Sum, CASCADE +from django.db.models import Manager, Q, Sum, CASCADE from django.conf import settings from django.contrib.auth.models import AbstractBaseUser, UserManager, \ PermissionsMixin @@ -2144,6 +2144,28 @@ def get_user_by_delivery_email(email: str, realm: Realm) -> UserProfile: return UserProfile.objects.select_related().get( delivery_email__iexact=email.strip(), realm=realm) +def get_users_by_delivery_email(emails: Set[str], realm: Realm) -> QuerySet: + """This is similar to get_users_by_delivery_email, and + it has the same security caveats. It gets multiple + users and returns a QuerySet, since most callers + will only need two or three fields. + + If you are using this to get large UserProfile objects, you are + probably making a mistake, but if you must, + then use `select_related`. + """ + + ''' + Django doesn't support delivery_email__iexact__in, so + we simply OR all the filters that we'd do for the + one-email case. + ''' + email_filter = Q() + for email in emails: + email_filter |= Q(delivery_email__iexact=email.strip()) + + return UserProfile.objects.filter(realm=realm).filter(email_filter) + @cache_with_key(user_profile_cache_key, timeout=3600*24*7) def get_user(email: str, realm: Realm) -> UserProfile: """Fetches the user by its visible-to-other users username (in the