refactor: Use sets of stream_ids for email digests.

I now use sets for stream_ids in more of the digest
code.

As part of this I replaced exclude_subscription_modified_streams
with streams_recently_modified_for_user.

It's easier for the caller to just ask for ids
to delete from its callee than it is to pass
in a set/list to mutate.

The simpler boundary between the functions makes
the tests easier to write--you can see the
`filtered_streams` logic goes away in this diff.

I also make the tests a bit more thorough by using
combinations of Cordelia/Othello and Verona/Denmark
to try to find multiple possible flaws.

And I make the time intervals longer than 1s to
avoid false negatives from slow CI boxes.
This commit is contained in:
Steve Howell 2020-11-05 13:55:45 +00:00 committed by Tim Abbott
parent 6b48fb3c08
commit c5dc9d386f
2 changed files with 57 additions and 32 deletions

View File

@ -226,8 +226,7 @@ def bulk_get_digest_context(users: List[UserProfile], cutoff: float) -> Dict[int
user_ids = [user.id for user in users]
def get_stream_map(user_ids: List[int]) -> Dict[int, List[int]]:
# maps user_id -> [stream_id, stream_id, ...]
def get_stream_map(user_ids: List[int]) -> Dict[int, Set[int]]:
rows = Subscription.objects.filter(
user_profile_id__in=user_ids,
recipient__type=Recipient.STREAM,
@ -235,9 +234,10 @@ def bulk_get_digest_context(users: List[UserProfile], cutoff: float) -> Dict[int
is_muted=False,
).values('user_profile_id', 'recipient__type_id')
dct: Dict[int, List[int]] = defaultdict(list)
# maps user_id -> {stream_id, stream_id, ...}
dct: Dict[int, Set[int]] = defaultdict(set)
for row in rows:
dct[row['user_profile_id']].append(row['recipient__type_id'])
dct[row['user_profile_id']].add(row['recipient__type_id'])
return dct
@ -250,14 +250,12 @@ def bulk_get_digest_context(users: List[UserProfile], cutoff: float) -> Dict[int
unsubscribe_link = one_click_unsubscribe_link(user, "digest")
context.update(unsubscribe_link=unsubscribe_link)
home_view_streams = stream_map[user.id]
stream_ids = stream_map[user.id]
if not user.long_term_idle:
stream_ids = home_view_streams
else:
stream_ids = exclude_subscription_modified_streams(user, home_view_streams, cutoff_date)
if user.long_term_idle:
stream_ids -= streams_recently_modified_for_user(user, cutoff_date)
topic_activity = get_recent_topic_activity(stream_ids, cutoff_date)
topic_activity = get_recent_topic_activity(sorted(list(stream_ids)), cutoff_date)
hot_topics = get_hot_topics(topic_activity)
# Gather hot conversations.
@ -298,11 +296,7 @@ def bulk_handle_digest_email(user_ids: List[int], cutoff: float) -> None:
def handle_digest_email(user_id: int, cutoff: float) -> None:
bulk_handle_digest_email([user_id], cutoff)
def exclude_subscription_modified_streams(user_profile: UserProfile,
stream_ids: List[int],
cutoff_date: datetime.datetime) -> List[int]:
"""Exclude streams from given list where users' subscription was modified."""
def streams_recently_modified_for_user(user: UserProfile, cutoff_date: datetime.datetime) -> Set[int]:
events = [
RealmAuditLog.SUBSCRIPTION_CREATED,
RealmAuditLog.SUBSCRIPTION_ACTIVATED,
@ -311,9 +305,9 @@ def exclude_subscription_modified_streams(user_profile: UserProfile,
# Streams where the user's subscription was changed
modified_streams = RealmAuditLog.objects.filter(
realm=user_profile.realm,
modified_user=user_profile,
realm=user.realm,
modified_user=user,
event_time__gt=cutoff_date,
event_type__in=events).values_list('modified_stream_id', flat=True)
return list(set(stream_ids) - set(modified_streams))
return set(modified_streams)

View File

@ -11,9 +11,9 @@ from zerver.lib.actions import do_create_user
from zerver.lib.digest import (
bulk_handle_digest_email,
enqueue_emails,
exclude_subscription_modified_streams,
gather_new_streams,
handle_digest_email,
streams_recently_modified_for_user,
)
from zerver.lib.streams import create_stream_if_needed
from zerver.lib.test_classes import ZulipTestCase
@ -199,30 +199,61 @@ class TestDigestEmailMessages(ZulipTestCase):
self.assertIn('some content', teaser_messages[0]['content'][0]['plain'])
self.assertIn(teaser_messages[0]['sender'], expected_participants)
def test_exclude_subscription_modified_streams(self) -> None:
def test_streams_recently_modified_for_user(self) -> None:
othello = self.example_user('othello')
cordelia = self.example_user('cordelia')
for stream in ['Verona', 'Scotland', 'Denmark']:
self.subscribe(othello, stream)
# Delete all RealmAuditLogs to ignore any changes to subscriptions to
# streams done for the setup.
RealmAuditLog.objects.all().delete()
self.subscribe(cordelia, stream)
realm = othello.realm
stream_names = self.get_streams(othello)
stream_ids = {name: get_stream(name, realm).id for name in stream_names}
denmark = get_stream('Denmark', realm)
verona = get_stream('Verona', realm)
two_hours_ago = timezone_now() - datetime.timedelta(hours=2)
one_hour_ago = timezone_now() - datetime.timedelta(hours=1)
# Delete all RealmAuditLogs to start with a clean slate.
RealmAuditLog.objects.all().delete()
# Unsubscribe and subscribe Othello from a stream
self.unsubscribe(othello, 'Denmark')
self.subscribe(othello, 'Denmark')
self.assertEqual(
streams_recently_modified_for_user(othello, one_hour_ago),
{denmark.id}
)
# Backdate all our logs (so that Denmark will no longer
# appear like a recently modified stream for Othello).
RealmAuditLog.objects.all().update(event_time=two_hours_ago)
# Now Denmark no longer appears recent to Othello.
self.assertEqual(
streams_recently_modified_for_user(othello, one_hour_ago),
set()
)
# Unsubscribe and subscribe from a stream
self.unsubscribe(othello, 'Verona')
self.subscribe(othello, 'Verona')
one_sec_ago = timezone_now() - datetime.timedelta(seconds=1)
# Now, Verona, but not Denmark, appears recent.
self.assertEqual(
streams_recently_modified_for_user(othello, one_hour_ago),
{verona.id},
)
filtered_stream_ids = exclude_subscription_modified_streams(
othello, list(stream_ids.values()), one_sec_ago)
self.assertNotIn(stream_ids['Verona'], filtered_stream_ids)
self.assertIn(stream_ids['Scotland'], filtered_stream_ids)
self.assertIn(stream_ids['Denmark'], filtered_stream_ids)
# make sure we don't mix up Othello and Cordelia
self.unsubscribe(cordelia, 'Denmark')
self.subscribe(cordelia, 'Denmark')
self.assertEqual(
streams_recently_modified_for_user(cordelia, one_hour_ago),
{denmark.id}
)
@mock.patch('zerver.lib.digest.queue_digest_recipient')
@mock.patch('zerver.lib.digest.timezone_now')