From 08be671c371dbd68140c2944fae9f205aa27f3d6 Mon Sep 17 00:00:00 2001 From: Prakhar Pratyush Date: Tue, 19 Nov 2024 21:56:11 +0530 Subject: [PATCH] streams: Add 'ChannelEmailAddress' model. This commit removes the 'email_token' field from the 'Stream' model, introduces a new model 'ChannelEmailAddress', and backfills it. This is a prep work towards our plan to generate unique channel emails for different users, which can be used to check post permissions at message send time. --- zerver/actions/streams.py | 5 + zerver/lib/email_mirror.py | 18 ++- zerver/lib/email_mirror_helpers.py | 11 +- zerver/lib/export.py | 6 +- ...mail_token_backfill_channelemailaddress.py | 119 ++++++++++++++++++ zerver/models/__init__.py | 1 + zerver/models/streams.py | 33 +++-- zerver/tests/test_email_mirror.py | 52 ++++---- zerver/tests/test_subs.py | 4 +- 9 files changed, 207 insertions(+), 42 deletions(-) create mode 100644 zerver/migrations/0629_remove_stream_email_token_backfill_channelemailaddress.py diff --git a/zerver/actions/streams.py b/zerver/actions/streams.py index 665443c4be..db6d47268b 100644 --- a/zerver/actions/streams.py +++ b/zerver/actions/streams.py @@ -59,6 +59,7 @@ from zerver.lib.users import ( from zerver.models import ( ArchivedAttachment, Attachment, + ChannelEmailAddress, DefaultStream, DefaultStreamGroup, Message, @@ -90,6 +91,8 @@ def do_deactivate_stream(stream: Stream, *, acting_user: UserProfile | None) -> stream.deactivated = True stream.save(update_fields=["deactivated"]) + ChannelEmailAddress.objects.filter(channel=stream).update(deactivated=True) + assert stream.recipient_id is not None if was_web_public: assert was_public @@ -201,6 +204,8 @@ def do_unarchive_stream(stream: Stream, new_name: str, *, acting_user: UserProfi ] ) + ChannelEmailAddress.objects.filter(channel=stream).update(deactivated=False) + # Update caches cache_set(display_recipient_cache_key(stream.recipient_id), new_name) messages = Message.objects.filter( diff --git a/zerver/lib/email_mirror.py b/zerver/lib/email_mirror.py index 5f477fa06b..370517507d 100644 --- a/zerver/lib/email_mirror.py +++ b/zerver/lib/email_mirror.py @@ -31,7 +31,15 @@ from zerver.lib.send_email import FromAddress from zerver.lib.streams import access_stream_for_send_message from zerver.lib.string_validation import is_character_printable from zerver.lib.upload import upload_message_attachment -from zerver.models import Message, MissedMessageEmailAddress, Realm, Recipient, Stream, UserProfile +from zerver.models import ( + ChannelEmailAddress, + Message, + MissedMessageEmailAddress, + Realm, + Recipient, + Stream, + UserProfile, +) from zerver.models.clients import get_client from zerver.models.streams import get_stream_by_id_in_realm from zerver.models.users import get_system_bot, get_user_profile_by_id @@ -359,11 +367,13 @@ def decode_stream_email_address(email: str) -> tuple[Stream, dict[str, bool]]: token, options = decode_email_address(email) try: - stream = Stream.objects.get(email_token=token) - except Stream.DoesNotExist: + channel_email_address = ChannelEmailAddress.objects.select_related("channel").get( + email_token=token + ) + except ChannelEmailAddress.DoesNotExist: raise ZulipEmailForwardError("Bad stream token from email recipient " + email) - return stream, options + return channel_email_address.channel, options def find_emailgateway_recipient(message: EmailMessage) -> str: diff --git a/zerver/lib/email_mirror_helpers.py b/zerver/lib/email_mirror_helpers.py index c1b267cf32..510b8bd659 100644 --- a/zerver/lib/email_mirror_helpers.py +++ b/zerver/lib/email_mirror_helpers.py @@ -5,7 +5,8 @@ from typing import Any from django.conf import settings from django.utils.text import slugify -from zerver.models import Stream +from zerver.models import ChannelEmailAddress, Stream +from zerver.models.users import get_system_bot def default_option_handler_factory(address_option: str) -> Callable[[dict[str, Any]], None]: @@ -48,7 +49,13 @@ def get_email_gateway_message_string_from_address(address: str) -> str: def encode_email_address(stream: Stream, show_sender: bool = False) -> str: - return encode_email_address_helper(stream.name, stream.email_token, show_sender) + channel_email_address, ignored = ChannelEmailAddress.objects.get_or_create( + realm=stream.realm, + channel=stream, + creator=stream.creator, + sender=get_system_bot(settings.EMAIL_GATEWAY_BOT, stream.realm_id), + ) + return encode_email_address_helper(stream.name, channel_email_address.email_token, show_sender) def encode_email_address_helper(name: str, email_token: str, show_sender: bool = False) -> str: diff --git a/zerver/lib/export.py b/zerver/lib/export.py index 107ce0f5a4..64d78f8418 100644 --- a/zerver/lib/export.py +++ b/zerver/lib/export.py @@ -142,6 +142,7 @@ ALL_ZULIP_TABLES = { "zerver_archivetransaction", "zerver_botconfigdata", "zerver_botstoragedata", + "zerver_channelemailaddress", "zerver_client", "zerver_customprofilefield", "zerver_customprofilefieldvalue", @@ -271,6 +272,9 @@ NON_EXPORTED_TABLES = { # The importer cannot trust ImageAttachment objects anyway and needs to check # and process images for thumbnailing on its own. "zerver_imageattachment", + # ChannelEmailAddress entries are low value to export since + # channel email addresses include the server's hostname. + "zerver_channelemailaddress", # For any tables listed below here, it's a bug that they are not present in the export. } @@ -905,7 +909,6 @@ def get_realm_config() -> Config: stream_config = Config( table="zerver_stream", model=Stream, - exclude=["email_token"], normal_parent=realm_config, include_rows="realm_id__in", ) @@ -2255,7 +2258,6 @@ def get_single_user_config() -> Config: virtual_parent=recipient_config, id_source=("zerver_recipient", "type_id"), source_filter=lambda r: r["type"] == Recipient.STREAM, - exclude=["email_token"], ) Config( diff --git a/zerver/migrations/0629_remove_stream_email_token_backfill_channelemailaddress.py b/zerver/migrations/0629_remove_stream_email_token_backfill_channelemailaddress.py new file mode 100644 index 0000000000..0ae15c3809 --- /dev/null +++ b/zerver/migrations/0629_remove_stream_email_token_backfill_channelemailaddress.py @@ -0,0 +1,119 @@ +# Generated by Django 5.0.9 on 2024-11-21 09:30 + +import django.db.models.deletion +import django.utils.timezone +from django.conf import settings +from django.db import connection, migrations, models +from django.db.backends.base.schema import BaseDatabaseSchemaEditor +from django.db.migrations.state import StateApps +from psycopg2.sql import SQL + +from zerver.models.streams import generate_email_token_for_stream + + +def backfill_channelemailaddress(apps: StateApps, schema_editor: BaseDatabaseSchemaEditor) -> None: + UserProfile = apps.get_model("zerver", "UserProfile") + + with connection.cursor() as cursor: + cursor.execute(SQL("SELECT MAX(id) FROM zerver_stream;")) + (max_id,) = cursor.fetchone() + + if max_id is None: + return + + BATCH_SIZE = 10000 + max_id += BATCH_SIZE / 2 + lower_id_bound = 0 + mail_gateway_bot_id = UserProfile.objects.get(email__iexact=settings.EMAIL_GATEWAY_BOT).id + while lower_id_bound < max_id: + upper_id_bound = min(lower_id_bound + BATCH_SIZE, max_id) + with connection.cursor() as cursor: + query = SQL(""" + INSERT INTO zerver_channelemailaddress (realm_id, channel_id, creator_id, sender_id, email_token, date_created, deactivated) + SELECT realm_id, id, creator_id, %(mail_gateway_bot_id)s, email_token, date_created, deactivated + FROM zerver_stream + WHERE id > %(lower_id_bound)s AND id <= %(upper_id_bound)s; + """) + cursor.execute( + query, + { + "mail_gateway_bot_id": mail_gateway_bot_id, + "lower_id_bound": lower_id_bound, + "upper_id_bound": upper_id_bound, + }, + ) + + print(f"Processed {upper_id_bound} / {max_id}") + lower_id_bound += BATCH_SIZE + + +class Migration(migrations.Migration): + atomic = False + + dependencies = [ + ("zerver", "0628_remove_realm_invite_to_realm_policy"), + ] + + operations = [ + migrations.CreateModel( + name="ChannelEmailAddress", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "email_token", + models.CharField( + db_index=True, + default=generate_email_token_for_stream, + max_length=32, + unique=True, + ), + ), + ("date_created", models.DateTimeField(default=django.utils.timezone.now)), + ("deactivated", models.BooleanField(default=False)), + ( + "channel", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, to="zerver.stream" + ), + ), + ( + "creator", + models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="+", + to=settings.AUTH_USER_MODEL, + ), + ), + ( + "realm", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, to="zerver.realm" + ), + ), + ( + "sender", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="+", + to=settings.AUTH_USER_MODEL, + ), + ), + ], + options={ + "unique_together": {("channel", "creator", "sender")}, + }, + ), + migrations.RunPython( + backfill_channelemailaddress, reverse_code=migrations.RunPython.noop, elidable=True + ), + migrations.RemoveField( + model_name="stream", + name="email_token", + ), + ] diff --git a/zerver/models/__init__.py b/zerver/models/__init__.py index 0e791cbb31..73e70fd025 100644 --- a/zerver/models/__init__.py +++ b/zerver/models/__init__.py @@ -60,6 +60,7 @@ from zerver.models.scheduled_jobs import ScheduledMessage as ScheduledMessage from zerver.models.scheduled_jobs import ( ScheduledMessageNotificationEmail as ScheduledMessageNotificationEmail, ) +from zerver.models.streams import ChannelEmailAddress as ChannelEmailAddress from zerver.models.streams import DefaultStream as DefaultStream from zerver.models.streams import DefaultStreamGroup as DefaultStreamGroup from zerver.models.streams import Stream as Stream diff --git a/zerver/models/streams.py b/zerver/models/streams.py index c8c4d920b3..60b86be75b 100644 --- a/zerver/models/streams.py +++ b/zerver/models/streams.py @@ -111,15 +111,6 @@ class Stream(models.Model): # and the reason for denormalizing field is performance. is_in_zephyr_realm = models.BooleanField(default=False) - # Used by the e-mail forwarder. The e-mail RFC specifies a maximum - # e-mail length of 254, and our max stream length is 30, so we - # have plenty of room for the token. - email_token = models.CharField( - max_length=32, - default=generate_email_token_for_stream, - unique=True, - ) - # For old messages being automatically deleted. # Value NULL means "use retention policy of the realm". # Value -1 means "disable retention policy for this stream unconditionally". @@ -175,7 +166,6 @@ class Stream(models.Model): # Stream fields included whenever a Stream object is provided to # Zulip clients via the API. A few details worth noting: # * "id" is represented as "stream_id" in most API interfaces. - # * "email_token" is not realm-public and thus is not included here. # * is_in_zephyr_realm is a backend-only optimization. # * "deactivated" streams are filtered from the API entirely. # * "realm" and "recipient" are not exposed to clients via the API. @@ -400,3 +390,26 @@ class DefaultStreamGroup(models.Model): def get_default_stream_groups(realm: Realm) -> QuerySet[DefaultStreamGroup]: return DefaultStreamGroup.objects.filter(realm=realm) + + +class ChannelEmailAddress(models.Model): + realm = models.ForeignKey(Realm, on_delete=CASCADE) + channel = models.ForeignKey(Stream, on_delete=CASCADE) + creator = models.ForeignKey(UserProfile, null=True, on_delete=CASCADE, related_name="+") + sender = models.ForeignKey(UserProfile, on_delete=CASCADE, related_name="+") + + # Used by the e-mail forwarder. The e-mail RFC specifies a maximum + # e-mail length of 254, and our max stream length is 30, so we + # have plenty of room for the token. + email_token = models.CharField( + max_length=32, + default=generate_email_token_for_stream, + unique=True, + db_index=True, + ) + + date_created = models.DateTimeField(default=timezone_now) + deactivated = models.BooleanField(default=False) + + class Meta: + unique_together = ("channel", "creator", "sender") diff --git a/zerver/tests/test_email_mirror.py b/zerver/tests/test_email_mirror.py index b34560cc2b..3fce6ac7bc 100644 --- a/zerver/tests/test_email_mirror.py +++ b/zerver/tests/test_email_mirror.py @@ -38,7 +38,7 @@ from zerver.lib.send_email import FromAddress from zerver.lib.streams import ensure_stream from zerver.lib.test_classes import ZulipTestCase from zerver.lib.test_helpers import mock_queue_publish, most_recent_message, most_recent_usermessage -from zerver.models import Attachment, Recipient, Stream, UserProfile +from zerver.models import Attachment, ChannelEmailAddress, Recipient, Stream, UserProfile from zerver.models.messages import Message from zerver.models.realms import get_realm from zerver.models.streams import get_stream @@ -74,35 +74,32 @@ class TestEncodeDecode(ZulipTestCase): stream_name = "dev. help" stream = ensure_stream(realm, stream_name, acting_user=None) email_address = encode_email_address(stream) - self.assertEqual(email_address, f"dev-help.{stream.email_token}@testserver") + email_token = ChannelEmailAddress.objects.get(channel=stream).email_token + self.assertEqual(email_address, f"dev-help.{email_token}@testserver") # The default form of the email address (with an option - "include-footer"): - token, options = decode_email_address( - f"dev-help.{stream.email_token}.include-footer@testserver", - ) + token, options = decode_email_address(f"dev-help.{email_token}.include-footer@testserver") self._assert_options(options, include_footer=True) - self.assertEqual(token, stream.email_token) + self.assertEqual(token, email_token) # Using + instead of . as the separator is also supported for backwards compatibility, # since that was the original form of addresses that we used: - token, options = decode_email_address( - f"dev-help+{stream.email_token}+include-footer@testserver", - ) + token, options = decode_email_address(f"dev-help+{email_token}+include-footer@testserver") self._assert_options(options, include_footer=True) - self.assertEqual(token, stream.email_token) + self.assertEqual(token, email_token) token, options = decode_email_address(email_address) self._assert_options(options) - self.assertEqual(token, stream.email_token) + self.assertEqual(token, email_token) # We also handle mixing + and . but it shouldn't be recommended to users. email_address_all_options = ( "dev-help.{}+include-footer.show-sender+include-quotes@testserver" ) - email_address_all_options = email_address_all_options.format(stream.email_token) + email_address_all_options = email_address_all_options.format(email_token) token, options = decode_email_address(email_address_all_options) self._assert_options(options, show_sender=True, include_footer=True, include_quotes=True) - self.assertEqual(token, stream.email_token) + self.assertEqual(token, email_token) email_address = email_address.replace("@testserver", "@zulip.org") email_address_all_options = email_address_all_options.replace("@testserver", "@zulip.org") @@ -115,13 +112,13 @@ class TestEncodeDecode(ZulipTestCase): with self.settings(EMAIL_GATEWAY_EXTRA_PATTERN_HACK="@zulip.org"): token, options = decode_email_address(email_address) self._assert_options(options) - self.assertEqual(token, stream.email_token) + self.assertEqual(token, email_token) token, options = decode_email_address(email_address_all_options) self._assert_options( options, show_sender=True, include_footer=True, include_quotes=True ) - self.assertEqual(token, stream.email_token) + self.assertEqual(token, email_token) with self.assertRaises(ZulipEmailForwardError): decode_email_address("bogus") @@ -133,6 +130,7 @@ class TestEncodeDecode(ZulipTestCase): stream_name = "Тестовы some ascii letters" stream = ensure_stream(realm, stream_name, acting_user=None) email_address = encode_email_address(stream) + email_token = ChannelEmailAddress.objects.get(channel=stream).email_token msg_string = get_email_gateway_message_string_from_address(email_address) parts = msg_string.split("+") @@ -143,7 +141,7 @@ class TestEncodeDecode(ZulipTestCase): # Correctly decode the resulting address that doesn't have the stream name: token, show_sender = decode_email_address(email_address) self.assertFalse(show_sender) - self.assertEqual(token, stream.email_token) + self.assertEqual(token, email_token) asciiable_stream_name = "ąężć" stream = ensure_stream(realm, asciiable_stream_name, acting_user=None) @@ -153,24 +151,28 @@ class TestEncodeDecode(ZulipTestCase): def test_decode_ignores_stream_name(self) -> None: stream = get_stream("Denmark", get_realm("zulip")) stream_to_address = encode_email_address(stream) + email_token = ChannelEmailAddress.objects.get(channel=stream).email_token stream_to_address = stream_to_address.replace("denmark", "Some_name") # get the email_token: token = decode_email_address(stream_to_address)[0] - self.assertEqual(token, stream.email_token) + self.assertEqual(token, email_token) def test_encode_with_show_sender(self) -> None: stream = get_stream("Denmark", get_realm("zulip")) stream_to_address = encode_email_address(stream, show_sender=True) + email_token = ChannelEmailAddress.objects.get(channel=stream).email_token token, options = decode_email_address(stream_to_address) self._assert_options(options, show_sender=True) - self.assertEqual(token, stream.email_token) + self.assertEqual(token, email_token) def test_decode_prefer_text_options(self) -> None: stream = get_stream("Denmark", get_realm("zulip")) - address_prefer_text = f"Denmark.{stream.email_token}.prefer-text@testserver" - address_prefer_html = f"Denmark.{stream.email_token}.prefer-html@testserver" + encode_email_address(stream) + email_token = ChannelEmailAddress.objects.get(channel=stream).email_token + address_prefer_text = f"Denmark.{email_token}.prefer-text@testserver" + address_prefer_html = f"Denmark.{email_token}.prefer-html@testserver" token, options = decode_email_address(address_prefer_text) self._assert_options(options, prefer_text=True) @@ -844,8 +846,10 @@ class TestEmailMirrorMessagesWithAttachments(ZulipTestCase): self.login_user(user_profile) self.subscribe(user_profile, "Denmark") stream = get_stream("Denmark", user_profile.realm) - stream_address = f"Denmark.{stream.email_token}@testserver" - stream_address_prefer_html = f"Denmark.{stream.email_token}.prefer-html@testserver" + encode_email_address(stream) + email_token = ChannelEmailAddress.objects.get(channel=stream).email_token + stream_address = f"Denmark.{email_token}@testserver" + stream_address_prefer_html = f"Denmark.{email_token}.prefer-html@testserver" text = "Test message" html = "Test html message" @@ -877,7 +881,9 @@ class TestEmailMirrorMessagesWithAttachments(ZulipTestCase): self.login_user(user_profile) self.subscribe(user_profile, "Denmark") stream = get_stream("Denmark", user_profile.realm) - stream_address_prefer_html = f"Denmark.{stream.email_token}.prefer-html@testserver" + encode_email_address(stream) + email_token = ChannelEmailAddress.objects.get(channel=stream).email_token + stream_address_prefer_html = f"Denmark.{email_token}.prefer-html@testserver" text = "Test message" # This should be correctly identified as empty html body: diff --git a/zerver/tests/test_subs.py b/zerver/tests/test_subs.py index dea65654bd..2a58a3c3e0 100644 --- a/zerver/tests/test_subs.py +++ b/zerver/tests/test_subs.py @@ -106,6 +106,7 @@ from zerver.lib.types import ( ) from zerver.models import ( Attachment, + ChannelEmailAddress, DefaultStream, DefaultStreamGroup, Message, @@ -5881,8 +5882,9 @@ class GetStreamsTest(ZulipTestCase): denmark_stream = get_stream("Denmark", realm) result = self.client_get(f"/json/streams/{denmark_stream.id}/email_address") json = self.assert_json_success(result) + email_token = ChannelEmailAddress.objects.get(channel=denmark_stream).email_token denmark_email = encode_email_address_helper( - denmark_stream.name, denmark_stream.email_token, show_sender=True + denmark_stream.name, email_token, show_sender=True ) self.assertEqual(json["email"], denmark_email)