2013-12-16 23:32:08 +01:00
|
|
|
import logging
|
|
|
|
import re
|
2020-09-05 04:02:13 +02:00
|
|
|
import secrets
|
2022-07-27 23:33:49 +02:00
|
|
|
from email.headerregistry import Address, AddressHeader
|
2020-06-05 23:26:35 +02:00
|
|
|
from email.message import EmailMessage
|
2022-07-27 23:33:49 +02:00
|
|
|
from typing import Dict, List, Match, Optional, Tuple
|
2013-12-16 23:32:08 +01:00
|
|
|
|
|
|
|
from django.conf import settings
|
2020-06-11 00:54:34 +02:00
|
|
|
|
2022-04-14 23:50:10 +02:00
|
|
|
from zerver.actions.message_send import (
|
2021-01-06 04:06:18 +01:00
|
|
|
check_send_message,
|
2020-06-11 00:54:34 +02:00
|
|
|
internal_send_huddle_message,
|
|
|
|
internal_send_private_message,
|
|
|
|
internal_send_stream_message,
|
|
|
|
)
|
|
|
|
from zerver.lib.email_mirror_helpers import (
|
|
|
|
ZulipEmailForwardError,
|
2021-08-31 23:46:34 +02:00
|
|
|
ZulipEmailForwardUserError,
|
2020-06-11 00:54:34 +02:00
|
|
|
decode_email_address,
|
|
|
|
get_email_gateway_message_string_from_address,
|
|
|
|
)
|
2019-03-15 18:51:39 +01:00
|
|
|
from zerver.lib.email_notifications import convert_html_to_markdown
|
2022-11-17 09:30:48 +01:00
|
|
|
from zerver.lib.exceptions import JsonableError, RateLimitedError
|
2020-12-19 02:36:50 +01:00
|
|
|
from zerver.lib.message import normalize_body, truncate_topic
|
2020-06-11 00:54:34 +02:00
|
|
|
from zerver.lib.queue import queue_json_publish
|
|
|
|
from zerver.lib.rate_limiter import RateLimitedObject
|
|
|
|
from zerver.lib.send_email import FromAddress
|
2022-08-20 18:11:55 +02:00
|
|
|
from zerver.lib.string_validation import is_character_printable
|
2020-06-11 00:54:34 +02:00
|
|
|
from zerver.lib.upload import upload_message_file
|
|
|
|
from zerver.models import (
|
|
|
|
Message,
|
|
|
|
MissedMessageEmailAddress,
|
|
|
|
Realm,
|
|
|
|
Recipient,
|
|
|
|
Stream,
|
|
|
|
UserProfile,
|
2021-01-06 04:06:18 +01:00
|
|
|
get_client,
|
2020-06-11 00:54:34 +02:00
|
|
|
get_display_recipient,
|
2021-03-07 20:04:54 +01:00
|
|
|
get_realm,
|
2020-06-11 00:54:34 +02:00
|
|
|
get_stream_by_id_in_realm,
|
|
|
|
get_system_bot,
|
|
|
|
get_user,
|
|
|
|
)
|
2019-09-21 02:00:00 +02:00
|
|
|
from zproject.backends import is_user_active
|
|
|
|
|
2013-12-16 23:32:08 +01:00
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2019-03-22 16:33:57 +01:00
|
|
|
def redact_email_address(error_message: str) -> str:
    """Return ``error_message`` with email gateway addresses scrubbed.

    Every address at the gateway's domain has its local part replaced by
    a run of ``X`` characters of the same length, and a short annotation
    describing the kind of address is appended after the domain.
    """
    extra_pattern = settings.EMAIL_GATEWAY_EXTRA_PATTERN_HACK
    if extra_pattern:
        # EMAIL_GATEWAY_EXTRA_PATTERN_HACK is of the form '@example.com',
        # so dropping the first character leaves the bare domain.
        gateway_domain = extra_pattern[1:]
    else:
        gateway_domain = Address(addr_spec=settings.EMAIL_GATEWAY_PATTERN).domain

    def describe(address: str) -> str:
        # Basic info about the address, computed before it is scrubbed.
        if is_missed_message_address(address):
            return " <Missed message address>"
        try:
            stream_id = decode_stream_email_address(address)[0].id
        except ZulipEmailForwardError:
            return " <Invalid address>"
        return f" <Address to stream id: {stream_id}>"

    def redact(address_match: Match[str]) -> str:
        note = describe(address_match[0])
        local_part = address_match[1]
        at_domain = address_match[2]
        # Scrub the address to the form XXXXX@example.com:
        return "X" * len(local_part) + at_domain + note

    address_re = rf"\b(\S*?)(@{re.escape(gateway_domain)})"
    return re.sub(address_re, redact, error_message)
|
2013-12-16 23:32:08 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-05-10 19:13:36 +02:00
|
|
|
def report_to_zulip(error_message: str) -> None:
    """Post ``error_message`` to the "errors" stream as the error bot.

    Does nothing when ``settings.ERROR_BOT`` is None. The message is sent
    under the topic "email mirror error", wrapped in a ``~~~`` fence.
    """
    bot_email = settings.ERROR_BOT
    if bot_email is None:
        return

    staff_realm = get_realm(settings.STAFF_SUBDOMAIN)
    bot = get_system_bot(bot_email, staff_realm.id)
    errors_stream = Stream.objects.get(name="errors", realm=staff_realm)

    fenced_message = f"~~~\n{error_message}\n~~~"
    send_zulip(bot, errors_stream, "email mirror error", fenced_message)
|
2013-12-16 23:32:08 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2020-06-05 09:30:26 +02:00
|
|
|
def log_and_report(email_message: EmailMessage, error_message: str, to: Optional[str]) -> None:
    """Log an email-mirror error and forward it via report_to_zulip.

    The error text is prefixed with the message's From header and the
    recipient (or a placeholder when ``to`` is empty/None), then passed
    through redact_email_address before being logged and reported.
    """
    shown_recipient = to if to else "No recipient found"
    sender = email_message.get("From")
    full_report = f"Sender: {sender}\nTo: {shown_recipient}\n{error_message}"

    redacted_report = redact_email_address(full_report)
    logger.error(redacted_report)
    report_to_zulip(redacted_report)
|
2013-12-16 23:32:08 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2014-07-25 10:40:40 +02:00
|
|
|
# Temporary missed message addresses
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2019-12-26 13:46:55 +01:00
|
|
|
def generate_missed_message_token() -> str:
    """Return a fresh token: "mm" followed by 32 random hex characters."""
    random_part = secrets.token_hex(16)
    return f"mm{random_part}"
|
2014-07-25 10:40:40 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-05-10 19:13:36 +02:00
|
|
|
def is_missed_message_address(address: str) -> bool:
    """Report whether ``address`` carries a missed-message token.

    Returns False when the gateway message string cannot be extracted
    from the address (ZulipEmailForwardError).
    """
    try:
        gateway_string = get_email_gateway_message_string_from_address(address)
    except ZulipEmailForwardError:
        return False
    else:
        return is_mm_32_format(gateway_string)
|
2014-07-25 10:40:40 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-05-10 19:13:36 +02:00
|
|
|
def is_mm_32_format(msg_string: Optional[str]) -> bool:
    """
    Check for the missed-message token shape: a little "mm" prefix
    followed by a 32-character string (34 characters in total).
    None is never a match.
    """
    if msg_string is None:
        return False
    has_prefix = msg_string.startswith("mm")
    return has_prefix and len(msg_string) == 34
|
2014-07-25 10:40:40 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-05-10 19:13:36 +02:00
|
|
|
def get_missed_message_token_from_address(address: str) -> str:
    """Extract the missed-message token from a gateway address.

    Raises ZulipEmailForwardError when the extracted string does not
    have the missed-message token format.
    """
    candidate = get_email_gateway_message_string_from_address(address)

    if is_mm_32_format(candidate):
        return candidate

    raise ZulipEmailForwardError("Could not parse missed message address")
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2020-01-10 10:36:35 +01:00
|
|
|
def get_usable_missed_message_address(address: str) -> MissedMessageEmailAddress:
    """Look up the MissedMessageEmailAddress row for a reply address.

    Raises ZulipEmailForwardError if the address cannot be parsed or no
    row exists for its token.
    """
    email_token = get_missed_message_token_from_address(address)
    lookup = MissedMessageEmailAddress.objects.select_related()
    try:
        return lookup.get(email_token=email_token)
    except MissedMessageEmailAddress.DoesNotExist:
        raise ZulipEmailForwardError("Zulip notification reply address is invalid.")
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def create_missed_message_address(user_profile: UserProfile, message: Message) -> str:
    """Create and return a reply-to address for a missed-message email.

    A new MissedMessageEmailAddress row with a fresh token is created for
    the (message, user) pair and its string form is returned.
    """
    # If the email gateway isn't configured, hand back the no-reply
    # address, since there's no useful way for the user to reply into
    # Zulip.
    if settings.EMAIL_GATEWAY_PATTERN == "":
        return FromAddress.NOREPLY

    new_token = generate_missed_message_token()
    address_row = MissedMessageEmailAddress.objects.create(
        message=message,
        user_profile=user_profile,
        email_token=new_token,
    )
    return str(address_row)
|
2014-07-25 10:40:40 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def construct_zulip_body(
    message: EmailMessage,
    realm: Realm,
    *,
    sender: UserProfile,
    show_sender: bool = False,
    include_quotes: bool = False,
    include_footer: bool = False,
    prefer_text: bool = True,
) -> str:
    """Build the Zulip message content for an incoming email.

    The email body is extracted (see extract_body), optionally stripped
    of its footer, and followed by links to any uploaded attachments.
    When the result is only whitespace, a "(No email body)" placeholder
    is used. With ``show_sender`` a "From: ..." line is prepended.
    """
    # Remove null characters, since Zulip will reject them.
    text = extract_body(message, include_quotes, prefer_text).replace("\x00", "")

    if not include_footer:
        text = filter_footer(text)

    # Make sure attachment links start on their own line.
    if not text.endswith("\n"):
        text = text + "\n"
    text = text + extract_and_upload_attachments(message, realm, sender)

    if text.rstrip() == "":
        text = "(No email body)"

    if show_sender:
        from_header = str(message.get("From", ""))
        text = f"From: {from_header}\n{text}"

    return text
|
2014-07-25 10:40:40 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2013-12-16 23:32:08 +01:00
|
|
|
## Sending the Zulip ##
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2020-02-10 16:49:19 +01:00
|
|
|
def send_zulip(sender: UserProfile, stream: Stream, topic: str, content: str) -> None:
    """Send a stream message on behalf of the email gateway.

    The topic goes through truncate_topic and the content through
    normalize_body before being handed to internal_send_stream_message
    with ``email_gateway=True``.
    """
    final_topic = truncate_topic(topic)
    final_content = normalize_body(content)
    internal_send_stream_message(sender, stream, final_topic, final_content, email_gateway=True)
|
|
|
|
|
2013-12-16 23:32:08 +01:00
|
|
|
|
2021-01-06 04:06:18 +01:00
|
|
|
def send_mm_reply_to_stream(
    user_profile: UserProfile, stream: Stream, topic: str, body: str
) -> None:
    """Send an email reply into ``stream`` as ``user_profile``.

    If check_send_message raises JsonableError, the user is instead sent
    a private message from the notification bot describing the failure.
    """
    try:
        check_send_message(
            sender=user_profile,
            client=get_client("Internal"),
            message_type_name="stream",
            message_to=[stream.id],
            topic_name=topic,
            message_content=body,
        )
    except JsonableError as error:
        failure_notice = (
            f"Error sending message to stream {stream.name} "
            f"via message notification email reply:\n{error.msg}"
        )
        notification_bot = get_system_bot(settings.NOTIFICATION_BOT, user_profile.realm_id)
        internal_send_private_message(notification_bot, user_profile, failure_notice)
|
|
|
|
|
|
|
|
|
2020-06-05 09:30:26 +02:00
|
|
|
def get_message_part_by_type(message: EmailMessage, content_type: str) -> Optional[str]:
    """Return the decoded text of the first part with ``content_type``.

    Parts are visited in ``message.walk()`` order. Returns None when no
    part has the requested content type. Undecodable bytes are dropped
    (``errors="ignore"``).
    """
    # get_charsets() yields one entry per part, in the same order as walk().
    for part, charset in zip(message.walk(), message.get_charsets()):
        if part.get_content_type() != content_type:
            continue

        raw = part.get_payload(decode=True)
        assert isinstance(raw, bytes)

        if charset:
            try:
                return raw.decode(charset, errors="ignore")
            except LookupError:
                # The RFCs do not define how to handle unknown
                # charsets, but treating as US-ASCII seems
                # reasonable; fall through to below.
                pass

        # If no charset has been specified in the header, assume us-ascii,
        # by RFC6657: https://tools.ietf.org/html/rfc6657
        return raw.decode("us-ascii", errors="ignore")

    return None
|
2013-12-16 23:32:08 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def extract_body(
    message: EmailMessage, include_quotes: bool = False, prefer_text: bool = True
) -> str:
    """Pick the body text to use for an email.

    Both the plaintext and the HTML bodies are extracted. The preferred
    one (plaintext when ``prefer_text``, otherwise HTML) is returned if
    it is non-empty, else the other one.

    Raises ZulipEmailForwardUserError when neither part exists, or when
    both are missing/empty.
    """
    text_body = extract_plaintext_body(message, include_quotes)
    html_body = extract_html_body(message, include_quotes)

    if text_body is None and html_body is None:
        content_types = [part.get_content_type() for part in message.walk()]
        logger.warning("Content types: %s", content_types)
        raise ZulipEmailForwardUserError("Unable to find plaintext or HTML message body")
    if not (text_body or html_body):
        raise ZulipEmailForwardUserError("Email has no nonempty body sections; ignoring.")

    if prefer_text:
        preferred, fallback = text_body, html_body
    else:
        preferred, fallback = html_body, text_body

    if preferred:
        return preferred
    assert fallback  # Needed for mypy. Ensured by the validating block above.
    return fallback
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2020-01-16 13:07:04 +01:00
|
|
|
# Set to True by extract_plaintext_body / extract_html_body once they have
# called talon_core.init(), so that the initialization runs only once.
talon_initialized = False
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
|
|
|
|
def extract_plaintext_body(message: EmailMessage, include_quotes: bool = False) -> Optional[str]:
    """Return the text/plain body of ``message``, or None if it has none.

    Unless ``include_quotes`` is set, the text is passed through
    talon_core's extract_from_plain before being returned.
    """
    import talon_core

    global talon_initialized
    if not talon_initialized:
        talon_core.init()
        talon_initialized = True

    text = get_message_part_by_type(message, "text/plain")
    if text is None:
        return None
    if include_quotes:
        return text
    return talon_core.quotations.extract_from_plain(text)
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def extract_html_body(message: EmailMessage, include_quotes: bool = False) -> Optional[str]:
    """Return the text/html body of ``message`` as Markdown, or None.

    Unless ``include_quotes`` is set, the HTML is first passed through
    talon_core's extract_from_html; the result is then converted with
    convert_html_to_markdown.
    """
    import talon_core

    global talon_initialized
    if not talon_initialized:  # nocoverage
        talon_core.init()
        talon_initialized = True

    html = get_message_part_by_type(message, "text/html")
    if html is None:
        return None
    if not include_quotes:
        html = talon_core.quotations.extract_from_html(html)
    return convert_html_to_markdown(html)
|
2013-12-16 23:32:08 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-05-10 19:13:36 +02:00
|
|
|
def filter_footer(text: str) -> str:
    """Try to filter out an obvious footer from ``text``.

    A footer is recognized only when exactly one line consists solely of
    "--" (ignoring surrounding whitespace). In that case everything from
    that line onward is dropped and the remainder is returned stripped.
    In every other case ``text`` is returned unchanged.
    """
    possible_footers = [line for line in text.split("\n") if line.strip() == "--"]
    if len(possible_footers) != 1:
        # Be conservative and don't try to scrub content if there
        # isn't a trivial footer structure.
        return text

    # maxsplit is passed by keyword: passing it positionally to re.split
    # is deprecated as of Python 3.13.
    return re.split(r"^\s*--\s*$", text, maxsplit=1, flags=re.MULTILINE)[0].strip()
|
2013-12-16 23:32:08 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2023-01-18 18:38:21 +01:00
|
|
|
def extract_and_upload_attachments(message: EmailMessage, realm: Realm, sender: UserProfile) -> str:
    """Upload every named attachment and return Markdown links to them.

    Each part of ``message`` that has a filename and a bytes payload is
    uploaded via upload_message_file; the resulting links are joined with
    newlines. Named parts whose payload is not bytes are logged and
    skipped.
    """
    links = []
    for part in message.walk():
        part_type = part.get_content_type()
        filename = part.get_filename()
        if not filename:
            continue

        payload = part.get_payload(decode=True)
        if not isinstance(payload, bytes):
            logger.warning(
                "Payload is not bytes (invalid attachment %s in message from %s).",
                filename,
                message.get("From"),
            )
            continue

        uploaded_url = upload_message_file(
            filename,
            len(payload),
            part_type,
            payload,
            sender,
            target_realm=realm,
        )
        links.append(f"[{filename}]({uploaded_url})")

    return "\n".join(links)
|
2013-12-16 23:32:08 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2020-01-10 10:25:56 +01:00
|
|
|
def decode_stream_email_address(email: str) -> Tuple[Stream, Dict[str, bool]]:
    """Resolve a stream gateway address to its Stream and options.

    Raises ZulipEmailForwardError when no stream has the token decoded
    from ``email``.
    """
    stream_token, address_options = decode_email_address(email)

    try:
        target_stream = Stream.objects.get(email_token=stream_token)
    except Stream.DoesNotExist:
        raise ZulipEmailForwardError(f"Bad stream token from email recipient {email}")

    return target_stream, address_options
|
2013-12-16 23:32:08 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2020-06-05 09:30:26 +02:00
|
|
|
def find_emailgateway_recipient(message: EmailMessage) -> str:
    """Find the gateway address this email was sent to.

    Recipient headers are scanned in descending priority order and the
    first address matching settings.EMAIL_GATEWAY_PATTERN is returned.

    Raises ZulipEmailForwardError when no header yields a match.
    """
    # X-Gm-Original-To, when present, is more accurate than Delivered-To,
    # so the headers are tried from most to least accurate.
    headers_by_priority = (
        "X-Gm-Original-To",
        "Delivered-To",
        "Envelope-To",
        "Resent-To",
        "Resent-CC",
        "To",
        "CC",
    )

    # Turn the "%s"-style pattern into a regex: literal pieces escaped,
    # joined by a non-greedy wildcard where each "%s" was.
    literal_pieces = map(re.escape, settings.EMAIL_GATEWAY_PATTERN.split("%s"))
    gateway_re = re.compile(".*?".join(literal_pieces))

    for header_name in headers_by_priority:
        for value in message.get_all(header_name, []):
            if isinstance(value, AddressHeader):
                candidates = [address.addr_spec for address in value.addresses]
            else:
                candidates = [str(value)]

            for candidate in candidates:
                if gateway_re.match(candidate):
                    return candidate

    raise ZulipEmailForwardError("Missing recipient in mirror email")
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2019-01-04 10:46:35 +01:00
|
|
|
def strip_from_subject(subject: str) -> str:
    """Return ``subject`` with RE/FWD markers removed and whitespace stripped."""
    # Pattern from: https://stackoverflow.com/questions/9153629/regex-code-for-removing-fwd-re-etc-from-email-subject
    marker_re = re.compile(
        r"([\[\(] *)?\b(RE|FWD?) *([-:;)\]][ :;\])-]*|$)|\]+ *$",
        re.IGNORECASE | re.MULTILINE,
    )
    without_markers = marker_re.sub("", subject)
    return without_markers.strip()
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2019-03-09 22:35:45 +01:00
|
|
|
def is_forwarded(subject: str) -> bool:
    """Report whether ``subject`` starts with some form of FWD marker."""
    # Regex taken from strip_from_subject, restricted to the FWD forms;
    # match() only looks at the beginning of the subject.
    forward_re = re.compile(
        r"([\[\(] *)?\b(FWD?) *([-:;)\]][ :;\])-]*|$)|\]+ *$",
        re.IGNORECASE,
    )
    return forward_re.match(subject) is not None
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2020-06-05 09:30:26 +02:00
|
|
|
def process_stream_message(to: str, message: EmailMessage) -> None:
    """Deliver an email sent to a stream gateway address into that stream.

    The topic is derived from the Subject header; the message is sent as
    the email gateway bot of the stream's realm.
    """
    raw_subject = message.get("Subject", "")
    topic = strip_from_subject(raw_subject) or "(no topic)"

    # We don't want to reject email messages with disallowed characters in the Subject,
    # so we just remove them to make it a valid Zulip topic name.
    topic = "".join(filter(is_character_printable, topic)) or "(no topic)"

    stream, options = decode_stream_email_address(to)
    # Don't remove quotations if message is forwarded, unless otherwise specified:
    if "include_quotes" not in options:
        options["include_quotes"] = is_forwarded(raw_subject)

    gateway_bot = get_system_bot(settings.EMAIL_GATEWAY_BOT, stream.realm_id)
    content = construct_zulip_body(message, stream.realm, sender=gateway_bot, **options)
    send_zulip(gateway_bot, stream, topic, content)
    logger.info(
        "Successfully processed email to %s (%s)",
        stream.name,
        stream.realm.string_id,
    )
|
2014-07-25 10:40:40 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2020-06-05 09:30:26 +02:00
|
|
|
def process_missed_message(to: str, message: EmailMessage) -> None:
    """Deliver an email reply sent to a missed-message address.

    The reply is sent as the user the address was issued to, into the
    conversation of the original message (stream, private or huddle).
    The address's usage counter is incremented before anything else; if
    the user is not active the email is ignored with a warning.
    """
    mm_address = get_usable_missed_message_address(to)
    mm_address.increment_times_used()

    replying_user = mm_address.user_profile
    original = mm_address.message
    topic = original.topic_name()

    if original.recipient.type == Recipient.PERSONAL:
        # We need to reply to the sender so look up their personal recipient_id
        recipient = original.sender.recipient
    else:
        recipient = original.recipient

    if not is_user_active(replying_user):
        logger.warning("Sending user is not active. Ignoring this message notification email.")
        return

    realm = replying_user.realm
    body = construct_zulip_body(message, realm, sender=replying_user)

    assert recipient is not None
    recipient_type = recipient.type
    if recipient_type == Recipient.STREAM:
        stream = get_stream_by_id_in_realm(recipient.type_id, replying_user.realm)
        send_mm_reply_to_stream(replying_user, stream, topic, body)
        recipient_str = stream.name
    elif recipient_type == Recipient.PERSONAL:
        display_recipient = get_display_recipient(recipient)
        assert not isinstance(display_recipient, str)
        recipient_str = display_recipient[0]["email"]
        target_user = get_user(recipient_str, replying_user.realm)
        internal_send_private_message(replying_user, target_user, body)
    elif recipient_type == Recipient.HUDDLE:
        display_recipient = get_display_recipient(recipient)
        assert not isinstance(display_recipient, str)
        emails = [member["email"] for member in display_recipient]
        recipient_str = ", ".join(emails)
        internal_send_huddle_message(replying_user.realm, replying_user, emails, body)
    else:
        raise AssertionError("Invalid recipient type!")

    logger.info(
        "Successfully processed email from user %s to %s",
        replying_user.id,
        recipient_str,
    )
|
2014-07-25 10:40:40 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def process_message(message: EmailMessage, rcpt_to: Optional[str] = None) -> None:
    """Route an incoming email to the missed-message or stream handler.

    The destination is ``rcpt_to`` when given, otherwise it is looked up
    from the message headers. ZulipEmailForwardUserError is logged at
    info level; any other ZulipEmailForwardError is passed to
    log_and_report.
    """
    # Stays None if the header lookup below fails, which is what the
    # error report then shows.
    to: Optional[str] = rcpt_to

    try:
        if to is None:
            to = find_emailgateway_recipient(message)

        handler = process_missed_message if is_missed_message_address(to) else process_stream_message
        handler(to, message)
    except ZulipEmailForwardUserError as e:
        # TODO: notify sender of error, retry if appropriate.
        logger.info(e.args[0])
    except ZulipEmailForwardError as e:
        log_and_report(message, e.args[0], to)
|
2017-04-18 17:28:55 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2020-01-10 10:21:47 +01:00
|
|
|
def validate_to_address(rcpt_to: str) -> None:
    """Check that ``rcpt_to`` resolves to a known gateway destination.

    Returns nothing on success; the lookups raise ZulipEmailForwardError
    when the address is not valid.
    """
    if not is_missed_message_address(rcpt_to):
        decode_stream_email_address(rcpt_to)
        return
    get_usable_missed_message_address(rcpt_to)
|
2020-01-10 10:21:47 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2020-06-05 23:35:52 +02:00
|
|
|
def mirror_email_message(rcpt_to: str, msg_base64: str) -> Dict[str, str]:
    """Validate the recipient and queue the email for processing.

    Returns ``{"status": "success"}`` after publishing to the
    "email_mirror" queue, or a ``{"status": "error", "msg": ...}`` dict
    when the recipient address fails validation.
    """
    try:
        validate_to_address(rcpt_to)
    except ZulipEmailForwardError as e:
        rejection = f"5.1.1 Bad destination mailbox address: {e}"
        return {"status": "error", "msg": rejection}

    event = {
        "rcpt_to": rcpt_to,
        "msg_base64": msg_base64,
    }
    queue_json_publish("email_mirror", event)
    return {"status": "success"}
|
2019-03-16 11:39:09 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2019-03-16 11:39:09 +01:00
|
|
|
# Email mirror rate limiter code:
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2019-03-16 11:39:09 +01:00
|
|
|
class RateLimitedRealmMirror(RateLimitedObject):
    """Rate-limited object for email mirror traffic, keyed per realm."""

    def __init__(self, realm: Realm) -> None:
        # The realm is stored before the base class is initialized.
        self.realm = realm
        super().__init__()

    def key(self) -> str:
        """Return "<class name>:<realm string_id>"."""
        class_name = type(self).__name__
        return "{}:{}".format(class_name, self.realm.string_id)

    def rules(self) -> List[Tuple[int, int]]:
        """Return the rules configured in RATE_LIMITING_MIRROR_REALM_RULES."""
        configured_rules = settings.RATE_LIMITING_MIRROR_REALM_RULES
        return configured_rules
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2019-03-16 11:39:09 +01:00
|
|
|
def rate_limit_mirror_by_realm(recipient_realm: Realm) -> None:
    """Apply the email mirror rate limit for ``recipient_realm``.

    Raises RateLimitedError, carrying the value returned alongside the
    limit flag, when the realm is currently rate limited.
    """
    limiter = RateLimitedRealmMirror(recipient_realm)
    is_limited, secs_to_freedom = limiter.rate_limit()

    if not is_limited:
        return
    raise RateLimitedError(secs_to_freedom)
|