From a0efd76f4e8b7d58352ad0379b025290ebbcbcb2 Mon Sep 17 00:00:00 2001 From: Mateusz Mandera Date: Fri, 22 Mar 2019 16:33:57 +0100 Subject: [PATCH] email_mirror: Rewrite log_and_report and cover it with tests. log_and_report and its helper functions were mostly old code no longer well adapted to how email mirror works currently, as well as having no test coverage. We rewrite this part of the email to report errors in a similar manner, and add tests for it. We're able to get rid of the clunky and now useless debug_info dictionary in process message, as log_and_report only needs the recipient email in its third argument. --- zerver/lib/email_mirror.py | 57 +++++++++----- zerver/tests/test_email_mirror.py | 126 +++++++++++++++++++++++++++++- 2 files changed, 163 insertions(+), 20 deletions(-) diff --git a/zerver/lib/email_mirror.py b/zerver/lib/email_mirror.py index d4013b9292..304935bdbb 100644 --- a/zerver/lib/email_mirror.py +++ b/zerver/lib/email_mirror.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, Optional, Tuple, List +from typing import Dict, Optional, Tuple, List import logging import re @@ -28,12 +28,34 @@ from zerver.models import Stream, Recipient, \ logger = logging.getLogger(__name__) -def redact_stream(error_message: str) -> str: - domain = settings.EMAIL_GATEWAY_PATTERN.rsplit('@')[-1] - stream_match = re.search('\\b(.*?)@' + domain, error_message) - if stream_match: - stream_name = stream_match.groups()[0] - return error_message.replace(stream_name, "X" * len(stream_name)) +def redact_email_address(error_message: str) -> str: + if not settings.EMAIL_GATEWAY_EXTRA_PATTERN_HACK: + domain = settings.EMAIL_GATEWAY_PATTERN.rsplit('@')[-1] + else: + # EMAIL_GATEWAY_EXTRA_PATTERN_HACK is of the form '@example.com' + domain = settings.EMAIL_GATEWAY_EXTRA_PATTERN_HACK[1:] + + address_match = re.search('\\b(\\S*?)@' + domain, error_message) + if address_match: + email_address = address_match.group(0) + # Annotate basic info about the address before scrubbing: + if is_missed_message_address(email_address): + redacted_message = error_message.replace(email_address, + "{} ".format(email_address)) + else: + try: + target_stream_id = extract_and_validate(email_address)[0].id + annotated_address = "{}
".format(email_address, target_stream_id) + redacted_message = error_message.replace(email_address, annotated_address) + except ZulipEmailForwardError: + redacted_message = error_message.replace(email_address, + "{} ".format(email_address)) + + # Scrub the address from the message, to the form XXXXX@example.com: + string_to_scrub = address_match.groups()[0] + redacted_message = redacted_message.replace(string_to_scrub, "X" * len(string_to_scrub)) + return redacted_message + return error_message def report_to_zulip(error_message: str) -> None: @@ -44,16 +66,14 @@ def report_to_zulip(error_message: str) -> None: send_zulip(settings.ERROR_BOT, error_stream, "email mirror error", """~~~\n%s\n~~~""" % (error_message,)) -def log_and_report(email_message: message.Message, error_message: str, debug_info: Dict[str, Any]) -> None: - scrubbed_error = u"Sender: %s\n%s" % (email_message.get("From"), - redact_stream(error_message)) +def log_and_report(email_message: message.Message, error_message: str, to: Optional[str]) -> None: + recipient = to or "No recipient found" + error_message = "Sender: {}\nTo: {}\n{}".format(email_message.get("From"), + recipient, error_message) - if "to" in debug_info: - scrubbed_error = "Stream: %s\n%s" % (redact_stream(debug_info["to"]), - scrubbed_error) - - logger.error(scrubbed_error) - report_to_zulip(scrubbed_error) + error_message = redact_email_address(error_message) + logger.error(error_message) + report_to_zulip(error_message) # Temporary missed message addresses @@ -349,14 +369,13 @@ def process_missed_message(to: str, message: message.Message, pre_checked: bool) send_to_missed_message_address(to, message) def process_message(message: message.Message, rcpt_to: Optional[str]=None, pre_checked: bool=False) -> None: - debug_info = {} + to = None # type: Optional[str] try: if rcpt_to is not None: to = rcpt_to else: to = find_emailgateway_recipient(message) - debug_info["to"] = to if is_missed_message_address(to): process_missed_message(to, message, pre_checked) @@ -367,7 +386,7 @@ def process_message(message: message.Message, rcpt_to: Optional[str]=None, pre_c # TODO: notify sender of error, retry if appropriate. logging.warning(str(e)) else: - log_and_report(message, str(e), debug_info) + log_and_report(message, str(e), to) def mirror_email_message(data: Dict[str, str]) -> Dict[str, str]: rcpt_to = data['recipient'] diff --git a/zerver/tests/test_email_mirror.py b/zerver/tests/test_email_mirror.py index 5c1fd70aeb..b3806cdda4 100644 --- a/zerver/tests/test_email_mirror.py +++ b/zerver/tests/test_email_mirror.py @@ -30,6 +30,8 @@ from zerver.lib.email_mirror import ( strip_from_subject, is_forwarded, filter_footer, + log_and_report, + redact_email_address, ZulipEmailForwardError, ) @@ -919,4 +921,126 @@ class TestEmailMirrorProcessMessageNoValidRecipient(ZulipTestCase): with mock.patch("zerver.lib.email_mirror.log_and_report") as mock_log_and_report: process_message(incoming_valid_message) mock_log_and_report.assert_called_with(incoming_valid_message, - "Missing recipient in mirror email", {}) + "Missing recipient in mirror email", None) + +class TestEmailMirrorLogAndReport(ZulipTestCase): + def test_log_and_report(self) -> None: + user_profile = self.example_user('hamlet') + self.login(user_profile.email) + self.subscribe(user_profile, "errors") + stream = get_stream("Denmark", user_profile.realm) + stream_to_address = encode_email_address(stream) + + address_parts = stream_to_address.split('@') + scrubbed_address = 'X'*len(address_parts[0]) + '@' + address_parts[1] + + incoming_valid_message = MIMEText('Test Body') + incoming_valid_message['Subject'] = "Test Subject" + incoming_valid_message['From'] = self.example_email('hamlet') + incoming_valid_message['To'] = stream_to_address + + log_and_report(incoming_valid_message, "test error message", stream_to_address) + message = most_recent_message(user_profile) + + self.assertEqual("email mirror error", message.topic_name()) + + msg_content = message.content.strip('~').strip() + expected_content = "Sender: {}\nTo: {}
\ntest error message" + expected_content = expected_content.format(self.example_email('hamlet'), scrubbed_address, + stream.id) + self.assertEqual(msg_content, expected_content) + + log_and_report(incoming_valid_message, "test error message", None) + message = most_recent_message(user_profile) + self.assertEqual("email mirror error", message.topic_name()) + msg_content = message.content.strip('~').strip() + expected_content = "Sender: {}\nTo: No recipient found\ntest error message" + expected_content = expected_content.format(self.example_email('hamlet')) + self.assertEqual(msg_content, expected_content) + + @mock.patch('zerver.lib.email_mirror.logger.error') + def test_log_and_report_no_errorbot(self, mock_error: mock.MagicMock) -> None: + with self.settings(ERROR_BOT=None): + incoming_valid_message = MIMEText('Test Body') + incoming_valid_message['Subject'] = "Test Subject" + incoming_valid_message['From'] = self.example_email('hamlet') + log_and_report(incoming_valid_message, "test error message", None) + + expected_content = "Sender: {}\nTo: No recipient found\ntest error message" + expected_content = expected_content.format(self.example_email('hamlet')) + mock_error.assert_called_with(expected_content) + + def test_redact_email_address(self) -> None: + user_profile = self.example_user('hamlet') + self.login(user_profile.email) + self.subscribe(user_profile, "errors") + stream = get_stream("Denmark", user_profile.realm) + + # Test for a stream address: + stream_to_address = encode_email_address(stream) + stream_address_parts = stream_to_address.split('@') + scrubbed_stream_address = 'X'*len(stream_address_parts[0]) + '@' + stream_address_parts[1] + + error_message = "test message {}" + error_message = error_message.format(stream_to_address) + expected_message = "test message {}
" + expected_message = expected_message.format(scrubbed_stream_address, stream.id) + + redacted_message = redact_email_address(error_message) + self.assertEqual(redacted_message, expected_message) + + # Test for an invalid email address: + invalid_address = "invalid@testserver" + error_message = "test message {}" + error_message = error_message.format(invalid_address) + expected_message = "test message {} " + expected_message = expected_message.format('XXXXXXX@testserver') + + redacted_message = redact_email_address(error_message) + self.assertEqual(redacted_message, expected_message) + + # Test for a missed message address: + result = self.client_post( + "/json/messages", + { + "type": "private", + "content": "test_redact_email_message", + "client": "test suite", + "to": ujson.dumps([self.example_email('cordelia'), self.example_email('iago')]) + }) + self.assert_json_success(result) + + cordelia_profile = self.example_user('cordelia') + user_message = most_recent_usermessage(cordelia_profile) + mm_address = create_missed_message_address(user_profile, user_message.message) + + error_message = "test message {}" + error_message = error_message.format(mm_address) + expected_message = "test message {} " + expected_message = expected_message.format('X'*34 + '@testserver') + + redacted_message = redact_email_address(error_message) + self.assertEqual(redacted_message, expected_message) + + # Test if redacting correctly scrubs multiple occurrences of the address: + error_message = "test message first occurrence: {} second occurrence: {}" + error_message = error_message.format(stream_to_address, stream_to_address) + expected_message = "test message first occurrence: {}
" + expected_message += " second occurrence: {}
" + expected_message = expected_message.format(scrubbed_stream_address, stream.id, + scrubbed_stream_address, stream.id) + + redacted_message = redact_email_address(error_message) + self.assertEqual(redacted_message, expected_message) + + # Test with EMAIL_GATEWAY_EXTRA_PATTERN_HACK: + with self.settings(EMAIL_GATEWAY_EXTRA_PATTERN_HACK='@zulip.org'): + stream_to_address = stream_to_address.replace('@testserver', '@zulip.org') + scrubbed_stream_address = scrubbed_stream_address.replace('@testserver', '@zulip.org') + error_message = "test message {}" + error_message = error_message.format(stream_to_address) + expected_message = "test message {}
" + expected_message = expected_message.format(scrubbed_stream_address, stream.id) + + redacted_message = redact_email_address(error_message) + self.assertEqual(redacted_message, expected_message)