diff --git a/requirements/common.txt b/requirements/common.txt index af61d2daad..ea38bf26af 100644 --- a/requirements/common.txt +++ b/requirements/common.txt @@ -137,3 +137,5 @@ ujson==1.33 uritemplate==0.6 zope.interface==4.1.2 + +-r emailmirror.txt diff --git a/requirements/emailmirror.txt b/requirements/emailmirror.txt new file mode 100644 index 0000000000..0b1b6a972c --- /dev/null +++ b/requirements/emailmirror.txt @@ -0,0 +1,12 @@ +talon==1.2.10 +cchardet==1.0.0 +cssselect==0.9.2 +lxml==3.6.0 +regex==2016.6.19 + +# Currently, Scikit-Learn and Numpy cannot be installed with pip in one +# install pass. See https://github.com/scikit-learn/scikit-learn/issues/4164 +# for further details. +-r numpy.txt + +scikit-learn==0.16.1 diff --git a/requirements/numpy.txt b/requirements/numpy.txt new file mode 100644 index 0000000000..89b6297f3d --- /dev/null +++ b/requirements/numpy.txt @@ -0,0 +1,5 @@ +# Currently, Scikit-Learn and Numpy cannot be installed with pip in one +# install pass. See https://github.com/scikit-learn/scikit-learn/issues/4164 +# for further details. +numpy==1.11.0 +scipy==0.17.1 diff --git a/scripts/lib/setup_venv.py b/scripts/lib/setup_venv.py index 7eafaabf8a..fbb0547435 100644 --- a/scripts/lib/setup_venv.py +++ b/scripts/lib/setup_venv.py @@ -28,6 +28,8 @@ VENV_DEPENDENCIES = [ "python-dev", "python-pip", "python-virtualenv", + "libxml2-dev", # Used for installing talon + "libxslt1-dev", # Used for installing talon ] def setup_virtualenv(target_venv_path, requirements_file, virtualenv_args=None): @@ -67,5 +69,12 @@ def do_setup_virtualenv(venv_path, requirements_file, virtualenv_args): exec(open(activate_this).read(), {}, dict(__file__=activate_this)) # type: ignore # https://github.com/python/mypy/issues/1577 run(["pip", "install", "--upgrade", "pip", "wheel"]) + # Currently, Scikit-Learn and Numpy cannot be installed with pip in one + # install pass. See https://github.com/scikit-learn/scikit-learn/issues/4164 + # for further details. + if 'dev.txt' in requirements_file or 'prod.txt' in requirements_file: + numpy = os.path.join(ZULIP_PATH, 'requirements', 'numpy.txt') + run(["pip", "install", "--no-deps", "--requirement", numpy]) + run(["pip", "install", "--no-deps", "--requirement", requirements_file]) run(["sudo", "chmod", "-R", "a+rX", venv_path]) diff --git a/zerver/lib/email_mirror.py b/zerver/lib/email_mirror.py index 77b4802c45..aa1250c229 100644 --- a/zerver/lib/email_mirror.py +++ b/zerver/lib/email_mirror.py @@ -21,6 +21,10 @@ from zerver.models import Stream, Recipient, get_user_profile_by_email, \ Message, Realm, UserProfile from six import text_type, binary_type import six +import talon +from talon import quotations + +talon.init() logger = logging.getLogger(__name__) @@ -199,11 +203,12 @@ def extract_body(message): # that. plaintext_content = get_message_part_by_type(message, "text/plain") if plaintext_content: - return plaintext_content + return quotations.extract_from_plain(plaintext_content) # If we only have an HTML version, try to make that look nice. html_content = get_message_part_by_type(message, "text/html") if html_content: + html_content = quotations.extract_from_html(html_content) return convert_html_to_markdown(html_content) raise ZulipEmailForwardError("Unable to find plaintext or HTML message body") diff --git a/zerver/tests/test_email_mirror.py b/zerver/tests/test_email_mirror.py index cb0b7488ea..3808317342 100644 --- a/zerver/tests/test_email_mirror.py +++ b/zerver/tests/test_email_mirror.py @@ -208,3 +208,77 @@ class TestDigestEmailMessages(AuthedTestCase): self.assertEqual(mock_send_future_email.call_count, 1) self.assertEqual(mock_send_future_email.call_args[0][0][0]['email'], u'othello@zulip.com') + +class TestReplyExtraction(AuthedTestCase): + def test_reply_is_extracted_from_plain(self): + + # build dummy messages for stream + # test valid incoming stream message is processed properly + self.login("hamlet@zulip.com") + user_profile = get_user_profile_by_email("hamlet@zulip.com") + self.subscribe_to_stream(user_profile.email, "Denmark") + stream = get_stream("Denmark", user_profile.realm) + + stream_to_address = encode_email_address(stream) + text = """Reply + + -----Original Message----- + + Quote""" + + incoming_valid_message = MIMEText(text) + + incoming_valid_message['Subject'] = 'TestStreamEmailMessages Subject' + incoming_valid_message['From'] = "hamlet@zulip.com" + incoming_valid_message['To'] = stream_to_address + incoming_valid_message['Reply-to'] = "othello@zulip.com" + + process_message(incoming_valid_message) + + # Hamlet is subscribed to this stream so should see the email message from Othello. + message = most_recent_message(user_profile) + + self.assertEqual(message.content, "Reply") + + def test_reply_is_extracted_from_html(self): + + # build dummy messages for stream + # test valid incoming stream message is processed properly + self.login("hamlet@zulip.com") + user_profile = get_user_profile_by_email("hamlet@zulip.com") + self.subscribe_to_stream(user_profile.email, "Denmark") + stream = get_stream("Denmark", user_profile.realm) + + stream_to_address = encode_email_address(stream) + html = """ + + +

Reply

+
+ +
+ On 11-Apr-2011, at 6:54 PM, Bob <bob@example.com> wrote: +
+ +
+ Quote +
+ +
+ + + """ + + incoming_valid_message = MIMEText(html, 'html') + + incoming_valid_message['Subject'] = 'TestStreamEmailMessages Subject' + incoming_valid_message['From'] = "hamlet@zulip.com" + incoming_valid_message['To'] = stream_to_address + incoming_valid_message['Reply-to'] = "othello@zulip.com" + + process_message(incoming_valid_message) + + # Hamlet is subscribed to this stream so should see the email message from Othello. + message = most_recent_message(user_profile) + + self.assertEqual(message.content, 'Reply')