Extract reply from email.

This commit is contained in:
Umair Khan 2016-06-23 12:43:21 +05:00 committed by Tim Abbott
parent 75bd3541ea
commit f1f48f305e
6 changed files with 108 additions and 1 deletions

View File

@ -137,3 +137,5 @@ ujson==1.33
uritemplate==0.6
zope.interface==4.1.2
-r emailmirror.txt

View File

@ -0,0 +1,12 @@
talon==1.2.10
cchardet==1.0.0
cssselect==0.9.2
lxml==3.6.0
regex==2016.6.19
# Currently, Scikit-Learn and Numpy cannot be installed with pip in one
# install pass. See https://github.com/scikit-learn/scikit-learn/issues/4164
# for further details.
-r numpy.txt
scikit-learn==0.16.1

5
requirements/numpy.txt Normal file
View File

@ -0,0 +1,5 @@
# Currently, Scikit-Learn and Numpy cannot be installed with pip in one
# install pass. See https://github.com/scikit-learn/scikit-learn/issues/4164
# for further details.
numpy==1.11.0
scipy==0.17.1

View File

@ -28,6 +28,8 @@ VENV_DEPENDENCIES = [
"python-dev",
"python-pip",
"python-virtualenv",
"libxml2-dev", # Used for installing talon
"libxslt1-dev", # Used for installing talon
]
def setup_virtualenv(target_venv_path, requirements_file, virtualenv_args=None):
@ -67,5 +69,12 @@ def do_setup_virtualenv(venv_path, requirements_file, virtualenv_args):
exec(open(activate_this).read(), {}, dict(__file__=activate_this)) # type: ignore # https://github.com/python/mypy/issues/1577
run(["pip", "install", "--upgrade", "pip", "wheel"])
# Currently, Scikit-Learn and Numpy cannot be installed with pip in one
# install pass. See https://github.com/scikit-learn/scikit-learn/issues/4164
# for further details.
if 'dev.txt' in requirements_file or 'prod.txt' in requirements_file:
numpy = os.path.join(ZULIP_PATH, 'requirements', 'numpy.txt')
run(["pip", "install", "--no-deps", "--requirement", numpy])
run(["pip", "install", "--no-deps", "--requirement", requirements_file])
run(["sudo", "chmod", "-R", "a+rX", venv_path])

View File

@ -21,6 +21,10 @@ from zerver.models import Stream, Recipient, get_user_profile_by_email, \
Message, Realm, UserProfile
from six import text_type, binary_type
import six
import talon
from talon import quotations
talon.init()
logger = logging.getLogger(__name__)
@ -199,11 +203,12 @@ def extract_body(message):
# that.
plaintext_content = get_message_part_by_type(message, "text/plain")
if plaintext_content:
return plaintext_content
return quotations.extract_from_plain(plaintext_content)
# If we only have an HTML version, try to make that look nice.
html_content = get_message_part_by_type(message, "text/html")
if html_content:
html_content = quotations.extract_from_html(html_content)
return convert_html_to_markdown(html_content)
raise ZulipEmailForwardError("Unable to find plaintext or HTML message body")

View File

@ -208,3 +208,77 @@ class TestDigestEmailMessages(AuthedTestCase):
self.assertEqual(mock_send_future_email.call_count, 1)
self.assertEqual(mock_send_future_email.call_args[0][0][0]['email'],
u'othello@zulip.com')
class TestReplyExtraction(AuthedTestCase):
    """Incoming stream emails should be stored without their quoted text.

    Each test mails a body made of a short reply followed by quoted
    material to the "Denmark" stream and asserts that the stored message
    content is exactly the reply.
    """

    def _deliver_to_denmark(self, body, subtype='plain'):
        """Send `body` to the Denmark stream's email address as Hamlet.

        Logs Hamlet in, subscribes him to "Denmark", wraps `body` in a
        MIMEText part of the given `subtype`, hands the email to
        process_message(), and returns most_recent_message() for Hamlet.
        """
        self.login("hamlet@zulip.com")
        hamlet = get_user_profile_by_email("hamlet@zulip.com")
        self.subscribe_to_stream(hamlet.email, "Denmark")
        denmark = get_stream("Denmark", hamlet.realm)
        denmark_address = encode_email_address(denmark)

        email = MIMEText(body, subtype)
        email['Subject'] = 'TestStreamEmailMessages Subject'
        email['From'] = "hamlet@zulip.com"
        email['To'] = denmark_address
        email['Reply-to'] = "othello@zulip.com"
        process_message(email)

        # Hamlet is subscribed to the stream, so the processed email is
        # expected to be the latest message he can see.
        return most_recent_message(hamlet)

    def test_reply_is_extracted_from_plain(self):
        """A plain-text body keeps only the text above the quote marker."""
        plain_body = """Reply
-----Original Message-----
Quote"""
        delivered = self._deliver_to_denmark(plain_body)
        self.assertEqual(delivered.content, "Reply")

    def test_reply_is_extracted_from_html(self):
        """An HTML body keeps only the content outside the blockquote."""
        html_body = """
<html>
<body>
<p>Reply</p>
<blockquote>
<div>
On 11-Apr-2011, at 6:54 PM, Bob &lt;bob@example.com&gt; wrote:
</div>
<div>
Quote
</div>
</blockquote>
</body>
</html>
"""
        delivered = self._deliver_to_denmark(html_body, 'html')
        self.assertEqual(delivered.content, 'Reply')