mirror of https://github.com/zulip/zulip.git
Move use of html2text to a subprocess call.
(imported from commit 36e8a6f030d75196c28fbdc0e58c6968952d95ff)
This commit is contained in:
parent
0b592a27b1
commit
8382e074fe
|
@ -58,6 +58,7 @@ import platform
|
|||
import logging
|
||||
from collections import defaultdict
|
||||
import urllib
|
||||
import subprocess
|
||||
|
||||
# Store an event in the log for re-importing messages
|
||||
def log_event(event):
|
||||
|
@ -2190,3 +2191,26 @@ def alias_for_realm(domain):
|
|||
return RealmAlias.objects.get(domain=domain)
|
||||
except RealmAlias.DoesNotExist:
|
||||
return None
|
||||
|
||||
def convert_html_to_markdown(html):
    """Convert an HTML document to Markdown by piping it through html2text.

    The conversion is done by an external command rather than an in-process
    library call.  Each candidate command name is tried in order and the
    first one that can be spawned is used.

    Parameters:
        html: the HTML to convert.  Text is encoded as UTF-8 before being
            written to the tool's stdin; byte strings are passed unchanged.

    Returns:
        The converted Markdown as text, stripped of surrounding whitespace,
        with image links of the form ``![](http://host/img.png?123)``
        rewritten to ``[img.png](http://host/img.png)``.

    Raises:
        OSError: if none of the candidate commands could be started.
    """
    # On Linux, the tool installs as html2markdown, and there's a command called
    # html2text that does something totally different. On OSX, the tool installs
    # as html2text.
    commands = ["html2markdown", "html2text"]

    for command in commands:
        try:
            # A body width of 0 means do not try to wrap the text for us.
            p = subprocess.Popen(
                [command, "--body-width=0"], stdout=subprocess.PIPE,
                stdin=subprocess.PIPE, stderr=subprocess.STDOUT)
        except OSError:
            # This name is not installed here; try the next candidate.
            continue
        # Stop at the first command that starts, so we never spawn (and
        # leak) a second process.
        break
    else:
        # No candidate could be started.  Fail with an explicit error instead
        # of falling through to an unbound `p`.
        raise OSError(
            "Unable to run any HTML-to-Markdown converter (tried: %s)"
            % (", ".join(commands),))

    # The pipe carries bytes, so encode text input at the boundary.
    if not isinstance(html, bytes):
        html = html.encode("utf-8")

    output = p.communicate(input=html)[0]
    # Decode so the regex below operates on text; undecodable bytes are
    # replaced rather than aborting the whole conversion.
    markdown = output.decode("utf-8", "replace").strip()

    # We want images to get linked and inline previewed, but html2text will turn
    # them into links of the form `![](http://foo.com/image.png)`, which is
    # ugly. Run a regex over the resulting description, turning links of the
    # form `![](http://foo.com/image.png?12345)` into
    # `[image.png](http://foo.com/image.png)`.
    return re.sub(r"!\[\]\((\S*)/(\S*)\?(\S*)\)", r"[\2](\1/\2)", markdown)
|
||||
|
|
|
@ -27,15 +27,13 @@ import sys
|
|||
from django.conf import settings
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from zerver.lib.actions import decode_email_address
|
||||
from zerver.lib.actions import decode_email_address, convert_html_to_markdown
|
||||
from zerver.lib.upload import upload_message_image
|
||||
from zerver.models import Stream, get_user_profile_by_email, UserProfile
|
||||
|
||||
from twisted.internet import protocol, reactor, ssl
|
||||
from twisted.mail import imap4
|
||||
|
||||
import html2text
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../../api"))
|
||||
import zulip
|
||||
|
||||
|
@ -159,10 +157,7 @@ def extract_body(message):
|
|||
# If we only have an HTML version, try to make that look nice.
|
||||
html_content = get_message_part_by_type(message, "text/html")
|
||||
if html_content:
|
||||
converter = html2text.HTML2Text()
|
||||
converter.ignore_links = True
|
||||
converter.ignore_images = True
|
||||
return converter.handle(html_content)
|
||||
return convert_html_to_markdown(html_content)
|
||||
|
||||
raise ZulipEmailForwardError("Unable to find plaintext or HTML message body")
|
||||
|
||||
|
|
|
@ -6,7 +6,7 @@ from django.conf import settings
|
|||
from django.views.decorators.csrf import csrf_exempt
|
||||
from zerver.models import UserProfile, get_client, MAX_SUBJECT_LENGTH, \
|
||||
get_user_profile_by_email
|
||||
from zerver.lib.actions import check_send_message
|
||||
from zerver.lib.actions import check_send_message, convert_html_to_markdown
|
||||
from zerver.lib.response import json_success, json_error
|
||||
from zerver.decorator import authenticated_api_view, REQ, \
|
||||
has_request_variables, json_to_dict, authenticated_rest_api_view, \
|
||||
|
@ -15,7 +15,6 @@ from zerver.views import send_message_backend
|
|||
from django.db.models import Q
|
||||
|
||||
from defusedxml.ElementTree import fromstring as xml_fromstring
|
||||
import html2text
|
||||
|
||||
import base64
|
||||
import logging
|
||||
|
@ -646,19 +645,7 @@ def format_freshdesk_property_change_message(ticket, event_info):
|
|||
|
||||
def format_freshdesk_ticket_creation_message(ticket):
|
||||
# They send us the description as HTML.
|
||||
html2text.BODY_WIDTH = 0 # Do not try to wrap the text for us.
|
||||
converter = html2text.HTML2Text()
|
||||
converter.ignore_links = False
|
||||
converter.ignore_images = False
|
||||
cleaned_description = converter.handle(ticket.description).strip()
|
||||
# We want images to get linked and inline previewed, but html2text will turn
|
||||
# them into links of the form `![](http://foo.com/image.png)`, which is
|
||||
# ugly. Run a regex over the resulting description, turning links of the
|
||||
# form `![](http://foo.com/image.png?12345)` into
|
||||
# `[image.png](http://foo.com/image.png)`.
|
||||
cleaned_description = re.sub(r"!\[\]\((\S*)/(\S*)\?(\S*)\)", r"[\2](\1/\2)",
|
||||
cleaned_description)
|
||||
|
||||
cleaned_description = convert_html_to_markdown(ticket.description)
|
||||
content = "%s <%s> created [ticket #%s](%s):\n\n" % (
|
||||
ticket.requester_name, ticket.requester_email, ticket.id, ticket.url)
|
||||
content += """~~~ quote
|
||||
|
|
Loading…
Reference in New Issue