Don't puke on non-ASCII characters when shelling out to html2text.

(imported from commit a407c9fb2a090075d5e26b5db00388f4f81de1f5)
2013-11-15 11:10:45 -05:00 · 2013-11-15 11:10:45 -05:00 · b0209fec05
parent 545e2539f8
commit b0209fec05
1 changed files with 3 additions and 2 deletions
--- a/zerver/lib/actions.py
+++ b/zerver/lib/actions.py
@@ -2227,10 +2227,11 @@ def convert_html_to_markdown(html):
        except OSError:
            continue

-    markdown = p.communicate(input=html)[0].strip()
+    markdown = p.communicate(input=html.encode("utf-8"))[0].strip()
    # We want images to get linked and inline previewed, but html2text will turn
    # them into links of the form `![](http://foo.com/image.png)`, which is
    # ugly. Run a regex over the resulting description, turning links of the
    # form `![](http://foo.com/image.png?12345)` into
    # `[image.png](http://foo.com/image.png)`.
-    return re.sub(r"!\[\]\((\S*)/(\S*)\?(\S*)\)", r"[\2](\1/\2)", markdown)
+    return re.sub(r"!\[\]\((\S*)/(\S*)\?(\S*)\)",
+                  r"[\2](\1/\2)", markdown).decode("utf-8")