zulip/zephyr/lib/bugdown/__init__.py

import markdown
import logging
import traceback
import re

from zephyr.lib.avatar  import gravatar_hash
from zephyr.lib.bugdown import codehilite, fenced_code

class Gravatar(markdown.inlinepatterns.Pattern):
    """Inline pattern that renders a Gravatar image for a matched email.

    The regex this pattern is constructed with must define a named group
    ``email``; its value is passed to gravatar_hash() to build the image URL.
    """

    def handleMatch(self, match):
        """Return an ``<img>`` element for the email captured by *match*."""
        email_hash = gravatar_hash(match.group('email'))
        avatar_url = ('https://secure.gravatar.com/avatar/%s?d=identicon&s=30'
                      % (email_hash,))

        element = markdown.util.etree.Element('img')
        element.set('class', 'message_body_gravatar img-rounded')
        element.set('src', avatar_url)
        return element

class AutoLink(markdown.inlinepatterns.Pattern):
    """Inline pattern that turns a matched URL into a link to itself.

    The regex this pattern is constructed with must define a named group
    ``url``.
    """

    def handleMatch(self, match):
        """Return an ``<a>`` element whose href and text are both the URL."""
        target = match.group('url')
        anchor = markdown.util.etree.Element('a')
        anchor.text = target
        anchor.set('href', target)
        return anchor

class UListProcessor(markdown.blockprocessors.OListProcessor):
    """Block processor for unordered (bulleted) lists.

    Modelled on markdown.blockprocessors.UListProcessor; the difference is
    that RE below only recognizes '*' and '-' as bullets, so '+' is not
    accepted as a bullet character.
    """

    # Item pattern: up to three leading spaces, a '*' or '-' bullet, one or
    # more spaces, then the item text (captured).
    RE = re.compile(r'^[ ]{0,3}[*-][ ]+(.*)')

    # HTML tag used for the list element.
    TAG = 'ul'

class Bugdown(markdown.Extension):
    """Markdown extension that installs this module's parser customizations."""

    def extendMarkdown(self, md, md_globals):
        """Drop unwanted built-in patterns/processors from *md* and add ours."""
        # Built-in inline patterns that are removed outright; 'autolink' is
        # re-registered below with our own implementation.
        inline_patterns = md.inlinePatterns
        for pattern_name in ('image_link', 'image_reference', 'automail', 'autolink'):
            del inline_patterns[pattern_name]

        # Built-in block processors that are removed; 'ulist' is re-registered
        # immediately afterwards with our own UListProcessor.
        block_processors = md.parser.blockprocessors
        for processor_name in ('hashheader', 'setextheader', 'olist', 'ulist'):
            del block_processors[processor_name]

        block_processors.add('ulist', UListProcessor(md.parser), '>hr')

        gravatar_regex = r'!gravatar\((?P<email>[^)]*)\)'
        inline_patterns.add('gravatar', Gravatar(gravatar_regex), '_begin')

        # A link starts at a word boundary with http:// or https:// and runs
        # until the next whitespace or bracket character ([](){}<>).
        link_regex = r'\b(?P<url>https?://[^\s[\](){}<>]+)'
        inline_patterns.add('autolink', AutoLink(link_regex), '>link')

# We need to re-initialize the markdown engine every MAX_MD_ENGINE_USES
# messages due to some sort of performance leak in the markdown library.
MAX_MD_ENGINE_USES = 30

# Shared engine instance used by convert().  None means no engine exists yet
# (or the previous one was discarded) and convert() must build a new one.
_md_engine = None
# Number of convert() calls since the counter was last reset; convert()
# discards the engine and zeroes this once it reaches MAX_MD_ENGINE_USES.
_use_count = 0

# Markdown parser failures get logged, but the raw message must not end up in
# the logs for privacy reasons.  As a compromise, every word character (as
# matched by \w with re.UNICODE) is replaced with 'x', which keeps the
# punctuation and layout that the parser actually reacts to.
#
# The masked text is then passed through repr(), both to make failures easier
# to reproduce and to escape terminal control codes, which can do surprisingly
# nasty things.
_privacy_re = re.compile(r'\w', flags=re.UNICODE)
def _sanitize_for_log(md):
    """Return the repr() of *md* with every word character replaced by 'x'."""
    masked = _privacy_re.sub('x', md)
    return repr(masked)

def _linkify(match):
    """Return a space-padded Markdown link for the URL captured by *match*.

    *match* must expose a named group ``url``; the URL is used as both the
    link text and the link target.
    """
    target = match.group('url')
    return ' [%(url)s](%(url)s) ' % {'url': target}

def convert(md):
    """Convert Markdown to HTML, with Humbug-specific settings and hacks.

    A single module-level engine is built on first use and reused across
    calls.  After MAX_MD_ENGINE_USES calls it is discarded, so the following
    call builds a fresh one.

    If the parser raises an ordinary exception, the traceback is logged
    together with a sanitized copy of the input, and a placeholder paragraph
    is returned in place of the rendered message.  Interpreter-level signals
    (KeyboardInterrupt, SystemExit) are not intercepted.

    Returns the rendered HTML as a string.
    """
    global _md_engine, _use_count

    if _md_engine is None:
        _md_engine = markdown.Markdown(
            safe_mode     = 'escape',
            output_format = 'html',
            extensions    = ['nl2br',
                codehilite.makeExtension(configs=[
                    ('force_linenos', False),
                    ('guess_lang',    False)]),
                fenced_code.makeExtension(),
                Bugdown()])

    try:
        html = _md_engine.convert(md)
    except Exception:
        # Catch Exception rather than using a bare ``except:`` so that
        # KeyboardInterrupt and SystemExit still propagate instead of being
        # turned into a "could not understand" message.
        # FIXME: Do something more reasonable here!
        html = '<p>[Humbug note: Sorry, we could not understand the formatting of your message]</p>'
        # Only the sanitized input is logged; see _sanitize_for_log.
        logging.getLogger('').error(
            'Exception in Markdown parser: %sInput (sanitized) was: %s',
            traceback.format_exc(), _sanitize_for_log(md))

    # Count failed conversions too, so the engine is still recycled on
    # schedule when messages fail to parse.
    _use_count += 1
    if _use_count >= MAX_MD_ENGINE_USES:
        _md_engine = None
        _use_count = 0

    return html
Move our various Markdown hacks into their own file (imported from commit b03a5c64cc95964936c4aba7d667807969e35d21) 2012-10-15 22:03:50 +02:00			`import markdown`
bugdown: Log Markdown parser failures (imported from commit 9e225a32b71edbfd9007cc2fbae32be31896233d) 2012-10-22 05:06:28 +02:00			`import logging`
			`import traceback`
bugdown: Hide alphanumeric characters in exception logs, for privacy (imported from commit 39481494b7910307f56e566035c1b464c83d196e) 2012-10-25 21:38:47 +02:00			`import re`
Move our various Markdown hacks into their own file (imported from commit b03a5c64cc95964936c4aba7d667807969e35d21) 2012-10-15 22:03:50 +02:00
Use our local copy of codehilite (imported from commit bc9704a19017061d55ff0e16589d57ee0c46caa5) 2012-10-20 05:34:14 +02:00			`from zephyr.lib.avatar import gravatar_hash`
bugdown: Use our local copy of fenced_code And wire it up to our local copy of codehilite. This fixes highlighting in fenced code blocks, e.g. ~~~~ .js var x = function () { return "hi"; }; ~~~~ (imported from commit 0efb0c9b98a3acdf55e18bb1918af7960f3425be) 2012-11-19 18:31:03 +01:00			`from zephyr.lib.bugdown import codehilite, fenced_code`
Allow users to embed Gravatars in messages We'll use this internally for the commit bot. We might eventually disable it for external users. (imported from commit 3136cd9faadc6b81355889d2ee6472985da87fbe) 2012-10-17 04:42:19 +02:00
			`class Gravatar(markdown.inlinepatterns.Pattern):`
			`def handleMatch(self, match):`
			`img = markdown.util.etree.Element('img')`
			`img.set('class', 'message_body_gravatar img-rounded')`
			`img.set('src', 'https://secure.gravatar.com/avatar/%s?d=identicon&s=30'`
bugdown: Use named match for Gravatar email (imported from commit 778c4b6d754f975c89a91336593d2f62e49249d5) 2012-10-22 02:15:44 +02:00			`% (gravatar_hash(match.group('email')),))`
Allow users to embed Gravatars in messages We'll use this internally for the commit bot. We might eventually disable it for external users. (imported from commit 3136cd9faadc6b81355889d2ee6472985da87fbe) 2012-10-17 04:42:19 +02:00			`return img`

bugdown: Linkify using a Markdown extension This prevents trying to linkify inside code blocks. (imported from commit 97dd20ecee19f41650aa98e68aa8e9908ece5b33) 2012-10-22 02:32:18 +02:00			`class AutoLink(markdown.inlinepatterns.Pattern):`
			`def handleMatch(self, match):`
			`url = match.group('url')`
			`a = markdown.util.etree.Element('a')`
			`a.set('href', url)`
			`a.text = url`
			`return a`

bugdown: Disable + as a bullet character for an unordered list Fixes #272. (imported from commit 8afaf14965ed1f6a4bb3ccfc9d4c2d807148666d) 2012-11-02 18:25:37 +01:00			`class UListProcessor(markdown.blockprocessors.OListProcessor):`
			`""" Process unordered list blocks.`

			`Based on markdown.blockprocessors.UListProcessor, but does not accept`
			`'+' as a bullet character."""`

			`TAG = 'ul'`
			`RE = re.compile(r'^[ ]{0,3}[*-][ ]+(.*)')`

Markdown: Disable images (imported from commit 6656b15fa690b463265af6384a73529ee635f688) 2012-10-16 17:35:58 +02:00			`class Bugdown(markdown.Extension):`
			`def extendMarkdown(self, md, md_globals):`
bugdown: Disable builtin 'autolink' feature This only linkifies inside angle brackets, per print md.inlinePatterns['autolink'].getCompiledRegExp().pattern We have our own linkification extension. (imported from commit 20cab11aaafee075e0caf933d8d197717976988c) 2012-10-22 02:40:58 +02:00			`for k in ('image_link', 'image_reference', 'automail', 'autolink'):`
bugdown: Decrease code duplication (imported from commit 40158134b60e477d8a353d050fa62c9ded0e9e9f) 2012-10-22 02:35:36 +02:00			`del md.inlinePatterns[k]`

bugdown: Disable + as a bullet character for an unordered list Fixes #272. (imported from commit 8afaf14965ed1f6a4bb3ccfc9d4c2d807148666d) 2012-11-02 18:25:37 +01:00			`for k in ('hashheader', 'setextheader', 'olist', 'ulist'):`
bugdown: Decrease code duplication (imported from commit 40158134b60e477d8a353d050fa62c9ded0e9e9f) 2012-10-22 02:35:36 +02:00			`del md.parser.blockprocessors[k]`
Markdown: Disable images (imported from commit 6656b15fa690b463265af6384a73529ee635f688) 2012-10-16 17:35:58 +02:00
bugdown: Disable + as a bullet character for an unordered list Fixes #272. (imported from commit 8afaf14965ed1f6a4bb3ccfc9d4c2d807148666d) 2012-11-02 18:25:37 +01:00			`md.parser.blockprocessors.add('ulist', UListProcessor(md.parser), '>hr')`

bugdown: Use named match for Gravatar email (imported from commit 778c4b6d754f975c89a91336593d2f62e49249d5) 2012-10-22 02:15:44 +02:00			`md.inlinePatterns.add('gravatar', Gravatar(r'!gravatar\((?P<email>[^)]*)\)'), '_begin')`
Allow users to embed Gravatars in messages We'll use this internally for the commit bot. We might eventually disable it for external users. (imported from commit 3136cd9faadc6b81355889d2ee6472985da87fbe) 2012-10-17 04:42:19 +02:00
bugdown: Linkify using a Markdown extension This prevents trying to linkify inside code blocks. (imported from commit 97dd20ecee19f41650aa98e68aa8e9908ece5b33) 2012-10-22 02:32:18 +02:00			`# A link starts after whitespace and continues to the next whitespace.`
bugdown: Disallow ()[]{}<> in linkified URLs. This allows us to handle messages like my website (available at http://google.com) (imported from commit 51330507947fc039b6f29f06dfa1c6d21f779aa4) 2012-10-22 23:30:05 +02:00			`link_regex = r'\b(?P<url>https?://[^\s[\](){}<>]+)'`
bugdown: Install autolink pattern after link pattern This fixes explicit links. (imported from commit 6867d271344d35c2c8d54b3393219113a095bc4f) 2012-10-22 23:47:49 +02:00			`md.inlinePatterns.add('autolink', AutoLink(link_regex), '>link')`
bugdown: Linkify using a Markdown extension This prevents trying to linkify inside code blocks. (imported from commit 97dd20ecee19f41650aa98e68aa8e9908ece5b33) 2012-10-22 02:32:18 +02:00
Move our various Markdown hacks into their own file (imported from commit b03a5c64cc95964936c4aba7d667807969e35d21) 2012-10-15 22:03:50 +02:00			`# We need to re-initialize the markdown engine every 30 messages`
			`# due to some sort of performance leak in the markdown library.`
			`MAX_MD_ENGINE_USES = 30`

			`_md_engine = None`
			`_use_count = 0`

bugdown: Hide alphanumeric characters in exception logs, for privacy (imported from commit 39481494b7910307f56e566035c1b464c83d196e) 2012-10-25 21:38:47 +02:00			`# We want to log Markdown parser failures, but shouldn't log the actual input`
			`# message for privacy reasons. The compromise is to replace all alphanumeric`
			`# characters with 'x'.`
			`#`
			`# We also use repr() to improve reproducibility, and to escape terminal control`
			`# codes, which can do surprisingly nasty things.`
			`_privacy_re = re.compile(r'\w', flags=re.UNICODE)`
			`def _sanitize_for_log(md):`
			`return repr(_privacy_re.sub('x', md))`

Move our various Markdown hacks into their own file (imported from commit b03a5c64cc95964936c4aba7d667807969e35d21) 2012-10-15 22:03:50 +02:00			`def _linkify(match):`
			`url = match.group('url')`
			`return ' [%s](%s) ' % (url, url)`

			`def convert(md):`
			`"""Convert Markdown to HTML, with Humbug-specific settings and hacks."""`
			`global _md_engine, _use_count`

			`if _md_engine is None:`
			`_md_engine = markdown.Markdown(`
			`safe_mode = 'escape',`
bugdown: Generate HTML, not XHTML Our pages are declared as HTML5: <!DOCTYPE html> The markdown library only supports HTML4, but that's probably closer than XHTML. (imported from commit c78be9ae9bccf029def8d94d3647b0ccce8b2252) 2012-10-24 22:02:15 +02:00			`output_format = 'html',`
bugdown: Use our local copy of fenced_code And wire it up to our local copy of codehilite. This fixes highlighting in fenced code blocks, e.g. ~~~~ .js var x = function () { return "hi"; }; ~~~~ (imported from commit 0efb0c9b98a3acdf55e18bb1918af7960f3425be) 2012-11-19 18:31:03 +01:00			`extensions = ['nl2br',`
bugdown: Don't guess code highlighting language Disables highlighting unless a language is specified. (imported from commit fe5d1a4a8042241336ee7ac01682553f6b35e956) 2012-11-19 17:34:37 +01:00			`codehilite.makeExtension(configs=[`
			`('force_linenos', False),`
			`('guess_lang', False)]),`
bugdown: Use our local copy of fenced_code And wire it up to our local copy of codehilite. This fixes highlighting in fenced code blocks, e.g. ~~~~ .js var x = function () { return "hi"; }; ~~~~ (imported from commit 0efb0c9b98a3acdf55e18bb1918af7960f3425be) 2012-11-19 18:31:03 +01:00			`fenced_code.makeExtension(),`
Use our local copy of codehilite (imported from commit bc9704a19017061d55ff0e16589d57ee0c46caa5) 2012-10-20 05:34:14 +02:00			`Bugdown()])`
Move our various Markdown hacks into their own file (imported from commit b03a5c64cc95964936c4aba7d667807969e35d21) 2012-10-15 22:03:50 +02:00
			`try:`
			`html = _md_engine.convert(md)`
			`except:`
			`# FIXME: Do something more reasonable here!`
			`html = '<p>[Humbug note: Sorry, we could not understand the formatting of your message]</p>'`
bugdown: Hide alphanumeric characters in exception logs, for privacy (imported from commit 39481494b7910307f56e566035c1b464c83d196e) 2012-10-25 21:38:47 +02:00			`logging.getLogger('').error('Exception in Markdown parser: %sInput (sanitized) was: %s'`
			`% (traceback.format_exc(), _sanitize_for_log(md)))`
Move our various Markdown hacks into their own file (imported from commit b03a5c64cc95964936c4aba7d667807969e35d21) 2012-10-15 22:03:50 +02:00
			`_use_count += 1`
			`if _use_count >= MAX_MD_ENGINE_USES:`
			`_md_engine = None`
			`_use_count = 0`

			`return html`