mirror of https://github.com/zulip/zulip.git
rss-bot: Add --unwrap, --math options.
These are for processing arXiv API results.
This commit is contained in:
parent
8e978df957
commit
461856dd56
|
@ -31,6 +31,7 @@ from six.moves.html_parser import HTMLParser
|
||||||
import logging
|
import logging
|
||||||
import optparse
|
import optparse
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
from six.moves import urllib
|
from six.moves import urllib
|
||||||
|
@ -82,6 +83,16 @@ parser.add_option('--feed-file',
|
||||||
help='The file containing a list of RSS feed URLs to follow, one URL per line',
|
help='The file containing a list of RSS feed URLs to follow, one URL per line',
|
||||||
default=os.path.join(RSS_DATA_DIR, "rss-feeds"),
|
default=os.path.join(RSS_DATA_DIR, "rss-feeds"),
|
||||||
action='store')
|
action='store')
|
||||||
|
# Optional post-processing switches; both are off unless given on the
# command line.

# --unwrap: join word-wrapped paragraph lines (see the help text below).
parser.add_option(
    '--unwrap',
    action='store_true',
    dest='unwrap',
    default=False,
    help='Convert word-wrapped paragraphs into single lines')

# --math: double every dollar sign (see the help text below).
parser.add_option(
    '--math',
    action='store_true',
    dest='math',
    default=False,
    help='Convert $ to $$ (for KaTeX processing)')
|
||||||
parser.add_option_group(zulip.generate_option_group(parser))
|
parser.add_option_group(zulip.generate_option_group(parser))
|
||||||
(opts, args) = parser.parse_args() # type: Tuple[Any, List[str]]
|
(opts, args) = parser.parse_args() # type: Tuple[Any, List[str]]
|
||||||
|
|
||||||
|
@ -147,6 +158,12 @@ def compute_entry_hash(entry):
|
||||||
entry_id = entry.get("id", entry.get("link"))
|
entry_id = entry.get("id", entry.get("link"))
|
||||||
return hashlib.md5(entry_id + str(entry_time)).hexdigest()
|
return hashlib.md5(entry_id + str(entry_time)).hexdigest()
|
||||||
|
|
||||||
|
def unwrap_text(body):
    # type: (str) -> str
    """Join the lines of each word-wrapped paragraph into a single line.

    A line break is replaced by one space only when it sits between two
    characters that are not themselves part of a line break.  Blank lines
    (paragraph separators) and leading/trailing line breaks are therefore
    left untouched.

    Both LF and CRLF line endings are recognised; a CRLF pair is treated
    as one line break so that CRLF paragraph separators are preserved
    instead of being collapsed.

    Example: '\\na\\nb\\nc\\n\\nd\\n' -> '\\na b c\\n\\nd\\n'
    """
    # Lookbehind/lookahead require a real (non line-break) character on
    # each side, so breaks at the very start or end of the text and breaks
    # adjacent to another break never match.
    return re.sub(r'(?<=[^\r\n])\r?\n(?=[^\r\n])', ' ', body)
|
||||||
|
|
||||||
def elide_subject(subject):
|
def elide_subject(subject):
|
||||||
# type: (str) -> str
|
# type: (str) -> str
|
||||||
MAX_TOPIC_LENGTH = 60
|
MAX_TOPIC_LENGTH = 60
|
||||||
|
@ -156,10 +173,18 @@ def elide_subject(subject):
|
||||||
|
|
||||||
def send_zulip(entry, feed_name):
|
def send_zulip(entry, feed_name):
|
||||||
# type: (Any, str) -> Dict[str, Any]
|
# type: (Any, str) -> Dict[str, Any]
|
||||||
|
body = entry.summary # type: str
|
||||||
|
if opts.unwrap:
|
||||||
|
body = unwrap_text(body)
|
||||||
|
|
||||||
content = "**[%s](%s)**\n%s\n%s" % (entry.title,
|
content = "**[%s](%s)**\n%s\n%s" % (entry.title,
|
||||||
entry.link,
|
entry.link,
|
||||||
strip_tags(entry.summary),
|
strip_tags(body),
|
||||||
entry.link) # type: str
|
entry.link) # type: str
|
||||||
|
|
||||||
|
if opts.math:
|
||||||
|
content = content.replace('$', '$$')
|
||||||
|
|
||||||
message = {"type": "stream",
|
message = {"type": "stream",
|
||||||
"sender": opts.zulip_email,
|
"sender": opts.zulip_email,
|
||||||
"to": opts.stream,
|
"to": opts.stream,
|
||||||
|
|
Loading…
Reference in New Issue