mirror of https://github.com/zulip/zulip.git
mypy: Annotate /api/integrations/rss/rss-bot.
With a few tweaks by tabbott.
This commit is contained in:
parent: ef893dc8dd
commit: 059b124027
|
@ -34,12 +34,13 @@ import os
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
from six.moves import urllib
|
from six.moves import urllib
|
||||||
|
from typing import Dict, List, Tuple, Any
|
||||||
|
|
||||||
import feedparser
|
import feedparser
|
||||||
import zulip
|
import zulip
|
||||||
VERSION = "0.9"
|
VERSION = "0.9" # type: str
|
||||||
RSS_DATA_DIR = os.path.expanduser(os.path.join('~', '.cache', 'zulip-rss'))
|
RSS_DATA_DIR = os.path.expanduser(os.path.join('~', '.cache', 'zulip-rss')) # type: str
|
||||||
OLDNESS_THRESHOLD = 30 # days
|
OLDNESS_THRESHOLD = 30 # type: int
|
||||||
|
|
||||||
usage = """Usage: Send summaries of RSS entries for your favorite feeds to Zulip.
|
usage = """Usage: Send summaries of RSS entries for your favorite feeds to Zulip.
|
||||||
|
|
||||||
|
@ -65,7 +66,7 @@ stream every 5 minutes is:
|
||||||
|
|
||||||
*/5 * * * * /usr/local/share/zulip/integrations/rss/rss-bot"""
|
*/5 * * * * /usr/local/share/zulip/integrations/rss/rss-bot"""
|
||||||
|
|
||||||
parser = optparse.OptionParser(usage)
|
parser = optparse.OptionParser(usage) # type: optparse.OptionParser
|
||||||
parser.add_option('--stream',
|
parser.add_option('--stream',
|
||||||
dest='stream',
|
dest='stream',
|
||||||
help='The stream to which to send RSS messages.',
|
help='The stream to which to send RSS messages.',
|
||||||
|
@ -82,9 +83,10 @@ parser.add_option('--feed-file',
|
||||||
default=os.path.join(RSS_DATA_DIR, "rss-feeds"),
|
default=os.path.join(RSS_DATA_DIR, "rss-feeds"),
|
||||||
action='store')
|
action='store')
|
||||||
parser.add_option_group(zulip.generate_option_group(parser))
|
parser.add_option_group(zulip.generate_option_group(parser))
|
||||||
(opts, args) = parser.parse_args()
|
(opts, args) = parser.parse_args() # type: Tuple[Any, List[str]]
|
||||||
|
|
||||||
def mkdir_p(path):
|
def mkdir_p(path):
|
||||||
|
# type: (str) -> None
|
||||||
# Python doesn't have an analog to `mkdir -p` < Python 3.2.
|
# Python doesn't have an analog to `mkdir -p` < Python 3.2.
|
||||||
try:
|
try:
|
||||||
os.makedirs(path)
|
os.makedirs(path)
|
||||||
|
@ -101,90 +103,98 @@ except OSError:
|
||||||
print("Unable to store RSS data at %s." % (opts.data_dir,), file=sys.stderr)
|
print("Unable to store RSS data at %s." % (opts.data_dir,), file=sys.stderr)
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
log_file = os.path.join(opts.data_dir, "rss-bot.log")
|
log_file = os.path.join(opts.data_dir, "rss-bot.log") # type: str
|
||||||
log_format = "%(asctime)s: %(message)s"
|
log_format = "%(asctime)s: %(message)s" # type: str
|
||||||
logging.basicConfig(format=log_format)
|
logging.basicConfig(format=log_format)
|
||||||
|
|
||||||
formatter = logging.Formatter(log_format)
|
formatter = logging.Formatter(log_format) # type: logging.Formatter
|
||||||
file_handler = logging.FileHandler(log_file)
|
file_handler = logging.FileHandler(log_file) # type: logging.FileHandler
|
||||||
file_handler.setFormatter(formatter)
|
file_handler.setFormatter(formatter)
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__) # type: logging.Logger
|
||||||
logger.setLevel(logging.DEBUG)
|
logger.setLevel(logging.DEBUG)
|
||||||
logger.addHandler(file_handler)
|
logger.addHandler(file_handler)
|
||||||
|
|
||||||
def log_error_and_exit(error):
|
def log_error_and_exit(error):
|
||||||
|
# type: (str) -> None
|
||||||
logger.error(error)
|
logger.error(error)
|
||||||
logger.error(usage)
|
logger.error(usage)
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
class MLStripper(HTMLParser):
|
class MLStripper(HTMLParser):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
# type: () -> None
|
||||||
self.reset()
|
self.reset()
|
||||||
self.fed = []
|
self.fed = [] # type: List[str]
|
||||||
|
|
||||||
def handle_data(self, data):
|
def handle_data(self, data):
|
||||||
|
# type: (str) -> None
|
||||||
self.fed.append(data)
|
self.fed.append(data)
|
||||||
|
|
||||||
def get_data(self):
|
def get_data(self):
|
||||||
|
# type: () -> str
|
||||||
return ''.join(self.fed)
|
return ''.join(self.fed)
|
||||||
|
|
||||||
def strip_tags(html):
|
def strip_tags(html):
|
||||||
|
# type: (str) -> str
|
||||||
stripper = MLStripper()
|
stripper = MLStripper()
|
||||||
stripper.feed(html)
|
stripper.feed(html)
|
||||||
return stripper.get_data()
|
return stripper.get_data()
|
||||||
|
|
||||||
def compute_entry_hash(entry):
|
def compute_entry_hash(entry):
|
||||||
|
# type: (Dict[str, Any]) -> str
|
||||||
entry_time = entry.get("published", entry.get("updated"))
|
entry_time = entry.get("published", entry.get("updated"))
|
||||||
entry_id = entry.get("id", entry.get("link"))
|
entry_id = entry.get("id", entry.get("link"))
|
||||||
return hashlib.md5(entry_id + str(entry_time)).hexdigest()
|
return hashlib.md5(entry_id + str(entry_time)).hexdigest()
|
||||||
|
|
||||||
def elide_subject(subject):
|
def elide_subject(subject):
|
||||||
|
# type: (str) -> str
|
||||||
MAX_TOPIC_LENGTH = 60
|
MAX_TOPIC_LENGTH = 60
|
||||||
if len(subject) > MAX_TOPIC_LENGTH:
|
if len(subject) > MAX_TOPIC_LENGTH:
|
||||||
subject = subject[:MAX_TOPIC_LENGTH - 3].rstrip() + '...'
|
subject = subject[:MAX_TOPIC_LENGTH - 3].rstrip() + '...'
|
||||||
return subject
|
return subject
|
||||||
|
|
||||||
def send_zulip(entry, feed_name):
|
def send_zulip(entry, feed_name):
|
||||||
|
# type: (Any, str) -> Dict[str, Any]
|
||||||
content = "**[%s](%s)**\n%s\n%s" % (entry.title,
|
content = "**[%s](%s)**\n%s\n%s" % (entry.title,
|
||||||
entry.link,
|
entry.link,
|
||||||
strip_tags(entry.summary),
|
strip_tags(entry.summary),
|
||||||
entry.link)
|
entry.link) # type: str
|
||||||
message = {"type": "stream",
|
message = {"type": "stream",
|
||||||
"sender": opts.zulip_email,
|
"sender": opts.zulip_email,
|
||||||
"to": opts.stream,
|
"to": opts.stream,
|
||||||
"subject": elide_subject(feed_name),
|
"subject": elide_subject(feed_name),
|
||||||
"content": content,
|
"content": content,
|
||||||
}
|
} # type: Dict[str, str]
|
||||||
return client.send_message(message)
|
return client.send_message(message)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with open(opts.feed_file, "r") as f:
|
with open(opts.feed_file, "r") as f:
|
||||||
feed_urls = [feed.strip() for feed in f.readlines()]
|
feed_urls = [feed.strip() for feed in f.readlines()] # type: List[str]
|
||||||
except IOError:
|
except IOError:
|
||||||
log_error_and_exit("Unable to read feed file at %s." % (opts.feed_file,))
|
log_error_and_exit("Unable to read feed file at %s." % (opts.feed_file,))
|
||||||
|
|
||||||
client = zulip.Client(email=opts.zulip_email, api_key=opts.zulip_api_key,
|
client = zulip.Client(email=opts.zulip_email, api_key=opts.zulip_api_key,
|
||||||
site=opts.zulip_site, client="ZulipRSS/" + VERSION)
|
site=opts.zulip_site, client="ZulipRSS/" + VERSION) # type: zulip.Client
|
||||||
|
|
||||||
first_message = True
|
first_message = True # type: bool
|
||||||
|
|
||||||
for feed_url in feed_urls:
|
for feed_url in feed_urls:
|
||||||
feed_file = os.path.join(opts.data_dir, urllib.parse.urlparse(feed_url).netloc)
|
feed_file = os.path.join(opts.data_dir, urllib.parse.urlparse(feed_url).netloc) # type: str
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with open(feed_file, "r") as f:
|
with open(feed_file, "r") as f:
|
||||||
old_feed_hashes = dict((line.strip(), True) for line in f.readlines())
|
old_feed_hashes = dict((line.strip(), True) for line in f.readlines()) # type: Dict[str, bool]
|
||||||
except IOError:
|
except IOError:
|
||||||
old_feed_hashes = {}
|
old_feed_hashes = {}
|
||||||
|
|
||||||
new_hashes = []
|
new_hashes = [] # type: List[str]
|
||||||
data = feedparser.parse(feed_url)
|
data = feedparser.parse(feed_url) # type: feedparser.parse
|
||||||
|
|
||||||
for entry in data.entries:
|
for entry in data.entries:
|
||||||
entry_hash = compute_entry_hash(entry)
|
entry_hash = compute_entry_hash(entry) # type: str
|
||||||
# An entry has either been published or updated.
|
# An entry has either been published or updated.
|
||||||
entry_time = entry.get("published_parsed", entry.get("updated_parsed"))
|
entry_time = entry.get("published_parsed", entry.get("updated_parsed")) # type: Tuple[int, int]
|
||||||
if entry_time is not None and (time.time() - calendar.timegm(entry_time)) > OLDNESS_THRESHOLD * 60 * 60 * 24:
|
if entry_time is not None and (time.time() - calendar.timegm(entry_time)) > OLDNESS_THRESHOLD * 60 * 60 * 24:
|
||||||
# As a safeguard against misbehaving feeds, don't try to process
|
# As a safeguard against misbehaving feeds, don't try to process
|
||||||
# entries older than some threshold.
|
# entries older than some threshold.
|
||||||
|
@ -197,12 +207,12 @@ for feed_url in feed_urls:
|
||||||
# entries in reverse chronological order.
|
# entries in reverse chronological order.
|
||||||
break
|
break
|
||||||
|
|
||||||
feed_name = data.feed.title or feed_url
|
feed_name = data.feed.title or feed_url # type: str
|
||||||
|
|
||||||
response = send_zulip(entry, feed_name)
|
response = send_zulip(entry, feed_name) # type: Dict[str, Any]
|
||||||
if response["result"] != "success":
|
if response["result"] != "success":
|
||||||
logger.error("Error processing %s" % (feed_url,))
|
logger.error("Error processing %s" % (feed_url,))
|
||||||
logger.error(response)
|
logger.error(str(response))
|
||||||
if first_message:
|
if first_message:
|
||||||
# This is probably some fundamental problem like the stream not
|
# This is probably some fundamental problem like the stream not
|
||||||
# existing or something being misconfigured, so bail instead of
|
# existing or something being misconfigured, so bail instead of
|
||||||
|
|
|
@ -26,7 +26,6 @@ api/integrations/svn/zulip_svn_config.py
|
||||||
api/integrations/trac/zulip_trac_config.py
|
api/integrations/trac/zulip_trac_config.py
|
||||||
api/integrations/asana/zulip_asana_mirror
|
api/integrations/asana/zulip_asana_mirror
|
||||||
api/integrations/git/post-receive
|
api/integrations/git/post-receive
|
||||||
api/integrations/rss/rss-bot
|
|
||||||
tools/deprecated/iframe-bot/show-last-messages
|
tools/deprecated/iframe-bot/show-last-messages
|
||||||
tools/deprecated/inject-messages/inject-messages
|
tools/deprecated/inject-messages/inject-messages
|
||||||
zproject/settings.py
|
zproject/settings.py
|
||||||
|
|
Loading…
Reference in New Issue