Move RSS and Twitter bots to integrations/

(imported from commit a8105d5161eae6708d6322037cd6a45bef237050)
This commit is contained in:
Leo Franchi 2013-10-02 11:10:46 -04:00
parent 6b920ae9ad
commit 14a0228557
3 changed files with 72 additions and 0 deletions

209
integrations/rss/rss-bot Executable file
View file

@ -0,0 +1,209 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# RSS integration for Zulip
#
# Copyright © 2013 Zulip, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
import calendar
import errno
import hashlib
from HTMLParser import HTMLParser
import logging
import optparse
import os
import sys
import time
import urlparse
import feedparser
import zulip
# Default directory holding the feed list, the per-feed state files and the log.
RSS_DATA_DIR = os.path.expanduser(os.path.join('~', '.cache', 'zulip-rss'))
# Entries older than this are never posted.
OLDNESS_THRESHOLD = 30 # days

# Passed to optparse as the usage/help text. The setup steps are numbered
# 1-5; the last step was previously mislabelled as a second "4.".
usage = """Usage: Send summaries of RSS entries for your favorite feeds to Zulip.
This bot requires the feedparser module.
To use this script:
1. Create an RSS feed file containing 1 feed URL per line (default feed
file location: ~/.cache/zulip-rss/rss-feeds)
2. Subscribe to the stream that will receive RSS updates (default stream: rss)
3. create a ~/.zuliprc, or specify user and api-key with command line arguments
4. Test the script by running it manually, like this:
/usr/local/share/zulip/demos/rss-bot
You can customize the location on the feed file and recipient stream, e.g.:
/usr/local/share/zulip/demos/rss-bot --feed-file=/path/to/my-feeds --stream=my-rss-stream
5. Configure a crontab entry for this script. A sample crontab entry for
processing feeds stored in the default location and sending to the default
stream every 5 minutes is:
*/5 * * * * /usr/local/share/zulip/demos/rss-bot"""

parser = optparse.OptionParser(usage)
parser.add_option('--stream',
                  dest='stream',
                  help='The stream to which to send RSS messages.',
                  default="rss",
                  action='store')
parser.add_option('--data-dir',
                  dest='data_dir',
                  help='The directory where feed metadata is stored',
                  default=RSS_DATA_DIR,
                  action='store')
parser.add_option('--feed-file',
                  dest='feed_file',
                  help='The file containing a list of RSS feed URLs to follow, one URL per line',
                  default=os.path.join(RSS_DATA_DIR, "rss-feeds"),
                  action='store')
# Adds the option group supplied by the zulip bindings; opts.email,
# opts.api_key and opts.site are read from the parsed options below.
parser.add_option_group(zulip.generate_option_group(parser))
(opts, args) = parser.parse_args()
def mkdir_p(path):
    """Create ``path`` and any missing parent directories, like ``mkdir -p``.

    A directory that already exists is not an error. Every other failure,
    including ``path`` existing as something other than a directory,
    re-raises the original ``OSError``.
    """
    # Python doesn't have an analog to `mkdir -p` < Python 3.2.
    try:
        os.makedirs(path)
    except OSError as e:
        # Only "already exists, and is a directory" is benign.
        if e.errno != errno.EEXIST or not os.path.isdir(path):
            raise
# The data directory must exist before logging is configured, because the
# log file is created inside it.
try:
    mkdir_p(opts.data_dir)
except OSError:
    # We can't write to the logfile, so just print and give up.
    sys.stderr.write("Unable to store RSS data at %s.\n" % (opts.data_dir,))
    # sys.exit rather than the bare ``exit`` builtin: the latter is injected
    # by the ``site`` module and is absent when Python runs with -S.
    sys.exit(1)
# Every record uses the same "timestamp: message" layout, both on the root
# logger configured by basicConfig and in the log file.
log_format = "%(asctime)s: %(message)s"
log_file = os.path.join(opts.data_dir, "rss-bot.log")

logging.basicConfig(format=log_format)

# File handler writing to rss-bot.log inside the data directory.
formatter = logging.Formatter(log_format)
file_handler = logging.FileHandler(log_file)
file_handler.setFormatter(formatter)

# Module logger: records at DEBUG and above go to the file handler.
logger = logging.getLogger(__name__)
logger.addHandler(file_handler)
logger.setLevel(logging.DEBUG)
def log_error_and_exit(error):
    """Log ``error`` followed by the usage text, then exit with status 1."""
    logger.error(error)
    logger.error(usage)
    # sys.exit rather than the bare ``exit`` builtin: the latter is injected
    # by the ``site`` module and is absent when Python runs with -S.
    sys.exit(1)
class MLStripper(HTMLParser):
    """HTML parser that discards markup and keeps only the text between tags."""

    def __init__(self):
        # Run the base initialiser (which itself calls reset()) instead of
        # calling reset() directly, so any state HTMLParser sets up in
        # __init__ is present. The explicit base-class call is used because
        # HTMLParser is an old-style class on Python 2, where super() fails.
        HTMLParser.__init__(self)
        # Text fragments, in the order the parser reported them.
        self.fed = []

    def handle_data(self, data):
        """Record one run of text reported by the parser."""
        self.fed.append(data)

    def get_data(self):
        """Return all text collected so far as a single string."""
        return ''.join(self.fed)
def strip_tags(html):
    """Return the text of ``html`` with the markup removed."""
    text_collector = MLStripper()
    text_collector.feed(html)
    plain_text = text_collector.get_data()
    return plain_text
def compute_entry_hash(entry):
    """Return a hex MD5 digest identifying one revision of a feed entry.

    The digest covers ``entry.id`` followed by the entry's ``published``
    string, falling back to ``updated``. It is used only as a fingerprint
    for "already posted" bookkeeping, not for anything security-sensitive.
    """
    entry_time = entry.get("published", entry.get("updated"))
    if entry_time is None:
        # Neither timestamp is present: hash the id alone instead of
        # failing on ``str + None``.
        entry_time = ""
    key = entry.id + entry_time
    if not isinstance(key, bytes):
        # md5 needs bytes. Encoding explicitly as UTF-8 avoids the implicit
        # ASCII encode that fails on non-ASCII text, and produces the same
        # digest as before for pure-ASCII input.
        key = key.encode("utf-8")
    return hashlib.md5(key).hexdigest()
def send_zulip(entry, feed_name):
    """Post one feed entry to the configured stream and return the API response.

    The message body is the bolded, linked title, then the tag-stripped
    summary, then the bare link. ``feed_name`` becomes the message subject.
    """
    title = entry.title
    link = entry.link
    summary_text = strip_tags(entry.summary)
    content = "**[%s](%s)**\n%s\n%s" % (title, link, summary_text, link)
    return client.send_message({
        "type": "stream",
        "sender": opts.email,
        "to": opts.stream,
        "subject": feed_name,
        "content": content,
    })
# Read the list of feed URLs, one per line.
try:
    with open(opts.feed_file, "r") as f:
        # Skip blank lines: an empty URL has an empty netloc, so its state
        # file path would be the data directory itself, which cannot be
        # opened for appending.
        feed_urls = [line.strip() for line in f if line.strip()]
except IOError:
    log_error_and_exit("Unable to read feed file at %s." % (opts.feed_file,))

client = zulip.Client(email=opts.email, api_key=opts.api_key,
                      site=opts.site)
# Stays True until one entry has been handled. A send failure while it is
# still True is treated as a configuration problem and aborts the run.
first_message = True

for feed_url in feed_urls:
    # Per-feed state file, named after the feed's host, holding one hash per
    # line for every entry already handled.
    # NOTE(review): two feeds on the same host share this file -- confirm
    # that is acceptable.
    feed_file = os.path.join(opts.data_dir, urlparse.urlparse(feed_url).netloc)

    try:
        with open(feed_file, "r") as f:
            old_feed_hashes = set(line.strip() for line in f)
    except IOError:
        # No state yet: this is the first run for this feed.
        old_feed_hashes = set()

    new_hashes = []
    data = feedparser.parse(feed_url)

    for entry in data.entries:
        # An entry has either been published or updated.
        entry_time = entry.get("published_parsed", entry.get("updated_parsed"))
        if entry_time is None:
            # Without a parsed timestamp the age check below cannot run
            # (calendar.timegm(None) raises), so skip just this entry.
            logger.error("Skipping entry without a timestamp in %s" % (feed_url,))
            continue
        if (time.time() - calendar.timegm(entry_time)) > OLDNESS_THRESHOLD * 60 * 60 * 24:
            # As a safeguard against misbehaving feeds, don't try to process
            # entries older than some threshold.
            continue

        entry_hash = compute_entry_hash(entry)
        if entry_hash in old_feed_hashes:
            # We've already seen this. No need to process any older entries.
            break
        if (not old_feed_hashes) and (len(new_hashes) >= 3):
            # On a first run, pick up the 3 most recent entries. An RSS feed has
            # entries in reverse chronological order.
            break

        response = send_zulip(entry, data.feed.title)
        if response["result"] != "success":
            logger.error("Error processing %s" % (feed_url,))
            logger.error(response)
            if first_message:
                # This is probably some fundamental problem like the stream not
                # existing or something being misconfigured, so bail instead of
                # getting the same error for every RSS entry.
                log_error_and_exit("Failed to process first message")
            # Go ahead and move on -- perhaps this entry is corrupt.
        new_hashes.append(entry_hash)
        first_message = False

    # Append rather than rewrite, so hashes from earlier runs are kept.
    with open(feed_file, "a") as f:
        for new_hash in new_hashes:
            f.write(new_hash + "\n")

    logger.info("Sent zulips for %d %s entries" % (len(new_hashes), feed_url))

161
integrations/twitter/twitter-bot Executable file
View file

@ -0,0 +1,161 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Twitter integration for Zulip
#
# Copyright © 2013 Zulip, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
import os
import sys
import optparse
import ConfigParser
import zulip
# INI file that holds the Twitter OAuth credentials and this bot's saved state.
CONFIGFILE = os.path.expanduser("~/.zulip_twitterrc")


def write_config(config, since_id, user):
    """Store ``since_id`` and ``user`` in the ``twitter`` section and rewrite CONFIGFILE."""
    for option, value in (('since_id', since_id), ('user_id', user)):
        config.set('twitter', option, value)
    with open(CONFIGFILE, 'wb') as out:
        config.write(out)
# Command-line interface. The raw string below is the usage text optparse
# prints for --help and on errors.
parser = optparse.OptionParser(r"""
%prog --user foo@example.com --api-key 0b1c2d3e4f5a6b7c8d9e0f1a2b3c4d5 --twitter-id twitter_handle
Slurp tweets on your timeline into a specific zulip stream.
Run this on your personal machine. Your API key and twitter id are revealed to local
users through the command line or config file.
This bot uses OAuth to authenticate with twitter. Please create a ~/.zulip_twitterrc with
the following contents:
[twitter]
consumer_key =
consumer_secret =
access_token_key =
access_token_secret =
In order to obtain a consumer key & secret, you must register a new application under your twitter account:
1. Go to http://dev.twitter.com
2. Log in
3. In the menu under your username, click My Applications
4. Create a new application
Make sure to go the application you created and click "create my access token" as well. Fill in the values displayed.
Depends on: twitter-python
""")
# The value is a Twitter handle, not a URL, so the metavar says so; the help
# text previously ended in a stray double quote.
parser.add_option('--twitter-id',
                  help='Twitter username to poll for new tweets from',
                  metavar='USERNAME')
# NOTE(review): --stream has no default, so options.stream is None when the
# flag is omitted -- confirm whether it should be required.
parser.add_option('--stream',
                  help='Default zulip stream to write tweets to')
parser.add_option('--limit-tweets',
                  default=15,
                  type='int',
                  help='Maximum number of tweets to push at once')
parser.add_option_group(zulip.generate_option_group(parser))
(options, args) = parser.parse_args()

if not options.twitter_id:
    parser.error('You must specify --twitter-id')
# Load the four OAuth credentials from the [twitter] section of CONFIGFILE.
config = ConfigParser.ConfigParser()
try:
    config.read(CONFIGFILE)
    consumer_key = config.get('twitter', 'consumer_key')
    consumer_secret = config.get('twitter', 'consumer_secret')
    access_token_key = config.get('twitter', 'access_token_key')
    access_token_secret = config.get('twitter', 'access_token_secret')
except (ConfigParser.NoSectionError, ConfigParser.NoOptionError):
    parser.error("Please provide a ~/.zulip_twitterrc")

# A credential that is present but blank is rejected the same way.
if not (consumer_key and consumer_secret and access_token_key and access_token_secret):
    parser.error("Please provide a ~/.zulip_twitterrc")
# Imported here rather than at the top of the file so a missing dependency
# produces a readable error instead of a traceback.
try:
    import twitter
except ImportError:
    parser.error("Please install twitter-python")

api = twitter.Api(consumer_key=consumer_key,
                  consumer_secret=consumer_secret,
                  access_token_key=access_token_key,
                  access_token_secret=access_token_secret)

user = api.VerifyCredentials()

if not user.GetId():
    # A single parenthesised string prints identically under the Python 2
    # print statement.
    print("Unable to log in to twitter with supplied credentials. Please double-check and try again")
    # Exit non-zero so callers such as cron can tell the run failed; a bare
    # sys.exit() reports success.
    sys.exit(1)
# Id of the last tweet recorded by a previous run, or -1 when none is stored.
if config.has_option('twitter', 'since_id'):
    since_id = config.getint('twitter', 'since_id')
else:
    since_id = -1

# Account the stored since_id was recorded for; falls back to the account
# requested on the command line.
if config.has_option('twitter', 'user_id'):
    user_id = config.get('twitter', 'user_id')
else:
    user_id = options.twitter_id

client = zulip.Client(email=options.email,
                      api_key=options.api_key,
                      site=options.site,
                      verbose=True)
if since_id < 0 or options.twitter_id != user_id:
    # No since id yet, fetch the latest and then start monitoring from next time
    # Or, a different user id is being asked for, so start from scratch
    # Either way, fetch last 5 tweets to start off
    statuses = api.GetFriendsTimeline(user=options.twitter_id, count=5)
else:
    # We have a saved last id, so insert all newer tweets into the zulip stream
    statuses = api.GetFriendsTimeline(user=options.twitter_id, since_id=since_id)

# Walk the returned list in reverse, capped at --limit-tweets per run.
for status in statuses[::-1][:options.limit_tweets]:
    author = status.GetUser()
    composed = "%s (%s)" % (author.GetName(), author.GetScreenName())
    message = {
        "type": "stream",
        "to": [options.stream],
        "subject": composed,
        "content": status.GetText(),
    }

    ret = client.send_message(message)

    if ret['result'] == 'error':
        # If sending failed (e.g. no such stream), abort and retry next time
        print("Error sending message to zulip: %s" % ret['msg'])
        break

    since_id = status.GetId()
    # The saved state now belongs to the requested account. Writing back the
    # stale user_id from the config made every later run take the "start
    # from scratch" branch and repost the last five tweets.
    user_id = options.twitter_id

write_config(config, since_id, user_id)

View file

@ -0,0 +1,189 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Twitter search integration for Zulip
#
# Copyright © 2013 Zulip, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
import os
import sys
import optparse
import ConfigParser
import zulip
# INI file that holds the Twitter OAuth credentials and this bot's saved state.
CONFIGFILE = os.path.expanduser("~/.zulip_twitterrc")


def write_config(config, since_id):
    """Store ``since_id`` in the ``search`` section, creating it if needed, and rewrite CONFIGFILE."""
    if not config.has_section('search'):
        config.add_section('search')
    config.set('search', 'since_id', since_id)
    with open(CONFIGFILE, 'wb') as out:
        config.write(out)
# Command-line interface. The raw string is the usage text optparse prints
# for --help and on errors.
parser = optparse.OptionParser(usage=r"""
%prog --user foo@zulip.com --api-key a0b1c2d3e4f5a6b7c8d9e0f1a2b3c4d5 --search="@nprnews,quantum physics"
Send Twitter search results to a Zulip stream.
Depends on: twitter-python
To use this script:
1. Set up Twitter authentication, as described below
2. Subscribe to the stream that will receive Twitter updates (default stream: twitter)
3. Test the script by running it manually, like this:
/usr/local/share/zulip/demos/twitter-search-bot --search="@nprnews,quantum physics"
4. Configure a crontab entry for this script. A sample crontab entry
that will process tweets every 5 minutes is:
*/5 * * * * /usr/local/share/zulip/demos/twitter-search-bot --search="@nprnews,quantum physics"
== Setting up Twitter authentications ==
Run this on a personal or trusted machine, because your API key is
visible to local users through the command line or config file.
This bot uses OAuth to authenticate with twitter. Please create a
~/.zulip_twitterrc with the following contents:
[twitter]
consumer_key =
consumer_secret =
access_token_key =
access_token_secret =
In order to obtain a consumer key & secret, you must register a
new application under your twitter account:
1. Go to http://dev.twitter.com
2. Log in
3. In the menu under your username, click My Applications
4. Create a new application
Make sure to go the application you created and click "create my
access token" as well. Fill in the values displayed.
""")

# Comma-separated search terms; required (checked after parsing).
parser.add_option('--search',
                  action='store',
                  dest='search_terms',
                  help='Terms to search on')
# Destination stream for the tweet links.
parser.add_option('--stream',
                  action='store',
                  dest='stream',
                  default="twitter",
                  help='The stream to which to send tweets')
# Upper bound on messages sent in a single run.
parser.add_option('--limit-tweets',
                  type='int',
                  default=15,
                  help='Maximum number of tweets to send at once')
parser.add_option_group(zulip.generate_option_group(parser))

(opts, args) = parser.parse_args()

if not opts.search_terms:
    parser.error('You must specify a search term.')
# Load the four OAuth credentials from the [twitter] section of CONFIGFILE.
config = ConfigParser.ConfigParser()
try:
    config.read(CONFIGFILE)
    consumer_key = config.get('twitter', 'consumer_key')
    consumer_secret = config.get('twitter', 'consumer_secret')
    access_token_key = config.get('twitter', 'access_token_key')
    access_token_secret = config.get('twitter', 'access_token_secret')
except (ConfigParser.NoSectionError, ConfigParser.NoOptionError):
    parser.error("Please provide a ~/.zulip_twitterrc")

# A credential that is present but blank is rejected the same way.
if not all((consumer_key, consumer_secret, access_token_key, access_token_secret)):
    parser.error("Please provide a ~/.zulip_twitterrc")

# Id stored by a previous run; 0 when the [search] section or the option
# does not exist yet.
try:
    since_id = config.getint('search', 'since_id')
except (ConfigParser.NoSectionError, ConfigParser.NoOptionError):
    since_id = 0
# Imported here rather than at the top of the file so a missing dependency
# produces a readable error instead of a traceback.
try:
    import twitter
except ImportError:
    parser.error("Please install twitter-python")

api = twitter.Api(consumer_key=consumer_key,
                  consumer_secret=consumer_secret,
                  access_token_key=access_token_key,
                  access_token_secret=access_token_secret)

user = api.VerifyCredentials()

if not user.GetId():
    # Adjacent literals keep an explicit space between the two sentences;
    # the old backslash continuation inside the string joined them directly.
    # A single parenthesised string prints identically under the Python 2
    # print statement.
    print("Unable to log in to twitter with supplied credentials. "
          "Please double-check and try again.")
    # Exit non-zero so callers such as cron can tell the run failed; a bare
    # sys.exit() reports success.
    sys.exit(1)
client = zulip.Client(
    email=opts.email,
    api_key=opts.api_key,
    site=opts.site,
    verbose=True)

# Split the comma-separated terms once. Surrounding whitespace is stripped so
# "a, b" behaves like "a,b". Empty terms are dropped because "" is a
# substring of every tweet and would stop later terms from being tried.
search_terms = [term.strip() for term in opts.search_terms.split(",") if term.strip()]
search_query = " OR ".join(search_terms)
statuses = api.GetSearch(search_query, since_id=since_id)

# Walk the returned list in reverse, capped at --limit-tweets per run.
for status in statuses[::-1][:opts.limit_tweets]:
    # https://twitter.com/eatevilpenguins/status/309995853408530432
    url = "https://twitter.com/%s/status/%s" % (status.GetUser().GetScreenName(),
                                                status.GetId())
    content_lower = status.GetText().lower()

    # The subject is the first search term found in the tweet text.
    search_term_used = None
    for term in search_terms:
        if term.lower() in content_lower:
            search_term_used = term
            break
    # For some reason (perhaps encodings or message tranformations we
    # didn't anticipate), we don't know what term was used, so use a
    # default.
    if not search_term_used:
        search_term_used = "mentions"

    message = {
        "type": "stream",
        "to": [opts.stream],
        "subject": search_term_used,
        "content": url,
    }

    ret = client.send_message(message)

    if ret['result'] == 'error':
        # If sending failed (e.g. no such stream), abort and retry next time
        print("Error sending message to zulip: %s" % ret['msg'])
        break

    since_id = status.GetId()

# Persist the id of the last tweet sent so the next run resumes after it.
write_config(config, since_id)