Source code for leo.plugins.rss
#@+leo-ver=5-thin
#@+node:ekr.20131004162848.11444: * @file rss.py
#@@language python
#@@tabwidth -4
#@+<< docstring >>
#@+node:peckj.20131002201824.5539: ** << docstring >>
'''Adds primitive RSS reader functionality to Leo.
By Jacob M. Peck.
RSS feeds
=========
This plugin requires the python module 'feedparser' to be installed.
This plugin operates on RSS feed definitions, which are defined as nodes
with headlines that start with `@feed`, and with bodies that contain a
valid `@url` directive.
For example, the following is a valid feed definition::
@feed Hack a Day
@url http://feeds2.feedburner.com/hackaday/LgoM
Hack a Day's feed. Awesome tech stuff.
Each `@feed` node also stores a viewed history of previous stories, so that
the next time the feed is parsed, you will only see new stories. This history
can be reset with the `rss-clear-etc` commands below.
Important Note
==============
This plugin currently doesn't have any undo capability - any changes performed
by the following commands are not undoable.
Configuration Settings
======================
This plugin is configured with the following @settings:
@string rss-date-format
-----------------------
Format string passed to time.strftime to format entry dates. Defaults
to '%Y-%m-%d %I:%M %p' if not provided.
@bool rss-sort-newest-first
---------------------------
If True, newest entries are placed before older entries. If False, older entries
are placed before newer entries. Defaults to True if not provided.
@string rss-headline-format
---------------------------
The format of an entry headline, specified with various tokens. Defaults to
'[<date>] <title>' if not provided.
Valid tokens are:
| <date> - the date, formatted according to `@string rss-date-format`
| <title> - the entry title
| <link> - the entry link (not recommended in headline)
| <summary> - the entry summary (strongly not recommended in headline)
Anything that isn't a valid token is retained untouched, such as the square
brackets in the default setting.
@data rss-body-format
---------------------
The body of this node will provide the structure of the body of parsed entry
nodes. Empty lines should be denoted with '\\n' on a line by itself. It defaults
to the following, if not provided::
@url <link>
\n
<title>
<date>
\n
<summary>
Valid tokens are the same as for `@string rss-headline-format`. Any instance of
'\n' on a line by itself is replaced with an empty line. All other strings that
are not valid tokens are retained untouched, such as the `@url` directive in the
default.
Commands
========
This plugin uses commands to operate on these `@feed` definitions. The following
commands are available:
rss-parse-selected-feed
-----------------------
Parses the selected `@feed` node, creating entries for each story as
children of the `@feed` node. Can be SLOW for large feeds.
rss-parse-all-feeds
-------------------
Parses all `@feed` nodes in the current outline, creating entries for each story
as children of the appropriate `@feed` nodes. Not recommended, as it can make
Leo appear to be locked up while running.
rss-delete-selected-feed-stories
--------------------------------
Deletes all the children of the selected `@feed` node.
rss-delete-all-feed-stories
---------------------------
Deletes all children of all `@feed` nodes in the current outline.
rss-clear-selected-feed-history
-------------------------------
Clears the selected `@feed` node's viewed stories history.
rss-clear-all-feed-histories
----------------------------
Clears the viewed stories history of every `@feed` node in the current outline.
'''
#@-<< docstring >>
__version__ = '0.3'
#@+<< version history >>
#@+node:peckj.20131002201824.5540: ** << version history >>
#@+at
#
# Version 0.1 - initial functionality. NO UNDO.
# Version 0.2 - bugfix for bug reported by Viktor Ransmayr regarding not having dates in the feed.
# Version 0.3 - configurable with a few @settings in myLeoSettings.leo
#@-<< version history >>
#@+<< imports >>
#@+node:peckj.20131002201824.5541: ** << imports >>
import leo.core.leoGlobals as g
import time
# Whatever other imports your plugins uses.
# feedparser = g.importExtension('feedparser',pluginName='rss.py',verbose=False)
try:
import feedparser
except ImportError:
feedparser = None
print('rss.py: can not import feedparser')
#@-<< imports >>
#@+others
#@+node:peckj.20131002201824.5542: ** init
def init():
    '''Return True if the plugin has loaded successfully.

    The plugin loads only when the 'feedparser' module was imported and the
    active gui's name starts with 'qt'. On success, onCreate is registered
    for the 'new' and 'open2' events. On failure, the reason is reported
    in red and False is returned.
    '''
    if g.app.gui is None:
        g.app.createQtGui(__file__)
    # Report each failure cause separately, so a non-Qt gui is not
    # misreported as a missing 'feedparser' module.
    if feedparser is None:
        g.es('Module \'feedparser\' not installed. Plugin rss.py not loaded.', color='red')
        return False
    if not g.app.gui.guiName().startswith('qt'):
        g.es('Plugin rss.py requires the Qt gui. Plugin rss.py not loaded.', color='red')
        return False
    g.registerHandler(('new', 'open2'), onCreate)
    g.plugin_signon(__name__)
    return True
#@+node:peckj.20131002201824.5543: ** onCreate
def onCreate(tag, keys):
    '''Create an RSSController for the commander found in keys['c'].

    Does nothing when the event carries no commander. Otherwise the new
    controller is stored on the commander as c.theRSSController.
    '''
    commander = keys.get('c')
    if commander:
        commander.theRSSController = RSSController(commander)
#@+node:peckj.20131002201824.5544: ** class RSSController
[docs]class RSSController(object):
#@+others
#@+node:peckj.20131002201824.5545: *3* __init__ (RSSController, rss.py)
def __init__(self, c):
self.c = c
# Warning: hook handlers must use keywords.get('c'), NOT self.c.
self._NO_TIME = (3000, 0, 0, 0, 0, 0, 0, 0, 0)
self._NO_SUMMARY = 'NO SUMMARY'
self._NO_NAME = 'NO TITLE'
self._NO_LINK = 'NO LINK'
# register commands
c.k.registerCommand('rss-parse-selected-feed', self.parse_selected_feed)
c.k.registerCommand('rss-parse-all-feeds', self.parse_all_feeds)
c.k.registerCommand('rss-delete-selected-feed-stories', self.delete_selected_feed_stories)
c.k.registerCommand('rss-delete-all-feed-stories', self.delete_all_feed_stories)
c.k.registerCommand('rss-clear-selected-feed-history', self.clear_selected_feed_history)
c.k.registerCommand('rss-clear-all-feed-histories', self.clear_all_feed_histories)
#@+node:peckj.20131003102740.5571: *3* feed related
#@+node:peckj.20131002201824.5546: *4* get_all_feeds
def get_all_feeds(self):
    '''Return a list of the vnodes of every feed node in the outline.

    Positions come from c.all_positions(). A position counts as a feed
    when is_feed accepts it. Each vnode appears at most once, in the
    order it was first seen.
    '''
    found = []
    for position in self.c.all_positions():
        if not self.is_feed(position):
            continue
        vnode = position.v
        if vnode not in found:
            found.append(vnode)
    return found
#@+node:peckj.20131002201824.5547: *4* is_feed
def is_feed(self, pos):
    '''Return a truthy value when pos is a feed definition.

    Returns False when the headline does not start with '@feed';
    otherwise returns whatever g.getUrlFromNode(pos) returns.
    '''
    if not pos.v.h.startswith('@feed'):
        return False
    return g.getUrlFromNode(pos)
#@+node:peckj.20131002201824.11901: *4* parse_feed
def parse_feed(self, feed):
    '''Parse the feed defined by the position `feed` and add a child node
    for every entry that is not yet in the feed's history.

    The URL comes from g.getUrlFromNode(feed). Headlines and bodies are
    built from the rss-headline-format / rss-body-format settings by
    replacing the <date>, <title>, <summary> and <link> tokens. Each new
    entry is added to the feed's history. Returns None; when the parsed
    data has bozo == 1 an error is reported and nothing is created.
    '''
    c = self.c
    g.es("Parsing feed: %s" % feed.h, color='blue')
    # pylint: disable=no-member
    # feedparser.parse *does* exist.
    data = feedparser.parse(g.getUrlFromNode(feed))
    if data.bozo == 1:
        # Treated as bad feed data: report it and stop.
        g.es("Error: bad feed data.", color='red')
        return

    # Settings, each falling back to its default.
    config = c.config
    newest_first = config.getBool('rss-sort-newest-first', default=True)
    body_lines = config.getData('rss-body-format')
    if not body_lines:
        body_lines = ['@url <link>', '\\n', '<title>', '<date>', '\\n', '<summary>']
    # A literal backslash-n in the setting stands for an empty line.
    body_template = "\n".join(body_lines).replace('\\n', '')
    headline_template = config.getString('rss-headline-format') or '[<date>] <title>'
    date_format = config.getString('rss-date-format') or '%Y-%m-%d %I:%M %p'

    # Oldest first, then flipped if requested (sort + reverse, not
    # reverse=True, so the order of equal dates matches the flip).
    stories = list(data.entries)
    stories.sort(key=self.grab_date_parsed)
    if newest_first:
        stories.reverse()

    for entry in stories:
        if self.entry_in_history(feed, entry):
            continue
        date = time.strftime(date_format, self.grab_date_parsed(entry))
        name = entry.get('title', default=self._NO_NAME)
        link = entry.get('link', default=self._NO_LINK)
        desc = entry.get('summary', default=self._NO_SUMMARY)
        # Tokens are substituted in this fixed order.
        substitutions = (
            ('<date>', date),
            ('<title>', name),
            ('<summary>', desc),
            ('<link>', link),
        )
        headline = headline_template
        for token, value in substitutions:
            headline = headline.replace(token, value)
        body = body_template
        for token, value in substitutions:
            body = body.replace(token, value)
        child = feed.insertAsLastChild()
        child.h = headline
        child.b = body
        self.add_entry_to_history(feed, entry)
    self.c.redraw()
#@+node:peckj.20131011131135.5848: *4* grab_date_parsed
def grab_date_parsed(self, entry):
    '''Return the entry's parsed date, or self._NO_TIME if it has none.

    The keys 'published_parsed', 'created_parsed' and 'updated_parsed'
    are tried in that order; the first value that is not None wins.
    '''
    for key in ('published_parsed', 'created_parsed', 'updated_parsed'):
        stamp = entry.get(key, default=None)
        if stamp is not None:
            return stamp
    return self._NO_TIME
#@+node:peckj.20131011131135.5850: *4* grab_date
def grab_date(self, entry):
    '''Return the entry's raw date value, or "" if it has none.

    The keys 'published', 'created' and 'updated' are tried in that
    order; the first value that is not None wins.
    '''
    for key in ('published', 'created', 'updated'):
        stamp = entry.get(key, default=None)
        if stamp is not None:
            return stamp
    return ""
#@+node:peckj.20131003102740.5570: *3* history stuff
#@+node:peckj.20131003095152.10662: *4* hash_entry
def hash_entry(self, entry):
    '''Return a stable string fingerprint of a feed entry.

    The fingerprint is the unsigned 32-bit CRC of the UTF-8 encoding of
    title + date + summary + link, rendered as a decimal string. Missing
    (or None) fields contribute an empty string, so entries without a
    title, summary or link can still be hashed.

    The built-in hash() is deliberately not used: for strings it is
    salted per interpreter process, so values saved in the outline would
    not match in a later session and every story would look new again.
    '''
    import zlib  # local import: only needed here
    title = entry.get('title', default=None) or ''
    summary = entry.get('summary', default=None) or ''
    link = entry.get('link', default=None) or ''
    s = title + self.grab_date(entry) + summary + link
    # Mask to 32 bits so the result is unsigned on every Python version.
    return str(zlib.crc32(s.encode('utf-8')) & 0xffffffff)
#@+node:peckj.20131003095152.10663: *4* add_entry_to_history
def add_entry_to_history(self, feed, entry):
    '''Append the entry's hash to the feed's history and store the
    updated history, unless the hash is already recorded.
    '''
    digest = self.hash_entry(entry)
    seen = self.get_history(feed)
    if digest in seen:
        return
    seen.append(digest)
    self.set_history(feed, seen)
#@+node:peckj.20131003095152.10666: *4* entry_in_history
def entry_in_history(self, feed, entry):
    '''Return True if the entry's hash is recorded in the feed's history.'''
    digest = self.hash_entry(entry)
    recorded = self.get_history(feed)
    return digest in recorded
#@+node:peckj.20131003095152.10667: *4* get_history
def get_history(self, feed):
    '''Return the feed's viewed-story history as a list of hash strings.

    The history is read from feed.v.u['rss'], a single string of hashes
    separated by ':::'. A missing or empty value yields an empty list;
    empty fragments (e.g. from a leading separator) are dropped so the
    list never contains a phantom '' entry.
    '''
    stored = feed.v.u.get('rss', "") or ""
    return [item for item in stored.split(':::') if item]
#@+node:peckj.20131003095152.10668: *4* set_history
#@+node:peckj.20131003095152.10665: *4* clear_history
#@+node:peckj.20131002201824.11902: *3* commands
#@+node:peckj.20131002201824.11903: *4* parse_selected_feed
def parse_selected_feed(self, event=None):
    '''Parse the selected `@feed` node, adding one child node per story.
    Can be SLOW for large feeds.
    '''
    selected = self.c.p
    if not self.is_feed(selected):
        g.es('Not a valid @feed node.', color='red')
        return
    self.parse_feed(selected)
    g.es('Done parsing feed.', color='blue')
#@+node:peckj.20131003081633.7944: *4* parse_all_feeds
def parse_all_feeds(self, event=None):
    '''Parse every `@feed` node in the current outline, adding one child
    node per story under its `@feed` node. Not recommended, as it can
    make Leo appear to be locked up while running.
    '''
    for vnode in self.get_all_feeds():
        position = self.c.vnode2position(vnode)
        self.parse_feed(position)
    g.es('Done parsing all feeds.', color='blue')
#@+node:peckj.20131003085421.6060: *4* delete_selected_feed_stories
def delete_selected_feed_stories(self, event=None):
    '''Delete all children of the selected `@feed` node, then redraw.
    Reports an error if the selected node is not a feed.
    '''
    selected = self.c.p
    if not self.is_feed(selected):
        g.es('Not a valid @feed node.', color='red')
        return
    self.c.deletePositionsInList(selected.children())
    self.c.redraw()
#@+node:peckj.20131003090809.6563: *4* delete_all_feed_stories
def delete_all_feed_stories(self, event=None):
    '''Delete the children of every `@feed` node in the current outline,
    then redraw once.
    '''
    commander = self.c
    for vnode in self.get_all_feeds():
        feed_position = commander.vnode2position(vnode)
        commander.deletePositionsInList(feed_position.children())
    commander.redraw()
#@+node:peckj.20131003101848.5579: *4* clear_selected_feed_history
def clear_selected_feed_history(self, event=None):
    '''Clear the viewed-stories history of the selected `@feed` node.
    Does nothing if the selected node is not a feed.
    '''
    if not self.is_feed(self.c.p):
        return
    self.clear_history(self.c.p)
#@+node:peckj.20131003101848.5580: *4* clear_all_feed_histories
def clear_all_feed_histories(self, event=None):
    '''Clear the viewed-stories history of every `@feed` node in the
    current outline.
    '''
    for vnode in self.get_all_feeds():
        position = self.c.vnode2position(vnode)
        self.clear_history(position)
#@-others
#@-others
#@-leo