yummy.py
author Deepak Sarda
Tue Jul 22 23:29:20 2008 -0700 (5 weeks ago)
changeset 3 892e0c36f095
parent 2f1043f131e49
permissions -rwxr-xr-x
Fix for urlencode not handling unicode strings properly
     1 #!/usr/bin/env python
     2 import cPickle as pickle
     3 import feedparser
     4 import logging
     5 import os
     6 import sys
     7 import time
     8 import urllib
     9 import urllib2
    10 import xml.etree.cElementTree as ET
    11 import ConfigParser
    12 
    13 __author__      = 'Deepak Sarda'
    14 __version__     = '0.1.1'
    15 __copyright__   = '(c) 2008 Deepak Sarda'
    16 __license__     = 'Public Domain'
    17 __url__         = 'http://antrix.net/'
    18 
    19 ## Configuration
    20 #
    21 # config file format is as follows
    22 # ; example ~/.yummy.cfg file
    23 # [yummy]
    24 # user = delicious-user-name
    25 # pass = delicious-password
    26 # ; source_url is your public shared items feed url from Google Reader
    27 # source_url = http://www.google.com/reader/public/atom/user/../broadcast
    28 # ; end of config file
    29 config_file = os.path.expanduser('~/.yummy.cfg')
    30 
    31 # If state file doesn't exist, it will be created
    32 state_file = os.path.expanduser('~/.yummy.state')
    33 
    34 # Set to logging.DEBUG for debug messages
    35 LOG_LEVEL = logging.INFO
    36 
    37 # End configuration
    38 
    39 class Post(object):
    40     """Class to model a del.icio.us Post. It limits attributes 
    41     to a retricted subset, i.e. those used in a del.icio.us post."""
    42 
    43     __slots__ = ['description', 'url', 'extended', 'tags']
    44 
    45     def __contains__(self, key):
    46         try:
    47             getattr(self, key)
    48         except:
    49             return False
    50         return True
    51 
    52     # urllib.urlencode() just needs this beyond the basic stuff above
    53     def items(self):
    54         return [(k, getattr(self, k).encode('utf-8')) 
    55                         for k in self.__slots__ if k in self]
    56 
    57 def posts(feed):
    58     """Iterates over a Feedparser feed object and returns Posts.
    59     Tailored for greader shared items feed to return title,
    60     link and annotation."""
    61 
    62     for entry in feed.entries:
    63         d = Post()
    64         d.description = entry.title
    65         d.url = entry.link
    66         d.tags = "linker via:greader" # TODO: this should be configurable
    67         for content in entry.content:
    68             if content.base.startswith(
    69                     'http://www.google.com/reader/public/atom/user/'):
    70                 d.extended = content.value
    71 
    72         yield d
    73 
    74 class Yummy(object):
    75     _endpoint = 'https://api.del.icio.us/v1/posts/add?'
    76 
    77     def __init__(self, statefile, source_url, user, pw):
    78         """`statefile` is where data about which items have already been
    79         posted to delicious is saved.
    80         `source_url` is the Google Reader feed url from which to pick items
    81         `user` is the delicious user name
    82         `pw` is the delicious password
    83         """
    84 
    85         self._store = statefile
    86         try:
    87             self._processed = pickle.load(open(statefile))
    88         except:
    89             logging.error('Error loading state file: %s' % statefile)
    90             self._processed = set()
    91 
    92         self._source_url = source_url
    93 
    94         pass_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
    95         pass_mgr.add_password(None, 'api.del.icio.us', user, pw)
    96         handler = urllib2.HTTPBasicAuthHandler(pass_mgr)
    97         opener = urllib2.build_opener(handler)
    98         opener.addheaders = [('User-Agent', 
    99                        'yummy - greader->delicious poster (%s)' % __version__)]
   100         urllib2.install_opener(opener)
   101 
   102     def update(self):
   103         """Updates delicious with posts sourced from source_url"""
   104 
   105         logging.debug('fetching source feed')
   106         feed = feedparser.parse(self._source_url)
   107         logging.debug('fetched feed. it has %s entries' % len(feed.entries))
   108 
   109         for post in posts(feed):
   110             if post.url in self._processed:
   111                 logging.debug('Skipping already processed URL: %s' % post.url)
   112                 continue
   113 
   114             params = urllib.urlencode(post)
   115             logging.debug('Posting url: %s' % self._endpoint + params)
   116             try:
   117                 response = urllib2.urlopen(self._endpoint + params)
   118                 xml = ET.parse(response)
   119             except urllib2.HTTPError, exc:
   120                 logging.error('HTTPError: %d' % (exc.code))
   121             except urllib2.URLError, exc:
   122                 logging.error('URL error' % str(exc))
   123             else:
   124                 result = xml.getroot()
   125                 logging.debug('response is: %s: %s' % 
   126                                     (result.tag, result.get('code')))
   127 
   128                 if result.get('code') == 'done':
   129                     self._processed.add(post.url)
   130                 else:
   131                     logging.error('Error posting to delicious.' \
   132                             'Response was: %s' % result.get('code'))
   133             
   134             # delicious folks require us to wait a second between requests
   135             time.sleep(1)
   136 
   137         # Done processing feed. Save state to data store before returning
   138         logging.debug('Done processing all urls in feed')
   139         f = open(self._store, 'w')
   140         pickle.dump(self._processed, f)
   141         f.close()
   142 
   143 if __name__ == '__main__':
   144     logging.basicConfig(level=LOG_LEVEL)
   145 
   146     config = ConfigParser.ConfigParser()
   147     if not config.read(config_file):
   148         logging.error('Could not read config file: %s' % config_file)
   149         sys.exit(1)
   150 
   151     username = config.get('yummy', 'user')
   152     password = config.get('yummy', 'pass')
   153     source_url = config.get('yummy', 'source_url')
   154 
   155     y = Yummy(state_file, source_url, username, password)
   156     y.update()