Support filtering feeds by author names, to pull only parts of a shared blog
author Magnus Hagander <magnus@hagander.net>
Mon, 29 Dec 2008 15:45:11 +0000 (15:45 +0000)
committer Magnus Hagander <magnus@hagander.net>
Mon, 29 Dec 2008 15:45:11 +0000 (15:45 +0000)
without requiring use of specific tags/categories.

In passing, fix a number of error messages to throw the proper type of
exception.

Selena Deckelmann, with some polishing from me.

aggregator.py
planetadmin/register/models.py
planetadmin/register/templates/index.html
planetadmin/register/urls.py
planetadmin/register/views.py

index dba6d176cb736c79bf963968585db1a6458f56e2..182d02335191dff4ebe9e2f7f48a4dec039bf22f 100755 (executable)
@@ -17,11 +17,12 @@ class Aggregator:
        def __init__(self, db):  # db: database connection; the code shown only calls .cursor() on it
                self.db = db
                self.stored = 0
+               self.authorfilter = None  # no author filter until one is loaded from a feed row
                socket.setdefaulttimeout(20)  # default timeout, in seconds, for sockets created after this call
                
        def Update(self):
                feeds = self.db.cursor()
-               feeds.execute('SELECT id,feedurl,name,lastget FROM planet.feeds')
+               feeds.execute('SELECT id,feedurl,name,lastget,authorfilter FROM planet.feeds')
                for feed in feeds.fetchall():
                        try:
                                self.ParseFeed(feed)
@@ -43,7 +44,12 @@ class Aggregator:
                        print "Feed %s status %s" % (feedinfo[1], feed.status)
                        return
 
+               self.authorfilter = feedinfo[4]
+
                for entry in feed.entries:
+                       if not self.matches_filter(entry):
+                               continue
+                               
                        # Grab the entry. At least atom feeds from wordpress store what we
                        # want in entry.content[0].value and *also* has a summary that's
                        # much shorter. Other blog software store what we want in the summary
@@ -66,6 +72,20 @@ class Aggregator:
                self.db.cursor().execute("UPDATE planet.feeds SET lastget=COALESCE((SELECT max(dat) FROM planet.posts WHERE planet.posts.feed=planet.feeds.id),'2000-01-01') WHERE planet.feeds.id=%(feed)s", {'feed': feedinfo[0]})
                #self.db.cursor().execute('UPDATE planet.feeds SET lastget=%(lg)s WHERE id=%(feed)s', {'lg':parsestart, 'feed': feedinfo[0]})
 
+       def matches_filter(self, entry):  # True if this feed entry passes the currently loaded filters
+               # Only self.authorfilter is checked for now; further filters may
+               # be added here in the future.
+               if self.authorfilter:
+                       # Exact (case-sensitive) comparison with the entry's author name
+                       
+                       if entry.has_key('author_detail'):
+                               return entry.author_detail.name == self.authorfilter
+                       else: 
+                               return False  # a filter is set but the entry has no author_detail: reject
+
+               # No filter set (None or empty string): accept every entry
+               return True
+
        def StoreEntry(self, feedid, guid, date, link, guidisperma, title, txt):
                c = self.db.cursor()
                c.execute("SELECT id FROM planet.posts WHERE feed=%(feed)s AND guid=%(guid)s", {'feed':feedid, 'guid':guid})
index 94488577ef33de7d25fed80532dec5b9d468d084..85e0be4903ca8e0289efcdc81afe0fcfb8e7e87a 100644 (file)
@@ -9,6 +9,7 @@ class Blog(models.Model):
        lastget = models.DateTimeField(default='2000-01-01')
        userid = models.CharField(max_length=255, blank=False)
        approved = models.BooleanField()
+       authorfilter = models.CharField(max_length=255,default='')
 
        def __str__(self):
                return self.feedurl
@@ -59,7 +60,6 @@ class AuditEntry(models.Model):
        def __str__(self):
                return "%s (%s): %s" % (self.logtime, self.user, self.logtxt)
 
-
        class Meta:
                db_table = 'planetadmin\".\"auditlog'
                ordering = ['logtime']
index 0fee79e2634f0b4bb66975f4bb7fde06979b3b90..edeb263279334f1a5f8ac5c0c37e4ce7146b30b4 100644 (file)
@@ -34,7 +34,18 @@ We have the following blog(s) registered:
 {%endif%}
  </td>
  <td>{{blog.approved|yesno:"Yes,No"}}</td>
- <td>Feed: <a href="{{blog.feedurl}}">{{blog.feedurl}}</a><br/>Blog: <a href="{{blog.blogurl}}">{{blog.blogurl}}</a></td>
+ <td>Feed: <a href="{{blog.feedurl}}">{{blog.feedurl}}</a><br/>Blog: <a href="{{blog.blogurl}}">{{blog.blogurl}}</a><br/>
+OPTIONAL Author Filter:
+{%if user.is_superuser %}
+ <form method="post" action="modifyauthorfilter/{{blog.id}}/">
+ <input type="text" name="authorfilter" value="{{blog.authorfilter}}">
+ <input type="submit" value="Save Author Filter">
+ </form>
+{%else%}
+{{blog.authorfilter}}
+{%endif%}
+ </td>
+
  <td>{% if blog.approved  or user.is_superuser%}
   <a href="blogposts/{{blog.id}}/">Posts</a><br/>
 {%else%}
@@ -63,6 +74,7 @@ attachment is correct.
 </p>
 <form method="post" action="new/">
 <input type="text" name="feedurl"><br/>
+Filter by author (OPTIONAL): <input type="text" name="authorfilter"><br/>
 <input type="submit" value="New blog">
 </form>
 {%endif%}
@@ -73,6 +85,7 @@ way to do it :-P</p>
 <form method="post" action="new/">
 Blog url: <input type="text" name="feedurl"><br/>
 Userid (blank for yourself): <input type="text" name="userid"><br/>
+Filter by author (OPTIONAL): <input type="text" name="authorfilter"><br/>
 <input type="submit" value="New blog">
 </form>
 {%endif%}
index fc61d2d77ee08023d348cbb1d56a1f1017f4f536..6e5f05a7480caa1d9d4f88f9c3586f5802ebe270 100644 (file)
@@ -15,6 +15,7 @@ urlpatterns = patterns('',
     (r'^detach/(\d+)/$', 'planetadmin.register.views.detach'),
     (r'^delete/(\d+)/$', 'planetadmin.register.views.delete'),
     (r'^modify/(\d+)/$', 'planetadmin.register.views.modify'),
+    (r'^modifyauthorfilter/(\d+)/$', 'planetadmin.register.views.modifyauthorfilter'),
 
     (r'^blogposts/(\d+)/$', 'planetadmin.register.views.blogposts'),
     (r'^blogposts/(\d+)/hide/(\d+)/$', 'planetadmin.register.views.blogpost_hide'),
index 7885bc7afa97e54b2f82eb7c2db42144ca6d477e..f8bc95fb550d1a3fb8da6025fab87e7e36947a44 100644 (file)
@@ -31,23 +31,31 @@ def new(request):
        if not request.method== 'POST':
                raise pExcept('must be POST')
        feedurl = request.POST['feedurl']
+       try: 
+               user = request.POST['userid']
+       except: 
+               user = request.user.username
+       authorfilter  = request.POST['authorfilter']
        if not len(feedurl) > 1:
                raise pExcept('must include blog url!')
 
        # See if we can find the blog already
        try:
-               blog = Blog.objects.get(feedurl=feedurl)
+               blog = Blog.objects.get(userid=userid)
        except:
                blog = None
 
        if blog:
                if blog.userid:
-                       return HttpResponse("Specified blog is already registered to account '%s'" % (blog.userid))
+                       raise pExcept("User %s has already registered blog %s." % (blog.userid, blog.feedurl))
+               # Rest of this is not really useful, but will be modified so that a single user can have multiple blogs in the future
                # Found a match, so we're going to register this blog
                # For safety reasons, we're going to require approval before we do it as well :-P
                if not settings.NOTIFYADDR:
                        raise pExcept('Notify address not specified, cannot complete')
                blog.userid = request.user.username
+               blog.feedurl = feedurl
+               blog.authorfilter = authorfilter
                blog.approved = False
                AuditEntry(request.user.username, 'Requested blog attachment for %s' % blog.feedurl).save()
                send_mail('New blog assignment', """
@@ -60,8 +68,9 @@ So, head off to the admin interface and approve or reject this!
                blog.save()
                return HttpResponse('The blog has been attached to your account. For security reasons, it has been disapproved until a moderator has approved this connection.')
 
+       # TODO: add support for 'feed://' urls
        if not feedurl.startswith('http://'):
-               return HttpResponse('Only http served blogs are accepted!')
+               raise pExcept('Only http served blogs are accepted!')
 
        # Attempting to register a new blog. First let's see that we can download it
        socket.setdefaulttimeout(20)
@@ -71,22 +80,32 @@ So, head off to the admin interface and approve or reject this!
                lnk = feed.feed.link
                l = len(feed.entries)
                if l < 1:
-                       return HttpResponse('Blog feed contains no entries.')
+                       raise pExcept('Blog feed contains no entries.')
        except Exception, e:
-               return HttpResponse('Failed to download blog feed')
+               raise pExcept('Failed to download blog feed')
        if not status == 200:
-               return HttpResponse('Attempt to download blog feed returned status %s.' % (status))
+               raise pExcept('Attempt to download blog feed returned status %s.' % (status))
        
        if not settings.NOTIFYADDR:
                raise pExcept('Notify address not specified, cannot complete')
 
        blog = Blog()
-       blog.name = request.user.first_name
-       if request.user.is_superuser:
-               blog.userid = request.POST['userid']
+       if issuperuser(request.user):
+               blog.userid = request.POST['userid'] or request.user.username
+               # Try to guess whose name should go on this blog, default to the current
+               # user's name if we can't find it in the feed.
+               blog.name = request.user.first_name
+               try:
+                       e = feed.entries[0]
+                       blog.name = e.author_detail.name or request.user.first_name
+               except:
+                       pass
        else:
-               blog.userid= request.user.username
+               blog.userid = request.user.username
+               blog.name = request.user.first_name
+
        blog.feedurl = feedurl
+       blog.authorfilter = authorfilter
        blog.blogurl = lnk
        blog.approved = False
        send_mail('New blog assignment', """
@@ -107,7 +126,7 @@ def delete(request, id):
        blog = get_object_or_404(Blog, id=id)
        if not request.user.is_superuser:
                if not blog.userid == request.user.username:
-                       return HttpResponse("You can only delete your own feeds! Don't try to hack!")
+                       raise pError("You can only delete your own feeds! Don't try to hack!")
        send_mail('Blog deleted', """
 The user '%s' has deleted the blog at
 %s (name %s)
@@ -116,6 +135,18 @@ The user '%s' has deleted the blog at
        AuditEntry(request.user.username, 'Deleted blog %s' % blog.feedurl).save()
        return HttpResponseRedirect('../..')
 
+@login_required
+@transaction.commit_on_success
+def modifyauthorfilter(request, id):  # Store POST['authorfilter'] on the blog with this id, audit-log it, redirect to '../..'
+       blog = get_object_or_404(Blog, id=id)
+       if not request.user.is_superuser:
+               if not blog.userid == request.user.username:  # non-superusers may only change a blog registered to them
+                       raise Exception("You can only update your own author filter! Don't try to hack!")  # NOTE(review): plain Exception, while new() raises pExcept -- confirm intended
+       blog.authorfilter = request.POST['authorfilter']  # NOTE(review): no request.method check here, unlike new() -- confirm intended
+       blog.save()
+       AuditEntry(request.user.username, 'Changed author filter of blog %s' % blog.feedurl).save()
+       return HttpResponseRedirect('../..')
+
 @user_passes_test(issuperuser)
 @transaction.commit_on_success
 def modify(request, id):