A brand new planet manager and runtime
author Magnus Hagander <magnus@hagander.net>
Wed, 10 Feb 2016 17:33:19 +0000 (18:33 +0100)
committer Magnus Hagander <magnus@hagander.net>
Wed, 10 Feb 2016 17:37:42 +0000 (18:37 +0100)
Completely restructure the planet postgresql code -- it was definitely
getting old.

A new registration interface makes it much more friendly, and gives the
user a chance to see what's going on.

A new moderation interface allows for a better view of what's happening
with the feeds that are pending, as well as the ability to give a message
to the subscriber when rejecting a blog.

Aggregation uses the same framework, but now runs as a django management
command, and also supports async parallelism when fetching the blogs to
make it much faster (since most of them time out on http).

Planet itself is now rendered in django, and expects a Varnish sitting in
front of it, to which it will generate purges properly. Makes any changes
made in the interface more or less instant, instead of having to wait for
15 minutes.

33 files changed:
aggregator.py [deleted file]
discovery.py [deleted file]
generator.py [deleted file]
hamnadmin/hamnadmin/exceptions.py [deleted file]
hamnadmin/hamnadmin/local_settings.py.sample
hamnadmin/hamnadmin/register/feeds.py [new file with mode: 0644]
hamnadmin/hamnadmin/register/forms.py [new file with mode: 0644]
hamnadmin/hamnadmin/register/management/__init__.py [new file with mode: 0644]
hamnadmin/hamnadmin/register/management/commands/__init__.py [new file with mode: 0644]
hamnadmin/hamnadmin/register/management/commands/aggregate_feeds.py [new file with mode: 0644]
hamnadmin/hamnadmin/register/migrations/0002_cleanup_models.py [new file with mode: 0644]
hamnadmin/hamnadmin/register/models.py
hamnadmin/hamnadmin/register/templates/edit.html [new file with mode: 0644]
hamnadmin/hamnadmin/register/templates/index.html
hamnadmin/hamnadmin/register/templates/moderate.html [new file with mode: 0644]
hamnadmin/hamnadmin/register/templates/moderate_reject.html [new file with mode: 0644]
hamnadmin/hamnadmin/register/templates/regbase.html
hamnadmin/hamnadmin/register/templatetags/__init__.py [new file with mode: 0644]
hamnadmin/hamnadmin/register/templatetags/hamn.py [new file with mode: 0644]
hamnadmin/hamnadmin/register/urls.py
hamnadmin/hamnadmin/register/views.py
hamnadmin/hamnadmin/settings.py
hamnadmin/hamnadmin/urls.py
hamnadmin/hamnadmin/util/__init__.py [new file with mode: 0644]
hamnadmin/hamnadmin/util/aggregate.py [new file with mode: 0644]
hamnadmin/hamnadmin/util/html.py [new file with mode: 0644]
hamnadmin/hamnadmin/util/shortlink.py [new file with mode: 0644]
hamnadmin/hamnadmin/util/varnish.py [new file with mode: 0644]
planethtml.py [deleted file]
shorturl.py [deleted file]
template/feeds.tmpl
template/index.tmpl
www/css/planet_reg.css [new file with mode: 0644]

diff --git a/aggregator.py b/aggregator.py
deleted file mode 100755 (executable)
index efdcd96..0000000
+++ /dev/null
@@ -1,164 +0,0 @@
-#!/usr/bin/env python
-# vim: ai ts=4 sts=4 sw=4
-"""PostgreSQL Planet Aggregator
-
-This file contains the functions to suck down RSS/Atom feeds 
-(using feedparser) and store the results in a PostgreSQL database.
-
-Copyright (C) 2008-2010 PostgreSQL Global Development Group
-"""
-
-import psycopg2
-import feedparser
-import datetime
-import socket
-import ConfigParser
-
-class Aggregator:
-       def __init__(self, db):
-               self.db = db
-               self.stored = 0
-               self.authorfilter = None
-               socket.setdefaulttimeout(20)
-               
-       def Update(self):
-               feeds = self.db.cursor()
-               feeds.execute('SELECT id,feedurl,name,lastget,authorfilter FROM feeds')
-               for feed in feeds.fetchall():
-                       try:
-                               n = self.ParseFeed(feed)
-                               if n > 0:
-                                       c = self.db.cursor()
-                                       c.execute("INSERT INTO aggregatorlog (feed, success, info) VALUES (%(feed)s, 't', %(info)s)", {
-                                               'feed': feed[0],
-                                               'info': 'Fetched %s posts.' % n,
-                                       })
-                       except Exception, e:
-                               print "Exception when parsing feed '%s': %s" % (feed[1], e)
-                               self.db.rollback()
-                               c = self.db.cursor()
-                               c.execute("INSERT INTO aggregatorlog (feed, success, info) VALUES (%(feed)s, 'f', %(info)s)", {
-                                       'feed': feed[0],
-                                       'info': 'Error: "%s"' % e,
-                               })
-                       self.db.commit()
-
-       def ParseFeed(self, feedinfo):
-               numadded = 0
-               parsestart = datetime.datetime.now()
-               feed = feedparser.parse(feedinfo[1], modified=feedinfo[3].timetuple())
-               
-               if not hasattr(feed, 'status'):
-                       # bozo_excpetion can seemingly be set when there is no error as well,
-                       # so make sure we only check if we didn't get a status.
-                       if hasattr(feed,'bozo_exception'):
-                               raise Exception('Feed load error %s' % feed.bozo_exception)
-                       raise Exception('Feed load error with no exception!')
-
-               if feed.status == 304:
-                       # not changed
-                       return 0
-               if feed.status != 200:
-                       raise Exception('Feed returned status %s' % feed.status)
-
-               self.authorfilter = feedinfo[4]
-
-               for entry in feed.entries:
-                       if not self.matches_filter(entry):
-                               continue
-                               
-                       # Grab the entry. At least atom feeds from wordpress store what we
-                       # want in entry.content[0].value and *also* has a summary that's
-                       # much shorter.
-                       # We therefor check all available texts, and just pick the one that
-                       # is longest.
-                       txtalts = []
-                       try:
-                               txtalts.append(entry.content[0].value)
-                       except:
-                               pass
-                       if entry.has_key('summary'):
-                               txtalts.append(entry.summary)
-
-                       # Select the longest text
-                       txt = max(txtalts, key=len)
-                       if txt == '':
-                               # Not a critical error, we just ignore empty posts
-                               print "Failed to get text for entry at %s" % entry.link
-                               continue
-
-                       if entry.has_key('guidislink'):
-                               guidisperma = entry.guidislink
-                       else:
-                               guidisperma = True
-                       dat = None
-                       if hasattr(entry, 'published_parsed'):
-                               dat = datetime.datetime(*(entry.published_parsed[0:6]))
-                       elif hasattr(entry, 'updated_parsed'):
-                               dat = datetime.datetime(*(entry.updated_parsed[0:6]))
-                       else:
-                               print "Failed to get date for entry at %s (keys %s)" % (entry.link, entry.keys())
-                               continue
-
-                       if self.StoreEntry(feedinfo[0], entry.id, dat, entry.link, guidisperma, entry.title, txt) > 0:
-                               numadded += 1
-
-               # Check if we got back a Last-Modified time
-               if hasattr(feed, 'modified_parsed') and feed['modified_parsed']:
-                       # Last-Modified header retreived. If we did receive it, we will
-                       # trust the content (assuming we can parse it)
-                       d = datetime.datetime(*feed['modified_parsed'][:6])
-                       if (d-datetime.datetime.now()).days > 5:
-                               # Except if it's ridiculously long in the future, we'll set it
-                               # to right now instead, to deal with buggy blog software. We
-                               # currently define rediculously long as 5 days
-                               d = datetime.datetime.now()
-
-                       self.db.cursor().execute("UPDATE feeds SET lastget=%(date)s WHERE id=%(feed)s AND NOT lastget=%(date)s", { 'date': d, 'feed': feedinfo[0]})
-               else:
-                       # We didn't get a Last-Modified time, so set it to the entry date
-                       # for the latest entry in this feed. Only do this if we have more
-                       # than one entry.
-                       if numadded > 0:
-                               self.db.cursor().execute("UPDATE feeds SET lastget=COALESCE((SELECT max(dat) FROM posts WHERE posts.feed=feeds.id),'2000-01-01') WHERE feeds.id=%(feed)s", {'feed': feedinfo[0]})
-
-               # Return the number of feeds we actually added
-               return numadded
-
-       def matches_filter(self, entry):
-               # For now, we only match against self.authorfilter. In the future,
-               # there may be more filters.
-               if self.authorfilter:
-                       # Match against an author filter
-                       
-                       if entry.has_key('author_detail'):
-                               return entry.author_detail.name == self.authorfilter
-                       elif entry.has_key('author'):
-                               return entry.author == self.authorfilter
-                       else: 
-                               return False
-
-               # No filters, always return true
-               return True
-
-       def StoreEntry(self, feedid, guid, date, link, guidisperma, title, txt):
-               c = self.db.cursor()
-               c.execute("SELECT id FROM posts WHERE feed=%(feed)s AND guid=%(guid)s", {'feed':feedid, 'guid':guid})
-               if c.rowcount > 0:
-                       return 0
-               print "Store entry %s from feed %s" % (guid, feedid)
-               c.execute("INSERT INTO posts (feed,guid,link,guidisperma,dat,title,txt) VALUES (%(feed)s,%(guid)s,%(link)s,%(guidisperma)s,%(date)s,%(title)s,%(txt)s)",
-                       {'feed': feedid,
-                        'guid': guid,
-                        'link': link,
-                        'guidisperma': guidisperma,
-                        'date': date,
-                        'title': title,
-                        'txt': txt})
-               self.stored += 1
-               return 1
-
-if __name__=="__main__":
-       c = ConfigParser.ConfigParser()
-       c.read('planet.ini')
-       Aggregator(psycopg2.connect(c.get('planet','db'))).Update()
diff --git a/discovery.py b/discovery.py
deleted file mode 100755 (executable)
index a4a4bae..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/usr/bin/env python
-"""PostgreSQL Planet Aggregator
-
-This file contains the functions to suck down RSS/Atom feeds 
-(using feedparser), determining the actual blog URL (for the
-HTML posts), and update the database with them.
-
-Copyright (C) 2008 PostgreSQL Global Development Group
-"""
-
-import psycopg2
-import feedparser
-import datetime
-import socket
-import ConfigParser
-
-class Aggregator:
-       def __init__(self, db):
-               self.db = db
-               self.stored = 0
-               socket.setdefaulttimeout(20)
-               
-       def Update(self):
-               feeds = self.db.cursor()
-               feeds.execute("SELECT id,feedurl,name,blogurl FROM feeds WHERE blogurl='' AND feedurl NOT LIKE '%planet%'")
-               for feed in feeds.fetchall():
-                       self.DiscoverFeed(feed)
-               self.db.commit()
-
-       def DiscoverFeed(self, feedinfo):
-               feed = feedparser.parse(feedinfo[1])
-
-               if feed.status != 200:
-                       # not ok!
-                       print "Feed %s status %s" % (feedinfo[1], feed.status)
-                       return
-
-               try:
-                       if feed.feed.link:
-                               print "Setting feed for %s to %s" % (feedinfo[2], feed.feed.link)
-                               c = self.db.cursor()
-                               c.execute("UPDATE feeds SET blogurl='%s' WHERE id=%i" % (feed.feed.link, feedinfo[0]))
-               except:
-                       print "Exception when processing feed for %s" % (feedinfo[2])
-                       print feed
-
-if __name__=="__main__":
-       c = ConfigParser.ConfigParser()
-       c.read('planet.ini')
-       Aggregator(psycopg2.connect(c.get('planet','db'))).Update()
diff --git a/generator.py b/generator.py
deleted file mode 100755 (executable)
index 1e72f50..0000000
+++ /dev/null
@@ -1,239 +0,0 @@
-#!/usr/bin/env python
-"""PostgreSQL Planet Aggregator
-
-This file contains the functions to generate output RSS and
-HTML data from what's currently in the database.
-
-Copyright (C) 2008-2009 PostgreSQL Global Development Group
-"""
-
-import psycopg2
-import psycopg2.extensions
-import PyRSS2Gen
-import ConfigParser 
-import datetime
-import os.path
-import sys
-import tidy
-import urllib
-from django.template import Context
-from django.template.loader import get_template
-from django.conf import settings
-from HTMLParser import HTMLParser
-from planethtml import *
-
-class Generator:
-       def __init__(self,cfg):
-               self.db = psycopg2.connect(cfg.get('planet','db'))
-               self.tidyopts = dict(   drop_proprietary_attributes=1,
-                                       alt_text='',
-                                       hide_comments=1,
-                                       output_xhtml=1,
-                                       show_body_only=1,
-                                       clean=1,
-                                       char_encoding='utf8',
-                                       )
-               self.items = []
-               self.topposters = []
-               self.topteams = []
-               self.allposters = []
-               self.allteams = []
-               self.staticfiles = ['add', ]
-               if cfg.has_option('twitter','account'):
-                       self.twittername = cfg.get('twitter','account')
-               else:
-                       self.twittername = None
-
-               settings.configure(
-                       TEMPLATE_DIRS=('template',),
-               )
-
-       def Generate(self):
-               rss = PyRSS2Gen.RSS2(
-                       title = 'Planet PostgreSQL',
-                       link = 'http://planet.postgresql.org',
-                       description = 'Planet PostgreSQL',
-                       generator = 'Planet PostgreSQL',
-                       lastBuildDate = datetime.datetime.utcnow())
-               rssshort = PyRSS2Gen.RSS2(
-                       title = 'Planet PostgreSQL (short)',
-                       link = 'http://planet.postgresql.org',
-                       description = 'Planet PostgreSQL (short)',
-                       generator = 'Planet PostgreSQL',
-                       lastBuildDate = datetime.datetime.utcnow())
-
-               psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
-               self.db.set_client_encoding('UTF8')
-               c = self.db.cursor()
-               c.execute("SET TIMEZONE=GMT")
-               c.execute("SELECT guid,link,dat,title,txt,feeds.name,blogurl,guidisperma,teams.name,teams.teamurl FROM posts INNER JOIN feeds ON feeds.id=posts.feed LEFT JOIN teams ON feeds.team = teams.id WHERE feeds.approved AND NOT posts.hidden ORDER BY dat DESC LIMIT 30")
-               for post in c.fetchall():
-                       desc = self.TruncateAndCleanDescription(post[4])
-                       rss.items.append(PyRSS2Gen.RSSItem(
-                               title=post[5] + ': ' + post[3],
-                               link=post[1],
-                               guid=PyRSS2Gen.Guid(post[0],post[7]),
-                               pubDate=post[2],
-                               description=post[4]))
-                       rssshort.items.append(PyRSS2Gen.RSSItem(
-                               title=post[5] + ': ' + post[3],
-                               link=post[1],
-                               guid=PyRSS2Gen.Guid(post[0],post[7]),
-                               pubDate=post[2],
-                               description=desc))
-                       self.items.append(PlanetPost(post[0], post[1], post[2], post[3], post[5], post[6], desc, post[8], post[9]))
-
-               c.execute("""
-SELECT feeds.name,blogurl,feedurl,count(*),teams.name,teams.teamurl,NULL,max(posts.dat) FROM feeds
-INNER JOIN posts ON feeds.id=posts.feed
-LEFT JOIN teams ON teams.id=feeds.team
-WHERE age(dat) < '1 month' AND approved AND NOT hidden
-AND NOT excludestats
-GROUP BY feeds.name,blogurl,feedurl,teams.name,teamurl ORDER BY 4 DESC, 8 DESC, 1 LIMIT 20
-""")
-
-               self.topposters = [PlanetFeed(feed) for feed in c.fetchall()]
-               if len(self.topposters) < 2: self.topposters = []
-
-               c.execute("""
-SELECT NULL,NULL,NULL,NULL,teams.name, teamurl, count(*) FROM
-feeds
-INNER JOIN posts ON feeds.id=posts.feed
-INNER JOIN teams ON teams.id=feeds.team
-WHERE age(dat) < '1 month' AND approved AND NOT hidden
-AND NOT excludestats
-GROUP BY teams.name, teamurl ORDER BY 7 DESC, 1 LIMIT 10""")
-
-               self.topteams = [PlanetFeed(feed) for feed in c.fetchall()]
-               if len(self.topteams) < 2: self.topteams = []
-
-               c.execute("""
-SELECT name,blogurl,feedurl,NULL,NULL,NULL,NULL FROM feeds
-WHERE approved AND team IS NULL ORDER BY name,blogurl
-""")
-               self.allposters = [PlanetFeed(feed) for feed in c.fetchall()]
-               c.execute("""
-SELECT feeds.name AS feedname,blogurl,feedurl,NULL,teams.name,teamurl,NULL
-FROM feeds INNER JOIN teams ON feeds.team=teams.id
-WHERE approved ORDER BY teams.name,feeds.name,blogurl
-""")
-               self.allteams = [PlanetFeed(feed) for feed in c.fetchall()]
-
-               rss.write_xml(open("www/rss20.xml","w"), encoding='utf-8')
-               rssshort.write_xml(open("www/rss20_short.xml","w"), encoding='utf-8')
-
-               self.WriteFromTemplate('index.tmpl', 'www/index.html')
-               self.WriteFromTemplate('feeds.tmpl', 'www/feeds.html')
-               for staticfile in self.staticfiles:
-                       self.UpdateStaticFile(staticfile)
-
-       def WriteFromTemplate(self, templatename, outputname):
-               tmpl = get_template(templatename)
-               f = open(outputname, "w")
-               f.write(tmpl.render(Context({
-                       'topposters': self.topposters,
-                       'topteams': self.topteams,
-                       'allposters': self.allposters,
-                       'allteams': self.allteams,
-                       'posts': self.items,
-                       'twittername': self.twittername,
-               })).encode('utf-8'))
-               f.close()
-
-       def UpdateStaticFile(self, filename):
-               if not os.path.exists("www/%s.html" % (filename)) or \
-                       os.path.getmtime("www/%s.html" % (filename)) < os.path.getmtime("template/%s.tmpl" % (filename)):
-                       print "Updating %s.html" % (filename)
-                       self.WriteFromTemplate("%s.tmpl" % (filename), "www/%s.html" % (filename))
-               
-
-       def TruncateAndCleanDescription(self, txt):
-               # First apply Tidy
-               txt = unicode(str(tidy.parseString(txt.encode('utf-8'), **self.tidyopts)),'utf8')
-
-               # Then truncate as necessary
-               ht = HtmlTruncator(2048)
-               ht.feed(txt)
-               out = ht.GetText()
-
-               # Remove initial <br /> tags
-               while out.startswith('<br'):
-                       out = out[out.find('>')+1:]
-
-               return out
-
-class HtmlTruncator(HTMLParser):
-       def __init__(self, maxlen):
-               HTMLParser.__init__(self)
-               self.len = 0
-               self.maxlen = maxlen
-               self.fulltxt = ''
-               self.trunctxt = ''
-               self.tagstack = []
-               self.skiprest = False
-       
-       def feed(self, txt):
-               txt = txt.lstrip()
-               self.fulltxt += txt
-               HTMLParser.feed(self, txt)
-
-       def handle_startendtag(self, tag, attrs):
-               if self.skiprest: return
-               self.trunctxt += self.get_starttag_text()
-       
-       def quoteurl(self, str):
-               p = str.split(":",2)
-               if len(p) < 2:
-                       # Don't crash on invalid URLs
-                       return ""
-               return p[0] + ":" + urllib.quote(p[1])
-
-       def cleanhref(self, attrs):
-               if attrs[0] == 'href':
-                       return 'href', self.quoteurl(attrs[1])
-               return attrs
-
-       def handle_starttag(self, tag, attrs):
-               if self.skiprest: return
-               self.trunctxt += "<" + tag
-               self.trunctxt += (' '.join([(' %s="%s"' % (k,v)) for k,v in map(self.cleanhref, attrs)]))
-               self.trunctxt += ">"
-               self.tagstack.append(tag)
-
-       def handle_endtag(self, tag):
-               if self.skiprest: return
-               self.trunctxt += "</" + tag + ">"
-               self.tagstack.pop()
-
-       def handle_entityref(self, ref):
-               self.len += 1
-               if self.skiprest: return
-               self.trunctxt += "&" + ref + ";"
-
-       def handle_data(self, data):
-               self.len += len(data)
-               if self.skiprest: return
-               self.trunctxt += data
-               if self.len > self.maxlen:
-                       # Passed max length, so truncate text as close to the limit as possible
-                       self.trunctxt = self.trunctxt[0:len(self.trunctxt)-(self.len-self.maxlen)]
-
-                       # Now append any tags that weren't properly closed
-                       self.tagstack.reverse()
-                       for tag in self.tagstack:
-                               self.trunctxt += "</" + tag + ">"
-                       self.skiprest = True
-
-                       # Finally, append the continuation chars
-                       self.trunctxt += "[...]"
-
-       def GetText(self):
-               if self.len > self.maxlen:
-                       return self.trunctxt
-               else:
-                       return self.fulltxt
-
-if __name__=="__main__":
-       c = ConfigParser.ConfigParser()
-       c.read('planet.ini')
-       Generator(c).Generate()
diff --git a/hamnadmin/hamnadmin/exceptions.py b/hamnadmin/hamnadmin/exceptions.py
deleted file mode 100644 (file)
index 8f3f4b8..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-from django.shortcuts import render_to_response
-
-class pExcept(Exception):
-        pass
-
-class PlanetExceptionMiddleware:
-       def process_exception(self, request, exception):
-               if isinstance(exception, pExcept):
-                       return render_to_response('internal_error.html', {
-                               'msg': exception
-                       })
-               return None
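diff --git a/hamnadmin/hamnadmin/local_settings.py.sample b/hamnadmin/hamnadmin/local_settings.py.sample
index 24120bbc1f13cbdfa63a8c0a5f4013807bd8cdb9..27650c7e61c10cc40b716b5bbeed92cd3b8cf8fd 100644 (file)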
@@ -13,3 +13,4 @@ NOTIFYADDR='some@where.com'
 PGAUTH_REDIRECT="..."
 PGAUTH_KEY="..."
 
+VARNISH_URL=None
diff --git a/hamnadmin/hamnadmin/register/feeds.py b/hamnadmin/hamnadmin/register/feeds.py
new file mode 100644 (file)
index 0000000..5f9ee34
--- /dev/null
@@ -0,0 +1,36 @@
+from django.contrib.syndication.views import Feed
+
+from hamnadmin.util.html import TruncateAndClean
+
+from models import Post
+
+class PostFeed(Feed):
+       title = 'Planet PostgreSQL'
+       link = 'http://planet.postgresql.org'
+       feed_url = 'http://planet.postgresql.org/rss20.xml'
+       description = 'Planet PostgreSQL'
+       generator = 'Planet PostgreSQL'
+
+       def get_object(self, request, type=None):
+               return type
+
+       def items(self, type):
+               qs = Post.objects.filter(feed__approved=True, hidden=False).order_by('-dat')
+               if type == "_short":
+                       qs = qs.extra(select = {'short': 1})
+               return qs[:30]
+
+       def item_title(self, item):
+               return item.title
+
+       def item_link(self, item):
+               return item.shortlink
+
+       def item_pubdate(self, item):
+               return item.dat
+
+       def item_description(self, item):
+               if hasattr(item, 'short'):
+                       return TruncateAndClean(item.txt)
+               else:
+                       return item.txt
diff --git a/hamnadmin/hamnadmin/register/forms.py b/hamnadmin/hamnadmin/register/forms.py
new file mode 100644 (file)
index 0000000..8f00187
--- /dev/null
@@ -0,0 +1,57 @@
+from django import forms
+from django.contrib import messages
+from django.core.validators import MinLengthValidator
+
+from models import Blog
+
+from hamnadmin.util.aggregate import FeedFetcher
+
+class BlogEditForm(forms.ModelForm):
+       class Meta:
+               model = Blog
+               fields = ('feedurl', 'team', 'twitteruser', 'authorfilter')
+
+       def __init__(self, request, *args, **kwargs):
+               self.request = request
+               super(BlogEditForm, self).__init__(*args, **kwargs)
+               for f in self.fields.values():
+                       f.widget.attrs['class'] = 'form-control'
+
+               if kwargs['instance'].approved:
+                       self.fields['feedurl'].help_text="Note that changing the feed URL will disable the blog pending new moderation"
+                       self.fields['authorfilter'].help_text="Note that changing the author filter will disable the blog pending new moderation"
+
+
+       def clean(self):
+               tracemessages = []
+               def _trace(msg):
+                       tracemessages.append(msg)
+
+               # Create a fake instance to pass down. We'll just throw it away
+               feedobj = Blog(feedurl=self.cleaned_data['feedurl'], authorfilter=self.cleaned_data['authorfilter'])
+               fetcher = FeedFetcher(feedobj, _trace)
+               try:
+                       entries = list(fetcher.parse())
+               except Exception, e:
+                       raise forms.ValidationError("Failed to retreive and parse feed: %s" % e)
+               if len(entries) == 0:
+                       for m in tracemessages:
+                               messages.info(self.request, m)
+                       raise forms.ValidationError("No entries found in blog. You cannot submit a blog until it contains entries.")
+
+               return self.cleaned_data
+
+       def clean_twitteruser(self):
+               if self.cleaned_data['twitteruser'].startswith('@'):
+                       return self.cleaned_data['twitteruser'][1:]
+               else:
+                       return self.cleaned_data['twitteruser']
+
+class ModerateRejectForm(forms.Form):
+       message = forms.CharField(min_length=30, required=True, widget=forms.Textarea)
+       modsonly = forms.BooleanField(required=False, label="Moderators only", help_text="Should message be sent only to moderators, and not to the submitter (for spam submissions mainly)")
+
+       def __init__(self, *args, **kwargs):
+               super(ModerateRejectForm, self).__init__(*args, **kwargs)
+               for f in self.fields.values():
+                       f.widget.attrs['class'] = 'form-control'
diff --git a/hamnadmin/hamnadmin/register/management/__init__.py b/hamnadmin/hamnadmin/register/management/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/hamnadmin/hamnadmin/register/management/commands/__init__.py b/hamnadmin/hamnadmin/register/management/commands/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/hamnadmin/hamnadmin/register/management/commands/aggregate_feeds.py b/hamnadmin/hamnadmin/register/management/commands/aggregate_feeds.py
new file mode 100644 (file)
index 0000000..8136040
--- /dev/null
@@ -0,0 +1,120 @@
+#from gevent import monkey
+#monkey.patch_all()
+from gevent.threadpool import ThreadPool
+import gevent
+
+from django.core.management.base import BaseCommand, CommandError
+from django.db import transaction
+from django.conf import settings
+
+from hamnadmin.register.models import Blog, Post, AggregatorLog
+from hamnadmin.util.aggregate import FeedFetcher
+from hamnadmin.mailqueue.util import send_simple_mail
+from hamnadmin.util.varnish import purge_root_and_feeds
+
+
+class BreakoutException(Exception):
+       pass
+
+
+
+class Command(BaseCommand):
+       help = 'Aggregate one or more feeds'
+
+       def add_arguments(self, parser):
+               parser.add_argument('--id', type=int, help="Fetch just one feed specified by id")
+               parser.add_argument('-d', '--debug', action='store_true', help="Enable debug mode, don't save anything")
+               parser.add_argument('-f', '--full', action='store_true', help="Fetch full feed, regardless of last fetch date")
+               parser.add_argument('-p', '--parallelism', type=int, default=10, help="Number of parallell requests")
+
+       def trace(self, msg):
+               if self.verbose:
+                       self.stdout.write(msg)
+
+       def handle(self, *args, **options):
+               """Fetch feeds in parallel, then store any new posts in one transaction.
+
+               Reads the verbosity, debug, full, id and parallelism options. When
+               debug is set, every database change made here is rolled back at the
+               end and no cache purge is issued.
+               """
+               self.verbose = options['verbosity'] > 1
+               self.debug = options['debug']
+               if self.debug:
+                       # Debug mode forces verbose tracing
+                       self.verbose = True
+               self.full = options['full']
+
+               if options['id']:
+                       feeds = Blog.objects.filter(pk=options['id'])
+               else:
+                       # No explicit feed requested: aggregate every registered blog
+                       # rather than a hard-coded pair of ids.
+                       feeds = Blog.objects.all()
+
+               # Fan out the fetching itself
+               fetchers = [FeedFetcher(f, self.trace) for f in feeds]
+               num = len(fetchers)
+               pool = ThreadPool(options['parallelism'])
+               pr = pool.map_async(self._fetch_one_feed, fetchers)
+               while not pr.ready():
+                       gevent.sleep(1)
+                       self.trace("Fetching feeds (%s/%s done), please wait..." % (num-pool.task_queue.unfinished_tasks, num))
+
+               total_entries = 0
+               # Fetching was async, but results processing will be sync. Don't want to deal with
+               # multithreaded database connections and such complications.
+               try:
+                       with transaction.atomic():
+                               for feed, results in pr.get():
+                                       if isinstance(results, Exception):
+                                               # _fetch_one_feed returns the exception instead of raising it
+                                               AggregatorLog(feed=feed,
+                                                                         success=False,
+                                                                         info=results).save()
+                                       else:
+                                               # An approved feed never triggers the "new blog" mail below
+                                               if feed.approved:
+                                                       had_entries = True
+                                               else:
+                                                       had_entries = feed.has_entries
+                                               entries = 0
+
+                                               for entry in results:
+                                                       self.trace("Found entry at %s" % entry.link)
+                                                       # Entry is a post, but we need to check if it's already there. Check
+                                                       # is done on guid.
+                                                       if not Post.objects.filter(feed=feed, guid=entry.guid).exists():
+                                                               self.trace("Saving entry at %s" % entry.link)
+                                                               entry.save()
+                                                               entry.update_shortlink()
+                                                               AggregatorLog(feed=feed,
+                                                                                         success=True,
+                                                                                         info="Fetched entry at '%s'" % entry.link).save()
+                                                               entries += 1
+                                                               total_entries += 1
+                                               # Save an update to when the feed was last scanned
+                                               feed.save()
+
+                                               if entries > 0 and not had_entries:
+                                                       # Entries showed up on a blog that was previously empty
+                                                       send_simple_mail(settings.EMAIL_SENDER,
+                                                                                        settings.NOTIFICATION_RECEIVER,
+                                                                                        "A blog was added to Planet PostgreSQL",
+                                                                                        u"The blog at {0} by {1}\nwas added to Planet PostgreSQL, and has now received entries.\n\nTo moderate: https://planet.postgresql.org/register/moderate/\n\n".format(feed.feedurl, feed.userid),
+                                                                                        sendername="Planet PostgreSQL",
+                                                                                        receivername="Planet PostgreSQL Moderators",
+                                                       )
+
+                               if self.debug:
+                                       # Roll back transaction without error
+                                       raise BreakoutException()
+               except BreakoutException:
+                       self.stderr.write("Rolling back all changes")
+
+               # Only purge when something was actually stored and kept
+               if total_entries > 0 and not self.debug:
+                       purge_root_and_feeds()
+
+       def _fetch_one_feed(self, fetcher):
+               """Fetch and parse a single feed; called through the thread pool.
+
+               Returns a (feed, entries) tuple on success. On any error the exception
+               is written to stderr and returned as (feed, exception) instead of
+               being raised, so the caller can tell the two apart with isinstance().
+               """
+               if self.full:
+                       # Full run: ignore the last-fetched timestamp
+                       self.trace("Fetching %s" % fetcher.feed.feedurl)
+                       since = None
+               else:
+                       since = fetcher.feed.lastget
+                       self.trace("Fetching %s since %s" % (fetcher.feed.feedurl, since))
+               try:
+                       entries = list(fetcher.parse(since))
+               except Exception as e:
+                       self.stderr.write("Failed to fetch '%s': %s" % (fetcher.feed.feedurl, e))
+                       return (fetcher.feed, e)
+               return (fetcher.feed, entries)
diff --git a/hamnadmin/hamnadmin/register/migrations/0002_cleanup_models.py b/hamnadmin/hamnadmin/register/migrations/0002_cleanup_models.py
new file mode 100644 (file)
index 0000000..0104c10
--- /dev/null
@@ -0,0 +1,38 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    # Gives the boolean fields on blog and post an explicit default of False
+    # and declares a unique_together constraint on post.
+
+    dependencies = [
+        ('register', '0001_initial'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='blog',
+            name='approved',
+            field=models.BooleanField(default=False),
+        ),
+        migrations.AlterField(
+            model_name='post',
+            name='guidisperma',
+            field=models.BooleanField(default=False),
+        ),
+        migrations.AlterField(
+            model_name='post',
+            name='hidden',
+            field=models.BooleanField(default=False),
+        ),
+        migrations.AlterField(
+            model_name='post',
+            name='twittered',
+            field=models.BooleanField(default=False),
+        ),
+        # NOTE(review): if 'id' is the primary key it is already unique on its
+        # own, so (id, guid) constrains nothing extra -- was (feed, guid)
+        # intended? Confirm against the Post model before changing.
+        migrations.AlterUniqueTogether(
+            name='post',
+            unique_together=set([('id', 'guid')]),
+        ),
+    ]
index 9b918a495db7d2a16bc1d4d35db02a9634bf4596..18b7db64c85b32a8edf08d579e12dbafa763c7e6 100644 (file)
@@ -1,6 +1,8 @@
 from django.db import models
 from django.contrib.auth.models import User
-from datetime import datetime
+from datetime import datetime, timedelta
+
+from hamnadmin.util.shortlink import urlvalmap
 
 class Team(models.Model):
        teamurl = models.CharField(max_length=255, blank=False)
@@ -20,8 +22,8 @@ class Blog(models.Model):
        name = models.CharField(max_length=255, blank=False)
        blogurl = models.CharField(max_length=255, blank=False)
        lastget = models.DateTimeField(default=datetime(2000,1,1))
-       userid = models.CharField(max_length=255, blank=False)
-       approved = models.BooleanField()
+       userid = models.CharField(max_length=255, blank=False, null=False)
+       approved = models.BooleanField(default=False)
        authorfilter = models.CharField(max_length=255,default='',blank=True)
        team = models.ForeignKey(Team,db_column='team', blank=True, null=True)
        twitteruser = models.CharField(max_length=255, default='', blank=True)
@@ -35,6 +37,22 @@ class Blog(models.Model):
                u = User.objects.get(username=self.userid)
                return u.email
 
+       @property
+       def recent_failures(self):
+               """Number of failed aggregator log entries for this blog in the last day."""
+               cutoff = datetime.now() - timedelta(days=1)
+               failed = self.aggregatorlog_set.filter(success=False, ts__gt=cutoff)
+               return failed.count()
+
+       @property
+       def has_entries(self):
+               """True if this blog has at least one post that is not hidden."""
+               visible = self.posts.filter(hidden=False)
+               return visible.exists()
+
+       @property
+       def latestentry(self):
+               """First non-hidden post in the queryset's ordering, or None if there is none."""
+               # first() instead of [0]: a blog without any visible post must not
+               # raise IndexError when its latest entry is looked up.
+               return self.posts.filter(hidden=False).first()
+
+       @property
+       def recent_entries(self):
+               """Up to ten posts ordered by dat, newest first; hidden posts are not filtered out."""
+               newest_first = self.posts.order_by('-dat')
+               return newest_first[:10]
+
        class Meta:
                db_table = 'feeds'
                ordering = ['approved','name']
@@ -49,9 +67,9 @@ class Post(models.Model):
        txt = models.TextField()
        dat = models.DateTimeField()
        title = models.CharField(max_length=255)
-       guidisperma = models.BooleanField()
-       hidden = models.BooleanField()
-       twittered = models.BooleanField()
+       guidisperma = models.BooleanField(default=False)
+       hidden = models.BooleanField(default=False)
+       twittered = models.BooleanField(default=False)
        shortlink = models.CharField(max_length=255)
 
        def __unicode__(self):
@@ -60,10 +78,24 @@ class Post(models.Model):
        class Meta:
                db_table = 'posts'
                ordering = ['-dat']
+               unique_together = [
+                       ('id', 'guid'),
+               ]
 
        class Admin:
                pass
 
+       def update_shortlink(self):
+               """Recompute the shortlink from the current id and save the post."""
+               link = self._get_shortlink()
+               self.shortlink = link
+               self.save()
+
+       def _get_shortlink(self):
+               """Build the postgr.es short URL by encoding self.id in base 64 via urlvalmap."""
+               s = ""
+               i = self.id
+               while i > 0:
+                       # divmod keeps i an integer everywhere; a plain "/" becomes a
+                       # float under true division and would break the table lookup.
+                       i, digit = divmod(i, 64)
+                       s = urlvalmap[digit] + s
+               return "http://postgr.es/p/%s" % s
 
 class AuditEntry(models.Model):
        logtime = models.DateTimeField(default=datetime.now)
@@ -93,4 +125,4 @@ class AggregatorLog(models.Model):
                ordering = ['-ts']
 
        def __unicode__(self):
-               return "Log entry (%s)" % self.ts
+               return "Log entry for %s (%s)" % (self.feed.name, self.ts)
diff --git a/hamnadmin/hamnadmin/register/templates/edit.html b/hamnadmin/hamnadmin/register/templates/edit.html
new file mode 100644 (file)
index 0000000..5670c3c
--- /dev/null
@@ -0,0 +1,136 @@
+{% extends "regbase.html" %}
+{%block content%}
+<h1>Edit blog</h1>
+
+{%if not new%}
+<h3>Status</h3>
+<p>
+{%if blog.approved%}
+This blog is currently <span class="label label-success">approved</span>.
+{%if blog.recent_failures%}
+{%if blog.recent_failures > 5%}
+This blog has generated <span class="label label-danger">multiple failures</span> in the past 24 hours. See
+  the <a href="#log">log</a> below for details.
+{%else%}
+This blog has generated <span class="label label-warning">sporadic errors</span> in the past 24 hours. See
+  the <a href="#log">log</a> below for details.
+{%endif%}
+{%else%}{#recent_failures#}
+The blog last posted an entry at <span class="label label-info">{{blog.latestentry.dat|date:"Y-m-d H:i:s"}}</span>.
+{%endif%}
+
+{%else%}
+This blog is currently <span class="label label-warning">not approved</span>.
+{%if blog.has_entries%}
+The blog is currently waiting for moderator approval.
+{%else%}
+  The blog has not yet received any posts. As soon as the first post is syndicated, the blog will be
+  automatically submitted for approval.
+{%endif%}{#has_entries#}
+{%endif%}{#approved#}
+</p>
+{%endif%}{#not new#}
+
+{%if messages%}
+<h3>Results</h3>
+<div class="alert alert-info">
+{%for m in messages%}
+{{m}}<br/>
+{%endfor%}
+</div>
+{%endif%}
+
+{%if posts%}
+<h3>Posts</h3>
+<p>The latest posts received from this blog are:</p>
+<table class="table table-condensed table-striped">
+  <tr>
+    <th>Date</th>
+    <th>Title</th>
+    <th>Show/Hide</th>
+  </tr>
+  {%for p in posts%}
+  <tr>
+    <td>{{p.dat|date:"Y-m-d H:i:s"}}</td>
+    <td><a href="{{p.link}}">{{p.title}}</a></td>
+    <td>
+      {%if p.hidden%}
+      <a class="btn btn-default btn-sm" href="/register/blogposts/{{blog.id}}/unhide/{{p.id}}/">Unhide</a>
+      {%else%}
+      <a class="btn btn-default btn-sm" href="/register/blogposts/{{blog.id}}/hide/{{p.id}}/">Hide</a>
+      {%endif%}
+      <a class="btn btn-default btn-sm" onClick="confirmDelete({{blog.id}},{{p.id}})">Delete/reload</a>
+      </td>
+  </tr>
+  {%endfor%}
+</table>
+<a class="btn btn-default" href="/register/">Return</a>
+{%endif%}
+
+
+<h3>Edit</h3>
+
+{%if form.errors%}
+<div class="alert alert-danger">Please correct the errors below, and re-submit the form.</div>
+{%endif%}
+
+{%if form.non_field_errors%}
+{%for e in form.non_field_errors%}
+<div class="alert alert-danger">{{e}}</div>
+{%endfor%}
+{%endif%}
+
+<form method="post" action="." class="form-horizontal">{%csrf_token%}
+  {%for field in form%}
+  <div class="form-group{%if field.errors%} has-error{%endif%}">
+    <label for="{{field.id_for_label}}" class="col-sm-2">{{field.label}}</label>
+    <div class="col-sm-10">
+      <div class="controls">
+       {{field}}
+       <span class="help-block">{{field.help_text}}</span>
+       {%if field.errors%}
+       <span class="help-block">{{field.errors}}</span>
+       {%endif%}
+      </div>
+    </div>
+  </div>
+  {%endfor%}
+  <input class="btn btn-default" type="submit" value="Save">
+  <a class="btn btn-default" href="/register/">Cancel</a>
+  {%if not new%}<a class="btn btn-default" onClick="confirmDeleteBlog({{blog.id}})">Delete</a>{%endif%}{# nothing to delete before the first save #}
+</form>
+
+{%if not new%}
+{# id="log" is the target of the "#log" links in the status section #}
+<h3 id="log">Log</h3>
+<table class="table table-condensed table-striped">
+  <tr>
+    <th>Time</th>
+    <th>Success</th>
+    <th>Text</th>
+  </tr>
+  {%for l in log%}
+  <tr>
+    <td>{{l.ts|date:"Y-m-d H:i:s"}}</td>
+    <td><span class="label label-{{l.success|yesno:"success,danger"}}">{{l.success|yesno:"Success,Failure"}}</span></td>
+    <td>{{l.info}}</td>
+  </tr>
+  {%endfor%}
+</table>
+{%endif%}{#not new#}
+
+{%endblock%}
+{%block extrahead%}
+<script language="javascript">
+function confirmDelete(blogid, postid) {
+  if (confirm("Are you sure you want to delete the post?\n\nThe post will automatically re-appear on the next crawl if it's still in the feed. If you don't want that to happen, hide the post instead!\n\nAre you sure?")) {
+     document.location.href='/register/blogposts/' + blogid + '/delete/' + postid + '/';
+  }
+}
+
+function confirmDeleteBlog(blogid) {
+  if(confirm("Are you sure you want to delete this blog?\n\nAll entries in the blog as well as all history will be deleted.\n")) {
+     document.location.href='/register/delete/' + blogid + '/';
+  }
+}
+</script>
+{%endblock%}
index f4dbc194d4474a38d17ad210a2fe5b1af3b1bbbe..56d5287ce71b4a7723c40eb888a6ec73cfedb7fd 100644 (file)
@@ -1,85 +1,68 @@
 {% extends "regbase.html" %}
-{%block regcontent%}
+{%block content%}
 {%if blogs %}
-<script language="javascript">
- function confirmDelete(name,id) {
-  if (confirm('Are you sure you want to delete the blog \'' + name + '\'?'))
-    location.href='delete/' + id + '/';
- }
-</script>
+<h1>Your blogs</h1>
+
+{%if messages%}
+<div class="alert alert-info">
+{%for m in messages%}
+{{m}}<br/>
+{%endfor%}
+</div>
+{%endif%}
+
 <p>
-We have the following blog(s) registered:
+You have the following blog(s) registered:
 </p>
-<table border="1" cellspacing="0" cellpadding="1">
+<table class="table table-condensed table-striped table-bordered">
 <tr>
  <th>Name</th>
  <th>Approved</th>
  <th>Feed URL/Blog URL</th>
+ <th>Status</th>
  <th>Operations</th>
 </tr>
 {%for blog in blogs%}
 <tr valign="top">
  <td>{{blog.name}}</td>
- <td>{{blog.approved|yesno:"Yes,No"}}</td>
+ <td><span class="label label-{{blog.approved|yesno:"success,warning"}}">{{blog.approved|yesno:"Yes,No"}}</span></td>
  <td>Feed: <a href="{{blog.feedurl}}">{{blog.feedurl}}</a><br/>Blog: <a href="{{blog.blogurl}}">{{blog.blogurl}}</a><br/>
-Author Filter: {{blog.authorfilter}}<br/>
-Twitter name: {%if blog.twitteruser%}@<a href="http://twitter.com/{{blog.twitteruser}}">{{blog.twitteruser}}</a>{%endif%}<br/>
-Last http get: {{blog.lastget}}<br/>
+Last http get: {{blog.lastget|date:"Y-m-d H:i:s"}}<br/>
  </td>
 
- <td><a href="log/{{blog.id}}/">View log</a><br/>
-{% if blog.approved  or user.is_superuser%}
-  <a href="blogposts/{{blog.id}}/">Posts</a><br/>
-{%else%}
-Not approved yet.<br/>
-{%endif%}
-  <a href="#" onClick="confirmDelete('{{blog.feedurl}}',{{blog.id}})">Delete blog</a><br/>
-{%if user.is_superuser %}
-  <nobr><a href="discover/{{blog.id}}/">Fetch metadata</a></nobr><br/>
-{%endif%}
-  <nobr><a href="reset/{{blog.id}}/">Reset http time</a></nobr><br/>
+ <td>
+  {%if blog.approved%}
+   {%if blog.recent_failures%}
+    {%if blog.recent_failures > 5%}
+     <a href="edit/{{blog.id}}/#log"><span class="label label-danger">Multiple failures</span></a>
+    {%else%}
+     <a href="edit/{{blog.id}}/#log"><span class="label label-warning">Sporadic errors</span></a>
+    {%endif%}
+   {%else%}
+    <span class="label label-success">Approved and working</span>
+   {%endif%}
+  {%else%}
+   {%if blog.has_entries%}
+    <span class="label label-info">Pending approval</span>
+   {%else%}
+    <span class="label label-danger">No entries found</span>
+   {%endif%}
+  {%endif%}
+ </td>
+ <td>
+   <a class="btn btn-default" role="button" href="edit/{{blog.id}}/">Edit</a>
 </td>
 </tr>
 {%endfor%}
 </table>
-<p>
-If you need any entries for your blog(s) changed that are not available as editable
-on this place, please contact <a href="mailto:planet@postgresql.org">planet@postgresql.org</a>.
-</p>
 {%else%}
 <p>We have no blogs registered to your account.</p>
 {%endif%}
 <hr/>
-<p>
-To register a new blog , please enter the
-URL to your RSS feed (PostgreSQL category only!) below.
-</p>
+<h2>Register blog</h2>
 <p>
 Note that your blog will have to be approved before it appears
 on the planet.
 </p>
-<form method="post" action="new/">
-<table border="0" cellspacing="1" cellpadding="0">
-<tr>
- <td>Feed URL</td>
- <td><input type="text" name="feedurl"></td>
-</tr>
-<tr>
- <td>Filter by author (OPTIONAL, advanced):</td>
- <td><input type="text" name="authorfilter"></td>
-</tr>
-<tr>
- <td>Part of team:</td>
- <td><select name="team">
-  <option value="-1">* No team</option>
-  {% for team in teams %}
-  <option value="{{team.id}}">{{team.name}}</option>
-  {%endfor%}
- </select>
- <input type="checkbox" name="ok_team" value="yesitsfine">Check this box to indicate that the owner
- of this team is aware of your registration.
- </td>
-</table>
-<input type="submit" value="New blog">
-</form>
+<a class="btn btn-default" href="new/">Register new blog</a>
 {%endblock%}
diff --git a/hamnadmin/hamnadmin/register/templates/moderate.html b/hamnadmin/hamnadmin/register/templates/moderate.html
new file mode 100644 (file)
index 0000000..cc1cccc
--- /dev/null
@@ -0,0 +1,76 @@
+{% extends "regbase.html" %}
+{%block content%}
+<h1>Blog moderation</h1>
+
+{%if messages%}
+<h3>Results</h3>
+<div class="alert alert-info">
+{%for m in messages%}
+{{m}}<br/>
+{%endfor%}
+</div>
+{%endif%}
+
+<p>The following blogs require moderator action</p>
+{%for blog in blogs%}
+<div class="panel panel-info">
+  <div class="panel-heading">{{blog.name|default:blog.feedurl}}</div>
+  <div class="panel-body">
+    <div class="row">
+      <div class="col-sm-1">User</div>
+      <div class="col-sm-10">Username: {{blog.userid}}<br/>Twitter: {{blog.twitteruser}}<br/>Team: {{blog.team.name}}</div>
+    </div>
+    <div class="row">
+      <div class="col-sm-1">URLs</div>
+      <div class="col-sm-10">
+       Feed url: {{blog.feedurl}}<br/>
+       Blog url: {%if blog.blogurl%}<a href="{{blog.blogurl}}">{{blog.blogurl}}</a>{%else%}Blog not syndicated yet, so no URL available.{%endif%}
+       {%if blog.authorfilter%}<br/>Author filter: {{blog.authorfilter}}{%endif%}
+      </div>
+    </div>
+    <div class="row">
+      <div class="col-sm-1">Posts</div>
+      <div class="col-sm-10">
+       {%if blog.recent_entries%}
+       <table class="table table-condensed table-striped table-bordered">
+         <tr><th>Time</th><th>Title</th></tr>
+         {%for p in blog.recent_entries%}
+         <tr>
+           <td>{{p.dat|date:"Y-m-d H:i:s"}}</td>
+           <td><a href="{{p.link}}">{{p.title}}</a></td>
+         </tr>
+         {%endfor%}
+       </table>
+       {%else%}
+       No entries have been syndicated from this blog yet.
+       {%endif%}
+       <br/>Fetching entries since {{blog.lastget|date:"Y-m-d H:i:s"}}.
+      </div>
+    </div>
+    <div class="row">
+      <div class="col-sm-12">
+       {%if blog.recent_entries%}<a class="btn btn-default" onClick="confirmApprove('approve/{{blog.id}}/')">Approve</a>{%endif%}
+       <a class="btn btn-default" href="/register/edit/{{blog.id}}/">Edit</a>
+       <a class="btn btn-default" href="reject/{{blog.id}}/">Reject</a>
+       <a class="btn btn-default" href="/register/admin/register/blog/{{blog.id}}/">Admin</a>
+      </div>
+    </div>
+  </div>
+</div>
+{%endfor%}
+
+<h2>Full admin</h2>
+<p>Normal moderation shouldn't need it, but just in case - here it is!</p>
+<a class="btn btn-default" href="/register/admin/">Full admin</a>
+
+
+{%endblock%}
+{%block extrahead%}
+<script language="javascript">
+  function confirmApprove(url) {
+    if (confirm('Are you sure you want to approve this blog?')) {
+      document.location.href = url;
+    }
+  }
+</script>
+{%endblock%}
diff --git a/hamnadmin/hamnadmin/register/templates/moderate_reject.html b/hamnadmin/hamnadmin/register/templates/moderate_reject.html
new file mode 100644 (file)
index 0000000..7c499b5
--- /dev/null
@@ -0,0 +1,33 @@
+{% extends "regbase.html" %}
+{%block content%}
+<h1>Reject blog</h1>
+
+{%if form.errors%}
+<div class="alert alert-danger">Please correct the errors below, and re-submit the form.</div>
+{%endif%}
+
+{%if form.non_field_errors%}
+{%for e in form.non_field_errors%}
+<div class="alert alert-danger">{{e}}</div>
+{%endfor%}
+{%endif%}
+
+<form method="post" action="." class="form-horizontal">{%csrf_token%}
+  {%for field in form%}
+  <div class="form-group{%if field.errors%} has-error{%endif%}">
+    <label for="{{field.id_for_label}}" class="col-sm-2">{{field.label}}</label>
+    <div class="col-sm-7">
+      <div class="controls">
+       {{field}}
+       <span class="help-block">{{field.help_text}}</span>
+       {%if field.errors%}
+       <span class="help-block">{{field.errors}}</span>
+       {%endif%}
+      </div>
+    </div>
+  </div>
+  {%endfor%}
+  <input class="btn btn-default" type="submit" value="Reject blog">
+  <a class="btn btn-default" href="/register/moderate/">Cancel</a>
+</form>
+{%endblock%}
index b4a6e0880dd748e66b6f224a929419bfa9ce84c0..50f18fa7a153827bb8503a4cee8875b136b9fd33 100644 (file)
@@ -1,12 +1,24 @@
-{%extends "base.tmpl" %}
-{%block content%}
-{%if user.is_authenticated %}
-<div style="float:right;"><a href="/register/logout">Log out</a></div>
-{%endif%}
-<h1>Welcome to planet administration</h1>
-{%if user.is_superuser %}
-<h2>You are registered as an administrator.</h2>
-<p>The full administration interface can be found <a href="/register/admin/">here</a>.</p>
-{% endif %}
-{%block regcontent%}{%endblock%}
-{%endblock%}
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+          "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <title>{%if title%}{{title}} - {%endif%}Planet PostgreSQL</title>
+    <meta name="viewport" content="width=device-width, initial-scale=1.0"></meta>
+    <link href="/css/bootstrap.min.css" rel="stylesheet" media="screen"></link>
+    <link href="/css/planet_reg.css" rel="stylesheet" media="screen"></link>
+    {% block extrahead %}{% endblock %}
+  </head>
+
+  <body>
+    <div class="container">
+      <div class="row">
+       <div class="col-sm-12" id="planethdr">
+         <a href="/register/"><img src="http://www.postgresql.org/layout/images/hdr_left.png" alt="PostgreSQL" /></a>
+         <img class="pull-right" src="http://www.postgresql.org/layout/images/hdr_right.png" alt="The world's most advanced open source database" />
+
+       </div>
+      </div>
+      {%block content%}{%endblock%}
+    </div>
+  </body>
+</html>
diff --git a/hamnadmin/hamnadmin/register/templatetags/__init__.py b/hamnadmin/hamnadmin/register/templatetags/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/hamnadmin/hamnadmin/register/templatetags/hamn.py b/hamnadmin/hamnadmin/register/templatetags/hamn.py
new file mode 100644 (file)
index 0000000..f1232e7
--- /dev/null
@@ -0,0 +1,13 @@
+from django.template.defaultfilters import stringfilter
+from django.utils.safestring import mark_safe
+from django import template
+
+from hamnadmin.util.html import TruncateAndClean
+
+register = template.Library()
+
+@register.filter(name='postcontents')
+@stringfilter
+def postcontents(value):
+       """Template filter: run value through TruncateAndClean and mark the result safe."""
+       # NOTE(review): the output bypasses autoescaping, so this presumably
+       # relies on TruncateAndClean removing unsafe markup -- confirm.
+       cleaned = TruncateAndClean(value)
+       return mark_safe(cleaned)
+
index bfc4c40f9d485859cf92b3f9c8cbabe106bc8527..acb9f112b618b9187deb979d0e559b91e9a71bc8 100644 (file)
@@ -6,17 +6,17 @@ admin.autodiscover()
 
 urlpatterns = patterns('',
     (r'^$', 'hamnadmin.register.views.root'),
-    (r'^new/$', 'hamnadmin.register.views.new'),
-    (r'^discover/(\d+)/$', 'hamnadmin.register.views.discover'),
-    (r'^delete/(\d+)/$', 'hamnadmin.register.views.delete'),
-    (r'^reset/(\d+)/$', 'hamnadmin.register.views.reset'),
+    (r'^new/$', 'hamnadmin.register.views.edit'),
+    (r'^edit/(?P<id>\d+)/$', 'hamnadmin.register.views.edit'),
+    (r'^delete/(?P<id>\d+)/$', 'hamnadmin.register.views.delete'),
 
-    (r'^log/(\d+)/$','hamnadmin.register.views.logview'),
-    (r'^blogposts/(\d+)/$', 'hamnadmin.register.views.blogposts'),
     (r'^blogposts/(\d+)/hide/(\d+)/$', 'hamnadmin.register.views.blogpost_hide'),
     (r'^blogposts/(\d+)/unhide/(\d+)/$', 'hamnadmin.register.views.blogpost_unhide'),
     (r'^blogposts/(\d+)/delete/(\d+)/$', 'hamnadmin.register.views.blogpost_delete'),
 
+    (r'^moderate/$', 'hamnadmin.register.views.moderate'),
+    (r'^moderate/reject/(\d+)/$', 'hamnadmin.register.views.moderate_reject'),
+    (r'^moderate/approve/(\d+)/$', 'hamnadmin.register.views.moderate_approve'),
     (r'^login/$', 'hamnadmin.auth.login'),
     (r'^auth_receive/$', 'hamnadmin.auth.auth_receive'),
     (r'^logout/$', 'hamnadmin.auth.logout'),
index f2b09d72490bea0ab2f252559b9e65e4edcc98a3..cd8c17a02dffaa15975bf4f7c477e5c4ad3019b6 100644 (file)
@@ -5,183 +5,142 @@ from django.contrib.auth.decorators import login_required, user_passes_test
 from django.conf import settings
 from django.core.mail import send_mail
 from django.db import transaction
-from django.db.models import Q
+from django.db.models import Q, Count, Max
+from django.contrib import messages
 
 from hamnadmin.register.models import *
-from hamnadmin.exceptions import pExcept
+from hamnadmin.mailqueue.util import send_simple_mail
+from hamnadmin.util.varnish import purge_url, purge_root_and_feeds
 
 import socket
 import datetime
 import feedparser
 
+from forms import BlogEditForm, ModerateRejectForm
+
+# Public planet
+def planet_home(request):
+       """Render index.tmpl with the latest posts plus the top posters and top teams."""
+       # Poster and team statistics only count posts newer than 61 days
+       statdate = datetime.datetime.now() - datetime.timedelta(days=61)
+
+       posts = Post.objects.filter(hidden=False, feed__approved=True).order_by('-dat')[:30]
+
+       counted_blogs = Blog.objects.filter(
+               approved=True,
+               excludestats=False,
+               posts__hidden=False,
+               posts__dat__gt=statdate,
+       )
+       topposters = counted_blogs.annotate(numposts=Count('posts__id')).order_by('-numposts')[:10]
+
+       counted_teams = Team.objects.filter(
+               blog__approved=True,
+               blog__excludestats=False,
+               blog__posts__hidden=False,
+               blog__posts__dat__gt=statdate,
+       )
+       topteams = counted_teams.annotate(numposts=Count('blog__posts__id')).order_by('-numposts')[:10]
+
+       context = {
+               'posts': posts,
+               'topposters': topposters,
+               'topteams': topteams,
+       }
+       return render_to_response('index.tmpl', context, context_instance=RequestContext(request))
+
+
+def planet_feeds(request):
+       """Render feeds.tmpl with all approved blogs and the teams that have an approved blog."""
+       approved_feeds = Blog.objects.filter(approved=True)
+       teams = Team.objects.filter(blog__approved=True).distinct().order_by('name')
+       context = {
+               'feeds': approved_feeds,
+               'teams': teams,
+       }
+       return render_to_response('feeds.tmpl', context, context_instance=RequestContext(request))
+
+def planet_add(request):
+       """Render add.tmpl with an empty context."""
+       context = {}
+       return render_to_response('add.tmpl', context, context_instance=RequestContext(request))
+
+
+
+# Registration interface (login and all)
 def issuperuser(user):
        return user.is_authenticated() and user.is_superuser
 
 @login_required
 def root(request):
-       if request.user.is_superuser:
+       if request.user.is_superuser and request.GET.has_key('admin') and request.GET['admin'] == '1':
                blogs = Blog.objects.all()
        else:
                blogs = Blog.objects.filter(userid=request.user.username)
        return render_to_response('index.html',{
                'blogs': blogs,
-               'teams': Team.objects.all(),
+               'teams': Team.objects.all().order_by('name'),
        }, context_instance=RequestContext(request))
 
 @login_required
 @transaction.atomic
-def new(request):
-       if not request.method== 'POST':
-               raise pExcept('must be POST')
-       feedurl = request.POST['feedurl']
-       user = request.user.username
-       authorfilter  = request.POST['authorfilter']
-       if not len(feedurl) > 1:
-               raise pExcept('must include blog url!')
-
-       # TODO: add support for 'feed://' urls
-       if not feedurl.startswith('http://') and not feedurl.startswith('https://'):
-               raise pExcept('Only http served blogs are accepted!')
-
-       # See if this blog is already registered
-       try:
-               blog = Blog.objects.get(
-                       Q(feedurl=feedurl),
-                       Q(authorfilter=authorfilter)
-               )
-               raise pExcept('This blog is already registered.')
-       except Blog.DoesNotExist:
-               # This is what we expect to happen.. :-)
-               pass
-
-       # Attempting to join a team?
-       if int(request.POST['team']) != -1:
-               if not (request.POST.has_key('ok_team') and request.POST['ok_team'] == 'yesitsfine'):
-                       raise pExcept('You must confirm that the owner of the team knows about you joining it.')
-               try:
-                       team = Team.objects.get(pk=int(request.POST['team']))
-               except:
-                       raise pExcept('Failed to get team information!')
+def edit(request, id=None):
+       if id:
+               if request.user.is_superuser:
+                       blog = get_object_or_404(Blog, id=id)
+               else:
+                       blog = get_object_or_404(Blog, id=id, userid=request.user.username)
        else:
-               team = None
-
-       # Attempting to register a new blog. First let's see that we can download it
-       socket.setdefaulttimeout(20)
-       try:
-               feed = feedparser.parse(feedurl)
-               status = feed.status
-               if not status == 200:
-                       raise pExcept('Attempt to download blog feed returned status %s.' % (status))
-               lnk = feed.feed.link
-               l = len(feed.entries)
-               if l < 1:
-                       raise pExcept('Blog feed contains no entries.')
-       except pExcept:
-               raise
-       except Exception, e:
-               raise pExcept('Failed to download blog feed: %s' % e)
-       
-       if not settings.NOTIFYADDR:
-               raise pExcept('Notify address not specified, cannot complete')
-
-       blog = Blog()
-       blog.userid = request.user.username
-       blog.name = "%s %s" % (request.user.first_name, request.user.last_name)
-
-       blog.feedurl = feedurl
-       blog.authorfilter = authorfilter
-       blog.blogurl = lnk
-       blog.approved = False
-       if team:
-               blog.team = team
-       send_mail('New blog assignment', """
-The user '%s' has requested the blog at
-%s (name %s)
-is added to Planet PostgreSQL!
-
-So, head off to the admin interface and approve or reject this!
-http://planet.postgresql.org/register/admin/register/blog/
-""" % (blog.userid, blog.feedurl, blog.name), 'webmaster@postgresql.org', [settings.NOTIFYADDR])
+               blog = Blog(userid=request.user.username, name = u"{0} {1}".format(request.user.first_name, request.user.last_name))
 
-       blog.save()
-       AuditEntry(request.user.username, 'Added blog %s' % blog.feedurl).save()
-       return HttpResponseRedirect('..')
+       if request.method == 'POST':
+               saved_url = blog.feedurl
+               saved_filter = blog.authorfilter
+               form = BlogEditForm(request, data=request.POST, instance=blog)
+               if form.is_valid():
+                       if id:
+                               # This is an existing one. If we change the URL of the blog, it needs to be
+                               # de-moderated if it was previously approved.
+                               if blog.approved:
+                                       if saved_url != form.cleaned_data['feedurl'] or saved_filter != form.cleaned_data['authorfilter']:
+                                               obj = form.save()
+                                               obj.approved = False
+                                               obj.save()
 
-@login_required
-@transaction.atomic
-def delete(request, id):
-       blog = get_object_or_404(Blog, id=id)
-       if not blog.userid == request.user.username:
-               raise pExcept("You can only delete your own feeds! Don't try to hack!")
-       send_mail('Blog deleted', """
-The user '%s' has deleted the blog at
-%s (name %s)
-""" % (blog.userid, blog.feedurl, blog.name), 'webmaster@postgresql.org', [settings.NOTIFYADDR])
-       blog.delete()
-       AuditEntry(request.user.username, 'Deleted blog %s' % blog.feedurl).save()
-       return HttpResponseRedirect('../..')
+                                               send_simple_mail(settings.EMAIL_SENDER,
+                                                                                settings.NOTIFICATION_RECEIVER,
+                                                                                "A blog was edited on Planet PostgreSQL",
+                                                                                u"The blog at {0}\nwas edited by {1} in a way that needs new moderation.\n\nTo moderate: https://planet.postgresql.org/register/moderate/\n\n".format(blog.feedurl, blog.userid),
+                                                                                sendername="Planet PostgreSQL",
+                                                                                receivername="Planet PostgreSQL Moderators",
+                                                                        )
 
-@login_required
-@transaction.atomic
-def reset(request, id):
-       blog = get_object_or_404(Blog, id=id)
-       if not blog.userid == request.user.username:
-               raise pExcept("You can only reset your own feeds! Don't try to hack!")
-       blog.lastget = datetime.datetime(2000,01,01)
-       blog.save()
-       AuditEntry(request.user.username, 'Reset blog %s' % blog.feedurl).save()
-       return HttpResponseRedirect('../..')
+                                               messages.warning(request, "Blog has been resubmitted for moderation, and is temporarily disabled.")
 
-@user_passes_test(issuperuser)
-@transaction.atomic
-def discover(request, id):
-       blog = get_object_or_404(Blog, id=id)
-
-       # Attempt to run the discover
-       socket.setdefaulttimeout(20)
-       try:
-               feed = feedparser.parse(blog.feedurl)
-               if not blog.blogurl == feed.feed.link:
-                       blog.blogurl = feed.feed.link
-                       blog.save()
-                       AuditEntry(request.user.username, 'Discovered metadata for %s' % blog.feedurl).save()
-                       return HttpResponse('Metadata (currently: Blog URL) successfully updated.')
-               return HttpResponse('Metadata was not changed.')
-       except Exception, e:
-               return HttpResponse('Failed to discover metadata: %s' % (e))
+                                               purge_root_and_feeds()
+                                               purge_url('/feeds.html')
 
-@login_required
-def logview(request, id):
-       blog = get_object_or_404(Blog, id=id)
-       if not blog.userid == request.user.username and not request.user.is_superuser:
-               return HttpResponse("You can't view the log for somebody elses blog!")
-               
-       logentries = AggregatorLog.objects.filter(feed=blog)[:50]
-       
-       return render_to_response('aggregatorlog.html', {
-               'entries': logentries,
-       }, context_instance=RequestContext(request))
+                                               return HttpResponseRedirect("/register/edit/{0}/".format(obj.id))
+                                       else:
+                                               messages.info(request, "did not change")
+
+                       obj = form.save()
+                       return HttpResponseRedirect("/register/edit/{0}/".format(obj.id))
+       else:
+               form =  BlogEditForm(request, instance=blog)
+
+       return render_to_response('edit.html', {
+               'new': id is None,
+               'form': form,
+               'blog': blog,
+               'log': AggregatorLog.objects.filter(feed=blog).order_by('-ts')[:30],
+               'posts': Post.objects.filter(feed=blog).order_by('-dat')[:10],
+       }, RequestContext(request))
 
 @login_required
 @transaction.atomic
-def blogposts(request, id):
-       blog = get_object_or_404(Blog, id=id)
-       if not blog.userid == request.user.username and not request.user.is_superuser:
-               return HttpResponse("You can't view/edit somebody elses blog!")
-       
-       posts = Post.objects.filter(feed=blog)
+def delete(request, id):
+       if request.user.is_superuser:
+               blog = get_object_or_404(Blog, id=id)
+       else:
+               blog = get_object_or_404(Blog, id=id, userid=request.user.username)
 
-       return render_to_response('blogposts.html',{
-               'posts': posts,
-       }, context_instance=RequestContext(request))
+       send_simple_mail(settings.EMAIL_SENDER,
+                                        settings.NOTIFICATION_RECEIVER,
+                                        "A blog was deleted on Planet PostgreSQL",
+                                        u"The blog at {0} by {1}\nwas deleted by {2}\n\n".format(blog.feedurl, blog.name, request.user.username),
+                                        sendername="Planet PostgreSQL",
+                                        receivername="Planet PostgreSQL Moderators",
+       )
+       blog.delete()
+       messages.info(request, "Blog deleted.")
+       purge_root_and_feeds()
+       purge_url('/feeds.html')
+       return HttpResponseRedirect("/register/")
 
 def __getvalidblogpost(request, blogid, postid):
        blog = get_object_or_404(Blog, id=blogid)
        post = get_object_or_404(Post, id=postid)
        if not blog.userid == request.user.username and not request.user.is_superuser:
-               raise pExcept("You can't view/edit somebody elses blog!")
+               raise Exception("You can't view/edit somebody elses blog!")
        if not post.feed.id == blog.id:
-               raise pExcept("Blog does not match post")
+               raise Exception("Blog does not match post")
        return post
 
 def __setposthide(request, blogid, postid, status):
@@ -189,7 +148,9 @@ def __setposthide(request, blogid, postid, status):
        post.hidden = status
        post.save()
        AuditEntry(request.user.username, 'Set post %s on blog %s visibility to %s' % (postid, blogid, status)).save()
-       return HttpResponseRedirect('../..')
+       messages.info(request, 'Set post "%s" to %s' % (post.title, status and "hidden" or "visible"), extra_tags="top")
+       purge_root_and_feeds()
+       return HttpResponseRedirect("/register/edit/{0}/".format(blogid))
 
 @login_required
 @transaction.atomic
@@ -205,7 +166,93 @@ def blogpost_unhide(request, blogid, postid):
 @transaction.atomic
 def blogpost_delete(request, blogid, postid):
        post = __getvalidblogpost(request, blogid, postid)
-
+       title = post.title
        post.delete()
        AuditEntry(request.user.username, 'Deleted post %s from blog %s' % (postid, blogid)).save()
-       return HttpResponseRedirect('../..')
+       messages.info(request, 'Deleted post "%s". It will be reloaded on the next scheduled crawl.' % title)
+       purge_root_and_feeds()
+       return HttpResponseRedirect("/register/edit/{0}/".format(blogid))
+
+# Moderation
+@login_required
+@user_passes_test(issuperuser)
+def moderate(request):
+       """Render the moderation overview listing every unapproved blog.
+
+       Each blog is annotated as "oldest" with the maximum posts__dat value
+       of its posts, and the list is sorted ascending on that annotation.
+       """
+       pending = Blog.objects.filter(approved=False)
+       pending = pending.annotate(oldest=Max('posts__dat')).order_by('oldest')
+       return render_to_response(
+               'moderate.html',
+               {'blogs': pending},
+               context_instance=RequestContext(request),
+       )
+
+@login_required
+@user_passes_test(issuperuser)
+@transaction.atomic
+def moderate_reject(request, blogid):
+       """Show the rejection form for a blog and, on a valid POST, reject it.
+
+       Rejecting always mails the moderators, mails the blog owner as well
+       unless the form's "modsonly" field is set, deletes the blog and
+       redirects back to the moderation list. A GET, or an invalid POST,
+       renders the form.
+       """
+       blog = get_object_or_404(Blog, id=blogid)
+
+       if request.method != "POST":
+               form = ModerateRejectForm()
+       else:
+               form = ModerateRejectForm(data=request.POST)
+               if form.is_valid():
+                       # Ok, actually reject this blog.
+                       owner = get_object_or_404(User, username=blog.userid)
+                       reason = form.cleaned_data['message']
+
+                       # The moderators are always told.
+                       modbody = u"The blog at {0} by {1} {2}\nwas marked as rejected by {3}. The message given was:\n\n{4}\n\n".format(blog.feedurl, owner.first_name, owner.last_name, request.user.username, reason)
+                       send_simple_mail(
+                               settings.EMAIL_SENDER,
+                               settings.NOTIFICATION_RECEIVER,
+                               "A blog was rejected on Planet PostgreSQL",
+                               modbody,
+                               sendername="Planet PostgreSQL",
+                               receivername="Planet PostgreSQL Moderators",
+                       )
+                       messages.info(request, u"Blog {0} rejected, notification sent to moderators".format(blog.feedurl))
+
+                       # The owner is told too, unless the moderator opted out.
+                       if not form.cleaned_data['modsonly']:
+                               ownerbody = u"The blog at {0} that you submitted to Planet PostgreSQL has\nunfortunately been rejected. The reason given was:\n\n{1}\n\n".format(blog.feedurl, reason)
+                               send_simple_mail(
+                                       settings.EMAIL_SENDER,
+                                       owner.email,
+                                       "Your blog submission to Planet PostgreSQL",
+                                       ownerbody,
+                                       sendername="Planet PostgreSQL",
+                                       receivername=u"{0} {1}".format(owner.first_name, owner.last_name),
+                               )
+                               messages.info(request, u"Blog {0} rejected, notification sent to blog owner".format(blog.feedurl))
+
+                       blog.delete()
+                       return HttpResponseRedirect("/register/moderate/")
+
+       context = {
+               'form': form,
+               'blog': blog,
+       }
+       return render_to_response('moderate_reject.html', context, RequestContext(request))
+
+@login_required
+@user_passes_test(issuperuser)
+@transaction.atomic
+def moderate_approve(request, blogid):
+       """Approve a blog: notify moderators and owner, flag it approved,
+       record an audit entry, purge the cached pages and return to the
+       moderation list.
+       """
+       blog = get_object_or_404(Blog, id=blogid)
+       owner = get_object_or_404(User, username=blog.userid)
+
+       # Notification to the moderators
+       modbody = u"The blog at {0} by {1} {2}\nwas marked as approved by {3}.\n\n".format(blog.feedurl, owner.first_name, owner.last_name, request.user.username)
+       send_simple_mail(
+               settings.EMAIL_SENDER,
+               settings.NOTIFICATION_RECEIVER,
+               "A blog was approved on Planet PostgreSQL",
+               modbody,
+               sendername="Planet PostgreSQL",
+               receivername="Planet PostgreSQL Moderators",
+       )
+
+       # Notification to the blog owner
+       ownerbody = u"The blog at {0} that you submitted to Planet PostgreSQL has\nbeen approved.\n\n".format(blog.feedurl)
+       send_simple_mail(
+               settings.EMAIL_SENDER,
+               owner.email,
+               "Your blog submission to Planet PostgreSQL",
+               ownerbody,
+               sendername="Planet PostgreSQL",
+               receivername=u"{0} {1}".format(owner.first_name, owner.last_name),
+       )
+
+       blog.approved = True
+       blog.save()
+
+       audit = AuditEntry(request.user.username, 'Approved blog %s at %s' % (blog.id, blog.feedurl))
+       audit.save()
+
+       messages.info(request, u"Blog {0} approved, notification sent to moderators and owner.".format(blog.feedurl))
+
+       # The front page, the feeds and the feed list may all show this blog now.
+       purge_root_and_feeds()
+       purge_url('/feeds.html')
+
+       return HttpResponseRedirect("/register/moderate/")
index 2c6c9b4ce9c4c3d4ca1a9d642481e5f43c324017..9de04158e57aec5f063f9de7cdd09acce518904c 100644 (file)
@@ -38,9 +38,9 @@ TEMPLATE_LOADERS = (
 MIDDLEWARE_CLASSES = (
     'django.middleware.common.CommonMiddleware',
     'django.contrib.sessions.middleware.SessionMiddleware',
+       'django.middleware.csrf.CsrfViewMiddleware',
     'django.contrib.messages.middleware.MessageMiddleware',
     'django.contrib.auth.middleware.AuthenticationMiddleware',
-    'hamnadmin.exceptions.PlanetExceptionMiddleware',
 )
 
 ROOT_URLCONF = 'hamnadmin.urls'
@@ -54,7 +54,6 @@ INSTALLED_APPS = (
     'django.contrib.auth',
     'django.contrib.contenttypes',
     'django.contrib.sessions',
-    'django.contrib.sites',
        'django.contrib.staticfiles',
     'hamnadmin.register',
     'hamnadmin.mailqueue',
@@ -69,6 +68,12 @@ LOGIN_URL = '/register/login'
 
 ALLOWED_HOSTS=['*']
 
+EMAIL_SENDER='planet@postgresql.org'
+NOTIFICATION_RECEIVER='planet@postgresql.org'
+
+# Set to None for testing
+VARNISH_URL="http://localhost/varnish-purge"
+
 # If there is a local_settings.py, let it override our settings
 try:
        from local_settings import *
index 2318a44b94d40bc11d19801f02849f4eb1919d44..2bd0ff9431332fa7ca0ac48101de9aae02499a35 100644 (file)
@@ -3,8 +3,13 @@ from django.conf.urls import *
 # Uncomment the next two lines to enable the admin:
 # from django.contrib import admin
 # admin.autodiscover()
+from hamnadmin.register.feeds import PostFeed
 
 urlpatterns = patterns('',
-    # Example:
+    (r'^$', 'hamnadmin.register.views.planet_home'),
+    (r'^add.html$', 'hamnadmin.register.views.planet_add'),
+    (r'^feeds.html$', 'hamnadmin.register.views.planet_feeds'),
+
+    (r'^rss20(?P<type>_short)?\.xml$', PostFeed()),
     (r'^register/', include('hamnadmin.register.urls')),
 )
diff --git a/hamnadmin/hamnadmin/util/__init__.py b/hamnadmin/hamnadmin/util/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/hamnadmin/hamnadmin/util/aggregate.py b/hamnadmin/hamnadmin/util/aggregate.py
new file mode 100644 (file)
index 0000000..a6610db
--- /dev/null
@@ -0,0 +1,128 @@
+#!/usr/bin/env python
+
+import datetime
+
+import feedparser
+
+from hamnadmin.register.models import Post
+
+class FeedFetcher(object):
+       """Fetch a single feed and turn its entries into Post objects.
+
+       feed is the feed record to fetch: its feedurl, blogurl and authorfilter
+       attributes are read, and blogurl and lastget may be updated on it (the
+       record is not saved here). tracefunc, if given, is called with a
+       message string for every trace event.
+       """
+
+       def __init__(self, feed, tracefunc=None):
+               self.feed = feed
+               self.tracefunc = tracefunc
+               # Newest entry date seen while parsing; used as the fallback for
+               # feed.lastget when no Last-Modified time is available.
+               self.newest_entry_date = None
+
+       def _trace(self, msg):
+               """Pass msg to the trace callback, if one was supplied."""
+               if self.tracefunc:
+                       self.tracefunc(msg)
+
+       def parse(self, fetchsince=None):
+               """Fetch the feed and yield a Post for every usable entry.
+
+               This is a generator, so nothing is fetched until it is iterated,
+               and feed.lastget is only updated once all entries have been
+               consumed. The yielded Post objects are not saved here.
+
+               fetchsince, if set, must provide timetuple() (e.g. a datetime); it
+               is handed to feedparser as the "modified" value. A 304 response
+               yields nothing.
+
+               Raises Exception if the feed could not be loaded at all, or if the
+               response status is neither 200 nor 304.
+               """
+               if fetchsince:
+                       parser = feedparser.parse(self.feed.feedurl, modified=fetchsince.timetuple())
+               else:
+                       parser = feedparser.parse(self.feed.feedurl)
+
+               if not hasattr(parser, 'status'):
+                       # bozo_exception can seemingly be set when there is no error as well,
+                       # so make sure we only check if we didn't get a status.
+                       if hasattr(parser, 'bozo_exception'):
+                               raise Exception('Feed load error %s' % parser.bozo_exception)
+                       raise Exception('Feed load error with no exception!')
+
+               if parser.status == 304:
+                       # Not modified
+                       return
+
+               if parser.status != 200:
+                       # XXX: follow redirect?
+                       raise Exception('Feed returned status %s' % parser.status)
+
+               self._trace("Fetched %s, status %s" % (self.feed.feedurl, parser.status))
+
+               if self.feed.blogurl == '':
+                       try:
+                               self.feed.blogurl = parser.feed.link
+                       except (AttributeError, KeyError):
+                               # The feed does not advertise a link; leave blogurl empty.
+                               pass
+
+               for entry in parser.entries:
+                       if not self.matches_filter(entry):
+                               self._trace("Entry %s does not match filter, skipped" % entry.link)
+                               continue
+
+                       # Grab the entry. At least atom feeds from wordpress store what we
+                       # want in entry.content[0].value and *also* have a summary that's
+                       # much shorter.
+                       # We therefore check all available texts, and just pick the one
+                       # that is longest.
+                       txtalts = []
+                       try:
+                               txtalts.append(entry.content[0].value)
+                       except (AttributeError, KeyError, IndexError, TypeError):
+                               # No usable content element on this entry.
+                               pass
+                       if 'summary' in entry:
+                               txtalts.append(entry.summary)
+
+                       # Select the longest text. An entry with neither content nor
+                       # summary leaves the list empty, and max() would raise
+                       # ValueError on that, so treat it as an entry without contents.
+                       txt = max(txtalts, key=len) if txtalts else ''
+                       if txt == '':
+                               self._trace("Entry %s has no contents" % entry.link)
+                               continue
+
+                       if hasattr(entry, 'published_parsed'):
+                               dat = datetime.datetime(*(entry.published_parsed[0:6]))
+                       elif hasattr(entry, 'updated_parsed'):
+                               dat = datetime.datetime(*(entry.updated_parsed[0:6]))
+                       else:
+                               self._trace("Failed to get date for entry %s (keys %s)" % (entry.link, entry.keys()))
+                               continue
+
+                       # Remember the newest entry date. This has to run for every
+                       # dated entry; it used to sit inside the else branch above,
+                       # after the continue, where it could never execute.
+                       if self.newest_entry_date is None or dat > self.newest_entry_date:
+                               self.newest_entry_date = dat
+
+                       yield Post(
+                               feed=self.feed,
+                               guid=entry.id,
+                               link=entry.link,
+                               txt=txt,
+                               dat=dat,
+                               title=entry.title,
+                       )
+
+               # Check if we got back a Last-Modified time
+               if hasattr(parser, 'modified_parsed') and parser['modified_parsed']:
+                       # Last-Modified header retrieved. If we did receive it, we will
+                       # trust the content (assuming we can parse it)
+                       d = datetime.datetime(*parser['modified_parsed'][:6])
+                       if (d-datetime.datetime.now()).days > 5:
+                               # Except if it's ridiculously long in the future, we'll set it
+                               # to right now instead, to deal with buggy blog software. We
+                               # currently define ridiculously long as 5 days
+                               d = datetime.datetime.now()
+
+                       self.feed.lastget = d
+               else:
+                       # We didn't get a Last-Modified time, so set it to the entry date
+                       # for the latest entry in this feed.
+                       if self.newest_entry_date:
+                               self.feed.lastget = self.newest_entry_date
+
+       def matches_filter(self, entry):
+               """Return True if entry passes the feed's filters.
+
+               With an author filter set, the entry's author_detail.name (or, if
+               absent, its author) must equal the filter exactly; an entry with
+               no author information is rejected. Without a filter every entry
+               matches.
+               """
+               # For now, we only match against self.feed.authorfilter. In the future,
+               # there may be more filters.
+               if self.feed.authorfilter:
+                       # Match against an author filter
+                       if 'author_detail' in entry:
+                               return entry.author_detail.name == self.feed.authorfilter
+                       elif 'author' in entry:
+                               return entry.author == self.feed.authorfilter
+                       else:
+                               return False
+
+               # No filters, always return true
+               return True
diff --git a/hamnadmin/hamnadmin/util/html.py b/hamnadmin/hamnadmin/util/html.py
new file mode 100644 (file)
index 0000000..299db2e
--- /dev/null
@@ -0,0 +1,98 @@
+from HTMLParser import HTMLParser
+import tidy
+import urllib
+
+_tidyopts = dict(   drop_proprietary_attributes=1,
+                                       alt_text='',
+                                       hide_comments=1,
+                                       output_xhtml=1,
+                                       show_body_only=1,
+                                       clean=1,
+                                       char_encoding='utf8',
+)
+
+def TruncateAndClean(txt):
+       """Return txt cleaned up by tidy and truncated to 2048 characters of text.
+
+       txt is encoded as UTF-8, run through tidy with _tidyopts, decoded back
+       to unicode, passed through HtmlTruncator(2048), and finally stripped of
+       any leading <br> tags.
+       """
+       # First apply Tidy
+       txt = unicode(str(tidy.parseString(txt.encode('utf-8'), **_tidyopts)),'utf8')
+
+       # Then truncate as necessary
+       ht = HtmlTruncator(2048)
+       ht.feed(txt)
+       out = ht.GetText()
+
+       # Remove initial <br /> tags
+       while out.startswith('<br'):
+               end = out.find('>')
+               if end < 0:
+                       # Unterminated tag: find() returned -1, so slicing at end+1
+                       # would leave the text unchanged and loop forever.
+                       break
+               out = out[end+1:]
+
+       return out
+
+class HtmlTruncator(HTMLParser):
+       """Re-emit parsed HTML, cutting it off after maxlen characters of text.
+
+       Only character data and entity/character references count towards the
+       limit; tags do not. When the limit is passed, the text is cut, every
+       tag still open is closed, and "[...]" is appended. GetText() returns
+       the truncated markup, or the untouched input if the limit was never
+       exceeded.
+       """
+
+       # Characters quoteurl() leaves alone so the structure of a URL (port,
+       # query string, fragment, existing %-escapes) survives quoting.
+       _urlsafe = "/:?#@&=+;,%~"
+
+       def __init__(self, maxlen):
+               HTMLParser.__init__(self)
+               self.len = 0            # text characters seen so far
+               self.maxlen = maxlen
+               self.fulltxt = ''       # everything fed in, unmodified
+               self.trunctxt = ''      # re-serialized output, up to the limit
+               self.tagstack = []      # currently open tags, innermost last
+               self.skiprest = False   # set once the limit has been passed
+
+       def feed(self, txt):
+               """Feed txt (minus leading whitespace) to the parser."""
+               txt = txt.lstrip()
+               self.fulltxt += txt
+               HTMLParser.feed(self, txt)
+
+       def handle_startendtag(self, tag, attrs):
+               # Self-closing tags are copied verbatim and never enter tagstack.
+               if self.skiprest: return
+               self.trunctxt += self.get_starttag_text()
+
+       def quoteurl(self, str):
+               """Return the URL with unsafe characters percent-encoded.
+
+               Returns "" for an empty value or one without a scheme separator.
+               """
+               if not str:
+                       # Valueless href attribute
+                       return ""
+               # Split off the scheme only. Splitting on further colons used to
+               # throw away everything after a port number.
+               p = str.split(":", 1)
+               if len(p) < 2:
+                       # Don't crash on invalid URLs
+                       return ""
+               return p[0] + ":" + urllib.quote(p[1], self._urlsafe)
+
+       def cleanhref(self, attrs):
+               """Map one (name, value) attribute pair, quoting the value of href."""
+               if attrs[0] == 'href':
+                       return 'href', self.quoteurl(attrs[1])
+               return attrs
+
+       def handle_starttag(self, tag, attrs):
+               if self.skiprest: return
+               self.trunctxt += "<" + tag
+               for k, v in map(self.cleanhref, attrs):
+                       if v is None:
+                               # Attribute without a value, e.g. <input disabled>
+                               self.trunctxt += ' %s' % k
+                       else:
+                               # The parser hands us unescaped attribute values, so
+                               # escape them again before writing them back out.
+                               self.trunctxt += ' %s="%s"' % (k, v.replace('&', '&amp;').replace('"', '&quot;'))
+               self.trunctxt += ">"
+               self.tagstack.append(tag)
+
+       def handle_endtag(self, tag):
+               if self.skiprest: return
+               self.trunctxt += "</" + tag + ">"
+               if self.tagstack:
+                       # Guard against a stray end tag with nothing open
+                       self.tagstack.pop()
+
+       def handle_entityref(self, ref):
+               self.len += 1
+               if self.skiprest: return
+               self.trunctxt += "&" + ref + ";"
+
+       def handle_charref(self, ref):
+               # Numeric references count as one character, like named entities.
+               # Without this handler they were dropped from truncated output.
+               self.len += 1
+               if self.skiprest: return
+               self.trunctxt += "&#" + ref + ";"
+
+       def handle_data(self, data):
+               self.len += len(data)
+               if self.skiprest: return
+               self.trunctxt += data
+               if self.len > self.maxlen:
+                       # Passed max length, so truncate text as close to the limit as possible
+                       self.trunctxt = self.trunctxt[0:len(self.trunctxt)-(self.len-self.maxlen)]
+
+                       # Now append any tags that weren't properly closed
+                       for tag in reversed(self.tagstack):
+                               self.trunctxt += "</" + tag + ">"
+                       self.skiprest = True
+
+                       # Finally, append the continuation chars
+                       self.trunctxt += "[...]"
+
+       def GetText(self):
+               """Return the truncated markup, or the full input if it fit."""
+               if self.len > self.maxlen:
+                       return self.trunctxt
+               else:
+                       return self.fulltxt
diff --git a/hamnadmin/hamnadmin/util/shortlink.py b/hamnadmin/hamnadmin/util/shortlink.py
new file mode 100644 (file)
index 0000000..06a7191
--- /dev/null
@@ -0,0 +1,7 @@
+# Simple map used to shorten id values to URLs.
+# Every entry must be a single URL-safe character: 64 entries, one per
+# base-64 digit (presumably indexed by "id % 64", as the old shorturl.py
+# did -- confirm against the callers).
+urlvalmap = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a',
+                        'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
+                        'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
+                        'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
+                        'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S',
+                        'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '-', '_']
diff --git a/hamnadmin/hamnadmin/util/varnish.py b/hamnadmin/hamnadmin/util/varnish.py
new file mode 100644 (file)
index 0000000..95d3419
--- /dev/null
@@ -0,0 +1,20 @@
+from django.conf import settings
+
+import urllib2
+
+def purge_url(url):
+       """Ask Varnish to purge the cached objects matching url.
+
+       Sends a request to settings.VARNISH_URL carrying the pattern in an
+       X-Purge header. If VARNISH_URL is not set (testing), the pattern is
+       only printed.
+
+       Raises Exception if the request fails or does not return status 200.
+       """
+       if not settings.VARNISH_URL:
+               print "Not purging {0}".format(url)
+       else:
+               try:
+                       # The caret anchors the pattern at the start of the URL; it
+                       # belongs on the header value, not on the header name.
+                       # NOTE(review): confirm the header name and regex handling
+                       # against the Varnish VCL.
+                       request = urllib2.Request(settings.VARNISH_URL, headers={
+                               'X-Purge': '^' + url,
+                               })
+                       response = urllib2.urlopen(request, timeout=2)
+                       try:
+                               if response.getcode() != 200:
+                                       raise Exception("Invalid response code %s" % response.getcode())
+                       finally:
+                               # Always release the connection
+                               response.close()
+               except Exception as e:
+                       raise Exception("Failed to purge '{0}': {1}".format(url, e))
+
+def purge_root_and_feeds():
+       """Purge the front page and the rss20* feeds.
+
+       Passes a single pattern to purge_url(): "/" followed by either nothing
+       or "rss20" plus anything, up to the end of the URL. Presumably Varnish
+       evaluates it as a regular expression -- confirm against the VCL.
+       """
+       purge_url('/(|rss20.*)$')
diff --git a/planethtml.py b/planethtml.py
deleted file mode 100644 (file)
index e031f02..0000000
+++ /dev/null
@@ -1,75 +0,0 @@
-#!/usr/bin/env python
-"""PostgreSQL Planet Aggregator
-
-This file contains helper classes used to store the data when
-"communicating" with the templates to generate HTML output.
-
-Copyright (C) 2008 PostgreSQL Global Development Group
-"""
-
-import datetime
-import urllib
-
-# Yes, a global function (!)
-# Hmm. We only quote the ampersand here, since it's a HTML escape that
-# shows up in URLs quote often.
-def quoteurl(str):
-       if str is None: return None
-       return str.replace('&','&amp;')
-
-class PlanetPost:
-       def __init__(self, guid,link,dat,title,author,blogurl,txt,teamname,teamurl):
-               self.guid = guid
-               self.link = link
-               self.dat = dat
-               self.posttitle = title
-               self.author = author
-               self._blogurl = blogurl
-               self.txt = txt
-               self.teamname = teamname
-               self._teamurl = teamurl
-
-
-       def _get_blogurl(self):
-               return quoteurl(self._blogurl)
-       blogurl = property(_get_blogurl)
-
-       def _get_teamurl(self):
-               return quoteurl(self._teamurl)
-       teamurl = property(_get_teamurl)
-
-       def _get_datetime(self):
-               return self.dat.strftime("%Y-%m-%d at %H:%M:%S")
-       datetime = property(_get_datetime)
-
-       def _get_contents(self):
-               if self.txt.endswith("[...]"):
-                       self.txt = '%s<p>[<a href="%s">continue reading</a>]</p>' % (self.txt[:len(self.txt)-5], self.link)
-               return self.txt
-       contents = property(_get_contents)
-
-       def _get_title(self):
-               return self.posttitle
-       title = property(_get_title)
-
-class PlanetFeed:
-       def __init__(self,row):
-               self.name = row[0]
-               self._blogurl = row[1]
-               self._feedurl = row[2]
-               self.numposts = row[3]
-               self.teamname = row[4]
-               self._teamurl = row[5]
-               self.teamcount = row[6]
-
-       def _get_blogurl(self):
-               return quoteurl(self._blogurl)
-       blogurl = property(_get_blogurl)
-
-       def _get_feedurl(self):
-               return quoteurl(self._feedurl)
-       feedurl = property(_get_feedurl)
-
-       def _get_teamurl(self):
-               return quoteurl(self._teamurl)
-       teamurl = property(_get_teamurl)
diff --git a/shorturl.py b/shorturl.py
deleted file mode 100755 (executable)
index 8b02947..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-
-# Simple map used to shorten id values to URLs
-_urlvalmap = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '-', '_']
-
-def shortid(id):
-       s = ""
-       while id > 0:
-               s = _urlvalmap[id % 64] + s
-               id /= 64
-       return "http://postgr.es/p/%s" % s
-
-
-if len(sys.argv) != 2:
-       print "Usage: shorturl.py <id>"
-       sys.exit(1)
-
-id = int(sys.argv[1])
-
-print "%s -> %s" % (id, shortid(id))
index 76b75adee76867cc045fd7fbee87a5819df500a1..1814deba9e2beef50f221f17598d103a297d3a82 100644 (file)
@@ -6,26 +6,21 @@ The following list contains all feeds aggregated at this site.
 </p>
 
 <ul>
-{% for feed in allposters %}
+{% for feed in feeds %}
  <li><a href="{{feed.feedurl}}"><img src="img/feed-icon-14x14.png" alt="RSS"/></a> {%if feed.blogurl %}<a href="{{feed.blogurl}}">{{feed.name}}</a>{%else%}{{feed.name}}{%endif%}</li>
 {% endfor %}
 </ul>
 
 <h2>Teams</h2>
 <ul>
-{% for feed in allteams %}
-
- {% ifchanged feed.teamname %}
-  {% if not forloop.first %}
-  </ul>
- </li>
-  {% endif %}
- <li><a href="{{feed.teamurl}}">{{feed.teamname}}</a>
+{% for team in teams %}
+ <li><a href="{{team.teamurl}}">{{team.name}}</a>
   <ul>
- {% endifchanged %}
+{%for feed in team.blog_set.all%}
    <li><a href="{{feed.feedurl}}"><img src="img/feed-icon-14x14.png" alt="RSS"/></a> {%if feed.blogurl %}<a href="{{feed.blogurl}}">{{feed.name}}</a>{%else%}{{feed.name}}{%endif%}</li>
 {% endfor %}
   </ul>
  </li>
+{% endfor %}
 </ul>
 {% endblock %}
index 7b45d2366e631e329d6a41f5c732ed3f9e137f57..fd369a674d909fd67ff7b3b7397d03d03a6cc59a 100644 (file)
@@ -1,4 +1,5 @@
 {% extends "base.tmpl" %}
+{% load hamn %}
 {% block content %}
     <div id="planetRight">
 {% if topposters %}
@@ -6,7 +7,7 @@
      <div class="planetRightDescription">Number of posts in the past month</div>
      <ul class="toplist">
 {% for feed in topposters %}
-      <li><a href="{{feed.feedurl}}"><img src="img/feed-icon-14x14.png" alt="RSS"/></a> {%if feed.blogurl %}<a href="{{feed.blogurl}}">{{feed.name}}</a>{%else%}{{feed.name}}{%endif%} {%if feed.teamname%}({{feed.teamname}}) {%endif%}- {{feed.numposts}}</li>
+      <li><a href="{{feed.feedurl}}"><img src="img/feed-icon-14x14.png" alt="RSS"/></a> {%if feed.blogurl %}<a href="{{feed.blogurl}}">{{feed.name}}</a>{%else%}{{feed.name}}{%endif%} {%if feed.team%}({{feed.team.name}}) {%endif%}- {{feed.numposts}}</li>
 {% endfor %}
      </ul>
 {% endif %}
@@ -14,8 +15,8 @@
      <div class="planetRightTitle">Top teams</div>
      <div class="planetRightDescription">Number of posts in the past month</div>
      <ul>
-{% for feed in topteams %}
-      <li><a href="{{feed.teamurl}}">{{feed.teamname}}</a> - {{feed.teamcount}}</li>
+{% for team in topteams %}
+      <li><a href="{{team.teamurl}}">{{team.name}}</a> - {{team.numposts}}</li>
 {%endfor%}
      </ul>
 {% endif %}
         <div class="planetPostTitle"><a href="{{post.link}}">{{post.title}}</a></div>
         <b class="blf"></b><b class="b2f"></b><b class="b3f"></b><b class="b4f"></b>
         <div class="planetPostAuthor">
-         Posted by {%if post.blogurl%}<a href="{{post.blogurl}}">{{post.author}}</a>{%else%}{{post.author}}{%endif%} {%if post.teamurl%} in <a href="{{post.teamurl}}">{{post.teamname}}</a>{%endif%} on <span class="date">{{post.datetime}}</span>
+         Posted by {%if post.feed.blogurl%}<a href="{{post.feed.blogurl}}">{{post.feed.name}}</a>{%else%}{{post.feed.name}}{%endif%} {%if post.feed.team%} in <a href="{{post.feed.team.teamurl}}">{{post.feed.team.name}}</a>{%endif%} on <span class="date">{{post.dat|date:"Y-m-d"}} at {{post.dat|date:"H:i"}}</span>
         </div><!-- end planetPostHeader -->
         <b class="b4f"></b><b class="b3f"></b><b class="b2f"></b><b class="b1f"></b>
         <div class="cl"></div>
        </div><!-- end planetPostHeaderWrap -->
       </div><!-- end planet Post -->
-      <div class="planetPostContent">{{post.contents|safe}}</div>
+      <div class="planetPostContent">{{post.txt|postcontents}}</div>
       <div class="cl"></div>
      </div>
 {% endfor %}
diff --git a/www/css/planet_reg.css b/www/css/planet_reg.css
new file mode 100644 (file)
index 0000000..05f3023
--- /dev/null
@@ -0,0 +1,9 @@
+div#planethdr {
+   padding-left: 0px;
+   padding-right: 0px;
+   background: url(http://www.postgresql.org/layout/images/hdr_fill.png);
+}
+
+input.form-control[type=checkbox] {
+width: 10px;
+}