New way to figure out which text from the RSS flow to include in the blog:
author Magnus Hagander <magnus@hagander.net>
Mon, 3 Aug 2009 18:38:32 +0000 (20:38 +0200)
committer Magnus Hagander <magnus@hagander.net>
Mon, 3 Aug 2009 18:38:32 +0000 (20:38 +0200)
try every option we know about, and pick the longest available text.

aggregator.py

index e1bf9da6703332514b847f7b27e73943168c3dff..59fad9f0c53491e09aae8585af439ffb2c76b9e7 100755 (executable)
@@ -69,14 +69,19 @@ class Aggregator:
                                
                        # Grab the entry. At least atom feeds from wordpress store what we
                        # want in entry.content[0].value and *also* has a summary that's
-                       # much shorter. Other blog software store what we want in the summary
-                       # attribute. So let's just try one after another until we hit something.
+                       # much shorter.
+                       # We therefore check all available texts, and just pick the one that
+                       # is longest.
+                       txtalts = []
                        try:
-                               txt = entry.content[0].value
+                               txtalts.append(entry.content[0].value)
                        except:
-                               txt = ''
-                       if txt == '' and entry.has_key('summary'):
-                               txt = entry.summary
+                               pass
+                       if entry.has_key('summary'):
+                               txtalts.append(entry.summary)
+
+                       # Select the longest text
+                       txt = max(txtalts, key=len)
                        if txt == '':
                                # Not a critical error, we just ignore empty posts
                                print "Failed to get text for entry at %s" % entry.link