from email.header import decode_header
from lib.exception import IgnorableException
+from lib.log import log
class ArchivesParser(object):
def __init__(self):
self.attachments = []
self.get_attachments()
if len(self.attachments) > 0:
- print "Found %s attachments" % len(self.attachments)
- print [(a[0],a[1],len(a[2])) for a in self.attachments]
+ log.status("Found %s attachments" % len(self.attachments))
+ log.status([(a[0],a[1],len(a[2])) for a in self.attachments])
# Build a list of the message ids we are interested in
self.parents = []
for p in container.get_payload():
if p.get_params() == None:
# MIME multipart/mixed, but no MIME type on the part
- print "Found multipart/mixed in message '%s', but no MIME type on part. Trying text/plain." % self.msgid
+ log.log("Found multipart/mixed in message '%s', but no MIME type on part. Trying text/plain." % self.msgid)
return self.get_payload_as_unicode(p)
if p.get_params()[0][0].lower() == 'text/plain':
# Don't include it if it looks like an attachment
m = self.re_msgid.match(messageid)
if not m:
if ignorebroken:
- print "Could not parse messageid '%s', ignoring it" % messageid
+ log.log("Could not parse messageid '%s', ignoring it" % messageid)
return None
raise Exception("Could not parse message id '%s'" % messageid)
return m.groups(1)[0]
dp = datetime.datetime(*dp.utctimetuple()[:6])
return dp
except Exception, e:
- print "Failed to parse date '%s'" % d
+ log.log("Failed to parse date '%s'" % d)
raise e
def decode_mime_header(self, hdr):
from parser import ArchivesParser
+from lib.log import log, opstatus
+
class ArchivesParserStorage(ArchivesParser):
def __init__(self):
super(ArchivesParserStorage, self).__init__()
if len(r) > 0:
# Has to be 1 row, since we have a unique index on id
if not r[0][1]:
- print "Tagging message %s with list %s" % (self.msgid, listid)
+ log.status("Tagging message %s with list %s" % (self.msgid, listid))
curs.execute("INSERT INTO list_threads (threadid, listid) VALUES (%(threadid)s, %(listid)s)", {
'threadid': r[0][0],
'listid': listid,
})
#FIXME: option to overwrite existing message!
- print "Message %s already stored" % self.msgid
+ log.status("Message %s already stored" % self.msgid)
+ opstatus.dupes += 1
return
# Resolve own thread
# Slice away all matches that are worse than the one we wanted
self.parents = self.parents[:best_parent]
- print "Message %s resolved to existing thread %s, waiting for %s better messages" % (self.msgid, self.threadid, len(self.parents))
+ log.status("Message %s resolved to existing thread %s, waiting for %s better messages" % (self.msgid, self.threadid, len(self.parents)))
else:
# No parent exists. But don't create the threadid just yet, since
# it's possible that we're somebody else's parent!
mergethreads = set([r[2] for r in childrows]).difference(set((self.threadid,)))
if len(mergethreads):
# We have one or more merge threads
- print "Merging threads %s into thread %s" % (",".join(str(s) for s in mergethreads), self.threadid)
+ log.status("Merging threads %s into thread %s" % (",".join(str(s) for s in mergethreads), self.threadid))
curs.execute("UPDATE messages SET threadid=%(threadid)s WHERE threadid=ANY(%(oldthreadids)s)", {
'threadid': self.threadid,
'oldthreadids': list(mergethreads),
# No parent and no child exists - create a new threadid, just for us!
curs.execute("SELECT nextval('threadid_seq')")
self.threadid = curs.fetchall()[0][0]
- print "Message %s resolved to no parent (out of %s) and no child, new thread %s" % (self.msgid, len(self.parents), self.threadid)
+ log.status("Message %s resolved to no parent (out of %s) and no child, new thread %s" % (self.msgid, len(self.parents), self.threadid))
# Insert a thread tag if we're on a new list
curs.execute("INSERT INTO list_threads (threadid, listid) SELECT %(threadid)s, %(listid)s WHERE NOT EXISTS (SELECT * FROM list_threads t2 WHERE t2.threadid=%(threadid)s AND t2.listid=%(listid)s) RETURNING threadid", {
'listid': listid,
})
if len(curs.fetchall()):
- print "Tagged thread %s with listid %s" % (self.threadid, listid)
+ log.status("Tagged thread %s with listid %s" % (self.threadid, listid))
curs.execute("INSERT INTO messages (parentid, threadid, _from, _to, cc, subject, date, has_attachment, messageid, bodytxt) VALUES (%(parentid)s, %(threadid)s, %(from)s, %(to)s, %(cc)s, %(subject)s, %(date)s, %(has_attachment)s, %(messageid)s, %(bodytxt)s) RETURNING id", {
'parentid': self.parentid,
} for a in self.attachments])
if len(self.children):
- print "Setting %s other threads to children of %s" % (len(self.children), self.msgid)
+ log.status("Setting %s other threads to children of %s" % (len(self.children), self.msgid))
curs.executemany("UPDATE messages SET parentid=%(parent)s WHERE id=%(id)s",
[{'parent': id, 'id': c} for c in self.children])
if len(self.parents):
# properly threaded - so store them in the db.
curs.executemany("INSERT INTO unresolved_messages (message, priority, msgid) VALUES (%(id)s, %(priority)s, %(msgid)s)",
[{'id': id, 'priority': i, 'msgid': self.parents[i]} for i in range(0, len(self.parents))])
+
+ opstatus.stored += 1
from lib.storage import ArchivesParserStorage
from lib.mbox import MailboxBreakupParser
from lib.exception import IgnorableException
+from lib.log import log, opstatus
if __name__ == "__main__":
optparser.add_option('-d', '--directory', dest='directory', help='Load all messages in directory')
optparser.add_option('-m', '--mbox', dest='mbox', help='Load all messages in mbox')
optparser.add_option('-i', '--interactive', dest='interactive', action='store_true', help='Prompt after each message')
+ optparser.add_option('-v', '--verbose', dest='verbose', action='store_true', help='Verbose output')
(opt, args) = optparser.parse_args()
optparser.print_usage()
sys.exit(1)
+ log.set(opt.verbose)
+
# Yay for hardcoding
conn = psycopg2.connect("host=/tmp dbname=archives")
})
r = curs.fetchall()
if len(r) != 1:
- print "List %s not found" % opt.list
+ log.error("List %s not found" % opt.list)
conn.close()
sys.exit(1)
listid = r[0][0]
if opt.directory:
# Parse all files in directory
for x in os.listdir(opt.directory):
- print "Parsing file %s" % x
+ log.status("Parsing file %s" % x)
with open(os.path.join(opt.directory, x)) as f:
ap = ArchivesParserStorage()
ap.parse(f)
try:
ap.analyze()
except IgnorableException, e:
- print "%s :: ignoring" % e
+ log.log("%s :: ignoring" % e)
+ opstatus.failed += 1
continue
ap.store(conn, listid)
if opt.interactive:
try:
ap.analyze()
except IgnorableException, e:
- print "%s :: ignoring" % e
+ log.log("%s :: ignoring" % e)
+ opstatus.failed += 1
continue
ap.store(conn, listid)
if mboxparser.returncode():
- print "Failed to parse mbox:"
- print mboxparser.stderr_output()
+ log.error("Failed to parse mbox:")
+ log.error(mboxparser.stderr_output())
sys.exit(1)
else:
# Parse single message on stdin
try:
ap.analyze()
except IgnorableException, e:
- print "%s :: ignoring" % e
+ log.log("%s :: ignoring" % e)
conn.close()
sys.exit(1)
ap.store(conn, listid)
- print "Committing..."
+ log.status("Committing...")
conn.commit()
- print "Done."
+ log.status("Done.")
conn.close()
+ opstatus.print_status()