From 69af766f8fae8ef489d951c71a37966fc51d736b Mon Sep 17 00:00:00 2001 From: Magnus Hagander Date: Thu, 3 Jan 2019 21:52:58 +0100 Subject: [PATCH] Whitespace fixes --- django/archives/auth.py | 8 ++- django/archives/mailarchives/api.py | 18 +++-- django/archives/mailarchives/models.py | 15 +++-- .../archives/mailarchives/redirecthandler.py | 2 + .../mailarchives/templatetags/pgfilters.py | 10 ++- django/archives/mailarchives/views.py | 65 +++++++++++++++---- django/archives/settings.py | 16 +---- django/archives/util.py | 3 + loader/clean_date.py | 6 +- loader/generate_mbox.py | 5 +- loader/hide_message.py | 4 +- loader/lib/log.py | 2 +- loader/lib/mbox.py | 1 + loader/lib/parser.py | 31 +++++---- loader/lib/storage.py | 19 +++--- loader/lib/varnish.py | 2 +- loader/load_message.py | 5 +- loader/pglister_sync.py | 13 ++-- loader/purge_frontend_message.py | 2 +- loader/reparse_message.py | 9 +-- loader/tools/edit_raw.py | 2 +- loader/tools/fix_from.py | 2 +- 22 files changed, 150 insertions(+), 90 deletions(-) diff --git a/django/archives/auth.py b/django/archives/auth.py index bcb349c..6ccf131 100644 --- a/django/archives/auth.py +++ b/django/archives/auth.py @@ -35,6 +35,7 @@ from Crypto.Hash import SHA from Crypto import Random import time + class AuthBackend(ModelBackend): # We declare a fake backend that always fails direct authentication - # since we should never be using direct authentication in the first place! @@ -62,7 +63,7 @@ def login(request): r = Random.new() iv = r.read(16) encryptor = AES.new(SHA.new(settings.SECRET_KEY.encode('ascii')).digest()[:16], AES.MODE_CBC, iv) - cipher = encryptor.encrypt(s + ' ' * (16-(len(s) % 16))) # pad to 16 bytes + cipher = encryptor.encrypt(s + ' ' * (16 - (len(s) % 16))) # pad to 16 bytes return HttpResponseRedirect("%s?d=%s$%s" % ( settings.PGAUTH_REDIRECT, @@ -72,6 +73,7 @@ def login(request): else: return HttpResponseRedirect(settings.PGAUTH_REDIRECT) + # Handle logout requests by logging out of this site and then # redirecting to log out from the main site as well. def logout(request): @@ -79,6 +81,7 @@ def logout(request): django_logout(request) return HttpResponseRedirect("%slogout/" % settings.PGAUTH_REDIRECT) + # Receive an authentication response from the main website and try # to log the user in. def auth_receive(request): @@ -120,7 +123,7 @@ def auth_receive(request): changed = True if user.email != data['e'][0]: user.email = data['e'][0] - changed= True + changed = True if changed: user.save() except User.DoesNotExist: @@ -221,6 +224,7 @@ def user_search(searchterm=None, userid=None): return j + # Import a user into the local authentication system. Will initially # make a search for it, and if anything other than one entry is returned # the import will fail. diff --git a/django/archives/mailarchives/api.py b/django/archives/mailarchives/api.py index acb90aa..b53e08e 100644 --- a/django/archives/mailarchives/api.py +++ b/django/archives/mailarchives/api.py @@ -27,6 +27,7 @@ def listinfo(request): return resp + @cache(hours=4) def latest(request, listname): if not settings.PUBLIC_ARCHIVES: @@ -47,8 +48,8 @@ def latest(request, listname): if limit <= 0 or limit > 100: limit = 50 - extrawhere=[] - extraparams=[] + extrawhere = [] + extraparams = [] # Return only messages that have attachments? if 'a' in request.GET: @@ -65,17 +66,19 @@ def latest(request, listname): extrawhere.append("threadid IN (SELECT threadid FROM list_threads WHERE listid=%s)" % list.listid) else: list = None - extrawhere='' + extrawhere = '' mlist = Message.objects.defer('bodytxt', 'cc', 'to').select_related().extra(where=extrawhere, params=extraparams).order_by('-date')[:limit] allyearmonths = set([(m.date.year, m.date.month) for m in mlist]) resp = HttpResponse(content_type='application/json') json.dump([ - {'msgid': m.messageid, - 'date': m.date.isoformat(), - 'from': m.mailfrom, - 'subj': m.subject,} + { + 'msgid': m.messageid, + 'date': m.date.isoformat(), + 'from': m.mailfrom, + 'subj': m.subject, + } for m in mlist], resp) # Make sure this expires from the varnish cache when new entries show @@ -112,6 +115,7 @@ def thread(request, msgid): resp['X-pgthread'] = m.threadid return resp + def thread_subscribe(request, msgid): if not settings.PUBLIC_ARCHIVES: return HttpResponseForbidden('No API access on private archives for now') diff --git a/django/archives/mailarchives/models.py b/django/archives/mailarchives/models.py index 165cef8..6bd047d 100644 --- a/django/archives/mailarchives/models.py +++ b/django/archives/mailarchives/models.py @@ -5,10 +5,10 @@ from django.db import models # we might need that flexibility in the future. hide_reasons = [ None, # placeholder for 0 - 'This message has been hidden because a virus was found in the message.', # 1 - 'This message has been hidden because the message violated policies.', # 2 - 'This message has been hidden because for privacy reasons.', # 3 - 'This message was corrupt', # 4 + 'This message has been hidden because a virus was found in the message.', # 1 + 'This message has been hidden because the message violated policies.', # 2 + 'This message has been hidden because for privacy reasons.', # 3 + 'This message was corrupt', # 4 ] @@ -42,6 +42,7 @@ class Message(models.Model): # multiple times from templates without generating multiple queries # to the database. _attachments = None + @property def attachments(self): if not self._attachments: @@ -57,6 +58,7 @@ class Message(models.Model): # Weird value return 'This message has been hidden.' + class ListGroup(models.Model): groupid = models.IntegerField(null=False, primary_key=True) groupname = models.CharField(max_length=200, null=False, blank=False) @@ -65,6 +67,7 @@ class ListGroup(models.Model): class Meta: db_table = 'listgroups' + class List(models.Model): listid = models.IntegerField(null=False, primary_key=True) listname = models.CharField(max_length=200, null=False, blank=False, unique=True) @@ -74,7 +77,6 @@ class List(models.Model): group = models.ForeignKey(ListGroup, db_column='groupid') subscriber_access = models.BooleanField(null=False, blank=False, default=False, help_text="Subscribers can access contents (default is admins only)") - @property def maybe_shortdesc(self): if self.shortdesc: @@ -84,6 +86,7 @@ class List(models.Model): class Meta: db_table = 'lists' + class Attachment(models.Model): message = models.ForeignKey(Message, null=False, blank=False, db_column='message') filename = models.CharField(max_length=1000, null=False, blank=False) @@ -115,6 +118,7 @@ class ListSubscriber(models.Model): unique_together = (('list', 'username'), ) db_table = 'listsubscribers' + class ApiClient(models.Model): apikey = models.CharField(max_length=100, null=False, blank=False) postback = models.URLField(max_length=500, null=False, blank=False) @@ -122,6 +126,7 @@ class ApiClient(models.Model): class Meta: db_table = 'apiclients' + class ThreadSubscription(models.Model): apiclient = models.ForeignKey(ApiClient, null=False, blank=False) threadid = models.IntegerField(null=False, blank=False) diff --git a/django/archives/mailarchives/redirecthandler.py b/django/archives/mailarchives/redirecthandler.py index f91a773..fc6a575 100644 --- a/django/archives/mailarchives/redirecthandler.py +++ b/django/archives/mailarchives/redirecthandler.py @@ -1,9 +1,11 @@ from django import shortcuts + class ERedirect(Exception): def __init__(self, url): self.url = url + class RedirectMiddleware(object): def process_exception(self, request, exception): if isinstance(exception, ERedirect): diff --git a/django/archives/mailarchives/templatetags/pgfilters.py b/django/archives/mailarchives/templatetags/pgfilters.py index c7d121b..83bb477 100644 --- a/django/archives/mailarchives/templatetags/pgfilters.py +++ b/django/archives/mailarchives/templatetags/pgfilters.py @@ -7,14 +7,17 @@ import hashlib register = template.Library() + def _rewrite_email(value): - return value.replace('@', '(at)').replace('.','(dot)') + return value.replace('@', '(at)').replace('.', '(dot)') + @register.filter(name='hidemail') @stringfilter def hidemail(value): return _rewrite_email(value) + # A regular expression and replacement function to mangle email addresses. # # The archived messages contain a lot of links to other messages in the @@ -26,17 +29,21 @@ def hidemail(value): # Those are not email addresses, so ignore them. The links won't work if they # are mangled. _re_mail = re.compile('(/m(essage-id)?/)?[^()<>@,;:\/\s"\'&|]+@[^()<>@,;:\/\s"\'&|]+') + + def _rewrite_email_match(match): if match.group(1): return match.group(0) # was preceded by /message-id/ else: return _rewrite_email(match.group(0)) + @register.filter(name='hideallemail') @stringfilter def hideallemail(value): return _re_mail.sub(lambda x: _rewrite_email_match(x), value) + @register.filter(name='nameonly') @stringfilter def nameonly(value): @@ -45,6 +52,7 @@ def nameonly(value): return name return email.split('@')[0] + @register.filter(name='md5') @stringfilter def md5(value): diff --git a/django/archives/mailarchives/views.py b/django/archives/mailarchives/views.py index 99c7def..a9b1d33 100644 --- a/django/archives/mailarchives/views.py +++ b/django/archives/mailarchives/views.py @@ -25,6 +25,7 @@ from .redirecthandler import ERedirect from .models import * + # Ensure the user is logged in (if it's not public lists) def ensure_logged_in(request): if settings.PUBLIC_ARCHIVES: @@ -33,6 +34,7 @@ def ensure_logged_in(request): return raise ERedirect('%s?next=%s' % (settings.LOGIN_URL, request.path)) + # Ensure the user has permissions to access a list. If not, raise # a permissions exception. def ensure_list_permissions(request, l): @@ -49,6 +51,7 @@ def ensure_list_permissions(request, l): # Redirect to a login page raise ERedirect('%s?next=%s' % (settings.LOGIN_URL, request.path)) + # Ensure the user has permissions to access a message. In order to view # a message, the user must have permissions on *all* lists the thread # appears on. @@ -83,6 +86,7 @@ def ensure_message_permissions(request, msgid): # Redirect to a login page raise ERedirect('%s?next=%s' % (settings.LOGIN_URL, request.path)) + # Decorator to set cache age def cache(days=0, hours=0, minutes=0, seconds=0): "Set the server to cache object a specified time. td must be a timedelta object" @@ -92,11 +96,12 @@ def cache(days=0, hours=0, minutes=0, seconds=0): if settings.PUBLIC_ARCHIVES: # Only set cache headers on public archives td = timedelta(hours=hours, minutes=minutes, seconds=seconds) - resp['Cache-Control'] = 's-maxage=%s' % (td.days*3600*24 + td.seconds) + resp['Cache-Control'] = 's-maxage=%s' % (td.days * 3600 * 24 + td.seconds) return resp return __cache return _cache + def nocache(fn): def _nocache(request, *_args, **_kwargs): resp = fn(request, *_args, **_kwargs) @@ -106,6 +111,7 @@ def nocache(fn): return resp return _nocache + # Decorator to require http auth def antispam_auth(fn): def _antispam_auth(request, *_args, **_kwargs): @@ -131,7 +137,6 @@ def antispam_auth(fn): return _antispam_auth - def get_all_groups_and_lists(request, listid=None): # Django doesn't (yet) support traversing the reverse relationship, # so we'll get all the lists and rebuild it backwards. @@ -152,7 +157,7 @@ def get_all_groups_and_lists(request, listid=None): 'groupid': l.group.groupid, 'groupname': l.group.groupname, 'sortkey': l.group.sortkey, - 'lists': [l,], + 'lists': [l, ], 'homelink': 'list/group/%s' % l.group.groupid, } @@ -183,10 +188,12 @@ class NavContext(object): if listname: self.ctx.update({'searchform_listname': listname}) + def render_nav(navcontext, template, ctx): ctx.update(navcontext.ctx) return render(navcontext.request, template, ctx) + @cache(hours=4) def index(request): ensure_logged_in(request) @@ -200,7 +207,7 @@ def index(request): @cache(hours=8) def groupindex(request, groupid): (groups, listgroupid) = get_all_groups_and_lists(request) - mygroups = [{'groupname': g['groupname'], 'lists': g['lists']} for g in groups if g['groupid']==int(groupid)] + mygroups = [{'groupname': g['groupname'], 'lists': g['lists']} for g in groups if g['groupid'] == int(groupid)] if len(mygroups) == 0: raise Http404('List group does not exist') @@ -208,6 +215,7 @@ def groupindex(request, groupid): 'groups': mygroups, }) + @cache(hours=8) def monthlist(request, listname): l = get_object_or_404(List, listname=listname) @@ -215,13 +223,14 @@ def monthlist(request, listname): curs = connection.cursor() curs.execute("SELECT year, month FROM list_months WHERE listid=%(listid)s ORDER BY year DESC, month DESC", {'listid': l.listid}) - months=[{'year':r[0],'month':r[1], 'date':datetime(r[0],r[1],1)} for r in curs.fetchall()] + months = [{'year': r[0], 'month': r[1], 'date': datetime(r[0], r[1], 1)} for r in curs.fetchall()] return render_nav(NavContext(request, l.listid, l.listname), 'monthlist.html', { 'list': l, 'months': months, }) + def get_monthday_info(mlist, l, d): allmonths = set([m.date.month for m in mlist]) monthdate = None @@ -271,9 +280,10 @@ def _render_datelist(request, l, d, datefilter, title, queryproc): 'daysinmonth': daysinmonth, 'yearmonth': yearmonth, }) - r['X-pglm'] = ':%s:' % (':'.join(['%s/%s/%s' % (l.listid, year, month) for year,month in allyearmonths])) + r['X-pglm'] = ':%s:' % (':'.join(['%s/%s/%s' % (l.listid, year, month) for year, month in allyearmonths])) return r + def render_datelist_from(request, l, d, title, to=None): # NOTE! Basic permissions checks must be done before calling this function! datefilter = Q(date__gte=d) @@ -283,6 +293,7 @@ def render_datelist_from(request, l, d, title, to=None): return _render_datelist(request, l, d, datefilter, title, lambda x: list(x.order_by('date')[:200])) + def render_datelist_to(request, l, d, title): # NOTE! Basic permissions checks must be done before calling this function! @@ -293,6 +304,7 @@ def render_datelist_to(request, l, d, title): return _render_datelist(request, l, d, Q(date__lte=d), title, lambda x: sorted(x.order_by('-date')[:200], key=lambda m: m.date)) + @cache(hours=2) def datelistsince(request, listname, msgid): l = get_object_or_404(List, listname=listname) @@ -301,6 +313,7 @@ def datelistsince(request, listname, msgid): msg = get_object_or_404(Message, messageid=msgid) return render_datelist_from(request, l, msg.date, "%s since %s" % (l.listname, msg.date.strftime("%Y-%m-%d %H:%M:%S"))) + # Longer cache since this will be used for the fixed date links @cache(hours=4) def datelistsincetime(request, listname, year, month, day, hour, minute): @@ -313,6 +326,7 @@ def datelistsincetime(request, listname, year, month, day, hour, minute): raise Http404("Invalid date format, not found") return render_datelist_from(request, l, d, "%s since %s" % (l.listname, d.strftime("%Y-%m-%d %H:%M"))) + @cache(hours=2) def datelistbefore(request, listname, msgid): l = get_object_or_404(List, listname=listname) @@ -321,6 +335,7 @@ def datelistbefore(request, listname, msgid): msg = get_object_or_404(Message, messageid=msgid) return render_datelist_to(request, l, msg.date, "%s before %s" % (l.listname, msg.date.strftime("%Y-%m-%d %H:%M:%S"))) + @cache(hours=2) def datelistbeforetime(request, listname, year, month, day, hour, minute): l = get_object_or_404(List, listname=listname) @@ -332,6 +347,7 @@ def datelistbeforetime(request, listname, year, month, day, hour, minute): raise Http404("Invalid date format, not found") return render_datelist_to(request, l, d, "%s before %s" % (l.listname, d.strftime("%Y-%m-%d %H:%M"))) + @cache(hours=4) def datelist(request, listname, year, month): l = get_object_or_404(List, listname=listname) @@ -342,10 +358,11 @@ def datelist(request, listname, year, month): except ValueError: raise Http404("Malformatted date, month not found") - enddate = d+timedelta(days=31) + enddate = d + timedelta(days=31) enddate = datetime(enddate.year, enddate.month, 1) return render_datelist_from(request, l, d, "%s - %s %s" % (l.listname, d.strftime("%B"), d.year), enddate) + @cache(hours=4) def attachment(request, attid): # Use a direct query instead of django, since it has bad support for @@ -362,6 +379,7 @@ def attachment(request, attid): return HttpResponse(r[0][3], content_type=r[0][1]) + def _build_thread_structure(threadid): # Yeah, this is *way* too complicated for the django ORM curs = connection.cursor() @@ -373,8 +391,18 @@ def _build_thread_structure(threadid): SELECT id,_from,subject,date,messageid,has_attachment,parentid,datepath FROM t ORDER BY datepath||date """, {'threadid': threadid}) - for id,_from,subject,date,messageid,has_attachment,parentid,parentpath in curs.fetchall(): - yield {'id':id, 'mailfrom':_from, 'subject': subject, 'date': date, 'printdate': date.strftime("%Y-%m-%d %H:%M:%S"), 'messageid': messageid, 'hasattachment': has_attachment, 'parentid': parentid, 'indent': " " * len(parentpath)} + for id, _from, subject, date, messageid, has_attachment, parentid, parentpath in curs.fetchall(): + yield { + 'id': id, + 'mailfrom': _from, + 'subject': subject, + 'date': date, + 'printdate': date.strftime("%Y-%m-%d %H:%M:%S"), + 'messageid': messageid, + 'hasattachment': has_attachment, + 'parentid': parentid, + 'indent': " " * len(parentpath), + } def _get_nextprevious(listmap, dt): @@ -419,6 +447,7 @@ SELECT l.listid,0, } return retval + @cache(hours=4) def message(request, msgid): ensure_message_permissions(request, msgid) @@ -437,7 +466,7 @@ def message(request, msgid): if ims >= newest: return HttpResponseNotModified() - responses = [t for t in threadstruct if t['parentid']==m.id] + responses = [t for t in threadstruct if t['parentid'] == m.id] if m.parentid: for t in threadstruct: @@ -460,6 +489,7 @@ def message(request, msgid): r['Last-Modified'] = http_date(newest) return r + @cache(hours=4) def message_flat(request, msgid): ensure_message_permissions(request, msgid) @@ -489,6 +519,7 @@ def message_flat(request, msgid): r['Last-Modified'] = http_date(newest) return r + @nocache @antispam_auth def message_raw(request, msgid): @@ -532,7 +563,6 @@ def _build_mbox(query, params, msgid=None): msg = parser.parse(s) return msg.as_string(unixfrom=True) - def _message_stream(first): yield _one_message(first[1]) @@ -547,6 +577,7 @@ def _build_mbox(query, params, msgid=None): r['Content-type'] = 'application/mbox' return r + @nocache @antispam_auth def message_mbox(request, msgid): @@ -561,6 +592,7 @@ def message_mbox(request, msgid): }, msgid) + @nocache @antispam_auth def mbox(request, listname, listname2, mboxyear, mboxmonth): @@ -588,6 +620,7 @@ def mbox(request, listname, listname2, mboxyear, mboxmonth): query = query.replace('%%%', '') return _build_mbox(query, params) + def search(request): if not settings.PUBLIC_ARCHIVES: # We don't support searching of non-public archives at all at this point. @@ -689,11 +722,12 @@ def search(request): 's': subject, 'f': mailfrom, 'r': rank, - 'a': abstract.replace("[[[[[[", "").replace("]]]]]]",""), + 'a': abstract.replace("[[[[[[", "").replace("]]]]]]", ""), } for messageid, date, subject, mailfrom, rank, abstract in curs.fetchall()], resp) return resp + @cache(seconds=10) def web_sync_timestamp(request): s = datetime.now().strftime("%Y-%m-%d %H:%M:%S\n") @@ -701,6 +735,7 @@ def web_sync_timestamp(request): r['Content-Length'] = len(s) return r + @cache(hours=8) def legacy(request, listname, year, month, msgnum): curs = connection.cursor() @@ -715,18 +750,20 @@ def legacy(request, listname, year, month, msgnum): raise Http404('Message does not exist') return HttpResponsePermanentRedirect('/message-id/%s' % r[0][0]) + # dynamic CSS serving, meaning we merge a number of different CSS into a # single one, making sure it turns into a single http response. We do this # dynamically, since the output will be cached. _dynamic_cssmap = { 'base': ['media/css/main.css', - 'media/css/normalize.css',], + 'media/css/normalize.css', ], 'docs': ['media/css/global.css', 'media/css/table.css', 'media/css/text.css', 'media/css/docs.css'], } + @cache(hours=8) def dynamic_css(request, css): if css not in _dynamic_cssmap: @@ -765,6 +802,7 @@ def dynamic_css(request, css): return resp + # Redirect to the requested url, with a slash first. This is used to remove # trailing slashes on messageid links by doing a permanent redirect. This is # better than just eating them, since this way we only end up with one copy @@ -773,6 +811,7 @@ def dynamic_css(request, css): def slash_redirect(request, url): return HttpResponsePermanentRedirect("/%s" % url) + # Redirect the requested URL to whatever happens to be in the regexp capture. # This is used for user agents that generate broken URLs that are easily # captured using regexp. diff --git a/django/archives/settings.py b/django/archives/settings.py index 8b942ac..80b990e 100644 --- a/django/archives/settings.py +++ b/django/archives/settings.py @@ -11,7 +11,7 @@ MANAGERS = ADMINS DATABASES = { 'default': { - 'ENGINE': 'django.db.backends.postgresql_psycopg2', # Add 'postgresql_psycopg2', 'postgresql', 'mysql', 'sqlite3' or 'oracle'. + 'ENGINE': 'django.db.backends.postgresql_psycopg2', # Add 'postgresql_psycopg2', 'postgresql', 'mysql', 'sqlite3' or 'oracle'. 'NAME': 'archives', # Or path to database file if using sqlite3. 'USER': '', # Not used with sqlite3. 'PASSWORD': '', # Not used with sqlite3. @@ -74,14 +74,6 @@ STATICFILES_DIRS = ( # Don't forget to use absolute paths, not relative paths. ) -# List of finder classes that know how to find static files in -# various locations. -#STATICFILES_FINDERS = ( -# 'django.contrib.staticfiles.finders.FileSystemFinder', -# 'django.contrib.staticfiles.finders.AppDirectoriesFinder', -# 'django.contrib.staticfiles.finders.DefaultStorageFinder', -#) - # Make this unique, and don't share it with anybody. SECRET_KEY = '7j9q&&!g26rkh!=g%1zb@20b^k^gmzy4=!mhzu2wesxb9b%16m' @@ -134,10 +126,8 @@ LOGGING = { } } - - # Required for lighttpd -FORCE_SCRIPT_NAME="" +FORCE_SCRIPT_NAME = "" # Always override! SEARCH_CLIENTS = ('127.0.0.1',) @@ -164,4 +154,4 @@ if not PUBLIC_ARCHIVES: ] + INSTALLED_APPS from archives.util import validate_new_user - PGAUTH_CREATEUSER_CALLBACK=validate_new_user + PGAUTH_CREATEUSER_CALLBACK = validate_new_user diff --git a/django/archives/util.py b/django/archives/util.py index 4ed9730..cf39e25 100644 --- a/django/archives/util.py +++ b/django/archives/util.py @@ -2,6 +2,7 @@ from django.http import HttpResponse from django.db import connection from django.utils.functional import SimpleLazyObject + def validate_new_user(username, email, firstname, lastname): # Only allow user creation if they are already a subscriber curs = connection.cursor() @@ -14,6 +15,7 @@ def validate_new_user(username, email, firstname, lastname): return HttpResponse("You are not currently subscribed to any mailing list on this server. Account not created.") + def _get_gitrev(): # Return the current git revision, that is used for # cache-busting URLs. @@ -33,6 +35,7 @@ def _get_gitrev(): # If packed-refs also can't be read, just give up return 'eeeeeeee' + # Template context processor to add information about the root link and # the current git revision. git revision is returned as a lazy object so # we don't spend effort trying to load it if we don't need it (though diff --git a/loader/clean_date.py b/loader/clean_date.py index 65d6bdb..19143cb 100755 --- a/loader/clean_date.py +++ b/loader/clean_date.py @@ -15,6 +15,7 @@ import dateutil.parser import psycopg2 + def scan_message(messageid, olddate, curs): u = "http://archives.postgresql.org/msgtxt.php?id=%s" % messageid print("Scanning message at %s (date reported as %s)..." % (u, olddate)) @@ -26,7 +27,7 @@ def scan_message(messageid, olddate, curs): # Can be either one of them, but we really don't care... ds = None - for k,r in list(msg.items()): + for k, r in list(msg.items()): if k != 'Received': continue print("Trying on %s" % r) @@ -61,10 +62,11 @@ def scan_message(messageid, olddate, curs): elif x.upper() == 'N': break + if __name__ == "__main__": cfg = ConfigParser() cfg.read('%s/archives.ini' % os.path.realpath(os.path.dirname(sys.argv[0]))) - connstr = cfg.get('db','connstr') + connstr = cfg.get('db', 'connstr') conn = psycopg2.connect(connstr) diff --git a/loader/generate_mbox.py b/loader/generate_mbox.py index 3ac1b97..dfb8d3d 100755 --- a/loader/generate_mbox.py +++ b/loader/generate_mbox.py @@ -64,12 +64,11 @@ if __name__ == "__main__": parser.print_help() sys.exit(1) - # Arguments OK, now connect cfg = ConfigParser() cfg.read(os.path.join(os.path.realpath(os.path.dirname(sys.argv[0])), 'archives.ini')) try: - connstr = cfg.get('db','connstr') + connstr = cfg.get('db', 'connstr') except: connstr = 'need_connstr' @@ -85,7 +84,7 @@ if __name__ == "__main__": # Same month, so do it monthrange = ((today.year, today.month),) else: - monthrange = ((today.year, today.month),(yesterday.year, yesterday.month)) + monthrange = ((today.year, today.month), (yesterday.year, yesterday.month)) for lid, lname in all_lists: for year, month in monthrange: fullpath = os.path.join(args.destination, lname, 'files/public/archive') diff --git a/loader/hide_message.py b/loader/hide_message.py index 7a0f524..4a4d10a 100755 --- a/loader/hide_message.py +++ b/loader/hide_message.py @@ -15,7 +15,7 @@ import psycopg2 from lib.varnish import VarnishPurger reasons = [ - None, # Placeholder for 0 + None, # Placeholder for 0 "virus", "violates policies", "privacy", @@ -41,7 +41,7 @@ if __name__ == "__main__": cfg = ConfigParser() cfg.read('%s/archives.ini' % os.path.realpath(os.path.dirname(sys.argv[0]))) try: - connstr = cfg.get('db','connstr') + connstr = cfg.get('db', 'connstr') except: connstr = 'need_connstr' diff --git a/loader/lib/log.py b/loader/lib/log.py index 48722c9..4995969 100644 --- a/loader/lib/log.py +++ b/loader/lib/log.py @@ -18,6 +18,7 @@ class Log(object): def print_status(self): opstatus.print_status() + class OpStatus(object): def __init__(self): self.stored = 0 @@ -32,4 +33,3 @@ class OpStatus(object): log = Log() opstatus = OpStatus() - diff --git a/loader/lib/mbox.py b/loader/lib/mbox.py index c097e72..278fd6c 100644 --- a/loader/lib/mbox.py +++ b/loader/lib/mbox.py @@ -10,6 +10,7 @@ from io import BytesIO SEPARATOR = "ABCARCHBREAK123" * 50 bSEPARATOR = bytes(SEPARATOR, 'ascii') + class MailboxBreakupParser(object): def __init__(self, fn): self.EOF = False diff --git a/loader/lib/parser.py b/loader/lib/parser.py index a727f0e..cef9468 100644 --- a/loader/lib/parser.py +++ b/loader/lib/parser.py @@ -13,6 +13,7 @@ import io from lib.exception import IgnorableException from lib.log import log + class ArchivesParser(object): def __init__(self): self.parser = BytesParser(policy=compat32) @@ -64,7 +65,6 @@ class ArchivesParser(object): if m and not m in self.parents: self.parents.append(m) - def clean_charset(self, charset): lcharset = charset.lower() if lcharset == 'unknown-8bit' or lcharset == 'x-unknown' or lcharset == 'unknown': @@ -139,7 +139,7 @@ class ArchivesParser(object): if not params: # No content-type, so we assume us-ascii return str(b, 'us-ascii', errors='ignore') - for k,v in params: + for k, v in params: if k.lower() == 'charset': charset = v break @@ -157,6 +157,7 @@ class ArchivesParser(object): # Regular expression matching the PostgreSQL custom mail footer that # is appended to all emails. _re_footer = re.compile('(.*)--\s+\nSent via [^\s]+ mailing list \([^\)]+\)\nTo make changes to your subscription:\nhttp://www\.postgresql\.org/mailpref/[^\s]+\s*$', re.DOTALL) + def get_body(self): b = self._get_body() if b: @@ -323,8 +324,8 @@ class ArchivesParser(object): # If it has a name, we consider it an attachments if not container.get_params(): return - for k,v in container.get_params(): - if k=='name' and v != '': + for k, v in container.get_params(): + if k == 'name' and v != '': # Yes, it has a name try: self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True))) @@ -374,6 +375,7 @@ class ArchivesParser(object): # No name, and text/plain, so ignore it re_msgid = re.compile('^\s*<(.*)>\s*') + def clean_messageid(self, messageid, ignorebroken=False): m = self.re_msgid.match(messageid) if not m: @@ -381,7 +383,7 @@ class ArchivesParser(object): log.status("Could not parse messageid '%s', ignoring it" % messageid) return None raise IgnorableException("Could not parse message id '%s'" % messageid) - return m.groups(1)[0].replace(' ','') + return m.groups(1)[0].replace(' ', '') # _date_multi_re = re.compile(' \((\w+\s\w+(\s+\w+)*|)\)$') # Now using [^\s] instead of \w, to work with japanese chars @@ -389,6 +391,7 @@ class ArchivesParser(object): _date_multi_re2 = re.compile(' ([\+-]\d{4}) \([^)]+\)$') _date_multiminus_re = re.compile(' -(-\d+)$') _date_offsetnoplus_re = re.compile(' (\d{4})$') + def forgiving_date_decode(self, d): if d.strip() == '': raise IgnorableException("Failed to parse empty date") @@ -416,17 +419,17 @@ class ArchivesParser(object): if d.endswith('+0-900'): d = d.replace('+0-900', '-0900') if d.endswith('Mexico/General'): - d = d.replace('Mexico/General','CDT') + d = d.replace('Mexico/General', 'CDT') if d.endswith('Pacific Daylight Time'): d = d.replace('Pacific Daylight Time', 'PDT') if d.endswith(' ZE2'): - d = d.replace(' ZE2',' +0200') + d = d.replace(' ZE2', ' +0200') if d.find('-Juin-') > 0: - d = d.replace('-Juin-','-Jun-') + d = d.replace('-Juin-', '-Jun-') if d.find('-Juil-') > 0: - d = d.replace('-Juil-','-Jul-') + d = d.replace('-Juil-', '-Jul-') if d.find(' 0 (GMT)'): - d = d.replace(' 0 (GMT)',' +0000') + d = d.replace(' 0 (GMT)', ' +0000') if self._date_multiminus_re.search(d): d = self._date_multiminus_re.sub(' \\1', d) @@ -434,7 +437,6 @@ class ArchivesParser(object): if self._date_offsetnoplus_re.search(d): d = self._date_offsetnoplus_re.sub('+\\1', d) - # We have a number of dates in the format # " +0200 (MET DST)" # or similar. The problem coming from the space within the @@ -455,7 +457,7 @@ class ArchivesParser(object): # Some offsets are >16 hours, which postgresql will not # (for good reasons) accept - if dp.utcoffset() and abs(dp.utcoffset().days * (24*60*60) + dp.utcoffset().seconds) > 60*60*16-1: + if dp.utcoffset() and abs(dp.utcoffset().days * (24 * 60 * 60) + dp.utcoffset().seconds) > 60 * 60 * 16 - 1: # Convert it to a UTC timestamp using Python. It will give # us the right time, but the wrong timezone. Should be # enough... @@ -471,6 +473,7 @@ class ArchivesParser(object): # Workaround for broken quoting in some MUAs (see below) _re_mailworkaround = re.compile('"(=\?[^\?]+\?[QB]\?[^\?]+\?=)"', re.IGNORECASE) + def _decode_mime_header(self, hdr, email_workaround): if hdr == None: return None @@ -480,7 +483,7 @@ class ArchivesParser(object): # do this *before* doing any MIME decoding, we should be safe against # anybody *actually* putting that sequence in the header (since we # won't match the encoded contents) - hdr = hdr.replace("\n\t"," ") + hdr = hdr.replace("\n\t", " ") # In at least some cases, at least gmail (and possibly other MUAs) # incorrectly put double quotes in the name/email field even when @@ -516,7 +519,7 @@ class ArchivesParser(object): def get_mandatory(self, fieldname): try: x = self.msg[fieldname] - if x==None: + if x == None: raise Exception() return x except: diff --git a/loader/lib/storage.py b/loader/lib/storage.py index a358068..cf2c284 100644 --- a/loader/lib/storage.py +++ b/loader/lib/storage.py @@ -4,6 +4,7 @@ from .parser import ArchivesParser from lib.log import log, opstatus + class ArchivesParserStorage(ArchivesParser): def __init__(self): super(ArchivesParserStorage, self).__init__() @@ -82,7 +83,7 @@ class ArchivesParserStorage(ArchivesParser): 'message': pk, }) if len(self.attachments): - curs.executemany("INSERT INTO attachments (message, filename, contenttype, attachment) VALUES (%(message)s, %(filename)s, %(contenttype)s, %(attachment)s)",[ { + curs.executemany("INSERT INTO attachments (message, filename, contenttype, attachment) VALUES (%(message)s, %(filename)s, %(contenttype)s, %(attachment)s)", [{ 'message': pk, 'filename': a[0] or 'unknown_filename', 'contenttype': a[1], @@ -106,11 +107,11 @@ class ArchivesParserStorage(ArchivesParser): all_parents = curs.fetchall() if len(all_parents): # At least one of the parents exist. Now try to figure out which one - best_parent = len(self.parents)+1 + best_parent = len(self.parents) + 1 best_threadid = -1 best_parentid = None - for i in range(0,len(all_parents)): - for j in range(0,len(self.parents)): + for i in range(0, len(all_parents)): + for j in range(0, len(self.parents)): if self.parents[j] == all_parents[i][1]: # This messageid found. Better than the last one? if j < best_parent: @@ -226,7 +227,7 @@ class ArchivesParserStorage(ArchivesParser): self.msgid, id, self.threadid, self.parentid)) if len(self.attachments): # Insert attachments - curs.executemany("INSERT INTO attachments (message, filename, contenttype, attachment) VALUES (%(message)s, %(filename)s, %(contenttype)s, %(attachment)s)",[ { + curs.executemany("INSERT INTO attachments (message, filename, contenttype, attachment) VALUES (%(message)s, %(filename)s, %(contenttype)s, %(attachment)s)", [{ 'message': id, 'filename': a[0] or 'unknown_filename', 'contenttype': a[1], @@ -261,7 +262,6 @@ class ArchivesParserStorage(ArchivesParser): f.write("\n-------------------------------\n\n") return - if (_from.rstrip(), to.rstrip(), cc.rstrip(), subject.rstrip()) != (self._from, self.to, self.cc, self.subject): log.status("Message %s has header changes " % self.msgid) f.write("==== %s ====\n" % self.msgid) @@ -281,22 +281,21 @@ class ArchivesParserStorage(ArchivesParser): tofile='new', n=0, lineterm='')) - if (len(tempdiff)-2) % 3 == 0: + if (len(tempdiff) - 2) % 3 == 0: # 3 rows to a diff, two header rows. # Then verify that each slice of 3 contains one @@ row (header), one -From and one +>From, # which indicates the only change is in the From. ok = True tempdiff = tempdiff[2:] while tempdiff: - a,b,c = (tempdiff.pop(0), tempdiff.pop(0), tempdiff.pop(0)) + a, b, c = (tempdiff.pop(0), tempdiff.pop(0), tempdiff.pop(0)) if not (a.startswith('@@ ') and b.startswith('-From ') and c.startswith('+>From ')): - ok=False + ok = False break if ok: fromonlyf.write("%s\n" % self.msgid) return - # Generate a nicer diff d = list(difflib.unified_diff(bodytxt.splitlines(), self.bodytxt.splitlines(), diff --git a/loader/lib/varnish.py b/loader/lib/varnish.py index 99d2d50..2b2bf89 100644 --- a/loader/lib/varnish.py +++ b/loader/lib/varnish.py @@ -2,6 +2,7 @@ import requests from lib.log import log + class VarnishPurger(object): def __init__(self, cfg): self.cfg = cfg @@ -30,4 +31,3 @@ class VarnishPurger(object): }) if r.status_code != 200: log.error("Failed to send purge request!") - diff --git a/loader/load_message.py b/loader/load_message.py index bf1cedf..72985e7 100755 --- a/loader/load_message.py +++ b/loader/load_message.py @@ -20,6 +20,7 @@ from lib.exception import IgnorableException from lib.log import log, opstatus from lib.varnish import VarnishPurger + def log_failed_message(listid, srctype, src, msg, err): try: msgid = msg.msgid @@ -80,7 +81,7 @@ if __name__ == "__main__": cfg = ConfigParser() cfg.read('%s/archives.ini' % os.path.realpath(os.path.dirname(sys.argv[0]))) try: - connstr = cfg.get('db','connstr') + connstr = cfg.get('db', 'connstr') except: connstr = 'need_connstr' @@ -168,7 +169,7 @@ if __name__ == "__main__": try: ap.analyze(date_override=opt.force_date) except IgnorableException as e: - log_failed_message(listid, "stdin","", ap, e) + log_failed_message(listid, "stdin", "", ap, e) conn.close() sys.exit(1) ap.store(conn, listid) diff --git a/loader/pglister_sync.py b/loader/pglister_sync.py index be8a9b9..d1f04c1 100755 --- a/loader/pglister_sync.py +++ b/loader/pglister_sync.py @@ -10,7 +10,7 @@ from configparser import ConfigParser import psycopg2 import requests -if __name__=="__main__": +if __name__ == "__main__": parser = argparse.ArgumentParser(description="Synchronize lists from pglister") parser.add_argument('--dryrun', action='store_true', help="Don't commit changes to database") @@ -19,14 +19,14 @@ if __name__=="__main__": cfg = ConfigParser() cfg.read('%s/archives.ini' % os.path.realpath(os.path.dirname(sys.argv[0]))) try: - connstr = cfg.get('db','connstr') + connstr = cfg.get('db', 'connstr') except: connstr = 'need_connstr' if cfg.has_option('pglister', 'subscribers') and cfg.getint('pglister', 'subscribers'): - do_subscribers=1 + do_subscribers = 1 else: - do_subscribers=0 + do_subscribers = 0 psycopg2.extensions.register_type(psycopg2.extensions.UNICODE) conn = psycopg2.connect(connstr) @@ -42,9 +42,9 @@ if __name__=="__main__": obj = r.json() # For groups, just add them if they don't exist - groups = {g['group']['id']:g['group']['groupname'] for g in obj} + groups = {g['group']['id']: g['group']['groupname'] for g in obj} - for id,name in list(groups.items()): + for id, name in list(groups.items()): curs.execute("SELECT EXISTS (SELECT 1 FROM listgroups WHERE groupname=%(group)s)", { 'group': name, }) @@ -90,7 +90,6 @@ if __name__=="__main__": else: print("Removed subscriber %s from list %s" % (who, name)) - # We don't remove lists ever, because we probably want to keep archives around. # But for now, we alert on them. curs.execute("SELECT listname FROM lists WHERE active AND NOT listname=ANY(%(lists)s)", { diff --git a/loader/purge_frontend_message.py b/loader/purge_frontend_message.py index 8325c72..3a87005 100755 --- a/loader/purge_frontend_message.py +++ b/loader/purge_frontend_message.py @@ -33,7 +33,7 @@ if __name__ == "__main__": cfg = ConfigParser() cfg.read('%s/archives.ini' % os.path.realpath(os.path.dirname(sys.argv[0]))) try: - connstr = cfg.get('db','connstr') + connstr = cfg.get('db', 'connstr') except: connstr = 'need_connstr' diff --git a/loader/reparse_message.py b/loader/reparse_message.py index 0f1fc2a..542e84a 100755 --- a/loader/reparse_message.py +++ b/loader/reparse_message.py @@ -21,6 +21,7 @@ from lib.exception import IgnorableException from lib.log import log, opstatus from lib.varnish import VarnishPurger + def ResultIter(cursor): # Fetch lots of data but keep memory usage down a bit, by feeding it out of # a generator, and use fetchmany() @@ -62,7 +63,7 @@ if __name__ == "__main__": cfg = ConfigParser() cfg.read('%s/archives.ini' % os.path.realpath(os.path.dirname(sys.argv[0]))) try: - connstr = cfg.get('db','connstr') + connstr = cfg.get('db', 'connstr') except: connstr = 'need_connstr' @@ -88,7 +89,7 @@ if __name__ == "__main__": if not opt.update: f = codecs.open("reparse.diffs", "w", "utf-8") - fromonlyf = open("reparse.fromonly","w") + fromonlyf = open("reparse.fromonly", "w") firststatus = datetime.now() laststatus = datetime.now() @@ -114,8 +115,8 @@ if __name__ == "__main__": ap.diff(conn, f, fromonlyf, id) if datetime.now() - laststatus > timedelta(seconds=5): sys.stdout.write("%s messages parsed (%s%%, %s / second), %s updated\r" % (num, - num*100/totalcount, - num / ((datetime.now()-firststatus).seconds), + num * 100 / totalcount, + num / ((datetime.now() - firststatus).seconds), updated)) sys.stdout.flush() laststatus = datetime.now() diff --git a/loader/tools/edit_raw.py b/loader/tools/edit_raw.py index 08e606f..5387af4 100755 --- a/loader/tools/edit_raw.py +++ b/loader/tools/edit_raw.py @@ -31,7 +31,7 @@ if __name__ == "__main__": cfg = ConfigParser() cfg.read('%s/../archives.ini' % os.path.realpath(os.path.dirname(sys.argv[0]))) try: - connstr = cfg.get('db','connstr') + connstr = cfg.get('db', 'connstr') except: connstr = 'need_connstr' diff --git a/loader/tools/fix_from.py b/loader/tools/fix_from.py index 719c40c..d262546 100755 --- a/loader/tools/fix_from.py +++ b/loader/tools/fix_from.py @@ -17,7 +17,7 @@ if __name__ == "__main__": cfg = ConfigParser() cfg.read('%s/../archives.ini' % os.path.realpath(os.path.dirname(sys.argv[0]))) try: - connstr = cfg.get('db','connstr') + connstr = cfg.get('db', 'connstr') except: connstr = 'need_connstr' -- 2.39.5