Generate internal sitemap including devel docs
author Magnus Hagander <magnus@hagander.net>
Thu, 23 Mar 2017 11:03:18 +0000 (12:03 +0100)
committer Magnus Hagander <magnus@hagander.net>
Thu, 23 Mar 2017 11:11:22 +0000 (12:11 +0100)
We'll use this to index some things in our own search engine without
exposing them to external sitemap parsers. This is not a security
measure, of course; it simply makes it possible to search the devel
docs again.

pgweb/core/views.py
pgweb/docs/struct.py
pgweb/urls.py
pgweb/util/sitestruct.py

index a36e9792fd5745ed1cdb7eda0ab01b8337bfbdd3..35f556580f0173d0140193a1559a3cf01f21e2ce 100644 (file)
@@ -132,15 +132,13 @@ Sitemap: https://www.postgresql.org/sitemap.xml
 """, content_type='text/plain')
 
 
-# Sitemap (XML format)
-@cache(hours=6)
-def sitemap(request):
+def _make_sitemap(pagelist):
        resp = HttpResponse(content_type='text/xml')
        x = PgXmlHelper(resp)
        x.startDocument()
        x.startElement('urlset', {'xmlns': 'http://www.sitemaps.org/schemas/sitemap/0.9'})
        pages = 0
-       for p in get_all_pages_struct():
+       for p in pagelist:
                pages+=1
                x.startElement('url', {})
                x.add_xml_element('loc', 'https://www.postgresql.org/%s' % urllib.quote(p[0]))
@@ -153,6 +151,18 @@ def sitemap(request):
        x.endDocument()
        return resp
 
+# Sitemap (XML format)
+@cache(hours=6)
+def sitemap(request):
+       return _make_sitemap(get_all_pages_struct())
+
+# Internal sitemap (only for our own search engine)
+# Note! Still served up to anybody who wants it, so don't
+# put anything secret in it...
+@cache(hours=6)
+def sitemap_internal(request):
+       return _make_sitemap(get_all_pages_struct(method='get_internal_struct'))
+
 # dynamic CSS serving, meaning we merge a number of different CSS into a
 # single one, making sure it turns into a single http response. We do this
 # dynamically, since the output will be cached.
index ce0b137ea60f2ef2cf1495ced36a961b6fa69178..d0325604064d69dab48a4f795cb3dc26770c5a40 100644 (file)
@@ -36,3 +36,15 @@ def get_struct():
                if version == currentversion.tree:
                        yield ('docs/current/static/%s' % filename,
                                   1.0, loaded)
+
+# For our internal sitemap (used only by our own search engine),
+# include the devel version of the docs (and only those, since the
+# other versions are already included)
+def get_internal_struct():
+       curs = connection.cursor()
+       curs.execute("SELECT d.file, v.docsloaded FROM docs d INNER JOIN core_version v ON v.tree=d.version WHERE version = 0")
+
+       for filename, loaded in curs.fetchall():
+               yield ('docs/devel/static/%s' % (filename, ),
+                          0.1,
+                          loaded)
index e60df64e75c3d71977e2bdf3004c57a95979208a..904e54ebf51539dfec747b8cee900db6c43e3180 100644 (file)
@@ -81,6 +81,7 @@ urlpatterns = patterns('',
        # Sitemap (FIXME: support for >50k urls!)
        ###
        (r'^sitemap.xml', 'pgweb.core.views.sitemap'),
+       (r'^sitemap_internal.xml', 'pgweb.core.views.sitemap_internal'),
 
        ###
        # Workaround for broken links pushed in press release
index 01153d8042633766143b05e709a0daf74edac31b..be3cdec216129232d76c4fecb11de805290b044f 100644 (file)
@@ -1,6 +1,6 @@
 from django.conf import settings
 
-def get_all_pages_struct():
+def get_all_pages_struct(method='get_struct'):
        """
        Return an iterator over all distinct pages on the site.
        Each page is returned as a tuple consisting of:
@@ -13,9 +13,10 @@ def get_all_pages_struct():
        for app in settings.INSTALLED_APPS:
                if app.startswith('pgweb.'):
                        try:
-                               m = __import__(app+".struct", {}, {}, 'get_struct')
+                               m = __import__(app+".struct", {}, {}, method)
                        except:
                                # Failed to import - probably module didn't exist
                                continue
 
-                       for x in m.get_struct(): yield x
+                       if hasattr(m, method):
+                               for x in getattr(m, method)(): yield x