More ssl for the crawler - don't verify server cert
author     Magnus Hagander <magnus@hagander.net>
           Wed, 25 May 2016 13:47:21 +0000 (15:47 +0200)
committer  Magnus Hagander <magnus@hagander.net>
           Wed, 25 May 2016 13:47:21 +0000 (15:47 +0200)
We'll fix verification later, but for now, since we're working against a
hardcoded local IP, we don't consider it important.

tools/search/crawler/lib/basecrawler.py

index 3dd2bb5d65c42f2fff6fc97d63334e5b7d53267f..fc173b42aebc4aabd851b0a03e60e985b9da1064 100644 (file)
@@ -163,21 +163,20 @@ class BaseSiteCrawler(object):
 
        def fetch_page(self, url):
                try:
-                       if not self.https:
-                               port = 80
-                               connclass = httplib.HTTPConnection
-                       else:
-                               port = 443
-                               connclass = httplib.HTTPSConnection
-
                        # Unfortunately, persistent connections seem quite unreliable,
                        # so create a new one for each page.
                        if self.serverip:
-                               h = connclass(host=self.serverip, port=port, strict=True, timeout=10)
+                               if not self.https:
+                                       h = httplib.HTTPConnection(host=self.serverip, port=80, strict=True, timeout=10)
+                               else:
+                                       h = httplib.HTTPSConnection(host=self.serverip, port=443, strict=True, timeout=10, context=ssl._create_unverified_context())
                                h.putrequest("GET", url, skip_host=1)
                                h.putheader("Host", self.hostname)
                        else:
-                               h = connclass(host=self.hostname, port=port, strict=True, timeout=10)
+                               if not self.https:
+                                       h = httplib.HTTPConnection(host=self.hostname, port=80, strict=True, timeout=10)
+                               else:
+                                       h = httplib.HTTPSConnection(host=self.hostname, port=443, strict=True, timeout=10, context=ssl._create_unverified_context())
                                h.putrequest("GET", url)
                        h.putheader("User-agent","pgsearch/0.2")
                        h.putheader("Connection","close")