Support loading docs in utf-8 from pg version 10
author Magnus Hagander <magnus@hagander.net>
Wed, 16 Nov 2016 12:00:50 +0000 (13:00 +0100)
committer Magnus Hagander <magnus@hagander.net>
Wed, 16 Nov 2016 12:00:50 +0000 (13:00 +0100)
tools/docs/docload.py

index faf1892e49f1c816598929af9ce03620b6512653..12b2695736ee4d9acb5b40860e09f3038ccb095c 100755 (executable)
@@ -29,7 +29,17 @@ def load_doc_file(filename, f):
                                indent='auto',
                        )
 
-       contents = unicode(f.read(),'latin1')
+       # Postgres 10 started using an XML toolchain and now produces documentation in UTF-8,
+       # so we need to figure out which version it is.
+       rawcontents = f.read()
+       if rawcontents.startswith('<?xml version="1.0" encoding="UTF-8"'):
+               # Version 10, use utf8
+               encoding = 'utf-8'
+       else:
+               encoding = 'latin1'
+
+       contents = unicode(rawcontents, encoding)
+
        tm = re_titlematch.search(contents)
        if tm:
                title = tm.group(1)