One more round of encodings

author Magnus Hagander <magnus@hagander.net>

Fri, 6 Jul 2012 09:46:02 +0000 (11:46 +0200)

committer Magnus Hagander <magnus@hagander.net>

Fri, 6 Jul 2012 09:46:02 +0000 (11:46 +0200)
author Magnus Hagander <magnus@hagander.net>
Fri, 6 Jul 2012 09:46:02 +0000 (11:46 +0200)
committer Magnus Hagander <magnus@hagander.net>
Fri, 6 Jul 2012 09:46:02 +0000 (11:46 +0200)
diff --git a/loader/lib/parser.py b/loader/lib/parser.py

index a72fa0c28130512888ed5514e3206d20facf5481..60caa8cf6356eb8540204ecb5a298a3bc1c07a84 100644 (file)
--- a/loader/lib/parser.py
+++ b/loader/lib/parser.py
@@ -57,7 +57,7 @@ class ArchivesParser(object):
                         # Special case where we don't know... We'll assume
                         # us-ascii and use replacements
                         return 'us-ascii'
-               if lcharset == '0' or lcharset == 'x-user-defined' or lcharset == '_autodetect_all':
+               if lcharset == '0' or lcharset == 'x-user-defined' or lcharset == '_autodetect_all' or lcharset == 'default_charset':
                         # Seriously broken charset definitions, map to us-ascii
                         # and throw away the rest with replacements
                         return 'us-ascii'
@@ -91,13 +91,18 @@ class ArchivesParser(object):
                         return 'iso-8859-1'
                 if lcharset == 'x-windows-949':
                         return 'ms949'
-               if lcharset == 'pt_pt':
+               if lcharset == 'pt_pt' or lcharset == 'de_latin' or lcharset == 'de':
                         # This is a locale, and not a charset, but most likely it's this one
                         return 'iso-8859-1'
+               if lcharset == 'iso-8858-15':
+                       # Recurring typo of iso-8859-15 ("8858" instead of "8859")
+                       return 'iso-8859-15'
                 if lcharset == 'macintosh':
                         return 'mac_roman'
                 if lcharset == 'cn-big5':
                         return 'big5'
+               if lcharset == 'x-unicode-2-0-UTF-7':
+                       return 'utf-7'
                 if lcharset == 'tscii':
                         # No support for this charset :S Map it down to ascii
                         # and throw away all the rest. sucks, but we have to
author	Magnus Hagander <magnus@hagander.net>
	Fri, 6 Jul 2012 09:46:02 +0000 (11:46 +0200)
committer	Magnus Hagander <magnus@hagander.net>
	Fri, 6 Jul 2012 09:46:02 +0000 (11:46 +0200)