2121from gitdb .utils .encoding import (
2222 string_types , # @UnusedImport
2323 text_type , # @UnusedImport
24- force_bytes , # @UnusedImport
25- force_text # @UnusedImport
24+ force_text , # @UnusedImport
2625)
2726
2827
@@ -77,7 +76,7 @@ def safe_decode(s):
7776def safe_encode (s ):
7877 """Safely decodes a binary string to unicode"""
7978 if isinstance (s , unicode ):
80- return s .encode (defenc )
79+ return s .encode (defenc , 'surrogateescape' )
8180 elif isinstance (s , bytes ):
8281 return s
8382 elif s is not None :
@@ -123,8 +122,8 @@ def __str__(self):
123122 else : # Python 2
124123 def __str__ (self ):
125124 return self .__unicode__ ().encode (defenc )
126-
127-
125+
126+
128127"""
129128This is Victor Stinner's pure-Python implementation of PEP 383: the "surrogateescape" error
130129handler of Python 3.
@@ -139,12 +138,14 @@ def __str__(self):
139138# # -- Python 2/3 compatibility -------------------------------------
140139# FS_ERRORS = 'my_surrogateescape'
141140
141+
142142def u (text ):
143143 if PY3 :
144144 return text
145145 else :
146146 return text .decode ('unicode_escape' )
147147
148+
148149def b (data ):
149150 if PY3 :
150151 return data .encode ('latin1' )
@@ -155,9 +156,10 @@ def b(data):
155156 _unichr = chr
156157 bytes_chr = lambda code : bytes ((code ,))
157158else :
158- _unichr = unichr
159+ _unichr = unichr # @UndefinedVariable
159160 bytes_chr = chr
160161
162+
161163def surrogateescape_handler (exc ):
162164 """
163165 Pure Python implementation of the PEP 383: the "surrogateescape" error
@@ -204,7 +206,7 @@ def replace_surrogate_encode(mystring):
204206 # The following magic comes from Py3.3's Python/codecs.c file:
205207 if not 0xD800 <= code <= 0xDCFF :
206208 # Not a surrogate. Fail with the original exception.
207- raise exc
209+ raise
208210 # mybytes = [0xe0 | (code >> 12),
209211 # 0x80 | ((code >> 6) & 0x3f),
210212 # 0x80 | (code & 0x3f)]
@@ -256,9 +258,8 @@ def encodefilename(fn):
256258 elif 0xDC80 <= code <= 0xDCFF :
257259 ch = bytes_chr (code - 0xDC00 )
258260 else :
259- raise UnicodeEncodeError (FS_ENCODING ,
260- fn , index , index + 1 ,
261- 'ordinal not in range(128)' )
261+ raise UnicodeEncodeError (FS_ENCODING , fn , index , index + 1 ,
262+ 'ordinal not in range(128)' )
262263 encoded .append (ch )
263264 return bytes ().join (encoded )
264265 elif FS_ENCODING == 'utf-8' :
@@ -272,20 +273,22 @@ def encodefilename(fn):
272273 ch = bytes_chr (code - 0xDC00 )
273274 encoded .append (ch )
274275 else :
275- raise UnicodeEncodeError (
276- FS_ENCODING ,
277- fn , index , index + 1 , 'surrogates not allowed' )
276+ raise UnicodeEncodeError (FS_ENCODING , fn , index , index + 1 ,
277+ 'surrogates not allowed' )
278278 else :
279279 ch_utf8 = ch .encode ('utf-8' )
280280 encoded .append (ch_utf8 )
281281 return bytes ().join (encoded )
282282 else :
283283 return fn .encode (FS_ENCODING , FS_ERRORS )
284284
285+
285286def decodefilename (fn ):
286287 return fn .decode (FS_ENCODING , FS_ERRORS )
287288
288- FS_ENCODING = 'ascii' ; fn = b ('[abc\xff ]' ); encoded = u ('[abc\udcff ]' )
289+ FS_ENCODING = 'ascii'
290+ fn = b ('[abc\xff ]' )
291+ encoded = u ('[abc\udcff ]' )
289292# FS_ENCODING = 'cp932'; fn = b('[abc\x81\x00]'); encoded = u('[abc\udc81\x00]')
290293# FS_ENCODING = 'UTF-8'; fn = b('[abc\xff]'); encoded = u('[abc\udcff]')
291294
0 commit comments