@@ -16,3 +16,59 @@ def count_newlines_from_end(str):
1616 i -= 1
1717 return len (str ) - i
1818
#: Names accepted as spellings of the utf-8 codec: every base name below in
#: both its lower-case and its upper-case form.
_UTF8_ALIASES = frozenset(
    variant
    for name in ('utf-8', 'utf8', 'utf_8', 'utf', 'u8')
    for variant in (name, name.upper())
)
#: Names accepted as spellings of the latin-1 codec: every base name below in
#: both its lower-case and its upper-case form ('8859' has no letters, so it
#: contributes a single entry).
_LATIN1_ALIASES = frozenset(
    variant
    for name in ('latin-1', 'latin1', 'latin', 'l1', 'cp819', '8859',
                 'iso8859-1', 'iso-8859-1')
    for variant in (name, name.upper())
)
26+
def to_unicode(obj, encoding='utf-8', errors='replace', nonstring=None):
    '''Convert an object into a :class:`unicode` string

    :arg obj: Object to convert.  A :class:`unicode` string is returned
        unchanged; a byte :class:`str` is decoded with ``encoding`` and
        ``errors``.
    :kwarg encoding: Name of the codec used to turn byte strings into
        :class:`unicode`.  Defaults to ``utf-8``.
    :kwarg errors: Error handler handed to the decoder.  Defaults to
        ``replace``.
    :kwarg nonstring: Action to take when ``obj`` is not a string.  Any false
        value selects ``simplerepr``.  Recognized actions:

        :simplerepr: Use the result of ``obj.__unicode__()``; when that is
            unavailable, fails with a unicode error, or is empty, use
            ``str(obj)`` and then ``obj.__str__()``, ending with an empty
            unicode string if those fail as well.  A byte string result is
            decoded with ``encoding`` and ``errors``.
        :empty: Return an empty :class:`unicode` string.
        :passthru: Return ``obj`` itself, unconverted.
        :repr: Return ``repr(obj)``, decoded with ``encoding`` and
            ``errors`` when it is a byte string.
        :strict: Raise :exc:`TypeError`.
    :raises TypeError: if ``nonstring`` is ``strict`` and ``obj`` is not a
        string, or if ``nonstring`` is not one of the recognized actions.
    :returns: A :class:`unicode` string, except for ``passthru`` which
        returns ``obj`` as given.
    '''
    # The type checks are written inline instead of going through helper
    # predicates because this function is meant to be as fast as possible.
    if isinstance(obj, unicode):
        return obj
    if isinstance(obj, basestring):
        # Any recognized alias is mapped onto one fixed codec name.
        if encoding in _UTF8_ALIASES:
            codec = 'utf-8'
        elif encoding in _LATIN1_ALIASES:
            codec = 'latin-1'
        else:
            return obj.decode(encoding, errors)
        return unicode(obj, codec, errors)

    # From here on obj is not a string; a false nonstring means the default.
    action = nonstring or 'simplerepr'

    if action == 'empty':
        return u''

    if action == 'passthru':
        return obj

    if action == 'simplerepr':
        try:
            text = obj.__unicode__()
        except (AttributeError, UnicodeError):
            text = None
        if not text:
            try:
                text = str(obj)
            except UnicodeError:
                try:
                    text = obj.__str__()
                except (UnicodeError, AttributeError):
                    text = u''
        # The fallbacks above may have produced a byte string.
        if isinstance(text, str):
            text = unicode(text, encoding, errors)
        return text

    if action in ('repr', 'strict'):
        representation = repr(obj)
        if isinstance(representation, str):
            representation = unicode(representation, encoding, errors)
        if action == 'repr':
            return representation
        # strict: the message is %-formatted with a byte string, so the repr
        # is encoded back to bytes before being interpolated.
        raise TypeError('to_unicode was given "%(obj)s" which is neither'
                ' a byte string (str) or a unicode string' %
                {'obj': representation.encode(encoding, 'replace')})

    raise TypeError('nonstring value, %(param)s, is not set to a valid'
            ' action' % {'param': action})
0 commit comments