Skip to content

Commit ea3afcf

Browse files
committed
Add to_unicode() to convert strings (and other objects) to unicode.
1 parent f91feb4 commit ea3afcf

File tree

1 file changed

+56
-0
lines changed

1 file changed

+56
-0
lines changed

string_functions.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,59 @@ def count_newlines_from_end(str):
1616
i -= 1
1717
return len(str) - i
1818

19+
#: Spellings of the utf-8 codec name that :func:`to_unicode` maps straight to
#: ``'utf-8'``.  Each lower-case alias is listed next to its upper-case twin.
_UTF8_ALIASES = frozenset([
    'utf-8', 'UTF-8',
    'utf8', 'UTF8',
    'utf_8', 'UTF_8',
    'utf', 'UTF',
    'u8', 'U8',
])

#: Spellings of the latin-1 codec name that :func:`to_unicode` maps straight
#: to ``'latin-1'``.  ``'8859'`` has no distinct upper-case form.
_LATIN1_ALIASES = frozenset([
    'latin-1', 'LATIN-1',
    'latin1', 'LATIN1',
    'latin', 'LATIN',
    'l1', 'L1',
    'cp819', 'CP819',
    '8859',
    'iso8859-1', 'ISO8859-1',
    'iso-8859-1', 'ISO-8859-1',
])
26+
27+
def to_unicode(obj, encoding='utf-8', errors='replace', nonstring=None):
    '''Return a :class:`unicode` string built from ``obj``.

    :arg obj: Object to convert.  A :class:`unicode` string is returned
        unchanged; a byte :class:`str` is decoded.
    :kwarg encoding: Codec used to decode byte strings (including the byte
        strings produced while stringifying a non-string ``obj``).
        Defaults to ``utf-8``.
    :kwarg errors: Error handler passed to the decoder.  Defaults to
        ``replace``.
    :kwarg nonstring: Action to take when ``obj`` is not a string.  A false
        value (such as the default ``None``) means ``simplerepr``.

        :empty: return an empty :class:`unicode` string
        :passthru: return ``obj`` itself, unconverted
        :simplerepr: try ``obj.__unicode__()``, then ``str(obj)``, then
            ``obj.__str__()``; use an empty string if all of them fail
        :repr: return ``repr(obj)``, decoded if it is a byte string
        :strict: raise :exc:`TypeError`
    :raises TypeError: if ``nonstring`` is ``strict`` and ``obj`` is not a
        string, or if ``nonstring`` is not one of the actions listed above.
    :returns: a :class:`unicode` string, except for ``passthru`` which
        returns ``obj`` as is.
    '''
    # Plain isinstance checks are used inline (rather than helper functions)
    # to keep the common string path as cheap as possible.
    if isinstance(obj, basestring):
        if isinstance(obj, unicode):
            return obj
        # Normalize the well-known codec spellings to one canonical name.
        if encoding in _UTF8_ALIASES:
            codec = 'utf-8'
        elif encoding in _LATIN1_ALIASES:
            codec = 'latin-1'
        else:
            return obj.decode(encoding, errors)
        return unicode(obj, codec, errors)

    # From here on obj is not a string; pick the fallback strategy.
    action = nonstring or 'simplerepr'

    if action == 'empty':
        return u''

    if action == 'passthru':
        return obj

    if action == 'simplerepr':
        try:
            text = obj.__unicode__()
        except (AttributeError, UnicodeError):
            text = None
        if not text:
            # No usable __unicode__ result: fall back to the byte string
            # conversions, ending with an empty string if nothing works.
            try:
                text = str(obj)
            except UnicodeError:
                try:
                    text = obj.__str__()
                except (UnicodeError, AttributeError):
                    text = u''
        if isinstance(text, str):
            text = unicode(text, encoding, errors)
        return text

    if action in ('repr', 'strict'):
        rendered = repr(obj)
        if isinstance(rendered, str):
            rendered = unicode(rendered, encoding, errors)
        if action == 'repr':
            return rendered
        # 'strict': a non-string obj is an error; report it by its repr.
        details = {'obj': rendered.encode(encoding, 'replace')}
        raise TypeError('to_unicode was given "%(obj)s" which is neither'
                        ' a byte string (str) or a unicode string' % details)

    raise TypeError('nonstring value, %(param)s, is not set to a valid'
                    ' action' % {'param': action})

0 commit comments

Comments
 (0)