Skip to content
Open
Changes from 1 commit
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
05ae5ad
Add Ascii85, base85, and Z85 support to binascii
kangtastic Mar 8, 2023
aa06c5d
Restore base64.py
kangtastic Apr 26, 2025
6377440
Create _base64 module with wrappers for accelerated functions
kangtastic Apr 26, 2025
6c0e4a3
Test both Python and C codepaths in base64
kangtastic Apr 26, 2025
ce4773c
Match behavior between Python and C base 85 functions
kangtastic Apr 26, 2025
4072e3b
Add Z85 tests to binascii
kangtastic Apr 27, 2025
bc9217f
Update generated files
kangtastic Apr 27, 2025
2c40ba0
Avoid importing functools
kangtastic Apr 28, 2025
fd9eaf7
Avoid circular import in _base64
kangtastic Apr 28, 2025
4746d18
Do not use a decorator for changing exception type
kangtastic Apr 28, 2025
d075593
Test Python and C codepaths in base64 using mixins
kangtastic Apr 28, 2025
6d65fec
Remove leading underscore from functions in private module
kangtastic Apr 29, 2025
a241356
Merge branch 'main' into gh-101178-rework-base85
serhiy-storchaka Dec 24, 2025
0df9a40
Use more modern C API.
serhiy-storchaka Dec 24, 2025
60fbd7c
Fix tests.
serhiy-storchaka Dec 24, 2025
a070887
Merge branch 'main' into gh-101178-rework-base85
serhiy-storchaka Dec 25, 2025
167e83e
Fix new tests.
serhiy-storchaka Dec 25, 2025
01df442
Optimize binascii.b2a_ascii85().
serhiy-storchaka Dec 26, 2025
7885918
Apply suggestions from code review
serhiy-storchaka Dec 27, 2025
1e928e3
Update C style to more closely adhere to PEP-7
kangtastic Dec 28, 2025
2691a0a
Remove pure-Python base-85-related codepaths in base64
kangtastic Dec 28, 2025
b9d27bd
Remove now-unnecessary _base64 module and fix tests
kangtastic Dec 28, 2025
780517a
Separate Z85 from Base85 on the Python API side
kangtastic Dec 28, 2025
bc9a66d
Fix tests after separating Base85 from Z85
kangtastic Dec 28, 2025
dc1d3fc
Merge branch 'main' into gh-101178-rework-base85
kangtastic Dec 28, 2025
c5de5a1
Update generated files after merging main
kangtastic Dec 28, 2025
3bb3b18
Update Misc/NEWS.d and Misc/ACKS
kangtastic Dec 28, 2025
6f09fa8
Update generated files again
kangtastic Dec 29, 2025
6d8f897
Fix typo in NEWS entry
kangtastic Dec 29, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Remove pure-Python base-85-related codepaths in base64
  • Loading branch information
kangtastic committed Dec 28, 2025
commit 2691a0aba0dffd0433d654ece0b2de53a6126946
206 changes: 8 additions & 198 deletions Lib/base64.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
# Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support
# Modified 22-May-2007 by Guido van Rossum to use bytes everywhere

import struct
import binascii


Expand Down Expand Up @@ -290,36 +289,6 @@ def b16decode(s, casefold=False):
#
# Ascii85 encoding/decoding
#

_a85chars = None
_a85chars2 = None
_A85START = b"<~"
_A85END = b"~>"

def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):
# Helper function for a85encode and b85encode
if not isinstance(b, bytes_types):
b = memoryview(b).tobytes()

padding = (-len(b)) % 4
if padding:
b = b + b'\0' * padding
words = struct.Struct('!%dI' % (len(b) // 4)).unpack(b)

chunks = [b'z' if foldnuls and not word else
b'y' if foldspaces and word == 0x20202020 else
(chars2[word // 614125] +
chars2[word // 85 % 7225] +
chars[word % 85])
for word in words]

if padding and not pad:
if chunks[-1] == b'z':
chunks[-1] = chars[0] * 5
chunks[-1] = chunks[-1][:-padding]

return b''.join(chunks)

def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
"""Encode bytes-like object b using Ascii85 and return a bytes object.

Expand All @@ -337,29 +306,8 @@ def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
adobe controls whether the encoded byte sequence is framed with <~ and ~>,
which is used by the Adobe implementation.
"""
global _a85chars, _a85chars2
# Delay the initialization of tables to not waste memory
# if the function is never called
if _a85chars2 is None:
_a85chars = [bytes((i,)) for i in range(33, 118)]
_a85chars2 = [(a + b) for a in _a85chars for b in _a85chars]

result = _85encode(b, _a85chars, _a85chars2, pad, True, foldspaces)

if adobe:
result = _A85START + result
if wrapcol:
wrapcol = max(2 if adobe else 1, wrapcol)
chunks = [result[i: i + wrapcol]
for i in range(0, len(result), wrapcol)]
if adobe:
if len(chunks[-1]) + 2 > wrapcol:
chunks.append(b'')
result = b'\n'.join(chunks)
if adobe:
result += _A85END

return result
return binascii.b2a_ascii85(b, fold_spaces=foldspaces,
wrap=adobe, width=wrapcol, pad=pad)

def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):
"""Decode the Ascii85 encoded bytes-like object or ASCII string b.
Expand All @@ -377,152 +325,34 @@ def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):

The result is returned as a bytes object.
"""
b = _bytes_from_decode_data(b)
if adobe:
if not b.endswith(_A85END):
raise ValueError(
"Ascii85 encoded byte sequences must end "
"with {!r}".format(_A85END)
)
if b.startswith(_A85START):
b = b[2:-2] # Strip off start/end markers
else:
b = b[:-2]
#
# We have to go through this stepwise, so as to ignore spaces and handle
# special short sequences
#
packI = struct.Struct('!I').pack
decoded = []
decoded_append = decoded.append
curr = []
curr_append = curr.append
curr_clear = curr.clear
for x in b + b'u' * 4:
if b'!'[0] <= x <= b'u'[0]:
curr_append(x)
if len(curr) == 5:
acc = 0
for x in curr:
acc = 85 * acc + (x - 33)
try:
decoded_append(packI(acc))
except struct.error:
raise ValueError('Ascii85 overflow') from None
curr_clear()
elif x == b'z'[0]:
if curr:
raise ValueError('z inside Ascii85 5-tuple')
decoded_append(b'\0\0\0\0')
elif foldspaces and x == b'y'[0]:
if curr:
raise ValueError('y inside Ascii85 5-tuple')
decoded_append(b'\x20\x20\x20\x20')
elif x in ignorechars:
# Skip whitespace
continue
else:
raise ValueError('Non-Ascii85 digit found: %c' % x)

result = b''.join(decoded)
padding = 4 - len(curr)
if padding:
# Throw away the extra padding
result = result[:-padding]
return result

# The following code is originally taken (with permission) from Mercurial

_b85alphabet = (b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
b"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~")
_b85chars = None
_b85chars2 = None
_b85dec = None
return binascii.a2b_ascii85(b, fold_spaces=foldspaces,
wrap=adobe, ignore=ignorechars)

def b85encode(b, pad=False):
"""Encode bytes-like object b in base85 format and return a bytes object.

If pad is true, the input is padded with b'\\0' so its length is a multiple of
4 bytes before encoding.
"""
global _b85chars, _b85chars2
# Delay the initialization of tables to not waste memory
# if the function is never called
if _b85chars2 is None:
_b85chars = [bytes((i,)) for i in _b85alphabet]
_b85chars2 = [(a + b) for a in _b85chars for b in _b85chars]
return _85encode(b, _b85chars, _b85chars2, pad)
return binascii.b2a_base85(b, pad=pad, newline=False)

def b85decode(b):
"""Decode the base85-encoded bytes-like object or ASCII string b

The result is returned as a bytes object.
"""
global _b85dec
# Delay the initialization of tables to not waste memory
# if the function is never called
if _b85dec is None:
# we don't assign to _b85dec directly to avoid issues when
# multiple threads call this function simultaneously
b85dec_tmp = [None] * 256
for i, c in enumerate(_b85alphabet):
b85dec_tmp[c] = i
_b85dec = b85dec_tmp

b = _bytes_from_decode_data(b)
padding = (-len(b)) % 5
b = b + b'~' * padding
out = []
packI = struct.Struct('!I').pack
for i in range(0, len(b), 5):
chunk = b[i:i + 5]
acc = 0
try:
for c in chunk:
acc = acc * 85 + _b85dec[c]
except TypeError:
for j, c in enumerate(chunk):
if _b85dec[c] is None:
raise ValueError('bad base85 character at position %d'
% (i + j)) from None
raise
try:
out.append(packI(acc))
except struct.error:
raise ValueError('base85 overflow in hunk starting at byte %d'
% i) from None

result = b''.join(out)
if padding:
result = result[:-padding]
return result

_z85alphabet = (b'0123456789abcdefghijklmnopqrstuvwxyz'
b'ABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#')
# Translating b85 valid but z85 invalid chars to b'\x00' is required
# to prevent them from being decoded as b85 valid chars.
_z85_b85_decode_diff = b';_`|~'
_z85_decode_translation = bytes.maketrans(
_z85alphabet + _z85_b85_decode_diff,
_b85alphabet + b'\x00' * len(_z85_b85_decode_diff)
)
_z85_encode_translation = bytes.maketrans(_b85alphabet, _z85alphabet)
return binascii.a2b_base85(b, strict_mode=True)

def z85encode(s, pad=False):
"""Encode bytes-like object b in z85 format and return a bytes object."""
return b85encode(s, pad).translate(_z85_encode_translation)
return binascii.b2a_base85(s, pad=pad, newline=False, z85=True)

def z85decode(s):
"""Decode the z85-encoded bytes-like object or ASCII string b

The result is returned as a bytes object.
"""
s = _bytes_from_decode_data(s)
s = s.translate(_z85_decode_translation)
try:
return b85decode(s)
except ValueError as e:
raise ValueError(e.args[0].replace('base85', 'z85')) from None
return binascii.a2b_base85(s, strict_mode=True, z85=True)

# Legacy interface. This code could be cleaned up since I don't believe
# binascii has any line length limitations. It just doesn't seem worth it
Expand Down Expand Up @@ -579,26 +409,6 @@ def decodebytes(s):
return binascii.a2b_base64(s)


# Use accelerated implementations of originally pure-Python parts if possible.
try:
from _base64 import (a85encode as _a85encode, a85decode as _a85decode,
b85encode as _b85encode, b85decode as _b85decode,
z85encode as _z85encode, z85decode as _z85decode)
# Avoid expensive import of update_wrapper() from functools.
def _copy_attributes(func, src_func):
func.__doc__ = src_func.__doc__
func.__module__ = "base64"
return func
a85encode = _copy_attributes(_a85encode, a85encode)
a85decode = _copy_attributes(_a85decode, a85decode)
b85encode = _copy_attributes(_b85encode, b85encode)
b85decode = _copy_attributes(_b85decode, b85decode)
z85encode = _copy_attributes(_z85encode, z85encode)
z85decode = _copy_attributes(_z85decode, z85decode)
except ImportError:
pass


# Usable as a script...
def main():
"""Small main program"""
Expand Down