diff --git a/.github/workflows/black.yaml b/.github/workflows/black.yaml deleted file mode 100644 index eda8d076..00000000 --- a/.github/workflows/black.yaml +++ /dev/null @@ -1,21 +0,0 @@ -name: Black - -on: ["push", "pull_request"] - -jobs: - black: - runs-on: ubuntu-latest - steps: - - name: Setup Python - uses: actions/setup-python@v1 - with: - python-version: '3.x' - architecture: 'x64' - - - name: Checkout - uses: actions/checkout@v1 - - - name: Black Code Formatter - run: | - pip install black - black --diff --check msgpack/ test/ setup.py diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml new file mode 100644 index 00000000..b696b926 --- /dev/null +++ b/.github/workflows/docs.yaml @@ -0,0 +1,33 @@ +name: docs + +on: ["push", "pull_request"] + +jobs: + docs: + # We want to run on external PRs, but not on our own internal PRs as they'll be run + # by the push to the branch. + if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository + + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + cache: "pip" + cache-dependency-path: | + requirements.txt + docs/requirements.txt + + - name: Build + run: | + pip install -r requirements.txt + make cython + + - name: Sphinx Documentation Generator + run: | + pip install -r docs/requirements.txt + make docs diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml new file mode 100644 index 00000000..198cf7b5 --- /dev/null +++ b/.github/workflows/lint.yaml @@ -0,0 +1,22 @@ +name: lint + +on: ["push", "pull_request"] + +jobs: + lint: + # We want to run on external PRs, but not on our own internal PRs as they'll be run + # by the push to the branch. 
+ if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository + + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: ruff check + run: | + pipx run ruff check --diff msgpack/ test/ setup.py + + - name: ruff format + run: | + pipx run ruff format --diff msgpack/ test/ setup.py diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml deleted file mode 100644 index 89bdb4e4..00000000 --- a/.github/workflows/linux.yml +++ /dev/null @@ -1,67 +0,0 @@ -name: Build Linux Wheels -on: - push: - pull_request: - create: - -jobs: - build: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v1 - - - name: Set up Python 3.8 - uses: actions/setup-python@v1 - with: - python-version: 3.8 - - - name: Cythonize - shell: bash - run: | - pip install -U pip - pip -V - pip install -r requirements.txt - make cython - #python setup.py sdist - - - name: Build wheels - shell: bash - run: | - make linux-wheel - - - name: Run test (3.8) - run: | - pip install pytest - pip install -v msgpack --only-binary :all: --no-index -f dist/wheelhouse - pytest -v test - - - - name: Set up Python 3.7 - uses: actions/setup-python@v1 - with: - python-version: 3.7 - - - name: Run test (3.7) - run: | - pip install pytest - pip install -v msgpack --only-binary :all: --no-index -f dist/wheelhouse - pytest -v test - - - name: Set up Python 3.6 - uses: actions/setup-python@v1 - with: - python-version: 3.6 - - - name: Run test (3.6) - run: | - pip install pytest - pip install -v msgpack --only-binary :all: --no-index -f dist/wheelhouse - pytest -v test - - - - name: Upload Wheels - uses: actions/upload-artifact@v1 - with: - name: linux-wheels - path: ./dist/wheelhouse/ diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml deleted file mode 100644 index fb2c67f4..00000000 --- a/.github/workflows/mac.yml +++ /dev/null @@ -1,76 +0,0 @@ -name: Build macOS Wheels -on: - push: - pull_request: - create: - -jobs: - build: - runs-on: macos-latest - - steps: - - name: Checkout - uses: actions/checkout@v1 - - - name: Set up Python 3.8 - uses: actions/setup-python@v1 - with: - python-version: "3.8" - - - name: Cythonize - run: | - pip install -U pip - pip install -r requirements.txt - make cython - - - name: Build wheel - run: | - pip install setuptools wheel - python setup.py bdist_wheel - - - name: Run test - run: | - pip install pytest - pip install -v msgpack --only-binary :all: -f dist/ --no-index - pytest -v test - - - - name: Set up Python 3.7 - uses: actions/setup-python@v1 - with: - python-version: "3.7" - - - name: Build wheel - run: | - pip install setuptools wheel - python setup.py bdist_wheel - - - name: Run test - run: | - pip install pytest - pip install -v msgpack --only-binary :all: -f dist/ --no-index - pytest -v test - - - - name: Set up Python 3.6 - uses: actions/setup-python@v1 - with: - python-version: "3.6" - - - name: Build wheel - run: | - pip install setuptools wheel - python setup.py bdist_wheel - - - name: Run test - run: | - pip install pytest - pip install -v msgpack --only-binary :all: -f dist/ --no-index - pytest -v test - - - - name: Upload Wheels - uses: actions/upload-artifact@v1 - with: - name: macos-wheels - path: ./dist/ diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 00000000..6b1664ae --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,61 @@ +name: Run tests +on: + push: + branches: [main] + pull_request: + create: + +jobs: + 
test: + strategy: + matrix: + os: ["ubuntu-latest", "windows-latest", "windows-11-arm", "macos-latest"] + py: ["3.14", "3.14t", "3.13", "3.12", "3.11", "3.10"] + exclude: + - os: windows-11-arm + py: "3.10" + runs-on: ${{ matrix.os }} + name: Run test with Python ${{ matrix.py }} on ${{ matrix.os }} + + steps: + - name: Checkout + uses: actions/checkout@v5 + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: ${{ matrix.py }} + allow-prereleases: true + cache: "pip" + + - name: Prepare + shell: bash + run: | + python -m pip install -r requirements.txt pytest + + - name: Build + shell: bash + run: | + make cython + pip install . + + - name: Test (C extension) + shell: bash + run: | + pytest -v test + + - name: Test (pure Python fallback) + shell: bash + run: | + MSGPACK_PUREPYTHON=1 pytest -v test + + - name: build packages + shell: bash + run: | + python -m build -nv + + - name: upload packages + uses: actions/upload-artifact@v4 + with: + name: dist-${{ matrix.os }}-${{ matrix.py }} + path: dist diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml new file mode 100644 index 00000000..531abbc3 --- /dev/null +++ b/.github/workflows/wheel.yml @@ -0,0 +1,88 @@ +name: Build sdist and Wheels +on: + push: + branches: [main] + release: + types: + - published + workflow_dispatch: + +jobs: + build_wheels: + strategy: + matrix: + # macos-13 is for intel + os: ["ubuntu-24.04", "ubuntu-24.04-arm", "windows-latest", "windows-11-arm", "macos-13", "macos-latest"] + runs-on: ${{ matrix.os }} + name: Build wheels on ${{ matrix.os }} + + steps: + - uses: actions/checkout@v6 + - uses: actions/setup-python@v6 + with: + python-version: "3.x" + cache: "pip" + - name: Cythonize + shell: bash + run: | + pip install -r requirements.txt + make cython + + - name: Build + uses: pypa/cibuildwheel@v3.3.0 + env: + CIBW_TEST_REQUIRES: "pytest" + CIBW_TEST_COMMAND: "pytest {package}/test" + CIBW_SKIP: "pp* cp38-* cp39-* cp310-win_arm64" + + - name: Build sdist + if: runner.os == 'Linux' && runner.arch == 'X64' + run: | + pip install build + python -m build -s -o wheelhouse + + - name: Upload Wheels to artifact + uses: actions/upload-artifact@v4 + with: + name: wheels-${{ matrix.os }} + path: wheelhouse + + # combine all wheels into one artifact + combine_wheels: + needs: [build_wheels] + runs-on: ubuntu-latest + steps: + - uses: actions/download-artifact@v4 + with: + # unpacks all CIBW artifacts into dist/ + pattern: wheels-* + path: dist + merge-multiple: true + + - name: Upload Wheels to artifact + uses: actions/upload-artifact@v4 + with: + name: wheels-all + path: dist + + # https://github.com/pypa/cibuildwheel/blob/main/examples/github-deploy.yml + upload_pypi: + needs: [build_wheels] + runs-on: ubuntu-latest + environment: pypi + permissions: + id-token: write + if: github.event_name == 'release' && github.event.action == 'published' + # or, alternatively, upload to PyPI on every tag starting with 'v' (remove on: release above to use this) + # if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') + steps: + - uses: actions/download-artifact@v4 + with: + # unpacks all CIBW artifacts into dist/ + pattern: wheels-* + path: dist + merge-multiple: true + + - uses: pypa/gh-action-pypi-publish@release/v1 + #with: + # To test: repository-url: https://test.pypi.org/legacy/ diff --git a/.github/workflows/windows.yaml b/.github/workflows/windows.yaml deleted file mode 100644 
index cecb8258..00000000 --- a/.github/workflows/windows.yaml +++ /dev/null @@ -1,70 +0,0 @@ -name: Build and test windows wheels -on: - push: - branches: - - master - - test - pull_request: - create: - -jobs: - build: - runs-on: windows-latest - steps: - - name: Checkout - uses: actions/checkout@v1 - - - name: Cythonize - shell: bash - run: | - pip install -U Cython - make cython - #python setup.py sdist - - - name: Python 3.6 (amd64) - env: - PYTHON: "py -3.6-64" - shell: bash - run: | - ci/runtests.sh - - - name: Python 3.6 (x86) - env: - PYTHON: "py -3.6-32" - shell: bash - run: | - ci/runtests.sh - - - name: Python 3.7 (amd64) - env: - PYTHON: "py -3.7-64" - shell: bash - run: | - ci/runtests.sh - - - name: Python 3.7 (x86) - env: - PYTHON: "py -3.7-32" - shell: bash - run: | - ci/runtests.sh - - - name: Python 3.8 (amd64) - env: - PYTHON: "py -3.8-64" - shell: bash - run: | - ci/runtests.sh - - - name: Python 3.8 (x86) - env: - PYTHON: "py -3.8-32" - shell: bash - run: | - ci/runtests.sh - - - name: Upload Wheels - uses: actions/upload-artifact@v1 - with: - name: win-wheels - path: ./dist diff --git a/.gitignore b/.gitignore index 800f1c22..341be631 100644 --- a/.gitignore +++ b/.gitignore @@ -2,11 +2,13 @@ MANIFEST build/* dist/* .tox +.python-version *.pyc *.pyo *.so *~ msgpack/__version__.py +msgpack/*.c msgpack/*.cpp *.egg-info /venv diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 00000000..88d87182 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,24 @@ +# Read the Docs configuration file for Sphinx projects. +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details. + +version: 2 + +build: + os: ubuntu-22.04 + tools: + python: "3.11" + apt_packages: + - build-essential + jobs: + pre_install: + - pip install -r requirements.txt + - make cython + +python: + install: + - method: pip + path: . + - requirements: docs/requirements.txt + +sphinx: + configuration: docs/conf.py diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 5132b4c4..00000000 --- a/.travis.yml +++ /dev/null @@ -1,89 +0,0 @@ -version: ~> 1.0 -dist: xenial -language: python -cache: pip -arch: - - amd64 - - arm64 -python: - # Available Python (PyPy) can be listed by: - # - # $ aws s3 ls s3://travis-python-archives/binaries/ubuntu/16.04/x86_64/ - - "3.4" - - "3.5" - - "3.6" - - "3.7" - - "3.8" - - "3.9-dev" - - -_pure: &pure - install: - - pip install -U pip - - pip install -U pytest pytest-cov codecov - - pip install . 
- script: - - pytest --cov=msgpack -v test - -matrix: - include: - - name: 32bit build - language: python - services: - - docker - env: - - DOCKER_IMAGE=quay.io/pypa/manylinux1_i686 - install: - - pip install -U pip - - pip install -r requirements.txt - - make cython - - docker pull $DOCKER_IMAGE - script: - - docker run --rm -v `pwd`:/io -w /io $DOCKER_IMAGE /io/docker/runtests.sh - - - arch: arm64 - name: arm64 32bit build - language: python - services: - - docker - env: - - DOCKER_IMAGE=quay.io/pypa/manylinux2014_aarch64 - install: - - pip install -U pip - - pip install -r requirements.txt - - make cython - - docker pull $DOCKER_IMAGE - script: - - docker run --rm -v `pwd`:/io -w /io $DOCKER_IMAGE /io/docker/runtests.sh - - - name: "Python 2 (fallback)" - python: "2.7" - <<: *pure - - - name: "pypy2.7" - python: "pypy2.7-7.1.1" - <<: *pure - - - name: "pypy3" - python: "pypy3.6-7.1.1" - <<: *pure - -install: - - pip install -U pip - - pip install -U pytest pytest-cov codecov - - pip install -r requirements.txt # Cython - - make cython - - pip install -e . - -script: - - python -c 'import sys; print(hex(sys.maxsize))' - - python -c 'from msgpack import _cmsgpack' - - pytest --cov=msgpack -v test - - MSGPACK_PUREPYTHON=x pytest --cov=msgpack -v test - -after_success: - - if [ -f .coverage ]; then - codecov; - fi - -# vim: sw=2 ts=2 diff --git a/ChangeLog.rst b/ChangeLog.rst index d922e847..beeab15c 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,3 +1,137 @@ +1.1.2 +===== + +Release Date: 2025-10-08 + +This release does not change source code. It updates only building wheels: + +* Update Cython to v3.1.4 +* Update cibuildwheel to v3.2.0 +* Drop Python 3.8 +* Add Python 3.14 +* Add windows-arm + +1.1.1 +===== + +Release Date: 2025-06-13 + +* No change from 1.1.1rc1. + +1.1.1rc1 +======== + +Release Date: 2025-06-06 + +* Update Cython to 3.1.1 and cibuildwheel to 2.23.3. + +1.1.0 +===== + +Release Date: 2024-09-10 + +* use ``PyLong_*`` instead of ``PyInt_*`` for compatibility with + future Cython. (#620) + +1.1.0rc2 +======== + +Release Date: 2024-08-19 + +* Update Cython to 3.0.11 for better Python 3.13 support. +* Update cibuildwheel to 2.20.0 to build Python 3.13 wheels. + +1.1.0rc1 +======== + +Release Date: 2024-05-07 + +* Update Cython to 3.0.10 to reduce C warnings and future support for Python 3.13. +* Stop using C++ mode in Cython to reduce compile error on some compilers. +* ``Packer()`` has ``buf_size`` option to specify initial size of + internal buffer to reduce reallocation. +* The default internal buffer size of ``Packer()`` is reduced from + 1MiB to 256KiB to optimize for common use cases. Use ``buf_size`` + if you are packing large data. +* ``Timestamp.to_datetime()`` and ``Timestamp.from_datetime()`` become + more accurate by avoiding floating point calculations. (#591) +* The Cython code for ``Unpacker`` has been slightly rewritten for maintainability. +* The fallback implementation of ``Packer()`` and ``Unpacker()`` now uses keyword-only + arguments to improve compatibility with the Cython implementation. + +1.0.8 +===== + +Release Date: 2024-03-01 + +* Update Cython to 3.0.8. This fixes memory leak when iterating + ``Unpacker`` object on Python 3.12. +* Do not include C/Cython files in binary wheels. + + +1.0.7 +===== + +Release Date: 2023-09-28 + +* Fix build error of extension module on Windows. (#567) +* ``setup.py`` doesn't skip build error of extension module. (#568) + + +1.0.6 +===== + +Release Date: 2023-09-21 + +.. 
note:: + v1.0.6 Wheels for Windows don't contain extension module. + Please upgrade to v1.0.7 or newer. + +* Add Python 3.12 wheels (#517) +* Remove Python 2.7, 3.6, and 3.7 support + + +1.0.5 +===== + +Release Date: 2023-03-08 + +* Use ``__BYTE_ORDER__`` instead of ``__BYTE_ORDER`` for portability. (#513, #514) +* Add Python 3.11 wheels (#517) +* fallback: Fix packing multidimensional memoryview (#527) + +1.0.4 +===== + +Release Date: 2022-06-03 + +* Support Python 3.11 (beta). +* Don't define `__*_ENDIAN__` macro on Unix. by @methane in https://github.com/msgpack/msgpack-python/pull/495 +* Use PyFloat_Pack8() on Python 3.11a7 by @vstinner in https://github.com/msgpack/msgpack-python/pull/499 +* Fix Unpacker max_buffer_length handling by @methane in https://github.com/msgpack/msgpack-python/pull/506 + +1.0.3 +===== + +Release Date: 2021-11-24 JST + +* Fix Docstring (#459) +* Fix error formatting (#463) +* Improve error message about strict_map_key (#485) + +1.0.2 +===== + +* Fix year 2038 problem regression in 1.0.1. (#451) + +1.0.1 +===== + +* Add Python 3.9 and linux/arm64 wheels. (#439) +* Fixed Unpacker.tell() after read_bytes() (#426) +* Fixed unpacking datetime before epoch on Windows (#433) +* Fixed fallback Packer didn't check DateTime.tzinfo (#434) + 1.0.0 ===== @@ -57,7 +191,7 @@ Important changes * unpacker: Default value of input limits are smaller than before to avoid DoS attack. If you need to handle large data, you need to specify limits manually. (#319) -* Unpacker doesn't wrap underlaying ``ValueError`` (including ``UnicodeError``) into +* Unpacker doesn't wrap underlying ``ValueError`` (including ``UnicodeError``) into ``UnpackValueError``. If you want to catch all exception during unpack, you need to use ``try ... except Exception`` with minimum try code block. (#323, #233) diff --git a/DEVELOP.md b/DEVELOP.md index 9c823c34..27adf8c0 100644 --- a/DEVELOP.md +++ b/DEVELOP.md @@ -1,13 +1,5 @@ # Developer's note -## Wheels - -Wheels for macOS and Linux are built on Travis and AppVeyr, in -[methane/msgpack-wheels](https://github.com/methane/msgpack-wheels) repository. - -Wheels for Windows are built on Github Actions in this repository. - - ### Build ``` diff --git a/MANIFEST.in b/MANIFEST.in index 57d84a4c..6317706e 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,5 +1,5 @@ include setup.py include COPYING include README.md -recursive-include msgpack *.h *.c *.pyx *.cpp +recursive-include msgpack *.h *.c *.pyx recursive-include test *.py diff --git a/Makefile b/Makefile index 2a4c0af8..51f3e0ef 100644 --- a/Makefile +++ b/Makefile @@ -1,14 +1,28 @@ +PYTHON_SOURCES = msgpack test setup.py + .PHONY: all all: cython python setup.py build_ext -i -f -.PHONY: black -black: - black msgpack/ test/ setup.py +.PHONY: format +format: + ruff format $(PYTHON_SOURCES) + +.PHONY: lint +lint: + ruff check $(PYTHON_SOURCES) + +.PHONY: doc +doc: + cd docs && sphinx-build -n -v -W --keep-going -b html -d doctrees . 
html + +.PHONY: pyupgrade +pyupgrade: + @find $(PYTHON_SOURCES) -name '*.py' -type f -exec pyupgrade --py37-plus '{}' \; .PHONY: cython cython: - cython --cplus msgpack/_cmsgpack.pyx + cython msgpack/_cmsgpack.pyx .PHONY: test test: cython @@ -25,15 +39,21 @@ clean: rm -rf build rm -f msgpack/_cmsgpack.cpp rm -f msgpack/_cmsgpack.*.so + rm -f msgpack/_cmsgpack.*.pyd rm -rf msgpack/__pycache__ rm -rf test/__pycache__ .PHONY: update-docker update-docker: - docker pull quay.io/pypa/manylinux2010_i686 - docker pull quay.io/pypa/manylinux2010_x86_64 + docker pull quay.io/pypa/manylinux2014_i686 + docker pull quay.io/pypa/manylinux2014_x86_64 + docker pull quay.io/pypa/manylinux2014_aarch64 .PHONY: linux-wheel linux-wheel: - docker run --rm -v `pwd`:/project -w /project quay.io/pypa/manylinux2010_i686 bash docker/buildwheel.sh - docker run --rm -v `pwd`:/project -w /project quay.io/pypa/manylinux2010_x86_64 bash docker/buildwheel.sh + docker run --rm -v `pwd`:/project -w /project quay.io/pypa/manylinux2014_i686 bash docker/buildwheel.sh + docker run --rm -v `pwd`:/project -w /project quay.io/pypa/manylinux2014_x86_64 bash docker/buildwheel.sh + +.PHONY: linux-arm64-wheel +linux-arm64-wheel: + docker run --rm -v `pwd`:/project -w /project quay.io/pypa/manylinux2014_aarch64 bash docker/buildwheel.sh diff --git a/README.md b/README.md index ac52d94b..1f063242 100644 --- a/README.md +++ b/README.md @@ -1,126 +1,55 @@ # MessagePack for Python -[![Build Status](https://travis-ci.org/msgpack/msgpack-python.svg?branch=master)](https://travis-ci.org/msgpack/msgpack-python) +[![Build Status](https://github.com/msgpack/msgpack-python/actions/workflows/wheel.yml/badge.svg)](https://github.com/msgpack/msgpack-python/actions/workflows/wheel.yml) [![Documentation Status](https://readthedocs.org/projects/msgpack-python/badge/?version=latest)](https://msgpack-python.readthedocs.io/en/latest/?badge=latest) -## What's this +## What is this? [MessagePack](https://msgpack.org/) is an efficient binary serialization format. It lets you exchange data among multiple languages like JSON. But it's faster and smaller. This package provides CPython bindings for reading and writing MessagePack data. - -## Very important notes for existing users - -### PyPI package name - -TL;DR: When upgrading from msgpack-0.4 or earlier, don't do `pip install -U msgpack-python`. -Do `pip uninstall msgpack-python; pip install -U msgpack` instead. - -Package name on PyPI was changed to msgpack from 0.5. -I upload transitional package (msgpack-python 0.5 which depending on msgpack) -for smooth transition from msgpack-python to msgpack. - -Sadly, this doesn't work for upgrade install. After `pip install -U msgpack-python`, -msgpack is removed, and `import msgpack` fail. - - -### Compatibility with the old format - -You can use `use_bin_type=False` option to pack `bytes` -object into raw type in the old msgpack spec, instead of bin type in new msgpack spec. - -You can unpack old msgpack format using `raw=True` option. -It unpacks str (raw) type in msgpack into Python bytes. - -See note below for detail. - - -### Major breaking changes in msgpack 1.0 - -* Python 2 - - * The extension module does not support Python 2 anymore. 
- The pure Python implementation (`msgpack.fallback`) is used for Python 2. - -* Packer - - * `use_bin_type=True` by default. bytes are encoded in bin type in msgpack. - **If you are still using Python 2, you must use unicode for all string types.** - You can use `use_bin_type=False` to encode into old msgpack format. - * `encoding` option is removed. UTF-8 is used always. - -* Unpacker - - * `raw=False` by default. It assumes str types are valid UTF-8 string - and decode them to Python str (unicode) object. - * `encoding` option is removed. You can use `raw=True` to support old format. - * Default value of `max_buffer_size` is changed from 0 to 100 MiB. - * Default value of `strict_map_key` is changed to True to avoid hashdos. - You need to pass `strict_map_key=False` if you have data which contain map keys - which type is not bytes or str. - - ## Install - - $ pip install msgpack - +``` +$ pip install msgpack +``` ### Pure Python implementation -The extension module in msgpack (`msgpack._cmsgpack`) does not support -Python 2 and PyPy. - -But msgpack provides a pure Python implementation (`msgpack.fallback`) -for PyPy and Python 2. +The extension module in msgpack (`msgpack._cmsgpack`) does not support PyPy. -Since the [pip](https://pip.pypa.io/) uses the pure Python implementation, -Python 2 support will not be dropped in the foreseeable future. +But msgpack provides a pure Python implementation (`msgpack.fallback`) for PyPy. ### Windows -When you can't use a binary distribution, you need to install Visual Studio -or Windows SDK on Windows. -Without extension, using pure Python implementation on CPython runs slowly. +If you can't use a binary distribution, you need to install Visual Studio +or the Windows SDK on Windows. +Without the extension, the pure Python implementation on CPython runs slowly. ## How to use -NOTE: In examples below, I use `raw=False` and `use_bin_type=True` for users -using msgpack < 1.0. These options are default from msgpack 1.0 so you can omit them. - - ### One-shot pack & unpack Use `packb` for packing and `unpackb` for unpacking. -msgpack provides `dumps` and `loads` as an alias for compatibility with +msgpack provides `dumps` and `loads` as aliases for compatibility with `json` and `pickle`. -`pack` and `dump` packs to a file-like object. -`unpack` and `load` unpacks from a file-like object. +`pack` and `dump` pack to a file-like object. +`unpack` and `load` unpack from a file-like object. ```pycon - >>> import msgpack - >>> msgpack.packb([1, 2, 3], use_bin_type=True) - '\x93\x01\x02\x03' - >>> msgpack.unpackb(_, raw=False) - [1, 2, 3] +>>> import msgpack +>>> msgpack.packb([1, 2, 3]) +'\x93\x01\x02\x03' +>>> msgpack.unpackb(_) +[1, 2, 3] ``` -`unpack` unpacks msgpack's array to Python's list, but can also unpack to tuple: - -```pycon - >>> msgpack.unpackb(b'\x93\x01\x02\x03', use_list=False, raw=False) - (1, 2, 3) -``` - -You should always specify the `use_list` keyword argument for backward compatibility. -See performance issues relating to `use_list option`_ below. - -Read the docstring for other options. +Read the docstring for options. ### Streaming unpacking @@ -129,87 +58,90 @@ Read the docstring for other options. stream (or from bytes provided through its `feed` method). 
```py - import msgpack - from io import BytesIO +import msgpack +from io import BytesIO - buf = BytesIO() - for i in range(100): - buf.write(msgpack.packb(i, use_bin_type=True)) +buf = BytesIO() +for i in range(100): + buf.write(msgpack.packb(i)) - buf.seek(0) +buf.seek(0) - unpacker = msgpack.Unpacker(buf, raw=False) - for unpacked in unpacker: - print(unpacked) +unpacker = msgpack.Unpacker(buf) +for unpacked in unpacker: + print(unpacked) ``` -### Packing/unpacking of custom data type +### Packing/unpacking of custom data types It is also possible to pack/unpack custom data types. Here is an example for `datetime.datetime`. ```py - import datetime - import msgpack +import datetime +import msgpack - useful_dict = { - "id": 1, - "created": datetime.datetime.now(), - } +useful_dict = { + "id": 1, + "created": datetime.datetime.now(), +} - def decode_datetime(obj): - if '__datetime__' in obj: - obj = datetime.datetime.strptime(obj["as_str"], "%Y%m%dT%H:%M:%S.%f") - return obj +def decode_datetime(obj): + if '__datetime__' in obj: + obj = datetime.datetime.strptime(obj["as_str"], "%Y%m%dT%H:%M:%S.%f") + return obj - def encode_datetime(obj): - if isinstance(obj, datetime.datetime): - return {'__datetime__': True, 'as_str': obj.strftime("%Y%m%dT%H:%M:%S.%f")} - return obj +def encode_datetime(obj): + if isinstance(obj, datetime.datetime): + return {'__datetime__': True, 'as_str': obj.strftime("%Y%m%dT%H:%M:%S.%f")} + return obj - packed_dict = msgpack.packb(useful_dict, default=encode_datetime, use_bin_type=True) - this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime, raw=False) +packed_dict = msgpack.packb(useful_dict, default=encode_datetime) +this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime) ``` `Unpacker`'s `object_hook` callback receives a dict; the `object_pairs_hook` callback may instead be used to receive a list of key-value pairs. +NOTE: msgpack can encode datetime with tzinfo into standard ext type for now. +See `datetime` option in `Packer` docstring. + ### Extended types It is also possible to pack/unpack custom data types using the **ext** type. ```pycon - >>> import msgpack - >>> import array - >>> def default(obj): - ... if isinstance(obj, array.array) and obj.typecode == 'd': - ... return msgpack.ExtType(42, obj.tostring()) - ... raise TypeError("Unknown type: %r" % (obj,)) - ... - >>> def ext_hook(code, data): - ... if code == 42: - ... a = array.array('d') - ... a.fromstring(data) - ... return a - ... return ExtType(code, data) - ... - >>> data = array.array('d', [1.2, 3.4]) - >>> packed = msgpack.packb(data, default=default, use_bin_type=True) - >>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook, raw=False) - >>> data == unpacked - True +>>> import msgpack +>>> import array +>>> def default(obj): +... if isinstance(obj, array.array) and obj.typecode == 'd': +... return msgpack.ExtType(42, obj.tostring()) +... raise TypeError("Unknown type: %r" % (obj,)) +... +>>> def ext_hook(code, data): +... if code == 42: +... a = array.array('d') +... a.fromstring(data) +... return a +... return ExtType(code, data) +... +>>> data = array.array('d', [1.2, 3.4]) +>>> packed = msgpack.packb(data, default=default) +>>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook) +>>> data == unpacked +True ``` ### Advanced unpacking control As an alternative to iteration, `Unpacker` objects provide `unpack`, -`skip`, `read_array_header` and `read_map_header` methods. 
The former two -read an entire message from the stream, respectively de-serialising and returning +`skip`, `read_array_header`, and `read_map_header` methods. The former two +read an entire message from the stream, respectively deserializing and returning the result, or ignoring it. The latter two methods return the number of elements in the upcoming container, so that each element in an array, or key-value pair in a map, can be unpacked or skipped individually. @@ -217,7 +149,7 @@ in a map, can be unpacked or skipped individually. ## Notes -### string and binary type +### String and binary types in the old MessagePack spec Early versions of msgpack didn't distinguish string and binary types. The type for representing both string and binary types was named **raw**. @@ -226,22 +158,22 @@ You can pack into and unpack from this old spec using `use_bin_type=False` and `raw=True` options. ```pycon - >>> import msgpack - >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=False), raw=True) - [b'spam', b'eggs'] - >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True), raw=False) - [b'spam', 'eggs'] +>>> import msgpack +>>> msgpack.unpackb(msgpack.packb([b'spam', 'eggs'], use_bin_type=False), raw=True) +[b'spam', b'eggs'] +>>> msgpack.unpackb(msgpack.packb([b'spam', 'eggs'], use_bin_type=True), raw=False) +[b'spam', 'eggs'] ``` ### ext type -To use the **ext** type, pass `msgpack.ExtType` object to packer. +To use the **ext** type, pass a `msgpack.ExtType` object to the packer. ```pycon - >>> import msgpack - >>> packed = msgpack.packb(msgpack.ExtType(42, b'xyzzy')) - >>> msgpack.unpackb(packed) - ExtType(code=42, data='xyzzy') +>>> import msgpack +>>> packed = msgpack.packb(msgpack.ExtType(42, b'xyzzy')) +>>> msgpack.unpackb(packed) +ExtType(code=42, data='xyzzy') ``` You can use it with `default` and `ext_hook`. See below. @@ -249,24 +181,62 @@ You can use it with `default` and `ext_hook`. See below. ### Security -To unpacking data received from unreliable source, msgpack provides +When unpacking data received from an unreliable source, msgpack provides two security options. `max_buffer_size` (default: `100*1024*1024`) limits the internal buffer size. -It is used to limit the preallocated list size too. +It is also used to limit preallocated list sizes. `strict_map_key` (default: `True`) limits the type of map keys to bytes and str. -While msgpack spec doesn't limit the types of the map keys, -there is a risk of the hashdos. +While the MessagePack spec doesn't limit map key types, +there is a risk of a hash DoS. If you need to support other types for map keys, use `strict_map_key=False`. ### Performance tips -CPython's GC starts when growing allocated object. -This means unpacking may cause useless GC. -You can use `gc.disable()` when unpacking large message. +CPython's GC starts when the number of allocated objects grows. +This means unpacking may trigger unnecessary GC. +You can use `gc.disable()` when unpacking a large message. -List is the default sequence type of Python. -But tuple is lighter than list. +A list is the default sequence type in Python. +However, a tuple is lighter than a list. You can use `use_list=False` while unpacking when performance is important. + + +## Major breaking changes in the history + +### msgpack 0.5 + +The package name on PyPI was changed from `msgpack-python` to `msgpack` in 0.5. + +When upgrading from msgpack-0.4 or earlier, do `pip uninstall msgpack-python` before +`pip install -U msgpack`. 
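A minimal sketch tying the performance tips above together (the `large_payload` value is only an illustration; `gc.disable()` and `use_list=False` are the options the README text describes):

```py
import gc

import msgpack

# A stand-in for a large packed message read from a file or socket.
large_payload = msgpack.packb(list(range(100_000)))

# Disable the GC while unpacking a big message to avoid pointless collections,
# and ask for tuples instead of lists when the lighter type is acceptable.
gc.disable()
try:
    data = msgpack.unpackb(large_payload, use_list=False)
finally:
    gc.enable()
```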
+ + +### msgpack 1.0 + +* Python 2 support + + * The extension module no longer supports Python 2. + The pure Python implementation (`msgpack.fallback`) is used for Python 2. + + * msgpack 1.0.6 drops official support of Python 2.7, as pip and + GitHub Action "setup-python" no longer supports Python 2.7. + +* Packer + + * Packer uses `use_bin_type=True` by default. + Bytes are encoded in the bin type in MessagePack. + * The `encoding` option is removed. UTF-8 is always used. + +* Unpacker + + * Unpacker uses `raw=False` by default. It assumes str values are valid UTF-8 strings + and decodes them to Python str (Unicode) objects. + * `encoding` option is removed. You can use `raw=True` to support old format (e.g. unpack into bytes, not str). + * The default value of `max_buffer_size` is changed from 0 to 100 MiB to avoid DoS attacks. + You need to pass `max_buffer_size=0` if you have large but safe data. + * The default value of `strict_map_key` is changed to True to avoid hash DoS. + You need to pass `strict_map_key=False` if you have data that contain map keys + whose type is neither bytes nor str. diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 00000000..75f0c541 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,5 @@ +## Security contact information + +To report a security vulnerability, please use the +[Tidelift security contact](https://tidelift.com/security). +Tidelift will coordinate the fix and disclosure. \ No newline at end of file diff --git a/appveyor.yml b/appveyor.yml deleted file mode 100644 index f338e177..00000000 --- a/appveyor.yml +++ /dev/null @@ -1,50 +0,0 @@ -environment: - matrix: - # For Python versions available on Appveyor, see - # http://www.appveyor.com/docs/installed-software#python - - PYTHON: "C:\\Python36" - -install: - # We need wheel installed to build wheels - - "%PYTHON%\\python.exe -m pip install -U pip" - - "%PYTHON%\\python.exe -m pip install -U cython" - - "%PYTHON%\\Scripts\\cython --cplus msgpack/_cmsgpack.pyx" - -build: off - -test_script: - # Put your test command here. - # Note that you must use the environment variable %PYTHON% to refer to - # the interpreter you're using - Appveyor does not do anything special - # to put the Python version you want to use on PATH. - - set PYTHON="C:\\Python27" - - ci\\runtests.bat - - set PYTHON="C:\\Python27-x64" - - ci\\runtests.bat - - set PYTHON="C:\\Python36" - - ci\\runtests.bat - - set PYTHON="C:\\Python36-x64" - - ci\\runtests.bat - - set PYTHON="C:\\Python37" - - ci\\runtests.bat - - set PYTHON="C:\\Python37-x64" - - ci\\runtests.bat - - set PYTHON="C:\\Python38" - - ci\\runtests.bat - - set PYTHON="C:\\Python38-x64" - - ci\\runtests.bat - -after_test: - # This step builds your wheels. - # Again, you need to use %PYTHON% to get the correct interpreter - -artifacts: - # bdist_wheel puts your built wheel in the dist directory - - path: dist\*.whl - -#on_success: -# You can use this step to upload your artifacts to a public website. -# See Appveyor's documentation for more details. Or you can simply -# access your wheels from the Appveyor "artifacts" tab for your build. 
- -# vim: set shiftwidth=2 diff --git a/ci/runtests.bat b/ci/runtests.bat deleted file mode 100644 index 4ae2f708..00000000 --- a/ci/runtests.bat +++ /dev/null @@ -1,9 +0,0 @@ -%PYTHON%\python.exe -m pip install -U pip wheel pytest -%PYTHON%\python.exe setup.py build_ext -i -%PYTHON%\python.exe setup.py install -%PYTHON%\python.exe -c "import sys; print(hex(sys.maxsize))" -%PYTHON%\python.exe -c "from msgpack import _cmsgpack" -%PYTHON%\python.exe setup.py bdist_wheel -%PYTHON%\python.exe -m pytest -v test -SET EL=%ERRORLEVEL% -exit /b %EL% diff --git a/ci/runtests.sh b/ci/runtests.sh deleted file mode 100644 index 5d87f696..00000000 --- a/ci/runtests.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash -set -ex -${PYTHON} -VV -${PYTHON} -m pip install setuptools wheel pytest -${PYTHON} setup.py build_ext -if -${PYTHON} -c "from msgpack import _cmsgpack" -${PYTHON} setup.py bdist_wheel -${PYTHON} -m pytest -v test diff --git a/docker/buildwheel.sh b/docker/buildwheel.sh index 89a25706..ff34139d 100644 --- a/docker/buildwheel.sh +++ b/docker/buildwheel.sh @@ -7,10 +7,12 @@ set -e -x ARCH=`uname -p` echo "arch=$ARCH" +ls /opt/python + for V in "${PYTHON_VERSIONS[@]}"; do PYBIN=/opt/python/$V/bin rm -rf build/ # Avoid lib build by narrow Python is used by wide python - $PYBIN/python setup.py bdist_wheel + $PYBIN/python -m build -w done cd dist diff --git a/docker/shared.env b/docker/shared.env index 17abdd8f..80274ac6 100644 --- a/docker/shared.env +++ b/docker/shared.env @@ -1,6 +1,7 @@ PYTHON_VERSIONS=( + cp310-cp310 + cp39-cp39 cp38-cp38 cp37-cp37m cp36-cp36m - cp35-cp35m ) diff --git a/docs/_static/README.txt b/docs/_static/README.txt new file mode 100644 index 00000000..1c70594f --- /dev/null +++ b/docs/_static/README.txt @@ -0,0 +1 @@ +Sphinx will copy the contents of docs/_static/ directory to the build location. diff --git a/docs/api.rst b/docs/api.rst index 93827e19..f5dfbbd2 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -5,19 +5,19 @@ API reference .. autofunction:: pack -:func:`dump` is alias for :func:`pack` +``dump()`` is an alias for :func:`pack` .. autofunction:: packb -:func:`dumps` is alias for :func:`packb` +``dumps()`` is an alias for :func:`packb` .. autofunction:: unpack -:func:`load` is alias for :func:`unpack` +``load()`` is an alias for :func:`unpack` .. autofunction:: unpackb -:func:`loads` is alias for :func:`unpackb` +``loads()`` is an alias for :func:`unpackb` .. autoclass:: Packer :members: diff --git a/docs/conf.py b/docs/conf.py index 6b432be0..28116cd6 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- -# # msgpack documentation build configuration file, created by # sphinx-quickstart on Sun Feb 24 14:20:50 2013. # @@ -11,12 +9,12 @@ # All configuration values have a default; values that are commented out # serve to show the default. -import sys, os - # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -# sys.path.insert(0, os.path.abspath('.')) +# import os +# import sys +# sys.path.insert(0, os.path.abspath('..')) # -- General configuration ----------------------------------------------------- @@ -40,8 +38,8 @@ master_doc = "index" # General information about the project. 
-project = u"msgpack" -copyright = u"Inada Naoki" +project = "msgpack" +copyright = "Inada Naoki" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -91,7 +89,7 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = "sphinxdoc" +html_theme = "sphinx_rtd_theme" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the @@ -181,7 +179,7 @@ # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ - ("index", "msgpack.tex", u"msgpack Documentation", u"Author", "manual"), + ("index", "msgpack.tex", "msgpack Documentation", "Author", "manual"), ] # The name of an image file (relative to this directory) to place at the top of @@ -209,7 +207,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [("index", "msgpack", u"msgpack Documentation", [u"Author"], 1)] +man_pages = [("index", "msgpack", "msgpack Documentation", ["Author"], 1)] # If true, show URL addresses after external links. # man_show_urls = False @@ -224,8 +222,8 @@ ( "index", "msgpack", - u"msgpack Documentation", - u"Author", + "msgpack Documentation", + "Author", "msgpack", "One line description of project.", "Miscellaneous", @@ -245,10 +243,10 @@ # -- Options for Epub output --------------------------------------------------- # Bibliographic Dublin Core info. -epub_title = u"msgpack" -epub_author = u"Author" -epub_publisher = u"Author" -epub_copyright = u"2013, Author" +epub_title = "msgpack" +epub_author = "Author" +epub_publisher = "Author" +epub_copyright = "2013, Author" # The language of the text. It defaults to the language option # or en if the language is not set. 
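To illustrate the alias note in `docs/api.rst` above: `dumps`/`loads` (and `dump`/`load`) are the documented aliases for `packb`/`unpackb` (and `pack`/`unpack`), kept for familiarity with the `json` and `pickle` APIs. A tiny sketch:

```py
import msgpack

payload = msgpack.dumps({"answer": 42})          # alias for msgpack.packb()
assert msgpack.loads(payload) == {"answer": 42}  # alias for msgpack.unpackb()
```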
diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 00000000..26002de4 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,2 @@ +sphinx~=7.3.7 +sphinx-rtd-theme~=2.0.0 diff --git a/msgpack/__init__.py b/msgpack/__init__.py index d6705e22..f3266b70 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -1,19 +1,20 @@ -# coding: utf-8 -from ._version import version -from .exceptions import * +# ruff: noqa: F401 +import os + +from .exceptions import * # noqa: F403 from .ext import ExtType, Timestamp -import os -import sys +version = (1, 1, 2) +__version__ = "1.1.2" -if os.environ.get("MSGPACK_PUREPYTHON") or sys.version_info[0] == 2: - from .fallback import Packer, unpackb, Unpacker +if os.environ.get("MSGPACK_PUREPYTHON"): + from .fallback import Packer, Unpacker, unpackb else: try: - from ._cmsgpack import Packer, unpackb, Unpacker + from ._cmsgpack import Packer, Unpacker, unpackb except ImportError: - from .fallback import Packer, unpackb, Unpacker + from .fallback import Packer, Unpacker, unpackb def pack(o, stream, **kwargs): diff --git a/msgpack/_cmsgpack.pyx b/msgpack/_cmsgpack.pyx index 1faaac3a..9680b31e 100644 --- a/msgpack/_cmsgpack.pyx +++ b/msgpack/_cmsgpack.pyx @@ -1,5 +1,6 @@ -# coding: utf-8 #cython: embedsignature=True, c_string_encoding=ascii, language_level=3 +#cython: freethreading_compatible = True +import cython from cpython.datetime cimport import_datetime, datetime_new import_datetime() diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index b4706463..94d1462c 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -1,5 +1,3 @@ -# coding: utf-8 - from cpython cimport * from cpython.bytearray cimport PyByteArray_Check, PyByteArray_CheckExact from cpython.datetime cimport ( @@ -16,8 +14,6 @@ from .ext import ExtType, Timestamp cdef extern from "Python.h": int PyMemoryView_Check(object obj) - char* PyUnicode_AsUTF8AndSize(object obj, Py_ssize_t *l) except NULL - cdef extern from "pack.h": struct msgpack_packer: @@ -26,26 +22,21 @@ cdef extern from "pack.h": size_t buf_size bint use_bin_type - int msgpack_pack_int(msgpack_packer* pk, int d) - int msgpack_pack_nil(msgpack_packer* pk) - int msgpack_pack_true(msgpack_packer* pk) - int msgpack_pack_false(msgpack_packer* pk) - int msgpack_pack_long(msgpack_packer* pk, long d) - int msgpack_pack_long_long(msgpack_packer* pk, long long d) - int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d) - int msgpack_pack_float(msgpack_packer* pk, float d) - int msgpack_pack_double(msgpack_packer* pk, double d) - int msgpack_pack_array(msgpack_packer* pk, size_t l) - int msgpack_pack_map(msgpack_packer* pk, size_t l) - int msgpack_pack_raw(msgpack_packer* pk, size_t l) - int msgpack_pack_bin(msgpack_packer* pk, size_t l) - int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l) - int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l) - int msgpack_pack_timestamp(msgpack_packer* x, long long seconds, unsigned long nanoseconds); - int msgpack_pack_unicode(msgpack_packer* pk, object o, long long limit) - -cdef extern from "buff_converter.h": - object buff_to_buff(char *, Py_ssize_t) + int msgpack_pack_nil(msgpack_packer* pk) except -1 + int msgpack_pack_true(msgpack_packer* pk) except -1 + int msgpack_pack_false(msgpack_packer* pk) except -1 + int msgpack_pack_long_long(msgpack_packer* pk, long long d) except -1 + int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d) except -1 + int msgpack_pack_float(msgpack_packer* 
pk, float d) except -1 + int msgpack_pack_double(msgpack_packer* pk, double d) except -1 + int msgpack_pack_array(msgpack_packer* pk, size_t l) except -1 + int msgpack_pack_map(msgpack_packer* pk, size_t l) except -1 + int msgpack_pack_raw(msgpack_packer* pk, size_t l) except -1 + int msgpack_pack_bin(msgpack_packer* pk, size_t l) except -1 + int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l) except -1 + int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l) except -1 + int msgpack_pack_timestamp(msgpack_packer* x, long long seconds, unsigned long nanoseconds) except -1 + cdef int DEFAULT_RECURSE_LIMIT=511 cdef long long ITEM_LIMIT = (2**32)-1 @@ -59,11 +50,11 @@ cdef inline int PyBytesLike_CheckExact(object o): return PyBytes_CheckExact(o) or PyByteArray_CheckExact(o) -cdef class Packer(object): +cdef class Packer: """ MessagePack Packer - usage:: + Usage:: packer = Packer() astream.write(packer.pack(a)) @@ -71,7 +62,8 @@ cdef class Packer(object): Packer's constructor has some keyword arguments: - :param callable default: + :param default: + When specified, it should be callable. Convert user type to builtin type that Packer supports. See also simplejson's document. @@ -94,30 +86,52 @@ cdef class Packer(object): This is useful when trying to implement accurate serialization for python types. + :param bool datetime: + If set to true, datetime with tzinfo is packed into Timestamp type. + Note that the tzinfo is stripped in the timestamp. + You can get UTC datetime with `timestamp=3` option of the Unpacker. + :param str unicode_errors: The error handler for encoding unicode. (default: 'strict') DO NOT USE THIS!! This option is kept for very specific usage. + + :param int buf_size: + The size of the internal buffer. (default: 256*1024) + Useful if serialisation size can be correctly estimated, + avoid unnecessary reallocations. 
""" cdef msgpack_packer pk cdef object _default cdef object _berrors cdef const char *unicode_errors + cdef size_t exports # number of exported buffers cdef bint strict_types cdef bint use_float cdef bint autoreset cdef bint datetime - def __cinit__(self): - cdef int buf_size = 1024*1024 + def __cinit__(self, buf_size=256*1024, **_kwargs): self.pk.buf = PyMem_Malloc(buf_size) if self.pk.buf == NULL: raise MemoryError("Unable to allocate internal buffer.") self.pk.buf_size = buf_size self.pk.length = 0 + self.exports = 0 + def __dealloc__(self): + PyMem_Free(self.pk.buf) + self.pk.buf = NULL + assert self.exports == 0 + + cdef _check_exports(self): + if self.exports > 0: + raise BufferError("Existing exports of data: Packer cannot be changed") + + @cython.critical_section def __init__(self, *, default=None, bint use_single_float=False, bint autoreset=True, bint use_bin_type=True, - bint strict_types=False, bint datetime=False, unicode_errors=None): + bint strict_types=False, bint datetime=False, unicode_errors=None, + buf_size=256*1024): self.use_float = use_single_float self.strict_types = strict_types self.autoreset = autoreset @@ -134,157 +148,130 @@ cdef class Packer(object): else: self.unicode_errors = self._berrors - def __dealloc__(self): - PyMem_Free(self.pk.buf) - self.pk.buf = NULL - - cdef int _pack(self, object o, int nest_limit=DEFAULT_RECURSE_LIMIT) except -1: + # returns -2 when default should(o) be called + cdef int _pack_inner(self, object o, bint will_default, int nest_limit) except -1: cdef long long llval cdef unsigned long long ullval cdef unsigned long ulval - cdef long longval - cdef float fval - cdef double dval - cdef char* rawval - cdef int ret - cdef dict d + cdef const char* rawval cdef Py_ssize_t L - cdef int default_used = 0 - cdef bint strict_types = self.strict_types cdef Py_buffer view - - if nest_limit < 0: - raise ValueError("recursion limit exceeded.") - - while True: - if o is None: - ret = msgpack_pack_nil(&self.pk) - elif o is True: - ret = msgpack_pack_true(&self.pk) - elif o is False: - ret = msgpack_pack_false(&self.pk) - elif PyLong_CheckExact(o) if strict_types else PyLong_Check(o): - # PyInt_Check(long) is True for Python 3. - # So we should test long before int. 
- try: - if o > 0: - ullval = o - ret = msgpack_pack_unsigned_long_long(&self.pk, ullval) - else: - llval = o - ret = msgpack_pack_long_long(&self.pk, llval) - except OverflowError as oe: - if not default_used and self._default is not None: - o = self._default(o) - default_used = True - continue - else: - raise OverflowError("Integer value out of range") - elif PyInt_CheckExact(o) if strict_types else PyInt_Check(o): - longval = o - ret = msgpack_pack_long(&self.pk, longval) - elif PyFloat_CheckExact(o) if strict_types else PyFloat_Check(o): - if self.use_float: - fval = o - ret = msgpack_pack_float(&self.pk, fval) + cdef bint strict = self.strict_types + + if o is None: + msgpack_pack_nil(&self.pk) + elif o is True: + msgpack_pack_true(&self.pk) + elif o is False: + msgpack_pack_false(&self.pk) + elif PyLong_CheckExact(o) if strict else PyLong_Check(o): + try: + if o > 0: + ullval = o + msgpack_pack_unsigned_long_long(&self.pk, ullval) else: - dval = o - ret = msgpack_pack_double(&self.pk, dval) - elif PyBytesLike_CheckExact(o) if strict_types else PyBytesLike_Check(o): - L = Py_SIZE(o) - if L > ITEM_LIMIT: - PyErr_Format(ValueError, b"%.200s object is too large", Py_TYPE(o).tp_name) - rawval = o - ret = msgpack_pack_bin(&self.pk, L) - if ret == 0: - ret = msgpack_pack_raw_body(&self.pk, rawval, L) - elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o): - if self.unicode_errors == NULL: - ret = msgpack_pack_unicode(&self.pk, o, ITEM_LIMIT); - if ret == -2: - raise ValueError("unicode string is too large") + llval = o + msgpack_pack_long_long(&self.pk, llval) + except OverflowError as oe: + if will_default: + return -2 else: - o = PyUnicode_AsEncodedString(o, NULL, self.unicode_errors) - L = Py_SIZE(o) - if L > ITEM_LIMIT: - raise ValueError("unicode string is too large") - ret = msgpack_pack_raw(&self.pk, L) - if ret == 0: - rawval = o - ret = msgpack_pack_raw_body(&self.pk, rawval, L) - elif PyDict_CheckExact(o): - d = o - L = len(d) - if L > ITEM_LIMIT: - raise ValueError("dict is too large") - ret = msgpack_pack_map(&self.pk, L) - if ret == 0: - for k, v in d.items(): - ret = self._pack(k, nest_limit-1) - if ret != 0: break - ret = self._pack(v, nest_limit-1) - if ret != 0: break - elif not strict_types and PyDict_Check(o): - L = len(o) - if L > ITEM_LIMIT: - raise ValueError("dict is too large") - ret = msgpack_pack_map(&self.pk, L) - if ret == 0: - for k, v in o.items(): - ret = self._pack(k, nest_limit-1) - if ret != 0: break - ret = self._pack(v, nest_limit-1) - if ret != 0: break - elif type(o) is ExtType if strict_types else isinstance(o, ExtType): - # This should be before Tuple because ExtType is namedtuple. 
- longval = o.code - rawval = o.data - L = len(o.data) - if L > ITEM_LIMIT: - raise ValueError("EXT data is too large") - ret = msgpack_pack_ext(&self.pk, longval, L) - ret = msgpack_pack_raw_body(&self.pk, rawval, L) - elif type(o) is Timestamp: - llval = o.seconds - ulval = o.nanoseconds - ret = msgpack_pack_timestamp(&self.pk, llval, ulval) - elif PyList_CheckExact(o) if strict_types else (PyTuple_Check(o) or PyList_Check(o)): + raise OverflowError("Integer value out of range") + elif PyFloat_CheckExact(o) if strict else PyFloat_Check(o): + if self.use_float: + msgpack_pack_float(&self.pk, o) + else: + msgpack_pack_double(&self.pk, o) + elif PyBytesLike_CheckExact(o) if strict else PyBytesLike_Check(o): + L = Py_SIZE(o) + if L > ITEM_LIMIT: + PyErr_Format(ValueError, b"%.200s object is too large", Py_TYPE(o).tp_name) + rawval = o + msgpack_pack_bin(&self.pk, L) + msgpack_pack_raw_body(&self.pk, rawval, L) + elif PyUnicode_CheckExact(o) if strict else PyUnicode_Check(o): + if self.unicode_errors == NULL: + rawval = PyUnicode_AsUTF8AndSize(o, &L) + if L >ITEM_LIMIT: + raise ValueError("unicode string is too large") + else: + o = PyUnicode_AsEncodedString(o, NULL, self.unicode_errors) L = Py_SIZE(o) if L > ITEM_LIMIT: - raise ValueError("list is too large") - ret = msgpack_pack_array(&self.pk, L) - if ret == 0: - for v in o: - ret = self._pack(v, nest_limit-1) - if ret != 0: break - elif PyMemoryView_Check(o): - if PyObject_GetBuffer(o, &view, PyBUF_SIMPLE) != 0: - raise ValueError("could not get buffer for memoryview") - L = view.len - if L > ITEM_LIMIT: - PyBuffer_Release(&view); - raise ValueError("memoryview is too large") - ret = msgpack_pack_bin(&self.pk, L) - if ret == 0: - ret = msgpack_pack_raw_body(&self.pk, view.buf, L) + raise ValueError("unicode string is too large") + rawval = o + msgpack_pack_raw(&self.pk, L) + msgpack_pack_raw_body(&self.pk, rawval, L) + elif PyDict_CheckExact(o) if strict else PyDict_Check(o): + L = len(o) + if L > ITEM_LIMIT: + raise ValueError("dict is too large") + msgpack_pack_map(&self.pk, L) + for k, v in o.items(): + self._pack(k, nest_limit) + self._pack(v, nest_limit) + elif type(o) is ExtType if strict else isinstance(o, ExtType): + # This should be before Tuple because ExtType is namedtuple. 
+ rawval = o.data + L = len(o.data) + if L > ITEM_LIMIT: + raise ValueError("EXT data is too large") + msgpack_pack_ext(&self.pk, o.code, L) + msgpack_pack_raw_body(&self.pk, rawval, L) + elif type(o) is Timestamp: + llval = o.seconds + ulval = o.nanoseconds + msgpack_pack_timestamp(&self.pk, llval, ulval) + elif PyList_CheckExact(o) if strict else (PyTuple_Check(o) or PyList_Check(o)): + L = Py_SIZE(o) + if L > ITEM_LIMIT: + raise ValueError("list is too large") + msgpack_pack_array(&self.pk, L) + for v in o: + self._pack(v, nest_limit) + elif PyMemoryView_Check(o): + PyObject_GetBuffer(o, &view, PyBUF_SIMPLE) + L = view.len + if L > ITEM_LIMIT: PyBuffer_Release(&view); - elif self.datetime and PyDateTime_CheckExact(o) and datetime_tzinfo(o) is not None: - delta = o - epoch - if not PyDelta_CheckExact(delta): - raise ValueError("failed to calculate delta") - llval = timedelta_days(delta) * (24*60*60) + timedelta_seconds(delta) - ulval = timedelta_microseconds(delta) * 1000 - ret = msgpack_pack_timestamp(&self.pk, llval, ulval) - elif not default_used and self._default: + raise ValueError("memoryview is too large") + try: + msgpack_pack_bin(&self.pk, L) + msgpack_pack_raw_body(&self.pk, view.buf, L) + finally: + PyBuffer_Release(&view); + elif self.datetime and PyDateTime_CheckExact(o) and datetime_tzinfo(o) is not None: + delta = o - epoch + if not PyDelta_CheckExact(delta): + raise ValueError("failed to calculate delta") + llval = timedelta_days(delta) * (24*60*60) + timedelta_seconds(delta) + ulval = timedelta_microseconds(delta) * 1000 + msgpack_pack_timestamp(&self.pk, llval, ulval) + elif will_default: + return -2 + elif self.datetime and PyDateTime_CheckExact(o): + # this should be later than will_default + PyErr_Format(ValueError, b"can not serialize '%.200s' object where tzinfo=None", Py_TYPE(o).tp_name) + else: + PyErr_Format(TypeError, b"can not serialize '%.200s' object", Py_TYPE(o).tp_name) + + cdef int _pack(self, object o, int nest_limit=DEFAULT_RECURSE_LIMIT) except -1: + cdef int ret + if nest_limit < 0: + raise ValueError("recursion limit exceeded.") + nest_limit -= 1 + if self._default is not None: + ret = self._pack_inner(o, 1, nest_limit) + if ret == -2: o = self._default(o) - default_used = 1 - continue else: - PyErr_Format(TypeError, b"can not serialize '%.200s' object", Py_TYPE(o).tp_name) - return ret + return ret + return self._pack_inner(o, 0, nest_limit) - cpdef pack(self, object obj): + @cython.critical_section + def pack(self, object obj): cdef int ret + self._check_exports() try: ret = self._pack(obj, DEFAULT_RECURSE_LIMIT) except: @@ -297,36 +284,37 @@ cdef class Packer(object): self.pk.length = 0 return buf + @cython.critical_section def pack_ext_type(self, typecode, data): + self._check_exports() + if len(data) > ITEM_LIMIT: + raise ValueError("ext data too large") msgpack_pack_ext(&self.pk, typecode, len(data)) msgpack_pack_raw_body(&self.pk, data, len(data)) + @cython.critical_section def pack_array_header(self, long long size): + self._check_exports() if size > ITEM_LIMIT: - raise ValueError - cdef int ret = msgpack_pack_array(&self.pk, size) - if ret == -1: - raise MemoryError - elif ret: # should not happen - raise TypeError + raise ValueError("array too large") + msgpack_pack_array(&self.pk, size) if self.autoreset: buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) self.pk.length = 0 return buf + @cython.critical_section def pack_map_header(self, long long size): + self._check_exports() if size > ITEM_LIMIT: - raise ValueError - cdef int 
ret = msgpack_pack_map(&self.pk, size) - if ret == -1: - raise MemoryError - elif ret: # should not happen - raise TypeError + raise ValueError("map too learge") + msgpack_pack_map(&self.pk, size) if self.autoreset: buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) self.pk.length = 0 return buf + @cython.critical_section def pack_map_pairs(self, object pairs): """ Pack *pairs* as msgpack map type. @@ -334,33 +322,43 @@ cdef class Packer(object): *pairs* should be a sequence of pairs. (`len(pairs)` and `for k, v in pairs:` should be supported.) """ - cdef int ret = msgpack_pack_map(&self.pk, len(pairs)) - if ret == 0: - for k, v in pairs: - ret = self._pack(k) - if ret != 0: break - ret = self._pack(v) - if ret != 0: break - if ret == -1: - raise MemoryError - elif ret: # should not happen - raise TypeError + self._check_exports() + size = len(pairs) + if size > ITEM_LIMIT: + raise ValueError("map too large") + msgpack_pack_map(&self.pk, size) + for k, v in pairs: + self._pack(k) + self._pack(v) if self.autoreset: buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) self.pk.length = 0 return buf + @cython.critical_section def reset(self): """Reset internal buffer. This method is useful only when autoreset=False. """ + self._check_exports() self.pk.length = 0 + @cython.critical_section def bytes(self): """Return internal buffer contents as bytes object""" return PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) def getbuffer(self): - """Return view of internal buffer.""" - return buff_to_buff(self.pk.buf, self.pk.length) + """Return memoryview of internal buffer. + + Note: Packer now supports buffer protocol. You can use memoryview(packer). + """ + return memoryview(self) + + def __getbuffer__(self, Py_buffer *buffer, int flags): + PyBuffer_FillInfo(buffer, self, self.pk.buf, self.pk.length, 1, flags) + self.exports += 1 + + def __releasebuffer__(self, Py_buffer *buffer): + self.exports -= 1 diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 4340e044..f0cf96d7 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -1,15 +1,12 @@ -# coding: utf-8 - from cpython cimport * cdef extern from "Python.h": ctypedef struct PyObject - cdef int PyObject_AsReadBuffer(object o, const void** buff, Py_ssize_t* buf_len) except -1 object PyMemoryView_GetContiguous(object obj, int buffertype, char order) from libc.stdlib cimport * from libc.string cimport * from libc.limits cimport * -ctypedef unsigned long long uint64_t +from libc.stdint cimport uint64_t from .exceptions import ( BufferFull, @@ -36,7 +33,7 @@ cdef extern from "unpack.h": PyObject* timestamp_t PyObject *giga; PyObject *utc; - char *unicode_errors + const char *unicode_errors Py_ssize_t max_str_len Py_ssize_t max_bin_len Py_ssize_t max_array_len @@ -211,17 +208,17 @@ def unpackb(object packed, *, object object_hook=None, object list_hook=None, raise ValueError("Unpack failed: error = %d" % (ret,)) -cdef class Unpacker(object): +cdef class Unpacker: """Streaming unpacker. Arguments: :param file_like: File-like object having `.read(n)` method. - If specified, unpacker reads serialized data from it and :meth:`feed()` is not usable. + If specified, unpacker reads serialized data from it and `.feed()` is not usable. :param int read_size: - Used as `file_like.read(read_size)`. (default: `min(1024**2, max_buffer_size)`) + Used as `file_like.read(read_size)`. (default: `min(16*1024, max_buffer_size)`) :param bool use_list: If true, unpack msgpack array to Python list. 
@@ -231,21 +228,34 @@ cdef class Unpacker(object): If true, unpack msgpack raw to Python bytes. Otherwise, unpack to Python str by decoding with UTF-8 encoding (default). + :param int timestamp: + Control how timestamp type is unpacked: + + 0 - Timestamp + 1 - float (Seconds from the EPOCH) + 2 - int (Nanoseconds from the EPOCH) + 3 - datetime.datetime (UTC). + :param bool strict_map_key: If true (default), only str or bytes are accepted for map (dict) keys. - :param callable object_hook: + :param object_hook: When specified, it should be callable. Unpacker calls it with a dict argument after unpacking msgpack map. (See also simplejson) - :param callable object_pairs_hook: + :param object_pairs_hook: When specified, it should be callable. Unpacker calls it with a list of key-value pairs after unpacking msgpack map. (See also simplejson) + :param str unicode_errors: + The error handler for decoding unicode. (default: 'strict') + This option should be used only when you have msgpack data which + contains invalid UTF-8 string. + :param int max_buffer_size: - Limits size of data waiting unpacked. 0 means system's INT_MAX. + Limits size of data waiting unpacked. 0 means 2**32-1. The default value is 100*1024*1024 (100MiB). Raises `BufferFull` exception when it is insufficient. You should set this parameter when unpacking data from untrusted source. @@ -259,18 +269,16 @@ cdef class Unpacker(object): Limits max length of bin. (default: max_buffer_size) :param int max_array_len: - Limits max length of array. (default: max_buffer_size) + Limits max length of array. + (default: max_buffer_size) :param int max_map_len: - Limits max length of map. (default: max_buffer_size//2) + Limits max length of map. + (default: max_buffer_size//2) :param int max_ext_len: Deprecated, use *max_buffer_size* instead. - Limits max size of ext type. (default: max_buffer_size) - - :param str unicode_errors: - Error handler used for decoding str type. (default: `'strict'`) - + Limits max size of ext type. 
(default: max_buffer_size) Example of streaming deserialize from file-like object:: @@ -314,6 +322,7 @@ cdef class Unpacker(object): PyMem_Free(self.buf) self.buf = NULL + @cython.critical_section def __init__(self, file_like=None, *, Py_ssize_t read_size=0, bint use_list=True, bint raw=False, int timestamp=0, bint strict_map_key=True, object object_hook=None, object object_pairs_hook=None, object list_hook=None, @@ -374,6 +383,7 @@ cdef class Unpacker(object): max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len) + @cython.critical_section def feed(self, object next_bytes): """Append `next_bytes` to internal buffer.""" cdef Py_buffer pybuff @@ -430,34 +440,30 @@ cdef class Unpacker(object): self.buf_size = buf_size self.buf_tail = tail + _buf_len - cdef read_from_file(self): - next_bytes = self.file_like_read( - min(self.read_size, - self.max_buffer_size - (self.buf_tail - self.buf_head) - )) + cdef int read_from_file(self) except -1: + cdef Py_ssize_t remains = self.max_buffer_size - (self.buf_tail - self.buf_head) + if remains <= 0: + raise BufferFull + + next_bytes = self.file_like_read(min(self.read_size, remains)) if next_bytes: self.append_buffer(PyBytes_AsString(next_bytes), PyBytes_Size(next_bytes)) else: self.file_like = None + return 0 cdef object _unpack(self, execute_fn execute, bint iter=0): cdef int ret cdef object obj cdef Py_ssize_t prev_head - if self.buf_head >= self.buf_tail and self.file_like is not None: - self.read_from_file() - while 1: prev_head = self.buf_head - if prev_head >= self.buf_tail: - if iter: - raise StopIteration("No more data to unpack.") - else: - raise OutOfData("No more data to unpack.") - - ret = execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head) - self.stream_offset += self.buf_head - prev_head + if prev_head < self.buf_tail: + ret = execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head) + self.stream_offset += self.buf_head - prev_head + else: + ret = 0 if ret == 1: obj = unpack_data(&self.ctx) @@ -478,6 +484,7 @@ cdef class Unpacker(object): else: raise ValueError("Unpack failed: error = %d" % (ret,)) + @cython.critical_section def read_bytes(self, Py_ssize_t nbytes): """Read a specified number of raw bytes from the stream""" cdef Py_ssize_t nread @@ -490,6 +497,7 @@ cdef class Unpacker(object): self.stream_offset += nread return ret + @cython.critical_section def unpack(self): """Unpack one object @@ -497,6 +505,7 @@ cdef class Unpacker(object): """ return self._unpack(unpack_construct) + @cython.critical_section def skip(self): """Read and ignore one object, returning None @@ -504,6 +513,7 @@ cdef class Unpacker(object): """ return self._unpack(unpack_skip) + @cython.critical_section def read_array_header(self): """assuming the next object is an array, return its size n, such that the next n unpack() calls will iterate over its contents. @@ -512,6 +522,7 @@ cdef class Unpacker(object): """ return self._unpack(read_array_header) + @cython.critical_section def read_map_header(self): """assuming the next object is a map, return its size n, such that the next n * 2 unpack() calls will iterate over its key-value pairs. 
@@ -520,6 +531,7 @@ cdef class Unpacker(object): """ return self._unpack(read_map_header) + @cython.critical_section def tell(self): """Returns the current position of the Unpacker in bytes, i.e., the number of bytes that were read from the input, also the starting @@ -530,6 +542,7 @@ cdef class Unpacker(object): def __iter__(self): return self + @cython.critical_section def __next__(self): return self._unpack(unpack_construct, 1) diff --git a/msgpack/_version.py b/msgpack/_version.py deleted file mode 100644 index 9f55cf50..00000000 --- a/msgpack/_version.py +++ /dev/null @@ -1 +0,0 @@ -version = (1, 0, 0) diff --git a/msgpack/buff_converter.h b/msgpack/buff_converter.h deleted file mode 100644 index 86b4196d..00000000 --- a/msgpack/buff_converter.h +++ /dev/null @@ -1,8 +0,0 @@ -#include "Python.h" - -/* cython does not support this preprocessor check => write it in raw C */ -static PyObject * -buff_to_buff(char *buff, Py_ssize_t size) -{ - return PyMemoryView_FromMemory(buff, size, PyBUF_READ); -} diff --git a/msgpack/ext.py b/msgpack/ext.py index 8341c68b..9694819a 100644 --- a/msgpack/ext.py +++ b/msgpack/ext.py @@ -1,21 +1,6 @@ -# coding: utf-8 -from collections import namedtuple import datetime -import sys import struct - - -PY2 = sys.version_info[0] == 2 - -if PY2: - int_types = (int, long) - _utc = None -else: - int_types = int - try: - _utc = datetime.timezone.utc - except AttributeError: - _utc = datetime.timezone(datetime.timedelta(0)) +from collections import namedtuple class ExtType(namedtuple("ExtType", "code data")): @@ -28,14 +13,15 @@ def __new__(cls, code, data): raise TypeError("data must be bytes") if not 0 <= code <= 127: raise ValueError("code must be 0~127") - return super(ExtType, cls).__new__(cls, code, data) + return super().__new__(cls, code, data) -class Timestamp(object): +class Timestamp: """Timestamp represents the Timestamp extension type in msgpack. - When built with Cython, msgpack uses C methods to pack and unpack `Timestamp`. When using pure-Python - msgpack, :func:`to_bytes` and :func:`from_bytes` are used to pack and unpack `Timestamp`. + When built with Cython, msgpack uses C methods to pack and unpack `Timestamp`. + When using pure-Python msgpack, :func:`to_bytes` and :func:`from_bytes` are used to pack and + unpack `Timestamp`. This class is immutable: Do not override seconds and nanoseconds. """ @@ -53,31 +39,25 @@ def __init__(self, seconds, nanoseconds=0): Number of nanoseconds to add to `seconds` to get fractional time. Maximum is 999_999_999. Default is 0. - Note: Negative times (before the UNIX epoch) are represented as negative seconds + positive ns. + Note: Negative times (before the UNIX epoch) are represented as neg. seconds + pos. ns. """ - if not isinstance(seconds, int_types): - raise TypeError("seconds must be an interger") - if not isinstance(nanoseconds, int_types): + if not isinstance(seconds, int): + raise TypeError("seconds must be an integer") + if not isinstance(nanoseconds, int): raise TypeError("nanoseconds must be an integer") - if not (0 <= nanoseconds < 10 ** 9): - raise ValueError( - "nanoseconds must be a non-negative integer less than 999999999." 
- ) + if not (0 <= nanoseconds < 10**9): + raise ValueError("nanoseconds must be a non-negative integer less than 999999999.") self.seconds = seconds self.nanoseconds = nanoseconds def __repr__(self): """String representation of Timestamp.""" - return "Timestamp(seconds={0}, nanoseconds={1})".format( - self.seconds, self.nanoseconds - ) + return f"Timestamp(seconds={self.seconds}, nanoseconds={self.nanoseconds})" def __eq__(self, other): """Check for equality with another Timestamp object""" if type(other) is self.__class__: - return ( - self.seconds == other.seconds and self.nanoseconds == other.nanoseconds - ) + return self.seconds == other.seconds and self.nanoseconds == other.nanoseconds return False def __ne__(self, other): @@ -140,10 +120,10 @@ def from_unix(unix_sec): """Create a Timestamp from posix timestamp in seconds. :param unix_float: Posix timestamp in seconds. - :type unix_float: int or float. + :type unix_float: int or float """ seconds = int(unix_sec // 1) - nanoseconds = int((unix_sec % 1) * 10 ** 9) + nanoseconds = int((unix_sec % 1) * 10**9) return Timestamp(seconds, nanoseconds) def to_unix(self): @@ -161,7 +141,7 @@ def from_unix_nano(unix_ns): :param int unix_ns: Posix timestamp in nanoseconds. :rtype: Timestamp """ - return Timestamp(*divmod(unix_ns, 10 ** 9)) + return Timestamp(*divmod(unix_ns, 10**9)) def to_unix_nano(self): """Get the timestamp as a unixtime in nanoseconds. @@ -169,23 +149,22 @@ def to_unix_nano(self): :returns: posix timestamp in nanoseconds :rtype: int """ - return self.seconds * 10 ** 9 + self.nanoseconds + return self.seconds * 10**9 + self.nanoseconds def to_datetime(self): """Get the timestamp as a UTC datetime. - Python 2 is not supported. - - :rtype: datetime. + :rtype: `datetime.datetime` """ - return datetime.datetime.fromtimestamp(self.to_unix(), _utc) + utc = datetime.timezone.utc + return datetime.datetime.fromtimestamp(0, utc) + datetime.timedelta( + seconds=self.seconds, microseconds=self.nanoseconds // 1000 + ) @staticmethod def from_datetime(dt): """Create a Timestamp from datetime with tzinfo. - Python 2 is not supported. - :rtype: Timestamp """ - return Timestamp.from_unix(dt.timestamp()) + return Timestamp(seconds=int(dt.timestamp()), nanoseconds=dt.microsecond * 1000) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 1e0bbe91..b02e47cf 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -1,63 +1,22 @@ """Fallback pure Python implementation of msgpack""" -from datetime import datetime as _DateTime -import sys import struct - - -PY2 = sys.version_info[0] == 2 -if PY2: - int_types = (int, long) - - def dict_iteritems(d): - return d.iteritems() - - -else: - int_types = int - unicode = str - xrange = range - - def dict_iteritems(d): - return d.items() - - -if sys.version_info < (3, 5): - # Ugly hack... - RecursionError = RuntimeError - - def _is_recursionerror(e): - return ( - len(e.args) == 1 - and isinstance(e.args[0], str) - and e.args[0].startswith("maximum recursion depth exceeded") - ) - - -else: - - def _is_recursionerror(e): - return True - +import sys +from datetime import datetime as _DateTime if hasattr(sys, "pypy_version_info"): - # StringIO is slow on PyPy, StringIO is faster. However: PyPy's own - # StringBuilder is fastest. 
from __pypy__ import newlist_hint + from __pypy__.builders import BytesBuilder - try: - from __pypy__.builders import BytesBuilder as StringBuilder - except ImportError: - from __pypy__.builders import StringBuilder - USING_STRINGBUILDER = True + _USING_STRINGBUILDER = True - class StringIO(object): + class BytesIO: def __init__(self, s=b""): if s: - self.builder = StringBuilder(len(s)) + self.builder = BytesBuilder(len(s)) self.builder.append(s) else: - self.builder = StringBuilder() + self.builder = BytesBuilder() def write(self, s): if isinstance(s, memoryview): @@ -69,19 +28,18 @@ def write(self, s): def getvalue(self): return self.builder.build() - else: - USING_STRINGBUILDER = False - from io import BytesIO as StringIO + from io import BytesIO - newlist_hint = lambda size: [] + _USING_STRINGBUILDER = False + def newlist_hint(size): + return [] -from .exceptions import BufferFull, OutOfData, ExtraData, FormatError, StackError +from .exceptions import BufferFull, ExtraData, FormatError, OutOfData, StackError from .ext import ExtType, Timestamp - EX_SKIP = 0 EX_CONSTRUCT = 1 EX_READ_ARRAY_HEADER = 2 @@ -129,34 +87,54 @@ def unpackb(packed, **kwargs): ret = unpacker._unpack() except OutOfData: raise ValueError("Unpack failed: incomplete input") - except RecursionError as e: - if _is_recursionerror(e): - raise StackError - raise + except RecursionError: + raise StackError if unpacker._got_extradata(): raise ExtraData(ret, unpacker._get_extradata()) return ret -if sys.version_info < (2, 7, 6): - - def _unpack_from(f, b, o=0): - """Explicit type cast for legacy struct.unpack_from""" - return struct.unpack_from(f, bytes(b), o) - - -else: - _unpack_from = struct.unpack_from - - -class Unpacker(object): +_NO_FORMAT_USED = "" +_MSGPACK_HEADERS = { + 0xC4: (1, _NO_FORMAT_USED, TYPE_BIN), + 0xC5: (2, ">H", TYPE_BIN), + 0xC6: (4, ">I", TYPE_BIN), + 0xC7: (2, "Bb", TYPE_EXT), + 0xC8: (3, ">Hb", TYPE_EXT), + 0xC9: (5, ">Ib", TYPE_EXT), + 0xCA: (4, ">f"), + 0xCB: (8, ">d"), + 0xCC: (1, _NO_FORMAT_USED), + 0xCD: (2, ">H"), + 0xCE: (4, ">I"), + 0xCF: (8, ">Q"), + 0xD0: (1, "b"), + 0xD1: (2, ">h"), + 0xD2: (4, ">i"), + 0xD3: (8, ">q"), + 0xD4: (1, "b1s", TYPE_EXT), + 0xD5: (2, "b2s", TYPE_EXT), + 0xD6: (4, "b4s", TYPE_EXT), + 0xD7: (8, "b8s", TYPE_EXT), + 0xD8: (16, "b16s", TYPE_EXT), + 0xD9: (1, _NO_FORMAT_USED, TYPE_RAW), + 0xDA: (2, ">H", TYPE_RAW), + 0xDB: (4, ">I", TYPE_RAW), + 0xDC: (2, ">H", TYPE_ARRAY), + 0xDD: (4, ">I", TYPE_ARRAY), + 0xDE: (2, ">H", TYPE_MAP), + 0xDF: (4, ">I", TYPE_MAP), +} + + +class Unpacker: """Streaming unpacker. Arguments: :param file_like: File-like object having `.read(n)` method. - If specified, unpacker reads serialized data from it and :meth:`feed()` is not usable. + If specified, unpacker reads serialized data from it and `.feed()` is not usable. :param int read_size: Used as `file_like.read(read_size)`. (default: `min(16*1024, max_buffer_size)`) @@ -175,17 +153,17 @@ class Unpacker(object): 0 - Timestamp 1 - float (Seconds from the EPOCH) 2 - int (Nanoseconds from the EPOCH) - 3 - datetime.datetime (UTC). Python 2 is not supported. + 3 - datetime.datetime (UTC). :param bool strict_map_key: If true (default), only str or bytes are accepted for map (dict) keys. - :param callable object_hook: + :param object_hook: When specified, it should be callable. Unpacker calls it with a dict argument after unpacking msgpack map. (See also simplejson) - :param callable object_pairs_hook: + :param object_pairs_hook: When specified, it should be callable. 
Unpacker calls it with a list of key-value pairs after unpacking msgpack map. (See also simplejson) @@ -229,7 +207,7 @@ class Unpacker(object): Example of streaming deserialize from socket:: - unpacker = Unpacker(max_buffer_size) + unpacker = Unpacker() while True: buf = sock.recv(1024**2) if not buf: @@ -248,6 +226,7 @@ class Unpacker(object): def __init__( self, file_like=None, + *, read_size=0, use_list=True, raw=False, @@ -291,7 +270,7 @@ def __init__( self._buf_checkpoint = 0 if not max_buffer_size: - max_buffer_size = 2 ** 31 - 1 + max_buffer_size = 2**31 - 1 if max_str_len == -1: max_str_len = max_buffer_size if max_bin_len == -1: @@ -332,9 +311,7 @@ def __init__( if object_pairs_hook is not None and not callable(object_pairs_hook): raise TypeError("`object_pairs_hook` is not callable") if object_hook is not None and object_pairs_hook is not None: - raise TypeError( - "object_pairs_hook and object_hook are mutually " "exclusive" - ) + raise TypeError("object_pairs_hook and object_hook are mutually exclusive") if not callable(ext_hook): raise TypeError("`ext_hook` is not callable") @@ -352,9 +329,10 @@ def feed(self, next_bytes): # Use extend here: INPLACE_ADD += doesn't reliably typecast memoryview in jython self._buffer.extend(view) + view.release() def _consume(self): - """ Gets rid of the used parts of the buffer. """ + """Gets rid of the used parts of the buffer.""" self._stream_offset += self._buff_i - self._buf_checkpoint self._buf_checkpoint = self._buff_i @@ -396,6 +374,8 @@ def _reserve(self, n, raise_outofdata=True): # Read from file remain_bytes = -remain_bytes + if remain_bytes + len(self._buffer) > self._max_buffer_size: + raise BufferFull while remain_bytes > 0: to_read_bytes = max(self._read_size, remain_bytes) read_data = self.file_like.read(to_read_bytes) @@ -409,7 +389,7 @@ def _reserve(self, n, raise_outofdata=True): self._buff_i = 0 # rollback raise OutOfData - def _read_header(self, execute=EX_CONSTRUCT): + def _read_header(self): typ = TYPE_IMMEDIATE n = 0 obj = None @@ -424,205 +404,89 @@ def _read_header(self, execute=EX_CONSTRUCT): n = b & 0b00011111 typ = TYPE_RAW if n > self._max_str_len: - raise ValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) + raise ValueError(f"{n} exceeds max_str_len({self._max_str_len})") obj = self._read(n) elif b & 0b11110000 == 0b10010000: n = b & 0b00001111 typ = TYPE_ARRAY if n > self._max_array_len: - raise ValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) + raise ValueError(f"{n} exceeds max_array_len({self._max_array_len})") elif b & 0b11110000 == 0b10000000: n = b & 0b00001111 typ = TYPE_MAP if n > self._max_map_len: - raise ValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) + raise ValueError(f"{n} exceeds max_map_len({self._max_map_len})") elif b == 0xC0: obj = None elif b == 0xC2: obj = False elif b == 0xC3: obj = True - elif b == 0xC4: - typ = TYPE_BIN - self._reserve(1) - n = self._buffer[self._buff_i] - self._buff_i += 1 - if n > self._max_bin_len: - raise ValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) - obj = self._read(n) - elif b == 0xC5: - typ = TYPE_BIN - self._reserve(2) - n = _unpack_from(">H", self._buffer, self._buff_i)[0] - self._buff_i += 2 - if n > self._max_bin_len: - raise ValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) - obj = self._read(n) - elif b == 0xC6: - typ = TYPE_BIN - self._reserve(4) - n = _unpack_from(">I", self._buffer, self._buff_i)[0] - self._buff_i += 4 + elif 0xC4 <= b <= 0xC6: + size, fmt, typ = 
_MSGPACK_HEADERS[b] + self._reserve(size) + if len(fmt) > 0: + n = struct.unpack_from(fmt, self._buffer, self._buff_i)[0] + else: + n = self._buffer[self._buff_i] + self._buff_i += size if n > self._max_bin_len: - raise ValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) + raise ValueError(f"{n} exceeds max_bin_len({self._max_bin_len})") obj = self._read(n) - elif b == 0xC7: # ext 8 - typ = TYPE_EXT - self._reserve(2) - L, n = _unpack_from("Bb", self._buffer, self._buff_i) - self._buff_i += 2 - if L > self._max_ext_len: - raise ValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) - obj = self._read(L) - elif b == 0xC8: # ext 16 - typ = TYPE_EXT - self._reserve(3) - L, n = _unpack_from(">Hb", self._buffer, self._buff_i) - self._buff_i += 3 - if L > self._max_ext_len: - raise ValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) - obj = self._read(L) - elif b == 0xC9: # ext 32 - typ = TYPE_EXT - self._reserve(5) - L, n = _unpack_from(">Ib", self._buffer, self._buff_i) - self._buff_i += 5 + elif 0xC7 <= b <= 0xC9: + size, fmt, typ = _MSGPACK_HEADERS[b] + self._reserve(size) + L, n = struct.unpack_from(fmt, self._buffer, self._buff_i) + self._buff_i += size if L > self._max_ext_len: - raise ValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) + raise ValueError(f"{L} exceeds max_ext_len({self._max_ext_len})") obj = self._read(L) - elif b == 0xCA: - self._reserve(4) - obj = _unpack_from(">f", self._buffer, self._buff_i)[0] - self._buff_i += 4 - elif b == 0xCB: - self._reserve(8) - obj = _unpack_from(">d", self._buffer, self._buff_i)[0] - self._buff_i += 8 - elif b == 0xCC: - self._reserve(1) - obj = self._buffer[self._buff_i] - self._buff_i += 1 - elif b == 0xCD: - self._reserve(2) - obj = _unpack_from(">H", self._buffer, self._buff_i)[0] - self._buff_i += 2 - elif b == 0xCE: - self._reserve(4) - obj = _unpack_from(">I", self._buffer, self._buff_i)[0] - self._buff_i += 4 - elif b == 0xCF: - self._reserve(8) - obj = _unpack_from(">Q", self._buffer, self._buff_i)[0] - self._buff_i += 8 - elif b == 0xD0: - self._reserve(1) - obj = _unpack_from("b", self._buffer, self._buff_i)[0] - self._buff_i += 1 - elif b == 0xD1: - self._reserve(2) - obj = _unpack_from(">h", self._buffer, self._buff_i)[0] - self._buff_i += 2 - elif b == 0xD2: - self._reserve(4) - obj = _unpack_from(">i", self._buffer, self._buff_i)[0] - self._buff_i += 4 - elif b == 0xD3: - self._reserve(8) - obj = _unpack_from(">q", self._buffer, self._buff_i)[0] - self._buff_i += 8 - elif b == 0xD4: # fixext 1 - typ = TYPE_EXT - if self._max_ext_len < 1: - raise ValueError("%s exceeds max_ext_len(%s)" % (1, self._max_ext_len)) - self._reserve(2) - n, obj = _unpack_from("b1s", self._buffer, self._buff_i) - self._buff_i += 2 - elif b == 0xD5: # fixext 2 - typ = TYPE_EXT - if self._max_ext_len < 2: - raise ValueError("%s exceeds max_ext_len(%s)" % (2, self._max_ext_len)) - self._reserve(3) - n, obj = _unpack_from("b2s", self._buffer, self._buff_i) - self._buff_i += 3 - elif b == 0xD6: # fixext 4 - typ = TYPE_EXT - if self._max_ext_len < 4: - raise ValueError("%s exceeds max_ext_len(%s)" % (4, self._max_ext_len)) - self._reserve(5) - n, obj = _unpack_from("b4s", self._buffer, self._buff_i) - self._buff_i += 5 - elif b == 0xD7: # fixext 8 - typ = TYPE_EXT - if self._max_ext_len < 8: - raise ValueError("%s exceeds max_ext_len(%s)" % (8, self._max_ext_len)) - self._reserve(9) - n, obj = _unpack_from("b8s", self._buffer, self._buff_i) - self._buff_i += 9 - elif b == 0xD8: # fixext 16 - typ = 
TYPE_EXT - if self._max_ext_len < 16: - raise ValueError("%s exceeds max_ext_len(%s)" % (16, self._max_ext_len)) - self._reserve(17) - n, obj = _unpack_from("b16s", self._buffer, self._buff_i) - self._buff_i += 17 - elif b == 0xD9: - typ = TYPE_RAW - self._reserve(1) - n = self._buffer[self._buff_i] - self._buff_i += 1 - if n > self._max_str_len: - raise ValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) - obj = self._read(n) - elif b == 0xDA: - typ = TYPE_RAW - self._reserve(2) - (n,) = _unpack_from(">H", self._buffer, self._buff_i) - self._buff_i += 2 - if n > self._max_str_len: - raise ValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) - obj = self._read(n) - elif b == 0xDB: - typ = TYPE_RAW - self._reserve(4) - (n,) = _unpack_from(">I", self._buffer, self._buff_i) - self._buff_i += 4 + elif 0xCA <= b <= 0xD3: + size, fmt = _MSGPACK_HEADERS[b] + self._reserve(size) + if len(fmt) > 0: + obj = struct.unpack_from(fmt, self._buffer, self._buff_i)[0] + else: + obj = self._buffer[self._buff_i] + self._buff_i += size + elif 0xD4 <= b <= 0xD8: + size, fmt, typ = _MSGPACK_HEADERS[b] + if self._max_ext_len < size: + raise ValueError(f"{size} exceeds max_ext_len({self._max_ext_len})") + self._reserve(size + 1) + n, obj = struct.unpack_from(fmt, self._buffer, self._buff_i) + self._buff_i += size + 1 + elif 0xD9 <= b <= 0xDB: + size, fmt, typ = _MSGPACK_HEADERS[b] + self._reserve(size) + if len(fmt) > 0: + (n,) = struct.unpack_from(fmt, self._buffer, self._buff_i) + else: + n = self._buffer[self._buff_i] + self._buff_i += size if n > self._max_str_len: - raise ValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) + raise ValueError(f"{n} exceeds max_str_len({self._max_str_len})") obj = self._read(n) - elif b == 0xDC: - typ = TYPE_ARRAY - self._reserve(2) - (n,) = _unpack_from(">H", self._buffer, self._buff_i) - self._buff_i += 2 - if n > self._max_array_len: - raise ValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) - elif b == 0xDD: - typ = TYPE_ARRAY - self._reserve(4) - (n,) = _unpack_from(">I", self._buffer, self._buff_i) - self._buff_i += 4 + elif 0xDC <= b <= 0xDD: + size, fmt, typ = _MSGPACK_HEADERS[b] + self._reserve(size) + (n,) = struct.unpack_from(fmt, self._buffer, self._buff_i) + self._buff_i += size if n > self._max_array_len: - raise ValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) - elif b == 0xDE: - self._reserve(2) - (n,) = _unpack_from(">H", self._buffer, self._buff_i) - self._buff_i += 2 - if n > self._max_map_len: - raise ValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) - typ = TYPE_MAP - elif b == 0xDF: - self._reserve(4) - (n,) = _unpack_from(">I", self._buffer, self._buff_i) - self._buff_i += 4 + raise ValueError(f"{n} exceeds max_array_len({self._max_array_len})") + elif 0xDE <= b <= 0xDF: + size, fmt, typ = _MSGPACK_HEADERS[b] + self._reserve(size) + (n,) = struct.unpack_from(fmt, self._buffer, self._buff_i) + self._buff_i += size if n > self._max_map_len: - raise ValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) - typ = TYPE_MAP + raise ValueError(f"{n} exceeds max_map_len({self._max_map_len})") else: raise FormatError("Unknown header: 0x%x" % b) return typ, n, obj def _unpack(self, execute=EX_CONSTRUCT): - typ, n, obj = self._read_header(execute) + typ, n, obj = self._read_header() if execute == EX_READ_ARRAY_HEADER: if typ != TYPE_ARRAY: @@ -635,12 +499,12 @@ def _unpack(self, execute=EX_CONSTRUCT): # TODO should we eliminate the recursion? 
if typ == TYPE_ARRAY: if execute == EX_SKIP: - for i in xrange(n): + for i in range(n): # TODO check whether we need to call `list_hook` self._unpack(EX_SKIP) return ret = newlist_hint(n) - for i in xrange(n): + for i in range(n): ret.append(self._unpack(EX_CONSTRUCT)) if self._list_hook is not None: ret = self._list_hook(ret) @@ -648,25 +512,22 @@ def _unpack(self, execute=EX_CONSTRUCT): return ret if self._use_list else tuple(ret) if typ == TYPE_MAP: if execute == EX_SKIP: - for i in xrange(n): + for i in range(n): # TODO check whether we need to call hooks self._unpack(EX_SKIP) self._unpack(EX_SKIP) return if self._object_pairs_hook is not None: ret = self._object_pairs_hook( - (self._unpack(EX_CONSTRUCT), self._unpack(EX_CONSTRUCT)) - for _ in xrange(n) + (self._unpack(EX_CONSTRUCT), self._unpack(EX_CONSTRUCT)) for _ in range(n) ) else: ret = {} - for _ in xrange(n): + for _ in range(n): key = self._unpack(EX_CONSTRUCT) - if self._strict_map_key and type(key) not in (unicode, bytes): - raise ValueError( - "%s is not allowed for map key" % str(type(key)) - ) - if not PY2 and type(key) is str: + if self._strict_map_key and type(key) not in (str, bytes): + raise ValueError("%s is not allowed for map key" % str(type(key))) + if isinstance(key, str): key = sys.intern(key) ret[key] = self._unpack(EX_CONSTRUCT) if self._object_hook is not None: @@ -740,11 +601,11 @@ def tell(self): return self._stream_offset -class Packer(object): +class Packer: """ MessagePack Packer - Usage: + Usage:: packer = Packer() astream.write(packer.pack(a)) @@ -752,7 +613,8 @@ class Packer(object): Packer's constructor has some keyword arguments: - :param callable default: + :param default: + When specified, it should be callable. Convert user type to builtin type that Packer supports. See also simplejson's document. @@ -779,15 +641,18 @@ class Packer(object): If set to true, datetime with tzinfo is packed into Timestamp type. Note that the tzinfo is stripped in the timestamp. You can get UTC datetime with `timestamp=3` option of the Unpacker. - (Python 2 is not supported). :param str unicode_errors: The error handler for encoding unicode. (default: 'strict') DO NOT USE THIS!! This option is kept for very specific usage. + + :param int buf_size: + Internal buffer size. This option is used only for C implementation. 
""" def __init__( self, + *, default=None, use_single_float=False, autoreset=True, @@ -795,19 +660,17 @@ def __init__( strict_types=False, datetime=False, unicode_errors=None, + buf_size=None, ): self._strict_types = strict_types self._use_float = use_single_float self._autoreset = autoreset self._use_bin_type = use_bin_type - self._buffer = StringIO() - if PY2 and datetime: - raise ValueError("datetime is not supported in Python 2") + self._buffer = BytesIO() self._datetime = bool(datetime) self._unicode_errors = unicode_errors or "strict" - if default is not None: - if not callable(default): - raise TypeError("default must be callable") + if default is not None and not callable(default): + raise TypeError("default must be callable") self._default = default def _pack( @@ -832,7 +695,7 @@ def _pack( if obj: return self._buffer.write(b"\xc3") return self._buffer.write(b"\xc2") - if check(obj, int_types): + if check(obj, int): if 0 <= obj < 0x80: return self._buffer.write(struct.pack("B", obj)) if -0x20 <= obj < 0: @@ -860,20 +723,20 @@ def _pack( raise OverflowError("Integer value out of range") if check(obj, (bytes, bytearray)): n = len(obj) - if n >= 2 ** 32: + if n >= 2**32: raise ValueError("%s is too large" % type(obj).__name__) self._pack_bin_header(n) return self._buffer.write(obj) - if check(obj, unicode): + if check(obj, str): obj = obj.encode("utf-8", self._unicode_errors) n = len(obj) - if n >= 2 ** 32: + if n >= 2**32: raise ValueError("String is too large") self._pack_raw_header(n) return self._buffer.write(obj) if check(obj, memoryview): - n = len(obj) * obj.itemsize - if n >= 2 ** 32: + n = obj.nbytes + if n >= 2**32: raise ValueError("Memoryview is too large") self._pack_bin_header(n) return self._buffer.write(obj) @@ -913,15 +776,13 @@ def _pack( if check(obj, list_types): n = len(obj) self._pack_array_header(n) - for i in xrange(n): + for i in range(n): self._pack(obj[i], nest_limit - 1) return if check(obj, dict): - return self._pack_map_pairs( - len(obj), dict_iteritems(obj), nest_limit - 1 - ) + return self._pack_map_pairs(len(obj), obj.items(), nest_limit - 1) - if self._datetime and check(obj, _DateTime): + if self._datetime and check(obj, _DateTime) and obj.tzinfo is not None: obj = Timestamp.from_datetime(obj) default_used = 1 continue @@ -930,42 +791,46 @@ def _pack( obj = self._default(obj) default_used = 1 continue - raise TypeError("Cannot serialize %r" % (obj,)) + + if self._datetime and check(obj, _DateTime): + raise ValueError(f"Cannot serialize {obj!r} where tzinfo=None") + + raise TypeError(f"Cannot serialize {obj!r}") def pack(self, obj): try: self._pack(obj) except: - self._buffer = StringIO() # force reset + self._buffer = BytesIO() # force reset raise if self._autoreset: ret = self._buffer.getvalue() - self._buffer = StringIO() + self._buffer = BytesIO() return ret def pack_map_pairs(self, pairs): self._pack_map_pairs(len(pairs), pairs) if self._autoreset: ret = self._buffer.getvalue() - self._buffer = StringIO() + self._buffer = BytesIO() return ret def pack_array_header(self, n): - if n >= 2 ** 32: + if n >= 2**32: raise ValueError self._pack_array_header(n) if self._autoreset: ret = self._buffer.getvalue() - self._buffer = StringIO() + self._buffer = BytesIO() return ret def pack_map_header(self, n): - if n >= 2 ** 32: + if n >= 2**32: raise ValueError self._pack_map_header(n) if self._autoreset: ret = self._buffer.getvalue() - self._buffer = StringIO() + self._buffer = BytesIO() return ret def pack_ext_type(self, typecode, data): @@ -1017,7 +882,7 
@@ def _pack_map_header(self, n): def _pack_map_pairs(self, n, pairs, nest_limit=DEFAULT_RECURSE_LIMIT): self._pack_map_header(n) - for (k, v) in pairs: + for k, v in pairs: self._pack(k, nest_limit - 1) self._pack(v, nest_limit - 1) @@ -1054,11 +919,11 @@ def reset(self): This method is useful only when autoreset=False. """ - self._buffer = StringIO() + self._buffer = BytesIO() def getbuffer(self): """Return view of internal buffer.""" - if USING_STRINGBUILDER or PY2: + if _USING_STRINGBUILDER: return memoryview(self.bytes()) else: return self._buffer.getbuffer() diff --git a/msgpack/pack.h b/msgpack/pack.h index 4f3ce1d9..edf3a3fe 100644 --- a/msgpack/pack.h +++ b/msgpack/pack.h @@ -21,15 +21,12 @@ #include "sysdep.h" #include #include +#include #ifdef __cplusplus extern "C" { #endif -#ifdef _MSC_VER -#define inline __inline -#endif - typedef struct msgpack_packer { char *buf; size_t length; @@ -67,53 +64,6 @@ static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_ #include "pack_template.h" -// return -2 when o is too long -static inline int -msgpack_pack_unicode(msgpack_packer *pk, PyObject *o, long long limit) -{ -#if PY_MAJOR_VERSION >= 3 - assert(PyUnicode_Check(o)); - - Py_ssize_t len; - const char* buf = PyUnicode_AsUTF8AndSize(o, &len); - if (buf == NULL) - return -1; - - if (len > limit) { - return -2; - } - - int ret = msgpack_pack_raw(pk, len); - if (ret) return ret; - - return msgpack_pack_raw_body(pk, buf, len); -#else - PyObject *bytes; - Py_ssize_t len; - int ret; - - // py2 - bytes = PyUnicode_AsUTF8String(o); - if (bytes == NULL) - return -1; - - len = PyString_GET_SIZE(bytes); - if (len > limit) { - Py_DECREF(bytes); - return -2; - } - - ret = msgpack_pack_raw(pk, len); - if (ret) { - Py_DECREF(bytes); - return -1; - } - ret = msgpack_pack_raw_body(pk, PyString_AS_STRING(bytes), len); - Py_DECREF(bytes); - return ret; -#endif -} - #ifdef __cplusplus } #endif diff --git a/msgpack/pack_template.h b/msgpack/pack_template.h index 0e940b84..b8959f02 100644 --- a/msgpack/pack_template.h +++ b/msgpack/pack_template.h @@ -37,18 +37,6 @@ * Integer */ -#define msgpack_pack_real_uint8(x, d) \ -do { \ - if(d < (1<<7)) { \ - /* fixnum */ \ - msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); \ - } else { \ - /* unsigned 8 */ \ - unsigned char buf[2] = {0xcc, TAKE8_8(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } \ -} while(0) - #define msgpack_pack_real_uint16(x, d) \ do { \ if(d < (1<<7)) { \ @@ -123,18 +111,6 @@ do { \ } \ } while(0) -#define msgpack_pack_real_int8(x, d) \ -do { \ - if(d < -(1<<5)) { \ - /* signed 8 */ \ - unsigned char buf[2] = {0xd0, TAKE8_8(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } else { \ - /* fixnum */ \ - msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); \ - } \ -} while(0) - #define msgpack_pack_real_int16(x, d) \ do { \ if(d < -(1<<5)) { \ @@ -264,49 +240,6 @@ do { \ } while(0) -static inline int msgpack_pack_uint8(msgpack_packer* x, uint8_t d) -{ - msgpack_pack_real_uint8(x, d); -} - -static inline int msgpack_pack_uint16(msgpack_packer* x, uint16_t d) -{ - msgpack_pack_real_uint16(x, d); -} - -static inline int msgpack_pack_uint32(msgpack_packer* x, uint32_t d) -{ - msgpack_pack_real_uint32(x, d); -} - -static inline int msgpack_pack_uint64(msgpack_packer* x, uint64_t d) -{ - msgpack_pack_real_uint64(x, d); -} - -static inline int msgpack_pack_int8(msgpack_packer* x, int8_t d) -{ - msgpack_pack_real_int8(x, d); -} - -static inline int msgpack_pack_int16(msgpack_packer* x, int16_t d) -{ - msgpack_pack_real_int16(x, 
d); -} - -static inline int msgpack_pack_int32(msgpack_packer* x, int32_t d) -{ - msgpack_pack_real_int32(x, d); -} - -static inline int msgpack_pack_int64(msgpack_packer* x, int64_t d) -{ - msgpack_pack_real_int64(x, d); -} - - -//#ifdef msgpack_pack_inline_func_cint - static inline int msgpack_pack_short(msgpack_packer* x, short d) { #if defined(SIZEOF_SHORT) @@ -372,192 +305,37 @@ if(sizeof(int) == 2) { static inline int msgpack_pack_long(msgpack_packer* x, long d) { #if defined(SIZEOF_LONG) -#if SIZEOF_LONG == 2 - msgpack_pack_real_int16(x, d); -#elif SIZEOF_LONG == 4 +#if SIZEOF_LONG == 4 msgpack_pack_real_int32(x, d); #else msgpack_pack_real_int64(x, d); #endif #elif defined(LONG_MAX) -#if LONG_MAX == 0x7fffL - msgpack_pack_real_int16(x, d); -#elif LONG_MAX == 0x7fffffffL +#if LONG_MAX == 0x7fffffffL msgpack_pack_real_int32(x, d); #else msgpack_pack_real_int64(x, d); #endif #else -if(sizeof(long) == 2) { - msgpack_pack_real_int16(x, d); -} else if(sizeof(long) == 4) { - msgpack_pack_real_int32(x, d); -} else { - msgpack_pack_real_int64(x, d); -} + if (sizeof(long) == 4) { + msgpack_pack_real_int32(x, d); + } else { + msgpack_pack_real_int64(x, d); + } #endif } static inline int msgpack_pack_long_long(msgpack_packer* x, long long d) { -#if defined(SIZEOF_LONG_LONG) -#if SIZEOF_LONG_LONG == 2 - msgpack_pack_real_int16(x, d); -#elif SIZEOF_LONG_LONG == 4 - msgpack_pack_real_int32(x, d); -#else - msgpack_pack_real_int64(x, d); -#endif - -#elif defined(LLONG_MAX) -#if LLONG_MAX == 0x7fffL - msgpack_pack_real_int16(x, d); -#elif LLONG_MAX == 0x7fffffffL - msgpack_pack_real_int32(x, d); -#else msgpack_pack_real_int64(x, d); -#endif - -#else -if(sizeof(long long) == 2) { - msgpack_pack_real_int16(x, d); -} else if(sizeof(long long) == 4) { - msgpack_pack_real_int32(x, d); -} else { - msgpack_pack_real_int64(x, d); -} -#endif -} - -static inline int msgpack_pack_unsigned_short(msgpack_packer* x, unsigned short d) -{ -#if defined(SIZEOF_SHORT) -#if SIZEOF_SHORT == 2 - msgpack_pack_real_uint16(x, d); -#elif SIZEOF_SHORT == 4 - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#elif defined(USHRT_MAX) -#if USHRT_MAX == 0xffffU - msgpack_pack_real_uint16(x, d); -#elif USHRT_MAX == 0xffffffffU - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#else -if(sizeof(unsigned short) == 2) { - msgpack_pack_real_uint16(x, d); -} else if(sizeof(unsigned short) == 4) { - msgpack_pack_real_uint32(x, d); -} else { - msgpack_pack_real_uint64(x, d); -} -#endif -} - -static inline int msgpack_pack_unsigned_int(msgpack_packer* x, unsigned int d) -{ -#if defined(SIZEOF_INT) -#if SIZEOF_INT == 2 - msgpack_pack_real_uint16(x, d); -#elif SIZEOF_INT == 4 - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#elif defined(UINT_MAX) -#if UINT_MAX == 0xffffU - msgpack_pack_real_uint16(x, d); -#elif UINT_MAX == 0xffffffffU - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#else -if(sizeof(unsigned int) == 2) { - msgpack_pack_real_uint16(x, d); -} else if(sizeof(unsigned int) == 4) { - msgpack_pack_real_uint32(x, d); -} else { - msgpack_pack_real_uint64(x, d); -} -#endif -} - -static inline int msgpack_pack_unsigned_long(msgpack_packer* x, unsigned long d) -{ -#if defined(SIZEOF_LONG) -#if SIZEOF_LONG == 2 - msgpack_pack_real_uint16(x, d); -#elif SIZEOF_LONG == 4 - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#elif defined(ULONG_MAX) -#if ULONG_MAX == 
0xffffUL - msgpack_pack_real_uint16(x, d); -#elif ULONG_MAX == 0xffffffffUL - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#else -if(sizeof(unsigned long) == 2) { - msgpack_pack_real_uint16(x, d); -} else if(sizeof(unsigned long) == 4) { - msgpack_pack_real_uint32(x, d); -} else { - msgpack_pack_real_uint64(x, d); -} -#endif } static inline int msgpack_pack_unsigned_long_long(msgpack_packer* x, unsigned long long d) { -#if defined(SIZEOF_LONG_LONG) -#if SIZEOF_LONG_LONG == 2 - msgpack_pack_real_uint16(x, d); -#elif SIZEOF_LONG_LONG == 4 - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#elif defined(ULLONG_MAX) -#if ULLONG_MAX == 0xffffUL - msgpack_pack_real_uint16(x, d); -#elif ULLONG_MAX == 0xffffffffUL - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#else -if(sizeof(unsigned long long) == 2) { - msgpack_pack_real_uint16(x, d); -} else if(sizeof(unsigned long long) == 4) { - msgpack_pack_real_uint32(x, d); -} else { msgpack_pack_real_uint64(x, d); } -#endif -} - -//#undef msgpack_pack_inline_func_cint -//#endif - /* @@ -568,7 +346,12 @@ static inline int msgpack_pack_float(msgpack_packer* x, float d) { unsigned char buf[5]; buf[0] = 0xca; + +#if PY_VERSION_HEX >= 0x030B00A7 + PyFloat_Pack4(d, (char *)&buf[1], 0); +#else _PyFloat_Pack4(d, &buf[1], 0); +#endif msgpack_pack_append_buffer(x, buf, 5); } @@ -576,7 +359,11 @@ static inline int msgpack_pack_double(msgpack_packer* x, double d) { unsigned char buf[9]; buf[0] = 0xcb; +#if PY_VERSION_HEX >= 0x030B00A7 + PyFloat_Pack8(d, (char *)&buf[1], 0); +#else _PyFloat_Pack8(d, &buf[1], 0); +#endif msgpack_pack_append_buffer(x, buf, 9); } @@ -801,11 +588,9 @@ static inline int msgpack_pack_timestamp(msgpack_packer* x, int64_t seconds, uin #undef TAKE8_32 #undef TAKE8_64 -#undef msgpack_pack_real_uint8 #undef msgpack_pack_real_uint16 #undef msgpack_pack_real_uint32 #undef msgpack_pack_real_uint64 -#undef msgpack_pack_real_int8 #undef msgpack_pack_real_int16 #undef msgpack_pack_real_int32 #undef msgpack_pack_real_int64 diff --git a/msgpack/sysdep.h b/msgpack/sysdep.h index ed9c1bc0..70673004 100644 --- a/msgpack/sysdep.h +++ b/msgpack/sysdep.h @@ -61,14 +61,14 @@ typedef unsigned int _msgpack_atomic_counter_t; #endif #endif -#else -#include /* __BYTE_ORDER */ +#else /* _WIN32 */ +#include /* ntohs, ntohl */ #endif #if !defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__) -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define __LITTLE_ENDIAN__ -#elif __BYTE_ORDER == __BIG_ENDIAN +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #define __BIG_ENDIAN__ #elif _WIN32 #define __LITTLE_ENDIAN__ @@ -95,7 +95,7 @@ typedef unsigned int _msgpack_atomic_counter_t; #ifdef _WIN32 # if defined(ntohl) # define _msgpack_be32(x) ntohl(x) -# elif defined(_byteswap_ulong) || (defined(_MSC_VER) && _MSC_VER >= 1400) +# elif defined(_byteswap_ulong) || defined(_MSC_VER) # define _msgpack_be32(x) ((uint32_t)_byteswap_ulong((unsigned long)x)) # else # define _msgpack_be32(x) \ @@ -108,7 +108,7 @@ typedef unsigned int _msgpack_atomic_counter_t; # define _msgpack_be32(x) ntohl(x) #endif -#if defined(_byteswap_uint64) || (defined(_MSC_VER) && _MSC_VER >= 1400) +#if defined(_byteswap_uint64) || defined(_MSC_VER) # define _msgpack_be64(x) (_byteswap_uint64(x)) #elif defined(bswap_64) # define _msgpack_be64(x) bswap_64(x) diff --git a/msgpack/unpack.h b/msgpack/unpack.h index debdf715..58a2f4f5 100644 --- a/msgpack/unpack.h +++ 
b/msgpack/unpack.h @@ -47,7 +47,7 @@ static inline msgpack_unpack_object unpack_callback_root(unpack_user* u) static inline int unpack_callback_uint16(unpack_user* u, uint16_t d, msgpack_unpack_object* o) { - PyObject *p = PyInt_FromLong((long)d); + PyObject *p = PyLong_FromLong((long)d); if (!p) return -1; *o = p; @@ -61,7 +61,7 @@ static inline int unpack_callback_uint8(unpack_user* u, uint8_t d, msgpack_unpac static inline int unpack_callback_uint32(unpack_user* u, uint32_t d, msgpack_unpack_object* o) { - PyObject *p = PyInt_FromSize_t((size_t)d); + PyObject *p = PyLong_FromSize_t((size_t)d); if (!p) return -1; *o = p; @@ -74,7 +74,7 @@ static inline int unpack_callback_uint64(unpack_user* u, uint64_t d, msgpack_unp if (d > LONG_MAX) { p = PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG)d); } else { - p = PyInt_FromLong((long)d); + p = PyLong_FromLong((long)d); } if (!p) return -1; @@ -84,7 +84,7 @@ static inline int unpack_callback_uint64(unpack_user* u, uint64_t d, msgpack_unp static inline int unpack_callback_int32(unpack_user* u, int32_t d, msgpack_unpack_object* o) { - PyObject *p = PyInt_FromLong(d); + PyObject *p = PyLong_FromLong(d); if (!p) return -1; *o = p; @@ -107,7 +107,7 @@ static inline int unpack_callback_int64(unpack_user* u, int64_t d, msgpack_unpac if (d > LONG_MAX || d < LONG_MIN) { p = PyLong_FromLongLong((PY_LONG_LONG)d); } else { - p = PyInt_FromLong((long)d); + p = PyLong_FromLong((long)d); } *o = p; return 0; @@ -193,7 +193,7 @@ static inline int unpack_callback_map(unpack_user* u, unsigned int n, msgpack_un static inline int unpack_callback_map_item(unpack_user* u, unsigned int current, msgpack_unpack_object* c, msgpack_unpack_object k, msgpack_unpack_object v) { if (u->strict_map_key && !PyUnicode_CheckExact(k) && !PyBytes_CheckExact(k)) { - PyErr_Format(PyExc_ValueError, "%.100s is not allowed for map key", Py_TYPE(k)->tp_name); + PyErr_Format(PyExc_ValueError, "%.100s is not allowed for map key when strict_map_key=True", Py_TYPE(k)->tp_name); return -1; } if (PyUnicode_CheckExact(k)) { @@ -341,7 +341,26 @@ static int unpack_callback_ext(unpack_user* u, const char* base, const char* pos else if (u->timestamp == 0) { // Timestamp py = PyObject_CallFunction(u->timestamp_t, "(Lk)", ts.tv_sec, ts.tv_nsec); } - else { // float or datetime + else if (u->timestamp == 3) { // datetime + // Calculate datetime using epoch + delta + // due to limitations PyDateTime_FromTimestamp on Windows with negative timestamps + PyObject *epoch = PyDateTimeAPI->DateTime_FromDateAndTime(1970, 1, 1, 0, 0, 0, 0, u->utc, PyDateTimeAPI->DateTimeType); + if (epoch == NULL) { + return -1; + } + + PyObject* d = PyDelta_FromDSU(ts.tv_sec/(24*3600), ts.tv_sec%(24*3600), ts.tv_nsec / 1000); + if (d == NULL) { + Py_DECREF(epoch); + return -1; + } + + py = PyNumber_Add(epoch, d); + + Py_DECREF(epoch); + Py_DECREF(d); + } + else { // float PyObject *a = PyFloat_FromDouble((double)ts.tv_nsec); if (a == NULL) return -1; @@ -358,18 +377,7 @@ static int unpack_callback_ext(unpack_user* u, const char* base, const char* pos a = PyNumber_Add(b, c); Py_DECREF(b); Py_DECREF(c); - - if (u->timestamp == 3) { // datetime - PyObject *t = PyTuple_Pack(2, a, u->utc); - Py_DECREF(a); - if (t == NULL) { - return -1; - } - py = PyDateTime_FromTimestamp(t); - Py_DECREF(t); - } else { // float - py = a; - } + py = a; } } else { py = PyObject_CallFunction(u->ext_hook, "(iy#)", (int)typecode, pos, (Py_ssize_t)length-1); diff --git a/msgpack/unpack_container_header.h b/msgpack/unpack_container_header.h new file 
mode 100644 index 00000000..c14a3c2b --- /dev/null +++ b/msgpack/unpack_container_header.h @@ -0,0 +1,51 @@ +static inline int unpack_container_header(unpack_context* ctx, const char* data, Py_ssize_t len, Py_ssize_t* off) +{ + assert(len >= *off); + uint32_t size; + const unsigned char *const p = (unsigned char*)data + *off; + +#define inc_offset(inc) \ + if (len - *off < inc) \ + return 0; \ + *off += inc; + + switch (*p) { + case var_offset: + inc_offset(3); + size = _msgpack_load16(uint16_t, p + 1); + break; + case var_offset + 1: + inc_offset(5); + size = _msgpack_load32(uint32_t, p + 1); + break; +#ifdef USE_CASE_RANGE + case fixed_offset + 0x0 ... fixed_offset + 0xf: +#else + case fixed_offset + 0x0: + case fixed_offset + 0x1: + case fixed_offset + 0x2: + case fixed_offset + 0x3: + case fixed_offset + 0x4: + case fixed_offset + 0x5: + case fixed_offset + 0x6: + case fixed_offset + 0x7: + case fixed_offset + 0x8: + case fixed_offset + 0x9: + case fixed_offset + 0xa: + case fixed_offset + 0xb: + case fixed_offset + 0xc: + case fixed_offset + 0xd: + case fixed_offset + 0xe: + case fixed_offset + 0xf: +#endif + ++*off; + size = ((unsigned int)*p) & 0x0f; + break; + default: + PyErr_SetString(PyExc_ValueError, "Unexpected type header on stream"); + return -1; + } + unpack_callback_uint32(&ctx->user, size, &ctx->stack[0].obj); + return 1; +} + diff --git a/msgpack/unpack_template.h b/msgpack/unpack_template.h index 9924b9c6..cce29e7a 100644 --- a/msgpack/unpack_template.h +++ b/msgpack/unpack_template.h @@ -75,8 +75,7 @@ static inline void unpack_clear(unpack_context *ctx) Py_CLEAR(ctx->stack[0].obj); } -template -static inline int unpack_execute(unpack_context* ctx, const char* data, Py_ssize_t len, Py_ssize_t* off) +static inline int unpack_execute(bool construct, unpack_context* ctx, const char* data, Py_ssize_t len, Py_ssize_t* off) { assert(len >= *off); @@ -243,10 +242,20 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, Py_ssize _msgpack_load32(uint32_t,n)+1, _ext_zero); case CS_FLOAT: { - double f = _PyFloat_Unpack4((unsigned char*)n, 0); + double f; +#if PY_VERSION_HEX >= 0x030B00A7 + f = PyFloat_Unpack4((const char*)n, 0); +#else + f = _PyFloat_Unpack4((unsigned char*)n, 0); +#endif push_fixed_value(_float, f); } case CS_DOUBLE: { - double f = _PyFloat_Unpack8((unsigned char*)n, 0); + double f; +#if PY_VERSION_HEX >= 0x030B00A7 + f = PyFloat_Unpack8((const char*)n, 0); +#else + f = _PyFloat_Unpack8((unsigned char*)n, 0); +#endif push_fixed_value(_double, f); } case CS_UINT_8: push_fixed_value(_uint8, *(uint8_t*)n); @@ -376,6 +385,7 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, Py_ssize #undef construct_cb } +#undef NEXT_CS #undef SWITCH_RANGE_BEGIN #undef SWITCH_RANGE #undef SWITCH_RANGE_DEFAULT @@ -387,68 +397,27 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, Py_ssize #undef again_fixed_trail_if_zero #undef start_container -template -static inline int unpack_container_header(unpack_context* ctx, const char* data, Py_ssize_t len, Py_ssize_t* off) -{ - assert(len >= *off); - uint32_t size; - const unsigned char *const p = (unsigned char*)data + *off; - -#define inc_offset(inc) \ - if (len - *off < inc) \ - return 0; \ - *off += inc; - - switch (*p) { - case var_offset: - inc_offset(3); - size = _msgpack_load16(uint16_t, p + 1); - break; - case var_offset + 1: - inc_offset(5); - size = _msgpack_load32(uint32_t, p + 1); - break; -#ifdef USE_CASE_RANGE - case fixed_offset + 0x0 ... 
fixed_offset + 0xf: -#else - case fixed_offset + 0x0: - case fixed_offset + 0x1: - case fixed_offset + 0x2: - case fixed_offset + 0x3: - case fixed_offset + 0x4: - case fixed_offset + 0x5: - case fixed_offset + 0x6: - case fixed_offset + 0x7: - case fixed_offset + 0x8: - case fixed_offset + 0x9: - case fixed_offset + 0xa: - case fixed_offset + 0xb: - case fixed_offset + 0xc: - case fixed_offset + 0xd: - case fixed_offset + 0xe: - case fixed_offset + 0xf: -#endif - ++*off; - size = ((unsigned int)*p) & 0x0f; - break; - default: - PyErr_SetString(PyExc_ValueError, "Unexpected type header on stream"); - return -1; - } - unpack_callback_uint32(&ctx->user, size, &ctx->stack[0].obj); - return 1; +static int unpack_construct(unpack_context *ctx, const char *data, Py_ssize_t len, Py_ssize_t *off) { + return unpack_execute(1, ctx, data, len, off); +} +static int unpack_skip(unpack_context *ctx, const char *data, Py_ssize_t len, Py_ssize_t *off) { + return unpack_execute(0, ctx, data, len, off); } -#undef SWITCH_RANGE_BEGIN -#undef SWITCH_RANGE -#undef SWITCH_RANGE_DEFAULT -#undef SWITCH_RANGE_END - -static const execute_fn unpack_construct = &unpack_execute; -static const execute_fn unpack_skip = &unpack_execute; -static const execute_fn read_array_header = &unpack_container_header<0x90, 0xdc>; -static const execute_fn read_map_header = &unpack_container_header<0x80, 0xde>; - -#undef NEXT_CS +#define unpack_container_header read_array_header +#define fixed_offset 0x90 +#define var_offset 0xdc +#include "unpack_container_header.h" +#undef unpack_container_header +#undef fixed_offset +#undef var_offset + +#define unpack_container_header read_map_header +#define fixed_offset 0x80 +#define var_offset 0xde +#include "unpack_container_header.h" +#undef unpack_container_header +#undef fixed_offset +#undef var_offset /* vim: set ts=4 sw=4 sts=4 expandtab */ diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..c69d5a7c --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,45 @@ +[build-system] +requires = ["setuptools >= 78.1.1"] +build-backend = "setuptools.build_meta" + +[project] +name = "msgpack" +dynamic = ["version"] +license = "Apache-2.0" +authors = [{name="Inada Naoki", email="songofacandy@gmail.com"}] +description = "MessagePack serializer" +readme = "README.md" +keywords = ["msgpack", "messagepack", "serializer", "serialization", "binary"] +requires-python = ">=3.10" +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Operating System :: OS Independent", + "Topic :: File Formats", + "Intended Audience :: Developers", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", +] + +[project.urls] +Homepage = "https://msgpack.org/" +Documentation = "https://msgpack-python.readthedocs.io/" +Repository = "https://github.com/msgpack/msgpack-python/" +Tracker = "https://github.com/msgpack/msgpack-python/issues" +Changelog = "https://github.com/msgpack/msgpack-python/blob/main/ChangeLog.rst" + +[tool.setuptools] +# Do not install C/C++/Cython source files +include-package-data = false + +[tool.setuptools.dynamic] +version = {attr = "msgpack.__version__"} + +[tool.ruff] +line-length = 100 +target-version = "py310" +lint.select = [ + "E", # pycodestyle + "F", # Pyflakes + "I", # isort + 
#"UP", pyupgrade +] diff --git a/requirements.txt b/requirements.txt index a2cce258..9e4643b6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,3 @@ -Cython~=0.29.13 +Cython==3.2.1 +setuptools==78.1.1 +build diff --git a/setup.py b/setup.py old mode 100755 new mode 100644 index 2ec9ca70..4029e9ed --- a/setup.py +++ b/setup.py @@ -1,98 +1,24 @@ #!/usr/bin/env python -# coding: utf-8 -import io import os import sys -from glob import glob -from distutils.command.sdist import sdist -from setuptools import setup, Extension - -from distutils.command.build_ext import build_ext +from setuptools import Extension, setup PYPY = hasattr(sys, "pypy_version_info") -PY2 = sys.version_info[0] == 2 - - -class NoCython(Exception): - pass - - -try: - import Cython.Compiler.Main as cython_compiler - - have_cython = True -except ImportError: - have_cython = False - - -def cythonize(src): - sys.stderr.write("cythonize: %r\n" % (src,)) - cython_compiler.compile([src], cplus=True) - - -def ensure_source(src): - pyx = os.path.splitext(src)[0] + ".pyx" - - if not os.path.exists(src): - if not have_cython: - raise NoCython - cythonize(pyx) - elif ( - os.path.exists(pyx) - and os.stat(src).st_mtime < os.stat(pyx).st_mtime - and have_cython - ): - cythonize(pyx) - return src - - -class BuildExt(build_ext): - def build_extension(self, ext): - try: - ext.sources = list(map(ensure_source, ext.sources)) - except NoCython: - print("WARNING") - print("Cython is required for building extension from checkout.") - print("Install Cython >= 0.16 or install msgpack from PyPI.") - print("Falling back to pure Python implementation.") - return - try: - return build_ext.build_extension(self, ext) - except Exception as e: - print("WARNING: Failed to compile extension modules.") - print("msgpack uses fallback pure python implementation.") - print(e) - - -exec(open("msgpack/_version.py").read()) - -version_str = ".".join(str(x) for x in version[:3]) -if len(version) > 3 and version[3] != "final": - version_str += version[3] - -# Cython is required for sdist -class Sdist(sdist): - def __init__(self, *args, **kwargs): - cythonize("msgpack/_cmsgpack.pyx") - sdist.__init__(self, *args, **kwargs) - libraries = [] +macros = [] +ext_modules = [] + if sys.platform == "win32": libraries.append("ws2_32") - -if sys.byteorder == "big": - macros = [("__BIG_ENDIAN__", "1")] -else: macros = [("__LITTLE_ENDIAN__", "1")] -ext_modules = [] -if not PYPY and not PY2: +if not PYPY and not os.environ.get("MSGPACK_PUREPYTHON"): ext_modules.append( Extension( "msgpack._cmsgpack", - sources=["msgpack/_cmsgpack.cpp"], + sources=["msgpack/_cmsgpack.c"], libraries=libraries, include_dirs=["."], define_macros=macros, @@ -100,41 +26,7 @@ def __init__(self, *args, **kwargs): ) del libraries, macros - -desc = "MessagePack (de)serializer." 
-with io.open("README.md", encoding="utf-8") as f: - long_desc = f.read() -del f - setup( - name="msgpack", - author="Inada Naoki", - author_email="songofacandy@gmail.com", - version=version_str, - cmdclass={"build_ext": BuildExt, "sdist": Sdist}, ext_modules=ext_modules, packages=["msgpack"], - description=desc, - long_description=long_desc, - long_description_content_type="text/markdown", - url="https://msgpack.org/", - project_urls={ - "Documentation": "https://msgpack-python.readthedocs.io/", - "Source": "https://github.com/msgpack/msgpack-python", - "Tracker": "https://github.com/msgpack/msgpack-python/issues", - }, - license="Apache 2.0", - classifiers=[ - "Programming Language :: Python :: 2", - "Programming Language :: Python :: 2.7", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.5", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: Implementation :: CPython", - "Programming Language :: Python :: Implementation :: PyPy", - "Intended Audience :: Developers", - "License :: OSI Approved :: Apache Software License", - ], ) diff --git a/test/test_buffer.py b/test/test_buffer.py index 62507cf4..ca097222 100644 --- a/test/test_buffer.py +++ b/test/test_buffer.py @@ -1,12 +1,8 @@ -#!/usr/bin/env python -# coding: utf-8 +from pytest import raises -import sys -import pytest -from msgpack import packb, unpackb +from msgpack import Packer, packb, unpackb -@pytest.mark.skipif(sys.version_info[0] == 2, reason="Python 2 is not supported") def test_unpack_buffer(): from array import array @@ -21,7 +17,7 @@ def test_unpack_bytearray(): obj = unpackb(buf, use_list=1) assert [b"foo", b"bar"] == obj expected_type = bytes - assert all(type(s) == expected_type for s in obj) + assert all(type(s) is expected_type for s in obj) def test_unpack_memoryview(): @@ -30,4 +26,24 @@ def test_unpack_memoryview(): obj = unpackb(view, use_list=1) assert [b"foo", b"bar"] == obj expected_type = bytes - assert all(type(s) == expected_type for s in obj) + assert all(type(s) is expected_type for s in obj) + + +def test_packer_getbuffer(): + packer = Packer(autoreset=False) + packer.pack_array_header(2) + packer.pack(42) + packer.pack("hello") + buffer = packer.getbuffer() + assert isinstance(buffer, memoryview) + assert bytes(buffer) == b"\x92*\xa5hello" + + if Packer.__module__ == "msgpack._cmsgpack": # only for Cython + # cython Packer supports buffer protocol directly + assert bytes(packer) == b"\x92*\xa5hello" + + with raises(BufferError): + packer.pack(42) + buffer.release() + packer.pack(42) + assert bytes(packer) == b"\x92*\xa5hello*" diff --git a/test/test_case.py b/test/test_case.py index a0a3c5ad..c4c615e3 100644 --- a/test/test_case.py +++ b/test/test_case.py @@ -1,11 +1,10 @@ #!/usr/bin/env python -# coding: utf-8 from msgpack import packb, unpackb def check(length, obj, use_bin_type=True): v = packb(obj, use_bin_type=use_bin_type) - assert len(v) == length, "%r length should be %r but get %r" % (obj, length, len(v)) + assert len(v) == length, f"{obj!r} length should be {length!r} but get {len(v)!r}" assert unpackb(v, use_list=0, raw=not use_bin_type) == obj @@ -120,11 +119,11 @@ def test_match(): ), ({}, b"\x80"), ( - dict([(x, x) for x in range(15)]), + {x: x for x in range(15)}, 
b"\x8f\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06\x06\x07\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e", ), ( - dict([(x, x) for x in range(16)]), + {x: x for x in range(16)}, b"\xde\x00\x10\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06\x06\x07\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e\x0f\x0f", ), ] @@ -134,4 +133,4 @@ def test_match(): def test_unicode(): - assert unpackb(packb(u"foobar"), use_list=1) == u"foobar" + assert unpackb(packb("foobar"), use_list=1) == "foobar" diff --git a/test/test_except.py b/test/test_except.py index 5544f2bc..b77ac800 100644 --- a/test/test_except.py +++ b/test/test_except.py @@ -1,10 +1,10 @@ #!/usr/bin/env python -# coding: utf-8 + +import datetime from pytest import raises -from msgpack import packb, unpackb, Unpacker, FormatError, StackError, OutOfData -import datetime +from msgpack import FormatError, OutOfData, StackError, Unpacker, packb, unpackb class DummyException(Exception): @@ -53,7 +53,7 @@ def test_invalidvalue(): def test_strict_map_key(): - valid = {u"unicode": 1, b"bytes": 2} + valid = {"unicode": 1, b"bytes": 2} packed = packb(valid, use_bin_type=True) assert valid == unpackb(packed, raw=False, strict_map_key=True) diff --git a/test/test_extension.py b/test/test_extension.py index 6b365751..aaf0fd92 100644 --- a/test/test_extension.py +++ b/test/test_extension.py @@ -1,5 +1,5 @@ -from __future__ import print_function import array + import msgpack from msgpack import ExtType @@ -17,9 +17,7 @@ def p(s): assert p(b"A" * 16) == b"\xd8\x42" + b"A" * 16 # fixext 16 assert p(b"ABC") == b"\xc7\x03\x42ABC" # ext 8 assert p(b"A" * 0x0123) == b"\xc8\x01\x23\x42" + b"A" * 0x0123 # ext 16 - assert ( - p(b"A" * 0x00012345) == b"\xc9\x00\x01\x23\x45\x42" + b"A" * 0x00012345 - ) # ext 32 + assert p(b"A" * 0x00012345) == b"\xc9\x00\x01\x23\x45\x42" + b"A" * 0x00012345 # ext 32 def test_unpack_ext_type(): @@ -49,16 +47,13 @@ def default(obj): except AttributeError: data = obj.tostring() return ExtType(typecode, data) - raise TypeError("Unknown type object %r" % (obj,)) + raise TypeError(f"Unknown type object {obj!r}") def ext_hook(code, data): print("ext_hook called", code, data) assert code == 123 obj = array.array("d") - try: - obj.frombytes(data) - except AttributeError: # PY2 - obj.fromstring(data) + obj.frombytes(data) return obj obj = [42, b"hello", array.array("d", [1.1, 2.2, 3.3])] @@ -67,20 +62,14 @@ def ext_hook(code, data): assert obj == obj2 -import sys - -if sys.version > "3": - long = int - - def test_overriding_hooks(): def default(obj): - if isinstance(obj, long): + if isinstance(obj, int): return {"__type__": "long", "__data__": str(obj)} else: return obj - obj = {"testval": long(1823746192837461928374619)} + obj = {"testval": 1823746192837461928374619} refobj = {"testval": default(obj["testval"])} refout = msgpack.packb(refobj) assert isinstance(refout, (str, bytes)) diff --git a/test/test_format.py b/test/test_format.py index fbbc3f98..c06c87dc 100644 --- a/test/test_format.py +++ b/test/test_format.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding: utf-8 from msgpack import unpackb @@ -25,9 +24,7 @@ def testFixRaw(): def testFixMap(): - check( - b"\x82\xc2\x81\xc0\xc0\xc3\x81\xc0\x80", {False: {None: None}, True: {None: {}}} - ) + check(b"\x82\xc2\x81\xc0\xc0\xc3\x81\xc0\x80", {False: {None: None}, True: {None: {}}}) def testUnsignedInt(): diff --git a/test/test_limits.py b/test/test_limits.py index 65e6bcc7..9b92b4d9 100644 --- a/test/test_limits.py +++ b/test/test_limits.py @@ -1,27 +1,25 @@ #!/usr/bin/env 
python -# coding: utf-8 -from __future__ import absolute_import, division, print_function, unicode_literals import pytest from msgpack import ( - packb, - unpackb, - Packer, - Unpacker, ExtType, + Packer, PackOverflowError, PackValueError, + Unpacker, UnpackValueError, + packb, + unpackb, ) def test_integer(): - x = -(2 ** 63) + x = -(2**63) assert unpackb(packb(x)) == x with pytest.raises(PackOverflowError): packb(x - 1) - x = 2 ** 64 - 1 + x = 2**64 - 1 assert unpackb(packb(x)) == x with pytest.raises(PackOverflowError): packb(x + 1) @@ -29,16 +27,16 @@ def test_integer(): def test_array_header(): packer = Packer() - packer.pack_array_header(2 ** 32 - 1) + packer.pack_array_header(2**32 - 1) with pytest.raises(PackValueError): - packer.pack_array_header(2 ** 32) + packer.pack_array_header(2**32) def test_map_header(): packer = Packer() - packer.pack_map_header(2 ** 32 - 1) + packer.pack_map_header(2**32 - 1) with pytest.raises(PackValueError): - packer.pack_array_header(2 ** 32) + packer.pack_array_header(2**32) def test_max_str_len(): diff --git a/test/test_memoryview.py b/test/test_memoryview.py index 86b2c1f7..0a2a6f53 100644 --- a/test/test_memoryview.py +++ b/test/test_memoryview.py @@ -1,15 +1,8 @@ #!/usr/bin/env python -# coding: utf-8 -import pytest from array import array -from msgpack import packb, unpackb -import sys - -pytestmark = pytest.mark.skipif( - sys.version_info[0] < 3, reason="Only Python 3 supports buffer protocol" -) +from msgpack import packb, unpackb def make_array(f, data): @@ -53,46 +46,54 @@ def test_fixstr_from_float(): def test_str16_from_byte(): - _runtest("B", 2 ** 8, b"\xda", b"\x01\x00", False) - _runtest("B", 2 ** 16 - 1, b"\xda", b"\xff\xff", False) + _runtest("B", 2**8, b"\xda", b"\x01\x00", False) + _runtest("B", 2**16 - 1, b"\xda", b"\xff\xff", False) def test_str16_from_float(): - _runtest("f", 2 ** 8, b"\xda", b"\x01\x00", False) - _runtest("f", 2 ** 16 - 4, b"\xda", b"\xff\xfc", False) + _runtest("f", 2**8, b"\xda", b"\x01\x00", False) + _runtest("f", 2**16 - 4, b"\xda", b"\xff\xfc", False) def test_str32_from_byte(): - _runtest("B", 2 ** 16, b"\xdb", b"\x00\x01\x00\x00", False) + _runtest("B", 2**16, b"\xdb", b"\x00\x01\x00\x00", False) def test_str32_from_float(): - _runtest("f", 2 ** 16, b"\xdb", b"\x00\x01\x00\x00", False) + _runtest("f", 2**16, b"\xdb", b"\x00\x01\x00\x00", False) def test_bin8_from_byte(): _runtest("B", 1, b"\xc4", b"\x01", True) - _runtest("B", 2 ** 8 - 1, b"\xc4", b"\xff", True) + _runtest("B", 2**8 - 1, b"\xc4", b"\xff", True) def test_bin8_from_float(): _runtest("f", 4, b"\xc4", b"\x04", True) - _runtest("f", 2 ** 8 - 4, b"\xc4", b"\xfc", True) + _runtest("f", 2**8 - 4, b"\xc4", b"\xfc", True) def test_bin16_from_byte(): - _runtest("B", 2 ** 8, b"\xc5", b"\x01\x00", True) - _runtest("B", 2 ** 16 - 1, b"\xc5", b"\xff\xff", True) + _runtest("B", 2**8, b"\xc5", b"\x01\x00", True) + _runtest("B", 2**16 - 1, b"\xc5", b"\xff\xff", True) def test_bin16_from_float(): - _runtest("f", 2 ** 8, b"\xc5", b"\x01\x00", True) - _runtest("f", 2 ** 16 - 4, b"\xc5", b"\xff\xfc", True) + _runtest("f", 2**8, b"\xc5", b"\x01\x00", True) + _runtest("f", 2**16 - 4, b"\xc5", b"\xff\xfc", True) def test_bin32_from_byte(): - _runtest("B", 2 ** 16, b"\xc6", b"\x00\x01\x00\x00", True) + _runtest("B", 2**16, b"\xc6", b"\x00\x01\x00\x00", True) def test_bin32_from_float(): - _runtest("f", 2 ** 16, b"\xc6", b"\x00\x01\x00\x00", True) + _runtest("f", 2**16, b"\xc6", b"\x00\x01\x00\x00", True) + + +def test_multidim_memoryview(): + # See 
https://github.com/msgpack/msgpack-python/issues/526 + view = memoryview(b"\00" * 6) + data = view.cast(view.format, (3, 2)) + packed = packb(data) + assert packed == b"\xc4\x06\x00\x00\x00\x00\x00\x00" diff --git a/test/test_newspec.py b/test/test_newspec.py index b7da486e..9e2f9be5 100644 --- a/test/test_newspec.py +++ b/test/test_newspec.py @@ -1,6 +1,4 @@ -# coding: utf-8 - -from msgpack import packb, unpackb, ExtType +from msgpack import ExtType, packb, unpackb def test_str8(): diff --git a/test/test_obj.py b/test/test_obj.py index 86c557cd..23be06d5 100644 --- a/test/test_obj.py +++ b/test/test_obj.py @@ -1,7 +1,7 @@ #!/usr/bin/env python -# coding: utf-8 from pytest import raises + from msgpack import packb, unpackb @@ -34,7 +34,7 @@ def test_decode_pairs_hook(): prod_sum = 1 * 2 + 3 * 4 unpacked = unpackb( packed, - object_pairs_hook=lambda l: sum(k * v for k, v in l), + object_pairs_hook=lambda lst: sum(k * v for k, v in lst), use_list=1, strict_map_key=False, ) @@ -49,7 +49,7 @@ def test_only_one_obj_hook(): def test_bad_hook(): with raises(TypeError): packed = packb([3, 1 + 2j], default=lambda o: o) - unpacked = unpackb(packed, use_list=1) + unpackb(packed, use_list=1) def _arr_to_str(arr): diff --git a/test/test_pack.py b/test/test_pack.py index a51d84c9..374d1549 100644 --- a/test/test_pack.py +++ b/test/test_pack.py @@ -1,16 +1,12 @@ #!/usr/bin/env python -# coding: utf-8 -from __future__ import absolute_import, division, print_function, unicode_literals +import struct from collections import OrderedDict from io import BytesIO -import struct -import sys import pytest -from pytest import raises, xfail -from msgpack import packb, unpackb, Unpacker, Packer, pack +from msgpack import Packer, Unpacker, packb, unpackb def check(data, use_list=False): @@ -80,13 +76,8 @@ def testPackByteArrays(): check(td) -@pytest.mark.skipif( - sys.version_info < (3, 0), reason="Python 2 passes invalid surrogates" -) def testIgnoreUnicodeErrors(): - re = unpackb( - packb(b"abc\xeddef", use_bin_type=False), raw=False, unicode_errors="ignore" - ) + re = unpackb(packb(b"abc\xeddef", use_bin_type=False), raw=False, unicode_errors="ignore") assert re == "abcdef" @@ -96,12 +87,9 @@ def testStrictUnicodeUnpack(): unpackb(packed, raw=False, use_list=1) -@pytest.mark.skipif( - sys.version_info < (3, 0), reason="Python 2 passes invalid surrogates" -) def testIgnoreErrorsPack(): re = unpackb( - packb("abc\uDC80\uDCFFdef", use_bin_type=True, unicode_errors="ignore"), + packb("abc\udc80\udcffdef", use_bin_type=True, unicode_errors="ignore"), raw=False, use_list=1, ) @@ -114,8 +102,8 @@ def testDecodeBinary(): def testPackFloat(): - assert packb(1.0, use_single_float=True) == b"\xca" + struct.pack(str(">f"), 1.0) - assert packb(1.0, use_single_float=False) == b"\xcb" + struct.pack(str(">d"), 1.0) + assert packb(1.0, use_single_float=True) == b"\xca" + struct.pack(">f", 1.0) + assert packb(1.0, use_single_float=False) == b"\xcb" + struct.pack(">d", 1.0) def testArraySize(sizes=[0, 5, 50, 1000]): @@ -160,7 +148,7 @@ def testMapSize(sizes=[0, 5, 50, 1000]): bio.seek(0) unpacker = Unpacker(bio, strict_map_key=False) for size in sizes: - assert unpacker.unpack() == dict((i, i * 2) for i in range(size)) + assert unpacker.unpack() == {i: i * 2 for i in range(size)} def test_odict(): diff --git a/test/test_read_size.py b/test/test_read_size.py index 33a7e7dd..0f6c1b50 100644 --- a/test/test_read_size.py +++ b/test/test_read_size.py @@ -1,5 +1,6 @@ """Test Unpacker's 
read_array_header and read_map_header methods""" -from msgpack import packb, Unpacker, OutOfData + +from msgpack import OutOfData, Unpacker, packb UnexpectedTypeException = ValueError diff --git a/test/test_seq.py b/test/test_seq.py index 0d5d8065..8dee4620 100644 --- a/test/test_seq.py +++ b/test/test_seq.py @@ -1,9 +1,8 @@ -#!/usr/bin/env python -# coding: utf-8 - +# ruff: noqa: E501 +# ignore line length limit for long comments import io -import msgpack +import msgpack binarydata = bytes(bytearray(range(256))) @@ -35,7 +34,7 @@ def test_exceeding_unpacker_read_size(): read_count = 0 for idx, o in enumerate(unpacker): - assert type(o) == bytes + assert isinstance(o, bytes) assert o == gen_binary_data(idx) read_count += 1 diff --git a/test/test_sequnpack.py b/test/test_sequnpack.py index 6293a453..0f895d7d 100644 --- a/test/test_sequnpack.py +++ b/test/test_sequnpack.py @@ -1,11 +1,11 @@ #!/usr/bin/env python -# coding: utf-8 import io -from msgpack import Unpacker, BufferFull -from msgpack import pack -from msgpack.exceptions import OutOfData + from pytest import raises +from msgpack import BufferFull, Unpacker, pack, packb +from msgpack.exceptions import OutOfData + def test_partialdata(): unpacker = Unpacker() @@ -78,6 +78,15 @@ def test_maxbuffersize(): assert ord("b") == next(unpacker) +def test_maxbuffersize_file(): + buff = io.BytesIO(packb(b"a" * 10) + packb([b"a" * 20] * 2)) + unpacker = Unpacker(buff, read_size=1, max_buffer_size=19, max_bin_len=20) + assert unpacker.unpack() == b"a" * 10 + # assert unpacker.unpack() == [b"a" * 20]*2 + with raises(BufferFull): + print(unpacker.unpack()) + + def test_readbytes(): unpacker = Unpacker(read_size=3) unpacker.feed(b"foobar") @@ -118,8 +127,8 @@ def test_issue124(): def test_unpack_tell(): stream = io.BytesIO() - messages = [2 ** i - 1 for i in range(65)] - messages += [-(2 ** i) for i in range(1, 64)] + messages = [2**i - 1 for i in range(65)] + messages += [-(2**i) for i in range(1, 64)] messages += [ b"hello", b"hello" * 1000, diff --git a/test/test_stricttype.py b/test/test_stricttype.py index fe9ec6cd..72776a2c 100644 --- a/test/test_stricttype.py +++ b/test/test_stricttype.py @@ -1,7 +1,6 @@ -# coding: utf-8 - from collections import namedtuple -from msgpack import packb, unpackb, ExtType + +from msgpack import ExtType, packb, unpackb def test_namedtuple(): @@ -10,7 +9,7 @@ def test_namedtuple(): def default(o): if isinstance(o, T): return dict(o._asdict()) - raise TypeError("Unsupported type %s" % (type(o),)) + raise TypeError(f"Unsupported type {type(o)}") packed = packb(T(1, 42), strict_types=True, use_bin_type=True, default=default) unpacked = unpackb(packed, raw=False) @@ -23,7 +22,7 @@ def test_tuple(): def default(o): if isinstance(o, tuple): return {"__type__": "tuple", "value": list(o)} - raise TypeError("Unsupported type %s" % (type(o),)) + raise TypeError(f"Unsupported type {type(o)}") def convert(o): if o.get("__type__") == "tuple": @@ -44,9 +43,7 @@ def test_tuple_ext(): def default(o): if isinstance(o, tuple): # Convert to list and pack - payload = packb( - list(o), strict_types=True, use_bin_type=True, default=default - ) + payload = packb(list(o), strict_types=True, use_bin_type=True, default=default) return ExtType(MSGPACK_EXT_TYPE_TUPLE, payload) raise TypeError(repr(o)) @@ -54,7 +51,7 @@ def convert(code, payload): if code == MSGPACK_EXT_TYPE_TUPLE: # Unpack and convert to tuple return tuple(unpackb(payload, raw=False, ext_hook=convert)) - raise ValueError("Unknown Ext code {}".format(code)) + raise 
ValueError(f"Unknown Ext code {code}") data = packb(t, strict_types=True, use_bin_type=True, default=default) expected = unpackb(data, raw=False, ext_hook=convert) diff --git a/test/test_subtype.py b/test/test_subtype.py index d91d4553..a911578c 100644 --- a/test/test_subtype.py +++ b/test/test_subtype.py @@ -1,9 +1,9 @@ #!/usr/bin/env python -# coding: utf-8 -from msgpack import packb, unpackb from collections import namedtuple +from msgpack import packb + class MyList(list): pass diff --git a/test/test_timestamp.py b/test/test_timestamp.py index 823fe04e..831141a1 100644 --- a/test/test_timestamp.py +++ b/test/test_timestamp.py @@ -1,40 +1,38 @@ -import pytest -import sys import datetime + +import pytest + import msgpack from msgpack.ext import Timestamp -if sys.version_info[0] > 2: - from msgpack.ext import _utc - def test_timestamp(): # timestamp32 - ts = Timestamp(2 ** 32 - 1) + ts = Timestamp(2**32 - 1) assert ts.to_bytes() == b"\xff\xff\xff\xff" packed = msgpack.packb(ts) assert packed == b"\xd6\xff" + ts.to_bytes() unpacked = msgpack.unpackb(packed) assert ts == unpacked - assert ts.seconds == 2 ** 32 - 1 and ts.nanoseconds == 0 + assert ts.seconds == 2**32 - 1 and ts.nanoseconds == 0 # timestamp64 - ts = Timestamp(2 ** 34 - 1, 999999999) + ts = Timestamp(2**34 - 1, 999999999) assert ts.to_bytes() == b"\xee\x6b\x27\xff\xff\xff\xff\xff" packed = msgpack.packb(ts) assert packed == b"\xd7\xff" + ts.to_bytes() unpacked = msgpack.unpackb(packed) assert ts == unpacked - assert ts.seconds == 2 ** 34 - 1 and ts.nanoseconds == 999999999 + assert ts.seconds == 2**34 - 1 and ts.nanoseconds == 999999999 # timestamp96 - ts = Timestamp(2 ** 63 - 1, 999999999) + ts = Timestamp(2**63 - 1, 999999999) assert ts.to_bytes() == b"\x3b\x9a\xc9\xff\x7f\xff\xff\xff\xff\xff\xff\xff" packed = msgpack.packb(ts) assert packed == b"\xc7\x0c\xff" + ts.to_bytes() unpacked = msgpack.unpackb(packed) assert ts == unpacked - assert ts.seconds == 2 ** 63 - 1 and ts.nanoseconds == 999999999 + assert ts.seconds == 2**63 - 1 and ts.nanoseconds == 999999999 # negative fractional ts = Timestamp.from_unix(-2.3) # s: -3, ns: 700000000 @@ -85,25 +83,48 @@ def test_timestamp_to(): assert t.to_unix_nano() == 42000014000 -@pytest.mark.skipif(sys.version_info[0] == 2, reason="datetime support is PY3+ only") def test_timestamp_datetime(): t = Timestamp(42, 14) - assert t.to_datetime() == datetime.datetime(1970, 1, 1, 0, 0, 42, 0, tzinfo=_utc) + utc = datetime.timezone.utc + assert t.to_datetime() == datetime.datetime(1970, 1, 1, 0, 0, 42, 0, tzinfo=utc) + + ts = datetime.datetime(2024, 4, 16, 8, 43, 9, 420317, tzinfo=utc) + ts2 = datetime.datetime(2024, 4, 16, 8, 43, 9, 420318, tzinfo=utc) + + assert ( + Timestamp.from_datetime(ts2).nanoseconds - Timestamp.from_datetime(ts).nanoseconds == 1000 + ) + + ts3 = datetime.datetime(2024, 4, 16, 8, 43, 9, 4256) + ts4 = datetime.datetime(2024, 4, 16, 8, 43, 9, 4257) + assert ( + Timestamp.from_datetime(ts4).nanoseconds - Timestamp.from_datetime(ts3).nanoseconds == 1000 + ) + + assert Timestamp.from_datetime(ts).to_datetime() == ts -@pytest.mark.skipif(sys.version_info[0] == 2, reason="datetime support is PY3+ only") def test_unpack_datetime(): t = Timestamp(42, 14) + utc = datetime.timezone.utc packed = msgpack.packb(t) unpacked = msgpack.unpackb(packed, timestamp=3) - assert unpacked == datetime.datetime(1970, 1, 1, 0, 0, 42, 0, tzinfo=_utc) + assert unpacked == datetime.datetime(1970, 1, 1, 0, 0, 42, 0, tzinfo=utc) + + +def test_pack_unpack_before_epoch(): + utc = datetime.timezone.utc 
+ t_in = datetime.datetime(1960, 1, 1, tzinfo=utc) + packed = msgpack.packb(t_in, datetime=True) + unpacked = msgpack.unpackb(packed, timestamp=3) + assert unpacked == t_in -@pytest.mark.skipif(sys.version_info[0] == 2, reason="datetime support is PY3+ only") def test_pack_datetime(): t = Timestamp(42, 14000) dt = t.to_datetime() - assert dt == datetime.datetime(1970, 1, 1, 0, 0, 42, 14, tzinfo=_utc) + utc = datetime.timezone.utc + assert dt == datetime.datetime(1970, 1, 1, 0, 0, 42, 14, tzinfo=utc) packed = msgpack.packb(dt, datetime=True) packed2 = msgpack.packb(t) @@ -121,3 +142,30 @@ def test_pack_datetime(): assert x assert x[0] == dt assert msgpack.unpackb(packed) is None + + +def test_issue451(): + # https://github.com/msgpack/msgpack-python/issues/451 + utc = datetime.timezone.utc + dt = datetime.datetime(2100, 1, 1, 1, 1, tzinfo=utc) + packed = msgpack.packb(dt, datetime=True) + assert packed == b"\xd6\xff\xf4\x86eL" + + unpacked = msgpack.unpackb(packed, timestamp=3) + assert dt == unpacked + + +def test_pack_datetime_without_tzinfo(): + dt = datetime.datetime(1970, 1, 1, 0, 0, 42, 14) + with pytest.raises(ValueError, match="where tzinfo=None"): + packed = msgpack.packb(dt, datetime=True) + + dt = datetime.datetime(1970, 1, 1, 0, 0, 42, 14) + packed = msgpack.packb(dt, datetime=True, default=lambda x: None) + assert packed == msgpack.packb(None) + + utc = datetime.timezone.utc + dt = datetime.datetime(1970, 1, 1, 0, 0, 42, 14, tzinfo=utc) + packed = msgpack.packb(dt, datetime=True) + unpacked = msgpack.unpackb(packed, timestamp=3) + assert unpacked == dt diff --git a/test/test_unpack.py b/test/test_unpack.py index 057b7bf4..b17c3c53 100644 --- a/test/test_unpack.py +++ b/test/test_unpack.py @@ -1,12 +1,9 @@ -from io import BytesIO import sys -from msgpack import Unpacker, packb, OutOfData, ExtType -from pytest import raises, mark +from io import BytesIO + +from pytest import mark, raises -try: - from itertools import izip as zip -except ImportError: - pass +from msgpack import ExtType, OutOfData, Unpacker, packb def test_unpack_array_header_from_file(): @@ -52,7 +49,7 @@ def hook(x): def test_unpacker_ext_hook(): class MyUnpacker(Unpacker): def __init__(self): - super(MyUnpacker, self).__init__(ext_hook=self._hook, raw=False) + super().__init__(ext_hook=self._hook, raw=False) def _hook(self, code, data): if code == 1: @@ -70,7 +67,7 @@ def _hook(self, code, data): def test_unpacker_tell(): - objects = 1, 2, u"abc", u"def", u"ghi" + objects = 1, 2, "abc", "def", "ghi" packed = b"\x01\x02\xa3abc\xa3def\xa3ghi" positions = 1, 2, 6, 10, 14 unpacker = Unpacker(BytesIO(packed)) @@ -80,7 +77,7 @@ def test_unpacker_tell(): def test_unpacker_tell_read_bytes(): - objects = 1, u"abc", u"ghi" + objects = 1, "abc", "ghi" packed = b"\x01\x02\xa3abc\xa3def\xa3ghi" raw_data = b"\x02", b"\xa3def", b"" lenghts = 1, 4, 999 @@ -90,10 +87,3 @@ def test_unpacker_tell_read_bytes(): assert obj == unp assert pos == unpacker.tell() assert unpacker.read_bytes(n) == raw - - -if __name__ == "__main__": - test_unpack_array_header_from_file() - test_unpacker_hook_refcnt() - test_unpacker_ext_hook() - test_unpacker_tell() diff --git a/tox.ini b/tox.ini deleted file mode 100644 index 607b182e..00000000 --- a/tox.ini +++ /dev/null @@ -1,43 +0,0 @@ -[tox] -envlist = - py27-pure, - {py35,py36,py37,py38}-{c,pure}, - {pypy,pypy3}-pure, - py27-x86, - py34-x86, - -[variants:pure] -setenv= - MSGPACK_PUREPYTHON=x - -[testenv] -deps= - pytest - -changedir=test -commands= - c,x86: 
python -c 'from msgpack import _cmsgpack' - c,x86: py.test - pure: py.test - -[testenv:py27-x86] -basepython=python2.7-x86 -deps= - pytest - -changedir=test -commands= - python -c 'import sys; print(hex(sys.maxsize))' - python -c 'from msgpack import _cmsgpack' - py.test - -[testenv:py34-x86] -basepython=python3.4-x86 -deps= - pytest - -changedir=test -commands= - python -c 'import sys; print(hex(sys.maxsize))' - python -c 'from msgpack import _cmsgpack' - py.test
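
The new tests above (test_packer_getbuffer, test_unpack_datetime, test_pack_datetime) exercise the buffer-protocol and timestamp behavior that this patch touches. The short sketch below is not part of the patch; it is an illustrative Python snippet that re-uses only calls shown in those tests, and it assumes msgpack has been installed from this tree (with or without the Cython extension selected by the MSGPACK_PUREPYTHON switch in the new setup.py).

    # Illustrative sketch only -- not part of the patch. All calls mirror the
    # new tests in test_buffer.py and test_timestamp.py above.
    import datetime

    import msgpack
    from msgpack import Packer

    # Report which implementation was built: "msgpack._cmsgpack" for the Cython
    # extension, otherwise the pure-Python fallback (cf. test_packer_getbuffer).
    print("implementation:", Packer.__module__)

    # Zero-copy access to the packer's internal buffer (autoreset=False),
    # as exercised by test_packer_getbuffer.
    packer = Packer(autoreset=False)
    packer.pack_array_header(2)
    packer.pack(42)
    packer.pack("hello")
    view = packer.getbuffer()
    assert bytes(view) == b"\x92*\xa5hello"
    # While the memoryview is alive, further pack() calls raise BufferError.
    view.release()

    # Timezone-aware datetime round-trip, as exercised by test_pack_datetime
    # and test_unpack_datetime (timestamp=3 returns datetime objects).
    utc = datetime.timezone.utc
    dt = datetime.datetime(1970, 1, 1, 0, 0, 42, tzinfo=utc)
    packed = msgpack.packb(dt, datetime=True)
    assert msgpack.unpackb(packed, timestamp=3) == dt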