diff --git a/.coveragerc b/.coveragerc
index 742e899d4..e019a358a 100644
--- a/.coveragerc
+++ b/.coveragerc
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright 2020 Google LLC
+# Copyright 2024 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -18,6 +18,7 @@
 [run]
 branch = True
 omit =
+  .nox/*
   google/__init__.py
   google/cloud/__init__.py
@@ -32,6 +33,7 @@ exclude_lines =
     # Ignore abstract methods
     raise NotImplementedError
 omit =
+  .nox/*
   */gapic/*.py
   */proto/*.py
   */core/*.py
diff --git a/.flake8 b/.flake8
index 29227d4cf..32986c792 100644
--- a/.flake8
+++ b/.flake8
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright 2020 Google LLC
+# Copyright 2024 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -16,7 +16,7 @@
 # Generated by synthtool. DO NOT EDIT!
 [flake8]
-ignore = E203, E266, E501, W503
+ignore = E203, E231, E266, E501, W503
 exclude =
   # Exclude generated code.
   **/proto/**
diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml
index 6b8a73b31..10cf433a8 100644
--- a/.github/.OwlBot.lock.yaml
+++ b/.github/.OwlBot.lock.yaml
@@ -1,3 +1,17 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 docker:
   image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest
-  digest: sha256:36a95b8f494e4674dc9eee9af98961293b51b86b3649942aac800ae6c1f796d4
+  digest: sha256:8ff1efe878e18bd82a0fb7b70bb86f77e7ab6901fed394440b6135db0ba8d84a
+# created: 2025-01-09T12:01:16.422459506Z
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 4e9e8be82..b37686f76 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -3,12 +3,10 @@
 #
 # For syntax help see:
 # https://help.github.com/en/github/creating-cloning-and-archiving-repositories/about-code-owners#codeowners-syntax
+# Note: This file is autogenerated. To make changes to the codeowner team, please update .repo-metadata.json.
+# @googleapis/yoshi-python @googleapis/cloud-storage-dpe are the default owners for changes in this repo
+* @googleapis/yoshi-python @googleapis/cloud-storage-dpe
-# The cloud-storage-dpe team is the default owner for anything not
-# explicitly taken by someone else.
-* @googleapis/cloud-storage-dpe @googleapis/yoshi-python
-
-# Additionally, the python-samples-owners team is also among
-# the default owners for samples changes.
-/samples/ @googleapis/cloud-storage-dpe @googleapis/yoshi-python @googleapis/python-samples-owners
\ No newline at end of file
+# @googleapis/python-samples-reviewers @googleapis/cloud-storage-dpe are the default owners for samples changes
+/samples/ @googleapis/python-samples-reviewers @googleapis/cloud-storage-dpe
diff --git a/.github/auto-approve.yml b/.github/auto-approve.yml
new file mode 100644
index 000000000..311ebbb85
--- /dev/null
+++ b/.github/auto-approve.yml
@@ -0,0 +1,3 @@
+# https://github.com/googleapis/repo-automation-bots/tree/main/packages/auto-approve
+processes:
+  - "OwlBotTemplateChanges"
diff --git a/google/cloud/__init__.py b/.github/auto-label.yaml
similarity index 73%
rename from google/cloud/__init__.py
rename to .github/auto-label.yaml
index 0e1bc5131..21786a4eb 100644
--- a/google/cloud/__init__.py
+++ b/.github/auto-label.yaml
@@ -1,4 +1,4 @@
-# Copyright 2016 Google LLC
+# Copyright 2024 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,12 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+requestsize:
+  enabled: true
-try:
-    import pkg_resources
-
-    pkg_resources.declare_namespace(__name__)
-except ImportError:
-    import pkgutil
-
-    __path__ = pkgutil.extend_path(__path__, __name__)
+path:
+  pullrequest: true
+  paths:
+    samples: "samples"
diff --git a/.github/blunderbuss.yml b/.github/blunderbuss.yml
new file mode 100644
index 000000000..8f2aae9b2
--- /dev/null
+++ b/.github/blunderbuss.yml
@@ -0,0 +1,12 @@
+# Blunderbuss config
+#
+# This file controls who is assigned for pull requests and issues.
+# Note: This file is autogenerated. To make changes to the assignee
+# team, please update `codeowner_team` in `.repo-metadata.json`.
+assign_issues:
+  - andrewsg
+  - cojenco
+
+assign_prs:
+  - andrewsg
+  - cojenco
diff --git a/.github/release-please.yml b/.github/release-please.yml
index 4507ad059..8a7214bdd 100644
--- a/.github/release-please.yml
+++ b/.github/release-please.yml
@@ -1 +1,6 @@
+branches:
+- branch: python2
+  handleGHRelease: true
+  releaseType: python
 releaseType: python
+handleGHRelease: true
diff --git a/.github/release-trigger.yml b/.github/release-trigger.yml
new file mode 100644
index 000000000..5980127a4
--- /dev/null
+++ b/.github/release-trigger.yml
@@ -0,0 +1,2 @@
+enabled: true
+multiScmName: python-storage
diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml
index d77e10188..cc1eb10e1 100644
--- a/.github/sync-repo-settings.yaml
+++ b/.github/sync-repo-settings.yaml
@@ -10,3 +10,11 @@ branchProtectionRules:
    - 'Kokoro'
    - 'cla/google'
    - 'Kokoro system-3.8'
+    - 'OwlBot Post Processor'
+- pattern: python2
+  requiresCodeOwnerReviews: true
+  requiresStrictStatusChecks: true
+  requiredStatusCheckContexts:
+    - 'Kokoro'
+    - 'cla/google'
+    - 'Kokoro system-2.7'
diff --git a/.gitignore b/.gitignore
index b4243ced7..d083ea1dd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -50,6 +50,7 @@ docs.metadata
 # Virtual environment
 env/
+venv/
 # Test logs
 coverage.xml
diff --git a/.kokoro/build.sh b/.kokoro/build.sh
index 500351238..fdc6d0271 100755
--- a/.kokoro/build.sh
+++ b/.kokoro/build.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2018 Google LLC
+# Copyright 2024 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -24,6 +24,24 @@ cd "${PROJECT_ROOT}"
 # Disable buffering, so that the logs stream through.
 export PYTHONUNBUFFERED=1
+# Export variable to override api endpoint
+export API_ENDPOINT_OVERRIDE
+
+# Export variable to override api endpoint version
+export API_VERSION_OVERRIDE
+
+# Export dual region locations
+export DUAL_REGION_LOC_1
+export DUAL_REGION_LOC_2
+
+# Setup universe domain testing needed environment variables.
+export TEST_UNIVERSE_DOMAIN_CREDENTIAL=$(realpath ${KOKORO_GFILE_DIR}/secret_manager/client-library-test-universe-domain-credential)
+export TEST_UNIVERSE_DOMAIN=$(gcloud secrets versions access latest --project cloud-devrel-kokoro-resources --secret=client-library-test-universe-domain)
+export TEST_UNIVERSE_PROJECT_ID=$(gcloud secrets versions access latest --project cloud-devrel-kokoro-resources --secret=client-library-test-universe-project-id)
+export TEST_UNIVERSE_LOCATION=$(gcloud secrets versions access latest --project cloud-devrel-kokoro-resources --secret=client-library-test-universe-storage-location)
+
+
+
 # Debug: show build environment
 env | grep KOKORO
@@ -33,13 +51,6 @@ export GOOGLE_APPLICATION_CREDENTIALS=${KOKORO_GFILE_DIR}/service-account.json
 # Setup project id.
 export PROJECT_ID=$(cat "${KOKORO_GFILE_DIR}/project-id.json")
-# Remove old nox
-python3 -m pip uninstall --yes --quiet nox-automation
-
-# Install nox
-python3 -m pip install --upgrade --quiet nox
-python3 -m nox --version
-
 # If this is a continuous build, send the test log to the FlakyBot.
 # See https://github.com/googleapis/repo-automation-bots/tree/main/packages/flakybot.
if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"continuous"* ]]; then diff --git a/.kokoro/continuous/continuous.cfg b/.kokoro/continuous/continuous.cfg index 8f43917d9..0cfe6b6e2 100644 --- a/.kokoro/continuous/continuous.cfg +++ b/.kokoro/continuous/continuous.cfg @@ -1 +1,7 @@ -# Format: //devtools/kokoro/config/proto/build.proto \ No newline at end of file +# Format: //devtools/kokoro/config/proto/build.proto + +# Credentials needed to test universe domain. +env_vars: { + key: "SECRET_MANAGER_KEYS" + value: "client-library-test-universe-domain-credential" +} diff --git a/.kokoro/continuous/prerelease-deps.cfg b/.kokoro/continuous/prerelease-deps.cfg new file mode 100644 index 000000000..3595fb43f --- /dev/null +++ b/.kokoro/continuous/prerelease-deps.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "prerelease_deps" +} diff --git a/.kokoro/docker/docs/Dockerfile b/.kokoro/docker/docs/Dockerfile index 4e1b1fb8b..e5410e296 100644 --- a/.kokoro/docker/docs/Dockerfile +++ b/.kokoro/docker/docs/Dockerfile @@ -1,4 +1,4 @@ -# Copyright 2020 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ubuntu:20.04 +from ubuntu:24.04 ENV DEBIAN_FRONTEND noninteractive @@ -40,7 +40,6 @@ RUN apt-get update \ libssl-dev \ libsqlite3-dev \ portaudio19-dev \ - python3-distutils \ redis-server \ software-properties-common \ ssh \ @@ -60,8 +59,31 @@ RUN apt-get update \ && rm -rf /var/lib/apt/lists/* \ && rm -f /var/cache/apt/archives/*.deb + +###################### Install python 3.10.14 for docs/docfx session + +# Download python 3.10.14 +RUN wget https://www.python.org/ftp/python/3.10.14/Python-3.10.14.tgz + +# Extract files +RUN tar -xvf Python-3.10.14.tgz + +# Install python 3.10.14 +RUN ./Python-3.10.14/configure --enable-optimizations +RUN make altinstall + +ENV PATH /usr/local/bin/python3.10:$PATH + +###################### Install pip RUN wget -O /tmp/get-pip.py 'https://bootstrap.pypa.io/get-pip.py' \ - && python3.8 /tmp/get-pip.py \ + && python3.10 /tmp/get-pip.py \ && rm /tmp/get-pip.py -CMD ["python3.8"] +# Test pip +RUN python3.10 -m pip + +# Install build requirements +COPY requirements.txt /requirements.txt +RUN python3.10 -m pip install --require-hashes -r requirements.txt + +CMD ["python3.10"] diff --git a/.kokoro/docker/docs/requirements.in b/.kokoro/docker/docs/requirements.in new file mode 100644 index 000000000..816817c67 --- /dev/null +++ b/.kokoro/docker/docs/requirements.in @@ -0,0 +1 @@ +nox diff --git a/.kokoro/docker/docs/requirements.txt b/.kokoro/docker/docs/requirements.txt new file mode 100644 index 000000000..f99a5c4aa --- /dev/null +++ b/.kokoro/docker/docs/requirements.txt @@ -0,0 +1,72 @@ +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# pip-compile --allow-unsafe --generate-hashes synthtool/gcp/templates/python_library/.kokoro/docker/docs/requirements.in +# +argcomplete==3.5.2 \ + --hash=sha256:036d020d79048a5d525bc63880d7a4b8d1668566b8a76daf1144c0bbe0f63472 \ + --hash=sha256:23146ed7ac4403b70bd6026402468942ceba34a6732255b9edf5b7354f68a6bb + # via nox +colorlog==6.9.0 \ + 
--hash=sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff \ + --hash=sha256:bfba54a1b93b94f54e1f4fe48395725a3d92fd2a4af702f6bd70946bdc0c6ac2 + # via nox +distlib==0.3.9 \ + --hash=sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87 \ + --hash=sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403 + # via virtualenv +filelock==3.16.1 \ + --hash=sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0 \ + --hash=sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435 + # via virtualenv +nox==2024.10.9 \ + --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \ + --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95 + # via -r synthtool/gcp/templates/python_library/.kokoro/docker/docs/requirements.in +packaging==24.2 \ + --hash=sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759 \ + --hash=sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f + # via nox +platformdirs==4.3.6 \ + --hash=sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907 \ + --hash=sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb + # via virtualenv +tomli==2.2.1 \ + --hash=sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6 \ + --hash=sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd \ + --hash=sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c \ + --hash=sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b \ + --hash=sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8 \ + --hash=sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6 \ + --hash=sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77 \ + --hash=sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff \ + --hash=sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea \ + --hash=sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192 \ + --hash=sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249 \ + --hash=sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee \ + --hash=sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4 \ + --hash=sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98 \ + --hash=sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8 \ + --hash=sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4 \ + --hash=sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281 \ + --hash=sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744 \ + --hash=sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69 \ + --hash=sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13 \ + --hash=sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140 \ + --hash=sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e \ + --hash=sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e \ + --hash=sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc \ + --hash=sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff \ + --hash=sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec \ + --hash=sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2 \ + 
--hash=sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222 \ + --hash=sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106 \ + --hash=sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272 \ + --hash=sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a \ + --hash=sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7 + # via nox +virtualenv==20.28.0 \ + --hash=sha256:23eae1b4516ecd610481eda647f3a7c09aea295055337331bb4e6892ecce47b0 \ + --hash=sha256:2c9c3262bb8e7b87ea801d715fae4495e6032450c71d2309be9550e7364049aa + # via nox diff --git a/.kokoro/docs/common.cfg b/.kokoro/docs/common.cfg index d3d3d8c50..73480a2ff 100644 --- a/.kokoro/docs/common.cfg +++ b/.kokoro/docs/common.cfg @@ -63,4 +63,4 @@ before_action { keyname: "docuploader_service_account" } } -} \ No newline at end of file +} diff --git a/.kokoro/populate-secrets.sh b/.kokoro/populate-secrets.sh index f52514257..c435402f4 100755 --- a/.kokoro/populate-secrets.sh +++ b/.kokoro/populate-secrets.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2020 Google LLC. +# Copyright 2024 Google LLC. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/presubmit/prerelease-deps.cfg b/.kokoro/presubmit/prerelease-deps.cfg new file mode 100644 index 000000000..3595fb43f --- /dev/null +++ b/.kokoro/presubmit/prerelease-deps.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "prerelease_deps" +} diff --git a/.kokoro/presubmit/system-3.8.cfg b/.kokoro/presubmit/system-3.8.cfg index f4bcee3db..6d3603eed 100644 --- a/.kokoro/presubmit/system-3.8.cfg +++ b/.kokoro/presubmit/system-3.8.cfg @@ -4,4 +4,10 @@ env_vars: { key: "NOX_SESSION" value: "system-3.8" +} + +# Credentials needed to test universe domain. +env_vars: { + key: "SECRET_MANAGER_KEYS" + value: "client-library-test-universe-domain-credential" } \ No newline at end of file diff --git a/.kokoro/publish-docs.sh b/.kokoro/publish-docs.sh index 8acb14e80..233205d58 100755 --- a/.kokoro/publish-docs.sh +++ b/.kokoro/publish-docs.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2020 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -21,20 +21,18 @@ export PYTHONUNBUFFERED=1 export PATH="${HOME}/.local/bin:${PATH}" # Install nox -python3 -m pip install --user --upgrade --quiet nox -python3 -m nox --version +python3.10 -m pip install --require-hashes -r .kokoro/requirements.txt +python3.10 -m nox --version # build docs nox -s docs -python3 -m pip install --user gcp-docuploader - # create metadata -python3 -m docuploader create-metadata \ +python3.10 -m docuploader create-metadata \ --name=$(jq --raw-output '.name // empty' .repo-metadata.json) \ - --version=$(python3 setup.py --version) \ + --version=$(python3.10 setup.py --version) \ --language=$(jq --raw-output '.language // empty' .repo-metadata.json) \ - --distribution-name=$(python3 setup.py --name) \ + --distribution-name=$(python3.10 setup.py --name) \ --product-page=$(jq --raw-output '.product_documentation // empty' .repo-metadata.json) \ --github-repository=$(jq --raw-output '.repo // empty' .repo-metadata.json) \ --issue-tracker=$(jq --raw-output '.issue_tracker // empty' .repo-metadata.json) @@ -42,18 +40,18 @@ python3 -m docuploader create-metadata \ cat docs.metadata # upload docs -python3 -m docuploader upload docs/_build/html --metadata-file docs.metadata --staging-bucket "${STAGING_BUCKET}" +python3.10 -m docuploader upload docs/_build/html --metadata-file docs.metadata --staging-bucket "${STAGING_BUCKET}" # docfx yaml files nox -s docfx # create metadata. -python3 -m docuploader create-metadata \ +python3.10 -m docuploader create-metadata \ --name=$(jq --raw-output '.name // empty' .repo-metadata.json) \ - --version=$(python3 setup.py --version) \ + --version=$(python3.10 setup.py --version) \ --language=$(jq --raw-output '.language // empty' .repo-metadata.json) \ - --distribution-name=$(python3 setup.py --name) \ + --distribution-name=$(python3.10 setup.py --name) \ --product-page=$(jq --raw-output '.product_documentation // empty' .repo-metadata.json) \ --github-repository=$(jq --raw-output '.repo // empty' .repo-metadata.json) \ --issue-tracker=$(jq --raw-output '.issue_tracker // empty' .repo-metadata.json) @@ -61,4 +59,4 @@ python3 -m docuploader create-metadata \ cat docs.metadata # upload docs -python3 -m docuploader upload docs/_build/html/docfx_yaml --metadata-file docs.metadata --destination-prefix docfx --staging-bucket "${V2_STAGING_BUCKET}" +python3.10 -m docuploader upload docs/_build/html/docfx_yaml --metadata-file docs.metadata --destination-prefix docfx --staging-bucket "${V2_STAGING_BUCKET}" diff --git a/.kokoro/release.sh b/.kokoro/release.sh index 7970969eb..a15b26b59 100755 --- a/.kokoro/release.sh +++ b/.kokoro/release.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2020 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,17 +16,14 @@ set -eo pipefail # Start the releasetool reporter -python3 -m pip install gcp-releasetool +python3 -m pip install --require-hashes -r github/python-storage/.kokoro/requirements.txt python3 -m releasetool publish-reporter-script > /tmp/publisher-script; source /tmp/publisher-script -# Ensure that we have the latest versions of Twine, Wheel, and Setuptools. -python3 -m pip install --upgrade twine wheel setuptools - # Disable buffering, so that the logs stream through. export PYTHONUNBUFFERED=1 # Move into the package, build the distribution and upload. 
-TWINE_PASSWORD=$(cat "${KOKORO_GFILE_DIR}/secret_manager/google-cloud-pypi-token") +TWINE_PASSWORD=$(cat "${KOKORO_KEYSTORE_DIR}/73713_google-cloud-pypi-token-keystore-2") cd github/python-storage python3 setup.py sdist bdist_wheel twine upload --username __token__ --password "${TWINE_PASSWORD}" dist/* diff --git a/.kokoro/release/common.cfg b/.kokoro/release/common.cfg index 58a3ff6b8..17918dc86 100644 --- a/.kokoro/release/common.cfg +++ b/.kokoro/release/common.cfg @@ -23,8 +23,27 @@ env_vars: { value: "github/python-storage/.kokoro/release.sh" } +# Fetch PyPI password +before_action { + fetch_keystore { + keystore_resource { + keystore_config_id: 73713 + keyname: "google-cloud-pypi-token-keystore-2" + } + } +} + # Tokens needed to report release status back to GitHub env_vars: { key: "SECRET_MANAGER_KEYS" - value: "releasetool-publish-reporter-app,releasetool-publish-reporter-googleapis-installation,releasetool-publish-reporter-pem,google-cloud-pypi-token" + value: "releasetool-publish-reporter-app,releasetool-publish-reporter-googleapis-installation,releasetool-publish-reporter-pem, client-library-test-universe-domain-credential" +} + +# Store the packages we uploaded to PyPI. That way, we have a record of exactly +# what we published, which we can use to generate SBOMs and attestations. +action { + define_artifacts { + regex: "github/python-storage/**/*.tar.gz" + strip_prefix: "github/python-storage" + } } diff --git a/.kokoro/requirements.in b/.kokoro/requirements.in new file mode 100644 index 000000000..fff4d9ce0 --- /dev/null +++ b/.kokoro/requirements.in @@ -0,0 +1,11 @@ +gcp-docuploader +gcp-releasetool>=2 # required for compatibility with cryptography>=42.x +importlib-metadata +typing-extensions +twine +wheel +setuptools +nox>=2022.11.21 # required to remove dependency on py +charset-normalizer<3 +click<8.1.0 +cryptography>=42.0.5 diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt new file mode 100644 index 000000000..9622baf0b --- /dev/null +++ b/.kokoro/requirements.txt @@ -0,0 +1,537 @@ +# +# This file is autogenerated by pip-compile with Python 3.9 +# by the following command: +# +# pip-compile --allow-unsafe --generate-hashes requirements.in +# +argcomplete==3.4.0 \ + --hash=sha256:69a79e083a716173e5532e0fa3bef45f793f4e61096cf52b5a42c0211c8b8aa5 \ + --hash=sha256:c2abcdfe1be8ace47ba777d4fce319eb13bf8ad9dace8d085dcad6eded88057f + # via nox +attrs==23.2.0 \ + --hash=sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30 \ + --hash=sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1 + # via gcp-releasetool +backports-tarfile==1.2.0 \ + --hash=sha256:77e284d754527b01fb1e6fa8a1afe577858ebe4e9dad8919e34c862cb399bc34 \ + --hash=sha256:d75e02c268746e1b8144c278978b6e98e85de6ad16f8e4b0844a154557eca991 + # via jaraco-context +cachetools==5.3.3 \ + --hash=sha256:0abad1021d3f8325b2fc1d2e9c8b9c9d57b04c3932657a72465447332c24d945 \ + --hash=sha256:ba29e2dfa0b8b556606f097407ed1aa62080ee108ab0dc5ec9d6a723a007d105 + # via google-auth +certifi==2024.7.4 \ + --hash=sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b \ + --hash=sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90 + # via requests +cffi==1.16.0 \ + --hash=sha256:0c9ef6ff37e974b73c25eecc13952c55bceed9112be2d9d938ded8e856138bcc \ + --hash=sha256:131fd094d1065b19540c3d72594260f118b231090295d8c34e19a7bbcf2e860a \ + --hash=sha256:1b8ebc27c014c59692bb2664c7d13ce7a6e9a629be20e54e7271fa696ff2b417 \ + 
--hash=sha256:2c56b361916f390cd758a57f2e16233eb4f64bcbeee88a4881ea90fca14dc6ab \ + --hash=sha256:2d92b25dbf6cae33f65005baf472d2c245c050b1ce709cc4588cdcdd5495b520 \ + --hash=sha256:31d13b0f99e0836b7ff893d37af07366ebc90b678b6664c955b54561fc36ef36 \ + --hash=sha256:32c68ef735dbe5857c810328cb2481e24722a59a2003018885514d4c09af9743 \ + --hash=sha256:3686dffb02459559c74dd3d81748269ffb0eb027c39a6fc99502de37d501faa8 \ + --hash=sha256:582215a0e9adbe0e379761260553ba11c58943e4bbe9c36430c4ca6ac74b15ed \ + --hash=sha256:5b50bf3f55561dac5438f8e70bfcdfd74543fd60df5fa5f62d94e5867deca684 \ + --hash=sha256:5bf44d66cdf9e893637896c7faa22298baebcd18d1ddb6d2626a6e39793a1d56 \ + --hash=sha256:6602bc8dc6f3a9e02b6c22c4fc1e47aa50f8f8e6d3f78a5e16ac33ef5fefa324 \ + --hash=sha256:673739cb539f8cdaa07d92d02efa93c9ccf87e345b9a0b556e3ecc666718468d \ + --hash=sha256:68678abf380b42ce21a5f2abde8efee05c114c2fdb2e9eef2efdb0257fba1235 \ + --hash=sha256:68e7c44931cc171c54ccb702482e9fc723192e88d25a0e133edd7aff8fcd1f6e \ + --hash=sha256:6b3d6606d369fc1da4fd8c357d026317fbb9c9b75d36dc16e90e84c26854b088 \ + --hash=sha256:748dcd1e3d3d7cd5443ef03ce8685043294ad6bd7c02a38d1bd367cfd968e000 \ + --hash=sha256:7651c50c8c5ef7bdb41108b7b8c5a83013bfaa8a935590c5d74627c047a583c7 \ + --hash=sha256:7b78010e7b97fef4bee1e896df8a4bbb6712b7f05b7ef630f9d1da00f6444d2e \ + --hash=sha256:7e61e3e4fa664a8588aa25c883eab612a188c725755afff6289454d6362b9673 \ + --hash=sha256:80876338e19c951fdfed6198e70bc88f1c9758b94578d5a7c4c91a87af3cf31c \ + --hash=sha256:8895613bcc094d4a1b2dbe179d88d7fb4a15cee43c052e8885783fac397d91fe \ + --hash=sha256:88e2b3c14bdb32e440be531ade29d3c50a1a59cd4e51b1dd8b0865c54ea5d2e2 \ + --hash=sha256:8f8e709127c6c77446a8c0a8c8bf3c8ee706a06cd44b1e827c3e6a2ee6b8c098 \ + --hash=sha256:9cb4a35b3642fc5c005a6755a5d17c6c8b6bcb6981baf81cea8bfbc8903e8ba8 \ + --hash=sha256:9f90389693731ff1f659e55c7d1640e2ec43ff725cc61b04b2f9c6d8d017df6a \ + --hash=sha256:a09582f178759ee8128d9270cd1344154fd473bb77d94ce0aeb2a93ebf0feaf0 \ + --hash=sha256:a6a14b17d7e17fa0d207ac08642c8820f84f25ce17a442fd15e27ea18d67c59b \ + --hash=sha256:a72e8961a86d19bdb45851d8f1f08b041ea37d2bd8d4fd19903bc3083d80c896 \ + --hash=sha256:abd808f9c129ba2beda4cfc53bde801e5bcf9d6e0f22f095e45327c038bfe68e \ + --hash=sha256:ac0f5edd2360eea2f1daa9e26a41db02dd4b0451b48f7c318e217ee092a213e9 \ + --hash=sha256:b29ebffcf550f9da55bec9e02ad430c992a87e5f512cd63388abb76f1036d8d2 \ + --hash=sha256:b2ca4e77f9f47c55c194982e10f058db063937845bb2b7a86c84a6cfe0aefa8b \ + --hash=sha256:b7be2d771cdba2942e13215c4e340bfd76398e9227ad10402a8767ab1865d2e6 \ + --hash=sha256:b84834d0cf97e7d27dd5b7f3aca7b6e9263c56308ab9dc8aae9784abb774d404 \ + --hash=sha256:b86851a328eedc692acf81fb05444bdf1891747c25af7529e39ddafaf68a4f3f \ + --hash=sha256:bcb3ef43e58665bbda2fb198698fcae6776483e0c4a631aa5647806c25e02cc0 \ + --hash=sha256:c0f31130ebc2d37cdd8e44605fb5fa7ad59049298b3f745c74fa74c62fbfcfc4 \ + --hash=sha256:c6a164aa47843fb1b01e941d385aab7215563bb8816d80ff3a363a9f8448a8dc \ + --hash=sha256:d8a9d3ebe49f084ad71f9269834ceccbf398253c9fac910c4fd7053ff1386936 \ + --hash=sha256:db8e577c19c0fda0beb7e0d4e09e0ba74b1e4c092e0e40bfa12fe05b6f6d75ba \ + --hash=sha256:dc9b18bf40cc75f66f40a7379f6a9513244fe33c0e8aa72e2d56b0196a7ef872 \ + --hash=sha256:e09f3ff613345df5e8c3667da1d918f9149bd623cd9070c983c013792a9a62eb \ + --hash=sha256:e4108df7fe9b707191e55f33efbcb2d81928e10cea45527879a4749cbe472614 \ + --hash=sha256:e6024675e67af929088fda399b2094574609396b1decb609c55fa58b028a32a1 \ + 
--hash=sha256:e70f54f1796669ef691ca07d046cd81a29cb4deb1e5f942003f401c0c4a2695d \ + --hash=sha256:e715596e683d2ce000574bae5d07bd522c781a822866c20495e52520564f0969 \ + --hash=sha256:e760191dd42581e023a68b758769e2da259b5d52e3103c6060ddc02c9edb8d7b \ + --hash=sha256:ed86a35631f7bfbb28e108dd96773b9d5a6ce4811cf6ea468bb6a359b256b1e4 \ + --hash=sha256:ee07e47c12890ef248766a6e55bd38ebfb2bb8edd4142d56db91b21ea68b7627 \ + --hash=sha256:fa3a0128b152627161ce47201262d3140edb5a5c3da88d73a1b790a959126956 \ + --hash=sha256:fcc8eb6d5902bb1cf6dc4f187ee3ea80a1eba0a89aba40a5cb20a5087d961357 + # via cryptography +charset-normalizer==2.1.1 \ + --hash=sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845 \ + --hash=sha256:83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f + # via + # -r requirements.in + # requests +click==8.0.4 \ + --hash=sha256:6a7a62563bbfabfda3a38f3023a1db4a35978c0abd76f6c9605ecd6554d6d9b1 \ + --hash=sha256:8458d7b1287c5fb128c90e23381cf99dcde74beaf6c7ff6384ce84d6fe090adb + # via + # -r requirements.in + # gcp-docuploader + # gcp-releasetool +colorlog==6.8.2 \ + --hash=sha256:3e3e079a41feb5a1b64f978b5ea4f46040a94f11f0e8bbb8261e3dbbeca64d44 \ + --hash=sha256:4dcbb62368e2800cb3c5abd348da7e53f6c362dda502ec27c560b2e58a66bd33 + # via + # gcp-docuploader + # nox +cryptography==42.0.8 \ + --hash=sha256:013629ae70b40af70c9a7a5db40abe5d9054e6f4380e50ce769947b73bf3caad \ + --hash=sha256:2346b911eb349ab547076f47f2e035fc8ff2c02380a7cbbf8d87114fa0f1c583 \ + --hash=sha256:2f66d9cd9147ee495a8374a45ca445819f8929a3efcd2e3df6428e46c3cbb10b \ + --hash=sha256:2f88d197e66c65be5e42cd72e5c18afbfae3f741742070e3019ac8f4ac57262c \ + --hash=sha256:31f721658a29331f895a5a54e7e82075554ccfb8b163a18719d342f5ffe5ecb1 \ + --hash=sha256:343728aac38decfdeecf55ecab3264b015be68fc2816ca800db649607aeee648 \ + --hash=sha256:5226d5d21ab681f432a9c1cf8b658c0cb02533eece706b155e5fbd8a0cdd3949 \ + --hash=sha256:57080dee41209e556a9a4ce60d229244f7a66ef52750f813bfbe18959770cfba \ + --hash=sha256:5a94eccb2a81a309806027e1670a358b99b8fe8bfe9f8d329f27d72c094dde8c \ + --hash=sha256:6b7c4f03ce01afd3b76cf69a5455caa9cfa3de8c8f493e0d3ab7d20611c8dae9 \ + --hash=sha256:7016f837e15b0a1c119d27ecd89b3515f01f90a8615ed5e9427e30d9cdbfed3d \ + --hash=sha256:81884c4d096c272f00aeb1f11cf62ccd39763581645b0812e99a91505fa48e0c \ + --hash=sha256:81d8a521705787afe7a18d5bfb47ea9d9cc068206270aad0b96a725022e18d2e \ + --hash=sha256:8d09d05439ce7baa8e9e95b07ec5b6c886f548deb7e0f69ef25f64b3bce842f2 \ + --hash=sha256:961e61cefdcb06e0c6d7e3a1b22ebe8b996eb2bf50614e89384be54c48c6b63d \ + --hash=sha256:9c0c1716c8447ee7dbf08d6db2e5c41c688544c61074b54fc4564196f55c25a7 \ + --hash=sha256:a0608251135d0e03111152e41f0cc2392d1e74e35703960d4190b2e0f4ca9c70 \ + --hash=sha256:a0c5b2b0585b6af82d7e385f55a8bc568abff8923af147ee3c07bd8b42cda8b2 \ + --hash=sha256:ad803773e9df0b92e0a817d22fd8a3675493f690b96130a5e24f1b8fabbea9c7 \ + --hash=sha256:b297f90c5723d04bcc8265fc2a0f86d4ea2e0f7ab4b6994459548d3a6b992a14 \ + --hash=sha256:ba4f0a211697362e89ad822e667d8d340b4d8d55fae72cdd619389fb5912eefe \ + --hash=sha256:c4783183f7cb757b73b2ae9aed6599b96338eb957233c58ca8f49a49cc32fd5e \ + --hash=sha256:c9bb2ae11bfbab395bdd072985abde58ea9860ed84e59dbc0463a5d0159f5b71 \ + --hash=sha256:cafb92b2bc622cd1aa6a1dce4b93307792633f4c5fe1f46c6b97cf67073ec961 \ + --hash=sha256:d45b940883a03e19e944456a558b67a41160e367a719833c53de6911cabba2b7 \ + --hash=sha256:dc0fdf6787f37b1c6b08e6dfc892d9d068b5bdb671198c72072828b80bd5fe4c \ + 
--hash=sha256:dea567d1b0e8bc5764b9443858b673b734100c2871dc93163f58c46a97a83d28 \ + --hash=sha256:dec9b018df185f08483f294cae6ccac29e7a6e0678996587363dc352dc65c842 \ + --hash=sha256:e3ec3672626e1b9e55afd0df6d774ff0e953452886e06e0f1eb7eb0c832e8902 \ + --hash=sha256:e599b53fd95357d92304510fb7bda8523ed1f79ca98dce2f43c115950aa78801 \ + --hash=sha256:fa76fbb7596cc5839320000cdd5d0955313696d9511debab7ee7278fc8b5c84a \ + --hash=sha256:fff12c88a672ab9c9c1cf7b0c80e3ad9e2ebd9d828d955c126be4fd3e5578c9e + # via + # -r requirements.in + # gcp-releasetool + # secretstorage +distlib==0.3.8 \ + --hash=sha256:034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784 \ + --hash=sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64 + # via virtualenv +docutils==0.21.2 \ + --hash=sha256:3a6b18732edf182daa3cd12775bbb338cf5691468f91eeeb109deff6ebfa986f \ + --hash=sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2 + # via readme-renderer +filelock==3.15.4 \ + --hash=sha256:2207938cbc1844345cb01a5a95524dae30f0ce089eba5b00378295a17e3e90cb \ + --hash=sha256:6ca1fffae96225dab4c6eaf1c4f4f28cd2568d3ec2a44e15a08520504de468e7 + # via virtualenv +gcp-docuploader==0.6.5 \ + --hash=sha256:30221d4ac3e5a2b9c69aa52fdbef68cc3f27d0e6d0d90e220fc024584b8d2318 \ + --hash=sha256:b7458ef93f605b9d46a4bf3a8dc1755dad1f31d030c8679edf304e343b347eea + # via -r requirements.in +gcp-releasetool==2.0.1 \ + --hash=sha256:34314a910c08e8911d9c965bd44f8f2185c4f556e737d719c33a41f6a610de96 \ + --hash=sha256:b0d5863c6a070702b10883d37c4bdfd74bf930fe417f36c0c965d3b7c779ae62 + # via -r requirements.in +google-api-core==2.19.1 \ + --hash=sha256:f12a9b8309b5e21d92483bbd47ce2c445861ec7d269ef6784ecc0ea8c1fa6125 \ + --hash=sha256:f4695f1e3650b316a795108a76a1c416e6afb036199d1c1f1f110916df479ffd + # via + # google-cloud-core + # google-cloud-storage +google-auth==2.31.0 \ + --hash=sha256:042c4702efa9f7d3c48d3a69341c209381b125faa6dbf3ebe56bc7e40ae05c23 \ + --hash=sha256:87805c36970047247c8afe614d4e3af8eceafc1ebba0c679fe75ddd1d575e871 + # via + # gcp-releasetool + # google-api-core + # google-cloud-core + # google-cloud-storage +google-cloud-core==2.4.1 \ + --hash=sha256:9b7749272a812bde58fff28868d0c5e2f585b82f37e09a1f6ed2d4d10f134073 \ + --hash=sha256:a9e6a4422b9ac5c29f79a0ede9485473338e2ce78d91f2370c01e730eab22e61 + # via google-cloud-storage +google-cloud-storage==2.17.0 \ + --hash=sha256:49378abff54ef656b52dca5ef0f2eba9aa83dc2b2c72c78714b03a1a95fe9388 \ + --hash=sha256:5b393bc766b7a3bc6f5407b9e665b2450d36282614b7945e570b3480a456d1e1 + # via gcp-docuploader +google-crc32c==1.5.0 \ + --hash=sha256:024894d9d3cfbc5943f8f230e23950cd4906b2fe004c72e29b209420a1e6b05a \ + --hash=sha256:02c65b9817512edc6a4ae7c7e987fea799d2e0ee40c53ec573a692bee24de876 \ + --hash=sha256:02ebb8bf46c13e36998aeaad1de9b48f4caf545e91d14041270d9dca767b780c \ + --hash=sha256:07eb3c611ce363c51a933bf6bd7f8e3878a51d124acfc89452a75120bc436289 \ + --hash=sha256:1034d91442ead5a95b5aaef90dbfaca8633b0247d1e41621d1e9f9db88c36298 \ + --hash=sha256:116a7c3c616dd14a3de8c64a965828b197e5f2d121fedd2f8c5585c547e87b02 \ + --hash=sha256:19e0a019d2c4dcc5e598cd4a4bc7b008546b0358bd322537c74ad47a5386884f \ + --hash=sha256:1c7abdac90433b09bad6c43a43af253e688c9cfc1c86d332aed13f9a7c7f65e2 \ + --hash=sha256:1e986b206dae4476f41bcec1faa057851f3889503a70e1bdb2378d406223994a \ + --hash=sha256:272d3892a1e1a2dbc39cc5cde96834c236d5327e2122d3aaa19f6614531bb6eb \ + --hash=sha256:278d2ed7c16cfc075c91378c4f47924c0625f5fc84b2d50d921b18b7975bd210 \ + 
--hash=sha256:2ad40e31093a4af319dadf503b2467ccdc8f67c72e4bcba97f8c10cb078207b5 \ + --hash=sha256:2e920d506ec85eb4ba50cd4228c2bec05642894d4c73c59b3a2fe20346bd00ee \ + --hash=sha256:3359fc442a743e870f4588fcf5dcbc1bf929df1fad8fb9905cd94e5edb02e84c \ + --hash=sha256:37933ec6e693e51a5b07505bd05de57eee12f3e8c32b07da7e73669398e6630a \ + --hash=sha256:398af5e3ba9cf768787eef45c803ff9614cc3e22a5b2f7d7ae116df8b11e3314 \ + --hash=sha256:3b747a674c20a67343cb61d43fdd9207ce5da6a99f629c6e2541aa0e89215bcd \ + --hash=sha256:461665ff58895f508e2866824a47bdee72497b091c730071f2b7575d5762ab65 \ + --hash=sha256:4c6fdd4fccbec90cc8a01fc00773fcd5fa28db683c116ee3cb35cd5da9ef6c37 \ + --hash=sha256:5829b792bf5822fd0a6f6eb34c5f81dd074f01d570ed7f36aa101d6fc7a0a6e4 \ + --hash=sha256:596d1f98fc70232fcb6590c439f43b350cb762fb5d61ce7b0e9db4539654cc13 \ + --hash=sha256:5ae44e10a8e3407dbe138984f21e536583f2bba1be9491239f942c2464ac0894 \ + --hash=sha256:635f5d4dd18758a1fbd1049a8e8d2fee4ffed124462d837d1a02a0e009c3ab31 \ + --hash=sha256:64e52e2b3970bd891309c113b54cf0e4384762c934d5ae56e283f9a0afcd953e \ + --hash=sha256:66741ef4ee08ea0b2cc3c86916ab66b6aef03768525627fd6a1b34968b4e3709 \ + --hash=sha256:67b741654b851abafb7bc625b6d1cdd520a379074e64b6a128e3b688c3c04740 \ + --hash=sha256:6ac08d24c1f16bd2bf5eca8eaf8304812f44af5cfe5062006ec676e7e1d50afc \ + --hash=sha256:6f998db4e71b645350b9ac28a2167e6632c239963ca9da411523bb439c5c514d \ + --hash=sha256:72218785ce41b9cfd2fc1d6a017dc1ff7acfc4c17d01053265c41a2c0cc39b8c \ + --hash=sha256:74dea7751d98034887dbd821b7aae3e1d36eda111d6ca36c206c44478035709c \ + --hash=sha256:759ce4851a4bb15ecabae28f4d2e18983c244eddd767f560165563bf9aefbc8d \ + --hash=sha256:77e2fd3057c9d78e225fa0a2160f96b64a824de17840351b26825b0848022906 \ + --hash=sha256:7c074fece789b5034b9b1404a1f8208fc2d4c6ce9decdd16e8220c5a793e6f61 \ + --hash=sha256:7c42c70cd1d362284289c6273adda4c6af8039a8ae12dc451dcd61cdabb8ab57 \ + --hash=sha256:7f57f14606cd1dd0f0de396e1e53824c371e9544a822648cd76c034d209b559c \ + --hash=sha256:83c681c526a3439b5cf94f7420471705bbf96262f49a6fe546a6db5f687a3d4a \ + --hash=sha256:8485b340a6a9e76c62a7dce3c98e5f102c9219f4cfbf896a00cf48caf078d438 \ + --hash=sha256:84e6e8cd997930fc66d5bb4fde61e2b62ba19d62b7abd7a69920406f9ecca946 \ + --hash=sha256:89284716bc6a5a415d4eaa11b1726d2d60a0cd12aadf5439828353662ede9dd7 \ + --hash=sha256:8b87e1a59c38f275c0e3676fc2ab6d59eccecfd460be267ac360cc31f7bcde96 \ + --hash=sha256:8f24ed114432de109aa9fd317278518a5af2d31ac2ea6b952b2f7782b43da091 \ + --hash=sha256:98cb4d057f285bd80d8778ebc4fde6b4d509ac3f331758fb1528b733215443ae \ + --hash=sha256:998679bf62b7fb599d2878aa3ed06b9ce688b8974893e7223c60db155f26bd8d \ + --hash=sha256:9ba053c5f50430a3fcfd36f75aff9caeba0440b2d076afdb79a318d6ca245f88 \ + --hash=sha256:9c99616c853bb585301df6de07ca2cadad344fd1ada6d62bb30aec05219c45d2 \ + --hash=sha256:a1fd716e7a01f8e717490fbe2e431d2905ab8aa598b9b12f8d10abebb36b04dd \ + --hash=sha256:a2355cba1f4ad8b6988a4ca3feed5bff33f6af2d7f134852cf279c2aebfde541 \ + --hash=sha256:b1f8133c9a275df5613a451e73f36c2aea4fe13c5c8997e22cf355ebd7bd0728 \ + --hash=sha256:b8667b48e7a7ef66afba2c81e1094ef526388d35b873966d8a9a447974ed9178 \ + --hash=sha256:ba1eb1843304b1e5537e1fca632fa894d6f6deca8d6389636ee5b4797affb968 \ + --hash=sha256:be82c3c8cfb15b30f36768797a640e800513793d6ae1724aaaafe5bf86f8f346 \ + --hash=sha256:c02ec1c5856179f171e032a31d6f8bf84e5a75c45c33b2e20a3de353b266ebd8 \ + --hash=sha256:c672d99a345849301784604bfeaeba4db0c7aae50b95be04dd651fd2a7310b93 \ + 
--hash=sha256:c6c777a480337ac14f38564ac88ae82d4cd238bf293f0a22295b66eb89ffced7 \ + --hash=sha256:cae0274952c079886567f3f4f685bcaf5708f0a23a5f5216fdab71f81a6c0273 \ + --hash=sha256:cd67cf24a553339d5062eff51013780a00d6f97a39ca062781d06b3a73b15462 \ + --hash=sha256:d3515f198eaa2f0ed49f8819d5732d70698c3fa37384146079b3799b97667a94 \ + --hash=sha256:d5280312b9af0976231f9e317c20e4a61cd2f9629b7bfea6a693d1878a264ebd \ + --hash=sha256:de06adc872bcd8c2a4e0dc51250e9e65ef2ca91be023b9d13ebd67c2ba552e1e \ + --hash=sha256:e1674e4307fa3024fc897ca774e9c7562c957af85df55efe2988ed9056dc4e57 \ + --hash=sha256:e2096eddb4e7c7bdae4bd69ad364e55e07b8316653234a56552d9c988bd2d61b \ + --hash=sha256:e560628513ed34759456a416bf86b54b2476c59144a9138165c9a1575801d0d9 \ + --hash=sha256:edfedb64740750e1a3b16152620220f51d58ff1b4abceb339ca92e934775c27a \ + --hash=sha256:f13cae8cc389a440def0c8c52057f37359014ccbc9dc1f0827936bcd367c6100 \ + --hash=sha256:f314013e7dcd5cf45ab1945d92e713eec788166262ae8deb2cfacd53def27325 \ + --hash=sha256:f583edb943cf2e09c60441b910d6a20b4d9d626c75a36c8fcac01a6c96c01183 \ + --hash=sha256:fd8536e902db7e365f49e7d9029283403974ccf29b13fc7028b97e2295b33556 \ + --hash=sha256:fe70e325aa68fa4b5edf7d1a4b6f691eb04bbccac0ace68e34820d283b5f80d4 + # via + # google-cloud-storage + # google-resumable-media +google-resumable-media==2.7.1 \ + --hash=sha256:103ebc4ba331ab1bfdac0250f8033627a2cd7cde09e7ccff9181e31ba4315b2c \ + --hash=sha256:eae451a7b2e2cdbaaa0fd2eb00cc8a1ee5e95e16b55597359cbc3d27d7d90e33 + # via google-cloud-storage +googleapis-common-protos==1.63.2 \ + --hash=sha256:27a2499c7e8aff199665b22741997e485eccc8645aa9176c7c988e6fae507945 \ + --hash=sha256:27c5abdffc4911f28101e635de1533fb4cfd2c37fbaa9174587c799fac90aa87 + # via google-api-core +idna==3.7 \ + --hash=sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc \ + --hash=sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0 + # via requests +importlib-metadata==8.0.0 \ + --hash=sha256:15584cf2b1bf449d98ff8a6ff1abef57bf20f3ac6454f431736cd3e660921b2f \ + --hash=sha256:188bd24e4c346d3f0a933f275c2fec67050326a856b9a359881d7c2a697e8812 + # via + # -r requirements.in + # keyring + # twine +jaraco-classes==3.4.0 \ + --hash=sha256:47a024b51d0239c0dd8c8540c6c7f484be3b8fcf0b2d85c13825780d3b3f3acd \ + --hash=sha256:f662826b6bed8cace05e7ff873ce0f9283b5c924470fe664fff1c2f00f581790 + # via keyring +jaraco-context==5.3.0 \ + --hash=sha256:3e16388f7da43d384a1a7cd3452e72e14732ac9fe459678773a3608a812bf266 \ + --hash=sha256:c2f67165ce1f9be20f32f650f25d8edfc1646a8aeee48ae06fb35f90763576d2 + # via keyring +jaraco-functools==4.0.1 \ + --hash=sha256:3b24ccb921d6b593bdceb56ce14799204f473976e2a9d4b15b04d0f2c2326664 \ + --hash=sha256:d33fa765374c0611b52f8b3a795f8900869aa88c84769d4d1746cd68fb28c3e8 + # via keyring +jeepney==0.8.0 \ + --hash=sha256:5efe48d255973902f6badc3ce55e2aa6c5c3b3bc642059ef3a91247bcfcc5806 \ + --hash=sha256:c0a454ad016ca575060802ee4d590dd912e35c122fa04e70306de3d076cce755 + # via + # keyring + # secretstorage +jinja2==3.1.4 \ + --hash=sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369 \ + --hash=sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d + # via gcp-releasetool +keyring==25.2.1 \ + --hash=sha256:2458681cdefc0dbc0b7eb6cf75d0b98e59f9ad9b2d4edd319d18f68bdca95e50 \ + --hash=sha256:daaffd42dbda25ddafb1ad5fec4024e5bbcfe424597ca1ca452b299861e49f1b + # via + # gcp-releasetool + # twine +markdown-it-py==3.0.0 \ + 
--hash=sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1 \ + --hash=sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb + # via rich +markupsafe==2.1.5 \ + --hash=sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf \ + --hash=sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff \ + --hash=sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f \ + --hash=sha256:17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3 \ + --hash=sha256:1f3fbcb7ef1f16e48246f704ab79d79da8a46891e2da03f8783a5b6fa41a9532 \ + --hash=sha256:2174c595a0d73a3080ca3257b40096db99799265e1c27cc5a610743acd86d62f \ + --hash=sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617 \ + --hash=sha256:2d2d793e36e230fd32babe143b04cec8a8b3eb8a3122d2aceb4a371e6b09b8df \ + --hash=sha256:30b600cf0a7ac9234b2638fbc0fb6158ba5bdcdf46aeb631ead21248b9affbc4 \ + --hash=sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906 \ + --hash=sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f \ + --hash=sha256:3c6b973f22eb18a789b1460b4b91bf04ae3f0c4234a0a6aa6b0a92f6f7b951d4 \ + --hash=sha256:3e53af139f8579a6d5f7b76549125f0d94d7e630761a2111bc431fd820e163b8 \ + --hash=sha256:4096e9de5c6fdf43fb4f04c26fb114f61ef0bf2e5604b6ee3019d51b69e8c371 \ + --hash=sha256:4275d846e41ecefa46e2015117a9f491e57a71ddd59bbead77e904dc02b1bed2 \ + --hash=sha256:4c31f53cdae6ecfa91a77820e8b151dba54ab528ba65dfd235c80b086d68a465 \ + --hash=sha256:4f11aa001c540f62c6166c7726f71f7573b52c68c31f014c25cc7901deea0b52 \ + --hash=sha256:5049256f536511ee3f7e1b3f87d1d1209d327e818e6ae1365e8653d7e3abb6a6 \ + --hash=sha256:58c98fee265677f63a4385256a6d7683ab1832f3ddd1e66fe948d5880c21a169 \ + --hash=sha256:598e3276b64aff0e7b3451b72e94fa3c238d452e7ddcd893c3ab324717456bad \ + --hash=sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2 \ + --hash=sha256:5dedb4db619ba5a2787a94d877bc8ffc0566f92a01c0ef214865e54ecc9ee5e0 \ + --hash=sha256:619bc166c4f2de5caa5a633b8b7326fbe98e0ccbfacabd87268a2b15ff73a029 \ + --hash=sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f \ + --hash=sha256:656f7526c69fac7f600bd1f400991cc282b417d17539a1b228617081106feb4a \ + --hash=sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced \ + --hash=sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5 \ + --hash=sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c \ + --hash=sha256:7a68b554d356a91cce1236aa7682dc01df0edba8d043fd1ce607c49dd3c1edcf \ + --hash=sha256:7b2e5a267c855eea6b4283940daa6e88a285f5f2a67f2220203786dfa59b37e9 \ + --hash=sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb \ + --hash=sha256:8590b4ae07a35970728874632fed7bd57b26b0102df2d2b233b6d9d82f6c62ad \ + --hash=sha256:8dd717634f5a044f860435c1d8c16a270ddf0ef8588d4887037c5028b859b0c3 \ + --hash=sha256:8dec4936e9c3100156f8a2dc89c4b88d5c435175ff03413b443469c7c8c5f4d1 \ + --hash=sha256:97cafb1f3cbcd3fd2b6fbfb99ae11cdb14deea0736fc2b0952ee177f2b813a46 \ + --hash=sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc \ + --hash=sha256:a549b9c31bec33820e885335b451286e2969a2d9e24879f83fe904a5ce59d70a \ + --hash=sha256:ac07bad82163452a6884fe8fa0963fb98c2346ba78d779ec06bd7a6262132aee \ + --hash=sha256:ae2ad8ae6ebee9d2d94b17fb62763125f3f374c25618198f40cbb8b525411900 \ + --hash=sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5 \ + 
--hash=sha256:bc1667f8b83f48511b94671e0e441401371dfd0f0a795c7daa4a3cd1dde55bea \ + --hash=sha256:bec0a414d016ac1a18862a519e54b2fd0fc8bbfd6890376898a6c0891dd82e9f \ + --hash=sha256:bf50cd79a75d181c9181df03572cdce0fbb75cc353bc350712073108cba98de5 \ + --hash=sha256:bff1b4290a66b490a2f4719358c0cdcd9bafb6b8f061e45c7a2460866bf50c2e \ + --hash=sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a \ + --hash=sha256:c8b29db45f8fe46ad280a7294f5c3ec36dbac9491f2d1c17345be8e69cc5928f \ + --hash=sha256:ce409136744f6521e39fd8e2a24c53fa18ad67aa5bc7c2cf83645cce5b5c4e50 \ + --hash=sha256:d050b3361367a06d752db6ead6e7edeb0009be66bc3bae0ee9d97fb326badc2a \ + --hash=sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b \ + --hash=sha256:d9fad5155d72433c921b782e58892377c44bd6252b5af2f67f16b194987338a4 \ + --hash=sha256:daa4ee5a243f0f20d528d939d06670a298dd39b1ad5f8a72a4275124a7819eff \ + --hash=sha256:db0b55e0f3cc0be60c1f19efdde9a637c32740486004f20d1cff53c3c0ece4d2 \ + --hash=sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46 \ + --hash=sha256:ea3d8a3d18833cf4304cd2fc9cbb1efe188ca9b5efef2bdac7adc20594a0e46b \ + --hash=sha256:ec6a563cff360b50eed26f13adc43e61bc0c04d94b8be985e6fb24b81f6dcfdf \ + --hash=sha256:f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5 \ + --hash=sha256:fa173ec60341d6bb97a89f5ea19c85c5643c1e7dedebc22f5181eb73573142c5 \ + --hash=sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab \ + --hash=sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd \ + --hash=sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68 + # via jinja2 +mdurl==0.1.2 \ + --hash=sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8 \ + --hash=sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba + # via markdown-it-py +more-itertools==10.3.0 \ + --hash=sha256:e5d93ef411224fbcef366a6e8ddc4c5781bc6359d43412a65dd5964e46111463 \ + --hash=sha256:ea6a02e24a9161e51faad17a8782b92a0df82c12c1c8886fec7f0c3fa1a1b320 + # via + # jaraco-classes + # jaraco-functools +nh3==0.2.18 \ + --hash=sha256:0411beb0589eacb6734f28d5497ca2ed379eafab8ad8c84b31bb5c34072b7164 \ + --hash=sha256:14c5a72e9fe82aea5fe3072116ad4661af5cf8e8ff8fc5ad3450f123e4925e86 \ + --hash=sha256:19aaba96e0f795bd0a6c56291495ff59364f4300d4a39b29a0abc9cb3774a84b \ + --hash=sha256:34c03fa78e328c691f982b7c03d4423bdfd7da69cd707fe572f544cf74ac23ad \ + --hash=sha256:36c95d4b70530b320b365659bb5034341316e6a9b30f0b25fa9c9eff4c27a204 \ + --hash=sha256:3a157ab149e591bb638a55c8c6bcb8cdb559c8b12c13a8affaba6cedfe51713a \ + --hash=sha256:42c64511469005058cd17cc1537578eac40ae9f7200bedcfd1fc1a05f4f8c200 \ + --hash=sha256:5f36b271dae35c465ef5e9090e1fdaba4a60a56f0bb0ba03e0932a66f28b9189 \ + --hash=sha256:6955369e4d9f48f41e3f238a9e60f9410645db7e07435e62c6a9ea6135a4907f \ + --hash=sha256:7b7c2a3c9eb1a827d42539aa64091640bd275b81e097cd1d8d82ef91ffa2e811 \ + --hash=sha256:8ce0f819d2f1933953fca255db2471ad58184a60508f03e6285e5114b6254844 \ + --hash=sha256:94a166927e53972a9698af9542ace4e38b9de50c34352b962f4d9a7d4c927af4 \ + --hash=sha256:a7f1b5b2c15866f2db413a3649a8fe4fd7b428ae58be2c0f6bca5eefd53ca2be \ + --hash=sha256:c8b3a1cebcba9b3669ed1a84cc65bf005728d2f0bc1ed2a6594a992e817f3a50 \ + --hash=sha256:de3ceed6e661954871d6cd78b410213bdcb136f79aafe22aa7182e028b8c7307 \ + --hash=sha256:f0eca9ca8628dbb4e916ae2491d72957fdd35f7a5d326b7032a345f111ac07fe + # via readme-renderer +nox==2024.4.15 \ + 
--hash=sha256:6492236efa15a460ecb98e7b67562a28b70da006ab0be164e8821177577c0565 \ + --hash=sha256:ecf6700199cdfa9e5ea0a41ff5e6ef4641d09508eda6edb89d9987864115817f + # via -r requirements.in +packaging==24.1 \ + --hash=sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002 \ + --hash=sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124 + # via + # gcp-releasetool + # nox +pkginfo==1.10.0 \ + --hash=sha256:5df73835398d10db79f8eecd5cd86b1f6d29317589ea70796994d49399af6297 \ + --hash=sha256:889a6da2ed7ffc58ab5b900d888ddce90bce912f2d2de1dc1c26f4cb9fe65097 + # via twine +platformdirs==4.2.2 \ + --hash=sha256:2d7a1657e36a80ea911db832a8a6ece5ee53d8de21edd5cc5879af6530b1bfee \ + --hash=sha256:38b7b51f512eed9e84a22788b4bce1de17c0adb134d6becb09836e37d8654cd3 + # via virtualenv +proto-plus==1.24.0 \ + --hash=sha256:30b72a5ecafe4406b0d339db35b56c4059064e69227b8c3bda7462397f966445 \ + --hash=sha256:402576830425e5f6ce4c2a6702400ac79897dab0b4343821aa5188b0fab81a12 + # via google-api-core +protobuf==5.27.2 \ + --hash=sha256:0e341109c609749d501986b835f667c6e1e24531096cff9d34ae411595e26505 \ + --hash=sha256:176c12b1f1c880bf7a76d9f7c75822b6a2bc3db2d28baa4d300e8ce4cde7409b \ + --hash=sha256:354d84fac2b0d76062e9b3221f4abbbacdfd2a4d8af36bab0474f3a0bb30ab38 \ + --hash=sha256:4fadd8d83e1992eed0248bc50a4a6361dc31bcccc84388c54c86e530b7f58863 \ + --hash=sha256:54330f07e4949d09614707c48b06d1a22f8ffb5763c159efd5c0928326a91470 \ + --hash=sha256:610e700f02469c4a997e58e328cac6f305f649826853813177e6290416e846c6 \ + --hash=sha256:7fc3add9e6003e026da5fc9e59b131b8f22b428b991ccd53e2af8071687b4fce \ + --hash=sha256:9e8f199bf7f97bd7ecebffcae45ebf9527603549b2b562df0fbc6d4d688f14ca \ + --hash=sha256:a109916aaac42bff84702fb5187f3edadbc7c97fc2c99c5ff81dd15dcce0d1e5 \ + --hash=sha256:b848dbe1d57ed7c191dfc4ea64b8b004a3f9ece4bf4d0d80a367b76df20bf36e \ + --hash=sha256:f3ecdef226b9af856075f28227ff2c90ce3a594d092c39bee5513573f25e2714 + # via + # gcp-docuploader + # gcp-releasetool + # google-api-core + # googleapis-common-protos + # proto-plus +pyasn1==0.6.0 \ + --hash=sha256:3a35ab2c4b5ef98e17dfdec8ab074046fbda76e281c5a706ccd82328cfc8f64c \ + --hash=sha256:cca4bb0f2df5504f02f6f8a775b6e416ff9b0b3b16f7ee80b5a3153d9b804473 + # via + # pyasn1-modules + # rsa +pyasn1-modules==0.4.0 \ + --hash=sha256:831dbcea1b177b28c9baddf4c6d1013c24c3accd14a1873fffaa6a2e905f17b6 \ + --hash=sha256:be04f15b66c206eed667e0bb5ab27e2b1855ea54a842e5037738099e8ca4ae0b + # via google-auth +pycparser==2.22 \ + --hash=sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6 \ + --hash=sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc + # via cffi +pygments==2.18.0 \ + --hash=sha256:786ff802f32e91311bff3889f6e9a86e81505fe99f2735bb6d60ae0c5004f199 \ + --hash=sha256:b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a + # via + # readme-renderer + # rich +pyjwt==2.8.0 \ + --hash=sha256:57e28d156e3d5c10088e0c68abb90bfac3df82b40a71bd0daa20c65ccd5c23de \ + --hash=sha256:59127c392cc44c2da5bb3192169a91f429924e17aff6534d70fdc02ab3e04320 + # via gcp-releasetool +pyperclip==1.9.0 \ + --hash=sha256:b7de0142ddc81bfc5c7507eea19da920b92252b548b96186caf94a5e2527d310 + # via gcp-releasetool +python-dateutil==2.9.0.post0 \ + --hash=sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3 \ + --hash=sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427 + # via gcp-releasetool +readme-renderer==44.0 \ + 
--hash=sha256:2fbca89b81a08526aadf1357a8c2ae889ec05fb03f5da67f9769c9a592166151 \ + --hash=sha256:8712034eabbfa6805cacf1402b4eeb2a73028f72d1166d6f5cb7f9c047c5d1e1 + # via twine +requests==2.32.3 \ + --hash=sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760 \ + --hash=sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6 + # via + # gcp-releasetool + # google-api-core + # google-cloud-storage + # requests-toolbelt + # twine +requests-toolbelt==1.0.0 \ + --hash=sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6 \ + --hash=sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06 + # via twine +rfc3986==2.0.0 \ + --hash=sha256:50b1502b60e289cb37883f3dfd34532b8873c7de9f49bb546641ce9cbd256ebd \ + --hash=sha256:97aacf9dbd4bfd829baad6e6309fa6573aaf1be3f6fa735c8ab05e46cecb261c + # via twine +rich==13.7.1 \ + --hash=sha256:4edbae314f59eb482f54e9e30bf00d33350aaa94f4bfcd4e9e3110e64d0d7222 \ + --hash=sha256:9be308cb1fe2f1f57d67ce99e95af38a1e2bc71ad9813b0e247cf7ffbcc3a432 + # via twine +rsa==4.9 \ + --hash=sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7 \ + --hash=sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21 + # via google-auth +secretstorage==3.3.3 \ + --hash=sha256:2403533ef369eca6d2ba81718576c5e0f564d5cca1b58f73a8b23e7d4eeebd77 \ + --hash=sha256:f356e6628222568e3af06f2eba8df495efa13b3b63081dafd4f7d9a7b7bc9f99 + # via keyring +six==1.16.0 \ + --hash=sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926 \ + --hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 + # via + # gcp-docuploader + # python-dateutil +tomli==2.0.1 \ + --hash=sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc \ + --hash=sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f + # via nox +twine==5.1.1 \ + --hash=sha256:215dbe7b4b94c2c50a7315c0275d2258399280fbb7d04182c7e55e24b5f93997 \ + --hash=sha256:9aa0825139c02b3434d913545c7b847a21c835e11597f5255842d457da2322db + # via -r requirements.in +typing-extensions==4.12.2 \ + --hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \ + --hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8 + # via -r requirements.in +urllib3==2.2.2 \ + --hash=sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472 \ + --hash=sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168 + # via + # requests + # twine +virtualenv==20.26.3 \ + --hash=sha256:4c43a2a236279d9ea36a0d76f98d84bd6ca94ac4e0f4a3b9d46d05e10fea542a \ + --hash=sha256:8cc4a31139e796e9a7de2cd5cf2489de1217193116a8fd42328f1bd65f434589 + # via nox +wheel==0.43.0 \ + --hash=sha256:465ef92c69fa5c5da2d1cf8ac40559a8c940886afcef87dcf14b9470862f1d85 \ + --hash=sha256:55c570405f142630c6b9f72fe09d9b67cf1477fcf543ae5b8dcb1f5b7377da81 + # via -r requirements.in +zipp==3.19.2 \ + --hash=sha256:bf1dcf6450f873a13e952a29504887c89e6de7506209e5b1bcc3460135d4de19 \ + --hash=sha256:f091755f667055f2d02b32c53771a7a6c8b47e1fdbc4b72a8b9072b3eef8015c + # via importlib-metadata + +# The following packages are considered to be unsafe in a requirements file: +setuptools==70.2.0 \ + --hash=sha256:b8b8060bb426838fbe942479c90296ce976249451118ef566a5a0b7d8b78fb05 \ + --hash=sha256:bd63e505105011b25c3c11f753f7e3b8465ea739efddaccef8f0efac2137bac1 + # via -r requirements.in diff --git a/.kokoro/samples/python3.6/common.cfg b/.kokoro/samples/python3.11/common.cfg similarity index 93% rename from 
.kokoro/samples/python3.6/common.cfg rename to .kokoro/samples/python3.11/common.cfg index 985a0cbfb..f9443bb73 100644 --- a/.kokoro/samples/python3.6/common.cfg +++ b/.kokoro/samples/python3.11/common.cfg @@ -10,13 +10,13 @@ action { # Specify which tests to run env_vars: { key: "RUN_TESTS_SESSION" - value: "py-3.6" + value: "py-3.11" } # Declare build specific Cloud project. env_vars: { key: "BUILD_SPECIFIC_GCLOUD_PROJECT" - value: "python-docs-samples-tests-py36" + value: "python-docs-samples-tests-311" } env_vars: { diff --git a/.kokoro/samples/python3.6/presubmit.cfg b/.kokoro/samples/python3.11/continuous.cfg similarity index 100% rename from .kokoro/samples/python3.6/presubmit.cfg rename to .kokoro/samples/python3.11/continuous.cfg diff --git a/.kokoro/samples/python3.6/periodic-head.cfg b/.kokoro/samples/python3.11/periodic-head.cfg similarity index 100% rename from .kokoro/samples/python3.6/periodic-head.cfg rename to .kokoro/samples/python3.11/periodic-head.cfg diff --git a/.kokoro/samples/python3.6/periodic.cfg b/.kokoro/samples/python3.11/periodic.cfg similarity index 100% rename from .kokoro/samples/python3.6/periodic.cfg rename to .kokoro/samples/python3.11/periodic.cfg diff --git a/.kokoro/samples/python3.11/presubmit.cfg b/.kokoro/samples/python3.11/presubmit.cfg new file mode 100644 index 000000000..a1c8d9759 --- /dev/null +++ b/.kokoro/samples/python3.11/presubmit.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file diff --git a/.kokoro/samples/python3.12/common.cfg b/.kokoro/samples/python3.12/common.cfg new file mode 100644 index 000000000..40c79a35a --- /dev/null +++ b/.kokoro/samples/python3.12/common.cfg @@ -0,0 +1,40 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Build logs will be here +action { + define_artifacts { + regex: "**/*sponge_log.xml" + } +} + +# Specify which tests to run +env_vars: { + key: "RUN_TESTS_SESSION" + value: "py-3.12" +} + +# Declare build specific Cloud project. +env_vars: { + key: "BUILD_SPECIFIC_GCLOUD_PROJECT" + value: "python-docs-samples-tests-312" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-storage/.kokoro/test-samples.sh" +} + +# Configure the docker image for kokoro-trampoline. +env_vars: { + key: "TRAMPOLINE_IMAGE" + value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker" +} + +# Download secrets for samples +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" + +# Download trampoline resources. +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" + +# Use the trampoline script to run in docker. 
+build_file: "python-storage/.kokoro/trampoline_v2.sh" \ No newline at end of file diff --git a/.kokoro/samples/python3.6/continuous.cfg b/.kokoro/samples/python3.12/continuous.cfg similarity index 97% rename from .kokoro/samples/python3.6/continuous.cfg rename to .kokoro/samples/python3.12/continuous.cfg index 7218af149..a1c8d9759 100644 --- a/.kokoro/samples/python3.6/continuous.cfg +++ b/.kokoro/samples/python3.12/continuous.cfg @@ -3,5 +3,4 @@ env_vars: { key: "INSTALL_LIBRARY_FROM_SOURCE" value: "True" -} - +} \ No newline at end of file diff --git a/.kokoro/samples/python3.12/periodic-head.cfg b/.kokoro/samples/python3.12/periodic-head.cfg new file mode 100644 index 000000000..5d0faf58f --- /dev/null +++ b/.kokoro/samples/python3.12/periodic-head.cfg @@ -0,0 +1,11 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-storage/.kokoro/test-samples-against-head.sh" +} diff --git a/.kokoro/samples/python3.12/periodic.cfg b/.kokoro/samples/python3.12/periodic.cfg new file mode 100644 index 000000000..71cd1e597 --- /dev/null +++ b/.kokoro/samples/python3.12/periodic.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "False" +} diff --git a/.kokoro/samples/python3.12/presubmit.cfg b/.kokoro/samples/python3.12/presubmit.cfg new file mode 100644 index 000000000..a1c8d9759 --- /dev/null +++ b/.kokoro/samples/python3.12/presubmit.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file diff --git a/.kokoro/samples/python3.13/common.cfg b/.kokoro/samples/python3.13/common.cfg new file mode 100644 index 000000000..8c288fd15 --- /dev/null +++ b/.kokoro/samples/python3.13/common.cfg @@ -0,0 +1,40 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Build logs will be here +action { + define_artifacts { + regex: "**/*sponge_log.xml" + } +} + +# Specify which tests to run +env_vars: { + key: "RUN_TESTS_SESSION" + value: "py-3.13" +} + +# Declare build specific Cloud project. +env_vars: { + key: "BUILD_SPECIFIC_GCLOUD_PROJECT" + value: "python-docs-samples-tests-313" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-storage/.kokoro/test-samples.sh" +} + +# Configure the docker image for kokoro-trampoline. +env_vars: { + key: "TRAMPOLINE_IMAGE" + value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker" +} + +# Download secrets for samples +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" + +# Download trampoline resources. +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" + +# Use the trampoline script to run in docker. 
+build_file: "python-storage/.kokoro/trampoline_v2.sh" diff --git a/.kokoro/samples/python3.13/continuous.cfg b/.kokoro/samples/python3.13/continuous.cfg new file mode 100644 index 000000000..a1c8d9759 --- /dev/null +++ b/.kokoro/samples/python3.13/continuous.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file diff --git a/.kokoro/samples/python3.13/periodic-head.cfg b/.kokoro/samples/python3.13/periodic-head.cfg new file mode 100644 index 000000000..5d0faf58f --- /dev/null +++ b/.kokoro/samples/python3.13/periodic-head.cfg @@ -0,0 +1,11 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-storage/.kokoro/test-samples-against-head.sh" +} diff --git a/.kokoro/samples/python3.13/periodic.cfg b/.kokoro/samples/python3.13/periodic.cfg new file mode 100644 index 000000000..71cd1e597 --- /dev/null +++ b/.kokoro/samples/python3.13/periodic.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "False" +} diff --git a/.kokoro/samples/python3.13/presubmit.cfg b/.kokoro/samples/python3.13/presubmit.cfg new file mode 100644 index 000000000..a1c8d9759 --- /dev/null +++ b/.kokoro/samples/python3.13/presubmit.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file diff --git a/.kokoro/test-samples-against-head.sh b/.kokoro/test-samples-against-head.sh index ba3a707b0..e9d8bd79a 100755 --- a/.kokoro/test-samples-against-head.sh +++ b/.kokoro/test-samples-against-head.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2020 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/test-samples-impl.sh b/.kokoro/test-samples-impl.sh index 8a324c9c7..53e365bc4 100755 --- a/.kokoro/test-samples-impl.sh +++ b/.kokoro/test-samples-impl.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2021 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -33,7 +33,8 @@ export PYTHONUNBUFFERED=1 env | grep KOKORO # Install nox -python3.6 -m pip install --upgrade --quiet nox +# `virtualenv==20.26.6` is added for Python 3.7 compatibility +python3.9 -m pip install --upgrade --quiet nox virtualenv==20.26.6 # Use secrets acessor service account to get secrets if [[ -f "${KOKORO_GFILE_DIR}/secrets_viewer_service_account.json" ]]; then @@ -76,7 +77,7 @@ for file in samples/**/requirements.txt; do echo "------------------------------------------------------------" # Use nox to execute the tests for the project. - python3.6 -m nox -s "$RUN_TESTS_SESSION" + python3.9 -m nox -s "$RUN_TESTS_SESSION" EXIT=$? # If this is a periodic build, send the test log to the FlakyBot. 
diff --git a/.kokoro/test-samples.sh b/.kokoro/test-samples.sh index 11c042d34..7933d8201 100755 --- a/.kokoro/test-samples.sh +++ b/.kokoro/test-samples.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2020 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/trampoline.sh b/.kokoro/trampoline.sh index f39236e94..48f796997 100755 --- a/.kokoro/trampoline.sh +++ b/.kokoro/trampoline.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2017 Google Inc. +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/trampoline_v2.sh b/.kokoro/trampoline_v2.sh index 4af6cdc26..35fa52923 100755 --- a/.kokoro/trampoline_v2.sh +++ b/.kokoro/trampoline_v2.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright 2020 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 62eb5a77d..1d74695f7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,4 +1,4 @@ -# Copyright 2021 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -22,10 +22,10 @@ repos: - id: end-of-file-fixer - id: check-yaml - repo: https://github.com/psf/black - rev: 19.10b0 + rev: 23.7.0 hooks: - id: black -- repo: https://gitlab.com/pycqa/flake8 - rev: 3.9.2 +- repo: https://github.com/pycqa/flake8 + rev: 6.1.0 hooks: - id: flake8 diff --git a/.repo-metadata.json b/.repo-metadata.json index 2cd2642fe..9e537d52f 100644 --- a/.repo-metadata.json +++ b/.repo-metadata.json @@ -13,5 +13,6 @@ "requires_billing": true, "default_version": "", "codeowner_team": "@googleapis/cloud-storage-dpe", - "api_shortname": "storage" + "api_shortname": "storage", + "api_description": "is a durable and highly available object storage service. Google Cloud Storage is almost infinitely scalable and guarantees consistency: when a write succeeds, the latest copy of the object will be returned to any GET, globally." } diff --git a/.trampolinerc b/.trampolinerc index 0eee72ab6..008015237 100644 --- a/.trampolinerc +++ b/.trampolinerc @@ -1,4 +1,4 @@ -# Copyright 2020 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Template for .trampolinerc - # Add required env vars here. required_envvars+=( ) diff --git a/CHANGELOG.md b/CHANGELOG.md index 19ff1db98..dcf58ac2a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,386 @@ [1]: https://pypi.org/project/google-cloud-storage/#history +## [3.0.0](https://github.com/googleapis/python-storage/compare/v2.19.0...v3.0.0) (2025-01-28) + + +### ⚠ BREAKING CHANGES + +Please consult the README for details on this major version release. 
+ +* The default checksum strategy for uploads has changed from None to "auto" ([#1383](https://github.com/googleapis/python-storage/issues/1383)) +* The default checksum strategy for downloads has changed from "md5" to "auto" ([#1383](https://github.com/googleapis/python-storage/issues/1383)) +* Deprecated positional argument "num_retries" has been removed ([#1377](https://github.com/googleapis/python-storage/issues/1377)) +* Deprecated argument "text_mode" has been removed ([#1379](https://github.com/googleapis/python-storage/issues/1379)) +* Blob.download_to_filename() now deletes the empty destination file on a 404 ([#1394](https://github.com/googleapis/python-storage/pull/1394)) +* Media operations now use the same retry backoff, timeout and custom predicate system as non-media operations, which may slightly impact default retry behavior ([#1385](https://github.com/googleapis/python-storage/issues/1385)) +* Retries are now enabled by default for uploads, blob deletes and blob metadata updates ([#1400](https://github.com/googleapis/python-storage/issues/1400)) + +### Features + +* Add "auto" checksum option and make default ([#1383](https://github.com/googleapis/python-storage/issues/1383)) ([5375fa0](https://github.com/googleapis/python-storage/commit/5375fa07385c60cac694025aee123e20cb25bb65)) +* Blob.download_to_filename() deletes the empty destination file on a 404 ([#1394](https://github.com/googleapis/python-storage/pull/1394)) ([066be2d](https://github.com/googleapis/python-storage/commit/066be2db789cfd28d47d143ca0f7ccc9da183682)) +* Enable custom predicates for media operations ([#1385](https://github.com/googleapis/python-storage/issues/1385)) ([f3517bf](https://github.com/googleapis/python-storage/commit/f3517bfcb9e4ab8e4d761eb64a753e64b3d5871d)) +* Integrate google-resumable-media ([#1283](https://github.com/googleapis/python-storage/issues/1283)) ([bd917b4](https://github.com/googleapis/python-storage/commit/bd917b49d2a20e2e1edee2d32dc65b66da8d6aba)) +* Retry by default for uploads, blob deletes, metadata updates ([#1400](https://github.com/googleapis/python-storage/issues/1400)) ([0426005](https://github.com/googleapis/python-storage/commit/0426005175079ebdd73c299642a83b8193086d60)) + + +### Bug Fixes + +* Cancel upload when BlobWriter exits with exception ([#1243](https://github.com/googleapis/python-storage/issues/1243)) ([df107d2](https://github.com/googleapis/python-storage/commit/df107d20a772e9b955d9978cd4a7731869e92cbe)) +* Changed name of methods `Blob.from_string()` and `Bucket.from_string()` to `from_uri()` ([#1335](https://github.com/googleapis/python-storage/issues/1335)) 
([58c1d03](https://github.com/googleapis/python-storage/commit/58c1d038198046665317a0d00eb9630608349476)) +* Correctly calculate starting offset for retries of ranged reads ([#1376](https://github.com/googleapis/python-storage/issues/1376)) ([7b6c9a0](https://github.com/googleapis/python-storage/commit/7b6c9a0fb3a79d713f951176a690f6e72c4d77c5)) +* Filter download_kwargs in BlobReader ([#1411](https://github.com/googleapis/python-storage/issues/1411)) ([0c21210](https://github.com/googleapis/python-storage/commit/0c21210450319f6da920982116ee52075105c45a)) +* Remove deprecated num_retries argument ([#1377](https://github.com/googleapis/python-storage/issues/1377)) ([58b5040](https://github.com/googleapis/python-storage/commit/58b5040933d4b21e0be94357ed5aa14c87969f73)) +* Remove deprecated text_mode argument ([#1379](https://github.com/googleapis/python-storage/issues/1379)) ([4d20a8e](https://github.com/googleapis/python-storage/commit/4d20a8efa8cf37bb7f099b20a8c352c9a0c42659)) + + +### Documentation + +* Correct formatting and update README.rst ([#1427](https://github.com/googleapis/python-storage/issues/1427)) ([2945853](https://github.com/googleapis/python-storage/commit/29458539773e834b202fef0c77dc439c393b37e8)) +* Fix issue with exceptions.py documentation ([#1328](https://github.com/googleapis/python-storage/issues/1328)) ([22b8c30](https://github.com/googleapis/python-storage/commit/22b8c304afc7199fbc2dec448a4a3c5eba7d4e3a)) + +## [2.19.0](https://github.com/googleapis/python-storage/compare/v2.18.2...v2.19.0) (2024-11-21) + + +### Features + +* Add integration test for universe domain ([#1346](https://github.com/googleapis/python-storage/issues/1346)) ([02a972d](https://github.com/googleapis/python-storage/commit/02a972d35fae6d05edfb26381f6a71e3b8f59d6d)) +* Add restore_bucket and handling for soft-deleted buckets ([#1365](https://github.com/googleapis/python-storage/issues/1365)) ([ab94efd](https://github.com/googleapis/python-storage/commit/ab94efda83f68c974ec91d6b869b09047501031a)) +* Add support for restore token ([#1369](https://github.com/googleapis/python-storage/issues/1369)) ([06ed15b](https://github.com/googleapis/python-storage/commit/06ed15b33dc884da6dffbef5119e47f0fc4e1285)) +* IAM signBlob retry and universe domain support ([#1380](https://github.com/googleapis/python-storage/issues/1380)) ([abc8061](https://github.com/googleapis/python-storage/commit/abc80615ee00a14bc0e6b095252f6d1eb09c4b45)) + + +### Bug Fixes + +* Allow signed post policy v4 with service account and token 
([#1356](https://github.com/googleapis/python-storage/issues/1356)) ([8ec02c0](https://github.com/googleapis/python-storage/commit/8ec02c0e656a4e6786f256798f4b93b95b50acec)) +* Do not spam the log with checksum related INFO messages when downloading using transfer_manager ([#1357](https://github.com/googleapis/python-storage/issues/1357)) ([42392ef](https://github.com/googleapis/python-storage/commit/42392ef8e38527ce4e50454cdd357425b3f57c87)) + +## [2.18.2](https://github.com/googleapis/python-storage/compare/v2.18.1...v2.18.2) (2024-08-08) + + +### Bug Fixes + +* Add regression test for range read retry issue and bump dependency to fix ([#1338](https://github.com/googleapis/python-storage/issues/1338)) ([0323647](https://github.com/googleapis/python-storage/commit/0323647d768b3be834cfab53efb3c557a47d41c3)) + +## [2.18.1](https://github.com/googleapis/python-storage/compare/v2.18.0...v2.18.1) (2024-08-05) + + +### Bug Fixes + +* Properly escape URL construction for XML MPU API ([#1333](https://github.com/googleapis/python-storage/issues/1333)) ([bf4d0e0](https://github.com/googleapis/python-storage/commit/bf4d0e0a2ef1d608d679c22b13d8f5d90b39c7b2)) + +## [2.18.0](https://github.com/googleapis/python-storage/compare/v2.17.0...v2.18.0) (2024-07-09) + + +### Features + +* Add OpenTelemetry Tracing support as a preview feature ([#1288](https://github.com/googleapis/python-storage/issues/1288)) ([c2ab0e0](https://github.com/googleapis/python-storage/commit/c2ab0e035b179a919b27c7f50318472f14656e00)) + + +### Bug Fixes + +* Allow Protobuf 5.x ([#1317](https://github.com/googleapis/python-storage/issues/1317)) ([152b249](https://github.com/googleapis/python-storage/commit/152b249472a09342777237d47b6c09f99c2d28e6)) +* Correct notification error message ([#1290](https://github.com/googleapis/python-storage/issues/1290)) ([1cb977d](https://github.com/googleapis/python-storage/commit/1cb977daa2d97c255a382ce81f56a43168b0637d)), closes [#1289](https://github.com/googleapis/python-storage/issues/1289) + +## [2.17.0](https://github.com/googleapis/python-storage/compare/v2.16.0...v2.17.0) (2024-05-22) + + +### Features + +* Support HNS enablement in bucket metadata ([#1278](https://github.com/googleapis/python-storage/issues/1278)) ([add3c01](https://github.com/googleapis/python-storage/commit/add3c01f0974e22df7f0b50504d5e83e4235fd81)) +* Support page_size in bucket.list_blobs ([#1275](https://github.com/googleapis/python-storage/issues/1275)) 
([c52e882](https://github.com/googleapis/python-storage/commit/c52e882f65583a7739392926308cc34984561165)) + + +### Bug Fixes + +* Remove deprecated methods in samples and tests ([#1274](https://github.com/googleapis/python-storage/issues/1274)) ([4db96c9](https://github.com/googleapis/python-storage/commit/4db96c960b07e503c1031c9fa879cf2af195f513)) + + +### Documentation + +* Reference Storage Control in readme ([#1254](https://github.com/googleapis/python-storage/issues/1254)) ([3d6d369](https://github.com/googleapis/python-storage/commit/3d6d3693d5c1b24cd3d2bbdeabfd78b8bfd4161a)) +* Update DEFAULT_RETRY_IF_GENERATION_SPECIFIED docstrings ([#1234](https://github.com/googleapis/python-storage/issues/1234)) ([bdd426a](https://github.com/googleapis/python-storage/commit/bdd426adf5901faa36115885af868ef50e356a36)) + +## [2.16.0](https://github.com/googleapis/python-storage/compare/v2.15.0...v2.16.0) (2024-03-18) + + +### Features + +* Add support for soft delete ([#1229](https://github.com/googleapis/python-storage/issues/1229)) ([3928aa0](https://github.com/googleapis/python-storage/commit/3928aa0680ec03addae1f792c73abb5c9dc8586f)) +* Support includeFoldersAsPrefixes ([#1223](https://github.com/googleapis/python-storage/issues/1223)) ([7bb8065](https://github.com/googleapis/python-storage/commit/7bb806538cf3d7a5e16390db1983620933d5e51a)) + +## [2.15.0](https://github.com/googleapis/python-storage/compare/v2.14.0...v2.15.0) (2024-02-28) + + +### Features + +* Support custom universe domains/TPC ([#1212](https://github.com/googleapis/python-storage/issues/1212)) ([f4cf041](https://github.com/googleapis/python-storage/commit/f4cf041a5f2075cecf5f4993f8b7afda0476a52b)) + + +### Bug Fixes + +* Add "updated" as property for Bucket ([#1220](https://github.com/googleapis/python-storage/issues/1220)) ([ae9a53b](https://github.com/googleapis/python-storage/commit/ae9a53b464e7d82c79a019a4111c49a4cdcc3ae0)) +* Remove utcnow usage ([#1215](https://github.com/googleapis/python-storage/issues/1215)) ([8d8a53a](https://github.com/googleapis/python-storage/commit/8d8a53a1368392ad7a1c4352f559c12932c5a9c9)) + +## [2.14.0](https://github.com/googleapis/python-storage/compare/v2.13.0...v2.14.0) (2023-12-10) + + +### Features + +* Add support for Python 3.12 ([#1187](https://github.com/googleapis/python-storage/issues/1187)) ([ecf4150](https://github.com/googleapis/python-storage/commit/ecf41504ba7f2a2c2db2e3c7e267686283d2cab3)) +* Support object retention lock ([#1188](https://github.com/googleapis/python-storage/issues/1188)) 
([a179337](https://github.com/googleapis/python-storage/commit/a1793375cf038ce79d4d4b7077f6b4dcc4b4aeec)) + + +### Bug Fixes + +* Clarify error message and docstrings in Blob class method ([#1196](https://github.com/googleapis/python-storage/issues/1196)) ([92c20d3](https://github.com/googleapis/python-storage/commit/92c20d3f7520c6b94308ebb156202fdfd1dcd482)) +* Propagate timeout in BlobWriter ([#1186](https://github.com/googleapis/python-storage/issues/1186)) ([22f36da](https://github.com/googleapis/python-storage/commit/22f36da1ce5b04408653ddbdbf35f25ed1072af8)), closes [#1184](https://github.com/googleapis/python-storage/issues/1184) +* Use native namespace to avoid pkg_resources warnings ([#1176](https://github.com/googleapis/python-storage/issues/1176)) ([2ed915e](https://github.com/googleapis/python-storage/commit/2ed915ec4b35df6fad04f42df25e48667148fcf5)) + +## [2.13.0](https://github.com/googleapis/python-storage/compare/v2.12.0...v2.13.0) (2023-10-31) + + +### Features + +* Add Autoclass v2.1 support ([#1117](https://github.com/googleapis/python-storage/issues/1117)) ([d38adb6](https://github.com/googleapis/python-storage/commit/d38adb6a3136152ad68ad8a9c4583d06509307b2)) +* Add support for custom headers ([#1121](https://github.com/googleapis/python-storage/issues/1121)) ([2f92c3a](https://github.com/googleapis/python-storage/commit/2f92c3a2a3a1585d0f77be8fe3c2c5324140b71a)) + + +### Bug Fixes + +* Blob.from_string parse storage uri with regex ([#1170](https://github.com/googleapis/python-storage/issues/1170)) ([0a243fa](https://github.com/googleapis/python-storage/commit/0a243faf5d6ca89b977ea1cf543356e0dd04df95)) +* Bucket.delete(force=True) now works with version-enabled buckets ([#1172](https://github.com/googleapis/python-storage/issues/1172)) ([0de09d3](https://github.com/googleapis/python-storage/commit/0de09d30ea6083d962be1c1f5341ea14a2456dc7)) +* Fix typo in Bucket.clear_lifecycle_rules() ([#1169](https://github.com/googleapis/python-storage/issues/1169)) ([eae9ebe](https://github.com/googleapis/python-storage/commit/eae9ebed12d26832405c2f29fbdb14b4babf080d)) + + +### Documentation + +* Fix exception field in tm reference docs ([#1164](https://github.com/googleapis/python-storage/issues/1164)) ([eac91cb](https://github.com/googleapis/python-storage/commit/eac91cb6ffb0066248f824fc1f307140dd7c85da)) + +## [2.12.0](https://github.com/googleapis/python-storage/compare/v2.11.0...v2.12.0) (2023-10-12) + + +### Features + +* Add additional_blob_attributes to upload_many_from_filenames 
([#1162](https://github.com/googleapis/python-storage/issues/1162)) ([c7229f2](https://github.com/googleapis/python-storage/commit/c7229f2e53151fc2f2eb1268afc67dad87ebbb0a)) +* Add crc32c_checksum argument to download_chunks_concurrently ([#1138](https://github.com/googleapis/python-storage/issues/1138)) ([fc92ad1](https://github.com/googleapis/python-storage/commit/fc92ad19ff0f9704456452e8c7c47a5f90c29eab)) +* Add skip_if_exists to download_many ([#1161](https://github.com/googleapis/python-storage/issues/1161)) ([c5a983d](https://github.com/googleapis/python-storage/commit/c5a983d5a0b0632811af86fb64664b4382b05512)) +* Launch transfer manager to GA ([#1159](https://github.com/googleapis/python-storage/issues/1159)) ([5c90563](https://github.com/googleapis/python-storage/commit/5c905637947c45e39ed8ee84911a12e254bde571)) + + +### Bug Fixes + +* Bump python-auth version to fix issue and remove workaround ([#1158](https://github.com/googleapis/python-storage/issues/1158)) ([28c02dd](https://github.com/googleapis/python-storage/commit/28c02dd41010e6d818a77f51c539457b2dbfa233)) +* Mark _deprecate_threads_param as a wrapper to unblock introspection and docs ([#1122](https://github.com/googleapis/python-storage/issues/1122)) ([69bd4a9](https://github.com/googleapis/python-storage/commit/69bd4a935a995f8f261a589ee2978f58b90224ab)) + + +### Documentation + +* Add snippets for upload_chunks_concurrently and add chunk_size ([#1135](https://github.com/googleapis/python-storage/issues/1135)) ([3a0f551](https://github.com/googleapis/python-storage/commit/3a0f551436b659afb2208fd558ddb846f4d62d98)) +* Update formatting and wording in transfer_manager docstrings ([#1163](https://github.com/googleapis/python-storage/issues/1163)) ([9e460d8](https://github.com/googleapis/python-storage/commit/9e460d8106cbfb76caf35df4f6beed159fa2c22d)) + +## [2.11.0](https://github.com/googleapis/python-storage/compare/v2.10.0...v2.11.0) (2023-09-19) + + +### Features + +* Add gccl-gcs-cmd field to X-Goog-API-Client header for Transfer Manager calls ([#1119](https://github.com/googleapis/python-storage/issues/1119)) ([14a1909](https://github.com/googleapis/python-storage/commit/14a1909963cfa41208f4e25b82b7c84c5e02452f)) +* Add transfer_manager.upload_chunks_concurrently using the XML MPU API ([#1115](https://github.com/googleapis/python-storage/issues/1115)) ([56aeb87](https://github.com/googleapis/python-storage/commit/56aeb8778d25fe245ac2e1e96ef71f0dad1fec0f)) +* Support configurable retries in upload_chunks_concurrently ([#1120](https://github.com/googleapis/python-storage/issues/1120)) 
([1271686](https://github.com/googleapis/python-storage/commit/1271686428c0faffd3dd1b4fd57bfe467d2817d4)) + + +### Bug Fixes + +* Split retention period tests due to caching change ([#1068](https://github.com/googleapis/python-storage/issues/1068)) ([cc191b0](https://github.com/googleapis/python-storage/commit/cc191b070c520e85030cd4cef6d7d9a7b1dd0bf4)) + + +### Documentation + +* Add Transfer Manager documentation in c.g.c ([#1109](https://github.com/googleapis/python-storage/issues/1109)) ([c1f8724](https://github.com/googleapis/python-storage/commit/c1f8724dc1c5dc180f36424324def74a5daec620)) + +## [2.10.0](https://github.com/googleapis/python-storage/compare/v2.9.0...v2.10.0) (2023-06-14) + + +### Features + +* Add matchGlob parameter to list_blobs ([#1055](https://github.com/googleapis/python-storage/issues/1055)) ([d02098e](https://github.com/googleapis/python-storage/commit/d02098e6d5f656f9802cf0a494b507d77b065be7)) +* Allow exceptions to be included in batch responses ([#1043](https://github.com/googleapis/python-storage/issues/1043)) ([94a35ba](https://github.com/googleapis/python-storage/commit/94a35ba7416804881973f6a5296b430bdcf2832d)) + + +### Bug Fixes + +* Extend wait for bucket metadata consistency in system tests ([#1053](https://github.com/googleapis/python-storage/issues/1053)) ([d78586c](https://github.com/googleapis/python-storage/commit/d78586c388a683b8678f280df0c9456c6e109af7)) + + +### Documentation + +* Add clarification to batch module ([#1045](https://github.com/googleapis/python-storage/issues/1045)) ([11f6024](https://github.com/googleapis/python-storage/commit/11f6024a4fd0a66e8cdcc6c89c3d33534892386d)) + +## [2.9.0](https://github.com/googleapis/python-storage/compare/v2.8.0...v2.9.0) (2023-05-04) + + +### Features + +* Un-deprecate blob.download_to_file(), bucket.create(), and bucket.list_blobs() ([#1013](https://github.com/googleapis/python-storage/issues/1013)) ([aa4f282](https://github.com/googleapis/python-storage/commit/aa4f282514ebdaf58ced0743859a4ab1458f967c)) + + +### Bug Fixes + +* Avoid pickling processed credentials ([#1016](https://github.com/googleapis/python-storage/issues/1016)) ([7935824](https://github.com/googleapis/python-storage/commit/7935824049e2e6e430d2e601156730d6366c78f7)) +* Improve test error message for missing credentials ([#1024](https://github.com/googleapis/python-storage/issues/1024)) ([892481a](https://github.com/googleapis/python-storage/commit/892481a2c76fe5747ada3392345c087fb7f8bd8a)) + + +### Documentation + +* Add sample and sample test for transfer manager 
([#1027](https://github.com/googleapis/python-storage/issues/1027)) ([4698799](https://github.com/googleapis/python-storage/commit/4698799101b5847d55edc8267db85257a74c3119)) +* Remove threads in transfer manager samples ([#1029](https://github.com/googleapis/python-storage/issues/1029)) ([30c5146](https://github.com/googleapis/python-storage/commit/30c51469af2efd4f5becaab7e7b02b207a074267)) + +## [2.8.0](https://github.com/googleapis/python-storage/compare/v2.7.0...v2.8.0) (2023-03-29) + + +### Features + +* Add multiprocessing and chunked downloading to transfer manager ([#1002](https://github.com/googleapis/python-storage/issues/1002)) ([e65316b](https://github.com/googleapis/python-storage/commit/e65316b5352a4e15c4dba806e899ad58f8665464)) + + +### Bug Fixes + +* Add trove classifier for python 3.11 ([#971](https://github.com/googleapis/python-storage/issues/971)) ([7886376](https://github.com/googleapis/python-storage/commit/7886376e5105f705a5fe9d061463cf1e033aecd0)) +* Remove use of deprecated cgi module ([#1006](https://github.com/googleapis/python-storage/issues/1006)) ([3071832](https://github.com/googleapis/python-storage/commit/30718322f6c7b1d7a3e4cfd44b6e1796f721b655)) + + +### Documentation + +* Add clarifications to read timeout ([#873](https://github.com/googleapis/python-storage/issues/873)) ([8fb26f4](https://github.com/googleapis/python-storage/commit/8fb26f439cf28ac4ec7a841db1cd0fd60ea77362)) +* Fix c.g.c structure ([#982](https://github.com/googleapis/python-storage/issues/982)) ([d5a2931](https://github.com/googleapis/python-storage/commit/d5a29318b5c68678ea63eb40a4dfede562f8963e)) +* Update c.g.c docs and guides ([#994](https://github.com/googleapis/python-storage/issues/994)) ([62b4a50](https://github.com/googleapis/python-storage/commit/62b4a500e40860c54c53d12323434d28739f9812)) + +## [2.7.0](https://github.com/googleapis/python-storage/compare/v2.6.0...v2.7.0) (2022-12-07) + + +### Features + +* Add "transfer_manager" module for concurrent uploads and downloads, as a preview feature ([#943](https://github.com/googleapis/python-storage/issues/943)) ([9998a5e](https://github.com/googleapis/python-storage/commit/9998a5e1c9e9e8920c4d40e13e39095585de657a)) +* Add use_auth_w_custom_endpoint support ([#941](https://github.com/googleapis/python-storage/issues/941)) ([5291c08](https://github.com/googleapis/python-storage/commit/5291c08cc76a7dbd853e51c19c944f6336c14d26)) + + +### Bug Fixes + +* Implement closed property on fileio.py classes ([#907](https://github.com/googleapis/python-storage/issues/907)) 
([64406ca](https://github.com/googleapis/python-storage/commit/64406ca70cef98a81f6bb9da6e602196f4235178)) + +## [2.6.0](https://github.com/googleapis/python-storage/compare/v2.5.0...v2.6.0) (2022-11-07) + + +### Features + +* Add Autoclass support and samples ([#791](https://github.com/googleapis/python-storage/issues/791)) ([9ccdc5f](https://github.com/googleapis/python-storage/commit/9ccdc5f2e8a9e28b2df47260d639b6af2708fe9a)), closes [#797](https://github.com/googleapis/python-storage/issues/797) +* Add predefined_acl to create_resumable_upload_session ([#878](https://github.com/googleapis/python-storage/issues/878)) ([2b3e8f9](https://github.com/googleapis/python-storage/commit/2b3e8f967df95d45c35e150b201e77b8962c7e9b)) +* Enable delete_blobs() to preserve generation ([#840](https://github.com/googleapis/python-storage/issues/840)) ([8fd4c37](https://github.com/googleapis/python-storage/commit/8fd4c376bd5f031836feb8101c9c0c0d1c2e969d)), closes [#814](https://github.com/googleapis/python-storage/issues/814) +* Make tests run against environments other than prod ([#883](https://github.com/googleapis/python-storage/issues/883)) ([7dfeb62](https://github.com/googleapis/python-storage/commit/7dfeb622bb966e368786e3c9be67ad77b3150725)) + + +### Bug Fixes + +* Align bucket bound hostname url builder consistency ([#875](https://github.com/googleapis/python-storage/issues/875)) ([8a24add](https://github.com/googleapis/python-storage/commit/8a24add52f0bc7dbcb3ec427bd3e4551b3afcbf5)) +* BlobWriter.close() will do nothing if already closed ([#887](https://github.com/googleapis/python-storage/issues/887)) ([7707220](https://github.com/googleapis/python-storage/commit/770722034072cfcaafc18340e91746957ef31397)) +* Remove client side validations ([#868](https://github.com/googleapis/python-storage/issues/868)) ([928ebbc](https://github.com/googleapis/python-storage/commit/928ebbccbe183666f3b35adb7226bd259d4e71c0)) + + +### Documentation + +* Update comments in list_blobs sample ([#866](https://github.com/googleapis/python-storage/issues/866)) ([9469f5d](https://github.com/googleapis/python-storage/commit/9469f5dd5ca6d546a47efbc3d673a401ead9d632)) +* Clarify prefixes entity in list_blobs usage ([#837](https://github.com/googleapis/python-storage/issues/837)) ([7101f47](https://github.com/googleapis/python-storage/commit/7101f47fde663eec4bbaaa246c7fe4e973ca2506)) +* Streamline docs for migration ([#876](https://github.com/googleapis/python-storage/issues/876)) 
([7c8a178](https://github.com/googleapis/python-storage/commit/7c8a178978d2022482afd301242ae79b2f9c737a)) +* Update docstring for lifecycle_rules to match generator behavior ([#841](https://github.com/googleapis/python-storage/issues/841)) ([36fb81b](https://github.com/googleapis/python-storage/commit/36fb81b5b0e5b7e65b9db434c997617136bfc3fc)) + +## [2.5.0](https://github.com/googleapis/python-storage/compare/v2.4.0...v2.5.0) (2022-07-24) + + +### Features + +* Custom Placement Config Dual Region Support ([#819](https://github.com/googleapis/python-storage/issues/819)) ([febece7](https://github.com/googleapis/python-storage/commit/febece76802252278bb7626d931973a76561382a)) + + +### Documentation + +* open file-like objects in byte mode for uploads ([#824](https://github.com/googleapis/python-storage/issues/824)) ([4bd3d1d](https://github.com/googleapis/python-storage/commit/4bd3d1ddf21196b075bbd84cdcb553c5d7355b93)) + +## [2.4.0](https://github.com/googleapis/python-storage/compare/v2.3.0...v2.4.0) (2022-06-07) + + +### Features + +* add AbortIncompleteMultipartUpload lifecycle rule ([#765](https://github.com/googleapis/python-storage/issues/765)) ([b2e5150](https://github.com/googleapis/python-storage/commit/b2e5150f191c04acb47ad98cef88512451aff81d)) +* support OLM Prefix/Suffix ([#773](https://github.com/googleapis/python-storage/issues/773)) ([187cf50](https://github.com/googleapis/python-storage/commit/187cf503194cf636640ca8ba787f9e8c216ea763)) + + +### Bug Fixes + +* fix rewrite object in CMEK enabled bucket ([#807](https://github.com/googleapis/python-storage/issues/807)) ([9b3cbf3](https://github.com/googleapis/python-storage/commit/9b3cbf3789c21462eac3c776cd29df12701e792f)) + + +### Documentation + +* fix changelog header to consistent size ([#802](https://github.com/googleapis/python-storage/issues/802)) ([4dd0907](https://github.com/googleapis/python-storage/commit/4dd0907b68e20d1ffcd0fe350831867197917e0d)) +* **samples:** Update the Recovery Point Objective (RPO) sample output ([#725](https://github.com/googleapis/python-storage/issues/725)) ([b0bf411](https://github.com/googleapis/python-storage/commit/b0bf411f8fec8712b3eeb99a2dd33de6d82312f8)) +* Update generation_metageneration.rst with a missing space ([#798](https://github.com/googleapis/python-storage/issues/798)) ([1e7cdb6](https://github.com/googleapis/python-storage/commit/1e7cdb655beb2a61a0d1b984c4d0468ec31bf463)) +* update retry docs ([#808](https://github.com/googleapis/python-storage/issues/808)) 
([c365d5b](https://github.com/googleapis/python-storage/commit/c365d5bbd78292adb6861da3cdfae9ab7b39b844)) + +## [2.3.0](https://github.com/googleapis/python-storage/compare/v2.2.1...v2.3.0) (2022-04-12) + + +### Features + +* add dual region bucket support and sample ([#748](https://github.com/googleapis/python-storage/issues/748)) ([752e8ab](https://github.com/googleapis/python-storage/commit/752e8ab42d23afd68738e4d7ca6cdeee416dfd50)) +* track invocation id for retry metrics ([#741](https://github.com/googleapis/python-storage/issues/741)) ([bd56931](https://github.com/googleapis/python-storage/commit/bd5693164e7331df5f14186fd002e72e5203d7ee)) + + +### Bug Fixes + +* **deps:** drop pkg_resources ([#744](https://github.com/googleapis/python-storage/issues/744)) ([e963f33](https://github.com/googleapis/python-storage/commit/e963f33ced2852b64d721d69928b54443461ec9c)) + + +### Documentation + +* fix links in blob module ([#759](https://github.com/googleapis/python-storage/issues/759)) ([9b29314](https://github.com/googleapis/python-storage/commit/9b2931430b0796ffb23ec4efacd82dacad36f40f)) + +## [2.2.1](https://github.com/googleapis/python-storage/compare/v2.2.0...v2.2.1) (2022-03-15) + + +### Bug Fixes + +* remove py.typed marker file for PEP 561 ([#735](https://github.com/googleapis/python-storage/issues/735)) ([f77d2f7](https://github.com/googleapis/python-storage/commit/f77d2f787f435f2f898e9babcdab81225672ad4f)), closes [#734](https://github.com/googleapis/python-storage/issues/734) + +## [2.2.0](https://github.com/googleapis/python-storage/compare/v2.1.0...v2.2.0) (2022-03-14) + + +### Features + +* allow no project in client methods using storage emulator ([#703](https://github.com/googleapis/python-storage/issues/703)) ([bcde0ec](https://github.com/googleapis/python-storage/commit/bcde0ec619d7d303892bcc0863b7f977c79f7649)) + + +### Bug Fixes + +* add user agent in python-storage when calling resumable media ([c7bf615](https://github.com/googleapis/python-storage/commit/c7bf615909a04f3bab3efb1047a9f4ba659bba19)) +* **deps:** require google-api-core>=1.31.5, >=2.3.2 ([#722](https://github.com/googleapis/python-storage/issues/722)) ([e9aab38](https://github.com/googleapis/python-storage/commit/e9aab389f868799d4425133954bad4f1cbb85786)) +* Fix BlobReader handling of interleaved reads and seeks ([#721](https://github.com/googleapis/python-storage/issues/721)) ([5d1cfd2](https://github.com/googleapis/python-storage/commit/5d1cfd2050321481a3bc4acbe80537ea666506fa)) +* retry client side requests timeout 
([#727](https://github.com/googleapis/python-storage/issues/727)) ([e0b3b35](https://github.com/googleapis/python-storage/commit/e0b3b354d51e4be7c563d7f2f628a7139df842c0)) + + +### Documentation + +* fixed download_blob_to_file example ([#704](https://github.com/googleapis/python-storage/issues/704)) ([2c94d98](https://github.com/googleapis/python-storage/commit/2c94d98ed21cc768cfa54fac3d734254fc4d8480)) + +## [2.1.0](https://github.com/googleapis/python-storage/compare/v2.0.0...v2.1.0) (2022-01-19) + + +### Features + +* add turbo replication support and samples ([#622](https://github.com/googleapis/python-storage/issues/622)) ([4dafc81](https://github.com/googleapis/python-storage/commit/4dafc815470480ce9de7f0357e331d3fbd0ae9b7)) +* avoid authentication with storage emulator ([#679](https://github.com/googleapis/python-storage/issues/679)) ([8789afa](https://github.com/googleapis/python-storage/commit/8789afaaa1b2bd6f03fae72e3d87ce004ec10129)) +* remove python 3.6 support ([#689](https://github.com/googleapis/python-storage/issues/689)) ([8aa4130](https://github.com/googleapis/python-storage/commit/8aa4130ee068a1922161c8ca54a53a4a51d65ce0)) + ## [2.0.0](https://github.com/googleapis/python-storage/compare/v1.44.0...v2.0.0) (2022-01-12) @@ -51,7 +431,7 @@ * add README to samples subdirectory ([#639](https://www.github.com/googleapis/python-storage/issues/639)) ([58af882](https://www.github.com/googleapis/python-storage/commit/58af882c047c31f59486513c568737082bca6350)) * update samples readme with cli args ([#651](https://www.github.com/googleapis/python-storage/issues/651)) ([75dda81](https://www.github.com/googleapis/python-storage/commit/75dda810e808074d18dfe7915f1403ad01bf2f02)) -### [1.42.3](https://www.github.com/googleapis/python-storage/compare/v1.42.2...v1.42.3) (2021-09-30) +## [1.42.3](https://www.github.com/googleapis/python-storage/compare/v1.42.2...v1.42.3) (2021-09-30) ### Bug Fixes @@ -60,7 +440,7 @@ * check response code in batch.finish ([#609](https://www.github.com/googleapis/python-storage/issues/609)) ([318a286](https://www.github.com/googleapis/python-storage/commit/318a286d709427bfe9f3a37e933c255ac51b3033)) * skip tests that use unspecified pap until we get the change in ([#600](https://www.github.com/googleapis/python-storage/issues/600)) ([38b9b55](https://www.github.com/googleapis/python-storage/commit/38b9b5582e2c6bbd1acab2b49410084170466fad)) -### [1.42.2](https://www.github.com/googleapis/python-storage/compare/v1.42.1...v1.42.2) (2021-09-16) +## 
[1.42.2](https://www.github.com/googleapis/python-storage/compare/v1.42.1...v1.42.2) (2021-09-16) ### Bug Fixes @@ -69,7 +449,7 @@ * add unpinned protobuf for python3 ([#592](https://www.github.com/googleapis/python-storage/issues/592)) ([53f7ad0](https://www.github.com/googleapis/python-storage/commit/53f7ad0204ad425011da9162d1a78f8276c837eb)) * pin six as a required dependency ([#589](https://www.github.com/googleapis/python-storage/issues/589)) ([9ca97bf](https://www.github.com/googleapis/python-storage/commit/9ca97bf9139c71cd033c78af73da904b27d8ff50)) -### [1.42.1](https://www.github.com/googleapis/python-storage/compare/v1.42.0...v1.42.1) (2021-09-07) +## [1.42.1](https://www.github.com/googleapis/python-storage/compare/v1.42.0...v1.42.1) (2021-09-07) ### Bug Fixes @@ -100,7 +480,7 @@ * update supported / removed Python versions in README ([#519](https://www.github.com/googleapis/python-storage/issues/519)) ([1f1b138](https://www.github.com/googleapis/python-storage/commit/1f1b138865fb171535ee0cf768aff1987ed58914)) -### [1.41.1](https://www.github.com/googleapis/python-storage/compare/v1.41.0...v1.41.1) (2021-07-20) +## [1.41.1](https://www.github.com/googleapis/python-storage/compare/v1.41.0...v1.41.1) (2021-07-20) ### Bug Fixes @@ -178,7 +558,7 @@ * revise docstrings for generate_signed_url ([#408](https://www.github.com/googleapis/python-storage/issues/408)) ([f090548](https://www.github.com/googleapis/python-storage/commit/f090548437142b635191e90dcee1acd4c38e565c)) -### [1.37.1](https://www.github.com/googleapis/python-storage/compare/v1.37.0...v1.37.1) (2021-04-02) +## [1.37.1](https://www.github.com/googleapis/python-storage/compare/v1.37.0...v1.37.1) (2021-04-02) ### Bug Fixes @@ -198,14 +578,14 @@ * update user_project usage and documentation in bucket/client class methods ([#396](https://www.github.com/googleapis/python-storage/issues/396)) ([1a2734b](https://www.github.com/googleapis/python-storage/commit/1a2734ba6d316ce51e4e141571331e86196462b9)) -### [1.36.2](https://www.github.com/googleapis/python-storage/compare/v1.36.1...v1.36.2) (2021-03-09) +## [1.36.2](https://www.github.com/googleapis/python-storage/compare/v1.36.1...v1.36.2) (2021-03-09) ### Bug Fixes * update batch connection to request api endpoint info from client ([#392](https://www.github.com/googleapis/python-storage/issues/392)) ([91fc6d9](https://www.github.com/googleapis/python-storage/commit/91fc6d9870a36308b15a827ed6a691e5b4669b62)) -### [1.36.1](https://www.github.com/googleapis/python-storage/compare/v1.36.0...v1.36.1) (2021-02-19) +## 
[1.36.1](https://www.github.com/googleapis/python-storage/compare/v1.36.0...v1.36.1) (2021-02-19) ### Bug Fixes @@ -229,7 +609,7 @@ * pass the unused parameter ([#349](https://www.github.com/googleapis/python-storage/issues/349)) ([5c60d24](https://www.github.com/googleapis/python-storage/commit/5c60d240aa98d2a1dcc6933d6da2ce60ea1b7559)) * set custom_time on uploads ([#374](https://www.github.com/googleapis/python-storage/issues/374)) ([f048be1](https://www.github.com/googleapis/python-storage/commit/f048be10416f51cea4e6c8c5b805df7b5d9c4d32)), closes [#372](https://www.github.com/googleapis/python-storage/issues/372) -### [1.35.1](https://www.github.com/googleapis/python-storage/compare/v1.35.0...v1.35.1) (2021-01-28) +## [1.35.1](https://www.github.com/googleapis/python-storage/compare/v1.35.0...v1.35.1) (2021-01-28) ### Bug Fixes @@ -288,14 +668,14 @@ * self-upload files for Unicode system test ([#296](https://www.github.com/googleapis/python-storage/issues/296)) ([6f865d9](https://www.github.com/googleapis/python-storage/commit/6f865d97a19278884356055dfeeaae92f7c63cc1)) * use version.py for versioning, avoid issues with discovering version via get_distribution ([#288](https://www.github.com/googleapis/python-storage/issues/288)) ([fcd1c4f](https://www.github.com/googleapis/python-storage/commit/fcd1c4f7c947eb95d6937783fd69670a570f145e)) -### [1.31.2](https://www.github.com/googleapis/python-storage/compare/v1.31.1...v1.31.2) (2020-09-23) +## [1.31.2](https://www.github.com/googleapis/python-storage/compare/v1.31.1...v1.31.2) (2020-09-23) ### Documentation * fix docstring example for 'blob.generate_signed_url' ([#278](https://www.github.com/googleapis/python-storage/issues/278)) ([2dc91c9](https://www.github.com/googleapis/python-storage/commit/2dc91c947e3693023b4478a15c460693808ea2d9)) -### [1.31.1](https://www.github.com/googleapis/python-storage/compare/v1.31.0...v1.31.1) (2020-09-16) +## [1.31.1](https://www.github.com/googleapis/python-storage/compare/v1.31.0...v1.31.1) (2020-09-16) ### Bug Fixes @@ -388,7 +768,7 @@ * fix upload object with bucket cmek enabled ([#158](https://www.github.com/googleapis/python-storage/issues/158)) ([5f27ffa](https://www.github.com/googleapis/python-storage/commit/5f27ffa3b1b55681453b594a0ef9e2811fc5f0c8)) * set default POST policy scheme to "http" ([#172](https://www.github.com/googleapis/python-storage/issues/172)) ([90c020d](https://www.github.com/googleapis/python-storage/commit/90c020d69a69ebc396416e4086a2e0838932130c)) -### 
[1.28.1](https://www.github.com/googleapis/python-storage/compare/v1.28.0...v1.28.1) (2020-04-28) +## [1.28.1](https://www.github.com/googleapis/python-storage/compare/v1.28.0...v1.28.1) (2020-04-28) ### Bug Fixes diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index f0118678a..d53ad8707 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -21,8 +21,8 @@ In order to add a feature: - The feature must be documented in both the API and narrative documentation. -- The feature must work fully on the following CPython versions: 2.7, - 3.5, 3.6, 3.7, 3.8, 3.9 and 3.10 on both UNIX and Windows. +- The feature must work fully on the following CPython versions: + 3.7, 3.8, 3.9, 3.10, 3.11 and 3.12 on both UNIX and Windows. - The feature must not add unnecessary dependencies (where "unnecessary" is of course subjective, but new dependencies should diff --git a/MANIFEST.in b/MANIFEST.in index e783f4c62..d6814cd60 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/README.rst b/README.rst index 5419ae509..4a94b178f 100644 --- a/README.rst +++ b/README.rst @@ -1,80 +1,178 @@ Python Client for Google Cloud Storage ====================================== -|GA| |pypi| |versions| +|stable| |pypi| |versions| -`Google Cloud Storage`_ allows you to store data on -Google infrastructure with very high reliability, performance and -availability, and can be used to distribute large data objects to users -via direct download. +`Google Cloud Storage`_ is a managed service for storing unstructured data. Cloud Storage +allows world-wide storage and retrieval of any amount of data at any time. You can use +Cloud Storage for a range of scenarios including serving website content, storing data +for archival and disaster recovery, or distributing large data objects to users via direct download. +A comprehensive list of changes in each version may be found in the `CHANGELOG`_. + +- `Product Documentation`_ - `Client Library Documentation`_ -- `Storage API docs`_ +- `github.com/googleapis/python-storage`_ + +Certain control plane and long-running operations for Cloud Storage (including Folder +and Managed Folder operations) are supported via the `Storage Control Client`_. +The `Storage Control API`_ creates one space to perform metadata-specific, control plane, +and long-running operations apart from the Storage API. -.. |GA| image:: https://img.shields.io/badge/support-GA-gold.svg - :target: https://github.com/googleapis/google-cloud-python/blob/main/README.rst#general-availability +Read more about the client libraries for Cloud APIs, including the older +Google APIs Client Libraries, in `Client Libraries Explained`_. + +.. |stable| image:: https://img.shields.io/badge/support-stable-gold.svg + :target: https://github.com/googleapis/google-cloud-python/blob/main/README.rst#stability-levels .. 
|pypi| image:: https://img.shields.io/pypi/v/google-cloud-storage.svg - :target: https://pypi.org/project/google-cloud-storage + :target: https://pypi.org/project/google-cloud-storage/ .. |versions| image:: https://img.shields.io/pypi/pyversions/google-cloud-storage.svg - :target: https://pypi.org/project/google-cloud-storage -.. _Google Cloud Storage: https://cloud.google.com/storage/docs -.. _Client Library Documentation: https://googleapis.dev/python/storage/latest -.. _Storage API docs: https://cloud.google.com/storage/docs/json_api/v1 + :target: https://pypi.org/project/google-cloud-storage/ +.. _Google Cloud Storage: https://cloud.google.com/storage +.. _Client Library Documentation: https://cloud.google.com/python/docs/reference/storage/latest +.. _Product Documentation: https://cloud.google.com/storage +.. _CHANGELOG: https://github.com/googleapis/python-storage/blob/main/CHANGELOG.md +.. _github.com/googleapis/python-storage: https://github.com/googleapis/python-storage +.. _Storage Control Client: https://cloud.google.com/python/docs/reference/google-cloud-storage-control/latest +.. _Storage Control API: https://cloud.google.com/storage/docs/reference/rpc/google.storage.control.v2 +.. _Client Libraries Explained: https://cloud.google.com/apis/docs/client-libraries-explained + +3.0 Major Version Notes +----------------------- + +Feedback Welcome +~~~~~~~~~~~~~~~~ + +If you experience that backwards compatibility for your application is broken +with this major version release, please let us know through the Github issues +system. While some breaks of backwards compatibility may be unavoidable due to +new features in the major version release, we will do our best to minimize +them. Thank you. + +Exception Handling +~~~~~~~~~~~~~~~~~~ + +In Python Storage 3.0, the dependency ``google-resumable-media`` was integrated. +The ``google-resumable-media`` dependency included exceptions +``google.resumable_media.common.InvalidResponse`` and +``google.resumable_media.common.DataCorruption``, which were often imported +directly in user application code. The replacements for these exceptions are +``google.cloud.storage.exceptions.InvalidResponse`` and +``google.cloud.storage.exceptions.DataCorruption``. Please update application code +to import and use these exceptions instead. + +For backwards compatibility, if ``google-resumable-media`` is installed, the new +exceptions will be defined as subclasses of the old exceptions, so applications +should continue to work without modification. This backwards compatibility +feature may be removed in a future major version update. + +Some users may be using the original exception classes from the +``google-resumable-media`` library without explicitly installing that library. 
So +as not to break user applications following this pattern, +``google-resumable-media`` is still in the list of dependencies in this package's +setup.py file. Applications which do not import directly from +``google-resumable-media`` can safely disregard this dependency. +This backwards compatibility feature **will be removed** in a future major +version update. Please migrate to using the ``google.cloud.storage.exceptions`` +classes as above. + +Checksum Defaults +~~~~~~~~~~~~~~~~~ + +In Python Storage 3.0, uploads and downloads now have a default of "auto" where +applicable. "Auto" will use crc32c checksums, except for unusual cases where the +fast (C extension) crc32c implementation is not available, in which case it will +use md5 instead. Before Python Storage 3.0, the default was md5 for most +downloads and None for most uploads. Note that ranged downloads ("start" or +"end" set) still do not support any checksumming, and some features in +``transfer_manager.py`` still support crc32c only. + +Note: The method ``Blob.upload_from_file()`` requires a file in bytes mode, but +when checksum is set to None, as was the previous default, would not throw an +error if passed a file in string mode under some circumstances. With the new +defaults, it will now raise a TypeError. Please use a file opened in bytes +reading mode as required. + +Miscellaneous +~~~~~~~~~~~~~ + +- The ``BlobWriter`` class now attempts to terminate an ongoing resumable upload if + the writer exits with an exception. +- Retry behavior is now identical between media operations (uploads and + downloads) and other operations, and custom predicates are now supported for + media operations as well. +- ``Blob.download_as_filename()`` will now delete the empty file if it results in a + google.cloud.exceptions.NotFound exception (HTTP 404). +- Previously, object upload, metadata update, and delete methods had retries + disabled by default unless the generation or metageneration was specified in + the request. This has now changed so that retries are enabled by default. Quick Start ----------- -In order to use this library, you first need to go through the following steps: +In order to use this library, you first need to go through the following steps. +A step-by-step guide may also be found in `Get Started with Client Libraries`_. 1. `Select or create a Cloud Platform project.`_ 2. `Enable billing for your project.`_ 3. `Enable the Google Cloud Storage API.`_ 4. `Setup Authentication.`_ +.. _Get Started with Client Libraries: https://cloud.google.com/storage/docs/reference/libraries#client-libraries-install-python .. _Select or create a Cloud Platform project.: https://console.cloud.google.com/project .. _Enable billing for your project.: https://cloud.google.com/billing/docs/how-to/modify-project#enable_billing_for_a_project -.. _Enable the Google Cloud Storage API.: https://cloud.google.com/storage -.. _Setup Authentication.: https://cloud.google.com/storage/docs/reference/libraries#setting_up_authentication +.. _Enable the Google Cloud Storage API.: https://console.cloud.google.com/flows/enableapi?apiid=storage-api.googleapis.com +.. 
_Setup Authentication.: https://cloud.google.com/docs/authentication/client-libraries Installation ~~~~~~~~~~~~ -`Set up a Python development environment`_ and install this library in a `venv`. -`venv`_ is a tool to create isolated Python environments. The basic problem it -addresses is one of dependencies and versions, and indirectly permissions. +Install this library in a virtual environment using `venv`_. `venv`_ is a tool that +creates isolated Python environments. These isolated environments can have separate +versions of Python packages, which allows you to isolate one project's dependencies +from the dependencies of other projects. -Make sure you're using Python 3.3 or later, which includes `venv`_ by default. -With `venv`, it's possible to install this library without needing system +With `venv`_, it's possible to install this library without needing system install permissions, and without clashing with the installed system dependencies. -.. _Set up a Python development environment: https://cloud.google.com/python/docs/setup .. _`venv`: https://docs.python.org/3/library/venv.html +Code samples and snippets +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Code samples and snippets live in the `samples/`_ folder. + +.. _`samples/`: https://github.com/googleapis/python-storage/tree/main/samples + + Supported Python Versions ^^^^^^^^^^^^^^^^^^^^^^^^^ -Python >= 3.6 +Our client libraries are compatible with all current `active`_ and `maintenance`_ versions of +Python. -Deprecated Python Versions -^^^^^^^^^^^^^^^^^^^^^^^^^^ -Python == 2.7: Python 2.7 support will be removed sometime after January 1, 2020. +Python >= 3.7 + +.. _active: https://devguide.python.org/devcycle/#in-development-main-branch +.. _maintenance: https://devguide.python.org/devcycle/#maintenance-branches Unsupported Python Versions ^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Python == 3.5: the last released version which supported Python 3.5 was -``google-cloud-storage 1.32.0``, released 2020-10-16. +Python <= 3.6 + +If you are using an `end-of-life`_ +version of Python, we recommend that you update as soon as possible to an actively supported version. -Python == 2.7: the last released version which supported Python 2.7 was -``google-cloud-storage 1.44.0``, released 2022-01-05. +.. _end-of-life: https://devguide.python.org/devcycle/#end-of-life-branches Mac/Linux ^^^^^^^^^ .. code-block:: console - python -m venv env - source env/bin/activate + python3 -m venv + source /bin/activate pip install google-cloud-storage @@ -83,40 +181,74 @@ Windows .. code-block:: console - py -m venv env - .\env\Scripts\activate + py -m venv + .\\Scripts\activate pip install google-cloud-storage -Example Usage -~~~~~~~~~~~~~ +Tracing With OpenTelemetry +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is a PREVIEW FEATURE: Coverage and functionality are still in development and subject to change. + +This library can be configured to use `OpenTelemetry`_ to generate traces on calls to Google Cloud Storage. +For information on the benefits and utility of tracing, read the `Cloud Trace Overview `_. + +To enable OpenTelemetry tracing in the Cloud Storage client, first install OpenTelemetry: + +.. 
code-block:: console + + pip install google-cloud-storage[tracing] + +Set the ``ENABLE_GCS_PYTHON_CLIENT_OTEL_TRACES`` environment variable to selectively opt-in tracing for the Cloud Storage client: + +.. code-block:: console + + export ENABLE_GCS_PYTHON_CLIENT_OTEL_TRACES=True + +You will also need to tell OpenTelemetry which exporter to use. An example to export traces to Google Cloud Trace can be found below. + +.. code-block:: console + + # Install the Google Cloud Trace exporter and propagator, however you can use any exporter of your choice. + pip install opentelemetry-exporter-gcp-trace opentelemetry-propagator-gcp + + # [Optional] Install the OpenTelemetry Requests Instrumentation to trace the underlying HTTP requests. + pip install opentelemetry-instrumentation-requests + +.. code-block:: python -.. code:: python + from opentelemetry import trace + from opentelemetry.sdk.trace import TracerProvider + from opentelemetry.sdk.trace.export import BatchSpanProcessor + from opentelemetry.exporter.cloud_trace import CloudTraceSpanExporter - # Imports the Google Cloud client library - from google.cloud import storage + tracer_provider = TracerProvider() + tracer_provider.add_span_processor(BatchSpanProcessor(CloudTraceSpanExporter())) + trace.set_tracer_provider(tracer_provider) - # Instantiates a client - client = storage.Client() + # Optional yet recommended to instrument the requests HTTP library + from opentelemetry.instrumentation.requests import RequestsInstrumentor + RequestsInstrumentor().instrument(tracer_provider=tracer_provider) - # Creates a new bucket and uploads an object - new_bucket = client.create_bucket('new-bucket-id') - new_blob = new_bucket.blob('remote/path/storage.txt') - new_blob.upload_from_filename(filename='/local/path.txt') +In this example, tracing data will be published to the `Google Cloud Trace`_ console. +Tracing is most effective when many libraries are instrumented to provide insight over the entire lifespan of a request. +For a list of libraries that can be instrumented, refer to the `OpenTelemetry Registry`_. - # Retrieve an existing bucket - # https://console.cloud.google.com/storage/browser/[bucket-id]/ - bucket = client.get_bucket('bucket-id') - # Then do other things... - blob = bucket.get_blob('remote/path/to/file.txt') - print(blob.download_as_bytes()) - blob.upload_from_string('New contents!') +.. _OpenTelemetry: https://opentelemetry.io +.. _OpenTelemetry Registry: https://opentelemetry.io/ecosystem/registry +.. _Google Cloud Trace: https://cloud.google.com/trace -What's Next -~~~~~~~~~~~ +Next Steps +~~~~~~~~~~ -Now that you've set up your Python client for Cloud Storage, -you can get started running `Storage samples.`_ +- Read the `Google Cloud Storage Product documentation`_ to learn + more about the product and see How-to Guides. +- Read the `Client Library Documentation`_ for Google Cloud Storage API + to see other available methods on the client. +- View this `README`_ to see the full list of Cloud + APIs that we cover. -.. _Storage samples.: https://github.com/googleapis/python-storage/tree/main/samples +.. _Google Cloud Storage Product documentation: https://cloud.google.com/storage +.. 
_README: https://github.com/googleapis/google-cloud-python/blob/main/README.rst diff --git a/docs/acl_guide.rst b/docs/acl_guide.rst new file mode 100644 index 000000000..13ba4e660 --- /dev/null +++ b/docs/acl_guide.rst @@ -0,0 +1,165 @@ +Managing Access to Data +======================= + +Cloud Storage offers two systems for granting users access your buckets and objects: +IAM and Access Control Lists (ACLs). These systems act in parallel - in order for a user to +access a Cloud Storage resource, only one of the systems needs to grant that user permission. +For additional access control options, see also: +`Cloud Storage Control Access to Data `_ + + +ACL +--- + +Cloud Storage uses access control lists (ACLs) to manage object and bucket access. +ACLs are the mechanism you use to share files with other users and allow +other users to access your buckets and files. + +ACLs are suitable for fine-grained control, but you may prefer using IAM to +control access at the project level. + + +:class:`google.cloud.storage.bucket.Bucket` has a getting method that creates +an ACL object under the hood, and you can interact with that using +:func:`google.cloud.storage.bucket.Bucket.acl`: + +.. code-block:: python + + client = storage.Client() + bucket = client.get_bucket(bucket_name) + acl = bucket.acl + +Adding and removing permissions can be done with the following methods +(in increasing order of granularity): + +- :func:`ACL.all` + corresponds to access for all users. +- :func:`ACL.all_authenticated` corresponds + to access for all users that are signed into a Google account. +- :func:`ACL.domain` corresponds to access on a + per Google Apps domain (ie, ``example.com``). +- :func:`ACL.group` corresponds to access on a + per group basis (either by ID or e-mail address). +- :func:`ACL.user` corresponds to access on a + per user basis (either by ID or e-mail address). + +And you are able to ``grant`` and ``revoke`` the following roles: + +- **Reading**: + :func:`_ACLEntity.grant_read` and :func:`_ACLEntity.revoke_read` +- **Writing**: + :func:`_ACLEntity.grant_write` and :func:`_ACLEntity.revoke_write` +- **Owning**: + :func:`_ACLEntity.grant_owner` and :func:`_ACLEntity.revoke_owner` + +You can use any of these like any other factory method (these happen to +be :class:`_ACLEntity` factories): + +.. code-block:: python + + acl.user("me@example.org").grant_read() + acl.all_authenticated().grant_write() + +After that, you can save any changes you make with the +:func:`google.cloud.storage.acl.ACL.save` method: + +.. code-block:: python + + acl.save() + + +You can alternatively save any existing :class:`google.cloud.storage.acl.ACL` +object (whether it was created by a factory method or not) from a +:class:`google.cloud.storage.bucket.Bucket`: + +.. code-block:: python + + bucket.acl.save(acl=acl) + + +To get the list of ``entity`` and ``role`` for each unique pair, the +:class:`ACL` class is iterable: + +.. code-block:: python + + print(list(acl)) + # [{'role': 'OWNER', 'entity': 'allUsers'}, ...] + + +This list of tuples can be used as the ``entity`` and ``role`` fields +when sending metadata for ACLs to the API. + + +IAM +--- + +Identity and Access Management (IAM) controls permissioning throughout Google Cloud and allows you +to grant permissions at the bucket and project levels. You should use IAM for any permissions that +apply to multiple objects in a bucket to reduce the risks of unintended exposure. 
To use IAM +exclusively, enable uniform bucket-level access to disallow ACLs for all Cloud Storage resources. +See also: +`Additional access control options `_ + +Constants used across IAM roles: +:::::::::::::::::::::::::::::::: + +- ``STORAGE_OBJECT_CREATOR_ROLE = "roles/storage.objectCreator"`` + corresponds to role implying rights to create objects, but not delete or overwrite them. +- ``STORAGE_OBJECT_VIEWER_ROLE = "roles/storage.objectViewer"`` + corresponds to role implying rights to view object properties, excluding ACLs. +- ``STORAGE_OBJECT_ADMIN_ROLE = "roles/storage.objectAdmin"`` + corresponds to role implying full control of objects. +- ``STORAGE_ADMIN_ROLE = "roles/storage.admin"`` + corresponds to role implying full control of objects and buckets. +- ``STORAGE_VIEWER_ROLE = "Viewer"`` + corresponds to role that can list buckets. +- ``STORAGE_EDITOR_ROLE = "Editor"`` + corresponds to role that can create, list, and delete buckets. +- ``STORAGE_OWNER_ROLE = "Owners"`` + corresponds to role that can Can create, list, and delete buckets; + and list tag bindings; and control HMAC keys in the project. + +Constants used across IAM permissions: +:::::::::::::::::::::::::::::::::::::: + +- ``STORAGE_BUCKETS_CREATE = "storage.buckets.create"`` + corresponds to permission that can create buckets. + +- ``STORAGE_BUCKETS_DELETE = "storage.buckets.delete"`` + corresponds to permission that can delete buckets. + +- ``STORAGE_BUCKETS_GET = "storage.buckets.get"`` + corresponds to permission that can read bucket metadata, excluding ACLs. + +- ``STORAGE_BUCKETS_LIST = "storage.buckets.list"`` + corresponds to permission that can list buckets. + +- ``STORAGE_BUCKETS_GET_IAM_POLICY = "storage.buckets.getIamPolicy"`` + corresponds to permission that can read bucket ACLs. + +- ``STORAGE_BUCKETS_SET_IAM_POLICY = "storage.buckets.setIamPolicy"`` + corresponds to permission that can update bucket ACLs. + +- ``STORAGE_BUCKETS_UPDATE = "storage.buckets.update"`` + corresponds to permission that can update buckets, excluding ACLS. + +- ``STORAGE_OBJECTS_CREATE = "storage.objects.create"`` + corresponds to permission that can add new objects to a bucket. + +- ``STORAGE_OBJECTS_DELETE = "storage.objects.delete"`` + corresponds to permission that can delete objects. + +- ``STORAGE_OBJECTS_GET = "storage.objects.get"`` + corresponds to permission that can read object data / metadata, excluding ACLs. + +- ``STORAGE_OBJECTS_LIST = "storage.objects.list"`` + corresponds to permission that can list objects in a bucket. + +- ``STORAGE_OBJECTS_GET_IAM_POLICY = "storage.objects.getIamPolicy"`` + corresponds to permission that can read object ACLs. + +- ``STORAGE_OBJECTS_SET_IAM_POLICY = "storage.objects.setIamPolicy"`` + corresponds to permission that can update object ACLs. + +- ``STORAGE_OBJECTS_UPDATE = "storage.objects.update"`` + corresponds to permission that can update object metadata, excluding ACLs. diff --git a/docs/conf.py b/docs/conf.py index fc9d1fd34..a2af349a6 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright 2021 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
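The IAM roles and permissions listed in the access guide above are applied through the bucket-level policy methods. A minimal illustrative sketch (the bucket name and member are hypothetical, and ``"roles/storage.objectViewer"`` is the value of ``STORAGE_OBJECT_VIEWER_ROLE``):

.. code-block:: python

    from google.cloud import storage

    client = storage.Client()
    bucket = client.bucket("my-bucket")  # hypothetical bucket name

    # Read-modify-write the bucket policy; version 3 is the current policy format.
    policy = bucket.get_iam_policy(requested_policy_version=3)
    policy.bindings.append(
        {"role": "roles/storage.objectViewer", "members": {"user:me@example.org"}}
    )
    bucket.set_iam_policy(policy)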
@@ -361,11 +361,14 @@ intersphinx_mapping = { "python": ("https://python.readthedocs.org/en/latest/", None), "google-auth": ("https://googleapis.dev/python/google-auth/latest/", None), - "google.api_core": ("https://googleapis.dev/python/google-api-core/latest/", None,), + "google.api_core": ( + "https://googleapis.dev/python/google-api-core/latest/", + None, + ), "grpc": ("https://grpc.github.io/grpc/python/", None), "proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None), "protobuf": ("https://googleapis.dev/python/protobuf/latest/", None), - "requests": ("https://docs.python-requests.org/en/master/", None), + "requests": ("https://requests.readthedocs.io/en/stable/", None), } diff --git a/docs/generation_metageneration.rst b/docs/generation_metageneration.rst index 4a92e534a..eb77dad15 100644 --- a/docs/generation_metageneration.rst +++ b/docs/generation_metageneration.rst @@ -112,7 +112,7 @@ the blob (e.g., makes the operation conditional on whether the blob's current ``generation`` matches the given value. -As a special case, passing ``0`` as the value for``if_generation_match`` +As a special case, passing ``0`` as the value for ``if_generation_match`` makes the operation succeed only if there are no live versions of the blob. diff --git a/docs/index.rst b/docs/index.rst index 777926af3..cdbad15dd 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -8,21 +8,33 @@ :class:`multiprocessing.Pool` or :class:`multiprocessing.Process` invokes :func:`os.fork`. +Guides +------ +.. toctree:: + :maxdepth: 2 + + acl_guide + generation_metageneration + retry_timeout + API Reference ------------- .. toctree:: :maxdepth: 2 - client - blobs - buckets - acl - batch - constants - hmac_key - notification - retry_timeout - generation_metageneration + storage/acl + storage/batch + storage/blob + storage/bucket + storage/client + storage/constants + storage/exceptions + storage/fileio + storage/hmac_key + storage/notification + storage/retry + storage/transfer_manager + More Examples ------------- @@ -38,3 +50,8 @@ Changelog :maxdepth: 2 changelog + +.. toctree:: + :hidden: + + summary_overview.md diff --git a/docs/retry_timeout.rst b/docs/retry_timeout.rst index 7c3ad3084..c9911a3f2 100644 --- a/docs/retry_timeout.rst +++ b/docs/retry_timeout.rst @@ -12,22 +12,27 @@ Configuring Timeouts -------------------- For a number of reasons, methods which invoke API methods may take -longer than expected or desired. By default, such methods all time out -after a default interval, 60.0 seconds. Rather than blocking your application -code for that interval, you may choose to configure explicit timeouts -in your code, using one of three forms: +longer than expected or desired. By default, such methods are applied a +default timeout of 60.0 seconds. -- You can pass a single integer or float which functions as the timeout for the - entire request. E.g.: +The python-storage client uses the timeout mechanics of the underlying +``requests`` HTTP library. The connect timeout is the number of seconds +to establish a connection to the server. 
The read timeout is the number +of seconds the client will wait for the server to send a response. +In most cases, this is the maximum wait time before the server sends +the first byte. Please refer to the `requests documentation `_ for details. + +You may also choose to configure explicit timeouts in your code, using one of three forms: + +- You can specify a single value for the timeout. The timeout value will be + applied to both the connect and the read timeouts. E.g.: .. code-block:: python bucket = client.get_bucket(BUCKET_NAME, timeout=300.0) # five minutes -- You can also be passed as a two-tuple, ``(connect_timeout, read_timeout)``, - where the ``connect_timeout`` sets the maximum time required to establish - the connection to the server, and the ``read_timeout`` sets the maximum - time to wait for a completed response. E.g.: +- You can also pass a two-tuple, ``(connect_timeout, read_timeout)``, + if you would like to set the values separately. E.g.: .. code-block:: python @@ -47,7 +52,7 @@ in your code, using one of three forms: See also: - :ref:`Timeouts in requests ` + `Timeouts in requests `_ .. _configuring_retries: @@ -73,7 +78,8 @@ for each method, base on its semantics: the same "generation", the library uses its :data:`~google.cloud.storage.retry.DEFAULT_RETRY_IF_GENERATION_SPECIFIED` policy, which retries API requests which returns a "transient" error, - but only if the original request includes an ``ifGenerationMatch`` header. + but only if the original request includes a ``generation`` or + ``ifGenerationMatch`` header. - For API requests which are idempotent only if the bucket or blob has the same "metageneration", the library uses its @@ -99,6 +105,20 @@ explicit policy in your code. bucket = client.get_bucket(BUCKET_NAME, retry=None) +- You can modify the default retry behavior and create a copy of :data:`~google.cloud.storage.retry.DEFAULT_RETRY` + by calling it with a ``with_XXX`` method. E.g.: + +.. code-block:: python + + from google.cloud.storage.retry import DEFAULT_RETRY + + # Customize retry with a deadline of 500 seconds (default=120 seconds). + modified_retry = DEFAULT_RETRY.with_deadline(500.0) + # Customize retry with an initial wait time of 1.5 (default=1.0). + # Customize retry with a wait time multiplier per iteration of 1.2 (default=2.0). + # Customize retry with a maximum wait time of 45.0 (default=60.0). + modified_retry = modified_retry.with_delay(initial=1.5, multiplier=1.2, maximum=45.0) + - You can pass an instance of :class:`google.api_core.retry.Retry` to enable retries; the passed object will define retriable response codes and errors, as well as configuring backoff and retry interval options. E.g.: @@ -140,13 +160,5 @@ explicit policy in your code. my_retry_policy = Retry(predicate=is_retryable) my_cond_policy = ConditionalRetryPolicy( - my_retry_policy, conditional_predicate=is_etag_in_data) + my_retry_policy, conditional_predicate=is_etag_in_data, ["query_params"]) bucket = client.get_bucket(BUCKET_NAME, retry=my_cond_policy) - - -Retry Module API ----------------- - -.. automodule:: google.cloud.storage.retry - :members: - :show-inheritance: diff --git a/docs/snippets.py b/docs/snippets.py index 89f92a20b..631dca468 100644 --- a/docs/snippets.py +++ b/docs/snippets.py @@ -34,16 +34,16 @@ def snippet(func): @snippet def storage_get_started(to_delete): - # [START storage_get_started] + # START storage_get_started client = storage.Client() bucket = client.get_bucket("bucket-id-here") # Then do other things... 
blob = bucket.get_blob("/remote/path/to/file.txt") - assert blob.download_as_string() == b"My old contents!" + assert blob.download_as_bytes() == b"My old contents!" blob.upload_from_string("New contents!") blob2 = bucket.blob("/remote/path/storage.txt") blob2.upload_from_filename(filename="/local/path.txt") - # [END storage_get_started] + # END storage_get_started to_delete.append(bucket) @@ -53,40 +53,40 @@ def client_bucket_acl(client, to_delete): bucket_name = "system-test-bucket" client.create_bucket(bucket_name) - # [START client_bucket_acl] + # START client_bucket_acl client = storage.Client() bucket = client.get_bucket(bucket_name) acl = bucket.acl - # [END client_bucket_acl] + # END client_bucket_acl to_delete.append(bucket) - # [START acl_user_settings] + # START acl_user_settings acl.user("me@example.org").grant_read() acl.all_authenticated().grant_write() - # [END acl_user_settings] + # END acl_user_settings - # [START acl_save] + # START acl_save acl.save() - # [END acl_save] + # END acl_save - # [START acl_revoke_write] + # START acl_revoke_write acl.all().grant_read() acl.all().revoke_write() - # [END acl_revoke_write] + # END acl_revoke_write - # [START acl_save_bucket] + # START acl_save_bucket bucket.acl.save(acl=acl) - # [END acl_save_bucket] + # END acl_save_bucket - # [START acl_print] + # START acl_print print(list(acl)) # [{'role': 'OWNER', 'entity': 'allUsers'}, ...] - # [END acl_print] + # END acl_print @snippet def download_to_file(to_delete): - # [START download_to_file] + # START download_to_file from google.cloud.storage import Blob client = storage.Client(project="my-project") @@ -96,14 +96,14 @@ def download_to_file(to_delete): blob.upload_from_string("my secret message.") with open("/tmp/my-secure-file", "wb") as file_obj: client.download_to_file(blob, file_obj) - # [END download_to_file] + # END download_to_file to_delete.append(blob) @snippet def upload_from_file(to_delete): - # [START upload_from_file] + # START upload_from_file from google.cloud.storage import Blob client = storage.Client(project="my-project") @@ -112,7 +112,7 @@ def upload_from_file(to_delete): blob = Blob("secure-data", bucket, encryption_key=encryption_key) with open("my-file", "rb") as my_file: blob.upload_from_file(my_file) - # [END upload_from_file] + # END upload_from_file to_delete.append(blob) @@ -121,21 +121,21 @@ def upload_from_file(to_delete): def get_blob(to_delete): from google.cloud.storage.blob import Blob - # [START get_blob] + # START get_blob client = storage.Client() bucket = client.get_bucket("my-bucket") assert isinstance(bucket.get_blob("/path/to/blob.txt"), Blob) # assert not bucket.get_blob("/does-not-exist.txt") # None - # [END get_blob] + # END get_blob to_delete.append(bucket) @snippet def delete_blob(to_delete): - # [START delete_blob] + # START delete_blob from google.cloud.exceptions import NotFound client = storage.Client() @@ -148,12 +148,12 @@ def delete_blob(to_delete): bucket.delete_blob("doesnt-exist") except NotFound: pass - # [END delete_blob] + # END delete_blob blob = None - # [START delete_blobs] + # START delete_blobs bucket.delete_blobs([blob], on_error=lambda blob: None) - # [END delete_blobs] + # END delete_blobs to_delete.append(bucket) @@ -161,15 +161,15 @@ def delete_blob(to_delete): @snippet def configure_website(to_delete): bucket_name = "test-bucket" - # [START configure_website] + # START configure_website client = storage.Client() bucket = client.get_bucket(bucket_name) bucket.configure_website("index.html", "404.html") - # [END 
configure_website] + # END configure_website - # [START make_public] + # START make_public bucket.make_public(recursive=True, future=True) - # [END make_public] + # END make_public to_delete.append(bucket) @@ -178,34 +178,34 @@ def configure_website(to_delete): def get_bucket(client, to_delete): import google - # [START get_bucket] + # START get_bucket try: bucket = client.get_bucket("my-bucket") except google.cloud.exceptions.NotFound: print("Sorry, that bucket does not exist!") - # [END get_bucket] + # END get_bucket to_delete.append(bucket) @snippet def add_lifecycle_delete_rule(client, to_delete): - # [START add_lifecycle_delete_rule] + # START add_lifecycle_delete_rule bucket = client.get_bucket("my-bucket") bucket.add_lifecycle_delete_rule(age=2) bucket.patch() - # [END add_lifecycle_delete_rule] + # END add_lifecycle_delete_rule to_delete.append(bucket) @snippet def add_lifecycle_set_storage_class_rule(client, to_delete): - # [START add_lifecycle_set_storage_class_rule] + # START add_lifecycle_set_storage_class_rule bucket = client.get_bucket("my-bucket") bucket.add_lifecycle_set_storage_class_rule( "COLD_LINE", matches_storage_class=["NEARLINE"] ) bucket.patch() - # [END add_lifecycle_set_storage_class_rule] + # END add_lifecycle_set_storage_class_rule to_delete.append(bucket) @@ -213,14 +213,14 @@ def add_lifecycle_set_storage_class_rule(client, to_delete): def lookup_bucket(client, to_delete): from google.cloud.storage.bucket import Bucket - # [START lookup_bucket] + # START lookup_bucket bucket = client.lookup_bucket("doesnt-exist") assert not bucket # None bucket = client.lookup_bucket("my-bucket") assert isinstance(bucket, Bucket) # - # [END lookup_bucket] + # END lookup_bucket to_delete.append(bucket) @@ -229,21 +229,21 @@ def lookup_bucket(client, to_delete): def create_bucket(client, to_delete): from google.cloud.storage import Bucket - # [START create_bucket] + # START create_bucket bucket = client.create_bucket("my-bucket") assert isinstance(bucket, Bucket) # - # [END create_bucket] + # END create_bucket to_delete.append(bucket) @snippet def list_buckets(client, to_delete): - # [START list_buckets] + # START list_buckets for bucket in client.list_buckets(): print(bucket) - # [END list_buckets] + # END list_buckets for bucket in client.list_buckets(): to_delete.append(bucket) @@ -252,7 +252,7 @@ def list_buckets(client, to_delete): @snippet def policy_document(client): # pylint: disable=unused-argument - # [START policy_document] + # START policy_document bucket = client.bucket("my-bucket") conditions = [["starts-with", "$key", ""], {"acl": "public-read"}] @@ -260,9 +260,7 @@ def policy_document(client): # Generate an upload form using the form fields. 
policy_fields = "".join( - ''.format( - key=key, value=value - ) + f'' for key, value in policy.items() ) @@ -279,7 +277,7 @@ def policy_document(client): ).format(bucket_name=bucket.name, policy_fields=policy_fields) print(upload_form) - # [END policy_document] + # END policy_document def _line_no(func): @@ -301,13 +299,15 @@ def main(): client = storage.Client() for example in _find_examples(): to_delete = [] - print("%-25s: %s" % _name_and_doc(example)) + name, doc = _name_and_doc(example) + print(f"{name:>25}: {doc}") + try: example(client, to_delete) except AssertionError as failure: - print(" FAIL: %s" % (failure,)) + print(f" FAIL: {failure}") except Exception as error: # pylint: disable=broad-except - print(" ERROR: %r" % (error,)) + print(f" ERROR: {error!r}") for item in to_delete: item.delete() diff --git a/docs/acl.rst b/docs/storage/acl.rst similarity index 76% rename from docs/acl.rst rename to docs/storage/acl.rst index f1f7d0289..4c8562626 100644 --- a/docs/acl.rst +++ b/docs/storage/acl.rst @@ -1,5 +1,5 @@ -ACL -~~~ +ACL Module +----------- .. automodule:: google.cloud.storage.acl :members: diff --git a/docs/batch.rst b/docs/storage/batch.rst similarity index 100% rename from docs/batch.rst rename to docs/storage/batch.rst diff --git a/docs/blobs.rst b/docs/storage/blob.rst similarity index 100% rename from docs/blobs.rst rename to docs/storage/blob.rst diff --git a/docs/buckets.rst b/docs/storage/bucket.rst similarity index 93% rename from docs/buckets.rst rename to docs/storage/bucket.rst index c42d7e303..e63fe2115 100644 --- a/docs/buckets.rst +++ b/docs/storage/bucket.rst @@ -1,4 +1,4 @@ -Buckets +Bucket ~~~~~~~ .. automodule:: google.cloud.storage.bucket diff --git a/docs/client.rst b/docs/storage/client.rst similarity index 100% rename from docs/client.rst rename to docs/storage/client.rst diff --git a/docs/constants.rst b/docs/storage/constants.rst similarity index 100% rename from docs/constants.rst rename to docs/storage/constants.rst diff --git a/docs/storage/exceptions.rst b/docs/storage/exceptions.rst new file mode 100644 index 000000000..4b4995ca7 --- /dev/null +++ b/docs/storage/exceptions.rst @@ -0,0 +1,7 @@ +Exceptions +~~~~~~~~~~ + +.. automodule:: google.cloud.storage.exceptions + :members: + :member-order: bysource + diff --git a/docs/storage/fileio.rst b/docs/storage/fileio.rst new file mode 100644 index 000000000..9ad214a25 --- /dev/null +++ b/docs/storage/fileio.rst @@ -0,0 +1,6 @@ +FileIO +~~~~~~~ + +.. automodule:: google.cloud.storage.fileio + :members: + :show-inheritance: \ No newline at end of file diff --git a/docs/hmac_key.rst b/docs/storage/hmac_key.rst similarity index 100% rename from docs/hmac_key.rst rename to docs/storage/hmac_key.rst diff --git a/docs/notification.rst b/docs/storage/notification.rst similarity index 100% rename from docs/notification.rst rename to docs/storage/notification.rst diff --git a/docs/storage/retry.rst b/docs/storage/retry.rst new file mode 100644 index 000000000..bb5690539 --- /dev/null +++ b/docs/storage/retry.rst @@ -0,0 +1,6 @@ +Retry +---------------- + +.. automodule:: google.cloud.storage.retry + :members: + :show-inheritance: \ No newline at end of file diff --git a/docs/storage/transfer_manager.rst b/docs/storage/transfer_manager.rst new file mode 100644 index 000000000..24f3e4e31 --- /dev/null +++ b/docs/storage/transfer_manager.rst @@ -0,0 +1,6 @@ +Transfer Manager +~~~~~~~~~~~~~~~~ + +.. 
automodule:: google.cloud.storage.transfer_manager + :members: + :show-inheritance: \ No newline at end of file diff --git a/docs/summary_overview.md b/docs/summary_overview.md new file mode 100644 index 000000000..e735f9658 --- /dev/null +++ b/docs/summary_overview.md @@ -0,0 +1,22 @@ +[ +This is a templated file. Adding content to this file may result in it being +reverted. Instead, if you want to place additional content, create an +"overview_content.md" file in `docs/` directory. The Sphinx tool will +pick up on the content and merge the content. +]: # + +# Google Cloud Storage API + +Overview of the APIs available for Google Cloud Storage API. + +## All entries + +Classes, methods and properties & attributes for +Google Cloud Storage API. + +[classes](https://cloud.google.com/python/docs/reference/storage/latest/summary_class.html) + +[methods](https://cloud.google.com/python/docs/reference/storage/latest/summary_method.html) + +[properties and +attributes](https://cloud.google.com/python/docs/reference/storage/latest/summary_property.html) diff --git a/google/cloud/storage/__init__.py b/google/cloud/storage/__init__.py index b05efab8c..4e9c47f4a 100644 --- a/google/cloud/storage/__init__.py +++ b/google/cloud/storage/__init__.py @@ -17,8 +17,8 @@ You'll typically use these to get started with the API: .. literalinclude:: snippets.py - :start-after: [START storage_get_started] - :end-before: [END storage_get_started] + :start-after: START storage_get_started + :end-before: END storage_get_started :dedent: 4 The main concepts with this API are: diff --git a/google/cloud/storage/_helpers.py b/google/cloud/storage/_helpers.py index c8359dc1b..674dced79 100644 --- a/google/cloud/storage/_helpers.py +++ b/google/cloud/storage/_helpers.py @@ -18,20 +18,40 @@ """ import base64 +import datetime from hashlib import md5 import os from urllib.parse import urlsplit +from urllib.parse import urlunsplit +from uuid import uuid4 -from google import resumable_media +from google.auth import environment_vars from google.cloud.storage.constants import _DEFAULT_TIMEOUT from google.cloud.storage.retry import DEFAULT_RETRY from google.cloud.storage.retry import DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED -STORAGE_EMULATOR_ENV_VAR = "STORAGE_EMULATOR_HOST" +STORAGE_EMULATOR_ENV_VAR = "STORAGE_EMULATOR_HOST" # Despite name, includes scheme. """Environment variable defining host for Storage emulator.""" -_DEFAULT_STORAGE_HOST = u"https://storage.googleapis.com" +_API_ENDPOINT_OVERRIDE_ENV_VAR = "API_ENDPOINT_OVERRIDE" # Includes scheme. +"""This is an experimental configuration variable. 
Use api_endpoint instead.""" + +_API_VERSION_OVERRIDE_ENV_VAR = "API_VERSION_OVERRIDE" +"""This is an experimental configuration variable used for internal testing.""" + +_DEFAULT_UNIVERSE_DOMAIN = "googleapis.com" + +_STORAGE_HOST_TEMPLATE = "storage.{universe_domain}" + +_TRUE_DEFAULT_STORAGE_HOST = _STORAGE_HOST_TEMPLATE.format( + universe_domain=_DEFAULT_UNIVERSE_DOMAIN +) + +_DEFAULT_SCHEME = "https://" + +_API_VERSION = os.getenv(_API_VERSION_OVERRIDE_ENV_VAR, "v1") +"""API version of the default storage host""" # etag match parameters in snake case and equivalent header _ETAG_MATCH_PARAMETERS = ( @@ -51,15 +71,53 @@ ("if_source_metageneration_not_match", "ifSourceMetagenerationNotMatch"), ) -_NUM_RETRIES_MESSAGE = ( - "`num_retries` has been deprecated and will be removed in a future " - "release. Use the `retry` argument with a Retry or ConditionalRetryPolicy " - "object, or None, instead." -) +# _NOW() returns the current local date and time. +# It is preferred to use timezone-aware datetimes _NOW(_UTC), +# which returns the current UTC date and time. +_NOW = datetime.datetime.now +_UTC = datetime.timezone.utc + + +def _get_storage_emulator_override(): + return os.environ.get(STORAGE_EMULATOR_ENV_VAR, None) + + +def _get_default_storage_base_url(): + return os.getenv( + _API_ENDPOINT_OVERRIDE_ENV_VAR, _DEFAULT_SCHEME + _TRUE_DEFAULT_STORAGE_HOST + ) + + +def _get_api_endpoint_override(): + """This is an experimental configuration variable. Use api_endpoint instead.""" + if _get_default_storage_base_url() != _DEFAULT_SCHEME + _TRUE_DEFAULT_STORAGE_HOST: + return _get_default_storage_base_url() + return None + +def _virtual_hosted_style_base_url(url, bucket, trailing_slash=False): + """Returns the scheme and netloc sections of the url, with the bucket + prepended to the netloc. + + Not intended for use with netlocs which include a username and password. + """ + parsed_url = urlsplit(url) + new_netloc = f"{bucket}.{parsed_url.netloc}" + base_url = urlunsplit( + (parsed_url.scheme, new_netloc, "/" if trailing_slash else "", "", "") + ) + return base_url + + +def _use_client_cert(): + return os.getenv("GOOGLE_API_USE_CLIENT_CERTIFICATE") == "true" -def _get_storage_host(): - return os.environ.get(STORAGE_EMULATOR_ENV_VAR, _DEFAULT_STORAGE_HOST) + +def _get_environ_project(): + return os.getenv( + environment_vars.PROJECT, + os.getenv(environment_vars.LEGACY_PROJECT), + ) def _validate_name(name): @@ -160,6 +218,7 @@ def reload( if_metageneration_not_match=None, timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY, + soft_deleted=None, ): """Reload properties from Cloud Storage. @@ -205,6 +264,13 @@ def reload( :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy :param retry: (Optional) How to retry the RPC. See: :ref:`configuring_retries` + + :type soft_deleted: bool + :param soft_deleted: + (Optional) If True, looks for a soft-deleted object. Will only return + the object metadata if the object exists and is in a soft-deleted state. + :attr:`generation` is required to be set on the blob if ``soft_deleted`` is set to True. 
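+
+            A minimal illustrative sketch (assumes ``bucket`` is an existing
+            :class:`Bucket`; the object name and generation are hypothetical)::
+
+                blob = bucket.blob("my-object", generation=123456789)
+                blob.reload(soft_deleted=True)
+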
+ See: https://cloud.google.com/storage/docs/soft-delete """ client = self._require_client(client) query_params = self._query_params @@ -218,6 +284,11 @@ def reload( if_metageneration_match=if_metageneration_match, if_metageneration_not_match=if_metageneration_not_match, ) + if soft_deleted is not None: + query_params["softDeleted"] = soft_deleted + # Soft delete reload requires a generation, even for targets + # that don't include them in default query params (buckets). + query_params["generation"] = self.generation headers = self._encryption_headers() _add_etag_match_headers( headers, if_etag_match=if_etag_match, if_etag_not_match=if_etag_not_match @@ -268,7 +339,8 @@ def patch( if_metageneration_match=None, if_metageneration_not_match=None, timeout=_DEFAULT_TIMEOUT, - retry=DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED, + retry=DEFAULT_RETRY, + override_unlocked_retention=False, ): """Sends all changed properties in a PATCH request. @@ -305,12 +377,21 @@ def patch( :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy :param retry: (Optional) How to retry the RPC. See: :ref:`configuring_retries` + + :type override_unlocked_retention: bool + :param override_unlocked_retention: + (Optional) override_unlocked_retention must be set to True if the operation includes + a retention property that changes the mode from Unlocked to Locked, reduces the + retainUntilTime, or removes the retention configuration from the object. See: + https://cloud.google.com/storage/docs/json_api/v1/objects/patch """ client = self._require_client(client) query_params = self._query_params # Pass '?projection=full' here because 'PATCH' documented not # to work properly w/ 'noAcl'. query_params["projection"] = "full" + if override_unlocked_retention: + query_params["overrideUnlockedRetention"] = override_unlocked_retention _add_generation_match_parameters( query_params, if_generation_match=if_generation_match, @@ -340,6 +421,7 @@ def update( if_metageneration_not_match=None, timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED, + override_unlocked_retention=False, ): """Sends all properties in a PUT request. @@ -376,11 +458,20 @@ def update( :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy :param retry: (Optional) How to retry the RPC. See: :ref:`configuring_retries` + + :type override_unlocked_retention: bool + :param override_unlocked_retention: + (Optional) override_unlocked_retention must be set to True if the operation includes + a retention property that changes the mode from Unlocked to Locked, reduces the + retainUntilTime, or removes the retention configuration from the object. 
See: + https://cloud.google.com/storage/docs/json_api/v1/objects/patch """ client = self._require_client(client) query_params = self._query_params query_params["projection"] = "full" + if override_unlocked_retention: + query_params["overrideUnlockedRetention"] = override_unlocked_retention _add_generation_match_parameters( query_params, if_generation_match=if_generation_match, @@ -506,14 +597,12 @@ def _raise_if_more_than_one_set(**kwargs): :raises: :class:`~ValueError` containing the fields that were set """ if sum(arg is not None for arg in kwargs.values()) > 1: - escaped_keys = ["'%s'" % name for name in kwargs.keys()] + escaped_keys = [f"'{name}'" for name in kwargs.keys()] keys_but_last = ", ".join(escaped_keys[:-1]) last_key = escaped_keys[-1] - msg = "Pass at most one of {keys_but_last} and {last_key}".format( - keys_but_last=keys_but_last, last_key=last_key - ) + msg = f"Pass at most one of {keys_but_last} and {last_key}" raise ValueError(msg) @@ -535,38 +624,43 @@ def _bucket_bound_hostname_url(host, scheme=None): if url_parts.scheme and url_parts.netloc: return host - return "{scheme}://{host}/".format(scheme=scheme, host=host) + return f"{scheme}://{host}" -def _api_core_retry_to_resumable_media_retry(retry, num_retries=None): - """Convert google.api.core.Retry to google.resumable_media.RetryStrategy. +def _get_invocation_id(): + return "gccl-invocation-id/" + str(uuid4()) - Custom predicates are not translated. - :type retry: google.api_core.Retry - :param retry: (Optional) The google.api_core.Retry object to translate. +def _get_default_headers( + user_agent, + content_type="application/json; charset=UTF-8", + x_upload_content_type=None, + command=None, +): + """Get the headers for a request. - :type num_retries: int - :param num_retries: (Optional) The number of retries desired. This is - supported for backwards compatibility and is mutually exclusive with - `retry`. + :type user_agent: str + :param user_agent: The user-agent for requests. - :rtype: google.resumable_media.RetryStrategy - :returns: A RetryStrategy with all applicable attributes copied from input, - or a RetryStrategy with max_retries set to 0 if None was input. - """ + :type command: str + :param command: + (Optional) Information about which interface for the operation was + used, to be included in the X-Goog-API-Client header. Please leave + as None unless otherwise directed. - if retry is not None and num_retries is not None: - raise ValueError("num_retries and retry arguments are mutually exclusive") - - elif retry is not None: - return resumable_media.RetryStrategy( - max_sleep=retry._maximum, - max_cumulative_retry=retry._deadline, - initial_delay=retry._initial, - multiplier=retry._multiplier, - ) - elif num_retries is not None: - return resumable_media.RetryStrategy(max_retries=num_retries) - else: - return resumable_media.RetryStrategy(max_retries=0) + :rtype: dict + :returns: The headers to be used for the request. 
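+
+    A minimal illustrative sketch (the user agent value is hypothetical; the
+    invocation id is a fresh UUID on every call)::
+
+        headers = _get_default_headers("gcloud-python/3.0.0")
+        # headers["X-Goog-API-Client"] looks like
+        # "gcloud-python/3.0.0 gccl-invocation-id/<uuid4>"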
+ """ + x_goog_api_client = f"{user_agent} {_get_invocation_id()}" + + if command: + x_goog_api_client += f" gccl-gcs-cmd/{command}" + + return { + "Accept": "application/json", + "Accept-Encoding": "gzip, deflate", + "User-Agent": user_agent, + "X-Goog-API-Client": x_goog_api_client, + "content-type": content_type, + "x-upload-content-type": x_upload_content_type or content_type, + } diff --git a/google/cloud/storage/_http.py b/google/cloud/storage/_http.py index 6c9d11700..aea13cc57 100644 --- a/google/cloud/storage/_http.py +++ b/google/cloud/storage/_http.py @@ -15,23 +15,21 @@ """Create / interact with Google Cloud Storage connections.""" import functools -import os -import pkg_resources - from google.cloud import _http - from google.cloud.storage import __version__ +from google.cloud.storage import _helpers +from google.cloud.storage._opentelemetry_tracing import create_trace_span -if os.getenv("GOOGLE_API_USE_CLIENT_CERTIFICATE") == "true": # pragma: NO COVER - release = pkg_resources.get_distribution("google-cloud-core").parsed_version - if release < pkg_resources.parse_version("1.6.0"): - raise ImportError("google-cloud-core >= 1.6.0 is required to use mTLS feature") +class Connection(_http.JSONConnection): + """A connection to Google Cloud Storage via the JSON REST API. + Mutual TLS will be enabled if the "GOOGLE_API_USE_CLIENT_CERTIFICATE" + environment variable is set to the exact string "true" (case-sensitive). -class Connection(_http.JSONConnection): - """A connection to Google Cloud Storage via the JSON REST API. Mutual TLS feature will be - enabled if `GOOGLE_API_USE_CLIENT_CERTIFICATE` environment variable is set to "true". + Mutual TLS is not compatible with any API endpoint or universe domain + override at this time. If such settings are enabled along with + "GOOGLE_API_USE_CLIENT_CERTIFICATE", a ValueError will be raised. :type client: :class:`~google.cloud.storage.client.Client` :param client: The client that owns the current connection. @@ -43,7 +41,7 @@ class Connection(_http.JSONConnection): :param api_endpoint: (Optional) api endpoint to use. 
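
    Applications normally configure the endpoint on the client rather than
    constructing this connection directly. A minimal illustrative sketch (the
    endpoint URL is hypothetical)::

        from google.cloud import storage

        client = storage.Client(
            client_options={"api_endpoint": "https://storage.example.com"}
        )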
""" - DEFAULT_API_ENDPOINT = "https://storage.googleapis.com" + DEFAULT_API_ENDPOINT = _helpers._get_default_storage_base_url() DEFAULT_API_MTLS_ENDPOINT = "https://storage.mtls.googleapis.com" def __init__(self, client, client_info=None, api_endpoint=None): @@ -56,11 +54,11 @@ def __init__(self, client, client_info=None, api_endpoint=None): # TODO: When metrics all use gccl, this should be removed #9552 if self._client_info.user_agent is None: # pragma: no branch self._client_info.user_agent = "" - agent_version = "gcloud-python/{}".format(__version__) + agent_version = f"gcloud-python/{__version__}" if agent_version not in self._client_info.user_agent: - self._client_info.user_agent += " {} ".format(agent_version) + self._client_info.user_agent += f" {agent_version} " - API_VERSION = "v1" + API_VERSION = _helpers._API_VERSION """The version of the API, used in building the API call's URL.""" API_URL_TEMPLATE = "{api_base_url}/storage/{api_version}{path}" @@ -68,13 +66,25 @@ def __init__(self, client, client_info=None, api_endpoint=None): def api_request(self, *args, **kwargs): retry = kwargs.pop("retry", None) + invocation_id = _helpers._get_invocation_id() + kwargs["extra_api_info"] = invocation_id + span_attributes = { + "gccl-invocation-id": invocation_id, + } call = functools.partial(super(Connection, self).api_request, *args, **kwargs) - if retry: - # If this is a ConditionalRetryPolicy, check conditions. - try: - retry = retry.get_retry_policy_if_conditions_met(**kwargs) - except AttributeError: # This is not a ConditionalRetryPolicy. - pass + with create_trace_span( + name="Storage.Connection.api_request", + attributes=span_attributes, + client=self._client, + api_request=kwargs, + retry=retry, + ): if retry: - call = retry(call) - return call() + # If this is a ConditionalRetryPolicy, check conditions. + try: + retry = retry.get_retry_policy_if_conditions_met(**kwargs) + except AttributeError: # This is not a ConditionalRetryPolicy. + pass + if retry: + call = retry(call) + return call() diff --git a/google/cloud/storage/_media/__init__.py b/google/cloud/storage/_media/__init__.py new file mode 100644 index 000000000..edab8f51d --- /dev/null +++ b/google/cloud/storage/_media/__init__.py @@ -0,0 +1,34 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utilities for Google Media Downloads and Resumable Uploads. + +=========== +Subpackages +=========== + +Each subpackage is tailored to a specific transport library: + +* the :mod:`~google.cloud.storage._media.requests` subpackage uses the ``requests`` + transport library. + +.. 
_requests: http://docs.python-requests.org/
+"""
+
+from google.cloud.storage._media.common import UPLOAD_CHUNK_SIZE
+
+
+__all__ = [
+    "UPLOAD_CHUNK_SIZE",
+]
diff --git a/google/cloud/storage/_media/_download.py b/google/cloud/storage/_media/_download.py
new file mode 100644
index 000000000..349ddf30c
--- /dev/null
+++ b/google/cloud/storage/_media/_download.py
@@ -0,0 +1,620 @@
+# Copyright 2017 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Virtual base classes for downloading media from Google APIs."""
+
+
+import http.client
+import re
+
+from google.cloud.storage._media import _helpers
+from google.cloud.storage.exceptions import InvalidResponse
+from google.cloud.storage.retry import DEFAULT_RETRY
+
+
+_CONTENT_RANGE_RE = re.compile(
+    r"bytes (?P<start_byte>\d+)-(?P<end_byte>\d+)/(?P<total_bytes>\d+)",
+    flags=re.IGNORECASE,
+)
+_ACCEPTABLE_STATUS_CODES = (http.client.OK, http.client.PARTIAL_CONTENT)
+_GET = "GET"
+_ZERO_CONTENT_RANGE_HEADER = "bytes */0"
+
+
+class DownloadBase(object):
+    """Base class for download helpers.
+
+    Defines core shared behavior across different download types.
+
+    Args:
+        media_url (str): The URL containing the media to be downloaded.
+        stream (IO[bytes]): A write-able stream (i.e. file-like object) that
+            the downloaded resource can be written to.
+        start (int): The first byte in a range to be downloaded.
+        end (int): The last byte in a range to be downloaded.
+        headers (Optional[Mapping[str, str]]): Extra headers that should
+            be sent with the request, e.g. headers for encrypted data.
+        retry (Optional[google.api_core.retry.Retry]): How to retry the RPC.
+            A None value will disable retries. A google.api_core.retry.Retry
+            value will enable retries, and the object will configure backoff and
+            timeout options.
+
+            See the retry.py source code and docstrings in this package
+            (google.cloud.storage.retry) for information on retry types and how
+            to configure them.
+
+    Attributes:
+        media_url (str): The URL containing the media to be downloaded.
+        start (Optional[int]): The first byte in a range to be downloaded.
+        end (Optional[int]): The last byte in a range to be downloaded.
+    """
+
+    def __init__(
+        self,
+        media_url,
+        stream=None,
+        start=None,
+        end=None,
+        headers=None,
+        retry=DEFAULT_RETRY,
+    ):
+        self.media_url = media_url
+        self._stream = stream
+        self.start = start
+        self.end = end
+        if headers is None:
+            headers = {}
+        self._headers = headers
+        self._finished = False
+        self._retry_strategy = retry
+
+    @property
+    def finished(self):
+        """bool: Flag indicating if the download has completed."""
+        return self._finished
+
+    @staticmethod
+    def _get_status_code(response):
+        """Access the status code from an HTTP response.
+
+        Args:
+            response (object): The HTTP response object.
+
+        Raises:
+            NotImplementedError: Always, since virtual.
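# Illustrative sketch (not part of this diff): how the content-range pattern
# above resolves a typical ranged-download response header.
import re

content_range_re = re.compile(
    r"bytes (?P<start_byte>\d+)-(?P<end_byte>\d+)/(?P<total_bytes>\d+)",
    flags=re.IGNORECASE,
)
match = content_range_re.match("bytes 1024-2047/10000")
assert match.group("start_byte") == "1024"
assert match.group("end_byte") == "2047"
assert match.group("total_bytes") == "10000"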
+ """ + raise NotImplementedError("This implementation is virtual.") + + @staticmethod + def _get_headers(response): + """Access the headers from an HTTP response. + + Args: + response (object): The HTTP response object. + + Raises: + NotImplementedError: Always, since virtual. + """ + raise NotImplementedError("This implementation is virtual.") + + @staticmethod + def _get_body(response): + """Access the response body from an HTTP response. + + Args: + response (object): The HTTP response object. + + Raises: + NotImplementedError: Always, since virtual. + """ + raise NotImplementedError("This implementation is virtual.") + + +class Download(DownloadBase): + """Helper to manage downloading a resource from a Google API. + + "Slices" of the resource can be retrieved by specifying a range + with ``start`` and / or ``end``. However, in typical usage, neither + ``start`` nor ``end`` is expected to be provided. + + Args: + media_url (str): The URL containing the media to be downloaded. + stream (IO[bytes]): A write-able stream (i.e. file-like object) that + the downloaded resource can be written to. + start (int): The first byte in a range to be downloaded. If not + provided, but ``end`` is provided, will download from the + beginning to ``end`` of the media. + end (int): The last byte in a range to be downloaded. If not + provided, but ``start`` is provided, will download from the + ``start`` to the end of the media. + headers (Optional[Mapping[str, str]]): Extra headers that should + be sent with the request, e.g. headers for encrypted data. + checksum Optional([str]): The type of checksum to compute to verify + the integrity of the object. The response headers must contain + a checksum of the requested type. If the headers lack an + appropriate checksum (for instance in the case of transcoded or + ranged downloads where the remote service does not know the + correct checksum) an INFO-level log will be emitted. Supported + values are "md5", "crc32c", "auto" and None. The default is "auto", + which will try to detect if the C extension for crc32c is installed + and fall back to md5 otherwise. + retry (Optional[google.api_core.retry.Retry]): How to retry the + RPC. A None value will disable retries. A + google.api_core.retry.Retry value will enable retries, and the + object will configure backoff and timeout options. + + See the retry.py source code and docstrings in this package + (google.cloud.storage.retry) for information on retry types and how + to configure them. + + """ + + def __init__( + self, + media_url, + stream=None, + start=None, + end=None, + headers=None, + checksum="auto", + retry=DEFAULT_RETRY, + ): + super(Download, self).__init__( + media_url, stream=stream, start=start, end=end, headers=headers, retry=retry + ) + self.checksum = checksum + if self.checksum == "auto": + self.checksum = ( + "crc32c" if _helpers._is_crc32c_available_and_fast() else "md5" + ) + self._bytes_downloaded = 0 + self._expected_checksum = None + self._checksum_object = None + self._object_generation = None + + def _prepare_request(self): + """Prepare the contents of an HTTP request. + + This is everything that must be done before a request that doesn't + require network I/O (or other I/O). This is based on the `sans-I/O`_ + philosophy. 
+ + Returns: + Tuple[str, str, NoneType, Mapping[str, str]]: The quadruple + + * HTTP verb for the request (always GET) + * the URL for the request + * the body of the request (always :data:`None`) + * headers for the request + + Raises: + ValueError: If the current :class:`Download` has already + finished. + + .. _sans-I/O: https://sans-io.readthedocs.io/ + """ + if self.finished: + raise ValueError("A download can only be used once.") + + add_bytes_range(self.start, self.end, self._headers) + return _GET, self.media_url, None, self._headers + + def _process_response(self, response): + """Process the response from an HTTP request. + + This is everything that must be done after a request that doesn't + require network I/O (or other I/O). This is based on the `sans-I/O`_ + philosophy. + + Args: + response (object): The HTTP response object. + + .. _sans-I/O: https://sans-io.readthedocs.io/ + """ + # Tombstone the current Download so it cannot be used again. + self._finished = True + _helpers.require_status_code( + response, _ACCEPTABLE_STATUS_CODES, self._get_status_code + ) + + def consume(self, transport, timeout=None): + """Consume the resource to be downloaded. + + If a ``stream`` is attached to this download, then the downloaded + resource will be written to the stream. + + Args: + transport (object): An object which can make authenticated + requests. + timeout (Optional[Union[float, Tuple[float, float]]]): + The number of seconds to wait for the server response. + Depending on the retry strategy, a request may be repeated + several times using the same timeout each time. + + Can also be passed as a tuple (connect_timeout, read_timeout). + See :meth:`requests.Session.request` documentation for details. + + Raises: + NotImplementedError: Always, since virtual. + """ + raise NotImplementedError("This implementation is virtual.") + + +class ChunkedDownload(DownloadBase): + """Download a resource in chunks from a Google API. + + Args: + media_url (str): The URL containing the media to be downloaded. + chunk_size (int): The number of bytes to be retrieved in each + request. + stream (IO[bytes]): A write-able stream (i.e. file-like object) that + will be used to concatenate chunks of the resource as they are + downloaded. + start (int): The first byte in a range to be downloaded. If not + provided, defaults to ``0``. + end (int): The last byte in a range to be downloaded. If not + provided, will download to the end of the media. + headers (Optional[Mapping[str, str]]): Extra headers that should + be sent with each request, e.g. headers for data encryption + key headers. + retry (Optional[google.api_core.retry.Retry]): How to retry the + RPC. A None value will disable retries. A + google.api_core.retry.Retry value will enable retries, and the + object will configure backoff and timeout options. + + See the retry.py source code and docstrings in this package + (google.cloud.storage.retry) for information on retry types and how + to configure them. + + Attributes: + media_url (str): The URL containing the media to be downloaded. + start (Optional[int]): The first byte in a range to be downloaded. + end (Optional[int]): The last byte in a range to be downloaded. + chunk_size (int): The number of bytes to be retrieved in each request. + + Raises: + ValueError: If ``start`` is negative. 
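# Illustrative sketch (not part of this diff): the sans-I/O split means a
# transport-specific subclass only supplies the HTTP plumbing; request
# preparation and response bookkeeping stay transport-agnostic. The class
# below is hypothetical and ignores retries and checksum verification.
from google.cloud.storage._media._download import Download  # module added by this diff

class ExampleRequestsDownload(Download):
    @staticmethod
    def _get_status_code(response):
        return response.status_code

    @staticmethod
    def _get_headers(response):
        return response.headers

    @staticmethod
    def _get_body(response):
        return response.content

    def consume(self, transport, timeout=None):
        # transport is assumed to be a requests.Session-like object.
        method, url, payload, headers = self._prepare_request()
        response = transport.request(
            method, url, data=payload, headers=headers, timeout=timeout
        )
        self._process_response(response)
        if self._stream is not None:
            self._stream.write(self._get_body(response))
        return response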
+ """ + + def __init__( + self, + media_url, + chunk_size, + stream, + start=0, + end=None, + headers=None, + retry=DEFAULT_RETRY, + ): + if start < 0: + raise ValueError( + "On a chunked download the starting " "value cannot be negative." + ) + super(ChunkedDownload, self).__init__( + media_url, + stream=stream, + start=start, + end=end, + headers=headers, + retry=retry, + ) + self.chunk_size = chunk_size + self._bytes_downloaded = 0 + self._total_bytes = None + self._invalid = False + + @property + def bytes_downloaded(self): + """int: Number of bytes that have been downloaded.""" + return self._bytes_downloaded + + @property + def total_bytes(self): + """Optional[int]: The total number of bytes to be downloaded.""" + return self._total_bytes + + @property + def invalid(self): + """bool: Indicates if the download is in an invalid state. + + This will occur if a call to :meth:`consume_next_chunk` fails. + """ + return self._invalid + + def _get_byte_range(self): + """Determines the byte range for the next request. + + Returns: + Tuple[int, int]: The pair of begin and end byte for the next + chunked request. + """ + curr_start = self.start + self.bytes_downloaded + curr_end = curr_start + self.chunk_size - 1 + # Make sure ``curr_end`` does not exceed ``end``. + if self.end is not None: + curr_end = min(curr_end, self.end) + # Make sure ``curr_end`` does not exceed ``total_bytes - 1``. + if self.total_bytes is not None: + curr_end = min(curr_end, self.total_bytes - 1) + return curr_start, curr_end + + def _prepare_request(self): + """Prepare the contents of an HTTP request. + + This is everything that must be done before a request that doesn't + require network I/O (or other I/O). This is based on the `sans-I/O`_ + philosophy. + + .. note: + + This method will be used multiple times, so ``headers`` will + be mutated in between requests. However, we don't make a copy + since the same keys are being updated. + + Returns: + Tuple[str, str, NoneType, Mapping[str, str]]: The quadruple + + * HTTP verb for the request (always GET) + * the URL for the request + * the body of the request (always :data:`None`) + * headers for the request + + Raises: + ValueError: If the current download has finished. + ValueError: If the current download is invalid. + + .. _sans-I/O: https://sans-io.readthedocs.io/ + """ + if self.finished: + raise ValueError("Download has finished.") + if self.invalid: + raise ValueError("Download is invalid and cannot be re-used.") + + curr_start, curr_end = self._get_byte_range() + add_bytes_range(curr_start, curr_end, self._headers) + return _GET, self.media_url, None, self._headers + + def _make_invalid(self): + """Simple setter for ``invalid``. + + This is intended to be passed along as a callback to helpers that + raise an exception so they can mark this instance as invalid before + raising. + """ + self._invalid = True + + def _process_response(self, response): + """Process the response from an HTTP request. + + This is everything that must be done after a request that doesn't + require network I/O. This is based on the `sans-I/O`_ philosophy. + + For the time being, this **does require** some form of I/O to write + a chunk to ``stream``. However, this will (almost) certainly not be + network I/O. + + Updates the current state after consuming a chunk. First, + increments ``bytes_downloaded`` by the number of bytes in the + ``content-length`` header. 
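# Illustrative sketch (not part of this diff): the chunk arithmetic above in
# isolation. Each request asks for chunk_size bytes, clamped to the requested
# end and to total_bytes - 1 once the total is known.
def example_byte_range(start, bytes_downloaded, chunk_size, end=None, total_bytes=None):
    curr_start = start + bytes_downloaded
    curr_end = curr_start + chunk_size - 1
    if end is not None:
        curr_end = min(curr_end, end)
    if total_bytes is not None:
        curr_end = min(curr_end, total_bytes - 1)
    return curr_start, curr_end

assert example_byte_range(0, 0, 1024) == (0, 1023)
assert example_byte_range(0, 1024, 1024, total_bytes=1500) == (1024, 1499)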
+ + If ``total_bytes`` is already set, this assumes (but does not check) + that we already have the correct value and doesn't bother to check + that it agrees with the headers. + + We expect the **total** length to be in the ``content-range`` header, + but this header is only present on requests which sent the ``range`` + header. This response header should be of the form + ``bytes {start}-{end}/{total}`` and ``{end} - {start} + 1`` + should be the same as the ``Content-Length``. + + Args: + response (object): The HTTP response object (need headers). + + Raises: + ~google.cloud.storage.exceptions.InvalidResponse: If the number + of bytes in the body doesn't match the content length header. + + .. _sans-I/O: https://sans-io.readthedocs.io/ + """ + # Verify the response before updating the current instance. + if _check_for_zero_content_range( + response, self._get_status_code, self._get_headers + ): + self._finished = True + return + + _helpers.require_status_code( + response, + _ACCEPTABLE_STATUS_CODES, + self._get_status_code, + callback=self._make_invalid, + ) + headers = self._get_headers(response) + response_body = self._get_body(response) + + start_byte, end_byte, total_bytes = get_range_info( + response, self._get_headers, callback=self._make_invalid + ) + + transfer_encoding = headers.get("transfer-encoding") + + if transfer_encoding is None: + content_length = _helpers.header_required( + response, + "content-length", + self._get_headers, + callback=self._make_invalid, + ) + num_bytes = int(content_length) + if len(response_body) != num_bytes: + self._make_invalid() + raise InvalidResponse( + response, + "Response is different size than content-length", + "Expected", + num_bytes, + "Received", + len(response_body), + ) + else: + # 'content-length' header not allowed with chunked encoding. + num_bytes = end_byte - start_byte + 1 + + # First update ``bytes_downloaded``. + self._bytes_downloaded += num_bytes + # If the end byte is past ``end`` or ``total_bytes - 1`` we are done. + if self.end is not None and end_byte >= self.end: + self._finished = True + elif end_byte >= total_bytes - 1: + self._finished = True + # NOTE: We only use ``total_bytes`` if not already known. + if self.total_bytes is None: + self._total_bytes = total_bytes + # Write the response body to the stream. + self._stream.write(response_body) + + def consume_next_chunk(self, transport, timeout=None): + """Consume the next chunk of the resource to be downloaded. + + Args: + transport (object): An object which can make authenticated + requests. + timeout (Optional[Union[float, Tuple[float, float]]]): + The number of seconds to wait for the server response. + Depending on the retry strategy, a request may be repeated + several times using the same timeout each time. + + Can also be passed as a tuple (connect_timeout, read_timeout). + See :meth:`requests.Session.request` documentation for details. + + Raises: + NotImplementedError: Always, since virtual. + """ + raise NotImplementedError("This implementation is virtual.") + + +def add_bytes_range(start, end, headers): + """Add a bytes range to a header dictionary. 
+ + Some possible inputs and the corresponding bytes ranges:: + + >>> headers = {} + >>> add_bytes_range(None, None, headers) + >>> headers + {} + >>> add_bytes_range(500, 999, headers) + >>> headers['range'] + 'bytes=500-999' + >>> add_bytes_range(None, 499, headers) + >>> headers['range'] + 'bytes=0-499' + >>> add_bytes_range(-500, None, headers) + >>> headers['range'] + 'bytes=-500' + >>> add_bytes_range(9500, None, headers) + >>> headers['range'] + 'bytes=9500-' + + Args: + start (Optional[int]): The first byte in a range. Can be zero, + positive, negative or :data:`None`. + end (Optional[int]): The last byte in a range. Assumed to be + positive. + headers (Mapping[str, str]): A headers mapping which can have the + bytes range added if at least one of ``start`` or ``end`` + is not :data:`None`. + """ + if start is None: + if end is None: + # No range to add. + return + else: + # NOTE: This assumes ``end`` is non-negative. + bytes_range = "0-{:d}".format(end) + else: + if end is None: + if start < 0: + bytes_range = "{:d}".format(start) + else: + bytes_range = "{:d}-".format(start) + else: + # NOTE: This is invalid if ``start < 0``. + bytes_range = "{:d}-{:d}".format(start, end) + + headers[_helpers.RANGE_HEADER] = "bytes=" + bytes_range + + +def get_range_info(response, get_headers, callback=_helpers.do_nothing): + """Get the start, end and total bytes from a content range header. + + Args: + response (object): An HTTP response object. + get_headers (Callable[Any, Mapping[str, str]]): Helper to get headers + from an HTTP response. + callback (Optional[Callable]): A callback that takes no arguments, + to be executed when an exception is being raised. + + Returns: + Tuple[int, int, int]: The start byte, end byte and total bytes. + + Raises: + ~google.cloud.storage.exceptions.InvalidResponse: If the + ``Content-Range`` header is not of the form + ``bytes {start}-{end}/{total}``. + """ + content_range = _helpers.header_required( + response, _helpers.CONTENT_RANGE_HEADER, get_headers, callback=callback + ) + match = _CONTENT_RANGE_RE.match(content_range) + if match is None: + callback() + raise InvalidResponse( + response, + "Unexpected content-range header", + content_range, + 'Expected to be of the form "bytes {start}-{end}/{total}"', + ) + + return ( + int(match.group("start_byte")), + int(match.group("end_byte")), + int(match.group("total_bytes")), + ) + + +def _check_for_zero_content_range(response, get_status_code, get_headers): + """Validate if response status code is 416 and content range is zero. + + This is the special case for handling zero bytes files. + + Args: + response (object): An HTTP response object. + get_status_code (Callable[Any, int]): Helper to get a status code + from a response. + get_headers (Callable[Any, Mapping[str, str]]): Helper to get headers + from an HTTP response. + + Returns: + bool: True if content range total bytes is zero, false otherwise. + """ + if get_status_code(response) == http.client.REQUESTED_RANGE_NOT_SATISFIABLE: + content_range = _helpers.header_required( + response, + _helpers.CONTENT_RANGE_HEADER, + get_headers, + callback=_helpers.do_nothing, + ) + if content_range == _ZERO_CONTENT_RANGE_HEADER: + return True + return False diff --git a/google/cloud/storage/_media/_helpers.py b/google/cloud/storage/_media/_helpers.py new file mode 100644 index 000000000..c07101eda --- /dev/null +++ b/google/cloud/storage/_media/_helpers.py @@ -0,0 +1,383 @@ +# Copyright 2017 Google Inc. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Shared utilities used by both downloads and uploads.""" + +from __future__ import absolute_import + +import base64 +import hashlib +import logging + +from urllib.parse import parse_qs +from urllib.parse import urlencode +from urllib.parse import urlsplit +from urllib.parse import urlunsplit + +from google.cloud.storage import retry +from google.cloud.storage.exceptions import InvalidResponse + + +RANGE_HEADER = "range" +CONTENT_RANGE_HEADER = "content-range" +CONTENT_ENCODING_HEADER = "content-encoding" + +_SLOW_CRC32C_WARNING = ( + "Currently using crcmod in pure python form. This is a slow " + "implementation. Python 3 has a faster implementation, `google-crc32c`, " + "which will be used if it is installed." +) +_GENERATION_HEADER = "x-goog-generation" +_HASH_HEADER = "x-goog-hash" +_STORED_CONTENT_ENCODING_HEADER = "x-goog-stored-content-encoding" + +_MISSING_CHECKSUM = """\ +No {checksum_type} checksum was returned from the service while downloading {} +(which happens for composite objects), so client-side content integrity +checking is not being performed.""" +_LOGGER = logging.getLogger(__name__) + + +def do_nothing(): + """Simple default callback.""" + + +def header_required(response, name, get_headers, callback=do_nothing): + """Checks that a specific header is in a headers dictionary. + + Args: + response (object): An HTTP response object, expected to have a + ``headers`` attribute that is a ``Mapping[str, str]``. + name (str): The name of a required header. + get_headers (Callable[Any, Mapping[str, str]]): Helper to get headers + from an HTTP response. + callback (Optional[Callable]): A callback that takes no arguments, + to be executed when an exception is being raised. + + Returns: + str: The desired header. + + Raises: + ~google.cloud.storage.exceptions.InvalidResponse: If the header + is missing. + """ + headers = get_headers(response) + if name not in headers: + callback() + raise InvalidResponse(response, "Response headers must contain header", name) + + return headers[name] + + +def require_status_code(response, status_codes, get_status_code, callback=do_nothing): + """Require a response has a status code among a list. + + Args: + response (object): The HTTP response object. + status_codes (tuple): The acceptable status codes. + get_status_code (Callable[Any, int]): Helper to get a status code + from a response. + callback (Optional[Callable]): A callback that takes no arguments, + to be executed when an exception is being raised. + + Returns: + int: The status code. + + Raises: + ~google.cloud.storage.exceptions.InvalidResponse: If the status code + is not one of the values in ``status_codes``. 
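# Illustrative sketch (not part of this diff): how header_required is meant to
# be driven. get_headers adapts a transport-specific response object, and the
# callback lets the caller flag its own state before the raise. All names
# below are hypothetical.
class FakeResponse:
    headers = {"content-range": "bytes 0-99/100"}

state = {"invalid": False}

def mark_invalid():
    state["invalid"] = True

value = header_required(
    FakeResponse(), "content-range", lambda r: r.headers, callback=mark_invalid
)
assert value == "bytes 0-99/100" and state["invalid"] is False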
+ """ + status_code = get_status_code(response) + if status_code not in status_codes: + if status_code not in retry._RETRYABLE_STATUS_CODES: + callback() + raise InvalidResponse( + response, + "Request failed with status code", + status_code, + "Expected one of", + *status_codes + ) + return status_code + + +def _get_metadata_key(checksum_type): + if checksum_type == "md5": + return "md5Hash" + else: + return checksum_type + + +def prepare_checksum_digest(digest_bytestring): + """Convert a checksum object into a digest encoded for an HTTP header. + + Args: + bytes: A checksum digest bytestring. + + Returns: + str: A base64 string representation of the input. + """ + encoded_digest = base64.b64encode(digest_bytestring) + # NOTE: ``b64encode`` returns ``bytes``, but HTTP headers expect ``str``. + return encoded_digest.decode("utf-8") + + +def _get_expected_checksum(response, get_headers, media_url, checksum_type): + """Get the expected checksum and checksum object for the download response. + + Args: + response (~requests.Response): The HTTP response object. + get_headers (callable: response->dict): returns response headers. + media_url (str): The URL containing the media to be downloaded. + checksum_type Optional(str): The checksum type to read from the headers, + exactly as it will appear in the headers (case-sensitive). Must be + "md5", "crc32c" or None. + + Returns: + Tuple (Optional[str], object): The expected checksum of the response, + if it can be detected from the ``X-Goog-Hash`` header, and the + appropriate checksum object for the expected checksum. + """ + if checksum_type not in ["md5", "crc32c", None]: + raise ValueError("checksum must be ``'md5'``, ``'crc32c'`` or ``None``") + elif checksum_type in ["md5", "crc32c"]: + headers = get_headers(response) + expected_checksum = _parse_checksum_header( + headers.get(_HASH_HEADER), response, checksum_label=checksum_type + ) + + if expected_checksum is None: + msg = _MISSING_CHECKSUM.format( + media_url, checksum_type=checksum_type.upper() + ) + _LOGGER.info(msg) + checksum_object = _DoNothingHash() + else: + checksum_object = _get_checksum_object(checksum_type) + else: + expected_checksum = None + checksum_object = _DoNothingHash() + + return (expected_checksum, checksum_object) + + +def _get_uploaded_checksum_from_headers(response, get_headers, checksum_type): + """Get the computed checksum and checksum object from the response headers. + + Args: + response (~requests.Response): The HTTP response object. + get_headers (callable: response->dict): returns response headers. + checksum_type Optional(str): The checksum type to read from the headers, + exactly as it will appear in the headers (case-sensitive). Must be + "md5", "crc32c" or None. + + Returns: + Tuple (Optional[str], object): The checksum of the response, + if it can be detected from the ``X-Goog-Hash`` header, and the + appropriate checksum object for the expected checksum. + """ + if checksum_type not in ["md5", "crc32c", None]: + raise ValueError("checksum must be ``'md5'``, ``'crc32c'`` or ``None``") + elif checksum_type in ["md5", "crc32c"]: + headers = get_headers(response) + remote_checksum = _parse_checksum_header( + headers.get(_HASH_HEADER), response, checksum_label=checksum_type + ) + else: + remote_checksum = None + + return remote_checksum + + +def _parse_checksum_header(header_value, response, checksum_label): + """Parses the checksum header from an ``X-Goog-Hash`` value. + + .. 
_header reference: https://cloud.google.com/storage/docs/\ + xml-api/reference-headers#xgooghash + + Expects ``header_value`` (if not :data:`None`) to be in one of the three + following formats: + + * ``crc32c=n03x6A==`` + * ``md5=Ojk9c3dhfxgoKVVHYwFbHQ==`` + * ``crc32c=n03x6A==,md5=Ojk9c3dhfxgoKVVHYwFbHQ==`` + + See the `header reference`_ for more information. + + Args: + header_value (Optional[str]): The ``X-Goog-Hash`` header from + a download response. + response (~requests.Response): The HTTP response object. + checksum_label (str): The label of the header value to read, as in the + examples above. Typically "md5" or "crc32c" + + Returns: + Optional[str]: The expected checksum of the response, if it + can be detected from the ``X-Goog-Hash`` header; otherwise, None. + + Raises: + ~google.cloud.storage.exceptions.InvalidResponse: If there are + multiple checksums of the requested type in ``header_value``. + """ + if header_value is None: + return None + + matches = [] + for checksum in header_value.split(","): + name, value = checksum.split("=", 1) + # Official docs say "," is the separator, but real-world responses have encountered ", " + if name.lstrip() == checksum_label: + matches.append(value) + + if len(matches) == 0: + return None + elif len(matches) == 1: + return matches[0] + else: + raise InvalidResponse( + response, + "X-Goog-Hash header had multiple ``{}`` values.".format(checksum_label), + header_value, + matches, + ) + + +def _get_checksum_object(checksum_type): + """Respond with a checksum object for a supported type, if not None. + + Raises ValueError if checksum_type is unsupported. + """ + if checksum_type == "md5": + return hashlib.md5() + elif checksum_type == "crc32c": + # In order to support platforms that don't have google_crc32c + # support, only perform the import on demand. + import google_crc32c + + return google_crc32c.Checksum() + elif checksum_type is None: + return None + else: + raise ValueError("checksum must be ``'md5'``, ``'crc32c'`` or ``None``") + + +def _is_crc32c_available_and_fast(): + """Return True if the google_crc32c C extension is installed. + + Return False if either the package is not installed, or if only the + pure-Python version is installed. + """ + try: + import google_crc32c + + if google_crc32c.implementation == "c": + return True + except Exception: + pass + return False + + +def _parse_generation_header(response, get_headers): + """Parses the generation header from an ``X-Goog-Generation`` value. + + Args: + response (~requests.Response): The HTTP response object. + get_headers (callable: response->dict): returns response headers. + + Returns: + Optional[long]: The object generation from the response, if it + can be detected from the ``X-Goog-Generation`` header; otherwise, None. + """ + headers = get_headers(response) + object_generation = headers.get(_GENERATION_HEADER, None) + + if object_generation is None: + return None + else: + return int(object_generation) + + +def _get_generation_from_url(media_url): + """Retrieve the object generation query param specified in the media url. + + Args: + media_url (str): The URL containing the media to be downloaded. + + Returns: + long: The object generation from the media url if exists; otherwise, None. 
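# Illustrative sketch (not part of this diff): what the x-goog-hash parsing
# above extracts from a header value carrying both checksums.
header_value = "crc32c=n03x6A==,md5=Ojk9c3dhfxgoKVVHYwFbHQ=="
parsed = {}
for checksum in header_value.split(","):
    name, value = checksum.split("=", 1)
    parsed[name.lstrip()] = value

assert parsed == {"crc32c": "n03x6A==", "md5": "Ojk9c3dhfxgoKVVHYwFbHQ=="}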
+ """ + + _, _, _, query, _ = urlsplit(media_url) + query_params = parse_qs(query) + object_generation = query_params.get("generation", None) + + if object_generation is None: + return None + else: + return int(object_generation[0]) + + +def add_query_parameters(media_url, query_params): + """Add query parameters to a base url. + + Args: + media_url (str): The URL containing the media to be downloaded. + query_params (dict): Names and values of the query parameters to add. + + Returns: + str: URL with additional query strings appended. + """ + + if len(query_params) == 0: + return media_url + + scheme, netloc, path, query, frag = urlsplit(media_url) + params = parse_qs(query) + new_params = {**params, **query_params} + query = urlencode(new_params, doseq=True) + return urlunsplit((scheme, netloc, path, query, frag)) + + +def _is_decompressive_transcoding(response, get_headers): + """Returns True if the object was served decompressed. This happens when the + "x-goog-stored-content-encoding" header is "gzip" and "content-encoding" header + is not "gzip". See more at: https://cloud.google.com/storage/docs/transcoding#transcoding_and_gzip + Args: + response (~requests.Response): The HTTP response object. + get_headers (callable: response->dict): returns response headers. + Returns: + bool: Returns True if decompressive transcoding has occurred; otherwise, False. + """ + headers = get_headers(response) + return ( + headers.get(_STORED_CONTENT_ENCODING_HEADER) == "gzip" + and headers.get(CONTENT_ENCODING_HEADER) != "gzip" + ) + + +class _DoNothingHash(object): + """Do-nothing hash object. + + Intended as a stand-in for ``hashlib.md5`` or a crc32c checksum + implementation in cases where it isn't necessary to compute the hash. + """ + + def update(self, unused_chunk): + """Do-nothing ``update`` method. + + Intended to match the interface of ``hashlib.md5`` and other checksums. + + Args: + unused_chunk (bytes): A chunk of data. + """ diff --git a/google/cloud/storage/_media/_upload.py b/google/cloud/storage/_media/_upload.py new file mode 100644 index 000000000..8d89ee5b2 --- /dev/null +++ b/google/cloud/storage/_media/_upload.py @@ -0,0 +1,1602 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Virtual bases classes for uploading media via Google APIs. 
+
+Supported here are:
+
+* simple (media) uploads
+* multipart uploads that contain both metadata and a small file as payload
+* resumable uploads (with metadata as well)
+"""
+
+import http.client
+import json
+import os
+import random
+import re
+import sys
+import urllib.parse
+
+from google.cloud.storage._media import _helpers
+from google.cloud.storage._media import UPLOAD_CHUNK_SIZE
+from google.cloud.storage.exceptions import InvalidResponse
+from google.cloud.storage.exceptions import DataCorruption
+from google.cloud.storage.retry import DEFAULT_RETRY
+
+from xml.etree import ElementTree
+
+
+_CONTENT_TYPE_HEADER = "content-type"
+_CONTENT_RANGE_TEMPLATE = "bytes {:d}-{:d}/{:d}"
+_RANGE_UNKNOWN_TEMPLATE = "bytes {:d}-{:d}/*"
+_EMPTY_RANGE_TEMPLATE = "bytes */{:d}"
+_BOUNDARY_WIDTH = len(str(sys.maxsize - 1))
+_BOUNDARY_FORMAT = "==============={{:0{:d}d}}==".format(_BOUNDARY_WIDTH)
+_MULTIPART_SEP = b"--"
+_CRLF = b"\r\n"
+_MULTIPART_BEGIN = b"\r\ncontent-type: application/json; charset=UTF-8\r\n\r\n"
+_RELATED_HEADER = b'multipart/related; boundary="'
+_BYTES_RANGE_RE = re.compile(r"bytes=0-(?P<end_byte>\d+)", flags=re.IGNORECASE)
+_STREAM_ERROR_TEMPLATE = (
+    "Bytes stream is in unexpected state. "
+    "The local stream has had {:d} bytes read from it while "
+    "{:d} bytes have already been updated (they should match)."
+)
+_STREAM_READ_PAST_TEMPLATE = (
+    "{:d} bytes have been read from the stream, which exceeds "
+    "the expected total {:d}."
+)
+_DELETE = "DELETE"
+_POST = "POST"
+_PUT = "PUT"
+_UPLOAD_CHECKSUM_MISMATCH_MESSAGE = (
+    "The computed ``{}`` checksum, ``{}``, and the checksum reported by the "
+    "remote host, ``{}``, did not match."
+)
+_UPLOAD_METADATA_NO_APPROPRIATE_CHECKSUM_MESSAGE = (
+    "Response metadata had no ``{}`` value; checksum could not be validated."
+)
+_UPLOAD_HEADER_NO_APPROPRIATE_CHECKSUM_MESSAGE = (
+    "Response headers had no ``{}`` value; checksum could not be validated."
+)
+_MPU_INITIATE_QUERY = "?uploads"
+_MPU_PART_QUERY_TEMPLATE = "?partNumber={part}&uploadId={upload_id}"
+_S3_COMPAT_XML_NAMESPACE = "{http://s3.amazonaws.com/doc/2006-03-01/}"
+_UPLOAD_ID_NODE = "UploadId"
+_MPU_FINAL_QUERY_TEMPLATE = "?uploadId={upload_id}"
+
+
+class UploadBase(object):
+    """Base class for upload helpers.
+
+    Defines core shared behavior across different upload types.
+
+    Args:
+        upload_url (str): The URL where the content will be uploaded.
+        headers (Optional[Mapping[str, str]]): Extra headers that should
+            be sent with the request, e.g. headers for encrypted data.
+        retry (Optional[google.api_core.retry.Retry]): How to retry the
+            RPC. A None value will disable retries. A
+            google.api_core.retry.Retry value will enable retries, and the
+            object will configure backoff and timeout options.
+
+            See the retry.py source code and docstrings in this package
+            (google.cloud.storage.retry) for information on retry types and how
+            to configure them.
+
+    Attributes:
+        upload_url (str): The URL where the content will be uploaded.
+    """
+
+    def __init__(self, upload_url, headers=None, retry=DEFAULT_RETRY):
+        self.upload_url = upload_url
+        if headers is None:
+            headers = {}
+        self._headers = headers
+        self._finished = False
+        self._retry_strategy = retry
+
+    @property
+    def finished(self):
+        """bool: Flag indicating if the upload has completed."""
+        return self._finished
+
+    def _process_response(self, response):
+        """Process the response from an HTTP request.
+ + This is everything that must be done after a request that doesn't + require network I/O (or other I/O). This is based on the `sans-I/O`_ + philosophy. + + Args: + response (object): The HTTP response object. + + Raises: + ~google.cloud.storage.exceptions.InvalidResponse: If the status + code is not 200. + + .. _sans-I/O: https://sans-io.readthedocs.io/ + """ + # Tombstone the current upload so it cannot be used again (in either + # failure or success). + self._finished = True + _helpers.require_status_code(response, (http.client.OK,), self._get_status_code) + + @staticmethod + def _get_status_code(response): + """Access the status code from an HTTP response. + + Args: + response (object): The HTTP response object. + + Raises: + NotImplementedError: Always, since virtual. + """ + raise NotImplementedError("This implementation is virtual.") + + @staticmethod + def _get_headers(response): + """Access the headers from an HTTP response. + + Args: + response (object): The HTTP response object. + + Raises: + NotImplementedError: Always, since virtual. + """ + raise NotImplementedError("This implementation is virtual.") + + @staticmethod + def _get_body(response): + """Access the response body from an HTTP response. + + Args: + response (object): The HTTP response object. + + Raises: + NotImplementedError: Always, since virtual. + """ + raise NotImplementedError("This implementation is virtual.") + + +class SimpleUpload(UploadBase): + """Upload a resource to a Google API. + + A **simple** media upload sends no metadata and completes the upload + in a single request. + + Args: + upload_url (str): The URL where the content will be uploaded. + headers (Optional[Mapping[str, str]]): Extra headers that should + be sent with the request, e.g. headers for encrypted data. + retry (Optional[google.api_core.retry.Retry]): How to retry the + RPC. A None value will disable retries. A + google.api_core.retry.Retry value will enable retries, and the + object will configure backoff and timeout options. + + See the retry.py source code and docstrings in this package + (google.cloud.storage.retry) for information on retry types and how + to configure them. + + Attributes: + upload_url (str): The URL where the content will be uploaded. + """ + + def _prepare_request(self, data, content_type): + """Prepare the contents of an HTTP request. + + This is everything that must be done before a request that doesn't + require network I/O (or other I/O). This is based on the `sans-I/O`_ + philosophy. + + .. note: + + This method will be used only once, so ``headers`` will be + mutated by having a new key added to it. + + Args: + data (bytes): The resource content to be uploaded. + content_type (str): The content type for the request. + + Returns: + Tuple[str, str, bytes, Mapping[str, str]]: The quadruple + + * HTTP verb for the request (always POST) + * the URL for the request + * the body of the request + * headers for the request + + Raises: + ValueError: If the current upload has already finished. + TypeError: If ``data`` isn't bytes. + + .. 
_sans-I/O: https://sans-io.readthedocs.io/ + """ + if self.finished: + raise ValueError("An upload can only be used once.") + + if not isinstance(data, bytes): + raise TypeError("`data` must be bytes, received", type(data)) + self._headers[_CONTENT_TYPE_HEADER] = content_type + return _POST, self.upload_url, data, self._headers + + def transmit(self, transport, data, content_type, timeout=None): + """Transmit the resource to be uploaded. + + Args: + transport (object): An object which can make authenticated + requests. + data (bytes): The resource content to be uploaded. + content_type (str): The content type of the resource, e.g. a JPEG + image has content type ``image/jpeg``. + timeout (Optional[Union[float, Tuple[float, float]]]): + The number of seconds to wait for the server response. + Depending on the retry strategy, a request may be repeated + several times using the same timeout each time. + + Can also be passed as a tuple (connect_timeout, read_timeout). + See :meth:`requests.Session.request` documentation for details. + + Raises: + NotImplementedError: Always, since virtual. + """ + raise NotImplementedError("This implementation is virtual.") + + +class MultipartUpload(UploadBase): + """Upload a resource with metadata to a Google API. + + A **multipart** upload sends both metadata and the resource in a single + (multipart) request. + + Args: + upload_url (str): The URL where the content will be uploaded. + headers (Optional[Mapping[str, str]]): Extra headers that should + be sent with the request, e.g. headers for encrypted data. + checksum Optional([str]): The type of checksum to compute to verify + the integrity of the object. The request metadata will be amended + to include the computed value. Using this option will override a + manually-set checksum value. Supported values are "md5", + "crc32c", "auto", and None. The default is "auto", which will try + to detect if the C extension for crc32c is installed and fall back + to md5 otherwise. + retry (Optional[google.api_core.retry.Retry]): How to retry the + RPC. A None value will disable retries. A + google.api_core.retry.Retry value will enable retries, and the + object will configure backoff and timeout options. + + See the retry.py source code and docstrings in this package + (google.cloud.storage.retry) for information on retry types and how + to configure them. + + Attributes: + upload_url (str): The URL where the content will be uploaded. + """ + + def __init__(self, upload_url, headers=None, checksum="auto", retry=DEFAULT_RETRY): + super(MultipartUpload, self).__init__(upload_url, headers=headers, retry=retry) + self._checksum_type = checksum + if self._checksum_type == "auto": + self._checksum_type = ( + "crc32c" if _helpers._is_crc32c_available_and_fast() else "md5" + ) + + def _prepare_request(self, data, metadata, content_type): + """Prepare the contents of an HTTP request. + + This is everything that must be done before a request that doesn't + require network I/O (or other I/O). This is based on the `sans-I/O`_ + philosophy. + + .. note: + + This method will be used only once, so ``headers`` will be + mutated by having a new key added to it. + + Args: + data (bytes): The resource content to be uploaded. + metadata (Mapping[str, str]): The resource metadata, such as an + ACL list. + content_type (str): The content type of the resource, e.g. a JPEG + image has content type ``image/jpeg``. 
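# Illustrative sketch (not part of this diff): the rough shape of the
# multipart/related body a MultipartUpload sends, with a JSON metadata part
# followed by the media part. The boundary value here is a placeholder; the
# real helper derives one from sys.maxsize.
import json

def example_multipart_body(data, metadata, content_type, boundary=b"==0=="):
    sep = b"--" + boundary
    return (
        sep
        + b"\r\ncontent-type: application/json; charset=UTF-8\r\n\r\n"
        + json.dumps(metadata).encode("utf-8")
        + b"\r\n"
        + sep
        + b"\r\ncontent-type: "
        + content_type.encode("utf-8")
        + b"\r\n\r\n"
        + data
        + b"\r\n"
        + sep
        + b"--"
    )

body = example_multipart_body(b"hello", {"name": "greeting.txt"}, "text/plain")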
+ + Returns: + Tuple[str, str, bytes, Mapping[str, str]]: The quadruple + + * HTTP verb for the request (always POST) + * the URL for the request + * the body of the request + * headers for the request + + Raises: + ValueError: If the current upload has already finished. + TypeError: If ``data`` isn't bytes. + + .. _sans-I/O: https://sans-io.readthedocs.io/ + """ + if self.finished: + raise ValueError("An upload can only be used once.") + + if not isinstance(data, bytes): + raise TypeError("`data` must be bytes, received", type(data)) + + checksum_object = _helpers._get_checksum_object(self._checksum_type) + if checksum_object is not None: + checksum_object.update(data) + actual_checksum = _helpers.prepare_checksum_digest(checksum_object.digest()) + metadata_key = _helpers._get_metadata_key(self._checksum_type) + metadata[metadata_key] = actual_checksum + + content, multipart_boundary = construct_multipart_request( + data, metadata, content_type + ) + multipart_content_type = _RELATED_HEADER + multipart_boundary + b'"' + self._headers[_CONTENT_TYPE_HEADER] = multipart_content_type + + return _POST, self.upload_url, content, self._headers + + def transmit(self, transport, data, metadata, content_type, timeout=None): + """Transmit the resource to be uploaded. + + Args: + transport (object): An object which can make authenticated + requests. + data (bytes): The resource content to be uploaded. + metadata (Mapping[str, str]): The resource metadata, such as an + ACL list. + content_type (str): The content type of the resource, e.g. a JPEG + image has content type ``image/jpeg``. + timeout (Optional[Union[float, Tuple[float, float]]]): + The number of seconds to wait for the server response. + Depending on the retry strategy, a request may be repeated + several times using the same timeout each time. + + Can also be passed as a tuple (connect_timeout, read_timeout). + See :meth:`requests.Session.request` documentation for details. + + Raises: + NotImplementedError: Always, since virtual. + """ + raise NotImplementedError("This implementation is virtual.") + + +class ResumableUpload(UploadBase): + """Initiate and fulfill a resumable upload to a Google API. + + A **resumable** upload sends an initial request with the resource metadata + and then gets assigned an upload ID / upload URL to send bytes to. + Using the upload URL, the upload is then done in chunks (determined by + the user) until all bytes have been uploaded. + + Args: + upload_url (str): The URL where the resumable upload will be initiated. + chunk_size (int): The size of each chunk used to upload the resource. + headers (Optional[Mapping[str, str]]): Extra headers that should + be sent with every request. + checksum Optional([str]): The type of checksum to compute to verify + the integrity of the object. After the upload is complete, the + server-computed checksum of the resulting object will be checked + and google.cloud.storage.exceptions.DataCorruption will be raised on + a mismatch. The corrupted file will not be deleted from the remote + host automatically. Supported values are "md5", "crc32c", "auto", + and None. The default is "auto", which will try to detect if the C + extension for crc32c is installed and fall back to md5 otherwise. + retry (Optional[google.api_core.retry.Retry]): How to retry the + RPC. A None value will disable retries. A + google.api_core.retry.Retry value will enable retries, and the + object will configure backoff and timeout options. 
+ + See the retry.py source code and docstrings in this package + (google.cloud.storage.retry) for information on retry types and how + to configure them. + + Attributes: + upload_url (str): The URL where the content will be uploaded. + + Raises: + ValueError: If ``chunk_size`` is not a multiple of + :data:`.UPLOAD_CHUNK_SIZE`. + """ + + def __init__( + self, upload_url, chunk_size, checksum="auto", headers=None, retry=DEFAULT_RETRY + ): + super(ResumableUpload, self).__init__(upload_url, headers=headers, retry=retry) + if chunk_size % UPLOAD_CHUNK_SIZE != 0: + raise ValueError( + "{} KB must divide chunk size".format(UPLOAD_CHUNK_SIZE / 1024) + ) + self._chunk_size = chunk_size + self._stream = None + self._content_type = None + self._bytes_uploaded = 0 + self._bytes_checksummed = 0 + self._checksum_type = checksum + if self._checksum_type == "auto": + self._checksum_type = ( + "crc32c" if _helpers._is_crc32c_available_and_fast() else "md5" + ) + self._checksum_object = None + self._total_bytes = None + self._resumable_url = None + self._invalid = False + + @property + def invalid(self): + """bool: Indicates if the upload is in an invalid state. + + This will occur if a call to :meth:`transmit_next_chunk` fails. + To recover from such a failure, call :meth:`recover`. + """ + return self._invalid + + @property + def chunk_size(self): + """int: The size of each chunk used to upload the resource.""" + return self._chunk_size + + @property + def resumable_url(self): + """Optional[str]: The URL of the in-progress resumable upload.""" + return self._resumable_url + + @property + def bytes_uploaded(self): + """int: Number of bytes that have been uploaded.""" + return self._bytes_uploaded + + @property + def total_bytes(self): + """Optional[int]: The total number of bytes to be uploaded. + + If this upload is initiated (via :meth:`initiate`) with + ``stream_final=True``, this value will be populated based on the size + of the ``stream`` being uploaded. (By default ``stream_final=True``.) + + If this upload is initiated with ``stream_final=False``, + :attr:`total_bytes` will be :data:`None` since it cannot be + determined from the stream. + """ + return self._total_bytes + + def _prepare_initiate_request( + self, stream, metadata, content_type, total_bytes=None, stream_final=True + ): + """Prepare the contents of HTTP request to initiate upload. + + This is everything that must be done before a request that doesn't + require network I/O (or other I/O). This is based on the `sans-I/O`_ + philosophy. + + Args: + stream (IO[bytes]): The stream (i.e. file-like object) that will + be uploaded. The stream **must** be at the beginning (i.e. + ``stream.tell() == 0``). + metadata (Mapping[str, str]): The resource metadata, such as an + ACL list. + content_type (str): The content type of the resource, e.g. a JPEG + image has content type ``image/jpeg``. + total_bytes (Optional[int]): The total number of bytes to be + uploaded. If specified, the upload size **will not** be + determined from the stream (even if ``stream_final=True``). + stream_final (Optional[bool]): Indicates if the ``stream`` is + "final" (i.e. no more bytes will be added to it). In this case + we determine the upload size from the size of the stream. If + ``total_bytes`` is passed, this argument will be ignored. 
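# Illustrative sketch (not part of this diff): chunk_size must be a whole
# multiple of UPLOAD_CHUNK_SIZE (256 KiB at time of writing), so resumable
# chunk sizes are usually chosen like this.
from google.cloud.storage._media import UPLOAD_CHUNK_SIZE  # re-exported by this diff

chunk_size = 10 * UPLOAD_CHUNK_SIZE  # any positive multiple is accepted
assert chunk_size % UPLOAD_CHUNK_SIZE == 0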
+ + Returns: + Tuple[str, str, bytes, Mapping[str, str]]: The quadruple + + * HTTP verb for the request (always POST) + * the URL for the request + * the body of the request + * headers for the request + + Raises: + ValueError: If the current upload has already been initiated. + ValueError: If ``stream`` is not at the beginning. + + .. _sans-I/O: https://sans-io.readthedocs.io/ + """ + if self.resumable_url is not None: + raise ValueError("This upload has already been initiated.") + if stream.tell() != 0: + raise ValueError("Stream must be at beginning.") + + self._stream = stream + self._content_type = content_type + + # Signed URL requires content type set directly - not through x-upload-content-type + parse_result = urllib.parse.urlparse(self.upload_url) + parsed_query = urllib.parse.parse_qs(parse_result.query) + if "x-goog-signature" in parsed_query or "X-Goog-Signature" in parsed_query: + # Deconstruct **self._headers first so that content type defined here takes priority + headers = {**self._headers, _CONTENT_TYPE_HEADER: content_type} + else: + # Deconstruct **self._headers first so that content type defined here takes priority + headers = { + **self._headers, + _CONTENT_TYPE_HEADER: "application/json; charset=UTF-8", + "x-upload-content-type": content_type, + } + # Set the total bytes if possible. + if total_bytes is not None: + self._total_bytes = total_bytes + elif stream_final: + self._total_bytes = get_total_bytes(stream) + # Add the total bytes to the headers if set. + if self._total_bytes is not None: + content_length = "{:d}".format(self._total_bytes) + headers["x-upload-content-length"] = content_length + + payload = json.dumps(metadata).encode("utf-8") + return _POST, self.upload_url, payload, headers + + def _process_initiate_response(self, response): + """Process the response from an HTTP request that initiated upload. + + This is everything that must be done after a request that doesn't + require network I/O (or other I/O). This is based on the `sans-I/O`_ + philosophy. + + This method takes the URL from the ``Location`` header and stores it + for future use. Within that URL, we assume the ``upload_id`` query + parameter has been included, but we do not check. + + Args: + response (object): The HTTP response object (need headers). + + .. _sans-I/O: https://sans-io.readthedocs.io/ + """ + _helpers.require_status_code( + response, + (http.client.OK, http.client.CREATED), + self._get_status_code, + callback=self._make_invalid, + ) + self._resumable_url = _helpers.header_required( + response, "location", self._get_headers + ) + + def initiate( + self, + transport, + stream, + metadata, + content_type, + total_bytes=None, + stream_final=True, + timeout=None, + ): + """Initiate a resumable upload. + + By default, this method assumes your ``stream`` is in a "final" + state ready to transmit. However, ``stream_final=False`` can be used + to indicate that the size of the resource is not known. This can happen + if bytes are being dynamically fed into ``stream``, e.g. if the stream + is attached to application logs. + + If ``stream_final=False`` is used, :attr:`chunk_size` bytes will be + read from the stream every time :meth:`transmit_next_chunk` is called. + If one of those reads produces strictly fewer bites than the chunk + size, the upload will be concluded. + + Args: + transport (object): An object which can make authenticated + requests. + stream (IO[bytes]): The stream (i.e. 
file-like object) that will + be uploaded. The stream **must** be at the beginning (i.e. + ``stream.tell() == 0``). + metadata (Mapping[str, str]): The resource metadata, such as an + ACL list. + content_type (str): The content type of the resource, e.g. a JPEG + image has content type ``image/jpeg``. + total_bytes (Optional[int]): The total number of bytes to be + uploaded. If specified, the upload size **will not** be + determined from the stream (even if ``stream_final=True``). + stream_final (Optional[bool]): Indicates if the ``stream`` is + "final" (i.e. no more bytes will be added to it). In this case + we determine the upload size from the size of the stream. If + ``total_bytes`` is passed, this argument will be ignored. + timeout (Optional[Union[float, Tuple[float, float]]]): + The number of seconds to wait for the server response. + Depending on the retry strategy, a request may be repeated + several times using the same timeout each time. + + Can also be passed as a tuple (connect_timeout, read_timeout). + See :meth:`requests.Session.request` documentation for details. + + Raises: + NotImplementedError: Always, since virtual. + """ + raise NotImplementedError("This implementation is virtual.") + + def _prepare_request(self): + """Prepare the contents of HTTP request to upload a chunk. + + This is everything that must be done before a request that doesn't + require network I/O. This is based on the `sans-I/O`_ philosophy. + + For the time being, this **does require** some form of I/O to read + a chunk from ``stream`` (via :func:`get_next_chunk`). However, this + will (almost) certainly not be network I/O. + + Returns: + Tuple[str, str, bytes, Mapping[str, str]]: The quadruple + + * HTTP verb for the request (always PUT) + * the URL for the request + * the body of the request + * headers for the request + + The headers incorporate the ``_headers`` on the current instance. + + Raises: + ValueError: If the current upload has finished. + ValueError: If the current upload is in an invalid state. + ValueError: If the current upload has not been initiated. + ValueError: If the location in the stream (i.e. ``stream.tell()``) + does not agree with ``bytes_uploaded``. + + .. _sans-I/O: https://sans-io.readthedocs.io/ + """ + if self.finished: + raise ValueError("Upload has finished.") + if self.invalid: + raise ValueError( + "Upload is in an invalid state. To recover call `recover()`." + ) + if self.resumable_url is None: + raise ValueError( + "This upload has not been initiated. Please call " + "initiate() before beginning to transmit chunks." + ) + + start_byte, payload, content_range = get_next_chunk( + self._stream, self._chunk_size, self._total_bytes + ) + if start_byte != self.bytes_uploaded: + msg = _STREAM_ERROR_TEMPLATE.format(start_byte, self.bytes_uploaded) + raise ValueError(msg) + + self._update_checksum(start_byte, payload) + + headers = { + **self._headers, + _CONTENT_TYPE_HEADER: self._content_type, + _helpers.CONTENT_RANGE_HEADER: content_range, + } + return _PUT, self.resumable_url, payload, headers + + def _update_checksum(self, start_byte, payload): + """Update the checksum with the payload if not already updated. + + Because error recovery can result in bytes being transmitted more than + once, the checksum tracks the number of bytes checked in + self._bytes_checksummed and skips bytes that have already been summed. 
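# Illustrative sketch (not part of this diff): the content-range header sent
# with each chunk, following the templates defined near the top of this file
# ("bytes {start}-{end}/{total}" when the size is known, "/*" otherwise).
def example_content_range(start_byte, payload_len, total_bytes=None):
    end_byte = start_byte + payload_len - 1
    if total_bytes is not None:
        return "bytes {:d}-{:d}/{:d}".format(start_byte, end_byte, total_bytes)
    return "bytes {:d}-{:d}/*".format(start_byte, end_byte)

assert example_content_range(0, 262144, total_bytes=1000000) == "bytes 0-262143/1000000"
assert example_content_range(262144, 262144) == "bytes 262144-524287/*"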
+
+        """
+        if not self._checksum_type:
+            return
+
+        if not self._checksum_object:
+            self._checksum_object = _helpers._get_checksum_object(self._checksum_type)
+
+        if start_byte < self._bytes_checksummed:
+            offset = self._bytes_checksummed - start_byte
+            data = payload[offset:]
+        else:
+            data = payload
+
+        self._checksum_object.update(data)
+        self._bytes_checksummed += len(data)
+
+    def _make_invalid(self):
+        """Simple setter for ``invalid``.
+
+        This is intended to be passed along as a callback to helpers that
+        raise an exception so they can mark this instance as invalid before
+        raising.
+        """
+        self._invalid = True
+
+    def _process_resumable_response(self, response, bytes_sent):
+        """Process the response from an HTTP request.
+
+        This is everything that must be done after a request that doesn't
+        require network I/O (or other I/O). This is based on the `sans-I/O`_
+        philosophy.
+
+        Args:
+            response (object): The HTTP response object.
+            bytes_sent (int): The number of bytes sent in the request that
+                ``response`` was returned for.
+
+        Raises:
+            ~google.cloud.storage.exceptions.InvalidResponse: If the status
+                code is 308 and the ``range`` header is not of the form
+                ``bytes 0-{end}``.
+            ~google.cloud.storage.exceptions.InvalidResponse: If the status
+                code is not 200 or 308.
+
+        .. _sans-I/O: https://sans-io.readthedocs.io/
+        """
+        status_code = _helpers.require_status_code(
+            response,
+            (http.client.OK, http.client.PERMANENT_REDIRECT),
+            self._get_status_code,
+            callback=self._make_invalid,
+        )
+        if status_code == http.client.OK:
+            # NOTE: We use the "local" information of ``bytes_sent`` to update
+            #       ``bytes_uploaded``, but do not verify this against other
+            #       state. However, there may be some other information:
+            #
+            #       * a ``size`` key in JSON response body
+            #       * the ``total_bytes`` attribute (if set)
+            #       * ``stream.tell()`` (relying on fact that ``initiate()``
+            #         requires stream to be at the beginning)
+            self._bytes_uploaded = self._bytes_uploaded + bytes_sent
+            # Tombstone the current upload so it cannot be used again.
+            self._finished = True
+            # Validate the checksum. This can raise an exception on failure.
+            self._validate_checksum(response)
+        else:
+            bytes_range = _helpers.header_required(
+                response,
+                _helpers.RANGE_HEADER,
+                self._get_headers,
+                callback=self._make_invalid,
+            )
+            match = _BYTES_RANGE_RE.match(bytes_range)
+            if match is None:
+                self._make_invalid()
+                raise InvalidResponse(
+                    response,
+                    'Unexpected "range" header',
+                    bytes_range,
+                    'Expected to be of the form "bytes=0-{end}"',
+                )
+            self._bytes_uploaded = int(match.group("end_byte")) + 1
+
+    def _validate_checksum(self, response):
+        """Check the computed checksum, if any, against the received metadata.
+
+        Args:
+            response (object): The HTTP response object.
+
+        Raises:
+            ~google.cloud.storage.exceptions.DataCorruption: If the checksum
+                computed locally and the checksum reported by the remote host do
+                not match.
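+
+        For example, with ``checksum="md5"`` the locally computed digest is
+        compared against the ``md5Hash`` key of the object metadata returned
+        in the response, and with ``checksum="crc32c"`` against the
+        ``crc32c`` key.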
+ """ + if self._checksum_type is None: + return + metadata_key = _helpers._get_metadata_key(self._checksum_type) + metadata = response.json() + remote_checksum = metadata.get(metadata_key) + if remote_checksum is None: + raise InvalidResponse( + response, + _UPLOAD_METADATA_NO_APPROPRIATE_CHECKSUM_MESSAGE.format(metadata_key), + self._get_headers(response), + ) + local_checksum = _helpers.prepare_checksum_digest( + self._checksum_object.digest() + ) + if local_checksum != remote_checksum: + raise DataCorruption( + response, + _UPLOAD_CHECKSUM_MISMATCH_MESSAGE.format( + self._checksum_type.upper(), local_checksum, remote_checksum + ), + ) + + def transmit_next_chunk(self, transport, timeout=None): + """Transmit the next chunk of the resource to be uploaded. + + If the current upload was initiated with ``stream_final=False``, + this method will dynamically determine if the upload has completed. + The upload will be considered complete if the stream produces + fewer than :attr:`chunk_size` bytes when a chunk is read from it. + + Args: + transport (object): An object which can make authenticated + requests. + timeout (Optional[Union[float, Tuple[float, float]]]): + The number of seconds to wait for the server response. + Depending on the retry strategy, a request may be repeated + several times using the same timeout each time. + + Can also be passed as a tuple (connect_timeout, read_timeout). + See :meth:`requests.Session.request` documentation for details. + + Raises: + NotImplementedError: Always, since virtual. + """ + raise NotImplementedError("This implementation is virtual.") + + def _prepare_recover_request(self): + """Prepare the contents of HTTP request to recover from failure. + + This is everything that must be done before a request that doesn't + require network I/O. This is based on the `sans-I/O`_ philosophy. + + We assume that the :attr:`resumable_url` is set (i.e. the only way + the upload can end up :attr:`invalid` is if it has been initiated. + + Returns: + Tuple[str, str, NoneType, Mapping[str, str]]: The quadruple + + * HTTP verb for the request (always PUT) + * the URL for the request + * the body of the request (always :data:`None`) + * headers for the request + + The headers **do not** incorporate the ``_headers`` on the + current instance. + + .. _sans-I/O: https://sans-io.readthedocs.io/ + """ + headers = {_helpers.CONTENT_RANGE_HEADER: "bytes */*"} + return _PUT, self.resumable_url, None, headers + + def _process_recover_response(self, response): + """Process the response from an HTTP request to recover from failure. + + This is everything that must be done after a request that doesn't + require network I/O (or other I/O). This is based on the `sans-I/O`_ + philosophy. + + Args: + response (object): The HTTP response object. + + Raises: + ~google.cloud.storage.exceptions.InvalidResponse: If the status + code is not 308. + ~google.cloud.storage.exceptions.InvalidResponse: If the status + code is 308 and the ``range`` header is not of the form + ``bytes 0-{end}``. + + .. 
_sans-I/O: https://sans-io.readthedocs.io/ + """ + _helpers.require_status_code( + response, (http.client.PERMANENT_REDIRECT,), self._get_status_code + ) + headers = self._get_headers(response) + if _helpers.RANGE_HEADER in headers: + bytes_range = headers[_helpers.RANGE_HEADER] + match = _BYTES_RANGE_RE.match(bytes_range) + if match is None: + raise InvalidResponse( + response, + 'Unexpected "range" header', + bytes_range, + 'Expected to be of the form "bytes=0-{end}"', + ) + self._bytes_uploaded = int(match.group("end_byte")) + 1 + else: + # In this case, the upload has not "begun". + self._bytes_uploaded = 0 + + self._stream.seek(self._bytes_uploaded) + self._invalid = False + + def recover(self, transport): + """Recover from a failure. + + This method should be used when a :class:`ResumableUpload` is in an + :attr:`~ResumableUpload.invalid` state due to a request failure. + + This will verify the progress with the server and make sure the + current upload is in a valid state before :meth:`transmit_next_chunk` + can be used again. + + Args: + transport (object): An object which can make authenticated + requests. + + Raises: + NotImplementedError: Always, since virtual. + """ + raise NotImplementedError("This implementation is virtual.") + + +class XMLMPUContainer(UploadBase): + """Initiate and close an upload using the XML MPU API. + + An XML MPU sends an initial request and then receives an upload ID. + Using the upload ID, the upload is then done in numbered parts and the + parts can be uploaded concurrently. + + In order to avoid concurrency issues with this container object, the + uploading of individual parts is handled separately, by XMLMPUPart objects + spawned from this container class. The XMLMPUPart objects are not + necessarily in the same process as the container, so they do not update the + container automatically. + + MPUs are sometimes referred to as "Multipart Uploads", which is ambiguous + given the JSON multipart upload, so the abbreviation "MPU" will be used + throughout. + + See: https://cloud.google.com/storage/docs/multipart-uploads + + Args: + upload_url (str): The URL of the object (without query parameters). The + initiate, PUT, and finalization requests will all use this URL, with + varying query parameters. + filename (str): The name (path) of the file to upload. + headers (Optional[Mapping[str, str]]): Extra headers that should + be sent with every request. + retry (Optional[google.api_core.retry.Retry]): How to retry the + RPC. A None value will disable retries. A + google.api_core.retry.Retry value will enable retries, and the + object will configure backoff and timeout options. + + See the retry.py source code and docstrings in this package + (google.cloud.storage.retry) for information on retry types and how + to configure them. + + Attributes: + upload_url (str): The URL where the content will be uploaded. + upload_id (Optional(str)): The ID of the upload from the initialization + response. + """ + + def __init__( + self, upload_url, filename, headers=None, upload_id=None, retry=DEFAULT_RETRY + ): + super().__init__(upload_url, headers=headers, retry=retry) + self._filename = filename + self._upload_id = upload_id + self._parts = {} + + @property + def upload_id(self): + return self._upload_id + + def register_part(self, part_number, etag): + """Register an uploaded part by part number and corresponding etag. 
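+
+        A minimal sketch of the intended flow (names are illustrative; the
+        concrete, transport-aware classes live in the ``requests``
+        subpackage)::
+
+            part = XMLMPUPart(url, container.upload_id, filename, 0, size, 1)
+            part.upload(transport)
+            container.register_part(part.part_number, part.etag)
+            container.finalize(transport)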
+ + XMLMPUPart objects represent individual parts, and their part number + and etag can be registered to the container object with this method + and therefore incorporated in the finalize() call to finish the upload. + + This method accepts part_number and etag, but not XMLMPUPart objects + themselves, to reduce the complexity involved in running XMLMPUPart + uploads in separate processes. + + Args: + part_number (int): The part number. Parts are assembled into the + final uploaded object with finalize() in order of their part + numbers. + etag (str): The etag included in the server response after upload. + """ + self._parts[part_number] = etag + + def _prepare_initiate_request(self, content_type): + """Prepare the contents of HTTP request to initiate upload. + + This is everything that must be done before a request that doesn't + require network I/O (or other I/O). This is based on the `sans-I/O`_ + philosophy. + + Args: + content_type (str): The content type of the resource, e.g. a JPEG + image has content type ``image/jpeg``. + + Returns: + Tuple[str, str, bytes, Mapping[str, str]]: The quadruple + + * HTTP verb for the request (always POST) + * the URL for the request + * the body of the request + * headers for the request + + Raises: + ValueError: If the current upload has already been initiated. + + .. _sans-I/O: https://sans-io.readthedocs.io/ + """ + if self.upload_id is not None: + raise ValueError("This upload has already been initiated.") + + initiate_url = self.upload_url + _MPU_INITIATE_QUERY + + headers = { + **self._headers, + _CONTENT_TYPE_HEADER: content_type, + } + return _POST, initiate_url, None, headers + + def _process_initiate_response(self, response): + """Process the response from an HTTP request that initiated the upload. + + This is everything that must be done after a request that doesn't + require network I/O (or other I/O). This is based on the `sans-I/O`_ + philosophy. + + This method takes the URL from the ``Location`` header and stores it + for future use. Within that URL, we assume the ``upload_id`` query + parameter has been included, but we do not check. + + Args: + response (object): The HTTP response object. + + Raises: + ~google.cloud.storage.exceptions.InvalidResponse: If the status + code is not 200. + + .. _sans-I/O: https://sans-io.readthedocs.io/ + """ + _helpers.require_status_code(response, (http.client.OK,), self._get_status_code) + root = ElementTree.fromstring(response.text) + self._upload_id = root.find(_S3_COMPAT_XML_NAMESPACE + _UPLOAD_ID_NODE).text + + def initiate( + self, + transport, + content_type, + timeout=None, + ): + """Initiate an MPU and record the upload ID. + + Args: + transport (object): An object which can make authenticated + requests. + content_type (str): The content type of the resource, e.g. a JPEG + image has content type ``image/jpeg``. + timeout (Optional[Union[float, Tuple[float, float]]]): + The number of seconds to wait for the server response. + Depending on the retry strategy, a request may be repeated + several times using the same timeout each time. + + Can also be passed as a tuple (connect_timeout, read_timeout). + See :meth:`requests.Session.request` documentation for details. + + Raises: + NotImplementedError: Always, since virtual. + """ + raise NotImplementedError("This implementation is virtual.") + + def _prepare_finalize_request(self): + """Prepare the contents of an HTTP request to finalize the upload. 
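+
+        The request body is a ``CompleteMultipartUpload`` XML document built
+        from the registered parts, roughly of this shape (illustrative
+        values)::
+
+            <CompleteMultipartUpload>
+              <Part><PartNumber>1</PartNumber><ETag>etag-1</ETag></Part>
+              <Part><PartNumber>2</PartNumber><ETag>etag-2</ETag></Part>
+            </CompleteMultipartUpload>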
+ + All of the parts must be registered before calling this method. + + Returns: + Tuple[str, str, bytes, Mapping[str, str]]: The quadruple + + * HTTP verb for the request (always POST) + * the URL for the request + * the body of the request + * headers for the request + + Raises: + ValueError: If the upload has not been initiated. + """ + if self.upload_id is None: + raise ValueError("This upload has not yet been initiated.") + + final_query = _MPU_FINAL_QUERY_TEMPLATE.format(upload_id=self._upload_id) + finalize_url = self.upload_url + final_query + final_xml_root = ElementTree.Element("CompleteMultipartUpload") + for part_number, etag in self._parts.items(): + part = ElementTree.SubElement(final_xml_root, "Part") # put in a loop + ElementTree.SubElement(part, "PartNumber").text = str(part_number) + ElementTree.SubElement(part, "ETag").text = etag + payload = ElementTree.tostring(final_xml_root) + return _POST, finalize_url, payload, self._headers + + def _process_finalize_response(self, response): + """Process the response from an HTTP request that finalized the upload. + + This is everything that must be done after a request that doesn't + require network I/O (or other I/O). This is based on the `sans-I/O`_ + philosophy. + + Args: + response (object): The HTTP response object. + + Raises: + ~google.cloud.storage.exceptions.InvalidResponse: If the status + code is not 200. + + .. _sans-I/O: https://sans-io.readthedocs.io/ + """ + + _helpers.require_status_code(response, (http.client.OK,), self._get_status_code) + self._finished = True + + def finalize( + self, + transport, + timeout=None, + ): + """Finalize an MPU request with all the parts. + + Args: + transport (object): An object which can make authenticated + requests. + timeout (Optional[Union[float, Tuple[float, float]]]): + The number of seconds to wait for the server response. + Depending on the retry strategy, a request may be repeated + several times using the same timeout each time. + + Can also be passed as a tuple (connect_timeout, read_timeout). + See :meth:`requests.Session.request` documentation for details. + + Raises: + NotImplementedError: Always, since virtual. + """ + raise NotImplementedError("This implementation is virtual.") + + def _prepare_cancel_request(self): + """Prepare the contents of an HTTP request to cancel the upload. + + Returns: + Tuple[str, str, bytes, Mapping[str, str]]: The quadruple + + * HTTP verb for the request (always DELETE) + * the URL for the request + * the body of the request + * headers for the request + + Raises: + ValueError: If the upload has not been initiated. + """ + if self.upload_id is None: + raise ValueError("This upload has not yet been initiated.") + + cancel_query = _MPU_FINAL_QUERY_TEMPLATE.format(upload_id=self._upload_id) + cancel_url = self.upload_url + cancel_query + return _DELETE, cancel_url, None, self._headers + + def _process_cancel_response(self, response): + """Process the response from an HTTP request that canceled the upload. + + This is everything that must be done after a request that doesn't + require network I/O (or other I/O). This is based on the `sans-I/O`_ + philosophy. + + Args: + response (object): The HTTP response object. + + Raises: + ~google.cloud.storage.exceptions.InvalidResponse: If the status + code is not 204. + + .. 
_sans-I/O: https://sans-io.readthedocs.io/ + """ + + _helpers.require_status_code( + response, (http.client.NO_CONTENT,), self._get_status_code + ) + + def cancel( + self, + transport, + timeout=None, + ): + """Cancel an MPU request and permanently delete any uploaded parts. + + This cannot be undone. + + Args: + transport (object): An object which can make authenticated + requests. + timeout (Optional[Union[float, Tuple[float, float]]]): + The number of seconds to wait for the server response. + Depending on the retry strategy, a request may be repeated + several times using the same timeout each time. + + Can also be passed as a tuple (connect_timeout, read_timeout). + See :meth:`requests.Session.request` documentation for details. + + Raises: + NotImplementedError: Always, since virtual. + """ + raise NotImplementedError("This implementation is virtual.") + + +class XMLMPUPart(UploadBase): + """Upload a single part of an existing XML MPU container. + + An XML MPU sends an initial request and then receives an upload ID. + Using the upload ID, the upload is then done in numbered parts and the + parts can be uploaded concurrently. + + In order to avoid concurrency issues with the container object, the + uploading of individual parts is handled separately by multiple objects + of this class. Once a part is uploaded, it can be registered with the + container with `container.register_part(part.part_number, part.etag)`. + + MPUs are sometimes referred to as "Multipart Uploads", which is ambiguous + given the JSON multipart upload, so the abbreviation "MPU" will be used + throughout. + + See: https://cloud.google.com/storage/docs/multipart-uploads + + Args: + upload_url (str): The URL of the object (without query parameters). + upload_id (str): The ID of the upload from the initialization response. + filename (str): The name (path) of the file to upload. + start (int): The byte index of the beginning of the part. + end (int): The byte index of the end of the part. + part_number (int): The part number. Part numbers will be assembled in + sequential order when the container is finalized. + headers (Optional[Mapping[str, str]]): Extra headers that should + be sent with every request. + checksum (Optional([str])): The type of checksum to compute to verify + the integrity of the object. The request headers will be amended + to include the computed value. Supported values are "md5", "crc32c", + "auto" and None. The default is "auto", which will try to detect if + the C extension for crc32c is installed and fall back to md5 + otherwise. + retry (Optional[google.api_core.retry.Retry]): How to retry the + RPC. A None value will disable retries. A + google.api_core.retry.Retry value will enable retries, and the + object will configure backoff and timeout options. + + See the retry.py source code and docstrings in this package + (google.cloud.storage.retry) for information on retry types and how + to configure them. + + Attributes: + upload_url (str): The URL of the object (without query parameters). + upload_id (str): The ID of the upload from the initialization response. + filename (str): The name (path) of the file to upload. + start (int): The byte index of the beginning of the part. + end (int): The byte index of the end of the part. + part_number (int): The part number. Part numbers will be assembled in + sequential order when the container is finalized. 
+ etag (Optional(str)): The etag returned by the service after upload. + """ + + def __init__( + self, + upload_url, + upload_id, + filename, + start, + end, + part_number, + headers=None, + checksum="auto", + retry=DEFAULT_RETRY, + ): + super().__init__(upload_url, headers=headers, retry=retry) + self._filename = filename + self._start = start + self._end = end + self._upload_id = upload_id + self._part_number = part_number + self._etag = None + self._checksum_type = checksum + if self._checksum_type == "auto": + self._checksum_type = ( + "crc32c" if _helpers._is_crc32c_available_and_fast() else "md5" + ) + self._checksum_object = None + + @property + def part_number(self): + return self._part_number + + @property + def upload_id(self): + return self._upload_id + + @property + def filename(self): + return self._filename + + @property + def etag(self): + return self._etag + + @property + def start(self): + return self._start + + @property + def end(self): + return self._end + + def _prepare_upload_request(self): + """Prepare the contents of HTTP request to upload a part. + + This is everything that must be done before a request that doesn't + require network I/O. This is based on the `sans-I/O`_ philosophy. + + For the time being, this **does require** some form of I/O to read + a part from ``stream`` (via :func:`get_part_payload`). However, this + will (almost) certainly not be network I/O. + + Returns: + Tuple[str, str, bytes, Mapping[str, str]]: The quadruple + + * HTTP verb for the request (always PUT) + * the URL for the request + * the body of the request + * headers for the request + + The headers incorporate the ``_headers`` on the current instance. + + Raises: + ValueError: If the current upload has finished. + + .. _sans-I/O: https://sans-io.readthedocs.io/ + """ + if self.finished: + raise ValueError("This part has already been uploaded.") + + with open(self._filename, "br") as f: + f.seek(self._start) + payload = f.read(self._end - self._start) + + self._checksum_object = _helpers._get_checksum_object(self._checksum_type) + if self._checksum_object is not None: + self._checksum_object.update(payload) + + part_query = _MPU_PART_QUERY_TEMPLATE.format( + part=self._part_number, upload_id=self._upload_id + ) + upload_url = self.upload_url + part_query + return _PUT, upload_url, payload, self._headers + + def _process_upload_response(self, response): + """Process the response from an HTTP request. + + This is everything that must be done after a request that doesn't + require network I/O (or other I/O). This is based on the `sans-I/O`_ + philosophy. + + Args: + response (object): The HTTP response object. + + Raises: + ~google.cloud.storage.exceptions.InvalidResponse: If the status + code is not 200 or the response is missing data. + + .. _sans-I/O: https://sans-io.readthedocs.io/ + """ + _helpers.require_status_code( + response, + (http.client.OK,), + self._get_status_code, + ) + + self._validate_checksum(response) + + etag = _helpers.header_required(response, "etag", self._get_headers) + self._etag = etag + self._finished = True + + def upload( + self, + transport, + timeout=None, + ): + """Upload the part. + + Args: + transport (object): An object which can make authenticated + requests. + timeout (Optional[Union[float, Tuple[float, float]]]): + The number of seconds to wait for the server response. 
+ Depending on the retry strategy, a request may be repeated + several times using the same timeout each time. + + Can also be passed as a tuple (connect_timeout, read_timeout). + See :meth:`requests.Session.request` documentation for details. + + Raises: + NotImplementedError: Always, since virtual. + """ + raise NotImplementedError("This implementation is virtual.") + + def _validate_checksum(self, response): + """Check the computed checksum, if any, against the response headers. + + Args: + response (object): The HTTP response object. + + Raises: + ~google.cloud.storage.exceptions.DataCorruption: If the checksum + computed locally and the checksum reported by the remote host do + not match. + """ + if self._checksum_type is None: + return + + remote_checksum = _helpers._get_uploaded_checksum_from_headers( + response, self._get_headers, self._checksum_type + ) + + if remote_checksum is None: + metadata_key = _helpers._get_metadata_key(self._checksum_type) + raise InvalidResponse( + response, + _UPLOAD_METADATA_NO_APPROPRIATE_CHECKSUM_MESSAGE.format(metadata_key), + self._get_headers(response), + ) + local_checksum = _helpers.prepare_checksum_digest( + self._checksum_object.digest() + ) + if local_checksum != remote_checksum: + raise DataCorruption( + response, + _UPLOAD_CHECKSUM_MISMATCH_MESSAGE.format( + self._checksum_type.upper(), local_checksum, remote_checksum + ), + ) + + +def get_boundary(): + """Get a random boundary for a multipart request. + + Returns: + bytes: The boundary used to separate parts of a multipart request. + """ + random_int = random.randrange(sys.maxsize) + boundary = _BOUNDARY_FORMAT.format(random_int) + # NOTE: Neither % formatting nor .format() are available for byte strings + # in Python 3.4, so we must use unicode strings as templates. + return boundary.encode("utf-8") + + +def construct_multipart_request(data, metadata, content_type): + """Construct a multipart request body. + + Args: + data (bytes): The resource content (UTF-8 encoded as bytes) + to be uploaded. + metadata (Mapping[str, str]): The resource metadata, such as an + ACL list. + content_type (str): The content type of the resource, e.g. a JPEG + image has content type ``image/jpeg``. + + Returns: + Tuple[bytes, bytes]: The multipart request body and the boundary used + between each part. + """ + multipart_boundary = get_boundary() + json_bytes = json.dumps(metadata).encode("utf-8") + content_type = content_type.encode("utf-8") + # Combine the two parts into a multipart payload. + # NOTE: We'd prefer a bytes template but are restricted by Python 3.4. + boundary_sep = _MULTIPART_SEP + multipart_boundary + content = ( + boundary_sep + + _MULTIPART_BEGIN + + json_bytes + + _CRLF + + boundary_sep + + _CRLF + + b"content-type: " + + content_type + + _CRLF + + _CRLF + + data # Empty line between headers and body. + + _CRLF + + boundary_sep + + _MULTIPART_SEP + ) + + return content, multipart_boundary + + +def get_total_bytes(stream): + """Determine the total number of bytes in a stream. + + Args: + stream (IO[bytes]): The stream (i.e. file-like object). + + Returns: + int: The number of bytes. + """ + current_position = stream.tell() + # NOTE: ``.seek()`` **should** return the same value that ``.tell()`` + # returns, but in Python 2, ``file`` objects do not. + stream.seek(0, os.SEEK_END) + end_position = stream.tell() + # Go back to the initial position. 
+ stream.seek(current_position) + + return end_position + + +def get_next_chunk(stream, chunk_size, total_bytes): + """Get a chunk from an I/O stream. + + The ``stream`` may have fewer bytes remaining than ``chunk_size`` + so it may not always be the case that + ``end_byte == start_byte + chunk_size - 1``. + + Args: + stream (IO[bytes]): The stream (i.e. file-like object). + chunk_size (int): The size of the chunk to be read from the ``stream``. + total_bytes (Optional[int]): The (expected) total number of bytes + in the ``stream``. + + Returns: + Tuple[int, bytes, str]: Triple of: + + * the start byte index + * the content in between the start and end bytes (inclusive) + * content range header for the chunk (slice) that has been read + + Raises: + ValueError: If ``total_bytes == 0`` but ``stream.read()`` yields + non-empty content. + ValueError: If there is no data left to consume. This corresponds + exactly to the case ``end_byte < start_byte``, which can only + occur if ``end_byte == start_byte - 1``. + """ + start_byte = stream.tell() + if total_bytes is not None and start_byte + chunk_size >= total_bytes > 0: + payload = stream.read(total_bytes - start_byte) + else: + payload = stream.read(chunk_size) + end_byte = stream.tell() - 1 + + num_bytes_read = len(payload) + if total_bytes is None: + if num_bytes_read < chunk_size: + # We now **KNOW** the total number of bytes. + total_bytes = end_byte + 1 + elif total_bytes == 0: + # NOTE: We also expect ``start_byte == 0`` here but don't check + # because ``_prepare_initiate_request()`` requires the + # stream to be at the beginning. + if num_bytes_read != 0: + raise ValueError( + "Stream specified as empty, but produced non-empty content." + ) + else: + if num_bytes_read == 0: + raise ValueError( + "Stream is already exhausted. There is no content remaining." + ) + + content_range = get_content_range(start_byte, end_byte, total_bytes) + return start_byte, payload, content_range + + +def get_content_range(start_byte, end_byte, total_bytes): + """Convert start, end and total into content range header. + + If ``total_bytes`` is not known, uses "bytes {start}-{end}/*". + If we are dealing with an empty range (i.e. ``end_byte < start_byte``) + then "bytes */{total}" is used. + + This function **ASSUMES** that if the size is not known, the caller will + not also pass an empty range. + + Args: + start_byte (int): The start (inclusive) of the byte range. + end_byte (int): The end (inclusive) of the byte range. + total_bytes (Optional[int]): The number of bytes in the byte + range (if known). + + Returns: + str: The content range header. + """ + if total_bytes is None: + return _RANGE_UNKNOWN_TEMPLATE.format(start_byte, end_byte) + elif end_byte < start_byte: + return _EMPTY_RANGE_TEMPLATE.format(total_bytes) + else: + return _CONTENT_RANGE_TEMPLATE.format(start_byte, end_byte, total_bytes) diff --git a/google/cloud/storage/_media/common.py b/google/cloud/storage/_media/common.py new file mode 100644 index 000000000..2917ea53d --- /dev/null +++ b/google/cloud/storage/_media/common.py @@ -0,0 +1,21 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Common utilities for Google Media Downloads and Resumable Uploads. + +Includes custom exception types, useful constants and shared helpers. +""" + +UPLOAD_CHUNK_SIZE = 262144 # 256 * 1024 +"""int: Chunks in a resumable upload must come in multiples of 256 KB.""" diff --git a/google/cloud/storage/_media/py.typed b/google/cloud/storage/_media/py.typed new file mode 100644 index 000000000..7705b065b --- /dev/null +++ b/google/cloud/storage/_media/py.typed @@ -0,0 +1,2 @@ +# Marker file for PEP 561. +# The google-resumable_media package uses inline types. diff --git a/google/cloud/storage/_media/requests/__init__.py b/google/cloud/storage/_media/requests/__init__.py new file mode 100644 index 000000000..743887eb9 --- /dev/null +++ b/google/cloud/storage/_media/requests/__init__.py @@ -0,0 +1,685 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""``requests`` utilities for Google Media Downloads and Resumable Uploads. + +This sub-package assumes callers will use the `requests`_ library +as transport and `google-auth`_ for sending authenticated HTTP traffic +with ``requests``. + +.. _requests: http://docs.python-requests.org/ +.. _google-auth: https://google-auth.readthedocs.io/ + +==================== +Authorized Transport +==================== + +To use ``google-auth`` and ``requests`` to create an authorized transport +that has read-only access to Google Cloud Storage (GCS): + +.. testsetup:: get-credentials + + import google.auth + import google.auth.credentials as creds_mod + import mock + + def mock_default(scopes=None): + credentials = mock.Mock(spec=creds_mod.Credentials) + return credentials, 'mock-project' + + # Patch the ``default`` function on the module. + original_default = google.auth.default + google.auth.default = mock_default + +.. doctest:: get-credentials + + >>> import google.auth + >>> import google.auth.transport.requests as tr_requests + >>> + >>> ro_scope = 'https://www.googleapis.com/auth/devstorage.read_only' + >>> credentials, _ = google.auth.default(scopes=(ro_scope,)) + >>> transport = tr_requests.AuthorizedSession(credentials) + >>> transport + + +.. testcleanup:: get-credentials + + # Put back the correct ``default`` function on the module. 
google.auth.default = original_default
+
+================
+Simple Downloads
+================
+
+To download an object from Google Cloud Storage, construct the media URL
+for the GCS object and download it with an authorized transport that has
+access to the resource:
+
+.. testsetup:: basic-download
+
+    import mock
+    import requests
+    import http.client
+
+    bucket = 'bucket-foo'
+    blob_name = 'file.txt'
+
+    fake_response = requests.Response()
+    fake_response.status_code = int(http.client.OK)
+    fake_response.headers['Content-Length'] = '1364156'
+    fake_content = mock.MagicMock(spec=['__len__'])
+    fake_content.__len__.return_value = 1364156
+    fake_response._content = fake_content
+
+    get_method = mock.Mock(return_value=fake_response, spec=[])
+    transport = mock.Mock(request=get_method, spec=['request'])
+
+.. doctest:: basic-download
+
+    >>> from google.cloud.storage._media.requests import Download
+    >>>
+    >>> url_template = (
+    ...     'https://www.googleapis.com/download/storage/v1/b/'
+    ...     '{bucket}/o/{blob_name}?alt=media')
+    >>> media_url = url_template.format(
+    ...     bucket=bucket, blob_name=blob_name)
+    >>>
+    >>> download = Download(media_url)
+    >>> response = download.consume(transport)
+    >>> download.finished
+    True
+    >>> response
+    <Response [200]>
+    >>> response.headers['Content-Length']
+    '1364156'
+    >>> len(response.content)
+    1364156
+
+To download only a portion of the bytes in the object,
+specify ``start`` and ``end`` byte positions (both optional):
+
+.. testsetup:: basic-download-with-slice
+
+    import mock
+    import requests
+    import http.client
+
+    from google.cloud.storage._media.requests import Download
+
+    media_url = 'http://test.invalid'
+    start = 4096
+    end = 8191
+    slice_size = end - start + 1
+
+    fake_response = requests.Response()
+    fake_response.status_code = int(http.client.PARTIAL_CONTENT)
+    fake_response.headers['Content-Length'] = '{:d}'.format(slice_size)
+    content_range = 'bytes {:d}-{:d}/1364156'.format(start, end)
+    fake_response.headers['Content-Range'] = content_range
+    fake_content = mock.MagicMock(spec=['__len__'])
+    fake_content.__len__.return_value = slice_size
+    fake_response._content = fake_content
+
+    get_method = mock.Mock(return_value=fake_response, spec=[])
+    transport = mock.Mock(request=get_method, spec=['request'])
+
+.. doctest:: basic-download-with-slice
+
+    >>> download = Download(media_url, start=4096, end=8191)
+    >>> response = download.consume(transport)
+    >>> download.finished
+    True
+    >>> response
+    <Response [206]>
+    >>> response.headers['Content-Length']
+    '4096'
+    >>> response.headers['Content-Range']
+    'bytes 4096-8191/1364156'
+    >>> len(response.content)
+    4096
+
+=================
+Chunked Downloads
+=================
+
+For very large objects or objects of unknown size, it may make more sense
+to download the object in chunks rather than all at once. This can be done
+to avoid dropped connections with a poor internet connection or can allow
+multiple chunks to be downloaded in parallel to speed up the total
+download.
+
+A :class:`.ChunkedDownload` uses the same media URL and authorized
+transport that a basic :class:`.Download` would use, but also
+requires a chunk size and a write-able byte ``stream``. The chunk size is used
+to determine how much of the resource to consume with each request and the
+stream is to allow the resource to be written out (e.g. to disk) without
+having to fit in memory all at once.
+
+.. testsetup:: chunked-download
+
+    import io
+
+    import mock
+    import requests
+    import http.client
+
+    media_url = 'http://test.invalid'
+
+    fifty_mb = 50 * 1024 * 1024
+    one_gb = 1024 * 1024 * 1024
+    fake_response = requests.Response()
+    fake_response.status_code = int(http.client.PARTIAL_CONTENT)
+    fake_response.headers['Content-Length'] = '{:d}'.format(fifty_mb)
+    content_range = 'bytes 0-{:d}/{:d}'.format(fifty_mb - 1, one_gb)
+    fake_response.headers['Content-Range'] = content_range
+    fake_content_begin = b'The beginning of the chunk...'
+    fake_content = fake_content_begin + b'1' * (fifty_mb - 29)
+    fake_response._content = fake_content
+
+    get_method = mock.Mock(return_value=fake_response, spec=[])
+    transport = mock.Mock(request=get_method, spec=['request'])
+
+.. doctest:: chunked-download
+
+    >>> from google.cloud.storage._media.requests import ChunkedDownload
+    >>>
+    >>> chunk_size = 50 * 1024 * 1024 # 50MB
+    >>> stream = io.BytesIO()
+    >>> download = ChunkedDownload(
+    ...     media_url, chunk_size, stream)
+    >>> # Check the state of the download before starting.
+    >>> download.bytes_downloaded
+    0
+    >>> download.total_bytes is None
+    True
+    >>> response = download.consume_next_chunk(transport)
+    >>> # Check the state of the download after consuming one chunk.
+    >>> download.finished
+    False
+    >>> download.bytes_downloaded # chunk_size
+    52428800
+    >>> download.total_bytes # 1GB
+    1073741824
+    >>> response
+    <Response [206]>
+    >>> response.headers['Content-Length']
+    '52428800'
+    >>> response.headers['Content-Range']
+    'bytes 0-52428799/1073741824'
+    >>> len(response.content) == chunk_size
+    True
+    >>> stream.seek(0)
+    0
+    >>> stream.read(29)
+    b'The beginning of the chunk...'
+
+The download will change its ``finished`` status to :data:`True`
+once the final chunk is consumed. In some cases, the final chunk may
+not be the same size as the other chunks:
+
+.. testsetup:: chunked-download-end
+
+    import mock
+    import requests
+    import http.client
+
+    from google.cloud.storage._media.requests import ChunkedDownload
+
+    media_url = 'http://test.invalid'
+
+    fifty_mb = 50 * 1024 * 1024
+    one_gb = 1024 * 1024 * 1024
+    stream = mock.Mock(spec=['write'])
+    download = ChunkedDownload(media_url, fifty_mb, stream)
+    download._bytes_downloaded = 20 * fifty_mb
+    download._total_bytes = one_gb
+
+    fake_response = requests.Response()
+    fake_response.status_code = int(http.client.PARTIAL_CONTENT)
+    slice_size = one_gb - 20 * fifty_mb
+    fake_response.headers['Content-Length'] = '{:d}'.format(slice_size)
+    content_range = 'bytes {:d}-{:d}/{:d}'.format(
+        20 * fifty_mb, one_gb - 1, one_gb)
+    fake_response.headers['Content-Range'] = content_range
+    fake_content = mock.MagicMock(spec=['__len__'])
+    fake_content.__len__.return_value = slice_size
+    fake_response._content = fake_content
+
+    get_method = mock.Mock(return_value=fake_response, spec=[])
+    transport = mock.Mock(request=get_method, spec=['request'])
+
+.. doctest:: chunked-download-end
+
+    >>> # The state of the download in progress.
+    >>> download.finished
+    False
+    >>> download.bytes_downloaded # 20 chunks at 50MB
+    1048576000
+    >>> download.total_bytes # 1GB
+    1073741824
+    >>> response = download.consume_next_chunk(transport)
+    >>> # The state of the download after consuming the final chunk.
+ >>> download.finished + True + >>> download.bytes_downloaded == download.total_bytes + True + >>> response + + >>> response.headers['Content-Length'] + '25165824' + >>> response.headers['Content-Range'] + 'bytes 1048576000-1073741823/1073741824' + >>> len(response.content) < download.chunk_size + True + +In addition, a :class:`.ChunkedDownload` can also take optional +``start`` and ``end`` byte positions. + +Usually, no checksum is returned with a chunked download. Even if one is returned, +it is not validated. If you need to validate the checksum, you can do so +by buffering the chunks and validating the checksum against the completed download. + +============== +Simple Uploads +============== + +Among the three supported upload classes, the simplest is +:class:`.SimpleUpload`. A simple upload should be used when the resource +being uploaded is small and when there is no metadata (other than the name) +associated with the resource. + +.. testsetup:: simple-upload + + import json + + import mock + import requests + import http.client + + bucket = 'some-bucket' + blob_name = 'file.txt' + + fake_response = requests.Response() + fake_response.status_code = int(http.client.OK) + payload = { + 'bucket': bucket, + 'contentType': 'text/plain', + 'md5Hash': 'M0XLEsX9/sMdiI+4pB4CAQ==', + 'name': blob_name, + 'size': '27', + } + fake_response._content = json.dumps(payload).encode('utf-8') + + post_method = mock.Mock(return_value=fake_response, spec=[]) + transport = mock.Mock(request=post_method, spec=['request']) + +.. doctest:: simple-upload + :options: +NORMALIZE_WHITESPACE + + >>> from google.cloud.storage._media.requests import SimpleUpload + >>> + >>> url_template = ( + ... 'https://www.googleapis.com/upload/storage/v1/b/{bucket}/o?' + ... 'uploadType=media&' + ... 'name={blob_name}') + >>> upload_url = url_template.format( + ... bucket=bucket, blob_name=blob_name) + >>> + >>> upload = SimpleUpload(upload_url) + >>> data = b'Some not too large content.' + >>> content_type = 'text/plain' + >>> response = upload.transmit(transport, data, content_type) + >>> upload.finished + True + >>> response + + >>> json_response = response.json() + >>> json_response['bucket'] == bucket + True + >>> json_response['name'] == blob_name + True + >>> json_response['contentType'] == content_type + True + >>> json_response['md5Hash'] + 'M0XLEsX9/sMdiI+4pB4CAQ==' + >>> int(json_response['size']) == len(data) + True + +In the rare case that an upload fails, an :exc:`.InvalidResponse` +will be raised: + +.. testsetup:: simple-upload-fail + + import time + + import mock + import requests + import http.client + + from google.cloud.storage import _media + from google.cloud.storage._media import _helpers + from google.cloud.storage._media.requests import SimpleUpload as constructor + + upload_url = 'http://test.invalid' + data = b'Some not too large content.' + content_type = 'text/plain' + + fake_response = requests.Response() + fake_response.status_code = int(http.client.SERVICE_UNAVAILABLE) + + post_method = mock.Mock(return_value=fake_response, spec=[]) + transport = mock.Mock(request=post_method, spec=['request']) + + time_sleep = time.sleep + def dont_sleep(seconds): + raise RuntimeError('No sleep', seconds) + + def SimpleUpload(*args, **kwargs): + upload = constructor(*args, **kwargs) + # Mock the cumulative sleep to avoid retries (and `time.sleep()`). 
+ upload._retry_strategy = _media.RetryStrategy( + max_cumulative_retry=-1.0) + return upload + + time.sleep = dont_sleep + +.. doctest:: simple-upload-fail + :options: +NORMALIZE_WHITESPACE + + >>> upload = SimpleUpload(upload_url) + >>> error = None + >>> try: + ... upload.transmit(transport, data, content_type) + ... except _media.InvalidResponse as caught_exc: + ... error = caught_exc + ... + >>> error + InvalidResponse('Request failed with status code', 503, + 'Expected one of', ) + >>> error.response + + >>> + >>> upload.finished + True + +.. testcleanup:: simple-upload-fail + + # Put back the correct ``sleep`` function on the ``time`` module. + time.sleep = time_sleep + +Even in the case of failure, we see that the upload is +:attr:`~.SimpleUpload.finished`, i.e. it cannot be re-used. + +================= +Multipart Uploads +================= + +After the simple upload, the :class:`.MultipartUpload` can be used to +achieve essentially the same task. However, a multipart upload allows some +metadata about the resource to be sent along as well. (This is the "multi": +we send a first part with the metadata and a second part with the actual +bytes in the resource.) + +Usage is similar to the simple upload, but :meth:`~.MultipartUpload.transmit` +accepts an extra required argument: ``metadata``. + +.. testsetup:: multipart-upload + + import json + + import mock + import requests + import http.client + + bucket = 'some-bucket' + blob_name = 'file.txt' + data = b'Some not too large content.' + content_type = 'text/plain' + + fake_response = requests.Response() + fake_response.status_code = int(http.client.OK) + payload = { + 'bucket': bucket, + 'name': blob_name, + 'metadata': {'color': 'grurple'}, + } + fake_response._content = json.dumps(payload).encode('utf-8') + + post_method = mock.Mock(return_value=fake_response, spec=[]) + transport = mock.Mock(request=post_method, spec=['request']) + +.. doctest:: multipart-upload + + >>> from google.cloud.storage._media.requests import MultipartUpload + >>> + >>> url_template = ( + ... 'https://www.googleapis.com/upload/storage/v1/b/{bucket}/o?' + ... 'uploadType=multipart') + >>> upload_url = url_template.format(bucket=bucket) + >>> + >>> upload = MultipartUpload(upload_url) + >>> metadata = { + ... 'name': blob_name, + ... 'metadata': { + ... 'color': 'grurple', + ... }, + ... } + >>> response = upload.transmit(transport, data, metadata, content_type) + >>> upload.finished + True + >>> response + + >>> json_response = response.json() + >>> json_response['bucket'] == bucket + True + >>> json_response['name'] == blob_name + True + >>> json_response['metadata'] == metadata['metadata'] + True + +As with the simple upload, in the case of failure an :exc:`.InvalidResponse` +is raised, enclosing the :attr:`~.InvalidResponse.response` that caused +the failure and the ``upload`` object cannot be re-used after a failure. + +================= +Resumable Uploads +================= + +A :class:`.ResumableUpload` deviates from the other two upload classes: +it transmits a resource over the course of multiple requests. This +is intended to be used in cases where: + +* the size of the resource is not known (i.e. it is generated on the fly) +* requests must be short-lived +* the client has request **size** limitations +* the resource is too large to fit into memory + +In general, a resource should be sent in a **single** request to avoid +latency and reduce QPS. 
See `GCS best practices`_ for more things to
+consider when using a resumable upload.
+
+.. _GCS best practices: https://cloud.google.com/storage/docs/\
+                        best-practices#uploading
+
+After creating a :class:`.ResumableUpload` instance, a
+**resumable upload session** must be initiated to let the server know that
+a series of chunked upload requests will be coming and to obtain an
+``upload_id`` for the session. In contrast to the other two upload classes,
+:meth:`~.ResumableUpload.initiate` takes a byte ``stream`` as input rather
+than raw bytes as ``data``. This can be a file object, a :class:`~io.BytesIO`
+object or any other stream implementing the same interface.
+
+.. testsetup:: resumable-initiate
+
+    import io
+
+    import mock
+    import requests
+    import http.client
+
+    bucket = 'some-bucket'
+    blob_name = 'file.txt'
+    data = b'Some resumable bytes.'
+    content_type = 'text/plain'
+
+    fake_response = requests.Response()
+    fake_response.status_code = int(http.client.OK)
+    fake_response._content = b''
+    upload_id = 'ABCdef189XY_super_serious'
+    resumable_url_template = (
+        'https://www.googleapis.com/upload/storage/v1/b/{bucket}'
+        '/o?uploadType=resumable&upload_id={upload_id}')
+    resumable_url = resumable_url_template.format(
+        bucket=bucket, upload_id=upload_id)
+    fake_response.headers['location'] = resumable_url
+    fake_response.headers['x-guploader-uploadid'] = upload_id
+
+    post_method = mock.Mock(return_value=fake_response, spec=[])
+    transport = mock.Mock(request=post_method, spec=['request'])
+
+.. doctest:: resumable-initiate
+
+    >>> from google.cloud.storage._media.requests import ResumableUpload
+    >>>
+    >>> url_template = (
+    ...     'https://www.googleapis.com/upload/storage/v1/b/{bucket}/o?'
+    ...     'uploadType=resumable')
+    >>> upload_url = url_template.format(bucket=bucket)
+    >>>
+    >>> chunk_size = 1024 * 1024 # 1MB
+    >>> upload = ResumableUpload(upload_url, chunk_size)
+    >>> stream = io.BytesIO(data)
+    >>> # The upload doesn't know how "big" it is until seeing a stream.
+    >>> upload.total_bytes is None
+    True
+    >>> metadata = {'name': blob_name}
+    >>> response = upload.initiate(transport, stream, metadata, content_type)
+    >>> response
+    <Response [200]>
+    >>> upload.resumable_url == response.headers['Location']
+    True
+    >>> upload.total_bytes == len(data)
+    True
+    >>> upload_id = response.headers['X-GUploader-UploadID']
+    >>> upload_id
+    'ABCdef189XY_super_serious'
+    >>> upload.resumable_url == upload_url + '&upload_id=' + upload_id
+    True
+
+Once a :class:`.ResumableUpload` has been initiated, the resource is
+transmitted in chunks until completion:
+
+.. testsetup:: resumable-transmit
+
+    import io
+    import json
+
+    import mock
+    import requests
+    import http.client
+
+    from google.cloud.storage import _media
+    import google.cloud.storage._media.requests.upload as upload_mod
+
+    data = b'01234567891'
+    stream = io.BytesIO(data)
+    # Create an "already initiated" upload.
+    upload_url = 'http://test.invalid'
+    chunk_size = 256 * 1024 # 256KB
+    upload = upload_mod.ResumableUpload(upload_url, chunk_size)
+    upload._resumable_url = 'http://test.invalid?upload_id=mocked'
+    upload._stream = stream
+    upload._content_type = 'text/plain'
+    upload._total_bytes = len(data)
+
+    # After-the-fact update the chunk size so that len(data)
+    # is split into three.
+    upload._chunk_size = 4
+    # Make three fake responses.
+    fake_response0 = requests.Response()
+    fake_response0.status_code = http.client.PERMANENT_REDIRECT
+    fake_response0.headers['range'] = 'bytes=0-3'
+
+    fake_response1 = requests.Response()
+    fake_response1.status_code = http.client.PERMANENT_REDIRECT
+    fake_response1.headers['range'] = 'bytes=0-7'
+
+    fake_response2 = requests.Response()
+    fake_response2.status_code = int(http.client.OK)
+    bucket = 'some-bucket'
+    blob_name = 'file.txt'
+    payload = {
+        'bucket': bucket,
+        'name': blob_name,
+        'size': '{:d}'.format(len(data)),
+    }
+    fake_response2._content = json.dumps(payload).encode('utf-8')
+
+    # Use the fake responses to mock a transport.
+    responses = [fake_response0, fake_response1, fake_response2]
+    put_method = mock.Mock(side_effect=responses, spec=[])
+    transport = mock.Mock(request=put_method, spec=['request'])
+
+.. doctest:: resumable-transmit
+
+    >>> response0 = upload.transmit_next_chunk(transport)
+    >>> response0
+    <Response [308]>
+    >>> upload.finished
+    False
+    >>> upload.bytes_uploaded == upload.chunk_size
+    True
+    >>>
+    >>> response1 = upload.transmit_next_chunk(transport)
+    >>> response1
+    <Response [308]>
+    >>> upload.finished
+    False
+    >>> upload.bytes_uploaded == 2 * upload.chunk_size
+    True
+    >>>
+    >>> response2 = upload.transmit_next_chunk(transport)
+    >>> response2
+    <Response [200]>
+    >>> upload.finished
+    True
+    >>> upload.bytes_uploaded == upload.total_bytes
+    True
+    >>> json_response = response2.json()
+    >>> json_response['bucket'] == bucket
+    True
+    >>> json_response['name'] == blob_name
+    True
+"""
+from google.cloud.storage._media.requests.download import ChunkedDownload
+from google.cloud.storage._media.requests.download import Download
+from google.cloud.storage._media.requests.upload import MultipartUpload
+from google.cloud.storage._media.requests.download import RawChunkedDownload
+from google.cloud.storage._media.requests.download import RawDownload
+from google.cloud.storage._media.requests.upload import ResumableUpload
+from google.cloud.storage._media.requests.upload import SimpleUpload
+from google.cloud.storage._media.requests.upload import XMLMPUContainer
+from google.cloud.storage._media.requests.upload import XMLMPUPart
+
+__all__ = [
+    "ChunkedDownload",
+    "Download",
+    "MultipartUpload",
+    "RawChunkedDownload",
+    "RawDownload",
+    "ResumableUpload",
+    "SimpleUpload",
+    "XMLMPUContainer",
+    "XMLMPUPart",
+]
diff --git a/google/cloud/storage/_media/requests/_request_helpers.py b/google/cloud/storage/_media/requests/_request_helpers.py
new file mode 100644
index 000000000..604ffc313
--- /dev/null
+++ b/google/cloud/storage/_media/requests/_request_helpers.py
@@ -0,0 +1,107 @@
+# Copyright 2017 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Shared utilities used by both downloads and uploads.
+
+These utilities are explicitly catered to ``requests``-like transports.
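+
+An illustrative sketch of how :func:`wait_and_retry` (defined below) composes
+a zero-argument request callable with a retry object; the ``transport`` and
+``media_url`` names here are placeholders::
+
+    import functools
+    from google.api_core import retry as api_retry
+
+    call = functools.partial(transport.request, "GET", media_url)
+    response = wait_and_retry(call, api_retry.Retry())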
+""" + +_SINGLE_GET_CHUNK_SIZE = 8192 +# The number of seconds to wait to establish a connection +# (connect() call on socket). Avoid setting this to a multiple of 3 to not +# Align with TCP Retransmission timing. (typically 2.5-3s) +_DEFAULT_CONNECT_TIMEOUT = 61 +# The number of seconds to wait between bytes sent from the server. +_DEFAULT_READ_TIMEOUT = 60 + + +class RequestsMixin(object): + """Mix-in class implementing ``requests``-specific behavior. + + These are methods that are more general purpose, with implementations + specific to the types defined in ``requests``. + """ + + @staticmethod + def _get_status_code(response): + """Access the status code from an HTTP response. + + Args: + response (~requests.Response): The HTTP response object. + + Returns: + int: The status code. + """ + return response.status_code + + @staticmethod + def _get_headers(response): + """Access the headers from an HTTP response. + + Args: + response (~requests.Response): The HTTP response object. + + Returns: + ~requests.structures.CaseInsensitiveDict: The header mapping (keys + are case-insensitive). + """ + return response.headers + + @staticmethod + def _get_body(response): + """Access the response body from an HTTP response. + + Args: + response (~requests.Response): The HTTP response object. + + Returns: + bytes: The body of the ``response``. + """ + return response.content + + +class RawRequestsMixin(RequestsMixin): + @staticmethod + def _get_body(response): + """Access the response body from an HTTP response. + + Args: + response (~requests.Response): The HTTP response object. + + Returns: + bytes: The body of the ``response``. + """ + if response._content is False: + response._content = b"".join( + response.raw.stream(_SINGLE_GET_CHUNK_SIZE, decode_content=False) + ) + response._content_consumed = True + return response._content + + +def wait_and_retry(func, retry_strategy): + """Attempts to retry a call to ``func`` until success. + + Args: + func (Callable): A callable that takes no arguments and produces + an HTTP response which will be checked as retry-able. + retry_strategy (Optional[google.api_core.retry.Retry]): The + strategy to use if the request fails and must be retried. + + Returns: + object: The return value of ``func``. + """ + if retry_strategy: + func = retry_strategy(func) + return func() diff --git a/google/cloud/storage/_media/requests/download.py b/google/cloud/storage/_media/requests/download.py new file mode 100644 index 000000000..2c1b9392c --- /dev/null +++ b/google/cloud/storage/_media/requests/download.py @@ -0,0 +1,707 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Support for downloading media from Google APIs.""" + +import urllib3.response # type: ignore +import http + +from google.cloud.storage._media import _download +from google.cloud.storage._media import _helpers +from google.cloud.storage._media.requests import _request_helpers +from google.cloud.storage.exceptions import DataCorruption + +_CHECKSUM_MISMATCH = """\ +Checksum mismatch while downloading: + + {} + +The X-Goog-Hash header indicated an {checksum_type} checksum of: + + {} + +but the actual {checksum_type} checksum of the downloaded contents was: + + {} +""" + +_STREAM_SEEK_ERROR = """\ +Incomplete download for: +{} +Error writing to stream while handling a gzip-compressed file download. +Please restart the download. +""" + + +class Download(_request_helpers.RequestsMixin, _download.Download): + """Helper to manage downloading a resource from a Google API. + + "Slices" of the resource can be retrieved by specifying a range + with ``start`` and / or ``end``. However, in typical usage, neither + ``start`` nor ``end`` is expected to be provided. + + Args: + media_url (str): The URL containing the media to be downloaded. + stream (IO[bytes]): A write-able stream (i.e. file-like object) that + the downloaded resource can be written to. + start (int): The first byte in a range to be downloaded. If not + provided, but ``end`` is provided, will download from the + beginning to ``end`` of the media. + end (int): The last byte in a range to be downloaded. If not + provided, but ``start`` is provided, will download from the + ``start`` to the end of the media. + headers (Optional[Mapping[str, str]]): Extra headers that should + be sent with the request, e.g. headers for encrypted data. + checksum Optional([str]): The type of checksum to compute to verify + the integrity of the object. The response headers must contain + a checksum of the requested type. If the headers lack an + appropriate checksum (for instance in the case of transcoded or + ranged downloads where the remote service does not know the + correct checksum) an INFO-level log will be emitted. Supported + values are "md5", "crc32c", "auto" and None. The default is "auto", + which will try to detect if the C extension for crc32c is installed + and fall back to md5 otherwise. + retry (Optional[google.api_core.retry.Retry]): How to retry the + RPC. A None value will disable retries. A + google.api_core.retry.Retry value will enable retries, and the + object will configure backoff and timeout options. + + See the retry.py source code and docstrings in this package + (google.cloud.storage.retry) for information on retry types and how + to configure them. + + Attributes: + media_url (str): The URL containing the media to be downloaded. + start (Optional[int]): The first byte in a range to be downloaded. + end (Optional[int]): The last byte in a range to be downloaded. + """ + + def _write_to_stream(self, response): + """Write response body to a write-able stream. + + .. note: + + This method assumes that the ``_stream`` attribute is set on the + current download. + + Args: + response (~requests.Response): The HTTP response object. + + Raises: + ~google.cloud.storage.exceptions.DataCorruption: If the download's + checksum doesn't agree with server-computed checksum. + """ + + # Retrieve the expected checksum only once for the download request, + # then compute and validate the checksum when the full download completes. 
+ # Retried requests are range requests, and there's no way to detect + # data corruption for that byte range alone. + if self._expected_checksum is None and self._checksum_object is None: + # `_get_expected_checksum()` may return None even if a checksum was + # requested, in which case it will emit an info log _MISSING_CHECKSUM. + # If an invalid checksum type is specified, this will raise ValueError. + expected_checksum, checksum_object = _helpers._get_expected_checksum( + response, self._get_headers, self.media_url, checksum_type=self.checksum + ) + self._expected_checksum = expected_checksum + self._checksum_object = checksum_object + else: + expected_checksum = self._expected_checksum + checksum_object = self._checksum_object + + with response: + # NOTE: In order to handle compressed streams gracefully, we try + # to insert our checksum object into the decompression stream. If + # the stream is indeed compressed, this will delegate the checksum + # object to the decoder and return a _DoNothingHash here. + local_checksum_object = _add_decoder(response.raw, checksum_object) + body_iter = response.iter_content( + chunk_size=_request_helpers._SINGLE_GET_CHUNK_SIZE, decode_unicode=False + ) + for chunk in body_iter: + self._stream.write(chunk) + self._bytes_downloaded += len(chunk) + local_checksum_object.update(chunk) + + # Don't validate the checksum for partial responses. + if ( + expected_checksum is not None + and response.status_code != http.client.PARTIAL_CONTENT + ): + actual_checksum = _helpers.prepare_checksum_digest(checksum_object.digest()) + if actual_checksum != expected_checksum: + msg = _CHECKSUM_MISMATCH.format( + self.media_url, + expected_checksum, + actual_checksum, + checksum_type=self.checksum.upper(), + ) + raise DataCorruption(response, msg) + + def consume( + self, + transport, + timeout=( + _request_helpers._DEFAULT_CONNECT_TIMEOUT, + _request_helpers._DEFAULT_READ_TIMEOUT, + ), + ): + """Consume the resource to be downloaded. + + If a ``stream`` is attached to this download, then the downloaded + resource will be written to the stream. + + Args: + transport (~requests.Session): A ``requests`` object which can + make authenticated requests. + timeout (Optional[Union[float, Tuple[float, float]]]): + The number of seconds to wait for the server response. + Depending on the retry strategy, a request may be repeated + several times using the same timeout each time. + + Can also be passed as a tuple (connect_timeout, read_timeout). + See :meth:`requests.Session.request` documentation for details. + + Returns: + ~requests.Response: The HTTP response returned by ``transport``. + + Raises: + ~google.cloud.storage.exceptions.DataCorruption: If the download's + checksum doesn't agree with server-computed checksum. + ValueError: If the current :class:`Download` has already + finished. + """ + method, _, payload, headers = self._prepare_request() + # NOTE: We assume "payload is None" but pass it along anyway. + request_kwargs = { + "data": payload, + "headers": headers, + "timeout": timeout, + } + if self._stream is not None: + request_kwargs["stream"] = True + + # Assign object generation if generation is specified in the media url. + if self._object_generation is None: + self._object_generation = _helpers._get_generation_from_url(self.media_url) + + # Wrap the request business logic in a function to be retried. 
+ def retriable_request(): + url = self.media_url + + # To restart an interrupted download, read from the offset of last byte + # received using a range request, and set object generation query param. + if self._bytes_downloaded > 0: + _download.add_bytes_range( + (self.start or 0) + self._bytes_downloaded, self.end, self._headers + ) + request_kwargs["headers"] = self._headers + + # Set object generation query param to ensure the same object content is requested. + if ( + self._object_generation is not None + and _helpers._get_generation_from_url(self.media_url) is None + ): + query_param = {"generation": self._object_generation} + url = _helpers.add_query_parameters(self.media_url, query_param) + + result = transport.request(method, url, **request_kwargs) + + # If a generation hasn't been specified, and this is the first response we get, let's record the + # generation. In future requests we'll specify the generation query param to avoid data races. + if self._object_generation is None: + self._object_generation = _helpers._parse_generation_header( + result, self._get_headers + ) + + self._process_response(result) + + # With decompressive transcoding, GCS serves back the whole file regardless of the range request, + # thus we reset the stream position to the start of the stream. + # See: https://cloud.google.com/storage/docs/transcoding#range + if self._stream is not None: + if _helpers._is_decompressive_transcoding(result, self._get_headers): + try: + self._stream.seek(0) + except Exception as exc: + msg = _STREAM_SEEK_ERROR.format(url) + raise Exception(msg) from exc + self._bytes_downloaded = 0 + + self._write_to_stream(result) + + return result + + return _request_helpers.wait_and_retry(retriable_request, self._retry_strategy) + + +class RawDownload(_request_helpers.RawRequestsMixin, _download.Download): + """Helper to manage downloading a raw resource from a Google API. + + "Slices" of the resource can be retrieved by specifying a range + with ``start`` and / or ``end``. However, in typical usage, neither + ``start`` nor ``end`` is expected to be provided. + + Args: + media_url (str): The URL containing the media to be downloaded. + stream (IO[bytes]): A write-able stream (i.e. file-like object) that + the downloaded resource can be written to. + start (int): The first byte in a range to be downloaded. If not + provided, but ``end`` is provided, will download from the + beginning to ``end`` of the media. + end (int): The last byte in a range to be downloaded. If not + provided, but ``start`` is provided, will download from the + ``start`` to the end of the media. + headers (Optional[Mapping[str, str]]): Extra headers that should + be sent with the request, e.g. headers for encrypted data. + checksum Optional([str]): The type of checksum to compute to verify + the integrity of the object. The response headers must contain + a checksum of the requested type. If the headers lack an + appropriate checksum (for instance in the case of transcoded or + ranged downloads where the remote service does not know the + correct checksum) an INFO-level log will be emitted. Supported + values are "md5", "crc32c", "auto" and None. The default is "auto", + which will try to detect if the C extension for crc32c is installed + and fall back to md5 otherwise. + retry (Optional[google.api_core.retry.Retry]): How to retry the + RPC. A None value will disable retries. 
A + google.api_core.retry.Retry value will enable retries, and the + object will configure backoff and timeout options. + + See the retry.py source code and docstrings in this package + (google.cloud.storage.retry) for information on retry types and how + to configure them. + + Attributes: + media_url (str): The URL containing the media to be downloaded. + start (Optional[int]): The first byte in a range to be downloaded. + end (Optional[int]): The last byte in a range to be downloaded. + """ + + def _write_to_stream(self, response): + """Write response body to a write-able stream. + + .. note: + + This method assumes that the ``_stream`` attribute is set on the + current download. + + Args: + response (~requests.Response): The HTTP response object. + + Raises: + ~google.cloud.storage.exceptions.DataCorruption: If the download's + checksum doesn't agree with server-computed checksum. + """ + # Retrieve the expected checksum only once for the download request, + # then compute and validate the checksum when the full download completes. + # Retried requests are range requests, and there's no way to detect + # data corruption for that byte range alone. + if self._expected_checksum is None and self._checksum_object is None: + # `_get_expected_checksum()` may return None even if a checksum was + # requested, in which case it will emit an info log _MISSING_CHECKSUM. + # If an invalid checksum type is specified, this will raise ValueError. + expected_checksum, checksum_object = _helpers._get_expected_checksum( + response, self._get_headers, self.media_url, checksum_type=self.checksum + ) + self._expected_checksum = expected_checksum + self._checksum_object = checksum_object + else: + expected_checksum = self._expected_checksum + checksum_object = self._checksum_object + + with response: + body_iter = response.raw.stream( + _request_helpers._SINGLE_GET_CHUNK_SIZE, decode_content=False + ) + for chunk in body_iter: + self._stream.write(chunk) + self._bytes_downloaded += len(chunk) + checksum_object.update(chunk) + response._content_consumed = True + + # Don't validate the checksum for partial responses. + if ( + expected_checksum is not None + and response.status_code != http.client.PARTIAL_CONTENT + ): + actual_checksum = _helpers.prepare_checksum_digest(checksum_object.digest()) + + if actual_checksum != expected_checksum: + msg = _CHECKSUM_MISMATCH.format( + self.media_url, + expected_checksum, + actual_checksum, + checksum_type=self.checksum.upper(), + ) + raise DataCorruption(response, msg) + + def consume( + self, + transport, + timeout=( + _request_helpers._DEFAULT_CONNECT_TIMEOUT, + _request_helpers._DEFAULT_READ_TIMEOUT, + ), + ): + """Consume the resource to be downloaded. + + If a ``stream`` is attached to this download, then the downloaded + resource will be written to the stream. + + Args: + transport (~requests.Session): A ``requests`` object which can + make authenticated requests. + timeout (Optional[Union[float, Tuple[float, float]]]): + The number of seconds to wait for the server response. + Depending on the retry strategy, a request may be repeated + several times using the same timeout each time. + + Can also be passed as a tuple (connect_timeout, read_timeout). + See :meth:`requests.Session.request` documentation for details. + + Returns: + ~requests.Response: The HTTP response returned by ``transport``. + + Raises: + ~google.cloud.storage.exceptions.DataCorruption: If the download's + checksum doesn't agree with server-computed checksum. 
+ ValueError: If the current :class:`Download` has already + finished. + """ + method, _, payload, headers = self._prepare_request() + # NOTE: We assume "payload is None" but pass it along anyway. + request_kwargs = { + "data": payload, + "headers": headers, + "timeout": timeout, + "stream": True, + } + + # Assign object generation if generation is specified in the media url. + if self._object_generation is None: + self._object_generation = _helpers._get_generation_from_url(self.media_url) + + # Wrap the request business logic in a function to be retried. + def retriable_request(): + url = self.media_url + + # To restart an interrupted download, read from the offset of last byte + # received using a range request, and set object generation query param. + if self._bytes_downloaded > 0: + _download.add_bytes_range( + (self.start or 0) + self._bytes_downloaded, self.end, self._headers + ) + request_kwargs["headers"] = self._headers + + # Set object generation query param to ensure the same object content is requested. + if ( + self._object_generation is not None + and _helpers._get_generation_from_url(self.media_url) is None + ): + query_param = {"generation": self._object_generation} + url = _helpers.add_query_parameters(self.media_url, query_param) + + result = transport.request(method, url, **request_kwargs) + + # If a generation hasn't been specified, and this is the first response we get, let's record the + # generation. In future requests we'll specify the generation query param to avoid data races. + if self._object_generation is None: + self._object_generation = _helpers._parse_generation_header( + result, self._get_headers + ) + + self._process_response(result) + + # With decompressive transcoding, GCS serves back the whole file regardless of the range request, + # thus we reset the stream position to the start of the stream. + # See: https://cloud.google.com/storage/docs/transcoding#range + if self._stream is not None: + if _helpers._is_decompressive_transcoding(result, self._get_headers): + try: + self._stream.seek(0) + except Exception as exc: + msg = _STREAM_SEEK_ERROR.format(url) + raise Exception(msg) from exc + self._bytes_downloaded = 0 + + self._write_to_stream(result) + + return result + + return _request_helpers.wait_and_retry(retriable_request, self._retry_strategy) + + +class ChunkedDownload(_request_helpers.RequestsMixin, _download.ChunkedDownload): + """Download a resource in chunks from a Google API. + + Args: + media_url (str): The URL containing the media to be downloaded. + chunk_size (int): The number of bytes to be retrieved in each + request. + stream (IO[bytes]): A write-able stream (i.e. file-like object) that + will be used to concatenate chunks of the resource as they are + downloaded. + start (int): The first byte in a range to be downloaded. If not + provided, defaults to ``0``. + end (int): The last byte in a range to be downloaded. If not + provided, will download to the end of the media. + headers (Optional[Mapping[str, str]]): Extra headers that should + be sent with each request, e.g. headers for data encryption + key headers. + retry (Optional[google.api_core.retry.Retry]): How to retry the + RPC. A None value will disable retries. A + google.api_core.retry.Retry value will enable retries, and the + object will configure backoff and timeout options. + + See the retry.py source code and docstrings in this package + (google.cloud.storage.retry) for information on retry types and how + to configure them. 
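By contrast with ``Download``, the ``RawDownload`` class above streams the bytes exactly as stored (``decode_content=False``), so a gzip-encoded object is written to the stream without decompression and the stored checksum can be validated directly. A brief sketch under the same illustrative assumptions as before::

    import io

    import google.auth
    import google.auth.transport.requests as tr_requests

    from google.cloud.storage._media.requests import RawDownload

    ro_scope = "https://www.googleapis.com/auth/devstorage.read_only"
    credentials, _ = google.auth.default(scopes=(ro_scope,))
    transport = tr_requests.AuthorizedSession(credentials)

    media_url = (
        "https://storage.googleapis.com/download/storage/v1/b/"
        "my-bucket/o/archive.gz?alt=media"
    )
    stream = io.BytesIO()
    download = RawDownload(media_url, stream=stream, checksum="crc32c")
    download.consume(transport)
    raw_bytes = stream.getvalue()  # still gzip-compressed, exactly as stored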
+ + Attributes: + media_url (str): The URL containing the media to be downloaded. + start (Optional[int]): The first byte in a range to be downloaded. + end (Optional[int]): The last byte in a range to be downloaded. + chunk_size (int): The number of bytes to be retrieved in each request. + + Raises: + ValueError: If ``start`` is negative. + """ + + def consume_next_chunk( + self, + transport, + timeout=( + _request_helpers._DEFAULT_CONNECT_TIMEOUT, + _request_helpers._DEFAULT_READ_TIMEOUT, + ), + ): + """Consume the next chunk of the resource to be downloaded. + + Args: + transport (~requests.Session): A ``requests`` object which can + make authenticated requests. + timeout (Optional[Union[float, Tuple[float, float]]]): + The number of seconds to wait for the server response. + Depending on the retry strategy, a request may be repeated + several times using the same timeout each time. + + Can also be passed as a tuple (connect_timeout, read_timeout). + See :meth:`requests.Session.request` documentation for details. + + Returns: + ~requests.Response: The HTTP response returned by ``transport``. + + Raises: + ValueError: If the current download has finished. + """ + method, url, payload, headers = self._prepare_request() + + # Wrap the request business logic in a function to be retried. + def retriable_request(): + # NOTE: We assume "payload is None" but pass it along anyway. + result = transport.request( + method, + url, + data=payload, + headers=headers, + timeout=timeout, + ) + self._process_response(result) + return result + + return _request_helpers.wait_and_retry(retriable_request, self._retry_strategy) + + +class RawChunkedDownload(_request_helpers.RawRequestsMixin, _download.ChunkedDownload): + """Download a raw resource in chunks from a Google API. + + Args: + media_url (str): The URL containing the media to be downloaded. + chunk_size (int): The number of bytes to be retrieved in each + request. + stream (IO[bytes]): A write-able stream (i.e. file-like object) that + will be used to concatenate chunks of the resource as they are + downloaded. + start (int): The first byte in a range to be downloaded. If not + provided, defaults to ``0``. + end (int): The last byte in a range to be downloaded. If not + provided, will download to the end of the media. + headers (Optional[Mapping[str, str]]): Extra headers that should + be sent with each request, e.g. headers for data encryption + key headers. + retry (Optional[google.api_core.retry.Retry]): How to retry the + RPC. A None value will disable retries. A + google.api_core.retry.Retry value will enable retries, and the + object will configure backoff and timeout options. + + See the retry.py source code and docstrings in this package + (google.cloud.storage.retry) for information on retry types and how + to configure them. + + Attributes: + media_url (str): The URL containing the media to be downloaded. + start (Optional[int]): The first byte in a range to be downloaded. + end (Optional[int]): The last byte in a range to be downloaded. + chunk_size (int): The number of bytes to be retrieved in each request. + + Raises: + ValueError: If ``start`` is negative. + """ + + def consume_next_chunk( + self, + transport, + timeout=( + _request_helpers._DEFAULT_CONNECT_TIMEOUT, + _request_helpers._DEFAULT_READ_TIMEOUT, + ), + ): + """Consume the next chunk of the resource to be downloaded. + + Args: + transport (~requests.Session): A ``requests`` object which can + make authenticated requests. 
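A short sketch of the chunked variant defined above, retrieving a fixed number of bytes per request until the ranged responses report completion (names and sizes are illustrative)::

    import io

    import google.auth
    import google.auth.transport.requests as tr_requests

    from google.cloud.storage._media.requests import ChunkedDownload

    ro_scope = "https://www.googleapis.com/auth/devstorage.read_only"
    credentials, _ = google.auth.default(scopes=(ro_scope,))
    transport = tr_requests.AuthorizedSession(credentials)

    media_url = (
        "https://storage.googleapis.com/download/storage/v1/b/"
        "my-bucket/o/file.txt?alt=media"
    )
    chunk_size = 1024 * 1024  # one request per MiB
    stream = io.BytesIO()
    download = ChunkedDownload(media_url, chunk_size, stream)
    while not download.finished:
        download.consume_next_chunk(transport)
    # download.bytes_downloaded and download.total_bytes track progress.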
+ timeout (Optional[Union[float, Tuple[float, float]]]): + The number of seconds to wait for the server response. + Depending on the retry strategy, a request may be repeated + several times using the same timeout each time. + + Can also be passed as a tuple (connect_timeout, read_timeout). + See :meth:`requests.Session.request` documentation for details. + + Returns: + ~requests.Response: The HTTP response returned by ``transport``. + + Raises: + ValueError: If the current download has finished. + """ + method, url, payload, headers = self._prepare_request() + + # Wrap the request business logic in a function to be retried. + def retriable_request(): + # NOTE: We assume "payload is None" but pass it along anyway. + result = transport.request( + method, + url, + data=payload, + headers=headers, + stream=True, + timeout=timeout, + ) + self._process_response(result) + return result + + return _request_helpers.wait_and_retry(retriable_request, self._retry_strategy) + + +def _add_decoder(response_raw, checksum): + """Patch the ``_decoder`` on a ``urllib3`` response. + + This is so that we can intercept the compressed bytes before they are + decoded. + + Only patches if the content encoding is ``gzip`` or ``br``. + + Args: + response_raw (urllib3.response.HTTPResponse): The raw response for + an HTTP request. + checksum (object): + A checksum which will be updated with compressed bytes. + + Returns: + object: Either the original ``checksum`` if ``_decoder`` is not + patched, or a ``_DoNothingHash`` if the decoder is patched, since the + caller will no longer need to hash to decoded bytes. + """ + encoding = response_raw.headers.get("content-encoding", "").lower() + if encoding == "gzip": + response_raw._decoder = _GzipDecoder(checksum) + return _helpers._DoNothingHash() + # Only activate if brotli is installed + elif encoding == "br" and _BrotliDecoder: # type: ignore + response_raw._decoder = _BrotliDecoder(checksum) + return _helpers._DoNothingHash() + else: + return checksum + + +class _GzipDecoder(urllib3.response.GzipDecoder): + """Custom subclass of ``urllib3`` decoder for ``gzip``-ed bytes. + + Allows a checksum function to see the compressed bytes before they are + decoded. This way the checksum of the compressed value can be computed. + + Args: + checksum (object): + A checksum which will be updated with compressed bytes. + """ + + def __init__(self, checksum): + super().__init__() + self._checksum = checksum + + def decompress(self, data): + """Decompress the bytes. + + Args: + data (bytes): The compressed bytes to be decompressed. + + Returns: + bytes: The decompressed bytes from ``data``. + """ + self._checksum.update(data) + return super().decompress(data) + + +# urllib3.response.BrotliDecoder might not exist depending on whether brotli is +# installed. +if hasattr(urllib3.response, "BrotliDecoder"): + + class _BrotliDecoder: + """Handler for ``brotli`` encoded bytes. + + Allows a checksum function to see the compressed bytes before they are + decoded. This way the checksum of the compressed value can be computed. + + Because BrotliDecoder's decompress method is dynamically created in + urllib3, a subclass is not practical. Instead, this class creates a + captive urllib3.requests.BrotliDecoder instance and acts as a proxy. + + Args: + checksum (object): + A checksum which will be updated with compressed bytes. 
+ """ + + def __init__(self, checksum): + self._decoder = urllib3.response.BrotliDecoder() + self._checksum = checksum + + def decompress(self, data): + """Decompress the bytes. + + Args: + data (bytes): The compressed bytes to be decompressed. + + Returns: + bytes: The decompressed bytes from ``data``. + """ + self._checksum.update(data) + return self._decoder.decompress(data) + + def flush(self): + return self._decoder.flush() + +else: # pragma: NO COVER + _BrotliDecoder = None # type: ignore # pragma: NO COVER diff --git a/google/cloud/storage/_media/requests/upload.py b/google/cloud/storage/_media/requests/upload.py new file mode 100644 index 000000000..75d4c53da --- /dev/null +++ b/google/cloud/storage/_media/requests/upload.py @@ -0,0 +1,771 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Support for resumable uploads. + +Also supported here are simple (media) uploads and multipart +uploads that contain both metadata and a small file as payload. +""" + + +from google.cloud.storage._media import _upload +from google.cloud.storage._media.requests import _request_helpers + + +class SimpleUpload(_request_helpers.RequestsMixin, _upload.SimpleUpload): + """Upload a resource to a Google API. + + A **simple** media upload sends no metadata and completes the upload + in a single request. + + Args: + upload_url (str): The URL where the content will be uploaded. + headers (Optional[Mapping[str, str]]): Extra headers that should + be sent with the request, e.g. headers for encrypted data. + + Attributes: + upload_url (str): The URL where the content will be uploaded. + """ + + def transmit( + self, + transport, + data, + content_type, + timeout=( + _request_helpers._DEFAULT_CONNECT_TIMEOUT, + _request_helpers._DEFAULT_READ_TIMEOUT, + ), + ): + """Transmit the resource to be uploaded. + + Args: + transport (~requests.Session): A ``requests`` object which can + make authenticated requests. + data (bytes): The resource content to be uploaded. + content_type (str): The content type of the resource, e.g. a JPEG + image has content type ``image/jpeg``. + timeout (Optional[Union[float, Tuple[float, float]]]): + The number of seconds to wait for the server response. + Depending on the retry strategy, a request may be repeated + several times using the same timeout each time. + + Can also be passed as a tuple (connect_timeout, read_timeout). + See :meth:`requests.Session.request` documentation for details. + + Returns: + ~requests.Response: The HTTP response returned by ``transport``. + """ + method, url, payload, headers = self._prepare_request(data, content_type) + + # Wrap the request business logic in a function to be retried. 
+ def retriable_request(): + result = transport.request( + method, url, data=payload, headers=headers, timeout=timeout + ) + + self._process_response(result) + + return result + + return _request_helpers.wait_and_retry(retriable_request, self._retry_strategy) + + +class MultipartUpload(_request_helpers.RequestsMixin, _upload.MultipartUpload): + """Upload a resource with metadata to a Google API. + + A **multipart** upload sends both metadata and the resource in a single + (multipart) request. + + Args: + upload_url (str): The URL where the content will be uploaded. + headers (Optional[Mapping[str, str]]): Extra headers that should + be sent with the request, e.g. headers for encrypted data. + checksum Optional([str]): The type of checksum to compute to verify + the integrity of the object. The request metadata will be amended + to include the computed value. Using this option will override a + manually-set checksum value. Supported values are "md5", + "crc32c", "auto", and None. The default is "auto", which will try + to detect if the C extension for crc32c is installed and fall back + to md5 otherwise. + retry (Optional[google.api_core.retry.Retry]): How to retry the + RPC. A None value will disable retries. A + google.api_core.retry.Retry value will enable retries, and the + object will configure backoff and timeout options. + + See the retry.py source code and docstrings in this package + (google.cloud.storage.retry) for information on retry types and how + to configure them. + + Attributes: + upload_url (str): The URL where the content will be uploaded. + """ + + def transmit( + self, + transport, + data, + metadata, + content_type, + timeout=( + _request_helpers._DEFAULT_CONNECT_TIMEOUT, + _request_helpers._DEFAULT_READ_TIMEOUT, + ), + ): + """Transmit the resource to be uploaded. + + Args: + transport (~requests.Session): A ``requests`` object which can + make authenticated requests. + data (bytes): The resource content to be uploaded. + metadata (Mapping[str, str]): The resource metadata, such as an + ACL list. + content_type (str): The content type of the resource, e.g. a JPEG + image has content type ``image/jpeg``. + timeout (Optional[Union[float, Tuple[float, float]]]): + The number of seconds to wait for the server response. + Depending on the retry strategy, a request may be repeated + several times using the same timeout each time. + + Can also be passed as a tuple (connect_timeout, read_timeout). + See :meth:`requests.Session.request` documentation for details. + + Returns: + ~requests.Response: The HTTP response returned by ``transport``. + """ + method, url, payload, headers = self._prepare_request( + data, metadata, content_type + ) + + # Wrap the request business logic in a function to be retried. + def retriable_request(): + result = transport.request( + method, url, data=payload, headers=headers, timeout=timeout + ) + + self._process_response(result) + + return result + + return _request_helpers.wait_and_retry(retriable_request, self._retry_strategy) + + +class ResumableUpload(_request_helpers.RequestsMixin, _upload.ResumableUpload): + """Initiate and fulfill a resumable upload to a Google API. + + A **resumable** upload sends an initial request with the resource metadata + and then gets assigned an upload ID / upload URL to send bytes to. + Using the upload URL, the upload is then done in chunks (determined by + the user) until all bytes have been uploaded. 
+ + When constructing a resumable upload, only the resumable upload URL and + the chunk size are required: + + .. testsetup:: resumable-constructor + + bucket = 'bucket-foo' + + .. doctest:: resumable-constructor + + >>> from google.cloud.storage._media.requests import ResumableUpload + >>> + >>> url_template = ( + ... 'https://www.googleapis.com/upload/storage/v1/b/{bucket}/o?' + ... 'uploadType=resumable') + >>> upload_url = url_template.format(bucket=bucket) + >>> + >>> chunk_size = 3 * 1024 * 1024 # 3MB + >>> upload = ResumableUpload(upload_url, chunk_size) + + When initiating an upload (via :meth:`initiate`), the caller is expected + to pass the resource being uploaded as a file-like ``stream``. If the size + of the resource is explicitly known, it can be passed in directly: + + .. testsetup:: resumable-explicit-size + + import os + import tempfile + + import mock + import requests + import http.client + + from google.cloud.storage._media.requests import ResumableUpload + + upload_url = 'http://test.invalid' + chunk_size = 3 * 1024 * 1024 # 3MB + upload = ResumableUpload(upload_url, chunk_size) + + file_desc, filename = tempfile.mkstemp() + os.close(file_desc) + + data = b'some bytes!' + with open(filename, 'wb') as file_obj: + file_obj.write(data) + + fake_response = requests.Response() + fake_response.status_code = int(http.client.OK) + fake_response._content = b'' + resumable_url = 'http://test.invalid?upload_id=7up' + fake_response.headers['location'] = resumable_url + + post_method = mock.Mock(return_value=fake_response, spec=[]) + transport = mock.Mock(request=post_method, spec=['request']) + + .. doctest:: resumable-explicit-size + + >>> import os + >>> + >>> upload.total_bytes is None + True + >>> + >>> stream = open(filename, 'rb') + >>> total_bytes = os.path.getsize(filename) + >>> metadata = {'name': filename} + >>> response = upload.initiate( + ... transport, stream, metadata, 'text/plain', + ... total_bytes=total_bytes) + >>> response + + >>> + >>> upload.total_bytes == total_bytes + True + + .. testcleanup:: resumable-explicit-size + + os.remove(filename) + + If the stream is in a "final" state (i.e. it won't have any more bytes + written to it), the total number of bytes can be determined implicitly + from the ``stream`` itself: + + .. testsetup:: resumable-implicit-size + + import io + + import mock + import requests + import http.client + + from google.cloud.storage._media.requests import ResumableUpload + + upload_url = 'http://test.invalid' + chunk_size = 3 * 1024 * 1024 # 3MB + upload = ResumableUpload(upload_url, chunk_size) + + fake_response = requests.Response() + fake_response.status_code = int(http.client.OK) + fake_response._content = b'' + resumable_url = 'http://test.invalid?upload_id=7up' + fake_response.headers['location'] = resumable_url + + post_method = mock.Mock(return_value=fake_response, spec=[]) + transport = mock.Mock(request=post_method, spec=['request']) + + data = b'some MOAR bytes!' + metadata = {'name': 'some-file.jpg'} + content_type = 'image/jpeg' + + .. doctest:: resumable-implicit-size + + >>> stream = io.BytesIO(data) + >>> response = upload.initiate( + ... 
transport, stream, metadata, content_type) + >>> + >>> upload.total_bytes == len(data) + True + + If the size of the resource is **unknown** when the upload is initiated, + the ``stream_final`` argument can be used. This might occur if the + resource is being dynamically created on the client (e.g. application + logs). To use this argument: + + .. testsetup:: resumable-unknown-size + + import io + + import mock + import requests + import http.client + + from google.cloud.storage._media.requests import ResumableUpload + + upload_url = 'http://test.invalid' + chunk_size = 3 * 1024 * 1024 # 3MB + upload = ResumableUpload(upload_url, chunk_size) + + fake_response = requests.Response() + fake_response.status_code = int(http.client.OK) + fake_response._content = b'' + resumable_url = 'http://test.invalid?upload_id=7up' + fake_response.headers['location'] = resumable_url + + post_method = mock.Mock(return_value=fake_response, spec=[]) + transport = mock.Mock(request=post_method, spec=['request']) + + metadata = {'name': 'some-file.jpg'} + content_type = 'application/octet-stream' + + stream = io.BytesIO(b'data') + + .. doctest:: resumable-unknown-size + + >>> response = upload.initiate( + ... transport, stream, metadata, content_type, + ... stream_final=False) + >>> + >>> upload.total_bytes is None + True + + Args: + upload_url (str): The URL where the resumable upload will be initiated. + chunk_size (int): The size of each chunk used to upload the resource. + headers (Optional[Mapping[str, str]]): Extra headers that should + be sent with the :meth:`initiate` request, e.g. headers for + encrypted data. These **will not** be sent with + :meth:`transmit_next_chunk` or :meth:`recover` requests. + checksum Optional([str]): The type of checksum to compute to verify + the integrity of the object. After the upload is complete, the + server-computed checksum of the resulting object will be checked + and google.cloud.storage.exceptions.DataCorruption will be raised on + a mismatch. The corrupted file will not be deleted from the remote + host automatically. Supported values are "md5", "crc32c", "auto", + and None. The default is "auto", which will try to detect if the C + extension for crc32c is installed and fall back to md5 otherwise. + retry (Optional[google.api_core.retry.Retry]): How to retry the + RPC. A None value will disable retries. A + google.api_core.retry.Retry value will enable retries, and the + object will configure backoff and timeout options. + + See the retry.py source code and docstrings in this package + (google.cloud.storage.retry) for information on retry types and how + to configure them. + + Attributes: + upload_url (str): The URL where the content will be uploaded. + + Raises: + ValueError: If ``chunk_size`` is not a multiple of + :data:`.UPLOAD_CHUNK_SIZE`. + """ + + def initiate( + self, + transport, + stream, + metadata, + content_type, + total_bytes=None, + stream_final=True, + timeout=( + _request_helpers._DEFAULT_CONNECT_TIMEOUT, + _request_helpers._DEFAULT_READ_TIMEOUT, + ), + ): + """Initiate a resumable upload. + + By default, this method assumes your ``stream`` is in a "final" + state ready to transmit. However, ``stream_final=False`` can be used + to indicate that the size of the resource is not known. This can happen + if bytes are being dynamically fed into ``stream``, e.g. if the stream + is attached to application logs. 
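Putting the pieces of the docstring above together, a complete (illustrative) resumable flow initiates the session and then calls ``transmit_next_chunk`` until the upload reports completion::

    import io

    import google.auth
    import google.auth.transport.requests as tr_requests

    from google.cloud.storage import _media
    from google.cloud.storage._media.requests import ResumableUpload

    rw_scope = "https://www.googleapis.com/auth/devstorage.read_write"
    credentials, _ = google.auth.default(scopes=(rw_scope,))
    transport = tr_requests.AuthorizedSession(credentials)

    upload_url = (
        "https://storage.googleapis.com/upload/storage/v1/b/my-bucket/o"
        "?uploadType=resumable"
    )
    upload = ResumableUpload(upload_url, _media.UPLOAD_CHUNK_SIZE)

    data = b"x" * (2 * _media.UPLOAD_CHUNK_SIZE + 1)  # a bit over two chunks
    stream = io.BytesIO(data)
    metadata = {"name": "big-file.bin"}
    upload.initiate(transport, stream, metadata, "application/octet-stream")
    while not upload.finished:
        upload.transmit_next_chunk(transport)
    assert upload.bytes_uploaded == upload.total_bytes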
+ + If ``stream_final=False`` is used, :attr:`chunk_size` bytes will be + read from the stream every time :meth:`transmit_next_chunk` is called. + If one of those reads produces strictly fewer bites than the chunk + size, the upload will be concluded. + + Args: + transport (~requests.Session): A ``requests`` object which can + make authenticated requests. + stream (IO[bytes]): The stream (i.e. file-like object) that will + be uploaded. The stream **must** be at the beginning (i.e. + ``stream.tell() == 0``). + metadata (Mapping[str, str]): The resource metadata, such as an + ACL list. + content_type (str): The content type of the resource, e.g. a JPEG + image has content type ``image/jpeg``. + total_bytes (Optional[int]): The total number of bytes to be + uploaded. If specified, the upload size **will not** be + determined from the stream (even if ``stream_final=True``). + stream_final (Optional[bool]): Indicates if the ``stream`` is + "final" (i.e. no more bytes will be added to it). In this case + we determine the upload size from the size of the stream. If + ``total_bytes`` is passed, this argument will be ignored. + timeout (Optional[Union[float, Tuple[float, float]]]): + The number of seconds to wait for the server response. + Depending on the retry strategy, a request may be repeated + several times using the same timeout each time. + + Can also be passed as a tuple (connect_timeout, read_timeout). + See :meth:`requests.Session.request` documentation for details. + + Returns: + ~requests.Response: The HTTP response returned by ``transport``. + """ + method, url, payload, headers = self._prepare_initiate_request( + stream, + metadata, + content_type, + total_bytes=total_bytes, + stream_final=stream_final, + ) + + # Wrap the request business logic in a function to be retried. + def retriable_request(): + result = transport.request( + method, url, data=payload, headers=headers, timeout=timeout + ) + + self._process_initiate_response(result) + + return result + + return _request_helpers.wait_and_retry(retriable_request, self._retry_strategy) + + def transmit_next_chunk( + self, + transport, + timeout=( + _request_helpers._DEFAULT_CONNECT_TIMEOUT, + _request_helpers._DEFAULT_READ_TIMEOUT, + ), + ): + """Transmit the next chunk of the resource to be uploaded. + + If the current upload was initiated with ``stream_final=False``, + this method will dynamically determine if the upload has completed. + The upload will be considered complete if the stream produces + fewer than :attr:`chunk_size` bytes when a chunk is read from it. + + In the case of failure, an exception is thrown that preserves the + failed response: + + .. testsetup:: bad-response + + import io + + import mock + import requests + import http.client + + from google.cloud.storage import _media + import google.cloud.storage._media.requests.upload as upload_mod + + transport = mock.Mock(spec=['request']) + fake_response = requests.Response() + fake_response.status_code = int(http.client.BAD_REQUEST) + transport.request.return_value = fake_response + + upload_url = 'http://test.invalid' + upload = upload_mod.ResumableUpload( + upload_url, _media.UPLOAD_CHUNK_SIZE) + # Fake that the upload has been initiate()-d + data = b'data is here' + upload._stream = io.BytesIO(data) + upload._total_bytes = len(data) + upload._resumable_url = 'http://test.invalid?upload_id=nope' + + .. 
doctest:: bad-response + :options: +NORMALIZE_WHITESPACE + + >>> error = None + >>> try: + ... upload.transmit_next_chunk(transport) + ... except _media.InvalidResponse as caught_exc: + ... error = caught_exc + ... + >>> error + InvalidResponse('Request failed with status code', 400, + 'Expected one of', , ) + >>> error.response + + + Args: + transport (~requests.Session): A ``requests`` object which can + make authenticated requests. + timeout (Optional[Union[float, Tuple[float, float]]]): + The number of seconds to wait for the server response. + Depending on the retry strategy, a request may be repeated + several times using the same timeout each time. + + Can also be passed as a tuple (connect_timeout, read_timeout). + See :meth:`requests.Session.request` documentation for details. + + Returns: + ~requests.Response: The HTTP response returned by ``transport``. + + Raises: + ~google.cloud.storage.exceptions.InvalidResponse: If the status + code is not 200 or http.client.PERMANENT_REDIRECT. + ~google.cloud.storage.exceptions.DataCorruption: If this is the final + chunk, a checksum validation was requested, and the checksum + does not match or is not available. + """ + method, url, payload, headers = self._prepare_request() + + # Wrap the request business logic in a function to be retried. + def retriable_request(): + result = transport.request( + method, url, data=payload, headers=headers, timeout=timeout + ) + + self._process_resumable_response(result, len(payload)) + + return result + + return _request_helpers.wait_and_retry(retriable_request, self._retry_strategy) + + def recover(self, transport): + """Recover from a failure and check the status of the current upload. + + This will verify the progress with the server and make sure the + current upload is in a valid state before :meth:`transmit_next_chunk` + can be used again. See https://cloud.google.com/storage/docs/performing-resumable-uploads#status-check + for more information. + + This method can be used when a :class:`ResumableUpload` is in an + :attr:`~ResumableUpload.invalid` state due to a request failure. + + Args: + transport (~requests.Session): A ``requests`` object which can + make authenticated requests. + + Returns: + ~requests.Response: The HTTP response returned by ``transport``. + """ + timeout = ( + _request_helpers._DEFAULT_CONNECT_TIMEOUT, + _request_helpers._DEFAULT_READ_TIMEOUT, + ) + + method, url, payload, headers = self._prepare_recover_request() + # NOTE: We assume "payload is None" but pass it along anyway. + + # Wrap the request business logic in a function to be retried. + def retriable_request(): + result = transport.request( + method, url, data=payload, headers=headers, timeout=timeout + ) + + self._process_recover_response(result) + + return result + + return _request_helpers.wait_and_retry(retriable_request, self._retry_strategy) + + +class XMLMPUContainer(_request_helpers.RequestsMixin, _upload.XMLMPUContainer): + """Initiate and close an upload using the XML MPU API. + + An XML MPU sends an initial request and then receives an upload ID. + Using the upload ID, the upload is then done in numbered parts and the + parts can be uploaded concurrently. + + In order to avoid concurrency issues with this container object, the + uploading of individual parts is handled separately, by XMLMPUPart objects + spawned from this container class. 
The XMLMPUPart objects are not + necessarily in the same process as the container, so they do not update the + container automatically. + + MPUs are sometimes referred to as "Multipart Uploads", which is ambiguous + given the JSON multipart upload, so the abbreviation "MPU" will be used + throughout. + + See: https://cloud.google.com/storage/docs/multipart-uploads + + Args: + upload_url (str): The URL of the object (without query parameters). The + initiate, PUT, and finalization requests will all use this URL, with + varying query parameters. + headers (Optional[Mapping[str, str]]): Extra headers that should + be sent with the :meth:`initiate` request, e.g. headers for + encrypted data. These headers will be propagated to individual + XMLMPUPart objects spawned from this container as well. + retry (Optional[google.api_core.retry.Retry]): How to retry the + RPC. A None value will disable retries. A + google.api_core.retry.Retry value will enable retries, and the + object will configure backoff and timeout options. + + See the retry.py source code and docstrings in this package + (google.cloud.storage.retry) for information on retry types and how + to configure them. + + Attributes: + upload_url (str): The URL where the content will be uploaded. + upload_id (Optional(int)): The ID of the upload from the initialization + response. + """ + + def initiate( + self, + transport, + content_type, + timeout=( + _request_helpers._DEFAULT_CONNECT_TIMEOUT, + _request_helpers._DEFAULT_READ_TIMEOUT, + ), + ): + """Initiate an MPU and record the upload ID. + + Args: + transport (object): An object which can make authenticated + requests. + content_type (str): The content type of the resource, e.g. a JPEG + image has content type ``image/jpeg``. + timeout (Optional[Union[float, Tuple[float, float]]]): + The number of seconds to wait for the server response. + Depending on the retry strategy, a request may be repeated + several times using the same timeout each time. + + Can also be passed as a tuple (connect_timeout, read_timeout). + See :meth:`requests.Session.request` documentation for details. + + Returns: + ~requests.Response: The HTTP response returned by ``transport``. + """ + + method, url, payload, headers = self._prepare_initiate_request( + content_type, + ) + + # Wrap the request business logic in a function to be retried. + def retriable_request(): + result = transport.request( + method, url, data=payload, headers=headers, timeout=timeout + ) + + self._process_initiate_response(result) + + return result + + return _request_helpers.wait_and_retry(retriable_request, self._retry_strategy) + + def finalize( + self, + transport, + timeout=( + _request_helpers._DEFAULT_CONNECT_TIMEOUT, + _request_helpers._DEFAULT_READ_TIMEOUT, + ), + ): + """Finalize an MPU request with all the parts. + + Args: + transport (object): An object which can make authenticated + requests. + timeout (Optional[Union[float, Tuple[float, float]]]): + The number of seconds to wait for the server response. + Depending on the retry strategy, a request may be repeated + several times using the same timeout each time. + + Can also be passed as a tuple (connect_timeout, read_timeout). + See :meth:`requests.Session.request` documentation for details. + + Returns: + ~requests.Response: The HTTP response returned by ``transport``. + """ + method, url, payload, headers = self._prepare_finalize_request() + + # Wrap the request business logic in a function to be retried. 
+ def retriable_request(): + result = transport.request( + method, url, data=payload, headers=headers, timeout=timeout + ) + + self._process_finalize_response(result) + + return result + + return _request_helpers.wait_and_retry(retriable_request, self._retry_strategy) + + def cancel( + self, + transport, + timeout=( + _request_helpers._DEFAULT_CONNECT_TIMEOUT, + _request_helpers._DEFAULT_READ_TIMEOUT, + ), + ): + """Cancel an MPU request and permanently delete any uploaded parts. + + This cannot be undone. + + Args: + transport (object): An object which can make authenticated + requests. + timeout (Optional[Union[float, Tuple[float, float]]]): + The number of seconds to wait for the server response. + Depending on the retry strategy, a request may be repeated + several times using the same timeout each time. + + Can also be passed as a tuple (connect_timeout, read_timeout). + See :meth:`requests.Session.request` documentation for details. + + Returns: + ~requests.Response: The HTTP response returned by ``transport``. + """ + method, url, payload, headers = self._prepare_cancel_request() + + # Wrap the request business logic in a function to be retried. + def retriable_request(): + result = transport.request( + method, url, data=payload, headers=headers, timeout=timeout + ) + + self._process_cancel_response(result) + + return result + + return _request_helpers.wait_and_retry(retriable_request, self._retry_strategy) + + +class XMLMPUPart(_request_helpers.RequestsMixin, _upload.XMLMPUPart): + def upload( + self, + transport, + timeout=( + _request_helpers._DEFAULT_CONNECT_TIMEOUT, + _request_helpers._DEFAULT_READ_TIMEOUT, + ), + ): + """Upload the part. + + Args: + transport (object): An object which can make authenticated + requests. + timeout (Optional[Union[float, Tuple[float, float]]]): + The number of seconds to wait for the server response. + Depending on the retry strategy, a request may be repeated + several times using the same timeout each time. + + Can also be passed as a tuple (connect_timeout, read_timeout). + See :meth:`requests.Session.request` documentation for details. + + Returns: + ~requests.Response: The HTTP response returned by ``transport``. + """ + method, url, payload, headers = self._prepare_upload_request() + + # Wrap the request business logic in a function to be retried. + def retriable_request(): + result = transport.request( + method, url, data=payload, headers=headers, timeout=timeout + ) + + self._process_upload_response(result) + + return result + + return _request_helpers.wait_and_retry(retriable_request, self._retry_strategy) diff --git a/google/cloud/storage/_opentelemetry_tracing.py b/google/cloud/storage/_opentelemetry_tracing.py new file mode 100644 index 000000000..3416081cd --- /dev/null +++ b/google/cloud/storage/_opentelemetry_tracing.py @@ -0,0 +1,119 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Manages OpenTelemetry tracing span creation and handling. 
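A schematic of the XML MPU lifecycle implemented above. The method sequence (``initiate``, then part ``upload`` and ``register_part``, then ``finalize`` or ``cancel``) follows the classes in this diff; the constructor arguments, XML endpoint and byte range shown are assumptions for illustration only::

    import google.auth
    import google.auth.transport.requests as tr_requests

    from google.cloud.storage._media.requests import XMLMPUContainer, XMLMPUPart

    rw_scope = "https://www.googleapis.com/auth/devstorage.read_write"
    credentials, _ = google.auth.default(scopes=(rw_scope,))
    transport = tr_requests.AuthorizedSession(credentials)

    # Assumed XML API object URL and local filename.
    object_url = "https://my-bucket.storage.googleapis.com/big-file.bin"
    filename = "big-file.bin"

    container = XMLMPUContainer(object_url, filename)
    container.initiate(transport, "application/octet-stream")

    # Parts may be uploaded concurrently; one part is shown for brevity.
    part = XMLMPUPart(
        object_url,
        container.upload_id,
        filename,
        start=0,
        end=8 * 1024 * 1024,  # assumed part boundary
        part_number=1,
    )
    part.upload(transport)
    container.register_part(part.part_number, part.etag)

    container.finalize(transport)  # or container.cancel(transport) to abandon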
This is a PREVIEW FEATURE: Coverage and functionality may change.""" + +import logging +import os + +from contextlib import contextmanager + +from google.api_core import exceptions as api_exceptions +from google.api_core import retry as api_retry +from google.cloud.storage import __version__ +from google.cloud.storage.retry import ConditionalRetryPolicy + + +ENABLE_OTEL_TRACES_ENV_VAR = "ENABLE_GCS_PYTHON_CLIENT_OTEL_TRACES" +_DEFAULT_ENABLE_OTEL_TRACES_VALUE = False + +enable_otel_traces = os.environ.get( + ENABLE_OTEL_TRACES_ENV_VAR, _DEFAULT_ENABLE_OTEL_TRACES_VALUE +) +logger = logging.getLogger(__name__) + +try: + from opentelemetry import trace + + HAS_OPENTELEMETRY = True + +except ImportError: + logger.debug( + "This service is instrumented using OpenTelemetry. " + "OpenTelemetry or one of its components could not be imported; " + "please add compatible versions of opentelemetry-api and " + "opentelemetry-instrumentation packages in order to get Storage " + "Tracing data." + ) + HAS_OPENTELEMETRY = False + +_default_attributes = { + "rpc.service": "CloudStorage", + "rpc.system": "http", + "user_agent.original": f"gcloud-python/{__version__}", +} + +_cloud_trace_adoption_attrs = { + "gcp.client.service": "storage", + "gcp.client.version": __version__, + "gcp.client.repo": "googleapis/python-storage", +} + + +@contextmanager +def create_trace_span(name, attributes=None, client=None, api_request=None, retry=None): + """Creates a context manager for a new span and set it as the current span + in the configured tracer. If no configuration exists yields None.""" + if not HAS_OPENTELEMETRY or not enable_otel_traces: + yield None + return + + tracer = trace.get_tracer(__name__) + final_attributes = _get_final_attributes(attributes, client, api_request, retry) + # Yield new span. 
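As a usage note for the preview tracing module above: tracing is gated on the environment variable at import time, and ``create_trace_span`` degrades to yielding ``None`` when OpenTelemetry is unavailable or tracing is disabled. A hedged sketch (the span name and extra attribute are illustrative)::

    import os

    # Must be set before google.cloud.storage is first imported, because the
    # flag is read when the module loads.
    os.environ["ENABLE_GCS_PYTHON_CLIENT_OTEL_TRACES"] = "true"

    from google.cloud.storage import _opentelemetry_tracing

    with _opentelemetry_tracing.create_trace_span(
        name="Storage.Bucket.exists",            # illustrative span name
        attributes={"gcp.example.note": "demo"},  # merged with the defaults
    ) as span:
        pass  # the traced storage operation would run here; span may be None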
+ with tracer.start_as_current_span( + name=name, kind=trace.SpanKind.CLIENT, attributes=final_attributes + ) as span: + try: + yield span + except api_exceptions.GoogleAPICallError as error: + span.set_status(trace.Status(trace.StatusCode.ERROR)) + span.record_exception(error) + raise + + +def _get_final_attributes(attributes=None, client=None, api_request=None, retry=None): + collected_attr = _default_attributes.copy() + collected_attr.update(_cloud_trace_adoption_attrs) + if api_request: + collected_attr.update(_set_api_request_attr(api_request, client)) + if isinstance(retry, api_retry.Retry): + collected_attr.update(_set_retry_attr(retry)) + if isinstance(retry, ConditionalRetryPolicy): + collected_attr.update( + _set_retry_attr(retry.retry_policy, retry.conditional_predicate) + ) + if attributes: + collected_attr.update(attributes) + final_attributes = {k: v for k, v in collected_attr.items() if v is not None} + return final_attributes + + +def _set_api_request_attr(request, client): + attr = {} + if request.get("method"): + attr["http.request.method"] = request.get("method") + if request.get("path"): + path = request.get("path") + full_path = f"{client._connection.API_BASE_URL}{path}" + attr["url.full"] = full_path + if request.get("timeout"): + attr["connect_timeout,read_timeout"] = request.get("timeout") + return attr + + +def _set_retry_attr(retry, conditional_predicate=None): + predicate = conditional_predicate if conditional_predicate else retry._predicate + retry_info = f"multiplier{retry._multiplier}/deadline{retry._deadline}/max{retry._maximum}/initial{retry._initial}/predicate{predicate}" + return {"retry": retry_info} diff --git a/google/cloud/storage/_signing.py b/google/cloud/storage/_signing.py index a2b7209bc..9f47e1a6e 100644 --- a/google/cloud/storage/_signing.py +++ b/google/cloud/storage/_signing.py @@ -28,9 +28,15 @@ from google.auth import exceptions from google.auth.transport import requests from google.cloud import _helpers +from google.cloud.storage._helpers import _DEFAULT_UNIVERSE_DOMAIN +from google.cloud.storage._helpers import _NOW +from google.cloud.storage._helpers import _UTC +from google.cloud.storage.retry import DEFAULT_RETRY -NOW = datetime.datetime.utcnow # To be replaced by tests. +# `google.cloud.storage._signing.NOW` is deprecated. +# Use `_NOW(_UTC)` instead. +NOW = datetime.datetime.utcnow SERVICE_ACCOUNT_URL = ( "https://googleapis.dev/python/google-api-core/latest/" @@ -103,13 +109,13 @@ def get_expiration_seconds_v2(expiration): """ # If it's a timedelta, add it to `now` in UTC. if isinstance(expiration, datetime.timedelta): - now = NOW().replace(tzinfo=_helpers.UTC) + now = _NOW(_UTC) expiration = now + expiration # If it's a datetime, convert to a timestamp. if isinstance(expiration, datetime.datetime): micros = _helpers._microseconds_from_datetime(expiration) - expiration = micros // 10 ** 6 + expiration = micros // 10**6 if not isinstance(expiration, int): raise TypeError( @@ -141,25 +147,21 @@ def get_expiration_seconds_v4(expiration): "timedelta. 
Got %s" % type(expiration) ) - now = NOW().replace(tzinfo=_helpers.UTC) + now = _NOW(_UTC) if isinstance(expiration, int): seconds = expiration if isinstance(expiration, datetime.datetime): - if expiration.tzinfo is None: expiration = expiration.replace(tzinfo=_helpers.UTC) - expiration = expiration - now if isinstance(expiration, datetime.timedelta): seconds = int(expiration.total_seconds()) if seconds > SEVEN_DAYS: - raise ValueError( - "Max allowed expiration interval is seven days {}".format(SEVEN_DAYS) - ) + raise ValueError(f"Max allowed expiration interval is seven days {SEVEN_DAYS}") return seconds @@ -252,7 +254,7 @@ def canonicalize_v2(method, resource, query_parameters, headers): for key, value in query_parameters.items() ) encoded_qp = urllib.parse.urlencode(normalized_qp) - canonical_resource = "{}?{}".format(resource, encoded_qp) + canonical_resource = f"{resource}?{encoded_qp}" return _Canonical(method, canonical_resource, normalized_qp, headers) @@ -271,6 +273,7 @@ def generate_signed_url_v2( query_parameters=None, service_account_email=None, access_token=None, + universe_domain=None, ): """Generate a V2 signed URL to provide query-string auth'n to a resource. @@ -284,15 +287,11 @@ def generate_signed_url_v2( .. note:: If you are on Google Compute Engine, you can't generate a signed URL. - Follow `Issue 922`_ for updates on this. If you'd like to be able to - generate a signed URL from GCE, you can use a standard service account - from a JSON file rather than a GCE service account. - - See headers `reference`_ for more details on optional arguments. + If you'd like to be able to generate a signed URL from GCE, you can use a + standard service account from a JSON file rather than a GCE service account. - .. _Issue 922: https://github.com/GoogleCloudPlatform/\ - google-cloud-python/issues/922 - .. _reference: https://cloud.google.com/storage/docs/reference-headers + See headers [reference](https://cloud.google.com/storage/docs/reference-headers) + for more details on optional arguments. :type credentials: :class:`google.auth.credentials.Signing` :param credentials: Credentials object with an associated private key to @@ -384,9 +383,13 @@ def generate_signed_url_v2( elements_to_sign.append(canonical.resource) string_to_sign = "\n".join(elements_to_sign) + # If you are on Google Compute Engine, you can't generate a signed URL. + # See https://github.com/googleapis/google-cloud-python/issues/922 # Set the right query parameters. if access_token and service_account_email: - signature = _sign_message(string_to_sign, access_token, service_account_email) + signature = _sign_message( + string_to_sign, access_token, service_account_email, universe_domain + ) signed_query_params = { "GoogleAccessId": service_account_email, "Expires": expiration_stamp, @@ -434,6 +437,7 @@ def generate_signed_url_v4( query_parameters=None, service_account_email=None, access_token=None, + universe_domain=None, _request_timestamp=None, # for testing only ): """Generate a V4 signed URL to provide query-string auth'n to a resource. @@ -448,16 +452,11 @@ def generate_signed_url_v4( .. note:: If you are on Google Compute Engine, you can't generate a signed URL. - Follow `Issue 922`_ for updates on this. 
If you'd like to be able to - generate a signed URL from GCE, you can use a standard service account - from a JSON file rather than a GCE service account. - - See headers `reference`_ for more details on optional arguments. - - .. _Issue 922: https://github.com/GoogleCloudPlatform/\ - google-cloud-python/issues/922 - .. _reference: https://cloud.google.com/storage/docs/reference-headers + If you'd like to be able to generate a signed URL from GCE,you can use a + standard service account from a JSON file rather than a GCE service account. + See headers [reference](https://cloud.google.com/storage/docs/reference-headers) + for more details on optional arguments. :type credentials: :class:`google.auth.credentials.Signing` :param credentials: Credentials object with an associated private key to @@ -476,7 +475,7 @@ def generate_signed_url_v4( ``tzinfo`` set, it will be assumed to be ``UTC``. :type api_access_endpoint: str - :param api_access_endpoint: (Optional) URI base. Defaults to + :param api_access_endpoint: URI base. Defaults to "https://storage.googleapis.com/" :type method: str @@ -545,13 +544,15 @@ def generate_signed_url_v4( request_timestamp = _request_timestamp datestamp = _request_timestamp[:8] + # If you are on Google Compute Engine, you can't generate a signed URL. + # See https://github.com/googleapis/google-cloud-python/issues/922 client_email = service_account_email if not access_token or not service_account_email: ensure_signed_credentials(credentials) client_email = credentials.signer_email - credential_scope = "{}/auto/storage/goog4_request".format(datestamp) - credential = "{}/{}".format(client_email, credential_scope) + credential_scope = f"{datestamp}/auto/storage/goog4_request" + credential = f"{client_email}/{credential_scope}" if headers is None: headers = {} @@ -628,7 +629,9 @@ def generate_signed_url_v4( string_to_sign = "\n".join(string_elements) if access_token and service_account_email: - signature = _sign_message(string_to_sign, access_token, service_account_email) + signature = _sign_message( + string_to_sign, access_token, service_account_email, universe_domain + ) signature_bytes = base64.b64decode(signature) signature = binascii.hexlify(signature_bytes).decode("ascii") else: @@ -646,14 +649,18 @@ def get_v4_now_dtstamps(): :rtype: str, str :returns: Current timestamp, datestamp. """ - now = NOW() + now = _NOW(_UTC).replace(tzinfo=None) timestamp = now.strftime("%Y%m%dT%H%M%SZ") datestamp = now.date().strftime("%Y%m%d") return timestamp, datestamp -def _sign_message(message, access_token, service_account_email): - +def _sign_message( + message, + access_token, + service_account_email, + universe_domain=_DEFAULT_UNIVERSE_DOMAIN, +): """Signs a message. 
:type message: str @@ -675,21 +682,26 @@ def _sign_message(message, access_token, service_account_email): message = _helpers._to_bytes(message) method = "POST" - url = "https://iamcredentials.googleapis.com/v1/projects/-/serviceAccounts/{}:signBlob?alt=json".format( - service_account_email - ) + url = f"https://iamcredentials.{universe_domain}/v1/projects/-/serviceAccounts/{service_account_email}:signBlob?alt=json" headers = { "Authorization": "Bearer " + access_token, "Content-type": "application/json", } body = json.dumps({"payload": base64.b64encode(message).decode("utf-8")}) - request = requests.Request() - response = request(url=url, method=method, body=body, headers=headers) + + def retriable_request(): + response = request(url=url, method=method, body=body, headers=headers) + return response + + # Apply the default retry object to the signBlob call. + retry = DEFAULT_RETRY + call = retry(retriable_request) + response = call() if response.status != http.client.OK: raise exceptions.TransportError( - "Error calling the IAM signBytes API: {}".format(response.data) + f"Error calling the IAM signBytes API: {response.data}" ) data = json.loads(response.data.decode("utf-8")) @@ -706,7 +718,7 @@ def _url_encode(query_params): :returns: URL encoded query params. """ params = [ - "{}={}".format(_quote_param(name), _quote_param(value)) + f"{_quote_param(name)}={_quote_param(value)}" for name, value in query_params.items() ] diff --git a/google/cloud/storage/acl.py b/google/cloud/storage/acl.py index b3b77766f..1384a5075 100644 --- a/google/cloud/storage/acl.py +++ b/google/cloud/storage/acl.py @@ -12,79 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Manipulate access control lists that Cloud Storage provides. - -:class:`google.cloud.storage.bucket.Bucket` has a getting method that creates -an ACL object under the hood, and you can interact with that using -:func:`google.cloud.storage.bucket.Bucket.acl`: - -.. literalinclude:: snippets.py - :start-after: [START client_bucket_acl] - :end-before: [END client_bucket_acl] - :dedent: 4 - - -Adding and removing permissions can be done with the following methods -(in increasing order of granularity): - -- :func:`ACL.all` - corresponds to access for all users. -- :func:`ACL.all_authenticated` corresponds - to access for all users that are signed into a Google account. -- :func:`ACL.domain` corresponds to access on a - per Google Apps domain (ie, ``example.com``). -- :func:`ACL.group` corresponds to access on a - per group basis (either by ID or e-mail address). -- :func:`ACL.user` corresponds to access on a - per user basis (either by ID or e-mail address). - -And you are able to ``grant`` and ``revoke`` the following roles: - -- **Reading**: - :func:`_ACLEntity.grant_read` and :func:`_ACLEntity.revoke_read` -- **Writing**: - :func:`_ACLEntity.grant_write` and :func:`_ACLEntity.revoke_write` -- **Owning**: - :func:`_ACLEntity.grant_owner` and :func:`_ACLEntity.revoke_owner` - -You can use any of these like any other factory method (these happen to -be :class:`_ACLEntity` factories): - -.. literalinclude:: snippets.py - :start-after: [START acl_user_settings] - :end-before: [END acl_user_settings] - :dedent: 4 - -After that, you can save any changes you make with the -:func:`google.cloud.storage.acl.ACL.save` method: - -.. 
literalinclude:: snippets.py - :start-after: [START acl_save] - :end-before: [END acl_save] - :dedent: 4 - -You can alternatively save any existing :class:`google.cloud.storage.acl.ACL` -object (whether it was created by a factory method or not) from a -:class:`google.cloud.storage.bucket.Bucket`: - -.. literalinclude:: snippets.py - :start-after: [START acl_save_bucket] - :end-before: [END acl_save_bucket] - :dedent: 4 - -To get the list of ``entity`` and ``role`` for each unique pair, the -:class:`ACL` class is iterable: - -.. literalinclude:: snippets.py - :start-after: [START acl_print] - :end-before: [END acl_print] - :dedent: 4 - -This list of tuples can be used as the ``entity`` and ``role`` fields -when sending metadata for ACLs to the API. -""" +"""Manage access to objects and buckets.""" from google.cloud.storage._helpers import _add_generation_match_parameters +from google.cloud.storage._opentelemetry_tracing import create_trace_span from google.cloud.storage.constants import _DEFAULT_TIMEOUT from google.cloud.storage.retry import DEFAULT_RETRY from google.cloud.storage.retry import DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED @@ -120,9 +51,7 @@ def __str__(self): return "{acl.type}-{acl.identifier}".format(acl=self) def __repr__(self): - return "".format( - acl=self, roles=", ".join(self.roles) - ) + return f"" def get_roles(self): """Get the list of roles permitted by this entity. @@ -209,7 +138,6 @@ class ACL(object): # Subclasses must override to provide these attributes (typically, # as properties). - client = None reload_path = None save_path = None user_project = None @@ -242,7 +170,7 @@ def validate_predefined(cls, predefined): """ predefined = cls.PREDEFINED_XML_ACLS.get(predefined, predefined) if predefined and predefined not in cls.PREDEFINED_JSON_ACLS: - raise ValueError("Invalid predefined ACL: %s" % (predefined,)) + raise ValueError(f"Invalid predefined ACL: {predefined}") return predefined def reset(self): @@ -285,7 +213,7 @@ def entity_from_dict(self, entity_dict): entity = self.entity(entity_type=entity_type, identifier=identifier) if not isinstance(entity, _ACLEntity): - raise ValueError("Invalid dictionary: %s" % entity_dict) + raise ValueError(f"Invalid dictionary: {entity_dict}") entity.grant(role) return entity @@ -432,6 +360,7 @@ def _require_client(self, client): client = self.client return client + @create_trace_span(name="Storage.ACL.reload") def reload(self, client=None, timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY): """Reload the ACL data from Cloud Storage. 
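The grant/save workflow that the removed module docstring walked through still applies; the hunks above only drop the snippet references and layer tracing decorators onto the existing methods. A minimal usage sketch, assuming fine-grained (non-uniform bucket-level) access control is enabled and that the bucket name and principal below are placeholders:

from google.cloud import storage

client = storage.Client()
bucket = client.bucket("my-bucket")  # placeholder bucket name

acl = bucket.acl
acl.reload()  # emits the "Storage.ACL.reload" span when tracing is enabled
acl.user("[email protected]").grant_read()  # placeholder principal
acl.save()  # emits the "Storage.ACL.save" span added further down

for entry in acl:  # one entity/role mapping per unique pair
    print(entry)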
@@ -460,7 +389,10 @@ def reload(self, client=None, timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY): self.entities.clear() found = client._get_resource( - path, query_params=query_params, timeout=timeout, retry=retry, + path, + query_params=query_params, + timeout=timeout, + retry=retry, ) self.loaded = True @@ -554,6 +486,7 @@ def _save( self.loaded = True + @create_trace_span(name="Storage.ACL.save") def save( self, acl=None, @@ -622,6 +555,7 @@ def save( retry=retry, ) + @create_trace_span(name="Storage.ACL.savePredefined") def save_predefined( self, predefined, @@ -687,6 +621,7 @@ def save_predefined( retry=retry, ) + @create_trace_span(name="Storage.ACL.clear") def clear( self, client=None, @@ -767,7 +702,7 @@ def client(self): @property def reload_path(self): """Compute the path for GET API requests for this ACL.""" - return "%s/%s" % (self.bucket.path, self._URL_PATH_ELEM) + return f"{self.bucket.path}/{self._URL_PATH_ELEM}" @property def save_path(self): @@ -806,7 +741,7 @@ def client(self): @property def reload_path(self): """Compute the path for GET API requests for this ACL.""" - return "%s/acl" % self.blob.path + return f"{self.blob.path}/acl" @property def save_path(self): @@ -817,3 +752,185 @@ def save_path(self): def user_project(self): """Compute the user project charged for API requests for this ACL.""" return self.blob.user_project + + def save( + self, + acl=None, + client=None, + if_generation_match=None, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + timeout=_DEFAULT_TIMEOUT, + retry=DEFAULT_RETRY, + ): + """Save this ACL for the current object. + + If :attr:`user_project` is set, bills the API request to that project. + + :type acl: :class:`google.cloud.storage.acl.ACL`, or a compatible list. + :param acl: The ACL object to save. If left blank, this will save + current entries. + + :type client: :class:`~google.cloud.storage.client.Client` or + ``NoneType`` + :param client: (Optional) The client to use. If not passed, falls back + to the ``client`` stored on the ACL's parent. + + :type if_generation_match: long + :param if_generation_match: + (Optional) See :ref:`using-if-generation-match` + + :type if_generation_not_match: long + :param if_generation_not_match: + (Optional) See :ref:`using-if-generation-not-match` + + :type if_metageneration_match: long + :param if_metageneration_match: + (Optional) See :ref:`using-if-metageneration-match` + + :type if_metageneration_not_match: long + :param if_metageneration_not_match: + (Optional) See :ref:`using-if-metageneration-not-match` + + :type timeout: float or tuple + :param timeout: + (Optional) The amount of time, in seconds, to wait + for the server response. See: :ref:`configuring_timeouts` + + :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy + :param retry: + (Optional) How to retry the RPC. See: :ref:`configuring_retries` + """ + super().save( + acl=acl, + client=client, + if_generation_match=if_generation_match, + if_generation_not_match=if_generation_not_match, + if_metageneration_match=if_metageneration_match, + if_metageneration_not_match=if_metageneration_not_match, + timeout=timeout, + retry=retry, + ) + + def save_predefined( + self, + predefined, + client=None, + if_generation_match=None, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + timeout=_DEFAULT_TIMEOUT, + retry=DEFAULT_RETRY, + ): + """Save this ACL for the current object using a predefined ACL. 
+ + If :attr:`user_project` is set, bills the API request to that project. + + :type predefined: str + :param predefined: An identifier for a predefined ACL. Must be one + of the keys in :attr:`PREDEFINED_JSON_ACLS` + or :attr:`PREDEFINED_XML_ACLS` (which will be + aliased to the corresponding JSON name). + If passed, `acl` must be None. + + :type client: :class:`~google.cloud.storage.client.Client` or + ``NoneType`` + :param client: (Optional) The client to use. If not passed, falls back + to the ``client`` stored on the ACL's parent. + + :type if_generation_match: long + :param if_generation_match: + (Optional) See :ref:`using-if-generation-match` + + :type if_generation_not_match: long + :param if_generation_not_match: + (Optional) See :ref:`using-if-generation-not-match` + + :type if_metageneration_match: long + :param if_metageneration_match: + (Optional) See :ref:`using-if-metageneration-match` + + :type if_metageneration_not_match: long + :param if_metageneration_not_match: + (Optional) See :ref:`using-if-metageneration-not-match` + + :type timeout: float or tuple + :param timeout: + (Optional) The amount of time, in seconds, to wait + for the server response. See: :ref:`configuring_timeouts` + + :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy + :param retry: + (Optional) How to retry the RPC. See: :ref:`configuring_retries` + """ + super().save_predefined( + predefined=predefined, + client=client, + if_generation_match=if_generation_match, + if_generation_not_match=if_generation_not_match, + if_metageneration_match=if_metageneration_match, + if_metageneration_not_match=if_metageneration_not_match, + timeout=timeout, + retry=retry, + ) + + def clear( + self, + client=None, + if_generation_match=None, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + timeout=_DEFAULT_TIMEOUT, + retry=DEFAULT_RETRY, + ): + """Remove all ACL entries. + + If :attr:`user_project` is set, bills the API request to that project. + + Note that this won't actually remove *ALL* the rules, but it + will remove all the non-default rules. In short, you'll still + have access to a bucket that you created even after you clear + ACL rules with this method. + + :type client: :class:`~google.cloud.storage.client.Client` or + ``NoneType`` + :param client: (Optional) The client to use. If not passed, falls back + to the ``client`` stored on the ACL's parent. + + :type if_generation_match: long + :param if_generation_match: + (Optional) See :ref:`using-if-generation-match` + + :type if_generation_not_match: long + :param if_generation_not_match: + (Optional) See :ref:`using-if-generation-not-match` + + :type if_metageneration_match: long + :param if_metageneration_match: + (Optional) See :ref:`using-if-metageneration-match` + + :type if_metageneration_not_match: long + :param if_metageneration_not_match: + (Optional) See :ref:`using-if-metageneration-not-match` + + :type timeout: float or tuple + :param timeout: + (Optional) The amount of time, in seconds, to wait + for the server response. See: :ref:`configuring_timeouts` + + :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy + :param retry: + (Optional) How to retry the RPC. 
See: :ref:`configuring_retries` + """ + super().clear( + client=client, + if_generation_match=if_generation_match, + if_generation_not_match=if_generation_not_match, + if_metageneration_match=if_metageneration_match, + if_metageneration_not_match=if_metageneration_not_match, + timeout=timeout, + retry=retry, + ) diff --git a/google/cloud/storage/batch.py b/google/cloud/storage/batch.py index cbc93397f..03a27fc23 100644 --- a/google/cloud/storage/batch.py +++ b/google/cloud/storage/batch.py @@ -13,7 +13,21 @@ # limitations under the License. """Batch updates / deletes of storage buckets / blobs. -See https://cloud.google.com/storage/docs/json_api/v1/how-tos/batch +A batch request is a single standard HTTP request containing multiple Cloud Storage JSON API calls. +Within this main HTTP request, there are multiple parts which each contain a nested HTTP request. +The body of each part is itself a complete HTTP request, with its own verb, URL, headers, and body. + +Note that Cloud Storage does not support batch operations for uploading or downloading. +Additionally, the current batch design does not support library methods whose return values +depend on the response payload. See more details in the [Sending Batch Requests official guide](https://cloud.google.com/storage/docs/batch). + +Examples of situations when you might want to use the Batch module: +``blob.patch()`` +``blob.update()`` +``blob.delete()`` +``bucket.delete_blob()`` +``bucket.patch()`` +``bucket.update()`` """ from email.encoders import encode_noop from email.generator import Generator @@ -57,10 +71,8 @@ def __init__(self, method, uri, headers, body): headers["Content-Length"] = len(body) if body is None: body = "" - lines = ["%s %s HTTP/1.1" % (method, uri)] - lines.extend( - ["%s: %s" % (key, value) for key, value in sorted(headers.items())] - ) + lines = [f"{method} {uri} HTTP/1.1"] + lines.extend([f"{key}: {value}" for key, value in sorted(headers.items())]) lines.append("") lines.append(body) payload = "\r\n".join(lines) @@ -86,7 +98,7 @@ def get(key, default=None): :raises: :class:`KeyError` always since the future is intended to fail as a dictionary. """ - raise KeyError("Cannot get(%r, default=%r) on a future" % (key, default)) + raise KeyError(f"Cannot get({key!r}, default={default!r}) on a future") def __getitem__(self, key): """Stand-in for dict[key]. @@ -97,7 +109,7 @@ def __getitem__(self, key): :raises: :class:`KeyError` always since the future is intended to fail as a dictionary. """ - raise KeyError("Cannot get item %r from a future" % (key,)) + raise KeyError(f"Cannot get item {key!r} from a future") def __setitem__(self, key, value): """Stand-in for dict[key] = value. @@ -111,7 +123,7 @@ def __setitem__(self, key, value): :raises: :class:`KeyError` always since the future is intended to fail as a dictionary. """ - raise KeyError("Cannot set %r -> %r on a future" % (key, value)) + raise KeyError(f"Cannot set {key!r} -> {value!r} on a future") class _FutureResponse(requests.Response): @@ -133,13 +145,26 @@ def content(self): class Batch(Connection): """Proxy an underlying connection, batching up change operations. + .. warning:: + + Cloud Storage does not support batch operations for uploading or downloading. + Additionally, the current batch design does not support library methods whose + return values depend on the response payload. 
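To make the warning above concrete, here is a minimal sketch of batching several metadata mutations into a single request; the bucket and object names are placeholders:

from google.cloud import storage

client = storage.Client()
bucket = client.bucket("my-bucket")  # placeholder bucket name

# Calls made inside the context manager are deferred and sent together as one
# multipart/mixed request when the block exits.
with client.batch():
    for name in ("a.txt", "b.txt", "c.txt"):
        blob = bucket.blob(name)
        blob.metadata = {"processed": "true"}
        blob.patch()

Per the new ``raise_exception`` argument, constructing the batch with ``raise_exception=False`` collects error responses alongside the successful ones instead of raising the final exception after all parts complete.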
+ :type client: :class:`google.cloud.storage.client.Client` :param client: The client to use for making connections. + + :type raise_exception: bool + :param raise_exception: + (Optional) Defaults to True. If True, instead of adding exceptions + to the list of return responses, the final exception will be raised. + Note that exceptions are unwrapped after all operations are complete + in success or failure, and only the last exception is raised. """ _MAX_BATCH_SIZE = 1000 - def __init__(self, client): + def __init__(self, client, raise_exception=True): api_endpoint = client._connection.API_BASE_URL client_info = client._connection._client_info super(Batch, self).__init__( @@ -147,6 +172,8 @@ def __init__(self, client): ) self._requests = [] self._target_objects = [] + self._responses = [] + self._raise_exception = raise_exception def _do_request( self, method, url, headers, data, target_object, timeout=_DEFAULT_TIMEOUT @@ -221,24 +248,34 @@ def _prepare_batch_request(self): _, body = payload.split("\n\n", 1) return dict(multi._headers), body, timeout - def _finish_futures(self, responses): + def _finish_futures(self, responses, raise_exception=True): """Apply all the batch responses to the futures created. :type responses: list of (headers, payload) tuples. :param responses: List of headers and payloads from each response in the batch. + :type raise_exception: bool + :param raise_exception: + (Optional) Defaults to True. If True, instead of adding exceptions + to the list of return responses, the final exception will be raised. + Note that exceptions are unwrapped after all operations are complete + in success or failure, and only the last exception is raised. + :raises: :class:`ValueError` if no requests have been deferred. """ # If a bad status occurs, we track it, but don't raise an exception # until all futures have been populated. + # If raise_exception=False, we add exceptions to the list of responses. exception_args = None if len(self._target_objects) != len(responses): # pragma: NO COVER raise ValueError("Expected a response for every request.") for target_object, subresponse in zip(self._target_objects, responses): - if not 200 <= subresponse.status_code < 300: + # For backwards compatibility, only the final exception will be raised. + # Set raise_exception=False to include all exceptions to the list of return responses. + if not 200 <= subresponse.status_code < 300 and raise_exception: exception_args = exception_args or subresponse elif target_object is not None: try: @@ -249,15 +286,22 @@ def _finish_futures(self, responses): if exception_args is not None: raise exceptions.from_http_response(exception_args) - def finish(self): + def finish(self, raise_exception=True): """Submit a single `multipart/mixed` request with deferred requests. + :type raise_exception: bool + :param raise_exception: + (Optional) Defaults to True. If True, instead of adding exceptions + to the list of return responses, the final exception will be raised. + Note that exceptions are unwrapped after all operations are complete + in success or failure, and only the last exception is raised. + :rtype: list of tuples :returns: one ``(headers, payload)`` tuple per deferred request. 
""" headers, body, timeout = self._prepare_batch_request() - url = "%s/batch/storage/v1" % self.API_BASE_URL + url = f"{self.API_BASE_URL}/batch/storage/v1" # Use the private ``_base_connection`` rather than the property # ``_connection``, since the property may be this @@ -271,7 +315,8 @@ def finish(self): raise exceptions.from_http_response(response) responses = list(_unpack_batch_response(response)) - self._finish_futures(responses) + self._finish_futures(responses, raise_exception=raise_exception) + self._responses = responses return responses def current(self): @@ -285,7 +330,7 @@ def __enter__(self): def __exit__(self, exc_type, exc_val, exc_tb): try: if exc_type is None: - self.finish() + self.finish(raise_exception=self._raise_exception) finally: self._client._pop_batch() @@ -332,7 +377,7 @@ def _unpack_batch_response(response): subresponse = requests.Response() subresponse.request = requests.Request( - method="BATCH", url="contentid://{}".format(content_id) + method="BATCH", url=f"contentid://{content_id}" ).prepare() subresponse.status_code = int(status) subresponse.headers.update(msg_headers) diff --git a/google/cloud/storage/blob.py b/google/cloud/storage/blob.py index 36a090af2..3cda582ca 100644 --- a/google/cloud/storage/blob.py +++ b/google/cloud/storage/blob.py @@ -15,18 +15,9 @@ # pylint: disable=too-many-lines """Create / interact with Google Cloud Storage blobs. - -.. _API reference docs: https://cloud.google.com/storage/docs/\ - json_api/v1/objects -.. _customer-supplied: https://cloud.google.com/storage/docs/\ - encryption#customer-supplied -.. _google-resumable-media: https://googleapis.github.io/\ - google-resumable-media-python/latest/\ - google.resumable_media.requests.html """ import base64 -import cgi import copy import hashlib from io import BytesIO @@ -35,6 +26,7 @@ import mimetypes import os import re +from email.parser import HeaderParser from urllib.parse import parse_qsl from urllib.parse import quote from urllib.parse import urlencode @@ -42,13 +34,12 @@ from urllib.parse import urlunsplit import warnings -from google import resumable_media -from google.resumable_media.requests import ChunkedDownload -from google.resumable_media.requests import Download -from google.resumable_media.requests import RawDownload -from google.resumable_media.requests import RawChunkedDownload -from google.resumable_media.requests import MultipartUpload -from google.resumable_media.requests import ResumableUpload +from google.cloud.storage._media.requests import ChunkedDownload +from google.cloud.storage._media.requests import Download +from google.cloud.storage._media.requests import RawDownload +from google.cloud.storage._media.requests import RawChunkedDownload +from google.cloud.storage._media.requests import MultipartUpload +from google.cloud.storage._media.requests import ResumableUpload from google.api_core.iam import Policy from google.cloud import exceptions @@ -63,10 +54,13 @@ from google.cloud.storage._helpers import _scalar_property from google.cloud.storage._helpers import _bucket_bound_hostname_url from google.cloud.storage._helpers import _raise_if_more_than_one_set -from google.cloud.storage._helpers import _api_core_retry_to_resumable_media_retry +from google.cloud.storage._helpers import _get_default_headers +from google.cloud.storage._helpers import _get_default_storage_base_url from google.cloud.storage._signing import 
generate_signed_url_v2 from google.cloud.storage._signing import generate_signed_url_v4 -from google.cloud.storage._helpers import _NUM_RETRIES_MESSAGE +from google.cloud.storage._helpers import _API_VERSION +from google.cloud.storage._helpers import _virtual_hosted_style_base_url +from google.cloud.storage._opentelemetry_tracing import create_trace_span from google.cloud.storage.acl import ACL from google.cloud.storage.acl import ObjectACL from google.cloud.storage.constants import _DEFAULT_TIMEOUT @@ -76,21 +70,23 @@ from google.cloud.storage.constants import NEARLINE_STORAGE_CLASS from google.cloud.storage.constants import REGIONAL_LEGACY_STORAGE_CLASS from google.cloud.storage.constants import STANDARD_STORAGE_CLASS +from google.cloud.storage.exceptions import DataCorruption +from google.cloud.storage.exceptions import InvalidResponse from google.cloud.storage.retry import ConditionalRetryPolicy from google.cloud.storage.retry import DEFAULT_RETRY from google.cloud.storage.retry import DEFAULT_RETRY_IF_ETAG_IN_JSON from google.cloud.storage.retry import DEFAULT_RETRY_IF_GENERATION_SPECIFIED -from google.cloud.storage.retry import DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED from google.cloud.storage.fileio import BlobReader from google.cloud.storage.fileio import BlobWriter -_API_ACCESS_ENDPOINT = "https://storage.googleapis.com" -_DEFAULT_CONTENT_TYPE = u"application/octet-stream" -_DOWNLOAD_URL_TEMPLATE = u"{hostname}/download/storage/v1{path}?alt=media" -_BASE_UPLOAD_TEMPLATE = u"{hostname}/upload/storage/v1{bucket_path}/o?uploadType=" -_MULTIPART_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + u"multipart" -_RESUMABLE_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + u"resumable" +_DEFAULT_CONTENT_TYPE = "application/octet-stream" +_DOWNLOAD_URL_TEMPLATE = "{hostname}/download/storage/{api_version}{path}?alt=media" +_BASE_UPLOAD_TEMPLATE = ( + "{hostname}/upload/storage/{api_version}{bucket_path}/o?uploadType=" +) +_MULTIPART_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + "multipart" +_RESUMABLE_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + "resumable" # NOTE: "acl" is also writeable but we defer ACL management to # the classes in the google.cloud.storage.acl module. _CONTENT_TYPE_FIELD = "contentType" @@ -105,6 +101,7 @@ "md5Hash", "metadata", "name", + "retention", "storageClass", ) _READ_LESS_THAN_SIZE = ( @@ -137,7 +134,13 @@ "Blob.download_as_string() is deprecated and will be removed in future. " "Use Blob.download_as_bytes() instead." ) - +_FROM_STRING_DEPRECATED = ( + "Blob.from_string() is deprecated and will be removed in future. " + "Use Blob.from_uri() instead." +) +_GS_URL_REGEX_PATTERN = re.compile( + r"(?Pgs)://(?P[a-z0-9_.-]+)/(?P.+)" +) _DEFAULT_CHUNKSIZE = 104857600 # 1024 * 1024 B * 100 = 100 MB _MAX_MULTIPART_SIZE = 8388608 # 8 MB @@ -319,7 +322,7 @@ def __repr__(self): else: bucket_name = None - return "" % (bucket_name, self.name, self.generation) + return f"" @property def path(self): @@ -376,18 +379,30 @@ def public_url(self): :rtype: `string` :returns: The public URL for this blob. """ + if self.client: + endpoint = self.client.api_endpoint + else: + endpoint = _get_default_storage_base_url() return "{storage_base_url}/{bucket_name}/{quoted_name}".format( - storage_base_url=_API_ACCESS_ENDPOINT, + storage_base_url=endpoint, bucket_name=self.bucket.name, quoted_name=_quote(self.name, safe=b"/~"), ) @classmethod - def from_string(cls, uri, client=None): + def from_uri(cls, uri, client=None): """Get a constructor for blob object by URI. + .. 
code-block:: python + + from google.cloud import storage + from google.cloud.storage.blob import Blob + client = storage.Client() + blob = Blob.from_uri("gs://bucket/object", client=client) + :type uri: str - :param uri: The blob uri pass to get blob object. + :param uri: The blob uri following a gs://bucket/object pattern. + Both a bucket and object name is required to construct a blob object. :type client: :class:`~google.cloud.storage.client.Client` :param client: @@ -396,28 +411,48 @@ def from_string(cls, uri, client=None): :rtype: :class:`google.cloud.storage.blob.Blob` :returns: The blob object created. - - Example: - Get a constructor for blob object by URI. - - >>> from google.cloud import storage - >>> from google.cloud.storage.blob import Blob - >>> client = storage.Client() - >>> blob = Blob.from_string("gs://bucket/object", client=client) """ from google.cloud.storage.bucket import Bucket - scheme, netloc, path, query, frag = urlsplit(uri) - if scheme != "gs": - raise ValueError("URI scheme must be gs") + match = _GS_URL_REGEX_PATTERN.match(uri) + if not match: + raise ValueError("URI pattern must be gs://bucket/object") + bucket = Bucket(client, name=match.group("bucket_name")) + return cls(match.group("object_name"), bucket) + + @classmethod + def from_string(cls, uri, client=None): + """(Deprecated) Get a constructor for blob object by URI. + + .. note:: + Deprecated alias for :meth:`from_uri`. + + .. code-block:: python + + from google.cloud import storage + from google.cloud.storage.blob import Blob + client = storage.Client() + blob = Blob.from_string("gs://bucket/object", client=client) + + :type uri: str + :param uri: The blob uri following a gs://bucket/object pattern. + Both a bucket and object name is required to construct a blob object. + + :type client: :class:`~google.cloud.storage.client.Client` + :param client: + (Optional) The client to use. Application code should + *always* pass ``client``. - bucket = Bucket(client, name=netloc) - return cls(path[1:], bucket) + :rtype: :class:`google.cloud.storage.blob.Blob` + :returns: The blob object created. + """ + warnings.warn(_FROM_STRING_DEPRECATED, PendingDeprecationWarning, stacklevel=2) + return Blob.from_uri(uri=uri, client=client) def generate_signed_url( self, expiration=None, - api_access_endpoint=_API_ACCESS_ENDPOINT, + api_access_endpoint=None, method="GET", content_md5=None, content_type=None, @@ -440,37 +475,24 @@ def generate_signed_url( .. note:: If you are on Google Compute Engine, you can't generate a signed - URL using GCE service account. Follow `Issue 50`_ for updates on - this. If you'd like to be able to generate a signed URL from GCE, + URL using GCE service account. + If you'd like to be able to generate a signed URL from GCE, you can use a standard service account from a JSON file rather than a GCE service account. - .. _Issue 50: https://github.com/GoogleCloudPlatform/\ - google-auth-library-python/issues/50 - If you have a blob that you want to allow access to for a set amount of time, you can use this method to generate a URL that is only valid within a certain time period. - If ``bucket_bound_hostname`` is set as an argument of :attr:`api_access_endpoint`, - ``https`` works only if using a ``CDN``. - - Example: - Generates a signed URL for this blob using bucket_bound_hostname and scheme. 
- - >>> from google.cloud import storage - >>> client = storage.Client() - >>> bucket = client.get_bucket('my-bucket-name') - >>> blob = bucket.get_blob('my-blob-name') - >>> url = blob.generate_signed_url(expiration='url-expiration-time', bucket_bound_hostname='mydomain.tld', - >>> version='v4') - >>> url = blob.generate_signed_url(expiration='url-expiration-time', bucket_bound_hostname='mydomain.tld', - >>> version='v4',scheme='https') # If using ``CDN`` + See a [code sample](https://cloud.google.com/storage/docs/samples/storage-generate-signed-url-v4#storage_generate_signed_url_v4-python). This is particularly useful if you don't want publicly accessible blobs, but don't want to require users to explicitly log in. + If ``bucket_bound_hostname`` is set as an argument of :attr:`api_access_endpoint`, + ``https`` works only if using a ``CDN``. + :type expiration: Union[Integer, datetime.datetime, datetime.timedelta] :param expiration: Point in time when the signed URL should expire. If a ``datetime`` @@ -478,7 +500,9 @@ def generate_signed_url( assumed to be ``UTC``. :type api_access_endpoint: str - :param api_access_endpoint: (Optional) URI base. + :param api_access_endpoint: (Optional) URI base, for instance + "https://storage.googleapis.com". If not specified, the client's + api_endpoint will be used. Incompatible with bucket_bound_hostname. :type method: str :param method: The HTTP verb that will be used when requesting the URL. @@ -551,13 +575,14 @@ def generate_signed_url( :param virtual_hosted_style: (Optional) If true, then construct the URL relative the bucket's virtual hostname, e.g., '.storage.googleapis.com'. + Incompatible with bucket_bound_hostname. :type bucket_bound_hostname: str :param bucket_bound_hostname: - (Optional) If passed, then construct the URL relative to the - bucket-bound hostname. Value can be a bare or with scheme, e.g., - 'example.com' or 'http://example.com'. See: - https://cloud.google.com/storage/docs/request-endpoints#cname + (Optional) If passed, then construct the URL relative to the bucket-bound hostname. + Value can be a bare or with scheme, e.g., 'example.com' or 'http://example.com'. + Incompatible with api_access_endpoint and virtual_hosted_style. + See: https://cloud.google.com/storage/docs/request-endpoints#cname :type scheme: str :param scheme: @@ -565,7 +590,7 @@ def generate_signed_url( hostname, use this value as the scheme. ``https`` will work only when using a CDN. Defaults to ``"http"``. - :raises: :exc:`ValueError` when version is invalid. + :raises: :exc:`ValueError` when version is invalid or mutually exclusive arguments are used. :raises: :exc:`TypeError` when expiration is not a valid type. :raises: :exc:`AttributeError` if credentials is not an instance of :class:`google.auth.credentials.Signing`. @@ -579,28 +604,43 @@ def generate_signed_url( elif version not in ("v2", "v4"): raise ValueError("'version' must be either 'v2' or 'v4'") + if ( + api_access_endpoint is not None or virtual_hosted_style + ) and bucket_bound_hostname: + raise ValueError( + "The bucket_bound_hostname argument is not compatible with " + "either api_access_endpoint or virtual_hosted_style." 
+ ) + + if api_access_endpoint is None: + client = self._require_client(client) + api_access_endpoint = client.api_endpoint + quoted_name = _quote(self.name, safe=b"/~") + # If you are on Google Compute Engine, you can't generate a signed URL + # using GCE service account. + # See https://github.com/googleapis/google-auth-library-python/issues/50 if virtual_hosted_style: - api_access_endpoint = "https://{bucket_name}.storage.googleapis.com".format( - bucket_name=self.bucket.name + api_access_endpoint = _virtual_hosted_style_base_url( + api_access_endpoint, self.bucket.name ) + resource = f"/{quoted_name}" elif bucket_bound_hostname: api_access_endpoint = _bucket_bound_hostname_url( bucket_bound_hostname, scheme ) + resource = f"/{quoted_name}" else: - resource = "/{bucket_name}/{quoted_name}".format( - bucket_name=self.bucket.name, quoted_name=quoted_name - ) - - if virtual_hosted_style or bucket_bound_hostname: - resource = "/{quoted_name}".format(quoted_name=quoted_name) + resource = f"/{self.bucket.name}/{quoted_name}" if credentials is None: - client = self._require_client(client) + client = self._require_client(client) # May be redundant, but that's ok. credentials = client._credentials + client = self._require_client(client) + universe_domain = client.universe_domain + if version == "v2": helper = generate_signed_url_v2 else: @@ -632,8 +672,10 @@ def generate_signed_url( query_parameters=query_parameters, service_account_email=service_account_email, access_token=access_token, + universe_domain=universe_domain, ) + @create_trace_span(name="Storage.Blob.exists") def exists( self, client=None, @@ -645,6 +687,7 @@ def exists( if_metageneration_not_match=None, timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY, + soft_deleted=None, ): """Determines whether or not this blob exists. @@ -689,6 +732,13 @@ def exists( :param retry: (Optional) How to retry the RPC. See: :ref:`configuring_retries` + :type soft_deleted: bool + :param soft_deleted: + (Optional) If True, looks for a soft-deleted object. Will only return True + if the object exists and is in a soft-deleted state. + :attr:`generation` is required to be set on the blob if ``soft_deleted`` is set to True. + See: https://cloud.google.com/storage/docs/soft-delete + :rtype: bool :returns: True if the blob exists in Cloud Storage. """ @@ -697,6 +747,8 @@ def exists( # minimize the returned payload. query_params = self._query_params query_params["fields"] = "name" + if soft_deleted is not None: + query_params["softDeleted"] = soft_deleted _add_generation_match_parameters( query_params, @@ -729,6 +781,7 @@ def exists( return False return True + @create_trace_span(name="Storage.Blob.delete") def delete( self, client=None, @@ -737,7 +790,7 @@ def delete( if_metageneration_match=None, if_metageneration_not_match=None, timeout=_DEFAULT_TIMEOUT, - retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED, + retry=DEFAULT_RETRY, ): """Deletes a blob from Cloud Storage. @@ -771,8 +824,21 @@ def delete( for the server response. See: :ref:`configuring_timeouts` :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy - :param retry: - (Optional) How to retry the RPC. See: :ref:`configuring_retries` + :param retry: (Optional) How to retry the RPC. A None value will disable + retries. A google.api_core.retry.Retry value will enable retries, + and the object will define retriable response codes and errors and + configure backoff and timeout options. 
+ + A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a + Retry object and activates it only if certain conditions are met. + This class exists to provide safe defaults for RPC calls that are + not technically safe to retry normally (due to potential data + duplication or other side-effects) but become safe to retry if a + condition such as if_generation_match is set. + + See the retry.py source code and docstrings in this package + (google.cloud.storage.retry) for information on retry types and how + to configure them. :raises: :class:`google.cloud.exceptions.NotFound` (propagated from @@ -845,9 +911,11 @@ def _get_download_url( name_value_pairs = [] if self.media_link is None: hostname = _get_host_name(client._connection) - base_url = _DOWNLOAD_URL_TEMPLATE.format(hostname=hostname, path=self.path) + base_url = _DOWNLOAD_URL_TEMPLATE.format( + hostname=hostname, path=self.path, api_version=_API_VERSION + ) if self.generation is not None: - name_value_pairs.append(("generation", "{:d}".format(self.generation))) + name_value_pairs.append(("generation", f"{self.generation:d}")) else: base_url = self.media_link @@ -915,12 +983,12 @@ def _do_download( end=None, raw_download=False, timeout=_DEFAULT_TIMEOUT, - checksum="md5", - retry=None, + checksum="auto", + retry=DEFAULT_RETRY, ): """Perform a download without any error handling. - This is intended to be called by :meth:`download_to_file` so it can + This is intended to be called by :meth:`_prep_and_do_download` so it can be wrapped with error handling / remapping. :type transport: @@ -961,32 +1029,39 @@ def _do_download( instance in the case of transcoded or ranged downloads where the remote service does not know the correct checksum, including downloads where chunk_size is set) an INFO-level log will be - emitted. Supported values are "md5", "crc32c" and None. The default - is "md5". + emitted. Supported values are "md5", "crc32c", "auto" and None. The + default is "auto", which will try to detect if the C extension for + crc32c is installed and fall back to md5 otherwise. :type retry: google.api_core.retry.Retry :param retry: (Optional) How to retry the RPC. A None value will disable retries. A google.api_core.retry.Retry value will enable retries, - and the object will configure backoff and timeout options. Custom - predicates (customizable error codes) are not supported for media - operations such as this one. + and the object will configure backoff and timeout options. This private method does not accept ConditionalRetryPolicy values because the information necessary to evaluate the policy is instead - evaluated in client.download_blob_to_file(). + evaluated in blob._prep_and_do_download(). See the retry.py source code and docstrings in this package (google.cloud.storage.retry) for information on retry types and how to configure them. 
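As a concrete counterpart to the retry guidance above, a small sketch of tuning the shared default retry and making the delete conditional so that retrying it is safe; the bucket and object names are placeholders:

from google.cloud import storage
from google.cloud.storage.retry import DEFAULT_RETRY

client = storage.Client()
blob = client.bucket("my-bucket").blob("my-object")  # placeholder names
blob.reload()  # populates blob.generation

# Derive a Retry with a shorter overall deadline and gentler backoff growth.
custom_retry = DEFAULT_RETRY.with_deadline(30.0).with_delay(
    initial=0.5, multiplier=1.5, maximum=5.0
)

# if_generation_match makes the delete conditional (and therefore idempotent),
# which is the situation ConditionalRetryPolicy is designed around.
blob.delete(if_generation_match=blob.generation, retry=custom_retry)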
""" - retry_strategy = _api_core_retry_to_resumable_media_retry(retry) + extra_attributes = { + "url.full": download_url, + "download.chunk_size": f"{self.chunk_size}", + "download.raw_download": raw_download, + "upload.checksum": f"{checksum}", + } + args = {"timeout": timeout} if self.chunk_size is None: if raw_download: klass = RawDownload + download_class = "RawDownload" else: klass = Download + download_class = "Download" download = klass( download_url, @@ -995,20 +1070,26 @@ def _do_download( start=start, end=end, checksum=checksum, + retry=retry, ) - download._retry_strategy = retry_strategy - response = download.consume(transport, timeout=timeout) - self._extract_headers_from_download(response) + with create_trace_span( + name=f"Storage.{download_class}/consume", + attributes=extra_attributes, + api_request=args, + ): + response = download.consume(transport, timeout=timeout) + self._extract_headers_from_download(response) else: - if checksum: msg = _CHUNKED_DOWNLOAD_CHECKSUM_MESSAGE.format(checksum) _logger.info(msg) if raw_download: klass = RawChunkedDownload + download_class = "RawChunkedDownload" else: klass = ChunkedDownload + download_class = "ChunkedDownload" download = klass( download_url, @@ -1017,12 +1098,18 @@ def _do_download( headers=headers, start=start if start else 0, end=end, + retry=retry, ) - download._retry_strategy = retry_strategy - while not download.finished: - download.consume_next_chunk(transport, timeout=timeout) + with create_trace_span( + name=f"Storage.{download_class}/consumeNextChunk", + attributes=extra_attributes, + api_request=args, + ): + while not download.finished: + download.consume_next_chunk(transport, timeout=timeout) + @create_trace_span(name="Storage.Blob.downloadToFile") def download_to_file( self, file_obj, @@ -1037,34 +1124,23 @@ def download_to_file( if_metageneration_match=None, if_metageneration_not_match=None, timeout=_DEFAULT_TIMEOUT, - checksum="md5", + checksum="auto", retry=DEFAULT_RETRY, ): - """DEPRECATED. Download the contents of this blob into a file-like object. + """Download the contents of this blob into a file-like object. .. note:: If the server-set property, :attr:`media_link`, is not yet initialized, makes an additional API request to load it. - Downloading a file that has been encrypted with a `customer-supplied`_ - encryption key: - - .. literalinclude:: snippets.py - :start-after: [START download_to_file] - :end-before: [END download_to_file] - :dedent: 4 - - The ``encryption_key`` should be a str or bytes with a length of at - least 32. - If the :attr:`chunk_size` of a current blob is `None`, will download data in single download request otherwise it will download the :attr:`chunk_size` of data in each request. For more fine-grained control over the download process, check out - `google-resumable-media`_. For example, this library allows - downloading **parts** of a blob rather than the whole thing. + [`google-resumable-media`](https://googleapis.dev/python/google-resumable-media/latest/index.html). + For example, this library allows downloading **parts** of a blob rather than the whole thing. If :attr:`user_project` is set on the bucket, bills the API request to that project. @@ -1124,8 +1200,9 @@ def download_to_file( instance in the case of transcoded or ranged downloads where the remote service does not know the correct checksum, including downloads where chunk_size is set) an INFO-level log will be - emitted. Supported values are "md5", "crc32c" and None. 
The default - is "md5". + emitted. Supported values are "md5", "crc32c", "auto" and None. The + default is "auto", which will try to detect if the C extension for + crc32c is installed and fall back to md5 otherwise. :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy :param retry: (Optional) How to retry the RPC. A None value will disable @@ -1144,18 +1221,12 @@ def download_to_file( (google.cloud.storage.retry) for information on retry types and how to configure them. - Media operations (downloads and uploads) do not support non-default - predicates in a Retry object. The default will always be used. Other - configuration changes for Retry objects such as delays and deadlines - are respected. - :raises: :class:`google.cloud.exceptions.NotFound` """ - client = self._require_client(client) - client.download_blob_to_file( - self, - file_obj=file_obj, + self._prep_and_do_download( + file_obj, + client=client, start=start, end=end, raw_download=raw_download, @@ -1170,6 +1241,34 @@ def download_to_file( retry=retry, ) + def _handle_filename_and_download(self, filename, *args, **kwargs): + """Download the contents of this blob into a named file. + + :type filename: str + :param filename: A filename to be passed to ``open``. + + For *args and **kwargs, refer to the documentation for download_to_filename() for more information. + """ + + try: + with open(filename, "wb") as file_obj: + self._prep_and_do_download( + file_obj, + *args, + **kwargs, + ) + + except (DataCorruption, NotFound): + # Delete the corrupt or empty downloaded file. + os.remove(filename) + raise + + updated = self.updated + if updated is not None: + mtime = updated.timestamp() + os.utime(file_obj.name, (mtime, mtime)) + + @create_trace_span(name="Storage.Blob.downloadToFilename") def download_to_filename( self, filename, @@ -1184,7 +1283,7 @@ def download_to_filename( if_metageneration_match=None, if_metageneration_not_match=None, timeout=_DEFAULT_TIMEOUT, - checksum="md5", + checksum="auto", retry=DEFAULT_RETRY, ): """Download the contents of this blob into a named file. @@ -1192,6 +1291,9 @@ def download_to_filename( If :attr:`user_project` is set on the bucket, bills the API request to that project. + See a [code sample](https://cloud.google.com/storage/docs/samples/storage-download-encrypted-file#storage_download_encrypted_file-python) + to download a file with a [`customer-supplied encryption key`](https://cloud.google.com/storage/docs/encryption#customer-supplied). + :type filename: str :param filename: A filename to be passed to ``open``. @@ -1247,8 +1349,9 @@ def download_to_filename( instance in the case of transcoded or ranged downloads where the remote service does not know the correct checksum, including downloads where chunk_size is set) an INFO-level log will be - emitted. Supported values are "md5", "crc32c" and None. The default - is "md5". + emitted. Supported values are "md5", "crc32c", "auto" and None. The + default is "auto", which will try to detect if the C extension for + crc32c is installed and fall back to md5 otherwise. :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy :param retry: (Optional) How to retry the RPC. A None value will disable @@ -1267,42 +1370,27 @@ def download_to_filename( (google.cloud.storage.retry) for information on retry types and how to configure them. 
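A short sketch of the checksum behaviour described above, using placeholder bucket, object, and file names:

from google.cloud import storage

client = storage.Client()
blob = client.bucket("my-bucket").blob("my-object")  # placeholder names

# "crc32c" forces CRC32C validation; the new "auto" default prefers crc32c
# when its C extension is available and otherwise falls back to md5.
blob.download_to_filename("/tmp/my-object", checksum="crc32c")
# On a checksum mismatch the partially written file is removed and
# DataCorruption is raised, per _handle_filename_and_download above.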
- Media operations (downloads and uploads) do not support non-default - predicates in a Retry object. The default will always be used. Other - configuration changes for Retry objects such as delays and deadlines - are respected. - :raises: :class:`google.cloud.exceptions.NotFound` """ - client = self._require_client(client) - try: - with open(filename, "wb") as file_obj: - client.download_blob_to_file( - self, - file_obj, - start=start, - end=end, - raw_download=raw_download, - if_etag_match=if_etag_match, - if_etag_not_match=if_etag_not_match, - if_generation_match=if_generation_match, - if_generation_not_match=if_generation_not_match, - if_metageneration_match=if_metageneration_match, - if_metageneration_not_match=if_metageneration_not_match, - timeout=timeout, - checksum=checksum, - retry=retry, - ) - except resumable_media.DataCorruption: - # Delete the corrupt downloaded file. - os.remove(filename) - raise - updated = self.updated - if updated is not None: - mtime = updated.timestamp() - os.utime(file_obj.name, (mtime, mtime)) + self._handle_filename_and_download( + filename, + client=client, + start=start, + end=end, + raw_download=raw_download, + if_etag_match=if_etag_match, + if_etag_not_match=if_etag_not_match, + if_generation_match=if_generation_match, + if_generation_not_match=if_generation_not_match, + if_metageneration_match=if_metageneration_match, + if_metageneration_not_match=if_metageneration_not_match, + timeout=timeout, + checksum=checksum, + retry=retry, + ) + @create_trace_span(name="Storage.Blob.downloadAsBytes") def download_as_bytes( self, client=None, @@ -1316,7 +1404,7 @@ def download_as_bytes( if_metageneration_match=None, if_metageneration_not_match=None, timeout=_DEFAULT_TIMEOUT, - checksum="md5", + checksum="auto", retry=DEFAULT_RETRY, ): """Download the contents of this blob as a bytes object. @@ -1376,8 +1464,9 @@ def download_as_bytes( instance in the case of transcoded or ranged downloads where the remote service does not know the correct checksum, including downloads where chunk_size is set) an INFO-level log will be - emitted. Supported values are "md5", "crc32c" and None. The default - is "md5". + emitted. Supported values are "md5", "crc32c", "auto" and None. The + default is "auto", which will try to detect if the C extension for + crc32c is installed and fall back to md5 otherwise. :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy :param retry: (Optional) How to retry the RPC. A None value will disable @@ -1396,21 +1485,17 @@ def download_as_bytes( (google.cloud.storage.retry) for information on retry types and how to configure them. - Media operations (downloads and uploads) do not support non-default - predicates in a Retry object. The default will always be used. Other - configuration changes for Retry objects such as delays and deadlines - are respected. - :rtype: bytes :returns: The data stored in this blob. :raises: :class:`google.cloud.exceptions.NotFound` """ - client = self._require_client(client) + string_buffer = BytesIO() - client.download_blob_to_file( - self, + + self._prep_and_do_download( string_buffer, + client=client, start=start, end=end, raw_download=raw_download, @@ -1426,6 +1511,7 @@ def download_as_bytes( ) return string_buffer.getvalue() + @create_trace_span(name="Storage.Blob.downloadAsString") def download_as_string( self, client=None, @@ -1510,11 +1596,6 @@ def download_as_string( (google.cloud.storage.retry) for information on retry types and how to configure them. 
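For completeness, a sketch of the in-memory download variants documented above, again with placeholder names:

from google.cloud import storage

client = storage.Client()
blob = client.bucket("my-bucket").blob("my-object")  # placeholder names

first_kib = blob.download_as_bytes(start=0, end=1023)  # end is the last byte to fetch
text = blob.download_as_text()  # decodes using the Content-Type charset, else UTF-8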
- Media operations (downloads and uploads) do not support non-default - predicates in a Retry object. The default will always be used. Other - configuration changes for Retry objects such as delays and deadlines - are respected. - :rtype: bytes :returns: The data stored in this blob. @@ -1538,6 +1619,7 @@ def download_as_string( retry=retry, ) + @create_trace_span(name="Storage.Blob.downloadAsText") def download_as_text( self, client=None, @@ -1625,11 +1707,6 @@ def download_as_text( (google.cloud.storage.retry) for information on retry types and how to configure them. - Media operations (downloads and uploads) do not support non-default - predicates in a Retry object. The default will always be used. Other - configuration changes for Retry objects such as delays and deadlines - are respected. - :rtype: text :returns: The data stored in this blob, decoded to text. """ @@ -1652,7 +1729,8 @@ def download_as_text( return data.decode(encoding) if self.content_type is not None: - _, params = cgi.parse_header(self.content_type) + msg = HeaderParser().parsestr("Content-Type: " + self.content_type) + params = dict(msg.get_params()[1:]) if "charset" in params: return data.decode(params["charset"]) @@ -1693,8 +1771,8 @@ def _get_writable_metadata(self): This is intended to be used when creating a new object / blob. - See the `API reference docs`_ for more information, the fields - marked as writable are: + See the [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects) + for more information, the fields marked as writable are: * ``acl`` * ``cacheControl`` @@ -1707,6 +1785,7 @@ def _get_writable_metadata(self): * ``md5Hash`` * ``metadata`` * ``name`` + * ``retention`` * ``storageClass`` For now, we don't support ``acl``, access control lists should be @@ -1720,7 +1799,7 @@ def _get_writable_metadata(self): return object_metadata - def _get_upload_arguments(self, content_type): + def _get_upload_arguments(self, client, content_type, filename=None, command=None): """Get required arguments for performing an upload. The content type returned will be determined in order of precedence: @@ -1732,6 +1811,12 @@ def _get_upload_arguments(self, content_type): :type content_type: str :param content_type: Type of content being uploaded (or :data:`None`). + :type command: str + :param command: + (Optional) Information about which interface for upload was used, + to be included in the X-Goog-API-Client header. Please leave as None + unless otherwise directed. + :rtype: tuple :returns: A triple of @@ -1739,9 +1824,16 @@ def _get_upload_arguments(self, content_type): * An object metadata dictionary * The ``content_type`` as a string (according to precedence) """ - headers = _get_encryption_headers(self._encryption_key) + content_type = self._get_content_type(content_type, filename=filename) + # Add any client attached custom headers to the upload headers. 
+ headers = { + **_get_default_headers( + client._connection.user_agent, content_type, command=command + ), + **_get_encryption_headers(self._encryption_key), + **client._extra_headers, + } object_metadata = self._get_writable_metadata() - content_type = self._get_content_type(content_type) return headers, object_metadata, content_type def _do_multipart_upload( @@ -1750,15 +1842,15 @@ def _do_multipart_upload( stream, content_type, size, - num_retries, predefined_acl, if_generation_match, if_generation_not_match, if_metageneration_match, if_metageneration_not_match, timeout=_DEFAULT_TIMEOUT, - checksum=None, + checksum="auto", retry=None, + command=None, ): """Perform a multipart upload. @@ -1786,15 +1878,6 @@ def _do_multipart_upload( ``stream``). If not provided, the upload will be concluded once ``stream`` is exhausted (or :data:`None`). - :type num_retries: int - :param num_retries: - Number of upload retries. By default, only uploads with - if_generation_match set will be retried, as uploads without the - argument are not guaranteed to be idempotent. Setting num_retries - will override this default behavior and guarantee retries even when - if_generation_match is not set. (Deprecated: This argument - will be removed in a future release.) - :type predefined_acl: str :param predefined_acl: (Optional) Predefined access control list @@ -1824,15 +1907,14 @@ def _do_multipart_upload( (Optional) The type of checksum to compute to verify the integrity of the object. The request metadata will be amended to include the computed value. Using this option will override a - manually-set checksum value. Supported values are "md5", - "crc32c" and None. The default is None. - + manually-set checksum value. Supported values are "md5", "crc32c", + "auto" and None. The default is "auto", which will try to detect if + the C extension for crc32c is installed and fall back to md5 + otherwise. :type retry: google.api_core.retry.Retry :param retry: (Optional) How to retry the RPC. A None value will disable retries. A google.api_core.retry.Retry value will enable retries, - and the object will configure backoff and timeout options. Custom - predicates (customizable error codes) are not supported for media - operations such as this one. + and the object will configure backoff and timeout options. This private method does not accept ConditionalRetryPolicy values because the information necessary to evaluate the policy is instead @@ -1842,6 +1924,12 @@ def _do_multipart_upload( (google.cloud.storage.retry) for information on retry types and how to configure them. + :type command: str + :param command: + (Optional) Information about which interface for upload was used, + to be included in the X-Goog-API-Client header. Please leave as None + unless otherwise directed. + :rtype: :class:`~requests.Response` :returns: The "200 OK" response object returned after the multipart upload request. 
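To illustrate when the multipart path above is taken, a sketch under the assumption (broadly, per the upload routing elsewhere in this module) that a small payload with no ``chunk_size`` goes out as a single multipart request, while a configured ``chunk_size`` switches the upload to the resumable protocol; all names are placeholders:

from google.cloud import storage

client = storage.Client()
bucket = client.bucket("my-bucket")  # placeholder bucket name

# Small payload, no chunk_size: one multipart request, CRC32C checksum attached.
bucket.blob("notes.txt").upload_from_string(
    "hello world", content_type="text/plain", checksum="crc32c"
)

# Setting a chunk_size (a multiple of 256 KiB) routes the upload through the
# resumable protocol instead.
big = bucket.blob("big.bin", chunk_size=10 * 1024 * 1024)
big.upload_from_filename("/tmp/big.bin", checksum="crc32c")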
@@ -1860,12 +1948,12 @@ def _do_multipart_upload( transport = self._get_transport(client) if "metadata" in self._properties and "metadata" not in self._changes: self._changes.add("metadata") - info = self._get_upload_arguments(content_type) + info = self._get_upload_arguments(client, content_type, command=command) headers, object_metadata, content_type = info hostname = _get_host_name(client._connection) base_url = _MULTIPART_URL_TEMPLATE.format( - hostname=hostname, bucket_path=self.bucket.path + hostname=hostname, bucket_path=self.bucket.path, api_version=_API_VERSION ) name_value_pairs = [] @@ -1901,17 +1989,26 @@ def _do_multipart_upload( ) upload_url = _add_query_parameters(base_url, name_value_pairs) - upload = MultipartUpload(upload_url, headers=headers, checksum=checksum) - - upload._retry_strategy = _api_core_retry_to_resumable_media_retry( - retry, num_retries + upload = MultipartUpload( + upload_url, headers=headers, checksum=checksum, retry=retry ) - response = upload.transmit( - transport, data, object_metadata, content_type, timeout=timeout - ) + extra_attributes = { + "url.full": upload_url, + "upload.checksum": f"{checksum}", + } + args = {"timeout": timeout} + with create_trace_span( + name="Storage.MultipartUpload/transmit", + attributes=extra_attributes, + client=client, + api_request=args, + ): + response = upload.transmit( + transport, data, object_metadata, content_type, timeout=timeout + ) - return response + return response def _initiate_resumable_upload( self, @@ -1919,7 +2016,6 @@ def _initiate_resumable_upload( stream, content_type, size, - num_retries, predefined_acl=None, extra_headers=None, chunk_size=None, @@ -1928,8 +2024,9 @@ def _initiate_resumable_upload( if_metageneration_match=None, if_metageneration_not_match=None, timeout=_DEFAULT_TIMEOUT, - checksum=None, + checksum="auto", retry=None, + command=None, ): """Initiate a resumable upload. @@ -1960,15 +2057,6 @@ def _initiate_resumable_upload( :type predefined_acl: str :param predefined_acl: (Optional) Predefined access control list - :type num_retries: int - :param num_retries: - Number of upload retries. By default, only uploads with - if_generation_match set will be retried, as uploads without the - argument are not guaranteed to be idempotent. Setting num_retries - will override this default behavior and guarantee retries even when - if_generation_match is not set. (Deprecated: This argument - will be removed in a future release.) - :type extra_headers: dict :param extra_headers: (Optional) Extra headers to add to standard headers. @@ -1976,7 +2064,7 @@ def _initiate_resumable_upload( :type chunk_size: int :param chunk_size: (Optional) Chunk size to use when creating a - :class:`~google.resumable_media.requests.ResumableUpload`. + :class:`~google.cloud.storage._media.requests.ResumableUpload`. If not passed, will fall back to the chunk size on the current blob, if the chunk size of a current blob is also `None`, will set the default value. @@ -2008,17 +2096,17 @@ def _initiate_resumable_upload( (Optional) The type of checksum to compute to verify the integrity of the object. After the upload is complete, the server-computed checksum of the resulting object will be checked - and google.resumable_media.common.DataCorruption will be raised on + and google.cloud.storage.exceptions.DataCorruption will be raised on a mismatch. On a validation failure, the client will attempt to - delete the uploaded object automatically. Supported values - are "md5", "crc32c" and None. The default is None. 
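The upload docstrings in this region switch the checksum default from None to "auto", which prefers crc32c when the compiled extension is available and otherwise falls back to md5. A rough sketch of that selection logic, assuming the google-crc32c package exposes an `implementation` attribute of "c" or "python"; this is illustrative, not the library's actual resolver:

    def resolve_auto_checksum(checksum="auto"):
        # Anything other than "auto" is passed through unchanged.
        if checksum != "auto":
            return checksum
        try:
            import google_crc32c
        except ImportError:
            return "md5"
        # "c" means the C extension is installed; "python" is the pure-Python fallback.
        return "crc32c" if google_crc32c.implementation == "c" else "md5"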
+ delete the uploaded object automatically. Supported values are + "md5", "crc32c", "auto" and None. The default is "auto", which will + try to detect if the C extension for crc32c is installed and fall + back to md5 otherwise. :type retry: google.api_core.retry.Retry :param retry: (Optional) How to retry the RPC. A None value will disable retries. A google.api_core.retry.Retry value will enable retries, - and the object will configure backoff and timeout options. Custom - predicates (customizable error codes) are not supported for media - operations such as this one. + and the object will configure backoff and timeout options. This private method does not accept ConditionalRetryPolicy values because the information necessary to evaluate the policy is instead @@ -2028,11 +2116,17 @@ def _initiate_resumable_upload( (google.cloud.storage.retry) for information on retry types and how to configure them. + :type command: str + :param command: + (Optional) Information about which interface for upload was used, + to be included in the X-Goog-API-Client header. Please leave as None + unless otherwise directed. + :rtype: tuple :returns: Pair of - * The :class:`~google.resumable_media.requests.ResumableUpload` + * The :class:`~google.cloud.storage._media.requests.ResumableUpload` that was created * The ``transport`` used to initiate the upload. """ @@ -2045,14 +2139,14 @@ def _initiate_resumable_upload( transport = self._get_transport(client) if "metadata" in self._properties and "metadata" not in self._changes: self._changes.add("metadata") - info = self._get_upload_arguments(content_type) + info = self._get_upload_arguments(client, content_type, command=command) headers, object_metadata, content_type = info if extra_headers is not None: headers.update(extra_headers) hostname = _get_host_name(client._connection) base_url = _RESUMABLE_URL_TEMPLATE.format( - hostname=hostname, bucket_path=self.bucket.path + hostname=hostname, bucket_path=self.bucket.path, api_version=_API_VERSION ) name_value_pairs = [] @@ -2089,11 +2183,7 @@ def _initiate_resumable_upload( upload_url = _add_query_parameters(base_url, name_value_pairs) upload = ResumableUpload( - upload_url, chunk_size, headers=headers, checksum=checksum - ) - - upload._retry_strategy = _api_core_retry_to_resumable_media_retry( - retry, num_retries + upload_url, chunk_size, headers=headers, checksum=checksum, retry=retry ) upload.initiate( @@ -2114,15 +2204,15 @@ def _do_resumable_upload( stream, content_type, size, - num_retries, predefined_acl, if_generation_match, if_generation_not_match, if_metageneration_match, if_metageneration_not_match, timeout=_DEFAULT_TIMEOUT, - checksum=None, + checksum="auto", retry=None, + command=None, ): """Perform a resumable upload. @@ -2153,15 +2243,6 @@ def _do_resumable_upload( ``stream``). If not provided, the upload will be concluded once ``stream`` is exhausted (or :data:`None`). - :type num_retries: int - :param num_retries: - Number of upload retries. By default, only uploads with - if_generation_match set will be retried, as uploads without the - argument are not guaranteed to be idempotent. Setting num_retries - will override this default behavior and guarantee retries even when - if_generation_match is not set. (Deprecated: This argument - will be removed in a future release.) - :type predefined_acl: str :param predefined_acl: (Optional) Predefined access control list @@ -2191,17 +2272,17 @@ def _do_resumable_upload( (Optional) The type of checksum to compute to verify the integrity of the object. 
After the upload is complete, the server-computed checksum of the resulting object will be checked - and google.resumable_media.common.DataCorruption will be raised on + and google.cloud.storage.exceptions.DataCorruption will be raised on a mismatch. On a validation failure, the client will attempt to - delete the uploaded object automatically. Supported values - are "md5", "crc32c" and None. The default is None. + delete the uploaded object automatically. Supported values are + "md5", "crc32c", "auto" and None. The default is "auto", which will + try to detect if the C extension for crc32c is installed and fall + back to md5 otherwise. :type retry: google.api_core.retry.Retry :param retry: (Optional) How to retry the RPC. A None value will disable retries. A google.api_core.retry.Retry value will enable retries, - and the object will configure backoff and timeout options. Custom - predicates (customizable error codes) are not supported for media - operations such as this one. + and the object will configure backoff and timeout options. This private method does not accept ConditionalRetryPolicy values because the information necessary to evaluate the policy is instead @@ -2211,6 +2292,12 @@ def _do_resumable_upload( (google.cloud.storage.retry) for information on retry types and how to configure them. + :type command: str + :param command: + (Optional) Information about which interface for upload was used, + to be included in the X-Goog-API-Client header. Please leave as None + unless otherwise directed. + :rtype: :class:`~requests.Response` :returns: The "200 OK" response object returned after the final chunk is uploaded. @@ -2220,7 +2307,6 @@ def _do_resumable_upload( stream, content_type, size, - num_retries, predefined_acl=predefined_acl, if_generation_match=if_generation_match, if_generation_not_match=if_generation_not_match, @@ -2229,17 +2315,28 @@ def _do_resumable_upload( timeout=timeout, checksum=checksum, retry=retry, + command=command, ) - - while not upload.finished: - try: - response = upload.transmit_next_chunk(transport, timeout=timeout) - except resumable_media.DataCorruption: - # Attempt to delete the corrupted object. - self.delete() - raise - - return response + extra_attributes = { + "url.full": upload.resumable_url, + "upload.chunk_size": upload.chunk_size, + "upload.checksum": f"{checksum}", + } + args = {"timeout": timeout} + with create_trace_span( + name="Storage.ResumableUpload/transmitNextChunk", + attributes=extra_attributes, + client=client, + api_request=args, + ): + while not upload.finished: + try: + response = upload.transmit_next_chunk(transport, timeout=timeout) + except DataCorruption: + # Attempt to delete the corrupted object. + self.delete() + raise + return response def _do_upload( self, @@ -2247,15 +2344,15 @@ def _do_upload( stream, content_type, size, - num_retries, predefined_acl, if_generation_match, if_generation_not_match, if_metageneration_match, if_metageneration_not_match, timeout=_DEFAULT_TIMEOUT, - checksum=None, + checksum="auto", retry=None, + command=None, ): """Determine an upload strategy and then perform the upload. @@ -2287,15 +2384,6 @@ def _do_upload( ``stream``). If not provided, the upload will be concluded once ``stream`` is exhausted (or :data:`None`). - :type num_retries: int - :param num_retries: - Number of upload retries. By default, only uploads with - if_generation_match set will be retried, as uploads without the - argument are not guaranteed to be idempotent. 
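The resumable path now drives the transfer with the loop added earlier in this hunk: transmit chunks until the upload object reports it is finished, and on a checksum mismatch delete the partially written object before re-raising. A stripped-down version of that pattern, kept here as an illustration (the wrapper function itself is hypothetical):

    from google.cloud.storage.exceptions import DataCorruption

    def drive_resumable_upload(blob, upload, transport, timeout=60):
        response = None
        while not upload.finished:
            try:
                response = upload.transmit_next_chunk(transport, timeout=timeout)
            except DataCorruption:
                # Server-side checksum validation failed: remove the corrupted
                # object so a bad copy is not left behind, then surface the error.
                blob.delete()
                raise
        return response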
Setting num_retries - will override this default behavior and guarantee retries even when - if_generation_match is not set. (Deprecated: This argument - will be removed in a future release.) - :type predefined_acl: str :param predefined_acl: (Optional) Predefined access control list @@ -2329,9 +2417,11 @@ def _do_upload( is too large and must be transmitted in multiple requests, the checksum will be incrementally computed and the client will handle verification and error handling, raising - google.resumable_media.common.DataCorruption on a mismatch and + google.cloud.storage.exceptions.DataCorruption on a mismatch and attempting to delete the corrupted file. Supported values are - "md5", "crc32c" and None. The default is None. + "md5", "crc32c", "auto" and None. The default is "auto", which will + try to detect if the C extension for crc32c is installed and fall + back to md5 otherwise. :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy :param retry: (Optional) How to retry the RPC. A None value will disable @@ -2350,10 +2440,11 @@ def _do_upload( (google.cloud.storage.retry) for information on retry types and how to configure them. - Media operations (downloads and uploads) do not support non-default - predicates in a Retry object. The default will always be used. Other - configuration changes for Retry objects such as delays and deadlines - are respected. + :type command: str + :param command: + (Optional) Information about which interface for upload was used, + to be included in the X-Goog-API-Client header. Please leave as None + unless otherwise directed. :rtype: dict :returns: The parsed JSON from the "200 OK" response. This will be the @@ -2379,7 +2470,6 @@ def _do_upload( stream, content_type, size, - num_retries, predefined_acl, if_generation_match, if_generation_not_match, @@ -2388,6 +2478,7 @@ def _do_upload( timeout=timeout, checksum=checksum, retry=retry, + command=command, ) else: response = self._do_resumable_upload( @@ -2395,7 +2486,6 @@ def _do_upload( stream, content_type, size, - num_retries, predefined_acl, if_generation_match, if_generation_not_match, @@ -2404,17 +2494,17 @@ def _do_upload( timeout=timeout, checksum=checksum, retry=retry, + command=command, ) return response.json() - def upload_from_file( + def _prep_and_do_upload( self, file_obj, rewind=False, size=None, content_type=None, - num_retries=None, client=None, predefined_acl=None, if_generation_match=None, @@ -2422,8 +2512,9 @@ def upload_from_file( if_metageneration_match=None, if_metageneration_not_match=None, timeout=_DEFAULT_TIMEOUT, - checksum=None, - retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED, + checksum="auto", + retry=DEFAULT_RETRY, + command=None, ): """Upload the contents of this blob from a file-like object. @@ -2440,31 +2531,22 @@ def upload_from_file( bucket. In the absence of those policies, upload will overwrite any existing contents. - See the `object versioning`_ and `lifecycle`_ API documents - for details. - - Uploading a file with a `customer-supplied`_ encryption key: - - .. literalinclude:: snippets.py - :start-after: [START upload_from_file] - :end-before: [END upload_from_file] - :dedent: 4 - - The ``encryption_key`` should be a str or bytes with a length of at - least 32. 
+ See the [`object versioning`](https://cloud.google.com/storage/docs/object-versioning) + and [`lifecycle`](https://cloud.google.com/storage/docs/lifecycle) + API documents for details. If the size of the data to be uploaded exceeds 8 MB a resumable media request will be used, otherwise the content and the metadata will be uploaded in a single multipart upload request. For more fine-grained over the upload process, check out - `google-resumable-media`_. + [`google-resumable-media`](https://googleapis.dev/python/google-resumable-media/latest/index.html). If :attr:`user_project` is set on the bucket, bills the API request to that project. :type file_obj: file - :param file_obj: A file handle open for reading. + :param file_obj: A file handle opened in binary mode for reading. :type rewind: bool :param rewind: @@ -2480,15 +2562,6 @@ def upload_from_file( :type content_type: str :param content_type: (Optional) Type of content being uploaded. - :type num_retries: int - :param num_retries: - Number of upload retries. By default, only uploads with - if_generation_match set will be retried, as uploads without the - argument are not guaranteed to be idempotent. Setting num_retries - will override this default behavior and guarantee retries even when - if_generation_match is not set. (Deprecated: This argument - will be removed in a future release.) - :type client: :class:`~google.cloud.storage.client.Client` :param client: (Optional) The client to use. If not passed, falls back to the @@ -2527,9 +2600,11 @@ def upload_from_file( is too large and must be transmitted in multiple requests, the checksum will be incrementally computed and the client will handle verification and error handling, raising - google.resumable_media.common.DataCorruption on a mismatch and + google.cloud.storage.exceptions.DataCorruption on a mismatch and attempting to delete the corrupted file. Supported values are - "md5", "crc32c" and None. The default is None. + "md5", "crc32c", "auto" and None. The default is "auto", which will + try to detect if the C extension for crc32c is installed and fall + back to md5 otherwise. :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy :param retry: (Optional) How to retry the RPC. A None value will disable @@ -2548,26 +2623,15 @@ def upload_from_file( (google.cloud.storage.retry) for information on retry types and how to configure them. - Media operations (downloads and uploads) do not support non-default - predicates in a Retry object. The default will always be used. Other - configuration changes for Retry objects such as delays and deadlines - are respected. + :type command: str + :param command: + (Optional) Information about which interface for upload was used, + to be included in the X-Goog-API-Client header. Please leave as None + unless otherwise directed. :raises: :class:`~google.cloud.exceptions.GoogleCloudError` if the upload response returns an error status. - - .. _object versioning: https://cloud.google.com/storage/\ - docs/object-versioning - .. _lifecycle: https://cloud.google.com/storage/docs/lifecycle """ - if num_retries is not None: - warnings.warn(_NUM_RETRIES_MESSAGE, DeprecationWarning, stacklevel=2) - # num_retries and retry are mutually exclusive. 
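As the docstring here notes, payloads above 8 MB (or of unknown size) go through the resumable protocol, while smaller payloads with a known size are sent as a single multipart request. A simplified sketch of that dispatch; the constant and helper are illustrative, and the real code also takes the resumable path whenever a chunk_size is configured on the blob:

    _EIGHT_MB = 8 * 1024 * 1024  # documented cutoff, restated here for illustration

    def choose_upload_strategy(size, chunk_size=None):
        if size is not None and size <= _EIGHT_MB and not chunk_size:
            return "multipart"
        return "resumable"

    choose_upload_strategy(1024)              # "multipart"
    choose_upload_strategy(None)              # "resumable" (unknown size)
    choose_upload_strategy(64 * 1024 * 1024)  # "resumable"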
If num_retries is - # set and retry is exactly the default, then nullify retry for - # backwards compatibility. - if retry is DEFAULT_RETRY_IF_GENERATION_SPECIFIED: - retry = None - _maybe_rewind(file_obj, rewind=rewind) predefined_acl = ACL.validate_predefined(predefined_acl) @@ -2577,7 +2641,6 @@ def upload_from_file( file_obj, content_type, size, - num_retries, predefined_acl, if_generation_match, if_generation_not_match, @@ -2586,16 +2649,19 @@ def upload_from_file( timeout=timeout, checksum=checksum, retry=retry, + command=command, ) self._set_properties(created_json) - except resumable_media.InvalidResponse as exc: + except InvalidResponse as exc: _raise_from_invalid_response(exc) - def upload_from_filename( + @create_trace_span(name="Storage.Blob.uploadFromFile") + def upload_from_file( self, - filename, + file_obj, + rewind=False, + size=None, content_type=None, - num_retries=None, client=None, predefined_acl=None, if_generation_match=None, @@ -2603,17 +2669,16 @@ def upload_from_filename( if_metageneration_match=None, if_metageneration_not_match=None, timeout=_DEFAULT_TIMEOUT, - checksum=None, - retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED, + checksum="auto", + retry=DEFAULT_RETRY, ): - """Upload this blob's contents from the content of a named file. + """Upload the contents of this blob from a file-like object. The content type of the upload will be determined in order of precedence: - The value passed in to this method (if not :data:`None`) - The value stored on the current blob - - The value given by ``mimetypes.guess_type`` - The default value ('application/octet-stream') .. note:: @@ -2622,16 +2687,33 @@ def upload_from_filename( bucket. In the absence of those policies, upload will overwrite any existing contents. - See the `object versioning - `_ and - `lifecycle `_ + See the [`object versioning`](https://cloud.google.com/storage/docs/object-versioning) + and [`lifecycle`](https://cloud.google.com/storage/docs/lifecycle) API documents for details. + If the size of the data to be uploaded exceeds 8 MB a resumable media + request will be used, otherwise the content and the metadata will be + uploaded in a single multipart upload request. + + For more fine-grained over the upload process, check out + [`google-resumable-media`](https://googleapis.dev/python/google-resumable-media/latest/index.html). + If :attr:`user_project` is set on the bucket, bills the API request to that project. - :type filename: str - :param filename: The path to the file. + :type file_obj: file + :param file_obj: A file handle opened in binary mode for reading. + + :type rewind: bool + :param rewind: + If True, seek to the beginning of the file handle before writing + the file to Cloud Storage. + + :type size: int + :param size: + The number of bytes to be uploaded (which will be read from + ``file_obj``). If not provided, the upload will be concluded once + ``file_obj`` is exhausted. :type content_type: str :param content_type: (Optional) Type of content being uploaded. @@ -2641,15 +2723,6 @@ def upload_from_filename( (Optional) The client to use. If not passed, falls back to the ``client`` stored on the blob's bucket. - :type num_retries: int - :param num_retries: - Number of upload retries. By default, only uploads with - if_generation_match set will be retried, as uploads without the - argument are not guaranteed to be idempotent. 
Setting num_retries - will override this default behavior and guarantee retries even when - if_generation_match is not set. (Deprecated: This argument - will be removed in a future release.) - :type predefined_acl: str :param predefined_acl: (Optional) Predefined access control list @@ -2683,9 +2756,11 @@ def upload_from_filename( is too large and must be transmitted in multiple requests, the checksum will be incrementally computed and the client will handle verification and error handling, raising - google.resumable_media.common.DataCorruption on a mismatch and + google.cloud.storage.exceptions.DataCorruption on a mismatch and attempting to delete the corrupted file. Supported values are - "md5", "crc32c" and None. The default is None. + "md5", "crc32c", "auto" and None. The default is "auto", which will + try to detect if the C extension for crc32c is installed and fall + back to md5 otherwise. :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy :param retry: (Optional) How to retry the RPC. A None value will disable @@ -2704,36 +2779,54 @@ def upload_from_filename( (google.cloud.storage.retry) for information on retry types and how to configure them. - Media operations (downloads and uploads) do not support non-default - predicates in a Retry object. The default will always be used. Other - configuration changes for Retry objects such as delays and deadlines - are respected. + :raises: :class:`~google.cloud.exceptions.GoogleCloudError` + if the upload response returns an error status. + """ + self._prep_and_do_upload( + file_obj, + rewind=rewind, + size=size, + content_type=content_type, + client=client, + predefined_acl=predefined_acl, + if_generation_match=if_generation_match, + if_generation_not_match=if_generation_not_match, + if_metageneration_match=if_metageneration_match, + if_metageneration_not_match=if_metageneration_not_match, + timeout=timeout, + checksum=checksum, + retry=retry, + ) + + def _handle_filename_and_upload(self, filename, content_type=None, *args, **kwargs): + """Upload this blob's contents from the content of a named file. + + :type filename: str + :param filename: The path to the file. + + :type content_type: str + :param content_type: (Optional) Type of content being uploaded. + + For *args and **kwargs, refer to the documentation for upload_from_filename() for more information. 
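_handle_filename_and_upload, whose body follows in the next hunk lines, resolves the content type from the filename before opening the file, using the precedence the docstrings describe. A hedged sketch of that resolution; the helper name is illustrative, and the library does this inside Blob._get_content_type:

    import mimetypes

    def resolve_content_type(explicit=None, blob_content_type=None, filename=None):
        if explicit is not None:
            return explicit
        if blob_content_type is not None:
            return blob_content_type
        if filename is not None:
            guessed, _ = mimetypes.guess_type(filename)
            if guessed is not None:
                return guessed
        return "application/octet-stream"

    resolve_content_type(filename="report.csv")  # "text/csv"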
""" + content_type = self._get_content_type(content_type, filename=filename) with open(filename, "rb") as file_obj: total_bytes = os.fstat(file_obj.fileno()).st_size - self.upload_from_file( + self._prep_and_do_upload( file_obj, content_type=content_type, - num_retries=num_retries, - client=client, size=total_bytes, - predefined_acl=predefined_acl, - if_generation_match=if_generation_match, - if_generation_not_match=if_generation_not_match, - if_metageneration_match=if_metageneration_match, - if_metageneration_not_match=if_metageneration_not_match, - timeout=timeout, - checksum=checksum, - retry=retry, + *args, + **kwargs, ) - def upload_from_string( + @create_trace_span(name="Storage.Blob.uploadFromFilename") + def upload_from_filename( self, - data, - content_type="text/plain", - num_retries=None, + filename, + content_type=None, client=None, predefined_acl=None, if_generation_match=None, @@ -2741,10 +2834,18 @@ def upload_from_string( if_metageneration_match=None, if_metageneration_not_match=None, timeout=_DEFAULT_TIMEOUT, - checksum=None, - retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED, + checksum="auto", + retry=DEFAULT_RETRY, ): - """Upload contents of this blob from the provided string. + """Upload this blob's contents from the content of a named file. + + The content type of the upload will be determined in order + of precedence: + + - The value passed in to this method (if not :data:`None`) + - The value stored on the current blob + - The value given by ``mimetypes.guess_type`` + - The default value ('application/octet-stream') .. note:: The effect of uploading to an existing blob depends on the @@ -2752,17 +2853,132 @@ def upload_from_string( bucket. In the absence of those policies, upload will overwrite any existing contents. - See the `object versioning - `_ and - `lifecycle `_ + See the [`object versioning`](https://cloud.google.com/storage/docs/object-versioning) + and [`lifecycle`](https://cloud.google.com/storage/docs/lifecycle) API documents for details. If :attr:`user_project` is set on the bucket, bills the API request to that project. - :type data: bytes or str - :param data: - The data to store in this blob. If the value is text, it will be + See a [code sample](https://cloud.google.com/storage/docs/samples/storage-upload-encrypted-file#storage_upload_encrypted_file-python) + to upload a file with a + [`customer-supplied encryption key`](https://cloud.google.com/storage/docs/encryption#customer-supplied). + + :type filename: str + :param filename: The path to the file. + + :type content_type: str + :param content_type: (Optional) Type of content being uploaded. + + :type client: :class:`~google.cloud.storage.client.Client` + :param client: + (Optional) The client to use. If not passed, falls back to the + ``client`` stored on the blob's bucket. 
+ + :type predefined_acl: str + :param predefined_acl: (Optional) Predefined access control list + + :type if_generation_match: long + :param if_generation_match: + (Optional) See :ref:`using-if-generation-match` + + :type if_generation_not_match: long + :param if_generation_not_match: + (Optional) See :ref:`using-if-generation-not-match` + + :type if_metageneration_match: long + :param if_metageneration_match: + (Optional) See :ref:`using-if-metageneration-match` + + :type if_metageneration_not_match: long + :param if_metageneration_not_match: + (Optional) See :ref:`using-if-metageneration-not-match` + + :type timeout: float or tuple + :param timeout: + (Optional) The amount of time, in seconds, to wait + for the server response. See: :ref:`configuring_timeouts` + + :type checksum: str + :param checksum: + (Optional) The type of checksum to compute to verify + the integrity of the object. If the upload is completed in a single + request, the checksum will be entirely precomputed and the remote + server will handle verification and error handling. If the upload + is too large and must be transmitted in multiple requests, the + checksum will be incrementally computed and the client will handle + verification and error handling, raising + google.cloud.storage.exceptions.DataCorruption on a mismatch and + attempting to delete the corrupted file. Supported values are + "md5", "crc32c", "auto" and None. The default is "auto", which will + try to detect if the C extension for crc32c is installed and fall + back to md5 otherwise. + + :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy + :param retry: (Optional) How to retry the RPC. A None value will disable + retries. A google.api_core.retry.Retry value will enable retries, + and the object will define retriable response codes and errors and + configure backoff and timeout options. + + A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a + Retry object and activates it only if certain conditions are met. + This class exists to provide safe defaults for RPC calls that are + not technically safe to retry normally (due to potential data + duplication or other side-effects) but become safe to retry if a + condition such as if_generation_match is set. + + See the retry.py source code and docstrings in this package + (google.cloud.storage.retry) for information on retry types and how + to configure them. + """ + + self._handle_filename_and_upload( + filename, + content_type=content_type, + client=client, + predefined_acl=predefined_acl, + if_generation_match=if_generation_match, + if_generation_not_match=if_generation_not_match, + if_metageneration_match=if_metageneration_match, + if_metageneration_not_match=if_metageneration_not_match, + timeout=timeout, + checksum=checksum, + retry=retry, + ) + + @create_trace_span(name="Storage.Blob.uploadFromString") + def upload_from_string( + self, + data, + content_type="text/plain", + client=None, + predefined_acl=None, + if_generation_match=None, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + timeout=_DEFAULT_TIMEOUT, + checksum="auto", + retry=DEFAULT_RETRY, + ): + """Upload contents of this blob from the provided string. + + .. note:: + The effect of uploading to an existing blob depends on the + "versioning" and "lifecycle" policies defined on the blob's + bucket. In the absence of those policies, upload will + overwrite any existing contents. 
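With the default retry for uploads now DEFAULT_RETRY rather than the generation-conditional policy, callers who want retried requests to stay strictly idempotent can still pass a generation precondition themselves. A short usage sketch; the bucket, object, and file names are assumed:

    from google.cloud import storage

    client = storage.Client()
    blob = client.bucket("my-bucket").blob("reports/2024.csv")

    # if_generation_match=0 means "create only if the object does not exist yet",
    # which keeps the request safe to retry.
    blob.upload_from_filename("local-2024.csv", if_generation_match=0)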
+ + See the [`object versioning`](https://cloud.google.com/storage/docs/object-versioning) + and [`lifecycle`](https://cloud.google.com/storage/docs/lifecycle) + API documents for details. + + If :attr:`user_project` is set on the bucket, bills the API request + to that project. + + :type data: bytes or str + :param data: + The data to store in this blob. If the value is text, it will be encoded as UTF-8. :type content_type: str @@ -2770,15 +2986,6 @@ def upload_from_string( (Optional) Type of content being uploaded. Defaults to ``'text/plain'``. - :type num_retries: int - :param num_retries: - Number of upload retries. By default, only uploads with - if_generation_match set will be retried, as uploads without the - argument are not guaranteed to be idempotent. Setting num_retries - will override this default behavior and guarantee retries even when - if_generation_match is not set. (Deprecated: This argument - will be removed in a future release.) - :type client: :class:`~google.cloud.storage.client.Client` :param client: (Optional) The client to use. If not passed, falls back to the @@ -2817,9 +3024,11 @@ def upload_from_string( is too large and must be transmitted in multiple requests, the checksum will be incrementally computed and the client will handle verification and error handling, raising - google.resumable_media.common.DataCorruption on a mismatch and + google.cloud.storage.exceptions.DataCorruption on a mismatch and attempting to delete the corrupted file. Supported values are - "md5", "crc32c" and None. The default is None. + "md5", "crc32c", "auto" and None. The default is "auto", which will + try to detect if the C extension for crc32c is installed and fall + back to md5 otherwise. :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy :param retry: (Optional) How to retry the RPC. A None value will disable @@ -2837,11 +3046,6 @@ def upload_from_string( See the retry.py source code and docstrings in this package (google.cloud.storage.retry) for information on retry types and how to configure them. - - Media operations (downloads and uploads) do not support non-default - predicates in a Retry object. The default will always be used. Other - configuration changes for Retry objects such as delays and deadlines - are respected. """ data = _to_bytes(data, encoding="utf-8") string_buffer = BytesIO(data) @@ -2849,7 +3053,6 @@ def upload_from_string( file_obj=string_buffer, size=len(data), content_type=content_type, - num_retries=num_retries, client=client, predefined_acl=predefined_acl, if_generation_match=if_generation_match, @@ -2861,6 +3064,7 @@ def upload_from_string( retry=retry, ) + @create_trace_span(name="Storage.Blob.createResumableUploadSession") def create_resumable_upload_session( self, content_type=None, @@ -2868,12 +3072,13 @@ def create_resumable_upload_session( origin=None, client=None, timeout=_DEFAULT_TIMEOUT, - checksum=None, + checksum="auto", + predefined_acl=None, if_generation_match=None, if_generation_not_match=None, if_metageneration_match=None, if_metageneration_not_match=None, - retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED, + retry=DEFAULT_RETRY, ): """Create a resumable upload session. @@ -2883,12 +3088,10 @@ def create_resumable_upload_session( passes the session URL to the client that will upload the binary data. The client performs a PUT request on the session URL to complete the upload. 
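upload_from_string, shown just above, is a thin convenience wrapper: text input is encoded as UTF-8, wrapped in a BytesIO, and handed to upload_from_file with the buffer length as the size. Roughly equivalent calls, with assumed names:

    from io import BytesIO
    from google.cloud import storage

    client = storage.Client()
    blob = client.bucket("my-bucket").blob("notes/hello.txt")

    blob.upload_from_string("hello world", content_type="text/plain")

    # ...does roughly the same as:
    data = "hello world".encode("utf-8")
    blob.upload_from_file(BytesIO(data), size=len(data), content_type="text/plain")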
This process allows untrusted clients to upload to an - access-controlled bucket. For more details, see the - `documentation on signed URLs`_. + access-controlled bucket. - .. _documentation on signed URLs: - https://cloud.google.com/storage/\ - docs/access-control/signed-urls#signing-resumable + For more details, see the + documentation on [`signed URLs`](https://cloud.google.com/storage/docs/access-control/signed-urls#signing-resumable). The content type of the upload will be determined in order of precedence: @@ -2903,13 +3106,13 @@ def create_resumable_upload_session( bucket. In the absence of those policies, upload will overwrite any existing contents. - See the `object versioning - `_ and - `lifecycle `_ + See the [`object versioning`](https://cloud.google.com/storage/docs/object-versioning) + and [`lifecycle`](https://cloud.google.com/storage/docs/lifecycle) API documents for details. If :attr:`encryption_key` is set, the blob will be encrypted with - a `customer-supplied`_ encryption key. + a [`customer-supplied`](https://cloud.google.com/storage/docs/encryption#customer-supplied) + encryption key. If :attr:`user_project` is set on the bucket, bills the API request to that project. @@ -2944,10 +3147,15 @@ def create_resumable_upload_session( (Optional) The type of checksum to compute to verify the integrity of the object. After the upload is complete, the server-computed checksum of the resulting object will be checked - and google.resumable_media.common.DataCorruption will be raised on + and google.cloud.storage.exceptions.DataCorruption will be raised on a mismatch. On a validation failure, the client will attempt to - delete the uploaded object automatically. Supported values - are "md5", "crc32c" and None. The default is None. + delete the uploaded object automatically. Supported values are + "md5", "crc32c", "auto" and None. The default is "auto", which will + try to detect if the C extension for crc32c is installed and fall + back to md5 otherwise. + + :type predefined_acl: str + :param predefined_acl: (Optional) Predefined access control list :type if_generation_match: long :param if_generation_match: @@ -2970,19 +3178,17 @@ def create_resumable_upload_session( retries. A google.api_core.retry.Retry value will enable retries, and the object will define retriable response codes and errors and configure backoff and timeout options. + A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a Retry object and activates it only if certain conditions are met. This class exists to provide safe defaults for RPC calls that are not technically safe to retry normally (due to potential data duplication or other side-effects) but become safe to retry if a condition such as if_generation_match is set. + See the retry.py source code and docstrings in this package (google.cloud.storage.retry) for information on retry types and how to configure them. - Media operations (downloads and uploads) do not support non-default - predicates in a Retry object. The default will always be used. Other - configuration changes for Retry objects such as delays and deadlines - are respected. :rtype: str :returns: The resumable upload session URL. 
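A typical use of the returned session URL: a trusted backend creates the session, then the untrusted client finishes the transfer with a plain HTTP PUT. A hedged sketch using the requests library and assumed names; for small payloads the whole object can be sent to the session URI in a single request:

    import requests
    from google.cloud import storage

    client = storage.Client()
    blob = client.bucket("my-bucket").blob("uploads/photo.jpg")

    # Trusted side: create the session, optionally pinning the allowed origin.
    session_url = blob.create_resumable_upload_session(
        content_type="image/jpeg",
        origin="https://example.com",
    )

    # Untrusted side: upload everything in one PUT to the session URL.
    with open("photo.jpg", "rb") as fp:
        response = requests.put(session_url, data=fp)
    response.raise_for_status()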
The upload can be @@ -3021,8 +3227,7 @@ def create_resumable_upload_session( fake_stream, content_type, size, - None, - predefined_acl=None, + predefined_acl=predefined_acl, if_generation_match=if_generation_match, if_generation_not_match=if_generation_not_match, if_metageneration_match=if_metageneration_match, @@ -3035,9 +3240,10 @@ def create_resumable_upload_session( ) return upload.resumable_url - except resumable_media.InvalidResponse as exc: + except InvalidResponse as exc: _raise_from_invalid_response(exc) + @create_trace_span(name="Storage.Blob.getIamPolicy") def get_iam_policy( self, client=None, @@ -3098,7 +3304,7 @@ def get_iam_policy( query_params["optionsRequestedPolicyVersion"] = requested_policy_version info = client._get_resource( - "%s/iam" % (self.path,), + f"{self.path}/iam", query_params=query_params, timeout=timeout, retry=retry, @@ -3106,6 +3312,7 @@ def get_iam_policy( ) return Policy.from_api_repr(info) + @create_trace_span(name="Storage.Blob.setIamPolicy") def set_iam_policy( self, policy, @@ -3154,7 +3361,7 @@ def set_iam_policy( if self.user_project is not None: query_params["userProject"] = self.user_project - path = "{}/iam".format(self.path) + path = f"{self.path}/iam" resource = policy.to_api_repr() resource["resourceId"] = self.path info = client._put_resource( @@ -3167,6 +3374,7 @@ def set_iam_policy( ) return Policy.from_api_repr(info) + @create_trace_span(name="Storage.Blob.testIamPermissions") def test_iam_permissions( self, permissions, client=None, timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY ): @@ -3210,7 +3418,7 @@ def test_iam_permissions( if self.user_project is not None: query_params["userProject"] = self.user_project - path = "%s/iam/testPermissions" % (self.path,) + path = f"{self.path}/iam/testPermissions" resp = client._get_resource( path, query_params=query_params, @@ -3221,6 +3429,7 @@ def test_iam_permissions( return resp.get("permissions", []) + @create_trace_span(name="Storage.Blob.makePublic") def make_public( self, client=None, @@ -3229,7 +3438,7 @@ def make_public( if_generation_not_match=None, if_metageneration_match=None, if_metageneration_not_match=None, - retry=DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED, + retry=DEFAULT_RETRY, ): """Update blob's ACL, granting read access to anonymous users. @@ -3274,6 +3483,7 @@ def make_public( retry=retry, ) + @create_trace_span(name="Storage.Blob.makePrivate") def make_private( self, client=None, @@ -3282,7 +3492,7 @@ def make_private( if_generation_not_match=None, if_metageneration_match=None, if_metageneration_not_match=None, - retry=DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED, + retry=DEFAULT_RETRY, ): """Update blob's ACL, revoking read access for anonymous users. @@ -3327,6 +3537,7 @@ def make_private( retry=retry, ) + @create_trace_span(name="Storage.Blob.compose") def compose( self, sources, @@ -3342,6 +3553,9 @@ def compose( If :attr:`user_project` is set on the bucket, bills the API request to that project. + See [API reference docs](https://cloud.google.com/storage/docs/json_api/v1/objects/compose) + and a [code sample](https://cloud.google.com/storage/docs/samples/storage-compose-file#storage_compose_file-python). + :type sources: list of :class:`Blob` :param sources: Blobs whose contents will be composed into this blob. @@ -3361,14 +3575,11 @@ def compose( destination object's current generation matches the given value. 
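compose, decorated here with its trace span, accepts per-source generation preconditions via if_source_generation_match. The doctest that used to illustrate this is removed a little further down; its equivalent, with assumed bucket and object names:

    from google.cloud import storage

    client = storage.Client()
    bucket = client.bucket("my-bucket")

    sources = [bucket.blob("part-1"), bucket.blob("part-2")]
    # Require that "part-1" is still at generation 123; None skips the check
    # for the second source.
    preconditions = [123, None]

    composed = bucket.blob("combined")
    composed.compose(sources, if_source_generation_match=preconditions)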
Setting to 0 makes the operation succeed only if there are no live versions of the object. - - .. note:: - - In a previous version, this argument worked identically to the - ``if_source_generation_match`` argument. For - backwards-compatibility reasons, if a list is passed in, - this argument will behave like ``if_source_generation_match`` - and also issue a DeprecationWarning. + Note: In a previous version, this argument worked identically to the + ``if_source_generation_match`` argument. For + backwards-compatibility reasons, if a list is passed in, + this argument will behave like ``if_source_generation_match`` + and also issue a DeprecationWarning. :type if_metageneration_match: long :param if_metageneration_match: @@ -3388,21 +3599,13 @@ def compose( :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy :param retry: - (Optional) How to retry the RPC. See: :ref:`configuring_retries` - - Example: - Compose blobs using source generation match preconditions. - - >>> from google.cloud import storage - >>> client = storage.Client() - >>> bucket = client.bucket("bucket-name") - - >>> blobs = [bucket.blob("blob-name-1"), bucket.blob("blob-name-2")] - >>> if_source_generation_match = [None] * len(blobs) - >>> if_source_generation_match[0] = "123" # precondition for "blob-name-1" - - >>> composed_blob = bucket.blob("composed-name") - >>> composed_blob.compose(blobs, if_source_generation_match=if_source_generation_match) + (Optional) How to retry the RPC. + The default value is ``DEFAULT_RETRY_IF_GENERATION_SPECIFIED``, a conditional retry + policy which will only enable retries if ``if_generation_match`` or ``generation`` + is set, in order to ensure requests are idempotent before retrying them. + Change the value to ``DEFAULT_RETRY`` or another `google.api_core.retry.Retry` object + to enable retries regardless of generation precondition setting. + See [Configuring Retries](https://cloud.google.com/python/docs/reference/storage/latest/retry_timeout). """ sources_len = len(sources) client = self._require_client(client) @@ -3465,7 +3668,7 @@ def compose( ) api_response = client._post_resource( - "{}/compose".format(self.path), + f"{self.path}/compose", request, query_params=query_params, timeout=timeout, @@ -3474,6 +3677,7 @@ def compose( ) self._set_properties(api_response) + @create_trace_span(name="Storage.Blob.rewrite") def rewrite( self, source, @@ -3495,6 +3699,10 @@ def rewrite( If :attr:`user_project` is set on the bucket, bills the API request to that project. + .. note:: + + ``rewrite`` is not supported in a ``Batch`` context. + :type source: :class:`Blob` :param source: blob whose contents will be rewritten into this blob. @@ -3560,7 +3768,13 @@ def rewrite( :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy :param retry: - (Optional) How to retry the RPC. See: :ref:`configuring_retries` + (Optional) How to retry the RPC. + The default value is ``DEFAULT_RETRY_IF_GENERATION_SPECIFIED``, a conditional retry + policy which will only enable retries if ``if_generation_match`` or ``generation`` + is set, in order to ensure requests are idempotent before retrying them. + Change the value to ``DEFAULT_RETRY`` or another `google.api_core.retry.Retry` object + to enable retries regardless of generation precondition setting. 
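rewrite returns a continuation token until the server finishes copying, so callers loop until the token comes back as None (and, as the note above says, not inside a Batch). A usage sketch with assumed names:

    from google.cloud import storage

    client = storage.Client()
    source = client.bucket("src-bucket").blob("big-object")
    destination = client.bucket("dst-bucket").blob("big-object")

    token, bytes_rewritten, total_bytes = destination.rewrite(source)
    while token is not None:
        token, bytes_rewritten, total_bytes = destination.rewrite(source, token=token)
        print(f"rewrote {bytes_rewritten} of {total_bytes} bytes")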
+ See [Configuring Retries](https://cloud.google.com/python/docs/reference/storage/latest/retry_timeout). :rtype: tuple :returns: ``(token, bytes_rewritten, total_bytes)``, where ``token`` @@ -3583,7 +3797,15 @@ def rewrite( if source.generation: query_params["sourceGeneration"] = source.generation - if self.kms_key_name is not None: + # When a Customer Managed Encryption Key is used to encrypt Cloud Storage object + # at rest, object resource metadata will store the version of the Key Management + # Service cryptographic material. If a Blob instance with KMS Key metadata set is + # used to rewrite the object, then the existing kmsKeyName version + # value can't be used in the rewrite request and the client instead ignores it. + if ( + self.kms_key_name is not None + and "cryptoKeyVersions" not in self.kms_key_name + ): query_params["destinationKmsKeyName"] = self.kms_key_name _add_generation_match_parameters( @@ -3598,7 +3820,7 @@ def rewrite( if_source_metageneration_not_match=if_source_metageneration_not_match, ) - path = "{}/rewriteTo{}".format(source.path, self.path) + path = f"{source.path}/rewriteTo{self.path}" api_response = client._post_resource( path, self._properties, @@ -3620,6 +3842,7 @@ def rewrite( return api_response["rewriteToken"], rewritten, size + @create_trace_span(name="Storage.Blob.updateStorageClass") def update_storage_class( self, new_class, @@ -3712,11 +3935,14 @@ def update_storage_class( :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy :param retry: - (Optional) How to retry the RPC. See: :ref:`configuring_retries` + (Optional) How to retry the RPC. + The default value is ``DEFAULT_RETRY_IF_GENERATION_SPECIFIED``, a conditional retry + policy which will only enable retries if ``if_generation_match`` or ``generation`` + is set, in order to ensure requests are idempotent before retrying them. + Change the value to ``DEFAULT_RETRY`` or another `google.api_core.retry.Retry` object + to enable retries regardless of generation precondition setting. + See [Configuring Retries](https://cloud.google.com/python/docs/reference/storage/latest/retry_timeout). """ - if new_class not in self.STORAGE_CLASSES: - raise ValueError("Invalid storage class: %s" % (new_class,)) - # Update current blob's storage class prior to rewrite self._patch_property("storageClass", new_class) @@ -3750,6 +3976,7 @@ def update_storage_class( retry=retry, ) + @create_trace_span(name="Storage.Blob.open") def open( self, mode="r", @@ -3758,7 +3985,7 @@ def open( encoding=None, errors=None, newline=None, - **kwargs + **kwargs, ): r"""Create a file handler for file-like I/O to or from this blob. @@ -3778,6 +4005,29 @@ def open( which do not provide checksums to validate. See https://cloud.google.com/storage/docs/hashes-etags for details. + See a [code sample](https://github.com/googleapis/python-storage/blob/main/samples/snippets/storage_fileio_write_read.py). + + Keyword arguments to pass to the underlying API calls. 
+ For both uploads and downloads, the following arguments are + supported: + + - ``if_generation_match`` + - ``if_generation_not_match`` + - ``if_metageneration_match`` + - ``if_metageneration_not_match`` + - ``timeout`` + - ``retry`` + + For downloads only, the following additional arguments are supported: + + - ``raw_download`` + + For uploads only, the following additional arguments are supported: + + - ``content_type`` + - ``predefined_acl`` + - ``checksum`` + :type mode: str :param mode: (Optional) A mode string, as per standard Python `open()` semantics.The first @@ -3832,53 +4082,9 @@ def open( newline mode" and writes use the system default. See the Python 'io' module documentation for 'io.TextIOWrapper' for details. - :param kwargs: - Keyword arguments to pass to the underlying API calls. - For both uploads and downloads, the following arguments are - supported: - - - ``if_generation_match`` - - ``if_generation_not_match`` - - ``if_metageneration_match`` - - ``if_metageneration_not_match`` - - ``timeout`` - - ``retry`` - - For downloads only, the following additional arguments are supported: - - - ``raw_download`` - - For uploads only, the following additional arguments are supported: - - - ``content_type`` - - ``num_retries`` - - ``predefined_acl`` - - ``checksum`` - - .. note:: - - ``num_retries`` is supported for backwards-compatibility - reasons only; please use ``retry`` with a Retry object or - ConditionalRetryPolicy instead. - :returns: A 'BlobReader' or 'BlobWriter' from 'google.cloud.storage.fileio', or an 'io.TextIOWrapper' around one of those classes, depending on the 'mode' argument. - - Example: - Read from a text blob by using open() as context manager. - - Using bucket.get_blob() fetches metadata such as the generation, - which prevents race conditions in case the blob is modified. - - >>> from google.cloud import storage - >>> client = storage.Client() - >>> bucket = client.bucket("bucket-name") - - >>> blob = bucket.blob("blob-name.txt") - >>> with blob.open("rt") as f: - >>> print(f.read()) - """ if mode == "rb": if encoding or errors or newline: @@ -3929,51 +4135,47 @@ def open( cache_control = _scalar_property("cacheControl") """HTTP 'Cache-Control' header for this object. - See `RFC 7234`_ and `API reference docs`_. + See [`RFC 7234`](https://tools.ietf.org/html/rfc7234#section-5.2) + and [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects). :rtype: str or ``NoneType`` - .. _RFC 7234: https://tools.ietf.org/html/rfc7234#section-5.2 """ content_disposition = _scalar_property("contentDisposition") """HTTP 'Content-Disposition' header for this object. - See `RFC 6266`_ and `API reference docs`_. + See [`RFC 6266`](https://tools.ietf.org/html/rfc7234#section-5.2) and + [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects). :rtype: str or ``NoneType`` - - .. _RFC 6266: https://tools.ietf.org/html/rfc7234#section-5.2 """ content_encoding = _scalar_property("contentEncoding") """HTTP 'Content-Encoding' header for this object. - See `RFC 7231`_ and `API reference docs`_. 
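The open() docstring now points at the external fileio code sample instead of embedding a doctest. The gist of that sample, with assumed names:

    from google.cloud import storage

    client = storage.Client()
    blob = client.bucket("my-bucket").blob("notes.txt")

    # Text-mode write goes through a BlobWriter wrapped in io.TextIOWrapper.
    with blob.open("w", content_type="text/plain") as writer:
        writer.write("Hello from BlobWriter\n")

    # Text-mode read; preconditions, timeout and retry can be passed as kwargs.
    with blob.open("r") as reader:
        print(reader.read())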
+ See [`RFC 7231`](https://tools.ietf.org/html/rfc7231#section-3.1.2.2) and + [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects). :rtype: str or ``NoneType`` - - .. _RFC 7231: https://tools.ietf.org/html/rfc7231#section-3.1.2.2 """ content_language = _scalar_property("contentLanguage") """HTTP 'Content-Language' header for this object. - See `BCP47`_ and `API reference docs`_. + See [`BCP47`](https://tools.ietf.org/html/bcp47) and + [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects). :rtype: str or ``NoneType`` - - .. _BCP47: https://tools.ietf.org/html/bcp47 """ content_type = _scalar_property(_CONTENT_TYPE_FIELD) """HTTP 'Content-Type' header for this object. - See `RFC 2616`_ and `API reference docs`_. + See [`RFC 2616`](https://tools.ietf.org/html/rfc2616#section-14.17) and + [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects). :rtype: str or ``NoneType`` - - .. _RFC 2616: https://tools.ietf.org/html/rfc2616#section-14.17 """ crc32c = _scalar_property("crc32c") @@ -3982,30 +4184,173 @@ def open( This returns the blob's CRC32C checksum. To retrieve the value, first use a reload method of the Blob class which loads the blob's properties from the server. - See `RFC 4960`_ and `API reference docs`_. + See [`RFC 4960`](https://tools.ietf.org/html/rfc4960#appendix-B) and + [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects). If not set before upload, the server will compute the hash. :rtype: str or ``NoneType`` + """ + + def _prep_and_do_download( + self, + file_obj, + client=None, + start=None, + end=None, + raw_download=False, + if_etag_match=None, + if_etag_not_match=None, + if_generation_match=None, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + timeout=_DEFAULT_TIMEOUT, + checksum="auto", + retry=DEFAULT_RETRY, + command=None, + ): + """Download the contents of a blob object into a file-like object. + + See https://cloud.google.com/storage/docs/downloading-objects + + If :attr:`user_project` is set on the bucket, bills the API request + to that project. - .. _RFC 4960: https://tools.ietf.org/html/rfc4960#appendix-B + :type file_obj: file + :param file_obj: A file handle to which to write the blob's data. - Example: - Retrieve the crc32c hash of blob. + :type client: :class:`~google.cloud.storage.client.Client` + :param client: + (Optional) The client to use. If not passed, falls back to the + ``client`` stored on the blob's bucket. - >>> from google.cloud import storage - >>> client = storage.Client() - >>> bucket = client.get_bucket("my-bucket-name") - >>> blob = bucket.blob('my-blob') + :type start: int + :param start: (Optional) The first byte in a range to be downloaded. - >>> blob.crc32c # return None - >>> blob.reload() - >>> blob.crc32c # return crc32c hash + :type end: int + :param end: (Optional) The last byte in a range to be downloaded. 
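The start/end parameters of _prep_and_do_download are what power ranged reads on the public download methods. For example, with assumed names:

    from google.cloud import storage

    client = storage.Client()
    blob = client.bucket("my-bucket").blob("large.bin")

    # Both offsets are inclusive, so this fetches the first 1024 bytes.
    first_kb = blob.download_as_bytes(start=0, end=1023)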
- >>> # Another approach - >>> blob = bucket.get_blob('my-blob') - >>> blob.crc32c # return crc32c hash - """ + :type raw_download: bool + :param raw_download: + (Optional) If true, download the object without any expansion. + + :type if_etag_match: Union[str, Set[str]] + :param if_etag_match: + (Optional) See :ref:`using-if-etag-match` + + :type if_etag_not_match: Union[str, Set[str]] + :param if_etag_not_match: + (Optional) See :ref:`using-if-etag-not-match` + + :type if_generation_match: long + :param if_generation_match: + (Optional) See :ref:`using-if-generation-match` + + :type if_generation_not_match: long + :param if_generation_not_match: + (Optional) See :ref:`using-if-generation-not-match` + + :type if_metageneration_match: long + :param if_metageneration_match: + (Optional) See :ref:`using-if-metageneration-match` + + :type if_metageneration_not_match: long + :param if_metageneration_not_match: + (Optional) See :ref:`using-if-metageneration-not-match` + + :type timeout: float or tuple + :param timeout: + (Optional) The amount of time, in seconds, to wait + for the server response. See: :ref:`configuring_timeouts` + + :type checksum: str + :param checksum: + (Optional) The type of checksum to compute to verify the integrity + of the object. The response headers must contain a checksum of the + requested type. If the headers lack an appropriate checksum (for + instance in the case of transcoded or ranged downloads where the + remote service does not know the correct checksum, including + downloads where chunk_size is set) an INFO-level log will be + emitted. Supported values are "md5", "crc32c", "auto" and None. The + default is "auto", which will try to detect if the C extension for + crc32c is installed and fall back to md5 otherwise. + + :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy + :param retry: (Optional) How to retry the RPC. A None value will disable + retries. A google.api_core.retry.Retry value will enable retries, + and the object will define retriable response codes and errors and + configure backoff and timeout options. + + A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a + Retry object and activates it only if certain conditions are met. + This class exists to provide safe defaults for RPC calls that are + not technically safe to retry normally (due to potential data + duplication or other side-effects) but become safe to retry if a + condition such as if_metageneration_match is set. + + See the retry.py source code and docstrings in this package + (google.cloud.storage.retry) for information on retry types and how + to configure them. + + :type command: str + :param command: + (Optional) Information about which interface for download was used, + to be included in the X-Goog-API-Client header. Please leave as None + unless otherwise directed. + """ + # Handle ConditionalRetryPolicy. + if isinstance(retry, ConditionalRetryPolicy): + # Conditional retries are designed for non-media calls, which change + # arguments into query_params dictionaries. Media operations work + # differently, so here we make a "fake" query_params to feed to the + # ConditionalRetryPolicy. 
+ query_params = { + "ifGenerationMatch": if_generation_match, + "ifMetagenerationMatch": if_metageneration_match, + } + retry = retry.get_retry_policy_if_conditions_met(query_params=query_params) + + client = self._require_client(client) + + download_url = self._get_download_url( + client, + if_generation_match=if_generation_match, + if_generation_not_match=if_generation_not_match, + if_metageneration_match=if_metageneration_match, + if_metageneration_not_match=if_metageneration_not_match, + ) + headers = _get_encryption_headers(self._encryption_key) + headers["accept-encoding"] = "gzip" + _add_etag_match_headers( + headers, + if_etag_match=if_etag_match, + if_etag_not_match=if_etag_not_match, + ) + # Add any client attached custom headers to be sent with the request. + headers = { + **_get_default_headers(client._connection.user_agent, command=command), + **headers, + **client._extra_headers, + } + + transport = client._http + + try: + self._do_download( + transport, + file_obj, + download_url, + headers, + start, + end, + raw_download, + timeout=timeout, + checksum=checksum, + retry=retry, + ) + except InvalidResponse as exc: + _raise_from_invalid_response(exc) @property def component_count(self): @@ -4027,20 +4372,19 @@ def component_count(self): def etag(self): """Retrieve the ETag for the object. - See `RFC 2616 (etags)`_ and `API reference docs`_. + See [`RFC 2616 (etags)`](https://tools.ietf.org/html/rfc2616#section-3.11) and + [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects). :rtype: str or ``NoneType`` :returns: The blob etag or ``None`` if the blob's resource has not been loaded from the server. - - .. _RFC 2616 (etags): https://tools.ietf.org/html/rfc2616#section-3.11 """ return self._properties.get("etag") event_based_hold = _scalar_property("eventBasedHold") """Is an event-based hold active on the object? - See `API reference docs`_. + See [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects). If the property is not set locally, returns :data:`None`. @@ -4081,29 +4425,12 @@ def id(self): This returns the blob's MD5 hash. To retrieve the value, first use a reload method of the Blob class which loads the blob's properties from the server. - See `RFC 1321`_ and `API reference docs`_. + See [`RFC 1321`](https://tools.ietf.org/html/rfc1321) and + [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects). If not set before upload, the server will compute the hash. :rtype: str or ``NoneType`` - - .. _RFC 1321: https://tools.ietf.org/html/rfc1321 - - Example: - Retrieve the md5 hash of blob. - - >>> from google.cloud import storage - >>> client = storage.Client() - >>> bucket = client.get_bucket("my-bucket-name") - >>> blob = bucket.blob('my-blob') - - >>> blob.md5_hash # return None - >>> blob.reload() - >>> blob.md5_hash # return md5 hash - - >>> # Another approach - >>> blob = bucket.get_blob('my-blob') - >>> blob.md5_hash # return md5 hash """ @property @@ -4263,7 +4590,7 @@ def kms_key_name(self, value): temporary_hold = _scalar_property("temporaryHold") """Is a temporary hold active on the object? - See `API reference docs`_. 
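Both the upload and download paths now build their request headers by unpacking several dictionaries in sequence: computed defaults first, then request-specific headers, then the client's _extra_headers, so on a key collision the client-level extras win. A tiny sketch of that precedence with made-up header values:

    defaults = {"x-goog-api-client": "example-agent", "accept-encoding": "gzip"}
    per_request = {"x-goog-encryption-algorithm": "AES256"}
    client_extras = {"x-custom-audit-id": "abc123"}  # illustrative header

    headers = {**defaults, **per_request, **client_extras}
    # The right-most mapping wins on duplicate keys, so client_extras override defaults.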
+ See [`API reference docs`](https://cloud.google.com/storage/docs/json_api/v1/objects). If the property is not set locally, returns :data:`None`. @@ -4351,6 +4678,42 @@ def custom_time(self, value): self._patch_property("customTime", value) + @property + def retention(self): + """Retrieve the retention configuration for this object. + + :rtype: :class:`Retention` + :returns: an instance for managing the object's retention configuration. + """ + info = self._properties.get("retention", {}) + return Retention.from_api_repr(info, self) + + @property + def soft_delete_time(self): + """If this object has been soft-deleted, returns the time at which it became soft-deleted. + + :rtype: :class:`datetime.datetime` or ``NoneType`` + :returns: + (readonly) The time that the object became soft-deleted. + Note this property is only set for soft-deleted objects. + """ + soft_delete_time = self._properties.get("softDeleteTime") + if soft_delete_time is not None: + return _rfc3339_nanos_to_datetime(soft_delete_time) + + @property + def hard_delete_time(self): + """If this object has been soft-deleted, returns the time at which it will be permanently deleted. + + :rtype: :class:`datetime.datetime` or ``NoneType`` + :returns: + (readonly) The time that the object will be permanently deleted. + Note this property is only set for soft-deleted objects. + """ + hard_delete_time = self._properties.get("hardDeleteTime") + if hard_delete_time is not None: + return _rfc3339_nanos_to_datetime(hard_delete_time) + def _get_host_name(connection): """Returns the host name from the given connection. @@ -4441,7 +4804,7 @@ def _maybe_rewind(stream, rewind=False): def _raise_from_invalid_response(error): """Re-wrap and raise an ``InvalidResponse`` exception. - :type error: :exc:`google.resumable_media.InvalidResponse` + :type error: :exc:`google.cloud.storage.exceptions.InvalidResponse` :param error: A caught exception from the ``google-resumable-media`` library. @@ -4457,9 +4820,7 @@ def _raise_from_invalid_response(error): else: error_message = str(error) - message = u"{method} {url}: {error}".format( - method=response.request.method, url=response.request.url, error=error_message - ) + message = f"{response.request.method} {response.request.url}: {error_message}" raise exceptions.from_http_status(response.status_code, message, response=response) @@ -4483,3 +4844,126 @@ def _add_query_parameters(base_url, name_value_pairs): query = parse_qsl(query) query.extend(name_value_pairs) return urlunsplit((scheme, netloc, path, urlencode(query), frag)) + + +class Retention(dict): + """Map an object's retention configuration. + + :type blob: :class:`Blob` + :params blob: blob for which this retention configuration applies to. + + :type mode: str or ``NoneType`` + :params mode: + (Optional) The mode of the retention configuration, which can be either Unlocked or Locked. + See: https://cloud.google.com/storage/docs/object-lock + + :type retain_until_time: :class:`datetime.datetime` or ``NoneType`` + :params retain_until_time: + (Optional) The earliest time that the object can be deleted or replaced, which is the + retention configuration set for this object. 
+ + :type retention_expiration_time: :class:`datetime.datetime` or ``NoneType`` + :params retention_expiration_time: + (Optional) The earliest time that the object can be deleted, which depends on any + retention configuration set for the object and any retention policy set for the bucket + that contains the object. This value should normally only be set by the back-end API. + """ + + def __init__( + self, + blob, + mode=None, + retain_until_time=None, + retention_expiration_time=None, + ): + data = {"mode": mode} + if retain_until_time is not None: + retain_until_time = _datetime_to_rfc3339(retain_until_time) + data["retainUntilTime"] = retain_until_time + + if retention_expiration_time is not None: + retention_expiration_time = _datetime_to_rfc3339(retention_expiration_time) + data["retentionExpirationTime"] = retention_expiration_time + + super(Retention, self).__init__(data) + self._blob = blob + + @classmethod + def from_api_repr(cls, resource, blob): + """Factory: construct instance from resource. + + :type blob: :class:`Blob` + :params blob: Blob for which this retention configuration applies to. + + :type resource: dict + :param resource: mapping as returned from API call. + + :rtype: :class:`Retention` + :returns: Retention configuration created from resource. + """ + instance = cls(blob) + instance.update(resource) + return instance + + @property + def blob(self): + """Blob for which this retention configuration applies to. + + :rtype: :class:`Blob` + :returns: the instance's blob. + """ + return self._blob + + @property + def mode(self): + """The mode of the retention configuration. Options are 'Unlocked' or 'Locked'. + + :rtype: string + :returns: The mode of the retention configuration, which can be either set to 'Unlocked' or 'Locked'. + """ + return self.get("mode") + + @mode.setter + def mode(self, value): + self["mode"] = value + self.blob._patch_property("retention", self) + + @property + def retain_until_time(self): + """The earliest time that the object can be deleted or replaced, which is the + retention configuration set for this object. + + :rtype: :class:`datetime.datetime` or ``NoneType`` + :returns: Datetime object parsed from RFC3339 valid timestamp, or + ``None`` if the blob's resource has not been loaded from + the server (see :meth:`reload`). + """ + value = self.get("retainUntilTime") + if value is not None: + return _rfc3339_nanos_to_datetime(value) + + @retain_until_time.setter + def retain_until_time(self, value): + """Set the retain_until_time for the object retention configuration. + + :type value: :class:`datetime.datetime` + :param value: The earliest time that the object can be deleted or replaced. + """ + if value is not None: + value = _datetime_to_rfc3339(value) + self["retainUntilTime"] = value + self.blob._patch_property("retention", self) + + @property + def retention_expiration_time(self): + """The earliest time that the object can be deleted, which depends on any + retention configuration set for the object and any retention policy set for + the bucket that contains the object. + + :rtype: :class:`datetime.datetime` or ``NoneType`` + :returns: + (readonly) The earliest time that the object can be deleted. 
+ """ + retention_expiration_time = self.get("retentionExpirationTime") + if retention_expiration_time is not None: + return _rfc3339_nanos_to_datetime(retention_expiration_time) diff --git a/google/cloud/storage/bucket.py b/google/cloud/storage/bucket.py index 6f738976b..10156c795 100644 --- a/google/cloud/storage/bucket.py +++ b/google/cloud/storage/bucket.py @@ -23,19 +23,22 @@ from google.api_core import datetime_helpers from google.cloud._helpers import _datetime_to_rfc3339 -from google.cloud._helpers import _NOW from google.cloud._helpers import _rfc3339_nanos_to_datetime from google.cloud.exceptions import NotFound from google.api_core.iam import Policy from google.cloud.storage import _signing from google.cloud.storage._helpers import _add_etag_match_headers from google.cloud.storage._helpers import _add_generation_match_parameters +from google.cloud.storage._helpers import _NOW from google.cloud.storage._helpers import _PropertyMixin +from google.cloud.storage._helpers import _UTC from google.cloud.storage._helpers import _scalar_property from google.cloud.storage._helpers import _validate_name from google.cloud.storage._signing import generate_signed_url_v2 from google.cloud.storage._signing import generate_signed_url_v4 from google.cloud.storage._helpers import _bucket_bound_hostname_url +from google.cloud.storage._helpers import _virtual_hosted_style_base_url +from google.cloud.storage._opentelemetry_tracing import create_trace_span from google.cloud.storage.acl import BucketACL from google.cloud.storage.acl import DefaultObjectACL from google.cloud.storage.blob import Blob @@ -82,7 +85,9 @@ "valid before the bucket is created. Instead, pass the location " "to `Bucket.create`." ) -_API_ACCESS_ENDPOINT = "https://storage.googleapis.com" +_FROM_STRING_MESSAGE = ( + "Bucket.from_string() is deprecated. " "Use Bucket.from_uri() instead." +) def _blobs_page_start(iterator, page, response): @@ -163,11 +168,19 @@ class LifecycleRuleConditions(dict): rule action to versioned items with at least one newer version. + :type matches_prefix: list(str) + :param matches_prefix: (Optional) Apply rule action to items which + any prefix matches the beginning of the item name. + :type matches_storage_class: list(str), one or more of :attr:`Bucket.STORAGE_CLASSES`. - :param matches_storage_class: (Optional) Apply rule action to items which + :param matches_storage_class: (Optional) Apply rule action to items whose storage class matches this value. + :type matches_suffix: list(str) + :param matches_suffix: (Optional) Apply rule action to items which + any suffix matches the end of the item name. + :type number_of_newer_versions: int :param number_of_newer_versions: (Optional) Apply rule action to versioned items having N newer versions. 
@@ -211,6 +224,8 @@ def __init__( custom_time_before=None, days_since_noncurrent_time=None, noncurrent_time_before=None, + matches_prefix=None, + matches_suffix=None, _factory=False, ): conditions = {} @@ -236,15 +251,21 @@ def __init__( if custom_time_before is not None: conditions["customTimeBefore"] = custom_time_before.isoformat() - if not _factory and not conditions: - raise ValueError("Supply at least one condition") - if days_since_noncurrent_time is not None: conditions["daysSinceNoncurrentTime"] = days_since_noncurrent_time if noncurrent_time_before is not None: conditions["noncurrentTimeBefore"] = noncurrent_time_before.isoformat() + if matches_prefix is not None: + conditions["matchesPrefix"] = matches_prefix + + if matches_suffix is not None: + conditions["matchesSuffix"] = matches_suffix + + if not _factory and not conditions: + raise ValueError("Supply at least one condition") + super(LifecycleRuleConditions, self).__init__(conditions) @classmethod @@ -278,11 +299,21 @@ def is_live(self): """Conditon's 'is_live' value.""" return self.get("isLive") + @property + def matches_prefix(self): + """Conditon's 'matches_prefix' value.""" + return self.get("matchesPrefix") + @property def matches_storage_class(self): """Conditon's 'matches_storage_class' value.""" return self.get("matchesStorageClass") + @property + def matches_suffix(self): + """Conditon's 'matches_suffix' value.""" + return self.get("matchesSuffix") + @property def number_of_newer_versions(self): """Conditon's 'number_of_newer_versions' value.""" @@ -323,7 +354,7 @@ class LifecycleRuleDelete(dict): def __init__(self, **kw): conditions = LifecycleRuleConditions(**kw) rule = {"action": {"type": "Delete"}, "condition": dict(conditions)} - super(LifecycleRuleDelete, self).__init__(rule) + super().__init__(rule) @classmethod def from_api_repr(cls, resource): @@ -356,7 +387,7 @@ def __init__(self, storage_class, **kw): "action": {"type": "SetStorageClass", "storageClass": storage_class}, "condition": dict(conditions), } - super(LifecycleRuleSetStorageClass, self).__init__(rule) + super().__init__(rule) @classmethod def from_api_repr(cls, resource): @@ -365,7 +396,7 @@ def from_api_repr(cls, resource): :type resource: dict :param resource: mapping as returned from API call. - :rtype: :class:`LifecycleRuleDelete` + :rtype: :class:`LifecycleRuleSetStorageClass` :returns: Instance created from resource. """ action = resource["action"] @@ -374,6 +405,38 @@ def from_api_repr(cls, resource): return instance +class LifecycleRuleAbortIncompleteMultipartUpload(dict): + """Map a rule aborting incomplete multipart uploads of matching items. + + The "age" lifecycle condition is the only supported condition for this rule. + + :type kw: dict + :params kw: arguments passed to :class:`LifecycleRuleConditions`. + """ + + def __init__(self, **kw): + conditions = LifecycleRuleConditions(**kw) + rule = { + "action": {"type": "AbortIncompleteMultipartUpload"}, + "condition": dict(conditions), + } + super().__init__(rule) + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct instance from resource. + + :type resource: dict + :param resource: mapping as returned from API call. + + :rtype: :class:`LifecycleRuleAbortIncompleteMultipartUpload` + :returns: Instance created from resource. 
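A minimal sketch combining the new ``matches_prefix`` / ``matches_suffix`` conditions with the abort rule above (bucket name is a placeholder); both rule classes accept the same ``LifecycleRuleConditions`` keywords:

.. code-block:: python

    from google.cloud import storage
    from google.cloud.storage.bucket import (
        LifecycleRuleAbortIncompleteMultipartUpload,
        LifecycleRuleDelete,
    )

    client = storage.Client()
    bucket = client.get_bucket("my-bucket")  # placeholder name

    rules = list(bucket.lifecycle_rules)
    # Delete temporary exports older than 30 days.
    rules.append(
        LifecycleRuleDelete(age=30, matches_prefix=["tmp/"], matches_suffix=[".bak"])
    )
    # Abort incomplete multipart uploads after one day; "age" is the only
    # supported condition for this rule type.
    rules.append(LifecycleRuleAbortIncompleteMultipartUpload(age=1))

    bucket.lifecycle_rules = rules
    bucket.patch()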
+ """ + instance = cls(_factory=True) + instance.update(resource) + return instance + + _default = object() @@ -416,7 +479,6 @@ def __init__( bucket_policy_only_locked_time=_default, ): if bucket_policy_only_enabled is not _default: - if uniform_bucket_level_access_enabled is not _default: raise ValueError(_UBLA_BPO_ENABLED_MESSAGE) @@ -424,7 +486,6 @@ def __init__( uniform_bucket_level_access_enabled = bucket_policy_only_enabled if bucket_policy_only_locked_time is not _default: - if uniform_bucket_level_access_locked_time is not _default: raise ValueError(_UBLA_BPO_LOCK_TIME_MESSAGE) @@ -568,6 +629,10 @@ class Bucket(_PropertyMixin): :type user_project: str :param user_project: (Optional) the project ID to be billed for API requests made via this instance. + + :type generation: int + :param generation: (Optional) If present, selects a specific revision of + this bucket. """ _MAX_OBJECTS_FOR_ITERATION = 256 @@ -601,7 +666,7 @@ class Bucket(_PropertyMixin): ) """Allowed values for :attr:`location_type`.""" - def __init__(self, client, name=None, user_project=None): + def __init__(self, client, name=None, user_project=None, generation=None): """ property :attr:`name` Get the bucket's name. @@ -614,8 +679,11 @@ def __init__(self, client, name=None, user_project=None): self._label_removals = set() self._user_project = user_project + if generation is not None: + self._properties["generation"] = generation + def __repr__(self): - return "" % (self.name,) + return f"" @property def client(self): @@ -631,6 +699,29 @@ def _set_properties(self, value): self._label_removals.clear() return super(Bucket, self)._set_properties(value) + @property + def rpo(self): + """Get the RPO (Recovery Point Objective) of this bucket + + See: https://cloud.google.com/storage/docs/managing-turbo-replication + + "ASYNC_TURBO" or "DEFAULT" + :rtype: str + """ + return self._properties.get("rpo") + + @rpo.setter + def rpo(self, value): + """ + Set the RPO (Recovery Point Objective) of this bucket. + + See: https://cloud.google.com/storage/docs/managing-turbo-replication + + :type value: str + :param value: "ASYNC_TURBO" or "DEFAULT" + """ + self._patch_property("rpo", value) + @property def user_project(self): """Project ID to be billed for API requests made via this bucket. @@ -645,10 +736,61 @@ def user_project(self): """ return self._user_project + @property + def generation(self): + """Retrieve the generation for the bucket. + + :rtype: int or ``NoneType`` + :returns: The generation of the bucket or ``None`` if the bucket's + resource has not been loaded from the server. + """ + generation = self._properties.get("generation") + if generation is not None: + return int(generation) + + @property + def soft_delete_time(self): + """If this bucket has been soft-deleted, returns the time at which it became soft-deleted. + + :rtype: :class:`datetime.datetime` or ``NoneType`` + :returns: + (readonly) The time that the bucket became soft-deleted. + Note this property is only set for soft-deleted buckets. + """ + soft_delete_time = self._properties.get("softDeleteTime") + if soft_delete_time is not None: + return _rfc3339_nanos_to_datetime(soft_delete_time) + + @property + def hard_delete_time(self): + """If this bucket has been soft-deleted, returns the time at which it will be permanently deleted. + + :rtype: :class:`datetime.datetime` or ``NoneType`` + :returns: + (readonly) The time that the bucket will be permanently deleted. 
+ Note this property is only set for soft-deleted buckets. + """ + hard_delete_time = self._properties.get("hardDeleteTime") + if hard_delete_time is not None: + return _rfc3339_nanos_to_datetime(hard_delete_time) + + @property + def _query_params(self): + """Default query parameters.""" + params = super()._query_params + return params + @classmethod - def from_string(cls, uri, client=None): + def from_uri(cls, uri, client=None): """Get a constructor for bucket object by URI. + .. code-block:: python + + from google.cloud import storage + from google.cloud.storage.bucket import Bucket + client = storage.Client() + bucket = Bucket.from_uri("gs://bucket", client=client) + :type uri: str :param uri: The bucket uri pass to get bucket object. @@ -659,14 +801,6 @@ def from_string(cls, uri, client=None): :rtype: :class:`google.cloud.storage.bucket.Bucket` :returns: The bucket object created. - - Example: - Get a constructor for bucket object by URI.. - - >>> from google.cloud import storage - >>> from google.cloud.storage.bucket import Bucket - >>> client = storage.Client() - >>> bucket = Bucket.from_string("gs://bucket", client=client) """ scheme, netloc, path, query, frag = urlsplit(uri) @@ -675,6 +809,34 @@ def from_string(cls, uri, client=None): return cls(client, name=netloc) + @classmethod + def from_string(cls, uri, client=None): + """Get a constructor for bucket object by URI. + + .. note:: + Deprecated alias for :meth:`from_uri`. + + .. code-block:: python + + from google.cloud import storage + from google.cloud.storage.bucket import Bucket + client = storage.Client() + bucket = Bucket.from_string("gs://bucket", client=client) + + :type uri: str + :param uri: The bucket uri pass to get bucket object. + + :type client: :class:`~google.cloud.storage.client.Client` or + ``NoneType`` + :param client: (Optional) The client to use. Application code should + *always* pass ``client``. + + :rtype: :class:`google.cloud.storage.bucket.Bucket` + :returns: The bucket object created. + """ + warnings.warn(_FROM_STRING_MESSAGE, PendingDeprecationWarning, stacklevel=2) + return Bucket.from_uri(uri=uri, client=client) + def blob( self, blob_name, @@ -748,6 +910,7 @@ def notification( notification_id=notification_id, ) + @create_trace_span(name="Storage.Bucket.exists") def exists( self, client=None, @@ -832,6 +995,7 @@ def exists( return False return True + @create_trace_span(name="Storage.Bucket.create") def create( self, client=None, @@ -839,13 +1003,11 @@ def create( location=None, predefined_acl=None, predefined_default_object_acl=None, + enable_object_retention=False, timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY, ): - """DEPRECATED. Creates current bucket. - - .. note:: - Direct use of this method is deprecated. Use ``Client.create_bucket()`` instead. + """Creates current bucket. If the bucket already exists, will raise :class:`google.cloud.exceptions.Conflict`. @@ -881,6 +1043,11 @@ def create( (Optional) Name of predefined ACL to apply to bucket's objects. See: https://cloud.google.com/storage/docs/access-control/lists#predefined-acl + :type enable_object_retention: bool + :param enable_object_retention: + (Optional) Whether object retention should be enabled on this bucket. See: + https://cloud.google.com/storage/docs/object-lock + :type timeout: float or tuple :param timeout: (Optional) The amount of time, in seconds, to wait @@ -890,12 +1057,6 @@ def create( :param retry: (Optional) How to retry the RPC. 
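A sketch of the URI helper and the new creation flag (names and URI are placeholders); ``Bucket.create`` now delegates to ``Client.create_bucket``, which accepts the same ``enable_object_retention`` argument:

.. code-block:: python

    from google.cloud import storage
    from google.cloud.storage.bucket import Bucket

    client = storage.Client()

    # Turn a gs:// URI into a Bucket handle (no API call is made here).
    bucket = Bucket.from_uri("gs://my-bucket", client=client)  # placeholder URI

    # Create a bucket whose objects may carry retention configurations.
    retention_bucket = client.create_bucket(
        "my-retention-bucket",  # placeholder name
        enable_object_retention=True,
    )
    print(retention_bucket.object_retention_mode)  # expected: "Enabled"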
See: :ref:`configuring_retries` """ - warnings.warn( - "Bucket.create() is deprecated and will be removed in future." - "Use Client.create_bucket() instead.", - PendingDeprecationWarning, - stacklevel=1, - ) client = self._require_client(client) client.create_bucket( @@ -905,10 +1066,12 @@ def create( location=location, predefined_acl=predefined_acl, predefined_default_object_acl=predefined_default_object_acl, + enable_object_retention=enable_object_retention, timeout=timeout, retry=retry, ) + @create_trace_span(name="Storage.Bucket.update") def update( self, client=None, @@ -953,6 +1116,7 @@ def update( retry=retry, ) + @create_trace_span(name="Storage.Bucket.reload") def reload( self, client=None, @@ -963,6 +1127,7 @@ def reload( if_metageneration_match=None, if_metageneration_not_match=None, retry=DEFAULT_RETRY, + soft_deleted=None, ): """Reload properties from Cloud Storage. @@ -1002,6 +1167,13 @@ def reload( :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy :param retry: (Optional) How to retry the RPC. See: :ref:`configuring_retries` + + :type soft_deleted: bool + :param soft_deleted: (Optional) If True, looks for a soft-deleted + bucket. Will only return the bucket metadata if the bucket exists + and is in a soft-deleted state. The bucket ``generation`` must be + set if ``soft_deleted`` is set to True. + See: https://cloud.google.com/storage/docs/soft-delete """ super(Bucket, self).reload( client=client, @@ -1012,8 +1184,10 @@ def reload( if_metageneration_match=if_metageneration_match, if_metageneration_not_match=if_metageneration_not_match, retry=retry, + soft_deleted=soft_deleted, ) + @create_trace_span(name="Storage.Bucket.patch") def patch( self, client=None, @@ -1097,6 +1271,7 @@ def path(self): return self.path_helper(self.name) + @create_trace_span(name="Storage.Bucket.getBlob") def get_blob( self, blob_name, @@ -1111,16 +1286,13 @@ def get_blob( if_metageneration_not_match=None, timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY, - **kwargs + soft_deleted=None, + **kwargs, ): """Get a blob object by name. - This will return None if the blob doesn't exist: - - .. literalinclude:: snippets.py - :start-after: [START get_blob] - :end-before: [END get_blob] - :dedent: 4 + See a [code sample](https://cloud.google.com/storage/docs/samples/storage-get-metadata#storage_get_metadata-python) + on how to retrieve metadata of an object. If :attr:`user_project` is set, bills the API request to that project. @@ -1175,6 +1347,13 @@ def get_blob( :param retry: (Optional) How to retry the RPC. See: :ref:`configuring_retries` + :type soft_deleted: bool + :param soft_deleted: + (Optional) If True, looks for a soft-deleted object. Will only return + the object metadata if the object exists and is in a soft-deleted state. + Object ``generation`` is required if ``soft_deleted`` is set to True. + See: https://cloud.google.com/storage/docs/soft-delete + :param kwargs: Keyword arguments to pass to the :class:`~google.cloud.storage.blob.Blob` constructor. @@ -1186,7 +1365,7 @@ def get_blob( name=blob_name, encryption_key=encryption_key, generation=generation, - **kwargs + **kwargs, ) try: # NOTE: This will not fail immediately in a batch. 
However, when @@ -1202,12 +1381,14 @@ def get_blob( if_metageneration_match=if_metageneration_match, if_metageneration_not_match=if_metageneration_not_match, retry=retry, + soft_deleted=soft_deleted, ) except NotFound: return None else: return blob + @create_trace_span(name="Storage.Bucket.listBlobs") def list_blobs( self, max_results=None, @@ -1223,11 +1404,12 @@ def list_blobs( client=None, timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY, + match_glob=None, + include_folders_as_prefixes=None, + soft_deleted=None, + page_size=None, ): - """DEPRECATED. Return an iterator used to find blobs in the bucket. - - .. note:: - Direct use of this method is deprecated. Use ``Client.list_blobs`` instead. + """Return an iterator used to find blobs in the bucket. If :attr:`user_project` is set, bills the API request to that project. @@ -1301,18 +1483,32 @@ def list_blobs( :param retry: (Optional) How to retry the RPC. See: :ref:`configuring_retries` + :type match_glob: str + :param match_glob: + (Optional) A glob pattern used to filter results (for example, foo*bar). + The string value must be UTF-8 encoded. See: + https://cloud.google.com/storage/docs/json_api/v1/objects/list#list-object-glob + + :type include_folders_as_prefixes: bool + (Optional) If true, includes Folders and Managed Folders in the set of + ``prefixes`` returned by the query. Only applicable if ``delimiter`` is set to /. + See: https://cloud.google.com/storage/docs/managed-folders + + :type soft_deleted: bool + :param soft_deleted: + (Optional) If true, only soft-deleted objects will be listed as distinct results in order of increasing + generation number. This parameter can only be used successfully if the bucket has a soft delete policy. + Note ``soft_deleted`` and ``versions`` cannot be set to True simultaneously. See: + https://cloud.google.com/storage/docs/soft-delete + + :type page_size: int + :param page_size: + (Optional) Maximum number of blobs to return in each page. + Defaults to a value set by the API. + :rtype: :class:`~google.api_core.page_iterator.Iterator` :returns: Iterator of all :class:`~google.cloud.storage.blob.Blob` in this bucket matching the arguments. - - Example: - List blobs in the bucket with user_project. 
- - >>> from google.cloud import storage - >>> client = storage.Client() - - >>> bucket = storage.Bucket(client, "my-bucket-name", user_project="my-project") - >>> all_blobs = list(client.list_blobs(bucket)) """ client = self._require_client(client) return client.list_blobs( @@ -1327,10 +1523,15 @@ def list_blobs( versions=versions, projection=projection, fields=fields, + page_size=page_size, timeout=timeout, retry=retry, + match_glob=match_glob, + include_folders_as_prefixes=include_folders_as_prefixes, + soft_deleted=soft_deleted, ) + @create_trace_span(name="Storage.Bucket.listNotifications") def list_notifications( self, client=None, timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY ): @@ -1360,11 +1561,15 @@ def list_notifications( client = self._require_client(client) path = self.path + "/notificationConfigs" iterator = client._list_resource( - path, _item_to_notification, timeout=timeout, retry=retry, + path, + _item_to_notification, + timeout=timeout, + retry=retry, ) iterator.bucket = self return iterator + @create_trace_span(name="Storage.Bucket.getNotification") def get_notification( self, notification_id, @@ -1374,8 +1579,8 @@ def get_notification( ): """Get Pub / Sub notification for this bucket. - See: - https://cloud.google.com/storage/docs/json_api/v1/notifications/get + See [API reference docs](https://cloud.google.com/storage/docs/json_api/v1/notifications/get) + and a [code sample](https://cloud.google.com/storage/docs/samples/storage-print-pubsub-bucket-notification#storage_print_pubsub_bucket_notification-python). If :attr:`user_project` is set, bills the API request to that project. @@ -1397,20 +1602,12 @@ def get_notification( :rtype: :class:`.BucketNotification` :returns: notification instance. - - Example: - Get notification using notification id. - - >>> from google.cloud import storage - >>> client = storage.Client() - >>> bucket = client.get_bucket('my-bucket-name') # API request. - >>> notification = bucket.get_notification(notification_id='id') # API request. - """ notification = self.notification(notification_id=notification_id) notification.reload(client=client, timeout=timeout, retry=retry) return notification + @create_trace_span(name="Storage.Bucket.delete") def delete( self, force=False, @@ -1433,7 +1630,8 @@ def delete( If ``force=True`` and the bucket contains more than 256 objects / blobs this will cowardly refuse to delete the objects (or the bucket). This is to prevent accidental bucket deletion and to prevent extremely long - runtime of this method. + runtime of this method. Also note that ``force=True`` is not supported + in a ``Batch`` context. If :attr:`user_project` is set, bills the API request to that project. @@ -1483,6 +1681,7 @@ def delete( client=client, timeout=timeout, retry=retry, + versions=True, ) ) if len(blobs) > self._MAX_OBJECTS_FOR_ITERATION: @@ -1501,6 +1700,7 @@ def delete( client=client, timeout=timeout, retry=retry, + preserve_generation=True, ) # We intentionally pass `_target_object=None` since a DELETE @@ -1514,6 +1714,7 @@ def delete( _target_object=None, ) + @create_trace_span(name="Storage.Bucket.deleteBlob") def delete_blob( self, blob_name, @@ -1524,20 +1725,10 @@ def delete_blob( if_metageneration_match=None, if_metageneration_not_match=None, timeout=_DEFAULT_TIMEOUT, - retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED, + retry=DEFAULT_RETRY, ): """Deletes a blob from the current bucket. 
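A minimal sketch of the new listing filters documented above (bucket name and glob pattern are placeholders):

.. code-block:: python

    from google.cloud import storage

    client = storage.Client()
    bucket = client.bucket("my-bucket")  # placeholder name

    # Only objects whose names match the glob are returned, 100 per page.
    for blob in bucket.list_blobs(match_glob="logs/**/*.json", page_size=100):
        print(blob.name)

    # Surface folders and managed folders as prefixes (requires delimiter="/").
    iterator = bucket.list_blobs(delimiter="/", include_folders_as_prefixes=True)
    blobs = list(iterator)  # consume the pages so prefixes are populated
    print(iterator.prefixes)

    # Soft-deleted objects are listed separately; cannot be combined with versions=True.
    for blob in bucket.list_blobs(soft_deleted=True):
        print(blob.name, blob.generation)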
- If the blob isn't found (backend 404), raises a - :class:`google.cloud.exceptions.NotFound`. - - For example: - - .. literalinclude:: snippets.py - :start-after: [START delete_blob] - :end-before: [END delete_blob] - :dedent: 4 - If :attr:`user_project` is set, bills the API request to that project. :type blob_name: str @@ -1574,18 +1765,26 @@ def delete_blob( for the server response. See: :ref:`configuring_timeouts` :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy - :param retry: - (Optional) How to retry the RPC. See: :ref:`configuring_retries` - - :raises: :class:`google.cloud.exceptions.NotFound` (to suppress - the exception, call ``delete_blobs``, passing a no-op - ``on_error`` callback, e.g.: - - .. literalinclude:: snippets.py - :start-after: [START delete_blobs] - :end-before: [END delete_blobs] - :dedent: 4 - + :param retry: (Optional) How to retry the RPC. A None value will disable + retries. A google.api_core.retry.Retry value will enable retries, + and the object will define retriable response codes and errors and + configure backoff and timeout options. + + A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a + Retry object and activates it only if certain conditions are met. + This class exists to provide safe defaults for RPC calls that are + not technically safe to retry normally (due to potential data + duplication or other side-effects) but become safe to retry if a + condition such as if_generation_match is set. + + See the retry.py source code and docstrings in this package + (google.cloud.storage.retry) for information on retry types and how + to configure them. + + :raises: :class:`google.cloud.exceptions.NotFound` Raises a NotFound + if the blob isn't found. To suppress + the exception, use :meth:`delete_blobs` by passing a no-op + ``on_error`` callback. """ client = self._require_client(client) blob = Blob(blob_name, bucket=self, generation=generation) @@ -1609,22 +1808,28 @@ def delete_blob( _target_object=None, ) + @create_trace_span(name="Storage.Bucket.deleteBlobs") def delete_blobs( self, blobs, on_error=None, client=None, + preserve_generation=False, timeout=_DEFAULT_TIMEOUT, if_generation_match=None, if_generation_not_match=None, if_metageneration_match=None, if_metageneration_not_match=None, - retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED, + retry=DEFAULT_RETRY, ): """Deletes a list of blobs from the current bucket. Uses :meth:`delete_blob` to delete each individual blob. + By default, any generation information in the list of blobs is ignored, and the + live versions of all blobs are deleted. Set `preserve_generation` to True + if blob generation should instead be propagated from the list of blobs. + If :attr:`user_project` is set, bills the API request to that project. :type blobs: list @@ -1632,15 +1837,22 @@ def delete_blobs( blob names to delete. :type on_error: callable - :param on_error: (Optional) Takes single argument: ``blob``. Called - called once for each blob raising + :param on_error: (Optional) Takes single argument: ``blob``. + Called once for each blob raising :class:`~google.cloud.exceptions.NotFound`; otherwise, the exception is propagated. + Note that ``on_error`` is not supported in a ``Batch`` context. :type client: :class:`~google.cloud.storage.client.Client` :param client: (Optional) The client to use. If not passed, falls back to the ``client`` stored on the current bucket. 
+ :type preserve_generation: bool + :param preserve_generation: (Optional) Deletes only the generation specified on the blob object, + instead of the live version, if set to True. Only :class:~google.cloud.storage.blob.Blob + objects can have their generation set in this way. + Default: False. + :type if_generation_match: list of long :param if_generation_match: (Optional) See :ref:`using-if-generation-match` @@ -1668,25 +1880,24 @@ def delete_blobs( for the server response. See: :ref:`configuring_timeouts` :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy - :param retry: - (Optional) How to retry the RPC. See: :ref:`configuring_retries` + :param retry: (Optional) How to retry the RPC. A None value will disable + retries. A google.api_core.retry.Retry value will enable retries, + and the object will define retriable response codes and errors and + configure backoff and timeout options. + + A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a + Retry object and activates it only if certain conditions are met. + This class exists to provide safe defaults for RPC calls that are + not technically safe to retry normally (due to potential data + duplication or other side-effects) but become safe to retry if a + condition such as if_generation_match is set. + + See the retry.py source code and docstrings in this package + (google.cloud.storage.retry) for information on retry types and how + to configure them. :raises: :class:`~google.cloud.exceptions.NotFound` (if `on_error` is not passed). - - Example: - Delete blobs using generation match preconditions. - - >>> from google.cloud import storage - - >>> client = storage.Client() - >>> bucket = client.bucket("bucket-name") - - >>> blobs = [bucket.blob("blob-name-1"), bucket.blob("blob-name-2")] - >>> if_generation_match = [None] * len(blobs) - >>> if_generation_match[0] = "123" # precondition for "blob-name-1" - - >>> bucket.delete_blobs(blobs, if_generation_match=if_generation_match) """ _raise_if_len_differs( len(blobs), @@ -1703,11 +1914,15 @@ def delete_blobs( for blob in blobs: try: blob_name = blob + generation = None if not isinstance(blob_name, str): blob_name = blob.name + generation = blob.generation if preserve_generation else None + self.delete_blob( blob_name, client=client, + generation=generation, if_generation_match=next(if_generation_match, None), if_generation_not_match=next(if_generation_not_match, None), if_metageneration_match=next(if_metageneration_match, None), @@ -1721,6 +1936,7 @@ def delete_blobs( else: raise + @create_trace_span(name="Storage.Bucket.copyBlob") def copy_blob( self, blob, @@ -1744,6 +1960,9 @@ def copy_blob( If :attr:`user_project` is set, bills the API request to that project. + See [API reference docs](https://cloud.google.com/storage/docs/json_api/v1/objects/copy) + and a [code sample](https://cloud.google.com/storage/docs/samples/storage-copy-file#storage_copy_file-python). + :type blob: :class:`google.cloud.storage.blob.Blob` :param blob: The blob to be copied. @@ -1763,6 +1982,8 @@ def copy_blob( :param preserve_acl: DEPRECATED. This argument is not functional! (Optional) Copies ACL from old blob to new blob. Default: True. + Note that ``preserve_acl`` is not supported in a + ``Batch`` context. 
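As a sketch of generation-preserving bulk deletion described above (names and generation numbers are placeholders), only the listed revisions are removed when ``preserve_generation=True``; the default still deletes the live versions:

.. code-block:: python

    from google.cloud import storage

    client = storage.Client()
    bucket = client.bucket("my-bucket")  # placeholder name

    # Blob objects carrying explicit (placeholder) generations.
    blobs = [
        bucket.blob("archive/one.txt", generation=123),
        bucket.blob("archive/two.txt", generation=456),
    ]

    bucket.delete_blobs(
        blobs,
        preserve_generation=True,
        on_error=lambda blob: None,  # ignore objects that are already gone
    )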
:type source_generation: long :param source_generation: (Optional) The generation of the blob to be @@ -1819,24 +2040,16 @@ def copy_blob( :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy :param retry: - (Optional) How to retry the RPC. See: :ref:`configuring_retries` + (Optional) How to retry the RPC. + The default value is ``DEFAULT_RETRY_IF_GENERATION_SPECIFIED``, a conditional retry + policy which will only enable retries if ``if_generation_match`` or ``generation`` + is set, in order to ensure requests are idempotent before retrying them. + Change the value to ``DEFAULT_RETRY`` or another `google.api_core.retry.Retry` object + to enable retries regardless of generation precondition setting. + See [Configuring Retries](https://cloud.google.com/python/docs/reference/storage/latest/retry_timeout). :rtype: :class:`google.cloud.storage.blob.Blob` :returns: The new Blob. - - Example: - Copy a blob including ACL. - - >>> from google.cloud import storage - - >>> client = storage.Client(project="project") - - >>> bucket = client.bucket("bucket") - >>> dst_bucket = client.bucket("destination-bucket") - - >>> blob = bucket.blob("file.ext") - >>> new_blob = bucket.copy_blob(blob, dst_bucket) - >>> new_blob.acl.save(blob.acl) """ client = self._require_client(client) query_params = {} @@ -1879,6 +2092,7 @@ def copy_blob( new_blob._set_properties(copy_result) return new_blob + @create_trace_span(name="Storage.Bucket.renameBlob") def rename_blob( self, blob, @@ -1908,8 +2122,11 @@ def rename_blob( old blob. This means that with very large objects renaming could be a very (temporarily) costly or a very slow operation. If you need more control over the copy and deletion, instead - use `google.cloud.storage.blob.Blob.copy_to` and - `google.cloud.storage.blob.Blob.delete` directly. + use ``google.cloud.storage.blob.Blob.copy_to`` and + ``google.cloud.storage.blob.Blob.delete`` directly. + + Also note that this method is not fully supported in a + ``Batch`` context. :type blob: :class:`google.cloud.storage.blob.Blob` :param blob: The blob to be renamed. @@ -1977,7 +2194,13 @@ def rename_blob( :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy :param retry: - (Optional) How to retry the RPC. See: :ref:`configuring_retries` + (Optional) How to retry the RPC. + The default value is ``DEFAULT_RETRY_IF_GENERATION_SPECIFIED``, a conditional retry + policy which will only enable retries if ``if_generation_match`` or ``generation`` + is set, in order to ensure requests are idempotent before retrying them. + Change the value to ``DEFAULT_RETRY`` or another `google.api_core.retry.Retry` object + to enable retries regardless of generation precondition setting. + See [Configuring Retries](https://cloud.google.com/python/docs/reference/storage/latest/retry_timeout). :rtype: :class:`Blob` :returns: The newly-renamed blob. @@ -2013,6 +2236,111 @@ def rename_blob( ) return new_blob + @create_trace_span(name="Storage.Bucket.restore_blob") + def restore_blob( + self, + blob_name, + client=None, + generation=None, + copy_source_acl=None, + projection=None, + if_generation_match=None, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + timeout=_DEFAULT_TIMEOUT, + retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED, + ): + """Restores a soft-deleted object. 
+ + If :attr:`user_project` is set on the bucket, bills the API request to that project. + + See [API reference docs](https://cloud.google.com/storage/docs/json_api/v1/objects/restore) + + :type blob_name: str + :param blob_name: The name of the blob to be restored. + + :type client: :class:`~google.cloud.storage.client.Client` + :param client: (Optional) The client to use. If not passed, falls back + to the ``client`` stored on the current bucket. + + :type generation: int + :param generation: Selects the specific revision of the object. + + :type copy_source_acl: bool + :param copy_source_acl: (Optional) If true, copy the soft-deleted object's access controls. + + :type projection: str + :param projection: (Optional) Specifies the set of properties to return. + If used, must be 'full' or 'noAcl'. + + :type if_generation_match: long + :param if_generation_match: + (Optional) See :ref:`using-if-generation-match` + + :type if_generation_not_match: long + :param if_generation_not_match: + (Optional) See :ref:`using-if-generation-not-match` + + :type if_metageneration_match: long + :param if_metageneration_match: + (Optional) See :ref:`using-if-metageneration-match` + + :type if_metageneration_not_match: long + :param if_metageneration_not_match: + (Optional) See :ref:`using-if-metageneration-not-match` + + :type timeout: float or tuple + :param timeout: + (Optional) The amount of time, in seconds, to wait + for the server response. See: :ref:`configuring_timeouts` + + :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy + :param retry: + (Optional) How to retry the RPC. + The default value is ``DEFAULT_RETRY_IF_GENERATION_SPECIFIED``, which + only restore operations with ``if_generation_match`` or ``generation`` set + will be retried. + + Users can configure non-default retry behavior. A ``None`` value will + disable retries. A ``DEFAULT_RETRY`` value will enable retries + even if restore operations are not guaranteed to be idempotent. + See [Configuring Retries](https://cloud.google.com/python/docs/reference/storage/latest/retry_timeout). + + :rtype: :class:`google.cloud.storage.blob.Blob` + :returns: The restored Blob. + """ + client = self._require_client(client) + query_params = {} + + if self.user_project is not None: + query_params["userProject"] = self.user_project + if generation is not None: + query_params["generation"] = generation + if copy_source_acl is not None: + query_params["copySourceAcl"] = copy_source_acl + if projection is not None: + query_params["projection"] = projection + + _add_generation_match_parameters( + query_params, + if_generation_match=if_generation_match, + if_generation_not_match=if_generation_not_match, + if_metageneration_match=if_metageneration_match, + if_metageneration_not_match=if_metageneration_not_match, + ) + + blob = Blob(bucket=self, name=blob_name) + api_response = client._post_resource( + f"{blob.path}/restore", + None, + query_params=query_params, + timeout=timeout, + retry=retry, + ) + blob._set_properties(api_response) + return blob + @property def cors(self): """Retrieve or set CORS policies configured for this bucket. @@ -2180,6 +2508,18 @@ def iam_configuration(self): info = self._properties.get("iamConfiguration", {}) return IAMConfiguration.from_api_repr(info, self) + @property + def soft_delete_policy(self): + """Retrieve the soft delete policy for this bucket. 
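A rough sketch of restoring a soft-deleted object with the method above (bucket, object name, and generation are placeholders; the bucket is assumed to have a soft delete policy):

.. code-block:: python

    from google.cloud import storage

    client = storage.Client()
    bucket = client.bucket("my-bucket")  # placeholder name

    # Look up the soft-deleted revision, then restore that generation.
    deleted = bucket.get_blob("reports/q1.csv", generation=123, soft_deleted=True)
    if deleted is not None:
        restored = bucket.restore_blob(deleted.name, generation=deleted.generation)
        print(restored.name)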
+ + See https://cloud.google.com/storage/docs/soft-delete + + :rtype: :class:`SoftDeletePolicy` + :returns: an instance for managing the bucket's soft delete policy. + """ + policy = self._properties.get("softDeletePolicy", {}) + return SoftDeletePolicy.from_api_repr(policy, self) + @property def lifecycle_rules(self): """Retrieve or set lifecycle rules configured for this bucket. @@ -2189,20 +2529,20 @@ def lifecycle_rules(self): .. note:: - The getter for this property returns a list which contains + The getter for this property returns a generator which yields *copies* of the bucket's lifecycle rules mappings. Mutating the - list or one of its dicts has no effect unless you then re-assign - the dict via the setter. E.g.: + output dicts has no effect unless you then re-assign the dict via + the setter. E.g.: - >>> rules = bucket.lifecycle_rules + >>> rules = list(bucket.lifecycle_rules) >>> rules.append({'origin': '/foo', ...}) >>> rules[1]['rule']['action']['type'] = 'Delete' >>> del rules[0] >>> bucket.lifecycle_rules = rules >>> bucket.update() - :setter: Set lifestyle rules for this bucket. - :getter: Gets the lifestyle rules for this bucket. + :setter: Set lifecycle rules for this bucket. + :getter: Gets the lifecycle rules for this bucket. :rtype: generator(dict) :returns: A sequence of mappings describing each lifecycle rule. @@ -2214,6 +2554,8 @@ def lifecycle_rules(self): yield LifecycleRuleDelete.from_api_repr(rule) elif action_type == "SetStorageClass": yield LifecycleRuleSetStorageClass.from_api_repr(rule) + elif action_type == "AbortIncompleteMultipartUpload": + yield LifecycleRuleAbortIncompleteMultipartUpload.from_api_repr(rule) else: warnings.warn( "Unknown lifecycle rule type received: {}. Please upgrade to the latest version of google-cloud-storage.".format( @@ -2225,7 +2567,7 @@ def lifecycle_rules(self): @lifecycle_rules.setter def lifecycle_rules(self, rules): - """Set lifestyle rules configured for this bucket. + """Set lifecycle rules configured for this bucket. See https://cloud.google.com/storage/docs/lifecycle and https://cloud.google.com/storage/docs/json_api/v1/buckets @@ -2236,24 +2578,25 @@ def lifecycle_rules(self, rules): rules = [dict(rule) for rule in rules] # Convert helpers if needed self._patch_property("lifecycle", {"rule": rules}) - def clear_lifecyle_rules(self): - """Set lifestyle rules configured for this bucket. + def clear_lifecycle_rules(self): + """Clear lifecycle rules configured for this bucket. See https://cloud.google.com/storage/docs/lifecycle and https://cloud.google.com/storage/docs/json_api/v1/buckets """ self.lifecycle_rules = [] - def add_lifecycle_delete_rule(self, **kw): - """Add a "delete" rule to lifestyle rules configured for this bucket. + def clear_lifecyle_rules(self): + """Deprecated alias for clear_lifecycle_rules.""" + return self.clear_lifecycle_rules() - See https://cloud.google.com/storage/docs/lifecycle and - https://cloud.google.com/storage/docs/json_api/v1/buckets + def add_lifecycle_delete_rule(self, **kw): + """Add a "delete" rule to lifecycle rules configured for this bucket. - .. 
literalinclude:: snippets.py - :start-after: [START add_lifecycle_delete_rule] - :end-before: [END add_lifecycle_delete_rule] - :dedent: 4 + This defines a [lifecycle configuration](https://cloud.google.com/storage/docs/lifecycle), + which is set on the bucket. For the general format of a lifecycle configuration, see the + [bucket resource representation for JSON](https://cloud.google.com/storage/docs/json_api/v1/buckets). + See also a [code sample](https://cloud.google.com/storage/docs/samples/storage-enable-bucket-lifecycle-management#storage_enable_bucket_lifecycle_management-python). :type kw: dict :params kw: arguments passed to :class:`LifecycleRuleConditions`. @@ -2263,15 +2606,11 @@ def add_lifecycle_delete_rule(self, **kw): self.lifecycle_rules = rules def add_lifecycle_set_storage_class_rule(self, storage_class, **kw): - """Add a "delete" rule to lifestyle rules configured for this bucket. + """Add a "set storage class" rule to lifecycle rules. - See https://cloud.google.com/storage/docs/lifecycle and - https://cloud.google.com/storage/docs/json_api/v1/buckets - - .. literalinclude:: snippets.py - :start-after: [START add_lifecycle_set_storage_class_rule] - :end-before: [END add_lifecycle_set_storage_class_rule] - :dedent: 4 + This defines a [lifecycle configuration](https://cloud.google.com/storage/docs/lifecycle), + which is set on the bucket. For the general format of a lifecycle configuration, see the + [bucket resource representation for JSON](https://cloud.google.com/storage/docs/json_api/v1/buckets). :type storage_class: str, one of :attr:`STORAGE_CLASSES`. :param storage_class: new storage class to assign to matching items. @@ -2283,6 +2622,24 @@ def add_lifecycle_set_storage_class_rule(self, storage_class, **kw): rules.append(LifecycleRuleSetStorageClass(storage_class, **kw)) self.lifecycle_rules = rules + def add_lifecycle_abort_incomplete_multipart_upload_rule(self, **kw): + """Add a "abort incomplete multipart upload" rule to lifecycle rules. + + .. note:: + The "age" lifecycle condition is the only supported condition + for this rule. + + This defines a [lifecycle configuration](https://cloud.google.com/storage/docs/lifecycle), + which is set on the bucket. For the general format of a lifecycle configuration, see the + [bucket resource representation for JSON](https://cloud.google.com/storage/docs/json_api/v1/buckets). + + :type kw: dict + :params kw: arguments passed to :class:`LifecycleRuleConditions`. + """ + rules = list(self.lifecycle_rules) + rules.append(LifecycleRuleAbortIncompleteMultipartUpload(**kw)) + self.lifecycle_rules = rules + _location = _scalar_property("location") @property @@ -2290,7 +2647,7 @@ def location(self): """Retrieve location configured for this bucket. 
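The bucket-level helpers above wrap the lifecycle rule classes; a brief sketch (bucket name is a placeholder):

.. code-block:: python

    from google.cloud import storage

    client = storage.Client()
    bucket = client.get_bucket("my-bucket")  # placeholder name

    # Move stale logs to a colder storage class and drop abandoned uploads.
    bucket.add_lifecycle_set_storage_class_rule(
        "COLDLINE", age=90, matches_prefix=["logs/"]
    )
    bucket.add_lifecycle_abort_incomplete_multipart_upload_rule(age=7)
    bucket.patch()

    # clear_lifecycle_rules() replaces the misspelled clear_lifecyle_rules() alias.
    bucket.clear_lifecycle_rules()
    bucket.patch()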
See https://cloud.google.com/storage/docs/json_api/v1/buckets and - https://cloud.google.com/storage/docs/bucket-locations + https://cloud.google.com/storage/docs/locations Returns ``None`` if the property has not been set before creation, or if the bucket's resource has not been loaded from the server. @@ -2316,13 +2673,27 @@ def location(self, value): warnings.warn(_LOCATION_SETTER_MESSAGE, DeprecationWarning, stacklevel=2) self._location = value + @property + def data_locations(self): + """Retrieve the list of regional locations for custom dual-region buckets. + + See https://cloud.google.com/storage/docs/json_api/v1/buckets and + https://cloud.google.com/storage/docs/locations + + Returns ``None`` if the property has not been set before creation, + if the bucket's resource has not been loaded from the server, + or if the bucket is not a dual-regions bucket. + :rtype: list of str or ``NoneType`` + """ + custom_placement_config = self._properties.get("customPlacementConfig", {}) + return custom_placement_config.get("dataLocations") + @property def location_type(self): - """Retrieve or set the location type for the bucket. + """Retrieve the location type for the bucket. See https://cloud.google.com/storage/docs/storage-classes - :setter: Set the location type for this bucket. :getter: Gets the the location type for this bucket. :rtype: str or ``NoneType`` @@ -2523,8 +2894,6 @@ def storage_class(self, value): or :attr:`~google.cloud.storage.constants.DURABLE_REDUCED_AVAILABILITY_LEGACY_STORAGE_CLASS`, """ - if value not in self.STORAGE_CLASSES: - raise ValueError("Invalid storage class: %s" % (value,)) self._patch_property("storageClass", value) @property @@ -2542,6 +2911,21 @@ def time_created(self): if value is not None: return _rfc3339_nanos_to_datetime(value) + @property + def updated(self): + """Retrieve the timestamp at which the bucket was last updated. + + See https://cloud.google.com/storage/docs/json_api/v1/buckets + + :rtype: :class:`datetime.datetime` or ``NoneType`` + :returns: Datetime object parsed from RFC3339 valid timestamp, or + ``None`` if the bucket's resource has not been loaded + from the server. + """ + value = self._properties.get("updated") + if value is not None: + return _rfc3339_nanos_to_datetime(value) + @property def versioning_enabled(self): """Is versioning enabled for this bucket? @@ -2599,34 +2983,141 @@ def requester_pays(self, value): """ self._patch_property("billing", {"requesterPays": bool(value)}) - def configure_website(self, main_page_suffix=None, not_found_page=None): - """Configure website-related properties. + @property + def autoclass_enabled(self): + """Whether Autoclass is enabled for this bucket. - See https://cloud.google.com/storage/docs/hosting-static-website + See https://cloud.google.com/storage/docs/using-autoclass for details. - .. note:: - This (apparently) only works - if your bucket name is a domain name - (and to do that, you need to get approved somehow...). + :setter: Update whether autoclass is enabled for this bucket. + :getter: Query whether autoclass is enabled for this bucket. + + :rtype: bool + :returns: True if enabled, else False. 
+ """ + autoclass = self._properties.get("autoclass", {}) + return autoclass.get("enabled", False) + + @autoclass_enabled.setter + def autoclass_enabled(self, value): + """Enable or disable Autoclass at the bucket-level. + + See https://cloud.google.com/storage/docs/using-autoclass for details. + + :type value: convertible to boolean + :param value: If true, enable Autoclass for this bucket. + If false, disable Autoclass for this bucket. + """ + autoclass = self._properties.get("autoclass", {}) + autoclass["enabled"] = bool(value) + self._patch_property("autoclass", autoclass) + + @property + def autoclass_toggle_time(self): + """Retrieve the toggle time when Autoclaass was last enabled or disabled for the bucket. + :rtype: datetime.datetime or ``NoneType`` + :returns: point-in time at which the bucket's autoclass is toggled, or ``None`` if the property is not set locally. + """ + autoclass = self._properties.get("autoclass") + if autoclass is not None: + timestamp = autoclass.get("toggleTime") + if timestamp is not None: + return _rfc3339_nanos_to_datetime(timestamp) + + @property + def autoclass_terminal_storage_class(self): + """The storage class that objects in an Autoclass bucket eventually transition to if + they are not read for a certain length of time. Valid values are NEARLINE and ARCHIVE. + + See https://cloud.google.com/storage/docs/using-autoclass for details. + + :setter: Set the terminal storage class for Autoclass configuration. + :getter: Get the terminal storage class for Autoclass configuration. + + :rtype: str + :returns: The terminal storage class if Autoclass is enabled, else ``None``. + """ + autoclass = self._properties.get("autoclass", {}) + return autoclass.get("terminalStorageClass", None) + + @autoclass_terminal_storage_class.setter + def autoclass_terminal_storage_class(self, value): + """The storage class that objects in an Autoclass bucket eventually transition to if + they are not read for a certain length of time. Valid values are NEARLINE and ARCHIVE. + + See https://cloud.google.com/storage/docs/using-autoclass for details. + + :type value: str + :param value: The only valid values are `"NEARLINE"` and `"ARCHIVE"`. + """ + autoclass = self._properties.get("autoclass", {}) + autoclass["terminalStorageClass"] = value + self._patch_property("autoclass", autoclass) + + @property + def autoclass_terminal_storage_class_update_time(self): + """The time at which the Autoclass terminal_storage_class field was last updated for this bucket + :rtype: datetime.datetime or ``NoneType`` + :returns: point-in time at which the bucket's terminal_storage_class is last updated, or ``None`` if the property is not set locally. + """ + autoclass = self._properties.get("autoclass") + if autoclass is not None: + timestamp = autoclass.get("terminalStorageClassUpdateTime") + if timestamp is not None: + return _rfc3339_nanos_to_datetime(timestamp) - If you want this bucket to host a website, just provide the name - of an index page and a page to use when a blob isn't found: + @property + def object_retention_mode(self): + """Retrieve the object retention mode set on the bucket. + + :rtype: str + :returns: When set to Enabled, retention configurations can be + set on objects in the bucket. 
+ """ + object_retention = self._properties.get("objectRetention") + if object_retention is not None: + return object_retention.get("mode") + + @property + def hierarchical_namespace_enabled(self): + """Whether hierarchical namespace is enabled for this bucket. + + :setter: Update whether hierarchical namespace is enabled for this bucket. + :getter: Query whether hierarchical namespace is enabled for this bucket. + + :rtype: bool + :returns: True if enabled, else False. + """ + hns = self._properties.get("hierarchicalNamespace", {}) + return hns.get("enabled") + + @hierarchical_namespace_enabled.setter + def hierarchical_namespace_enabled(self, value): + """Enable or disable hierarchical namespace at the bucket-level. + + :type value: convertible to boolean + :param value: If true, enable hierarchical namespace for this bucket. + If false, disable hierarchical namespace for this bucket. - .. literalinclude:: snippets.py - :start-after: [START configure_website] - :end-before: [END configure_website] - :dedent: 4 + .. note:: + To enable hierarchical namespace, you must set it at bucket creation time. + Currently, hierarchical namespace configuration cannot be changed after bucket creation. + """ + hns = self._properties.get("hierarchicalNamespace", {}) + hns["enabled"] = bool(value) + self._patch_property("hierarchicalNamespace", hns) - You probably should also make the whole bucket public: + def configure_website(self, main_page_suffix=None, not_found_page=None): + """Configure website-related properties. - .. literalinclude:: snippets.py - :start-after: [START make_public] - :end-before: [END make_public] - :dedent: 4 + See https://cloud.google.com/storage/docs/static-website - This says: "Make the bucket public, and all the stuff already in - the bucket, and anything else I add to the bucket. Just make it - all public." + .. note:: + This configures the bucket's website-related properties,controlling how + the service behaves when accessing bucket contents as a web site. + See [tutorials](https://cloud.google.com/storage/docs/hosting-static-website) and + [code samples](https://cloud.google.com/storage/docs/samples/storage-define-bucket-website-configuration#storage_define_bucket_website_configuration-python) + for more information. :type main_page_suffix: str :param main_page_suffix: The page to use as the main page @@ -2647,6 +3138,7 @@ def disable_website(self): """ return self.configure_website(None, None) + @create_trace_span(name="Storage.Bucket.getIamPolicy") def get_iam_policy( self, client=None, @@ -2656,8 +3148,8 @@ def get_iam_policy( ): """Retrieve the IAM policy for the bucket. - See - https://cloud.google.com/storage/docs/json_api/v1/buckets/getIamPolicy + See [API reference docs](https://cloud.google.com/storage/docs/json_api/v1/buckets/getIamPolicy) + and a [code sample](https://cloud.google.com/storage/docs/samples/storage-view-bucket-iam-members#storage_view_bucket_iam_members-python). If :attr:`user_project` is set, bills the API request to that project. @@ -2690,30 +3182,6 @@ def get_iam_policy( :rtype: :class:`google.api_core.iam.Policy` :returns: the policy instance, based on the resource returned from the ``getIamPolicy`` API request. - - Example: - - .. 
code-block:: python - - from google.cloud.storage.iam import STORAGE_OBJECT_VIEWER_ROLE - - policy = bucket.get_iam_policy(requested_policy_version=3) - - policy.version = 3 - - # Add a binding to the policy via it's bindings property - policy.bindings.append({ - "role": STORAGE_OBJECT_VIEWER_ROLE, - "members": {"serviceAccount:account@project.iam.gserviceaccount.com", ...}, - # Optional: - "condition": { - "title": "prefix" - "description": "Objects matching prefix" - "expression": "resource.name.startsWith(\"projects/project-name/buckets/bucket-name/objects/prefix\")" - } - }) - - bucket.set_iam_policy(policy) """ client = self._require_client(client) query_params = {} @@ -2725,7 +3193,7 @@ def get_iam_policy( query_params["optionsRequestedPolicyVersion"] = requested_policy_version info = client._get_resource( - "%s/iam" % (self.path,), + f"{self.path}/iam", query_params=query_params, timeout=timeout, retry=retry, @@ -2733,6 +3201,7 @@ def get_iam_policy( ) return Policy.from_api_repr(info) + @create_trace_span(name="Storage.Bucket.setIamPolicy") def set_iam_policy( self, policy, @@ -2774,7 +3243,7 @@ def set_iam_policy( if self.user_project is not None: query_params["userProject"] = self.user_project - path = "{}/iam".format(self.path) + path = f"{self.path}/iam" resource = policy.to_api_repr() resource["resourceId"] = self.path @@ -2789,6 +3258,7 @@ def set_iam_policy( return Policy.from_api_repr(info) + @create_trace_span(name="Storage.Bucket.testIamPermissions") def test_iam_permissions( self, permissions, client=None, timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY ): @@ -2826,7 +3296,7 @@ def test_iam_permissions( if self.user_project is not None: query_params["userProject"] = self.user_project - path = "%s/iam/testPermissions" % (self.path,) + path = f"{self.path}/iam/testPermissions" resp = client._get_resource( path, query_params=query_params, @@ -2836,6 +3306,7 @@ def test_iam_permissions( ) return resp.get("permissions", []) + @create_trace_span(name="Storage.Bucket.makePublic") def make_public( self, recursive=False, @@ -2929,9 +3400,11 @@ def make_public( for blob in blobs: blob.acl.all().grant_read() blob.acl.save( - client=client, timeout=timeout, + client=client, + timeout=timeout, ) + @create_trace_span(name="Storage.Bucket.makePrivate") def make_private( self, recursive=False, @@ -3029,19 +3502,10 @@ def generate_upload_policy(self, conditions, expiration=None, client=None): """Create a signed upload policy for uploading objects. This method generates and signs a policy document. You can use - `policy documents`_ to allow visitors to a website to upload files to + [`policy documents`](https://cloud.google.com/storage/docs/xml-api/post-object-forms) + to allow visitors to a website to upload files to Google Cloud Storage without giving them direct write access. - - For example: - - .. literalinclude:: snippets.py - :start-after: [START policy_document] - :end-before: [END policy_document] - :dedent: 4 - - .. _policy documents: - https://cloud.google.com/storage/docs/xml-api\ - /post-object#policydocument + See a [code sample](https://cloud.google.com/storage/docs/xml-api/post-object-forms#python). :type expiration: datetime :param expiration: (Optional) Expiration in UTC. 
If not specified, the @@ -3049,7 +3513,7 @@ def generate_upload_policy(self, conditions, expiration=None, client=None): :type conditions: list :param conditions: A list of conditions as described in the - `policy documents`_ documentation. + `policy documents` documentation. :type client: :class:`~google.cloud.storage.client.Client` :param client: (Optional) The client to use. If not passed, falls back @@ -3065,7 +3529,7 @@ def generate_upload_policy(self, conditions, expiration=None, client=None): _signing.ensure_signed_credentials(credentials) if expiration is None: - expiration = _NOW() + datetime.timedelta(hours=1) + expiration = _NOW(_UTC).replace(tzinfo=None) + datetime.timedelta(hours=1) conditions = conditions + [{"bucket": self.name}] @@ -3088,6 +3552,7 @@ def generate_upload_policy(self, conditions, expiration=None, client=None): return fields + @create_trace_span(name="Storage.Bucket.lockRetentionPolicy") def lock_retention_policy( self, client=None, timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY ): @@ -3130,7 +3595,7 @@ def lock_retention_policy( if self.user_project is not None: query_params["userProject"] = self.user_project - path = "/b/{}/lockRetentionPolicy".format(self.name) + path = f"/b/{self.name}/lockRetentionPolicy" api_response = client._post_resource( path, None, @@ -3144,7 +3609,7 @@ def lock_retention_policy( def generate_signed_url( self, expiration=None, - api_access_endpoint=_API_ACCESS_ENDPOINT, + api_access_endpoint=None, method="GET", headers=None, query_parameters=None, @@ -3160,13 +3625,9 @@ def generate_signed_url( .. note:: If you are on Google Compute Engine, you can't generate a signed - URL using GCE service account. Follow `Issue 50`_ for updates on - this. If you'd like to be able to generate a signed URL from GCE, - you can use a standard service account from a JSON file rather - than a GCE service account. - - .. _Issue 50: https://github.com/GoogleCloudPlatform/\ - google-auth-library-python/issues/50 + URL using GCE service account. If you'd like to be able to generate + a signed URL from GCE, you can use a standard service account from a + JSON file rather than a GCE service account. If you have a bucket that you want to allow access to for a set amount of time, you can use this method to generate a URL that @@ -3175,28 +3636,15 @@ def generate_signed_url( If ``bucket_bound_hostname`` is set as an argument of :attr:`api_access_endpoint`, ``https`` works only if using a ``CDN``. - Example: - Generates a signed URL for this bucket using bucket_bound_hostname and scheme. - - >>> from google.cloud import storage - >>> client = storage.Client() - >>> bucket = client.get_bucket('my-bucket-name') - >>> url = bucket.generate_signed_url(expiration='url-expiration-time', bucket_bound_hostname='mydomain.tld', - >>> version='v4') - >>> url = bucket.generate_signed_url(expiration='url-expiration-time', bucket_bound_hostname='mydomain.tld', - >>> version='v4',scheme='https') # If using ``CDN`` - - This is particularly useful if you don't want publicly - accessible buckets, but don't want to require users to explicitly - log in. - :type expiration: Union[Integer, datetime.datetime, datetime.timedelta] :param expiration: Point in time when the signed URL should expire. If a ``datetime`` instance is passed without an explicit ``tzinfo`` set, it will be assumed to be ``UTC``. :type api_access_endpoint: str - :param api_access_endpoint: (Optional) URI base. 
+ :param api_access_endpoint: (Optional) URI base, for instance + "https://storage.googleapis.com". If not specified, the client's + api_endpoint will be used. Incompatible with bucket_bound_hostname. :type method: str :param method: The HTTP verb that will be used when requesting the URL. @@ -3220,7 +3668,6 @@ def generate_signed_url( :param client: (Optional) The client to use. If not passed, falls back to the ``client`` stored on the blob's bucket. - :type credentials: :class:`google.auth.credentials.Credentials` or :class:`NoneType` :param credentials: The authorization credentials to attach to requests. @@ -3236,11 +3683,13 @@ def generate_signed_url( :param virtual_hosted_style: (Optional) If true, then construct the URL relative the bucket's virtual hostname, e.g., '.storage.googleapis.com'. + Incompatible with bucket_bound_hostname. :type bucket_bound_hostname: str :param bucket_bound_hostname: - (Optional) If pass, then construct the URL relative to the bucket-bound hostname. - Value cane be a bare or with scheme, e.g., 'example.com' or 'http://example.com'. + (Optional) If passed, then construct the URL relative to the bucket-bound hostname. + Value can be a bare or with scheme, e.g., 'example.com' or 'http://example.com'. + Incompatible with api_access_endpoint and virtual_hosted_style. See: https://cloud.google.com/storage/docs/request-endpoints#cname :type scheme: str @@ -3249,7 +3698,7 @@ def generate_signed_url( this value as the scheme. ``https`` will work only when using a CDN. Defaults to ``"http"``. - :raises: :exc:`ValueError` when version is invalid. + :raises: :exc:`ValueError` when version is invalid or mutually exclusive arguments are used. :raises: :exc:`TypeError` when expiration is not a valid type. :raises: :exc:`AttributeError` if credentials is not an instance of :class:`google.auth.credentials.Signing`. @@ -3263,22 +3712,36 @@ def generate_signed_url( elif version not in ("v2", "v4"): raise ValueError("'version' must be either 'v2' or 'v4'") + if ( + api_access_endpoint is not None or virtual_hosted_style + ) and bucket_bound_hostname: + raise ValueError( + "The bucket_bound_hostname argument is not compatible with " + "either api_access_endpoint or virtual_hosted_style." + ) + + if api_access_endpoint is None: + client = self._require_client(client) + api_access_endpoint = client.api_endpoint + + # If you are on Google Compute Engine, you can't generate a signed URL + # using GCE service account. + # See https://github.com/googleapis/google-auth-library-python/issues/50 if virtual_hosted_style: - api_access_endpoint = "https://{bucket_name}.storage.googleapis.com".format( - bucket_name=self.name + api_access_endpoint = _virtual_hosted_style_base_url( + api_access_endpoint, self.name ) + resource = "/" elif bucket_bound_hostname: api_access_endpoint = _bucket_bound_hostname_url( bucket_bound_hostname, scheme ) - else: - resource = "/{bucket_name}".format(bucket_name=self.name) - - if virtual_hosted_style or bucket_bound_hostname: resource = "/" + else: + resource = f"/{self.name}" if credentials is None: - client = self._require_client(client) + client = self._require_client(client) # May be redundant, but that's ok. 
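With the endpoint handling above, `api_access_endpoint` now defaults to the client's `api_endpoint`, and `bucket_bound_hostname` cannot be combined with it or with `virtual_hosted_style`. An illustrative sketch, assuming credentials that can sign (bucket name and hostname are hypothetical):

```python
import datetime
from google.cloud import storage

client = storage.Client()
bucket = client.bucket("my-bucket")  # hypothetical bucket name

# Default: the URL is built against client.api_endpoint.
url = bucket.generate_signed_url(
    expiration=datetime.timedelta(hours=1), method="GET", version="v4"
)

# Bucket-bound hostname (for example a CDN); mutually exclusive with
# api_access_endpoint and virtual_hosted_style.
cdn_url = bucket.generate_signed_url(
    expiration=datetime.timedelta(hours=1),
    version="v4",
    bucket_bound_hostname="cdn.example.com",  # hypothetical hostname
    scheme="https",
)
```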
        credentials = client._credentials

        if version == "v2":
@@ -3297,6 +3760,102 @@ generate_signed_url
        )
+
+class SoftDeletePolicy(dict):
+    """Map a bucket's soft delete policy.
+
+    See https://cloud.google.com/storage/docs/soft-delete
+
+    :type bucket: :class:`Bucket`
+    :param bucket: Bucket for which this instance is the policy.
+
+    :type retention_duration_seconds: int
+    :param retention_duration_seconds:
+        (Optional) The period of time in seconds that soft-deleted objects in the bucket
+        will be retained and cannot be permanently deleted.
+
+    :type effective_time: :class:`datetime.datetime`
+    :param effective_time:
+        (Optional) When the bucket's soft delete policy is effective.
+        This value should normally only be set by the back-end API.
+    """
+
+    def __init__(self, bucket, **kw):
+        data = {}
+        retention_duration_seconds = kw.get("retention_duration_seconds")
+        data["retentionDurationSeconds"] = retention_duration_seconds
+
+        effective_time = kw.get("effective_time")
+        if effective_time is not None:
+            effective_time = _datetime_to_rfc3339(effective_time)
+        data["effectiveTime"] = effective_time
+
+        super().__init__(data)
+        self._bucket = bucket
+
+    @classmethod
+    def from_api_repr(cls, resource, bucket):
+        """Factory: construct instance from resource.
+
+        :type resource: dict
+        :param resource: mapping as returned from API call.
+
+        :type bucket: :class:`Bucket`
+        :param bucket: Bucket for which this instance is the policy.
+
+        :rtype: :class:`SoftDeletePolicy`
+        :returns: Instance created from resource.
+        """
+        instance = cls(bucket)
+        instance.update(resource)
+        return instance
+
+    @property
+    def bucket(self):
+        """Bucket for which this instance is the policy.
+
+        :rtype: :class:`Bucket`
+        :returns: the instance's bucket.
+        """
+        return self._bucket
+
+    @property
+    def retention_duration_seconds(self):
+        """Get the retention duration of the bucket's soft delete policy.
+
+        :rtype: int or ``NoneType``
+        :returns: The period of time in seconds that soft-deleted objects in the bucket
+            will be retained and cannot be permanently deleted; or ``None`` if the
+            property is not set.
+        """
+        duration = self.get("retentionDurationSeconds")
+        if duration is not None:
+            return int(duration)
+
+    @retention_duration_seconds.setter
+    def retention_duration_seconds(self, value):
+        """Set the retention duration of the bucket's soft delete policy.
+
+        :type value: int
+        :param value:
+            The period of time in seconds that soft-deleted objects in the bucket
+            will be retained and cannot be permanently deleted.
+        """
+        self["retentionDurationSeconds"] = value
+        self.bucket._patch_property("softDeletePolicy", self)
+
+    @property
+    def effective_time(self):
+        """Get the effective time of the bucket's soft delete policy.
+
+        :rtype: datetime.datetime or ``NoneType``
+        :returns: point-in-time at which the bucket's soft delete policy is
+            effective, or ``None`` if the property is not set.
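A short sketch of working with `SoftDeletePolicy`. It assumes the `Bucket` class exposes the policy through a `soft_delete_policy` property (not shown in this hunk); the bucket name and retention period are hypothetical.

```python
from google.cloud import storage

client = storage.Client()
bucket = client.get_bucket("my-bucket")  # hypothetical bucket name

policy = bucket.soft_delete_policy  # assumed accessor returning SoftDeletePolicy
print(policy.retention_duration_seconds, policy.effective_time)

# The setter stages the change via _patch_property(); patch() sends it.
policy.retention_duration_seconds = 10 * 24 * 60 * 60  # 10 days, hypothetical value
bucket.patch()
```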
+ """ + timestamp = self.get("effectiveTime") + if timestamp is not None: + return _rfc3339_nanos_to_datetime(timestamp) + + def _raise_if_len_differs(expected_len, **generation_match_args): """ Raise an error if any generation match argument @@ -3312,6 +3871,4 @@ def _raise_if_len_differs(expected_len, **generation_match_args): """ for name, value in generation_match_args.items(): if value is not None and len(value) != expected_len: - raise ValueError( - "'{}' length must be the same as 'blobs' length".format(name) - ) + raise ValueError(f"'{name}' length must be the same as 'blobs' length") diff --git a/google/cloud/storage/client.py b/google/cloud/storage/client.py index 9d1d49af8..57fa7043b 100644 --- a/google/cloud/storage/client.py +++ b/google/cloud/storage/client.py @@ -25,16 +25,25 @@ from google.auth.credentials import AnonymousCredentials -from google import resumable_media - from google.api_core import page_iterator -from google.cloud._helpers import _LocalStack, _NOW +from google.cloud._helpers import _LocalStack from google.cloud.client import ClientWithProject from google.cloud.exceptions import NotFound -from google.cloud.storage._helpers import _get_storage_host -from google.cloud.storage._helpers import _DEFAULT_STORAGE_HOST + +from google.cloud.storage._helpers import _add_generation_match_parameters from google.cloud.storage._helpers import _bucket_bound_hostname_url -from google.cloud.storage._helpers import _add_etag_match_headers +from google.cloud.storage._helpers import _get_api_endpoint_override +from google.cloud.storage._helpers import _get_environ_project +from google.cloud.storage._helpers import _get_storage_emulator_override +from google.cloud.storage._helpers import _use_client_cert +from google.cloud.storage._helpers import _virtual_hosted_style_base_url +from google.cloud.storage._helpers import _DEFAULT_UNIVERSE_DOMAIN +from google.cloud.storage._helpers import _DEFAULT_SCHEME +from google.cloud.storage._helpers import _STORAGE_HOST_TEMPLATE +from google.cloud.storage._helpers import _NOW +from google.cloud.storage._helpers import _UTC +from google.cloud.storage._opentelemetry_tracing import create_trace_span + from google.cloud.storage._http import Connection from google.cloud.storage._signing import ( get_expiration_seconds_v4, @@ -44,17 +53,12 @@ ) from google.cloud.storage.batch import Batch from google.cloud.storage.bucket import Bucket, _item_to_blob, _blobs_page_start -from google.cloud.storage.blob import ( - Blob, - _get_encryption_headers, - _raise_from_invalid_response, -) +from google.cloud.storage.blob import Blob from google.cloud.storage.hmac_key import HMACKeyMetadata from google.cloud.storage.acl import BucketACL from google.cloud.storage.acl import DefaultObjectACL from google.cloud.storage.constants import _DEFAULT_TIMEOUT from google.cloud.storage.retry import DEFAULT_RETRY -from google.cloud.storage.retry import ConditionalRetryPolicy _marker = object() @@ -92,7 +96,18 @@ class Client(ClientWithProject): :type client_options: :class:`~google.api_core.client_options.ClientOptions` or :class:`dict` :param client_options: (Optional) Client options used to set user options on the client. - API Endpoint should be set through client_options. + A non-default universe domain or api endpoint should be set through client_options. + + :type use_auth_w_custom_endpoint: bool + :param use_auth_w_custom_endpoint: + (Optional) Whether authentication is required under custom endpoints. 
+ If false, uses AnonymousCredentials and bypasses authentication. + Defaults to True. Note this is only used when a custom endpoint is set in conjunction. + + :type extra_headers: dict + :param extra_headers: + (Optional) Custom headers to be sent with the requests attached to the client. + For example, you can add custom audit logging headers. """ SCOPE = ( @@ -109,6 +124,8 @@ def __init__( _http=None, client_info=None, client_options=None, + use_auth_w_custom_endpoint=True, + extra_headers={}, ): self._base_connection = None @@ -121,6 +138,93 @@ def __init__( if project is _marker: project = None + # Save the initial value of constructor arguments before they + # are passed along, for use in __reduce__ defined elsewhere. + self._initial_client_info = client_info + self._initial_client_options = client_options + self._extra_headers = extra_headers + + connection_kw_args = {"client_info": client_info} + + if client_options: + if isinstance(client_options, dict): + client_options = google.api_core.client_options.from_dict( + client_options + ) + + if client_options and client_options.universe_domain: + self._universe_domain = client_options.universe_domain + else: + self._universe_domain = None + + storage_emulator_override = _get_storage_emulator_override() + api_endpoint_override = _get_api_endpoint_override() + + # Determine the api endpoint. The rules are as follows: + + # 1. If the `api_endpoint` is set in `client_options`, use that as the + # endpoint. + if client_options and client_options.api_endpoint: + api_endpoint = client_options.api_endpoint + + # 2. Elif the "STORAGE_EMULATOR_HOST" env var is set, then use that as the + # endpoint. + elif storage_emulator_override: + api_endpoint = storage_emulator_override + + # 3. Elif the "API_ENDPOINT_OVERRIDE" env var is set, then use that as the + # endpoint. + elif api_endpoint_override: + api_endpoint = api_endpoint_override + + # 4. Elif the `universe_domain` is set in `client_options`, + # create the endpoint using that as the default. + # + # Mutual TLS is not compatible with a non-default universe domain + # at this time. If such settings are enabled along with the + # "GOOGLE_API_USE_CLIENT_CERTIFICATE" env variable, a ValueError will + # be raised. + + elif self._universe_domain: + # The final decision of whether to use mTLS takes place in + # google-auth-library-python. We peek at the environment variable + # here only to issue an exception in case of a conflict. + if _use_client_cert(): + raise ValueError( + 'The "GOOGLE_API_USE_CLIENT_CERTIFICATE" env variable is ' + 'set to "true" and a non-default universe domain is ' + "configured. mTLS is not supported in any universe other than" + "googleapis.com." + ) + api_endpoint = _DEFAULT_SCHEME + _STORAGE_HOST_TEMPLATE.format( + universe_domain=self._universe_domain + ) + + # 5. Else, use the default, which is to use the default + # universe domain of "googleapis.com" and create the endpoint + # "storage.googleapis.com" from that. + else: + api_endpoint = None + + connection_kw_args["api_endpoint"] = api_endpoint + + self._is_emulator_set = True if storage_emulator_override else False + + # If a custom endpoint is set, the client checks for credentials + # or finds the default credentials based on the current environment. + # Authentication may be bypassed under certain conditions: + # (1) STORAGE_EMULATOR_HOST is set (for backwards compatibility), OR + # (2) use_auth_w_custom_endpoint is set to False. 
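A sketch of the two constructor additions described above, using a hypothetical emulator address and a hypothetical audit header value:

```python
from google.cloud import storage

# Point the client at a custom endpoint (for example a local emulator) and
# skip credential lookup; AnonymousCredentials are used instead.
emulator_client = storage.Client(
    project="test-project",  # hypothetical project
    client_options={"api_endpoint": "http://localhost:9023"},  # hypothetical address
    use_auth_w_custom_endpoint=False,
)

# Attach custom headers (for example audit-logging headers) to every request.
audited_client = storage.Client(
    extra_headers={"x-goog-custom-audit-job": "job-1234"},  # hypothetical header value
)
```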
+ if connection_kw_args["api_endpoint"] is not None: + if self._is_emulator_set or not use_auth_w_custom_endpoint: + if credentials is None: + credentials = AnonymousCredentials() + if project is None: + project = _get_environ_project() + if project is None: + no_project = True + project = "" + super(Client, self).__init__( project=project, credentials=credentials, @@ -128,30 +232,26 @@ def __init__( _http=_http, ) - kw_args = {"client_info": client_info} - - # `api_endpoint` should be only set by the user via `client_options`, - # or if the _get_storage_host() returns a non-default value. - # `api_endpoint` plays an important role for mTLS, if it is not set, - # then mTLS logic will be applied to decide which endpoint will be used. - storage_host = _get_storage_host() - kw_args["api_endpoint"] = ( - storage_host if storage_host != _DEFAULT_STORAGE_HOST else None - ) - - if client_options: - if type(client_options) == dict: - client_options = google.api_core.client_options.from_dict( - client_options + # Validate that the universe domain of the credentials matches the + # universe domain of the client. + if self._credentials.universe_domain != self.universe_domain: + raise ValueError( + "The configured universe domain ({client_ud}) does not match " + "the universe domain found in the credentials ({cred_ud}). If " + "you haven't configured the universe domain explicitly, " + "`googleapis.com` is the default.".format( + client_ud=self.universe_domain, + cred_ud=self._credentials.universe_domain, ) - if client_options.api_endpoint: - api_endpoint = client_options.api_endpoint - kw_args["api_endpoint"] = api_endpoint + ) if no_project: self.project = None - self._connection = Connection(self, **kw_args) + # Pass extra_headers to Connection + connection = Connection(self, **connection_kw_args) + connection.extra_headers = extra_headers + self._connection = connection self._batch_stack = _LocalStack() @classmethod @@ -170,6 +270,14 @@ def create_anonymous_client(cls): client.project = None return client + @property + def universe_domain(self): + return self._universe_domain or _DEFAULT_UNIVERSE_DOMAIN + + @property + def api_endpoint(self): + return self._connection.API_BASE_URL + @property def _connection(self): """Get connection or batch on the client. @@ -231,6 +339,7 @@ def current_batch(self): """ return self._batch_stack.top + @create_trace_span(name="Storage.Client.getServiceAccountEmail") def get_service_account_email( self, project=None, timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY ): @@ -255,11 +364,11 @@ def get_service_account_email( if project is None: project = self.project - path = "/projects/%s/serviceAccount" % (project,) + path = f"/projects/{project}/serviceAccount" api_response = self._get_resource(path, timeout=timeout, retry=retry) return api_response["email_address"] - def bucket(self, bucket_name, user_project=None): + def bucket(self, bucket_name, user_project=None, generation=None): """Factory constructor for bucket object. .. note:: @@ -273,22 +382,38 @@ def bucket(self, bucket_name, user_project=None): :param user_project: (Optional) The project ID to be billed for API requests made via the bucket. + :type generation: int + :param generation: (Optional) If present, selects a specific revision of + this bucket. + :rtype: :class:`google.cloud.storage.bucket.Bucket` :returns: The bucket object created. 
""" - return Bucket(client=self, name=bucket_name, user_project=user_project) + return Bucket( + client=self, + name=bucket_name, + user_project=user_project, + generation=generation, + ) - def batch(self): + def batch(self, raise_exception=True): """Factory constructor for batch object. .. note:: This will not make an HTTP request; it simply instantiates a batch object owned by this client. + :type raise_exception: bool + :param raise_exception: + (Optional) Defaults to True. If True, instead of adding exceptions + to the list of return responses, the final exception will be raised. + Note that exceptions are unwrapped after all operations are complete + in success or failure, and only the last exception is raised. + :rtype: :class:`google.cloud.storage.batch.Batch` :returns: The batch object created. """ - return Batch(client=self) + return Batch(client=self, raise_exception=raise_exception) def _get_resource( self, @@ -368,9 +493,20 @@ def _list_resource( timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY, ): - api_request = functools.partial( - self._connection.api_request, timeout=timeout, retry=retry - ) + kwargs = { + "method": "GET", + "path": path, + "timeout": timeout, + } + with create_trace_span( + name="Storage.Client._list_resource_returns_iterator", + client=self, + api_request=kwargs, + retry=retry, + ): + api_request = functools.partial( + self._connection.api_request, timeout=timeout, retry=retry + ) return page_iterator.HTTPIterator( client=self, api_request=api_request, @@ -585,6 +721,7 @@ def _post_resource( google.cloud.exceptions.NotFound If the bucket is not found. """ + return self._connection.api_request( method="POST", path=path, @@ -662,7 +799,7 @@ def _delete_resource( _target_object=_target_object, ) - def _bucket_arg_to_bucket(self, bucket_or_name): + def _bucket_arg_to_bucket(self, bucket_or_name, generation=None): """Helper to return given bucket or create new by name. Args: @@ -671,19 +808,30 @@ def _bucket_arg_to_bucket(self, bucket_or_name): str, \ ]): The bucket resource to pass or name to create. + generation (Optional[int]): + The bucket generation. If generation is specified, + bucket_or_name must be a name (str). Returns: google.cloud.storage.bucket.Bucket The newly created bucket or the given one. """ if isinstance(bucket_or_name, Bucket): + if generation: + raise ValueError( + "The generation can only be specified if a " + "name is used to specify a bucket, not a Bucket object. " + "Create a new Bucket object with the correct generation " + "instead." + ) bucket = bucket_or_name if bucket.client is None: bucket._client = self else: - bucket = Bucket(self, name=bucket_or_name) + bucket = Bucket(self, name=bucket_or_name, generation=generation) return bucket + @create_trace_span(name="Storage.Client.getBucket") def get_bucket( self, bucket_or_name, @@ -691,11 +839,13 @@ def get_bucket( if_metageneration_match=None, if_metageneration_not_match=None, retry=DEFAULT_RETRY, + *, + generation=None, + soft_deleted=None, ): - """API call: retrieve a bucket via a GET request. + """Retrieve a bucket via a GET request. - See - https://cloud.google.com/storage/docs/json_api/v1/buckets/get + See [API reference docs](https://cloud.google.com/storage/docs/json_api/v1/buckets/get) and a [code sample](https://cloud.google.com/storage/docs/samples/storage-get-bucket-metadata#storage_get_bucket_metadata-python). 
Args: bucket_or_name (Union[ \ @@ -710,12 +860,12 @@ def get_bucket( Can also be passed as a tuple (connect_timeout, read_timeout). See :meth:`requests.Session.request` documentation for details. - if_metageneration_match (Optional[long]): + if_metageneration_match (Optional[int]): Make the operation conditional on whether the - blob's current metageneration matches the given value. + bucket's current metageneration matches the given value. - if_metageneration_not_match (Optional[long]): - Make the operation conditional on whether the blob's + if_metageneration_not_match (Optional[int]): + Make the operation conditional on whether the bucket's current metageneration does not match the given value. retry (Optional[Union[google.api_core.retry.Retry, google.cloud.storage.retry.ConditionalRetryPolicy]]): @@ -732,6 +882,19 @@ def get_bucket( See the retry.py source code and docstrings in this package (google.cloud.storage.retry) for information on retry types and how to configure them. + generation (Optional[int]): + The generation of the bucket. The generation can be used to + specify a specific soft-deleted version of the bucket, in + conjunction with the ``soft_deleted`` argument below. If + ``soft_deleted`` is not True, the generation is unused. + + soft_deleted (Optional[bool]): + If True, looks for a soft-deleted bucket. Will only return + the bucket metadata if the bucket exists and is in a + soft-deleted state. The bucket ``generation`` is required if + ``soft_deleted`` is set to True. + See: https://cloud.google.com/storage/docs/soft-delete + Returns: google.cloud.storage.bucket.Bucket The bucket matching the name provided. @@ -739,38 +902,19 @@ def get_bucket( Raises: google.cloud.exceptions.NotFound If the bucket is not found. - - Examples: - Retrieve a bucket using a string. - - .. literalinclude:: snippets.py - :start-after: [START get_bucket] - :end-before: [END get_bucket] - :dedent: 4 - - Get a bucket using a resource. - - >>> from google.cloud import storage - >>> client = storage.Client() - - >>> # Set properties on a plain resource object. - >>> bucket = client.get_bucket("my-bucket-name") - - >>> # Time passes. Another program may have modified the bucket - ... # in the meantime, so you want to get the latest state. - >>> bucket = client.get_bucket(bucket) # API request. - """ - bucket = self._bucket_arg_to_bucket(bucket_or_name) + bucket = self._bucket_arg_to_bucket(bucket_or_name, generation=generation) bucket.reload( client=self, timeout=timeout, if_metageneration_match=if_metageneration_match, if_metageneration_not_match=if_metageneration_not_match, retry=retry, + soft_deleted=soft_deleted, ) return bucket + @create_trace_span(name="Storage.Client.lookupBucket") def lookup_bucket( self, bucket_name, @@ -782,12 +926,7 @@ def lookup_bucket( """Get a bucket by name, returning None if not found. You can use this if you would rather check for a None value - than catching an exception: - - .. literalinclude:: snippets.py - :start-after: [START lookup_bucket] - :end-before: [END lookup_bucket] - :dedent: 4 + than catching a NotFound exception. :type bucket_name: str :param bucket_name: The name of the bucket to get. @@ -809,7 +948,7 @@ def lookup_bucket( :param retry: (Optional) How to retry the RPC. See: :ref:`configuring_retries` - :rtype: :class:`google.cloud.storage.bucket.Bucket` + :rtype: :class:`google.cloud.storage.bucket.Bucket` or ``NoneType`` :returns: The bucket matching the name provided or None if not found. 
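The new keyword-only arguments might be used as follows (bucket name and generation are hypothetical):

```python
from google.cloud import storage

client = storage.Client()

# Fetch metadata for a soft-deleted bucket; generation is required in this mode.
deleted = client.get_bucket(
    "my-deleted-bucket",
    generation=123456789,
    soft_deleted=True,
)
print(deleted.name)
```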
""" try: @@ -823,6 +962,7 @@ def lookup_bucket( except NotFound: return None + @create_trace_span(name="Storage.Client.createBucket") def create_bucket( self, bucket_or_name, @@ -830,15 +970,16 @@ def create_bucket( project=None, user_project=None, location=None, + data_locations=None, predefined_acl=None, predefined_default_object_acl=None, + enable_object_retention=False, timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY, ): - """API call: create a new bucket via a POST request. + """Create a new bucket via a POST request. - See - https://cloud.google.com/storage/docs/json_api/v1/buckets/insert + See [API reference docs](https://cloud.google.com/storage/docs/json_api/v1/buckets/insert) and a [code sample](https://cloud.google.com/storage/docs/samples/storage-create-bucket#storage_create_bucket-python). Args: bucket_or_name (Union[ \ @@ -858,14 +999,22 @@ def create_bucket( made via created bucket. location (str): (Optional) The location of the bucket. If not passed, - the default location, US, will be used. See - https://cloud.google.com/storage/docs/bucket-locations + the default location, US, will be used. If specifying a dual-region, + `data_locations` should be set in conjunction. See: + https://cloud.google.com/storage/docs/locations + data_locations (list of str): + (Optional) The list of regional locations of a custom dual-region bucket. + Dual-regions require exactly 2 regional locations. See: + https://cloud.google.com/storage/docs/locations predefined_acl (str): (Optional) Name of predefined ACL to apply to bucket. See: https://cloud.google.com/storage/docs/access-control/lists#predefined-acl predefined_default_object_acl (str): (Optional) Name of predefined ACL to apply to bucket's objects. See: https://cloud.google.com/storage/docs/access-control/lists#predefined-acl + enable_object_retention (bool): + (Optional) Whether object retention should be enabled on this bucket. See: + https://cloud.google.com/storage/docs/object-lock timeout (Optional[Union[float, Tuple[float, float]]]): The amount of time, in seconds, to wait for the server response. @@ -893,36 +1042,24 @@ def create_bucket( Raises: google.cloud.exceptions.Conflict If the bucket already exists. - - Examples: - Create a bucket using a string. - - .. literalinclude:: snippets.py - :start-after: [START create_bucket] - :end-before: [END create_bucket] - :dedent: 4 - - Create a bucket using a resource. - - >>> from google.cloud import storage - >>> client = storage.Client() - - >>> # Set properties on a plain resource object. - >>> bucket = storage.Bucket("my-bucket-name") - >>> bucket.location = "europe-west6" - >>> bucket.storage_class = "COLDLINE" - - >>> # Pass that resource object to the client. - >>> bucket = client.create_bucket(bucket) # API request. 
- """ bucket = self._bucket_arg_to_bucket(bucket_or_name) + query_params = {} if project is None: project = self.project - if project is None: - raise ValueError("Client project not set: pass an explicit project.") + # Use no project if STORAGE_EMULATOR_HOST is set + if self._is_emulator_set: + if project is None: + project = _get_environ_project() + if project is None: + project = "" + + # Only include the project parameter if a project is set. + # If a project is not set, falls back to API validation (BadRequest). + if project is not None: + query_params = {"project": project} if requester_pays is not None: warnings.warn( @@ -932,8 +1069,6 @@ def create_bucket( ) bucket.requester_pays = requester_pays - query_params = {"project": project} - if predefined_acl is not None: predefined_acl = BucketACL.validate_predefined(predefined_acl) query_params["predefinedAcl"] = predefined_acl @@ -947,12 +1082,18 @@ def create_bucket( if user_project is not None: query_params["userProject"] = user_project + if enable_object_retention: + query_params["enableObjectRetention"] = enable_object_retention + properties = {key: bucket._properties[key] for key in bucket._changes} properties["name"] = bucket.name if location is not None: properties["location"] = location + if data_locations is not None: + properties["customPlacementConfig"] = {"dataLocations": data_locations} + api_response = self._post_resource( "/b", properties, @@ -965,6 +1106,7 @@ def create_bucket( bucket._set_properties(api_response) return bucket + @create_trace_span(name="Storage.Client.downloadBlobToFile") def download_blob_to_file( self, blob_or_uri, @@ -979,11 +1121,13 @@ def download_blob_to_file( if_metageneration_match=None, if_metageneration_not_match=None, timeout=_DEFAULT_TIMEOUT, - checksum="md5", + checksum="auto", retry=DEFAULT_RETRY, ): """Download the contents of a blob object or blob URI into a file-like object. + See https://cloud.google.com/storage/docs/downloading-objects + Args: blob_or_uri (Union[ \ :class:`~google.cloud.storage.blob.Blob`, \ @@ -1032,8 +1176,10 @@ def download_blob_to_file( instance in the case of transcoded or ranged downloads where the remote service does not know the correct checksum, including downloads where chunk_size is set) an INFO-level log will be - emitted. Supported values are "md5", "crc32c" and None. The default - is "md5". + emitted. Supported values are "md5", "crc32c", "auto" and None. + The default is "auto", which will try to detect if the C + extension for crc32c is installed and fall back to md5 otherwise. + retry (google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy) (Optional) How to retry the RPC. A None value will disable retries. A google.api_core.retry.Retry value will enable retries, @@ -1050,81 +1196,29 @@ def download_blob_to_file( See the retry.py source code and docstrings in this package (google.cloud.storage.retry) for information on retry types and how to configure them. - - Media operations (downloads and uploads) do not support non-default - predicates in a Retry object. The default will always be used. Other - configuration changes for Retry objects such as delays and deadlines - are respected. - - Examples: - Download a blob using a blob resource. 
- - >>> from google.cloud import storage - >>> client = storage.Client() - - >>> bucket = client.get_bucket('my-bucket-name') - >>> blob = storage.Blob('path/to/blob', bucket) - - >>> with open('file-to-download-to', 'w') as file_obj: - >>> client.download_blob_to_file(blob, file_obj) # API request. - - - Download a blob using a URI. - - >>> from google.cloud import storage - >>> client = storage.Client() - - >>> with open('file-to-download-to', 'w') as file_obj: - >>> client.download_blob_to_file( - >>> 'gs://bucket_name/path/to/blob', file_obj) - - """ - # Handle ConditionalRetryPolicy. - if isinstance(retry, ConditionalRetryPolicy): - # Conditional retries are designed for non-media calls, which change - # arguments into query_params dictionaries. Media operations work - # differently, so here we make a "fake" query_params to feed to the - # ConditionalRetryPolicy. - query_params = { - "ifGenerationMatch": if_generation_match, - "ifMetagenerationMatch": if_metageneration_match, - } - retry = retry.get_retry_policy_if_conditions_met(query_params=query_params) - if not isinstance(blob_or_uri, Blob): - blob_or_uri = Blob.from_string(blob_or_uri) - download_url = blob_or_uri._get_download_url( - self, + blob_or_uri = Blob.from_uri(blob_or_uri) + + blob_or_uri._prep_and_do_download( + file_obj, + client=self, + start=start, + end=end, + raw_download=raw_download, + if_etag_match=if_etag_match, + if_etag_not_match=if_etag_not_match, if_generation_match=if_generation_match, if_generation_not_match=if_generation_not_match, if_metageneration_match=if_metageneration_match, if_metageneration_not_match=if_metageneration_not_match, + timeout=timeout, + checksum=checksum, + retry=retry, ) - headers = _get_encryption_headers(blob_or_uri._encryption_key) - headers["accept-encoding"] = "gzip" - _add_etag_match_headers( - headers, if_etag_match=if_etag_match, if_etag_not_match=if_etag_not_match, - ) - - transport = self._http - try: - blob_or_uri._do_download( - transport, - file_obj, - download_url, - headers, - start, - end, - raw_download, - timeout=timeout, - checksum=checksum, - retry=retry, - ) - except resumable_media.InvalidResponse as exc: - _raise_from_invalid_response(exc) + @create_trace_span(name="Storage.Client.listBlobs") def list_blobs( self, bucket_or_name, @@ -1141,11 +1235,19 @@ def list_blobs( page_size=None, timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY, + match_glob=None, + include_folders_as_prefixes=None, + soft_deleted=None, ): """Return an iterator used to find blobs in the bucket. If :attr:`user_project` is set, bills the API request to that project. + .. note:: + List prefixes (directories) in a bucket using a prefix and delimiter. + See a [code sample](https://cloud.google.com/storage/docs/samples/storage-list-files-with-prefix#storage_list_files_with_prefix-python) + listing objects using a prefix filter. + Args: bucket_or_name (Union[ \ :class:`~google.cloud.storage.bucket.Bucket`, \ @@ -1229,18 +1331,29 @@ def list_blobs( See the retry.py source code and docstrings in this package (google.cloud.storage.retry) for information on retry types and how to configure them. - Returns: - Iterator of all :class:`~google.cloud.storage.blob.Blob` - in this bucket matching the arguments. + match_glob (str): + (Optional) A glob pattern used to filter results (for example, foo*bar). + The string value must be UTF-8 encoded. 
See: + https://cloud.google.com/storage/docs/json_api/v1/objects/list#list-object-glob - Example: - List blobs in the bucket with user_project. + include_folders_as_prefixes (bool): + (Optional) If true, includes Folders and Managed Folders in the set of + ``prefixes`` returned by the query. Only applicable if ``delimiter`` is set to /. + See: https://cloud.google.com/storage/docs/managed-folders - >>> from google.cloud import storage - >>> client = storage.Client() + soft_deleted (bool): + (Optional) If true, only soft-deleted objects will be listed as distinct results in order of increasing + generation number. This parameter can only be used successfully if the bucket has a soft delete policy. + Note ``soft_deleted`` and ``versions`` cannot be set to True simultaneously. See: + https://cloud.google.com/storage/docs/soft-delete + + Returns: + Iterator of all :class:`~google.cloud.storage.blob.Blob` + in this bucket matching the arguments. The RPC call + returns a response when the iterator is consumed. - >>> bucket = storage.Bucket(client, "my-bucket-name", user_project="my-project") - >>> all_blobs = list(client.list_blobs(bucket)) + As part of the response, you'll also get back an iterator.prefixes entity that lists object names + up to and including the requested delimiter. Duplicate entries are omitted from this list. """ bucket = self._bucket_arg_to_bucket(bucket_or_name) @@ -1252,6 +1365,9 @@ def list_blobs( if delimiter is not None: extra_params["delimiter"] = delimiter + if match_glob is not None: + extra_params["matchGlob"] = match_glob + if start_offset is not None: extra_params["startOffset"] = start_offset @@ -1267,6 +1383,12 @@ def list_blobs( if fields is not None: extra_params["fields"] = fields + if include_folders_as_prefixes is not None: + extra_params["includeFoldersAsPrefixes"] = include_folders_as_prefixes + + if soft_deleted is not None: + extra_params["softDeleted"] = soft_deleted + if bucket.user_project is not None: extra_params["userProject"] = bucket.user_project @@ -1286,6 +1408,7 @@ def list_blobs( iterator.prefixes = set() return iterator + @create_trace_span(name="Storage.Client.listBuckets") def list_buckets( self, max_results=None, @@ -1297,18 +1420,15 @@ def list_buckets( page_size=None, timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY, + *, + soft_deleted=None, ): """Get all buckets in the project associated to the client. This will not populate the list of blobs available in each bucket. - .. literalinclude:: snippets.py - :start-after: [START list_buckets] - :end-before: [END list_buckets] - :dedent: 4 - - This implements "storage.buckets.list". + See [API reference docs](https://cloud.google.com/storage/docs/json_api/v1/buckets/list) and a [code sample](https://cloud.google.com/storage/docs/samples/storage-list-buckets#storage_list_buckets-python). :type max_results: int :param max_results: (Optional) The maximum number of buckets to return. @@ -1354,19 +1474,34 @@ def list_buckets( :param retry: (Optional) How to retry the RPC. See: :ref:`configuring_retries` + :type soft_deleted: bool + :param soft_deleted: + (Optional) If true, only soft-deleted buckets will be listed as distinct results in order of increasing + generation number. This parameter can only be used successfully if the bucket has a soft delete policy. 
+ See: https://cloud.google.com/storage/docs/soft-delete + :rtype: :class:`~google.api_core.page_iterator.Iterator` :raises ValueError: if both ``project`` is ``None`` and the client's project is also ``None``. :returns: Iterator of all :class:`~google.cloud.storage.bucket.Bucket` belonging to this project. """ + extra_params = {} + if project is None: project = self.project - if project is None: - raise ValueError("Client project not set: pass an explicit project.") + # Use no project if STORAGE_EMULATOR_HOST is set + if self._is_emulator_set: + if project is None: + project = _get_environ_project() + if project is None: + project = "" - extra_params = {"project": project} + # Only include the project parameter if a project is set. + # If a project is not set, falls back to API validation (BadRequest). + if project is not None: + extra_params = {"project": project} if prefix is not None: extra_params["prefix"] = prefix @@ -1376,6 +1511,9 @@ def list_buckets( if fields is not None: extra_params["fields"] = fields + if soft_deleted is not None: + extra_params["softDeleted"] = soft_deleted + return self._list_resource( "/b", _item_to_bucket, @@ -1387,6 +1525,72 @@ def list_buckets( retry=retry, ) + def restore_bucket( + self, + bucket_name, + generation, + projection="noAcl", + if_metageneration_match=None, + if_metageneration_not_match=None, + timeout=_DEFAULT_TIMEOUT, + retry=DEFAULT_RETRY, + ): + """Restores a soft-deleted bucket. + + :type bucket_name: str + :param bucket_name: The name of the bucket to be restored. + + :type generation: int + :param generation: Selects the specific revision of the bucket. + + :type projection: str + :param projection: + (Optional) Specifies the set of properties to return. If used, must + be 'full' or 'noAcl'. Defaults to 'noAcl'. + + if_metageneration_match (Optional[int]): + Make the operation conditional on whether the + blob's current metageneration matches the given value. + + if_metageneration_not_match (Optional[int]): + Make the operation conditional on whether the blob's + current metageneration does not match the given value. + + :type timeout: float or tuple + :param timeout: + (Optional) The amount of time, in seconds, to wait + for the server response. See: :ref:`configuring_timeouts` + + :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy + :param retry: + (Optional) How to retry the RPC. + + Users can configure non-default retry behavior. A ``None`` value will + disable retries. See [Configuring Retries](https://cloud.google.com/python/docs/reference/storage/latest/retry_timeout). + + :rtype: :class:`google.cloud.storage.bucket.Bucket` + :returns: The restored Bucket. 
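A combined sketch of the listing and restore additions above. It assumes a soft-deleted bucket's generation is available through its `generation` property; all names are hypothetical.

```python
from google.cloud import storage

client = storage.Client()

# Glob filtering, with managed folders reported among the prefixes.
blobs = client.list_blobs(
    "my-bucket",
    match_glob="**.txt",
    delimiter="/",
    include_folders_as_prefixes=True,
)
for blob in blobs:
    print(blob.name)
print(blobs.prefixes)

# Find a soft-deleted bucket and restore it by generation.
for candidate in client.list_buckets(soft_deleted=True):
    if candidate.name == "my-deleted-bucket":
        client.restore_bucket(candidate.name, generation=candidate.generation)
```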
+ """ + query_params = {"generation": generation, "projection": projection} + + _add_generation_match_parameters( + query_params, + if_metageneration_match=if_metageneration_match, + if_metageneration_not_match=if_metageneration_not_match, + ) + + bucket = self.bucket(bucket_name) + api_response = self._post_resource( + f"{bucket.path}/restore", + None, + query_params=query_params, + timeout=timeout, + retry=retry, + ) + bucket._set_properties(api_response) + return bucket + + @create_trace_span(name="Storage.Client.createHmacKey") def create_hmac_key( self, service_account_email, @@ -1433,20 +1637,25 @@ def create_hmac_key( if project_id is None: project_id = self.project - path = "/projects/{}/hmacKeys".format(project_id) + path = f"/projects/{project_id}/hmacKeys" qs_params = {"serviceAccountEmail": service_account_email} if user_project is not None: qs_params["userProject"] = user_project api_response = self._post_resource( - path, None, query_params=qs_params, timeout=timeout, retry=retry, + path, + None, + query_params=qs_params, + timeout=timeout, + retry=retry, ) metadata = HMACKeyMetadata(self) metadata._properties = api_response["metadata"] secret = api_response["secret"] return metadata, secret + @create_trace_span(name="Storage.Client.listHmacKeys") def list_hmac_keys( self, max_results=None, @@ -1495,7 +1704,7 @@ def list_hmac_keys( if project_id is None: project_id = self.project - path = "/projects/{}/hmacKeys".format(project_id) + path = f"/projects/{project_id}/hmacKeys" extra_params = {} if service_account_email is not None: @@ -1516,6 +1725,7 @@ def list_hmac_keys( retry=retry, ) + @create_trace_span(name="Storage.Client.getHmacKeyMetadata") def get_hmac_key_metadata( self, access_id, project_id=None, user_project=None, timeout=_DEFAULT_TIMEOUT ): @@ -1554,7 +1764,7 @@ def generate_signed_post_policy_v4( service_account_email=None, access_token=None, ): - """Generate a V4 signed policy object. + """Generate a V4 signed policy object. Generated policy object allows user to upload objects with a POST request. .. note:: @@ -1563,7 +1773,7 @@ def generate_signed_post_policy_v4( ``credentials`` has a ``service_account_email`` property which identifies the credentials. - Generated policy object allows user to upload objects with a POST request. + See a [code sample](https://github.com/googleapis/python-storage/blob/main/samples/snippets/storage_generate_signed_post_policy_v4.py). :type bucket_name: str :param bucket_name: Bucket name. @@ -1588,13 +1798,16 @@ def generate_signed_post_policy_v4( key to sign text. :type virtual_hosted_style: bool - :param virtual_hosted_style: (Optional) If True, construct the URL relative to the bucket - virtual hostname, e.g., '.storage.googleapis.com'. + :param virtual_hosted_style: + (Optional) If True, construct the URL relative to the bucket + virtual hostname, e.g., '.storage.googleapis.com'. + Incompatible with bucket_bound_hostname. :type bucket_bound_hostname: str :param bucket_bound_hostname: (Optional) If passed, construct the URL relative to the bucket-bound hostname. Value can be bare or with a scheme, e.g., 'example.com' or 'http://example.com'. + Incompatible with virtual_hosted_style. 
See: https://cloud.google.com/storage/docs/request-endpoints#cname :type scheme: str @@ -1609,39 +1822,28 @@ def generate_signed_post_policy_v4( :type access_token: str :param access_token: (Optional) Access token for a service account. + :raises: :exc:`ValueError` when mutually exclusive arguments are used. + :rtype: dict :returns: Signed POST policy. - - Example: - Generate signed POST policy and upload a file. - - >>> import datetime - >>> from google.cloud import storage - >>> client = storage.Client() - >>> tz = datetime.timezone(datetime.timedelta(hours=1), 'CET') - >>> policy = client.generate_signed_post_policy_v4( - "bucket-name", - "blob-name", - expiration=datetime.datetime(2020, 3, 17, tzinfo=tz), - conditions=[ - ["content-length-range", 0, 255] - ], - fields=[ - "x-goog-meta-hello" => "world" - ], - ) - >>> with open("bucket-name", "rb") as f: - files = {"file": ("bucket-name", f)} - requests.post(policy["url"], data=policy["fields"], files=files) """ + if virtual_hosted_style and bucket_bound_hostname: + raise ValueError( + "Only one of virtual_hosted_style and bucket_bound_hostname " + "can be specified." + ) + credentials = self._credentials if credentials is None else credentials - ensure_signed_credentials(credentials) + client_email = service_account_email + if not access_token or not service_account_email: + ensure_signed_credentials(credentials) + client_email = credentials.signer_email # prepare policy conditions and fields timestamp, datestamp = get_v4_now_dtstamps() x_goog_credential = "{email}/{datestamp}/auto/storage/goog4_request".format( - email=credentials.signer_email, datestamp=datestamp + email=client_email, datestamp=datestamp ) required_conditions = [ {"bucket": bucket_name}, @@ -1661,7 +1863,7 @@ def generate_signed_post_policy_v4( conditions += required_conditions # calculate policy expiration time - now = _NOW() + now = _NOW(_UTC).replace(tzinfo=None) if expiration is None: expiration = now + datetime.timedelta(hours=1) @@ -1705,11 +1907,13 @@ def generate_signed_post_policy_v4( ) # designate URL if virtual_hosted_style: - url = "https://{}.storage.googleapis.com/".format(bucket_name) + url = _virtual_hosted_style_base_url( + self.api_endpoint, bucket_name, trailing_slash=True + ) elif bucket_bound_hostname: - url = _bucket_bound_hostname_url(bucket_bound_hostname, scheme) + url = f"{_bucket_bound_hostname_url(bucket_bound_hostname, scheme)}/" else: - url = "https://storage.googleapis.com/{}/".format(bucket_name) + url = f"{self.api_endpoint}/{bucket_name}/" return {"url": url, "fields": policy_fields} diff --git a/google/cloud/storage/constants.py b/google/cloud/storage/constants.py index 2e1c1dd2a..eba0a19df 100644 --- a/google/cloud/storage/constants.py +++ b/google/cloud/storage/constants.py @@ -11,7 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Constants used acros google.cloud.storage modules.""" + +"""Constants used across google.cloud.storage modules. + +See [Python Storage Client Constants Page](https://github.com/googleapis/python-storage/blob/main/google/cloud/storage/constants.py) +for constants used across storage classes, location types, public access prevention, etc. 
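The doctest example removed above used invalid syntax for `fields`; a corrected sketch follows. It passes `fields` as a dict of extra form fields, requires credentials that can sign, and uses the third-party `requests` package only to perform the form POST; bucket and object names are hypothetical.

```python
import datetime
import requests  # third-party; used only to perform the form POST
from google.cloud import storage

client = storage.Client()

policy = client.generate_signed_post_policy_v4(
    "my-bucket",           # hypothetical bucket name
    "uploads/report.pdf",  # hypothetical object name
    expiration=datetime.datetime.now(datetime.timezone.utc)
    + datetime.timedelta(hours=1),
    conditions=[["content-length-range", 0, 1024 * 1024]],
    fields={"x-goog-meta-hello": "world"},
)

with open("report.pdf", "rb") as f:
    files = {"file": ("report.pdf", f)}
    requests.post(policy["url"], data=policy["fields"], files=files)
```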
+ +""" # Storage classes @@ -117,3 +123,17 @@ See: https://cloud.google.com/storage/docs/public-access-prevention """ + +RPO_ASYNC_TURBO = "ASYNC_TURBO" +"""The recovery point objective (RPO) indicates how quickly newly written objects are asynchronously replicated to a separate geographic location. +When the RPO value is set to ASYNC_TURBO, the turbo replication feature is enabled. + +See: https://cloud.google.com/storage/docs/managing-turbo-replication +""" + +RPO_DEFAULT = "DEFAULT" +"""The recovery point objective (RPO) indicates how quickly newly written objects are asynchronously replicated to a separate geographic location. +When the RPO value is set to DEFAULT, the default replication behavior is enabled. + +See: https://cloud.google.com/storage/docs/managing-turbo-replication +""" diff --git a/google/cloud/storage/exceptions.py b/google/cloud/storage/exceptions.py new file mode 100644 index 000000000..4eb05cef7 --- /dev/null +++ b/google/cloud/storage/exceptions.py @@ -0,0 +1,69 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Exceptions raised by the library.""" + +# These exceptions were originally part of the google-resumable-media library +# but were integrated into python-storage in version 3.0. For backwards +# compatibility with applications which use except blocks with +# google-resumable-media exceptions, if the library google-resumable-media is +# installed, make all exceptions subclasses of the exceptions from that library. +# Note that either way, the classes will subclass Exception, either directly or +# indirectly. +# +# This backwards compatibility feature may be removed in a future major version +# update. Please update application code to use the new exception classes in +# this module. +try: + from google.resumable_media import InvalidResponse as InvalidResponseDynamicParent + from google.resumable_media import DataCorruption as DataCorruptionDynamicParent +except ImportError: + InvalidResponseDynamicParent = Exception + DataCorruptionDynamicParent = Exception + + +class InvalidResponse(InvalidResponseDynamicParent): + """Error class for responses which are not in the correct state. + + Args: + response (object): The HTTP response which caused the failure. + args (tuple): The positional arguments typically passed to an + exception class. + """ + + def __init__(self, response, *args): + if InvalidResponseDynamicParent is Exception: + super().__init__(*args) + self.response = response + """object: The HTTP response object that caused the failure.""" + else: + super().__init__(response, *args) + + +class DataCorruption(DataCorruptionDynamicParent): + """Error class for corrupt media transfers. + + Args: + response (object): The HTTP response which caused the failure. + args (tuple): The positional arguments typically passed to an + exception class. 
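The new RPO constants are meant to be assigned to a bucket's `rpo` setting (defined elsewhere in bucket.py). A minimal sketch with a hypothetical dual-region bucket:

```python
from google.cloud import storage
from google.cloud.storage.constants import RPO_ASYNC_TURBO, RPO_DEFAULT

client = storage.Client()
bucket = client.get_bucket("my-dual-region-bucket")  # hypothetical bucket name

# Enable turbo replication, then revert to the default replication behavior.
bucket.rpo = RPO_ASYNC_TURBO
bucket.patch()

bucket.rpo = RPO_DEFAULT
bucket.patch()
```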
+ """ + + def __init__(self, response, *args): + if DataCorruptionDynamicParent is Exception: + super().__init__(*args) + self.response = response + """object: The HTTP response object that caused the failure.""" + else: + super().__init__(response, *args) diff --git a/google/cloud/storage/fileio.py b/google/cloud/storage/fileio.py index 95bb12b1f..2b4754648 100644 --- a/google/cloud/storage/fileio.py +++ b/google/cloud/storage/fileio.py @@ -12,13 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""Module for file-like access of blobs, usually invoked via Blob.open().""" + import io -import warnings from google.api_core.exceptions import RequestRangeNotSatisfiable -from google.cloud.storage._helpers import _NUM_RETRIES_MESSAGE from google.cloud.storage.retry import DEFAULT_RETRY -from google.cloud.storage.retry import DEFAULT_RETRY_IF_GENERATION_SPECIFIED from google.cloud.storage.retry import ConditionalRetryPolicy @@ -43,7 +42,6 @@ VALID_UPLOAD_KWARGS = { "content_type", "predefined_acl", - "num_retries", "if_generation_match", "if_generation_not_match", "if_metageneration_match", @@ -90,6 +88,7 @@ class BlobReader(io.BufferedIOBase): configuration changes for Retry objects such as delays and deadlines are respected. + :type download_kwargs: dict :param download_kwargs: Keyword arguments to pass to the underlying API calls. The following arguments are supported: @@ -99,14 +98,17 @@ class BlobReader(io.BufferedIOBase): - ``if_metageneration_match`` - ``if_metageneration_not_match`` - ``timeout`` + - ``raw_download`` + + Note that download_kwargs (excluding ``raw_download``) are also applied to blob.reload(), + if a reload is needed during seek(). """ def __init__(self, blob, chunk_size=None, retry=DEFAULT_RETRY, **download_kwargs): - """docstring note that download_kwargs also used for reload()""" for kwarg in download_kwargs: if kwarg not in VALID_DOWNLOAD_KWARGS: raise ValueError( - "BlobReader does not support keyword argument {}.".format(kwarg) + f"BlobReader does not support keyword argument {kwarg}." ) self._blob = blob @@ -123,9 +125,12 @@ def read(self, size=-1): # If the read request demands more bytes than are buffered, fetch more. remaining_size = size - len(result) if remaining_size > 0 or size < 0: + self._pos += self._buffer.tell() + read_size = len(result) + self._buffer.seek(0) self._buffer.truncate(0) # Clear the buffer to make way for new data. - fetch_start = self._pos + len(result) + fetch_start = self._pos if size > 0: # Fetch the larger of self._chunk_size or the remaining_size. fetch_end = fetch_start + max(remaining_size, self._chunk_size) @@ -141,7 +146,7 @@ def read(self, size=-1): end=fetch_end, checksum=None, retry=self._retry, - **self._download_kwargs + **self._download_kwargs, ) except RequestRangeNotSatisfiable: # We've reached the end of the file. Python file objects should @@ -154,9 +159,8 @@ def read(self, size=-1): self._buffer.write(result[size:]) self._buffer.seek(0) result = result[:size] - - self._pos += len(result) - + # Increment relative offset by true amount read. + self._pos += len(result) - read_size return result def read1(self, size=-1): @@ -172,38 +176,45 @@ def seek(self, pos, whence=0): self._checkClosed() # Raises ValueError if closed. 
if self._blob.size is None: - self._blob.reload(**self._download_kwargs) + reload_kwargs = { + k: v for k, v in self._download_kwargs.items() if k != "raw_download" + } + self._blob.reload(**reload_kwargs) - initial_pos = self._pos + initial_offset = self._pos + self._buffer.tell() if whence == 0: - self._pos = pos + target_pos = pos elif whence == 1: - self._pos += pos + target_pos = initial_offset + pos elif whence == 2: - self._pos = self._blob.size + pos + target_pos = self._blob.size + pos if whence not in {0, 1, 2}: raise ValueError("invalid whence value") - if self._pos > self._blob.size: - self._pos = self._blob.size + if target_pos > self._blob.size: + target_pos = self._blob.size # Seek or invalidate buffer as needed. - difference = self._pos - initial_pos - new_buffer_pos = self._buffer.seek(difference, 1) - if new_buffer_pos != difference: # Buffer does not contain new pos. - # Invalidate buffer. + if target_pos < self._pos: + # Target position < relative offset <= true offset. + # As data is not in buffer, invalidate buffer. self._buffer.seek(0) self._buffer.truncate(0) - - return self._pos + new_pos = target_pos + self._pos = target_pos + else: + # relative offset <= target position <= size of file. + difference = target_pos - initial_offset + new_pos = self._pos + self._buffer.seek(difference, 1) + return new_pos def close(self): self._buffer.close() - def _checkClosed(self): - if self._buffer.closed: - raise ValueError("I/O operation on closed file.") + @property + def closed(self): + return self._buffer.closed def readable(self): return True @@ -230,12 +241,6 @@ class BlobWriter(io.BufferedIOBase): writes must be exactly a multiple of 256KiB as with other resumable uploads. The default is the chunk_size of the blob, or 40 MiB. - :type text_mode: bool - :param text_mode: - (Deprecated) A synonym for ignore_flush. For backwards-compatibility, - if True, sets ignore_flush to True. Use ignore_flush instead. This - parameter will be removed in a future release. - :type ignore_flush: bool :param ignore_flush: Makes flush() do nothing instead of raise an error. flush() without @@ -271,6 +276,7 @@ class BlobWriter(io.BufferedIOBase): configuration changes for Retry objects such as delays and deadlines are respected. + :type upload_kwargs: dict :param upload_kwargs: Keyword arguments to pass to the underlying API calls. The following arguments are supported: @@ -281,7 +287,6 @@ class BlobWriter(io.BufferedIOBase): - ``if_metageneration_not_match`` - ``timeout`` - ``content_type`` - - ``num_retries`` - ``predefined_acl`` - ``checksum`` """ @@ -290,15 +295,14 @@ def __init__( self, blob, chunk_size=None, - text_mode=False, ignore_flush=False, - retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED, - **upload_kwargs + retry=DEFAULT_RETRY, + **upload_kwargs, ): for kwarg in upload_kwargs: if kwarg not in VALID_UPLOAD_KWARGS: raise ValueError( - "BlobWriter does not support keyword argument {}.".format(kwarg) + f"BlobWriter does not support keyword argument {kwarg}." ) self._blob = blob self._buffer = SlidingBuffer() @@ -306,8 +310,7 @@ def __init__( # Resumable uploads require a chunk size of a multiple of 256KiB. # self._chunk_size must not be changed after the upload is initiated. 
self._chunk_size = chunk_size or blob.chunk_size or DEFAULT_CHUNK_SIZE - # text_mode is a deprecated synonym for ignore_flush - self._ignore_flush = ignore_flush or text_mode + self._ignore_flush = ignore_flush self._retry = retry self._upload_kwargs = upload_kwargs @@ -349,19 +352,9 @@ def write(self, b): return pos def _initiate_upload(self): - # num_retries is only supported for backwards-compatibility reasons. - num_retries = self._upload_kwargs.pop("num_retries", None) retry = self._retry content_type = self._upload_kwargs.pop("content_type", None) - if num_retries is not None: - warnings.warn(_NUM_RETRIES_MESSAGE, DeprecationWarning, stacklevel=2) - # num_retries and retry are mutually exclusive. If num_retries is - # set and retry is exactly the default, then nullify retry for - # backwards compatibility. - if retry is DEFAULT_RETRY_IF_GENERATION_SPECIFIED: - retry = None - # Handle ConditionalRetryPolicy. if isinstance(retry, ConditionalRetryPolicy): # Conditional retries are designed for non-media calls, which change @@ -381,10 +374,9 @@ def _initiate_upload(self): self._buffer, content_type, None, - num_retries, chunk_size=self._chunk_size, retry=retry, - **self._upload_kwargs + **self._upload_kwargs, ) def _upload_chunks_from_buffer(self, num_chunks): @@ -396,9 +388,15 @@ def _upload_chunks_from_buffer(self, num_chunks): upload, transport = self._upload_and_transport + # Attach timeout if specified in the keyword arguments. + # Otherwise, the default timeout will be used from the media library. + kwargs = {} + if "timeout" in self._upload_kwargs: + kwargs = {"timeout": self._upload_kwargs.get("timeout")} + # Upload chunks. The SlidingBuffer class will manage seek position. for _ in range(num_chunks): - upload.transmit_next_chunk(transport) + upload.transmit_next_chunk(transport, **kwargs) # Wipe the buffer of chunks uploaded, preserving any remaining data. self._buffer.flush() @@ -417,14 +415,26 @@ def flush(self): ) def close(self): - self._checkClosed() # Raises ValueError if closed. + if not self._buffer.closed: + self._upload_chunks_from_buffer(1) + self._buffer.close() - self._upload_chunks_from_buffer(1) + def terminate(self): + """Cancel the ResumableUpload.""" + if self._upload_and_transport: + upload, transport = self._upload_and_transport + transport.delete(upload.upload_url) self._buffer.close() - def _checkClosed(self): - if self._buffer.closed: - raise ValueError("I/O operation on closed file.") + def __exit__(self, exc_type, exc_val, exc_tb): + if exc_type is not None: + self.terminate() + else: + self.close() + + @property + def closed(self): + return self._buffer.closed def readable(self): return False diff --git a/google/cloud/storage/hmac_key.py b/google/cloud/storage/hmac_key.py index 5cec51fa7..d37bc071b 100644 --- a/google/cloud/storage/hmac_key.py +++ b/google/cloud/storage/hmac_key.py @@ -12,9 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""Configure HMAC keys that can be used to authenticate requests to Google Cloud Storage. 
+ +See [HMAC keys documentation](https://cloud.google.com/storage/docs/authentication/hmackeys) +""" + from google.cloud.exceptions import NotFound from google.cloud._helpers import _rfc3339_nanos_to_datetime +from google.cloud.storage._opentelemetry_tracing import create_trace_span from google.cloud.storage.constants import _DEFAULT_TIMEOUT from google.cloud.storage.retry import DEFAULT_RETRY from google.cloud.storage.retry import DEFAULT_RETRY_IF_ETAG_IN_JSON @@ -131,13 +137,6 @@ def state(self): @state.setter def state(self, value): - if value not in self._SETTABLE_STATES: - raise ValueError( - "State may only be set to one of: {}".format( - ", ".join(self._SETTABLE_STATES) - ) - ) - self._properties["state"] = value @property @@ -177,7 +176,7 @@ def path(self): if project is None: project = self._client.project - return "/projects/{}/hmacKeys/{}".format(project, self.access_id) + return f"/projects/{project}/hmacKeys/{self.access_id}" @property def user_project(self): @@ -189,6 +188,7 @@ def user_project(self): """ return self._user_project + @create_trace_span(name="Storage.HmacKey.exists") def exists(self, timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY): """Determine whether or not the key for this metadata exists. @@ -211,13 +211,17 @@ def exists(self, timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY): qs_params["userProject"] = self.user_project self._client._get_resource( - self.path, query_params=qs_params, timeout=timeout, retry=retry, + self.path, + query_params=qs_params, + timeout=timeout, + retry=retry, ) except NotFound: return False else: return True + @create_trace_span(name="Storage.HmacKey.reload") def reload(self, timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY): """Reload properties from Cloud Storage. @@ -239,9 +243,13 @@ def reload(self, timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY): qs_params["userProject"] = self.user_project self._properties = self._client._get_resource( - self.path, query_params=qs_params, timeout=timeout, retry=retry, + self.path, + query_params=qs_params, + timeout=timeout, + retry=retry, ) + @create_trace_span(name="Storage.HmacKey.update") def update(self, timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY_IF_ETAG_IN_JSON): """Save writable properties to Cloud Storage. @@ -263,9 +271,14 @@ def update(self, timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY_IF_ETAG_IN_JSON): payload = {"state": self.state} self._properties = self._client._put_resource( - self.path, payload, query_params=qs_params, timeout=timeout, retry=retry, + self.path, + payload, + query_params=qs_params, + timeout=timeout, + retry=retry, ) + @create_trace_span(name="Storage.HmacKey.delete") def delete(self, timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY): """Delete the key from Cloud Storage. @@ -281,13 +294,13 @@ def delete(self, timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY): :raises :class:`~google.api_core.exceptions.NotFound`: if the key does not exist on the back-end. 
""" - if self.state != self.INACTIVE_STATE: - raise ValueError("Cannot delete key if not in 'INACTIVE' state.") - qs_params = {} if self.user_project is not None: qs_params["userProject"] = self.user_project self._client._delete_resource( - self.path, query_params=qs_params, timeout=timeout, retry=retry, + self.path, + query_params=qs_params, + timeout=timeout, + retry=retry, ) diff --git a/google/cloud/storage/notification.py b/google/cloud/storage/notification.py index 57faea571..d9d49fc4b 100644 --- a/google/cloud/storage/notification.py +++ b/google/cloud/storage/notification.py @@ -12,12 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Support for bucket notification resources.""" +"""Configure bucket notification resources to interact with Google Cloud Pub/Sub. + +See [Cloud Pub/Sub Notifications for Google Cloud Storage](https://cloud.google.com/storage/docs/pubsub-notifications) +""" import re from google.api_core.exceptions import NotFound +from google.cloud.storage._opentelemetry_tracing import create_trace_span from google.cloud.storage.constants import _DEFAULT_TIMEOUT from google.cloud.storage.retry import DEFAULT_RETRY @@ -156,26 +160,22 @@ def topic_name(self): @property def topic_project(self): - """Project ID of topic to which notifications are published. - """ + """Project ID of topic to which notifications are published.""" return self._topic_project @property def custom_attributes(self): - """Custom attributes passed with notification events. - """ + """Custom attributes passed with notification events.""" return self._properties.get("custom_attributes") @property def event_types(self): - """Event types for which notification events are published. - """ + """Event types for which notification events are published.""" return self._properties.get("event_types") @property def blob_name_prefix(self): - """Prefix of blob names for which notification events are published. - """ + """Prefix of blob names for which notification events are published.""" return self._properties.get("object_name_prefix") @property @@ -206,9 +206,7 @@ def client(self): @property def path(self): """The URL path for this notification.""" - return "/b/{}/notificationConfigs/{}".format( - self.bucket.name, self.notification_id - ) + return f"/b/{self.bucket.name}/notificationConfigs/{self.notification_id}" def _require_client(self, client): """Check client or verify over-ride. @@ -233,6 +231,7 @@ def _set_properties(self, response): self._properties.clear() self._properties.update(response) + @create_trace_span(name="Storage.BucketNotification.create") def create(self, client=None, timeout=_DEFAULT_TIMEOUT, retry=None): """API wrapper: create the notification. @@ -258,7 +257,7 @@ def create(self, client=None, timeout=_DEFAULT_TIMEOUT, retry=None): """ if self.notification_id is not None: raise ValueError( - "Notification already exists w/ id: {}".format(self.notification_id) + f"notification_id already set to {self.notification_id}; must be None to create a Notification." 
) client = self._require_client(client) @@ -267,7 +266,7 @@ def create(self, client=None, timeout=_DEFAULT_TIMEOUT, retry=None): if self.bucket.user_project is not None: query_params["userProject"] = self.bucket.user_project - path = "/b/{}/notificationConfigs".format(self.bucket.name) + path = f"/b/{self.bucket.name}/notificationConfigs" properties = self._properties.copy() if self.topic_name is None: @@ -278,9 +277,14 @@ def create(self, client=None, timeout=_DEFAULT_TIMEOUT, retry=None): ) self._properties = client._post_resource( - path, properties, query_params=query_params, timeout=timeout, retry=retry, + path, + properties, + query_params=query_params, + timeout=timeout, + retry=retry, ) + @create_trace_span(name="Storage.BucketNotification.exists") def exists(self, client=None, timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY): """Test whether this notification exists. @@ -308,7 +312,7 @@ def exists(self, client=None, timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY): :raises ValueError: if the notification has no ID. """ if self.notification_id is None: - raise ValueError("Notification not intialized by server") + raise ValueError("Notification ID not set: set an explicit notification_id") client = self._require_client(client) @@ -318,13 +322,17 @@ def exists(self, client=None, timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY): try: client._get_resource( - self.path, query_params=query_params, timeout=timeout, retry=retry, + self.path, + query_params=query_params, + timeout=timeout, + retry=retry, ) except NotFound: return False else: return True + @create_trace_span(name="Storage.BucketNotification.reload") def reload(self, client=None, timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY): """Update this notification from the server configuration. @@ -351,7 +359,7 @@ def reload(self, client=None, timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY): :raises ValueError: if the notification has no ID. """ if self.notification_id is None: - raise ValueError("Notification not intialized by server") + raise ValueError("Notification ID not set: set an explicit notification_id") client = self._require_client(client) @@ -360,10 +368,14 @@ def reload(self, client=None, timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY): query_params["userProject"] = self.bucket.user_project response = client._get_resource( - self.path, query_params=query_params, timeout=timeout, retry=retry, + self.path, + query_params=query_params, + timeout=timeout, + retry=retry, ) self._set_properties(response) + @create_trace_span(name="Storage.BucketNotification.delete") def delete(self, client=None, timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY): """Delete this notification. @@ -391,7 +403,7 @@ def delete(self, client=None, timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY): :raises ValueError: if the notification has no ID. """ if self.notification_id is None: - raise ValueError("Notification not intialized by server") + raise ValueError("Notification ID not set: set an explicit notification_id") client = self._require_client(client) @@ -400,29 +412,27 @@ def delete(self, client=None, timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY): query_params["userProject"] = self.bucket.user_project client._delete_resource( - self.path, query_params=query_params, timeout=timeout, retry=retry, + self.path, + query_params=query_params, + timeout=timeout, + retry=retry, ) def _parse_topic_path(topic_path): """Verify that a topic path is in the correct format. - .. 
_resource manager docs: https://cloud.google.com/resource-manager/\ - reference/rest/v1beta1/projects#\ - Project.FIELDS.project_id - .. _topic spec: https://cloud.google.com/storage/docs/json_api/v1/\ - notifications/insert#topic - Expected to be of the form: //pubsub.googleapis.com/projects/{project}/topics/{topic} where the ``project`` value must be "6 to 30 lowercase letters, digits, or hyphens. It must start with a letter. Trailing hyphens are prohibited." - (see `resource manager docs`_) and ``topic`` must have length at least two, + (see [`resource manager docs`](https://cloud.google.com/resource-manager/reference/rest/v1beta1/projects#Project.FIELDS.project_id)) + and ``topic`` must have length at least two, must start with a letter and may only contain alphanumeric characters or ``-``, ``_``, ``.``, ``~``, ``+`` or ``%`` (i.e characters used for URL - encoding, see `topic spec`_). + encoding, see [`topic spec`](https://cloud.google.com/storage/docs/json_api/v1/notifications/insert#topic)). Args: topic_path (str): The topic path to be verified. diff --git a/google/cloud/storage/retry.py b/google/cloud/storage/retry.py index 6037cbe1d..d1d5a7686 100644 --- a/google/cloud/storage/retry.py +++ b/google/cloud/storage/retry.py @@ -12,12 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""Helpers for configuring retries with exponential back-off. + +See [Retry Strategy for Google Cloud Storage](https://cloud.google.com/storage/docs/retry-strategy#client-libraries) +""" + +import http + import requests import requests.exceptions as requests_exceptions +import urllib3 from google.api_core import exceptions as api_exceptions from google.api_core import retry from google.auth import exceptions as auth_exceptions +from google.cloud.storage.exceptions import InvalidResponse _RETRYABLE_TYPES = ( @@ -29,11 +38,25 @@ ConnectionError, requests.ConnectionError, requests_exceptions.ChunkedEncodingError, + requests_exceptions.Timeout, + http.client.BadStatusLine, + http.client.IncompleteRead, + http.client.ResponseNotReady, + urllib3.exceptions.PoolError, + urllib3.exceptions.ProtocolError, + urllib3.exceptions.SSLError, + urllib3.exceptions.TimeoutError, ) -# Some retriable errors don't have their own custom exception in api_core. 
-_ADDITIONAL_RETRYABLE_STATUS_CODES = (408,) +_RETRYABLE_STATUS_CODES = ( + http.client.TOO_MANY_REQUESTS, # 429 + http.client.REQUEST_TIMEOUT, # 408 + http.client.INTERNAL_SERVER_ERROR, # 500 + http.client.BAD_GATEWAY, # 502 + http.client.SERVICE_UNAVAILABLE, # 503 + http.client.GATEWAY_TIMEOUT, # 504 +) def _should_retry(exc): @@ -41,7 +64,9 @@ def _should_retry(exc): if isinstance(exc, _RETRYABLE_TYPES): return True elif isinstance(exc, api_exceptions.GoogleAPICallError): - return exc.code in _ADDITIONAL_RETRYABLE_STATUS_CODES + return exc.code in _RETRYABLE_STATUS_CODES + elif isinstance(exc, InvalidResponse): + return exc.response.status_code in _RETRYABLE_STATUS_CODES elif isinstance(exc, auth_exceptions.TransportError): return _should_retry(exc.args[0]) else: @@ -87,7 +112,8 @@ class ConditionalRetryPolicy(object): :type required_kwargs: list(str) :param required_kwargs: A list of keyword argument keys that will be extracted from the API call - and passed into the ``conditional predicate`` in order. + and passed into the ``conditional predicate`` in order. For example, + ``["query_params"]`` is commmonly used for preconditions in query_params. """ def __init__(self, retry_policy, conditional_predicate, required_kwargs): diff --git a/google/cloud/storage/transfer_manager.py b/google/cloud/storage/transfer_manager.py new file mode 100644 index 000000000..fafe68f1c --- /dev/null +++ b/google/cloud/storage/transfer_manager.py @@ -0,0 +1,1376 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Concurrent media operations.""" + +import concurrent.futures + +import io +import inspect +import os +import warnings +import pickle +import copyreg +import struct +import base64 +import functools + +from google.api_core import exceptions +from google.cloud.storage import Client +from google.cloud.storage import Blob +from google.cloud.storage.blob import _get_host_name +from google.cloud.storage.blob import _quote +from google.cloud.storage.constants import _DEFAULT_TIMEOUT +from google.cloud.storage.retry import DEFAULT_RETRY + +import google_crc32c + +from google.cloud.storage._media.requests.upload import XMLMPUContainer +from google.cloud.storage._media.requests.upload import XMLMPUPart +from google.cloud.storage.exceptions import DataCorruption + +TM_DEFAULT_CHUNK_SIZE = 32 * 1024 * 1024 +DEFAULT_MAX_WORKERS = 8 +MAX_CRC32C_ZERO_ARRAY_SIZE = 4 * 1024 * 1024 +METADATA_HEADER_TRANSLATION = { + "cacheControl": "Cache-Control", + "contentDisposition": "Content-Disposition", + "contentEncoding": "Content-Encoding", + "contentLanguage": "Content-Language", + "customTime": "x-goog-custom-time", + "storageClass": "x-goog-storage-class", +} + +# Constants to be passed in as `worker_type`. 
+PROCESS = "process" +THREAD = "thread" + +DOWNLOAD_CRC32C_MISMATCH_TEMPLATE = """\ +Checksum mismatch while downloading: + + {} + +The object metadata indicated a crc32c checksum of: + + {} + +but the actual crc32c checksum of the downloaded contents was: + + {} +""" + + +_cached_clients = {} + + +def _deprecate_threads_param(func): + @functools.wraps(func) + def convert_threads_or_raise(*args, **kwargs): + binding = inspect.signature(func).bind(*args, **kwargs) + threads = binding.arguments.get("threads") + if threads: + worker_type = binding.arguments.get("worker_type") + max_workers = binding.arguments.get("max_workers") + if worker_type or max_workers: # Parameter conflict + raise ValueError( + "The `threads` parameter is deprecated and conflicts with its replacement parameters, `worker_type` and `max_workers`." + ) + # No conflict, so issue a warning and set worker_type and max_workers. + warnings.warn( + "The `threads` parameter is deprecated. Please use `worker_type` and `max_workers` parameters instead." + ) + args = binding.args + kwargs = binding.kwargs + kwargs["worker_type"] = THREAD + kwargs["max_workers"] = threads + return func(*args, **kwargs) + else: + return func(*args, **kwargs) + + return convert_threads_or_raise + + +@_deprecate_threads_param +def upload_many( + file_blob_pairs, + skip_if_exists=False, + upload_kwargs=None, + threads=None, + deadline=None, + raise_exception=False, + worker_type=PROCESS, + max_workers=DEFAULT_MAX_WORKERS, +): + """Upload many files concurrently via a worker pool. + + :type file_blob_pairs: List(Tuple(IOBase or str, 'google.cloud.storage.blob.Blob')) + :param file_blob_pairs: + A list of tuples of a file or filename and a blob. Each file will be + uploaded to the corresponding blob by using APIs identical to + `blob.upload_from_file()` or `blob.upload_from_filename()` as + appropriate. + + File handlers are only supported if worker_type is set to THREAD. + If worker_type is set to PROCESS, please use filenames only. + + :type skip_if_exists: bool + :param skip_if_exists: + If True, blobs that already have a live version will not be overwritten. + This is accomplished by setting `if_generation_match = 0` on uploads. + Uploads so skipped will result in a 412 Precondition Failed response + code, which will be included in the return value but not raised + as an exception regardless of the value of raise_exception. + + :type upload_kwargs: dict + :param upload_kwargs: + A dictionary of keyword arguments to pass to the upload method. Refer + to the documentation for `blob.upload_from_file()` or + `blob.upload_from_filename()` for more information. The dict is directly + passed into the upload methods and is not validated by this function. + + :type threads: int + :param threads: + ***DEPRECATED*** Sets `worker_type` to THREAD and `max_workers` to the + number specified. If `worker_type` or `max_workers` are set explicitly, + this parameter should be set to None. Please use `worker_type` and + `max_workers` instead of this parameter. + + :type deadline: int + :param deadline: + The number of seconds to wait for all threads to resolve. If the + deadline is reached, all threads will be terminated regardless of their + progress and `concurrent.futures.TimeoutError` will be raised. This can + be left as the default of `None` (no deadline) for most use cases. + + :type raise_exception: bool + :param raise_exception: + If True, instead of adding exceptions to the list of return values, + instead they will be raised. 
Note that encountering an exception on one + operation will not prevent other operations from starting. Exceptions + are only processed and potentially raised after all operations are + complete in success or failure. + + If skip_if_exists is True, 412 Precondition Failed responses are + considered part of normal operation and are not raised as an exception. + + :type worker_type: str + :param worker_type: + The worker type to use; one of `google.cloud.storage.transfer_manager.PROCESS` + or `google.cloud.storage.transfer_manager.THREAD`. + + Although the exact performance impact depends on the use case, in most + situations the PROCESS worker type will use more system resources (both + memory and CPU) and result in faster operations than THREAD workers. + + Because the subprocesses of the PROCESS worker type can't access memory + from the main process, Client objects have to be serialized and then + recreated in each subprocess. The serialization of the Client object + for use in subprocesses is an approximation and may not capture every + detail of the Client object, especially if the Client was modified after + its initial creation or if `Client._http` was modified in any way. + + THREAD worker types are observed to be relatively efficient for + operations with many small files, but not for operations with large + files. PROCESS workers are recommended for large file operations. + + PROCESS workers do not support writing to file handlers. Please refer + to files by filename only when using PROCESS workers. + + :type max_workers: int + :param max_workers: + The maximum number of workers to create to handle the workload. + + With PROCESS workers, a larger number of workers will consume more + system resources (memory and CPU) at once. + + How many workers is optimal depends heavily on the specific use case, + and the default is a conservative number that should work okay in most + cases without consuming excessive resources. + + :raises: :exc:`concurrent.futures.TimeoutError` if deadline is exceeded. + + :rtype: list + :returns: A list of results corresponding to, in order, each item in the + input list. If an exception was received, it will be the result + for that operation. Otherwise, the return value from the successful + upload method is used (which will be None). + """ + if upload_kwargs is None: + upload_kwargs = {} + + if skip_if_exists: + upload_kwargs = upload_kwargs.copy() + upload_kwargs["if_generation_match"] = 0 + + upload_kwargs["command"] = "tm.upload_many" + + pool_class, needs_pickling = _get_pool_class_and_requirements(worker_type) + + with pool_class(max_workers=max_workers) as executor: + futures = [] + for path_or_file, blob in file_blob_pairs: + # File objects are only supported by the THREAD worker because they can't + # be pickled. + if needs_pickling and not isinstance(path_or_file, str): + raise ValueError( + "Passing in a file object is only supported by the THREAD worker type. Please either select THREAD workers, or pass in filenames only." 
+ ) + + futures.append( + executor.submit( + _call_method_on_maybe_pickled_blob, + _pickle_client(blob) if needs_pickling else blob, + "_handle_filename_and_upload" + if isinstance(path_or_file, str) + else "_prep_and_do_upload", + path_or_file, + **upload_kwargs, + ) + ) + concurrent.futures.wait( + futures, timeout=deadline, return_when=concurrent.futures.ALL_COMPLETED + ) + + results = [] + for future in futures: + exp = future.exception() + + # If raise_exception is False, don't call future.result() + if exp and not raise_exception: + results.append(exp) + # If skip_if_exists and the exception is PreconditionFailed, do same. + elif exp and skip_if_exists and isinstance(exp, exceptions.PreconditionFailed): + results.append(exp) + # Get the real result. If there was an exception not handled above, + # this will raise it. + else: + results.append(future.result()) + return results + + +@_deprecate_threads_param +def download_many( + blob_file_pairs, + download_kwargs=None, + threads=None, + deadline=None, + raise_exception=False, + worker_type=PROCESS, + max_workers=DEFAULT_MAX_WORKERS, + *, + skip_if_exists=False, +): + """Download many blobs concurrently via a worker pool. + + :type blob_file_pairs: List(Tuple('google.cloud.storage.blob.Blob', IOBase or str)) + :param blob_file_pairs: + A list of tuples of blob and a file or filename. Each blob will be downloaded to the corresponding blob by using APIs identical to blob.download_to_file() or blob.download_to_filename() as appropriate. + + Note that blob.download_to_filename() does not delete the destination file if the download fails. + + File handlers are only supported if worker_type is set to THREAD. + If worker_type is set to PROCESS, please use filenames only. + + :type download_kwargs: dict + :param download_kwargs: + A dictionary of keyword arguments to pass to the download method. Refer + to the documentation for `blob.download_to_file()` or + `blob.download_to_filename()` for more information. The dict is directly + passed into the download methods and is not validated by this function. + + :type threads: int + :param threads: + ***DEPRECATED*** Sets `worker_type` to THREAD and `max_workers` to the + number specified. If `worker_type` or `max_workers` are set explicitly, + this parameter should be set to None. Please use `worker_type` and + `max_workers` instead of this parameter. + + :type deadline: int + :param deadline: + The number of seconds to wait for all threads to resolve. If the + deadline is reached, all threads will be terminated regardless of their + progress and `concurrent.futures.TimeoutError` will be raised. This can + be left as the default of `None` (no deadline) for most use cases. + + :type raise_exception: bool + :param raise_exception: + If True, instead of adding exceptions to the list of return values, + instead they will be raised. Note that encountering an exception on one + operation will not prevent other operations from starting. Exceptions + are only processed and potentially raised after all operations are + complete in success or failure. + + :type worker_type: str + :param worker_type: + The worker type to use; one of `google.cloud.storage.transfer_manager.PROCESS` + or `google.cloud.storage.transfer_manager.THREAD`. + + Although the exact performance impact depends on the use case, in most + situations the PROCESS worker type will use more system resources (both + memory and CPU) and result in faster operations than THREAD workers. 
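The upload_many and download_many helpers introduced above are thin concurrency wrappers around the existing per-blob upload and download APIs. A minimal usage sketch, assuming a hypothetical bucket "my-bucket" and local JPEG files (THREAD workers are chosen so the sketch would also work with open file objects):

from google.cloud import storage
from google.cloud.storage import transfer_manager

client = storage.Client()
bucket = client.bucket("my-bucket")  # hypothetical bucket name

# Pair each local path with its destination blob; plain filenames keep the
# sketch compatible with the default PROCESS worker type as well.
file_blob_pairs = [
    ("photos/a.jpg", bucket.blob("photos/a.jpg")),
    ("photos/b.jpg", bucket.blob("photos/b.jpg")),
]

results = transfer_manager.upload_many(
    file_blob_pairs,
    skip_if_exists=True,  # sets if_generation_match=0 on each upload
    worker_type=transfer_manager.THREAD,
    max_workers=4,
)
for (path, blob), result in zip(file_blob_pairs, results):
    # result is None on success, or the exception (e.g. a 412 for skipped
    # uploads) for that operation.
    print(path, "->", blob.name, ":", result)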
+ + Because the subprocesses of the PROCESS worker type can't access memory + from the main process, Client objects have to be serialized and then + recreated in each subprocess. The serialization of the Client object + for use in subprocesses is an approximation and may not capture every + detail of the Client object, especially if the Client was modified after + its initial creation or if `Client._http` was modified in any way. + + THREAD worker types are observed to be relatively efficient for + operations with many small files, but not for operations with large + files. PROCESS workers are recommended for large file operations. + + PROCESS workers do not support writing to file handlers. Please refer + to files by filename only when using PROCESS workers. + + :type max_workers: int + :param max_workers: + The maximum number of workers to create to handle the workload. + + With PROCESS workers, a larger number of workers will consume more + system resources (memory and CPU) at once. + + How many workers is optimal depends heavily on the specific use case, + and the default is a conservative number that should work okay in most + cases without consuming excessive resources. + + :type skip_if_exists: bool + :param skip_if_exists: + Before downloading each blob, check if the file for the filename exists; + if it does, skip that blob. + + :raises: :exc:`concurrent.futures.TimeoutError` if deadline is exceeded. + + :rtype: list + :returns: A list of results corresponding to, in order, each item in the + input list. If an exception was received, it will be the result + for that operation. Otherwise, the return value from the successful + download method is used (which will be None). + """ + + if download_kwargs is None: + download_kwargs = {} + + download_kwargs["command"] = "tm.download_many" + + pool_class, needs_pickling = _get_pool_class_and_requirements(worker_type) + + with pool_class(max_workers=max_workers) as executor: + futures = [] + for blob, path_or_file in blob_file_pairs: + # File objects are only supported by the THREAD worker because they can't + # be pickled. + if needs_pickling and not isinstance(path_or_file, str): + raise ValueError( + "Passing in a file object is only supported by the THREAD worker type. Please either select THREAD workers, or pass in filenames only." + ) + + if skip_if_exists and isinstance(path_or_file, str): + if os.path.isfile(path_or_file): + continue + + futures.append( + executor.submit( + _call_method_on_maybe_pickled_blob, + _pickle_client(blob) if needs_pickling else blob, + "_handle_filename_and_download" + if isinstance(path_or_file, str) + else "_prep_and_do_download", + path_or_file, + **download_kwargs, + ) + ) + concurrent.futures.wait( + futures, timeout=deadline, return_when=concurrent.futures.ALL_COMPLETED + ) + + results = [] + for future in futures: + # If raise_exception is False, don't call future.result() + if not raise_exception: + exp = future.exception() + if exp: + results.append(exp) + continue + # Get the real result. If there was an exception, this will raise it. 
+ results.append(future.result()) + return results + + +@_deprecate_threads_param +def upload_many_from_filenames( + bucket, + filenames, + source_directory="", + blob_name_prefix="", + skip_if_exists=False, + blob_constructor_kwargs=None, + upload_kwargs=None, + threads=None, + deadline=None, + raise_exception=False, + worker_type=PROCESS, + max_workers=DEFAULT_MAX_WORKERS, + *, + additional_blob_attributes=None, +): + """Upload many files concurrently by their filenames. + + The destination blobs are automatically created, with blob names based on + the source filenames and the blob_name_prefix. + + For example, if the `filenames` include "images/icon.jpg", + `source_directory` is "/home/myuser/", and `blob_name_prefix` is "myfiles/", + then the file at "/home/myuser/images/icon.jpg" will be uploaded to a blob + named "myfiles/images/icon.jpg". + + :type bucket: :class:`google.cloud.storage.bucket.Bucket` + :param bucket: + The bucket which will contain the uploaded blobs. + + :type filenames: list(str) + :param filenames: + A list of filenames to be uploaded. This may include part of the path. + The file will be accessed at the full path of `source_directory` + + `filename`. + + :type source_directory: str + :param source_directory: + A string that will be prepended (with `os.path.join()`) to each filename + in the input list, in order to find the source file for each blob. + Unlike the filename itself, the source_directory does not affect the + name of the uploaded blob. + + For instance, if the source_directory is "/tmp/img/" and a filename is + "0001.jpg", with an empty blob_name_prefix, then the file uploaded will + be "/tmp/img/0001.jpg" and the destination blob will be "0001.jpg". + + This parameter can be an empty string. + + Note that this parameter allows directory traversal (e.g. "/", "../") + and is not intended for unsanitized end user input. + + :type blob_name_prefix: str + :param blob_name_prefix: + A string that will be prepended to each filename in the input list, in + order to determine the name of the destination blob. Unlike the filename + itself, the prefix string does not affect the location the library will + look for the source data on the local filesystem. + + For instance, if the source_directory is "/tmp/img/", the + blob_name_prefix is "myuser/mystuff-" and a filename is "0001.jpg" then + the file uploaded will be "/tmp/img/0001.jpg" and the destination blob + will be "myuser/mystuff-0001.jpg". + + The blob_name_prefix can be blank (an empty string). + + :type skip_if_exists: bool + :param skip_if_exists: + If True, blobs that already have a live version will not be overwritten. + This is accomplished by setting `if_generation_match = 0` on uploads. + Uploads so skipped will result in a 412 Precondition Failed response + code, which will be included in the return value, but not raised + as an exception regardless of the value of raise_exception. + + :type blob_constructor_kwargs: dict + :param blob_constructor_kwargs: + A dictionary of keyword arguments to pass to the blob constructor. Refer + to the documentation for `blob.Blob()` for more information. The dict is + directly passed into the constructor and is not validated by this + function. `name` and `bucket` keyword arguments are reserved by this + function and will result in an error if passed in here. + + :type upload_kwargs: dict + :param upload_kwargs: + A dictionary of keyword arguments to pass to the upload method. 
Refer + to the documentation for `blob.upload_from_file()` or + `blob.upload_from_filename()` for more information. The dict is directly + passed into the upload methods and is not validated by this function. + + :type threads: int + :param threads: + ***DEPRECATED*** Sets `worker_type` to THREAD and `max_workers` to the + number specified. If `worker_type` or `max_workers` are set explicitly, + this parameter should be set to None. Please use `worker_type` and + `max_workers` instead of this parameter. + + :type deadline: int + :param deadline: + The number of seconds to wait for all threads to resolve. If the + deadline is reached, all threads will be terminated regardless of their + progress and `concurrent.futures.TimeoutError` will be raised. This can + be left as the default of `None` (no deadline) for most use cases. + + :type raise_exception: bool + :param raise_exception: + If True, instead of adding exceptions to the list of return values, + instead they will be raised. Note that encountering an exception on one + operation will not prevent other operations from starting. Exceptions + are only processed and potentially raised after all operations are + complete in success or failure. + + If skip_if_exists is True, 412 Precondition Failed responses are + considered part of normal operation and are not raised as an exception. + + :type worker_type: str + :param worker_type: + The worker type to use; one of `google.cloud.storage.transfer_manager.PROCESS` + or `google.cloud.storage.transfer_manager.THREAD`. + + Although the exact performance impact depends on the use case, in most + situations the PROCESS worker type will use more system resources (both + memory and CPU) and result in faster operations than THREAD workers. + + Because the subprocesses of the PROCESS worker type can't access memory + from the main process, Client objects have to be serialized and then + recreated in each subprocess. The serialization of the Client object + for use in subprocesses is an approximation and may not capture every + detail of the Client object, especially if the Client was modified after + its initial creation or if `Client._http` was modified in any way. + + THREAD worker types are observed to be relatively efficient for + operations with many small files, but not for operations with large + files. PROCESS workers are recommended for large file operations. + + :type max_workers: int + :param max_workers: + The maximum number of workers to create to handle the workload. + + With PROCESS workers, a larger number of workers will consume more + system resources (memory and CPU) at once. + + How many workers is optimal depends heavily on the specific use case, + and the default is a conservative number that should work okay in most + cases without consuming excessive resources. + + :type additional_blob_attributes: dict + :param additional_blob_attributes: + A dictionary of blob attribute names and values. This allows the + configuration of blobs beyond what is possible with + blob_constructor_kwargs. For instance, {"cache_control": "no-cache"} + would set the cache_control attribute of each blob to "no-cache". + + As with blob_constructor_kwargs, this affects the creation of every + blob identically. To fine-tune each blob individually, use `upload_many` + and create the blobs as desired before passing them in. + + :raises: :exc:`concurrent.futures.TimeoutError` if deadline is exceeded. + + :rtype: list + :returns: A list of results corresponding to, in order, each item in the + input list. 
If an exception was received, it will be the result + for that operation. Otherwise, the return value from the successful + upload method is used (which will be None). + """ + if blob_constructor_kwargs is None: + blob_constructor_kwargs = {} + if additional_blob_attributes is None: + additional_blob_attributes = {} + + file_blob_pairs = [] + + for filename in filenames: + path = os.path.join(source_directory, filename) + blob_name = blob_name_prefix + filename + blob = bucket.blob(blob_name, **blob_constructor_kwargs) + for prop, value in additional_blob_attributes.items(): + setattr(blob, prop, value) + file_blob_pairs.append((path, blob)) + + return upload_many( + file_blob_pairs, + skip_if_exists=skip_if_exists, + upload_kwargs=upload_kwargs, + deadline=deadline, + raise_exception=raise_exception, + worker_type=worker_type, + max_workers=max_workers, + ) + + +@_deprecate_threads_param +def download_many_to_path( + bucket, + blob_names, + destination_directory="", + blob_name_prefix="", + download_kwargs=None, + threads=None, + deadline=None, + create_directories=True, + raise_exception=False, + worker_type=PROCESS, + max_workers=DEFAULT_MAX_WORKERS, + *, + skip_if_exists=False, +): + """Download many files concurrently by their blob names. + + The destination files are automatically created, with paths based on the + source blob_names and the destination_directory. + + The destination files are not automatically deleted if their downloads fail, + so please check the return value of this function for any exceptions, or + enable `raise_exception=True`, and process the files accordingly. + + For example, if the `blob_names` include "icon.jpg", `destination_directory` + is "/home/myuser/", and `blob_name_prefix` is "images/", then the blob named + "images/icon.jpg" will be downloaded to a file named + "/home/myuser/icon.jpg". + + :type bucket: :class:`google.cloud.storage.bucket.Bucket` + :param bucket: + The bucket which contains the blobs to be downloaded + + :type blob_names: list(str) + :param blob_names: + A list of blobs to be downloaded. The blob name in this string will be + used to determine the destination file path as well. + + The full name to the blob must be blob_name_prefix + blob_name. The + blob_name is separate from the blob_name_prefix because the blob_name + will also determine the name of the destination blob. Any shared part of + the blob names that need not be part of the destination path should be + included in the blob_name_prefix. + + :type destination_directory: str + :param destination_directory: + A string that will be prepended (with os.path.join()) to each blob_name + in the input list, in order to determine the destination path for that + blob. + + For instance, if the destination_directory string is "/tmp/img" and a + blob_name is "0001.jpg", with an empty blob_name_prefix, then the source + blob "0001.jpg" will be downloaded to destination "/tmp/img/0001.jpg" . + + This parameter can be an empty string. + + Note that this parameter allows directory traversal (e.g. "/", "../") + and is not intended for unsanitized end user input. + + :type blob_name_prefix: str + :param blob_name_prefix: + A string that will be prepended to each blob_name in the input list, in + order to determine the name of the source blob. Unlike the blob_name + itself, the prefix string does not affect the destination path on the + local filesystem. 
For instance, if the destination_directory is + "/tmp/img/", the blob_name_prefix is "myuser/mystuff-" and a blob_name + is "0001.jpg" then the source blob "myuser/mystuff-0001.jpg" will be + downloaded to "/tmp/img/0001.jpg". The blob_name_prefix can be blank + (an empty string). + + :type download_kwargs: dict + :param download_kwargs: + A dictionary of keyword arguments to pass to the download method. Refer + to the documentation for `blob.download_to_file()` or + `blob.download_to_filename()` for more information. The dict is directly + passed into the download methods and is not validated by this function. + + :type threads: int + :param threads: + ***DEPRECATED*** Sets `worker_type` to THREAD and `max_workers` to the + number specified. If `worker_type` or `max_workers` are set explicitly, + this parameter should be set to None. Please use `worker_type` and + `max_workers` instead of this parameter. + + :type deadline: int + :param deadline: + The number of seconds to wait for all threads to resolve. If the + deadline is reached, all threads will be terminated regardless of their + progress and `concurrent.futures.TimeoutError` will be raised. This can + be left as the default of `None` (no deadline) for most use cases. + + :type create_directories: bool + :param create_directories: + If True, recursively create any directories that do not exist. For + instance, if downloading object "images/img001.png", create the + directory "images" before downloading. + + :type raise_exception: bool + :param raise_exception: + If True, instead of adding exceptions to the list of return values, + instead they will be raised. Note that encountering an exception on one + operation will not prevent other operations from starting. Exceptions + are only processed and potentially raised after all operations are + complete in success or failure. If skip_if_exists is True, 412 + Precondition Failed responses are considered part of normal operation + and are not raised as an exception. + + :type worker_type: str + :param worker_type: + The worker type to use; one of `google.cloud.storage.transfer_manager.PROCESS` + or `google.cloud.storage.transfer_manager.THREAD`. + + Although the exact performance impact depends on the use case, in most + situations the PROCESS worker type will use more system resources (both + memory and CPU) and result in faster operations than THREAD workers. + + Because the subprocesses of the PROCESS worker type can't access memory + from the main process, Client objects have to be serialized and then + recreated in each subprocess. The serialization of the Client object + for use in subprocesses is an approximation and may not capture every + detail of the Client object, especially if the Client was modified after + its initial creation or if `Client._http` was modified in any way. + + THREAD worker types are observed to be relatively efficient for + operations with many small files, but not for operations with large + files. PROCESS workers are recommended for large file operations. + + :type max_workers: int + :param max_workers: + The maximum number of workers to create to handle the workload. + + With PROCESS workers, a larger number of workers will consume more + system resources (memory and CPU) at once. + + How many workers is optimal depends heavily on the specific use case, + and the default is a conservative number that should work okay in most + cases without consuming excessive resources. 
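Before the remaining parameters, a hedged sketch of calling download_many_to_path; the bucket name, blob names, and destination directory are all hypothetical:

from google.cloud import storage
from google.cloud.storage import transfer_manager

client = storage.Client()
bucket = client.bucket("my-bucket")  # hypothetical

results = transfer_manager.download_many_to_path(
    bucket,
    blob_names=["images/0001.jpg", "images/0002.jpg"],
    destination_directory="/tmp/downloads",  # joined with each blob name via os.path.join()
    create_directories=True,
    max_workers=8,
    raise_exception=True,  # re-raise any failure after all downloads complete
)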
+ + :type skip_if_exists: bool + :param skip_if_exists: + Before downloading each blob, check if the file for the filename exists; + if it does, skip that blob. This only works for filenames. + + :raises: :exc:`concurrent.futures.TimeoutError` if deadline is exceeded. + + :rtype: list + :returns: A list of results corresponding to, in order, each item in the + input list. If an exception was received, it will be the result + for that operation. Otherwise, the return value from the successful + download method is used (which will be None). + """ + blob_file_pairs = [] + + for blob_name in blob_names: + full_blob_name = blob_name_prefix + blob_name + path = os.path.join(destination_directory, blob_name) + if create_directories: + directory, _ = os.path.split(path) + os.makedirs(directory, exist_ok=True) + blob_file_pairs.append((bucket.blob(full_blob_name), path)) + + return download_many( + blob_file_pairs, + download_kwargs=download_kwargs, + deadline=deadline, + raise_exception=raise_exception, + worker_type=worker_type, + max_workers=max_workers, + skip_if_exists=skip_if_exists, + ) + + +def download_chunks_concurrently( + blob, + filename, + chunk_size=TM_DEFAULT_CHUNK_SIZE, + download_kwargs=None, + deadline=None, + worker_type=PROCESS, + max_workers=DEFAULT_MAX_WORKERS, + *, + crc32c_checksum=True, +): + """Download a single file in chunks, concurrently. + + In some environments, using this feature with mutiple processes will result + in faster downloads of large files. + + Using this feature with multiple threads is unlikely to improve download + performance under normal circumstances due to Python interpreter threading + behavior. The default is therefore to use processes instead of threads. + + :type blob: :class:`google.cloud.storage.blob.Blob` + :param blob: + The blob to be downloaded. + + :type filename: str + :param filename: + The destination filename or path. + + :type chunk_size: int + :param chunk_size: + The size in bytes of each chunk to send. The optimal chunk size for + maximum throughput may vary depending on the exact network environment + and size of the blob. + + :type download_kwargs: dict + :param download_kwargs: + A dictionary of keyword arguments to pass to the download method. Refer + to the documentation for `blob.download_to_file()` or + `blob.download_to_filename()` for more information. The dict is directly + passed into the download methods and is not validated by this function. + + Keyword arguments "start" and "end" which are not supported and will + cause a ValueError if present. The key "checksum" is also not supported + in `download_kwargs`, but see the argument `crc32c_checksum` (which does + not go in `download_kwargs`) below. + + :type deadline: int + :param deadline: + The number of seconds to wait for all threads to resolve. If the + deadline is reached, all threads will be terminated regardless of their + progress and `concurrent.futures.TimeoutError` will be raised. This can + be left as the default of `None` (no deadline) for most use cases. + + :type worker_type: str + :param worker_type: + The worker type to use; one of `google.cloud.storage.transfer_manager.PROCESS` + or `google.cloud.storage.transfer_manager.THREAD`. + + Although the exact performance impact depends on the use case, in most + situations the PROCESS worker type will use more system resources (both + memory and CPU) and result in faster operations than THREAD workers. 
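A minimal sketch of the sliced download described here, assuming a hypothetical large object "backups/archive.tar" in "my-bucket"; DataCorruption is raised if the combined crc32c does not match the object metadata:

from google.cloud import storage
from google.cloud.storage import transfer_manager

client = storage.Client()
blob = client.bucket("my-bucket").blob("backups/archive.tar")  # hypothetical

transfer_manager.download_chunks_concurrently(
    blob,
    "/tmp/archive.tar",
    chunk_size=32 * 1024 * 1024,  # 32 MiB slices, the module default
    max_workers=8,                # PROCESS workers by default
    crc32c_checksum=True,         # combine and verify per-chunk crc32c values
)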
+ + Because the subprocesses of the PROCESS worker type can't access memory + from the main process, Client objects have to be serialized and then + recreated in each subprocess. The serialization of the Client object + for use in subprocesses is an approximation and may not capture every + detail of the Client object, especially if the Client was modified after + its initial creation or if `Client._http` was modified in any way. + + THREAD worker types are observed to be relatively efficient for + operations with many small files, but not for operations with large + files. PROCESS workers are recommended for large file operations. + + :type max_workers: int + :param max_workers: + The maximum number of workers to create to handle the workload. + + With PROCESS workers, a larger number of workers will consume more + system resources (memory and CPU) at once. + + How many workers is optimal depends heavily on the specific use case, + and the default is a conservative number that should work okay in most + cases without consuming excessive resources. + + :type crc32c_checksum: bool + :param crc32c_checksum: + Whether to compute a checksum for the resulting object, using the crc32c + algorithm. As the checksums for each chunk must be combined using a + feature of crc32c that is not available for md5, md5 is not supported. + + :raises: + :exc:`concurrent.futures.TimeoutError` + if deadline is exceeded. + :exc:`google.cloud.storage._media.common.DataCorruption` + if the download's checksum doesn't agree with server-computed + checksum. The `google.cloud.storage._media` exception is used here for + consistency with other download methods despite the exception + originating elsewhere. + """ + client = blob.client + + if download_kwargs is None: + download_kwargs = {} + if "start" in download_kwargs or "end" in download_kwargs: + raise ValueError( + "Download arguments 'start' and 'end' are not supported by download_chunks_concurrently." + ) + if "checksum" in download_kwargs: + raise ValueError( + "'checksum' is in download_kwargs, but is not supported because sliced downloads have a different checksum mechanism from regular downloads. Use the 'crc32c_checksum' argument on download_chunks_concurrently instead." + ) + + download_kwargs = download_kwargs.copy() + download_kwargs["checksum"] = None + download_kwargs["command"] = "tm.download_sharded" + + # We must know the size and the generation of the blob. + if not blob.size or not blob.generation: + blob.reload() + + pool_class, needs_pickling = _get_pool_class_and_requirements(worker_type) + # Pickle the blob ahead of time (just once, not once per chunk) if needed. + maybe_pickled_blob = _pickle_client(blob) if needs_pickling else blob + + futures = [] + + # Create and/or truncate the destination file to prepare for sparse writing. + with open(filename, "wb") as _: + pass + + with pool_class(max_workers=max_workers) as executor: + cursor = 0 + end = blob.size + while cursor < end: + start = cursor + cursor = min(cursor + chunk_size, end) + futures.append( + executor.submit( + _download_and_write_chunk_in_place, + maybe_pickled_blob, + filename, + start=start, + end=cursor - 1, + download_kwargs=download_kwargs, + crc32c_checksum=crc32c_checksum, + ) + ) + + concurrent.futures.wait( + futures, timeout=deadline, return_when=concurrent.futures.ALL_COMPLETED + ) + + # Raise any exceptions; combine checksums. 
+    results = []
+    for future in futures:
+        results.append(future.result())
+
+    if crc32c_checksum and results:
+        crc_digest = _digest_ordered_checksum_and_size_pairs(results)
+        actual_checksum = base64.b64encode(crc_digest).decode("utf-8")
+        expected_checksum = blob.crc32c
+        if actual_checksum != expected_checksum:
+            # For consistency with other download methods we will use
+            # "google.cloud.storage._media.common.DataCorruption" despite the error
+            # not originating inside google.cloud.storage._media.
+            download_url = blob._get_download_url(
+                client,
+                if_generation_match=download_kwargs.get("if_generation_match"),
+                if_generation_not_match=download_kwargs.get("if_generation_not_match"),
+                if_metageneration_match=download_kwargs.get("if_metageneration_match"),
+                if_metageneration_not_match=download_kwargs.get(
+                    "if_metageneration_not_match"
+                ),
+            )
+            raise DataCorruption(
+                None,
+                DOWNLOAD_CRC32C_MISMATCH_TEMPLATE.format(
+                    download_url, expected_checksum, actual_checksum
+                ),
+            )
+    return None
+
+
+def upload_chunks_concurrently(
+    filename,
+    blob,
+    content_type=None,
+    chunk_size=TM_DEFAULT_CHUNK_SIZE,
+    deadline=None,
+    worker_type=PROCESS,
+    max_workers=DEFAULT_MAX_WORKERS,
+    *,
+    checksum="auto",
+    timeout=_DEFAULT_TIMEOUT,
+    retry=DEFAULT_RETRY,
+):
+    """Upload a single file in chunks, concurrently.
+
+    This function uses the XML MPU API to initialize an upload and upload a
+    file in chunks, concurrently with a worker pool.
+
+    The XML MPU API is significantly different from other uploads; please review
+    the documentation at `https://cloud.google.com/storage/docs/multipart-uploads`
+    before using this feature.
+
+    The library will attempt to cancel uploads that fail due to an exception.
+    If the upload fails in a way that precludes cancellation, such as a
+    hardware failure, process termination, or power outage, then the incomplete
+    upload may persist indefinitely. To mitigate this, set the
+    `AbortIncompleteMultipartUpload` with a nonzero `Age` in bucket lifecycle
+    rules, or refer to the XML API documentation linked above to learn more
+    about how to list and delete individual incomplete uploads.
+
+    Using this feature with multiple threads is unlikely to improve upload
+    performance under normal circumstances due to Python interpreter threading
+    behavior. The default is therefore to use processes instead of threads.
+
+    ACL information cannot be sent with this function and should be set
+    separately with :class:`ObjectACL` methods.
+
+    :type filename: str
+    :param filename:
+        The path to the file to upload. File-like objects are not supported.
+
+    :type blob: :class:`google.cloud.storage.blob.Blob`
+    :param blob:
+        The blob to which to upload.
+
+    :type content_type: str
+    :param content_type: (Optional) Type of content being uploaded.
+
+    :type chunk_size: int
+    :param chunk_size:
+        The size in bytes of each chunk to send. The optimal chunk size for
+        maximum throughput may vary depending on the exact network environment
+        and size of the blob. The remote API has restrictions on the minimum
+        and maximum size allowable, see: `https://cloud.google.com/storage/quotas#requests`
+
+    :type deadline: int
+    :param deadline:
+        The number of seconds to wait for all threads to resolve. If the
+        deadline is reached, all threads will be terminated regardless of their
+        progress and `concurrent.futures.TimeoutError` will be raised. This can
+        be left as the default of `None` (no deadline) for most use cases.
+ + :type worker_type: str + :param worker_type: + The worker type to use; one of `google.cloud.storage.transfer_manager.PROCESS` + or `google.cloud.storage.transfer_manager.THREAD`. + + Although the exact performance impact depends on the use case, in most + situations the PROCESS worker type will use more system resources (both + memory and CPU) and result in faster operations than THREAD workers. + + Because the subprocesses of the PROCESS worker type can't access memory + from the main process, Client objects have to be serialized and then + recreated in each subprocess. The serialization of the Client object + for use in subprocesses is an approximation and may not capture every + detail of the Client object, especially if the Client was modified after + its initial creation or if `Client._http` was modified in any way. + + THREAD worker types are observed to be relatively efficient for + operations with many small files, but not for operations with large + files. PROCESS workers are recommended for large file operations. + + :type max_workers: int + :param max_workers: + The maximum number of workers to create to handle the workload. + + With PROCESS workers, a larger number of workers will consume more + system resources (memory and CPU) at once. + + How many workers is optimal depends heavily on the specific use case, + and the default is a conservative number that should work okay in most + cases without consuming excessive resources. + + :type checksum: str + :param checksum: + (Optional) The checksum scheme to use: either "md5", "crc32c", "auto" + or None. The default is "auto", which will try to detect if the C + extension for crc32c is installed and fall back to md5 otherwise. + Each individual part is checksummed. At present, the selected + checksum rule is only applied to parts and a separate checksum of the + entire resulting blob is not computed. Please compute and compare the + checksum of the file to the resulting blob separately if needed, using + the "crc32c" algorithm as per the XML MPU documentation. + + :type timeout: float or tuple + :param timeout: + (Optional) The amount of time, in seconds, to wait + for the server response. See: :ref:`configuring_timeouts` + + :type retry: google.api_core.retry.Retry + :param retry: (Optional) How to retry the RPC. A None value will disable + retries. A `google.api_core.retry.Retry` value will enable retries, + and the object will configure backoff and timeout options. Custom + predicates (customizable error codes) are not supported for media + operations such as this one. + + This function does not accept `ConditionalRetryPolicy` values because + preconditions are not supported by the underlying API call. + + See the retry.py source code and docstrings in this package + (`google.cloud.storage.retry`) for information on retry types and how + to configure them. + + :raises: :exc:`concurrent.futures.TimeoutError` if deadline is exceeded. 
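For orientation, here is a minimal usage sketch of the two chunked-transfer entry points documented in this module; it is not part of the diff itself. The bucket name, object names, local paths, chunk size, and worker count are illustrative assumptions only; the `transfer_manager` functions and their parameters are the ones defined above, and the object being downloaded is assumed to already exist.

```python
from google.cloud import storage
from google.cloud.storage import transfer_manager

client = storage.Client()
bucket = client.bucket("my-bucket")  # hypothetical bucket name

# Download one large, existing object in 32 MiB chunks using the default
# process-based worker pool.
blob = bucket.blob("large-file.bin")
transfer_manager.download_chunks_concurrently(
    blob, "/tmp/large-file.bin", chunk_size=32 * 1024 * 1024, max_workers=8
)

# Upload the file back under a new name via the XML multipart upload API.
new_blob = bucket.blob("large-file-copy.bin")
transfer_manager.upload_chunks_concurrently(
    "/tmp/large-file.bin", new_blob, chunk_size=32 * 1024 * 1024, max_workers=8
)
```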
+ """ + + bucket = blob.bucket + client = blob.client + transport = blob._get_transport(client) + + hostname = _get_host_name(client._connection) + url = "{hostname}/{bucket}/{blob}".format( + hostname=hostname, bucket=bucket.name, blob=_quote(blob.name) + ) + + base_headers, object_metadata, content_type = blob._get_upload_arguments( + client, content_type, filename=filename, command="tm.upload_sharded" + ) + headers = {**base_headers, **_headers_from_metadata(object_metadata)} + + if blob.user_project is not None: + headers["x-goog-user-project"] = blob.user_project + + # When a Customer Managed Encryption Key is used to encrypt Cloud Storage object + # at rest, object resource metadata will store the version of the Key Management + # Service cryptographic material. If a Blob instance with KMS Key metadata set is + # used to upload a new version of the object then the existing kmsKeyName version + # value can't be used in the upload request and the client instead ignores it. + if blob.kms_key_name is not None and "cryptoKeyVersions" not in blob.kms_key_name: + headers["x-goog-encryption-kms-key-name"] = blob.kms_key_name + + container = XMLMPUContainer(url, filename, headers=headers, retry=retry) + + container.initiate(transport=transport, content_type=content_type) + upload_id = container.upload_id + + size = os.path.getsize(filename) + num_of_parts = -(size // -chunk_size) # Ceiling division + + pool_class, needs_pickling = _get_pool_class_and_requirements(worker_type) + # Pickle the blob ahead of time (just once, not once per chunk) if needed. + maybe_pickled_client = _pickle_client(client) if needs_pickling else client + + futures = [] + + with pool_class(max_workers=max_workers) as executor: + for part_number in range(1, num_of_parts + 1): + start = (part_number - 1) * chunk_size + end = min(part_number * chunk_size, size) + + futures.append( + executor.submit( + _upload_part, + maybe_pickled_client, + url, + upload_id, + filename, + start=start, + end=end, + part_number=part_number, + checksum=checksum, + headers=headers, + retry=retry, + ) + ) + + concurrent.futures.wait( + futures, timeout=deadline, return_when=concurrent.futures.ALL_COMPLETED + ) + + try: + # Harvest results and raise exceptions. + for future in futures: + part_number, etag = future.result() + container.register_part(part_number, etag) + + container.finalize(blob._get_transport(client)) + except Exception: + container.cancel(blob._get_transport(client)) + raise + + +def _upload_part( + maybe_pickled_client, + url, + upload_id, + filename, + start, + end, + part_number, + checksum, + headers, + retry, +): + """Helper function that runs inside a thread or subprocess to upload a part. 
+ + `maybe_pickled_client` is either a Client (for threads) or a specially + pickled Client (for processes) because the default pickling mangles Client + objects.""" + + if isinstance(maybe_pickled_client, Client): + client = maybe_pickled_client + else: + client = pickle.loads(maybe_pickled_client) + part = XMLMPUPart( + url, + upload_id, + filename, + start=start, + end=end, + part_number=part_number, + checksum=checksum, + headers=headers, + retry=retry, + ) + part.upload(client._http) + return (part_number, part.etag) + + +def _headers_from_metadata(metadata): + """Helper function to translate object metadata into a header dictionary.""" + + headers = {} + # Handle standard writable metadata + for key, value in metadata.items(): + if key in METADATA_HEADER_TRANSLATION: + headers[METADATA_HEADER_TRANSLATION[key]] = value + # Handle custom metadata + if "metadata" in metadata: + for key, value in metadata["metadata"].items(): + headers["x-goog-meta-" + key] = value + return headers + + +def _download_and_write_chunk_in_place( + maybe_pickled_blob, filename, start, end, download_kwargs, crc32c_checksum +): + """Helper function that runs inside a thread or subprocess. + + `maybe_pickled_blob` is either a Blob (for threads) or a specially pickled + Blob (for processes) because the default pickling mangles Client objects + which are attached to Blobs. + + Returns a crc if configured (or None) and the size written. + """ + + if isinstance(maybe_pickled_blob, Blob): + blob = maybe_pickled_blob + else: + blob = pickle.loads(maybe_pickled_blob) + + with _ChecksummingSparseFileWrapper(filename, start, crc32c_checksum) as f: + blob._prep_and_do_download(f, start=start, end=end, **download_kwargs) + return (f.crc, (end - start) + 1) + + +class _ChecksummingSparseFileWrapper: + """A file wrapper that writes to a sparse file and optionally checksums. + + This wrapper only implements write() and does not inherit from `io` module + base classes. + """ + + def __init__(self, filename, start_position, crc32c_enabled): + # Open in mixed read/write mode to avoid truncating or appending + self.f = open(filename, "rb+") + self.f.seek(start_position) + self._crc = None + self._crc32c_enabled = crc32c_enabled + + def write(self, chunk): + if self._crc32c_enabled: + if self._crc is None: + self._crc = google_crc32c.value(chunk) + else: + self._crc = google_crc32c.extend(self._crc, chunk) + self.f.write(chunk) + + @property + def crc(self): + return self._crc + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, tb): + self.f.close() + + +def _call_method_on_maybe_pickled_blob( + maybe_pickled_blob, method_name, *args, **kwargs +): + """Helper function that runs inside a thread or subprocess. + + `maybe_pickled_blob` is either a Blob (for threads) or a specially pickled + Blob (for processes) because the default pickling mangles Client objects + which are attached to Blobs.""" + + if isinstance(maybe_pickled_blob, Blob): + blob = maybe_pickled_blob + else: + blob = pickle.loads(maybe_pickled_blob) + return getattr(blob, method_name)(*args, **kwargs) + + +def _reduce_client(cl): + """Replicate a Client by constructing a new one with the same params. 
+ + LazyClient performs transparent caching for when the same client is needed + on the same process multiple times.""" + + client_object_id = id(cl) + project = cl.project + credentials = cl._credentials + _http = None # Can't carry this over + client_info = cl._initial_client_info + client_options = cl._initial_client_options + extra_headers = cl._extra_headers + + return _LazyClient, ( + client_object_id, + project, + credentials, + _http, + client_info, + client_options, + extra_headers, + ) + + +def _pickle_client(obj): + """Pickle a Client or an object that owns a Client (like a Blob)""" + + # We need a custom pickler to process Client objects, which are attached to + # Buckets (and therefore to Blobs in turn). Unfortunately, the Python + # multiprocessing library doesn't seem to have a good way to use a custom + # pickler, and using copyreg will mutate global state and affect code + # outside of the client library. Instead, we'll pre-pickle the object and + # pass the bytestring in. + f = io.BytesIO() + p = pickle.Pickler(f) + p.dispatch_table = copyreg.dispatch_table.copy() + p.dispatch_table[Client] = _reduce_client + p.dump(obj) + return f.getvalue() + + +def _get_pool_class_and_requirements(worker_type): + """Returns the pool class, and whether the pool requires pickled Blobs.""" + + if worker_type == PROCESS: + # Use processes. Pickle blobs with custom logic to handle the client. + return (concurrent.futures.ProcessPoolExecutor, True) + elif worker_type == THREAD: + # Use threads. Pass blobs through unpickled. + return (concurrent.futures.ThreadPoolExecutor, False) + else: + raise ValueError( + "The worker_type must be google.cloud.storage.transfer_manager.PROCESS or google.cloud.storage.transfer_manager.THREAD" + ) + + +def _digest_ordered_checksum_and_size_pairs(checksum_and_size_pairs): + base_crc = None + zeroes = bytes(MAX_CRC32C_ZERO_ARRAY_SIZE) + for part_crc, size in checksum_and_size_pairs: + if not base_crc: + base_crc = part_crc + else: + base_crc ^= 0xFFFFFFFF # precondition + + # Zero pad base_crc32c. To conserve memory, do so with only + # MAX_CRC32C_ZERO_ARRAY_SIZE at a time. Reuse the zeroes array where + # possible. + padded = 0 + while padded < size: + desired_zeroes_size = min((size - padded), MAX_CRC32C_ZERO_ARRAY_SIZE) + base_crc = google_crc32c.extend(base_crc, zeroes[:desired_zeroes_size]) + padded += desired_zeroes_size + + base_crc ^= 0xFFFFFFFF # postcondition + base_crc ^= part_crc + crc_digest = struct.pack( + ">L", base_crc + ) # https://cloud.google.com/storage/docs/json_api/v1/objects#crc32c + return crc_digest + + +class _LazyClient: + """An object that will transform into either a cached or a new Client""" + + def __new__(cls, id, *args, **kwargs): + cached_client = _cached_clients.get(id) + if cached_client: + return cached_client + else: + cached_client = Client(*args, **kwargs) + _cached_clients[id] = cached_client + return cached_client diff --git a/google/cloud/storage/version.py b/google/cloud/storage/version.py index a12de3d25..d6f7def8c 100644 --- a/google/cloud/storage/version.py +++ b/google/cloud/storage/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
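The checksum combination in `_digest_ordered_checksum_and_size_pairs` above relies on a linearity property of CRC32C: the CRC of concatenated data can be recovered from independently computed per-chunk CRCs by zero-extending the running value before folding in the next chunk's CRC. A minimal standalone sketch of that property follows; it assumes only the `google-crc32c` package (the same `value`/`extend` functions the module imports), uses an arbitrary payload and chunk split, and skips the bounded zero-padding loop that the helper above uses to conserve memory.

```python
import struct

import google_crc32c

data = b"example payload " * 4096
mid = len(data) // 2
# Each worker would return (crc32c_of_its_chunk, chunk_size), in order.
pairs = [(google_crc32c.value(c), len(c)) for c in (data[:mid], data[mid:])]

base_crc = None
for part_crc, size in pairs:
    if base_crc is None:
        base_crc = part_crc
        continue
    base_crc ^= 0xFFFFFFFF                                  # undo the final XOR
    base_crc = google_crc32c.extend(base_crc, bytes(size))  # shift by `size` zero bytes
    base_crc ^= 0xFFFFFFFF                                  # re-apply the final XOR
    base_crc ^= part_crc                                    # fold in this chunk's CRC

# The combined value matches a straight CRC32C over the whole payload.
assert base_crc == google_crc32c.value(data)
print(struct.pack(">L", base_crc).hex())  # big-endian digest, as GCS encodes crc32c
```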
-__version__ = "2.0.0" +__version__ = "3.0.0" diff --git a/noxfile.py b/noxfile.py index 318bc3957..384880848 100644 --- a/noxfile.py +++ b/noxfile.py @@ -24,18 +24,21 @@ import nox -BLACK_VERSION = "black==19.10b0" +BLACK_VERSION = "black==23.7.0" BLACK_PATHS = ["docs", "google", "tests", "noxfile.py", "setup.py"] DEFAULT_PYTHON_VERSION = "3.8" SYSTEM_TEST_PYTHON_VERSIONS = ["3.8"] -UNIT_TEST_PYTHON_VERSIONS = ["3.6", "3.7", "3.8", "3.9", "3.10"] +UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] CONFORMANCE_TEST_PYTHON_VERSIONS = ["3.8"] _DEFAULT_STORAGE_HOST = "https://storage.googleapis.com" CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute() +# Error if a python version is missing +nox.options.error_on_missing_interpreters = True + @nox.session(python=DEFAULT_PYTHON_VERSION) def lint(session): @@ -44,9 +47,13 @@ def lint(session): Returns a failure if the linters find linting errors or sufficiently serious code quality issues. """ - session.install("flake8", BLACK_VERSION) + # Pin flake8 to 6.0.0 + # See https://github.com/googleapis/python-storage/issues/1102 + session.install("flake8==6.0.0", BLACK_VERSION) session.run( - "black", "--check", *BLACK_PATHS, + "black", + "--check", + *BLACK_PATHS, ) session.run("flake8", "google", "tests") @@ -59,7 +66,8 @@ def blacken(session): """ session.install(BLACK_VERSION) session.run( - "black", *BLACK_PATHS, + "black", + *BLACK_PATHS, ) @@ -70,18 +78,28 @@ def lint_setup_py(session): session.run("python", "setup.py", "check", "--restructuredtext", "--strict") -def default(session): +def default(session, install_extras=True): constraints_path = str( CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt" ) # Install all test dependencies, then install this package in-place. - session.install("mock", "pytest", "pytest-cov", "-c", constraints_path) + session.install("mock", "pytest", "pytest-cov", "brotli", "-c", constraints_path) + + if install_extras: + session.install("opentelemetry-api", "opentelemetry-sdk") + session.install("-e", ".", "-c", constraints_path) + # This dependency is included in setup.py for backwards compatibility only + # and the client library is expected to pass all tests without it. See + # setup.py and README for details. + session.run("pip", "uninstall", "-y", "google-resumable-media") + # Run py.test against the unit tests. session.run( "py.test", "--quiet", + f"--junitxml=unit_{session.python}_sponge_log.xml", "--cov=google.cloud.storage", "--cov=google.cloud", "--cov=tests.unit", @@ -90,6 +108,7 @@ def default(session): "--cov-report=", "--cov-fail-under=0", os.path.join("tests", "unit"), + os.path.join("tests", "resumable_media", "unit"), *session.posargs, ) @@ -106,49 +125,57 @@ def system(session): CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt" ) """Run the system test suite.""" - system_test_path = os.path.join("tests", "system.py") - system_test_folder_path = os.path.join("tests", "system") + rerun_count = 0 # Check the value of `RUN_SYSTEM_TESTS` env var. It defaults to true. if os.environ.get("RUN_SYSTEM_TESTS", "true") == "false": session.skip("RUN_SYSTEM_TESTS is set to false, skipping") # Environment check: Only run tests if the environment variable is set. 
     if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", ""):
-        session.skip("Credentials must be set via environment variable")
+        session.skip(
+            "Credentials must be set via environment variable GOOGLE_APPLICATION_CREDENTIALS"
+        )
 
     # mTLS tests requires pyopenssl.
     if os.environ.get("GOOGLE_API_USE_CLIENT_CERTIFICATE", "") == "true":
         session.install("pyopenssl")
-
-    system_test_exists = os.path.exists(system_test_path)
-    system_test_folder_exists = os.path.exists(system_test_folder_path)
-    # Environment check: only run tests if found.
-    if not system_test_exists and not system_test_folder_exists:
-        session.skip("System tests were not found")
+    # Check if the endpoint is being overridden for rerun_count
+    if (
+        os.getenv("API_ENDPOINT_OVERRIDE", "https://storage.googleapis.com")
+        != "https://storage.googleapis.com"
+    ):
+        rerun_count = 3
 
     # Use pre-release gRPC for system tests.
-    session.install("--pre", "grpcio")
+    # TODO: Remove ban of 1.52.0rc1 once grpc/grpc#31885 is resolved.
+    session.install("--pre", "grpcio!=1.52.0rc1")
 
     # Install all test dependencies, then install this package into the
     # virtualenv's dist-packages.
     # 2021-05-06: defer installing 'google-cloud-*' to after this package,
     # in order to work around Python 2.7 googolapis-common-protos
     # issue.
-    session.install("mock", "pytest", "-c", constraints_path)
+    session.install("mock", "pytest", "pytest-rerunfailures", "-c", constraints_path)
 
     session.install("-e", ".", "-c", constraints_path)
 
     session.install(
         "google-cloud-testutils",
         "google-cloud-iam",
         "google-cloud-pubsub < 2.0.0",
         "google-cloud-kms < 2.0dev",
+        "brotli",
         "-c",
         constraints_path,
     )
 
     # Run py.test against the system tests.
-    if system_test_exists:
-        session.run("py.test", "--quiet", system_test_path, *session.posargs)
-    if system_test_folder_exists:
-        session.run("py.test", "--quiet", system_test_folder_path, *session.posargs)
+    session.run(
+        "py.test",
+        "--quiet",
+        f"--junitxml=system_{session.python}_sponge_log.xml",
+        "--reruns={}".format(rerun_count),
+        os.path.join("tests", "system"),
+        os.path.join("tests", "resumable_media", "system"),
+        *session.posargs,
+    )
 
 
 @nox.session(python=CONFORMANCE_TEST_PYTHON_VERSIONS)
@@ -193,12 +220,25 @@ def cover(session):
     session.run("coverage", "erase")
 
 
-@nox.session(python=DEFAULT_PYTHON_VERSION)
+@nox.session(python="3.10")
 def docs(session):
     """Build the docs for this library."""
     session.install("-e", ".")
-    session.install("sphinx==4.0.1", "alabaster", "recommonmark")
+    session.install(
+        # We need to pin to specific versions of the `sphinxcontrib-*` packages
+        # which still support sphinx 4.x.
+        # See https://github.com/googleapis/sphinx-docfx-yaml/issues/344
+        # and https://github.com/googleapis/sphinx-docfx-yaml/issues/345.
+ "sphinxcontrib-applehelp==1.0.4", + "sphinxcontrib-devhelp==1.0.2", + "sphinxcontrib-htmlhelp==2.0.1", + "sphinxcontrib-qthelp==1.0.3", + "sphinxcontrib-serializinghtml==1.1.5", + "sphinx==4.5.0", + "alabaster", + "recommonmark", + ) shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) session.run( @@ -215,14 +255,25 @@ def docs(session): ) -@nox.session(python=DEFAULT_PYTHON_VERSION) +@nox.session(python="3.10") def docfx(session): """Build the docfx yaml files for this library.""" session.install("-e", ".") session.install("grpcio") session.install( - "sphinx==4.0.1", "alabaster", "recommonmark", "gcp-sphinx-docfx-yaml" + # We need to pin to specific versions of the `sphinxcontrib-*` packages + # which still support sphinx 4.x. + # See https://github.com/googleapis/sphinx-docfx-yaml/issues/344 + # and https://github.com/googleapis/sphinx-docfx-yaml/issues/345. + "sphinxcontrib-applehelp==1.0.4", + "sphinxcontrib-devhelp==1.0.2", + "sphinxcontrib-htmlhelp==2.0.1", + "sphinxcontrib-qthelp==1.0.3", + "sphinxcontrib-serializinghtml==1.1.5", + "gcp-sphinx-docfx-yaml", + "alabaster", + "recommonmark", ) shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) diff --git a/owlbot.py b/owlbot.py index 828536f24..8bd9de751 100644 --- a/owlbot.py +++ b/owlbot.py @@ -26,7 +26,6 @@ templated_files = common.py_library( cov_level=100, split_system_tests=True, - unit_test_python_versions=["3.6", "3.7", "3.8", "3.9", "3.10"], system_test_external_dependencies=[ "google-cloud-iam", "google-cloud-pubsub < 2.0.0", @@ -34,7 +33,9 @@ "google-cloud-kms < 2.0dev", ], intersphinx_dependencies={ - "requests": "https://docs.python-requests.org/en/master/" + # python-requests url temporary change related to + # https://github.com/psf/requests/issues/6140#issuecomment-1135071992 + "requests": "https://requests.readthedocs.io/en/stable/" }, ) @@ -43,12 +44,52 @@ excludes=[ "docs/multiprocessing.rst", "noxfile.py", - "renovate.json", # do not bundle reports "CONTRIBUTING.rst", - ".github/CODEOWNERS", + "README.rst", + ".kokoro/continuous/continuous.cfg", + ".kokoro/presubmit/system-3.8.cfg", + ".kokoro/samples/python3.6", # remove python 3.6 support + ".github/blunderbuss.yml", # blunderbuss assignment to python squad + ".github/workflows", # exclude gh actions as credentials are needed for tests + ".github/release-please.yml", # special support for a python2 branch in this repo ], ) +s.replace( + ".kokoro/build.sh", + "export PYTHONUNBUFFERED=1", + """export PYTHONUNBUFFERED=1 + +# Export variable to override api endpoint +export API_ENDPOINT_OVERRIDE + +# Export variable to override api endpoint version +export API_VERSION_OVERRIDE + +# Export dual region locations +export DUAL_REGION_LOC_1 +export DUAL_REGION_LOC_2 + +# Setup universe domain testing needed environment variables. 
+export TEST_UNIVERSE_DOMAIN_CREDENTIAL=$(realpath ${KOKORO_GFILE_DIR}/secret_manager/client-library-test-universe-domain-credential)
+export TEST_UNIVERSE_DOMAIN=$(gcloud secrets versions access latest --project cloud-devrel-kokoro-resources --secret=client-library-test-universe-domain)
+export TEST_UNIVERSE_PROJECT_ID=$(gcloud secrets versions access latest --project cloud-devrel-kokoro-resources --secret=client-library-test-universe-project-id)
+export TEST_UNIVERSE_LOCATION=$(gcloud secrets versions access latest --project cloud-devrel-kokoro-resources --secret=client-library-test-universe-storage-location)
+
+""")
+
+s.replace(
+    ".coveragerc",
+    "omit =",
+    """omit =
+    .nox/*""")
+
+s.replace(
+    ".kokoro/release/common.cfg",
+    'value: "releasetool-publish-reporter-app,releasetool-publish-reporter-googleapis-installation,releasetool-publish-reporter-pem"',
+    'value: "releasetool-publish-reporter-app,releasetool-publish-reporter-googleapis-installation,releasetool-publish-reporter-pem, client-library-test-universe-domain-credential"'
+)
+
 python.py_samples(skip_readmes=True)
 
 s.shell.run(["nox", "-s", "blacken"], hide_output=False)
diff --git a/renovate.json b/renovate.json
index 9fa8816fe..c7875c469 100644
--- a/renovate.json
+++ b/renovate.json
@@ -1,10 +1,11 @@
 {
   "extends": [
     "config:base",
+    "group:all",
     ":preserveSemverRanges",
     ":disableDependencyDashboard"
   ],
-  "ignorePaths": [".pre-commit-config.yaml"],
+  "ignorePaths": [".pre-commit-config.yaml", ".kokoro/requirements.txt", "setup.py", ".github/workflows/unittest.yml"],
   "pip_requirements": {
     "fileMatch": ["requirements-test.txt", "samples/[\\S/]*constraints.txt", "samples/[\\S/]*constraints-test.txt"]
   }
diff --git a/samples/README.md b/samples/README.md
index 2751bf722..490af710a 100644
--- a/samples/README.md
+++ b/samples/README.md
@@ -41,6 +41,69 @@ for more detailed instructions.
 pip install -r requirements.txt
 ```
 
+
+## Running tests locally
+
+Before running the tests, make sure you've followed the steps outlined in
+[Setup](#setup).
+
+### Install nox
+
+We use [nox](https://nox.readthedocs.io/en/latest/) to instrument our tests.
+
+```
+pip install nox
+```
+
+### Set environment variables
+
+You can run tests locally using your own GCS project or with a valid service account in project `python-docs-samples-tests`. This section outlines the workflow for running tests locally using your own GCS project.
+
+Refer to [`noxfile_config.py`](https://github.com/googleapis/python-storage/blob/main/samples/snippets/noxfile_config.py) and [a list of environment variables](https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/testing/test-env.tmpl.sh) that can be set manually. Not every test needs all of these variables.
+Some common environment variables used in the storage samples are outlined below.
+See [Other Resources](#other-resources) on how to create credentials, keys, and secrets.
+
+    export GOOGLE_CLOUD_PROJECT=[your-project-name]
+    export MAIN_GOOGLE_CLOUD_PROJECT=[your-project-name]
+    export BUILD_SPECIFIC_GCLOUD_PROJECT=[your-project-name]
+    export HMAC_KEY_TEST_SERVICE_ACCOUNT=[your-service-account]
+    export CLOUD_KMS_KEY=[your-kms-key]
+    export GOOGLE_APPLICATION_CREDENTIALS=[your-credentials]
+
+If you are running a single test locally that does not use the environment variables, you can delete the `noxfile_config.py` file and simply set your `GOOGLE_CLOUD_PROJECT`:
+
+```
+export GOOGLE_CLOUD_PROJECT=[your-project-name]
+```
+
+
+### Run tests with nox
+```
+nox -s lint
+nox -s py-3.9 -- snippets_test.py
+nox -s py-3.9 -- snippets_test.py::test_list_blobs
+```
+
+### Special test configurations
+There are restrictions on the testing projects used in Kokoro. For instance,
+we change the service account based on different test sessions to avoid
+hitting the maximum limit of HMAC keys on a single service account.
+Another example is that `requester_pays_test.py` needs to use a different Storage bucket and looks for the environment variable `REQUESTER_PAYS_TEST_BUCKET`.
+Please refer to [`noxfile_config.py`](https://github.com/googleapis/python-storage/blob/main/samples/snippets/noxfile_config.py), [kokoro configs](https://github.com/googleapis/python-storage/tree/main/.kokoro/samples), and test files to see if there are special test configurations required.
+
+
+## Other Resources
+* [Create Cloud KMS Keys](https://cloud.google.com/kms/docs/creating-keys)
+* [Create HMAC Keys](https://cloud.google.com/storage/docs/authentication/managing-hmackeys)
+* [Create Service Accounts](https://cloud.google.com/docs/authentication/getting-started#creating_a_service_account)
+
+[shell_img]: https://gstatic.com/cloudssh/images/open-btn.png
+[shell_link]: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/python-storage&page=editor&open_in_editor=samples/README.md
+[product-docs]: https://cloud.google.com/storage
+
+
+-----
+
 ## Samples
List of Samples @@ -63,7 +126,9 @@ for more detailed instructions. * [CORS Configuration](#cors-configuration) * [Create Bucket](#create-bucket) * [Create Bucket Class Location](#create-bucket-class-location) +* [Create Bucket Dual Region](#create-bucket-dual-region) * [Create Bucket Notifications](#create-bucket-notifications) +* [Create Bucket Turbo Replication](#create-bucket-turbo-replication) * [Create HMAC Key](#create-hmac-key) * [Deactivate HMAC Key](#deactivate-hmac-key) * [Define Bucket Website Configuration](#define-bucket-website-configuration) @@ -88,8 +153,8 @@ for more detailed instructions. * [Enable Requester Pays](#enable-requester-pays) * [Enable Uniform Bucket Level Access](#enable-uniform-bucket-level-access) * [Enable Versioning](#enable-versioning) -* [FileIO Write-Read] (#fileio-write-read) -* [FileIO Pandas] (#fileio-pandas) +* [FileIO Write-Read](#fileio-write-read) +* [FileIO Pandas](#fileio-pandas) * [Generate Encryption Key](#generate-encryption-key) * [Generate Signed Post Policy V4](#generate-signed-post-policy-v4) * [Generate Signed Url V2](#generate-signed-url-v2) @@ -103,6 +168,7 @@ for more detailed instructions. * [Get Public Access Prevention](#get-public-access-prevention) * [Get Requester Pays Status](#get-requester-pays-status) * [Get Retention Policy](#get-retention-policy) +* [Get RPO](#get-rpo) * [Get Service Account](#get-service-account) * [Get Uniform Bucket Level Access](#get-uniform-bucket-level-access) * [List Buckets](#list-buckets) @@ -139,6 +205,8 @@ for more detailed instructions. * [Set Metadata](#set-metadata) * [Set Public Access Prevention Enforced](#set-public-access-prevention-enforced) * [Set Public Access Prevention Inherited](#set-public-access-prevention-inherited) +* [Set RPO Async Turbo](#set-rpo-async-turbo) +* [Set RPO Default](#set-rpo-default) * [Set Retention Policy](#set-retention-policy) * [Set Temporary Hold](#set-temporary-hold) * [Upload Encrypted File](#upload-encrypted-file) @@ -312,6 +380,15 @@ View the [source code](https://github.com/googleapis/python-storage/blob/main/sa `python storage_create_bucket_class_location.py ` +----- +### Create Bucket Dual Region +[![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/python-storage&page=editor&open_in_editor=samples/snippets/storage_create_bucket_dual_region.py,samples/README.md) + +View the [source code](https://github.com/googleapis/python-storage/blob/main/samples/snippets/storage_create_bucket_dual_region.py). 
To run this sample: + + +`python storage_create_bucket_dual_region.py ` + ----- ### Create Bucket Notifications [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/python-storage&page=editor&open_in_editor=samples/snippets/storage_create_bucket_notifications.py,samples/README.md) @@ -321,6 +398,15 @@ View the [source code](https://github.com/googleapis/python-storage/blob/main/sa `python storage_create_bucket_notifications.py ` +----- +### Create Bucket Turbo Replication +[![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/python-storage&page=editor&open_in_editor=samples/snippets/storage_create_bucket_turbo_replication.py,samples/README.md) + +View the [source code](https://github.com/googleapis/python-storage/blob/main/samples/snippets/storage_create_bucket_turbo_replication.py). To run this sample: + + +`python storage_create_bucket_turbo_replication.py ` + ----- ### Create HMAC Key [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/python-storage&page=editor&open_in_editor=samples/snippets/storage_create_hmac_key.py,samples/README.md) @@ -672,6 +758,15 @@ View the [source code](https://github.com/googleapis/python-storage/blob/main/sa `python storage_get_retention_policy.py ` +----- +### Get RPO +[![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/python-storage&page=editor&open_in_editor=samples/snippets/storage_get_rpo.py,samples/README.md) + +View the [source code](https://github.com/googleapis/python-storage/blob/main/samples/snippets/storage_get_rpo.py). To run this sample: + + +`python storage_get_rpo.py ` + ----- ### Get Service Account [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/python-storage&page=editor&open_in_editor=samples/snippets/storage_get_service_account.py,samples/README.md) @@ -1005,6 +1100,25 @@ View the [source code](https://github.com/googleapis/python-storage/blob/main/sa `python storage_set_retention_policy.py ` + +----- +### Set RPO Async Turbo +[![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/python-storage&page=editor&open_in_editor=samples/snippets/storage_set_rpo_async_turbo.py,samples/README.md) + +View the [source code](https://github.com/googleapis/python-storage/blob/main/samples/snippets/storage_set_rpo_async_turbo.py). 
To run this sample: + + +`python storage_set_rpo_async_turbo.py ` + +----- +### Set RPO Default +[![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/python-storage&page=editor&open_in_editor=samples/snippets/storage_set_rpo_default.py,samples/README.md) + +View the [source code](https://github.com/googleapis/python-storage/blob/main/samples/snippets/storage_set_rpo_default.py). To run this sample: + + +`python storage_set_rpo_default.py ` + ----- ### Set Temporary Hold [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/python-storage&page=editor&open_in_editor=samples/snippets/storage_set_temporary_hold.py,samples/README.md) @@ -1059,54 +1173,3 @@ View the [source code](https://github.com/googleapis/python-storage/blob/main/sa `python storage_view_bucket_iam_members.py ` ------ - -## Running tests locally - -Before running the tests, make sure you've followed the steps outlined in -[Setup](#setup). - -### Install nox -``` -pip install nox -``` - -### Set environment variables - -You can run tests locally using your own gcs project or with a valid service account in project `python-docs-samples-tests`. This outlines the workflow of running tests locally using your own gcs project. - -Refer to [`noxfile_config.py`](https://github.com/googleapis/python-storage/blob/main/samples/snippets/noxfile_config.py) and [a list of environment variables](https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/testing/test-env.tmpl.sh) that can be set manually. Not every test needs all of these variables. -The common environment variables used in the storage samples include: - - export GOOGLE_CLOUD_PROJECT=[your-project-name] - export MAIN_GOOGLE_CLOUD_PROJECT=[your-project-name] - export BUILD_SPECIFIC_GCLOUD_PROJECT=[your-project-name] - export HMAC_KEY_TEST_SERVICE_ACCOUNT=[your-service-account] - export CLOUD_KMS_KEY=[your-kms-key] - export GOOGLE_APPLICATION_CREDENTIALS=[your-credentials] - -See [Other Resources](#other-resources) on how to create credentials, keys, and secrets - -### Run tests with nox -``` -nox -s lint -nox -s py-3.7 -- snippets_test.py -nox -s py-3.7 -- snippets_test.py::test_list_blobs -``` - -### Special test configurations -There are restrictions on the testing projects used in Kokoro. For instance, -we change the service account based on different test sessions to avoid -hitting the maximum limit of HMAC keys on a single service account. -Another example is `requester_pays_test.py` needs to use a different Storage bucket, and looks for an environment variable `REQUESTER_PAYS_TEST_BUCKET`. -Please refer to [`noxfile_config.py`](https://github.com/googleapis/python-storage/blob/main/samples/snippets/noxfile_config.py) , [kokoro configs](https://github.com/googleapis/python-storage/tree/main/.kokoro/samples), and test files to see if there are special test configurations required. 
- - -### Other Resources -* [Create Cloud KMS Keys](https://cloud.google.com/kms/docs/creating-keys) -* [Create HMAC Keys](https://cloud.google.com/storage/docs/authentication/managing-hmackeys) -* [Create Service Accounts](https://cloud.google.com/docs/authentication/getting-started#creating_a_service_account) - -[shell_img]: https://gstatic.com/cloudssh/images/open-btn.png -[shell_link]: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/python-storage&page=editor&open_in_editor=samples/README.md -[product-docs]: https://cloud.google.com/storage \ No newline at end of file diff --git a/samples/snippets/acl_test.py b/samples/snippets/acl_test.py index 91856d816..eecee522b 100644 --- a/samples/snippets/acl_test.py +++ b/samples/snippets/acl_test.py @@ -46,7 +46,7 @@ def test_bucket(): os.environ["GOOGLE_CLOUD_PROJECT"] = os.environ["MAIN_GOOGLE_CLOUD_PROJECT"] bucket = None while bucket is None or bucket.exists(): - bucket_name = "acl-test-{}".format(uuid.uuid4()) + bucket_name = f"acl-test-{uuid.uuid4()}" bucket = storage.Client().bucket(bucket_name) bucket.create() yield bucket @@ -59,7 +59,7 @@ def test_bucket(): def test_blob(test_bucket): """Yields a blob that is deleted after the test completes.""" bucket = test_bucket - blob = bucket.blob("storage_acl_test_sigil-{}".format(uuid.uuid4())) + blob = bucket.blob(f"storage_acl_test_sigil-{uuid.uuid4()}") blob.upload_from_string("Hello, is it me you're looking for?") yield blob diff --git a/samples/snippets/bucket_lock_test.py b/samples/snippets/bucket_lock_test.py index 67d4ec685..9b7b4fa2a 100644 --- a/samples/snippets/bucket_lock_test.py +++ b/samples/snippets/bucket_lock_test.py @@ -42,7 +42,7 @@ def bucket(): """Yields a bucket that is deleted after the test completes.""" bucket = None while bucket is None or bucket.exists(): - bucket_name = "bucket-lock-{}".format(uuid.uuid4()) + bucket_name = f"bucket-lock-{uuid.uuid4()}" bucket = storage.Client().bucket(bucket_name) bucket.create() yield bucket @@ -61,7 +61,7 @@ def test_retention_policy_no_lock(bucket, capsys): storage_get_retention_policy.get_retention_policy(bucket.name) out, _ = capsys.readouterr() - assert "Retention Policy for {}".format(bucket.name) in out + assert f"Retention Policy for {bucket.name}" in out assert "Retention Period: 5" in out assert "Effective Time: " in out assert "Retention Policy is locked" not in out @@ -100,11 +100,11 @@ def test_enable_disable_bucket_default_event_based_hold(bucket, capsys): ) out, _ = capsys.readouterr() assert ( - "Default event-based hold is not enabled for {}".format(bucket.name) + f"Default event-based hold is not enabled for {bucket.name}" in out ) assert ( - "Default event-based hold is enabled for {}".format(bucket.name) + f"Default event-based hold is enabled for {bucket.name}" not in out ) @@ -120,7 +120,7 @@ def test_enable_disable_bucket_default_event_based_hold(bucket, capsys): ) out, _ = capsys.readouterr() assert ( - "Default event-based hold is enabled for {}".format(bucket.name) in out + f"Default event-based hold is enabled for {bucket.name}" in out ) # Changes to the bucket will be readable immediately after writing, diff --git a/samples/snippets/encryption_test.py 
b/samples/snippets/encryption_test.py index 6c2377e0f..9039b1fad 100644 --- a/samples/snippets/encryption_test.py +++ b/samples/snippets/encryption_test.py @@ -29,7 +29,7 @@ import storage_upload_encrypted_file BUCKET = os.environ["CLOUD_STORAGE_BUCKET"] -KMS_KEY = os.environ["CLOUD_KMS_KEY"] +KMS_KEY = os.environ["MAIN_CLOUD_KMS_KEY"] TEST_ENCRYPTION_KEY = "brtJUWneL92g5q0N2gyDSnlPSYAiIVZ/cWgjyZNeMy0=" TEST_ENCRYPTION_KEY_DECODED = base64.b64decode(TEST_ENCRYPTION_KEY) @@ -47,22 +47,25 @@ def test_generate_encryption_key(capsys): def test_upload_encrypted_blob(): + blob_name = f"test_upload_encrypted_{uuid.uuid4().hex}" with tempfile.NamedTemporaryFile() as source_file: source_file.write(b"test") storage_upload_encrypted_file.upload_encrypted_blob( BUCKET, source_file.name, - "test_encrypted_upload_blob", + blob_name, TEST_ENCRYPTION_KEY, ) + bucket = storage.Client().bucket(BUCKET) + bucket.delete_blob(blob_name) @pytest.fixture(scope="module") def test_blob(): """Provides a pre-existing blob in the test bucket.""" bucket = storage.Client().bucket(BUCKET) - blob_name = "test_blob_{}".format(uuid.uuid4().hex) + blob_name = f"test_blob_{uuid.uuid4().hex}" blob = Blob( blob_name, bucket, @@ -81,7 +84,7 @@ def test_blob(): blob.delete() except NotFound as e: # For the case that the rotation succeeded. - print("Ignoring 404, detail: {}".format(e)) + print(f"Ignoring 404, detail: {e}") blob = Blob( blob_name, bucket, @@ -122,4 +125,4 @@ def test_object_csek_to_cmek(test_blob): BUCKET, test_blob_name, TEST_ENCRYPTION_KEY_2, KMS_KEY ) - assert cmek_blob.download_as_string(), test_blob_content + assert cmek_blob.download_as_bytes(), test_blob_content diff --git a/samples/snippets/fileio_test.py b/samples/snippets/fileio_test.py index cf98ce1ab..b8a4b8272 100644 --- a/samples/snippets/fileio_test.py +++ b/samples/snippets/fileio_test.py @@ -19,14 +19,14 @@ def test_fileio_write_read(bucket, capsys): - blob_name = "test-fileio-{}".format(uuid.uuid4()) + blob_name = f"test-fileio-{uuid.uuid4()}" storage_fileio_write_read.write_read(bucket.name, blob_name) out, _ = capsys.readouterr() assert "Hello world" in out def test_fileio_pandas(bucket, capsys): - blob_name = "test-fileio-{}".format(uuid.uuid4()) + blob_name = f"test-fileio-{uuid.uuid4()}" storage_fileio_pandas.pandas_write(bucket.name, blob_name) out, _ = capsys.readouterr() assert f"Wrote csv with pandas with name {blob_name} from bucket {bucket.name}." 
in out diff --git a/samples/snippets/hmac_samples_test.py b/samples/snippets/hmac_samples_test.py index 60eba2401..988b40305 100644 --- a/samples/snippets/hmac_samples_test.py +++ b/samples/snippets/hmac_samples_test.py @@ -64,7 +64,10 @@ def new_hmac_key(): if not hmac_key.state == "INACTIVE": hmac_key.state = "INACTIVE" hmac_key.update() - hmac_key.delete() + try: + hmac_key.delete() + except google.api_core.exceptions.BadRequest: + pass def test_list_keys(capsys, new_hmac_key): diff --git a/samples/snippets/iam_test.py b/samples/snippets/iam_test.py index edeb8427d..7700b6c6a 100644 --- a/samples/snippets/iam_test.py +++ b/samples/snippets/iam_test.py @@ -42,7 +42,7 @@ def bucket(): bucket = None while bucket is None or bucket.exists(): storage_client = storage.Client() - bucket_name = "test-iam-{}".format(uuid.uuid4()) + bucket_name = f"test-iam-{uuid.uuid4()}" bucket = storage_client.bucket(bucket_name) bucket.iam_configuration.uniform_bucket_level_access_enabled = True storage_client.create_bucket(bucket) @@ -60,7 +60,7 @@ def public_bucket(): bucket = None while bucket is None or bucket.exists(): storage_client = storage.Client() - bucket_name = "test-iam-{}".format(uuid.uuid4()) + bucket_name = f"test-iam-{uuid.uuid4()}" bucket = storage_client.bucket(bucket_name) bucket.iam_configuration.uniform_bucket_level_access_enabled = True storage_client.create_bucket(bucket) diff --git a/samples/snippets/notification_polling.py b/samples/snippets/notification_polling.py index 3182db6da..2ee6789c3 100644 --- a/samples/snippets/notification_polling.py +++ b/samples/snippets/notification_polling.py @@ -38,7 +38,7 @@ $ gsutil notification create -f json -t testtopic gs://testbucket 5. Create a subscription for your new topic: - $ gcloud beta pubsub subscriptions create testsubscription --topic=testtopic + $ gcloud pubsub subscriptions create testsubscription --topic=testtopic 6. Run this program: $ python notification_polling.py my-project-id testsubscription @@ -76,13 +76,9 @@ def summarize(message): ) if "overwroteGeneration" in attributes: - description += "\tOverwrote generation: %s\n" % ( - attributes["overwroteGeneration"] - ) + description += f"\tOverwrote generation: {attributes['overwroteGeneration']}\n" if "overwrittenByGeneration" in attributes: - description += "\tOverwritten by generation: %s\n" % ( - attributes["overwrittenByGeneration"] - ) + description += f"\tOverwritten by generation: {attributes['overwrittenByGeneration']}\n" payload_format = attributes["payloadFormat"] if payload_format == "JSON_API_V1": @@ -110,14 +106,14 @@ def poll_notifications(project, subscription_name): ) def callback(message): - print("Received message:\n{}".format(summarize(message))) + print(f"Received message:\n{summarize(message)}") message.ack() subscriber.subscribe(subscription_path, callback=callback) # The subscriber is non-blocking, so we must keep the main thread from # exiting to allow it to process messages in the background. 
- print("Listening for messages on {}".format(subscription_path)) + print(f"Listening for messages on {subscription_path}") while True: time.sleep(60) diff --git a/samples/snippets/notification_test.py b/samples/snippets/notification_test.py index 13553c844..a2fdbe3ef 100644 --- a/samples/snippets/notification_test.py +++ b/samples/snippets/notification_test.py @@ -55,7 +55,7 @@ def _notification_topic(storage_client, publisher_client): binding = policy.bindings.add() binding.role = "roles/pubsub.publisher" binding.members.append( - "serviceAccount:{}".format(storage_client.get_service_account_email()) + f"serviceAccount:{storage_client.get_service_account_email()}" ) publisher_client.set_iam_policy(request={"resource": topic_path, "policy": policy}) diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index 3bbef5d54..a169b5b5b 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -18,7 +18,7 @@ import os from pathlib import Path import sys -from typing import Callable, Dict, List, Optional +from typing import Callable, Dict, Optional import nox @@ -29,7 +29,8 @@ # WARNING - WARNING - WARNING - WARNING - WARNING # WARNING - WARNING - WARNING - WARNING - WARNING -BLACK_VERSION = "black==19.10b0" +BLACK_VERSION = "black==22.3.0" +ISORT_VERSION = "isort==5.10.1" # Copy `noxfile_config.py` to your directory and modify it instead. @@ -88,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.6", "3.7", "3.8", "3.9", "3.10"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] @@ -108,22 +109,6 @@ def get_pytest_env_vars() -> Dict[str, str]: # -def _determine_local_import_names(start_dir: str) -> List[str]: - """Determines all import names that should be considered "local". - - This is used when running the linter to insure that import order is - properly checked. - """ - file_ext_pairs = [os.path.splitext(path) for path in os.listdir(start_dir)] - return [ - basename - for basename, extension in file_ext_pairs - if extension == ".py" - or os.path.isdir(os.path.join(start_dir, basename)) - and basename not in ("__pycache__") - ] - - # Linting with flake8. # # We ignore the following rules: @@ -138,7 +123,6 @@ def _determine_local_import_names(start_dir: str) -> List[str]: "--show-source", "--builtin=gettext", "--max-complexity=20", - "--import-order-style=google", "--exclude=.nox,.cache,env,lib,generated_pb2,*_pb2.py,*_pb2_grpc.py", "--ignore=E121,E123,E126,E203,E226,E24,E266,E501,E704,W503,W504,I202", "--max-line-length=88", @@ -148,14 +132,11 @@ def _determine_local_import_names(start_dir: str) -> List[str]: @nox.session def lint(session: nox.sessions.Session) -> None: if not TEST_CONFIG["enforce_type_hints"]: - session.install("flake8", "flake8-import-order") + session.install("flake8") else: - session.install("flake8", "flake8-import-order", "flake8-annotations") + session.install("flake8", "flake8-annotations") - local_names = _determine_local_import_names(".") args = FLAKE8_COMMON_ARGS + [ - "--application-import-names", - ",".join(local_names), ".", ] session.run("flake8", *args) @@ -168,12 +149,32 @@ def lint(session: nox.sessions.Session) -> None: @nox.session def blacken(session: nox.sessions.Session) -> None: + """Run black. 
Format code to uniform standard.""" session.install(BLACK_VERSION) python_files = [path for path in os.listdir(".") if path.endswith(".py")] session.run("black", *python_files) +# +# format = isort + black +# + +@nox.session +def format(session: nox.sessions.Session) -> None: + """ + Run isort to sort imports. Then run black + to format code to uniform standard. + """ + session.install(BLACK_VERSION, ISORT_VERSION) + python_files = [path for path in os.listdir(".") if path.endswith(".py")] + + # Use the --fss option to sort imports using strict alphabetical order. + # See https://pycqa.github.io/isort/docs/configuration/options.html#force-sort-within-sections + session.run("isort", "--fss", *python_files) + session.run("black", *python_files) + + # # Sample Tests # @@ -186,43 +187,56 @@ def _session_tests( session: nox.sessions.Session, post_install: Callable = None ) -> None: # check for presence of tests - test_list = glob.glob("*_test.py") + glob.glob("test_*.py") + test_list = glob.glob("**/*_test.py", recursive=True) + glob.glob("**/test_*.py", recursive=True) + test_list.extend(glob.glob("**/tests", recursive=True)) + if len(test_list) == 0: print("No tests found, skipping directory.") - else: - if TEST_CONFIG["pip_version_override"]: - pip_version = TEST_CONFIG["pip_version_override"] - session.install(f"pip=={pip_version}") - """Runs py.test for a particular project.""" - if os.path.exists("requirements.txt"): - if os.path.exists("constraints.txt"): - session.install("-r", "requirements.txt", "-c", "constraints.txt") - else: - session.install("-r", "requirements.txt") - - if os.path.exists("requirements-test.txt"): - if os.path.exists("constraints-test.txt"): - session.install( - "-r", "requirements-test.txt", "-c", "constraints-test.txt" - ) - else: - session.install("-r", "requirements-test.txt") - - if INSTALL_LIBRARY_FROM_SOURCE: - session.install("-e", _get_repo_root()) - - if post_install: - post_install(session) - - session.run( - "pytest", - *(PYTEST_COMMON_ARGS + session.posargs), - # Pytest will return 5 when no tests are collected. This can happen - # on travis where slow and flaky tests are excluded. 
- # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html - success_codes=[0, 5], - env=get_pytest_env_vars(), - ) + return + + if TEST_CONFIG["pip_version_override"]: + pip_version = TEST_CONFIG["pip_version_override"] + session.install(f"pip=={pip_version}") + """Runs py.test for a particular project.""" + concurrent_args = [] + if os.path.exists("requirements.txt"): + if os.path.exists("constraints.txt"): + session.install("-r", "requirements.txt", "-c", "constraints.txt") + else: + session.install("-r", "requirements.txt") + with open("requirements.txt") as rfile: + packages = rfile.read() + + if os.path.exists("requirements-test.txt"): + if os.path.exists("constraints-test.txt"): + session.install( + "-r", "requirements-test.txt", "-c", "constraints-test.txt" + ) + else: + session.install("-r", "requirements-test.txt") + with open("requirements-test.txt") as rtfile: + packages += rtfile.read() + + if INSTALL_LIBRARY_FROM_SOURCE: + session.install("-e", _get_repo_root()) + + if post_install: + post_install(session) + + if "pytest-parallel" in packages: + concurrent_args.extend(['--workers', 'auto', '--tests-per-worker', 'auto']) + elif "pytest-xdist" in packages: + concurrent_args.extend(['-n', 'auto']) + + session.run( + "pytest", + *(PYTEST_COMMON_ARGS + session.posargs + concurrent_args), + # Pytest will return 5 when no tests are collected. This can happen + # on travis where slow and flaky tests are excluded. + # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html + success_codes=[0, 5], + env=get_pytest_env_vars(), + ) @nox.session(python=ALL_VERSIONS) diff --git a/samples/snippets/noxfile_config.py b/samples/snippets/noxfile_config.py index 463da97de..17a05b9f2 100644 --- a/samples/snippets/noxfile_config.py +++ b/samples/snippets/noxfile_config.py @@ -67,12 +67,18 @@ def get_cloud_kms_key(): if session == 'py-3.10': return ('projects/python-docs-samples-tests-310/locations/us/' 'keyRings/gcs-kms-key-ring/cryptoKeys/gcs-kms-key') + if session == 'py-3.11': + return ('projects/python-docs-samples-tests-311/locations/us/' + 'keyRings/gcs-kms-key-ring/cryptoKeys/gcs-kms-key') + if session == 'py-3.12': + return ('projects/python-docs-samples-tests-312/locations/us/' + 'keyRings/gcs-kms-key-ring/cryptoKeys/gcs-kms-key') return os.environ['CLOUD_KMS_KEY'] TEST_CONFIG_OVERRIDE = { # You can opt out from the test for specific Python versions. - 'ignored_versions': ["2.7"], + 'ignored_versions': ["2.7", "3.6", "3.7", "3.11", "3.12"], # An envvar key for determining the project id to use. Change it # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a @@ -91,6 +97,8 @@ def get_cloud_kms_key(): # 'constraints/iam.disableServiceAccountKeyCreation' policy. # 2. The new projects buckets need to have universal permission model. # For those tests, we'll use the original project. 
- 'MAIN_GOOGLE_CLOUD_PROJECT': 'python-docs-samples-tests' + 'MAIN_GOOGLE_CLOUD_PROJECT': 'python-docs-samples-tests', + 'MAIN_CLOUD_KMS_KEY': ('projects/python-docs-samples-tests/locations/us/' + 'keyRings/gcs-kms-key-ring/cryptoKeys/gcs-kms-key') }, } diff --git a/samples/snippets/quickstart.py b/samples/snippets/quickstart.py index 578e50753..54148b1fb 100644 --- a/samples/snippets/quickstart.py +++ b/samples/snippets/quickstart.py @@ -29,7 +29,7 @@ def run_quickstart(): # Creates the new bucket bucket = storage_client.create_bucket(bucket_name) - print("Bucket {} created.".format(bucket.name)) + print(f"Bucket {bucket.name} created.") # [END storage_quickstart] diff --git a/samples/snippets/requester_pays_test.py b/samples/snippets/requester_pays_test.py index 9a178edb0..4bef0cb89 100644 --- a/samples/snippets/requester_pays_test.py +++ b/samples/snippets/requester_pays_test.py @@ -12,9 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +import backoff import os import tempfile +from google.api_core.exceptions import GoogleAPIError from google.cloud import storage import pytest @@ -31,22 +33,25 @@ PROJECT = os.environ["GOOGLE_CLOUD_PROJECT"] +@backoff.on_exception(backoff.expo, GoogleAPIError, max_time=60) def test_enable_requester_pays(capsys): storage_enable_requester_pays.enable_requester_pays(BUCKET) out, _ = capsys.readouterr() - assert "Requester Pays has been enabled for {}".format(BUCKET) in out + assert f"Requester Pays has been enabled for {BUCKET}" in out +@backoff.on_exception(backoff.expo, GoogleAPIError, max_time=60) def test_disable_requester_pays(capsys): storage_disable_requester_pays.disable_requester_pays(BUCKET) out, _ = capsys.readouterr() - assert "Requester Pays has been disabled for {}".format(BUCKET) in out + assert f"Requester Pays has been disabled for {BUCKET}" in out +@backoff.on_exception(backoff.expo, GoogleAPIError, max_time=60) def test_get_requester_pays_status(capsys): storage_get_requester_pays_status.get_requester_pays_status(BUCKET) out, _ = capsys.readouterr() - assert "Requester Pays is disabled for {}".format(BUCKET) in out + assert f"Requester Pays is disabled for {BUCKET}" in out @pytest.fixture @@ -58,6 +63,7 @@ def test_blob(): return blob +@backoff.on_exception(backoff.expo, GoogleAPIError, max_time=60) def test_download_file_requester_pays(test_blob, capsys): with tempfile.NamedTemporaryFile() as dest_file: storage_download_file_requester_pays.download_file_requester_pays( diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 0a7557580..7f13e54c9 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,3 +1,4 @@ -pytest==6.2.5 -mock==4.0.3 -backoff==1.11.1 \ No newline at end of file +pytest===7.4.4; python_version == '3.7' +pytest==8.3.4; python_version >= '3.8' +mock==5.1.0 +backoff==2.2.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 14c2e74b4..a5a006ab2 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,8 @@ -google-cloud-pubsub==2.9.0 -google-cloud-storage==1.44.0 -pandas==1.3.5; python_version > '3.6' -pandas==1.1.5; python_version < '3.7' +google-cloud-pubsub==2.27.2 +google-cloud-storage==2.19.0 +pandas===1.3.5; python_version == '3.7' +pandas===2.0.3; python_version == '3.8' +pandas==2.2.3; python_version >= '3.9' +opentelemetry-exporter-gcp-trace +opentelemetry-propagator-gcp 
+opentelemetry-instrumentation-requests diff --git a/samples/snippets/rpo_test.py b/samples/snippets/rpo_test.py new file mode 100644 index 000000000..0dcf15746 --- /dev/null +++ b/samples/snippets/rpo_test.py @@ -0,0 +1,61 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import uuid + +from google.cloud import storage +import pytest + +import storage_create_bucket_turbo_replication +import storage_get_rpo +import storage_set_rpo_async_turbo +import storage_set_rpo_default + + +@pytest.fixture +def dual_region_bucket(): + """Yields a dual region bucket that is deleted after the test completes.""" + bucket = None + location = "NAM4" + while bucket is None or bucket.exists(): + bucket_name = f"bucket-lock-{uuid.uuid4()}" + bucket = storage.Client().bucket(bucket_name) + bucket.create(location=location) + yield bucket + bucket.delete(force=True) + + +def test_get_rpo(dual_region_bucket, capsys): + storage_get_rpo.get_rpo(dual_region_bucket.name) + out, _ = capsys.readouterr() + assert f"RPO for {dual_region_bucket.name} is DEFAULT." in out + + +def test_set_rpo_async_turbo(dual_region_bucket, capsys): + storage_set_rpo_async_turbo.set_rpo_async_turbo(dual_region_bucket.name) + out, _ = capsys.readouterr() + assert f"RPO is set to ASYNC_TURBO for {dual_region_bucket.name}." in out + + +def test_set_rpo_default(dual_region_bucket, capsys): + storage_set_rpo_default.set_rpo_default(dual_region_bucket.name) + out, _ = capsys.readouterr() + assert f"RPO is set to DEFAULT for {dual_region_bucket.name}." in out + + +def test_create_bucket_turbo_replication(capsys): + bucket_name = f"test-rpo-{uuid.uuid4()}" + storage_create_bucket_turbo_replication.create_bucket_turbo_replication(bucket_name) + out, _ = capsys.readouterr() + assert f"{bucket_name} created with the recovery point objective (RPO) set to ASYNC_TURBO in NAM4." in out diff --git a/samples/snippets/snippets_test.py b/samples/snippets/snippets_test.py index 7c0a5b91d..339693dd8 100644 --- a/samples/snippets/snippets_test.py +++ b/samples/snippets/snippets_test.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import asyncio +import io import os import tempfile import time @@ -23,6 +25,7 @@ import requests import storage_add_bucket_label +import storage_async_upload import storage_batch_request import storage_bucket_delete_default_kms_key import storage_change_default_storage_class @@ -33,6 +36,9 @@ import storage_copy_file_archived_generation import storage_cors_configuration import storage_create_bucket_class_location +import storage_create_bucket_dual_region +import storage_create_bucket_hierarchical_namespace +import storage_create_bucket_object_retention import storage_define_bucket_website_configuration import storage_delete_file import storage_delete_file_archived_generation @@ -42,12 +48,14 @@ import storage_download_file import storage_download_into_memory import storage_download_public_file +import storage_download_to_stream import storage_enable_bucket_lifecycle_management import storage_enable_versioning import storage_generate_signed_post_policy_v4 import storage_generate_signed_url_v2 import storage_generate_signed_url_v4 import storage_generate_upload_signed_url_v4 +import storage_get_autoclass import storage_get_bucket_labels import storage_get_bucket_metadata import storage_get_metadata @@ -62,13 +70,24 @@ import storage_remove_bucket_label import storage_remove_cors_configuration import storage_rename_file +import storage_set_autoclass import storage_set_bucket_default_kms_key +import storage_set_client_endpoint +import storage_set_object_retention_policy import storage_set_metadata +import storage_trace_quickstart +import storage_transfer_manager_download_bucket +import storage_transfer_manager_download_chunks_concurrently +import storage_transfer_manager_download_many +import storage_transfer_manager_upload_chunks_concurrently +import storage_transfer_manager_upload_directory +import storage_transfer_manager_upload_many import storage_upload_file import storage_upload_from_memory +import storage_upload_from_stream import storage_upload_with_kms_key -KMS_KEY = os.environ["CLOUD_KMS_KEY"] +KMS_KEY = os.environ.get("CLOUD_KMS_KEY") def test_enable_default_kms_key(test_bucket): @@ -104,7 +123,7 @@ def test_bucket(): """Yields a bucket that is deleted after the test completes.""" bucket = None while bucket is None or bucket.exists(): - bucket_name = "storage-snippets-test-{}".format(uuid.uuid4()) + bucket_name = f"storage-snippets-test-{uuid.uuid4()}" bucket = storage.Client().bucket(bucket_name) bucket.create() yield bucket @@ -115,25 +134,36 @@ def test_bucket(): def test_public_bucket(): # The new projects don't allow to make a bucket available to public, so # for some tests we need to use the old main project for now. - original_value = os.environ['GOOGLE_CLOUD_PROJECT'] - os.environ['GOOGLE_CLOUD_PROJECT'] = os.environ['MAIN_GOOGLE_CLOUD_PROJECT'] + original_value = os.environ["GOOGLE_CLOUD_PROJECT"] + os.environ["GOOGLE_CLOUD_PROJECT"] = os.environ["MAIN_GOOGLE_CLOUD_PROJECT"] bucket = None while bucket is None or bucket.exists(): storage_client = storage.Client() - bucket_name = "storage-snippets-test-{}".format(uuid.uuid4()) + bucket_name = f"storage-snippets-test-{uuid.uuid4()}" bucket = storage_client.bucket(bucket_name) storage_client.create_bucket(bucket) yield bucket bucket.delete(force=True) # Set the value back. 
- os.environ['GOOGLE_CLOUD_PROJECT'] = original_value + os.environ["GOOGLE_CLOUD_PROJECT"] = original_value + + +@pytest.fixture(scope="module") +def new_bucket_obj(): + """Yields a new bucket object that is deleted after the test completes.""" + bucket = None + while bucket is None or bucket.exists(): + bucket_name = f"storage-snippets-test-{uuid.uuid4()}" + bucket = storage.Client().bucket(bucket_name) + yield bucket + bucket.delete(force=True) @pytest.fixture def test_blob(test_bucket): """Yields a blob that is deleted after the test completes.""" bucket = test_bucket - blob = bucket.blob("storage_snippets_test_sigil-{}".format(uuid.uuid4())) + blob = bucket.blob(f"storage_snippets_test_sigil-{uuid.uuid4()}") blob.upload_from_string("Hello, is it me you're looking for?") yield blob @@ -142,7 +172,7 @@ def test_blob(test_bucket): def test_public_blob(test_public_bucket): """Yields a blob that is deleted after the test completes.""" bucket = test_public_bucket - blob = bucket.blob("storage_snippets_test_sigil-{}".format(uuid.uuid4())) + blob = bucket.blob(f"storage_snippets_test_sigil-{uuid.uuid4()}") blob.upload_from_string("Hello, is it me you're looking for?") yield blob @@ -152,7 +182,7 @@ def test_bucket_create(): """Yields a bucket object that is deleted after the test completes.""" bucket = None while bucket is None or bucket.exists(): - bucket_name = "storage-snippets-test-{}".format(uuid.uuid4()) + bucket_name = f"storage-snippets-test-{uuid.uuid4()}" bucket = storage.Client().bucket(bucket_name) yield bucket bucket.delete(force=True) @@ -187,6 +217,7 @@ def test_list_blobs_with_prefix(test_blob, capsys): def test_upload_blob(test_bucket): with tempfile.NamedTemporaryFile() as source_file: source_file.write(b"test") + source_file.flush() storage_upload_file.upload_blob( test_bucket.name, source_file.name, "test_upload_blob" @@ -202,15 +233,38 @@ def test_upload_blob_from_memory(test_bucket, capsys): assert "Hello, is it me you're looking for?" 
in out +def test_upload_blob_from_stream(test_bucket, capsys): + file_obj = io.BytesIO() + file_obj.write(b"This is test data.") + storage_upload_from_stream.upload_blob_from_stream( + test_bucket.name, file_obj, "test_upload_blob" + ) + out, _ = capsys.readouterr() + + assert "Stream data uploaded to test_upload_blob" in out + + def test_upload_blob_with_kms(test_bucket): + blob_name = f"test_upload_with_kms_{uuid.uuid4().hex}" with tempfile.NamedTemporaryFile() as source_file: source_file.write(b"test") + source_file.flush() storage_upload_with_kms_key.upload_blob_with_kms( - test_bucket.name, source_file.name, "test_upload_blob_encrypted", KMS_KEY + test_bucket.name, + source_file.name, + blob_name, + KMS_KEY, ) bucket = storage.Client().bucket(test_bucket.name) - kms_blob = bucket.get_blob("test_upload_blob_encrypted") + kms_blob = bucket.get_blob(blob_name) assert kms_blob.kms_key_name.startswith(KMS_KEY) + test_bucket.delete_blob(blob_name) + + +def test_async_upload(bucket, capsys): + asyncio.run(storage_async_upload.async_upload_blob(bucket.name)) + out, _ = capsys.readouterr() + assert f"Uploaded 3 files to bucket {bucket.name}" in out def test_download_byte_range(test_blob): @@ -218,7 +272,7 @@ def test_download_byte_range(test_blob): storage_download_byte_range.download_byte_range( test_blob.bucket.name, test_blob.name, 0, 4, dest_file.name ) - assert dest_file.read() == b'Hello' + assert dest_file.read() == b"Hello" def test_download_blob(test_blob): @@ -239,6 +293,20 @@ def test_download_blob_into_memory(test_blob, capsys): assert "Hello, is it me you're looking for?" in out +def test_download_blob_to_stream(test_blob, capsys): + file_obj = io.BytesIO() + storage_download_to_stream.download_blob_to_stream( + test_blob.bucket.name, test_blob.name, file_obj + ) + out, _ = capsys.readouterr() + + file_obj.seek(0) + content = file_obj.read() + + assert "Downloaded blob" in out + assert b"Hello, is it me you're looking for?" in content + + def test_blob_metadata(test_blob, capsys): storage_get_metadata.blob_metadata(test_blob.bucket.name, test_blob.name) out, _ = capsys.readouterr() @@ -257,7 +325,8 @@ def test_delete_blob(test_blob): def test_make_blob_public(test_public_blob): storage_make_public.make_blob_public( - test_public_blob.bucket.name, test_public_blob.name) + test_public_blob.bucket.name, test_public_blob.name + ) r = requests.get(test_public_blob.public_url) assert r.text == "Hello, is it me you're looking for?" 
@@ -289,19 +358,21 @@ def test_generate_upload_signed_url_v4(test_bucket, capsys): ) requests.put( - url, data=content, headers={"content-type": "application/octet-stream"}, + url, + data=content, + headers={"content-type": "application/octet-stream"}, ) bucket = storage.Client().bucket(test_bucket.name) blob = bucket.blob(blob_name) - assert blob.download_as_string() == content + assert blob.download_as_bytes() == content def test_generate_signed_policy_v4(test_bucket, capsys): blob_name = "storage_snippets_test_form" short_name = storage_generate_signed_post_policy_v4 form = short_name.generate_signed_post_policy_v4(test_bucket.name, blob_name) - assert "name='key' value='{}'".format(blob_name) in form + assert f"name='key' value='{blob_name}'" in form assert "name='x-goog-signature'" in form assert "name='x-goog-date'" in form assert "name='x-goog-credential'" in form @@ -317,7 +388,7 @@ def test_rename_blob(test_blob): try: bucket.delete_blob("test_rename_blob") except google.cloud.exceptions.exceptions.NotFound: - print("test_rename_blob not found in bucket {}".format(bucket.name)) + print(f"test_rename_blob not found in bucket {bucket.name}") storage_rename_file.rename_blob(bucket.name, test_blob.name, "test_rename_blob") @@ -332,10 +403,13 @@ def test_move_blob(test_bucket_create, test_blob): try: test_bucket_create.delete_blob("test_move_blob") except google.cloud.exceptions.NotFound: - print("test_move_blob not found in bucket {}".format(test_bucket_create.name)) + print(f"test_move_blob not found in bucket {test_bucket_create.name}") storage_move_file.move_blob( - bucket.name, test_blob.name, test_bucket_create.name, "test_move_blob" + bucket.name, + test_blob.name, + test_bucket_create.name, + "test_move_blob", ) assert test_bucket_create.get_blob("test_move_blob") is not None @@ -351,7 +425,10 @@ def test_copy_blob(test_blob): pass storage_copy_file.copy_blob( - bucket.name, test_blob.name, bucket.name, "test_copy_blob" + bucket.name, + test_blob.name, + bucket.name, + "test_copy_blob", ) assert bucket.get_blob("test_copy_blob") is not None @@ -370,17 +447,50 @@ def test_versioning(test_bucket, capsys): assert bucket.versioning_enabled is False +def test_get_set_autoclass(new_bucket_obj, test_bucket, capsys): + # Test default values when Autoclass is unset + bucket = storage_get_autoclass.get_autoclass(test_bucket.name) + out, _ = capsys.readouterr() + assert "Autoclass enabled is set to False" in out + assert bucket.autoclass_toggle_time is None + assert bucket.autoclass_terminal_storage_class_update_time is None + + # Test enabling Autoclass at bucket creation + new_bucket_obj.autoclass_enabled = True + bucket = storage.Client().create_bucket(new_bucket_obj) + assert bucket.autoclass_enabled is True + assert bucket.autoclass_terminal_storage_class == "NEARLINE" + + # Test set terminal_storage_class to ARCHIVE + bucket = storage_set_autoclass.set_autoclass(bucket.name) + out, _ = capsys.readouterr() + assert "Autoclass enabled is set to True" in out + assert bucket.autoclass_enabled is True + assert bucket.autoclass_terminal_storage_class == "ARCHIVE" + + # Test get Autoclass + bucket = storage_get_autoclass.get_autoclass(bucket.name) + out, _ = capsys.readouterr() + assert "Autoclass enabled is set to True" in out + assert bucket.autoclass_toggle_time is not None + assert bucket.autoclass_terminal_storage_class_update_time is not None + + def test_bucket_lifecycle_management(test_bucket, capsys): - bucket = 
storage_enable_bucket_lifecycle_management.enable_bucket_lifecycle_management( - test_bucket + bucket = ( + storage_enable_bucket_lifecycle_management.enable_bucket_lifecycle_management( + test_bucket + ) ) out, _ = capsys.readouterr() assert "[]" in out assert "Lifecycle management is enable" in out assert len(list(bucket.lifecycle_rules)) > 0 - bucket = storage_disable_bucket_lifecycle_management.disable_bucket_lifecycle_management( - test_bucket + bucket = ( + storage_disable_bucket_lifecycle_management.disable_bucket_lifecycle_management( + test_bucket + ) ) out, _ = capsys.readouterr() assert "[]" in out @@ -396,6 +506,21 @@ def test_create_bucket_class_location(test_bucket_create): assert bucket.storage_class == "COLDLINE" +def test_create_bucket_dual_region(test_bucket_create, capsys): + location = "US" + region_1 = "US-EAST1" + region_2 = "US-WEST1" + storage_create_bucket_dual_region.create_bucket_dual_region( + test_bucket_create.name, location, region_1, region_2 + ) + out, _ = capsys.readouterr() + assert f"Created bucket {test_bucket_create.name}" in out + assert location in out + assert region_1 in out + assert region_2 in out + assert "dual-region" in out + + def test_bucket_delete_default_kms_key(test_bucket, capsys): test_bucket.default_kms_key_name = KMS_KEY test_bucket.patch() @@ -421,7 +546,8 @@ def test_get_service_account(capsys): def test_download_public_file(test_public_blob): storage_make_public.make_blob_public( - test_public_blob.bucket.name, test_public_blob.name) + test_public_blob.bucket.name, test_public_blob.name + ) with tempfile.NamedTemporaryFile() as dest_file: storage_download_public_file.download_public_file( test_public_blob.bucket.name, test_public_blob.name, dest_file.name @@ -431,8 +557,10 @@ def test_download_public_file(test_public_blob): def test_define_bucket_website_configuration(test_bucket): - bucket = storage_define_bucket_website_configuration.define_bucket_website_configuration( - test_bucket.name, "index.html", "404.html" + bucket = ( + storage_define_bucket_website_configuration.define_bucket_website_configuration( + test_bucket.name, "index.html", "404.html" + ) ) website_val = {"mainPageSuffix": "index.html", "notFoundPage": "404.html"} @@ -443,7 +571,10 @@ def test_define_bucket_website_configuration(test_bucket): def test_object_get_kms_key(test_bucket): with tempfile.NamedTemporaryFile() as source_file: storage_upload_with_kms_key.upload_blob_with_kms( - test_bucket.name, source_file.name, "test_upload_blob_encrypted", KMS_KEY + test_bucket.name, + source_file.name, + "test_upload_blob_encrypted", + KMS_KEY, ) kms_key = storage_object_get_kms_key.object_get_kms_key( test_bucket.name, "test_upload_blob_encrypted" @@ -460,9 +591,12 @@ def test_storage_compose_file(test_bucket): with tempfile.NamedTemporaryFile() as dest_file: destination = storage_compose_file.compose_file( - test_bucket.name, source_files[0], source_files[1], dest_file.name + test_bucket.name, + source_files[0], + source_files[1], + dest_file.name, ) - composed = destination.download_as_string() + composed = destination.download_as_bytes() assert composed.decode("utf-8") == source_files[0] + source_files[1] @@ -495,16 +629,17 @@ def test_change_default_storage_class(test_bucket, capsys): ) out, _ = capsys.readouterr() assert "Default storage class for bucket" in out - assert bucket.storage_class == 'COLDLINE' + assert bucket.storage_class == "COLDLINE" def test_change_file_storage_class(test_blob, capsys): blob = 
storage_change_file_storage_class.change_file_storage_class( - test_blob.bucket.name, test_blob.name + test_blob.bucket.name, + test_blob.name, ) out, _ = capsys.readouterr() - assert "Blob {} in bucket {}". format(blob.name, blob.bucket.name) in out - assert blob.storage_class == 'NEARLINE' + assert f"Blob {blob.name} in bucket {blob.bucket.name}" in out + assert blob.storage_class == "NEARLINE" def test_copy_file_archived_generation(test_blob): @@ -538,7 +673,8 @@ def test_storage_configure_retries(test_blob, capsys): out, _ = capsys.readouterr() assert "The following library method is customized to be retried" in out assert "_should_retry" in out - assert "initial=1.5, maximum=45.0, multiplier=1.2, deadline=500.0" in out + assert "initial=1.5, maximum=45.0, multiplier=1.2" in out + assert "500" in out # "deadline" or "timeout" depending on dependency ver. def test_batch_request(test_bucket): @@ -553,3 +689,177 @@ def test_batch_request(test_bucket): assert blob1.metadata.get("your-metadata-key") == "your-metadata-value" assert blob2.metadata.get("your-metadata-key") == "your-metadata-value" + + +def test_storage_set_client_endpoint(capsys): + storage_set_client_endpoint.set_client_endpoint("https://storage.googleapis.com") + out, _ = capsys.readouterr() + + assert "client initiated with endpoint: https://storage.googleapis.com" in out + + +def test_transfer_manager_snippets(test_bucket, capsys): + BLOB_NAMES = [ + "test.txt", + "test2.txt", + "blobs/test.txt", + "blobs/nesteddir/test.txt", + ] + + with tempfile.TemporaryDirectory() as uploads: + # Create dirs and nested dirs + for name in BLOB_NAMES: + relpath = os.path.dirname(name) + os.makedirs(os.path.join(uploads, relpath), exist_ok=True) + + # Create files with nested dirs to exercise directory handling. + for name in BLOB_NAMES: + with open(os.path.join(uploads, name), "w") as f: + f.write(name) + + storage_transfer_manager_upload_many.upload_many_blobs_with_transfer_manager( + test_bucket.name, + BLOB_NAMES, + source_directory="{}/".format(uploads), + workers=8, + ) + out, _ = capsys.readouterr() + + for name in BLOB_NAMES: + assert "Uploaded {}".format(name) in out + + with tempfile.TemporaryDirectory() as downloads: + # Download the files. + storage_transfer_manager_download_bucket.download_bucket_with_transfer_manager( + test_bucket.name, + destination_directory=os.path.join(downloads, ""), + workers=8, + max_results=10000, + ) + out, _ = capsys.readouterr() + + for name in BLOB_NAMES: + assert "Downloaded {}".format(name) in out + + with tempfile.TemporaryDirectory() as downloads: + # Download the files. + storage_transfer_manager_download_many.download_many_blobs_with_transfer_manager( + test_bucket.name, + blob_names=BLOB_NAMES, + destination_directory=os.path.join(downloads, ""), + workers=8, + ) + out, _ = capsys.readouterr() + + for name in BLOB_NAMES: + assert "Downloaded {}".format(name) in out + + +def test_transfer_manager_directory_upload(test_bucket, capsys): + BLOB_NAMES = [ + "dirtest/test.txt", + "dirtest/test2.txt", + "dirtest/blobs/test.txt", + "dirtest/blobs/nesteddir/test.txt", + ] + + with tempfile.TemporaryDirectory() as uploads: + # Create dirs and nested dirs + for name in BLOB_NAMES: + relpath = os.path.dirname(name) + os.makedirs(os.path.join(uploads, relpath), exist_ok=True) + + # Create files with nested dirs to exercise directory handling. 
+ for name in BLOB_NAMES: + with open(os.path.join(uploads, name), "w") as f: + f.write(name) + + storage_transfer_manager_upload_directory.upload_directory_with_transfer_manager( + test_bucket.name, source_directory="{}/".format(uploads) + ) + out, _ = capsys.readouterr() + + assert "Found {}".format(len(BLOB_NAMES)) in out + for name in BLOB_NAMES: + assert "Uploaded {}".format(name) in out + + +def test_transfer_manager_download_chunks_concurrently(test_bucket, capsys): + BLOB_NAME = "test_file.txt" + + with tempfile.NamedTemporaryFile() as file: + file.write(b"test") + file.flush() + + storage_upload_file.upload_blob(test_bucket.name, file.name, BLOB_NAME) + + with tempfile.TemporaryDirectory() as downloads: + # Download the file. + storage_transfer_manager_download_chunks_concurrently.download_chunks_concurrently( + test_bucket.name, + BLOB_NAME, + os.path.join(downloads, BLOB_NAME), + workers=8, + ) + out, _ = capsys.readouterr() + + assert ( + "Downloaded {} to {}".format(BLOB_NAME, os.path.join(downloads, BLOB_NAME)) + in out + ) + + +def test_transfer_manager_upload_chunks_concurrently(test_bucket, capsys): + BLOB_NAME = "test_file.txt" + + with tempfile.NamedTemporaryFile() as file: + file.write(b"test") + file.flush() + + storage_transfer_manager_upload_chunks_concurrently.upload_chunks_concurrently( + test_bucket.name, file.name, BLOB_NAME + ) + + out, _ = capsys.readouterr() + assert "File {} uploaded to {}".format(file.name, BLOB_NAME) in out + + +def test_object_retention_policy(test_bucket_create, capsys): + storage_create_bucket_object_retention.create_bucket_object_retention( + test_bucket_create.name + ) + out, _ = capsys.readouterr() + assert f"Created bucket {test_bucket_create.name} with object retention enabled setting" in out + + blob_name = "test_object_retention" + storage_set_object_retention_policy.set_object_retention_policy( + test_bucket_create.name, "hello world", blob_name + ) + out, _ = capsys.readouterr() + assert f"Retention policy for file {blob_name}" in out + + # Remove retention policy for test cleanup + blob = test_bucket_create.blob(blob_name) + blob.retention.mode = None + blob.retention.retain_until_time = None + blob.patch(override_unlocked_retention=True) + + +def test_create_bucket_hierarchical_namespace(test_bucket_create, capsys): + storage_create_bucket_hierarchical_namespace.create_bucket_hierarchical_namespace( + test_bucket_create.name + ) + out, _ = capsys.readouterr() + assert f"Created bucket {test_bucket_create.name} with hierarchical namespace enabled" in out + + +def test_storage_trace_quickstart(test_bucket, capsys): + blob_name = f"trace_quickstart_{uuid.uuid4().hex}" + contents = "The quick brown fox jumps over the lazy dog." 
+ storage_trace_quickstart.run_quickstart(test_bucket.name, blob_name, contents) + out, _ = capsys.readouterr() + + assert f"{blob_name} uploaded to {test_bucket.name}" in out + assert ( + f"Downloaded storage object {blob_name} from bucket {test_bucket.name}" in out + ) diff --git a/samples/snippets/storage_activate_hmac_key.py b/samples/snippets/storage_activate_hmac_key.py index e77cd8066..d3960eb62 100644 --- a/samples/snippets/storage_activate_hmac_key.py +++ b/samples/snippets/storage_activate_hmac_key.py @@ -36,14 +36,14 @@ def activate_key(access_id, project_id): hmac_key.update() print("The HMAC key metadata is:") - print("Service Account Email: {}".format(hmac_key.service_account_email)) - print("Key ID: {}".format(hmac_key.id)) - print("Access ID: {}".format(hmac_key.access_id)) - print("Project ID: {}".format(hmac_key.project)) - print("State: {}".format(hmac_key.state)) - print("Created At: {}".format(hmac_key.time_created)) - print("Updated At: {}".format(hmac_key.updated)) - print("Etag: {}".format(hmac_key.etag)) + print(f"Service Account Email: {hmac_key.service_account_email}") + print(f"Key ID: {hmac_key.id}") + print(f"Access ID: {hmac_key.access_id}") + print(f"Project ID: {hmac_key.project}") + print(f"State: {hmac_key.state}") + print(f"Created At: {hmac_key.time_created}") + print(f"Updated At: {hmac_key.updated}") + print(f"Etag: {hmac_key.etag}") return hmac_key diff --git a/samples/snippets/storage_add_bucket_conditional_iam_binding.py b/samples/snippets/storage_add_bucket_conditional_iam_binding.py index ddc0fc028..d09f528cf 100644 --- a/samples/snippets/storage_add_bucket_conditional_iam_binding.py +++ b/samples/snippets/storage_add_bucket_conditional_iam_binding.py @@ -53,15 +53,15 @@ def add_bucket_conditional_iam_binding( bucket.set_iam_policy(policy) - print("Added the following member(s) with role {} to {}:".format(role, bucket_name)) + print(f"Added the following member(s) with role {role} to {bucket_name}:") for member in members: - print(" {}".format(member)) + print(f" {member}") print("with condition:") - print(" Title: {}".format(title)) - print(" Description: {}".format(description)) - print(" Expression: {}".format(expression)) + print(f" Title: {title}") + print(f" Description: {description}") + print(f" Expression: {expression}") # [END storage_add_bucket_conditional_iam_binding] diff --git a/samples/snippets/storage_add_bucket_iam_member.py b/samples/snippets/storage_add_bucket_iam_member.py index 727f18483..0d610eae7 100644 --- a/samples/snippets/storage_add_bucket_iam_member.py +++ b/samples/snippets/storage_add_bucket_iam_member.py @@ -35,7 +35,7 @@ def add_bucket_iam_member(bucket_name, role, member): bucket.set_iam_policy(policy) - print("Added {} with role {} to {}.".format(member, role, bucket_name)) + print(f"Added {member} with role {role} to {bucket_name}.") # [END storage_add_bucket_iam_member] diff --git a/samples/snippets/storage_add_bucket_label.py b/samples/snippets/storage_add_bucket_label.py index 8ae8fe1f4..9c6fcff7a 100644 --- a/samples/snippets/storage_add_bucket_label.py +++ b/samples/snippets/storage_add_bucket_label.py @@ -36,7 +36,7 @@ def add_bucket_label(bucket_name): bucket.labels = labels bucket.patch() - print("Updated labels on {}.".format(bucket.name)) + print(f"Updated labels on {bucket.name}.") pprint.pprint(bucket.labels) diff --git a/samples/snippets/storage_add_bucket_owner.py b/samples/snippets/storage_add_bucket_owner.py index acdb60dc5..bac1f3f64 100644 --- a/samples/snippets/storage_add_bucket_owner.py +++ 
b/samples/snippets/storage_add_bucket_owner.py
@@ -40,9 +40,7 @@ def add_bucket_owner(bucket_name, user_email):
     bucket.acl.save()
 
     print(
-        "Added user {} as an owner on bucket {}.".format(
-            user_email, bucket_name
-        )
+        f"Added user {user_email} as an owner on bucket {bucket_name}."
     )
 
 
diff --git a/samples/snippets/storage_async_upload.py b/samples/snippets/storage_async_upload.py
new file mode 100644
index 000000000..25aabb63e
--- /dev/null
+++ b/samples/snippets/storage_async_upload.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python
+
+# Copyright 2021 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the 'License');
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import asyncio
+import sys
+
+
+"""Sample that asynchronously uploads files to GCS
+"""
+
+
+# [START storage_async_upload]
+# This sample can be run by calling `asyncio.run(async_upload_blob('bucket_name'))`
+async def async_upload_blob(bucket_name):
+    """Uploads a number of files in parallel to the bucket."""
+    # The ID of your GCS bucket
+    # bucket_name = "your-bucket-name"
+    import asyncio
+    from functools import partial
+    from google.cloud import storage
+
+    storage_client = storage.Client()
+    bucket = storage_client.bucket(bucket_name)
+
+    loop = asyncio.get_running_loop()
+
+    tasks = []
+    count = 3
+    for x in range(count):
+        blob_name = f"async_sample_blob_{x}"
+        content = f"Hello world #{x}"
+        blob = bucket.blob(blob_name)
+        # The first arg, None, tells it to use the default loop's executor
+        tasks.append(loop.run_in_executor(None, partial(blob.upload_from_string, content)))
+
+    # If the method returns a value (such as download_as_string), gather will return the values
+    await asyncio.gather(*tasks)
+
+    print(f"Uploaded {count} files to bucket {bucket_name}")
+
+
+# [END storage_async_upload]
+
+
+if __name__ == "__main__":
+    asyncio.run(async_upload_blob(
+        bucket_name=sys.argv[1]
+    ))
diff --git a/samples/snippets/storage_batch_request.py b/samples/snippets/storage_batch_request.py
index 863fc09cd..7fe11fb1c 100644
--- a/samples/snippets/storage_batch_request.py
+++ b/samples/snippets/storage_batch_request.py
@@ -28,7 +28,14 @@
 
 
 def batch_request(bucket_name, prefix=None):
-    """Use a batch request to patch a list of objects with the given prefix in a bucket."""
+    """
+    Use a batch request to patch a list of objects with the given prefix in a bucket.
+
+    Note that Cloud Storage does not support batch operations for uploading or downloading.
+    Additionally, the current batch design does not support library methods whose return values
+    depend on the response payload.
+ See https://cloud.google.com/python/docs/reference/storage/latest/google.cloud.storage.batch + """ # The ID of your GCS bucket # bucket_name = "my-bucket" # The prefix of the object paths diff --git a/samples/snippets/storage_bucket_delete_default_kms_key.py b/samples/snippets/storage_bucket_delete_default_kms_key.py index 3df23767d..0db293756 100644 --- a/samples/snippets/storage_bucket_delete_default_kms_key.py +++ b/samples/snippets/storage_bucket_delete_default_kms_key.py @@ -30,7 +30,7 @@ def bucket_delete_default_kms_key(bucket_name): bucket.default_kms_key_name = None bucket.patch() - print("Default KMS key was removed from {}".format(bucket.name)) + print(f"Default KMS key was removed from {bucket.name}") return bucket diff --git a/samples/snippets/storage_change_default_storage_class.py b/samples/snippets/storage_change_default_storage_class.py index 8a72719ba..5d2f924ad 100644 --- a/samples/snippets/storage_change_default_storage_class.py +++ b/samples/snippets/storage_change_default_storage_class.py @@ -31,7 +31,7 @@ def change_default_storage_class(bucket_name): bucket.storage_class = constants.COLDLINE_STORAGE_CLASS bucket.patch() - print("Default storage class for bucket {} has been set to {}".format(bucket_name, bucket.storage_class)) + print(f"Default storage class for bucket {bucket_name} has been set to {bucket.storage_class}") return bucket diff --git a/samples/snippets/storage_change_file_storage_class.py b/samples/snippets/storage_change_file_storage_class.py index d5dda56a7..a976ac8a4 100644 --- a/samples/snippets/storage_change_file_storage_class.py +++ b/samples/snippets/storage_change_file_storage_class.py @@ -27,9 +27,17 @@ def change_file_storage_class(bucket_name, blob_name): storage_client = storage.Client() - bucket = storage_client.get_bucket(bucket_name) - blob = bucket.get_blob(blob_name) - blob.update_storage_class("NEARLINE") + bucket = storage_client.bucket(bucket_name) + blob = bucket.blob(blob_name) + generation_match_precondition = None + + # Optional: set a generation-match precondition to avoid potential race + # conditions and data corruptions. The request is aborted if the + # object's generation number does not match your precondition. + blob.reload() # Fetch blob metadata to use in generation_match_precondition. + generation_match_precondition = blob.generation + + blob.update_storage_class("NEARLINE", if_generation_match=generation_match_precondition) print( "Blob {} in bucket {} had its storage class set to {}".format( diff --git a/samples/snippets/storage_compose_file.py b/samples/snippets/storage_compose_file.py index 2c1443f22..e67391272 100644 --- a/samples/snippets/storage_compose_file.py +++ b/samples/snippets/storage_compose_file.py @@ -32,9 +32,19 @@ def compose_file(bucket_name, first_blob_name, second_blob_name, destination_blo destination = bucket.blob(destination_blob_name) destination.content_type = "text/plain" - # sources is a list of Blob instances, up to the max of 32 instances per request - sources = [bucket.get_blob(first_blob_name), bucket.get_blob(second_blob_name)] - destination.compose(sources) + # Note sources is a list of Blob instances, up to the max of 32 instances per request + sources = [bucket.blob(first_blob_name), bucket.blob(second_blob_name)] + + # Optional: set a generation-match precondition to avoid potential race conditions + # and data corruptions. The request to compose is aborted if the object's + # generation number does not match your precondition. 
For a destination + # object that does not yet exist, set the if_generation_match precondition to 0. + # If the destination object already exists in your bucket, set instead a + # generation-match precondition using its generation number. + # There is also an `if_source_generation_match` parameter, which is not used in this example. + destination_generation_match_precondition = 0 + + destination.compose(sources, if_generation_match=destination_generation_match_precondition) print( "New composite object {} in the bucket {} was created by combining {} and {}".format( diff --git a/samples/snippets/storage_configure_retries.py b/samples/snippets/storage_configure_retries.py index 9543111b3..ef1e422b6 100644 --- a/samples/snippets/storage_configure_retries.py +++ b/samples/snippets/storage_configure_retries.py @@ -53,7 +53,7 @@ def configure_retries(bucket_name, blob_name): ) blob.delete(retry=modified_retry) - print("Blob {} deleted with a customized retry strategy.".format(blob_name)) + print(f"Blob {blob_name} deleted with a customized retry strategy.") # [END storage_configure_retries] diff --git a/samples/snippets/storage_copy_file.py b/samples/snippets/storage_copy_file.py index 5d36aa94b..b802de28b 100644 --- a/samples/snippets/storage_copy_file.py +++ b/samples/snippets/storage_copy_file.py @@ -21,7 +21,7 @@ def copy_blob( - bucket_name, blob_name, destination_bucket_name, destination_blob_name + bucket_name, blob_name, destination_bucket_name, destination_blob_name, ): """Copies a blob from one bucket to another with a new name.""" # bucket_name = "your-bucket-name" @@ -35,8 +35,17 @@ def copy_blob( source_blob = source_bucket.blob(blob_name) destination_bucket = storage_client.bucket(destination_bucket_name) + # Optional: set a generation-match precondition to avoid potential race conditions + # and data corruptions. The request to copy is aborted if the object's + # generation number does not match your precondition. For a destination + # object that does not yet exist, set the if_generation_match precondition to 0. + # If the destination object already exists in your bucket, set instead a + # generation-match precondition using its generation number. + # There is also an `if_source_generation_match` parameter, which is not used in this example. + destination_generation_match_precondition = 0 + blob_copy = source_bucket.copy_blob( - source_blob, destination_bucket, destination_blob_name + source_blob, destination_bucket, destination_blob_name, if_generation_match=destination_generation_match_precondition, ) print( diff --git a/samples/snippets/storage_copy_file_archived_generation.py b/samples/snippets/storage_copy_file_archived_generation.py index 988ebcbeb..419d8e5a3 100644 --- a/samples/snippets/storage_copy_file_archived_generation.py +++ b/samples/snippets/storage_copy_file_archived_generation.py @@ -36,13 +36,22 @@ def copy_file_archived_generation( source_blob = source_bucket.blob(blob_name) destination_bucket = storage_client.bucket(destination_bucket_name) + # Optional: set a generation-match precondition to avoid potential race conditions + # and data corruptions. The request to copy is aborted if the object's + # generation number does not match your precondition. For a destination + # object that does not yet exist, set the if_generation_match precondition to 0. + # If the destination object already exists in your bucket, set instead a + # generation-match precondition using its generation number. 
+ destination_generation_match_precondition = 0 + + # source_generation selects a specific revision of the source object, as opposed to the latest version. blob_copy = source_bucket.copy_blob( - source_blob, destination_bucket, destination_blob_name, source_generation=generation + source_blob, destination_bucket, destination_blob_name, source_generation=generation, if_generation_match=destination_generation_match_precondition ) print( "Generation {} of the blob {} in bucket {} copied to blob {} in bucket {}.".format( - source_blob.generation, + generation, source_blob.name, source_bucket.name, blob_copy.name, diff --git a/samples/snippets/storage_cors_configuration.py b/samples/snippets/storage_cors_configuration.py index 3d2595a9d..2c5dd2428 100644 --- a/samples/snippets/storage_cors_configuration.py +++ b/samples/snippets/storage_cors_configuration.py @@ -38,7 +38,7 @@ def cors_configuration(bucket_name): ] bucket.patch() - print("Set CORS policies for bucket {} is {}".format(bucket.name, bucket.cors)) + print(f"Set CORS policies for bucket {bucket.name} is {bucket.cors}") return bucket diff --git a/samples/snippets/storage_create_bucket.py b/samples/snippets/storage_create_bucket.py index aaee9e234..c95f32f56 100644 --- a/samples/snippets/storage_create_bucket.py +++ b/samples/snippets/storage_create_bucket.py @@ -28,7 +28,7 @@ def create_bucket(bucket_name): bucket = storage_client.create_bucket(bucket_name) - print("Bucket {} created".format(bucket.name)) + print(f"Bucket {bucket.name} created") # [END storage_create_bucket] diff --git a/samples/snippets/storage_create_bucket_dual_region.py b/samples/snippets/storage_create_bucket_dual_region.py new file mode 100644 index 000000000..c5a78fa0f --- /dev/null +++ b/samples/snippets/storage_create_bucket_dual_region.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python + +# Copyright 2022 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys + +""" +Sample that creates a dual region bucket. +""" + +# [START storage_create_bucket_dual_region] +from google.cloud import storage + + +def create_bucket_dual_region(bucket_name, location, region_1, region_2): + """Creates a Dual-Region Bucket with provided location and regions..""" + # The ID of your GCS bucket + # bucket_name = "your-bucket-name" + + # The bucket's pair of regions. Case-insensitive. 
+ # See this documentation for other valid locations: + # https://cloud.google.com/storage/docs/locations + # region_1 = "US-EAST1" + # region_2 = "US-WEST1" + # location = "US" + + storage_client = storage.Client() + bucket = storage_client.create_bucket(bucket_name, location=location, data_locations=[region_1, region_2]) + + print(f"Created bucket {bucket_name}") + print(f" - location: {bucket.location}") + print(f" - location_type: {bucket.location_type}") + print(f" - customPlacementConfig data_locations: {bucket.data_locations}") + + +# [END storage_create_bucket_dual_region] + + +if __name__ == "__main__": + create_bucket_dual_region( + bucket_name=sys.argv[1], location=sys.argv[2], region_1=sys.argv[3], region_2=sys.argv[4] + ) diff --git a/samples/snippets/storage_create_bucket_hierarchical_namespace.py b/samples/snippets/storage_create_bucket_hierarchical_namespace.py new file mode 100644 index 000000000..d9d310772 --- /dev/null +++ b/samples/snippets/storage_create_bucket_hierarchical_namespace.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python + +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys + +# [START storage_create_bucket_hierarchical_namespace] +from google.cloud import storage + + +def create_bucket_hierarchical_namespace(bucket_name): + """Creates a bucket with hierarchical namespace enabled.""" + # The ID of your GCS bucket + # bucket_name = "your-bucket-name" + + storage_client = storage.Client() + bucket = storage_client.bucket(bucket_name) + bucket.iam_configuration.uniform_bucket_level_access_enabled = True + bucket.hierarchical_namespace_enabled = True + bucket.create() + + print(f"Created bucket {bucket_name} with hierarchical namespace enabled.") + + +# [END storage_create_bucket_hierarchical_namespace] + + +if __name__ == "__main__": + create_bucket_hierarchical_namespace(bucket_name=sys.argv[1]) diff --git a/samples/snippets/storage_create_bucket_object_retention.py b/samples/snippets/storage_create_bucket_object_retention.py new file mode 100644 index 000000000..4ebc32c0a --- /dev/null +++ b/samples/snippets/storage_create_bucket_object_retention.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python + +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys + +# [START storage_create_bucket_with_object_retention] +from google.cloud import storage + + +def create_bucket_object_retention(bucket_name): + """Creates a bucket with object retention enabled.""" + # The ID of your GCS bucket + # bucket_name = "your-bucket-name" + + storage_client = storage.Client() + bucket = storage_client.create_bucket(bucket_name, enable_object_retention=True) + + print(f"Created bucket {bucket_name} with object retention enabled setting: {bucket.object_retention_mode}") + + +# [END storage_create_bucket_with_object_retention] + + +if __name__ == "__main__": + create_bucket_object_retention(bucket_name=sys.argv[1]) diff --git a/samples/snippets/storage_create_bucket_turbo_replication.py b/samples/snippets/storage_create_bucket_turbo_replication.py new file mode 100644 index 000000000..bc0559795 --- /dev/null +++ b/samples/snippets/storage_create_bucket_turbo_replication.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python + +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys + +"""Sample that creates a new bucket with dual-region and turbo replication. +This sample is used on this page: + https://cloud.google.com/storage/docs/managing-turbo-replication +For more information, see README.md. 
+""" + +# [START storage_create_bucket_turbo_replication] + +from google.cloud import storage +from google.cloud.storage.constants import RPO_ASYNC_TURBO + + +def create_bucket_turbo_replication(bucket_name): + """Creates dual-region bucket with turbo replication enabled.""" + # The ID of your GCS bucket + # bucket_name = "my-bucket" + + storage_client = storage.Client() + bucket = storage_client.bucket(bucket_name) + bucket_location = "NAM4" + bucket.rpo = RPO_ASYNC_TURBO + bucket.create(location=bucket_location) + + print(f"{bucket.name} created with the recovery point objective (RPO) set to {bucket.rpo} in {bucket.location}.") + + +# [END storage_create_bucket_turbo_replication] + +if __name__ == "__main__": + create_bucket_turbo_replication(bucket_name=sys.argv[1]) diff --git a/samples/snippets/storage_create_hmac_key.py b/samples/snippets/storage_create_hmac_key.py index 27a418c39..d845738b7 100644 --- a/samples/snippets/storage_create_hmac_key.py +++ b/samples/snippets/storage_create_hmac_key.py @@ -33,17 +33,17 @@ def create_key(project_id, service_account_email): service_account_email=service_account_email, project_id=project_id ) - print("The base64 encoded secret is {}".format(secret)) + print(f"The base64 encoded secret is {secret}") print("Do not miss that secret, there is no API to recover it.") print("The HMAC key metadata is:") - print("Service Account Email: {}".format(hmac_key.service_account_email)) - print("Key ID: {}".format(hmac_key.id)) - print("Access ID: {}".format(hmac_key.access_id)) - print("Project ID: {}".format(hmac_key.project)) - print("State: {}".format(hmac_key.state)) - print("Created At: {}".format(hmac_key.time_created)) - print("Updated At: {}".format(hmac_key.updated)) - print("Etag: {}".format(hmac_key.etag)) + print(f"Service Account Email: {hmac_key.service_account_email}") + print(f"Key ID: {hmac_key.id}") + print(f"Access ID: {hmac_key.access_id}") + print(f"Project ID: {hmac_key.project}") + print(f"State: {hmac_key.state}") + print(f"Created At: {hmac_key.time_created}") + print(f"Updated At: {hmac_key.updated}") + print(f"Etag: {hmac_key.etag}") return hmac_key diff --git a/samples/snippets/storage_deactivate_hmac_key.py b/samples/snippets/storage_deactivate_hmac_key.py index 389efb998..007f7b5a5 100644 --- a/samples/snippets/storage_deactivate_hmac_key.py +++ b/samples/snippets/storage_deactivate_hmac_key.py @@ -37,14 +37,14 @@ def deactivate_key(access_id, project_id): print("The HMAC key is now inactive.") print("The HMAC key metadata is:") - print("Service Account Email: {}".format(hmac_key.service_account_email)) - print("Key ID: {}".format(hmac_key.id)) - print("Access ID: {}".format(hmac_key.access_id)) - print("Project ID: {}".format(hmac_key.project)) - print("State: {}".format(hmac_key.state)) - print("Created At: {}".format(hmac_key.time_created)) - print("Updated At: {}".format(hmac_key.updated)) - print("Etag: {}".format(hmac_key.etag)) + print(f"Service Account Email: {hmac_key.service_account_email}") + print(f"Key ID: {hmac_key.id}") + print(f"Access ID: {hmac_key.access_id}") + print(f"Project ID: {hmac_key.project}") + print(f"State: {hmac_key.state}") + print(f"Created At: {hmac_key.time_created}") + print(f"Updated At: {hmac_key.updated}") + print(f"Etag: {hmac_key.etag}") return hmac_key diff --git a/samples/snippets/storage_delete_bucket.py b/samples/snippets/storage_delete_bucket.py index b3e264c74..b12c06636 100644 --- a/samples/snippets/storage_delete_bucket.py +++ b/samples/snippets/storage_delete_bucket.py @@ -29,7 
+29,7 @@ def delete_bucket(bucket_name): bucket = storage_client.get_bucket(bucket_name) bucket.delete() - print("Bucket {} deleted".format(bucket.name)) + print(f"Bucket {bucket.name} deleted") # [END storage_delete_bucket] diff --git a/samples/snippets/storage_delete_file.py b/samples/snippets/storage_delete_file.py index 1105f3725..427604145 100644 --- a/samples/snippets/storage_delete_file.py +++ b/samples/snippets/storage_delete_file.py @@ -29,9 +29,17 @@ def delete_blob(bucket_name, blob_name): bucket = storage_client.bucket(bucket_name) blob = bucket.blob(blob_name) - blob.delete() + generation_match_precondition = None - print("Blob {} deleted.".format(blob_name)) + # Optional: set a generation-match precondition to avoid potential race conditions + # and data corruptions. The request to delete is aborted if the object's + # generation number does not match your precondition. + blob.reload() # Fetch blob metadata to use in generation_match_precondition. + generation_match_precondition = blob.generation + + blob.delete(if_generation_match=generation_match_precondition) + + print(f"Blob {blob_name} deleted.") # [END storage_delete_file] diff --git a/samples/snippets/storage_delete_file_archived_generation.py b/samples/snippets/storage_delete_file_archived_generation.py index 4e4909001..ff02bca23 100644 --- a/samples/snippets/storage_delete_file_archived_generation.py +++ b/samples/snippets/storage_delete_file_archived_generation.py @@ -31,9 +31,7 @@ def delete_file_archived_generation(bucket_name, blob_name, generation): bucket = storage_client.get_bucket(bucket_name) bucket.delete_blob(blob_name, generation=generation) print( - "Generation {} of blob {} was deleted from {}".format( - generation, blob_name, bucket_name - ) + f"Generation {generation} of blob {blob_name} was deleted from {bucket_name}" ) diff --git a/samples/snippets/storage_disable_bucket_lifecycle_management.py b/samples/snippets/storage_disable_bucket_lifecycle_management.py index 9ef6971fb..a5fa56fcf 100644 --- a/samples/snippets/storage_disable_bucket_lifecycle_management.py +++ b/samples/snippets/storage_disable_bucket_lifecycle_management.py @@ -31,7 +31,7 @@ def disable_bucket_lifecycle_management(bucket_name): bucket.patch() rules = bucket.lifecycle_rules - print("Lifecycle management is disable for bucket {} and the rules are {}".format(bucket_name, list(rules))) + print(f"Lifecycle management is disable for bucket {bucket_name} and the rules are {list(rules)}") return bucket diff --git a/samples/snippets/storage_disable_default_event_based_hold.py b/samples/snippets/storage_disable_default_event_based_hold.py index dff3ed3c1..48becdac1 100644 --- a/samples/snippets/storage_disable_default_event_based_hold.py +++ b/samples/snippets/storage_disable_default_event_based_hold.py @@ -30,7 +30,7 @@ def disable_default_event_based_hold(bucket_name): bucket.default_event_based_hold = False bucket.patch() - print("Default event based hold was disabled for {}".format(bucket_name)) + print(f"Default event based hold was disabled for {bucket_name}") # [END storage_disable_default_event_based_hold] diff --git a/samples/snippets/storage_disable_requester_pays.py b/samples/snippets/storage_disable_requester_pays.py index c49cc28ea..78e195d8a 100644 --- a/samples/snippets/storage_disable_requester_pays.py +++ b/samples/snippets/storage_disable_requester_pays.py @@ -30,7 +30,7 @@ def disable_requester_pays(bucket_name): bucket.requester_pays = False bucket.patch() - print("Requester Pays has been disabled for 
{}".format(bucket_name)) + print(f"Requester Pays has been disabled for {bucket_name}") # [END storage_disable_requester_pays] diff --git a/samples/snippets/storage_disable_uniform_bucket_level_access.py b/samples/snippets/storage_disable_uniform_bucket_level_access.py index 4f4691611..20a045686 100644 --- a/samples/snippets/storage_disable_uniform_bucket_level_access.py +++ b/samples/snippets/storage_disable_uniform_bucket_level_access.py @@ -31,7 +31,7 @@ def disable_uniform_bucket_level_access(bucket_name): bucket.patch() print( - "Uniform bucket-level access was disabled for {}.".format(bucket.name) + f"Uniform bucket-level access was disabled for {bucket.name}." ) diff --git a/samples/snippets/storage_disable_versioning.py b/samples/snippets/storage_disable_versioning.py index 98832ba68..9dfd0ff90 100644 --- a/samples/snippets/storage_disable_versioning.py +++ b/samples/snippets/storage_disable_versioning.py @@ -30,7 +30,7 @@ def disable_versioning(bucket_name): bucket.versioning_enabled = False bucket.patch() - print("Versioning was disabled for bucket {}".format(bucket)) + print(f"Versioning was disabled for bucket {bucket}") return bucket diff --git a/samples/snippets/storage_download_encrypted_file.py b/samples/snippets/storage_download_encrypted_file.py index ac7071fbe..8a81b0de5 100644 --- a/samples/snippets/storage_download_encrypted_file.py +++ b/samples/snippets/storage_download_encrypted_file.py @@ -52,9 +52,7 @@ def download_encrypted_blob( blob.download_to_filename(destination_file_name) print( - "Blob {} downloaded to {}.".format( - source_blob_name, destination_file_name - ) + f"Blob {source_blob_name} downloaded to {destination_file_name}." ) diff --git a/samples/snippets/storage_download_into_memory.py b/samples/snippets/storage_download_into_memory.py index 453a13e21..97f677054 100644 --- a/samples/snippets/storage_download_into_memory.py +++ b/samples/snippets/storage_download_into_memory.py @@ -37,11 +37,11 @@ def download_blob_into_memory(bucket_name, blob_name): # any content from Google Cloud Storage. As we don't need additional data, # using `Bucket.blob` is preferred here. blob = bucket.blob(blob_name) - contents = blob.download_as_string() + contents = blob.download_as_bytes() print( - "Downloaded storage object {} from bucket {} as the following string: {}.".format( - blob_name, bucket_name, contents + "Downloaded storage object {} from bucket {} as the following bytes object: {}.".format( + blob_name, bucket_name, contents.decode("utf-8") ) ) diff --git a/samples/snippets/storage_download_to_stream.py b/samples/snippets/storage_download_to_stream.py new file mode 100644 index 000000000..3834e34c9 --- /dev/null +++ b/samples/snippets/storage_download_to_stream.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python + +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# [START storage_stream_file_download] +from google.cloud import storage + + +def download_blob_to_stream(bucket_name, source_blob_name, file_obj): + """Downloads a blob to a stream or other file-like object.""" + + # The ID of your GCS bucket + # bucket_name = "your-bucket-name" + + # The ID of your GCS object (blob) + # source_blob_name = "storage-object-name" + + # The stream or file (file-like object) to which the blob will be written + # import io + # file_obj = io.BytesIO() + + storage_client = storage.Client() + + bucket = storage_client.bucket(bucket_name) + + # Construct a client-side representation of a blob. + # Note `Bucket.blob` differs from `Bucket.get_blob` in that it doesn't + # retrieve metadata from Google Cloud Storage. As we don't use metadata in + # this example, using `Bucket.blob` is preferred here. + blob = bucket.blob(source_blob_name) + blob.download_to_file(file_obj) + + print(f"Downloaded blob {source_blob_name} to file-like object.") + + return file_obj + # Before reading from file_obj, remember to rewind with file_obj.seek(0). + +# [END storage_stream_file_download] diff --git a/samples/snippets/storage_enable_bucket_lifecycle_management.py b/samples/snippets/storage_enable_bucket_lifecycle_management.py index 61c7d7b20..0bbff079c 100644 --- a/samples/snippets/storage_enable_bucket_lifecycle_management.py +++ b/samples/snippets/storage_enable_bucket_lifecycle_management.py @@ -29,12 +29,12 @@ def enable_bucket_lifecycle_management(bucket_name): bucket = storage_client.get_bucket(bucket_name) rules = bucket.lifecycle_rules - print("Lifecycle management rules for bucket {} are {}".format(bucket_name, list(rules))) + print(f"Lifecycle management rules for bucket {bucket_name} are {list(rules)}") bucket.add_lifecycle_delete_rule(age=2) bucket.patch() rules = bucket.lifecycle_rules - print("Lifecycle management is enable for bucket {} and the rules are {}".format(bucket_name, list(rules))) + print(f"Lifecycle management is enable for bucket {bucket_name} and the rules are {list(rules)}") return bucket diff --git a/samples/snippets/storage_enable_default_event_based_hold.py b/samples/snippets/storage_enable_default_event_based_hold.py index a535390c9..5dfdf94a9 100644 --- a/samples/snippets/storage_enable_default_event_based_hold.py +++ b/samples/snippets/storage_enable_default_event_based_hold.py @@ -30,7 +30,7 @@ def enable_default_event_based_hold(bucket_name): bucket.default_event_based_hold = True bucket.patch() - print("Default event based hold was enabled for {}".format(bucket_name)) + print(f"Default event based hold was enabled for {bucket_name}") # [END storage_enable_default_event_based_hold] diff --git a/samples/snippets/storage_enable_requester_pays.py b/samples/snippets/storage_enable_requester_pays.py index 9787008dd..fbecb04f4 100644 --- a/samples/snippets/storage_enable_requester_pays.py +++ b/samples/snippets/storage_enable_requester_pays.py @@ -30,7 +30,7 @@ def enable_requester_pays(bucket_name): bucket.requester_pays = True bucket.patch() - print("Requester Pays has been enabled for {}".format(bucket_name)) + print(f"Requester Pays has been enabled for {bucket_name}") # [END storage_enable_requester_pays] diff --git a/samples/snippets/storage_enable_uniform_bucket_level_access.py b/samples/snippets/storage_enable_uniform_bucket_level_access.py index c689bb735..9ab71ae37 100644 --- a/samples/snippets/storage_enable_uniform_bucket_level_access.py +++ b/samples/snippets/storage_enable_uniform_bucket_level_access.py @@ -31,7 +31,7 @@ def 
enable_uniform_bucket_level_access(bucket_name): bucket.patch() print( - "Uniform bucket-level access was enabled for {}.".format(bucket.name) + f"Uniform bucket-level access was enabled for {bucket.name}." ) diff --git a/samples/snippets/storage_enable_versioning.py b/samples/snippets/storage_enable_versioning.py index 89693e426..9cdc98001 100644 --- a/samples/snippets/storage_enable_versioning.py +++ b/samples/snippets/storage_enable_versioning.py @@ -30,7 +30,7 @@ def enable_versioning(bucket_name): bucket.versioning_enabled = True bucket.patch() - print("Versioning was enabled for bucket {}".format(bucket.name)) + print(f"Versioning was enabled for bucket {bucket.name}") return bucket diff --git a/samples/snippets/storage_generate_encryption_key.py b/samples/snippets/storage_generate_encryption_key.py index a973418a6..dbeb46b91 100644 --- a/samples/snippets/storage_generate_encryption_key.py +++ b/samples/snippets/storage_generate_encryption_key.py @@ -30,7 +30,7 @@ def generate_encryption_key(): key = os.urandom(32) encoded_key = base64.b64encode(key).decode("utf-8") - print("Base 64 encoded encryption key: {}".format(encoded_key)) + print(f"Base 64 encoded encryption key: {encoded_key}") # [END storage_generate_encryption_key] diff --git a/samples/snippets/storage_generate_signed_post_policy_v4.py b/samples/snippets/storage_generate_signed_post_policy_v4.py index 8217714e2..0c06ddc2f 100644 --- a/samples/snippets/storage_generate_signed_post_policy_v4.py +++ b/samples/snippets/storage_generate_signed_post_policy_v4.py @@ -46,7 +46,7 @@ def generate_signed_post_policy_v4(bucket_name, blob_name): # Include all fields returned in the HTML form as they're required for key, value in policy["fields"].items(): - form += "  <input name='{}' value='{}' type='hidden'/>\n".format(key, value) + form += f"  <input name='{key}' value='{value}' type='hidden'/>\n" form += "  <input type='file' name='file'/><br />\n" form += "  <input type='submit' value='Upload File' name='submit'/><br />
\n" diff --git a/samples/snippets/storage_generate_signed_url_v2.py b/samples/snippets/storage_generate_signed_url_v2.py index abea3dd54..f1317ea2f 100644 --- a/samples/snippets/storage_generate_signed_url_v2.py +++ b/samples/snippets/storage_generate_signed_url_v2.py @@ -44,7 +44,7 @@ def generate_signed_url(bucket_name, blob_name): method="GET", ) - print("The signed url for {} is {}".format(blob.name, url)) + print(f"The signed url for {blob.name} is {url}") return url diff --git a/samples/snippets/storage_generate_signed_url_v4.py b/samples/snippets/storage_generate_signed_url_v4.py index 2a45b23e9..80625a7b3 100644 --- a/samples/snippets/storage_generate_signed_url_v4.py +++ b/samples/snippets/storage_generate_signed_url_v4.py @@ -49,7 +49,7 @@ def generate_download_signed_url_v4(bucket_name, blob_name): print("Generated GET signed URL:") print(url) print("You can use this URL with any user agent, for example:") - print("curl '{}'".format(url)) + print(f"curl '{url}'") return url diff --git a/samples/snippets/storage_get_autoclass.py b/samples/snippets/storage_get_autoclass.py new file mode 100644 index 000000000..30fa0c4f6 --- /dev/null +++ b/samples/snippets/storage_get_autoclass.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python + +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys + +# [START storage_get_autoclass] +from google.cloud import storage + + +def get_autoclass(bucket_name): + """Get the Autoclass setting for a bucket.""" + # The ID of your GCS bucket + # bucket_name = "my-bucket" + + storage_client = storage.Client() + bucket = storage_client.get_bucket(bucket_name) + autoclass_enabled = bucket.autoclass_enabled + autoclass_toggle_time = bucket.autoclass_toggle_time + terminal_storage_class = bucket.autoclass_terminal_storage_class + tsc_update_time = bucket.autoclass_terminal_storage_class_update_time + + print(f"Autoclass enabled is set to {autoclass_enabled} for {bucket.name} at {autoclass_toggle_time}.") + print(f"Autoclass terminal storage class is set to {terminal_storage_class} for {bucket.name} at {tsc_update_time}.") + + return bucket + + +# [END storage_get_autoclass] + +if __name__ == "__main__": + get_autoclass(bucket_name=sys.argv[1]) diff --git a/samples/snippets/storage_get_bucket_metadata.py b/samples/snippets/storage_get_bucket_metadata.py index 87cd5eddc..c86e154de 100644 --- a/samples/snippets/storage_get_bucket_metadata.py +++ b/samples/snippets/storage_get_bucket_metadata.py @@ -44,6 +44,7 @@ def bucket_metadata(bucket_name): print(f"Retention Effective Time: {bucket.retention_policy_effective_time}") print(f"Retention Period: {bucket.retention_period}") print(f"Retention Policy Locked: {bucket.retention_policy_locked}") + print(f"Object Retention Mode: {bucket.object_retention_mode}") print(f"Requester Pays: {bucket.requester_pays}") print(f"Self Link: {bucket.self_link}") print(f"Time Created: {bucket.time_created}") diff --git a/samples/snippets/storage_get_default_event_based_hold.py b/samples/snippets/storage_get_default_event_based_hold.py index 4cf13914d..08a05f8ef 100644 --- a/samples/snippets/storage_get_default_event_based_hold.py +++ b/samples/snippets/storage_get_default_event_based_hold.py @@ -29,12 +29,10 @@ def get_default_event_based_hold(bucket_name): bucket = storage_client.get_bucket(bucket_name) if bucket.default_event_based_hold: - print("Default event-based hold is enabled for {}".format(bucket_name)) + print(f"Default event-based hold is enabled for {bucket_name}") else: print( - "Default event-based hold is not enabled for {}".format( - bucket_name - ) + f"Default event-based hold is not enabled for {bucket_name}" ) diff --git a/samples/snippets/storage_get_hmac_key.py b/samples/snippets/storage_get_hmac_key.py index 4dc52240d..82b28ff99 100644 --- a/samples/snippets/storage_get_hmac_key.py +++ b/samples/snippets/storage_get_hmac_key.py @@ -34,14 +34,14 @@ def get_key(access_id, project_id): ) print("The HMAC key metadata is:") - print("Service Account Email: {}".format(hmac_key.service_account_email)) - print("Key ID: {}".format(hmac_key.id)) - print("Access ID: {}".format(hmac_key.access_id)) - print("Project ID: {}".format(hmac_key.project)) - print("State: {}".format(hmac_key.state)) - print("Created At: {}".format(hmac_key.time_created)) - print("Updated At: {}".format(hmac_key.updated)) - print("Etag: {}".format(hmac_key.etag)) + print(f"Service Account Email: {hmac_key.service_account_email}") + print(f"Key ID: {hmac_key.id}") + print(f"Access ID: {hmac_key.access_id}") + print(f"Project ID: {hmac_key.project}") + print(f"State: {hmac_key.state}") + print(f"Created At: {hmac_key.time_created}") + print(f"Updated At: {hmac_key.updated}") + print(f"Etag: {hmac_key.etag}") return hmac_key diff --git a/samples/snippets/storage_get_metadata.py b/samples/snippets/storage_get_metadata.py index 
c5ef0b4cc..7216efdb4 100644 --- a/samples/snippets/storage_get_metadata.py +++ b/samples/snippets/storage_get_metadata.py @@ -33,36 +33,37 @@ def blob_metadata(bucket_name, blob_name): # make an HTTP request. blob = bucket.get_blob(blob_name) - print("Blob: {}".format(blob.name)) - print("Bucket: {}".format(blob.bucket.name)) - print("Storage class: {}".format(blob.storage_class)) - print("ID: {}".format(blob.id)) - print("Size: {} bytes".format(blob.size)) - print("Updated: {}".format(blob.updated)) - print("Generation: {}".format(blob.generation)) - print("Metageneration: {}".format(blob.metageneration)) - print("Etag: {}".format(blob.etag)) - print("Owner: {}".format(blob.owner)) - print("Component count: {}".format(blob.component_count)) - print("Crc32c: {}".format(blob.crc32c)) - print("md5_hash: {}".format(blob.md5_hash)) - print("Cache-control: {}".format(blob.cache_control)) - print("Content-type: {}".format(blob.content_type)) - print("Content-disposition: {}".format(blob.content_disposition)) - print("Content-encoding: {}".format(blob.content_encoding)) - print("Content-language: {}".format(blob.content_language)) - print("Metadata: {}".format(blob.metadata)) - print("Custom Time: {}".format(blob.custom_time)) + print(f"Blob: {blob.name}") + print(f"Bucket: {blob.bucket.name}") + print(f"Storage class: {blob.storage_class}") + print(f"ID: {blob.id}") + print(f"Size: {blob.size} bytes") + print(f"Updated: {blob.updated}") + print(f"Generation: {blob.generation}") + print(f"Metageneration: {blob.metageneration}") + print(f"Etag: {blob.etag}") + print(f"Owner: {blob.owner}") + print(f"Component count: {blob.component_count}") + print(f"Crc32c: {blob.crc32c}") + print(f"md5_hash: {blob.md5_hash}") + print(f"Cache-control: {blob.cache_control}") + print(f"Content-type: {blob.content_type}") + print(f"Content-disposition: {blob.content_disposition}") + print(f"Content-encoding: {blob.content_encoding}") + print(f"Content-language: {blob.content_language}") + print(f"Metadata: {blob.metadata}") + print(f"Medialink: {blob.media_link}") + print(f"Custom Time: {blob.custom_time}") print("Temporary hold: ", "enabled" if blob.temporary_hold else "disabled") print( "Event based hold: ", "enabled" if blob.event_based_hold else "disabled", ) + print(f"Retention mode: {blob.retention.mode}") + print(f"Retention retain until time: {blob.retention.retain_until_time}") if blob.retention_expiration_time: print( - "retentionExpirationTime: {}".format( - blob.retention_expiration_time - ) + f"retentionExpirationTime: {blob.retention_expiration_time}" ) diff --git a/samples/snippets/storage_get_requester_pays_status.py b/samples/snippets/storage_get_requester_pays_status.py index 2014d654c..a2eeb34d7 100644 --- a/samples/snippets/storage_get_requester_pays_status.py +++ b/samples/snippets/storage_get_requester_pays_status.py @@ -29,9 +29,9 @@ def get_requester_pays_status(bucket_name): requester_pays_status = bucket.requester_pays if requester_pays_status: - print("Requester Pays is enabled for {}".format(bucket_name)) + print(f"Requester Pays is enabled for {bucket_name}") else: - print("Requester Pays is disabled for {}".format(bucket_name)) + print(f"Requester Pays is disabled for {bucket_name}") # [END storage_get_requester_pays_status] diff --git a/samples/snippets/storage_get_retention_policy.py b/samples/snippets/storage_get_retention_policy.py index f2ca26d26..215f80d5a 100644 --- a/samples/snippets/storage_get_retention_policy.py +++ b/samples/snippets/storage_get_retention_policy.py @@ -28,14 
+28,14 @@ def get_retention_policy(bucket_name): bucket = storage_client.bucket(bucket_name) bucket.reload() - print("Retention Policy for {}".format(bucket_name)) - print("Retention Period: {}".format(bucket.retention_period)) + print(f"Retention Policy for {bucket_name}") + print(f"Retention Period: {bucket.retention_period}") if bucket.retention_policy_locked: print("Retention Policy is locked") if bucket.retention_policy_effective_time: print( - "Effective Time: {}".format(bucket.retention_policy_effective_time) + f"Effective Time: {bucket.retention_policy_effective_time}" ) diff --git a/samples/snippets/storage_get_rpo.py b/samples/snippets/storage_get_rpo.py new file mode 100644 index 000000000..ab40ca3a5 --- /dev/null +++ b/samples/snippets/storage_get_rpo.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python + +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys + +"""Sample that gets RPO (Recovery Point Objective) of a bucket +This sample is used on this page: + https://cloud.google.com/storage/docs/managing-turbo-replication +For more information, see README.md. +""" + +# [START storage_get_rpo] + +from google.cloud import storage + + +def get_rpo(bucket_name): + """Gets the RPO of the bucket""" + # The ID of your GCS bucket + # bucket_name = "my-bucket" + + storage_client = storage.Client() + bucket = storage_client.get_bucket(bucket_name) + rpo = bucket.rpo + + print(f"RPO for {bucket.name} is {rpo}.") + + +# [END storage_get_rpo] + +if __name__ == "__main__": + get_rpo(bucket_name=sys.argv[1]) diff --git a/samples/snippets/storage_get_service_account.py b/samples/snippets/storage_get_service_account.py index 58ababb91..5ac0e5638 100644 --- a/samples/snippets/storage_get_service_account.py +++ b/samples/snippets/storage_get_service_account.py @@ -25,9 +25,7 @@ def get_service_account(): email = storage_client.get_service_account_email() print( - "The GCS service account for project {} is: {} ".format( - storage_client.project, email - ) + f"The GCS service account for project {storage_client.project} is: {email} " ) diff --git a/samples/snippets/storage_get_uniform_bucket_level_access.py b/samples/snippets/storage_get_uniform_bucket_level_access.py index eddb8bc1a..206b9f1ff 100644 --- a/samples/snippets/storage_get_uniform_bucket_level_access.py +++ b/samples/snippets/storage_get_uniform_bucket_level_access.py @@ -30,9 +30,7 @@ def get_uniform_bucket_level_access(bucket_name): if iam_configuration.uniform_bucket_level_access_enabled: print( - "Uniform bucket-level access is enabled for {}.".format( - bucket.name - ) + f"Uniform bucket-level access is enabled for {bucket.name}." ) print( "Bucket will be locked on {}.".format( @@ -41,9 +39,7 @@ def get_uniform_bucket_level_access(bucket_name): ) else: print( - "Uniform bucket-level access is disabled for {}.".format( - bucket.name - ) + f"Uniform bucket-level access is disabled for {bucket.name}." 
) diff --git a/samples/snippets/storage_list_file_archived_generations.py b/samples/snippets/storage_list_file_archived_generations.py index dc2f5eaf5..419cc3da4 100644 --- a/samples/snippets/storage_list_file_archived_generations.py +++ b/samples/snippets/storage_list_file_archived_generations.py @@ -29,7 +29,7 @@ def list_file_archived_generations(bucket_name): blobs = storage_client.list_blobs(bucket_name, versions=True) for blob in blobs: - print("{},{}".format(blob.name, blob.generation)) + print(f"{blob.name},{blob.generation}") # [END storage_list_file_archived_generations] diff --git a/samples/snippets/storage_list_files.py b/samples/snippets/storage_list_files.py index c6a80d9fa..5e80c833a 100644 --- a/samples/snippets/storage_list_files.py +++ b/samples/snippets/storage_list_files.py @@ -29,6 +29,7 @@ def list_blobs(bucket_name): # Note: Client.list_blobs requires at least package version 1.17.0. blobs = storage_client.list_blobs(bucket_name) + # Note: The call returns a response only when the iterator is consumed. for blob in blobs: print(blob.name) diff --git a/samples/snippets/storage_list_files_with_prefix.py b/samples/snippets/storage_list_files_with_prefix.py index f79413fb6..be7468cba 100644 --- a/samples/snippets/storage_list_files_with_prefix.py +++ b/samples/snippets/storage_list_files_with_prefix.py @@ -53,6 +53,7 @@ def list_blobs_with_prefix(bucket_name, prefix, delimiter=None): # Note: Client.list_blobs requires at least package version 1.17.0. blobs = storage_client.list_blobs(bucket_name, prefix=prefix, delimiter=delimiter) + # Note: The call returns a response only when the iterator is consumed. print("Blobs:") for blob in blobs: print(blob.name) diff --git a/samples/snippets/storage_list_hmac_keys.py b/samples/snippets/storage_list_hmac_keys.py index 8e5c53b58..a09616fa5 100644 --- a/samples/snippets/storage_list_hmac_keys.py +++ b/samples/snippets/storage_list_hmac_keys.py @@ -31,9 +31,9 @@ def list_keys(project_id): print("HMAC Keys:") for hmac_key in hmac_keys: print( - "Service Account Email: {}".format(hmac_key.service_account_email) + f"Service Account Email: {hmac_key.service_account_email}" ) - print("Access ID: {}".format(hmac_key.access_id)) + print(f"Access ID: {hmac_key.access_id}") return hmac_keys diff --git a/samples/snippets/storage_lock_retention_policy.py b/samples/snippets/storage_lock_retention_policy.py index d59572f5d..adff364d7 100644 --- a/samples/snippets/storage_lock_retention_policy.py +++ b/samples/snippets/storage_lock_retention_policy.py @@ -33,11 +33,9 @@ def lock_retention_policy(bucket_name): # and retention period can only be increased. 
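A side note on the list_blobs() hunks in this patch (storage_list_files.py and storage_list_files_with_prefix.py), which add the comment that a response is returned only when the iterator is consumed: a small sketch illustrating that behaviour (the helper is hypothetical, not part of this patch):

from google.cloud import storage


def count_blobs(bucket_name, prefix=None):
    storage_client = storage.Client()
    # No request is sent here; list_blobs() only builds a lazy iterator.
    blobs = storage_client.list_blobs(bucket_name, prefix=prefix)
    # Iterating is what actually pages through the API.
    return sum(1 for _ in blobs)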
bucket.lock_retention_policy() - print("Retention policy for {} is now locked".format(bucket_name)) + print(f"Retention policy for {bucket_name} is now locked") print( - "Retention policy effective as of {}".format( - bucket.retention_policy_effective_time - ) + f"Retention policy effective as of {bucket.retention_policy_effective_time}" ) diff --git a/samples/snippets/storage_make_public.py b/samples/snippets/storage_make_public.py index 79ae40d12..489508cf6 100644 --- a/samples/snippets/storage_make_public.py +++ b/samples/snippets/storage_make_public.py @@ -32,9 +32,7 @@ def make_blob_public(bucket_name, blob_name): blob.make_public() print( - "Blob {} is publicly accessible at {}".format( - blob.name, blob.public_url - ) + f"Blob {blob.name} is publicly accessible at {blob.public_url}" ) diff --git a/samples/snippets/storage_move_file.py b/samples/snippets/storage_move_file.py index a881a38ba..b2e5144d0 100644 --- a/samples/snippets/storage_move_file.py +++ b/samples/snippets/storage_move_file.py @@ -20,7 +20,7 @@ from google.cloud import storage -def move_blob(bucket_name, blob_name, destination_bucket_name, destination_blob_name): +def move_blob(bucket_name, blob_name, destination_bucket_name, destination_blob_name,): """Moves a blob from one bucket to another with a new name.""" # The ID of your GCS bucket # bucket_name = "your-bucket-name" @@ -37,8 +37,17 @@ def move_blob(bucket_name, blob_name, destination_bucket_name, destination_blob_ source_blob = source_bucket.blob(blob_name) destination_bucket = storage_client.bucket(destination_bucket_name) + # Optional: set a generation-match precondition to avoid potential race conditions + # and data corruptions. The request is aborted if the object's + # generation number does not match your precondition. For a destination + # object that does not yet exist, set the if_generation_match precondition to 0. + # If the destination object already exists in your bucket, set instead a + # generation-match precondition using its generation number. + # There is also an `if_source_generation_match` parameter, which is not used in this example. + destination_generation_match_precondition = 0 + blob_copy = source_bucket.copy_blob( - source_blob, destination_bucket, destination_blob_name + source_blob, destination_bucket, destination_blob_name, if_generation_match=destination_generation_match_precondition, ) source_bucket.delete_blob(blob_name) diff --git a/samples/snippets/storage_object_csek_to_cmek.py b/samples/snippets/storage_object_csek_to_cmek.py index 9d4d710bf..9a915f08d 100644 --- a/samples/snippets/storage_object_csek_to_cmek.py +++ b/samples/snippets/storage_object_csek_to_cmek.py @@ -33,12 +33,22 @@ def object_csek_to_cmek(bucket_name, blob_name, encryption_key, kms_key_name): current_encryption_key = base64.b64decode(encryption_key) source_blob = bucket.blob(blob_name, encryption_key=current_encryption_key) - destination_blob = bucket.blob(blob_name, kms_key_name=kms_key_name) - token, rewritten, total = destination_blob.rewrite(source_blob) + generation_match_precondition = None + token = None + + # Optional: set a generation-match precondition to avoid potential race conditions + # and data corruptions. The request to rewrite is aborted if the object's + # generation number does not match your precondition. + source_blob.reload() # Fetch blob metadata to use in generation_match_precondition. 
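Related to the storage_move_file.py hunk above, which sets if_generation_match=0 so the copy only succeeds when the destination object does not exist yet: a sketch of what handling that precondition failure could look like (hypothetical helper, not part of this patch):

from google.api_core.exceptions import PreconditionFailed
from google.cloud import storage


def copy_blob_if_destination_absent(
    bucket_name, blob_name, destination_bucket_name, destination_blob_name
):
    storage_client = storage.Client()
    source_bucket = storage_client.bucket(bucket_name)
    destination_bucket = storage_client.bucket(destination_bucket_name)
    try:
        source_bucket.copy_blob(
            source_bucket.blob(blob_name),
            destination_bucket,
            destination_blob_name,
            if_generation_match=0,  # Succeed only if the destination does not exist.
        )
        print(f"Copied {blob_name} to {destination_bucket_name}/{destination_blob_name}.")
    except PreconditionFailed:
        print(f"{destination_blob_name} already exists in {destination_bucket_name}; copy skipped.")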
+ generation_match_precondition = source_blob.generation - while token is not None: - token, rewritten, total = destination_blob.rewrite(source_blob, token=token) + while True: + token, bytes_rewritten, total_bytes = destination_blob.rewrite( + source_blob, token=token, if_generation_match=generation_match_precondition + ) + if token is None: + break print( "Blob {} in bucket {} is now managed by the KMS key {} instead of a customer-supplied encryption key".format( diff --git a/samples/snippets/storage_object_get_kms_key.py b/samples/snippets/storage_object_get_kms_key.py index dddfc9151..7604e6eba 100644 --- a/samples/snippets/storage_object_get_kms_key.py +++ b/samples/snippets/storage_object_get_kms_key.py @@ -32,7 +32,7 @@ def object_get_kms_key(bucket_name, blob_name): kms_key = blob.kms_key_name - print("The KMS key of a blob is {}".format(blob.kms_key_name)) + print(f"The KMS key of a blob is {blob.kms_key_name}") return kms_key diff --git a/samples/snippets/storage_print_bucket_acl.py b/samples/snippets/storage_print_bucket_acl.py index 0804f7a9a..55417f1bc 100644 --- a/samples/snippets/storage_print_bucket_acl.py +++ b/samples/snippets/storage_print_bucket_acl.py @@ -27,7 +27,7 @@ def print_bucket_acl(bucket_name): bucket = storage_client.bucket(bucket_name) for entry in bucket.acl: - print("{}: {}".format(entry["role"], entry["entity"])) + print(f"{entry['role']}: {entry['entity']}") # [END storage_print_bucket_acl] diff --git a/samples/snippets/storage_print_file_acl.py b/samples/snippets/storage_print_file_acl.py index f34a5283b..8dfc4e984 100644 --- a/samples/snippets/storage_print_file_acl.py +++ b/samples/snippets/storage_print_file_acl.py @@ -28,7 +28,7 @@ def print_blob_acl(bucket_name, blob_name): blob = bucket.blob(blob_name) for entry in blob.acl: - print("{}: {}".format(entry["role"], entry["entity"])) + print(f"{entry['role']}: {entry['entity']}") # [END storage_print_file_acl] diff --git a/samples/snippets/storage_release_event_based_hold.py b/samples/snippets/storage_release_event_based_hold.py index 8c3c11b6f..6b4a2ccb5 100644 --- a/samples/snippets/storage_release_event_based_hold.py +++ b/samples/snippets/storage_release_event_based_hold.py @@ -29,11 +29,18 @@ def release_event_based_hold(bucket_name, blob_name): storage_client = storage.Client() bucket = storage_client.bucket(bucket_name) blob = bucket.blob(blob_name) + metageneration_match_precondition = None + + # Optional: set a metageneration-match precondition to avoid potential race + # conditions and data corruptions. The request to patch is aborted if the + # object's metageneration does not match your precondition. + blob.reload() # Fetch blob metadata to use in metageneration_match_precondition. 
+ metageneration_match_precondition = blob.metageneration blob.event_based_hold = False - blob.patch() + blob.patch(if_metageneration_match=metageneration_match_precondition) - print("Event based hold was released for {}".format(blob_name)) + print(f"Event based hold was released for {blob_name}") # [END storage_release_event_based_hold] diff --git a/samples/snippets/storage_release_temporary_hold.py b/samples/snippets/storage_release_temporary_hold.py index 02a6ca96c..64c7607c1 100644 --- a/samples/snippets/storage_release_temporary_hold.py +++ b/samples/snippets/storage_release_temporary_hold.py @@ -29,9 +29,16 @@ def release_temporary_hold(bucket_name, blob_name): storage_client = storage.Client() bucket = storage_client.bucket(bucket_name) blob = bucket.blob(blob_name) + metageneration_match_precondition = None + + # Optional: set a metageneration-match precondition to avoid potential race + # conditions and data corruptions. The request to patch is aborted if the + # object's metageneration does not match your precondition. + blob.reload() # Fetch blob metadata to use in metageneration_match_precondition. + metageneration_match_precondition = blob.metageneration blob.temporary_hold = False - blob.patch() + blob.patch(if_metageneration_match=metageneration_match_precondition) print("Temporary hold was release for #{blob_name}") diff --git a/samples/snippets/storage_remove_bucket_default_owner.py b/samples/snippets/storage_remove_bucket_default_owner.py index beaf6be84..e6f3c495e 100644 --- a/samples/snippets/storage_remove_bucket_default_owner.py +++ b/samples/snippets/storage_remove_bucket_default_owner.py @@ -40,9 +40,7 @@ def remove_bucket_default_owner(bucket_name, user_email): bucket.default_object_acl.save() print( - "Removed user {} from the default acl of bucket {}.".format( - user_email, bucket_name - ) + f"Removed user {user_email} from the default acl of bucket {bucket_name}." 
) diff --git a/samples/snippets/storage_remove_bucket_iam_member.py b/samples/snippets/storage_remove_bucket_iam_member.py index ef75a1a15..2efc29e30 100644 --- a/samples/snippets/storage_remove_bucket_iam_member.py +++ b/samples/snippets/storage_remove_bucket_iam_member.py @@ -38,7 +38,7 @@ def remove_bucket_iam_member(bucket_name, role, member): bucket.set_iam_policy(policy) - print("Removed {} with role {} from {}.".format(member, role, bucket_name)) + print(f"Removed {member} with role {role} from {bucket_name}.") # [END storage_remove_bucket_iam_member] diff --git a/samples/snippets/storage_remove_bucket_label.py b/samples/snippets/storage_remove_bucket_label.py index 58bbfef2d..fc4a5b4e7 100644 --- a/samples/snippets/storage_remove_bucket_label.py +++ b/samples/snippets/storage_remove_bucket_label.py @@ -39,7 +39,7 @@ def remove_bucket_label(bucket_name): bucket.labels = labels bucket.patch() - print("Removed labels on {}.".format(bucket.name)) + print(f"Removed labels on {bucket.name}.") pprint.pprint(bucket.labels) diff --git a/samples/snippets/storage_remove_bucket_owner.py b/samples/snippets/storage_remove_bucket_owner.py index f54e7a7cc..561ba9175 100644 --- a/samples/snippets/storage_remove_bucket_owner.py +++ b/samples/snippets/storage_remove_bucket_owner.py @@ -38,7 +38,7 @@ def remove_bucket_owner(bucket_name, user_email): bucket.acl.user(user_email).revoke_owner() bucket.acl.save() - print("Removed user {} from bucket {}.".format(user_email, bucket_name)) + print(f"Removed user {user_email} from bucket {bucket_name}.") # [END storage_remove_bucket_owner] diff --git a/samples/snippets/storage_remove_cors_configuration.py b/samples/snippets/storage_remove_cors_configuration.py index 48ee74338..ad97371f4 100644 --- a/samples/snippets/storage_remove_cors_configuration.py +++ b/samples/snippets/storage_remove_cors_configuration.py @@ -29,7 +29,7 @@ def remove_cors_configuration(bucket_name): bucket.cors = [] bucket.patch() - print("Remove CORS policies for bucket {}.".format(bucket.name)) + print(f"Remove CORS policies for bucket {bucket.name}.") return bucket diff --git a/samples/snippets/storage_remove_file_owner.py b/samples/snippets/storage_remove_file_owner.py index 9db83cce0..315a747ad 100644 --- a/samples/snippets/storage_remove_file_owner.py +++ b/samples/snippets/storage_remove_file_owner.py @@ -39,9 +39,7 @@ def remove_blob_owner(bucket_name, blob_name, user_email): blob.acl.save() print( - "Removed user {} from blob {} in bucket {}.".format( - user_email, blob_name, bucket_name - ) + f"Removed user {user_email} from blob {blob_name} in bucket {bucket_name}." 
) diff --git a/samples/snippets/storage_remove_retention_policy.py b/samples/snippets/storage_remove_retention_policy.py index cb8ee548c..9ede8053a 100644 --- a/samples/snippets/storage_remove_retention_policy.py +++ b/samples/snippets/storage_remove_retention_policy.py @@ -37,7 +37,7 @@ def remove_retention_policy(bucket_name): bucket.retention_period = None bucket.patch() - print("Removed bucket {} retention policy".format(bucket.name)) + print(f"Removed bucket {bucket.name} retention policy") # [END storage_remove_retention_policy] diff --git a/samples/snippets/storage_rename_file.py b/samples/snippets/storage_rename_file.py index b47e18621..1125007c6 100644 --- a/samples/snippets/storage_rename_file.py +++ b/samples/snippets/storage_rename_file.py @@ -35,7 +35,7 @@ def rename_blob(bucket_name, blob_name, new_name): new_blob = bucket.rename_blob(blob, new_name) - print("Blob {} has been renamed to {}".format(blob.name, new_blob.name)) + print(f"Blob {blob.name} has been renamed to {new_blob.name}") # [END storage_rename_file] diff --git a/samples/snippets/storage_rotate_encryption_key.py b/samples/snippets/storage_rotate_encryption_key.py index 663ee4796..174947b84 100644 --- a/samples/snippets/storage_rotate_encryption_key.py +++ b/samples/snippets/storage_rotate_encryption_key.py @@ -42,17 +42,23 @@ def rotate_encryption_key( destination_blob = bucket.blob( blob_name, encryption_key=new_encryption_key ) - + generation_match_precondition = None token = None + # Optional: set a generation-match precondition to avoid potential race conditions + # and data corruptions. The request to rewrite is aborted if the object's + # generation number does not match your precondition. + source_blob.reload() # Fetch blob metadata to use in generation_match_precondition. + generation_match_precondition = source_blob.generation + while True: token, bytes_rewritten, total_bytes = destination_blob.rewrite( - source_blob, token=token + source_blob, token=token, if_generation_match=generation_match_precondition ) if token is None: break - print("Key rotation complete for Blob {}".format(blob_name)) + print(f"Key rotation complete for Blob {blob_name}") # [END storage_rotate_encryption_key] diff --git a/samples/snippets/storage_set_autoclass.py b/samples/snippets/storage_set_autoclass.py new file mode 100644 index 000000000..eec5a550f --- /dev/null +++ b/samples/snippets/storage_set_autoclass.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python + +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys + +# [START storage_set_autoclass] +from google.cloud import storage + + +def set_autoclass(bucket_name): + """Configure the Autoclass setting for a bucket. + + terminal_storage_class field is optional and defaults to NEARLINE if not otherwise specified. + Valid terminal_storage_class values are NEARLINE and ARCHIVE. + """ + # The ID of your GCS bucket + # bucket_name = "my-bucket" + # Enable Autoclass for a bucket. 
Set enabled to false to disable Autoclass. + # Set Autoclass.TerminalStorageClass, valid values are NEARLINE and ARCHIVE. + enabled = True + terminal_storage_class = "ARCHIVE" + + storage_client = storage.Client() + bucket = storage_client.bucket(bucket_name) + + bucket.autoclass_enabled = enabled + bucket.autoclass_terminal_storage_class = terminal_storage_class + bucket.patch() + print(f"Autoclass enabled is set to {bucket.autoclass_enabled} for {bucket.name} at {bucket.autoclass_toggle_time}.") + print(f"Autoclass terminal storage class is {bucket.autoclass_terminal_storage_class}.") + + return bucket + + +# [END storage_set_autoclass] + +if __name__ == "__main__": + set_autoclass(bucket_name=sys.argv[1]) diff --git a/samples/snippets/storage_set_bucket_public_iam.py b/samples/snippets/storage_set_bucket_public_iam.py index 4b7df89df..0fb33f59c 100644 --- a/samples/snippets/storage_set_bucket_public_iam.py +++ b/samples/snippets/storage_set_bucket_public_iam.py @@ -39,7 +39,7 @@ def set_bucket_public_iam( bucket.set_iam_policy(policy) - print("Bucket {} is now publicly readable".format(bucket.name)) + print(f"Bucket {bucket.name} is now publicly readable") # [END storage_set_bucket_public_iam] diff --git a/samples/snippets/storage_set_client_endpoint.py b/samples/snippets/storage_set_client_endpoint.py new file mode 100644 index 000000000..99ca283a1 --- /dev/null +++ b/samples/snippets/storage_set_client_endpoint.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python + +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys + +"""Sample that creates a new bucket in a specified region +""" + +# [START storage_set_client_endpoint] + +from google.cloud import storage + + +def set_client_endpoint(api_endpoint): + """Initiates client with specified endpoint.""" + # api_endpoint = 'https://storage.googleapis.com' + + storage_client = storage.Client(client_options={'api_endpoint': api_endpoint}) + + print(f"client initiated with endpoint: {storage_client._connection.API_BASE_URL}") + + return storage_client + + +# [END storage_set_client_endpoint] + +if __name__ == "__main__": + set_client_endpoint(api_endpoint=sys.argv[1]) diff --git a/samples/snippets/storage_set_event_based_hold.py b/samples/snippets/storage_set_event_based_hold.py index 52a89b88e..76f7fd7ee 100644 --- a/samples/snippets/storage_set_event_based_hold.py +++ b/samples/snippets/storage_set_event_based_hold.py @@ -28,11 +28,18 @@ def set_event_based_hold(bucket_name, blob_name): storage_client = storage.Client() bucket = storage_client.bucket(bucket_name) blob = bucket.blob(blob_name) + metageneration_match_precondition = None + + # Optional: set a metageneration-match precondition to avoid potential race + # conditions and data corruptions. The request to patch is aborted if the + # object's metageneration does not match your precondition. + blob.reload() # Fetch blob metadata to use in metageneration_match_precondition. 
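The hold and metadata hunks in this patch all follow the same pattern: reload() the blob, then patch() with if_metageneration_match. If another writer patches the metadata in between, the request fails with HTTP 412. A sketch of a simple read-modify-write retry around that pattern (hypothetical helper, not part of this patch):

from google.api_core.exceptions import PreconditionFailed
from google.cloud import storage


def set_event_based_hold_with_retry(bucket_name, blob_name, attempts=3):
    storage_client = storage.Client()
    blob = storage_client.bucket(bucket_name).blob(blob_name)
    for _ in range(attempts):
        blob.reload()  # Refresh the metageneration before each attempt.
        blob.event_based_hold = True
        try:
            blob.patch(if_metageneration_match=blob.metageneration)
            print(f"Event based hold was set for {blob_name}")
            return
        except PreconditionFailed:
            continue  # Metadata changed concurrently; re-read and try again.
    raise RuntimeError(f"Could not set the hold on {blob_name} after {attempts} attempts.")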
+ metageneration_match_precondition = blob.metageneration blob.event_based_hold = True - blob.patch() + blob.patch(if_metageneration_match=metageneration_match_precondition) - print("Event based hold was set for {}".format(blob_name)) + print(f"Event based hold was set for {blob_name}") # [END storage_set_event_based_hold] diff --git a/samples/snippets/storage_set_metadata.py b/samples/snippets/storage_set_metadata.py index 07529ac68..6a4a9fb9e 100644 --- a/samples/snippets/storage_set_metadata.py +++ b/samples/snippets/storage_set_metadata.py @@ -28,11 +28,18 @@ def set_blob_metadata(bucket_name, blob_name): storage_client = storage.Client() bucket = storage_client.bucket(bucket_name) blob = bucket.get_blob(blob_name) + metageneration_match_precondition = None + + # Optional: set a metageneration-match precondition to avoid potential race + # conditions and data corruptions. The request to patch is aborted if the + # object's metageneration does not match your precondition. + metageneration_match_precondition = blob.metageneration + metadata = {'color': 'Red', 'name': 'Test'} blob.metadata = metadata - blob.patch() + blob.patch(if_metageneration_match=metageneration_match_precondition) - print("The metadata for the blob {} is {}".format(blob.name, blob.metadata)) + print(f"The metadata for the blob {blob.name} is {blob.metadata}") # [END storage_set_metadata] diff --git a/samples/snippets/storage_set_object_retention_policy.py b/samples/snippets/storage_set_object_retention_policy.py new file mode 100644 index 000000000..d0d3a54ec --- /dev/null +++ b/samples/snippets/storage_set_object_retention_policy.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python + +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import sys + +# [START storage_set_object_retention_policy] +from google.cloud import storage + + +def set_object_retention_policy(bucket_name, contents, destination_blob_name): + """Set the object retention policy of a file.""" + + # The ID of your GCS bucket + # bucket_name = "your-bucket-name" + + # The contents to upload to the file + # contents = "these are my contents" + + # The ID of your GCS object + # destination_blob_name = "storage-object-name" + + storage_client = storage.Client() + bucket = storage_client.bucket(bucket_name) + blob = bucket.blob(destination_blob_name) + blob.upload_from_string(contents) + + # Set the retention policy for the file. + blob.retention.mode = "Unlocked" + retention_date = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(days=10) + blob.retention.retain_until_time = retention_date + blob.patch() + print( + f"Retention policy for file {destination_blob_name} was set to: {blob.retention.mode}." + ) + + # To modify an existing policy on an unlocked file object, pass in the override parameter. 
+ new_retention_date = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(days=9) + blob.retention.retain_until_time = new_retention_date + blob.patch(override_unlocked_retention=True) + print( + f"Retention policy for file {destination_blob_name} was updated to: {blob.retention.retain_until_time}." + ) + + +# [END storage_set_object_retention_policy] + + +if __name__ == "__main__": + set_object_retention_policy( + bucket_name=sys.argv[1], + contents=sys.argv[2], + destination_blob_name=sys.argv[3], + ) diff --git a/samples/snippets/storage_set_rpo_async_turbo.py b/samples/snippets/storage_set_rpo_async_turbo.py new file mode 100644 index 000000000..a351cb8f8 --- /dev/null +++ b/samples/snippets/storage_set_rpo_async_turbo.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python + +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys + +"""Sample that sets RPO (Recovery Point Objective) to ASYNC_TURBO +This sample is used on this page: + https://cloud.google.com/storage/docs/managing-turbo-replication +For more information, see README.md. +""" + +# [START storage_set_rpo_async_turbo] + +from google.cloud import storage +from google.cloud.storage.constants import RPO_ASYNC_TURBO + + +def set_rpo_async_turbo(bucket_name): + """Sets the RPO to ASYNC_TURBO, enabling the turbo replication feature""" + # The ID of your GCS bucket + # bucket_name = "my-bucket" + + storage_client = storage.Client() + bucket = storage_client.bucket(bucket_name) + + bucket.rpo = RPO_ASYNC_TURBO + bucket.patch() + + print(f"RPO is set to ASYNC_TURBO for {bucket.name}.") + + +# [END storage_set_rpo_async_turbo] + +if __name__ == "__main__": + set_rpo_async_turbo(bucket_name=sys.argv[1]) diff --git a/samples/snippets/storage_set_rpo_default.py b/samples/snippets/storage_set_rpo_default.py new file mode 100644 index 000000000..883fee0c9 --- /dev/null +++ b/samples/snippets/storage_set_rpo_default.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python + +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys + +"""Sample that sets the replication behavior or recovery point objective (RPO) to default. +This sample is used on this page: + https://cloud.google.com/storage/docs/managing-turbo-replication +For more information, see README.md. 
+""" + +# [START storage_set_rpo_default] + +from google.cloud import storage +from google.cloud.storage.constants import RPO_DEFAULT + + +def set_rpo_default(bucket_name): + """Sets the RPO to DEFAULT, disabling the turbo replication feature""" + # The ID of your GCS bucket + # bucket_name = "my-bucket" + + storage_client = storage.Client() + bucket = storage_client.bucket(bucket_name) + + bucket.rpo = RPO_DEFAULT + bucket.patch() + + print(f"RPO is set to DEFAULT for {bucket.name}.") + + +# [END storage_set_rpo_default] + +if __name__ == "__main__": + set_rpo_default(bucket_name=sys.argv[1]) diff --git a/samples/snippets/storage_set_temporary_hold.py b/samples/snippets/storage_set_temporary_hold.py index edeb3c578..a91521bcc 100644 --- a/samples/snippets/storage_set_temporary_hold.py +++ b/samples/snippets/storage_set_temporary_hold.py @@ -28,9 +28,16 @@ def set_temporary_hold(bucket_name, blob_name): storage_client = storage.Client() bucket = storage_client.bucket(bucket_name) blob = bucket.blob(blob_name) + metageneration_match_precondition = None + + # Optional: set a metageneration-match precondition to avoid potential race + # conditions and data corruptions. The request to patch is aborted if the + # object's metageneration does not match your precondition. + blob.reload() # Fetch blob metadata to use in metageneration_match_precondition. + metageneration_match_precondition = blob.metageneration blob.temporary_hold = True - blob.patch() + blob.patch(if_metageneration_match=metageneration_match_precondition) print("Temporary hold was set for #{blob_name}") diff --git a/samples/snippets/storage_trace_quickstart.py b/samples/snippets/storage_trace_quickstart.py new file mode 100644 index 000000000..322edc240 --- /dev/null +++ b/samples/snippets/storage_trace_quickstart.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python + +# Copyright 2024 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys + +""" +Sample that exports OpenTelemetry Traces collected from the Storage client to Cloud Trace. +""" + + +def run_quickstart(bucket_name, blob_name, data): + # [START storage_enable_otel_tracing] + + from opentelemetry import trace + from opentelemetry.exporter.cloud_trace import CloudTraceSpanExporter + from opentelemetry.resourcedetector.gcp_resource_detector import ( + GoogleCloudResourceDetector, + ) + from opentelemetry.sdk.trace import TracerProvider + from opentelemetry.sdk.trace.export import BatchSpanProcessor + from opentelemetry.sdk.trace.sampling import ALWAYS_ON + # Optional: Enable traces emitted from the requests HTTP library. + from opentelemetry.instrumentation.requests import RequestsInstrumentor + + from google.cloud import storage + + # The ID of your GCS bucket + # bucket_name = "your-bucket-name" + # The ID of your GCS object + # blob_name = "your-object-name" + # The contents to upload to the file + # data = "The quick brown fox jumps over the lazy dog." 
+ + # In this sample, we use Google Cloud Trace to export the OpenTelemetry + # traces: https://cloud.google.com/trace/docs/setup/python-ot + # Choose and configure the exporter for your environment. + + tracer_provider = TracerProvider( + # Sampling is set to ALWAYS_ON. + # It is recommended to sample based on a ratio to control trace ingestion volume, + # for instance, sampler=TraceIdRatioBased(0.2) + sampler=ALWAYS_ON, + resource=GoogleCloudResourceDetector().detect(), + ) + + # Export to Google Cloud Trace. + tracer_provider.add_span_processor(BatchSpanProcessor(CloudTraceSpanExporter())) + trace.set_tracer_provider(tracer_provider) + + # Optional: Enable traces emitted from the requests HTTP library. + RequestsInstrumentor().instrument(tracer_provider=tracer_provider) + + # Get the tracer and create a new root span. + tracer = tracer_provider.get_tracer("My App") + with tracer.start_as_current_span("trace-quickstart"): + # Instantiate a storage client and perform a write and read workload. + storage_client = storage.Client() + bucket = storage_client.bucket(bucket_name) + blob = bucket.blob(blob_name) + blob.upload_from_string(data) + print(f"{blob_name} uploaded to {bucket_name}.") + + blob.download_as_bytes() + print("Downloaded storage object {} from bucket {}.".format(blob_name, bucket_name)) + + # [END storage_enable_otel_tracing] + + +if __name__ == "__main__": + run_quickstart(bucket_name=sys.argv[1], blob_name=sys.argv[2], data=sys.argv[3]) diff --git a/samples/snippets/storage_transfer_manager_download_bucket.py b/samples/snippets/storage_transfer_manager_download_bucket.py new file mode 100644 index 000000000..5d94a67ae --- /dev/null +++ b/samples/snippets/storage_transfer_manager_download_bucket.py @@ -0,0 +1,75 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# [START storage_transfer_manager_download_bucket] +def download_bucket_with_transfer_manager( + bucket_name, destination_directory="", workers=8, max_results=1000 +): + """Download all of the blobs in a bucket, concurrently in a process pool. + + The filename of each blob once downloaded is derived from the blob name and + the `destination_directory `parameter. For complete control of the filename + of each blob, use transfer_manager.download_many() instead. + + Directories will be created automatically as needed, for instance to + accommodate blob names that include slashes. + """ + + # The ID of your GCS bucket + # bucket_name = "your-bucket-name" + + # The directory on your computer to which to download all of the files. This + # string is prepended (with os.path.join()) to the name of each blob to form + # the full path. Relative paths and absolute paths are both accepted. An + # empty string means "the current working directory". Note that this + # parameter allows accepts directory traversal ("../" etc.) and is not + # intended for unsanitized end user input. 
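A companion note to the storage_trace_quickstart.py sample above: when iterating locally it can be handy to print spans to stdout instead of exporting them to Cloud Trace. A sketch using the OpenTelemetry SDK's console exporter (assumes the opentelemetry-sdk package; illustrative, not part of this patch):

from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter


def enable_console_tracing():
    """Configure a tracer provider that prints spans to stdout."""
    tracer_provider = TracerProvider()
    tracer_provider.add_span_processor(BatchSpanProcessor(ConsoleSpanExporter()))
    trace.set_tracer_provider(tracer_provider)
    return tracer_provider

Once this provider is installed globally, the storage client emits its spans through it exactly as in the quickstart above.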
+ # destination_directory = "" + + # The maximum number of processes to use for the operation. The performance + # impact of this value depends on the use case, but smaller files usually + # benefit from a higher number of processes. Each additional process occupies + # some CPU and memory resources until finished. Threads can be used instead + # of processes by passing `worker_type=transfer_manager.THREAD`. + # workers=8 + + # The maximum number of results to fetch from bucket.list_blobs(). This + # sample code fetches all of the blobs up to max_results and queues them all + # for download at once. Though they will still be executed in batches up to + # the processes limit, queueing them all at once can be taxing on system + # memory if buckets are very large. Adjust max_results as needed for your + # system environment, or set it to None if you are sure the bucket is not + # too large to hold in memory easily. + # max_results=1000 + + from google.cloud.storage import Client, transfer_manager + + storage_client = Client() + bucket = storage_client.bucket(bucket_name) + + blob_names = [blob.name for blob in bucket.list_blobs(max_results=max_results)] + + results = transfer_manager.download_many_to_path( + bucket, blob_names, destination_directory=destination_directory, max_workers=workers + ) + + for name, result in zip(blob_names, results): + # The results list is either `None` or an exception for each blob in + # the input list, in order. + + if isinstance(result, Exception): + print("Failed to download {} due to exception: {}".format(name, result)) + else: + print("Downloaded {} to {}.".format(name, destination_directory + name)) +# [END storage_transfer_manager_download_bucket] diff --git a/samples/snippets/storage_transfer_manager_download_chunks_concurrently.py b/samples/snippets/storage_transfer_manager_download_chunks_concurrently.py new file mode 100644 index 000000000..b6ac9982d --- /dev/null +++ b/samples/snippets/storage_transfer_manager_download_chunks_concurrently.py @@ -0,0 +1,55 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# [START storage_transfer_manager_download_chunks_concurrently] +def download_chunks_concurrently( + bucket_name, blob_name, filename, chunk_size=32 * 1024 * 1024, workers=8 +): + """Download a single file in chunks, concurrently in a process pool.""" + + # The ID of your GCS bucket + # bucket_name = "your-bucket-name" + + # The file to be downloaded + # blob_name = "target-file" + + # The destination filename or path + # filename = "" + + # The size of each chunk. The performance impact of this value depends on + # the use case. The remote service has a minimum of 5 MiB and a maximum of + # 5 GiB. + # chunk_size = 32 * 1024 * 1024 (32 MiB) + + # The maximum number of processes to use for the operation. The performance + # impact of this value depends on the use case, but smaller files usually + # benefit from a higher number of processes. 
Each additional process occupies + # some CPU and memory resources until finished. Threads can be used instead + # of processes by passing `worker_type=transfer_manager.THREAD`. + # workers=8 + + from google.cloud.storage import Client, transfer_manager + + storage_client = Client() + bucket = storage_client.bucket(bucket_name) + blob = bucket.blob(blob_name) + + transfer_manager.download_chunks_concurrently( + blob, filename, chunk_size=chunk_size, max_workers=workers + ) + + print("Downloaded {} to {}.".format(blob_name, filename)) + + +# [END storage_transfer_manager_download_chunks_concurrently] diff --git a/samples/snippets/storage_transfer_manager_download_many.py b/samples/snippets/storage_transfer_manager_download_many.py new file mode 100644 index 000000000..02cb9b887 --- /dev/null +++ b/samples/snippets/storage_transfer_manager_download_many.py @@ -0,0 +1,70 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# [START storage_transfer_manager_download_many] +def download_many_blobs_with_transfer_manager( + bucket_name, blob_names, destination_directory="", workers=8 +): + """Download blobs in a list by name, concurrently in a process pool. + + The filename of each blob once downloaded is derived from the blob name and + the `destination_directory `parameter. For complete control of the filename + of each blob, use transfer_manager.download_many() instead. + + Directories will be created automatically as needed to accommodate blob + names that include slashes. + """ + + # The ID of your GCS bucket + # bucket_name = "your-bucket-name" + + # The list of blob names to download. The names of each blobs will also + # be the name of each destination file (use transfer_manager.download_many() + # instead to control each destination file name). If there is a "/" in the + # blob name, then corresponding directories will be created on download. + # blob_names = ["myblob", "myblob2"] + + # The directory on your computer to which to download all of the files. This + # string is prepended (with os.path.join()) to the name of each blob to form + # the full path. Relative paths and absolute paths are both accepted. An + # empty string means "the current working directory". Note that this + # parameter allows accepts directory traversal ("../" etc.) and is not + # intended for unsanitized end user input. + # destination_directory = "" + + # The maximum number of processes to use for the operation. The performance + # impact of this value depends on the use case, but smaller files usually + # benefit from a higher number of processes. Each additional process occupies + # some CPU and memory resources until finished. Threads can be used instead + # of processes by passing `worker_type=transfer_manager.THREAD`. 
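The transfer manager samples in this patch note that threads can be used instead of processes by passing worker_type=transfer_manager.THREAD. A sketch of the bucket download sample with that switch applied (hypothetical helper, not part of this patch); threads avoid the pickling and process start-up cost, which can help when transferring many small objects:

from google.cloud.storage import Client, transfer_manager


def download_bucket_with_threads(bucket_name, destination_directory="", workers=8):
    storage_client = Client()
    bucket = storage_client.bucket(bucket_name)
    blob_names = [blob.name for blob in bucket.list_blobs()]

    results = transfer_manager.download_many_to_path(
        bucket,
        blob_names,
        destination_directory=destination_directory,
        max_workers=workers,
        worker_type=transfer_manager.THREAD,  # Use threads instead of processes.
    )

    for name, result in zip(blob_names, results):
        if isinstance(result, Exception):
            print(f"Failed to download {name}: {result}")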
+ # workers=8 + + from google.cloud.storage import Client, transfer_manager + + storage_client = Client() + bucket = storage_client.bucket(bucket_name) + + results = transfer_manager.download_many_to_path( + bucket, blob_names, destination_directory=destination_directory, max_workers=workers + ) + + for name, result in zip(blob_names, results): + # The results list is either `None` or an exception for each blob in + # the input list, in order. + + if isinstance(result, Exception): + print("Failed to download {} due to exception: {}".format(name, result)) + else: + print("Downloaded {} to {}.".format(name, destination_directory + name)) +# [END storage_transfer_manager_download_many] diff --git a/samples/snippets/storage_transfer_manager_upload_chunks_concurrently.py b/samples/snippets/storage_transfer_manager_upload_chunks_concurrently.py new file mode 100644 index 000000000..009f09648 --- /dev/null +++ b/samples/snippets/storage_transfer_manager_upload_chunks_concurrently.py @@ -0,0 +1,57 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# [START storage_transfer_manager_upload_chunks_concurrently] +def upload_chunks_concurrently( + bucket_name, + source_filename, + destination_blob_name, + chunk_size=32 * 1024 * 1024, + workers=8, +): + """Upload a single file, in chunks, concurrently in a process pool.""" + # The ID of your GCS bucket + # bucket_name = "your-bucket-name" + + # The path to your file to upload + # source_filename = "local/path/to/file" + + # The ID of your GCS object + # destination_blob_name = "storage-object-name" + + # The size of each chunk. The performance impact of this value depends on + # the use case. The remote service has a minimum of 5 MiB and a maximum of + # 5 GiB. + # chunk_size = 32 * 1024 * 1024 (32 MiB) + + # The maximum number of processes to use for the operation. The performance + # impact of this value depends on the use case. Each additional process + # occupies some CPU and memory resources until finished. Threads can be used + # instead of processes by passing `worker_type=transfer_manager.THREAD`. 
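The chunk-size guidance above has a small arithmetic consequence worth making explicit; a minimal sketch, assuming a 1 GiB object and the 32 MiB default chunk size used in these samples:

chunk_size = 32 * 1024 * 1024  # 32 MiB, the default in these samples
object_size = 1 * 1024 * 1024 * 1024  # assumed 1 GiB object
num_chunks = -(-object_size // chunk_size)  # ceiling division -> 32 chunks
# With only 32 chunks, setting workers above 32 adds no extra parallelism for this object.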
+ # workers=8 + + from google.cloud.storage import Client, transfer_manager + + storage_client = Client() + bucket = storage_client.bucket(bucket_name) + blob = bucket.blob(destination_blob_name) + + transfer_manager.upload_chunks_concurrently( + source_filename, blob, chunk_size=chunk_size, max_workers=workers + ) + + print(f"File {source_filename} uploaded to {destination_blob_name}.") + + +# [END storage_transfer_manager_upload_chunks_concurrently] diff --git a/samples/snippets/storage_transfer_manager_upload_directory.py b/samples/snippets/storage_transfer_manager_upload_directory.py new file mode 100644 index 000000000..329ca1081 --- /dev/null +++ b/samples/snippets/storage_transfer_manager_upload_directory.py @@ -0,0 +1,80 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# [START storage_transfer_manager_upload_directory] +def upload_directory_with_transfer_manager(bucket_name, source_directory, workers=8): + """Upload every file in a directory, including all files in subdirectories. + + Each blob name is derived from the filename, not including the `source_directory` + parameter itself. For complete control of the blob name for each file (and + other aspects of individual blob metadata), use + transfer_manager.upload_many() instead. + """ + + # The ID of your GCS bucket + # bucket_name = "your-bucket-name" + + # The directory on your computer to upload. Files in the directory and its + # subdirectories will be uploaded. An empty string means "the current + # working directory". + # source_directory="" + + # The maximum number of processes to use for the operation. The performance + # impact of this value depends on the use case, but smaller files usually + # benefit from a higher number of processes. Each additional process occupies + # some CPU and memory resources until finished. Threads can be used instead + # of processes by passing `worker_type=transfer_manager.THREAD`. + # workers=8 + + from pathlib import Path + + from google.cloud.storage import Client, transfer_manager + + storage_client = Client() + bucket = storage_client.bucket(bucket_name) + + # Generate a list of paths (in string form) relative to `source_directory`. + # This can be done in a single list comprehension, but is expanded into + # multiple lines here for clarity. + + # First, recursively get all files in `source_directory` as Path objects. + directory_as_path_obj = Path(source_directory) + paths = directory_as_path_obj.rglob("*") + + # Filter so the list only includes files, not directories themselves. + file_paths = [path for path in paths if path.is_file()] + + # These paths are relative to the current working directory. Next, make them + # relative to `source_directory`. + relative_paths = [path.relative_to(source_directory) for path in file_paths] + + # Finally, convert them all to strings. + string_paths = [str(path) for path in relative_paths] + + print("Found {} files.".format(len(string_paths))) + + # Start the upload.
+ results = transfer_manager.upload_many_from_filenames( + bucket, string_paths, source_directory=source_directory, max_workers=workers + ) + + for name, result in zip(string_paths, results): + # The results list is either `None` or an exception for each filename in + # the input list, in order. + + if isinstance(result, Exception): + print("Failed to upload {} due to exception: {}".format(name, result)) + else: + print("Uploaded {} to {}.".format(name, bucket.name)) +# [END storage_transfer_manager_upload_directory] diff --git a/samples/snippets/storage_transfer_manager_upload_many.py b/samples/snippets/storage_transfer_manager_upload_many.py new file mode 100644 index 000000000..1b9b9fc89 --- /dev/null +++ b/samples/snippets/storage_transfer_manager_upload_many.py @@ -0,0 +1,67 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# [START storage_transfer_manager_upload_many] +def upload_many_blobs_with_transfer_manager( + bucket_name, filenames, source_directory="", workers=8 +): + """Upload every file in a list to a bucket, concurrently in a process pool. + + Each blob name is derived from the filename, not including the + `source_directory` parameter. For complete control of the blob name for each + file (and other aspects of individual blob metadata), use + transfer_manager.upload_many() instead. + """ + + # The ID of your GCS bucket + # bucket_name = "your-bucket-name" + + # A list (or other iterable) of filenames to upload. + # filenames = ["file_1.txt", "file_2.txt"] + + # The directory on your computer that is the root of all of the files in the + # list of filenames. This string is prepended (with os.path.join()) to each + # filename to get the full path to the file. Relative paths and absolute + # paths are both accepted. This string is not included in the name of the + # uploaded blob; it is only used to find the source files. An empty string + # means "the current working directory". Note that this parameter allows + # directory traversal (e.g. "/", "../") and is not intended for unsanitized + # end user input. + # source_directory="" + + # The maximum number of processes to use for the operation. The performance + # impact of this value depends on the use case, but smaller files usually + # benefit from a higher number of processes. Each additional process occupies + # some CPU and memory resources until finished. Threads can be used instead + # of processes by passing `worker_type=transfer_manager.THREAD`. + # workers=8 + + from google.cloud.storage import Client, transfer_manager + + storage_client = Client() + bucket = storage_client.bucket(bucket_name) + + results = transfer_manager.upload_many_from_filenames( + bucket, filenames, source_directory=source_directory, max_workers=workers + ) + + for name, result in zip(filenames, results): + # The results list is either `None` or an exception for each filename in + # the input list, in order. 
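Since each entry in `results` is either `None` or an exception, a caller that prefers to fail loudly could aggregate the errors rather than only printing them as the loop below does; a small hedged variation on that handling:

failures = {
    name: result
    for name, result in zip(filenames, results)
    if isinstance(result, Exception)
}
if failures:
    raise RuntimeError(f"{len(failures)} of {len(filenames)} uploads failed: {sorted(failures)}")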
+ + if isinstance(result, Exception): + print("Failed to upload {} due to exception: {}".format(name, result)) + else: + print("Uploaded {} to {}.".format(name, bucket.name)) +# [END storage_transfer_manager_upload_many] diff --git a/samples/snippets/storage_upload_encrypted_file.py b/samples/snippets/storage_upload_encrypted_file.py index e7d02c67b..08f58154e 100644 --- a/samples/snippets/storage_upload_encrypted_file.py +++ b/samples/snippets/storage_upload_encrypted_file.py @@ -36,6 +36,10 @@ def upload_encrypted_blob( The file will be encrypted by Google Cloud Storage and only retrievable using the provided encryption key. """ + # bucket_name = "your-bucket-name" + # source_file_name = "local/path/to/file" + # destination_blob_name = "storage-object-name" + # base64_encryption_key = "TIbv/fjexq+VmtXzAlc63J4z5kFmWJ6NdAPQulQBT7g=" storage_client = storage.Client() bucket = storage_client.bucket(bucket_name) @@ -48,12 +52,18 @@ def upload_encrypted_blob( destination_blob_name, encryption_key=encryption_key ) - blob.upload_from_filename(source_file_name) + # Optional: set a generation-match precondition to avoid potential race conditions + # and data corruptions. The request to upload is aborted if the object's + # generation number does not match your precondition. For a destination + # object that does not yet exist, set the if_generation_match precondition to 0. + # If the destination object already exists in your bucket, set instead a + # generation-match precondition using its generation number. + generation_match_precondition = 0 + + blob.upload_from_filename(source_file_name, if_generation_match=generation_match_precondition) print( - "File {} uploaded to {}.".format( - source_file_name, destination_blob_name - ) + f"File {source_file_name} uploaded to {destination_blob_name}." ) diff --git a/samples/snippets/storage_upload_file.py b/samples/snippets/storage_upload_file.py index fb02c3632..1e7ceda5e 100644 --- a/samples/snippets/storage_upload_file.py +++ b/samples/snippets/storage_upload_file.py @@ -33,12 +33,18 @@ def upload_blob(bucket_name, source_file_name, destination_blob_name): bucket = storage_client.bucket(bucket_name) blob = bucket.blob(destination_blob_name) - blob.upload_from_filename(source_file_name) + # Optional: set a generation-match precondition to avoid potential race conditions + # and data corruptions. The request to upload is aborted if the object's + # generation number does not match your precondition. For a destination + # object that does not yet exist, set the if_generation_match precondition to 0. + # If the destination object already exists in your bucket, set instead a + # generation-match precondition using its generation number. + generation_match_precondition = 0 + + blob.upload_from_filename(source_file_name, if_generation_match=generation_match_precondition) print( - "File {} uploaded to {}.".format( - source_file_name, destination_blob_name - ) + f"File {source_file_name} uploaded to {destination_blob_name}." 
) diff --git a/samples/snippets/storage_upload_from_memory.py b/samples/snippets/storage_upload_from_memory.py index e5f61ff93..eff3d222a 100644 --- a/samples/snippets/storage_upload_from_memory.py +++ b/samples/snippets/storage_upload_from_memory.py @@ -22,10 +22,13 @@ def upload_blob_from_memory(bucket_name, contents, destination_blob_name): """Uploads a file to the bucket.""" + # The ID of your GCS bucket # bucket_name = "your-bucket-name" + # The contents to upload to the file # contents = "these are my contents" + # The ID of your GCS object # destination_blob_name = "storage-object-name" @@ -36,14 +39,12 @@ def upload_blob_from_memory(bucket_name, contents, destination_blob_name): blob.upload_from_string(contents) print( - "{} with contents {} uploaded to {}.".format( - destination_blob_name, contents, destination_blob_name - ) + f"{destination_blob_name} with contents {contents} uploaded to {bucket_name}." ) - # [END storage_file_upload_from_memory] + if __name__ == "__main__": upload_blob_from_memory( bucket_name=sys.argv[1], diff --git a/samples/snippets/storage_upload_from_stream.py b/samples/snippets/storage_upload_from_stream.py new file mode 100644 index 000000000..08eb25889 --- /dev/null +++ b/samples/snippets/storage_upload_from_stream.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python + +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# [START storage_stream_file_upload] +from google.cloud import storage + + +def upload_blob_from_stream(bucket_name, file_obj, destination_blob_name): + """Uploads bytes from a stream or other file-like object to a blob.""" + # The ID of your GCS bucket + # bucket_name = "your-bucket-name" + + # The stream or file (file-like object) from which to read + # import io + # file_obj = io.BytesIO() + # file_obj.write(b"This is test data.") + + # The desired name of the uploaded GCS object (blob) + # destination_blob_name = "storage-object-name" + + # Construct a client-side representation of the blob. + storage_client = storage.Client() + bucket = storage_client.bucket(bucket_name) + blob = bucket.blob(destination_blob_name) + + # Rewind the stream to the beginning. This step can be omitted if the input + # stream will always be at a correct position. + file_obj.seek(0) + + # Upload data from the stream to your bucket. + blob.upload_from_file(file_obj) + + print( + f"Stream data uploaded to {destination_blob_name} in bucket {bucket_name}." 
+ ) + +# [END storage_stream_file_upload] diff --git a/samples/snippets/storage_upload_with_kms_key.py b/samples/snippets/storage_upload_with_kms_key.py index e83c10aea..6e8fe0394 100644 --- a/samples/snippets/storage_upload_with_kms_key.py +++ b/samples/snippets/storage_upload_with_kms_key.py @@ -21,7 +21,7 @@ def upload_blob_with_kms( - bucket_name, source_file_name, destination_blob_name, kms_key_name + bucket_name, source_file_name, destination_blob_name, kms_key_name, ): """Uploads a file to the bucket, encrypting it with the given KMS key.""" # bucket_name = "your-bucket-name" @@ -32,7 +32,16 @@ def upload_blob_with_kms( storage_client = storage.Client() bucket = storage_client.bucket(bucket_name) blob = bucket.blob(destination_blob_name, kms_key_name=kms_key_name) - blob.upload_from_filename(source_file_name) + + # Optional: set a generation-match precondition to avoid potential race conditions + # and data corruptions. The request to upload is aborted if the object's + # generation number does not match your precondition. For a destination + # object that does not yet exist, set the if_generation_match precondition to 0. + # If the destination object already exists in your bucket, set instead a + # generation-match precondition using its generation number. + generation_match_precondition = 0 + + blob.upload_from_filename(source_file_name, if_generation_match=generation_match_precondition) print( "File {} uploaded to {} with encryption key {}.".format( diff --git a/samples/snippets/storage_view_bucket_iam_members.py b/samples/snippets/storage_view_bucket_iam_members.py index 5272f0ddb..184a1361f 100644 --- a/samples/snippets/storage_view_bucket_iam_members.py +++ b/samples/snippets/storage_view_bucket_iam_members.py @@ -30,7 +30,7 @@ def view_bucket_iam_members(bucket_name): policy = bucket.get_iam_policy(requested_policy_version=3) for binding in policy.bindings: - print("Role: {}, Members: {}".format(binding["role"], binding["members"])) + print(f"Role: {binding['role']}, Members: {binding['members']}") # [END storage_view_bucket_iam_members] diff --git a/samples/snippets/uniform_bucket_level_access_test.py b/samples/snippets/uniform_bucket_level_access_test.py index b43fa016f..8b7964038 100644 --- a/samples/snippets/uniform_bucket_level_access_test.py +++ b/samples/snippets/uniform_bucket_level_access_test.py @@ -23,7 +23,7 @@ def test_get_uniform_bucket_level_access(bucket, capsys): ) out, _ = capsys.readouterr() assert ( - "Uniform bucket-level access is disabled for {}.".format(bucket.name) + f"Uniform bucket-level access is disabled for {bucket.name}." in out ) @@ -35,7 +35,7 @@ def test_enable_uniform_bucket_level_access(bucket, capsys): ) out, _ = capsys.readouterr() assert ( - "Uniform bucket-level access was enabled for {}.".format(bucket.name) + f"Uniform bucket-level access was enabled for {bucket.name}." in out ) @@ -47,6 +47,6 @@ def test_disable_uniform_bucket_level_access(bucket, capsys): ) out, _ = capsys.readouterr() assert ( - "Uniform bucket-level access was disabled for {}.".format(bucket.name) + f"Uniform bucket-level access was disabled for {bucket.name}." in out ) diff --git a/scripts/decrypt-secrets.sh b/scripts/decrypt-secrets.sh index 21f6d2a26..120b0ddc4 100755 --- a/scripts/decrypt-secrets.sh +++ b/scripts/decrypt-secrets.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright 2015 Google Inc. All rights reserved. +# Copyright 2024 Google LLC All rights reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/scripts/readme-gen/readme_gen.py b/scripts/readme-gen/readme_gen.py index d309d6e97..8f5e248a0 100644 --- a/scripts/readme-gen/readme_gen.py +++ b/scripts/readme-gen/readme_gen.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -# Copyright 2016 Google Inc +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -28,19 +28,22 @@ jinja_env = jinja2.Environment( trim_blocks=True, loader=jinja2.FileSystemLoader( - os.path.abspath(os.path.join(os.path.dirname(__file__), 'templates')))) + os.path.abspath(os.path.join(os.path.dirname(__file__), "templates")) + ), + autoescape=True, +) -README_TMPL = jinja_env.get_template('README.tmpl.rst') +README_TMPL = jinja_env.get_template("README.tmpl.rst") def get_help(file): - return subprocess.check_output(['python', file, '--help']).decode() + return subprocess.check_output(["python", file, "--help"]).decode() def main(): parser = argparse.ArgumentParser() - parser.add_argument('source') - parser.add_argument('--destination', default='README.rst') + parser.add_argument("source") + parser.add_argument("--destination", default="README.rst") args = parser.parse_args() @@ -48,9 +51,9 @@ def main(): root = os.path.dirname(source) destination = os.path.join(root, args.destination) - jinja_env.globals['get_help'] = get_help + jinja_env.globals["get_help"] = get_help - with io.open(source, 'r') as f: + with io.open(source, "r") as f: config = yaml.load(f) # This allows get_help to execute in the right directory. @@ -58,9 +61,9 @@ def main(): output = README_TMPL.render(config) - with io.open(destination, 'w') as f: + with io.open(destination, "w") as f: f.write(output) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/scripts/readme-gen/templates/install_deps.tmpl.rst b/scripts/readme-gen/templates/install_deps.tmpl.rst index 275d64989..6f069c6c8 100644 --- a/scripts/readme-gen/templates/install_deps.tmpl.rst +++ b/scripts/readme-gen/templates/install_deps.tmpl.rst @@ -12,7 +12,7 @@ Install Dependencies .. _Python Development Environment Setup Guide: https://cloud.google.com/python/setup -#. Create a virtualenv. Samples are compatible with Python 3.6+. +#. Create a virtualenv. Samples are compatible with Python 3.7+. .. code-block:: bash diff --git a/setup.cfg b/setup.cfg index c3a2b39f6..052350089 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/setup.py b/setup.py index 9264e4f56..84eedd4f2 100644 --- a/setup.py +++ b/setup.py @@ -28,14 +28,26 @@ # 'Development Status :: 5 - Production/Stable' release_status = "Development Status :: 5 - Production/Stable" dependencies = [ - "google-auth >= 1.25.0, < 3.0dev", - "google-api-core >= 1.29.0, < 3.0dev", - "google-cloud-core >= 1.6.0, < 3.0dev", - "google-resumable-media >= 1.3.0", + "google-auth >= 2.26.1, < 3.0dev", + "google-api-core >= 2.15.0, <3.0.0dev", + "google-cloud-core >= 2.3.0, < 3.0dev", + # The dependency "google-resumable-media" is no longer used. 
However, the + # dependency is still included here to accommodate users who may be + # importing exception classes from the google-resumable-media without + # installing it explicitly. See the python-storage README for details on + # exceptions and importing. Users who are not importing + # google-resumable-media classes in their application can safely disregard + # this dependency. + "google-resumable-media >= 2.7.2", "requests >= 2.18.0, < 3.0.0dev", - "protobuf", + "google-crc32c >= 1.0, < 2.0dev", ] -extras = {} +extras = { + "protobuf": ["protobuf<6.0.0dev"], + "tracing": [ + "opentelemetry-api >= 1.1.0", + ], +} # Setup boilerplate below this line. @@ -54,14 +66,11 @@ # Only include packages under the 'google' namespace. Do not include tests, # benchmarks, etc. packages = [ - package for package in setuptools.find_packages() if package.startswith("google") + package + for package in setuptools.find_namespace_packages() + if package.startswith("google") ] -# Determine which namespaces are needed. -namespaces = ["google"] -if "google.cloud" in packages: - namespaces.append("google.cloud") - setuptools.setup( name=name, @@ -78,20 +87,20 @@ "License :: OSI Approved :: Apache Software License", "Programming Language :: Python", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Operating System :: OS Independent", "Topic :: Internet", ], platforms="Posix; MacOS X; Windows", packages=packages, - namespace_packages=namespaces, install_requires=dependencies, extras_require=extras, - python_requires=">=3.6", + python_requires=">=3.7", include_package_data=True, zip_safe=False, ) diff --git a/testing/constraints-3.12.txt b/testing/constraints-3.12.txt new file mode 100644 index 000000000..e69de29bb diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt deleted file mode 100644 index a2729fd6f..000000000 --- a/testing/constraints-3.6.txt +++ /dev/null @@ -1,12 +0,0 @@ -# This constraints file is used to check that lower bounds -# are correct in setup.py -# List *all* library dependencies and extras in this file. -# Pin the version to the lower bound. -# -# e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", -# Then this file should have foo==1.14.0 -google-auth==1.25.0 -google-api-core==1.29.0 -google-cloud-core==1.6.0 -google-resumable-media==1.3.0 -requests==2.18.0 diff --git a/tests/conformance/conftest.py b/tests/conformance/conftest.py new file mode 100644 index 000000000..e62b74e8b --- /dev/null +++ b/tests/conformance/conftest.py @@ -0,0 +1,130 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
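The new `tracing` extra declared in setup.py above only adds `opentelemetry-api` as an optional dependency; a hedged sketch (assumed usage, not part of this diff) of probing for it before creating spans:

import importlib.util

if importlib.util.find_spec("opentelemetry") is not None:
    from opentelemetry import trace

    tracer = trace.get_tracer(__name__)
    with tracer.start_as_current_span("storage-sample-span"):
        pass  # storage calls made here would be grouped under this span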
+ +import os +import random +import uuid + +import pytest + +from google.auth.credentials import AnonymousCredentials +from google.cloud import storage +from google.cloud.exceptions import NotFound + + +"""Environment variable or default host for Storage testbench emulator.""" +_HOST = os.environ.get("STORAGE_EMULATOR_HOST", "http://localhost:9000") + + +"""Emulated project information for the storage testbench.""" +_CONF_TEST_PROJECT_ID = "my-project-id" +_CONF_TEST_SERVICE_ACCOUNT_EMAIL = ( + "my-service-account@my-project-id.iam.gserviceaccount.com" +) +_CONF_TEST_PUBSUB_TOPIC_NAME = "my-topic-name" + + +"""Create content payload in different sizes.""" + + +def _create_block(desired_kib): + line = "abcdefXYZ123456789ADDINGrandom#" # len(line) = 31 + multiplier = int(desired_kib / (len(line) + 1)) + lines = "".join( + line + str(random.randint(0, 9)) for _ in range(multiplier) + ) # add random single digit integers + return 1024 * lines + + +_STRING_CONTENT = "hello world" +_SIZE_9MB = 9216 # 9*1024 KiB + + +######################################################################################################################################## +### Pytest Fixtures to Populate Retry Conformance Test Resources ####################################################################### +######################################################################################################################################## + + +@pytest.fixture +def client(): + client = storage.Client( + project=_CONF_TEST_PROJECT_ID, + credentials=AnonymousCredentials(), + client_options={"api_endpoint": _HOST}, + ) + return client + + +@pytest.fixture +def bucket(client): + bucket = client.bucket(uuid.uuid4().hex) + client.create_bucket(bucket) + yield bucket + try: + bucket.delete(force=True) + except NotFound: # in cases where bucket is deleted within the test + pass + + +@pytest.fixture +def object(client, bucket): + blob = client.bucket(bucket.name).blob(uuid.uuid4().hex) + blob.upload_from_string(_STRING_CONTENT) + blob.reload() + yield blob + try: + blob.delete() + except NotFound: # in cases where object is deleted within the test + pass + + +@pytest.fixture +def notification(client, bucket): + notification = client.bucket(bucket.name).notification( + topic_name=_CONF_TEST_PUBSUB_TOPIC_NAME + ) + notification.create() + notification.reload() + yield notification + try: + notification.delete() + except NotFound: # in cases where notification is deleted within the test + pass + + +@pytest.fixture +def hmac_key(client): + hmac_key, _secret = client.create_hmac_key( + service_account_email=_CONF_TEST_SERVICE_ACCOUNT_EMAIL, + project_id=_CONF_TEST_PROJECT_ID, + ) + yield hmac_key + try: + hmac_key.state = "INACTIVE" + hmac_key.update() + hmac_key.delete() + except NotFound: # in cases where hmac_key is deleted within the test + pass + + +@pytest.fixture +def file_data(client, bucket): + blob = client.bucket(bucket.name).blob(uuid.uuid4().hex) + payload = _create_block(_SIZE_9MB) + blob.upload_from_string(payload) + yield blob, payload + try: + blob.delete() + except NotFound: # in cases where object is deleted within the test + pass diff --git a/tests/conformance/retry_strategy_test_data.json b/tests/conformance/retry_strategy_test_data.json index b807c6a72..e50018081 100644 --- a/tests/conformance/retry_strategy_test_data.json +++ b/tests/conformance/retry_strategy_test_data.json @@ -1,244 +1,283 @@ -{ - "retryTests": [ - { - "id": 1, - "description": 
"always_idempotent", - "cases": [ - { - "instructions": ["return-503", "return-503"] - }, - { - "instructions": ["return-reset-connection", "return-reset-connection"] - }, - { - "instructions": ["return-reset-connection", "return-503"] - } - ], - "methods": [ - {"name": "storage.bucket_acl.get", "resources": ["BUCKET"]}, - {"name": "storage.bucket_acl.list", "resources": ["BUCKET"]}, - {"name": "storage.buckets.delete", "resources": ["BUCKET"]}, - {"name": "storage.buckets.get", "resources": ["BUCKET"]}, - {"name": "storage.buckets.getIamPolicy", "resources": ["BUCKET"]}, - {"name": "storage.buckets.insert", "resources": []}, - {"name": "storage.buckets.list", "resources": ["BUCKET"]}, - {"name": "storage.buckets.lockRetentionPolicy", "resources": ["BUCKET"]}, - {"name": "storage.buckets.testIamPermissions", "resources": ["BUCKET"]}, - {"name": "storage.default_object_acl.get", "resources": ["BUCKET"]}, - {"name": "storage.default_object_acl.list", "resources": ["BUCKET"]}, - {"name": "storage.hmacKey.delete", "resources": ["HMAC_KEY"]}, - {"name": "storage.hmacKey.get", "resources": ["HMAC_KEY"]}, - {"name": "storage.hmacKey.list", "resources": ["HMAC_KEY"]}, - {"name": "storage.notifications.delete", "resources": ["BUCKET", "NOTIFICATION"]}, - {"name": "storage.notifications.get", "resources": ["BUCKET", "NOTIFICATION"]}, - {"name": "storage.notifications.list", "resources": ["BUCKET", "NOTIFICATION"]}, - {"name": "storage.object_acl.get", "resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.object_acl.list", "resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.objects.get", "resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.objects.list", "resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.serviceaccount.get", "resources": []} - ], - "preconditionProvided": false, - "expectSuccess": true - }, - { - "id": 2, - "description": "conditionally_idempotent_retries_when_precondition_is_present", - "cases": [ - { - "instructions": ["return-503", "return-503"] - }, - { - "instructions": ["return-reset-connection", "return-reset-connection"] - }, - { - "instructions": ["return-reset-connection", "return-503"] - } - ], - "methods": [ - {"name": "storage.buckets.patch", "resources": ["BUCKET"]}, - {"name": "storage.buckets.setIamPolicy", "resources": ["BUCKET"]}, - {"name": "storage.buckets.update", "resources": ["BUCKET"]}, - {"name": "storage.hmacKey.update", "resources": ["HMAC_KEY"]}, - {"name": "storage.objects.compose", "resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.objects.copy", "resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.objects.delete", "resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.objects.insert", "resources": ["BUCKET"]}, - {"name": "storage.objects.patch", "resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.objects.rewrite", "resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.objects.update", "resources": ["BUCKET", "OBJECT"]} - ], - "preconditionProvided": true, - "expectSuccess": true - }, - { - "id": 3, - "description": "conditionally_idempotent_no_retries_when_precondition_is_absent", - "cases": [ - { - "instructions": ["return-503"] - }, - { - "instructions": ["return-reset-connection"] - } - ], - "methods": [ - {"name": "storage.buckets.patch", "resources": ["BUCKET"]}, - {"name": "storage.buckets.setIamPolicy", "resources": ["BUCKET"]}, - {"name": "storage.buckets.update", "resources": ["BUCKET"]}, - {"name": "storage.hmacKey.update", "resources": ["HMAC_KEY"]}, - {"name": "storage.objects.compose", "resources": ["BUCKET", 
"OBJECT"]}, - {"name": "storage.objects.copy", "resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.objects.delete", "resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.objects.insert", "resources": ["BUCKET"]}, - {"name": "storage.objects.patch", "resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.objects.rewrite", "resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.objects.update", "resources": ["BUCKET", "OBJECT"]} - ], - "preconditionProvided": false, - "expectSuccess": false - }, - { - "id": 4, - "description": "non_idempotent", - "cases": [ - { - "instructions": ["return-503"] - }, - { - "instructions": ["return-reset-connection"] - } - ], - "methods": [ - {"name": "storage.bucket_acl.delete", "resources": ["BUCKET"]}, - {"name": "storage.bucket_acl.insert", "resources": ["BUCKET"]}, - {"name": "storage.bucket_acl.patch", "resources": ["BUCKET"]}, - {"name": "storage.bucket_acl.update", "resources": ["BUCKET"]}, - {"name": "storage.default_object_acl.delete", "resources": ["BUCKET"]}, - {"name": "storage.default_object_acl.insert", "resources": ["BUCKET"]}, - {"name": "storage.default_object_acl.patch", "resources": ["BUCKET"]}, - {"name": "storage.default_object_acl.update", "resources": ["BUCKET"]}, - {"name": "storage.hmacKey.create", "resources": []}, - {"name": "storage.notifications.insert", "resources": ["BUCKET"]}, - {"name": "storage.object_acl.delete", "resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.object_acl.insert", "resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.object_acl.patch", "resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.object_acl.update", "resources": ["BUCKET", "OBJECT"]} - ], - "preconditionProvided": false, - "expectSuccess": false - }, - { - "id": 5, - "description": "non_retryable_errors", - "cases": [ - { - "instructions": ["return-400"] - }, - { - "instructions": ["return-401"] - } - ], - "methods": [ - {"name": "storage.bucket_acl.delete", "resources": ["BUCKET"]}, - {"name": "storage.bucket_acl.get", "resources": ["BUCKET"]}, - {"name": "storage.bucket_acl.insert", "resources": ["BUCKET"]}, - {"name": "storage.bucket_acl.list", "resources": ["BUCKET"]}, - {"name": "storage.bucket_acl.patch", "resources": ["BUCKET"]}, - {"name": "storage.bucket_acl.update", "resources": ["BUCKET"]}, - {"name": "storage.buckets.delete", "resources": ["BUCKET"]}, - {"name": "storage.buckets.get", "resources": ["BUCKET"]}, - {"name": "storage.buckets.getIamPolicy", "resources": ["BUCKET"]}, - {"name": "storage.buckets.insert", "resources": ["BUCKET"]}, - {"name": "storage.buckets.list", "resources": ["BUCKET"]}, - {"name": "storage.buckets.lockRetentionPolicy", "resources": ["BUCKET"]}, - {"name": "storage.buckets.patch", "resources": ["BUCKET"]}, - {"name": "storage.buckets.setIamPolicy", "resources": ["BUCKET"]}, - {"name": "storage.buckets.testIamPermissions", "resources": ["BUCKET"]}, - {"name": "storage.buckets.update", "resources": ["BUCKET"]}, - {"name": "storage.default_object_acl.delete", "resources": ["BUCKET"]}, - {"name": "storage.default_object_acl.get", "resources": ["BUCKET"]}, - {"name": "storage.default_object_acl.insert", "resources": ["BUCKET"]}, - {"name": "storage.default_object_acl.list", "resources": ["BUCKET"]}, - {"name": "storage.default_object_acl.patch", "resources": ["BUCKET"]}, - {"name": "storage.default_object_acl.update", "resources": ["BUCKET"]}, - {"name": "storage.hmacKey.create", "resources": []}, - {"name": "storage.hmacKey.delete", "resources": ["HMAC_KEY"]}, - {"name": "storage.hmacKey.get", 
"resources": ["HMAC_KEY"]}, - {"name": "storage.hmacKey.list", "resources": ["HMAC_KEY"]}, - {"name": "storage.hmacKey.update", "resources": ["HMAC_KEY"]}, - {"name": "storage.notifications.delete", "resources": ["BUCKET", "NOTIFICATION"]}, - {"name": "storage.notifications.get", "resources": ["BUCKET", "NOTIFICATION"]}, - {"name": "storage.notifications.insert", "resources": ["BUCKET", "NOTIFICATION"]}, - {"name": "storage.notifications.list", "resources": ["BUCKET", "NOTIFICATION"]}, - {"name": "storage.object_acl.delete", "resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.object_acl.get", "resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.object_acl.insert", "resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.object_acl.list", "resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.object_acl.patch", "resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.object_acl.update", "resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.objects.compose", "resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.objects.copy", "resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.objects.delete", "resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.objects.get", "resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.objects.insert", "resources": ["BUCKET"]}, - {"name": "storage.objects.list", "resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.objects.patch", "resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.objects.rewrite", "resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.objects.update", "resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.serviceaccount.get", "resources": []} - ], - "preconditionProvided": false, - "expectSuccess": false - }, - { - "id": 6, - "description": "mix_retryable_non_retryable_errors", - "cases": [ - { - "instructions": ["return-503", "return-400"] - }, - { - "instructions": ["return-reset-connection", "return-401"] - } - ], - "methods": [ - {"name": "storage.bucket_acl.get", "resources": ["BUCKET"]}, - {"name": "storage.bucket_acl.list", "resources": ["BUCKET"]}, - {"name": "storage.buckets.delete", "resources": ["BUCKET"]}, - {"name": "storage.buckets.get", "resources": ["BUCKET"]}, - {"name": "storage.buckets.getIamPolicy", "resources": ["BUCKET"]}, - {"name": "storage.buckets.insert", "resources": []}, - {"name": "storage.buckets.list", "resources": ["BUCKET"]}, - {"name": "storage.buckets.lockRetentionPolicy", "resources": ["BUCKET"]}, - {"name": "storage.buckets.patch", "resources": ["BUCKET"]}, - {"name": "storage.buckets.setIamPolicy", "resources": ["BUCKET"]}, - {"name": "storage.buckets.testIamPermissions", "resources": ["BUCKET"]}, - {"name": "storage.buckets.update", "resources": ["BUCKET"]}, - {"name": "storage.default_object_acl.get", "resources": ["BUCKET"]}, - {"name": "storage.default_object_acl.list", "resources": ["BUCKET"]}, - {"name": "storage.hmacKey.delete", "resources": ["HMAC_KEY"]}, - {"name": "storage.hmacKey.get", "resources": ["HMAC_KEY"]}, - {"name": "storage.hmacKey.list", "resources": ["HMAC_KEY"]}, - {"name": "storage.hmacKey.update", "resources": ["HMAC_KEY"]}, - {"name": "storage.notifications.delete", "resources": ["BUCKET", "NOTIFICATION"]}, - {"name": "storage.notifications.get", "resources": ["BUCKET", "NOTIFICATION"]}, - {"name": "storage.notifications.list", "resources": ["BUCKET", "NOTIFICATION"]}, - {"name": "storage.object_acl.get", "resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.object_acl.list", "resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.objects.compose", 
"resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.objects.copy", "resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.objects.delete", "resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.objects.get", "resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.objects.list", "resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.objects.insert", "resources": ["BUCKET"]}, - {"name": "storage.objects.patch", "resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.objects.rewrite", "resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.objects.update", "resources": ["BUCKET", "OBJECT"]}, - {"name": "storage.serviceaccount.get", "resources": []} - ], - "preconditionProvided": true, - "expectSuccess": false - } - ] - } \ No newline at end of file +{ + "retryTests": [ + { + "id": 1, + "description": "always_idempotent", + "cases": [ + { + "instructions": ["return-503", "return-503"] + }, + { + "instructions": ["return-reset-connection", "return-reset-connection"] + }, + { + "instructions": ["return-reset-connection", "return-503"] + } + ], + "methods": [ + {"name": "storage.bucket_acl.get", "resources": ["BUCKET"]}, + {"name": "storage.bucket_acl.list", "resources": ["BUCKET"]}, + {"name": "storage.buckets.delete", "resources": ["BUCKET"]}, + {"name": "storage.buckets.get", "resources": ["BUCKET"]}, + {"name": "storage.buckets.getIamPolicy", "resources": ["BUCKET"]}, + {"name": "storage.buckets.insert", "resources": []}, + {"name": "storage.buckets.list", "resources": ["BUCKET"]}, + {"name": "storage.buckets.lockRetentionPolicy", "resources": ["BUCKET"]}, + {"name": "storage.buckets.testIamPermissions", "resources": ["BUCKET"]}, + {"name": "storage.default_object_acl.get", "resources": ["BUCKET"]}, + {"name": "storage.default_object_acl.list", "resources": ["BUCKET"]}, + {"name": "storage.hmacKey.delete", "resources": ["HMAC_KEY"]}, + {"name": "storage.hmacKey.get", "resources": ["HMAC_KEY"]}, + {"name": "storage.hmacKey.list", "resources": ["HMAC_KEY"]}, + {"name": "storage.notifications.delete", "resources": ["BUCKET", "NOTIFICATION"]}, + {"name": "storage.notifications.get", "resources": ["BUCKET", "NOTIFICATION"]}, + {"name": "storage.notifications.list", "resources": ["BUCKET", "NOTIFICATION"]}, + {"name": "storage.object_acl.get", "resources": ["BUCKET", "OBJECT"]}, + {"name": "storage.object_acl.list", "resources": ["BUCKET", "OBJECT"]}, + {"name": "storage.objects.get", "resources": ["BUCKET", "OBJECT"]}, + {"name": "storage.objects.list", "resources": ["BUCKET", "OBJECT"]}, + {"name": "storage.objects.delete", "resources": ["BUCKET", "OBJECT"]}, + {"name": "storage.objects.insert", "resources": ["BUCKET"]}, + {"name": "storage.objects.patch", "resources": ["BUCKET", "OBJECT"]}, + {"name": "storage.serviceaccount.get", "resources": []} + ], + "preconditionProvided": false, + "expectSuccess": true + }, + { + "id": 2, + "description": "conditionally_idempotent_retries_when_precondition_is_present", + "cases": [ + { + "instructions": ["return-503", "return-503"] + }, + { + "instructions": ["return-reset-connection", "return-reset-connection"] + }, + { + "instructions": ["return-reset-connection", "return-503"] + } + ], + "methods": [ + {"name": "storage.buckets.patch", "resources": ["BUCKET"]}, + {"name": "storage.buckets.setIamPolicy", "resources": ["BUCKET"]}, + {"name": "storage.buckets.update", "resources": ["BUCKET"]}, + {"name": "storage.hmacKey.update", "resources": ["HMAC_KEY"]}, + {"name": "storage.objects.compose", "resources": ["BUCKET", "OBJECT"]}, + {"name": 
"storage.objects.copy", "resources": ["BUCKET", "OBJECT"]}, + {"name": "storage.objects.insert", "resources": ["BUCKET"]}, + {"name": "storage.objects.patch", "resources": ["BUCKET", "OBJECT"]}, + {"name": "storage.objects.rewrite", "resources": ["BUCKET", "OBJECT"]}, + {"name": "storage.objects.update", "resources": ["BUCKET", "OBJECT"]} + ], + "preconditionProvided": true, + "expectSuccess": true + }, + { + "id": 3, + "description": "conditionally_idempotent_no_retries_when_precondition_is_absent", + "cases": [ + { + "instructions": ["return-503"] + }, + { + "instructions": ["return-reset-connection"] + } + ], + "methods": [ + {"name": "storage.buckets.patch", "resources": ["BUCKET"]}, + {"name": "storage.buckets.setIamPolicy", "resources": ["BUCKET"]}, + {"name": "storage.buckets.update", "resources": ["BUCKET"]}, + {"name": "storage.hmacKey.update", "resources": ["HMAC_KEY"]}, + {"name": "storage.objects.compose", "resources": ["BUCKET", "OBJECT"]}, + {"name": "storage.objects.copy", "resources": ["BUCKET", "OBJECT"]}, + {"name": "storage.objects.rewrite", "resources": ["BUCKET", "OBJECT"]}, + {"name": "storage.objects.update", "resources": ["BUCKET", "OBJECT"]} + ], + "preconditionProvided": false, + "expectSuccess": false + }, + { + "id": 4, + "description": "non_idempotent", + "cases": [ + { + "instructions": ["return-503"] + }, + { + "instructions": ["return-reset-connection"] + } + ], + "methods": [ + {"name": "storage.bucket_acl.delete", "resources": ["BUCKET"]}, + {"name": "storage.bucket_acl.insert", "resources": ["BUCKET"]}, + {"name": "storage.bucket_acl.patch", "resources": ["BUCKET"]}, + {"name": "storage.bucket_acl.update", "resources": ["BUCKET"]}, + {"name": "storage.default_object_acl.delete", "resources": ["BUCKET"]}, + {"name": "storage.default_object_acl.insert", "resources": ["BUCKET"]}, + {"name": "storage.default_object_acl.patch", "resources": ["BUCKET"]}, + {"name": "storage.default_object_acl.update", "resources": ["BUCKET"]}, + {"name": "storage.hmacKey.create", "resources": []}, + {"name": "storage.notifications.insert", "resources": ["BUCKET"]}, + {"name": "storage.object_acl.delete", "resources": ["BUCKET", "OBJECT"]}, + {"name": "storage.object_acl.insert", "resources": ["BUCKET", "OBJECT"]}, + {"name": "storage.object_acl.patch", "resources": ["BUCKET", "OBJECT"]}, + {"name": "storage.object_acl.update", "resources": ["BUCKET", "OBJECT"]} + ], + "preconditionProvided": false, + "expectSuccess": false + }, + { + "id": 5, + "description": "non-retryable errors", + "cases": [ + { + "instructions": ["return-400"] + }, + { + "instructions": ["return-401"] + } + ], + "methods": [ + {"name": "storage.bucket_acl.delete", "resources": ["BUCKET"]}, + {"name": "storage.bucket_acl.get", "resources": ["BUCKET"]}, + {"name": "storage.bucket_acl.insert", "resources": ["BUCKET"]}, + {"name": "storage.bucket_acl.list", "resources": ["BUCKET"]}, + {"name": "storage.bucket_acl.patch", "resources": ["BUCKET"]}, + {"name": "storage.bucket_acl.update", "resources": ["BUCKET"]}, + {"name": "storage.buckets.delete", "resources": ["BUCKET"]}, + {"name": "storage.buckets.get", "resources": ["BUCKET"]}, + {"name": "storage.buckets.getIamPolicy", "resources": ["BUCKET"]}, + {"name": "storage.buckets.insert", "resources": ["BUCKET"]}, + {"name": "storage.buckets.list", "resources": ["BUCKET"]}, + {"name": "storage.buckets.lockRetentionPolicy", "resources": ["BUCKET"]}, + {"name": "storage.buckets.patch", "resources": ["BUCKET"]}, + {"name": "storage.buckets.setIamPolicy", 
"resources": ["BUCKET"]}, + {"name": "storage.buckets.testIamPermissions", "resources": ["BUCKET"]}, + {"name": "storage.buckets.update", "resources": ["BUCKET"]}, + {"name": "storage.default_object_acl.delete", "resources": ["BUCKET"]}, + {"name": "storage.default_object_acl.get", "resources": ["BUCKET"]}, + {"name": "storage.default_object_acl.insert", "resources": ["BUCKET"]}, + {"name": "storage.default_object_acl.list", "resources": ["BUCKET"]}, + {"name": "storage.default_object_acl.patch", "resources": ["BUCKET"]}, + {"name": "storage.default_object_acl.update", "resources": ["BUCKET"]}, + {"name": "storage.hmacKey.create", "resources": []}, + {"name": "storage.hmacKey.delete", "resources": ["HMAC_KEY"]}, + {"name": "storage.hmacKey.get", "resources": ["HMAC_KEY"]}, + {"name": "storage.hmacKey.list", "resources": ["HMAC_KEY"]}, + {"name": "storage.hmacKey.update", "resources": ["HMAC_KEY"]}, + {"name": "storage.notifications.delete", "resources": ["BUCKET", "NOTIFICATION"]}, + {"name": "storage.notifications.get", "resources": ["BUCKET", "NOTIFICATION"]}, + {"name": "storage.notifications.insert", "resources": ["BUCKET", "NOTIFICATION"]}, + {"name": "storage.notifications.list", "resources": ["BUCKET", "NOTIFICATION"]}, + {"name": "storage.object_acl.delete", "resources": ["BUCKET", "OBJECT"]}, + {"name": "storage.object_acl.get", "resources": ["BUCKET", "OBJECT"]}, + {"name": "storage.object_acl.insert", "resources": ["BUCKET", "OBJECT"]}, + {"name": "storage.object_acl.list", "resources": ["BUCKET", "OBJECT"]}, + {"name": "storage.object_acl.patch", "resources": ["BUCKET", "OBJECT"]}, + {"name": "storage.object_acl.update", "resources": ["BUCKET", "OBJECT"]}, + {"name": "storage.objects.compose", "resources": ["BUCKET", "OBJECT"]}, + {"name": "storage.objects.copy", "resources": ["BUCKET", "OBJECT"]}, + {"name": "storage.objects.delete", "resources": ["BUCKET", "OBJECT"]}, + {"name": "storage.objects.get", "resources": ["BUCKET", "OBJECT"]}, + {"name": "storage.objects.insert", "resources": ["BUCKET"]}, + {"name": "storage.objects.list", "resources": ["BUCKET", "OBJECT"]}, + {"name": "storage.objects.patch", "resources": ["BUCKET", "OBJECT"]}, + {"name": "storage.objects.rewrite", "resources": ["BUCKET", "OBJECT"]}, + {"name": "storage.objects.update", "resources": ["BUCKET", "OBJECT"]}, + {"name": "storage.serviceaccount.get", "resources": []} + ], + "preconditionProvided": false, + "expectSuccess": false + }, + { + "id": 6, + "description": "mix_retryable_non_retryable_errors", + "cases": [ + { + "instructions": ["return-503", "return-400"] + }, + { + "instructions": ["return-reset-connection", "return-401"] + } + ], + "methods": [ + {"name": "storage.bucket_acl.get", "resources": ["BUCKET"]}, + {"name": "storage.bucket_acl.list", "resources": ["BUCKET"]}, + {"name": "storage.buckets.delete", "resources": ["BUCKET"]}, + {"name": "storage.buckets.get", "resources": ["BUCKET"]}, + {"name": "storage.buckets.getIamPolicy", "resources": ["BUCKET"]}, + {"name": "storage.buckets.insert", "resources": []}, + {"name": "storage.buckets.list", "resources": ["BUCKET"]}, + {"name": "storage.buckets.lockRetentionPolicy", "resources": ["BUCKET"]}, + {"name": "storage.buckets.patch", "resources": ["BUCKET"]}, + {"name": "storage.buckets.setIamPolicy", "resources": ["BUCKET"]}, + {"name": "storage.buckets.testIamPermissions", "resources": ["BUCKET"]}, + {"name": "storage.buckets.update", "resources": ["BUCKET"]}, + {"name": "storage.default_object_acl.get", "resources": ["BUCKET"]}, + {"name": 
"storage.default_object_acl.list", "resources": ["BUCKET"]}, + {"name": "storage.hmacKey.delete", "resources": ["HMAC_KEY"]}, + {"name": "storage.hmacKey.get", "resources": ["HMAC_KEY"]}, + {"name": "storage.hmacKey.list", "resources": ["HMAC_KEY"]}, + {"name": "storage.hmacKey.update", "resources": ["HMAC_KEY"]}, + {"name": "storage.notifications.delete", "resources": ["BUCKET", "NOTIFICATION"]}, + {"name": "storage.notifications.get", "resources": ["BUCKET", "NOTIFICATION"]}, + {"name": "storage.notifications.list", "resources": ["BUCKET", "NOTIFICATION"]}, + {"name": "storage.object_acl.get", "resources": ["BUCKET", "OBJECT"]}, + {"name": "storage.object_acl.list", "resources": ["BUCKET", "OBJECT"]}, + {"name": "storage.objects.compose", "resources": ["BUCKET", "OBJECT"]}, + {"name": "storage.objects.copy", "resources": ["BUCKET", "OBJECT"]}, + {"name": "storage.objects.delete", "resources": ["BUCKET", "OBJECT"]}, + {"name": "storage.objects.get", "resources": ["BUCKET", "OBJECT"]}, + {"name": "storage.objects.list", "resources": ["BUCKET", "OBJECT"]}, + {"name": "storage.objects.insert", "resources": ["BUCKET"]}, + {"name": "storage.objects.patch", "resources": ["BUCKET", "OBJECT"]}, + {"name": "storage.objects.rewrite", "resources": ["BUCKET", "OBJECT"]}, + {"name": "storage.objects.update", "resources": ["BUCKET", "OBJECT"]}, + {"name": "storage.serviceaccount.get", "resources": []} + ], + "preconditionProvided": true, + "expectSuccess": false + }, + { + "id": 7, + "description": "resumable_uploads_handle_complex_retries", + "cases": [ + { + "instructions": ["return-reset-connection", "return-503"] + }, + { + "instructions": ["return-408"] + }, + { + "instructions": ["return-503-after-256K"] + }, + { + "instructions": ["return-503-after-8192K", "return-408"] + } + ], + "methods": [ + {"name": "storage.objects.insert", "group": "storage.resumable.upload", "resources": ["BUCKET"]} + ], + "preconditionProvided": true, + "expectSuccess": true + }, + { + "id": 8, + "description": "downloads_handle_complex_retries", + "cases": [ + { + "instructions": ["return-broken-stream", "return-broken-stream"] + }, + { + "instructions": ["return-broken-stream-after-256K"] + } + ], + "methods": [ + {"name": "storage.objects.get", "group": "storage.objects.download", "resources": ["BUCKET", "OBJECT"]} + ], + "preconditionProvided": false, + "expectSuccess": true + } + ] +} diff --git a/tests/conformance/test_conformance.py b/tests/conformance/test_conformance.py index cf4c026a8..819218d24 100644 --- a/tests/conformance/test_conformance.py +++ b/tests/conformance/test_conformance.py @@ -24,12 +24,10 @@ import pytest import requests - -from six.moves.urllib import parse as urlparse +import urllib from google.auth.credentials import AnonymousCredentials from google.cloud import storage -from google.cloud.exceptions import NotFound from google.cloud.storage.hmac_key import HMACKeyMetadata from . 
import _read_local_json @@ -39,14 +37,14 @@ """Environment variable or default host for Storage testbench emulator.""" _HOST = os.environ.get("STORAGE_EMULATOR_HOST", "http://localhost:9000") -_PORT = urlparse.urlsplit(_HOST).port +_PORT = urllib.parse.urlsplit(_HOST).port """The storage testbench docker image info and commands.""" _DEFAULT_IMAGE_NAME = "gcr.io/cloud-devrel-public-resources/storage-testbench" _DEFAULT_IMAGE_TAG = "latest" -_DOCKER_IMAGE = "{}:{}".format(_DEFAULT_IMAGE_NAME, _DEFAULT_IMAGE_TAG) +_DOCKER_IMAGE = f"{_DEFAULT_IMAGE_NAME}:{_DEFAULT_IMAGE_TAG}" _PULL_CMD = ["docker", "pull", _DOCKER_IMAGE] -_RUN_CMD = ["docker", "run", "--rm", "-d", "-p", "{}:9000".format(_PORT), _DOCKER_IMAGE] +_RUN_CMD = ["docker", "run", "--rm", "-d", "-p", f"{_PORT}:9000", _DOCKER_IMAGE] _CONF_TEST_PROJECT_ID = "my-project-id" _CONF_TEST_SERVICE_ACCOUNT_EMAIL = ( @@ -56,6 +54,7 @@ _STRING_CONTENT = "hello world" _BYTE_CONTENT = b"12345678" +_RESUMABLE_UPLOAD_CHUNK_SIZE = 2 * 1024 * 1024 ######################################################################################################################################## @@ -79,39 +78,73 @@ def blob_exists(client, _preconditions, **resources): def blob_download_as_bytes(client, _preconditions, **resources): bucket = resources.get("bucket") - object = resources.get("object") - blob = client.bucket(bucket.name).blob(object.name) - blob.download_as_bytes() + file, data = resources.get("file_data") + # download the file and assert data integrity + blob = client.bucket(bucket.name).blob(file.name) + stored_contents = blob.download_as_bytes() + assert stored_contents == data.encode("utf-8") def blob_download_as_text(client, _preconditions, **resources): bucket = resources.get("bucket") - object = resources.get("object") - blob = client.bucket(bucket.name).blob(object.name) - blob.download_as_text() + file, data = resources.get("file_data") + blob = client.bucket(bucket.name).blob(file.name) + stored_contents = blob.download_as_text() + assert stored_contents == data def blob_download_to_filename(client, _preconditions, **resources): bucket = resources.get("bucket") - object = resources.get("object") - blob = client.bucket(bucket.name).blob(object.name) + file, data = resources.get("file_data") + blob = client.bucket(bucket.name).blob(file.name) with tempfile.NamedTemporaryFile() as temp_f: blob.download_to_filename(temp_f.name) + with open(temp_f.name, "r") as file_obj: + stored_contents = file_obj.read() + assert stored_contents == data + + +def blob_download_to_filename_chunked(client, _preconditions, **resources): + bucket = resources.get("bucket") + file, data = resources.get("file_data") + blob = client.bucket(bucket.name).blob(file.name, chunk_size=40 * 1024 * 1024) + with tempfile.NamedTemporaryFile() as temp_f: + blob.download_to_filename(temp_f.name) + with open(temp_f.name, "r") as file_obj: + stored_contents = file_obj.read() + assert stored_contents == data + + +def blob_download_to_filename_range(client, _preconditions, **resources): + bucket = resources.get("bucket") + file, data = resources.get("file_data") + blob = client.bucket(bucket.name).blob(file.name) + with tempfile.NamedTemporaryFile() as temp_f: + blob.download_to_filename(temp_f.name, start=1024, end=512 * 1024) + with open(temp_f.name, "r") as file_obj: + stored_contents = file_obj.read() + assert stored_contents == data[1024 : 512 * 1024 + 1] def client_download_blob_to_file(client, _preconditions, **resources): - object = 
resources.get("object") + bucket = resources.get("bucket") + file, data = resources.get("file_data") + blob = client.bucket(bucket.name).blob(file.name) with tempfile.NamedTemporaryFile() as temp_f: with open(temp_f.name, "wb") as file_obj: - client.download_blob_to_file(object, file_obj) + client.download_blob_to_file(blob, file_obj) + with open(temp_f.name, "r") as to_read: + stored_contents = to_read.read() + assert stored_contents == data def blobreader_read(client, _preconditions, **resources): bucket = resources.get("bucket") - object = resources.get("object") - blob = client.bucket(bucket.name).blob(object.name) - with blob.open() as reader: - reader.read() + file, data = resources.get("file_data") + blob = client.bucket(bucket.name).blob(file.name) + with blob.open(mode="r") as reader: + stored_contents = reader.read() + assert stored_contents == data def client_list_blobs(client, _preconditions, **resources): @@ -428,35 +461,79 @@ def blob_compose(client, _preconditions, **resources): def blob_upload_from_string(client, _preconditions, **resources): bucket = resources.get("bucket") + _, data = resources.get("file_data") blob = client.bucket(bucket.name).blob(uuid.uuid4().hex) + blob.chunk_size = _RESUMABLE_UPLOAD_CHUNK_SIZE if _preconditions: - blob.upload_from_string(_STRING_CONTENT, if_generation_match=0) + blob.upload_from_string(data, if_generation_match=0) else: - blob.upload_from_string(_STRING_CONTENT) + blob.upload_from_string(data) + assert blob.size == len(data) def blob_upload_from_file(client, _preconditions, **resources): bucket = resources.get("bucket") - blob = client.bucket(bucket.name).blob(uuid.uuid4().hex) + file, data = resources.get("file_data") + file_blob = client.bucket(bucket.name).blob(file.name) + upload_blob = client.bucket(bucket.name).blob(uuid.uuid4().hex) + upload_blob.chunk_size = _RESUMABLE_UPLOAD_CHUNK_SIZE + with tempfile.NamedTemporaryFile() as temp_f: + # Create a named temporary file with payload. + with open(temp_f.name, "wb") as file_obj: + client.download_blob_to_file(file_blob, file_obj) + # Upload the temporary file and assert data integrity. if _preconditions: - blob.upload_from_file(temp_f, if_generation_match=0) + upload_blob.upload_from_file(temp_f, if_generation_match=0) else: - blob.upload_from_file(temp_f) + upload_blob.upload_from_file(temp_f) + + upload_blob.reload() + assert upload_blob.size == len(data) def blob_upload_from_filename(client, _preconditions, **resources): bucket = resources.get("bucket") blob = client.bucket(bucket.name).blob(uuid.uuid4().hex) + blob.chunk_size = _RESUMABLE_UPLOAD_CHUNK_SIZE + + bucket = resources.get("bucket") + file, data = resources.get("file_data") + file_blob = client.bucket(bucket.name).blob(file.name) + upload_blob = client.bucket(bucket.name).blob(uuid.uuid4().hex) + upload_blob.chunk_size = _RESUMABLE_UPLOAD_CHUNK_SIZE with tempfile.NamedTemporaryFile() as temp_f: + # Create a named temporary file with payload. + with open(temp_f.name, "wb") as file_obj: + client.download_blob_to_file(file_blob, file_obj) + # Upload the temporary file and assert data integrity. 
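+        # Passing if_generation_match=0 preconditions the upload on the object not existing yet, which is what makes the retried insert idempotent when preconditions are enabled.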
if _preconditions: - blob.upload_from_filename(temp_f.name, if_generation_match=0) + upload_blob.upload_from_filename(temp_f.name, if_generation_match=0) else: - blob.upload_from_filename(temp_f.name) + upload_blob.upload_from_filename(temp_f.name) + + upload_blob.reload() + assert upload_blob.size == len(data) def blobwriter_write(client, _preconditions, **resources): + bucket = resources.get("bucket") + _, data = resources.get("file_data") + blob = client.bucket(bucket.name).blob(uuid.uuid4().hex) + if _preconditions: + with blob.open( + "w", chunk_size=_RESUMABLE_UPLOAD_CHUNK_SIZE, if_generation_match=0 + ) as writer: + writer.write(data) + else: + with blob.open("w", chunk_size=_RESUMABLE_UPLOAD_CHUNK_SIZE) as writer: + writer.write(data) + blob.reload() + assert blob.size == len(data) + + +def blobwriter_write_multipart(client, _preconditions, **resources): chunk_size = 256 * 1024 bucket = resources.get("bucket") blob = client.bucket(bucket.name).blob(uuid.uuid4().hex) @@ -468,6 +545,15 @@ def blobwriter_write(client, _preconditions, **resources): writer.write(_BYTE_CONTENT) +def blob_upload_from_string_multipart(client, _preconditions, **resources): + bucket = resources.get("bucket") + blob = client.bucket(bucket.name).blob(uuid.uuid4().hex) + if _preconditions: + blob.upload_from_string(_STRING_CONTENT, if_generation_match=0) + else: + blob.upload_from_string(_STRING_CONTENT) + + def blob_create_resumable_upload_session(client, _preconditions, **resources): bucket = resources.get("bucket") blob = client.bucket(bucket.name).blob(uuid.uuid4().hex) @@ -672,11 +758,27 @@ def object_acl_clear(client, _preconditions, **resources): blob_exists, client_download_blob_to_file, blob_download_to_filename, + blob_download_to_filename_chunked, + blob_download_to_filename_range, + blob_download_as_bytes, + blob_download_as_text, + blobreader_read, + ], + "storage.objects.download": [ + client_download_blob_to_file, + blob_download_to_filename, + blob_download_to_filename_chunked, + blob_download_to_filename_range, blob_download_as_bytes, blob_download_as_text, blobreader_read, ], "storage.objects.list": [client_list_blobs, bucket_list_blobs, bucket_delete], + "storage.objects.delete": [ + bucket_delete_blob, + bucket_delete_blobs, + blob_delete, + ], "storage.serviceaccount.get": [client_get_service_account_email], # S1 end "storage.buckets.patch": [ bucket_patch, @@ -694,18 +796,16 @@ def object_acl_clear(client, _preconditions, **resources): "storage.hmacKey.update": [hmac_key_update], "storage.objects.compose": [blob_compose], "storage.objects.copy": [bucket_copy_blob, bucket_rename_blob], - "storage.objects.delete": [ - bucket_delete_blob, - bucket_delete_blobs, - blob_delete, - bucket_rename_blob, - ], "storage.objects.insert": [ + blob_upload_from_string_multipart, + blobwriter_write_multipart, + blob_create_resumable_upload_session, + ], + "storage.resumable.upload": [ blob_upload_from_string, blob_upload_from_file, blob_upload_from_filename, blobwriter_write, - blob_create_resumable_upload_session, ], "storage.objects.patch": [ blob_patch, @@ -722,73 +822,6 @@ def object_acl_clear(client, _preconditions, **resources): } -######################################################################################################################################## -### Pytest Fixtures to Populate Resources ############################################################################################## 
-######################################################################################################################################## - - -@pytest.fixture -def client(): - client = storage.Client( - project=_CONF_TEST_PROJECT_ID, - credentials=AnonymousCredentials(), - client_options={"api_endpoint": _HOST}, - ) - return client - - -@pytest.fixture -def bucket(client): - bucket = client.bucket(uuid.uuid4().hex) - client.create_bucket(bucket) - yield bucket - try: - bucket.delete(force=True) - except NotFound: # in cases where bucket is deleted within the test - pass - - -@pytest.fixture -def object(client, bucket): - blob = client.bucket(bucket.name).blob(uuid.uuid4().hex) - blob.upload_from_string(_STRING_CONTENT) - blob.reload() - yield blob - try: - blob.delete() - except NotFound: # in cases where object is deleted within the test - pass - - -@pytest.fixture -def notification(client, bucket): - notification = client.bucket(bucket.name).notification( - topic_name=_CONF_TEST_PUBSUB_TOPIC_NAME - ) - notification.create() - notification.reload() - yield notification - try: - notification.delete() - except NotFound: # in cases where notification is deleted within the test - pass - - -@pytest.fixture -def hmac_key(client): - hmac_key, _secret = client.create_hmac_key( - service_account_email=_CONF_TEST_SERVICE_ACCOUNT_EMAIL, - project_id=_CONF_TEST_PROJECT_ID, - ) - yield hmac_key - try: - hmac_key.state = "INACTIVE" - hmac_key.update() - hmac_key.delete() - except NotFound: # in cases where hmac_key is deleted within the test - pass - - ######################################################################################################################################## ### Helper Methods for Testbench Retry Test API ######################################################################################## ########################################################################################################################################
@@ -825,15 +858,21 @@ def _get_retry_test(host, id): instructions, and a boolean status "completed". This can be used to verify if all instructions were used as expected. """ - get_retry_test_uri = "{base}{retry}/{id}".format( - base=host, retry="/retry_test", id=id - ) + get_retry_test_uri = f"{host}/retry_test/{id}" r = requests.get(get_retry_test_uri) return r.json() def _run_retry_test( - host, id, lib_func, _preconditions, bucket, object, notification, hmac_key + host, + id, + lib_func, + _preconditions, + bucket, + object, + notification, + hmac_key, + file_data, ): """ To execute tests against the list of instructions sent to the Retry Test API, @@ -855,6 +894,7 @@ def _run_retry_test( object=object, notification=notification, hmac_key=hmac_key, + file_data=file_data, ) @@ -862,9 +902,7 @@ def _delete_retry_test(host, id): """ Delete the Retry Test resource by id. """ - get_retry_test_uri = "{base}{retry}/{id}".format( - base=host, retry="/retry_test", id=id - ) + get_retry_test_uri = f"{host}/retry_test/{id}" requests.delete(get_retry_test_uri)
@@ -874,7 +912,16 @@ def _delete_retry_test(host, id): def run_test_case( - scenario_id, method, case, lib_func, host, bucket, object, notification, hmac_key + scenario_id, + method, + case, + lib_func, + host, + bucket, + object, + notification, + hmac_key, + file_data, ): scenario = _CONFORMANCE_TESTS[scenario_id - 1] expect_success = scenario["expectSuccess"] @@ -887,7 +934,7 @@ def run_test_case( id = r["id"] except Exception as e: raise Exception( - "Error creating retry test for {}: {}".format(method_name, e) + f"Error creating retry test for {method_name}: {e}" ).with_traceback(e.__traceback__) # Run retry tests on library methods. @@ -901,11 +948,10 @@ def run_test_case( object, notification, hmac_key, + file_data, ) except Exception as e: - logging.exception( - "Caught an exception while running retry instructions\n {}".format(e) - ) + logging.exception(f"Caught an exception while running retry instructions\n {e}") success_results = False else: success_results = True @@ -948,14 +994,13 @@ def run_test_case( for i, c in enumerate(cases): for m in methods: method_name = m["name"] - if method_name not in method_mapping: - logging.info("No tests for operation {}".format(method_name)) + method_group = m["group"] if m.get("group", None) else m["name"] + if method_group not in method_mapping: + logging.info(f"No tests for operation {method_name}") continue - for lib_func in method_mapping[method_name]: - test_name = "test-S{}-{}-{}-{}".format( - id, method_name, lib_func.__name__, i - ) + for lib_func in method_mapping[method_group]: + test_name = f"test-S{id}-{method_name}-{lib_func.__name__}-{i}" globals()[test_name] = functools.partial( run_test_case, id, m, c, lib_func, _HOST )
diff --git a/tests/perf/README.md b/tests/perf/README.md index e77589f61..14b8f7be7 100644 --- a/tests/perf/README.md +++ b/tests/perf/README.md @@ -1,21 +1,54 @@ -# storage benchwrapp +# python-storage benchmarking -main.py is a gRPC wrapper around the storage library for benchmarking purposes. +**This is not an officially supported Google product** -## Running +This benchmarking script is used by Storage client library maintainers to benchmark various workloads and collect metrics to improve the performance of the library. +The benchmarking currently runs a Write-1-Read-3 workload and measures two QoS performance attributes: latency and throughput. +## Run example: +This runs 10K iterations of Write-1-Read-3 on 5KiB to 16KiB files and writes output to a CSV file (default `output_bench.csv`): ```bash -$ export STORAGE_EMULATOR_HOST=http://localhost:8080 -$ pip install grpcio -$ cd storage +$ cd python-storage $ pip install -e . # install google.cloud.storage locally $ cd tests/perf -$ python3 benchwrapper.py --port 8081 +$ python3 benchmarking.py --samples 10000 --object_size 5120..16384 --output_type csv ``` -## Re-generating protos +## CLI parameters -```bash -$ pip install grpcio-tools -$ python -m grpc_tools.protoc -I. --python_out=. --grpc_python_out=. 
*.proto -``` +| Parameter | Description | Possible values | Default | +| --------- | ----------- | --------------- |:-------:| +| --project | GCP project identifier | a project id| * | +| --api | API to use | only JSON is currently supported in python benchmarking | `JSON` | +| --output_type | output results as csv records or cloud monitoring | `csv`, `cloud-monitoring` | `cloud-monitoring` | +| --object_size | object size in bytes; can be a range min..max | string | `1048576` (1 MiB) | +| --range_read_size | size of the range to read in bytes | any positive integer
<=0 reads the full object | `0` | +| --minimum_read_offset | minimum offset for the start of the range to be read in bytes | any integer >0 | `0` | +| --maximum_read_offset | maximum offset for the start of the range to be read in bytes | any integer >0 | `0` | +| --samples | number of W1R3 iterations | any positive integer | `8000` | +| --bucket | storage bucket name | a bucket name | `pybench` | +| --bucket_region | bucket region for benchmarks | any GCS region | `US-WEST1` | +| --workers | number of processes (multiprocessing enabled) | any positive integer | 16 (recommend not to exceed 16) | +| --test_type | test type to run benchmarking | `w1r3`, `range` | `w1r3` | +| --output_file | file to output results to | any file path | `output_bench.csv` | +| --tmp_dir | temp directory path on file system | any file path | `tm-perf-metrics` | +| --delete_bucket | whether or not to delete GCS bucket used for benchmarking| bool | `False` | + + +## Workload definition and CSV headers + +For each invocation of the benchmark, write a new object of random size between `min_size` and `max_size` . After the successful write, download the object in full three times. For each of the 4 operations record the following fields: + +| Field | Description | +| ----- | ----------- | +| Op | the name of the operations (WRITE, READ[{0,1,2}]) | +| ObjectSize | the number of bytes of the object | +| LibBufferSize | configured to use the [library default of 100 MiB](https://github.com/googleapis/python-storage/blob/main/google/cloud/storage/blob.py#L135) | +| Crc32cEnabled | bool: whether crc32c was computed for the operation | +| MD5Enabled | bool: whether MD5 was computed for the operation | +| ApiName | default to JSON| +| ElapsedTimeUs | the elapsed time in microseconds the operation took | +| Status | completion state of the operation [OK, FAIL] | +| RunID | timestamp from the benchmarking run | +| AppBufferSize | N/A | +| CpuTimeUs | N/A | \ No newline at end of file diff --git a/tests/perf/_perf_utils.py b/tests/perf/_perf_utils.py new file mode 100644 index 000000000..d86568d7e --- /dev/null +++ b/tests/perf/_perf_utils.py @@ -0,0 +1,235 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Performance benchmarking helper methods. 
This is not an officially supported Google product.""" + +import csv +import logging +import os +import random +import shutil +import time +import uuid + +from google.cloud import storage + + +##### DEFAULTS & CONSTANTS ##### +HEADER = [ + "Op", + "ObjectSize", + "AppBufferSize", + "LibBufferSize", + "Crc32cEnabled", + "MD5Enabled", + "ApiName", + "ElapsedTimeUs", + "CpuTimeUs", + "Status", +] +CHECKSUM = ["md5", "crc32c", None] +TIMESTAMP = time.strftime("%Y%m%d-%H%M%S") +DEFAULT_API = "JSON" +DEFAULT_BUCKET_NAME = f"pybench{TIMESTAMP}" +DEFAULT_BUCKET_REGION = "US-WEST1" +DEFAULT_OBJECT_RANGE_SIZE_BYTES = "1048576" # 1 MiB +DEFAULT_NUM_SAMPLES = 8000 +DEFAULT_NUM_PROCESSES = 16 +DEFAULT_LIB_BUFFER_SIZE = 104857600 # 100MB +DEFAULT_CHUNKSIZE = 104857600 # 100 MB https://github.com/googleapis/python-storage/blob/main/google/cloud/storage/blob.py#L139 +NOT_SUPPORTED = -1 +DEFAULT_BASE_DIR = "tm-perf-metrics" +DEFAULT_OUTPUT_FILE = f"output_bench{TIMESTAMP}.csv" +DEFAULT_CREATE_SUBDIR_PROBABILITY = 0.1 +SSB_SIZE_THRESHOLD_BYTES = 1048576 + + +##### UTILITY METHODS ##### + + +# Returns a boolean value with the provided probability. +def weighted_random_boolean(create_subdir_probability): + return random.uniform(0.0, 1.0) <= create_subdir_probability + + +# Creates a random file with the given file name, path and size. +def generate_random_file(file_name, file_path, size): + with open(os.path.join(file_path, file_name), "wb") as file_obj: + file_obj.write(os.urandom(size)) + + +# Creates a random directory structure consisting of subdirectories and random files. +# Returns an array of all the generated paths and total size in bytes of all generated files. +def generate_random_directory( + max_objects, + min_file_size, + max_file_size, + base_dir, + create_subdir_probability=DEFAULT_CREATE_SUBDIR_PROBABILITY, +): + directory_info = { + "paths": [], + "total_size_in_bytes": 0, + } + + file_path = base_dir + os.makedirs(file_path, exist_ok=True) + for i in range(max_objects): + if weighted_random_boolean(create_subdir_probability): + file_path = f"{file_path}/{uuid.uuid4().hex}" + os.makedirs(file_path, exist_ok=True) + directory_info["paths"].append(file_path) + else: + file_name = uuid.uuid4().hex + rand_size = random.randint(min_file_size, max_file_size) + generate_random_file(file_name, file_path, rand_size) + directory_info["total_size_in_bytes"] += rand_size + directory_info["paths"].append(os.path.join(file_path, file_name)) + + return directory_info + + +def results_to_csv(res): + results = [] + for metric in HEADER: + results.append(res.get(metric, -1)) + return results + + +def convert_to_csv(filename, results, workers): + with open(filename, "w") as file: + writer = csv.writer(file) + writer.writerow(HEADER) + # Benchmarking main script uses Multiprocessing Pool.map(), + # thus results is structured as List[List[Dict[str, any]]]. + for result in results: + for row in result: + writer.writerow(results_to_csv(row)) + + +def convert_to_cloud_monitoring(bucket_name, results, workers): + # Benchmarking main script uses Multiprocessing Pool.map(), + # thus results is structured as List[List[Dict[str, any]]]. + for result in results: + for res in result: + # Only output successful benchmarking runs to cloud monitoring. 
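+            # Each record that survives the status check below is printed as one metric line, for example (illustrative values only):
+            # throughput{library=python-storage,api=JSON,op=WRITE,workers=16,object_size=1048576,...,status=OK}38.2
+            # where the trailing number is the computed throughput in MiB/s (KiB/s for transfers smaller than 1 MiB).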
+ status = res.get("Status").pop() # convert ["OK"] --> "OK" + if status != "OK": + continue + + range_read_size = res.get("RangeReadSize", 0) + object_size = res.get("ObjectSize") + elapsed_time_us = res.get("ElapsedTimeUs") + + # Handle range reads and calculate throughput using range_read_size. + if range_read_size > 0: + size = range_read_size + else: + size = object_size + + # If size is greater than the defined threshold, report in MiB/s, otherwise report in KiB/s. + if size >= SSB_SIZE_THRESHOLD_BYTES: + throughput = (size / 1024 / 1024) / (elapsed_time_us / 1_000_000) + else: + throughput = (size / 1024) / (elapsed_time_us / 1_000_000) + + cloud_monitoring_output = ( + "throughput{" + + "library=python-storage," + + "api={},".format(res.get("ApiName")) + + "op={},".format(res.get("Op")) + + "workers={},".format(workers) + + "object_size={},".format(object_size) + + "transfer_offset={},".format(res.get("TransferOffset", 0)) + + "transfer_size={},".format(res.get("TransferSize", object_size)) + + "app_buffer_size={},".format(res.get("AppBufferSize")) + + "chunksize={},".format(res.get("TransferSize", object_size)) + + "crc32c_enabled={},".format(res.get("Crc32cEnabled")) + + "md5_enabled={},".format(res.get("MD5Enabled")) + + "cpu_time_us={},".format(res.get("CpuTimeUs")) + + "peer=''," + + f"bucket_name={bucket_name}," + + "retry_count=''," + + f"status={status}" + + "}" + f"{throughput}" + ) + + print(cloud_monitoring_output) + + +def cleanup_directory_tree(directory): + """Clean up directory tree on disk.""" + try: + shutil.rmtree(directory) + except Exception as e: + logging.exception(f"Caught an exception while deleting local directory\n {e}") + + +def cleanup_file(file_path): + """Clean up local file on disk.""" + try: + os.remove(file_path) + except Exception as e: + logging.exception(f"Caught an exception while deleting local file\n {e}") + + +def get_bucket_instance(bucket_name): + client = storage.Client() + bucket = client.bucket(bucket_name) + if not bucket.exists(): + client.create_bucket(bucket) + return bucket + + +def cleanup_bucket(bucket, delete_bucket=False): + # Delete blobs first as the bucket may contain more than 256 blobs. + try: + blobs = bucket.list_blobs() + for blob in blobs: + blob.delete() + except Exception as e: + logging.exception(f"Caught an exception while deleting blobs\n {e}") + # Delete bucket if delete_bucket is set to True + if delete_bucket: + try: + bucket.delete(force=True) + except Exception as e: + logging.exception(f"Caught an exception while deleting bucket\n {e}") + + +def get_min_max_size(object_size): + # Object size accepts a single value in bytes or a range in bytes min..max + if object_size.find("..") < 0: + min_size = int(object_size) + max_size = int(object_size) + else: + split_sizes = object_size.split("..") + min_size = int(split_sizes[0]) + max_size = int(split_sizes[1]) + return min_size, max_size + + +class logCount(logging.Handler): + class LogType: + def __init__(self): + self.errors = 0 + + def __init__(self): + super().__init__() + self.count = self.LogType() + + def emit(self, record): + if record.levelname == "ERROR": + self.count.errors += 1 diff --git a/tests/perf/benchmarking.py b/tests/perf/benchmarking.py new file mode 100644 index 000000000..26bd85a69 --- /dev/null +++ b/tests/perf/benchmarking.py @@ -0,0 +1,184 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Performance benchmarking main script. This is not an officially supported Google product.""" + +import argparse +import logging +import multiprocessing +import sys + +from google.cloud import storage + +import _perf_utils as _pu +import profile_w1r3 as w1r3 + + +##### PROFILE BENCHMARKING TEST TYPES ##### +PROFILE_WRITE_ONE_READ_THREE = "w1r3" +PROFILE_RANGE_READ = "range" + + +def main(args): + # Track error logging for BBMC reporting. + counter = _pu.logCount() + logging.basicConfig( + level=logging.ERROR, + handlers=[counter, logging.StreamHandler(sys.stderr)], + ) + + # Create a storage bucket to run benchmarking. + if args.project is not None: + client = storage.Client(project=args.project) + else: + client = storage.Client() + + bucket = client.bucket(args.bucket) + if not bucket.exists(): + bucket = client.create_bucket(bucket, location=args.bucket_region) + + # Define test type and number of processes to run benchmarking. + # Note that transfer manager tests default to using 1 process. + num_processes = 1 + test_type = args.test_type + if test_type == PROFILE_WRITE_ONE_READ_THREE: + num_processes = args.workers + benchmark_runner = w1r3.run_profile_w1r3 + logging.info( + f"A total of {num_processes} processes are created to run benchmarking {test_type}" + ) + elif test_type == PROFILE_RANGE_READ: + num_processes = args.workers + benchmark_runner = w1r3.run_profile_range_read + logging.info( + f"A total of {num_processes} processes are created to run benchmarking {test_type}" + ) + + # Allow multiprocessing to speed up benchmarking tests; defaults to 1 for no concurrency. + p = multiprocessing.Pool(num_processes) + pool_output = p.map(benchmark_runner, [args for _ in range(args.samples)]) + + # Output to Cloud Monitoring or CSV file. + output_type = args.output_type + if output_type == "cloud-monitoring": + _pu.convert_to_cloud_monitoring(args.bucket, pool_output, num_processes) + elif output_type == "csv": + _pu.convert_to_csv(args.output_file, pool_output, num_processes) + logging.info( + f"Successfully ran benchmarking. Please find your output log at {args.output_file}" + ) + + # Clean up and delete blobs. + _pu.cleanup_bucket(bucket, delete_bucket=args.delete_bucket) + + # BBMC will not surface errors unless the process is terminated with a non-zero code. 
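+    # counter is the logCount handler installed via logging.basicConfig above; it increments count.errors for every ERROR-level record, so any logged exception causes a non-zero exit below.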
+    if counter.count.errors != 0: +        sys.exit(1) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--project", + type=str, + default=None, + help="GCP project identifier", + ) + parser.add_argument( + "--api", + type=str, + default="JSON", + help="API to use", + ) + parser.add_argument( + "--test_type", + type=str, + default=PROFILE_WRITE_ONE_READ_THREE, + help="Benchmarking test type", + ) + parser.add_argument( + "--object_size", + type=str, + default=_pu.DEFAULT_OBJECT_RANGE_SIZE_BYTES, + help="Object size in bytes; can be a range min..max", + ) + parser.add_argument( + "--range_read_size", + type=int, + default=0, + help="Size of the range to read in bytes", + ) + parser.add_argument( + "--minimum_read_offset", + type=int, + default=0, + help="Minimum offset for the start of the range to be read in bytes", + ) + parser.add_argument( + "--maximum_read_offset", + type=int, + default=0, + help="Maximum offset for the start of the range to be read in bytes", + ) + parser.add_argument( + "--samples", + type=int, + default=_pu.DEFAULT_NUM_SAMPLES, + help="Number of samples to report", + ) + parser.add_argument( + "--workers", + type=int, + default=_pu.DEFAULT_NUM_PROCESSES, + help="Number of processes (multiprocessing enabled)", + ) + parser.add_argument( + "--bucket", + type=str, + default=_pu.DEFAULT_BUCKET_NAME, + help="Storage bucket name", + ) + parser.add_argument( + "--bucket_region", + type=str, + default=_pu.DEFAULT_BUCKET_REGION, + help="Bucket region", + ) + parser.add_argument( + "--output_type", + type=str, + default="cloud-monitoring", + help="Output format, csv or cloud-monitoring", + ) + parser.add_argument( + "--output_file", + type=str, + default=_pu.DEFAULT_OUTPUT_FILE, + help="File to output results to", + ) + parser.add_argument( + "--tmp_dir", + type=str, + default=_pu.DEFAULT_BASE_DIR, + help="Temp directory path on file system", + ) + parser.add_argument( + "--delete_bucket", + type=bool, + default=False, + help="Whether or not to delete GCS bucket used for benchmarking", + ) + args = parser.parse_args() + + main(args) diff --git a/tests/perf/benchwrapper.py b/tests/perf/benchwrapper.py deleted file mode 100644 index c81d6bb20..000000000 --- a/tests/perf/benchwrapper.py +++ /dev/null @@ -1,54 +0,0 @@ -import argparse -import sys -import time -import grpc -from concurrent import futures -import storage_pb2_grpc -import storage_pb2 -from google.cloud import storage - -_ONE_DAY_IN_SECONDS = 60 * 60 * 24 - -parser = argparse.ArgumentParser() - -# if os.environ.get("STORAGE_EMULATOR_HOST") is None: -# sys.exit( -# "This benchmarking server only works when connected to an emulator. Please set STORAGE_EMULATOR_HOST." 
-# ) - -parser.add_argument("--port", help="The port to run on.") - -args = parser.parse_args() - -if args.port is None: - sys.exit("Usage: python3 main.py --port 8081") - -# client = storage.Client.create_anonymous_client() -client = storage.Client() - - -class StorageBenchWrapperServicer(storage_pb2_grpc.StorageBenchWrapperServicer): - def Write(self, request, context): - # TODO(deklerk): implement this - return storage_pb2.EmptyResponse() - - def Read(self, request, context): - bucket = client.bucket(request.bucketName) - blob = storage.Blob(request.objectName, bucket) - blob.download_as_string() - return storage_pb2.EmptyResponse() - - -server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) -storage_pb2_grpc.add_StorageBenchWrapperServicer_to_server( - StorageBenchWrapperServicer(), server -) - -print("listening on localhost:" + args.port) -server.add_insecure_port("[::]:" + args.port) -server.start() -try: - while True: - time.sleep(_ONE_DAY_IN_SECONDS) -except KeyboardInterrupt: - server.stop(0) diff --git a/tests/perf/profile_w1r3.py b/tests/perf/profile_w1r3.py new file mode 100644 index 000000000..50c8b5c24 --- /dev/null +++ b/tests/perf/profile_w1r3.py @@ -0,0 +1,221 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Workload W1R3 profiling script. This is not an officially supported Google product.""" + +import logging +import os +import random +import time +import uuid + +from functools import partial, update_wrapper + +from google.cloud import storage + +import _perf_utils as _pu + + +def WRITE(bucket, blob_name, checksum, size, args, **kwargs): + """Perform an upload and return latency.""" + blob = bucket.blob(blob_name) + file_path = f"{os.getcwd()}/{uuid.uuid4().hex}" + # Create random file locally on disk + with open(file_path, "wb") as file_obj: + file_obj.write(os.urandom(size)) + + start_time = time.monotonic_ns() + blob.upload_from_filename(file_path, checksum=checksum, if_generation_match=0) + end_time = time.monotonic_ns() + + elapsed_time = round( + (end_time - start_time) / 1000 + ) # convert nanoseconds to microseconds + + # Clean up local file + _pu.cleanup_file(file_path) + + return elapsed_time + + +def READ(bucket, blob_name, checksum, args, **kwargs): + """Perform a download and return latency.""" + blob = bucket.blob(blob_name) + if not blob.exists(): + raise Exception("Blob does not exist. Previous WRITE failed.") + + range_read_size = args.range_read_size + range_read_offset = kwargs.get("range_read_offset") + # Perform range read if range_read_size is specified, else get full object. 
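+    # For example, range_read_size=1024 with range_read_offset=2048 requests bytes 2048 through 3071; start and end are inclusive offsets, so exactly range_read_size bytes are read.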
+ if range_read_size != 0: + start = range_read_offset + end = start + range_read_size - 1 + else: + start = 0 + end = -1 + + file_path = f"{os.getcwd()}/{blob_name}" + with open(file_path, "wb") as file_obj: + start_time = time.monotonic_ns() + blob.download_to_file(file_obj, checksum=checksum, start=start, end=end) + end_time = time.monotonic_ns() + + elapsed_time = round( + (end_time - start_time) / 1000 + ) # convert nanoseconds to microseconds + + # Clean up local file + _pu.cleanup_file(file_path) + + return elapsed_time + + +def _wrapped_partial(func, *args, **kwargs): + """Helper method to create partial and propagate function name and doc from original function.""" + partial_func = partial(func, *args, **kwargs) + update_wrapper(partial_func, func) + return partial_func + + +def _generate_func_list(args): + """Generate Write-1-Read-3 workload.""" + bucket_name = args.bucket + blob_name = f"{_pu.TIMESTAMP}-{uuid.uuid4().hex}" + + # parse min_size and max_size from object_size + min_size, max_size = _pu.get_min_max_size(args.object_size) + # generate randmon size in bytes using a uniform distribution + size = random.randint(min_size, max_size) + + # generate random checksumming type: md5, crc32c or None + idx_checksum = random.choice([0, 1, 2]) + checksum = _pu.CHECKSUM[idx_checksum] + + # generated random read_offset + range_read_offset = random.randint( + args.minimum_read_offset, args.maximum_read_offset + ) + + func_list = [ + _wrapped_partial( + WRITE, + storage.Client().bucket(bucket_name), + blob_name, + size=size, + checksum=checksum, + args=args, + ), + *[ + _wrapped_partial( + READ, + storage.Client().bucket(bucket_name), + blob_name, + size=size, + checksum=checksum, + args=args, + num=i, + range_read_offset=range_read_offset, + ) + for i in range(3) + ], + ] + return func_list + + +def log_performance(func, args, elapsed_time, status, failure_msg): + """Hold benchmarking results per operation call.""" + size = func.keywords.get("size") + checksum = func.keywords.get("checksum", None) + num = func.keywords.get("num", None) + range_read_size = args.range_read_size + + res = { + "Op": func.__name__, + "ElapsedTimeUs": elapsed_time, + "ApiName": args.api, + "RunID": _pu.TIMESTAMP, + "CpuTimeUs": _pu.NOT_SUPPORTED, + "AppBufferSize": _pu.NOT_SUPPORTED, + "LibBufferSize": _pu.DEFAULT_LIB_BUFFER_SIZE, + "ChunkSize": 0, + "ObjectSize": size, + "TransferSize": size, + "TransferOffset": 0, + "RangeReadSize": range_read_size, + "BucketName": args.bucket, + "Library": "python-storage", + "Crc32cEnabled": checksum == "crc32c", + "MD5Enabled": checksum == "md5", + "FailureMsg": failure_msg, + "Status": status, + } + + if res["Op"] == "READ": + res["Op"] += f"[{num}]" + + # For range reads (workload 2), record additional outputs + if range_read_size > 0: + res["TransferSize"] = range_read_size + res["TransferOffset"] = func.keywords.get("range_read_offset", 0) + + return res + + +def run_profile_w1r3(args): + """Run w1r3 benchmarking. 
This is a wrapper used with the main benchmarking framework.""" + results = [] + + for func in _generate_func_list(args): + failure_msg = "" + try: + elapsed_time = func() + except Exception as e: + failure_msg = ( + f"Caught an exception while running operation {func.__name__}\n {e}" + ) + logging.exception(failure_msg) + status = ["FAIL"] + elapsed_time = _pu.NOT_SUPPORTED + else: + status = ["OK"] + + res = log_performance(func, args, elapsed_time, status, failure_msg) + results.append(res) + + return results + + +def run_profile_range_read(args): + """Run range read W2 benchmarking. This is a wrapper used with the main benchmarking framework.""" + results = [] + + for func in _generate_func_list(args): + failure_msg = "" + try: + elapsed_time = func() + except Exception as e: + failure_msg = ( + f"Caught an exception while running operation {func.__name__}\n {e}" + ) + logging.exception(failure_msg) + status = ["FAIL"] + elapsed_time = _pu.NOT_SUPPORTED + else: + status = ["OK"] + + # Only measure the last read + res = log_performance(func, args, elapsed_time, status, failure_msg) + results.append(res) + + return results diff --git a/tests/perf/storage.proto b/tests/perf/storage.proto deleted file mode 100644 index 055e7e786..000000000 --- a/tests/perf/storage.proto +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright 2019 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -syntax = "proto3"; - -package storage_bench; - -message ObjectRead{ - // The bucket string identifier. - string bucketName = 1; - // The object/blob string identifier. - string objectName = 2; -} - -message ObjectWrite{ - // The bucket string identifier. - string bucketName = 1; - // The object/blob string identifiers. - string objectName = 2; - // The string containing the upload file path. - string destination = 3; -} - -message EmptyResponse{ -} - -service StorageBenchWrapper{ - // Performs an upload from a specific object. - rpc Write(ObjectWrite) returns (EmptyResponse) {} - // Read a specific object. - rpc Read(ObjectRead) returns (EmptyResponse){} -} \ No newline at end of file diff --git a/tests/perf/storage_pb2.py b/tests/perf/storage_pb2.py deleted file mode 100644 index 59ea52f91..000000000 --- a/tests/perf/storage_pb2.py +++ /dev/null @@ -1,252 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! 
-# source: storage.proto - -import sys - -_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1")) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database - -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -DESCRIPTOR = _descriptor.FileDescriptor( - name="storage.proto", - package="storage_bench", - syntax="proto3", - serialized_options=None, - serialized_pb=_b( - '\n\rstorage.proto\x12\rstorage_bench"4\n\nObjectRead\x12\x12\n\nbucketName\x18\x01 \x01(\t\x12\x12\n\nobjectName\x18\x02 \x01(\t"J\n\x0bObjectWrite\x12\x12\n\nbucketName\x18\x01 \x01(\t\x12\x12\n\nobjectName\x18\x02 \x01(\t\x12\x13\n\x0b\x64\x65stination\x18\x03 \x01(\t"\x0f\n\rEmptyResponse2\x9d\x01\n\x13StorageBenchWrapper\x12\x43\n\x05Write\x12\x1a.storage_bench.ObjectWrite\x1a\x1c.storage_bench.EmptyResponse"\x00\x12\x41\n\x04Read\x12\x19.storage_bench.ObjectRead\x1a\x1c.storage_bench.EmptyResponse"\x00\x62\x06proto3' - ), -) - - -_OBJECTREAD = _descriptor.Descriptor( - name="ObjectRead", - full_name="storage_bench.ObjectRead", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="bucketName", - full_name="storage_bench.ObjectRead.bucketName", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="objectName", - full_name="storage_bench.ObjectRead.objectName", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=32, - serialized_end=84, -) - - -_OBJECTWRITE = _descriptor.Descriptor( - name="ObjectWrite", - full_name="storage_bench.ObjectWrite", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="bucketName", - full_name="storage_bench.ObjectWrite.bucketName", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="objectName", - full_name="storage_bench.ObjectWrite.objectName", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="destination", - full_name="storage_bench.ObjectWrite.destination", - index=2, - number=3, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - 
is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=86, - serialized_end=160, -) - - -_EMPTYRESPONSE = _descriptor.Descriptor( - name="EmptyResponse", - full_name="storage_bench.EmptyResponse", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=162, - serialized_end=177, -) - -DESCRIPTOR.message_types_by_name["ObjectRead"] = _OBJECTREAD -DESCRIPTOR.message_types_by_name["ObjectWrite"] = _OBJECTWRITE -DESCRIPTOR.message_types_by_name["EmptyResponse"] = _EMPTYRESPONSE -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -ObjectRead = _reflection.GeneratedProtocolMessageType( - "ObjectRead", - (_message.Message,), - { - "DESCRIPTOR": _OBJECTREAD, - "__module__": "storage_pb2" - # @@protoc_insertion_point(class_scope:storage_bench.ObjectRead) - }, -) -_sym_db.RegisterMessage(ObjectRead) - -ObjectWrite = _reflection.GeneratedProtocolMessageType( - "ObjectWrite", - (_message.Message,), - { - "DESCRIPTOR": _OBJECTWRITE, - "__module__": "storage_pb2" - # @@protoc_insertion_point(class_scope:storage_bench.ObjectWrite) - }, -) -_sym_db.RegisterMessage(ObjectWrite) - -EmptyResponse = _reflection.GeneratedProtocolMessageType( - "EmptyResponse", - (_message.Message,), - { - "DESCRIPTOR": _EMPTYRESPONSE, - "__module__": "storage_pb2" - # @@protoc_insertion_point(class_scope:storage_bench.EmptyResponse) - }, -) -_sym_db.RegisterMessage(EmptyResponse) - - -_STORAGEBENCHWRAPPER = _descriptor.ServiceDescriptor( - name="StorageBenchWrapper", - full_name="storage_bench.StorageBenchWrapper", - file=DESCRIPTOR, - index=0, - serialized_options=None, - serialized_start=180, - serialized_end=337, - methods=[ - _descriptor.MethodDescriptor( - name="Write", - full_name="storage_bench.StorageBenchWrapper.Write", - index=0, - containing_service=None, - input_type=_OBJECTWRITE, - output_type=_EMPTYRESPONSE, - serialized_options=None, - ), - _descriptor.MethodDescriptor( - name="Read", - full_name="storage_bench.StorageBenchWrapper.Read", - index=1, - containing_service=None, - input_type=_OBJECTREAD, - output_type=_EMPTYRESPONSE, - serialized_options=None, - ), - ], -) -_sym_db.RegisterServiceDescriptor(_STORAGEBENCHWRAPPER) - -DESCRIPTOR.services_by_name["StorageBenchWrapper"] = _STORAGEBENCHWRAPPER - -# @@protoc_insertion_point(module_scope) diff --git a/tests/perf/storage_pb2_grpc.py b/tests/perf/storage_pb2_grpc.py deleted file mode 100644 index 1b3a2c82f..000000000 --- a/tests/perf/storage_pb2_grpc.py +++ /dev/null @@ -1,64 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! -import grpc - -import storage_pb2 as storage__pb2 - - -class StorageBenchWrapperStub(object): - # missing associated documentation comment in .proto file - pass - - def __init__(self, channel): - """Constructor. - - Args: - channel: A grpc.Channel. 
- """ - self.Write = channel.unary_unary( - "/storage_bench.StorageBenchWrapper/Write", - request_serializer=storage__pb2.ObjectWrite.SerializeToString, - response_deserializer=storage__pb2.EmptyResponse.FromString, - ) - self.Read = channel.unary_unary( - "/storage_bench.StorageBenchWrapper/Read", - request_serializer=storage__pb2.ObjectRead.SerializeToString, - response_deserializer=storage__pb2.EmptyResponse.FromString, - ) - - -class StorageBenchWrapperServicer(object): - # missing associated documentation comment in .proto file - pass - - def Write(self, request, context): - """Performs an upload from a specific object. - """ - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details("Method not implemented!") - raise NotImplementedError("Method not implemented!") - - def Read(self, request, context): - """Read a specific object. - """ - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details("Method not implemented!") - raise NotImplementedError("Method not implemented!") - - -def add_StorageBenchWrapperServicer_to_server(servicer, server): - rpc_method_handlers = { - "Write": grpc.unary_unary_rpc_method_handler( - servicer.Write, - request_deserializer=storage__pb2.ObjectWrite.FromString, - response_serializer=storage__pb2.EmptyResponse.SerializeToString, - ), - "Read": grpc.unary_unary_rpc_method_handler( - servicer.Read, - request_deserializer=storage__pb2.ObjectRead.FromString, - response_serializer=storage__pb2.EmptyResponse.SerializeToString, - ), - } - generic_handler = grpc.method_handlers_generic_handler( - "storage_bench.StorageBenchWrapper", rpc_method_handlers - ) - server.add_generic_rpc_handlers((generic_handler,)) diff --git a/google/__init__.py b/tests/resumable_media/__init__.py similarity index 73% rename from google/__init__.py rename to tests/resumable_media/__init__.py index 0e1bc5131..7c07b241f 100644 --- a/google/__init__.py +++ b/tests/resumable_media/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2016 Google LLC +# Copyright 2017 Google Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,12 +11,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- -try: - import pkg_resources - - pkg_resources.declare_namespace(__name__) -except ImportError: - import pkgutil - - __path__ = pkgutil.extend_path(__path__, __name__) diff --git a/tests/resumable_media/data/brotli.txt b/tests/resumable_media/data/brotli.txt new file mode 100644 index 000000000..da07c5107 --- /dev/null +++ b/tests/resumable_media/data/brotli.txt @@ -0,0 +1,64 @@ +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 diff --git a/tests/resumable_media/data/brotli.txt.br b/tests/resumable_media/data/brotli.txt.br new file mode 100644 index 000000000..84828432c Binary files /dev/null and b/tests/resumable_media/data/brotli.txt.br differ diff --git a/tests/resumable_media/data/favicon.ico b/tests/resumable_media/data/favicon.ico new file mode 100644 index 000000000..e9c59160a Binary files /dev/null and b/tests/resumable_media/data/favicon.ico differ diff --git a/tests/resumable_media/data/file.txt b/tests/resumable_media/data/file.txt new file mode 100644 index 000000000..da07c5107 --- /dev/null +++ b/tests/resumable_media/data/file.txt @@ -0,0 +1,64 @@ +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 
+abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 diff --git a/tests/resumable_media/data/gzipped.txt b/tests/resumable_media/data/gzipped.txt new file mode 100644 index 000000000..da07c5107 --- /dev/null +++ b/tests/resumable_media/data/gzipped.txt @@ -0,0 +1,64 @@ +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 
+abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 diff --git a/tests/resumable_media/data/gzipped.txt.gz b/tests/resumable_media/data/gzipped.txt.gz new file mode 100644 index 000000000..83e9f396c Binary files /dev/null and b/tests/resumable_media/data/gzipped.txt.gz differ diff --git a/tests/resumable_media/data/image1.jpg b/tests/resumable_media/data/image1.jpg new file mode 100644 index 000000000..e70137b82 Binary files /dev/null and b/tests/resumable_media/data/image1.jpg differ diff --git a/tests/resumable_media/data/image2.jpg b/tests/resumable_media/data/image2.jpg new file mode 100644 index 000000000..c3969530e Binary files /dev/null and b/tests/resumable_media/data/image2.jpg differ diff --git a/tests/resumable_media/system/__init__.py b/tests/resumable_media/system/__init__.py new file mode 100644 index 000000000..7c07b241f --- /dev/null +++ b/tests/resumable_media/system/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/tests/resumable_media/system/credentials.json.enc b/tests/resumable_media/system/credentials.json.enc new file mode 100644 index 000000000..19e26ade7 --- /dev/null +++ b/tests/resumable_media/system/credentials.json.enc @@ -0,0 +1,52 @@ +U2FsdGVkX1+wqu1+eVu6OPbPoE0lzIp3B11p8Rdbha1ukxXcsskegJdBjcUqQOav +W2N3vhA7YfXW/F3T+tZMYYWk5a0vAjxLov3MgFfhvGPK0UzDwKNIXRgxhcLjcSeQ +ZmSN2kqpmSSKEPLxP0B6r50nAG6r8NYbZWs02lH2e3NGbsoGgP5PQV2oP/ZVYkET +qABgSd+xkOjE/7664QRfs/5Jl3Pl045Mzl87l1kN6oeoFpxeFqGWOR4WNflauS3s +96SKsbrCQ4aF/9n9hCz31J9cJosu54eTB9s0fKBkDx7xmouwT3Cqv2KGwJPUCRHk +3a+3ijxhNz65dYCRp20dUpJuudFQvMpsptn7oAFtNQhvcFrpjnyBn3ODr9JhLBEy +PTdJbv06ufb+SH9YNMpH3nTYCkS7ZgrnzhteFJtoMzX6sAYiMUmIZtGY7J8MaSE0 +AYqTO/EGkzzSw33o2nNGcg0lsW1tdmY5GKuJ3jlc1Hi6RHpmgbdv+0dAYi734sYs ++0wE18QMe4/RIOCBslMAWvlo9LX9QDLkolToToQ+HN/kJNQOumkxwcjBV3piiJQH +LaX9bI6lnqkoMl/2GvuR+oQTfzQxjGKdenLWZO2ODH2rr90hXi9vlXjdpDGreMGy +Mv4lcwmw3Pd1JreKJtdc2ObDrU/o7wDJe4txNCGwCSAZacI+5c/27mT1yOfgE/EK +Q3LHjqZhFlLI4K0KqH+dyQutL7b1uPtQpeWAVAt/yHs7nNWF62UAdVR+hZyko2Dy +HWoYtJDMazfpS98c8VWi0FyGfYVESedWvBCLHch4wWqaccY0HWk9sehyC4XrPX8v +OMw6J1va3vprzCQte56fXNzzpU6f0XeT3OGj5RCN/POMnN+cjyuwqFOsWNCfpXaV +lhNj3zg+fMk4mM+wa2KdUk6xa0vj7YblgJ5uvZ3lG81ydZCRoFWqaO6497lnj8NV +SEDqDdJ+/dw+Sf2ur3hyJ9DW0JD8QJkSwfLrqT51eoOqTfFFGdwy2iuXP426l/NH +mkyusp8UZNPaKZSF9jC8++18fC2Nbbd+dTIn6XWdZKKRZLZ/hca8QP0QesrtYo36 +6kx8Kl3nAbgOk9wFFsZdkUyOy3iRxkBF0qoaH1kPzyxIpNeeIg5cBPWLwN5FVBdd +eBy8R4i4y/W8yhib34vcOliP0IfAB/VvXJRMUCc1bENfZskMb4mvtsYblyf68Fne +OjtcSKV2drO+mRmH1H2sPH/yE2yVDivhY5FJxDRFMnS9HXDMpGoukirMLgCjnSre +ZXMVaDzkRw1RtsOms+F7EVJb5v/HKu6I34YNJDlAFy6AASmz+H0EXBDK4mma8GSu +BOgPY3PbF8R+KnzKsOVbaOon90dGclnUNlqnVvsnNeWWKJmL7rCPkMHfb5dBhw60 +j9oLmu74+xmuf9aqzSvrcaHV9u+zf2eCsdQJhttaDYFAKg1q43fhZYHIaURidoD+ +UTxn0AVygiKkTwTFQl1+taDiRffOtNvumSLZG9n8cimoBvzKle3H9tv43uyO6muG +ty0m8Pyk5LyLE9DaDQwxq+++8g7boXQe7jCtAIMxRveIdwWPI/XHbyZ3I4uTG65F +RV5K8Q34VVjagdPMNq0ijo73iYy5RH18MSQc8eG3UtqVvr/QeSdPEb8N6o+OwEG8 +VuAFbKPHMfQrjwGCtr0YvHTmvZPlFef+J3iH6WPfFFbe5ZS8XQUoR1dZHX9BXIXK +Om/itKUoHvAuYIqjTboqK181OVr/9a2FipXxbenXYiWXRtLGpHeetZbKRhxwWe0h +kDdDL/XglsRNasfLz4c9AyGzJJi7J9Pr7uBSX9QFHLeGQP6jfHrEqBkiGEUP9iQr +11wabtNouC+1tT0erBAm/KEps81l76NZ7OxqOM8mLrdAE8RO/ypZTqZW4saQnry/ +iUGhwEnRNZpEh8xiYSZ8JgUTbbKo4+FXZxUwV1DBQ7oroPrduaukd68m4E6Tqsx+ +lTl25hLhNTEJCYQ0hg2CeZdSpOPGgpn+zhLDvlQ0lPZDCByh9xCepAq/oUArddln +vobPdBRVW27gYntAYMlFbc1hSN/LKoZOYq6jBNAPykiv5tTWNV71HUE7b1nRfo27 +aGf3Ptzu7GRXVLom+WKxswUqzkWC8afvrNnZ040wiLQnWzn2yxytipUg3UxIvP+U +klWj8Tt1wBmG/JGLEThwcjPTOGvDkocQAAImlV3diiqwTHlj+pLZVRtJA4SOQxI8 +ChFi73B8gPOexfqYPUFdB90FJWsxTQGZaucyuNTqFMuJ9eEDP5WmK4lcJuKFTCGT +M4VYd9j4JlxRRQxKkMhfoXeUsW3TH6uAmKxN79AiYnOh6QUIv+PP+yt9WwQhNqkb +7otLl0AKdMBizxyq6AExlw/VmdYDJxcZ4Y/P+M85Ae5e+Lz/XjWHLnjP1BPI6C+n +A/RbICOd/W/wf6ZOZlVBW1wePv0M5jWDGL086lHVrgBnzdWrQTHhzG43v1IaN/vK +EVZfvkqTe5AWNoK1Da/zEafWf0jzc4cS0grCA9KJ0nHwRYYEG0YQAGqY12PDn9tH +WjCVDa6wlw/Niq6BAmkE8d9ds2I8l0Xm1eHaMM3U3xY0OsmDYVP2p+BXZ7qWKa9c +XjuT8gWTS0gZqerlALxTsIEy4/5iKhqdepjAefZxozS30kZhCMG7WXORV9pcdYFP +rCoVPES85sAfwjjL9ZxmtoqH5845KoTlZWqbI/NJ/KCNa1VGXcc7NuNnCUo8sWqe +kTwFSOnF+kaXtDFjM5/7/eQWKBelWWXysMX2+pUCQdIcUa5LW3M+16AjF906+DGZ +pptUebilOd7CEXFKwgO2dZXLkTXj5hyKHYyTt066jPIdyAfGZe9oF0ttzwSS74WY +Y1Sx1PvAH8B5+jfGnYKhVZHbX0nzdBvwG3FNlg2+GVrpTynTH1l1pVUV8YWrbWhh +JE+xjLk0RKfC9jmhs3EenpfpYAEkIKZO3CGVXhZMi4kd7wUZud9vGjOcBlOF3YGG +cVjYDRAymlY1VH3hvkToMZPdjJk8+1fT0bbWTXXjppV3tpC9aybz4H3BOvTXh8MN +c7X4Pn1rDgjtPK2HfvuR6t9+LqWYTM15NeTnEtdkDdQGUmr3CYQI2h07bQYjtGDY +XCfYZ4rRLYGcXiRKmm+NGGb/rsJcJe0KeVPZZmIFP5gfvmWvaQeY4lYw1YABdh9Y 
+gTIqd+T4OGB5S9EIGrG6uXrlJkCZnIxOJjBPGkVsygn2QOdkIJ8tnycXB3ChTBfL +FMA3i59W/pGf9apHpGF+iA== diff --git a/tests/resumable_media/system/requests/__init__.py b/tests/resumable_media/system/requests/__init__.py new file mode 100644 index 000000000..7c07b241f --- /dev/null +++ b/tests/resumable_media/system/requests/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/resumable_media/system/requests/conftest.py b/tests/resumable_media/system/requests/conftest.py new file mode 100644 index 000000000..67908795b --- /dev/null +++ b/tests/resumable_media/system/requests/conftest.py @@ -0,0 +1,58 @@ +# Copyright 2019 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""py.test fixtures to be shared across multiple system test modules.""" + +import google.auth # type: ignore +import google.auth.transport.requests as tr_requests # type: ignore +import pytest # type: ignore + +from .. import utils + + +def ensure_bucket(transport): + get_response = transport.get(utils.BUCKET_URL) + if get_response.status_code == 404: + credentials = transport.credentials + query_params = {"project": credentials.project_id} + payload = {"name": utils.BUCKET_NAME} + post_response = transport.post( + utils.BUCKET_POST_URL, params=query_params, json=payload + ) + + if not post_response.ok: + raise ValueError( + "{}: {}".format(post_response.status_code, post_response.reason) + ) + + +def cleanup_bucket(transport): + del_response = utils.retry_transient_errors(transport.delete)(utils.BUCKET_URL) + + if not del_response.ok: + raise ValueError("{}: {}".format(del_response.status_code, del_response.reason)) + + +@pytest.fixture(scope="session") +def authorized_transport(): + credentials, _ = google.auth.default(scopes=(utils.GCS_RW_SCOPE,)) + yield tr_requests.AuthorizedSession(credentials) + + +@pytest.fixture(scope="session") +def bucket(authorized_transport): + ensure_bucket(authorized_transport) + + yield utils.BUCKET_NAME + + cleanup_bucket(authorized_transport) diff --git a/tests/resumable_media/system/requests/test_download.py b/tests/resumable_media/system/requests/test_download.py new file mode 100644 index 000000000..15fe7d2c0 --- /dev/null +++ b/tests/resumable_media/system/requests/test_download.py @@ -0,0 +1,637 @@ +# Copyright 2017 Google Inc. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import base64 +import copy +import hashlib +import http.client +import io +import os + +import google.auth # type: ignore +import google.auth.transport.requests as tr_requests # type: ignore +import pytest # type: ignore + +import google.cloud.storage._media.requests as resumable_requests +from google.cloud.storage._media import _helpers +from google.cloud.storage._media.requests import _request_helpers +import google.cloud.storage._media.requests.download as download_mod +from google.cloud.storage.exceptions import InvalidResponse +from google.cloud.storage.exceptions import DataCorruption +from .. import utils + +import google_crc32c + + +CURR_DIR = os.path.dirname(os.path.realpath(__file__)) +DATA_DIR = os.path.join(CURR_DIR, "..", "..", "data") +PLAIN_TEXT = "text/plain" +IMAGE_JPEG = "image/jpeg" +ENCRYPTED_ERR = b"The target object is encrypted by a customer-supplied encryption key." +NO_BODY_ERR = "The content for this response was already consumed" +NOT_FOUND_ERR = ( + b"No such object: " + utils.BUCKET_NAME.encode("utf-8") + b"/does-not-exist.txt" +) +SIMPLE_DOWNLOADS = (resumable_requests.Download, resumable_requests.RawDownload) + + +class CorruptingAuthorizedSession(tr_requests.AuthorizedSession): + """A Requests Session class with credentials, which corrupts responses. + + This class is used for testing checksum validation. + + Args: + credentials (google.auth.credentials.Credentials): The credentials to + add to the request. + refresh_status_codes (Sequence[int]): Which HTTP status codes indicate + that credentials should be refreshed and the request should be + retried. + max_refresh_attempts (int): The maximum number of times to attempt to + refresh the credentials and retry the request. + kwargs: Additional arguments passed to the :class:`requests.Session` + constructor. 
+ """ + + EMPTY_MD5 = base64.b64encode(hashlib.md5(b"").digest()).decode("utf-8") + crc32c = google_crc32c.Checksum() + crc32c.update(b"") + EMPTY_CRC32C = base64.b64encode(crc32c.digest()).decode("utf-8") + + def request(self, method, url, data=None, headers=None, **kwargs): + """Implementation of Requests' request.""" + response = tr_requests.AuthorizedSession.request( + self, method, url, data=data, headers=headers, **kwargs + ) + response.headers[_helpers._HASH_HEADER] = "crc32c={},md5={}".format( + self.EMPTY_CRC32C, self.EMPTY_MD5 + ) + return response + + +def get_path(filename): + return os.path.realpath(os.path.join(DATA_DIR, filename)) + + +ALL_FILES = ( + { + "path": get_path("image1.jpg"), + "content_type": IMAGE_JPEG, + "md5": "1bsd83IYNug8hd+V1ING3Q==", + "crc32c": "YQGPxA==", + "slices": ( + slice(1024, 16386, None), # obj[1024:16386] + slice(None, 8192, None), # obj[:8192] + slice(-256, None, None), # obj[-256:] + slice(262144, None, None), # obj[262144:] + ), + }, + { + "path": get_path("image2.jpg"), + "content_type": IMAGE_JPEG, + "md5": "gdLXJltiYAMP9WZZFEQI1Q==", + "crc32c": "sxxEFQ==", + "slices": ( + slice(1024, 16386, None), # obj[1024:16386] + slice(None, 8192, None), # obj[:8192] + slice(-256, None, None), # obj[-256:] + slice(262144, None, None), # obj[262144:] + ), + }, + { + "path": get_path("file.txt"), + "content_type": PLAIN_TEXT, + "md5": "XHSHAr/SpIeZtZbjgQ4nGw==", + "crc32c": "MeMHoQ==", + "slices": (), + }, + { + "path": get_path("gzipped.txt.gz"), + "uncompressed": get_path("gzipped.txt"), + "content_type": PLAIN_TEXT, + "md5": "KHRs/+ZSrc/FuuR4qz/PZQ==", + "crc32c": "/LIRNg==", + "slices": (), + "metadata": {"contentEncoding": "gzip"}, + }, + { + "path": get_path("brotli.txt.br"), + "uncompressed": get_path("brotli.txt"), + "content_type": PLAIN_TEXT, + "md5": "MffJw7pTSX/7CVWFFPgwQA==", + "crc32c": "GGK0OQ==", + "slices": (), + "metadata": {"contentEncoding": "br"}, + }, +) + + +def get_contents_for_upload(info): + with open(info["path"], "rb") as file_obj: + return file_obj.read() + + +def get_contents(info): + full_path = info.get("uncompressed", info["path"]) + with open(full_path, "rb") as file_obj: + return file_obj.read() + + +def get_raw_contents(info): + full_path = info["path"] + with open(full_path, "rb") as file_obj: + return file_obj.read() + + +def get_blob_name(info): + full_path = info.get("uncompressed", info["path"]) + return os.path.basename(full_path) + + +def delete_blob(transport, blob_name): + metadata_url = utils.METADATA_URL_TEMPLATE.format(blob_name=blob_name) + response = transport.delete(metadata_url) + assert response.status_code == http.client.NO_CONTENT + + +@pytest.fixture(scope="module") +def secret_file(authorized_transport, bucket): + blob_name = "super-seekrit.txt" + data = b"Please do not tell anyone my encrypted seekrit." + + upload_url = utils.SIMPLE_UPLOAD_TEMPLATE.format(blob_name=blob_name) + headers = utils.get_encryption_headers() + upload = resumable_requests.SimpleUpload(upload_url, headers=headers) + response = upload.transmit(authorized_transport, data, PLAIN_TEXT) + assert response.status_code == http.client.OK + + yield blob_name, data, headers + + delete_blob(authorized_transport, blob_name) + + +# Transport that returns corrupt data, so we can exercise checksum handling. 
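+# CorruptingAuthorizedSession (above) rewrites the response hash header with digests of b"", so any checksummed download made through this transport is guaranteed to fail validation.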
+@pytest.fixture(scope="module") +def corrupting_transport(): + credentials, _ = google.auth.default(scopes=(utils.GCS_RW_SCOPE,)) + yield CorruptingAuthorizedSession(credentials) + + +@pytest.fixture(scope="module") +def simple_file(authorized_transport, bucket): + blob_name = "basic-file.txt" + upload_url = utils.SIMPLE_UPLOAD_TEMPLATE.format(blob_name=blob_name) + upload = resumable_requests.SimpleUpload(upload_url) + data = b"Simple contents" + response = upload.transmit(authorized_transport, data, PLAIN_TEXT) + assert response.status_code == http.client.OK + + yield blob_name, data + + delete_blob(authorized_transport, blob_name) + + +@pytest.fixture(scope="module") +def add_files(authorized_transport, bucket): + blob_names = [] + for info in ALL_FILES: + to_upload = get_contents_for_upload(info) + blob_name = get_blob_name(info) + + blob_names.append(blob_name) + if "metadata" in info: + upload = resumable_requests.MultipartUpload(utils.MULTIPART_UPLOAD) + metadata = copy.deepcopy(info["metadata"]) + metadata["name"] = blob_name + response = upload.transmit( + authorized_transport, to_upload, metadata, info["content_type"] + ) + else: + upload_url = utils.SIMPLE_UPLOAD_TEMPLATE.format(blob_name=blob_name) + upload = resumable_requests.SimpleUpload(upload_url) + response = upload.transmit( + authorized_transport, to_upload, info["content_type"] + ) + + assert response.status_code == http.client.OK + + yield + + # Clean-up the blobs we created. + for blob_name in blob_names: + delete_blob(authorized_transport, blob_name) + + +def check_tombstoned(download, transport): + assert download.finished + if isinstance(download, SIMPLE_DOWNLOADS): + with pytest.raises(ValueError) as exc_info: + download.consume(transport) + assert exc_info.match("A download can only be used once.") + else: + with pytest.raises(ValueError) as exc_info: + download.consume_next_chunk(transport) + assert exc_info.match("Download has finished.") + + +def check_error_response(exc_info, status_code, message): + error = exc_info.value + response = error.response + assert response.status_code == status_code + assert response.content.startswith(message) + assert len(error.args) == 5 + assert error.args[1] == status_code + assert error.args[3] == http.client.OK + assert error.args[4] == http.client.PARTIAL_CONTENT + + +class TestDownload(object): + @staticmethod + def _get_target_class(): + return resumable_requests.Download + + def _make_one(self, media_url, **kw): + return self._get_target_class()(media_url, **kw) + + @staticmethod + def _get_contents(info): + return get_contents(info) + + @staticmethod + def _read_response_content(response): + return response.content + + @pytest.mark.parametrize("checksum", ["auto", "md5", "crc32c", None]) + def test_download_full(self, add_files, authorized_transport, checksum): + for info in ALL_FILES: + actual_contents = self._get_contents(info) + blob_name = get_blob_name(info) + + # Create the actual download object. + media_url = utils.DOWNLOAD_URL_TEMPLATE.format(blob_name=blob_name) + download = self._make_one(media_url, checksum=checksum) + # Consume the resource. 
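+ # With no target stream configured, consume() buffers the payload so it is available as response.content.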
+ response = download.consume(authorized_transport) + assert response.status_code == http.client.OK + assert self._read_response_content(response) == actual_contents + check_tombstoned(download, authorized_transport) + + def test_download_to_stream(self, add_files, authorized_transport): + for info in ALL_FILES: + actual_contents = self._get_contents(info) + blob_name = get_blob_name(info) + + # Create the actual download object. + media_url = utils.DOWNLOAD_URL_TEMPLATE.format(blob_name=blob_name) + stream = io.BytesIO() + download = self._make_one(media_url, stream=stream) + # Consume the resource. + response = download.consume(authorized_transport) + assert response.status_code == http.client.OK + with pytest.raises(RuntimeError) as exc_info: + getattr(response, "content") + assert exc_info.value.args == (NO_BODY_ERR,) + assert response._content is False + assert response._content_consumed is True + assert stream.getvalue() == actual_contents + check_tombstoned(download, authorized_transport) + + def test_download_gzip_w_stored_content_headers( + self, add_files, authorized_transport + ): + # Retrieve the gzip compressed file + info = ALL_FILES[-2] + actual_contents = self._get_contents(info) + blob_name = get_blob_name(info) + + # Create the actual download object. + media_url = utils.DOWNLOAD_URL_TEMPLATE.format(blob_name=blob_name) + stream = io.BytesIO() + download = self._make_one(media_url, stream=stream) + # Consume the resource. + response = download.consume(authorized_transport) + assert response.status_code == http.client.OK + assert response.headers.get(_helpers._STORED_CONTENT_ENCODING_HEADER) == "gzip" + assert response.headers.get("X-Goog-Stored-Content-Length") is not None + assert stream.getvalue() == actual_contents + check_tombstoned(download, authorized_transport) + + @pytest.mark.parametrize("checksum", ["md5", "crc32c"]) + def test_download_brotli_w_stored_content_headers( + self, add_files, authorized_transport, checksum + ): + # Retrieve the br compressed file + info = ALL_FILES[-1] + actual_contents = self._get_contents(info) + blob_name = get_blob_name(info) + + # Create the actual download object. + media_url = utils.DOWNLOAD_URL_TEMPLATE.format(blob_name=blob_name) + stream = io.BytesIO() + download = self._make_one(media_url, stream=stream, checksum=checksum) + # Consume the resource. + response = download.consume(authorized_transport) + assert response.status_code == http.client.OK + assert response.headers.get(_helpers._STORED_CONTENT_ENCODING_HEADER) == "br" + assert response.headers.get("X-Goog-Stored-Content-Length") is not None + assert stream.getvalue() == actual_contents + check_tombstoned(download, authorized_transport) + + def test_extra_headers(self, authorized_transport, secret_file): + blob_name, data, headers = secret_file + # Create the actual download object. + media_url = utils.DOWNLOAD_URL_TEMPLATE.format(blob_name=blob_name) + download = self._make_one(media_url, headers=headers) + # Consume the resource. + response = download.consume(authorized_transport) + assert response.status_code == http.client.OK + assert response.content == data + check_tombstoned(download, authorized_transport) + # Attempt to consume the resource **without** the headers. 
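+ # Without the customer-supplied encryption key headers, GCS should reject the read with HTTP 400 Bad Request.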
+ download_wo = self._make_one(media_url) + with pytest.raises(InvalidResponse) as exc_info: + download_wo.consume(authorized_transport) + + check_error_response(exc_info, http.client.BAD_REQUEST, ENCRYPTED_ERR) + check_tombstoned(download_wo, authorized_transport) + + def test_non_existent_file(self, authorized_transport, bucket): + blob_name = "does-not-exist.txt" + media_url = utils.DOWNLOAD_URL_TEMPLATE.format(blob_name=blob_name) + download = self._make_one(media_url) + + # Try to consume the resource and fail. + with pytest.raises(InvalidResponse) as exc_info: + download.consume(authorized_transport) + check_error_response(exc_info, http.client.NOT_FOUND, NOT_FOUND_ERR) + check_tombstoned(download, authorized_transport) + + def test_bad_range(self, simple_file, authorized_transport): + blob_name, data = simple_file + # Make sure we have an invalid range. + start = 32 + end = 63 + assert len(data) < start < end + # Create the actual download object. + media_url = utils.DOWNLOAD_URL_TEMPLATE.format(blob_name=blob_name) + download = self._make_one(media_url, start=start, end=end) + + # Try to consume the resource and fail. + with pytest.raises(InvalidResponse) as exc_info: + download.consume(authorized_transport) + + check_error_response( + exc_info, + http.client.REQUESTED_RANGE_NOT_SATISFIABLE, + b"Request range not satisfiable", + ) + check_tombstoned(download, authorized_transport) + + def _download_slice(self, media_url, slice_): + assert slice_.step is None + + end = None + if slice_.stop is not None: + end = slice_.stop - 1 + + return self._make_one(media_url, start=slice_.start, end=end) + + def test_download_partial(self, add_files, authorized_transport): + for info in ALL_FILES: + actual_contents = self._get_contents(info) + blob_name = get_blob_name(info) + + media_url = utils.DOWNLOAD_URL_TEMPLATE.format(blob_name=blob_name) + for slice_ in info["slices"]: + download = self._download_slice(media_url, slice_) + response = download.consume(authorized_transport) + assert response.status_code == http.client.PARTIAL_CONTENT + assert response.content == actual_contents[slice_] + with pytest.raises(ValueError): + download.consume(authorized_transport) + + +class TestRawDownload(TestDownload): + @staticmethod + def _get_target_class(): + return resumable_requests.RawDownload + + @staticmethod + def _get_contents(info): + return get_raw_contents(info) + + @staticmethod + def _read_response_content(response): + return b"".join( + response.raw.stream( + _request_helpers._SINGLE_GET_CHUNK_SIZE, decode_content=False + ) + ) + + @pytest.mark.parametrize("checksum", ["md5", "crc32c"]) + def test_corrupt_download(self, add_files, corrupting_transport, checksum): + for info in ALL_FILES: + blob_name = get_blob_name(info) + + # Create the actual download object. + media_url = utils.DOWNLOAD_URL_TEMPLATE.format(blob_name=blob_name) + stream = io.BytesIO() + download = self._make_one(media_url, stream=stream, checksum=checksum) + # Consume the resource. 
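+ # The corrupting transport advertises hashes for empty content, so the download is expected to finish but raise DataCorruption.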
+ with pytest.raises(DataCorruption) as exc_info: + download.consume(corrupting_transport) + + assert download.finished + + if checksum == "md5": + EMPTY_HASH = CorruptingAuthorizedSession.EMPTY_MD5 + else: + EMPTY_HASH = CorruptingAuthorizedSession.EMPTY_CRC32C + msg = download_mod._CHECKSUM_MISMATCH.format( + download.media_url, + EMPTY_HASH, + info[checksum], + checksum_type=checksum.upper(), + ) + assert exc_info.value.args == (msg,) + + def test_corrupt_download_no_check(self, add_files, corrupting_transport): + for info in ALL_FILES: + blob_name = get_blob_name(info) + + # Create the actual download object. + media_url = utils.DOWNLOAD_URL_TEMPLATE.format(blob_name=blob_name) + stream = io.BytesIO() + download = self._make_one(media_url, stream=stream, checksum=None) + # Consume the resource. + download.consume(corrupting_transport) + + assert download.finished + + +def get_chunk_size(min_chunks, total_bytes): + # Make sure the number of chunks **DOES NOT** evenly divide. + num_chunks = min_chunks + while total_bytes % num_chunks == 0: + num_chunks += 1 + + chunk_size = total_bytes // num_chunks + # Since we know an integer division has remainder, increment by 1. + chunk_size += 1 + assert total_bytes < num_chunks * chunk_size + + return num_chunks, chunk_size + + +def consume_chunks(download, authorized_transport, total_bytes, actual_contents): + start_byte = download.start + end_byte = download.end + if end_byte is None: + end_byte = total_bytes - 1 + + num_responses = 0 + while not download.finished: + response = download.consume_next_chunk(authorized_transport) + num_responses += 1 + + next_byte = min(start_byte + download.chunk_size, end_byte + 1) + assert download.bytes_downloaded == next_byte - download.start + assert download.total_bytes == total_bytes + assert response.status_code == http.client.PARTIAL_CONTENT + assert response.content == actual_contents[start_byte:next_byte] + start_byte = next_byte + + return num_responses, response + + +class TestChunkedDownload(object): + @staticmethod + def _get_target_class(): + return resumable_requests.ChunkedDownload + + def _make_one(self, media_url, chunk_size, stream, **kw): + return self._get_target_class()(media_url, chunk_size, stream, **kw) + + @staticmethod + def _get_contents(info): + return get_contents(info) + + def test_chunked_download_partial(self, add_files, authorized_transport): + for info in ALL_FILES: + actual_contents = self._get_contents(info) + blob_name = get_blob_name(info) + + media_url = utils.DOWNLOAD_URL_TEMPLATE.format(blob_name=blob_name) + for slice_ in info["slices"]: + # Manually replace a missing start with 0. + start = 0 if slice_.start is None else slice_.start + # Chunked downloads don't support a negative index. + if start < 0: + continue + + # First determine how much content is in the slice and + # use it to determine a chunking strategy. + total_bytes = len(actual_contents) + if slice_.stop is None: + end_byte = total_bytes - 1 + end = None + else: + # Python slices DO NOT include the last index, though a byte + # range **is** inclusive of both endpoints. + end_byte = slice_.stop - 1 + end = end_byte + + num_chunks, chunk_size = get_chunk_size(7, end_byte - start + 1) + # Create the actual download object. + stream = io.BytesIO() + download = self._make_one( + media_url, chunk_size, stream, start=start, end=end + ) + # Consume the resource in chunks. 
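+ # consume_chunks() (defined above) calls consume_next_chunk() until the download reports finished, returning the response count and the final response.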
+ num_responses, last_response = consume_chunks( + download, authorized_transport, total_bytes, actual_contents + ) + + # Make sure the combined chunks are the whole slice. + assert stream.getvalue() == actual_contents[slice_] + # Check that we have the right number of responses. + assert num_responses == num_chunks + # Make sure the last chunk isn't the same size. + assert len(last_response.content) < chunk_size + check_tombstoned(download, authorized_transport) + + def test_chunked_with_extra_headers(self, authorized_transport, secret_file): + blob_name, data, headers = secret_file + num_chunks = 4 + chunk_size = 12 + assert (num_chunks - 1) * chunk_size < len(data) < num_chunks * chunk_size + # Create the actual download object. + media_url = utils.DOWNLOAD_URL_TEMPLATE.format(blob_name=blob_name) + stream = io.BytesIO() + download = self._make_one(media_url, chunk_size, stream, headers=headers) + # Consume the resource in chunks. + num_responses, last_response = consume_chunks( + download, authorized_transport, len(data), data + ) + # Make sure the combined chunks are the whole object. + assert stream.getvalue() == data + # Check that we have the right number of responses. + assert num_responses == num_chunks + # Make sure the last chunk isn't the same size. + assert len(last_response.content) < chunk_size + check_tombstoned(download, authorized_transport) + # Attempt to consume the resource **without** the headers. + stream_wo = io.BytesIO() + download_wo = resumable_requests.ChunkedDownload( + media_url, chunk_size, stream_wo + ) + with pytest.raises(InvalidResponse) as exc_info: + download_wo.consume_next_chunk(authorized_transport) + + assert stream_wo.tell() == 0 + check_error_response(exc_info, http.client.BAD_REQUEST, ENCRYPTED_ERR) + assert download_wo.invalid + + +class TestRawChunkedDownload(TestChunkedDownload): + @staticmethod + def _get_target_class(): + return resumable_requests.RawChunkedDownload + + @staticmethod + def _get_contents(info): + return get_raw_contents(info) + + def test_chunked_download_full(self, add_files, authorized_transport): + for info in ALL_FILES: + actual_contents = self._get_contents(info) + blob_name = get_blob_name(info) + + total_bytes = len(actual_contents) + num_chunks, chunk_size = get_chunk_size(7, total_bytes) + # Create the actual download object. + media_url = utils.DOWNLOAD_URL_TEMPLATE.format(blob_name=blob_name) + stream = io.BytesIO() + download = self._make_one(media_url, chunk_size, stream) + # Consume the resource in chunks. + num_responses, last_response = consume_chunks( + download, authorized_transport, total_bytes, actual_contents + ) + # Make sure the combined chunks are the whole object. + assert stream.getvalue() == actual_contents + # Check that we have the right number of responses. + assert num_responses == num_chunks + # Make sure the last chunk isn't the same size. + assert total_bytes % chunk_size != 0 + assert len(last_response.content) < chunk_size + check_tombstoned(download, authorized_transport) diff --git a/tests/resumable_media/system/requests/test_upload.py b/tests/resumable_media/system/requests/test_upload.py new file mode 100644 index 000000000..f9e3b8164 --- /dev/null +++ b/tests/resumable_media/system/requests/test_upload.py @@ -0,0 +1,777 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import base64 +import hashlib +import http.client +import io +import os +import urllib.parse + +import pytest # type: ignore +from unittest import mock + +from google.cloud.storage import _media +import google.cloud.storage._media.requests as resumable_requests +from google.cloud.storage._media import _helpers +from .. import utils +from google.cloud.storage._media import _upload +from google.cloud.storage.exceptions import InvalidResponse +from google.cloud.storage.exceptions import DataCorruption + + +CURR_DIR = os.path.dirname(os.path.realpath(__file__)) +DATA_DIR = os.path.join(CURR_DIR, "..", "..", "data") +ICO_FILE = os.path.realpath(os.path.join(DATA_DIR, "favicon.ico")) +IMAGE_FILE = os.path.realpath(os.path.join(DATA_DIR, "image1.jpg")) +ICO_CONTENT_TYPE = "image/x-icon" +JPEG_CONTENT_TYPE = "image/jpeg" +BYTES_CONTENT_TYPE = "application/octet-stream" +BAD_CHUNK_SIZE_MSG = ( + b"Invalid request. The number of bytes uploaded is required to be equal " + b"or greater than 262144, except for the final request (it's recommended " + b"to be the exact multiple of 262144). The received request contained " + b"1024 bytes, which does not meet this requirement." +) + + +@pytest.fixture +def cleanup(): + to_delete = [] + + def add_cleanup(blob_name, transport): + to_delete.append((blob_name, transport)) + + yield add_cleanup + + for blob_name, transport in to_delete: + metadata_url = utils.METADATA_URL_TEMPLATE.format(blob_name=blob_name) + response = utils.retry_transient_errors(transport.delete)(metadata_url) + assert response.status_code == http.client.NO_CONTENT + + +@pytest.fixture +def img_stream(): + """Open-file as a fixture. + + This is so that an entire test can execute in the context of + the context manager without worrying about closing the file. + """ + with open(IMAGE_FILE, "rb") as file_obj: + yield file_obj + + +def get_md5(data): + hash_obj = hashlib.md5(data) + return base64.b64encode(hash_obj.digest()) + + +def get_upload_id(upload_url): + parse_result = urllib.parse.urlparse(upload_url) + parsed_query = urllib.parse.parse_qs(parse_result.query) + # NOTE: We are unpacking here, so asserting exactly one match. 
+ (upload_id,) = parsed_query["upload_id"] + return upload_id + + +def get_num_chunks(total_bytes, chunk_size): + expected_chunks, remainder = divmod(total_bytes, chunk_size) + if remainder > 0: + expected_chunks += 1 + return expected_chunks + + +def check_response( + response, + blob_name, + actual_contents=None, + total_bytes=None, + metadata=None, + content_type=ICO_CONTENT_TYPE, +): + assert response.status_code == http.client.OK + json_response = response.json() + assert json_response["bucket"] == utils.BUCKET_NAME + assert json_response["contentType"] == content_type + if actual_contents is not None: + md5_hash = json_response["md5Hash"].encode("ascii") + assert md5_hash == get_md5(actual_contents) + total_bytes = len(actual_contents) + assert json_response["metageneration"] == "1" + assert json_response["name"] == blob_name + assert json_response["size"] == "{:d}".format(total_bytes) + assert json_response["storageClass"] == "STANDARD" + if metadata is None: + assert "metadata" not in json_response + else: + assert json_response["metadata"] == metadata + + +def check_content(blob_name, expected_content, transport, headers=None): + media_url = utils.DOWNLOAD_URL_TEMPLATE.format(blob_name=blob_name) + download = resumable_requests.Download(media_url, headers=headers) + response = download.consume(transport) + assert response.status_code == http.client.OK + assert response.content == expected_content + + +def check_tombstoned(upload, transport, *args): + assert upload.finished + basic_types = (resumable_requests.SimpleUpload, resumable_requests.MultipartUpload) + if isinstance(upload, basic_types): + with pytest.raises(ValueError): + upload.transmit(transport, *args) + else: + with pytest.raises(ValueError): + upload.transmit_next_chunk(transport, *args) + + +def check_does_not_exist(transport, blob_name): + metadata_url = utils.METADATA_URL_TEMPLATE.format(blob_name=blob_name) + # Make sure we are creating a **new** object. + response = transport.get(metadata_url) + assert response.status_code == http.client.NOT_FOUND + + +def check_initiate(response, upload, stream, transport, metadata): + assert response.status_code == http.client.OK + assert response.content == b"" + upload_id = get_upload_id(upload.resumable_url) + assert response.headers["x-guploader-uploadid"] == upload_id + assert stream.tell() == 0 + # Make sure the upload cannot be re-initiated. 
+ with pytest.raises(ValueError) as exc_info: + upload.initiate(transport, stream, metadata, JPEG_CONTENT_TYPE) + + exc_info.match("This upload has already been initiated.") + + +def check_bad_chunk(upload, transport): + with pytest.raises(InvalidResponse) as exc_info: + upload.transmit_next_chunk(transport) + error = exc_info.value + response = error.response + assert response.status_code == http.client.BAD_REQUEST + assert response.content == BAD_CHUNK_SIZE_MSG + + +def transmit_chunks( + upload, transport, blob_name, metadata, num_chunks=0, content_type=JPEG_CONTENT_TYPE +): + while not upload.finished: + num_chunks += 1 + response = upload.transmit_next_chunk(transport) + if upload.finished: + assert upload.bytes_uploaded == upload.total_bytes + check_response( + response, + blob_name, + total_bytes=upload.total_bytes, + metadata=metadata, + content_type=content_type, + ) + else: + assert upload.bytes_uploaded == num_chunks * upload.chunk_size + assert response.status_code == http.client.PERMANENT_REDIRECT + + return num_chunks + + +def test_simple_upload(authorized_transport, bucket, cleanup): + with open(ICO_FILE, "rb") as file_obj: + actual_contents = file_obj.read() + + blob_name = os.path.basename(ICO_FILE) + # Make sure to clean up the uploaded blob when we are done. + cleanup(blob_name, authorized_transport) + check_does_not_exist(authorized_transport, blob_name) + + # Create the actual upload object. + upload_url = utils.SIMPLE_UPLOAD_TEMPLATE.format(blob_name=blob_name) + upload = resumable_requests.SimpleUpload(upload_url) + # Transmit the resource. + response = upload.transmit(authorized_transport, actual_contents, ICO_CONTENT_TYPE) + check_response(response, blob_name, actual_contents=actual_contents) + # Download the content to make sure it's "working as expected". + check_content(blob_name, actual_contents, authorized_transport) + # Make sure the upload is tombstoned. + check_tombstoned(upload, authorized_transport, actual_contents, ICO_CONTENT_TYPE) + + +def test_simple_upload_with_headers(authorized_transport, bucket, cleanup): + blob_name = "some-stuff.bin" + # Make sure to clean up the uploaded blob when we are done. + cleanup(blob_name, authorized_transport) + check_does_not_exist(authorized_transport, blob_name) + + # Create the actual upload object. + upload_url = utils.SIMPLE_UPLOAD_TEMPLATE.format(blob_name=blob_name) + headers = utils.get_encryption_headers() + upload = resumable_requests.SimpleUpload(upload_url, headers=headers) + # Transmit the resource. + data = b"Binary contents\x00\x01\x02." + response = upload.transmit(authorized_transport, data, BYTES_CONTENT_TYPE) + check_response( + response, blob_name, actual_contents=data, content_type=BYTES_CONTENT_TYPE + ) + # Download the content to make sure it's "working as expected". + check_content(blob_name, data, authorized_transport, headers=headers) + # Make sure the upload is tombstoned. + check_tombstoned(upload, authorized_transport, data, BYTES_CONTENT_TYPE) + + +@pytest.mark.parametrize("checksum", ["auto", "md5", "crc32c", None]) +def test_multipart_upload(authorized_transport, bucket, cleanup, checksum): + with open(ICO_FILE, "rb") as file_obj: + actual_contents = file_obj.read() + + blob_name = os.path.basename(ICO_FILE) + # Make sure to clean up the uploaded blob when we are done. + cleanup(blob_name, authorized_transport) + check_does_not_exist(authorized_transport, blob_name) + + # Create the actual upload object. 
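+ # A multipart upload transmits the object metadata and the payload together in a single request.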
+ upload_url = utils.MULTIPART_UPLOAD + upload = resumable_requests.MultipartUpload(upload_url, checksum=checksum) + # Transmit the resource. + metadata = {"name": blob_name, "metadata": {"color": "yellow"}} + response = upload.transmit( + authorized_transport, actual_contents, metadata, ICO_CONTENT_TYPE + ) + check_response( + response, + blob_name, + actual_contents=actual_contents, + metadata=metadata["metadata"], + ) + # Download the content to make sure it's "working as expected". + check_content(blob_name, actual_contents, authorized_transport) + # Make sure the upload is tombstoned. + check_tombstoned( + upload, authorized_transport, actual_contents, metadata, ICO_CONTENT_TYPE + ) + + +@pytest.mark.parametrize("checksum", ["md5", "crc32c"]) +def test_multipart_upload_with_bad_checksum(authorized_transport, checksum, bucket): + with open(ICO_FILE, "rb") as file_obj: + actual_contents = file_obj.read() + + blob_name = os.path.basename(ICO_FILE) + check_does_not_exist(authorized_transport, blob_name) + + # Create the actual upload object. + upload_url = utils.MULTIPART_UPLOAD + upload = resumable_requests.MultipartUpload(upload_url, checksum=checksum) + # Transmit the resource. + metadata = {"name": blob_name, "metadata": {"color": "yellow"}} + fake_checksum_object = _helpers._get_checksum_object(checksum) + fake_checksum_object.update(b"bad data") + fake_prepared_checksum_digest = _helpers.prepare_checksum_digest( + fake_checksum_object.digest() + ) + with mock.patch.object( + _helpers, "prepare_checksum_digest", return_value=fake_prepared_checksum_digest + ): + with pytest.raises(InvalidResponse) as exc_info: + response = upload.transmit( + authorized_transport, actual_contents, metadata, ICO_CONTENT_TYPE + ) + response = exc_info.value.response + message = response.json()["error"]["message"] + # Attempt to verify that this is a checksum mismatch error. + assert checksum.upper() in message + assert fake_prepared_checksum_digest in message + + # Make sure the upload is tombstoned. + check_tombstoned( + upload, authorized_transport, actual_contents, metadata, ICO_CONTENT_TYPE + ) + + +def test_multipart_upload_with_headers(authorized_transport, bucket, cleanup): + blob_name = "some-multipart-stuff.bin" + # Make sure to clean up the uploaded blob when we are done. + cleanup(blob_name, authorized_transport) + check_does_not_exist(authorized_transport, blob_name) + + # Create the actual upload object. + upload_url = utils.MULTIPART_UPLOAD + headers = utils.get_encryption_headers() + upload = resumable_requests.MultipartUpload(upload_url, headers=headers) + # Transmit the resource. + metadata = {"name": blob_name} + data = b"Other binary contents\x03\x04\x05." + response = upload.transmit(authorized_transport, data, metadata, BYTES_CONTENT_TYPE) + check_response( + response, blob_name, actual_contents=data, content_type=BYTES_CONTENT_TYPE + ) + # Download the content to make sure it's "working as expected". + check_content(blob_name, data, authorized_transport, headers=headers) + # Make sure the upload is tombstoned. + check_tombstoned(upload, authorized_transport, data, metadata, BYTES_CONTENT_TYPE) + + +def _resumable_upload_helper( + authorized_transport, stream, cleanup, headers=None, checksum=None +): + blob_name = os.path.basename(stream.name) + # Make sure to clean up the uploaded blob when we are done. + cleanup(blob_name, authorized_transport) + check_does_not_exist(authorized_transport, blob_name) + # Create the actual upload object. 
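+ # The resumable upload is initiated once and then the stream is sent in chunk_size pieces until it is finished.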
+ chunk_size = _media.UPLOAD_CHUNK_SIZE + upload = resumable_requests.ResumableUpload( + utils.RESUMABLE_UPLOAD, chunk_size, headers=headers, checksum=checksum + ) + # Initiate the upload. + metadata = {"name": blob_name, "metadata": {"direction": "north"}} + response = upload.initiate( + authorized_transport, stream, metadata, JPEG_CONTENT_TYPE + ) + # Make sure ``initiate`` succeeded and did not mangle the stream. + check_initiate(response, upload, stream, authorized_transport, metadata) + # Actually upload the file in chunks. + num_chunks = transmit_chunks( + upload, authorized_transport, blob_name, metadata["metadata"] + ) + assert num_chunks == get_num_chunks(upload.total_bytes, chunk_size) + # Download the content to make sure it's "working as expected". + stream.seek(0) + actual_contents = stream.read() + check_content(blob_name, actual_contents, authorized_transport, headers=headers) + # Make sure the upload is tombstoned. + check_tombstoned(upload, authorized_transport) + + +@pytest.mark.parametrize("checksum", ["auto", "md5", "crc32c", None]) +def test_resumable_upload(authorized_transport, img_stream, bucket, cleanup, checksum): + _resumable_upload_helper( + authorized_transport, img_stream, cleanup, checksum=checksum + ) + + +def test_resumable_upload_with_headers( + authorized_transport, img_stream, bucket, cleanup +): + headers = utils.get_encryption_headers() + _resumable_upload_helper(authorized_transport, img_stream, cleanup, headers=headers) + + +@pytest.mark.parametrize("checksum", ["md5", "crc32c"]) +def test_resumable_upload_with_bad_checksum( + authorized_transport, img_stream, bucket, cleanup, checksum +): + fake_checksum_object = _helpers._get_checksum_object(checksum) + fake_checksum_object.update(b"bad data") + fake_prepared_checksum_digest = _helpers.prepare_checksum_digest( + fake_checksum_object.digest() + ) + with mock.patch.object( + _helpers, "prepare_checksum_digest", return_value=fake_prepared_checksum_digest + ): + with pytest.raises(DataCorruption) as exc_info: + _resumable_upload_helper( + authorized_transport, img_stream, cleanup, checksum=checksum + ) + expected_checksums = {"md5": "1bsd83IYNug8hd+V1ING3Q==", "crc32c": "YQGPxA=="} + expected_message = _upload._UPLOAD_CHECKSUM_MISMATCH_MESSAGE.format( + checksum.upper(), fake_prepared_checksum_digest, expected_checksums[checksum] + ) + assert exc_info.value.args[0] == expected_message + + +def test_resumable_upload_bad_chunk_size(authorized_transport, img_stream): + blob_name = os.path.basename(img_stream.name) + # Create the actual upload object. + upload = resumable_requests.ResumableUpload( + utils.RESUMABLE_UPLOAD, _media.UPLOAD_CHUNK_SIZE + ) + # Modify the ``upload`` **after** construction so we can + # use a bad chunk size. + upload._chunk_size = 1024 + assert upload._chunk_size < _media.UPLOAD_CHUNK_SIZE + # Initiate the upload. + metadata = {"name": blob_name} + response = upload.initiate( + authorized_transport, img_stream, metadata, JPEG_CONTENT_TYPE + ) + # Make sure ``initiate`` succeeded and did not mangle the stream. + check_initiate(response, upload, img_stream, authorized_transport, metadata) + # Make the first request and verify that it fails. + check_bad_chunk(upload, authorized_transport) + # Reset the chunk size (and the stream) and verify the "resumable" + # URL is unusable. 
+ upload._chunk_size = _media.UPLOAD_CHUNK_SIZE + img_stream.seek(0) + upload._invalid = False + check_bad_chunk(upload, authorized_transport) + + +def sabotage_and_recover(upload, stream, transport, chunk_size): + assert upload.bytes_uploaded == chunk_size + assert stream.tell() == chunk_size + # "Fake" that the instance is in an invalid state. + upload._invalid = True + stream.seek(0) # Seek to the wrong place. + upload._bytes_uploaded = 0 # Make ``bytes_uploaded`` wrong as well. + # Recover the (artifically) invalid upload. + response = upload.recover(transport) + assert response.status_code == http.client.PERMANENT_REDIRECT + assert not upload.invalid + assert upload.bytes_uploaded == chunk_size + assert stream.tell() == chunk_size + + +def _resumable_upload_recover_helper( + authorized_transport, cleanup, headers=None, checksum=None +): + blob_name = "some-bytes.bin" + chunk_size = _media.UPLOAD_CHUNK_SIZE + data = b"123" * chunk_size # 3 chunks worth. + # Make sure to clean up the uploaded blob when we are done. + cleanup(blob_name, authorized_transport) + check_does_not_exist(authorized_transport, blob_name) + # Create the actual upload object. + upload = resumable_requests.ResumableUpload( + utils.RESUMABLE_UPLOAD, chunk_size, headers=headers, checksum=checksum + ) + # Initiate the upload. + metadata = {"name": blob_name} + stream = io.BytesIO(data) + response = upload.initiate( + authorized_transport, stream, metadata, BYTES_CONTENT_TYPE + ) + # Make sure ``initiate`` succeeded and did not mangle the stream. + check_initiate(response, upload, stream, authorized_transport, metadata) + # Make the first request. + response = upload.transmit_next_chunk(authorized_transport) + assert response.status_code == http.client.PERMANENT_REDIRECT + # Call upload.recover(). + sabotage_and_recover(upload, stream, authorized_transport, chunk_size) + # Now stream what remains. + num_chunks = transmit_chunks( + upload, + authorized_transport, + blob_name, + None, + num_chunks=1, + content_type=BYTES_CONTENT_TYPE, + ) + assert num_chunks == 3 + # Download the content to make sure it's "working as expected". + actual_contents = stream.getvalue() + check_content(blob_name, actual_contents, authorized_transport, headers=headers) + # Make sure the upload is tombstoned. 
+ check_tombstoned(upload, authorized_transport) + + +@pytest.mark.parametrize("checksum", ["auto", "md5", "crc32c", None]) +def test_resumable_upload_recover(authorized_transport, bucket, cleanup, checksum): + _resumable_upload_recover_helper(authorized_transport, cleanup, checksum=checksum) + + +def test_resumable_upload_recover_with_headers(authorized_transport, bucket, cleanup): + headers = utils.get_encryption_headers() + _resumable_upload_recover_helper(authorized_transport, cleanup, headers=headers) + + +class TestResumableUploadUnknownSize(object): + @staticmethod + def _check_range_sent(response, start, end, total): + headers_sent = response.request.headers + if start is None and end is None: + expected_content_range = "bytes */{:d}".format(total) + else: + # Allow total to be an int or a string "*" + expected_content_range = "bytes {:d}-{:d}/{}".format(start, end, total) + + assert headers_sent["content-range"] == expected_content_range + + @staticmethod + def _check_range_received(response, size): + assert response.headers["range"] == "bytes=0-{:d}".format(size - 1) + + def _check_partial(self, upload, response, chunk_size, num_chunks): + start_byte = (num_chunks - 1) * chunk_size + end_byte = num_chunks * chunk_size - 1 + + assert not upload.finished + assert upload.bytes_uploaded == end_byte + 1 + assert response.status_code == http.client.PERMANENT_REDIRECT + assert response.content == b"" + + self._check_range_sent(response, start_byte, end_byte, "*") + self._check_range_received(response, end_byte + 1) + + @pytest.mark.parametrize("checksum", ["auto", "md5", "crc32c", None]) + def test_smaller_than_chunk_size( + self, authorized_transport, bucket, cleanup, checksum + ): + blob_name = os.path.basename(ICO_FILE) + chunk_size = _media.UPLOAD_CHUNK_SIZE + # Make sure to clean up the uploaded blob when we are done. + cleanup(blob_name, authorized_transport) + check_does_not_exist(authorized_transport, blob_name) + # Make sure the blob is smaller than the chunk size. + total_bytes = os.path.getsize(ICO_FILE) + assert total_bytes < chunk_size + # Create the actual upload object. + upload = resumable_requests.ResumableUpload( + utils.RESUMABLE_UPLOAD, chunk_size, checksum=checksum + ) + # Initiate the upload. + metadata = {"name": blob_name} + with open(ICO_FILE, "rb") as stream: + response = upload.initiate( + authorized_transport, + stream, + metadata, + ICO_CONTENT_TYPE, + stream_final=False, + ) + # Make sure ``initiate`` succeeded and did not mangle the stream. + check_initiate(response, upload, stream, authorized_transport, metadata) + # Make sure total bytes was never set. + assert upload.total_bytes is None + # Make the **ONLY** request. + response = upload.transmit_next_chunk(authorized_transport) + self._check_range_sent(response, 0, total_bytes - 1, total_bytes) + check_response(response, blob_name, total_bytes=total_bytes) + # Download the content to make sure it's "working as expected". + stream.seek(0) + actual_contents = stream.read() + check_content(blob_name, actual_contents, authorized_transport) + # Make sure the upload is tombstoned. + check_tombstoned(upload, authorized_transport) + + @pytest.mark.parametrize("checksum", ["auto", "md5", "crc32c", None]) + def test_finish_at_chunk(self, authorized_transport, bucket, cleanup, checksum): + blob_name = "some-clean-stuff.bin" + chunk_size = _media.UPLOAD_CHUNK_SIZE + # Make sure to clean up the uploaded blob when we are done. 
+ cleanup(blob_name, authorized_transport) + check_does_not_exist(authorized_transport, blob_name) + # Make sure the blob size is an exact multiple of the chunk size. + data = b"ab" * chunk_size + total_bytes = len(data) + stream = io.BytesIO(data) + # Create the actual upload object. + upload = resumable_requests.ResumableUpload( + utils.RESUMABLE_UPLOAD, chunk_size, checksum=checksum + ) + # Initiate the upload. + metadata = {"name": blob_name} + response = upload.initiate( + authorized_transport, + stream, + metadata, + BYTES_CONTENT_TYPE, + stream_final=False, + ) + # Make sure ``initiate`` succeeded and did not mangle the stream. + check_initiate(response, upload, stream, authorized_transport, metadata) + # Make sure total bytes was never set. + assert upload.total_bytes is None + # Make three requests. + response0 = upload.transmit_next_chunk(authorized_transport) + self._check_partial(upload, response0, chunk_size, 1) + + response1 = upload.transmit_next_chunk(authorized_transport) + self._check_partial(upload, response1, chunk_size, 2) + + response2 = upload.transmit_next_chunk(authorized_transport) + assert upload.finished + # Verify the "clean-up" request. + assert upload.bytes_uploaded == 2 * chunk_size + check_response( + response2, + blob_name, + actual_contents=data, + total_bytes=total_bytes, + content_type=BYTES_CONTENT_TYPE, + ) + self._check_range_sent(response2, None, None, 2 * chunk_size) + + @staticmethod + def _add_bytes(stream, data): + curr_pos = stream.tell() + stream.write(data) + # Go back to where we were before the write. + stream.seek(curr_pos) + + @pytest.mark.parametrize("checksum", ["auto", "md5", "crc32c", None]) + def test_interleave_writes(self, authorized_transport, bucket, cleanup, checksum): + blob_name = "some-moar-stuff.bin" + chunk_size = _media.UPLOAD_CHUNK_SIZE + # Make sure to clean up the uploaded blob when we are done. + cleanup(blob_name, authorized_transport) + check_does_not_exist(authorized_transport, blob_name) + # Start out the blob as a single chunk (but we will add to it). + stream = io.BytesIO(b"Z" * chunk_size) + # Create the actual upload object. + upload = resumable_requests.ResumableUpload( + utils.RESUMABLE_UPLOAD, chunk_size, checksum=checksum + ) + # Initiate the upload. + metadata = {"name": blob_name} + response = upload.initiate( + authorized_transport, + stream, + metadata, + BYTES_CONTENT_TYPE, + stream_final=False, + ) + # Make sure ``initiate`` succeeded and did not mangle the stream. + check_initiate(response, upload, stream, authorized_transport, metadata) + # Make sure total bytes was never set. + assert upload.total_bytes is None + # Make three requests. + response0 = upload.transmit_next_chunk(authorized_transport) + self._check_partial(upload, response0, chunk_size, 1) + # Add another chunk before sending. + self._add_bytes(stream, b"K" * chunk_size) + response1 = upload.transmit_next_chunk(authorized_transport) + self._check_partial(upload, response1, chunk_size, 2) + # Add more bytes, but make sure less than a full chunk. + last_chunk = 155 + self._add_bytes(stream, b"r" * last_chunk) + response2 = upload.transmit_next_chunk(authorized_transport) + assert upload.finished + # Verify the "clean-up" request. 
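+ # The final chunk's Content-Range reports the now-known total size, which finalizes the upload.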
+ total_bytes = 2 * chunk_size + last_chunk + assert upload.bytes_uploaded == total_bytes + check_response( + response2, + blob_name, + actual_contents=stream.getvalue(), + total_bytes=total_bytes, + content_type=BYTES_CONTENT_TYPE, + ) + self._check_range_sent(response2, 2 * chunk_size, total_bytes - 1, total_bytes) + + +@pytest.mark.parametrize("checksum", ["auto", "md5", "crc32c", None]) +def test_XMLMPU(authorized_transport, bucket, cleanup, checksum): + with open(ICO_FILE, "rb") as file_obj: + actual_contents = file_obj.read() + + blob_name = os.path.basename(ICO_FILE) + # Make sure to clean up the uploaded blob when we are done. + cleanup(blob_name, authorized_transport) + check_does_not_exist(authorized_transport, blob_name) + + # Create the actual upload object. + upload_url = utils.XML_UPLOAD_URL_TEMPLATE.format(bucket=bucket, blob=blob_name) + container = resumable_requests.XMLMPUContainer(upload_url, blob_name) + # Initiate + container.initiate(authorized_transport, ICO_CONTENT_TYPE) + assert container.upload_id + + part = resumable_requests.XMLMPUPart( + upload_url, + container.upload_id, + ICO_FILE, + 0, + len(actual_contents), + 1, + checksum=checksum, + ) + part.upload(authorized_transport) + assert part.etag + + container.register_part(1, part.etag) + container.finalize(authorized_transport) + assert container.finished + + # Download the content to make sure it's "working as expected". + check_content(blob_name, actual_contents, authorized_transport) + + +@pytest.mark.parametrize("checksum", ["md5", "crc32c"]) +def test_XMLMPU_with_bad_checksum(authorized_transport, bucket, checksum): + with open(ICO_FILE, "rb") as file_obj: + actual_contents = file_obj.read() + + blob_name = os.path.basename(ICO_FILE) + # No need to clean up, since the upload will not be finalized successfully. + check_does_not_exist(authorized_transport, blob_name) + + # Create the actual upload object. + upload_url = utils.XML_UPLOAD_URL_TEMPLATE.format(bucket=bucket, blob=blob_name) + container = resumable_requests.XMLMPUContainer(upload_url, blob_name) + # Initiate + container.initiate(authorized_transport, ICO_CONTENT_TYPE) + assert container.upload_id + + try: + part = resumable_requests.XMLMPUPart( + upload_url, + container.upload_id, + ICO_FILE, + 0, + len(actual_contents), + 1, + checksum=checksum, + ) + + fake_checksum_object = _helpers._get_checksum_object(checksum) + fake_checksum_object.update(b"bad data") + fake_prepared_checksum_digest = _helpers.prepare_checksum_digest( + fake_checksum_object.digest() + ) + with mock.patch.object( + _helpers, + "prepare_checksum_digest", + return_value=fake_prepared_checksum_digest, + ): + with pytest.raises(DataCorruption): + part.upload(authorized_transport) + finally: + utils.retry_transient_errors(authorized_transport.delete)( + upload_url + "?uploadId=" + str(container.upload_id) + ) + + +def test_XMLMPU_cancel(authorized_transport, bucket): + with open(ICO_FILE, "rb") as file_obj: + actual_contents = file_obj.read() + + blob_name = os.path.basename(ICO_FILE) + check_does_not_exist(authorized_transport, blob_name) + + # Create the actual upload object. 
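+ # XML MPU flow: initiate a container, upload a numbered part, register its etag, then cancel instead of finalizing.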
+ upload_url = utils.XML_UPLOAD_URL_TEMPLATE.format(bucket=bucket, blob=blob_name) + container = resumable_requests.XMLMPUContainer(upload_url, blob_name) + # Initiate + container.initiate(authorized_transport, ICO_CONTENT_TYPE) + assert container.upload_id + + part = resumable_requests.XMLMPUPart( + upload_url, + container.upload_id, + ICO_FILE, + 0, + len(actual_contents), + 1, + ) + part.upload(authorized_transport) + assert part.etag + + container.register_part(1, part.etag) + container.cancel(authorized_transport) + + # Validate the cancel worked by expecting a 404 on finalize. + with pytest.raises(InvalidResponse): + container.finalize(authorized_transport) diff --git a/tests/resumable_media/system/utils.py b/tests/resumable_media/system/utils.py new file mode 100644 index 000000000..7b679095d --- /dev/null +++ b/tests/resumable_media/system/utils.py @@ -0,0 +1,88 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import base64 +import hashlib +import time + +from test_utils.retry import RetryResult # type: ignore + + +BUCKET_NAME = "grpm-systest-{}".format(int(1000 * time.time())) +BUCKET_POST_URL = "https://www.googleapis.com/storage/v1/b/" +BUCKET_URL = "https://www.googleapis.com/storage/v1/b/{}".format(BUCKET_NAME) + +_DOWNLOAD_BASE = "https://www.googleapis.com/download/storage/v1/b/{}".format( + BUCKET_NAME +) +DOWNLOAD_URL_TEMPLATE = _DOWNLOAD_BASE + "/o/{blob_name}?alt=media" + +_UPLOAD_BASE = ( + "https://www.googleapis.com/upload/storage/v1/b/{}".format(BUCKET_NAME) + + "/o?uploadType=" +) +SIMPLE_UPLOAD_TEMPLATE = _UPLOAD_BASE + "media&name={blob_name}" +MULTIPART_UPLOAD = _UPLOAD_BASE + "multipart" +RESUMABLE_UPLOAD = _UPLOAD_BASE + "resumable" + +METADATA_URL_TEMPLATE = BUCKET_URL + "/o/{blob_name}" + +XML_UPLOAD_URL_TEMPLATE = "https://{bucket}.storage.googleapis.com/{blob}" + + +GCS_RW_SCOPE = "https://www.googleapis.com/auth/devstorage.read_write" +# Generated using random.choice() with all 256 byte choices. +ENCRYPTION_KEY = ( + b"R\xb8\x1b\x94T\xea_\xa8\x93\xae\xd1\xf6\xfca\x15\x0ekA" + b"\x08 Y\x13\xe2\n\x02i\xadc\xe2\xd99x" +) + + +_RETRYABLE_CODES = [ + 409, # Conflict + 429, # TooManyRequests + 503, # ServiceUnavailable +] + + +def _not_retryable(response): + return response.status_code not in _RETRYABLE_CODES + + +retry_transient_errors = RetryResult(_not_retryable) + + +def get_encryption_headers(key=ENCRYPTION_KEY): + """Builds customer-supplied encryption key headers + + See `Managing Data Encryption`_ for more details. + + Args: + key (bytes): 32 byte key to build request key and hash. + + Returns: + Dict[str, str]: The algorithm, key and key-SHA256 headers. + + .. 
_Managing Data Encryption: + https://cloud.google.com/storage/docs/encryption + """ + key_hash = hashlib.sha256(key).digest() + key_hash_b64 = base64.b64encode(key_hash) + key_b64 = base64.b64encode(key) + + return { + "x-goog-encryption-algorithm": "AES256", + "x-goog-encryption-key": key_b64.decode("utf-8"), + "x-goog-encryption-key-sha256": key_hash_b64.decode("utf-8"), + } diff --git a/tests/resumable_media/unit/__init__.py b/tests/resumable_media/unit/__init__.py new file mode 100644 index 000000000..7c07b241f --- /dev/null +++ b/tests/resumable_media/unit/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/resumable_media/unit/requests/__init__.py b/tests/resumable_media/unit/requests/__init__.py new file mode 100644 index 000000000..7c07b241f --- /dev/null +++ b/tests/resumable_media/unit/requests/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/resumable_media/unit/requests/test__helpers.py b/tests/resumable_media/unit/requests/test__helpers.py new file mode 100644 index 000000000..132172bbb --- /dev/null +++ b/tests/resumable_media/unit/requests/test__helpers.py @@ -0,0 +1,59 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import http.client + +from unittest import mock + +from google.cloud.storage._media.requests import _request_helpers + +EXPECTED_TIMEOUT = (61, 60) + + +class TestRequestsMixin(object): + def test__get_status_code(self): + status_code = int(http.client.OK) + response = _make_response(status_code) + assert status_code == _request_helpers.RequestsMixin._get_status_code(response) + + def test__get_headers(self): + headers = {"fruit": "apple"} + response = mock.Mock(headers=headers, spec=["headers"]) + assert headers == _request_helpers.RequestsMixin._get_headers(response) + + def test__get_body(self): + body = b"This is the payload." + response = mock.Mock(content=body, spec=["content"]) + assert body == _request_helpers.RequestsMixin._get_body(response) + + +class TestRawRequestsMixin(object): + def test__get_body_wo_content_consumed(self): + body = b"This is the payload." + raw = mock.Mock(spec=["stream"]) + raw.stream.return_value = iter([body]) + response = mock.Mock(raw=raw, _content=False, spec=["raw", "_content"]) + assert body == _request_helpers.RawRequestsMixin._get_body(response) + raw.stream.assert_called_once_with( + _request_helpers._SINGLE_GET_CHUNK_SIZE, decode_content=False + ) + + def test__get_body_w_content_consumed(self): + body = b"This is the payload." + response = mock.Mock(_content=body, spec=["_content"]) + assert body == _request_helpers.RawRequestsMixin._get_body(response) + + +def _make_response(status_code): + return mock.Mock(status_code=status_code, spec=["status_code"]) diff --git a/tests/resumable_media/unit/requests/test_download.py b/tests/resumable_media/unit/requests/test_download.py new file mode 100644 index 000000000..3da234a29 --- /dev/null +++ b/tests/resumable_media/unit/requests/test_download.py @@ -0,0 +1,1303 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import http.client +import io + +from unittest import mock +import pytest # type: ignore + +from google.cloud.storage._media import _helpers +from google.cloud.storage._media.requests import download as download_mod +from google.cloud.storage._media.requests import _request_helpers +from google.cloud.storage.exceptions import DataCorruption + + +URL_PREFIX = "https://www.googleapis.com/download/storage/v1/b/{BUCKET}/o/" +EXAMPLE_URL = URL_PREFIX + "{OBJECT}?alt=media" +EXPECTED_TIMEOUT = (61, 60) + + +class TestDownload(object): + def test__write_to_stream_no_hash_check(self): + stream = io.BytesIO() + download = download_mod.Download(EXAMPLE_URL, stream=stream) + + chunk1 = b"right now, " + chunk2 = b"but a little later" + response = _mock_response(chunks=[chunk1, chunk2], headers={}) + + ret_val = download._write_to_stream(response) + assert ret_val is None + + assert stream.getvalue() == chunk1 + chunk2 + assert download._bytes_downloaded == len(chunk1 + chunk2) + + # Check mocks. 
+ response.__enter__.assert_called_once_with() + response.__exit__.assert_called_once_with(None, None, None) + response.iter_content.assert_called_once_with( + chunk_size=_request_helpers._SINGLE_GET_CHUNK_SIZE, decode_unicode=False + ) + + def test__write_to_stream_empty_chunks(self): + stream = io.BytesIO() + download = download_mod.Download(EXAMPLE_URL, stream=stream) + + response = _mock_response(chunks=[], headers={}) + + ret_val = download._write_to_stream(response) + assert ret_val is None + + assert stream.getvalue() == b"" + assert download._bytes_downloaded == 0 + + # Check mocks. + response.__enter__.assert_called_once_with() + response.__exit__.assert_called_once_with(None, None, None) + response.iter_content.assert_called_once_with( + chunk_size=_request_helpers._SINGLE_GET_CHUNK_SIZE, decode_unicode=False + ) + + @pytest.mark.parametrize("checksum", ["auto", "md5", "crc32c", None]) + def test__write_to_stream_with_hash_check_success(self, checksum): + stream = io.BytesIO() + download = download_mod.Download(EXAMPLE_URL, stream=stream, checksum=checksum) + + chunk1 = b"first chunk, count starting at 0. " + chunk2 = b"second chunk, or chunk 1, which is better? " + chunk3 = b"ordinals and numerals and stuff." + header_value = "crc32c=qmNCyg==,md5=fPAJHnnoi/+NadyNxT2c2w==" + headers = {_helpers._HASH_HEADER: header_value} + response = _mock_response(chunks=[chunk1, chunk2, chunk3], headers=headers) + + ret_val = download._write_to_stream(response) + assert ret_val is None + + assert stream.getvalue() == chunk1 + chunk2 + chunk3 + assert download._bytes_downloaded == len(chunk1 + chunk2 + chunk3) + assert download._checksum_object is not None + + # Check mocks. + response.__enter__.assert_called_once_with() + response.__exit__.assert_called_once_with(None, None, None) + response.iter_content.assert_called_once_with( + chunk_size=_request_helpers._SINGLE_GET_CHUNK_SIZE, decode_unicode=False + ) + + @pytest.mark.parametrize("checksum", ["md5", "crc32c"]) + def test__write_to_stream_with_hash_check_fail(self, checksum): + stream = io.BytesIO() + download = download_mod.Download(EXAMPLE_URL, stream=stream, checksum=checksum) + + chunk1 = b"first chunk, count starting at 0. " + chunk2 = b"second chunk, or chunk 1, which is better? " + chunk3 = b"ordinals and numerals and stuff." + bad_checksum = "d3JvbmcgbiBtYWRlIHVwIQ==" + header_value = "crc32c={bad},md5={bad}".format(bad=bad_checksum) + headers = {_helpers._HASH_HEADER: header_value} + response = _mock_response(chunks=[chunk1, chunk2, chunk3], headers=headers) + + with pytest.raises(DataCorruption) as exc_info: + download._write_to_stream(response) + + assert not download.finished + + error = exc_info.value + assert error.response is response + assert len(error.args) == 1 + if checksum == "md5": + good_checksum = "fPAJHnnoi/+NadyNxT2c2w==" + else: + good_checksum = "qmNCyg==" + msg = download_mod._CHECKSUM_MISMATCH.format( + EXAMPLE_URL, bad_checksum, good_checksum, checksum_type=checksum.upper() + ) + assert error.args[0] == msg + + # Check mocks. 
+ response.__enter__.assert_called_once_with() + response.__exit__.assert_called_once_with(None, None, None) + response.iter_content.assert_called_once_with( + chunk_size=_request_helpers._SINGLE_GET_CHUNK_SIZE, decode_unicode=False + ) + + @pytest.mark.parametrize("checksum", ["md5", "crc32c"]) + def test__write_to_stream_no_checksum_validation_for_partial_response( + self, checksum + ): + stream = io.BytesIO() + download = download_mod.Download(EXAMPLE_URL, stream=stream, checksum=checksum) + + chunk1 = b"first chunk" + response = _mock_response( + status_code=http.client.PARTIAL_CONTENT, chunks=[chunk1] + ) + + # Make sure that the checksum is not validated. + with mock.patch( + "google.cloud.storage._media._helpers.prepare_checksum_digest", + return_value=None, + ) as prepare_checksum_digest: + download._write_to_stream(response) + assert not prepare_checksum_digest.called + + assert not download.finished + + # Check mocks. + response.__enter__.assert_called_once_with() + response.__exit__.assert_called_once_with(None, None, None) + response.iter_content.assert_called_once_with( + chunk_size=_request_helpers._SINGLE_GET_CHUNK_SIZE, decode_unicode=False + ) + + def test__write_to_stream_with_invalid_checksum_type(self): + BAD_CHECKSUM_TYPE = "badsum" + + stream = io.BytesIO() + download = download_mod.Download( + EXAMPLE_URL, stream=stream, checksum=BAD_CHECKSUM_TYPE + ) + + chunk1 = b"first chunk, count starting at 0. " + chunk2 = b"second chunk, or chunk 1, which is better? " + chunk3 = b"ordinals and numerals and stuff." + bad_checksum = "d3JvbmcgbiBtYWRlIHVwIQ==" + header_value = "crc32c={bad},md5={bad}".format(bad=bad_checksum) + headers = {_helpers._HASH_HEADER: header_value} + response = _mock_response(chunks=[chunk1, chunk2, chunk3], headers=headers) + + with pytest.raises(ValueError) as exc_info: + download._write_to_stream(response) + + assert not download.finished + + error = exc_info.value + assert error.args[0] == "checksum must be ``'md5'``, ``'crc32c'`` or ``None``" + + def _consume_helper( + self, + stream=None, + end=65536, + headers=None, + chunks=(), + response_headers=None, + checksum="md5", + timeout=None, + ): + download = download_mod.Download( + EXAMPLE_URL, stream=stream, end=end, headers=headers, checksum=checksum + ) + transport = mock.Mock(spec=["request"]) + transport.request.return_value = _mock_response( + chunks=chunks, headers=response_headers + ) + + assert not download.finished + + if timeout is not None: + ret_val = download.consume(transport, timeout=timeout) + else: + ret_val = download.consume(transport) + + assert ret_val is transport.request.return_value + + called_kwargs = { + "data": None, + "headers": download._headers, + "timeout": EXPECTED_TIMEOUT if timeout is None else timeout, + } + if chunks: + assert stream is not None + called_kwargs["stream"] = True + + transport.request.assert_called_once_with("GET", EXAMPLE_URL, **called_kwargs) + + range_bytes = "bytes={:d}-{:d}".format(0, end) + assert download._headers["range"] == range_bytes + assert download.finished + + return transport + + def test_consume(self): + self._consume_helper() + + def test_consume_with_custom_timeout(self): + self._consume_helper(timeout=14.7) + + @pytest.mark.parametrize("checksum", ["auto", "md5", "crc32c", None]) + def test_consume_with_stream(self, checksum): + stream = io.BytesIO() + chunks = (b"up down ", b"charlie ", b"brown") + transport = self._consume_helper( + stream=stream, chunks=chunks, checksum=checksum + ) + + assert stream.getvalue() == 
b"".join(chunks) + + # Check mocks. + response = transport.request.return_value + response.__enter__.assert_called_once_with() + response.__exit__.assert_called_once_with(None, None, None) + response.iter_content.assert_called_once_with( + chunk_size=_request_helpers._SINGLE_GET_CHUNK_SIZE, decode_unicode=False + ) + + @pytest.mark.parametrize("checksum", ["md5", "crc32c"]) + def test_consume_with_stream_hash_check_success(self, checksum): + stream = io.BytesIO() + chunks = (b"up down ", b"charlie ", b"brown") + header_value = "crc32c=UNIQxg==,md5=JvS1wjMvfbCXgEGeaJJLDQ==" + headers = {_helpers._HASH_HEADER: header_value} + transport = self._consume_helper( + stream=stream, chunks=chunks, response_headers=headers, checksum=checksum + ) + + assert stream.getvalue() == b"".join(chunks) + + # Check mocks. + response = transport.request.return_value + response.__enter__.assert_called_once_with() + response.__exit__.assert_called_once_with(None, None, None) + response.iter_content.assert_called_once_with( + chunk_size=_request_helpers._SINGLE_GET_CHUNK_SIZE, decode_unicode=False + ) + + @pytest.mark.parametrize("checksum", ["md5", "crc32c"]) + def test_consume_with_stream_hash_check_fail(self, checksum): + stream = io.BytesIO() + download = download_mod.Download(EXAMPLE_URL, stream=stream, checksum=checksum) + + chunks = (b"zero zero", b"niner tango") + bad_checksum = "anVzdCBub3QgdGhpcyAxLA==" + header_value = "crc32c={bad},md5={bad}".format(bad=bad_checksum) + headers = {_helpers._HASH_HEADER: header_value} + transport = mock.Mock(spec=["request"]) + transport.request.return_value = _mock_response(chunks=chunks, headers=headers) + + assert not download.finished + with pytest.raises(DataCorruption) as exc_info: + download.consume(transport) + + assert stream.getvalue() == b"".join(chunks) + assert download.finished + assert download._headers == {} + + error = exc_info.value + assert error.response is transport.request.return_value + assert len(error.args) == 1 + if checksum == "md5": + good_checksum = "1A/dxEpys717C6FH7FIWDw==" + else: + good_checksum = "GvNZlg==" + msg = download_mod._CHECKSUM_MISMATCH.format( + EXAMPLE_URL, bad_checksum, good_checksum, checksum_type=checksum.upper() + ) + assert error.args[0] == msg + + # Check mocks. + transport.request.assert_called_once_with( + "GET", + EXAMPLE_URL, + data=None, + headers={}, + stream=True, + timeout=EXPECTED_TIMEOUT, + ) + + def test_consume_with_headers(self): + headers = {} # Empty headers + end = 16383 + self._consume_helper(end=end, headers=headers) + range_bytes = "bytes={:d}-{:d}".format(0, end) + # Make sure the headers have been modified. 
+ assert headers == {"range": range_bytes} + + def test_consume_gets_generation_from_url(self): + GENERATION_VALUE = 1641590104888641 + url = EXAMPLE_URL + f"&generation={GENERATION_VALUE}" + stream = io.BytesIO() + chunks = (b"up down ", b"charlie ", b"brown") + + download = download_mod.Download( + url, stream=stream, end=65536, headers=None, checksum="md5" + ) + transport = mock.Mock(spec=["request"]) + transport.request.return_value = _mock_response(chunks=chunks, headers=None) + + assert not download.finished + assert download._object_generation is None + + ret_val = download.consume(transport) + + assert download._object_generation == GENERATION_VALUE + assert ret_val is transport.request.return_value + assert stream.getvalue() == b"".join(chunks) + + called_kwargs = { + "data": None, + "headers": download._headers, + "timeout": EXPECTED_TIMEOUT, + "stream": True, + } + transport.request.assert_called_once_with("GET", url, **called_kwargs) + + def test_consume_gets_generation_from_headers(self): + GENERATION_VALUE = 1641590104888641 + stream = io.BytesIO() + chunks = (b"up down ", b"charlie ", b"brown") + + download = download_mod.Download( + EXAMPLE_URL, stream=stream, end=65536, headers=None, checksum="md5" + ) + transport = mock.Mock(spec=["request"]) + headers = {_helpers._GENERATION_HEADER: GENERATION_VALUE} + transport.request.return_value = _mock_response(chunks=chunks, headers=headers) + + assert not download.finished + assert download._object_generation is None + + ret_val = download.consume(transport) + + assert download._object_generation == GENERATION_VALUE + assert ret_val is transport.request.return_value + assert stream.getvalue() == b"".join(chunks) + + called_kwargs = { + "data": None, + "headers": download._headers, + "timeout": EXPECTED_TIMEOUT, + "stream": True, + } + transport.request.assert_called_once_with("GET", EXAMPLE_URL, **called_kwargs) + + def test_consume_w_object_generation(self): + GENERATION_VALUE = 1641590104888641 + stream = io.BytesIO() + chunks = (b"up down ", b"charlie ", b"brown") + end = 65536 + + download = download_mod.Download( + EXAMPLE_URL, stream=stream, end=end, headers=None, checksum="md5" + ) + transport = mock.Mock(spec=["request"]) + transport.request.return_value = _mock_response(chunks=chunks, headers=None) + + assert download._object_generation is None + + # Mock a retry operation with object generation retrieved and bytes already downloaded in the stream + download._object_generation = GENERATION_VALUE + offset = 256 + download._bytes_downloaded = offset + download.consume(transport) + + expected_url = EXAMPLE_URL + f"&generation={GENERATION_VALUE}" + called_kwargs = { + "data": None, + "headers": download._headers, + "timeout": EXPECTED_TIMEOUT, + "stream": True, + } + transport.request.assert_called_once_with("GET", expected_url, **called_kwargs) + range_bytes = "bytes={:d}-{:d}".format(offset, end) + assert download._headers["range"] == range_bytes + + def test_consume_w_bytes_downloaded(self): + stream = io.BytesIO() + chunks = (b"up down ", b"charlie ", b"brown") + end = 65536 + + download = download_mod.Download( + EXAMPLE_URL, stream=stream, end=end, headers=None, checksum="md5" + ) + transport = mock.Mock(spec=["request"]) + transport.request.return_value = _mock_response(chunks=chunks, headers=None) + + assert download._bytes_downloaded == 0 + + # Mock a retry operation with bytes already downloaded in the stream and checksum stored + offset = 256 + download._bytes_downloaded = offset + download._expected_checksum = 
None + download._checksum_object = _helpers._DoNothingHash() + download.consume(transport) + + called_kwargs = { + "data": None, + "headers": download._headers, + "timeout": EXPECTED_TIMEOUT, + "stream": True, + } + transport.request.assert_called_once_with("GET", EXAMPLE_URL, **called_kwargs) + range_bytes = "bytes={:d}-{:d}".format(offset, end) + assert download._headers["range"] == range_bytes + + def test_consume_w_bytes_downloaded_range_read(self): + stream = io.BytesIO() + chunks = (b"up down ", b"charlie ", b"brown") + start = 1024 + end = 65536 + + download = download_mod.Download( + EXAMPLE_URL, + stream=stream, + start=start, + end=end, + headers=None, + checksum="md5", + ) + transport = mock.Mock(spec=["request"]) + transport.request.return_value = _mock_response(chunks=chunks, headers=None) + + assert download._bytes_downloaded == 0 + + # Mock a retry operation with bytes already downloaded in the stream and checksum stored + offset = 256 + download._bytes_downloaded = offset + download._expected_checksum = None + download._checksum_object = _helpers._DoNothingHash() + download.consume(transport) + + called_kwargs = { + "data": None, + "headers": download._headers, + "timeout": EXPECTED_TIMEOUT, + "stream": True, + } + transport.request.assert_called_once_with("GET", EXAMPLE_URL, **called_kwargs) + range_bytes = "bytes={:d}-{:d}".format(offset + start, end) + assert download._headers["range"] == range_bytes + + def test_consume_gzip_reset_stream_w_bytes_downloaded(self): + stream = io.BytesIO() + chunks = (b"up down ", b"charlie ", b"brown") + end = 65536 + + download = download_mod.Download( + EXAMPLE_URL, stream=stream, end=end, headers=None, checksum="md5" + ) + transport = mock.Mock(spec=["request"]) + + # Mock a decompressive transcoding retry operation with bytes already downloaded in the stream + headers = {_helpers._STORED_CONTENT_ENCODING_HEADER: "gzip"} + transport.request.return_value = _mock_response(chunks=chunks, headers=headers) + offset = 16 + download._bytes_downloaded = offset + download.consume(transport) + + assert stream.getvalue() == b"".join(chunks) + assert download._bytes_downloaded == len(b"".join(chunks)) + + def test_consume_gzip_reset_stream_error(self): + stream = io.BytesIO() + chunks = (b"up down ", b"charlie ", b"brown") + end = 65536 + + download = download_mod.Download( + EXAMPLE_URL, stream=stream, end=end, headers=None, checksum="md5" + ) + transport = mock.Mock(spec=["request"]) + + # Mock a stream seek error while resuming a decompressive transcoding download + stream.seek = mock.Mock(side_effect=OSError("mock stream seek error")) + headers = {_helpers._STORED_CONTENT_ENCODING_HEADER: "gzip"} + transport.request.return_value = _mock_response(chunks=chunks, headers=headers) + offset = 16 + download._bytes_downloaded = offset + with pytest.raises(Exception): + download.consume(transport) + + +class TestRawDownload(object): + def test__write_to_stream_no_hash_check(self): + stream = io.BytesIO() + download = download_mod.RawDownload(EXAMPLE_URL, stream=stream) + + chunk1 = b"right now, " + chunk2 = b"but a little later" + response = _mock_raw_response(chunks=[chunk1, chunk2], headers={}) + + ret_val = download._write_to_stream(response) + assert ret_val is None + + assert stream.getvalue() == chunk1 + chunk2 + assert download._bytes_downloaded == len(chunk1 + chunk2) + + # Check mocks. 
+ response.__enter__.assert_called_once_with() + response.__exit__.assert_called_once_with(None, None, None) + response.raw.stream.assert_called_once_with( + _request_helpers._SINGLE_GET_CHUNK_SIZE, decode_content=False + ) + + @pytest.mark.parametrize("checksum", ["auto", "md5", "crc32c", None]) + def test__write_to_stream_with_hash_check_success(self, checksum): + stream = io.BytesIO() + download = download_mod.RawDownload( + EXAMPLE_URL, stream=stream, checksum=checksum + ) + + chunk1 = b"first chunk, count starting at 0. " + chunk2 = b"second chunk, or chunk 1, which is better? " + chunk3 = b"ordinals and numerals and stuff." + header_value = "crc32c=qmNCyg==,md5=fPAJHnnoi/+NadyNxT2c2w==" + headers = {_helpers._HASH_HEADER: header_value} + response = _mock_raw_response(chunks=[chunk1, chunk2, chunk3], headers=headers) + + ret_val = download._write_to_stream(response) + assert ret_val is None + + assert stream.getvalue() == chunk1 + chunk2 + chunk3 + assert download._bytes_downloaded == len(chunk1 + chunk2 + chunk3) + assert download._checksum_object is not None + + # Check mocks. + response.__enter__.assert_called_once_with() + response.__exit__.assert_called_once_with(None, None, None) + response.raw.stream.assert_called_once_with( + _request_helpers._SINGLE_GET_CHUNK_SIZE, decode_content=False + ) + + @pytest.mark.parametrize("checksum", ["md5", "crc32c"]) + def test__write_to_stream_with_hash_check_fail(self, checksum): + stream = io.BytesIO() + download = download_mod.RawDownload( + EXAMPLE_URL, stream=stream, checksum=checksum + ) + + chunk1 = b"first chunk, count starting at 0. " + chunk2 = b"second chunk, or chunk 1, which is better? " + chunk3 = b"ordinals and numerals and stuff." + bad_checksum = "d3JvbmcgbiBtYWRlIHVwIQ==" + header_value = "crc32c={bad},md5={bad}".format(bad=bad_checksum) + headers = {_helpers._HASH_HEADER: header_value} + response = _mock_raw_response(chunks=[chunk1, chunk2, chunk3], headers=headers) + + with pytest.raises(DataCorruption) as exc_info: + download._write_to_stream(response) + + assert not download.finished + + error = exc_info.value + assert error.response is response + assert len(error.args) == 1 + if checksum == "md5": + good_checksum = "fPAJHnnoi/+NadyNxT2c2w==" + else: + good_checksum = "qmNCyg==" + msg = download_mod._CHECKSUM_MISMATCH.format( + EXAMPLE_URL, bad_checksum, good_checksum, checksum_type=checksum.upper() + ) + assert error.args[0] == msg + + # Check mocks. + response.__enter__.assert_called_once_with() + response.__exit__.assert_called_once_with(None, None, None) + response.raw.stream.assert_called_once_with( + _request_helpers._SINGLE_GET_CHUNK_SIZE, decode_content=False + ) + + def test__write_to_stream_with_invalid_checksum_type(self): + BAD_CHECKSUM_TYPE = "badsum" + + stream = io.BytesIO() + download = download_mod.RawDownload( + EXAMPLE_URL, stream=stream, checksum=BAD_CHECKSUM_TYPE + ) + + chunk1 = b"first chunk, count starting at 0. " + chunk2 = b"second chunk, or chunk 1, which is better? " + chunk3 = b"ordinals and numerals and stuff." 
+ bad_checksum = "d3JvbmcgbiBtYWRlIHVwIQ==" + header_value = "crc32c={bad},md5={bad}".format(bad=bad_checksum) + headers = {_helpers._HASH_HEADER: header_value} + response = _mock_response(chunks=[chunk1, chunk2, chunk3], headers=headers) + + with pytest.raises(ValueError) as exc_info: + download._write_to_stream(response) + + assert not download.finished + + error = exc_info.value + assert error.args[0] == "checksum must be ``'md5'``, ``'crc32c'`` or ``None``" + + def _consume_helper( + self, + stream=None, + end=65536, + headers=None, + chunks=(), + response_headers=None, + checksum=None, + timeout=None, + ): + download = download_mod.RawDownload( + EXAMPLE_URL, stream=stream, end=end, headers=headers, checksum=checksum + ) + transport = mock.Mock(spec=["request"]) + transport.request.return_value = _mock_raw_response( + chunks=chunks, headers=response_headers + ) + + assert not download.finished + + if timeout is not None: + ret_val = download.consume(transport, timeout=timeout) + else: + ret_val = download.consume(transport) + + assert ret_val is transport.request.return_value + + if chunks: + assert stream is not None + transport.request.assert_called_once_with( + "GET", + EXAMPLE_URL, + data=None, + headers=download._headers, + stream=True, + timeout=EXPECTED_TIMEOUT if timeout is None else timeout, + ) + + range_bytes = "bytes={:d}-{:d}".format(0, end) + assert download._headers["range"] == range_bytes + assert download.finished + + return transport + + def test_consume(self): + self._consume_helper() + + def test_consume_with_custom_timeout(self): + self._consume_helper(timeout=14.7) + + @pytest.mark.parametrize("checksum", ["auto", "md5", "crc32c", None]) + def test_consume_with_stream(self, checksum): + stream = io.BytesIO() + chunks = (b"up down ", b"charlie ", b"brown") + transport = self._consume_helper( + stream=stream, chunks=chunks, checksum=checksum + ) + + assert stream.getvalue() == b"".join(chunks) + + # Check mocks. + response = transport.request.return_value + response.__enter__.assert_called_once_with() + response.__exit__.assert_called_once_with(None, None, None) + response.raw.stream.assert_called_once_with( + _request_helpers._SINGLE_GET_CHUNK_SIZE, decode_content=False + ) + + @pytest.mark.parametrize("checksum", ["md5", "crc32c"]) + def test_consume_with_stream_hash_check_success(self, checksum): + stream = io.BytesIO() + chunks = (b"up down ", b"charlie ", b"brown") + header_value = "crc32c=UNIQxg==,md5=JvS1wjMvfbCXgEGeaJJLDQ==" + headers = {_helpers._HASH_HEADER: header_value} + transport = self._consume_helper( + stream=stream, chunks=chunks, response_headers=headers, checksum=checksum + ) + + assert stream.getvalue() == b"".join(chunks) + + # Check mocks. 
+ response = transport.request.return_value + response.__enter__.assert_called_once_with() + response.__exit__.assert_called_once_with(None, None, None) + response.raw.stream.assert_called_once_with( + _request_helpers._SINGLE_GET_CHUNK_SIZE, decode_content=False + ) + + @pytest.mark.parametrize("checksum", ["md5", "crc32c"]) + def test_consume_with_stream_hash_check_fail(self, checksum): + stream = io.BytesIO() + download = download_mod.RawDownload( + EXAMPLE_URL, stream=stream, checksum=checksum + ) + + chunks = (b"zero zero", b"niner tango") + bad_checksum = "anVzdCBub3QgdGhpcyAxLA==" + header_value = "crc32c={bad},md5={bad}".format(bad=bad_checksum) + headers = {_helpers._HASH_HEADER: header_value} + transport = mock.Mock(spec=["request"]) + transport.request.return_value = _mock_raw_response( + chunks=chunks, headers=headers + ) + + assert not download.finished + with pytest.raises(DataCorruption) as exc_info: + download.consume(transport) + + assert stream.getvalue() == b"".join(chunks) + assert download.finished + assert download._headers == {} + + error = exc_info.value + assert error.response is transport.request.return_value + assert len(error.args) == 1 + if checksum == "md5": + good_checksum = "1A/dxEpys717C6FH7FIWDw==" + else: + good_checksum = "GvNZlg==" + msg = download_mod._CHECKSUM_MISMATCH.format( + EXAMPLE_URL, bad_checksum, good_checksum, checksum_type=checksum.upper() + ) + assert error.args[0] == msg + + # Check mocks. + transport.request.assert_called_once_with( + "GET", + EXAMPLE_URL, + data=None, + headers={}, + stream=True, + timeout=EXPECTED_TIMEOUT, + ) + + def test_consume_with_headers(self): + headers = {} # Empty headers + end = 16383 + self._consume_helper(end=end, headers=headers) + range_bytes = "bytes={:d}-{:d}".format(0, end) + # Make sure the headers have been modified. 
+ assert headers == {"range": range_bytes} + + def test_consume_gets_generation_from_url(self): + GENERATION_VALUE = 1641590104888641 + url = EXAMPLE_URL + f"&generation={GENERATION_VALUE}" + stream = io.BytesIO() + chunks = (b"up down ", b"charlie ", b"brown") + + download = download_mod.RawDownload( + url, stream=stream, end=65536, headers=None, checksum="md5" + ) + transport = mock.Mock(spec=["request"]) + transport.request.return_value = _mock_raw_response(chunks=chunks, headers=None) + + assert not download.finished + assert download._object_generation is None + + ret_val = download.consume(transport) + + assert download._object_generation == GENERATION_VALUE + assert ret_val is transport.request.return_value + assert stream.getvalue() == b"".join(chunks) + + called_kwargs = { + "data": None, + "headers": download._headers, + "timeout": EXPECTED_TIMEOUT, + "stream": True, + } + transport.request.assert_called_once_with("GET", url, **called_kwargs) + + def test_consume_gets_generation_from_headers(self): + GENERATION_VALUE = 1641590104888641 + stream = io.BytesIO() + chunks = (b"up down ", b"charlie ", b"brown") + + download = download_mod.RawDownload( + EXAMPLE_URL, stream=stream, end=65536, headers=None, checksum="md5" + ) + transport = mock.Mock(spec=["request"]) + headers = {_helpers._GENERATION_HEADER: GENERATION_VALUE} + transport.request.return_value = _mock_raw_response( + chunks=chunks, headers=headers + ) + + assert not download.finished + assert download._object_generation is None + + ret_val = download.consume(transport) + + assert download._object_generation == GENERATION_VALUE + assert ret_val is transport.request.return_value + assert stream.getvalue() == b"".join(chunks) + + called_kwargs = { + "data": None, + "headers": download._headers, + "timeout": EXPECTED_TIMEOUT, + "stream": True, + } + transport.request.assert_called_once_with("GET", EXAMPLE_URL, **called_kwargs) + + def test_consume_w_object_generation(self): + GENERATION_VALUE = 1641590104888641 + stream = io.BytesIO() + chunks = (b"up down ", b"charlie ", b"brown") + end = 65536 + + download = download_mod.RawDownload( + EXAMPLE_URL, stream=stream, end=end, headers=None, checksum="md5" + ) + transport = mock.Mock(spec=["request"]) + transport.request.return_value = _mock_raw_response(chunks=chunks, headers=None) + + assert download._object_generation is None + + # Mock a retry operation with object generation retrieved and bytes already downloaded in the stream + download._object_generation = GENERATION_VALUE + offset = 256 + download._bytes_downloaded = offset + download.consume(transport) + + expected_url = EXAMPLE_URL + f"&generation={GENERATION_VALUE}" + called_kwargs = { + "data": None, + "headers": download._headers, + "timeout": EXPECTED_TIMEOUT, + "stream": True, + } + transport.request.assert_called_once_with("GET", expected_url, **called_kwargs) + range_bytes = "bytes={:d}-{:d}".format(offset, end) + assert download._headers["range"] == range_bytes + + def test_consume_w_bytes_downloaded(self): + stream = io.BytesIO() + chunks = (b"up down ", b"charlie ", b"brown") + end = 65536 + + download = download_mod.RawDownload( + EXAMPLE_URL, stream=stream, end=end, headers=None, checksum="md5" + ) + transport = mock.Mock(spec=["request"]) + transport.request.return_value = _mock_raw_response(chunks=chunks, headers=None) + + assert download._bytes_downloaded == 0 + + # Mock a retry operation with bytes already downloaded in the stream and checksum stored + offset = 256 + download._bytes_downloaded = offset + 
download._expected_checksum = None + download._checksum_object = _helpers._DoNothingHash() + download.consume(transport) + + called_kwargs = { + "data": None, + "headers": download._headers, + "timeout": EXPECTED_TIMEOUT, + "stream": True, + } + transport.request.assert_called_once_with("GET", EXAMPLE_URL, **called_kwargs) + range_bytes = "bytes={:d}-{:d}".format(offset, end) + assert download._headers["range"] == range_bytes + + def test_consume_w_bytes_downloaded_range_read(self): + stream = io.BytesIO() + chunks = (b"up down ", b"charlie ", b"brown") + start = 1024 + end = 65536 + + download = download_mod.RawDownload( + EXAMPLE_URL, + stream=stream, + start=start, + end=end, + headers=None, + checksum="md5", + ) + transport = mock.Mock(spec=["request"]) + transport.request.return_value = _mock_raw_response(chunks=chunks, headers=None) + + assert download._bytes_downloaded == 0 + + # Mock a retry operation with bytes already downloaded in the stream and checksum stored + offset = 256 + download._bytes_downloaded = offset + download._expected_checksum = None + download._checksum_object = _helpers._DoNothingHash() + download.consume(transport) + + called_kwargs = { + "data": None, + "headers": download._headers, + "timeout": EXPECTED_TIMEOUT, + "stream": True, + } + transport.request.assert_called_once_with("GET", EXAMPLE_URL, **called_kwargs) + range_bytes = "bytes={:d}-{:d}".format(start + offset, end) + assert download._headers["range"] == range_bytes + + def test_consume_gzip_reset_stream_w_bytes_downloaded(self): + stream = io.BytesIO() + chunks = (b"up down ", b"charlie ", b"brown") + end = 65536 + + download = download_mod.RawDownload( + EXAMPLE_URL, stream=stream, end=end, headers=None, checksum="md5" + ) + transport = mock.Mock(spec=["request"]) + + # Mock a decompressive transcoding retry operation with bytes already downloaded in the stream + headers = {_helpers._STORED_CONTENT_ENCODING_HEADER: "gzip"} + transport.request.return_value = _mock_raw_response( + chunks=chunks, headers=headers + ) + offset = 16 + download._bytes_downloaded = offset + download.consume(transport) + + assert stream.getvalue() == b"".join(chunks) + assert download._bytes_downloaded == len(b"".join(chunks)) + + def test_consume_gzip_reset_stream_error(self): + stream = io.BytesIO() + chunks = (b"up down ", b"charlie ", b"brown") + end = 65536 + + download = download_mod.RawDownload( + EXAMPLE_URL, stream=stream, end=end, headers=None, checksum="md5" + ) + transport = mock.Mock(spec=["request"]) + + # Mock a stream seek error while resuming a decompressive transcoding download + stream.seek = mock.Mock(side_effect=OSError("mock stream seek error")) + headers = {_helpers._STORED_CONTENT_ENCODING_HEADER: "gzip"} + transport.request.return_value = _mock_raw_response( + chunks=chunks, headers=headers + ) + offset = 16 + download._bytes_downloaded = offset + with pytest.raises(Exception): + download.consume(transport) + + +class TestChunkedDownload(object): + @staticmethod + def _response_content_range(start_byte, end_byte, total_bytes): + return "bytes {:d}-{:d}/{:d}".format(start_byte, end_byte, total_bytes) + + def _response_headers(self, start_byte, end_byte, total_bytes): + content_length = end_byte - start_byte + 1 + resp_range = self._response_content_range(start_byte, end_byte, total_bytes) + return { + "content-length": "{:d}".format(content_length), + "content-range": resp_range, + } + + def _mock_response( + self, start_byte, end_byte, total_bytes, content=None, status_code=None + ): + 
response_headers = self._response_headers(start_byte, end_byte, total_bytes) + return mock.Mock( + content=content, + headers=response_headers, + status_code=status_code, + spec=["content", "headers", "status_code"], + ) + + def test_consume_next_chunk_already_finished(self): + download = download_mod.ChunkedDownload(EXAMPLE_URL, 512, None) + download._finished = True + with pytest.raises(ValueError): + download.consume_next_chunk(None) + + def _mock_transport(self, start, chunk_size, total_bytes, content=b""): + transport = mock.Mock(spec=["request"]) + assert len(content) == chunk_size + transport.request.return_value = self._mock_response( + start, + start + chunk_size - 1, + total_bytes, + content=content, + status_code=int(http.client.OK), + ) + + return transport + + def test_consume_next_chunk(self): + start = 1536 + stream = io.BytesIO() + data = b"Just one chunk." + chunk_size = len(data) + download = download_mod.ChunkedDownload( + EXAMPLE_URL, chunk_size, stream, start=start + ) + total_bytes = 16384 + transport = self._mock_transport(start, chunk_size, total_bytes, content=data) + + # Verify the internal state before consuming a chunk. + assert not download.finished + assert download.bytes_downloaded == 0 + assert download.total_bytes is None + # Actually consume the chunk and check the output. + ret_val = download.consume_next_chunk(transport) + assert ret_val is transport.request.return_value + range_bytes = "bytes={:d}-{:d}".format(start, start + chunk_size - 1) + download_headers = {"range": range_bytes} + transport.request.assert_called_once_with( + "GET", + EXAMPLE_URL, + data=None, + headers=download_headers, + timeout=EXPECTED_TIMEOUT, + ) + assert stream.getvalue() == data + # Go back and check the internal state after consuming the chunk. + assert not download.finished + assert download.bytes_downloaded == chunk_size + assert download.total_bytes == total_bytes + + def test_consume_next_chunk_with_custom_timeout(self): + start = 1536 + stream = io.BytesIO() + data = b"Just one chunk." + chunk_size = len(data) + download = download_mod.ChunkedDownload( + EXAMPLE_URL, chunk_size, stream, start=start + ) + total_bytes = 16384 + transport = self._mock_transport(start, chunk_size, total_bytes, content=data) + + # Actually consume the chunk and check the output. 
+ download.consume_next_chunk(transport, timeout=14.7) + + range_bytes = "bytes={:d}-{:d}".format(start, start + chunk_size - 1) + download_headers = {"range": range_bytes} + transport.request.assert_called_once_with( + "GET", + EXAMPLE_URL, + data=None, + headers=download_headers, + timeout=14.7, + ) + + +class TestRawChunkedDownload(object): + @staticmethod + def _response_content_range(start_byte, end_byte, total_bytes): + return "bytes {:d}-{:d}/{:d}".format(start_byte, end_byte, total_bytes) + + def _response_headers(self, start_byte, end_byte, total_bytes): + content_length = end_byte - start_byte + 1 + resp_range = self._response_content_range(start_byte, end_byte, total_bytes) + return { + "content-length": "{:d}".format(content_length), + "content-range": resp_range, + } + + def _mock_response( + self, start_byte, end_byte, total_bytes, content=None, status_code=None + ): + response_headers = self._response_headers(start_byte, end_byte, total_bytes) + return mock.Mock( + _content=content, + headers=response_headers, + status_code=status_code, + spec=["_content", "headers", "status_code"], + ) + + def test_consume_next_chunk_already_finished(self): + download = download_mod.RawChunkedDownload(EXAMPLE_URL, 512, None) + download._finished = True + with pytest.raises(ValueError): + download.consume_next_chunk(None) + + def _mock_transport(self, start, chunk_size, total_bytes, content=b""): + transport = mock.Mock(spec=["request"]) + assert len(content) == chunk_size + transport.request.return_value = self._mock_response( + start, + start + chunk_size - 1, + total_bytes, + content=content, + status_code=int(http.client.OK), + ) + + return transport + + def test_consume_next_chunk(self): + start = 1536 + stream = io.BytesIO() + data = b"Just one chunk." + chunk_size = len(data) + download = download_mod.RawChunkedDownload( + EXAMPLE_URL, chunk_size, stream, start=start + ) + total_bytes = 16384 + transport = self._mock_transport(start, chunk_size, total_bytes, content=data) + + # Verify the internal state before consuming a chunk. + assert not download.finished + assert download.bytes_downloaded == 0 + assert download.total_bytes is None + # Actually consume the chunk and check the output. + ret_val = download.consume_next_chunk(transport) + assert ret_val is transport.request.return_value + range_bytes = "bytes={:d}-{:d}".format(start, start + chunk_size - 1) + download_headers = {"range": range_bytes} + transport.request.assert_called_once_with( + "GET", + EXAMPLE_URL, + data=None, + headers=download_headers, + stream=True, + timeout=EXPECTED_TIMEOUT, + ) + assert stream.getvalue() == data + # Go back and check the internal state after consuming the chunk. + assert not download.finished + assert download.bytes_downloaded == chunk_size + assert download.total_bytes == total_bytes + + def test_consume_next_chunk_with_custom_timeout(self): + start = 1536 + stream = io.BytesIO() + data = b"Just one chunk." + chunk_size = len(data) + download = download_mod.RawChunkedDownload( + EXAMPLE_URL, chunk_size, stream, start=start + ) + total_bytes = 16384 + transport = self._mock_transport(start, chunk_size, total_bytes, content=data) + + # Actually consume the chunk and check the output. 
+ download.consume_next_chunk(transport, timeout=14.7) + + range_bytes = "bytes={:d}-{:d}".format(start, start + chunk_size - 1) + download_headers = {"range": range_bytes} + transport.request.assert_called_once_with( + "GET", + EXAMPLE_URL, + data=None, + headers=download_headers, + stream=True, + timeout=14.7, + ) + assert stream.getvalue() == data + # Go back and check the internal state after consuming the chunk. + assert not download.finished + assert download.bytes_downloaded == chunk_size + assert download.total_bytes == total_bytes + + +class Test__add_decoder(object): + def test_non_gzipped(self): + response_raw = mock.Mock(headers={}, spec=["headers"]) + md5_hash = download_mod._add_decoder(response_raw, mock.sentinel.md5_hash) + + assert md5_hash is mock.sentinel.md5_hash + + def test_gzipped(self): + headers = {"content-encoding": "gzip"} + response_raw = mock.Mock(headers=headers, spec=["headers", "_decoder"]) + md5_hash = download_mod._add_decoder(response_raw, mock.sentinel.md5_hash) + + assert md5_hash is not mock.sentinel.md5_hash + assert isinstance(md5_hash, _helpers._DoNothingHash) + assert isinstance(response_raw._decoder, download_mod._GzipDecoder) + assert response_raw._decoder._checksum is mock.sentinel.md5_hash + + def test_brotli(self): + headers = {"content-encoding": "br"} + response_raw = mock.Mock(headers=headers, spec=["headers", "_decoder"]) + md5_hash = download_mod._add_decoder(response_raw, mock.sentinel.md5_hash) + + assert md5_hash is not mock.sentinel.md5_hash + assert isinstance(md5_hash, _helpers._DoNothingHash) + assert isinstance(response_raw._decoder, download_mod._BrotliDecoder) + assert response_raw._decoder._checksum is mock.sentinel.md5_hash + # Go ahead and exercise the flush method, added only for completion + response_raw._decoder.flush() + + +class Test_GzipDecoder(object): + def test_constructor(self): + decoder = download_mod._GzipDecoder(mock.sentinel.md5_hash) + assert decoder._checksum is mock.sentinel.md5_hash + + def test_decompress(self): + md5_hash = mock.Mock(spec=["update"]) + decoder = download_mod._GzipDecoder(md5_hash) + + data = b"\x1f\x8b\x08\x08" + result = decoder.decompress(data) + + assert result == b"" + md5_hash.update.assert_called_once_with(data) + + +class Test_BrotliDecoder(object): + def test_constructor(self): + decoder = download_mod._BrotliDecoder(mock.sentinel.md5_hash) + assert decoder._checksum is mock.sentinel.md5_hash + + def test_decompress(self): + md5_hash = mock.Mock(spec=["update"]) + decoder = download_mod._BrotliDecoder(md5_hash) + + data = b"\xc1\xf8I\xc0/\x83\xf3\xfa" + result = decoder.decompress(data) + + assert result == b"" + md5_hash.update.assert_called_once_with(data) + + +def _mock_response(status_code=http.client.OK, chunks=None, headers=None): + if headers is None: + headers = {} + + if chunks is not None: + mock_raw = mock.Mock(headers=headers, spec=["headers"]) + response = mock.MagicMock( + headers=headers, + status_code=int(status_code), + raw=mock_raw, + spec=[ + "__enter__", + "__exit__", + "iter_content", + "status_code", + "headers", + "raw", + ], + ) + # i.e. context manager returns ``self``. 
+        response.__enter__.return_value = response
+        response.__exit__.return_value = None
+        response.iter_content.return_value = iter(chunks)
+        return response
+    else:
+        return mock.Mock(
+            headers=headers,
+            status_code=int(status_code),
+            spec=["status_code", "headers"],
+        )
+
+
+def _mock_raw_response(status_code=http.client.OK, chunks=(), headers=None):
+    if headers is None:
+        headers = {}
+
+    mock_raw = mock.Mock(headers=headers, spec=["stream"])
+    mock_raw.stream.return_value = iter(chunks)
+    response = mock.MagicMock(
+        headers=headers,
+        status_code=int(status_code),
+        raw=mock_raw,
+        spec=[
+            "__enter__",
+            "__exit__",
+            "iter_content",
+            "status_code",
+            "headers",
+            "raw",
+        ],
+    )
+    # i.e. context manager returns ``self``.
+    response.__enter__.return_value = response
+    response.__exit__.return_value = None
+    return response
diff --git a/tests/resumable_media/unit/requests/test_upload.py b/tests/resumable_media/unit/requests/test_upload.py
new file mode 100644
index 000000000..6868cc7b8
--- /dev/null
+++ b/tests/resumable_media/unit/requests/test_upload.py
@@ -0,0 +1,412 @@
+# Copyright 2017 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import http.client
+import io
+import json
+import pytest  # type: ignore
+import tempfile
+from unittest import mock
+
+import google.cloud.storage._media.requests.upload as upload_mod
+
+
+URL_PREFIX = "https://www.googleapis.com/upload/storage/v1/b/{BUCKET}/o"
+SIMPLE_URL = URL_PREFIX + "?uploadType=media&name={OBJECT}"
+MULTIPART_URL = URL_PREFIX + "?uploadType=multipart"
+RESUMABLE_URL = URL_PREFIX + "?uploadType=resumable"
+ONE_MB = 1024 * 1024
+BASIC_CONTENT = "text/plain"
+JSON_TYPE = "application/json; charset=UTF-8"
+JSON_TYPE_LINE = b"content-type: application/json; charset=UTF-8\r\n"
+EXPECTED_TIMEOUT = (61, 60)
+EXAMPLE_XML_UPLOAD_URL = "https://test-project.storage.googleapis.com/test-bucket"
+EXAMPLE_XML_MPU_INITIATE_TEXT_TEMPLATE = """<?xml version='1.0' encoding='UTF-8'?>
+<InitiateMultipartUploadResult xmlns='http://s3.amazonaws.com/doc/2006-03-01/'>
+  <Bucket>travel-maps</Bucket>
+  <Key>paris.jpg</Key>
+  <UploadId>{upload_id}</UploadId>
+</InitiateMultipartUploadResult>
+"""
+UPLOAD_ID = "VXBsb2FkIElEIGZvciBlbHZpbmcncyBteS1tb3ZpZS5tMnRzIHVwbG9hZA"
+PARTS = {1: "39a59594290b0f9a30662a56d695b71d", 2: "00000000290b0f9a30662a56d695b71d"}
+FILE_DATA = b"testdata" * 128
+
+
+@pytest.fixture(scope="session")
+def filename():
+    with tempfile.NamedTemporaryFile() as f:
+        f.write(FILE_DATA)
+        f.flush()
+        yield f.name
+
+
+class TestSimpleUpload(object):
+    def test_transmit(self):
+        data = b"I have got a lovely bunch of coconuts."
+ content_type = BASIC_CONTENT + upload = upload_mod.SimpleUpload(SIMPLE_URL) + + transport = mock.Mock(spec=["request"]) + transport.request.return_value = _make_response() + assert not upload.finished + ret_val = upload.transmit(transport, data, content_type) + assert ret_val is transport.request.return_value + upload_headers = {"content-type": content_type} + transport.request.assert_called_once_with( + "POST", + SIMPLE_URL, + data=data, + headers=upload_headers, + timeout=EXPECTED_TIMEOUT, + ) + assert upload.finished + + def test_transmit_w_custom_timeout(self): + data = b"I have got a lovely bunch of coconuts." + content_type = BASIC_CONTENT + upload = upload_mod.SimpleUpload(SIMPLE_URL) + transport = mock.Mock(spec=["request"]) + transport.request.return_value = _make_response() + + upload.transmit(transport, data, content_type, timeout=12.6) + + expected_headers = {"content-type": content_type} + transport.request.assert_called_once_with( + "POST", + SIMPLE_URL, + data=data, + headers=expected_headers, + timeout=12.6, + ) + + +class TestMultipartUpload(object): + @mock.patch( + "google.cloud.storage._media._upload.get_boundary", return_value=b"==4==" + ) + def test_transmit(self, mock_get_boundary): + data = b"Mock data here and there." + metadata = {"Hey": "You", "Guys": "90909"} + content_type = BASIC_CONTENT + upload = upload_mod.MultipartUpload(MULTIPART_URL) + + transport = mock.Mock(spec=["request"]) + transport.request.return_value = _make_response() + assert not upload.finished + ret_val = upload.transmit(transport, data, metadata, content_type) + assert ret_val is transport.request.return_value + expected_payload = ( + b"--==4==\r\n" + + JSON_TYPE_LINE + + b"\r\n" + + json.dumps(metadata).encode("utf-8") + + b"\r\n" + + b"--==4==\r\n" + b"content-type: text/plain\r\n" + b"\r\n" + b"Mock data here and there.\r\n" + b"--==4==--" + ) + multipart_type = b'multipart/related; boundary="==4=="' + upload_headers = {"content-type": multipart_type} + transport.request.assert_called_once_with( + "POST", + MULTIPART_URL, + data=expected_payload, + headers=upload_headers, + timeout=EXPECTED_TIMEOUT, + ) + assert upload.finished + mock_get_boundary.assert_called_once_with() + + @mock.patch( + "google.cloud.storage._media._upload.get_boundary", return_value=b"==4==" + ) + def test_transmit_w_custom_timeout(self, mock_get_boundary): + data = b"Mock data here and there." 
+ metadata = {"Hey": "You", "Guys": "90909"} + content_type = BASIC_CONTENT + upload = upload_mod.MultipartUpload(MULTIPART_URL) + transport = mock.Mock(spec=["request"]) + transport.request.return_value = _make_response() + + upload.transmit(transport, data, metadata, content_type, timeout=12.6) + + expected_payload = b"".join( + ( + b"--==4==\r\n", + JSON_TYPE_LINE, + b"\r\n", + json.dumps(metadata).encode("utf-8"), + b"\r\n", + b"--==4==\r\n", + b"content-type: text/plain\r\n", + b"\r\n", + b"Mock data here and there.\r\n", + b"--==4==--", + ) + ) + multipart_type = b'multipart/related; boundary="==4=="' + upload_headers = {"content-type": multipart_type} + + transport.request.assert_called_once_with( + "POST", + MULTIPART_URL, + data=expected_payload, + headers=upload_headers, + timeout=12.6, + ) + assert upload.finished + mock_get_boundary.assert_called_once_with() + + +class TestResumableUpload(object): + def test_initiate(self): + upload = upload_mod.ResumableUpload(RESUMABLE_URL, ONE_MB) + data = b"Knock knock who is there" + stream = io.BytesIO(data) + metadata = {"name": "got-jokes.txt"} + + transport = mock.Mock(spec=["request"]) + location = ("http://test.invalid?upload_id=AACODBBBxuw9u3AA",) + response_headers = {"location": location} + post_response = _make_response(headers=response_headers) + transport.request.return_value = post_response + # Check resumable_url before. + assert upload._resumable_url is None + # Make request and check the return value (against the mock). + total_bytes = 100 + assert total_bytes > len(data) + response = upload.initiate( + transport, + stream, + metadata, + BASIC_CONTENT, + total_bytes=total_bytes, + stream_final=False, + ) + assert response is transport.request.return_value + # Check resumable_url after. + assert upload._resumable_url == location + # Make sure the mock was called as expected. 
+ json_bytes = b'{"name": "got-jokes.txt"}' + expected_headers = { + "content-type": JSON_TYPE, + "x-upload-content-type": BASIC_CONTENT, + "x-upload-content-length": "{:d}".format(total_bytes), + } + transport.request.assert_called_once_with( + "POST", + RESUMABLE_URL, + data=json_bytes, + headers=expected_headers, + timeout=EXPECTED_TIMEOUT, + ) + + def test_initiate_w_custom_timeout(self): + upload = upload_mod.ResumableUpload(RESUMABLE_URL, ONE_MB) + data = b"Knock knock who is there" + stream = io.BytesIO(data) + metadata = {"name": "got-jokes.txt"} + + transport = mock.Mock(spec=["request"]) + location = ("http://test.invalid?upload_id=AACODBBBxuw9u3AA",) + response_headers = {"location": location} + post_response = _make_response(headers=response_headers) + transport.request.return_value = post_response + + upload.initiate( + transport, + stream, + metadata, + BASIC_CONTENT, + total_bytes=100, + timeout=12.6, + ) + + # Make sure timeout was passed to the transport + json_bytes = b'{"name": "got-jokes.txt"}' + expected_headers = { + "content-type": JSON_TYPE, + "x-upload-content-type": BASIC_CONTENT, + "x-upload-content-length": "{:d}".format(100), + } + transport.request.assert_called_once_with( + "POST", + RESUMABLE_URL, + data=json_bytes, + headers=expected_headers, + timeout=12.6, + ) + + @staticmethod + def _upload_in_flight(data, headers=None): + upload = upload_mod.ResumableUpload(RESUMABLE_URL, ONE_MB, headers=headers) + upload._stream = io.BytesIO(data) + upload._content_type = BASIC_CONTENT + upload._total_bytes = len(data) + upload._resumable_url = "http://test.invalid?upload_id=not-none" + return upload + + @staticmethod + def _chunk_mock(status_code, response_headers): + transport = mock.Mock(spec=["request"]) + put_response = _make_response(status_code=status_code, headers=response_headers) + transport.request.return_value = put_response + + return transport + + def test_transmit_next_chunk(self): + data = b"This time the data is official." + upload = self._upload_in_flight(data) + # Make a fake chunk size smaller than 256 KB. + chunk_size = 10 + assert chunk_size < len(data) + upload._chunk_size = chunk_size + # Make a fake 308 response. + response_headers = {"range": "bytes=0-{:d}".format(chunk_size - 1)} + transport = self._chunk_mock(http.client.PERMANENT_REDIRECT, response_headers) + # Check the state before the request. + assert upload._bytes_uploaded == 0 + + # Make request and check the return value (against the mock). + response = upload.transmit_next_chunk(transport) + assert response is transport.request.return_value + # Check that the state has been updated. + assert upload._bytes_uploaded == chunk_size + # Make sure the mock was called as expected. + payload = data[:chunk_size] + content_range = "bytes 0-{:d}/{:d}".format(chunk_size - 1, len(data)) + expected_headers = { + "content-range": content_range, + "content-type": BASIC_CONTENT, + } + transport.request.assert_called_once_with( + "PUT", + upload.resumable_url, + data=payload, + headers=expected_headers, + timeout=EXPECTED_TIMEOUT, + ) + + def test_transmit_next_chunk_w_custom_timeout(self): + data = b"This time the data is official." + upload = self._upload_in_flight(data) + + # Make a fake chunk size smaller than 256 KB. + chunk_size = 10 + upload._chunk_size = chunk_size + + # Make a fake 308 response. 
+ response_headers = {"range": "bytes=0-{:d}".format(chunk_size - 1)} + transport = self._chunk_mock(http.client.PERMANENT_REDIRECT, response_headers) + + # Make request and check the return value (against the mock). + upload.transmit_next_chunk(transport, timeout=12.6) + + # Make sure timeout was passed to the transport + payload = data[:chunk_size] + content_range = "bytes 0-{:d}/{:d}".format(chunk_size - 1, len(data)) + expected_headers = { + "content-range": content_range, + "content-type": BASIC_CONTENT, + } + transport.request.assert_called_once_with( + "PUT", + upload.resumable_url, + data=payload, + headers=expected_headers, + timeout=12.6, + ) + + def test_recover(self): + upload = upload_mod.ResumableUpload(RESUMABLE_URL, ONE_MB) + upload._invalid = True # Make sure invalid. + upload._stream = mock.Mock(spec=["seek"]) + upload._resumable_url = "http://test.invalid?upload_id=big-deal" + + end = 55555 + headers = {"range": "bytes=0-{:d}".format(end)} + transport = self._chunk_mock(http.client.PERMANENT_REDIRECT, headers) + + ret_val = upload.recover(transport) + assert ret_val is transport.request.return_value + # Check the state of ``upload`` after. + assert upload.bytes_uploaded == end + 1 + assert not upload.invalid + upload._stream.seek.assert_called_once_with(end + 1) + expected_headers = {"content-range": "bytes */*"} + transport.request.assert_called_once_with( + "PUT", + upload.resumable_url, + data=None, + headers=expected_headers, + timeout=EXPECTED_TIMEOUT, + ) + + +def test_mpu_container(): + container = upload_mod.XMLMPUContainer(EXAMPLE_XML_UPLOAD_URL, filename) + + response_text = EXAMPLE_XML_MPU_INITIATE_TEXT_TEMPLATE.format(upload_id=UPLOAD_ID) + + transport = mock.Mock(spec=["request"]) + transport.request.return_value = _make_response(text=response_text) + container.initiate(transport, BASIC_CONTENT) + assert container.upload_id == UPLOAD_ID + + for part, etag in PARTS.items(): + container.register_part(part, etag) + + assert container._parts == PARTS + + transport = mock.Mock(spec=["request"]) + transport.request.return_value = _make_response() + container.finalize(transport) + assert container.finished + + +def test_mpu_container_cancel(): + container = upload_mod.XMLMPUContainer( + EXAMPLE_XML_UPLOAD_URL, filename, upload_id=UPLOAD_ID + ) + + transport = mock.Mock(spec=["request"]) + transport.request.return_value = _make_response(status_code=204) + container.cancel(transport) + + +def test_mpu_part(filename): + part = upload_mod.XMLMPUPart( + EXAMPLE_XML_UPLOAD_URL, UPLOAD_ID, filename, 0, 128, 1, checksum=None + ) + + transport = mock.Mock(spec=["request"]) + transport.request.return_value = _make_response(headers={"etag": PARTS[1]}) + + part.upload(transport) + + assert part.finished + assert part.etag == PARTS[1] + + +def _make_response(status_code=http.client.OK, headers=None, text=None): + headers = headers or {} + return mock.Mock( + headers=headers, + status_code=status_code, + text=text, + spec=["headers", "status_code", "text"], + ) diff --git a/tests/resumable_media/unit/test__download.py b/tests/resumable_media/unit/test__download.py new file mode 100644 index 000000000..54559e45e --- /dev/null +++ b/tests/resumable_media/unit/test__download.py @@ -0,0 +1,751 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import http.client +import io + +from unittest import mock +import pytest # type: ignore + +from google.cloud.storage._media import _download +from google.cloud.storage.exceptions import InvalidResponse +from google.cloud.storage.retry import DEFAULT_RETRY + + +EXAMPLE_URL = ( + "https://www.googleapis.com/download/storage/v1/b/{BUCKET}/o/{OBJECT}?alt=media" +) + + +class TestDownloadBase(object): + def test_constructor_defaults(self): + download = _download.DownloadBase(EXAMPLE_URL) + assert download.media_url == EXAMPLE_URL + assert download._stream is None + assert download.start is None + assert download.end is None + assert download._headers == {} + assert not download._finished + _check_retry_strategy(download) + + def test_constructor_explicit(self): + start = 11 + end = 10001 + headers = {"foof": "barf"} + download = _download.DownloadBase( + EXAMPLE_URL, + stream=mock.sentinel.stream, + start=start, + end=end, + headers=headers, + ) + assert download.media_url == EXAMPLE_URL + assert download._stream is mock.sentinel.stream + assert download.start == start + assert download.end == end + assert download._headers is headers + assert not download._finished + _check_retry_strategy(download) + + def test_finished_property(self): + download = _download.DownloadBase(EXAMPLE_URL) + # Default value of @property. + assert not download.finished + + # Make sure we cannot set it on public @property. + with pytest.raises(AttributeError): + download.finished = False + + # Set it privately and then check the @property. 
+ download._finished = True + assert download.finished + + def test__get_status_code(self): + with pytest.raises(NotImplementedError) as exc_info: + _download.DownloadBase._get_status_code(None) + + exc_info.match("virtual") + + def test__get_headers(self): + with pytest.raises(NotImplementedError) as exc_info: + _download.DownloadBase._get_headers(None) + + exc_info.match("virtual") + + def test__get_body(self): + with pytest.raises(NotImplementedError) as exc_info: + _download.DownloadBase._get_body(None) + + exc_info.match("virtual") + + +class TestDownload(object): + def test__prepare_request_already_finished(self): + download = _download.Download(EXAMPLE_URL) + download._finished = True + with pytest.raises(ValueError): + download._prepare_request() + + def test__prepare_request(self): + download1 = _download.Download(EXAMPLE_URL) + method1, url1, payload1, headers1 = download1._prepare_request() + assert method1 == "GET" + assert url1 == EXAMPLE_URL + assert payload1 is None + assert headers1 == {} + + download2 = _download.Download(EXAMPLE_URL, start=53) + method2, url2, payload2, headers2 = download2._prepare_request() + assert method2 == "GET" + assert url2 == EXAMPLE_URL + assert payload2 is None + assert headers2 == {"range": "bytes=53-"} + + def test__prepare_request_with_headers(self): + headers = {"spoonge": "borb"} + download = _download.Download(EXAMPLE_URL, start=11, end=111, headers=headers) + method, url, payload, new_headers = download._prepare_request() + assert method == "GET" + assert url == EXAMPLE_URL + assert payload is None + assert new_headers is headers + assert headers == {"range": "bytes=11-111", "spoonge": "borb"} + + def test__process_response(self): + download = _download.Download(EXAMPLE_URL) + _fix_up_virtual(download) + + # Make sure **not finished** before. + assert not download.finished + response = mock.Mock(status_code=int(http.client.OK), spec=["status_code"]) + ret_val = download._process_response(response) + assert ret_val is None + # Make sure **finished** after. + assert download.finished + + def test__process_response_bad_status(self): + download = _download.Download(EXAMPLE_URL) + _fix_up_virtual(download) + + # Make sure **not finished** before. + assert not download.finished + response = mock.Mock( + status_code=int(http.client.NOT_FOUND), spec=["status_code"] + ) + with pytest.raises(InvalidResponse) as exc_info: + download._process_response(response) + + error = exc_info.value + assert error.response is response + assert len(error.args) == 5 + assert error.args[1] == response.status_code + assert error.args[3] == http.client.OK + assert error.args[4] == http.client.PARTIAL_CONTENT + # Make sure **finished** even after a failure. 
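+ # Even a failed response tombstones the download, so it cannot be reused afterwards.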
+ assert download.finished + + def test_consume(self): + download = _download.Download(EXAMPLE_URL) + with pytest.raises(NotImplementedError) as exc_info: + download.consume(None) + + exc_info.match("virtual") + + +class TestChunkedDownload(object): + def test_constructor_defaults(self): + chunk_size = 256 + stream = mock.sentinel.stream + download = _download.ChunkedDownload(EXAMPLE_URL, chunk_size, stream) + assert download.media_url == EXAMPLE_URL + assert download.chunk_size == chunk_size + assert download.start == 0 + assert download.end is None + assert download._headers == {} + assert not download._finished + _check_retry_strategy(download) + assert download._stream is stream + assert download._bytes_downloaded == 0 + assert download._total_bytes is None + assert not download._invalid + + def test_constructor_bad_start(self): + with pytest.raises(ValueError): + _download.ChunkedDownload(EXAMPLE_URL, 256, None, start=-11) + + def test_bytes_downloaded_property(self): + download = _download.ChunkedDownload(EXAMPLE_URL, 256, None) + # Default value of @property. + assert download.bytes_downloaded == 0 + + # Make sure we cannot set it on public @property. + with pytest.raises(AttributeError): + download.bytes_downloaded = 1024 + + # Set it privately and then check the @property. + download._bytes_downloaded = 128 + assert download.bytes_downloaded == 128 + + def test_total_bytes_property(self): + download = _download.ChunkedDownload(EXAMPLE_URL, 256, None) + # Default value of @property. + assert download.total_bytes is None + + # Make sure we cannot set it on public @property. + with pytest.raises(AttributeError): + download.total_bytes = 65536 + + # Set it privately and then check the @property. + download._total_bytes = 8192 + assert download.total_bytes == 8192 + + def test__get_byte_range(self): + chunk_size = 512 + download = _download.ChunkedDownload(EXAMPLE_URL, chunk_size, None) + curr_start, curr_end = download._get_byte_range() + assert curr_start == 0 + assert curr_end == chunk_size - 1 + + def test__get_byte_range_with_end(self): + chunk_size = 512 + start = 1024 + end = 1151 + download = _download.ChunkedDownload( + EXAMPLE_URL, chunk_size, None, start=start, end=end + ) + curr_start, curr_end = download._get_byte_range() + assert curr_start == start + assert curr_end == end + # Make sure this is less than the chunk size. + actual_size = curr_end - curr_start + 1 + assert actual_size < chunk_size + + def test__get_byte_range_with_total_bytes(self): + chunk_size = 512 + download = _download.ChunkedDownload(EXAMPLE_URL, chunk_size, None) + total_bytes = 207 + download._total_bytes = total_bytes + curr_start, curr_end = download._get_byte_range() + assert curr_start == 0 + assert curr_end == total_bytes - 1 + # Make sure this is less than the chunk size. 
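+ # With only 207 total bytes known, the end of the range is clamped to total_bytes - 1, so the request covers fewer bytes than chunk_size.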
+ actual_size = curr_end - curr_start + 1 + assert actual_size < chunk_size + + @staticmethod + def _response_content_range(start_byte, end_byte, total_bytes): + return "bytes {:d}-{:d}/{:d}".format(start_byte, end_byte, total_bytes) + + def _response_headers(self, start_byte, end_byte, total_bytes): + content_length = end_byte - start_byte + 1 + resp_range = self._response_content_range(start_byte, end_byte, total_bytes) + return { + "content-length": "{:d}".format(content_length), + "content-range": resp_range, + } + + def _mock_response( + self, start_byte, end_byte, total_bytes, content=None, status_code=None + ): + response_headers = self._response_headers(start_byte, end_byte, total_bytes) + return mock.Mock( + content=content, + headers=response_headers, + status_code=status_code, + spec=["content", "headers", "status_code"], + ) + + def test__prepare_request_already_finished(self): + download = _download.ChunkedDownload(EXAMPLE_URL, 64, None) + download._finished = True + with pytest.raises(ValueError) as exc_info: + download._prepare_request() + + assert exc_info.match("Download has finished.") + + def test__prepare_request_invalid(self): + download = _download.ChunkedDownload(EXAMPLE_URL, 64, None) + download._invalid = True + with pytest.raises(ValueError) as exc_info: + download._prepare_request() + + assert exc_info.match("Download is invalid and cannot be re-used.") + + def test__prepare_request(self): + chunk_size = 2048 + download1 = _download.ChunkedDownload(EXAMPLE_URL, chunk_size, None) + method1, url1, payload1, headers1 = download1._prepare_request() + assert method1 == "GET" + assert url1 == EXAMPLE_URL + assert payload1 is None + assert headers1 == {"range": "bytes=0-2047"} + + download2 = _download.ChunkedDownload( + EXAMPLE_URL, chunk_size, None, start=19991 + ) + download2._total_bytes = 20101 + method2, url2, payload2, headers2 = download2._prepare_request() + assert method2 == "GET" + assert url2 == EXAMPLE_URL + assert payload2 is None + assert headers2 == {"range": "bytes=19991-20100"} + + def test__prepare_request_with_headers(self): + chunk_size = 2048 + headers = {"patrizio": "Starf-ish"} + download = _download.ChunkedDownload( + EXAMPLE_URL, chunk_size, None, headers=headers + ) + method, url, payload, new_headers = download._prepare_request() + assert method == "GET" + assert url == EXAMPLE_URL + assert payload is None + assert new_headers is headers + expected = {"patrizio": "Starf-ish", "range": "bytes=0-2047"} + assert headers == expected + + def test__make_invalid(self): + download = _download.ChunkedDownload(EXAMPLE_URL, 512, None) + assert not download.invalid + download._make_invalid() + assert download.invalid + + def test__process_response(self): + data = b"1234xyztL" * 37 # 9 * 37 == 33 + chunk_size = len(data) + stream = io.BytesIO() + download = _download.ChunkedDownload(EXAMPLE_URL, chunk_size, stream) + _fix_up_virtual(download) + + already = 22 + download._bytes_downloaded = already + total_bytes = 4444 + + # Check internal state before. + assert not download.finished + assert download.bytes_downloaded == already + assert download.total_bytes is None + # Actually call the method to update. + response = self._mock_response( + already, + already + chunk_size - 1, + total_bytes, + content=data, + status_code=int(http.client.PARTIAL_CONTENT), + ) + download._process_response(response) + # Check internal state after. 
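+ # A successful 206 chunk advances bytes_downloaded, records total_bytes from the content-range header, and writes the payload to the stream.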
+ assert not download.finished + assert download.bytes_downloaded == already + chunk_size + assert download.total_bytes == total_bytes + assert stream.getvalue() == data + + def test__process_response_transfer_encoding(self): + data = b"1234xyztL" * 37 + chunk_size = len(data) + stream = io.BytesIO() + download = _download.ChunkedDownload(EXAMPLE_URL, chunk_size, stream) + _fix_up_virtual(download) + + already = 22 + download._bytes_downloaded = already + total_bytes = 4444 + + # Check internal state before. + assert not download.finished + assert download.bytes_downloaded == already + assert download.total_bytes is None + assert not download.invalid + # Actually call the method to update. + response = self._mock_response( + already, + already + chunk_size - 1, + total_bytes, + content=data, + status_code=int(http.client.PARTIAL_CONTENT), + ) + response.headers["transfer-encoding"] = "chunked" + del response.headers["content-length"] + download._process_response(response) + # Check internal state after. + assert not download.finished + assert download.bytes_downloaded == already + chunk_size + assert download.total_bytes == total_bytes + assert stream.getvalue() == data + + def test__process_response_bad_status(self): + chunk_size = 384 + stream = mock.Mock(spec=["write"]) + download = _download.ChunkedDownload(EXAMPLE_URL, chunk_size, stream) + _fix_up_virtual(download) + + total_bytes = 300 + + # Check internal state before. + assert not download.finished + assert download.bytes_downloaded == 0 + assert download.total_bytes is None + # Actually call the method to update. + response = self._mock_response( + 0, total_bytes - 1, total_bytes, status_code=int(http.client.NOT_FOUND) + ) + with pytest.raises(InvalidResponse) as exc_info: + download._process_response(response) + + error = exc_info.value + assert error.response is response + assert len(error.args) == 5 + assert error.args[1] == response.status_code + assert error.args[3] == http.client.OK + assert error.args[4] == http.client.PARTIAL_CONTENT + # Check internal state after. + assert not download.finished + assert download.bytes_downloaded == 0 + assert download.total_bytes is None + assert download.invalid + stream.write.assert_not_called() + + def test__process_response_missing_content_length(self): + download = _download.ChunkedDownload(EXAMPLE_URL, 256, None) + _fix_up_virtual(download) + + # Check internal state before. + assert not download.finished + assert download.bytes_downloaded == 0 + assert download.total_bytes is None + assert not download.invalid + # Actually call the method to update. + response = mock.Mock( + headers={"content-range": "bytes 0-99/99"}, + status_code=int(http.client.PARTIAL_CONTENT), + content=b"DEADBEEF", + spec=["headers", "status_code", "content"], + ) + with pytest.raises(InvalidResponse) as exc_info: + download._process_response(response) + + error = exc_info.value + assert error.response is response + assert len(error.args) == 2 + assert error.args[1] == "content-length" + # Check internal state after. + assert not download.finished + assert download.bytes_downloaded == 0 + assert download.total_bytes is None + assert download.invalid + + def test__process_response_bad_content_range(self): + download = _download.ChunkedDownload(EXAMPLE_URL, 256, None) + _fix_up_virtual(download) + + # Check internal state before. + assert not download.finished + assert download.bytes_downloaded == 0 + assert download.total_bytes is None + assert not download.invalid + # Actually call the method to update. 
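+ # The content-range value "kites x-y/58" below is deliberately malformed so parsing fails and the download is marked invalid.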
+ data = b"stuff" + headers = { + "content-length": "{:d}".format(len(data)), + "content-range": "kites x-y/58", + } + response = mock.Mock( + content=data, + headers=headers, + status_code=int(http.client.PARTIAL_CONTENT), + spec=["content", "headers", "status_code"], + ) + with pytest.raises(InvalidResponse) as exc_info: + download._process_response(response) + + error = exc_info.value + assert error.response is response + assert len(error.args) == 3 + assert error.args[1] == headers["content-range"] + # Check internal state after. + assert not download.finished + assert download.bytes_downloaded == 0 + assert download.total_bytes is None + assert download.invalid + + def test__process_response_body_wrong_length(self): + chunk_size = 10 + stream = mock.Mock(spec=["write"]) + download = _download.ChunkedDownload(EXAMPLE_URL, chunk_size, stream) + _fix_up_virtual(download) + + total_bytes = 100 + + # Check internal state before. + assert not download.finished + assert download.bytes_downloaded == 0 + assert download.total_bytes is None + # Actually call the method to update. + data = b"not 10" + response = self._mock_response( + 0, + chunk_size - 1, + total_bytes, + content=data, + status_code=int(http.client.PARTIAL_CONTENT), + ) + with pytest.raises(InvalidResponse) as exc_info: + download._process_response(response) + + error = exc_info.value + assert error.response is response + assert len(error.args) == 5 + assert error.args[2] == chunk_size + assert error.args[4] == len(data) + # Check internal state after. + assert not download.finished + assert download.bytes_downloaded == 0 + assert download.total_bytes is None + assert download.invalid + stream.write.assert_not_called() + + def test__process_response_when_finished(self): + chunk_size = 256 + stream = io.BytesIO() + download = _download.ChunkedDownload(EXAMPLE_URL, chunk_size, stream) + _fix_up_virtual(download) + + total_bytes = 200 + + # Check internal state before. + assert not download.finished + assert download.bytes_downloaded == 0 + assert download.total_bytes is None + # Actually call the method to update. + data = b"abcd" * 50 # 4 * 50 == 200 + response = self._mock_response( + 0, + total_bytes - 1, + total_bytes, + content=data, + status_code=int(http.client.OK), + ) + download._process_response(response) + # Check internal state after. + assert download.finished + assert download.bytes_downloaded == total_bytes + assert total_bytes < chunk_size + assert download.total_bytes == total_bytes + assert stream.getvalue() == data + + def test__process_response_when_reaching_end(self): + chunk_size = 8192 + end = 65000 + stream = io.BytesIO() + download = _download.ChunkedDownload(EXAMPLE_URL, chunk_size, stream, end=end) + _fix_up_virtual(download) + + download._bytes_downloaded = 7 * chunk_size + download._total_bytes = 8 * chunk_size + + # Check internal state before. + assert not download.finished + assert download.bytes_downloaded == 7 * chunk_size + assert download.total_bytes == 8 * chunk_size + # Actually call the method to update. + expected_size = end - 7 * chunk_size + 1 + data = b"B" * expected_size + response = self._mock_response( + 7 * chunk_size, + end, + 8 * chunk_size, + content=data, + status_code=int(http.client.PARTIAL_CONTENT), + ) + download._process_response(response) + # Check internal state after. 
+ assert download.finished + assert download.bytes_downloaded == end + 1 + assert download.bytes_downloaded < download.total_bytes + assert download.total_bytes == 8 * chunk_size + assert stream.getvalue() == data + + def test__process_response_when_content_range_is_zero(self): + chunk_size = 10 + stream = mock.Mock(spec=["write"]) + download = _download.ChunkedDownload(EXAMPLE_URL, chunk_size, stream) + _fix_up_virtual(download) + + content_range = _download._ZERO_CONTENT_RANGE_HEADER + headers = {"content-range": content_range} + status_code = http.client.REQUESTED_RANGE_NOT_SATISFIABLE + response = mock.Mock( + headers=headers, status_code=status_code, spec=["headers", "status_code"] + ) + download._process_response(response) + stream.write.assert_not_called() + assert download.finished + assert download.bytes_downloaded == 0 + assert download.total_bytes is None + + def test_consume_next_chunk(self): + download = _download.ChunkedDownload(EXAMPLE_URL, 256, None) + with pytest.raises(NotImplementedError) as exc_info: + download.consume_next_chunk(None) + + exc_info.match("virtual") + + +class Test__add_bytes_range(object): + def test_do_nothing(self): + headers = {} + ret_val = _download.add_bytes_range(None, None, headers) + assert ret_val is None + assert headers == {} + + def test_both_vals(self): + headers = {} + ret_val = _download.add_bytes_range(17, 1997, headers) + assert ret_val is None + assert headers == {"range": "bytes=17-1997"} + + def test_end_only(self): + headers = {} + ret_val = _download.add_bytes_range(None, 909, headers) + assert ret_val is None + assert headers == {"range": "bytes=0-909"} + + def test_start_only(self): + headers = {} + ret_val = _download.add_bytes_range(3735928559, None, headers) + assert ret_val is None + assert headers == {"range": "bytes=3735928559-"} + + def test_start_as_offset(self): + headers = {} + ret_val = _download.add_bytes_range(-123454321, None, headers) + assert ret_val is None + assert headers == {"range": "bytes=-123454321"} + + +class Test_get_range_info(object): + @staticmethod + def _make_response(content_range): + headers = {"content-range": content_range} + return mock.Mock(headers=headers, spec=["headers"]) + + def _success_helper(self, **kwargs): + content_range = "Bytes 7-11/42" + response = self._make_response(content_range) + start_byte, end_byte, total_bytes = _download.get_range_info( + response, _get_headers, **kwargs + ) + assert start_byte == 7 + assert end_byte == 11 + assert total_bytes == 42 + + def test_success(self): + self._success_helper() + + def test_success_with_callback(self): + callback = mock.Mock(spec=[]) + self._success_helper(callback=callback) + callback.assert_not_called() + + def _failure_helper(self, **kwargs): + content_range = "nope x-6/y" + response = self._make_response(content_range) + with pytest.raises(InvalidResponse) as exc_info: + _download.get_range_info(response, _get_headers, **kwargs) + + error = exc_info.value + assert error.response is response + assert len(error.args) == 3 + assert error.args[1] == content_range + + def test_failure(self): + self._failure_helper() + + def test_failure_with_callback(self): + callback = mock.Mock(spec=[]) + self._failure_helper(callback=callback) + callback.assert_called_once_with() + + def _missing_header_helper(self, **kwargs): + response = mock.Mock(headers={}, spec=["headers"]) + with pytest.raises(InvalidResponse) as exc_info: + _download.get_range_info(response, _get_headers, **kwargs) + + error = exc_info.value + assert error.response is 
response + assert len(error.args) == 2 + assert error.args[1] == "content-range" + + def test_missing_header(self): + self._missing_header_helper() + + def test_missing_header_with_callback(self): + callback = mock.Mock(spec=[]) + self._missing_header_helper(callback=callback) + callback.assert_called_once_with() + + +class Test__check_for_zero_content_range(object): + @staticmethod + def _make_response(content_range, status_code): + headers = {"content-range": content_range} + return mock.Mock( + headers=headers, status_code=status_code, spec=["headers", "status_code"] + ) + + def test_status_code_416_and_test_content_range_zero_both(self): + content_range = _download._ZERO_CONTENT_RANGE_HEADER + status_code = http.client.REQUESTED_RANGE_NOT_SATISFIABLE + response = self._make_response(content_range, status_code) + assert _download._check_for_zero_content_range( + response, _get_status_code, _get_headers + ) + + def test_status_code_416_only(self): + content_range = "bytes 2-5/3" + status_code = http.client.REQUESTED_RANGE_NOT_SATISFIABLE + response = self._make_response(content_range, status_code) + assert not _download._check_for_zero_content_range( + response, _get_status_code, _get_headers + ) + + def test_content_range_zero_only(self): + content_range = _download._ZERO_CONTENT_RANGE_HEADER + status_code = http.client.OK + response = self._make_response(content_range, status_code) + assert not _download._check_for_zero_content_range( + response, _get_status_code, _get_headers + ) + + +def _get_status_code(response): + return response.status_code + + +def _get_headers(response): + return response.headers + + +def _get_body(response): + return response.content + + +def _fix_up_virtual(download): + download._get_status_code = _get_status_code + download._get_headers = _get_headers + download._get_body = _get_body + + +def _check_retry_strategy(download): + assert download._retry_strategy == DEFAULT_RETRY diff --git a/tests/resumable_media/unit/test__helpers.py b/tests/resumable_media/unit/test__helpers.py new file mode 100644 index 000000000..2f7ae0f72 --- /dev/null +++ b/tests/resumable_media/unit/test__helpers.py @@ -0,0 +1,421 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import absolute_import + +import hashlib +import http.client + +from unittest import mock +import pytest # type: ignore + +from google.cloud.storage._media import _helpers +from google.cloud.storage.retry import _RETRYABLE_STATUS_CODES +from google.cloud.storage.exceptions import InvalidResponse + +import google_crc32c + + +def test_do_nothing(): + ret_val = _helpers.do_nothing() + assert ret_val is None + + +class Test_header_required(object): + def _success_helper(self, **kwargs): + name = "some-header" + value = "The Right Hand Side" + headers = {name: value, "other-name": "other-value"} + response = mock.Mock(headers=headers, spec=["headers"]) + result = _helpers.header_required(response, name, _get_headers, **kwargs) + assert result == value + + def test_success(self): + self._success_helper() + + def test_success_with_callback(self): + callback = mock.Mock(spec=[]) + self._success_helper(callback=callback) + callback.assert_not_called() + + def _failure_helper(self, **kwargs): + response = mock.Mock(headers={}, spec=["headers"]) + name = "any-name" + with pytest.raises(InvalidResponse) as exc_info: + _helpers.header_required(response, name, _get_headers, **kwargs) + + error = exc_info.value + assert error.response is response + assert len(error.args) == 2 + assert error.args[1] == name + + def test_failure(self): + self._failure_helper() + + def test_failure_with_callback(self): + callback = mock.Mock(spec=[]) + self._failure_helper(callback=callback) + callback.assert_called_once_with() + + +class Test_require_status_code(object): + @staticmethod + def _get_status_code(response): + return response.status_code + + def test_success(self): + status_codes = (http.client.OK, http.client.CREATED) + acceptable = ( + http.client.OK, + int(http.client.OK), + http.client.CREATED, + int(http.client.CREATED), + ) + for value in acceptable: + response = _make_response(value) + status_code = _helpers.require_status_code( + response, status_codes, self._get_status_code + ) + assert value == status_code + + def test_success_with_callback(self): + status_codes = (http.client.OK,) + response = _make_response(http.client.OK) + callback = mock.Mock(spec=[]) + status_code = _helpers.require_status_code( + response, status_codes, self._get_status_code, callback=callback + ) + assert status_code == http.client.OK + callback.assert_not_called() + + def test_failure(self): + status_codes = (http.client.CREATED, http.client.NO_CONTENT) + response = _make_response(http.client.OK) + with pytest.raises(InvalidResponse) as exc_info: + _helpers.require_status_code(response, status_codes, self._get_status_code) + + error = exc_info.value + assert error.response is response + assert len(error.args) == 5 + assert error.args[1] == response.status_code + assert error.args[3:] == status_codes + + def test_failure_with_callback(self): + status_codes = (http.client.OK,) + response = _make_response(http.client.NOT_FOUND) + callback = mock.Mock(spec=[]) + with pytest.raises(InvalidResponse) as exc_info: + _helpers.require_status_code( + response, status_codes, self._get_status_code, callback=callback + ) + + error = exc_info.value + assert error.response is response + assert len(error.args) == 4 + assert error.args[1] == response.status_code + assert error.args[3:] == status_codes + callback.assert_called_once_with() + + def test_retryable_failure_without_callback(self): + status_codes = (http.client.OK,) + retryable_responses = [ + _make_response(status_code) for status_code in _RETRYABLE_STATUS_CODES + 
] + callback = mock.Mock(spec=[]) + for retryable_response in retryable_responses: + with pytest.raises(InvalidResponse) as exc_info: + _helpers.require_status_code( + retryable_response, + status_codes, + self._get_status_code, + callback=callback, + ) + + error = exc_info.value + assert error.response is retryable_response + assert len(error.args) == 4 + assert error.args[1] == retryable_response.status_code + assert error.args[3:] == status_codes + callback.assert_not_called() + + +def _make_response(status_code): + return mock.Mock(status_code=status_code, spec=["status_code"]) + + +def _get_headers(response): + return response.headers + + +@pytest.mark.parametrize("checksum", ["md5", "crc32c", None]) +def test__get_checksum_object(checksum): + checksum_object = _helpers._get_checksum_object(checksum) + + checksum_types = { + "md5": type(hashlib.md5()), + "crc32c": type(google_crc32c.Checksum()), + None: type(None), + } + assert isinstance(checksum_object, checksum_types[checksum]) + + +def test__get_checksum_object_invalid(): + with pytest.raises(ValueError): + _helpers._get_checksum_object("invalid") + + +def test__is_crc32c_available_and_fast(): + import sys + + import google_crc32c + + assert google_crc32c.implementation == "c" + assert _helpers._is_crc32c_available_and_fast() is True + + del sys.modules["google_crc32c"] + with mock.patch("builtins.__import__", side_effect=ImportError): + assert _helpers._is_crc32c_available_and_fast() is False + + import google_crc32c + + assert google_crc32c.implementation == "c" + with mock.patch("google_crc32c.implementation", new="python"): + assert _helpers._is_crc32c_available_and_fast() is False + + # Run this again to confirm we're back to the initial state. + assert _helpers._is_crc32c_available_and_fast() is True + + +def test__DoNothingHash(): + do_nothing_hash = _helpers._DoNothingHash() + return_value = do_nothing_hash.update(b"some data") + assert return_value is None + + +class Test__get_expected_checksum(object): + @pytest.mark.parametrize("template", ["crc32c={},md5={}", "crc32c={}, md5={}"]) + @pytest.mark.parametrize("checksum", ["md5", "crc32c"]) + @mock.patch("google.cloud.storage._media._helpers._LOGGER") + def test__w_header_present(self, _LOGGER, template, checksum): + checksums = {"md5": "b2twdXNodGhpc2J1dHRvbg==", "crc32c": "3q2+7w=="} + header_value = template.format(checksums["crc32c"], checksums["md5"]) + headers = {_helpers._HASH_HEADER: header_value} + response = _mock_response(headers=headers) + + def _get_headers(response): + return response.headers + + url = "https://example.com/" + expected_checksum, checksum_obj = _helpers._get_expected_checksum( + response, _get_headers, url, checksum_type=checksum + ) + assert expected_checksum == checksums[checksum] + + checksum_types = { + "md5": type(hashlib.md5()), + "crc32c": type(google_crc32c.Checksum()), + } + assert isinstance(checksum_obj, checksum_types[checksum]) + + _LOGGER.info.assert_not_called() + + @pytest.mark.parametrize("checksum", ["md5", "crc32c"]) + @mock.patch("google.cloud.storage._media._helpers._LOGGER") + def test__w_header_missing(self, _LOGGER, checksum): + headers = {} + response = _mock_response(headers=headers) + + def _get_headers(response): + return response.headers + + url = "https://example.com/" + expected_checksum, checksum_obj = _helpers._get_expected_checksum( + response, _get_headers, url, checksum_type=checksum + ) + assert expected_checksum is None 
+ assert isinstance(checksum_obj, _helpers._DoNothingHash) + expected_msg = _helpers._MISSING_CHECKSUM.format( + url, checksum_type=checksum.upper() + ) + _LOGGER.info.assert_called_once_with(expected_msg) + + +class Test__parse_checksum_header(object): + CRC32C_CHECKSUM = "3q2+7w==" + MD5_CHECKSUM = "c2l4dGVlbmJ5dGVzbG9uZw==" + + def test_empty_value(self): + header_value = None + response = None + md5_header = _helpers._parse_checksum_header( + header_value, response, checksum_label="md5" + ) + assert md5_header is None + crc32c_header = _helpers._parse_checksum_header( + header_value, response, checksum_label="crc32c" + ) + assert crc32c_header is None + + def test_crc32c_only(self): + header_value = "crc32c={}".format(self.CRC32C_CHECKSUM) + response = None + md5_header = _helpers._parse_checksum_header( + header_value, response, checksum_label="md5" + ) + assert md5_header is None + crc32c_header = _helpers._parse_checksum_header( + header_value, response, checksum_label="crc32c" + ) + assert crc32c_header == self.CRC32C_CHECKSUM + + def test_md5_only(self): + header_value = "md5={}".format(self.MD5_CHECKSUM) + response = None + md5_header = _helpers._parse_checksum_header( + header_value, response, checksum_label="md5" + ) + assert md5_header == self.MD5_CHECKSUM + crc32c_header = _helpers._parse_checksum_header( + header_value, response, checksum_label="crc32c" + ) + assert crc32c_header is None + + def test_both_crc32c_and_md5(self): + header_value = "crc32c={},md5={}".format( + self.CRC32C_CHECKSUM, self.MD5_CHECKSUM + ) + response = None + md5_header = _helpers._parse_checksum_header( + header_value, response, checksum_label="md5" + ) + assert md5_header == self.MD5_CHECKSUM + crc32c_header = _helpers._parse_checksum_header( + header_value, response, checksum_label="crc32c" + ) + assert crc32c_header == self.CRC32C_CHECKSUM + + def test_md5_multiple_matches(self): + another_checksum = "eW91IGRpZCBXQVQgbm93Pw==" + header_value = "md5={},md5={}".format(self.MD5_CHECKSUM, another_checksum) + response = mock.sentinel.response + + with pytest.raises(InvalidResponse) as exc_info: + _helpers._parse_checksum_header( + header_value, response, checksum_label="md5" + ) + + error = exc_info.value + assert error.response is response + assert len(error.args) == 3 + assert error.args[1] == header_value + assert error.args[2] == [self.MD5_CHECKSUM, another_checksum] + + +class Test__parse_generation_header(object): + GENERATION_VALUE = 1641590104888641 + + def test_empty_value(self): + headers = {} + response = _mock_response(headers=headers) + generation_header = _helpers._parse_generation_header(response, _get_headers) + assert generation_header is None + + def test_header_value(self): + headers = {_helpers._GENERATION_HEADER: self.GENERATION_VALUE} + response = _mock_response(headers=headers) + generation_header = _helpers._parse_generation_header(response, _get_headers) + assert generation_header == self.GENERATION_VALUE + + +class Test__is_decompressive_transcoding(object): + def test_empty_value(self): + headers = {} + response = _mock_response(headers=headers) + assert _helpers._is_decompressive_transcoding(response, _get_headers) is False + + def test_gzip_in_headers(self): + headers = {_helpers._STORED_CONTENT_ENCODING_HEADER: "gzip"} + response = _mock_response(headers=headers) + assert _helpers._is_decompressive_transcoding(response, _get_headers) is True + + def test_gzip_not_in_headers(self): + headers = {_helpers._STORED_CONTENT_ENCODING_HEADER: "identity"} + response = 
_mock_response(headers=headers) + assert _helpers._is_decompressive_transcoding(response, _get_headers) is False + + def test_gzip_w_content_encoding_in_headers(self): + headers = { + _helpers._STORED_CONTENT_ENCODING_HEADER: "gzip", + _helpers.CONTENT_ENCODING_HEADER: "gzip", + } + response = _mock_response(headers=headers) + assert _helpers._is_decompressive_transcoding(response, _get_headers) is False + + +class Test__get_generation_from_url(object): + GENERATION_VALUE = 1641590104888641 + MEDIA_URL = ( + "https://storage.googleapis.com/storage/v1/b/my-bucket/o/my-object?alt=media" + ) + MEDIA_URL_W_GENERATION = MEDIA_URL + f"&generation={GENERATION_VALUE}" + + def test_empty_value(self): + generation = _helpers._get_generation_from_url(self.MEDIA_URL) + assert generation is None + + def test_generation_in_url(self): + generation = _helpers._get_generation_from_url(self.MEDIA_URL_W_GENERATION) + assert generation == self.GENERATION_VALUE + + +class Test__add_query_parameters(object): + def test_w_empty_list(self): + query_params = {} + MEDIA_URL = "https://storage.googleapis.com/storage/v1/b/my-bucket/o/my-object" + new_url = _helpers.add_query_parameters(MEDIA_URL, query_params) + assert new_url == MEDIA_URL + + def test_wo_existing_qs(self): + query_params = {"one": "One", "two": "Two"} + MEDIA_URL = "https://storage.googleapis.com/storage/v1/b/my-bucket/o/my-object" + expected = "&".join( + ["{}={}".format(name, value) for name, value in query_params.items()] + ) + new_url = _helpers.add_query_parameters(MEDIA_URL, query_params) + assert new_url == "{}?{}".format(MEDIA_URL, expected) + + def test_w_existing_qs(self): + query_params = {"one": "One", "two": "Two"} + MEDIA_URL = "https://storage.googleapis.com/storage/v1/b/my-bucket/o/my-object?alt=media" + expected = "&".join( + ["{}={}".format(name, value) for name, value in query_params.items()] + ) + new_url = _helpers.add_query_parameters(MEDIA_URL, query_params) + assert new_url == "{}&{}".format(MEDIA_URL, expected) + + +def test__get_uploaded_checksum_from_headers_error_handling(): + response = _mock_response({}) + + with pytest.raises(ValueError): + _helpers._get_uploaded_checksum_from_headers(response, None, "invalid") + assert _helpers._get_uploaded_checksum_from_headers(response, None, None) is None + + +def _mock_response(headers): + return mock.Mock( + headers=headers, + status_code=200, + spec=["status_code", "headers"], + ) diff --git a/tests/resumable_media/unit/test__upload.py b/tests/resumable_media/unit/test__upload.py new file mode 100644 index 000000000..faabc0f56 --- /dev/null +++ b/tests/resumable_media/unit/test__upload.py @@ -0,0 +1,1576 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import http.client +import io +import sys +import tempfile + +from unittest import mock +import pytest # type: ignore + +from google.cloud.storage._media import _helpers +from google.cloud.storage._media import _upload +from google.cloud.storage.exceptions import InvalidResponse +from google.cloud.storage.exceptions import DataCorruption +from google.cloud.storage.retry import DEFAULT_RETRY + + +URL_PREFIX = "https://www.googleapis.com/upload/storage/v1/b/{BUCKET}/o" +SIMPLE_URL = URL_PREFIX + "?uploadType=media&name={OBJECT}" +MULTIPART_URL = URL_PREFIX + "?uploadType=multipart" +RESUMABLE_URL = URL_PREFIX + "?uploadType=resumable" +ONE_MB = 1024 * 1024 +BASIC_CONTENT = "text/plain" +JSON_TYPE = "application/json; charset=UTF-8" +JSON_TYPE_LINE = b"content-type: application/json; charset=UTF-8\r\n" +EXAMPLE_XML_UPLOAD_URL = "https://test-project.storage.googleapis.com/test-bucket" +EXAMPLE_HEADERS = {"example-key": "example-content"} +EXAMPLE_XML_MPU_INITIATE_TEXT_TEMPLATE = """ + + travel-maps + paris.jpg + {upload_id} + +""" +UPLOAD_ID = "VXBsb2FkIElEIGZvciBlbHZpbmcncyBteS1tb3ZpZS5tMnRzIHVwbG9hZA" +PARTS = {1: "39a59594290b0f9a30662a56d695b71d", 2: "00000000290b0f9a30662a56d695b71d"} +FILE_DATA = b"testdata" * 128 + + +@pytest.fixture(scope="session") +def filename(): + with tempfile.NamedTemporaryFile() as f: + f.write(FILE_DATA) + f.flush() + yield f.name + + +class TestUploadBase(object): + def test_constructor_defaults(self): + upload = _upload.UploadBase(SIMPLE_URL) + assert upload.upload_url == SIMPLE_URL + assert upload._headers == {} + assert not upload._finished + _check_retry_strategy(upload) + + def test_constructor_explicit(self): + headers = {"spin": "doctors"} + upload = _upload.UploadBase(SIMPLE_URL, headers=headers) + assert upload.upload_url == SIMPLE_URL + assert upload._headers is headers + assert not upload._finished + _check_retry_strategy(upload) + + def test_finished_property(self): + upload = _upload.UploadBase(SIMPLE_URL) + # Default value of @property. + assert not upload.finished + + # Make sure we cannot set it on public @property. + with pytest.raises(AttributeError): + upload.finished = False + + # Set it privately and then check the @property. + upload._finished = True + assert upload.finished + + def test__process_response_bad_status(self): + upload = _upload.UploadBase(SIMPLE_URL) + _fix_up_virtual(upload) + + # Make sure **not finished** before. + assert not upload.finished + status_code = http.client.SERVICE_UNAVAILABLE + response = _make_response(status_code=status_code) + with pytest.raises(InvalidResponse) as exc_info: + upload._process_response(response) + + error = exc_info.value + assert error.response is response + assert len(error.args) == 4 + assert error.args[1] == status_code + assert error.args[3] == http.client.OK + # Make sure **finished** after (even in failure). + assert upload.finished + + def test__process_response(self): + upload = _upload.UploadBase(SIMPLE_URL) + _fix_up_virtual(upload) + + # Make sure **not finished** before. + assert not upload.finished + response = _make_response() + ret_val = upload._process_response(response) + assert ret_val is None + # Make sure **finished** after. 
+ assert upload.finished + + def test__get_status_code(self): + with pytest.raises(NotImplementedError) as exc_info: + _upload.UploadBase._get_status_code(None) + + exc_info.match("virtual") + + def test__get_headers(self): + with pytest.raises(NotImplementedError) as exc_info: + _upload.UploadBase._get_headers(None) + + exc_info.match("virtual") + + def test__get_body(self): + with pytest.raises(NotImplementedError) as exc_info: + _upload.UploadBase._get_body(None) + + exc_info.match("virtual") + + +class TestSimpleUpload(object): + def test__prepare_request_already_finished(self): + upload = _upload.SimpleUpload(SIMPLE_URL) + upload._finished = True + with pytest.raises(ValueError) as exc_info: + upload._prepare_request(b"", None) + + exc_info.match("An upload can only be used once.") + + def test__prepare_request_non_bytes_data(self): + upload = _upload.SimpleUpload(SIMPLE_URL) + assert not upload.finished + with pytest.raises(TypeError) as exc_info: + upload._prepare_request("", None) + + exc_info.match("must be bytes") + + def test__prepare_request(self): + upload = _upload.SimpleUpload(SIMPLE_URL) + content_type = "image/jpeg" + data = b"cheetos and eetos" + method, url, payload, headers = upload._prepare_request(data, content_type) + + assert method == "POST" + assert url == SIMPLE_URL + assert payload == data + assert headers == {"content-type": content_type} + + def test__prepare_request_with_headers(self): + headers = {"x-goog-cheetos": "spicy"} + upload = _upload.SimpleUpload(SIMPLE_URL, headers=headers) + content_type = "image/jpeg" + data = b"some stuff" + method, url, payload, new_headers = upload._prepare_request(data, content_type) + + assert method == "POST" + assert url == SIMPLE_URL + assert payload == data + assert new_headers is headers + expected = {"content-type": content_type, "x-goog-cheetos": "spicy"} + assert headers == expected + + def test_transmit(self): + upload = _upload.SimpleUpload(SIMPLE_URL) + with pytest.raises(NotImplementedError) as exc_info: + upload.transmit(None, None, None) + + exc_info.match("virtual") + + +class TestMultipartUpload(object): + def test_constructor_defaults(self): + upload = _upload.MultipartUpload(MULTIPART_URL) + assert upload.upload_url == MULTIPART_URL + assert upload._headers == {} + assert upload._checksum_type == "crc32c" # converted from "auto" + assert not upload._finished + _check_retry_strategy(upload) + + def test_constructor_explicit(self): + headers = {"spin": "doctors"} + upload = _upload.MultipartUpload(MULTIPART_URL, headers=headers, checksum="md5") + assert upload.upload_url == MULTIPART_URL + assert upload._headers is headers + assert upload._checksum_type == "md5" + assert not upload._finished + _check_retry_strategy(upload) + + def test_constructor_explicit_auto(self): + headers = {"spin": "doctors"} + upload = _upload.MultipartUpload( + MULTIPART_URL, headers=headers, checksum="auto" + ) + assert upload.upload_url == MULTIPART_URL + assert upload._headers is headers + assert upload._checksum_type == "crc32c" + assert not upload._finished + _check_retry_strategy(upload) + + def test__prepare_request_already_finished(self): + upload = _upload.MultipartUpload(MULTIPART_URL) + upload._finished = True + with pytest.raises(ValueError): + upload._prepare_request(b"Hi", {}, BASIC_CONTENT) + + def test__prepare_request_non_bytes_data(self): + data = "Nope not bytes." 
+ upload = _upload.MultipartUpload(MULTIPART_URL) + with pytest.raises(TypeError): + upload._prepare_request(data, {}, BASIC_CONTENT) + + @mock.patch( + "google.cloud.storage._media._upload.get_boundary", return_value=b"==3==" + ) + def _prepare_request_helper( + self, + mock_get_boundary, + headers=None, + checksum=None, + expected_checksum=None, + test_overwrite=False, + ): + upload = _upload.MultipartUpload( + MULTIPART_URL, headers=headers, checksum=checksum + ) + data = b"Hi" + if test_overwrite and checksum: + # Deliberately set metadata that conflicts with the chosen checksum. + # This should be fully overwritten by the calculated checksum, so + # the output should not change even if this is set. + if checksum == "md5": + metadata = {"md5Hash": "ZZZZZZZZZZZZZZZZZZZZZZ=="} + else: + metadata = {"crc32c": "ZZZZZZ=="} + else: + # To simplify parsing the response, omit other test metadata if a + # checksum is specified. + metadata = {"Some": "Stuff"} if not checksum else {} + content_type = BASIC_CONTENT + method, url, payload, new_headers = upload._prepare_request( + data, metadata, content_type + ) + + assert method == "POST" + assert url == MULTIPART_URL + + preamble = b"--==3==\r\n" + JSON_TYPE_LINE + b"\r\n" + + if checksum == "md5" and expected_checksum: + metadata_payload = '{{"md5Hash": "{}"}}\r\n'.format( + expected_checksum + ).encode("utf8") + elif checksum == "crc32c" and expected_checksum: + metadata_payload = '{{"crc32c": "{}"}}\r\n'.format( + expected_checksum + ).encode("utf8") + else: + metadata_payload = b'{"Some": "Stuff"}\r\n' + remainder = ( + b"--==3==\r\n" + b"content-type: text/plain\r\n" + b"\r\n" + b"Hi\r\n" + b"--==3==--" + ) + expected_payload = preamble + metadata_payload + remainder + + assert payload == expected_payload + multipart_type = b'multipart/related; boundary="==3=="' + mock_get_boundary.assert_called_once_with() + + return new_headers, multipart_type + + def test__prepare_request(self): + headers, multipart_type = self._prepare_request_helper() + assert headers == {"content-type": multipart_type} + + def test__prepare_request_with_headers(self): + headers = {"best": "shirt", "worst": "hat"} + new_headers, multipart_type = self._prepare_request_helper(headers=headers) + assert new_headers is headers + expected_headers = { + "best": "shirt", + "content-type": multipart_type, + "worst": "hat", + } + assert expected_headers == headers + + @pytest.mark.parametrize("checksum", ["md5", "crc32c"]) + def test__prepare_request_with_checksum(self, checksum): + checksums = { + "md5": "waUpj5Oeh+j5YqXt/CBpGA==", + "crc32c": "ihY6wA==", + } + headers, multipart_type = self._prepare_request_helper( + checksum=checksum, expected_checksum=checksums[checksum] + ) + assert headers == { + "content-type": multipart_type, + } + + @pytest.mark.parametrize("checksum", ["md5", "crc32c"]) + def test__prepare_request_with_checksum_overwrite(self, checksum): + checksums = { + "md5": "waUpj5Oeh+j5YqXt/CBpGA==", + "crc32c": "ihY6wA==", + } + headers, multipart_type = self._prepare_request_helper( + checksum=checksum, + expected_checksum=checksums[checksum], + test_overwrite=True, + ) + assert headers == { + "content-type": multipart_type, + } + + def test_transmit(self): + upload = _upload.MultipartUpload(MULTIPART_URL) + with pytest.raises(NotImplementedError) as exc_info: + upload.transmit(None, None, None, None) + + exc_info.match("virtual") + + +class TestResumableUpload(object): + def test_constructor(self): + chunk_size = ONE_MB + upload = 
_upload.ResumableUpload(RESUMABLE_URL, chunk_size) + assert upload.upload_url == RESUMABLE_URL + assert upload._headers == {} + assert not upload._finished + _check_retry_strategy(upload) + assert upload._chunk_size == chunk_size + assert upload._stream is None + assert upload._content_type is None + assert upload._bytes_uploaded == 0 + assert upload._bytes_checksummed == 0 + assert upload._checksum_object is None + assert upload._total_bytes is None + assert upload._resumable_url is None + assert upload._checksum_type == "crc32c" # converted from "auto" + + def test_constructor_bad_chunk_size(self): + with pytest.raises(ValueError): + _upload.ResumableUpload(RESUMABLE_URL, 1) + + def test_invalid_property(self): + upload = _upload.ResumableUpload(RESUMABLE_URL, ONE_MB) + # Default value of @property. + assert not upload.invalid + + # Make sure we cannot set it on public @property. + with pytest.raises(AttributeError): + upload.invalid = False + + # Set it privately and then check the @property. + upload._invalid = True + assert upload.invalid + + def test_chunk_size_property(self): + upload = _upload.ResumableUpload(RESUMABLE_URL, ONE_MB) + # Default value of @property. + assert upload.chunk_size == ONE_MB + + # Make sure we cannot set it on public @property. + with pytest.raises(AttributeError): + upload.chunk_size = 17 + + # Set it privately and then check the @property. + new_size = 102 + upload._chunk_size = new_size + assert upload.chunk_size == new_size + + def test_resumable_url_property(self): + upload = _upload.ResumableUpload(RESUMABLE_URL, ONE_MB) + # Default value of @property. + assert upload.resumable_url is None + + # Make sure we cannot set it on public @property. + new_url = "http://test.invalid?upload_id=not-none" + with pytest.raises(AttributeError): + upload.resumable_url = new_url + + # Set it privately and then check the @property. + upload._resumable_url = new_url + assert upload.resumable_url == new_url + + def test_bytes_uploaded_property(self): + upload = _upload.ResumableUpload(RESUMABLE_URL, ONE_MB) + # Default value of @property. + assert upload.bytes_uploaded == 0 + + # Make sure we cannot set it on public @property. + with pytest.raises(AttributeError): + upload.bytes_uploaded = 1024 + + # Set it privately and then check the @property. + upload._bytes_uploaded = 128 + assert upload.bytes_uploaded == 128 + + def test_total_bytes_property(self): + upload = _upload.ResumableUpload(RESUMABLE_URL, ONE_MB) + # Default value of @property. + assert upload.total_bytes is None + + # Make sure we cannot set it on public @property. + with pytest.raises(AttributeError): + upload.total_bytes = 65536 + + # Set it privately and then check the @property. + upload._total_bytes = 8192 + assert upload.total_bytes == 8192 + + def _prepare_initiate_request_helper( + self, upload_url=RESUMABLE_URL, upload_headers=None, **method_kwargs + ): + data = b"some really big big data." + stream = io.BytesIO(data) + metadata = {"name": "big-data-file.txt"} + + upload = _upload.ResumableUpload(upload_url, ONE_MB, headers=upload_headers) + orig_headers = upload._headers.copy() + # Check ``upload``-s state before. + assert upload._stream is None + assert upload._content_type is None + assert upload._total_bytes is None + # Call the method and check the output. 
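+ # Initiating a resumable session sends only the JSON metadata; the stream contents are transmitted later in separate chunk requests.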
+ method, url, payload, headers = upload._prepare_initiate_request( + stream, metadata, BASIC_CONTENT, **method_kwargs + ) + assert payload == b'{"name": "big-data-file.txt"}' + # Make sure the ``upload``-s state was updated. + assert upload._stream == stream + assert upload._content_type == BASIC_CONTENT + if method_kwargs == {"stream_final": False}: + assert upload._total_bytes is None + else: + assert upload._total_bytes == len(data) + # Make sure headers are untouched. + assert headers is not upload._headers + assert upload._headers == orig_headers + assert method == "POST" + assert url == upload.upload_url + # Make sure the stream is still at the beginning. + assert stream.tell() == 0 + + return data, headers + + def test__prepare_initiate_request(self): + data, headers = self._prepare_initiate_request_helper() + expected_headers = { + "content-type": JSON_TYPE, + "x-upload-content-length": "{:d}".format(len(data)), + "x-upload-content-type": BASIC_CONTENT, + } + assert headers == expected_headers + + def test_prepare_initiate_request_with_signed_url(self): + signed_urls = [ + "https://storage.googleapis.com/b/o?x-goog-signature=123abc", + "https://storage.googleapis.com/b/o?X-Goog-Signature=123abc", + ] + for signed_url in signed_urls: + data, headers = self._prepare_initiate_request_helper( + upload_url=signed_url, + ) + expected_headers = { + "content-type": BASIC_CONTENT, + "x-upload-content-length": "{:d}".format(len(data)), + } + assert headers == expected_headers + + def test__prepare_initiate_request_with_headers(self): + # content-type header should be overwritten, the rest should stay + headers = { + "caviar": "beluga", + "top": "quark", + "content-type": "application/xhtml", + } + data, new_headers = self._prepare_initiate_request_helper( + upload_headers=headers + ) + expected_headers = { + "caviar": "beluga", + "content-type": JSON_TYPE, + "top": "quark", + "x-upload-content-length": "{:d}".format(len(data)), + "x-upload-content-type": BASIC_CONTENT, + } + assert new_headers == expected_headers + + def test__prepare_initiate_request_known_size(self): + total_bytes = 25 + data, headers = self._prepare_initiate_request_helper(total_bytes=total_bytes) + assert len(data) == total_bytes + expected_headers = { + "content-type": "application/json; charset=UTF-8", + "x-upload-content-length": "{:d}".format(total_bytes), + "x-upload-content-type": BASIC_CONTENT, + } + assert headers == expected_headers + + def test__prepare_initiate_request_unknown_size(self): + _, headers = self._prepare_initiate_request_helper(stream_final=False) + expected_headers = { + "content-type": "application/json; charset=UTF-8", + "x-upload-content-type": BASIC_CONTENT, + } + assert headers == expected_headers + + def test__prepare_initiate_request_already_initiated(self): + upload = _upload.ResumableUpload(RESUMABLE_URL, ONE_MB) + # Fake that the upload has been started. + upload._resumable_url = "http://test.invalid?upload_id=definitely-started" + + with pytest.raises(ValueError): + upload._prepare_initiate_request(io.BytesIO(), {}, BASIC_CONTENT) + + def test__prepare_initiate_request_bad_stream_position(self): + upload = _upload.ResumableUpload(RESUMABLE_URL, ONE_MB) + + stream = io.BytesIO(b"data") + stream.seek(1) + with pytest.raises(ValueError): + upload._prepare_initiate_request(stream, {}, BASIC_CONTENT) + + # Also test a bad object (i.e. 
non-stream) + with pytest.raises(AttributeError): + upload._prepare_initiate_request(None, {}, BASIC_CONTENT) + + def test__process_initiate_response_non_200(self): + upload = _upload.ResumableUpload(RESUMABLE_URL, ONE_MB) + _fix_up_virtual(upload) + + response = _make_response(403) + with pytest.raises(InvalidResponse) as exc_info: + upload._process_initiate_response(response) + + error = exc_info.value + assert error.response is response + assert len(error.args) == 5 + assert error.args[1] == 403 + assert error.args[3] == 200 + assert error.args[4] == 201 + + def test__process_initiate_response(self): + upload = _upload.ResumableUpload(RESUMABLE_URL, ONE_MB) + _fix_up_virtual(upload) + + headers = {"location": "http://test.invalid?upload_id=kmfeij3234"} + response = _make_response(headers=headers) + # Check resumable_url before. + assert upload._resumable_url is None + # Process the actual headers. + ret_val = upload._process_initiate_response(response) + assert ret_val is None + # Check resumable_url after. + assert upload._resumable_url == headers["location"] + + def test_initiate(self): + upload = _upload.ResumableUpload(RESUMABLE_URL, ONE_MB) + with pytest.raises(NotImplementedError) as exc_info: + upload.initiate(None, None, {}, BASIC_CONTENT) + + exc_info.match("virtual") + + def test__prepare_request_already_finished(self): + upload = _upload.ResumableUpload(RESUMABLE_URL, ONE_MB) + assert not upload.invalid + upload._finished = True + with pytest.raises(ValueError) as exc_info: + upload._prepare_request() + + assert exc_info.value.args == ("Upload has finished.",) + + def test__prepare_request_invalid(self): + upload = _upload.ResumableUpload(RESUMABLE_URL, ONE_MB) + assert not upload.finished + upload._invalid = True + with pytest.raises(ValueError) as exc_info: + upload._prepare_request() + + assert exc_info.match("invalid state") + assert exc_info.match("recover()") + + def test__prepare_request_not_initiated(self): + upload = _upload.ResumableUpload(RESUMABLE_URL, ONE_MB) + assert not upload.finished + assert not upload.invalid + assert upload._resumable_url is None + with pytest.raises(ValueError) as exc_info: + upload._prepare_request() + + assert exc_info.match("upload has not been initiated") + assert exc_info.match("initiate()") + + def test__prepare_request_invalid_stream_state(self): + stream = io.BytesIO(b"some data here") + upload = _upload.ResumableUpload(RESUMABLE_URL, ONE_MB) + upload._stream = stream + upload._resumable_url = "http://test.invalid?upload_id=not-none" + # Make stream.tell() disagree with bytes_uploaded. + upload._bytes_uploaded = 5 + assert upload.bytes_uploaded != stream.tell() + with pytest.raises(ValueError) as exc_info: + upload._prepare_request() + + assert exc_info.match("Bytes stream is in unexpected state.") + + @staticmethod + def _upload_in_flight(data, headers=None, checksum=None): + upload = _upload.ResumableUpload( + RESUMABLE_URL, ONE_MB, headers=headers, checksum=checksum + ) + upload._stream = io.BytesIO(data) + upload._content_type = BASIC_CONTENT + upload._total_bytes = len(data) + upload._resumable_url = "http://test.invalid?upload_id=not-none" + return upload + + def _prepare_request_helper(self, headers=None, checksum=None): + data = b"All of the data goes in a stream." 
+ upload = self._upload_in_flight(data, headers=headers, checksum=checksum) + method, url, payload, new_headers = upload._prepare_request() + # Check the response values. + assert method == "PUT" + assert url == upload.resumable_url + assert payload == data + # Make sure headers are **NOT** updated + assert upload._headers != new_headers + + return new_headers + + def test__prepare_request_success(self): + headers = self._prepare_request_helper() + expected_headers = { + "content-range": "bytes 0-32/33", + "content-type": BASIC_CONTENT, + } + assert headers == expected_headers + + def test__prepare_request_success_with_headers(self): + headers = {"keep": "this"} + new_headers = self._prepare_request_helper(headers) + assert new_headers is not headers + expected_headers = { + "keep": "this", + "content-range": "bytes 0-32/33", + "content-type": BASIC_CONTENT, + } + assert new_headers == expected_headers + + @pytest.mark.parametrize("checksum", ["md5", "crc32c"]) + def test__prepare_request_with_checksum(self, checksum): + data = b"All of the data goes in a stream." + upload = self._upload_in_flight(data, checksum=checksum) + upload._prepare_request() + assert upload._checksum_object is not None + + checksums = {"md5": "GRvfKbqr5klAOwLkxgIf8w==", "crc32c": "Qg8thA=="} + checksum_digest = _helpers.prepare_checksum_digest( + upload._checksum_object.digest() + ) + assert checksum_digest == checksums[checksum] + assert upload._bytes_checksummed == len(data) + + @pytest.mark.parametrize("checksum", ["md5", "crc32c"]) + def test__update_checksum(self, checksum): + data = b"All of the data goes in a stream." + upload = self._upload_in_flight(data, checksum=checksum) + start_byte, payload, _ = _upload.get_next_chunk(upload._stream, 8, len(data)) + upload._update_checksum(start_byte, payload) + assert upload._bytes_checksummed == 8 + + start_byte, payload, _ = _upload.get_next_chunk(upload._stream, 8, len(data)) + upload._update_checksum(start_byte, payload) + assert upload._bytes_checksummed == 16 + + # Continue to the end. + start_byte, payload, _ = _upload.get_next_chunk( + upload._stream, len(data), len(data) + ) + upload._update_checksum(start_byte, payload) + assert upload._bytes_checksummed == len(data) + + checksums = {"md5": "GRvfKbqr5klAOwLkxgIf8w==", "crc32c": "Qg8thA=="} + checksum_digest = _helpers.prepare_checksum_digest( + upload._checksum_object.digest() + ) + assert checksum_digest == checksums[checksum] + + @pytest.mark.parametrize("checksum", ["md5", "crc32c"]) + def test__update_checksum_rewind(self, checksum): + data = b"All of the data goes in a stream." + upload = self._upload_in_flight(data, checksum=checksum) + start_byte, payload, _ = _upload.get_next_chunk(upload._stream, 8, len(data)) + upload._update_checksum(start_byte, payload) + assert upload._bytes_checksummed == 8 + checksum_checkpoint = upload._checksum_object.digest() + + # Rewind to the beginning. + upload._stream.seek(0) + start_byte, payload, _ = _upload.get_next_chunk(upload._stream, 8, len(data)) + upload._update_checksum(start_byte, payload) + assert upload._bytes_checksummed == 8 + assert upload._checksum_object.digest() == checksum_checkpoint + + # Rewind but not to the beginning. + upload._stream.seek(4) + start_byte, payload, _ = _upload.get_next_chunk(upload._stream, 8, len(data)) + upload._update_checksum(start_byte, payload) + assert upload._bytes_checksummed == 12 + + # Continue to the end. 
+ start_byte, payload, _ = _upload.get_next_chunk( + upload._stream, len(data), len(data) + ) + upload._update_checksum(start_byte, payload) + assert upload._bytes_checksummed == len(data) + + checksums = {"md5": "GRvfKbqr5klAOwLkxgIf8w==", "crc32c": "Qg8thA=="} + checksum_digest = _helpers.prepare_checksum_digest( + upload._checksum_object.digest() + ) + assert checksum_digest == checksums[checksum] + + def test__update_checksum_none(self): + data = b"All of the data goes in a stream." + upload = self._upload_in_flight(data, checksum=None) + start_byte, payload, _ = _upload.get_next_chunk(upload._stream, 8, len(data)) + upload._update_checksum(start_byte, payload) + assert upload._checksum_object is None + + def test__update_checksum_invalid(self): + data = b"All of the data goes in a stream." + upload = self._upload_in_flight(data, checksum="invalid") + start_byte, payload, _ = _upload.get_next_chunk(upload._stream, 8, len(data)) + with pytest.raises(ValueError): + upload._update_checksum(start_byte, payload) + + def test__make_invalid(self): + upload = _upload.ResumableUpload(RESUMABLE_URL, ONE_MB) + assert not upload.invalid + upload._make_invalid() + assert upload.invalid + + def test__process_resumable_response_bad_status(self): + upload = _upload.ResumableUpload(RESUMABLE_URL, ONE_MB) + _fix_up_virtual(upload) + + # Make sure the upload is valid before the failure. + assert not upload.invalid + response = _make_response(status_code=http.client.NOT_FOUND) + with pytest.raises(InvalidResponse) as exc_info: + upload._process_resumable_response(response, None) + + error = exc_info.value + assert error.response is response + assert len(error.args) == 5 + assert error.args[1] == response.status_code + assert error.args[3] == http.client.OK + assert error.args[4] == http.client.PERMANENT_REDIRECT + # Make sure the upload is invalid after the failure. + assert upload.invalid + + def test__process_resumable_response_success(self): + upload = _upload.ResumableUpload(RESUMABLE_URL, ONE_MB, checksum=None) + _fix_up_virtual(upload) + + # Check / set status before. + assert upload._bytes_uploaded == 0 + upload._bytes_uploaded = 20 + assert not upload._finished + + # Set the response body. + bytes_sent = 158 + total_bytes = upload._bytes_uploaded + bytes_sent + response_body = '{{"size": "{:d}"}}'.format(total_bytes) + response_body = response_body.encode("utf-8") + response = mock.Mock( + content=response_body, + status_code=http.client.OK, + spec=["content", "status_code"], + ) + ret_val = upload._process_resumable_response(response, bytes_sent) + assert ret_val is None + # Check status after. + assert upload._bytes_uploaded == total_bytes + assert upload._finished + + def test__process_resumable_response_partial_no_range(self): + upload = _upload.ResumableUpload(RESUMABLE_URL, ONE_MB) + _fix_up_virtual(upload) + + response = _make_response(status_code=http.client.PERMANENT_REDIRECT) + # Make sure the upload is valid before the failure. + assert not upload.invalid + with pytest.raises(InvalidResponse) as exc_info: + upload._process_resumable_response(response, None) + # Make sure the upload is invalid after the failure. + assert upload.invalid + + # Check the error response. 
+ error = exc_info.value + assert error.response is response + assert len(error.args) == 2 + assert error.args[1] == "range" + + def test__process_resumable_response_partial_bad_range(self): + upload = _upload.ResumableUpload(RESUMABLE_URL, ONE_MB) + _fix_up_virtual(upload) + + # Make sure the upload is valid before the failure. + assert not upload.invalid + headers = {"range": "nights 1-81"} + response = _make_response( + status_code=http.client.PERMANENT_REDIRECT, headers=headers + ) + with pytest.raises(InvalidResponse) as exc_info: + upload._process_resumable_response(response, 81) + + # Check the error response. + error = exc_info.value + assert error.response is response + assert len(error.args) == 3 + assert error.args[1] == headers["range"] + # Make sure the upload is invalid after the failure. + assert upload.invalid + + def test__process_resumable_response_partial(self): + upload = _upload.ResumableUpload(RESUMABLE_URL, ONE_MB) + _fix_up_virtual(upload) + + # Check status before. + assert upload._bytes_uploaded == 0 + headers = {"range": "bytes=0-171"} + response = _make_response( + status_code=http.client.PERMANENT_REDIRECT, headers=headers + ) + ret_val = upload._process_resumable_response(response, 172) + assert ret_val is None + # Check status after. + assert upload._bytes_uploaded == 172 + + @pytest.mark.parametrize("checksum", ["md5", "crc32c"]) + def test__validate_checksum_success(self, checksum): + data = b"All of the data goes in a stream." + upload = self._upload_in_flight(data, checksum=checksum) + _fix_up_virtual(upload) + # Go ahead and process the entire data in one go for this test. + start_byte, payload, _ = _upload.get_next_chunk( + upload._stream, len(data), len(data) + ) + upload._update_checksum(start_byte, payload) + assert upload._bytes_checksummed == len(data) + + # This is only used by _validate_checksum for fetching metadata and + # logging. + metadata = {"md5Hash": "GRvfKbqr5klAOwLkxgIf8w==", "crc32c": "Qg8thA=="} + response = _make_response(metadata=metadata) + upload._finished = True + + assert upload._checksum_object is not None + # Test passes if it does not raise an error (no assert needed) + upload._validate_checksum(response) + + def test__validate_checksum_none(self): + data = b"All of the data goes in a stream." + upload = self._upload_in_flight(b"test", checksum=None) + _fix_up_virtual(upload) + # Go ahead and process the entire data in one go for this test. + start_byte, payload, _ = _upload.get_next_chunk( + upload._stream, len(data), len(data) + ) + upload._update_checksum(start_byte, payload) + + # This is only used by _validate_checksum for fetching metadata and + # logging. + metadata = {"md5Hash": "GRvfKbqr5klAOwLkxgIf8w==", "crc32c": "Qg8thA=="} + response = _make_response(metadata=metadata) + upload._finished = True + + assert upload._checksum_object is None + assert upload._bytes_checksummed == 0 + # Test passes if it does not raise an error (no assert needed) + upload._validate_checksum(response) + + @pytest.mark.parametrize("checksum", ["md5", "crc32c"]) + def test__validate_checksum_header_no_match(self, checksum): + data = b"All of the data goes in a stream." + upload = self._upload_in_flight(data, checksum=checksum) + _fix_up_virtual(upload) + # Go ahead and process the entire data in one go for this test. 
+ start_byte, payload, _ = _upload.get_next_chunk( + upload._stream, len(data), len(data) + ) + upload._update_checksum(start_byte, payload) + assert upload._bytes_checksummed == len(data) + + # For this test, each checksum option will be provided with a valid but + # mismatching remote checksum type. + if checksum == "crc32c": + metadata = {"md5Hash": "GRvfKbqr5klAOwLkxgIf8w=="} + else: + metadata = {"crc32c": "Qg8thA=="} + # This is only used by _validate_checksum for fetching headers and + # logging, so it doesn't need to be fleshed out with a response body. + response = _make_response(metadata=metadata) + upload._finished = True + + assert upload._checksum_object is not None + with pytest.raises(InvalidResponse) as exc_info: + upload._validate_checksum(response) + + error = exc_info.value + assert error.response is response + message = error.args[0] + metadata_key = _helpers._get_metadata_key(checksum) + assert ( + message + == _upload._UPLOAD_METADATA_NO_APPROPRIATE_CHECKSUM_MESSAGE.format( + metadata_key + ) + ) + + @pytest.mark.parametrize("checksum", ["md5", "crc32c"]) + def test__validate_checksum_mismatch(self, checksum): + data = b"All of the data goes in a stream." + upload = self._upload_in_flight(data, checksum=checksum) + _fix_up_virtual(upload) + # Go ahead and process the entire data in one go for this test. + start_byte, payload, _ = _upload.get_next_chunk( + upload._stream, len(data), len(data) + ) + upload._update_checksum(start_byte, payload) + assert upload._bytes_checksummed == len(data) + + metadata = { + "md5Hash": "ZZZZZZZZZZZZZZZZZZZZZZ==", + "crc32c": "ZZZZZZ==", + } + # This is only used by _validate_checksum for fetching headers and + # logging, so it doesn't need to be fleshed out with a response body. + response = _make_response(metadata=metadata) + upload._finished = True + + assert upload._checksum_object is not None + # The mismatched checksum should be reported as DataCorruption. + with pytest.raises(DataCorruption) as exc_info: + upload._validate_checksum(response) + + error = exc_info.value + assert error.response is response + message = error.args[0] + correct_checksums = {"crc32c": "Qg8thA==", "md5": "GRvfKbqr5klAOwLkxgIf8w=="} + metadata_key = _helpers._get_metadata_key(checksum) + assert message == _upload._UPLOAD_CHECKSUM_MISMATCH_MESSAGE.format( + checksum.upper(), correct_checksums[checksum], metadata[metadata_key] + ) + + def test_transmit_next_chunk(self): + upload = _upload.ResumableUpload(RESUMABLE_URL, ONE_MB) + with pytest.raises(NotImplementedError) as exc_info: + upload.transmit_next_chunk(None) + + exc_info.match("virtual") + + def test__prepare_recover_request_not_invalid(self): + upload = _upload.ResumableUpload(RESUMABLE_URL, ONE_MB) + assert not upload.invalid + + method, url, payload, headers = upload._prepare_recover_request() + assert method == "PUT" + assert url == upload.resumable_url + assert payload is None + assert headers == {"content-range": "bytes */*"} + # Make sure headers are untouched. + assert upload._headers == {} + + def test__prepare_recover_request(self): + upload = _upload.ResumableUpload(RESUMABLE_URL, ONE_MB) + upload._invalid = True + + method, url, payload, headers = upload._prepare_recover_request() + assert method == "PUT" + assert url == upload.resumable_url + assert payload is None + assert headers == {"content-range": "bytes */*"} + # Make sure headers are untouched. 
+ assert upload._headers == {} + + def test__prepare_recover_request_with_headers(self): + headers = {"lake": "ocean"} + upload = _upload.ResumableUpload(RESUMABLE_URL, ONE_MB, headers=headers) + upload._invalid = True + + method, url, payload, new_headers = upload._prepare_recover_request() + assert method == "PUT" + assert url == upload.resumable_url + assert payload is None + assert new_headers == {"content-range": "bytes */*"} + # Make sure the ``_headers`` are not incorporated. + assert "lake" not in new_headers + # Make sure headers are untouched. + assert upload._headers == {"lake": "ocean"} + + def test__process_recover_response_bad_status(self): + upload = _upload.ResumableUpload(RESUMABLE_URL, ONE_MB) + _fix_up_virtual(upload) + + upload._invalid = True + + response = _make_response(status_code=http.client.BAD_REQUEST) + with pytest.raises(InvalidResponse) as exc_info: + upload._process_recover_response(response) + + error = exc_info.value + assert error.response is response + assert len(error.args) == 4 + assert error.args[1] == response.status_code + assert error.args[3] == http.client.PERMANENT_REDIRECT + # Make sure still invalid. + assert upload.invalid + + def test__process_recover_response_no_range(self): + upload = _upload.ResumableUpload(RESUMABLE_URL, ONE_MB) + _fix_up_virtual(upload) + + upload._invalid = True + upload._stream = mock.Mock(spec=["seek"]) + upload._bytes_uploaded = mock.sentinel.not_zero + assert upload.bytes_uploaded != 0 + + response = _make_response(status_code=http.client.PERMANENT_REDIRECT) + ret_val = upload._process_recover_response(response) + assert ret_val is None + # Check the state of ``upload`` after. + assert upload.bytes_uploaded == 0 + assert not upload.invalid + upload._stream.seek.assert_called_once_with(0) + + def test__process_recover_response_bad_range(self): + upload = _upload.ResumableUpload(RESUMABLE_URL, ONE_MB) + _fix_up_virtual(upload) + + upload._invalid = True + upload._stream = mock.Mock(spec=["seek"]) + upload._bytes_uploaded = mock.sentinel.not_zero + + headers = {"range": "bites=9-11"} + response = _make_response( + status_code=http.client.PERMANENT_REDIRECT, headers=headers + ) + with pytest.raises(InvalidResponse) as exc_info: + upload._process_recover_response(response) + + error = exc_info.value + assert error.response is response + assert len(error.args) == 3 + assert error.args[1] == headers["range"] + # Check the state of ``upload`` after (untouched). + assert upload.bytes_uploaded is mock.sentinel.not_zero + assert upload.invalid + upload._stream.seek.assert_not_called() + + def test__process_recover_response_with_range(self): + upload = _upload.ResumableUpload(RESUMABLE_URL, ONE_MB) + _fix_up_virtual(upload) + + upload._invalid = True + upload._stream = mock.Mock(spec=["seek"]) + upload._bytes_uploaded = mock.sentinel.not_zero + assert upload.bytes_uploaded != 0 + + end = 11 + headers = {"range": "bytes=0-{:d}".format(end)} + response = _make_response( + status_code=http.client.PERMANENT_REDIRECT, headers=headers + ) + ret_val = upload._process_recover_response(response) + assert ret_val is None + # Check the state of ``upload`` after. 
+ assert upload.bytes_uploaded == end + 1 + assert not upload.invalid + upload._stream.seek.assert_called_once_with(end + 1) + + def test_recover(self): + upload = _upload.ResumableUpload(RESUMABLE_URL, ONE_MB) + with pytest.raises(NotImplementedError) as exc_info: + upload.recover(None) + + exc_info.match("virtual") + + +@mock.patch("random.randrange", return_value=1234567890123456789) +def test_get_boundary(mock_rand): + result = _upload.get_boundary() + assert result == b"===============1234567890123456789==" + mock_rand.assert_called_once_with(sys.maxsize) + + +class Test_construct_multipart_request(object): + @mock.patch( + "google.cloud.storage._media._upload.get_boundary", return_value=b"==1==" + ) + def test_binary(self, mock_get_boundary): + data = b"By nary day tuh" + metadata = {"name": "hi-file.bin"} + content_type = "application/octet-stream" + payload, multipart_boundary = _upload.construct_multipart_request( + data, metadata, content_type + ) + + assert multipart_boundary == mock_get_boundary.return_value + expected_payload = ( + b"--==1==\r\n" + JSON_TYPE_LINE + b"\r\n" + b'{"name": "hi-file.bin"}\r\n' + b"--==1==\r\n" + b"content-type: application/octet-stream\r\n" + b"\r\n" + b"By nary day tuh\r\n" + b"--==1==--" + ) + assert payload == expected_payload + mock_get_boundary.assert_called_once_with() + + @mock.patch( + "google.cloud.storage._media._upload.get_boundary", return_value=b"==2==" + ) + def test_unicode(self, mock_get_boundary): + data_unicode = "\N{snowman}" + # construct_multipart_request( ASSUMES callers pass bytes. + data = data_unicode.encode("utf-8") + metadata = {"name": "snowman.txt"} + content_type = BASIC_CONTENT + payload, multipart_boundary = _upload.construct_multipart_request( + data, metadata, content_type + ) + + assert multipart_boundary == mock_get_boundary.return_value + expected_payload = ( + b"--==2==\r\n" + JSON_TYPE_LINE + b"\r\n" + b'{"name": "snowman.txt"}\r\n' + b"--==2==\r\n" + b"content-type: text/plain\r\n" + b"\r\n" + b"\xe2\x98\x83\r\n" + b"--==2==--" + ) + assert payload == expected_payload + mock_get_boundary.assert_called_once_with() + + +def test_get_total_bytes(): + data = b"some data" + stream = io.BytesIO(data) + # Check position before function call. + assert stream.tell() == 0 + assert _upload.get_total_bytes(stream) == len(data) + # Check position after function call. + assert stream.tell() == 0 + + # Make sure this works just as well when not at beginning. + curr_pos = 3 + stream.seek(curr_pos) + assert _upload.get_total_bytes(stream) == len(data) + # Check position after function call. + assert stream.tell() == curr_pos + + +class Test_get_next_chunk(object): + def test_exhausted_known_size(self): + data = b"the end" + stream = io.BytesIO(data) + stream.seek(len(data)) + with pytest.raises(ValueError) as exc_info: + _upload.get_next_chunk(stream, 1, len(data)) + + exc_info.match("Stream is already exhausted. 
There is no content remaining.") + + def test_exhausted_known_size_zero(self): + stream = io.BytesIO(b"") + answer = _upload.get_next_chunk(stream, 1, 0) + assert answer == (0, b"", "bytes */0") + + def test_exhausted_known_size_zero_nonempty(self): + stream = io.BytesIO(b"not empty WAT!") + with pytest.raises(ValueError) as exc_info: + _upload.get_next_chunk(stream, 1, 0) + exc_info.match("Stream specified as empty, but produced non-empty content.") + + def test_success_known_size_lt_stream_size(self): + data = b"0123456789" + stream = io.BytesIO(data) + chunk_size = 3 + total_bytes = len(data) - 2 + + # Splits into 3 chunks: 012, 345, 67 + result0 = _upload.get_next_chunk(stream, chunk_size, total_bytes) + result1 = _upload.get_next_chunk(stream, chunk_size, total_bytes) + result2 = _upload.get_next_chunk(stream, chunk_size, total_bytes) + + assert result0 == (0, b"012", "bytes 0-2/8") + assert result1 == (3, b"345", "bytes 3-5/8") + assert result2 == (6, b"67", "bytes 6-7/8") + + def test_success_known_size(self): + data = b"0123456789" + stream = io.BytesIO(data) + total_bytes = len(data) + chunk_size = 3 + # Splits into 4 chunks: 012, 345, 678, 9 + result0 = _upload.get_next_chunk(stream, chunk_size, total_bytes) + result1 = _upload.get_next_chunk(stream, chunk_size, total_bytes) + result2 = _upload.get_next_chunk(stream, chunk_size, total_bytes) + result3 = _upload.get_next_chunk(stream, chunk_size, total_bytes) + assert result0 == (0, b"012", "bytes 0-2/10") + assert result1 == (3, b"345", "bytes 3-5/10") + assert result2 == (6, b"678", "bytes 6-8/10") + assert result3 == (9, b"9", "bytes 9-9/10") + assert stream.tell() == total_bytes + + def test_success_unknown_size(self): + data = b"abcdefghij" + stream = io.BytesIO(data) + chunk_size = 6 + # Splits into 2 chunks: abcdef, ghij + result0 = _upload.get_next_chunk(stream, chunk_size, None) + result1 = _upload.get_next_chunk(stream, chunk_size, None) + assert result0 == (0, b"abcdef", "bytes 0-5/*") + assert result1 == (chunk_size, b"ghij", "bytes 6-9/10") + assert stream.tell() == len(data) + + # Do the same when the chunk size evenly divides len(data) + stream.seek(0) + chunk_size = len(data) + # Splits into 2 chunks: `data` and empty string + result0 = _upload.get_next_chunk(stream, chunk_size, None) + result1 = _upload.get_next_chunk(stream, chunk_size, None) + assert result0 == (0, data, "bytes 0-9/*") + assert result1 == (len(data), b"", "bytes */10") + assert stream.tell() == len(data) + + +class Test_get_content_range(object): + def test_known_size(self): + result = _upload.get_content_range(5, 10, 40) + assert result == "bytes 5-10/40" + + def test_unknown_size(self): + result = _upload.get_content_range(1000, 10000, None) + assert result == "bytes 1000-10000/*" + + +def test_xml_mpu_container_constructor_and_properties(filename): + container = _upload.XMLMPUContainer(EXAMPLE_XML_UPLOAD_URL, filename) + assert container.upload_url == EXAMPLE_XML_UPLOAD_URL + assert container.upload_id is None + assert container._headers == {} + assert container._parts == {} + assert container._filename == filename + + container = _upload.XMLMPUContainer( + EXAMPLE_XML_UPLOAD_URL, + filename, + headers=EXAMPLE_HEADERS, + upload_id=UPLOAD_ID, + ) + container._parts = PARTS + assert container.upload_url == EXAMPLE_XML_UPLOAD_URL + assert container.upload_id == UPLOAD_ID + assert container._headers == EXAMPLE_HEADERS + assert container._parts == PARTS + assert container._filename == filename + + +def test_xml_mpu_container_initiate(filename): + 
container = _upload.XMLMPUContainer( + EXAMPLE_XML_UPLOAD_URL, filename, upload_id=UPLOAD_ID + ) + with pytest.raises(ValueError): + container._prepare_initiate_request(BASIC_CONTENT) + + container = _upload.XMLMPUContainer( + EXAMPLE_XML_UPLOAD_URL, filename, headers=EXAMPLE_HEADERS + ) + verb, url, body, headers = container._prepare_initiate_request(BASIC_CONTENT) + assert verb == _upload._POST + assert url == EXAMPLE_XML_UPLOAD_URL + _upload._MPU_INITIATE_QUERY + assert not body + assert headers == {**EXAMPLE_HEADERS, "content-type": BASIC_CONTENT} + + _fix_up_virtual(container) + response = _make_xml_response( + text=EXAMPLE_XML_MPU_INITIATE_TEXT_TEMPLATE.format(upload_id=UPLOAD_ID) + ) + container._process_initiate_response(response) + assert container.upload_id == UPLOAD_ID + + with pytest.raises(NotImplementedError): + container.initiate(None, None) + + +def test_xml_mpu_container_finalize(filename): + container = _upload.XMLMPUContainer(EXAMPLE_XML_UPLOAD_URL, filename) + with pytest.raises(ValueError): + container._prepare_finalize_request() + + container = _upload.XMLMPUContainer( + EXAMPLE_XML_UPLOAD_URL, + filename, + headers=EXAMPLE_HEADERS, + upload_id=UPLOAD_ID, + ) + container._parts = PARTS + verb, url, body, headers = container._prepare_finalize_request() + assert verb == _upload._POST + final_query = _upload._MPU_FINAL_QUERY_TEMPLATE.format(upload_id=UPLOAD_ID) + assert url == EXAMPLE_XML_UPLOAD_URL + final_query + assert headers == EXAMPLE_HEADERS + assert b"CompleteMultipartUpload" in body + for key, value in PARTS.items(): + assert str(key).encode("utf-8") in body + assert value.encode("utf-8") in body + + _fix_up_virtual(container) + response = _make_xml_response() + container._process_finalize_response(response) + assert container.finished + + with pytest.raises(NotImplementedError): + container.finalize(None) + + +def test_xml_mpu_container_cancel(filename): + container = _upload.XMLMPUContainer(EXAMPLE_XML_UPLOAD_URL, filename) + with pytest.raises(ValueError): + container._prepare_cancel_request() + + container = _upload.XMLMPUContainer( + EXAMPLE_XML_UPLOAD_URL, + filename, + headers=EXAMPLE_HEADERS, + upload_id=UPLOAD_ID, + ) + container._parts = PARTS + verb, url, body, headers = container._prepare_cancel_request() + assert verb == _upload._DELETE + final_query = _upload._MPU_FINAL_QUERY_TEMPLATE.format(upload_id=UPLOAD_ID) + assert url == EXAMPLE_XML_UPLOAD_URL + final_query + assert headers == EXAMPLE_HEADERS + assert not body + + _fix_up_virtual(container) + response = _make_xml_response(status_code=204) + container._process_cancel_response(response) + + with pytest.raises(NotImplementedError): + container.cancel(None) + + +def test_xml_mpu_part(filename): + PART_NUMBER = 1 + START = 0 + END = 256 + ETAG = PARTS[1] + + part = _upload.XMLMPUPart( + EXAMPLE_XML_UPLOAD_URL, + UPLOAD_ID, + filename, + START, + END, + PART_NUMBER, + headers=EXAMPLE_HEADERS, + checksum="md5", + ) + assert part.upload_url == EXAMPLE_XML_UPLOAD_URL + assert part.upload_id == UPLOAD_ID + assert part.filename == filename + assert part.etag is None + assert part.start == START + assert part.end == END + assert part.part_number == PART_NUMBER + assert part._headers == EXAMPLE_HEADERS + assert part._checksum_type == "md5" + assert part._checksum_object is None + + part = _upload.XMLMPUPart( + EXAMPLE_XML_UPLOAD_URL, + UPLOAD_ID, + filename, + START, + END, + PART_NUMBER, + headers=EXAMPLE_HEADERS, + checksum="auto", + ) + assert part.upload_url == EXAMPLE_XML_UPLOAD_URL + assert 
part.upload_id == UPLOAD_ID + assert part.filename == filename + assert part.etag is None + assert part.start == START + assert part.end == END + assert part.part_number == PART_NUMBER + assert part._headers == EXAMPLE_HEADERS + assert part._checksum_type == "crc32c" # transformed from "auto" + assert part._checksum_object is None + + part = _upload.XMLMPUPart( + EXAMPLE_XML_UPLOAD_URL, + UPLOAD_ID, + filename, + START, + END, + PART_NUMBER, + headers=EXAMPLE_HEADERS, + checksum=None, + ) + verb, url, payload, headers = part._prepare_upload_request() + assert verb == _upload._PUT + assert url == EXAMPLE_XML_UPLOAD_URL + _upload._MPU_PART_QUERY_TEMPLATE.format( + part=PART_NUMBER, upload_id=UPLOAD_ID + ) + assert headers == EXAMPLE_HEADERS + assert payload == FILE_DATA[START:END] + + _fix_up_virtual(part) + response = _make_xml_response(headers={"etag": ETAG}) + part._process_upload_response(response) + assert part.etag == ETAG + + +def test_xml_mpu_part_invalid_response(filename): + PART_NUMBER = 1 + START = 0 + END = 256 + ETAG = PARTS[1] + + part = _upload.XMLMPUPart( + EXAMPLE_XML_UPLOAD_URL, + UPLOAD_ID, + filename, + START, + END, + PART_NUMBER, + headers=EXAMPLE_HEADERS, + checksum="md5", + ) + _fix_up_virtual(part) + response = _make_xml_response(headers={"etag": ETAG}) + with pytest.raises(InvalidResponse): + part._process_upload_response(response) + + +def test_xml_mpu_part_checksum_failure(filename): + PART_NUMBER = 1 + START = 0 + END = 256 + ETAG = PARTS[1] + + part = _upload.XMLMPUPart( + EXAMPLE_XML_UPLOAD_URL, + UPLOAD_ID, + filename, + START, + END, + PART_NUMBER, + headers=EXAMPLE_HEADERS, + checksum="md5", + ) + _fix_up_virtual(part) + part._prepare_upload_request() + response = _make_xml_response( + headers={"etag": ETAG, "x-goog-hash": "md5=Ojk9c3dhfxgoKVVHYwFbHQ=="} + ) # Example md5 checksum but not the correct one + with pytest.raises(DataCorruption): + part._process_upload_response(response) + + +def test_xml_mpu_part_checksum_success(filename): + PART_NUMBER = 1 + START = 0 + END = 256 + ETAG = PARTS[1] + + part = _upload.XMLMPUPart( + EXAMPLE_XML_UPLOAD_URL, + UPLOAD_ID, + filename, + START, + END, + PART_NUMBER, + headers=EXAMPLE_HEADERS, + checksum="md5", + ) + _fix_up_virtual(part) + part._prepare_upload_request() + response = _make_xml_response( + headers={"etag": ETAG, "x-goog-hash": "md5=pOUFGnohRRFFd24NztFuFw=="} + ) + part._process_upload_response(response) + assert part.etag == ETAG + assert part.finished + + # Test error handling + part = _upload.XMLMPUPart( + EXAMPLE_XML_UPLOAD_URL, + UPLOAD_ID, + filename, + START, + END, + PART_NUMBER, + headers=EXAMPLE_HEADERS, + checksum="md5", + ) + with pytest.raises(NotImplementedError): + part.upload(None) + part._finished = True + with pytest.raises(ValueError): + part._prepare_upload_request() + + +def _make_response(status_code=http.client.OK, headers=None, metadata=None): + headers = headers or {} + return mock.Mock( + headers=headers, + status_code=status_code, + json=mock.Mock(return_value=metadata), + spec=["headers", "status_code"], + ) + + +def _make_xml_response(status_code=http.client.OK, headers=None, text=None): + headers = headers or {} + return mock.Mock( + headers=headers, + status_code=status_code, + text=text, + spec=["headers", "status_code"], + ) + + +def _get_status_code(response): + return response.status_code + + +def _get_headers(response): + return response.headers + + +def _fix_up_virtual(upload): + upload._get_status_code = _get_status_code + upload._get_headers = _get_headers + + +def 
_check_retry_strategy(upload): + assert upload._retry_strategy == DEFAULT_RETRY diff --git a/tests/system/_helpers.py b/tests/system/_helpers.py index c172129d6..7274610a8 100644 --- a/tests/system/_helpers.py +++ b/tests/system/_helpers.py @@ -13,23 +13,32 @@ # limitations under the License. import os +import time from google.api_core import exceptions from test_utils.retry import RetryErrors from test_utils.retry import RetryInstanceState from test_utils.system import unique_resource_id +from google.cloud.storage._helpers import _get_default_storage_base_url retry_429 = RetryErrors(exceptions.TooManyRequests) retry_429_harder = RetryErrors(exceptions.TooManyRequests, max_tries=10) retry_429_503 = RetryErrors( - [exceptions.TooManyRequests, exceptions.ServiceUnavailable], max_tries=10 + (exceptions.TooManyRequests, exceptions.ServiceUnavailable), max_tries=10 ) retry_failures = RetryErrors(AssertionError) user_project = os.environ.get("GOOGLE_CLOUD_TESTS_USER_PROJECT") testing_mtls = os.getenv("GOOGLE_API_USE_CLIENT_CERTIFICATE") == "true" +test_universe_domain = os.getenv("TEST_UNIVERSE_DOMAIN") +test_universe_project_id = os.getenv("TEST_UNIVERSE_PROJECT_ID") +test_universe_location = os.getenv("TEST_UNIVERSE_LOCATION") +test_universe_domain_credential = os.getenv("TEST_UNIVERSE_DOMAIN_CREDENTIAL") signing_blob_content = b"This time for sure, Rocky!" +is_api_endpoint_override = ( + _get_default_storage_base_url() != "https://storage.googleapis.com" +) def _bad_copy(bad_request): @@ -46,9 +55,33 @@ def _has_kms_key_name(blob): return blob.kms_key_name is not None +def _has_retention_expiration(blob): + return blob.retention_expiration_time is not None + + +def _no_retention_expiration(blob): + return blob.retention_expiration_time is None + + +def _has_retetion_period(bucket): + return bucket.retention_period is not None + + +def _no_retetion_period(bucket): + return bucket.retention_period is None + + retry_bad_copy = RetryErrors(exceptions.BadRequest, error_predicate=_bad_copy) -retry_no_event_based_hold = RetryInstanceState(_no_event_based_hold) -retry_has_kms_key_name = RetryInstanceState(_has_kms_key_name) +retry_no_event_based_hold = RetryInstanceState(_no_event_based_hold, max_tries=5) +retry_has_kms_key_name = RetryInstanceState(_has_kms_key_name, max_tries=5) +retry_has_retention_expiration = RetryInstanceState( + _has_retention_expiration, max_tries=5 +) +retry_no_retention_expiration = RetryInstanceState( + _no_retention_expiration, max_tries=5 +) +retry_has_retention_period = RetryInstanceState(_has_retetion_period, max_tries=5) +retry_no_retention_period = RetryInstanceState(_no_retetion_period, max_tries=5) def unique_name(prefix): @@ -64,7 +97,11 @@ def empty_bucket(bucket): def delete_blob(blob): - errors = (exceptions.Conflict, exceptions.TooManyRequests) + errors = ( + exceptions.Conflict, + exceptions.TooManyRequests, + exceptions.ServiceUnavailable, + ) retry = RetryErrors(errors) try: retry(blob.delete)(timeout=120) # seconds @@ -78,7 +115,22 @@ def delete_blob(blob): def delete_bucket(bucket): - errors = (exceptions.Conflict, exceptions.TooManyRequests) + errors = ( + exceptions.Conflict, + exceptions.TooManyRequests, + exceptions.ServiceUnavailable, + ) retry = RetryErrors(errors, max_tries=15) retry(empty_bucket)(bucket) retry(bucket.delete)(force=True) + + +def await_config_changes_propagate(sec=12): + # Changes to the bucket will be readable immediately after writing, + # but configuration changes may take time to 
propagate. + # See https://cloud.google.com/storage/docs/json_api/v1/buckets/patch + # + # The default was changed from 3 to 12 in May 2023 due to changes in bucket + # metadata handling. Note that the documentation recommends waiting "30 + # seconds". + time.sleep(sec) diff --git a/tests/system/conftest.py b/tests/system/conftest.py index 02a13d140..588f66f79 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -17,7 +17,10 @@ import pytest +from google.api_core import exceptions +from google.cloud import kms from google.cloud.storage._helpers import _base64_md5hash +from google.cloud.storage.retry import DEFAULT_RETRY from . import _helpers @@ -44,6 +47,23 @@ "parent/child/other/file32.txt", ] +ebh_bucket_iteration = 0 + +_key_name_format = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}" + +keyring_name = "gcs-test" +default_key_name = "gcs-test" +alt_key_name = "gcs-test-alternate" + + +def _kms_key_name(client, bucket, key_name): + return _key_name_format.format( + client.project, + bucket.location.lower(), + keyring_name, + key_name, + ) + @pytest.fixture(scope="session") def storage_client(): @@ -85,7 +105,11 @@ def shared_bucket_name(): def shared_bucket(storage_client, shared_bucket_name): bucket = storage_client.bucket(shared_bucket_name) bucket.versioning_enabled = True - _helpers.retry_429_503(bucket.create)() + # Create the bucket only if it doesn't yet exist. + try: + storage_client.get_bucket(bucket) + except exceptions.NotFound: + _helpers.retry_429_503(bucket.create)() yield bucket @@ -100,15 +124,21 @@ def listable_bucket_name(): @pytest.fixture(scope="session") def listable_bucket(storage_client, listable_bucket_name, file_data): bucket = storage_client.bucket(listable_bucket_name) - _helpers.retry_429_503(bucket.create)() + # Create the bucket only if it doesn't yet exist. + try: + storage_client.get_bucket(bucket) + except exceptions.NotFound: + _helpers.retry_429_503(bucket.create)() info = file_data["logo"] source_blob = bucket.blob(_listable_filenames[0]) - source_blob.upload_from_filename(info["path"]) + source_blob.upload_from_filename(info["path"], retry=DEFAULT_RETRY) for filename in _listable_filenames[1:]: _helpers.retry_bad_copy(bucket.copy_blob)( - source_blob, bucket, filename, + source_blob, + bucket, + filename, ) yield bucket @@ -129,12 +159,16 @@ def hierarchy_bucket_name(): @pytest.fixture(scope="session") def hierarchy_bucket(storage_client, hierarchy_bucket_name, file_data): bucket = storage_client.bucket(hierarchy_bucket_name) - _helpers.retry_429_503(bucket.create)() + # Create the hierarchy bucket only if it doesn't yet exist. + try: + storage_client.get_bucket(bucket) + except exceptions.NotFound: + _helpers.retry_429_503(bucket.create)() simple_path = _file_data["simple"]["path"] for filename in _hierarchy_filenames: blob = bucket.blob(filename) - blob.upload_from_filename(simple_path) + blob.upload_from_filename(simple_path, retry=DEFAULT_RETRY) yield bucket @@ -154,7 +188,12 @@ def signing_bucket_name(): @pytest.fixture(scope="session") def signing_bucket(storage_client, signing_bucket_name): bucket = storage_client.bucket(signing_bucket_name) - _helpers.retry_429_503(bucket.create)() + # Create the bucket only if it doesn't yet exist. 
+ try: + storage_client.get_bucket(bucket) + except exceptions.NotFound: + _helpers.retry_429_503(bucket.create)() + blob = bucket.blob("README.txt") blob.upload_from_string(_helpers.signing_blob_content) @@ -163,6 +202,34 @@ def signing_bucket(storage_client, signing_bucket_name): _helpers.delete_bucket(bucket) +@pytest.fixture(scope="function") +def default_ebh_bucket_name(): + # Keep track of how many ebh buckets have been created so we can get a + # clean one each rerun. "unique_name" is unique per test iteration, not + # per test rerun. + global ebh_bucket_iteration + ebh_bucket_iteration += 1 + return _helpers.unique_name("gcp-systest-default-ebh") + "-{}".format( + ebh_bucket_iteration + ) + + +# ebh_bucket/name are not scope=session because the bucket is modified in test. +@pytest.fixture(scope="function") +def default_ebh_bucket(storage_client, default_ebh_bucket_name): + bucket = storage_client.bucket(default_ebh_bucket_name) + bucket.default_event_based_hold = True + # Create the bucket only if it doesn't yet exist. + try: + storage_client.get_bucket(bucket) + except exceptions.NotFound: + _helpers.retry_429_503(bucket.create)() + + yield bucket + + _helpers.delete_bucket(bucket) + + @pytest.fixture(scope="function") def buckets_to_delete(): buckets_to_delete = [] @@ -190,3 +257,160 @@ def file_data(): file_data["hash"] = _base64_md5hash(file_obj) return _file_data + + +@pytest.fixture(scope="function") +def kms_bucket_name(): + return _helpers.unique_name("gcp-systest-kms") + + +@pytest.fixture(scope="function") +def kms_bucket(storage_client, kms_bucket_name, no_mtls): + bucket = _helpers.retry_429_503(storage_client.create_bucket)(kms_bucket_name) + + yield bucket + + _helpers.delete_bucket(bucket) + + +@pytest.fixture(scope="function") +def kms_key_name(storage_client, kms_bucket): + return _kms_key_name(storage_client, kms_bucket, default_key_name) + + +@pytest.fixture(scope="function") +def alt_kms_key_name(storage_client, kms_bucket): + return _kms_key_name(storage_client, kms_bucket, alt_key_name) + + +@pytest.fixture(scope="session") +def kms_client(): + return kms.KeyManagementServiceClient() + + +@pytest.fixture(scope="function") +def keyring(storage_client, kms_bucket, kms_client): + project = storage_client.project + location = kms_bucket.location.lower() + purpose = kms.enums.CryptoKey.CryptoKeyPurpose.ENCRYPT_DECRYPT + + # If the keyring doesn't exist create it. 
+ keyring_path = kms_client.key_ring_path(project, location, keyring_name) + + try: + kms_client.get_key_ring(keyring_path) + except exceptions.NotFound: + parent = kms_client.location_path(project, location) + kms_client.create_key_ring(parent, keyring_name, {}) + + # Mark this service account as an owner of the new keyring + service_account_email = storage_client.get_service_account_email() + policy = { + "bindings": [ + { + "role": "roles/cloudkms.cryptoKeyEncrypterDecrypter", + "members": ["serviceAccount:" + service_account_email], + } + ] + } + kms_client.set_iam_policy(keyring_path, policy) + + # Populate the keyring with the keys we use in the tests + key_names = [ + "gcs-test", + "gcs-test-alternate", + "explicit-kms-key-name", + "default-kms-key-name", + "override-default-kms-key-name", + "alt-default-kms-key-name", + ] + for key_name in key_names: + key_path = kms_client.crypto_key_path(project, location, keyring_name, key_name) + try: + kms_client.get_crypto_key(key_path) + except exceptions.NotFound: + key = {"purpose": purpose} + kms_client.create_crypto_key(keyring_path, key_name, key) + + +@pytest.fixture(scope="function") +def test_universe_domain(): + if _helpers.test_universe_domain is None: + pytest.skip("TEST_UNIVERSE_DOMAIN not set in environment.") + return _helpers.test_universe_domain + + +@pytest.fixture(scope="function") +def test_universe_project_id(): + if _helpers.test_universe_project_id is None: + pytest.skip("TEST_UNIVERSE_PROJECT_ID not set in environment.") + return _helpers.test_universe_project_id + + +@pytest.fixture(scope="function") +def test_universe_location(): + if _helpers.test_universe_location is None: + pytest.skip("TEST_UNIVERSE_LOCATION not set in environment.") + return _helpers.test_universe_location + + +@pytest.fixture(scope="function") +def test_universe_domain_credential(): + if _helpers.test_universe_domain_credential is None: + pytest.skip("TEST_UNIVERSE_DOMAIN_CREDENTIAL not set in environment.") + return _helpers.test_universe_domain_credential + + +@pytest.fixture(scope="function") +def universe_domain_credential(test_universe_domain_credential): + from google.oauth2 import service_account + + return service_account.Credentials.from_service_account_file( + test_universe_domain_credential + ) + + +@pytest.fixture(scope="function") +def universe_domain_client( + test_universe_domain, test_universe_project_id, universe_domain_credential +): + from google.cloud.storage import Client + + client_options = {"universe_domain": test_universe_domain} + ud_storage_client = Client( + project=test_universe_project_id, + credentials=universe_domain_credential, + client_options=client_options, + ) + with contextlib.closing(ud_storage_client): + yield ud_storage_client + + +@pytest.fixture(scope="function") +def universe_domain_bucket(universe_domain_client, test_universe_location): + bucket_name = _helpers.unique_name("gcp-systest-ud") + bucket = universe_domain_client.create_bucket( + bucket_name, location=test_universe_location + ) + + blob = bucket.blob("README.txt") + blob.upload_from_string(_helpers.signing_blob_content) + + yield bucket + + _helpers.delete_bucket(bucket) + + +@pytest.fixture(scope="function") +def universe_domain_iam_client( + test_universe_domain, test_universe_project_id, universe_domain_credential +): + from google.cloud import iam_credentials_v1 + + client_options = {"universe_domain": test_universe_domain} + iam_client = iam_credentials_v1.IAMCredentialsClient( + credentials=universe_domain_credential, + 
client_options=client_options, + ) + + return iam_client diff --git a/tests/system/test__signing.py b/tests/system/test__signing.py index 04c3687a4..cdf718d90 100644 --- a/tests/system/test__signing.py +++ b/tests/system/test__signing.py @@ -17,11 +17,13 @@ import hashlib import os import time - +import pytest import requests from google.api_core import path_template from google.cloud import iam_credentials_v1 +from google.cloud.storage._helpers import _NOW +from google.cloud.storage._helpers import _UTC from . import _helpers @@ -41,7 +43,11 @@ def _create_signed_list_blobs_url_helper( expiration = _morph_expiration(version, expiration) signed_url = bucket.generate_signed_url( - expiration=expiration, method=method, client=client, version=version + expiration=expiration, + method=method, + client=client, + version=version, + api_access_endpoint=_helpers._get_default_storage_base_url(), ) response = requests.get(signed_url) @@ -50,34 +56,44 @@ def _create_signed_list_blobs_url_helper( def test_create_signed_list_blobs_url_v2(storage_client, signing_bucket, no_mtls): _create_signed_list_blobs_url_helper( - storage_client, signing_bucket, version="v2", + storage_client, + signing_bucket, + version="v2", ) def test_create_signed_list_blobs_url_v2_w_expiration( storage_client, signing_bucket, no_mtls ): - now = datetime.datetime.utcnow() + now = _NOW(_UTC).replace(tzinfo=None) delta = datetime.timedelta(seconds=10) _create_signed_list_blobs_url_helper( - storage_client, signing_bucket, expiration=now + delta, version="v2", + storage_client, + signing_bucket, + expiration=now + delta, + version="v2", ) def test_create_signed_list_blobs_url_v4(storage_client, signing_bucket, no_mtls): _create_signed_list_blobs_url_helper( - storage_client, signing_bucket, version="v4", + storage_client, + signing_bucket, + version="v4", ) def test_create_signed_list_blobs_url_v4_w_expiration( storage_client, signing_bucket, no_mtls ): - now = datetime.datetime.utcnow() + now = _NOW(_UTC).replace(tzinfo=None) delta = datetime.timedelta(seconds=10) _create_signed_list_blobs_url_helper( - storage_client, signing_bucket, expiration=now + delta, version="v4", + storage_client, + signing_bucket, + expiration=now + delta, + version="v4", ) @@ -135,14 +151,16 @@ def test_create_signed_read_url_v2(storage_client, signing_bucket, no_mtls): def test_create_signed_read_url_v4(storage_client, signing_bucket, no_mtls): _create_signed_read_url_helper( - storage_client, signing_bucket, version="v4", + storage_client, + signing_bucket, + version="v4", ) def test_create_signed_read_url_v2_w_expiration( storage_client, signing_bucket, no_mtls ): - now = datetime.datetime.utcnow() + now = _NOW(_UTC).replace(tzinfo=None) delta = datetime.timedelta(seconds=10) _create_signed_read_url_helper( @@ -153,7 +171,7 @@ def test_create_signed_read_url_v2_w_expiration( def test_create_signed_read_url_v4_w_expiration( storage_client, signing_bucket, no_mtls ): - now = datetime.datetime.utcnow() + now = _NOW(_UTC).replace(tzinfo=None) delta = datetime.timedelta(seconds=10) _create_signed_read_url_helper( storage_client, signing_bucket, expiration=now + delta, version="v4" @@ -180,7 +198,7 @@ def test_create_signed_read_url_v2_w_non_ascii_name( _create_signed_read_url_helper( storage_client, signing_bucket, - blob_name=u"Caf\xe9.txt", + blob_name="Caf\xe9.txt", payload=b"Test signed URL for blob w/ non-ASCII name", ) @@ -191,7 +209,7 @@ def test_create_signed_read_url_v4_w_non_ascii_name( _create_signed_read_url_helper( storage_client, 
signing_bucket, - blob_name=u"Caf\xe9.txt", + blob_name="Caf\xe9.txt", payload=b"Test signed URL for blob w/ non-ASCII name", version="v4", ) @@ -269,6 +287,39 @@ def test_create_signed_read_url_v4_w_access_token( ) +@pytest.mark.skipif( + _helpers.is_api_endpoint_override, + reason="Credentials not yet supported in preprod testing.", +) +def test_create_signed_read_url_v4_w_access_token_universe_domain( + universe_domain_iam_client, + universe_domain_client, + test_universe_location, + universe_domain_credential, + universe_domain_bucket, + no_mtls, +): + service_account_email = universe_domain_credential.service_account_email + name = path_template.expand( + "projects/{project}/serviceAccounts/{service_account}", + project="-", + service_account=service_account_email, + ) + scope = [ + "https://www.googleapis.com/auth/devstorage.read_write", + "https://www.googleapis.com/auth/iam", + ] + response = universe_domain_iam_client.generate_access_token(name=name, scope=scope) + + _create_signed_read_url_helper( + universe_domain_client, + universe_domain_bucket, + version="v4", + service_account_email=service_account_email, + access_token=response.access_token, + ) + + def _create_signed_delete_url_helper(client, bucket, version="v2", expiration=None): expiration = _morph_expiration(version, expiration) @@ -276,7 +327,10 @@ def _create_signed_delete_url_helper(client, bucket, version="v2", expiration=No blob.upload_from_string(b"DELETE ME!") signed_delete_url = blob.generate_signed_url( - expiration=expiration, method="DELETE", client=client, version=version, + expiration=expiration, + method="DELETE", + client=client, + version=version, ) response = requests.request("DELETE", signed_delete_url) @@ -303,7 +357,10 @@ def _create_signed_resumable_upload_url_helper( # Initiate the upload using a signed URL. signed_resumable_upload_url = blob.generate_signed_url( - expiration=expiration, method="RESUMABLE", client=client, version=version, + expiration=expiration, + method="RESUMABLE", + client=client, + version=version, ) post_headers = {"x-goog-resumable": "start"} @@ -327,7 +384,10 @@ def _create_signed_resumable_upload_url_helper( # Finally, delete the blob using a signed URL. 
signed_delete_url = blob.generate_signed_url( - expiration=expiration, method="DELETE", client=client, version=version, + expiration=expiration, + method="DELETE", + client=client, + version=version, ) delete_response = requests.delete(signed_delete_url) @@ -336,16 +396,24 @@ def _create_signed_resumable_upload_url_helper( def test_create_signed_resumable_upload_url_v2(storage_client, signing_bucket, no_mtls): _create_signed_resumable_upload_url_helper( - storage_client, signing_bucket, version="v2", + storage_client, + signing_bucket, + version="v2", ) def test_create_signed_resumable_upload_url_v4(storage_client, signing_bucket, no_mtls): _create_signed_resumable_upload_url_helper( - storage_client, signing_bucket, version="v4", + storage_client, + signing_bucket, + version="v4", ) +@pytest.mark.skipif( + _helpers.is_api_endpoint_override, + reason="Test does not yet support endpoint override", +) def test_generate_signed_post_policy_v4( storage_client, buckets_to_delete, blobs_to_delete, service_account, no_mtls ): @@ -358,6 +426,7 @@ def test_generate_signed_post_policy_v4( with open(blob_name, "wb") as f: f.write(payload) + now = _NOW(_UTC).replace(tzinfo=None) policy = storage_client.generate_signed_post_policy_v4( bucket_name, blob_name, @@ -365,7 +434,7 @@ def test_generate_signed_post_policy_v4( {"bucket": bucket_name}, ["starts-with", "$Content-Type", "text/pla"], ], - expiration=datetime.datetime.utcnow() + datetime.timedelta(hours=1), + expiration=now + datetime.timedelta(hours=1), fields={"content-type": "text/plain"}, ) with open(blob_name, "r") as f: @@ -379,6 +448,55 @@ def test_generate_signed_post_policy_v4( assert blob.download_as_bytes() == payload +@pytest.mark.skipif( + _helpers.is_api_endpoint_override, + reason="Test does not yet support endpoint override", +) +def test_generate_signed_post_policy_v4_access_token_sa_email( + storage_client, signing_bucket, blobs_to_delete, service_account, no_mtls +): + client = iam_credentials_v1.IAMCredentialsClient() + service_account_email = service_account.service_account_email + name = path_template.expand( + "projects/{project}/serviceAccounts/{service_account}", + project="-", + service_account=service_account_email, + ) + scope = [ + "https://www.googleapis.com/auth/devstorage.read_write", + "https://www.googleapis.com/auth/iam", + ] + response = client.generate_access_token(name=name, scope=scope) + + now = _NOW(_UTC).replace(tzinfo=None) + blob_name = "post_policy_obj_email2.txt" + payload = b"DEADBEEF" + with open(blob_name, "wb") as f: + f.write(payload) + policy = storage_client.generate_signed_post_policy_v4( + signing_bucket.name, + blob_name, + conditions=[ + {"bucket": signing_bucket.name}, + ["starts-with", "$Content-Type", "text/pla"], + ], + expiration=now + datetime.timedelta(hours=1), + fields={"content-type": "text/plain"}, + service_account_email=service_account_email, + access_token=response.access_token, + ) + with open(blob_name, "r") as f: + files = {"file": (blob_name, f)} + response = requests.post(policy["url"], data=policy["fields"], files=files) + + os.remove(blob_name) + assert response.status_code == 204 + + blob = signing_bucket.get_blob(blob_name) + blobs_to_delete.append(blob) + assert blob.download_as_bytes() == payload + + def test_generate_signed_post_policy_v4_invalid_field( storage_client, buckets_to_delete, blobs_to_delete, service_account, no_mtls ): @@ -391,6 +509,7 @@ def 
test_generate_signed_post_policy_v4_invalid_field( with open(blob_name, "wb") as f: f.write(payload) + now = _NOW(_UTC).replace(tzinfo=None) policy = storage_client.generate_signed_post_policy_v4( bucket_name, blob_name, @@ -398,7 +517,7 @@ def test_generate_signed_post_policy_v4_invalid_field( {"bucket": bucket_name}, ["starts-with", "$Content-Type", "text/pla"], ], - expiration=datetime.datetime.utcnow() + datetime.timedelta(hours=1), + expiration=now + datetime.timedelta(hours=1), fields={"x-goog-random": "invalid_field", "content-type": "text/plain"}, ) with open(blob_name, "r") as f: diff --git a/tests/system/test_blob.py b/tests/system/test_blob.py index b6d5216a7..00f218534 100644 --- a/tests/system/test_blob.py +++ b/tests/system/test_blob.py @@ -17,12 +17,13 @@ import io import os import tempfile +import uuid import warnings import pytest import mock -from google import resumable_media +from google.cloud.storage.exceptions import DataCorruption from google.api_core import exceptions from google.cloud.storage._helpers import _base64_md5hash from . import _helpers @@ -39,9 +40,12 @@ def _check_blob_hash(blob, info): def test_large_file_write_from_stream( - shared_bucket, blobs_to_delete, file_data, service_account, + shared_bucket, + blobs_to_delete, + file_data, + service_account, ): - blob = shared_bucket.blob("LargeFile") + blob = shared_bucket.blob(f"LargeFile{uuid.uuid4().hex}") info = file_data["big"] with open(info["path"], "rb") as file_obj: @@ -52,9 +56,12 @@ def test_large_file_write_from_stream( def test_large_file_write_from_stream_w_checksum( - shared_bucket, blobs_to_delete, file_data, service_account, + shared_bucket, + blobs_to_delete, + file_data, + service_account, ): - blob = shared_bucket.blob("LargeFile") + blob = shared_bucket.blob(f"LargeFile{uuid.uuid4().hex}") info = file_data["big"] with open(info["path"], "rb") as file_obj: @@ -65,9 +72,12 @@ def test_large_file_write_from_stream_w_checksum( def test_large_file_write_from_stream_w_failed_checksum( - shared_bucket, blobs_to_delete, file_data, service_account, + shared_bucket, + blobs_to_delete, + file_data, + service_account, ): - blob = shared_bucket.blob("LargeFile") + blob = shared_bucket.blob(f"LargeFile{uuid.uuid4().hex}") # Intercept the digest processing at the last stage and replace it # with garbage. This is done with a patch to monkey-patch the @@ -76,19 +86,26 @@ def test_large_file_write_from_stream_w_failed_checksum( # The # remote API is still exercised. 
info = file_data["big"] with open(info["path"], "rb") as file_obj: - with mock.patch( - "google.resumable_media._helpers.prepare_checksum_digest", + "google.cloud.storage._media._helpers.prepare_checksum_digest", return_value="FFFFFF==", ): - with pytest.raises(resumable_media.DataCorruption): + with pytest.raises(DataCorruption): blob.upload_from_file(file_obj, checksum="crc32c") assert not blob.exists() +@pytest.mark.skipif( + _helpers.is_api_endpoint_override, + reason="Test does not yet support endpoint override", +) def test_large_file_write_from_stream_w_encryption_key( - storage_client, shared_bucket, blobs_to_delete, file_data, service_account, + storage_client, + shared_bucket, + blobs_to_delete, + file_data, + service_account, ): blob = shared_bucket.blob("LargeFile", encryption_key=encryption_key) @@ -99,6 +116,11 @@ def test_large_file_write_from_stream_w_encryption_key( _check_blob_hash(blob, info) + blob_without_key = shared_bucket.blob("LargeFile") + with tempfile.TemporaryFile() as tmp: + with pytest.raises(exceptions.BadRequest): + storage_client.download_blob_to_file(blob_without_key, tmp) + with tempfile.NamedTemporaryFile() as temp_f: with open(temp_f.name, "wb") as file_obj: storage_client.download_blob_to_file(blob, file_obj) @@ -110,9 +132,12 @@ def test_large_file_write_from_stream_w_encryption_key( def test_small_file_write_from_filename( - shared_bucket, blobs_to_delete, file_data, service_account, + shared_bucket, + blobs_to_delete, + file_data, + service_account, ): - blob = shared_bucket.blob("SmallFile") + blob = shared_bucket.blob(f"SmallFile{uuid.uuid4().hex}") info = file_data["simple"] blob.upload_from_filename(info["path"]) @@ -122,9 +147,12 @@ def test_small_file_write_from_filename( def test_small_file_write_from_filename_with_checksum( - shared_bucket, blobs_to_delete, file_data, service_account, + shared_bucket, + blobs_to_delete, + file_data, + service_account, ): - blob = shared_bucket.blob("SmallFile") + blob = shared_bucket.blob(f"SmallFile{uuid.uuid4().hex}") info = file_data["simple"] blob.upload_from_filename(info["path"], checksum="crc32c") @@ -134,15 +162,18 @@ def test_small_file_write_from_filename_with_checksum( def test_small_file_write_from_filename_with_failed_checksum( - shared_bucket, blobs_to_delete, file_data, service_account, + shared_bucket, + blobs_to_delete, + file_data, + service_account, ): - blob = shared_bucket.blob("SmallFile") + blob = shared_bucket.blob(f"SmallFile{uuid.uuid4().hex}") info = file_data["simple"] # Intercept the digest processing at the last stage and replace # it with garbage with mock.patch( - "google.resumable_media._helpers.prepare_checksum_digest", + "google.cloud.storage._media._helpers.prepare_checksum_digest", return_value="FFFFFF==", ): with pytest.raises(exceptions.BadRequest): @@ -235,7 +266,10 @@ def test_blob_crud_w_user_project( def test_blob_crud_w_etag_match( - shared_bucket, blobs_to_delete, file_data, service_account, + shared_bucket, + blobs_to_delete, + file_data, + service_account, ): wrong_etag = "kittens" @@ -281,7 +315,10 @@ def test_blob_crud_w_etag_match( def test_blob_crud_w_generation_match( - shared_bucket, blobs_to_delete, file_data, service_account, + shared_bucket, + blobs_to_delete, + file_data, + service_account, ): wrong_generation_number = 6 wrong_metageneration_number = 9 @@ -353,7 +390,7 @@ def test_blob_acl_w_user_project( with_user_project = storage_client.bucket( shared_bucket.name, user_project=user_project ) - blob = with_user_project.blob("SmallFile") + blob = 
with_user_project.blob(f"SmallFile{uuid.uuid4().hex}") info = file_data["simple"] @@ -373,7 +410,10 @@ def test_blob_acl_w_user_project( def test_blob_acl_w_metageneration_match( - shared_bucket, blobs_to_delete, file_data, service_account, + shared_bucket, + blobs_to_delete, + file_data, + service_account, ): wrong_metageneration_number = 9 wrong_generation_number = 6 @@ -408,12 +448,15 @@ def test_blob_acl_w_metageneration_match( def test_blob_acl_upload_predefined( - shared_bucket, blobs_to_delete, file_data, service_account, + shared_bucket, + blobs_to_delete, + file_data, + service_account, ): - control = shared_bucket.blob("logo") + control = shared_bucket.blob(f"logo{uuid.uuid4().hex}") control_info = file_data["logo"] - blob = shared_bucket.blob("SmallFile") + blob = shared_bucket.blob(f"SmallFile{uuid.uuid4().hex}") info = file_data["simple"] try: @@ -438,7 +481,10 @@ def test_blob_acl_upload_predefined( def test_blob_patch_metadata( - shared_bucket, blobs_to_delete, file_data, service_account, + shared_bucket, + blobs_to_delete, + file_data, + service_account, ): filename = file_data["logo"]["path"] blob_name = os.path.basename(filename) @@ -467,7 +513,9 @@ def test_blob_patch_metadata( def test_blob_direct_write_and_read_into_file( - shared_bucket, blobs_to_delete, service_account, + shared_bucket, + blobs_to_delete, + service_account, ): payload = b"Hello World" blob = shared_bucket.blob("MyBuffer") @@ -478,7 +526,6 @@ def test_blob_direct_write_and_read_into_file( same_blob.reload() # Initialize properties. with tempfile.NamedTemporaryFile() as temp_f: - with open(temp_f.name, "wb") as file_obj: same_blob.download_to_file(file_obj) @@ -489,7 +536,9 @@ def test_blob_direct_write_and_read_into_file( def test_blob_download_w_generation_match( - shared_bucket, blobs_to_delete, service_account, + shared_bucket, + blobs_to_delete, + service_account, ): wrong_generation_number = 6 @@ -502,7 +551,6 @@ def test_blob_download_w_generation_match( same_blob.reload() # Initialize properties. with tempfile.NamedTemporaryFile() as temp_f: - with open(temp_f.name, "wb") as file_obj: with pytest.raises(exceptions.PreconditionFailed): same_blob.download_to_file( @@ -522,7 +570,9 @@ def test_blob_download_w_generation_match( def test_blob_download_w_failed_crc32c_checksum( - shared_bucket, blobs_to_delete, service_account, + shared_bucket, + blobs_to_delete, + service_account, ): blob = shared_bucket.blob("FailedChecksumBlob") payload = b"Hello World" @@ -536,10 +586,10 @@ def test_blob_download_w_failed_crc32c_checksum( # mock a remote interface like a unit test would. # The remote API is still exercised. 
with mock.patch( - "google.resumable_media._helpers.prepare_checksum_digest", + "google.cloud.storage._media._helpers.prepare_checksum_digest", return_value="FFFFFF==", ): - with pytest.raises(resumable_media.DataCorruption): + with pytest.raises(DataCorruption): blob.download_to_filename(temp_f.name, checksum="crc32c") # Confirm the file was deleted on failure @@ -555,7 +605,9 @@ def test_blob_download_w_failed_crc32c_checksum( def test_blob_download_as_text( - shared_bucket, blobs_to_delete, service_account, + shared_bucket, + blobs_to_delete, + service_account, ): blob = shared_bucket.blob("MyBuffer") payload = "Hello World" @@ -569,9 +621,16 @@ def test_blob_download_as_text( assert stored_contents == payload assert blob.etag == etag + # Test download with byte range + end_byte = 5 + stored_contents = blob.download_as_text(start=0, end=end_byte - 1) + assert stored_contents == payload[0:end_byte] + def test_blob_upload_w_gzip_encoded_download_raw( - shared_bucket, blobs_to_delete, service_account, + shared_bucket, + blobs_to_delete, + service_account, ): payload = b"DEADBEEF" * 1000 raw_stream = io.BytesIO() @@ -592,9 +651,12 @@ def test_blob_upload_w_gzip_encoded_download_raw( def test_blob_upload_from_file_resumable_with_generation( - shared_bucket, blobs_to_delete, file_data, service_account, + shared_bucket, + blobs_to_delete, + file_data, + service_account, ): - blob = shared_bucket.blob("LargeFile") + blob = shared_bucket.blob(f"LargeFile{uuid.uuid4().hex}") wrong_generation = 3 wrong_meta_generation = 3 @@ -616,18 +678,23 @@ def test_blob_upload_from_file_resumable_with_generation( with pytest.raises(exceptions.PreconditionFailed): with open(info["path"], "rb") as file_obj: blob.upload_from_file( - file_obj, if_generation_match=wrong_generation, + file_obj, + if_generation_match=wrong_generation, ) with pytest.raises(exceptions.PreconditionFailed): with open(info["path"], "rb") as file_obj: blob.upload_from_file( - file_obj, if_metageneration_match=wrong_meta_generation, + file_obj, + if_metageneration_match=wrong_meta_generation, ) def test_blob_upload_from_string_w_owner( - shared_bucket, blobs_to_delete, file_data, service_account, + shared_bucket, + blobs_to_delete, + file_data, + service_account, ): blob = shared_bucket.blob("MyBuffer") payload = b"Hello World" @@ -642,7 +709,10 @@ def test_blob_upload_from_string_w_owner( def test_blob_upload_from_string_w_custom_time( - shared_bucket, blobs_to_delete, file_data, service_account, + shared_bucket, + blobs_to_delete, + file_data, + service_account, ): blob = shared_bucket.blob("CustomTimeBlob") payload = b"Hello World" @@ -658,7 +728,10 @@ def test_blob_upload_from_string_w_custom_time( def test_blob_upload_from_string_w_custom_time_no_micros( - shared_bucket, blobs_to_delete, file_data, service_account, + shared_bucket, + blobs_to_delete, + file_data, + service_account, ): # Test that timestamps without microseconds are treated correctly by # custom_time encoding/decoding. 
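[Editor's note, not part of the patch] The hunks above move the checksum-failure assertions from google.resumable_media.DataCorruption to google.cloud.storage.exceptions.DataCorruption and add a ranged read via download_as_text(start=..., end=...). As a reference for the same public surface, here is a minimal sketch outside the test fixtures; the bucket and object names are placeholders, not values from the tests:

import io

from google.cloud import storage
from google.cloud.storage.exceptions import DataCorruption

client = storage.Client()
bucket = client.bucket("example-bucket")      # placeholder bucket name
blob = bucket.blob("example-object.txt")      # placeholder object name

# Ranged read: start/end are inclusive byte offsets, mirroring the
# download_as_text(start=0, end=end_byte - 1) assertion above.
first_five_bytes = blob.download_as_text(start=0, end=4)

# Checksum-verified download: a digest mismatch surfaces as DataCorruption,
# now imported from google.cloud.storage.exceptions rather than resumable_media.
buffer = io.BytesIO()
try:
    blob.download_to_file(buffer, checksum="crc32c")
except DataCorruption:
    buffer.seek(0)
    buffer.truncate()  # discard the partially downloaded, unverified payload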
@@ -676,7 +749,10 @@ def test_blob_upload_from_string_w_custom_time_no_micros( def test_blob_upload_download_crc32_md5_hash( - shared_bucket, blobs_to_delete, file_data, service_account, + shared_bucket, + blobs_to_delete, + file_data, + service_account, ): blob = shared_bucket.blob("MyBuffer") payload = b"Hello World" @@ -685,7 +761,7 @@ def test_blob_upload_download_crc32_md5_hash( download_blob = shared_bucket.blob("MyBuffer") - assert download_blob.download_as_string() == payload + assert download_blob.download_as_bytes() == payload assert download_blob.crc32c == blob.crc32c assert download_blob.md5_hash == blob.md5_hash @@ -693,8 +769,8 @@ def test_blob_upload_download_crc32_md5_hash( @pytest.mark.parametrize( "blob_name,payload", [ - (u"Caf\u00e9", b"Normalization Form C"), - (u"Cafe\u0301", b"Normalization Form D"), + ("Caf\u00e9", b"Normalization Form C"), + ("Cafe\u0301", b"Normalization Form D"), ], ) def test_blob_w_unicode_names(blob_name, payload, shared_bucket, blobs_to_delete): @@ -757,13 +833,13 @@ def test_blob_compose_new_blob_wo_content_type(shared_bucket, blobs_to_delete): def test_blob_compose_replace_existing_blob(shared_bucket, blobs_to_delete): payload_before = b"AAA\n" - original = shared_bucket.blob("original") + original = shared_bucket.blob(uuid.uuid4().hex) original.content_type = "text/plain" original.upload_from_string(payload_before) blobs_to_delete.append(original) payload_to_append = b"BBB\n" - to_append = shared_bucket.blob("to_append") + to_append = shared_bucket.blob(uuid.uuid4().hex) to_append.upload_from_string(payload_to_append) blobs_to_delete.append(to_append) @@ -774,7 +850,7 @@ def test_blob_compose_replace_existing_blob(shared_bucket, blobs_to_delete): def test_blob_compose_w_generation_match_list(shared_bucket, blobs_to_delete): payload_before = b"AAA\n" - original = shared_bucket.blob("original") + original = shared_bucket.blob(uuid.uuid4().hex) original.content_type = "text/plain" original.upload_from_string(payload_before) blobs_to_delete.append(original) @@ -782,7 +858,7 @@ def test_blob_compose_w_generation_match_list(shared_bucket, blobs_to_delete): wrong_metagenerations = [8, 9] payload_to_append = b"BBB\n" - to_append = shared_bucket.blob("to_append") + to_append = shared_bucket.blob(uuid.uuid4().hex) to_append.upload_from_string(payload_to_append) blobs_to_delete.append(to_append) @@ -808,13 +884,13 @@ def test_blob_compose_w_generation_match_list(shared_bucket, blobs_to_delete): def test_blob_compose_w_generation_match_long(shared_bucket, blobs_to_delete): payload_before = b"AAA\n" - original = shared_bucket.blob("original") + original = shared_bucket.blob(uuid.uuid4().hex) original.content_type = "text/plain" original.upload_from_string(payload_before) blobs_to_delete.append(original) payload_to_append = b"BBB\n" - to_append = shared_bucket.blob("to_append") + to_append = shared_bucket.blob(uuid.uuid4().hex) to_append.upload_from_string(payload_to_append) blobs_to_delete.append(to_append) @@ -828,20 +904,21 @@ def test_blob_compose_w_generation_match_long(shared_bucket, blobs_to_delete): def test_blob_compose_w_source_generation_match(shared_bucket, blobs_to_delete): payload_before = b"AAA\n" - original = shared_bucket.blob("original") + original = shared_bucket.blob(uuid.uuid4().hex) original.content_type = "text/plain" original.upload_from_string(payload_before) blobs_to_delete.append(original) wrong_source_generations = [6, 7] payload_to_append = b"BBB\n" - to_append = shared_bucket.blob("to_append") + to_append = 
shared_bucket.blob(uuid.uuid4().hex) to_append.upload_from_string(payload_to_append) blobs_to_delete.append(to_append) with pytest.raises(exceptions.PreconditionFailed): original.compose( - [original, to_append], if_source_generation_match=wrong_source_generations, + [original, to_append], + if_source_generation_match=wrong_source_generations, ) original.compose( @@ -859,18 +936,18 @@ def test_blob_compose_w_user_project(storage_client, buckets_to_delete, user_pro created.requester_pays = True payload_1 = b"AAA\n" - source_1 = created.blob("source-1") + source_1 = created.blob(uuid.uuid4().hex) source_1.upload_from_string(payload_1) payload_2 = b"BBB\n" - source_2 = created.blob("source-2") + source_2 = created.blob(uuid.uuid4().hex) source_2.upload_from_string(payload_2) with_user_project = storage_client.bucket( new_bucket_name, user_project=user_project ) - destination = with_user_project.blob("destination") + destination = with_user_project.blob(uuid.uuid4().hex) destination.content_type = "text/plain" destination.compose([source_1, source_2]) @@ -879,13 +956,13 @@ def test_blob_compose_w_user_project(storage_client, buckets_to_delete, user_pro def test_blob_rewrite_new_blob_add_key(shared_bucket, blobs_to_delete, file_data): info = file_data["simple"] - source = shared_bucket.blob("source") + source = shared_bucket.blob(uuid.uuid4().hex) source.upload_from_filename(info["path"]) blobs_to_delete.append(source) source_data = source.download_as_bytes() key = os.urandom(32) - dest = shared_bucket.blob("dest", encryption_key=key) + dest = shared_bucket.blob(uuid.uuid4().hex, encryption_key=key) token, rewritten, total = dest.rewrite(source) blobs_to_delete.append(dest) @@ -1027,7 +1104,7 @@ def test_blob_update_storage_class_large_file( ): from google.cloud.storage import constants - blob = shared_bucket.blob("BigFile") + blob = shared_bucket.blob(f"BigFile{uuid.uuid4().hex}") info = file_data["big"] blob.upload_from_filename(info["path"]) @@ -1040,3 +1117,35 @@ def test_blob_update_storage_class_large_file( blob.update_storage_class(constants.COLDLINE_STORAGE_CLASS) blob.reload() assert blob.storage_class == constants.COLDLINE_STORAGE_CLASS + + +def test_object_retention_lock(storage_client, buckets_to_delete, blobs_to_delete): + from google.cloud.storage._helpers import _NOW + from google.cloud.storage._helpers import _UTC + + # Test bucket created with object retention enabled + new_bucket_name = _helpers.unique_name("object-retention") + created_bucket = _helpers.retry_429_503(storage_client.create_bucket)( + new_bucket_name, enable_object_retention=True + ) + buckets_to_delete.append(created_bucket) + assert created_bucket.object_retention_mode == "Enabled" + + # Test create object with object retention enabled + payload = b"Hello World" + mode = "Unlocked" + current_time = _NOW(_UTC).replace(tzinfo=None) + expiration_time = current_time + datetime.timedelta(seconds=10) + blob = created_bucket.blob("object-retention-lock") + blob.retention.mode = mode + blob.retention.retain_until_time = expiration_time + blob.upload_from_string(payload) + blobs_to_delete.append(blob) + blob.reload() + assert blob.retention.mode == mode + + # Test patch object to disable object retention + blob.retention.mode = None + blob.retention.retain_until_time = None + blob.patch(override_unlocked_retention=True) + assert blob.retention.mode is None diff --git a/tests/system/test_bucket.py b/tests/system/test_bucket.py index 78fa135ff..270a77ad1 100644 --- a/tests/system/test_bucket.py +++ 
b/tests/system/test_bucket.py @@ -13,7 +13,6 @@ # limitations under the License. import datetime - import pytest from google.api_core import exceptions @@ -42,10 +41,13 @@ def test_bucket_lifecycle_rules(storage_client, buckets_to_delete): from google.cloud.storage import constants from google.cloud.storage.bucket import LifecycleRuleDelete from google.cloud.storage.bucket import LifecycleRuleSetStorageClass + from google.cloud.storage.bucket import LifecycleRuleAbortIncompleteMultipartUpload bucket_name = _helpers.unique_name("w-lifcycle-rules") custom_time_before = datetime.date(2018, 8, 1) noncurrent_before = datetime.date(2018, 8, 1) + matches_prefix = ["storage-sys-test", "gcs-sys-test"] + matches_suffix = ["suffix-test"] with pytest.raises(exceptions.NotFound): storage_client.get_bucket(bucket_name) @@ -58,12 +60,17 @@ def test_bucket_lifecycle_rules(storage_client, buckets_to_delete): custom_time_before=custom_time_before, days_since_noncurrent_time=2, noncurrent_time_before=noncurrent_before, + matches_prefix=matches_prefix, + matches_suffix=matches_suffix, ) bucket.add_lifecycle_set_storage_class_rule( constants.COLDLINE_STORAGE_CLASS, is_live=False, matches_storage_class=[constants.NEARLINE_STORAGE_CLASS], ) + bucket.add_lifecycle_abort_incomplete_multipart_upload_rule( + age=42, + ) expected_rules = [ LifecycleRuleDelete( @@ -73,12 +80,17 @@ def test_bucket_lifecycle_rules(storage_client, buckets_to_delete): custom_time_before=custom_time_before, days_since_noncurrent_time=2, noncurrent_time_before=noncurrent_before, + matches_prefix=matches_prefix, + matches_suffix=matches_suffix, ), LifecycleRuleSetStorageClass( constants.COLDLINE_STORAGE_CLASS, is_live=False, matches_storage_class=[constants.NEARLINE_STORAGE_CLASS], ), + LifecycleRuleAbortIncompleteMultipartUpload( + age=42, + ), ] _helpers.retry_429_503(bucket.create)(location="us") @@ -87,12 +99,34 @@ def test_bucket_lifecycle_rules(storage_client, buckets_to_delete): assert bucket.name == bucket_name assert list(bucket.lifecycle_rules) == expected_rules + # Test modifying lifecycle rules + expected_rules[0] = LifecycleRuleDelete( + age=30, + matches_prefix=["new-prefix"], + matches_suffix=["new-suffix"], + ) + rules = list(bucket.lifecycle_rules) + rules[0]["condition"] = { + "age": 30, + "matchesPrefix": ["new-prefix"], + "matchesSuffix": ["new-suffix"], + } + bucket.lifecycle_rules = rules + bucket.patch() + + assert list(bucket.lifecycle_rules) == expected_rules + + # Test clearing lifecycle rules bucket.clear_lifecyle_rules() bucket.patch() assert list(bucket.lifecycle_rules) == [] +@pytest.mark.skipif( + _helpers.is_api_endpoint_override, + reason="Test does not yet support endpoint override", +) def test_bucket_update_labels(storage_client, buckets_to_delete): bucket_name = _helpers.unique_name("update-labels") bucket = _helpers.retry_429_503(storage_client.create_bucket)(bucket_name) @@ -117,7 +151,9 @@ def test_bucket_update_labels(storage_client, buckets_to_delete): def test_bucket_get_set_iam_policy( - storage_client, buckets_to_delete, service_account, + storage_client, + buckets_to_delete, + service_account, ): from google.cloud.storage.iam import STORAGE_OBJECT_VIEWER_ROLE from google.api_core.exceptions import BadRequest @@ -134,7 +170,7 @@ def test_bucket_get_set_iam_policy( policy = bucket.get_iam_policy(requested_policy_version=3) assert policy == policy_no_version - member = "serviceAccount:{}".format(storage_client.get_service_account_email()) + member = 
f"serviceAccount:{storage_client.get_service_account_email()}" binding_w_condition = { "role": STORAGE_OBJECT_VIEWER_ROLE, @@ -177,7 +213,10 @@ def test_bucket_crud_w_requester_pays(storage_client, buckets_to_delete, user_pr assert created.name == bucket_name assert created.requester_pays - with_user_project = storage_client.bucket(bucket_name, user_project=user_project,) + with_user_project = storage_client.bucket( + bucket_name, + user_project=user_project, + ) try: # Exercise 'buckets.get' w/ userProject. @@ -210,7 +249,8 @@ def test_bucket_acls_iam_w_user_project( ): bucket_name = _helpers.unique_name("acl-w-user-project") created = _helpers.retry_429_503(storage_client.create_bucket)( - bucket_name, requester_pays=True, + bucket_name, + requester_pays=True, ) buckets_to_delete.append(created) @@ -282,7 +322,10 @@ def test_bucket_acls_w_metageneration_match(storage_client, buckets_to_delete): def test_bucket_copy_blob( - storage_client, buckets_to_delete, blobs_to_delete, user_project, + storage_client, + buckets_to_delete, + blobs_to_delete, + user_project, ): payload = b"DEADBEEF" bucket_name = _helpers.unique_name("copy-blob") @@ -304,7 +347,10 @@ def test_bucket_copy_blob( def test_bucket_copy_blob_w_user_project( - storage_client, buckets_to_delete, blobs_to_delete, user_project, + storage_client, + buckets_to_delete, + blobs_to_delete, + user_project, ): payload = b"DEADBEEF" bucket_name = _helpers.unique_name("copy-w-requester-pays") @@ -330,7 +376,9 @@ def test_bucket_copy_blob_w_user_project( def test_bucket_copy_blob_w_generation_match( - storage_client, buckets_to_delete, blobs_to_delete, + storage_client, + buckets_to_delete, + blobs_to_delete, ): payload = b"DEADBEEF" bucket_name = _helpers.unique_name("generation-match") @@ -345,7 +393,10 @@ def test_bucket_copy_blob_w_generation_match( dest_bucket = storage_client.bucket(bucket_name) new_blob = dest_bucket.copy_blob( - blob, dest_bucket, "simple-copy", if_source_generation_match=blob.generation, + blob, + dest_bucket, + "simple-copy", + if_source_generation_match=blob.generation, ) blobs_to_delete.append(new_blob) @@ -353,13 +404,15 @@ def test_bucket_copy_blob_w_generation_match( def test_bucket_copy_blob_w_metageneration_match( - storage_client, buckets_to_delete, blobs_to_delete, + storage_client, + buckets_to_delete, + blobs_to_delete, ): payload = b"DEADBEEF" bucket_name = _helpers.unique_name("generation-match") - created = _helpers.retry_429_503(storage_client.create_bucket)( - bucket_name, requester_pays=True - ) + bucket = storage_client.bucket(bucket_name) + bucket.requester_pays = True + created = _helpers.retry_429_503(storage_client.create_bucket)(bucket) buckets_to_delete.append(created) assert created.name == bucket_name @@ -381,7 +434,10 @@ def test_bucket_copy_blob_w_metageneration_match( def test_bucket_get_blob_with_user_project( - storage_client, buckets_to_delete, blobs_to_delete, user_project, + storage_client, + buckets_to_delete, + blobs_to_delete, + user_project, ): blob_name = "blob-name" payload = b"DEADBEEF" @@ -413,7 +469,10 @@ def test_bucket_list_blobs(listable_bucket, listable_filenames): @_helpers.retry_failures def test_bucket_list_blobs_w_user_project( - storage_client, listable_bucket, listable_filenames, user_project, + storage_client, + listable_bucket, + listable_filenames, + user_project, ): with_user_project = storage_client.bucket( listable_bucket.name, user_project=user_project @@ -545,7 +604,8 @@ def test_bucket_list_blobs_hierarchy_third_level(hierarchy_bucket, hierarchy_fil 
@_helpers.retry_failures def test_bucket_list_blobs_hierarchy_w_include_trailing_delimiter( - hierarchy_bucket, hierarchy_filenames, + hierarchy_bucket, + hierarchy_filenames, ): expected_names = ["file01.txt", "parent/"] expected_prefixes = set(["parent/"]) @@ -561,10 +621,84 @@ def test_bucket_list_blobs_hierarchy_w_include_trailing_delimiter( assert iterator.prefixes == expected_prefixes -def test_bucket_w_retention_period( - storage_client, buckets_to_delete, blobs_to_delete, +@_helpers.retry_failures +def test_bucket_list_blobs_w_match_glob( + storage_client, + buckets_to_delete, + blobs_to_delete, ): - period_secs = 10 + bucket_name = _helpers.unique_name("w-matchglob") + bucket = _helpers.retry_429_503(storage_client.create_bucket)(bucket_name) + buckets_to_delete.append(bucket) + + payload = b"helloworld" + blob_names = ["foo/bar", "foo/baz", "foo/foobar", "foobar"] + for name in blob_names: + blob = bucket.blob(name) + blob.upload_from_string(payload) + blobs_to_delete.append(blob) + + match_glob_results = { + "foo*bar": ["foobar"], + "foo**bar": ["foo/bar", "foo/foobar", "foobar"], + "**/foobar": ["foo/foobar", "foobar"], + "*/ba[rz]": ["foo/bar", "foo/baz"], + "*/ba[!a-y]": ["foo/baz"], + "**/{foobar,baz}": ["foo/baz", "foo/foobar", "foobar"], + "foo/{foo*,*baz}": ["foo/baz", "foo/foobar"], + } + for match_glob, expected_names in match_glob_results.items(): + blob_iter = bucket.list_blobs(match_glob=match_glob) + blobs = list(blob_iter) + assert [blob.name for blob in blobs] == expected_names + + +def test_bucket_list_blobs_include_managed_folders( + storage_client, + buckets_to_delete, + blobs_to_delete, + hierarchy_filenames, +): + bucket_name = _helpers.unique_name("ubla-mf") + bucket = storage_client.bucket(bucket_name) + bucket.iam_configuration.uniform_bucket_level_access_enabled = True + _helpers.retry_429_503(bucket.create)() + buckets_to_delete.append(bucket) + + payload = b"helloworld" + for filename in hierarchy_filenames: + blob = bucket.blob(filename) + blob.upload_from_string(payload) + blobs_to_delete.append(blob) + + # Make API call to create a managed folder. + # TODO: change to use storage control client once available. + path = f"/b/{bucket_name}/managedFolders" + properties = {"name": "managedfolder1"} + storage_client._post_resource(path, properties) + + expected_prefixes = set(["parent/"]) + blob_iter = bucket.list_blobs(delimiter="/") + list(blob_iter) + assert blob_iter.prefixes == expected_prefixes + + # Test that managed folders are only included when IncludeFoldersAsPrefixes is set. + expected_prefixes = set(["parent/", "managedfolder1/"]) + blob_iter = bucket.list_blobs(delimiter="/", include_folders_as_prefixes=True) + list(blob_iter) + assert blob_iter.prefixes == expected_prefixes + + # Cleanup: API call to delete a managed folder. + # TODO: change to use storage control client once available. + path = f"/b/{bucket_name}/managedFolders/managedfolder1" + storage_client._delete_resource(path) + + +def test_bucket_update_retention_period( + storage_client, + buckets_to_delete, +): + period_secs = 3 bucket_name = _helpers.unique_name("w-retention-period") bucket = _helpers.retry_429_503(storage_client.create_bucket)(bucket_name) buckets_to_delete.append(bucket) @@ -573,55 +707,71 @@ def test_bucket_w_retention_period( bucket.default_event_based_hold = False bucket.patch() + # Changes to the bucket will be readable immediately after writing, + # but configuration changes may take time to propagate. 
+ _helpers.retry_has_retention_period(bucket.reload)() + assert bucket.retention_period == period_secs assert isinstance(bucket.retention_policy_effective_time, datetime.datetime) assert not bucket.default_event_based_hold assert not bucket.retention_policy_locked - blob_name = "test-blob" - payload = b"DEADBEEF" - blob = bucket.blob(blob_name) - blob.upload_from_string(payload) - - blobs_to_delete.append(blob) - - other = bucket.get_blob(blob_name) - - assert not other.event_based_hold - assert not other.temporary_hold - assert isinstance(other.retention_expiration_time, datetime.datetime) - - with pytest.raises(exceptions.Forbidden): - other.delete() - bucket.retention_period = None bucket.patch() + # Changes to the bucket will be readable immediately after writing, + # but configuration changes may take time to propagate. + _helpers.retry_no_retention_period(bucket.reload)() + assert bucket.retention_period is None assert bucket.retention_policy_effective_time is None assert not bucket.default_event_based_hold assert not bucket.retention_policy_locked - _helpers.retry_no_event_based_hold(other.reload)() - assert not other.event_based_hold - assert not other.temporary_hold - assert other.retention_expiration_time is None +def test_delete_object_bucket_w_retention_period( + storage_client, + buckets_to_delete, + blobs_to_delete, +): + # Create a bucket with retention period. + period_secs = 12 + bucket = storage_client.bucket(_helpers.unique_name("w-retention-period")) + bucket.retention_period = period_secs + bucket.default_event_based_hold = False + bucket = _helpers.retry_429_503(storage_client.create_bucket)(bucket) + buckets_to_delete.append(bucket) - other.delete() + _helpers.retry_has_retention_period(bucket.reload)() + assert bucket.retention_period == period_secs + assert isinstance(bucket.retention_policy_effective_time, datetime.datetime) + + payload = b"DEADBEEF" + blob = bucket.blob(_helpers.unique_name("w-retention")) + blob.upload_from_string(payload) + blobs_to_delete.append(blob) + + _helpers.retry_has_retention_expiration(blob.reload)() + assert isinstance(blob.retention_expiration_time, datetime.datetime) + assert not blob.event_based_hold + assert not blob.temporary_hold + + # Attempts to delete objects whose age is less than the retention period should fail. + with pytest.raises(exceptions.Forbidden): + blob.delete() + + # Object can be deleted once it reaches the age defined in the retention policy. + _helpers.await_config_changes_propagate(sec=period_secs) + blob.delete() blobs_to_delete.pop() def test_bucket_w_default_event_based_hold( - storage_client, buckets_to_delete, blobs_to_delete, + storage_client, + blobs_to_delete, + default_ebh_bucket, ): - bucket_name = _helpers.unique_name("w-def-ebh") - bucket = _helpers.retry_429_503(storage_client.create_bucket)(bucket_name) - buckets_to_delete.append(bucket) - - bucket.default_event_based_hold = True - bucket.patch() - + bucket = storage_client.get_bucket(default_ebh_bucket) assert bucket.default_event_based_hold assert bucket.retention_period is None assert bucket.retention_policy_effective_time is None @@ -655,11 +805,14 @@ def test_bucket_w_default_event_based_hold( assert bucket.retention_policy_effective_time is None assert not bucket.retention_policy_locked + # Changes to the bucket will be readable immediately after writing, + # but configuration changes may take time to propagate. 
+ _helpers.await_config_changes_propagate() + blob.upload_from_string(payload) # https://github.com/googleapis/python-storage/issues/435 - if blob.event_based_hold: - _helpers.retry_no_event_based_hold(blob.reload)() + _helpers.retry_no_event_based_hold(blob.reload)() assert not blob.event_based_hold assert not blob.temporary_hold @@ -670,7 +823,9 @@ def test_bucket_w_default_event_based_hold( def test_blob_w_temporary_hold( - storage_client, buckets_to_delete, blobs_to_delete, + storage_client, + buckets_to_delete, + blobs_to_delete, ): bucket_name = _helpers.unique_name("w-tmp-hold") bucket = _helpers.retry_429_503(storage_client.create_bucket)(bucket_name) @@ -702,7 +857,8 @@ def test_blob_w_temporary_hold( def test_bucket_lock_retention_policy( - storage_client, buckets_to_delete, + storage_client, + buckets_to_delete, ): period_secs = 10 bucket_name = _helpers.unique_name("loc-ret-policy") @@ -727,8 +883,14 @@ def test_bucket_lock_retention_policy( bucket.patch() +@pytest.mark.skipif( + _helpers.is_api_endpoint_override, + reason="Test does not yet support endpoint override", +) def test_new_bucket_w_ubla( - storage_client, buckets_to_delete, blobs_to_delete, + storage_client, + buckets_to_delete, + blobs_to_delete, ): bucket_name = _helpers.unique_name("new-w-ubla") bucket = storage_client.bucket(bucket_name) @@ -765,7 +927,9 @@ def test_new_bucket_w_ubla( def test_ubla_set_unset_preserves_acls( - storage_client, buckets_to_delete, blobs_to_delete, + storage_client, + buckets_to_delete, + blobs_to_delete, ): bucket_name = _helpers.unique_name("ubla-acls") bucket = _helpers.retry_429_503(storage_client.create_bucket)(bucket_name) @@ -794,6 +958,7 @@ def test_ubla_set_unset_preserves_acls( # Clear UBLA bucket.iam_configuration.uniform_bucket_level_access_enabled = False bucket.patch() + _helpers.await_config_changes_propagate() # Query ACLs after clearing UBLA bucket.acl.reload() @@ -806,7 +971,9 @@ def test_ubla_set_unset_preserves_acls( def test_new_bucket_created_w_inherited_pap( - storage_client, buckets_to_delete, blobs_to_delete, + storage_client, + buckets_to_delete, + blobs_to_delete, ): from google.cloud.storage import constants @@ -834,6 +1001,9 @@ def test_new_bucket_created_w_inherited_pap( bucket.iam_configuration.uniform_bucket_level_access_enabled = False bucket.patch() + + _helpers.await_config_changes_propagate() + assert ( bucket.iam_configuration.public_access_prevention == constants.PUBLIC_ACCESS_PREVENTION_ENFORCED @@ -857,7 +1027,9 @@ def test_new_bucket_created_w_inherited_pap( @pytest.mark.skip(reason="Unspecified PAP is changing to inherited") def test_new_bucket_created_w_enforced_pap( - storage_client, buckets_to_delete, blobs_to_delete, + storage_client, + buckets_to_delete, + blobs_to_delete, ): from google.cloud.storage import constants @@ -885,3 +1057,211 @@ def test_new_bucket_created_w_enforced_pap( constants.PUBLIC_ACCESS_PREVENTION_INHERITED, ] assert not bucket.iam_configuration.uniform_bucket_level_access_enabled + + +@pytest.mark.skipif( + _helpers.is_api_endpoint_override, + reason="Test does not yet support endpoint override", +) +def test_new_bucket_with_rpo( + storage_client, + buckets_to_delete, + blobs_to_delete, +): + from google.cloud.storage import constants + + bucket_name = _helpers.unique_name("new-w-turbo-replication") + bucket = storage_client.create_bucket(bucket_name, location="NAM4") + buckets_to_delete.append(bucket) + + assert bucket.rpo == constants.RPO_DEFAULT + + bucket.rpo = 
constants.RPO_ASYNC_TURBO + bucket.patch() + + bucket_from_server = storage_client.get_bucket(bucket_name) + + assert bucket_from_server.rpo == constants.RPO_ASYNC_TURBO + + +def test_new_bucket_with_autoclass( + storage_client, + buckets_to_delete, +): + from google.cloud.storage import constants + + # Autoclass can be enabled via bucket create + bucket_name = _helpers.unique_name("new-w-autoclass") + bucket_obj = storage_client.bucket(bucket_name) + bucket_obj.autoclass_enabled = True + bucket = storage_client.create_bucket(bucket_obj) + previous_toggle_time = bucket.autoclass_toggle_time + buckets_to_delete.append(bucket) + + # Autoclass terminal_storage_class is defaulted to NEARLINE if not specified + assert bucket.autoclass_enabled is True + assert bucket.autoclass_terminal_storage_class == constants.NEARLINE_STORAGE_CLASS + + # Autoclass can be enabled/disabled via bucket patch + bucket.autoclass_enabled = False + bucket.patch(if_metageneration_match=bucket.metageneration) + + assert bucket.autoclass_enabled is False + assert bucket.autoclass_toggle_time != previous_toggle_time + + +def test_bucket_delete_force(storage_client): + bucket_name = _helpers.unique_name("version-disabled") + bucket_obj = storage_client.bucket(bucket_name) + bucket = storage_client.create_bucket(bucket_obj) + + BLOB_NAME = "my_object" + blob = bucket.blob(BLOB_NAME) + blob.upload_from_string("abcd") + blob.upload_from_string("efgh") + + blobs = bucket.list_blobs(versions=True) + counter = 0 + for blob in blobs: + counter += 1 + assert blob.name == BLOB_NAME + assert counter == 1 + + bucket.delete(force=True) # Will fail with 409 if blobs aren't deleted + + +def test_bucket_delete_force_works_with_versions(storage_client): + bucket_name = _helpers.unique_name("version-enabled") + bucket_obj = storage_client.bucket(bucket_name) + bucket_obj.versioning_enabled = True + bucket = storage_client.create_bucket(bucket_obj) + assert bucket.versioning_enabled + + BLOB_NAME = "my_versioned_object" + blob = bucket.blob(BLOB_NAME) + blob.upload_from_string("abcd") + blob.upload_from_string("efgh") + + blobs = bucket.list_blobs(versions=True) + counter = 0 + for blob in blobs: + counter += 1 + assert blob.name == BLOB_NAME + assert counter == 2 + + bucket.delete(force=True) # Will fail with 409 if versions aren't deleted + + +def test_config_autoclass_w_existing_bucket( + storage_client, + buckets_to_delete, +): + from google.cloud.storage import constants + + bucket_name = _helpers.unique_name("for-autoclass") + bucket = storage_client.create_bucket(bucket_name) + buckets_to_delete.append(bucket) + assert bucket.autoclass_enabled is False + assert bucket.autoclass_toggle_time is None + assert bucket.autoclass_terminal_storage_class is None + assert bucket.autoclass_terminal_storage_class_update_time is None + + # Enable Autoclass on existing buckets with terminal_storage_class set to ARCHIVE + bucket.autoclass_enabled = True + bucket.autoclass_terminal_storage_class = constants.ARCHIVE_STORAGE_CLASS + bucket.patch(if_metageneration_match=bucket.metageneration) + previous_tsc_update_time = bucket.autoclass_terminal_storage_class_update_time + assert bucket.autoclass_enabled is True + assert bucket.autoclass_terminal_storage_class == constants.ARCHIVE_STORAGE_CLASS + + # Configure Autoclass terminal_storage_class to NEARLINE + bucket.autoclass_terminal_storage_class = constants.NEARLINE_STORAGE_CLASS + bucket.patch(if_metageneration_match=bucket.metageneration) + assert bucket.autoclass_enabled is True + assert 
bucket.autoclass_terminal_storage_class == constants.NEARLINE_STORAGE_CLASS + assert ( + bucket.autoclass_terminal_storage_class_update_time != previous_tsc_update_time + ) + + +def test_soft_delete_policy( + storage_client, + buckets_to_delete, +): + from google.cloud.storage.bucket import SoftDeletePolicy + + # Create a bucket with soft delete policy. + duration_secs = 7 * 86400 + bucket = storage_client.bucket(_helpers.unique_name("w-soft-delete")) + bucket.soft_delete_policy.retention_duration_seconds = duration_secs + bucket = _helpers.retry_429_503(storage_client.create_bucket)(bucket) + buckets_to_delete.append(bucket) + + policy = bucket.soft_delete_policy + assert isinstance(policy, SoftDeletePolicy) + assert policy.retention_duration_seconds == duration_secs + assert isinstance(policy.effective_time, datetime.datetime) + + # Insert an object and get object metadata prior soft-deleted. + payload = b"DEADBEEF" + blob_name = _helpers.unique_name("soft-delete") + blob = bucket.blob(blob_name) + blob.upload_from_string(payload) + + blob = bucket.get_blob(blob_name) + gen = blob.generation + assert blob.soft_delete_time is None + assert blob.hard_delete_time is None + + # Delete the object to enter soft-deleted state. + blob.delete() + + iter_default = bucket.list_blobs() + assert len(list(iter_default)) == 0 + iter_w_soft_delete = bucket.list_blobs(soft_deleted=True) + assert len(list(iter_w_soft_delete)) > 0 + + # Get the soft-deleted object. + soft_deleted_blob = bucket.get_blob(blob_name, generation=gen, soft_deleted=True) + assert soft_deleted_blob.soft_delete_time is not None + assert soft_deleted_blob.hard_delete_time is not None + + # Restore the soft-deleted object. + restored_blob = bucket.restore_blob(blob_name, generation=gen) + assert restored_blob.exists() is True + assert restored_blob.generation != gen + + # Patch the soft delete policy on an existing bucket. 
+ new_duration_secs = 10 * 86400 + bucket.soft_delete_policy.retention_duration_seconds = new_duration_secs + bucket.patch() + assert bucket.soft_delete_policy.retention_duration_seconds == new_duration_secs + + +def test_new_bucket_with_hierarchical_namespace( + storage_client, + buckets_to_delete, +): + # Test new bucket without specifying hierarchical namespace + bucket_name = _helpers.unique_name("new-wo-hns") + bucket_obj = storage_client.bucket(bucket_name) + bucket = storage_client.create_bucket(bucket_obj) + buckets_to_delete.append(bucket) + assert bucket.hierarchical_namespace_enabled is None + + # Test new bucket with hierarchical namespace disabled + bucket_name = _helpers.unique_name("new-hns-disabled") + bucket_obj = storage_client.bucket(bucket_name) + bucket_obj.hierarchical_namespace_enabled = False + bucket = storage_client.create_bucket(bucket_obj) + buckets_to_delete.append(bucket) + assert bucket.hierarchical_namespace_enabled is False + + # Test new bucket with hierarchical namespace enabled + bucket_name = _helpers.unique_name("new-hns-enabled") + bucket_obj = storage_client.bucket(bucket_name) + bucket_obj.hierarchical_namespace_enabled = True + bucket_obj.iam_configuration.uniform_bucket_level_access_enabled = True + bucket = storage_client.create_bucket(bucket_obj) + buckets_to_delete.append(bucket) + assert bucket.hierarchical_namespace_enabled is True diff --git a/tests/system/test_client.py b/tests/system/test_client.py index f531f4bb4..6b3798c83 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -12,8 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import datetime import io import re +import os import tempfile import pytest @@ -23,9 +25,15 @@ from . 
import _helpers +dual_data_loc_1 = os.getenv("DUAL_REGION_LOC_1", "US-EAST1") +dual_data_loc_2 = os.getenv("DUAL_REGION_LOC_2", "US-WEST1") public_bucket = "gcp-public-data-landsat" +@pytest.mark.skipif( + _helpers.is_api_endpoint_override, + reason="Test does not yet support endpoint override", +) @vpcsc_config.skip_if_inside_vpcsc def test_anonymous_client_access_to_public_bucket(): from google.cloud.storage.client import Client @@ -33,12 +41,17 @@ def test_anonymous_client_access_to_public_bucket(): anonymous_client = Client.create_anonymous_client() bucket = anonymous_client.bucket(public_bucket) (blob,) = _helpers.retry_429_503(anonymous_client.list_blobs)( - bucket, max_results=1, + bucket, + max_results=1, ) with tempfile.TemporaryFile() as stream: _helpers.retry_429_503(blob.download_to_file)(stream) +@pytest.mark.skipif( + _helpers.is_api_endpoint_override, + reason="Test does not yet support endpoint override", +) def test_get_service_account_email(storage_client, service_account): domain = "gs-project-accounts.iam.gserviceaccount.com" email = storage_client.get_service_account_email() @@ -63,6 +76,28 @@ def test_create_bucket_simple(storage_client, buckets_to_delete): assert created.name == new_bucket_name +def test_create_bucket_dual_region(storage_client, buckets_to_delete): + from google.cloud.storage.constants import DUAL_REGION_LOCATION_TYPE + + new_bucket_name = _helpers.unique_name("dual-region-bucket") + location = "US" + + data_locations = [dual_data_loc_1, dual_data_loc_2] + + with pytest.raises(exceptions.NotFound): + storage_client.get_bucket(new_bucket_name) + + created = _helpers.retry_429_503(storage_client.create_bucket)( + new_bucket_name, location=location, data_locations=data_locations + ) + buckets_to_delete.append(created) + + assert created.name == new_bucket_name + assert created.location == location + assert created.location_type == DUAL_REGION_LOCATION_TYPE + assert created.data_locations == data_locations + + def test_list_buckets(storage_client, buckets_to_delete): buckets_to_create = [ _helpers.unique_name("new"), @@ -85,7 +120,10 @@ def test_list_buckets(storage_client, buckets_to_delete): def test_download_blob_to_file_w_uri( - storage_client, shared_bucket, blobs_to_delete, service_account, + storage_client, + shared_bucket, + blobs_to_delete, + service_account, ): blob = shared_bucket.blob("MyBuffer") payload = b"Hello World" @@ -93,7 +131,6 @@ def test_download_blob_to_file_w_uri( blobs_to_delete.append(blob) with tempfile.NamedTemporaryFile() as temp_f: - with open(temp_f.name, "wb") as file_obj: storage_client.download_blob_to_file( "gs://" + shared_bucket.name + "/MyBuffer", file_obj @@ -106,7 +143,10 @@ def test_download_blob_to_file_w_uri( def test_download_blob_to_file_w_etag( - storage_client, shared_bucket, blobs_to_delete, service_account, + storage_client, + shared_bucket, + blobs_to_delete, + service_account, ): filename = "kittens" blob = shared_bucket.blob(filename) @@ -140,6 +180,82 @@ def test_download_blob_to_file_w_etag( buffer = io.BytesIO() storage_client.download_blob_to_file( - "gs://" + shared_bucket.name + "/" + filename, buffer, if_etag_match=blob.etag, + "gs://" + shared_bucket.name + "/" + filename, + buffer, + if_etag_match=blob.etag, ) assert buffer.getvalue() == payload + + +@pytest.mark.skipif( + _helpers.is_api_endpoint_override, + reason="Credentials not yet supported in preprod testing.", +) +def test_client_universe_domain( + universe_domain_client, + test_universe_location, + buckets_to_delete, + 
blobs_to_delete, +): + bucket_name = _helpers.unique_name("gcp-systest-ud") + ud_bucket = universe_domain_client.create_bucket( + bucket_name, location=test_universe_location + ) + buckets_to_delete.append(ud_bucket) + + blob_name = _helpers.unique_name("gcp-systest-ud") + blob = ud_bucket.blob(blob_name) + payload = b"The quick brown fox jumps over the lazy dog" + blob.upload_from_string(payload) + blobs_to_delete.append(blob) + + with tempfile.NamedTemporaryFile() as temp_f: + with open(temp_f.name, "wb") as file_obj: + universe_domain_client.download_blob_to_file(blob, file_obj) + with open(temp_f.name, "rb") as file_obj: + stored_contents = file_obj.read() + + assert stored_contents == payload + + +def test_restore_bucket( + storage_client, + buckets_to_delete, +): + from google.cloud.storage.bucket import SoftDeletePolicy + + # Create a bucket with soft delete policy. + duration_secs = 7 * 86400 + bucket = storage_client.bucket(_helpers.unique_name("w-soft-delete")) + bucket.soft_delete_policy.retention_duration_seconds = duration_secs + bucket = _helpers.retry_429_503(storage_client.create_bucket)(bucket) + buckets_to_delete.append(bucket) + + policy = bucket.soft_delete_policy + assert isinstance(policy, SoftDeletePolicy) + assert policy.retention_duration_seconds == duration_secs + assert isinstance(policy.effective_time, datetime.datetime) + + # Record the bucket's name and generation + name = bucket.name + generation = bucket.generation + assert generation is not None + + # Delete the bucket, then use the generation to get a reference to it again. + _helpers.retry_429_503(bucket.delete)() + soft_deleted_bucket = _helpers.retry_429_503(storage_client.get_bucket)( + name, generation=generation, soft_deleted=True + ) + assert soft_deleted_bucket.name == name + assert soft_deleted_bucket.generation == generation + assert soft_deleted_bucket.soft_delete_time is not None + assert soft_deleted_bucket.hard_delete_time is not None + + # Restore the bucket. + restored_bucket = _helpers.retry_429_503(storage_client.restore_bucket)( + name, generation=generation + ) + assert restored_bucket.name == name + assert restored_bucket.generation == generation + assert restored_bucket.soft_delete_time is None + assert restored_bucket.hard_delete_time is None diff --git a/tests/system/test_fileio.py b/tests/system/test_fileio.py index 79bf0c1eb..ba12d3bc2 100644 --- a/tests/system/test_fileio.py +++ b/tests/system/test_fileio.py @@ -14,18 +14,24 @@ # limitations under the License. +import pytest + +from google.cloud.storage.fileio import CHUNK_SIZE_MULTIPLE from .test_blob import _check_blob_hash def test_blobwriter_and_blobreader( - shared_bucket, blobs_to_delete, file_data, service_account, + shared_bucket, + blobs_to_delete, + file_data, + service_account, ): blob = shared_bucket.blob("LargeFile") # Test BlobWriter works. info = file_data["big"] with open(info["path"], "rb") as file_obj: - with blob.open("wb", chunk_size=256 * 1024) as writer: + with blob.open("wb", chunk_size=256 * 1024, if_generation_match=0) as writer: writer.write(file_obj.read(100)) writer.write(file_obj.read(256 * 1024)) writer.write(file_obj.read()) @@ -49,16 +55,18 @@ def test_blobwriter_and_blobreader( def test_blobwriter_and_blobreader_text_mode( - shared_bucket, blobs_to_delete, service_account, + shared_bucket, + blobs_to_delete, + service_account, ): blob = shared_bucket.blob("MultibyteTextFile") # Construct a multibyte text_data sample file. 
- base_multibyte_text_string = u"abcde あいうえお line: " + base_multibyte_text_string = "abcde あいうえお line: " text_data = "\n".join([base_multibyte_text_string + str(x) for x in range(100)]) # Test text BlobWriter works. - with blob.open("wt") as writer: + with blob.open("wt", if_generation_match=0) as writer: writer.write(text_data[:100]) writer.write(text_data[100:]) blobs_to_delete.append(blob) @@ -71,3 +79,63 @@ def test_blobwriter_and_blobreader_text_mode( assert text_data[:100] == reader.read(100) assert 0 == reader.seek(0) assert reader.read() == text_data + + +def test_blobwriter_exit( + shared_bucket, + blobs_to_delete, + service_account, +): + blob = shared_bucket.blob("NeverUploaded") + + # no-op when nothing was uploaded yet + with pytest.raises(ValueError, match="SIGTERM received"): + with blob.open("wb") as writer: + writer.write(b"first chunk") # not yet uploaded + raise ValueError("SIGTERM received") # no upload to cancel in __exit__ + # blob should not exist + assert not blob.exists() + + # unhandled exceptions should cancel the upload + with pytest.raises(ValueError, match="SIGTERM received"): + with blob.open("wb", chunk_size=CHUNK_SIZE_MULTIPLE) as writer: + writer.write(b"first chunk") # not yet uploaded + writer.write(bytes(CHUNK_SIZE_MULTIPLE)) # uploaded + raise ValueError("SIGTERM received") # upload is cancelled in __exit__ + # blob should not exist + assert not blob.exists() + + # handled exceptions should not cancel the upload + with blob.open("wb", chunk_size=CHUNK_SIZE_MULTIPLE) as writer: + writer.write(b"first chunk") # not yet uploaded + writer.write(bytes(CHUNK_SIZE_MULTIPLE)) # uploaded + try: + raise ValueError("This is fine") + except ValueError: + pass # no exception context passed to __exit__ + blobs_to_delete.append(blob) + # blob should have been uploaded + assert blob.exists() + + +def test_blobreader_w_raw_download( + shared_bucket, + blobs_to_delete, + file_data, +): + blob = shared_bucket.blob("LargeFile") + info = file_data["big"] + with open(info["path"], "rb") as file_obj: + with blob.open("wb", chunk_size=256 * 1024, if_generation_match=0) as writer: + writer.write(file_obj.read()) + blobs_to_delete.append(blob) + + # Test BlobReader read and seek handles raw downloads. + with open(info["path"], "rb") as file_obj: + with blob.open("rb", chunk_size=256 * 1024, raw_download=True) as reader: + reader.seek(0) + file_obj.seek(0) + assert file_obj.read() == reader.read() + # End of file reached; further reads should be blank but not + # raise an error. + assert reader.read() == b"" diff --git a/tests/system/test_hmac_key_metadata.py b/tests/system/test_hmac_key_metadata.py index 705b1350b..d91e613b1 100644 --- a/tests/system/test_hmac_key_metadata.py +++ b/tests/system/test_hmac_key_metadata.py @@ -16,8 +16,6 @@ import pytest -from google.cloud import _helpers as _cloud_helpers - from . import _helpers @@ -32,9 +30,12 @@ def ensure_hmac_key_deleted(hmac_key): @pytest.fixture def scrubbed_hmac_keys(storage_client): + from google.cloud.storage._helpers import _NOW + from google.cloud.storage._helpers import _UTC + before_hmac_keys = set(storage_client.list_hmac_keys()) - now = datetime.datetime.utcnow().replace(tzinfo=_cloud_helpers.UTC) + now = _NOW(_UTC) yesterday = now - datetime.timedelta(days=1) # Delete any HMAC keys older than a day. 
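[Editor's note, not part of the patch] Throughout these test updates, naive datetime.datetime.utcnow() calls and the google.cloud._helpers.UTC constant are replaced by the library's private _NOW(_UTC) helper. Application code should not import those private names; a roughly equivalent, timezone-aware pattern for building cutoffs such as the one-day HMAC-key scrub above, or an expiration offset, is sketched below with placeholder values:

import datetime

# Timezone-aware "now", the stdlib equivalent of the tests' _NOW(_UTC) helper
# (datetime.datetime.utcnow() is deprecated as of Python 3.12).
now = datetime.datetime.now(datetime.timezone.utc)

# Cutoff for scrubbing resources older than a day, as in the HMAC-key fixture.
yesterday = now - datetime.timedelta(days=1)

# Expiration one hour out; strip tzinfo only where an API expects a naive
# UTC timestamp, mirroring the _NOW(_UTC).replace(tzinfo=None) calls above.
expiration = now.replace(tzinfo=None) + datetime.timedelta(hours=1)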
diff --git a/tests/system/test_kms_integration.py b/tests/system/test_kms_integration.py index 67dc5351f..619ffe110 100644 --- a/tests/system/test_kms_integration.py +++ b/tests/system/test_kms_integration.py @@ -14,96 +14,11 @@ import os -import pytest - -from google.api_core import exceptions -from google.cloud import kms from . import _helpers keyring_name = "gcs-test" default_key_name = "gcs-test" alt_key_name = "gcs-test-alternate" -_key_name_format = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}" - - -def _kms_key_name(client, bucket, key_name): - return _key_name_format.format( - client.project, bucket.location.lower(), keyring_name, key_name, - ) - - -@pytest.fixture(scope="session") -def kms_bucket_name(): - return _helpers.unique_name("gcp-systest-kms") - - -@pytest.fixture(scope="session") -def kms_bucket(storage_client, kms_bucket_name, no_mtls): - bucket = _helpers.retry_429_503(storage_client.create_bucket)(kms_bucket_name) - - yield bucket - - _helpers.delete_bucket(bucket) - - -@pytest.fixture(scope="session") -def kms_client(): - return kms.KeyManagementServiceClient() - - -@pytest.fixture(scope="function") -def keyring(storage_client, kms_bucket, kms_client): - project = storage_client.project - location = kms_bucket.location.lower() - purpose = kms.enums.CryptoKey.CryptoKeyPurpose.ENCRYPT_DECRYPT - - # If the keyring doesn't exist create it. - keyring_path = kms_client.key_ring_path(project, location, keyring_name) - - try: - kms_client.get_key_ring(keyring_path) - except exceptions.NotFound: - parent = kms_client.location_path(project, location) - kms_client.create_key_ring(parent, keyring_name, {}) - - # Mark this service account as an owner of the new keyring - service_account_email = storage_client.get_service_account_email() - policy = { - "bindings": [ - { - "role": "roles/cloudkms.cryptoKeyEncrypterDecrypter", - "members": ["serviceAccount:" + service_account_email], - } - ] - } - kms_client.set_iam_policy(keyring_path, policy) - - # Populate the keyring with the keys we use in the tests - key_names = [ - "gcs-test", - "gcs-test-alternate", - "explicit-kms-key-name", - "default-kms-key-name", - "override-default-kms-key-name", - "alt-default-kms-key-name", - ] - for key_name in key_names: - key_path = kms_client.crypto_key_path(project, location, keyring_name, key_name) - try: - kms_client.get_crypto_key(key_path) - except exceptions.NotFound: - key = {"purpose": purpose} - kms_client.create_crypto_key(keyring_path, key_name, key) - - -@pytest.fixture(scope="session") -def kms_key_name(storage_client, kms_bucket): - return _kms_key_name(storage_client, kms_bucket, default_key_name) - - -@pytest.fixture(scope="session") -def alt_kms_key_name(storage_client, kms_bucket): - return _kms_key_name(storage_client, kms_bucket, alt_key_name) def test_blob_w_explicit_kms_key_name( @@ -127,13 +42,13 @@ def test_blob_w_explicit_kms_key_name( @_helpers.retry_failures def test_bucket_w_default_kms_key_name( - kms_bucket, blobs_to_delete, kms_key_name, alt_kms_key_name, file_data, + kms_bucket, + blobs_to_delete, + kms_key_name, + alt_kms_key_name, + file_data, ): blob_name = "default-kms-key-name" - override_blob_name = "override-default-kms-key-name" - alt_blob_name = "alt-default-kms-key-name" - cleartext_blob_name = "cleartext" - info = file_data["simple"] with open(info["path"], "rb") as file_obj: @@ -143,6 +58,10 @@ def test_bucket_w_default_kms_key_name( kms_bucket.patch() assert kms_bucket.default_kms_key_name == kms_key_name + # Changes to the bucket will be 
readable immediately after writing, + # but configuration changes may take time to propagate. + _helpers.await_config_changes_propagate() + defaulted_blob = kms_bucket.blob(blob_name) defaulted_blob.upload_from_filename(info["path"]) blobs_to_delete.append(defaulted_blob) @@ -152,38 +71,22 @@ def test_bucket_w_default_kms_key_name( # We don't know the current version of the key. assert defaulted_blob.kms_key_name.startswith(kms_key_name) - override_blob = kms_bucket.blob(override_blob_name, kms_key_name=alt_kms_key_name) - override_blob.upload_from_filename(info["path"]) - blobs_to_delete.append(override_blob) - - assert override_blob.download_as_bytes() == payload - # We don't know the current version of the key. - assert override_blob.kms_key_name.startswith(alt_kms_key_name) - + # Test changing the default KMS key. kms_bucket.default_kms_key_name = alt_kms_key_name kms_bucket.patch() + assert kms_bucket.default_kms_key_name == alt_kms_key_name - alt_blob = kms_bucket.blob(alt_blob_name) - alt_blob.upload_from_filename(info["path"]) - blobs_to_delete.append(alt_blob) - - assert alt_blob.download_as_bytes() == payload - # We don't know the current version of the key. - assert alt_blob.kms_key_name.startswith(alt_kms_key_name) - + # Test removing the default KMS key. kms_bucket.default_kms_key_name = None kms_bucket.patch() - - cleartext_blob = kms_bucket.blob(cleartext_blob_name) - cleartext_blob.upload_from_filename(info["path"]) - blobs_to_delete.append(cleartext_blob) - - assert cleartext_blob.download_as_bytes() == payload - assert cleartext_blob.kms_key_name is None + assert kms_bucket.default_kms_key_name is None def test_blob_rewrite_rotate_csek_to_cmek( - kms_bucket, blobs_to_delete, kms_key_name, file_data, + kms_bucket, + blobs_to_delete, + kms_key_name, + file_data, ): blob_name = "rotating-keys" source_key = os.urandom(32) @@ -214,11 +117,26 @@ def test_blob_rewrite_rotate_csek_to_cmek( assert dest.download_as_bytes() == source_data + # Test existing kmsKeyName version is ignored in the rewrite request + dest = kms_bucket.get_blob(blob_name) + source = kms_bucket.get_blob(blob_name) + token, rewritten, total = dest.rewrite(source) + + while token is not None: + token, rewritten, total = dest.rewrite(source, token=token) + + assert rewritten == len(source_data) + assert dest.download_as_bytes() == source_data + def test_blob_upload_w_bucket_cmek_enabled( - kms_bucket, blobs_to_delete, kms_key_name, file_data, + kms_bucket, + blobs_to_delete, + kms_key_name, + alt_kms_key_name, ): blob_name = "test-blob" + override_blob_name = "override-default-kms-key-name" payload = b"DEADBEEF" alt_payload = b"NEWDEADBEEF" @@ -226,19 +144,29 @@ def test_blob_upload_w_bucket_cmek_enabled( kms_bucket.patch() assert kms_bucket.default_kms_key_name == kms_key_name + # Changes to the bucket will be readable immediately after writing, + # but configuration changes may take time to propagate. + _helpers.await_config_changes_propagate() + blob = kms_bucket.blob(blob_name) blob.upload_from_string(payload) blobs_to_delete.append(blob) _helpers.retry_429_harder(_helpers.retry_has_kms_key_name(blob.reload))() - # We don't know the current version of the key. assert blob.kms_key_name.startswith(kms_key_name) blob.upload_from_string(alt_payload, if_generation_match=blob.generation) - assert blob.download_as_bytes() == alt_payload + # Test the specific key is used to encrypt the object if you have both + # a default KMS key set on your bucket and a specific key included in your request. 
+ override_blob = kms_bucket.blob(override_blob_name, kms_key_name=alt_kms_key_name) + override_blob.upload_from_string(payload) + blobs_to_delete.append(override_blob) + + assert override_blob.download_as_bytes() == payload + assert override_blob.kms_key_name.startswith(alt_kms_key_name) + kms_bucket.default_kms_key_name = None _helpers.retry_429_harder(kms_bucket.patch)() - assert kms_bucket.default_kms_key_name is None diff --git a/tests/system/test_notification.py b/tests/system/test_notification.py index 6c49064aa..f52ae3219 100644 --- a/tests/system/test_notification.py +++ b/tests/system/test_notification.py @@ -54,7 +54,7 @@ def topic_name(): @pytest.fixture(scope="session") def topic_path(storage_client, topic_name): - return "projects/{}/topics/{}".format(storage_client.project, topic_name) + return f"projects/{storage_client.project}/topics/{topic_name}" @pytest.fixture(scope="session") @@ -64,13 +64,16 @@ def notification_topic(storage_client, publisher_client, topic_path, no_mtls): binding = policy.bindings.add() binding.role = "roles/pubsub.publisher" binding.members.append( - "serviceAccount:{}".format(storage_client.get_service_account_email()) + f"serviceAccount:{storage_client.get_service_account_email()}" ) publisher_client.set_iam_policy(topic_path, policy) def test_notification_create_minimal( - storage_client, buckets_to_delete, topic_name, notification_topic, + storage_client, + buckets_to_delete, + topic_name, + notification_topic, ): bucket_name = _helpers.unique_name("notification-minimal") bucket = _helpers.retry_429_503(storage_client.create_bucket)(bucket_name) @@ -126,7 +129,11 @@ def test_notification_create_explicit( def test_notification_create_w_user_project( - storage_client, buckets_to_delete, topic_name, notification_topic, user_project, + storage_client, + buckets_to_delete, + topic_name, + notification_topic, + user_project, ): bucket_name = _helpers.unique_name("notification-w-up") bucket = _helpers.retry_429_503(storage_client.create_bucket)(bucket_name) diff --git a/tests/system/test_transfer_manager.py b/tests/system/test_transfer_manager.py new file mode 100644 index 000000000..7a257e960 --- /dev/null +++ b/tests/system/test_transfer_manager.py @@ -0,0 +1,477 @@ +# coding=utf-8 +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import tempfile +import os + +import pytest + +from google.cloud.storage import transfer_manager +from google.cloud.storage._helpers import _base64_md5hash + +from google.api_core import exceptions + +DEADLINE = 30 + +encryption_key = "b23ff11bba187db8c37077e6af3b25b8" + + +def _check_blob_hash(blob, info): + md5_hash = blob.md5_hash + if not isinstance(md5_hash, bytes): + md5_hash = md5_hash.encode("utf-8") + + assert md5_hash == info["hash"] + + +def test_upload_many(shared_bucket, file_data, blobs_to_delete): + FILE_BLOB_PAIRS = [ + (file_data["simple"]["path"], shared_bucket.blob("simple1")), + (file_data["simple"]["path"], shared_bucket.blob("simple2")), + ] + + results = transfer_manager.upload_many( + FILE_BLOB_PAIRS, + worker_type=transfer_manager.PROCESS, + deadline=DEADLINE, + ) + assert results == [None, None] + + blobs = shared_bucket.list_blobs() + for blob in blobs: + if blob.name.startswith("simple"): + blobs_to_delete.append(blob) + assert len(blobs_to_delete) == 2 + + +def test_upload_many_with_threads_and_file_objs( + shared_bucket, file_data, blobs_to_delete +): + FILE_BLOB_PAIRS = [ + (open(file_data["simple"]["path"], "rb"), shared_bucket.blob("simple1")), + (open(file_data["simple"]["path"], "rb"), shared_bucket.blob("simple2")), + ] + + results = transfer_manager.upload_many( + FILE_BLOB_PAIRS, + worker_type=transfer_manager.THREAD, + deadline=DEADLINE, + ) + assert results == [None, None] + + blobs = shared_bucket.list_blobs() + for blob in blobs: + if blob.name.startswith("simple"): + blobs_to_delete.append(blob) + assert len(blobs_to_delete) == 2 + + +def test_upload_many_skip_if_exists( + listable_bucket, listable_filenames, file_data, blobs_to_delete +): + FILE_BLOB_PAIRS = [ + (file_data["logo"]["path"], listable_bucket.blob(listable_filenames[0])), + (file_data["simple"]["path"], listable_bucket.blob("simple")), + ] + + results = transfer_manager.upload_many( + FILE_BLOB_PAIRS, + skip_if_exists=True, + raise_exception=True, + deadline=DEADLINE, + ) + assert isinstance(results[0], exceptions.PreconditionFailed) + assert results[1] is None + + blobs = listable_bucket.list_blobs() + for blob in blobs: + if blob.name.startswith("simple"): + blobs_to_delete.append(blob) + assert len(blobs_to_delete) == 1 + + +def test_upload_many_from_filenames_with_attributes( + listable_bucket, listable_filenames, file_data, blobs_to_delete +): + SOURCE_DIRECTORY, FILENAME = os.path.split(file_data["logo"]["path"]) + + transfer_manager.upload_many_from_filenames( + listable_bucket, + [FILENAME], + source_directory=SOURCE_DIRECTORY, + additional_blob_attributes={"cache_control": "no-cache"}, + raise_exception=True, + ) + + blob = listable_bucket.blob(FILENAME) + blob.reload() + blobs_to_delete.append(blob) + assert blob.cache_control == "no-cache" + + +def test_download_many(listable_bucket): + blobs = list(listable_bucket.list_blobs()) + with tempfile.TemporaryDirectory() as tempdir: + filenames = [ + os.path.join(tempdir, "file_a.txt"), + os.path.join(tempdir, "file_b.txt"), + ] + BLOB_FILE_PAIRS = zip(blobs[:2], filenames) + + results = transfer_manager.download_many( + BLOB_FILE_PAIRS, + worker_type=transfer_manager.PROCESS, + deadline=DEADLINE, + ) + assert results == [None, None] + for count, filename in enumerate(filenames): + with open(filename, "rb") as fp: + assert len(fp.read()) == blobs[count].size + + +def test_download_many_with_threads_and_file_objs(listable_bucket): + blobs = list(listable_bucket.list_blobs()) + with tempfile.TemporaryFile() as file_a, 
tempfile.TemporaryFile() as file_b: + tempfiles = [file_a, file_b] + BLOB_FILE_PAIRS = zip(blobs[:2], tempfiles) + + results = transfer_manager.download_many( + BLOB_FILE_PAIRS, + worker_type=transfer_manager.THREAD, + deadline=DEADLINE, + ) + assert results == [None, None] + for fp in tempfiles: + assert fp.tell() != 0 + + +def test_download_chunks_concurrently(shared_bucket, file_data): + # Upload a big file + source_file = file_data["big"] + upload_blob = shared_bucket.blob("chunky_file") + upload_blob.upload_from_filename(source_file["path"]) + upload_blob.reload() + size = upload_blob.size + chunk_size = size // 32 + + # Get a fresh blob obj w/o metadata for testing purposes + download_blob = shared_bucket.blob("chunky_file") + + with tempfile.TemporaryDirectory() as tempdir: + full_filename = os.path.join(tempdir, "chunky_file_1") + transfer_manager.download_chunks_concurrently( + download_blob, + full_filename, + chunk_size=chunk_size, + deadline=DEADLINE, + ) + with open(full_filename, "rb") as file_obj: + assert _base64_md5hash(file_obj) == source_file["hash"] + + # Now test for case where last chunk is exactly 1 byte. + trailing_chunk_filename = os.path.join(tempdir, "chunky_file_2") + transfer_manager.download_chunks_concurrently( + download_blob, + trailing_chunk_filename, + chunk_size=size - 1, + deadline=DEADLINE, + ) + with open(trailing_chunk_filename, "rb") as file_obj: + assert _base64_md5hash(file_obj) == source_file["hash"] + + # And for a case where there is only one chunk. + trailing_chunk_filename = os.path.join(tempdir, "chunky_file_3") + transfer_manager.download_chunks_concurrently( + download_blob, + trailing_chunk_filename, + chunk_size=size, + deadline=DEADLINE, + ) + with open(trailing_chunk_filename, "rb") as file_obj: + assert _base64_md5hash(file_obj) == source_file["hash"] + + # Also test threaded mode. 
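# Sketch of the call pattern used in the threaded case below (and in the
# process-based cases above): the blob is fetched in chunk_size slices that
# are written into a single destination file. Values here are placeholders.
from google.cloud.storage import transfer_manager

def fetch_chunked(blob, destination, chunk_size=32 * 1024 * 1024):
    transfer_manager.download_chunks_concurrently(
        blob,
        destination,
        chunk_size=chunk_size,
        worker_type=transfer_manager.THREAD,
        deadline=30,
    )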
+ threaded_filename = os.path.join(tempdir, "chunky_file_4") + transfer_manager.download_chunks_concurrently( + download_blob, + threaded_filename, + chunk_size=chunk_size, + deadline=DEADLINE, + worker_type=transfer_manager.THREAD, + ) + with open(threaded_filename, "rb") as file_obj: + assert _base64_md5hash(file_obj) == source_file["hash"] + + +def test_upload_chunks_concurrently(shared_bucket, file_data, blobs_to_delete): + source_file = file_data["big"] + filename = source_file["path"] + blob_name = "mpu_file" + upload_blob = shared_bucket.blob(blob_name) + chunk_size = 5 * 1024 * 1024 # Minimum supported by XML MPU API + assert os.path.getsize(filename) > chunk_size # Won't make a good test otherwise + + blobs_to_delete.append(upload_blob) + + transfer_manager.upload_chunks_concurrently( + filename, upload_blob, chunk_size=chunk_size, deadline=DEADLINE + ) + + with tempfile.NamedTemporaryFile() as tmp: + download_blob = shared_bucket.blob(blob_name) + download_blob.download_to_file(tmp) + tmp.seek(0) + + with open(source_file["path"], "rb") as sf: + source_contents = sf.read() + temp_contents = tmp.read() + assert source_contents == temp_contents + + # Also test threaded mode + blob_name = "mpu_threaded" + upload_blob = shared_bucket.blob(blob_name) + chunk_size = 5 * 1024 * 1024 # Minimum supported by XML MPU API + assert os.path.getsize(filename) > chunk_size # Won't make a good test otherwise + + blobs_to_delete.append(upload_blob) + + transfer_manager.upload_chunks_concurrently( + filename, + upload_blob, + chunk_size=chunk_size, + deadline=DEADLINE, + worker_type=transfer_manager.THREAD, + ) + + with tempfile.NamedTemporaryFile() as tmp: + download_blob = shared_bucket.blob(blob_name) + download_blob.download_to_file(tmp) + tmp.seek(0) + + with open(source_file["path"], "rb") as sf: + source_contents = sf.read() + temp_contents = tmp.read() + assert source_contents == temp_contents + + +def test_upload_chunks_concurrently_with_metadata( + shared_bucket, file_data, blobs_to_delete +): + from google.cloud.storage._helpers import _NOW + from google.cloud.storage._helpers import _UTC + + now = _NOW(_UTC) + custom_metadata = {"key_a": "value_a", "key_b": "value_b"} + + METADATA = { + "cache_control": "private", + "content_disposition": "inline", + "content_language": "en-US", + "custom_time": now, + "metadata": custom_metadata, + "storage_class": "NEARLINE", + } + + source_file = file_data["big"] + filename = source_file["path"] + blob_name = "mpu_file_with_metadata" + upload_blob = shared_bucket.blob(blob_name) + + for key, value in METADATA.items(): + setattr(upload_blob, key, value) + + chunk_size = 5 * 1024 * 1024 # Minimum supported by XML MPU API + assert os.path.getsize(filename) > chunk_size # Won't make a good test otherwise + + transfer_manager.upload_chunks_concurrently( + filename, upload_blob, chunk_size=chunk_size, deadline=DEADLINE + ) + blobs_to_delete.append(upload_blob) + + with tempfile.NamedTemporaryFile() as tmp: + download_blob = shared_bucket.get_blob(blob_name) + + for key, value in METADATA.items(): + assert getattr(download_blob, key) == value + + download_blob.download_to_file(tmp) + tmp.seek(0) + + with open(source_file["path"], "rb") as sf: + source_contents = sf.read() + temp_contents = tmp.read() + assert source_contents == temp_contents + + +def test_upload_chunks_concurrently_with_content_encoding( + shared_bucket, file_data, blobs_to_delete +): + import gzip + + METADATA = { + "content_encoding": "gzip", + } + + source_file = file_data["big"] + 
filename = source_file["path"] + blob_name = "mpu_file_encoded" + upload_blob = shared_bucket.blob(blob_name) + + for key, value in METADATA.items(): + setattr(upload_blob, key, value) + + chunk_size = 5 * 1024 * 1024 # Minimum supported by XML MPU API + + with tempfile.NamedTemporaryFile() as tmp_gzip: + with open(filename, "rb") as f: + compressed_bytes = gzip.compress(f.read()) + + tmp_gzip.write(compressed_bytes) + tmp_gzip.seek(0) + transfer_manager.upload_chunks_concurrently( + tmp_gzip.name, upload_blob, chunk_size=chunk_size, deadline=DEADLINE + ) + blobs_to_delete.append(upload_blob) + + with tempfile.NamedTemporaryFile() as tmp: + download_blob = shared_bucket.get_blob(blob_name) + + for key, value in METADATA.items(): + assert getattr(download_blob, key) == value + + download_blob.download_to_file(tmp) + tmp.seek(0) + + with open(source_file["path"], "rb") as sf: + source_contents = sf.read() + temp_contents = tmp.read() + assert source_contents == temp_contents + + +def test_upload_chunks_concurrently_with_encryption_key( + shared_bucket, file_data, blobs_to_delete +): + source_file = file_data["big"] + filename = source_file["path"] + blob_name = "mpu_file_encrypted" + upload_blob = shared_bucket.blob(blob_name, encryption_key=encryption_key) + + chunk_size = 5 * 1024 * 1024 # Minimum supported by XML MPU API + assert os.path.getsize(filename) > chunk_size # Won't make a good test otherwise + + transfer_manager.upload_chunks_concurrently( + filename, upload_blob, chunk_size=chunk_size, deadline=DEADLINE + ) + blobs_to_delete.append(upload_blob) + + with tempfile.NamedTemporaryFile() as tmp: + download_blob = shared_bucket.get_blob(blob_name, encryption_key=encryption_key) + + download_blob.download_to_file(tmp) + tmp.seek(0) + + with open(source_file["path"], "rb") as sf: + source_contents = sf.read() + temp_contents = tmp.read() + assert source_contents == temp_contents + + with tempfile.NamedTemporaryFile() as tmp: + keyless_blob = shared_bucket.get_blob(blob_name) + + with pytest.raises(exceptions.BadRequest): + keyless_blob.download_to_file(tmp) + + +def test_upload_chunks_concurrently_with_kms( + kms_bucket, file_data, blobs_to_delete, kms_key_name +): + source_file = file_data["big"] + filename = source_file["path"] + blob_name = "mpu_file_kms" + blob = kms_bucket.blob(blob_name, kms_key_name=kms_key_name) + + chunk_size = 5 * 1024 * 1024 # Minimum supported by XML MPU API + assert os.path.getsize(filename) > chunk_size # Won't make a good test otherwise + + transfer_manager.upload_chunks_concurrently( + filename, blob, chunk_size=chunk_size, deadline=DEADLINE + ) + blobs_to_delete.append(blob) + blob.reload() + assert blob.kms_key_name.startswith(kms_key_name) + + with tempfile.NamedTemporaryFile() as tmp: + blob.download_to_file(tmp) + tmp.seek(0) + + with open(source_file["path"], "rb") as sf: + source_contents = sf.read() + temp_contents = tmp.read() + assert source_contents == temp_contents + + +def test_upload_chunks_concurrently_with_quoted_blob_names( + shared_bucket, file_data, blobs_to_delete +): + source_file = file_data["big"] + filename = source_file["path"] + blob_name = "../example_bucket/mpu_file" + upload_blob = shared_bucket.blob(blob_name) + chunk_size = 5 * 1024 * 1024 # Minimum supported by XML MPU API + assert os.path.getsize(filename) > chunk_size # Won't make a good test otherwise + + blobs_to_delete.append(upload_blob) + + # If the blob name is not quoted/encoded at all, this will result in a 403. 
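# Sketch of the scenario checked below, assuming a bucket you control: blob
# names containing '/', '..' or non-ASCII characters must be URL-encoded when
# the library builds the XML multipart-upload request, otherwise the service
# rejects or mislocates the object.
from google.cloud.storage import transfer_manager

def upload_awkward_name(bucket, local_path):
    blob = bucket.blob("../example_bucket/mpu_file")  # deliberately awkward name
    transfer_manager.upload_chunks_concurrently(
        local_path,
        blob,
        chunk_size=5 * 1024 * 1024,  # minimum chunk size for the XML MPU API
        deadline=30,
    )
    return blob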
+ transfer_manager.upload_chunks_concurrently( + filename, upload_blob, chunk_size=chunk_size, deadline=DEADLINE + ) + + with tempfile.NamedTemporaryFile() as tmp: + # If the blob name is not quoted correctly, this will result in a 404. + download_blob = shared_bucket.blob(blob_name) + download_blob.download_to_file(tmp) + tmp.seek(0) + + with open(source_file["path"], "rb") as sf: + source_contents = sf.read() + temp_contents = tmp.read() + assert source_contents == temp_contents + + # Test emoji names are not mangled. + blob_name = "\U0001f681" # Helicopter emoji + upload_blob = shared_bucket.blob(blob_name) + chunk_size = 5 * 1024 * 1024 # Minimum supported by XML MPU API + assert os.path.getsize(filename) > chunk_size # Won't make a good test otherwise + + blobs_to_delete.append(upload_blob) + + transfer_manager.upload_chunks_concurrently( + filename, + upload_blob, + chunk_size=chunk_size, + deadline=DEADLINE, + worker_type=transfer_manager.THREAD, + ) + + with tempfile.NamedTemporaryFile() as tmp: + download_blob = shared_bucket.blob(blob_name) + download_blob.download_to_file(tmp) + tmp.seek(0) + + with open(source_file["path"], "rb") as sf: + source_contents = sf.read() + temp_contents = tmp.read() + assert source_contents == temp_contents diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index b99b78cfd..d628bfddb 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -19,21 +19,21 @@ from google.cloud.storage.retry import DEFAULT_RETRY from google.cloud.storage.retry import DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED +GCCL_INVOCATION_TEST_CONST = "gccl-invocation-id/test-invocation-123" -class Test__get_storage_host(unittest.TestCase): + +class Test__get_storage_emulator_override(unittest.TestCase): @staticmethod def _call_fut(): - from google.cloud.storage._helpers import _get_storage_host + from google.cloud.storage._helpers import _get_storage_emulator_override - return _get_storage_host() + return _get_storage_emulator_override() def test_wo_env_var(self): - from google.cloud.storage._helpers import _DEFAULT_STORAGE_HOST - with mock.patch("os.environ", {}): - host = self._call_fut() + override = self._call_fut() - self.assertEqual(host, _DEFAULT_STORAGE_HOST) + self.assertIsNone(override) def test_w_env_var(self): from google.cloud.storage._helpers import STORAGE_EMULATOR_ENV_VAR @@ -41,9 +41,64 @@ def test_w_env_var(self): HOST = "https://api.example.com" with mock.patch("os.environ", {STORAGE_EMULATOR_ENV_VAR: HOST}): - host = self._call_fut() + emu = self._call_fut() + + self.assertEqual(emu, HOST) + + +class Test__get_api_endpoint_override(unittest.TestCase): + @staticmethod + def _call_fut(): + from google.cloud.storage._helpers import _get_api_endpoint_override + + return _get_api_endpoint_override() + + def test_wo_env_var(self): + from google.cloud.storage._helpers import _TRUE_DEFAULT_STORAGE_HOST + from google.cloud.storage._helpers import _DEFAULT_SCHEME + + with mock.patch("os.environ", {}): + override = self._call_fut() + + self.assertIsNone(override, _DEFAULT_SCHEME + _TRUE_DEFAULT_STORAGE_HOST) + + def test_w_env_var(self): + from google.cloud.storage._helpers import _API_ENDPOINT_OVERRIDE_ENV_VAR + + BASE_URL = "https://api.example.com" + + with mock.patch("os.environ", {_API_ENDPOINT_OVERRIDE_ENV_VAR: BASE_URL}): + override = self._call_fut() + + self.assertEqual(override, BASE_URL) + + +class 
Test__get_environ_project(unittest.TestCase): + @staticmethod + def _call_fut(): + from google.cloud.storage._helpers import _get_environ_project + + return _get_environ_project() - self.assertEqual(host, HOST) + def test_wo_env_var(self): + with mock.patch("os.environ", {}): + project = self._call_fut() + + self.assertEqual(project, None) + + def test_w_env_var(self): + from google.auth import environment_vars + + PROJECT = "environ-project" + + with mock.patch("os.environ", {environment_vars.PROJECT: PROJECT}): + project = self._call_fut() + self.assertEqual(project, PROJECT) + + with mock.patch("os.environ", {environment_vars.LEGACY_PROJECT: PROJECT}): + project = self._call_fut() + + self.assertEqual(project, PROJECT) class Test_PropertyMixin(unittest.TestCase): @@ -64,7 +119,6 @@ def _make_one(self, *args, **kw): def _derivedClass(self, path=None, user_project=None): class Derived(self._get_target_class()): - client = None _actual_encryption_headers = None @@ -148,7 +202,9 @@ def test_reload_w_etag_match(self): derived._changes = object() derived.client = client - derived.reload(if_etag_match=etag,) + derived.reload( + if_etag_match=etag, + ) self.assertEqual(derived._properties, response) self.assertEqual(derived._changes, set()) @@ -303,7 +359,7 @@ def test_patch_w_defaults(self): expected_data, query_params=expected_query_params, timeout=self._get_default_timeout(), - retry=DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED, + retry=DEFAULT_RETRY, _target_object=derived, ) @@ -322,12 +378,14 @@ def test_patch_w_metageneration_match_w_timeout_w_retry(self): retry = mock.Mock(spec=[]) generation_number = 9 metageneration_number = 6 + override_unlocked_retention = True derived.patch( if_generation_match=generation_number, if_metageneration_match=metageneration_number, timeout=timeout, retry=retry, + override_unlocked_retention=override_unlocked_retention, ) self.assertEqual(derived._properties, {"foo": "Foo"}) @@ -339,6 +397,7 @@ def test_patch_w_metageneration_match_w_timeout_w_retry(self): "projection": "full", "ifGenerationMatch": generation_number, "ifMetagenerationMatch": metageneration_number, + "overrideUnlockedRetention": override_unlocked_retention, } client._patch_resource.assert_called_once_with( path, @@ -378,7 +437,7 @@ def test_patch_w_user_project_w_explicit_client(self): expected_data, query_params=expected_query_params, timeout=self._get_default_timeout(), - retry=DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED, + retry=DEFAULT_RETRY, _target_object=derived, ) @@ -423,9 +482,12 @@ def test_update_with_metageneration_not_match_w_timeout_w_retry(self): client = derived.client = mock.Mock(spec=["_put_resource"]) client._put_resource.return_value = api_response timeout = 42 + override_unlocked_retention = True derived.update( - if_metageneration_not_match=generation_number, timeout=timeout, + if_metageneration_not_match=generation_number, + timeout=timeout, + override_unlocked_retention=override_unlocked_retention, ) self.assertEqual(derived._properties, {"foo": "Foo"}) @@ -435,6 +497,7 @@ def test_update_with_metageneration_not_match_w_timeout_w_retry(self): expected_query_params = { "projection": "full", "ifMetagenerationNotMatch": generation_number, + "overrideUnlockedRetention": override_unlocked_retention, } client._put_resource.assert_called_once_with( path, @@ -642,56 +705,17 @@ def _call_fut(self, **args): return _bucket_bound_hostname_url(**args) def test_full_hostname(self): - HOST = "scheme://domain.tcl/" + HOST = "scheme://domain.tcl" 
self.assertEqual(self._call_fut(host=HOST), HOST) def test_hostname_and_scheme(self): HOST = "domain.tcl" SCHEME = "scheme" - EXPECTED_URL = SCHEME + "://" + HOST + "/" + EXPECTED_URL = SCHEME + "://" + HOST self.assertEqual(self._call_fut(host=HOST, scheme=SCHEME), EXPECTED_URL) -class Test__api_core_retry_to_resumable_media_retry(unittest.TestCase): - def test_conflict(self): - from google.cloud.storage._helpers import ( - _api_core_retry_to_resumable_media_retry, - ) - - with self.assertRaises(ValueError): - _api_core_retry_to_resumable_media_retry(retry=DEFAULT_RETRY, num_retries=2) - - def test_retry(self): - from google.cloud.storage._helpers import ( - _api_core_retry_to_resumable_media_retry, - ) - - retry_strategy = _api_core_retry_to_resumable_media_retry(retry=DEFAULT_RETRY) - self.assertEqual(retry_strategy.max_sleep, DEFAULT_RETRY._maximum) - self.assertEqual(retry_strategy.max_cumulative_retry, DEFAULT_RETRY._deadline) - self.assertEqual(retry_strategy.initial_delay, DEFAULT_RETRY._initial) - self.assertEqual(retry_strategy.multiplier, DEFAULT_RETRY._multiplier) - - def test_num_retries(self): - from google.cloud.storage._helpers import ( - _api_core_retry_to_resumable_media_retry, - ) - - retry_strategy = _api_core_retry_to_resumable_media_retry( - retry=None, num_retries=2 - ) - self.assertEqual(retry_strategy.max_retries, 2) - - def test_none(self): - from google.cloud.storage._helpers import ( - _api_core_retry_to_resumable_media_retry, - ) - - retry_strategy = _api_core_retry_to_resumable_media_retry(retry=None) - self.assertEqual(retry_strategy.max_retries, 0) - - class _MD5Hash(object): def __init__(self, digest_val): self.digest_val = digest_val diff --git a/tests/unit/test__http.py b/tests/unit/test__http.py index fcdb5d1a7..33ff1a890 100644 --- a/tests/unit/test__http.py +++ b/tests/unit/test__http.py @@ -13,9 +13,14 @@ # limitations under the License. 
import unittest +from unittest.mock import patch import mock +from google.cloud.storage import _helpers + +GCCL_INVOCATION_TEST_CONST = "gccl-invocation-id/test-invocation-123" + class TestConnection(unittest.TestCase): @staticmethod @@ -44,12 +49,17 @@ def test_extra_headers(self): conn = self._make_one(client) req_data = "hey-yoooouuuuu-guuuuuyyssss" - result = conn.api_request("GET", "/rainbow", data=req_data, expect_json=False) + with patch.object( + _helpers, "_get_invocation_id", return_value=GCCL_INVOCATION_TEST_CONST + ): + result = conn.api_request( + "GET", "/rainbow", data=req_data, expect_json=False + ) self.assertEqual(result, data) expected_headers = { "Accept-Encoding": "gzip", - base_http.CLIENT_INFO_HEADER: conn.user_agent, + base_http.CLIENT_INFO_HEADER: f"{conn.user_agent} {GCCL_INVOCATION_TEST_CONST}", "User-Agent": conn.user_agent, } expected_uri = conn.build_api_url("/rainbow") @@ -61,6 +71,58 @@ def test_extra_headers(self): timeout=_DEFAULT_TIMEOUT, ) + def test_metadata_op_has_client_custom_headers(self): + import requests + import google.auth.credentials + from google.cloud import _http as base_http + from google.cloud.storage import Client + from google.cloud.storage.constants import _DEFAULT_TIMEOUT + + custom_headers = { + "x-goog-custom-audit-foo": "bar", + "x-goog-custom-audit-user": "baz", + } + http = mock.create_autospec(requests.Session, instance=True) + response = requests.Response() + response.status_code = 200 + data = b"brent-spiner" + response._content = data + http.is_mtls = False + http.request.return_value = response + credentials = mock.Mock( + spec=google.auth.credentials.Credentials, + universe_domain=_helpers._DEFAULT_UNIVERSE_DOMAIN, + ) + client = Client( + project="project", + credentials=credentials, + _http=http, + extra_headers=custom_headers, + ) + req_data = "hey-yoooouuuuu-guuuuuyyssss" + with patch.object( + _helpers, "_get_invocation_id", return_value=GCCL_INVOCATION_TEST_CONST + ): + result = client._connection.api_request( + "GET", "/rainbow", data=req_data, expect_json=False + ) + self.assertEqual(result, data) + + expected_headers = { + **custom_headers, + "Accept-Encoding": "gzip", + base_http.CLIENT_INFO_HEADER: f"{client._connection.user_agent} {GCCL_INVOCATION_TEST_CONST}", + "User-Agent": client._connection.user_agent, + } + expected_uri = client._connection.build_api_url("/rainbow") + http.request.assert_called_once_with( + data=req_data, + headers=expected_headers, + method="GET", + url=expected_uri, + timeout=_DEFAULT_TIMEOUT, + ) + def test_build_api_url_no_extra_query_params(self): from urllib.parse import parse_qsl from urllib.parse import urlsplit @@ -68,7 +130,7 @@ def test_build_api_url_no_extra_query_params(self): conn = self._make_one(object()) uri = conn.build_api_url("/foo") scheme, netloc, path, qs, _ = urlsplit(uri) - self.assertEqual("%s://%s" % (scheme, netloc), conn.API_BASE_URL) + self.assertEqual(f"{scheme}://{netloc}", conn.API_BASE_URL) self.assertEqual(path, "/".join(["", "storage", conn.API_VERSION, "foo"])) parms = dict(parse_qsl(qs)) pretty_print = parms.pop("prettyPrint", "false") @@ -83,7 +145,7 @@ def test_build_api_url_w_custom_endpoint(self): conn = self._make_one(object(), api_endpoint=custom_endpoint) uri = conn.build_api_url("/foo") scheme, netloc, path, qs, _ = urlsplit(uri) - self.assertEqual("%s://%s" % (scheme, netloc), custom_endpoint) + self.assertEqual(f"{scheme}://{netloc}", custom_endpoint) self.assertEqual(path, "/".join(["", "storage", conn.API_VERSION, "foo"])) parms = 
dict(parse_qsl(qs)) pretty_print = parms.pop("prettyPrint", "false") @@ -97,7 +159,7 @@ def test_build_api_url_w_extra_query_params(self): conn = self._make_one(object()) uri = conn.build_api_url("/foo", {"bar": "baz"}) scheme, netloc, path, qs, _ = urlsplit(uri) - self.assertEqual("%s://%s" % (scheme, netloc), conn.API_BASE_URL) + self.assertEqual(f"{scheme}://{netloc}", conn.API_BASE_URL) self.assertEqual(path, "/".join(["", "storage", conn.API_VERSION, "foo"])) parms = dict(parse_qsl(qs)) self.assertEqual(parms["bar"], "baz") @@ -237,7 +299,7 @@ def test_duplicate_user_agent(self): client_info = ClientInfo(user_agent="test/123") conn = self._make_one(object(), client_info=client_info) - expected_user_agent = "test/123 gcloud-python/{} ".format(__version__) + expected_user_agent = f"test/123 gcloud-python/{__version__} " self.assertEqual(conn._client_info.user_agent, expected_user_agent) client = mock.Mock(_connection=conn, spec=["_connection"]) diff --git a/tests/unit/test__opentelemetry_tracing.py b/tests/unit/test__opentelemetry_tracing.py new file mode 100644 index 000000000..bdbb40fd2 --- /dev/null +++ b/tests/unit/test__opentelemetry_tracing.py @@ -0,0 +1,218 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import importlib +import os +import pytest +import sys + +import mock +from google.api_core.exceptions import GoogleAPICallError +from google.cloud.storage import __version__ +from google.cloud.storage import _opentelemetry_tracing + + +@pytest.fixture +def setup(): + """Setup OTel packages and tracer provider.""" + try: + from opentelemetry import trace as trace_api + from opentelemetry.sdk.trace import TracerProvider, export + from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( + InMemorySpanExporter, + ) + except ImportError: # pragma: NO COVER + pytest.skip("This test suite requires OpenTelemetry pacakges.") + + tracer_provider = TracerProvider() + memory_exporter = InMemorySpanExporter() + span_processor = export.SimpleSpanProcessor(memory_exporter) + tracer_provider.add_span_processor(span_processor) + trace_api.set_tracer_provider(tracer_provider) + importlib.reload(_opentelemetry_tracing) + yield memory_exporter + + +@pytest.fixture() +def mock_os_environ(monkeypatch): + """Mock os.environ.""" + monkeypatch.setattr(os, "environ", {}) + return os.environ + + +@pytest.fixture() +def setup_optin(mock_os_environ): + """Mock envar to opt-in tracing for storage client.""" + mock_os_environ["ENABLE_GCS_PYTHON_CLIENT_OTEL_TRACES"] = True + importlib.reload(_opentelemetry_tracing) + + +def test_opentelemetry_not_installed(setup, monkeypatch): + monkeypatch.setitem(sys.modules, "opentelemetry", None) + importlib.reload(_opentelemetry_tracing) + # Test no-ops when OpenTelemetry is not installed. 
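# For contrast with the no-op cases tested here: a sketch of opting in, using
# the same environment variable and SDK pieces as the fixtures above. The
# exporter choice and setting the variable before any storage calls are
# assumptions for illustration only.
import os
os.environ["ENABLE_GCS_PYTHON_CLIENT_OTEL_TRACES"] = "True"

from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider, export
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter

provider = TracerProvider()
exporter = InMemorySpanExporter()  # swap in a real exporter outside of tests
provider.add_span_processor(export.SimpleSpanProcessor(exporter))
trace.set_tracer_provider(provider)
# Storage client calls made after this point should emit client spans.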
+ with _opentelemetry_tracing.create_trace_span("No-ops w/o opentelemetry") as span: + assert span is None + assert not _opentelemetry_tracing.HAS_OPENTELEMETRY + + +def test_opentelemetry_no_trace_optin(setup): + assert _opentelemetry_tracing.HAS_OPENTELEMETRY + assert not _opentelemetry_tracing.enable_otel_traces + # Test no-ops when user has not opt-in. + # This prevents customers accidentally being billed for tracing. + with _opentelemetry_tracing.create_trace_span("No-ops w/o opt-in") as span: + assert span is None + + +def test_enable_trace_yield_span(setup, setup_optin): + assert _opentelemetry_tracing.HAS_OPENTELEMETRY + assert _opentelemetry_tracing.enable_otel_traces + with _opentelemetry_tracing.create_trace_span("No-ops for opentelemetry") as span: + assert span is not None + + +def test_enable_trace_call(setup, setup_optin): + from opentelemetry import trace as trace_api + + extra_attributes = { + "attribute1": "value1", + } + expected_attributes = _opentelemetry_tracing._default_attributes.copy() + expected_attributes.update(_opentelemetry_tracing._cloud_trace_adoption_attrs) + expected_attributes.update(extra_attributes) + + with _opentelemetry_tracing.create_trace_span( + "OtelTracing.Test", attributes=extra_attributes + ) as span: + span.set_attribute("after_setup_attribute", 1) + + expected_attributes["after_setup_attribute"] = 1 + + assert span.kind == trace_api.SpanKind.CLIENT + assert span.attributes == expected_attributes + assert span.name == "OtelTracing.Test" + + +def test_enable_trace_error(setup, setup_optin): + from opentelemetry import trace as trace_api + + extra_attributes = { + "attribute1": "value1", + } + expected_attributes = _opentelemetry_tracing._default_attributes.copy() + expected_attributes.update(_opentelemetry_tracing._cloud_trace_adoption_attrs) + expected_attributes.update(extra_attributes) + + with pytest.raises(GoogleAPICallError): + with _opentelemetry_tracing.create_trace_span( + "OtelTracing.Test", attributes=extra_attributes + ) as span: + from google.cloud.exceptions import NotFound + + assert span.kind == trace_api.SpanKind.CLIENT + assert span.attributes == expected_attributes + assert span.name == "OtelTracing.Test" + raise NotFound("Test catching NotFound error in trace span.") + + +def test_get_final_attributes(setup, setup_optin): + from google.api_core import retry as api_retry + + test_span_name = "OtelTracing.Test" + test_span_attributes = { + "foo": "bar", + } + api_request = { + "method": "GET", + "path": "/foo/bar/baz", + "timeout": (100, 100), + } + retry_obj = api_retry.Retry() + + expected_attributes = { + "foo": "bar", + "rpc.service": "CloudStorage", + "rpc.system": "http", + "user_agent.original": f"gcloud-python/{__version__}", + "http.request.method": "GET", + "url.full": "https://testOtel.org/foo/bar/baz", + "connect_timeout,read_timeout": (100, 100), + "retry": f"multiplier{retry_obj._multiplier}/deadline{retry_obj._deadline}/max{retry_obj._maximum}/initial{retry_obj._initial}/predicate{retry_obj._predicate}", + } + expected_attributes.update(_opentelemetry_tracing._cloud_trace_adoption_attrs) + + with mock.patch("google.cloud.storage.client.Client") as test_client: + test_client.project = "test_project" + test_client._connection.API_BASE_URL = "https://testOtel.org" + with _opentelemetry_tracing.create_trace_span( + test_span_name, + attributes=test_span_attributes, + client=test_client, + api_request=api_request, + 
retry=retry_obj, + ) as span: + assert span is not None + assert span.name == test_span_name + assert span.attributes == expected_attributes + + +def test_set_conditional_retry_attr(setup, setup_optin): + from google.api_core import retry as api_retry + from google.cloud.storage.retry import ConditionalRetryPolicy + + test_span_name = "OtelTracing.Test" + retry_policy = api_retry.Retry() + conditional_predicate = mock.Mock() + required_kwargs = ("kwarg",) + retry_obj = ConditionalRetryPolicy( + retry_policy, conditional_predicate, required_kwargs + ) + + retry_attrs = { + "retry": f"multiplier{retry_policy._multiplier}/deadline{retry_policy._deadline}/max{retry_policy._maximum}/initial{retry_policy._initial}/predicate{conditional_predicate}", + } + expected_attributes = _opentelemetry_tracing._default_attributes.copy() + expected_attributes.update(_opentelemetry_tracing._cloud_trace_adoption_attrs) + expected_attributes.update(retry_attrs) + + with _opentelemetry_tracing.create_trace_span( + test_span_name, + retry=retry_obj, + ) as span: + assert span is not None + assert span.name == test_span_name + assert span.attributes == expected_attributes + + +def test_set_api_request_attr(): + from google.cloud.storage import Client + + test_client = Client() + args_method = {"method": "GET"} + expected_attributes = {"http.request.method": "GET"} + attr = _opentelemetry_tracing._set_api_request_attr(args_method, test_client) + assert attr == expected_attributes + + args_path = {"path": "/foo/bar/baz"} + expected_attributes = {"url.full": "https://storage.googleapis.com/foo/bar/baz"} + attr = _opentelemetry_tracing._set_api_request_attr(args_path, test_client) + assert attr == expected_attributes + + args_timeout = {"timeout": (100, 100)} + expected_attributes = { + "connect_timeout,read_timeout": (100, 100), + } + attr = _opentelemetry_tracing._set_api_request_attr(args_timeout, test_client) + assert attr == expected_attributes diff --git a/tests/unit/test__signing.py b/tests/unit/test__signing.py index f863460c5..156911a73 100644 --- a/tests/unit/test__signing.py +++ b/tests/unit/test__signing.py @@ -26,6 +26,7 @@ import mock import pytest +from google.cloud.storage._helpers import _UTC from . 
import _read_local_json @@ -74,9 +75,7 @@ def test_w_expiration_naive_datetime(self): self.assertEqual(self._call_fut(expiration_no_tz), utc_seconds) def test_w_expiration_utc_datetime(self): - from google.cloud._helpers import UTC - - expiration_utc = datetime.datetime(2004, 8, 19, 0, 0, 0, 0, UTC) + expiration_utc = datetime.datetime(2004, 8, 19, 0, 0, 0, 0, _UTC) utc_seconds = _utc_seconds(expiration_utc) self.assertEqual(self._call_fut(expiration_utc), utc_seconds) @@ -88,32 +87,32 @@ def test_w_expiration_other_zone_datetime(self): self.assertEqual(self._call_fut(expiration_other), cet_seconds) def test_w_expiration_timedelta_seconds(self): - fake_utcnow = datetime.datetime(2004, 8, 19, 0, 0, 0, 0) + fake_utcnow = datetime.datetime(2004, 8, 19, 0, 0, 0, 0, _UTC) utc_seconds = _utc_seconds(fake_utcnow) expiration_as_delta = datetime.timedelta(seconds=10) patch = mock.patch( - "google.cloud.storage._signing.NOW", return_value=fake_utcnow + "google.cloud.storage._signing._NOW", return_value=fake_utcnow ) with patch as utcnow: result = self._call_fut(expiration_as_delta) self.assertEqual(result, utc_seconds + 10) - utcnow.assert_called_once_with() + utcnow.assert_called_once_with(datetime.timezone.utc) def test_w_expiration_timedelta_days(self): - fake_utcnow = datetime.datetime(2004, 8, 19, 0, 0, 0, 0) + fake_utcnow = datetime.datetime(2004, 8, 19, 0, 0, 0, 0, _UTC) utc_seconds = _utc_seconds(fake_utcnow) expiration_as_delta = datetime.timedelta(days=1) patch = mock.patch( - "google.cloud.storage._signing.NOW", return_value=fake_utcnow + "google.cloud.storage._signing._NOW", return_value=fake_utcnow ) with patch as utcnow: result = self._call_fut(expiration_as_delta) self.assertEqual(result, utc_seconds + 86400) - utcnow.assert_called_once_with() + utcnow.assert_called_once_with(datetime.timezone.utc) class Test_get_expiration_seconds_v4(unittest.TestCase): @@ -138,88 +137,83 @@ def test_w_expiration_int_gt_seven_days(self): expiration_seconds = _utc_seconds(expiration_utc) patch = mock.patch( - "google.cloud.storage._signing.NOW", return_value=fake_utcnow + "google.cloud.storage._signing._NOW", return_value=fake_utcnow ) with patch as utcnow: with self.assertRaises(ValueError): self._call_fut(expiration_seconds) - utcnow.assert_called_once_with() + utcnow.assert_called_once_with(datetime.timezone.utc) def test_w_expiration_int(self): fake_utcnow = datetime.datetime(2004, 8, 19, 0, 0, 0, 0) expiration_seconds = 10 patch = mock.patch( - "google.cloud.storage._signing.NOW", return_value=fake_utcnow + "google.cloud.storage._signing._NOW", return_value=fake_utcnow ) with patch as utcnow: result = self._call_fut(expiration_seconds) self.assertEqual(result, expiration_seconds) - utcnow.assert_called_once_with() + utcnow.assert_called_once_with(datetime.timezone.utc) def test_w_expiration_naive_datetime(self): - fake_utcnow = datetime.datetime(2004, 8, 19, 0, 0, 0, 0) + fake_utcnow = datetime.datetime(2004, 8, 19, 0, 0, 0, 0, _UTC) delta = datetime.timedelta(seconds=10) expiration_no_tz = fake_utcnow + delta patch = mock.patch( - "google.cloud.storage._signing.NOW", return_value=fake_utcnow + "google.cloud.storage._signing._NOW", return_value=fake_utcnow ) with patch as utcnow: result = self._call_fut(expiration_no_tz) self.assertEqual(result, delta.seconds) - utcnow.assert_called_once_with() + utcnow.assert_called_once() def test_w_expiration_utc_datetime(self): - from google.cloud._helpers import UTC - - fake_utcnow = datetime.datetime(2004, 8, 19, 0, 0, 0, 0, UTC) + fake_utcnow = 
datetime.datetime(2004, 8, 19, 0, 0, 0, 0, _UTC) delta = datetime.timedelta(seconds=10) expiration_utc = fake_utcnow + delta patch = mock.patch( - "google.cloud.storage._signing.NOW", return_value=fake_utcnow + "google.cloud.storage._signing._NOW", return_value=fake_utcnow ) with patch as utcnow: result = self._call_fut(expiration_utc) self.assertEqual(result, delta.seconds) - utcnow.assert_called_once_with() + utcnow.assert_called_once_with(datetime.timezone.utc) def test_w_expiration_other_zone_datetime(self): - from google.cloud._helpers import UTC - zone = _make_cet_timezone() - fake_utcnow = datetime.datetime(2004, 8, 19, 0, 0, 0, 0, UTC) + fake_utcnow = datetime.datetime(2004, 8, 19, 0, 0, 0, 0, _UTC) fake_cetnow = fake_utcnow.astimezone(zone) delta = datetime.timedelta(seconds=10) expiration_other = fake_cetnow + delta patch = mock.patch( - "google.cloud.storage._signing.NOW", return_value=fake_utcnow + "google.cloud.storage._signing._NOW", return_value=fake_utcnow ) with patch as utcnow: result = self._call_fut(expiration_other) - self.assertEqual(result, delta.seconds) - utcnow.assert_called_once_with() + utcnow.assert_called_once_with(datetime.timezone.utc) def test_w_expiration_timedelta(self): - fake_utcnow = datetime.datetime(2004, 8, 19, 0, 0, 0, 0) + fake_utcnow = datetime.datetime(2004, 8, 19, 0, 0, 0, 0, _UTC) expiration_as_delta = datetime.timedelta(seconds=10) patch = mock.patch( - "google.cloud.storage._signing.NOW", return_value=fake_utcnow + "google.cloud.storage._signing._NOW", return_value=fake_utcnow ) with patch as utcnow: result = self._call_fut(expiration_as_delta) self.assertEqual(result, expiration_as_delta.total_seconds()) - utcnow.assert_called_once_with() + utcnow.assert_called_once_with(datetime.timezone.utc) class Test_get_signed_query_params_v2(unittest.TestCase): @@ -326,7 +320,7 @@ def test_w_query_parameters(self): query_parameters = {"foo": "bar", "baz": "qux"} canonical = self._call_fut(method, resource, query_parameters, None) self.assertEqual(canonical.method, method) - self.assertEqual(canonical.resource, "{}?baz=qux&foo=bar".format(resource)) + self.assertEqual(canonical.resource, f"{resource}?baz=qux&foo=bar") self.assertEqual(canonical.query_parameters, [("baz", "qux"), ("foo", "bar")]) self.assertEqual(canonical.headers, []) @@ -399,7 +393,7 @@ def _generate_helper( for key, value in query_parameters.items() } expected_qp = urlencode(sorted(normalized_qp.items())) - expected_resource = "{}?{}".format(resource, expected_qp) + expected_resource = f"{resource}?{expected_qp}" elements.append(content_md5 or "") elements.append(content_type or "") @@ -534,7 +528,7 @@ def _generate_helper( credentials = _make_credentials(signer_email=signer_email) credentials.sign_bytes.return_value = b"DEADBEEF" - with mock.patch("google.cloud.storage._signing.NOW", lambda: now): + with mock.patch("google.cloud.storage._signing._NOW", lambda tz: now): url = self._call_fut( credentials, resource, @@ -568,9 +562,7 @@ def _generate_helper( self.assertEqual(params["X-Goog-Algorithm"], "GOOG4-RSA-SHA256") now_date = now.date().strftime("%Y%m%d") - expected_cred = "{}/{}/auto/storage/goog4_request".format( - signer_email, now_date - ) + expected_cred = f"{signer_email}/{now_date}/auto/storage/goog4_request" self.assertEqual(params["X-Goog-Credential"], expected_cred) now_stamp = now.strftime("%Y%m%dT%H%M%SZ") @@ -678,7 +670,9 @@ def test_with_signer_email(self): credentials = _make_credentials(signer_email=signer_email) credentials.sign_bytes.return_value = b"DEADBEEF" 
self._call_fut( - credentials, resource=resource, expiration=datetime.timedelta(days=5), + credentials, + resource=resource, + expiration=datetime.timedelta(days=5), ) def test_with_service_account_email_and_signer_email(self): @@ -797,7 +791,7 @@ def test_get_v4_now_dtstamps(self): from google.cloud.storage._signing import get_v4_now_dtstamps with mock.patch( - "google.cloud.storage._signing.NOW", + "google.cloud.storage._signing._NOW", return_value=datetime.datetime(2020, 3, 12, 13, 14, 15), ) as now_mock: timestamp, datestamp = get_v4_now_dtstamps() @@ -857,7 +851,7 @@ def test_conformance_bucket(test_data): resource = "/" _run_conformance_test(resource, test_data, _API_ACCESS_ENDPOINT) else: - resource = "/{}".format(test_data["bucket"]) + resource = f"/{test_data['bucket']}" _run_conformance_test(resource, test_data) @@ -873,13 +867,13 @@ def test_conformance_blob(test_data): # For the VIRTUAL_HOSTED_STYLE else: - _API_ACCESS_ENDPOINT = "{scheme}://{bucket_name}.storage.googleapis.com".format( - scheme=test_data["scheme"], bucket_name=test_data["bucket"] + _API_ACCESS_ENDPOINT = ( + f"{test_data['scheme']}://{test_data['bucket']}.storage.googleapis.com" ) - resource = "/{}".format(test_data["object"]) + resource = f"/{test_data['object']}" _run_conformance_test(resource, test_data, _API_ACCESS_ENDPOINT) else: - resource = "/{}/{}".format(test_data["bucket"], test_data["object"]) + resource = f"/{test_data['bucket']}/{test_data['object']}" _run_conformance_test(resource, test_data) diff --git a/tests/unit/test_acl.py b/tests/unit/test_acl.py index 6083ef1e1..bce716c74 100644 --- a/tests/unit/test_acl.py +++ b/tests/unit/test_acl.py @@ -56,7 +56,7 @@ def test___str__w_identifier(self): TYPE = "type" ID = "id" entity = self._make_one(TYPE, ID) - self.assertEqual(str(entity), "%s-%s" % (TYPE, ID)) + self.assertEqual(str(entity), f"{TYPE}-{ID}") def test_grant_simple(self): TYPE = "type" @@ -229,7 +229,7 @@ def test___iter___non_empty_w_roles(self): acl.loaded = True entity = acl.entity(TYPE, ID) entity.grant(ROLE) - self.assertEqual(list(acl), [{"entity": "%s-%s" % (TYPE, ID), "role": ROLE}]) + self.assertEqual(list(acl), [{"entity": f"{TYPE}-{ID}", "role": ROLE}]) def test___iter___non_empty_w_empty_role(self): TYPE = "type" @@ -313,7 +313,7 @@ def test_has_entity_hit_str(self): acl = self._make_one() acl.loaded = True acl.entity(TYPE, ID) - self.assertTrue(acl.has_entity("%s-%s" % (TYPE, ID))) + self.assertTrue(acl.has_entity(f"{TYPE}-{ID}")) def test_has_entity_hit_entity(self): TYPE = "type" @@ -371,7 +371,7 @@ def test_get_entity_hit_str(self): acl = self._make_one() acl.loaded = True acl.entity(TYPE, ID) - self.assertTrue(acl.has_entity("%s-%s" % (TYPE, ID))) + self.assertTrue(acl.has_entity(f"{TYPE}-{ID}")) def test_get_entity_hit_entity(self): TYPE = "type" @@ -422,7 +422,7 @@ def test_add_entity_hit(self): TYPE = "type" ID = "id" - ENTITY_VAL = "%s-%s" % (TYPE, ID) + ENTITY_VAL = f"{TYPE}-{ID}" ROLE = "role" entity = _ACLEntity(TYPE, ID) entity.grant(ROLE) @@ -470,7 +470,7 @@ def test_user(self): entity.grant(ROLE) self.assertEqual(entity.type, "user") self.assertEqual(entity.identifier, ID) - self.assertEqual(list(acl), [{"entity": "user-%s" % ID, "role": ROLE}]) + self.assertEqual(list(acl), [{"entity": f"user-{ID}", "role": ROLE}]) def test_group(self): ID = "id" @@ -481,7 +481,7 @@ def test_group(self): entity.grant(ROLE) self.assertEqual(entity.type, "group") self.assertEqual(entity.identifier, ID) - self.assertEqual(list(acl), [{"entity": "group-%s" % ID, "role": ROLE}]) + 
self.assertEqual(list(acl), [{"entity": f"group-{ID}", "role": ROLE}]) def test_domain(self): ID = "id" @@ -492,7 +492,7 @@ def test_domain(self): entity.grant(ROLE) self.assertEqual(entity.type, "domain") self.assertEqual(entity.identifier, ID) - self.assertEqual(list(acl), [{"entity": "domain-%s" % ID, "role": ROLE}]) + self.assertEqual(list(acl), [{"entity": f"domain-{ID}", "role": ROLE}]) def test_all(self): ROLE = "role" @@ -1003,8 +1003,8 @@ def test_ctor(self): self.assertEqual(acl.entities, {}) self.assertFalse(acl.loaded) self.assertIs(acl.bucket, bucket) - self.assertEqual(acl.reload_path, "/b/%s/acl" % NAME) - self.assertEqual(acl.save_path, "/b/%s" % NAME) + self.assertEqual(acl.reload_path, f"/b/{NAME}/acl") + self.assertEqual(acl.save_path, f"/b/{NAME}") def test_user_project(self): NAME = "name" @@ -1033,8 +1033,8 @@ def test_ctor(self): self.assertEqual(acl.entities, {}) self.assertFalse(acl.loaded) self.assertIs(acl.bucket, bucket) - self.assertEqual(acl.reload_path, "/b/%s/defaultObjectAcl" % NAME) - self.assertEqual(acl.save_path, "/b/%s" % NAME) + self.assertEqual(acl.reload_path, f"/b/{NAME}/defaultObjectAcl") + self.assertEqual(acl.save_path, f"/b/{NAME}") class Test_ObjectACL(unittest.TestCase): @@ -1056,8 +1056,8 @@ def test_ctor(self): self.assertEqual(acl.entities, {}) self.assertFalse(acl.loaded) self.assertIs(acl.blob, blob) - self.assertEqual(acl.reload_path, "/b/%s/o/%s/acl" % (NAME, BLOB_NAME)) - self.assertEqual(acl.save_path, "/b/%s/o/%s" % (NAME, BLOB_NAME)) + self.assertEqual(acl.reload_path, f"/b/{NAME}/o/{BLOB_NAME}/acl") + self.assertEqual(acl.save_path, f"/b/{NAME}/o/{BLOB_NAME}") def test_user_project(self): NAME = "name" @@ -1070,9 +1070,61 @@ def test_user_project(self): blob.user_project = USER_PROJECT self.assertEqual(acl.user_project, USER_PROJECT) + def test_passthrough_methods(self): + NAME = "name" + BLOB_NAME = "blob-name" + bucket = _Bucket(NAME) + blob = _Blob(bucket, BLOB_NAME) + acl = self._make_one(blob) + + client = mock.Mock() + + with mock.patch("google.cloud.storage.acl.ACL.clear") as m: + kwargs = { + "client": client, + "if_generation_match": 1, + "if_generation_not_match": 2, + "if_metageneration_match": 3, + "if_metageneration_not_match": 4, + "timeout": 60, + "retry": None, + } + + acl.clear(**kwargs) + m.assert_called_once_with(**kwargs) + + with mock.patch("google.cloud.storage.acl.ACL.save") as m: + kwargs = { + "acl": [], + "client": client, + "if_generation_match": 1, + "if_generation_not_match": 2, + "if_metageneration_match": 3, + "if_metageneration_not_match": 4, + "timeout": 60, + "retry": None, + } + + acl.save(**kwargs) + m.assert_called_once_with(**kwargs) + + with mock.patch("google.cloud.storage.acl.ACL.save_predefined") as m: + kwargs = { + "predefined": "predef", + "client": client, + "if_generation_match": 1, + "if_generation_not_match": 2, + "if_metageneration_match": 3, + "if_metageneration_not_match": 4, + "timeout": 60, + "retry": None, + } + + acl.save_predefined(**kwargs) + m.assert_called_once_with(**kwargs) -class _Blob(object): +class _Blob(object): user_project = None def __init__(self, bucket, blob): @@ -1081,11 +1133,10 @@ def __init__(self, bucket, blob): @property def path(self): - return "%s/o/%s" % (self.bucket.path, self.blob) + return f"{self.bucket.path}/o/{self.blob}" class _Bucket(object): - user_project = None def __init__(self, name): @@ -1093,4 +1144,4 @@ def __init__(self, name): @property def path(self): - return "/b/%s" % self.name + return f"/b/{self.name}" diff --git 
a/tests/unit/test_batch.py b/tests/unit/test_batch.py index 89bf583e9..3070af956 100644 --- a/tests/unit/test_batch.py +++ b/tests/unit/test_batch.py @@ -20,11 +20,16 @@ import mock import requests +from google.cloud.storage._helpers import _DEFAULT_UNIVERSE_DOMAIN + def _make_credentials(): import google.auth.credentials - return mock.Mock(spec=google.auth.credentials.Credentials) + return mock.Mock( + spec=google.auth.credentials.Credentials, + universe_domain=_DEFAULT_UNIVERSE_DOMAIN, + ) def _make_response(status=http.client.OK, content=b"", headers={}): @@ -280,7 +285,7 @@ def _check_subrequest_no_payload(self, chunk, method, url): self.assertEqual(lines[1], "Content-Type: application/http") self.assertEqual(lines[2], "MIME-Version: 1.0") self.assertEqual(lines[3], "") - self.assertEqual(lines[4], "%s %s HTTP/1.1" % (method, url)) + self.assertEqual(lines[4], f"{method} {url} HTTP/1.1") self.assertEqual(lines[5], "") self.assertEqual(lines[6], "") @@ -294,14 +299,14 @@ def _check_subrequest_payload(self, chunk, method, url, payload): self.assertEqual(lines[1], "Content-Type: application/http") self.assertEqual(lines[2], "MIME-Version: 1.0") self.assertEqual(lines[3], "") - self.assertEqual(lines[4], "%s %s HTTP/1.1" % (method, url)) + self.assertEqual(lines[4], f"{method} {url} HTTP/1.1") if method == "GET": self.assertEqual(len(lines), 7) self.assertEqual(lines[5], "") self.assertEqual(lines[6], "") else: self.assertEqual(len(lines), 9) - self.assertEqual(lines[5], "Content-Length: %d" % len(payload_str)) + self.assertEqual(lines[5], f"Content-Length: {len(payload_str)}") self.assertEqual(lines[6], "Content-Type: application/json") self.assertEqual(lines[7], "") self.assertEqual(json.loads(lines[8]), payload) @@ -334,6 +339,7 @@ def test_finish_nonempty(self): result = batch.finish() self.assertEqual(len(result), len(batch._requests)) + self.assertEqual(len(result), len(batch._responses)) response1, response2, response3 = result @@ -352,7 +358,7 @@ def test_finish_nonempty(self): self.assertEqual(response3.headers, {"Content-Length": "0"}) self.assertEqual(response3.status_code, NO_CONTENT) - expected_url = "{}/batch/storage/v1".format(batch.API_BASE_URL) + expected_url = f"{batch.API_BASE_URL}/batch/storage/v1" http.request.assert_called_once_with( method="POST", url=expected_url, @@ -422,7 +428,7 @@ def test_finish_nonempty_with_status_failure(self): self.assertEqual(target1._properties, {"foo": 1, "bar": 2}) self.assertIs(target2._properties, target2_future_before) - expected_url = "{}/batch/storage/v1".format(batch.API_BASE_URL) + expected_url = f"{batch.API_BASE_URL}/batch/storage/v1" http.request.assert_called_once_with( method="POST", url=expected_url, @@ -438,6 +444,55 @@ def test_finish_nonempty_with_status_failure(self): self._check_subrequest_payload(chunks[0], "GET", url, {}) self._check_subrequest_payload(chunks[1], "GET", url, {}) + def test_finish_no_raise_exception(self): + url = "http://api.example.com/other_api" + expected_response = _make_response( + content=_TWO_PART_MIME_RESPONSE_WITH_FAIL, + headers={"content-type": 'multipart/mixed; boundary="DEADBEEF="'}, + ) + http = _make_requests_session([expected_response]) + connection = _Connection(http=http) + client = _Client(connection) + batch = self._make_one(client) + batch.API_BASE_URL = "http://api.example.com" + target1 = _MockObject() + target2 = _MockObject() + + batch._do_request("GET", url, {}, None, target1, timeout=42) + 
batch._do_request("GET", url, {}, None, target2, timeout=420) + + # Make sure futures are not populated. + self.assertEqual( + [future for future in batch._target_objects], [target1, target2] + ) + + batch.finish(raise_exception=False) + + self.assertEqual(len(batch._requests), 2) + self.assertEqual(len(batch._responses), 2) + + # Make sure NotFound exception is added to responses and target2 + self.assertEqual(target1._properties, {"foo": 1, "bar": 2}) + self.assertEqual(target2._properties, {"error": {"message": "Not Found"}}) + + expected_url = f"{batch.API_BASE_URL}/batch/storage/v1" + http.request.assert_called_once_with( + method="POST", + url=expected_url, + headers=mock.ANY, + data=mock.ANY, + timeout=420, # the last request timeout prevails + ) + + _, request_body, _, boundary = self._get_mutlipart_request(http) + + chunks = self._get_payload_chunks(boundary, request_body) + self.assertEqual(len(chunks), 2) + self._check_subrequest_payload(chunks[0], "GET", url, {}) + self._check_subrequest_payload(chunks[1], "GET", url, {}) + self.assertEqual(batch._responses[0].status_code, 200) + self.assertEqual(batch._responses[1].status_code, 404) + def test_finish_nonempty_non_multipart_response(self): url = "http://api.example.com/other_api" http = _make_requests_session([_make_response()]) @@ -497,6 +552,7 @@ def test_as_context_mgr_wo_error(self): self.assertEqual(list(client._batch_stack), []) self.assertEqual(len(batch._requests), 3) + self.assertEqual(len(batch._responses), 3) self.assertEqual(batch._requests[0][0], "POST") self.assertEqual(batch._requests[1][0], "PATCH") self.assertEqual(batch._requests[2][0], "DELETE") @@ -505,6 +561,43 @@ def test_as_context_mgr_wo_error(self): self.assertEqual(target2._properties, {"foo": 1, "bar": 3}) self.assertEqual(target3._properties, b"") + def test_as_context_mgr_no_raise_exception(self): + from google.cloud.storage.client import Client + + url = "http://api.example.com/other_api" + expected_response = _make_response( + content=_TWO_PART_MIME_RESPONSE_WITH_FAIL, + headers={"content-type": 'multipart/mixed; boundary="DEADBEEF="'}, + ) + http = _make_requests_session([expected_response]) + project = "PROJECT" + credentials = _make_credentials() + client = Client(project=project, credentials=credentials) + client._http_internal = http + + self.assertEqual(list(client._batch_stack), []) + + target1 = _MockObject() + target2 = _MockObject() + + with self._make_one(client, raise_exception=False) as batch: + self.assertEqual(list(client._batch_stack), [batch]) + batch._make_request("GET", url, {}, target_object=target1) + batch._make_request("GET", url, {}, target_object=target2) + + self.assertEqual(list(client._batch_stack), []) + self.assertEqual(len(batch._requests), 2) + self.assertEqual(len(batch._responses), 2) + self.assertEqual(batch._requests[0][0], "GET") + self.assertEqual(batch._requests[1][0], "GET") + self.assertEqual(batch._target_objects, [target1, target2]) + + # Make sure NotFound exception is added to responses and target2 + self.assertEqual(batch._responses[0].status_code, 200) + self.assertEqual(batch._responses[1].status_code, 404) + self.assertEqual(target1._properties, {"foo": 1, "bar": 2}) + self.assertEqual(target2._properties, {"error": {"message": "Not Found"}}) + def test_as_context_mgr_w_error(self): from google.cloud.storage.batch import _FutureDict from google.cloud.storage.client import Client @@ -577,9 +670,9 @@ def 
_unpack_helper(self, response, content): self.assertEqual(len(result), 3) self.assertEqual(result[0].status_code, http.client.OK) - self.assertEqual(result[0].json(), {u"bar": 2, u"foo": 1}) + self.assertEqual(result[0].json(), {"bar": 2, "foo": 1}) self.assertEqual(result[1].status_code, http.client.OK) - self.assertEqual(result[1].json(), {u"foo": 1, u"bar": 3}) + self.assertEqual(result[1].json(), {"foo": 1, "bar": 3}) self.assertEqual(result[2].status_code, http.client.NO_CONTENT) def test_bytes_headers(self): @@ -588,7 +681,7 @@ def test_bytes_headers(self): self._unpack_helper(RESPONSE, CONTENT) def test_unicode_headers(self): - RESPONSE = {"content-type": u'multipart/mixed; boundary="DEADBEEF="'} + RESPONSE = {"content-type": 'multipart/mixed; boundary="DEADBEEF="'} CONTENT = _THREE_PART_MIME_RESPONSE self._unpack_helper(RESPONSE, CONTENT) @@ -673,7 +766,6 @@ def test___setitem__(self): class _Connection(object): - project = "TESTING" def __init__(self, **kw): diff --git a/tests/unit/test_blob.py b/tests/unit/test_blob.py index a70c16e75..06ba62220 100644 --- a/tests/unit/test_blob.py +++ b/tests/unit/test_blob.py @@ -21,17 +21,25 @@ import tempfile import unittest import http.client +from unittest.mock import patch from urllib.parse import urlencode import mock import pytest -from google.cloud.storage.retry import ( - DEFAULT_RETRY, - DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED, -) +from google.cloud.exceptions import NotFound +from google.cloud.storage import _helpers +from google.cloud.storage._helpers import _get_default_headers +from google.cloud.storage._helpers import _get_default_storage_base_url +from google.cloud.storage._helpers import _DEFAULT_UNIVERSE_DOMAIN +from google.cloud.storage._helpers import _NOW +from google.cloud.storage._helpers import _UTC +from google.cloud.storage.exceptions import DataCorruption +from google.cloud.storage.exceptions import InvalidResponse +from google.cloud.storage.retry import DEFAULT_RETRY from google.cloud.storage.retry import DEFAULT_RETRY_IF_ETAG_IN_JSON from google.cloud.storage.retry import DEFAULT_RETRY_IF_GENERATION_SPECIFIED +from tests.unit.test__helpers import GCCL_INVOCATION_TEST_CONST def _make_credentials(): @@ -60,6 +68,7 @@ def _get_default_timeout(): def _make_client(*args, **kw): from google.cloud.storage.client import Client + kw["api_endpoint"] = kw.get("api_endpoint") or _get_default_storage_base_url() return mock.create_autospec(Client, instance=True, **kw) def test_ctor_wo_encryption_key(self): @@ -78,7 +87,7 @@ def test_ctor_wo_encryption_key(self): def test_ctor_with_encoded_unicode(self): blob_name = b"wet \xe2\x9b\xb5" blob = self._make_one(blob_name, bucket=None) - unicode_name = u"wet \N{sailboat}" + unicode_name = "wet \N{sailboat}" self.assertNotIsInstance(blob.name, bytes) self.assertIsInstance(blob.name, str) self.assertEqual(blob.name, unicode_name) @@ -128,15 +137,13 @@ def test_ctor_with_generation(self): self.assertEqual(blob.generation, GENERATION) def _set_properties_helper(self, kms_key_name=None): - import datetime - from google.cloud._helpers import UTC from google.cloud._helpers import _RFC3339_MICROS - now = datetime.datetime.utcnow().replace(tzinfo=UTC) + now = _NOW(_UTC) NOW = now.strftime(_RFC3339_MICROS) BLOB_NAME = "blob-name" GENERATION = 12345 - BLOB_ID = "name/{}/{}".format(BLOB_NAME, GENERATION) + BLOB_ID = f"name/{BLOB_NAME}/{GENERATION}" SELF_LINK = "http://example.com/self/" METAGENERATION = 23456 SIZE = 12345 @@ -261,7 +268,7 @@ def 
test_acl_property(self): from google.cloud.storage.acl import ObjectACL fake_bucket = _Bucket() - blob = self._make_one(u"name", bucket=fake_bucket) + blob = self._make_one("name", bucket=fake_bucket) acl = blob.acl self.assertIsInstance(acl, ObjectACL) self.assertIs(acl, blob._acl) @@ -304,20 +311,20 @@ def test_kms_key_name_setter(self): def test_path_bad_bucket(self): fake_bucket = object() - name = u"blob-name" + name = "blob-name" blob = self._make_one(name, bucket=fake_bucket) self.assertRaises(AttributeError, getattr, blob, "path") def test_path_no_name(self): bucket = _Bucket() - blob = self._make_one(u"", bucket=bucket) + blob = self._make_one("", bucket=bucket) self.assertRaises(ValueError, getattr, blob, "path") def test_path_normal(self): BLOB_NAME = "blob-name" bucket = _Bucket() blob = self._make_one(BLOB_NAME, bucket=bucket) - self.assertEqual(blob.path, "/b/name/o/%s" % BLOB_NAME) + self.assertEqual(blob.path, f"/b/name/o/{BLOB_NAME}") def test_path_w_slash_in_name(self): BLOB_NAME = "parent/child" @@ -326,7 +333,7 @@ def test_path_w_slash_in_name(self): self.assertEqual(blob.path, "/b/name/o/parent%2Fchild") def test_path_with_non_ascii(self): - blob_name = u"Caf\xe9" + blob_name = "Caf\xe9" bucket = _Bucket() blob = self._make_one(blob_name, bucket=bucket) self.assertEqual(blob.path, "/b/name/o/Caf%C3%A9") @@ -398,7 +405,7 @@ def test_public_url(self): bucket = _Bucket() blob = self._make_one(BLOB_NAME, bucket=bucket) self.assertEqual( - blob.public_url, "https://storage.googleapis.com/name/%s" % BLOB_NAME + blob.public_url, f"https://storage.googleapis.com/name/{BLOB_NAME}" ) def test_public_url_w_slash_in_name(self): @@ -416,12 +423,21 @@ def test_public_url_w_tilde_in_name(self): self.assertEqual(blob.public_url, "https://storage.googleapis.com/name/foo~bar") def test_public_url_with_non_ascii(self): - blob_name = u"winter \N{snowman}" + blob_name = "winter \N{snowman}" bucket = _Bucket() blob = self._make_one(blob_name, bucket=bucket) expected_url = "https://storage.googleapis.com/name/winter%20%E2%98%83" self.assertEqual(blob.public_url, expected_url) + def test_public_url_without_client(self): + BLOB_NAME = "blob-name" + bucket = _Bucket() + bucket.client = None + blob = self._make_one(BLOB_NAME, bucket=bucket) + self.assertEqual( + blob.public_url, f"https://storage.googleapis.com/name/{BLOB_NAME}" + ) + def test_generate_signed_url_w_invalid_version(self): BLOB_NAME = "blob-name" EXPIRATION = "2014-10-16T20:34:37.000Z" @@ -455,17 +471,14 @@ def _generate_signed_url_helper( scheme="http", ): from urllib import parse - from google.cloud._helpers import UTC from google.cloud.storage._helpers import _bucket_bound_hostname_url - from google.cloud.storage.blob import _API_ACCESS_ENDPOINT + from google.cloud.storage._helpers import _get_default_storage_base_url from google.cloud.storage.blob import _get_encryption_headers - api_access_endpoint = api_access_endpoint or _API_ACCESS_ENDPOINT - delta = datetime.timedelta(hours=1) if expiration is None: - expiration = datetime.datetime.utcnow().replace(tzinfo=UTC) + delta + expiration = _NOW(_UTC) + delta if credentials is None: expected_creds = _make_credentials() @@ -474,6 +487,8 @@ def _generate_signed_url_helper( expected_creds = credentials client = self._make_client(_credentials=object()) + 
expected_universe_domain = client.universe_domain + bucket = _Bucket(client) blob = self._make_one(blob_name, bucket=bucket, encryption_key=encryption_key) @@ -482,9 +497,7 @@ def _generate_signed_url_helper( else: effective_version = version - to_patch = "google.cloud.storage.blob.generate_signed_url_{}".format( - effective_version - ) + to_patch = f"google.cloud.storage.blob.generate_signed_url_{effective_version}" with mock.patch(to_patch) as signer: signed_uri = blob.generate_signed_url( @@ -520,11 +533,15 @@ def _generate_signed_url_helper( bucket_bound_hostname, scheme ) else: - expected_api_access_endpoint = api_access_endpoint - expected_resource = "/{}/{}".format(bucket.name, quoted_name) + expected_api_access_endpoint = ( + api_access_endpoint + if api_access_endpoint + else _get_default_storage_base_url() + ) + expected_resource = f"/{bucket.name}/{quoted_name}" if virtual_hosted_style or bucket_bound_hostname: - expected_resource = "/{}".format(quoted_name) + expected_resource = f"/{quoted_name}" if encryption_key is not None: expected_headers = headers or {} @@ -549,6 +566,7 @@ def _generate_signed_url_helper( "query_parameters": query_parameters, "access_token": access_token, "service_account_email": service_account_email, + "universe_domain": expected_universe_domain, } signer.assert_called_once_with(expected_creds, **expected_kwargs) @@ -563,13 +581,11 @@ def test_generate_signed_url_v2_w_defaults(self): self._generate_signed_url_v2_helper() def test_generate_signed_url_v2_w_expiration(self): - from google.cloud._helpers import UTC - - expiration = datetime.datetime.utcnow().replace(tzinfo=UTC) + expiration = _NOW(_UTC) self._generate_signed_url_v2_helper(expiration=expiration) def test_generate_signed_url_v2_w_non_ascii_name(self): - BLOB_NAME = u"\u0410\u043a\u043a\u043e\u0440\u0434\u044b.txt" + BLOB_NAME = "\u0410\u043a\u043a\u043e\u0440\u0434\u044b.txt" self._generate_signed_url_v2_helper(blob_name=BLOB_NAME) def test_generate_signed_url_v2_w_slash_in_name(self): @@ -629,7 +645,7 @@ def test_generate_signed_url_v4_w_defaults(self): self._generate_signed_url_v4_helper() def test_generate_signed_url_v4_w_non_ascii_name(self): - BLOB_NAME = u"\u0410\u043a\u043a\u043e\u0440\u0434\u044b.txt" + BLOB_NAME = "\u0410\u043a\u043a\u043e\u0440\u0434\u044b.txt" self._generate_signed_url_v4_helper(blob_name=BLOB_NAME) def test_generate_signed_url_v4_w_slash_in_name(self): @@ -692,6 +708,17 @@ def test_generate_signed_url_v4_w_credentials(self): credentials = object() self._generate_signed_url_v4_helper(credentials=credentials) + def test_generate_signed_url_v4_w_incompatible_params(self): + with self.assertRaises(ValueError): + self._generate_signed_url_v4_helper( + api_access_endpoint="example.com", + bucket_bound_hostname="cdn.example.com", + ) + with self.assertRaises(ValueError): + self._generate_signed_url_v4_helper( + virtual_hosted_style=True, bucket_bound_hostname="cdn.example.com" + ) + def test_exists_miss_w_defaults(self): from google.cloud.exceptions import NotFound @@ -760,6 +787,32 @@ def test_exists_hit_w_generation_w_retry(self): _target_object=None, ) + def test_exists_hit_w_generation_w_soft_deleted(self): + blob_name = "blob-name" + generation = 123456 + api_response = {"name": blob_name} + client = mock.Mock(spec=["_get_resource"]) + client._get_resource.return_value = api_response + bucket = _Bucket(client) + blob = self._make_one(blob_name, bucket=bucket, generation=generation) + + self.assertTrue(blob.exists(retry=None, soft_deleted=True)) + + 
expected_query_params = { + "fields": "name", + "generation": generation, + "softDeleted": True, + } + expected_headers = {} + client._get_resource.assert_called_once_with( + blob.path, + query_params=expected_query_params, + headers=expected_headers, + timeout=self._get_default_timeout(), + retry=None, + _target_object=None, + ) + def test_exists_w_etag_match(self): blob_name = "blob-name" etag = "kittens" @@ -769,7 +822,12 @@ def test_exists_w_etag_match(self): bucket = _Bucket(client) blob = self._make_one(blob_name, bucket=bucket) - self.assertTrue(blob.exists(if_etag_match=etag, retry=None,)) + self.assertTrue( + blob.exists( + if_etag_match=etag, + retry=None, + ) + ) expected_query_params = { "fields": "name", @@ -840,7 +898,7 @@ def test_delete_wo_generation(self): None, None, None, - DEFAULT_RETRY_IF_GENERATION_SPECIFIED, + DEFAULT_RETRY, ) ], ) @@ -867,7 +925,7 @@ def test_delete_w_generation(self): None, None, None, - DEFAULT_RETRY_IF_GENERATION_SPECIFIED, + DEFAULT_RETRY, ) ], ) @@ -894,15 +952,15 @@ def test_delete_w_generation_match(self): None, None, None, - DEFAULT_RETRY_IF_GENERATION_SPECIFIED, + DEFAULT_RETRY, ) ], ) def test__get_transport(self): - client = mock.Mock(spec=[u"_credentials", "_http"]) + client = mock.Mock(spec=["_credentials", "_http"]) client._http = mock.sentinel.transport - blob = self._make_one(u"blob-name", bucket=None) + blob = self._make_one("blob-name", bucket=None) transport = blob._get_transport(client) @@ -937,7 +995,7 @@ def test__get_download_url_with_generation_match(self): ) self.assertEqual( download_url, - "{}?ifGenerationMatch={}".format(MEDIA_LINK, GENERATION_NUMBER), + f"{MEDIA_LINK}?ifGenerationMatch={GENERATION_NUMBER}", ) def test__get_download_url_with_media_link_w_user_project(self): @@ -953,9 +1011,7 @@ def test__get_download_url_with_media_link_w_user_project(self): client._connection.API_BASE_URL = "https://storage.googleapis.com" download_url = blob._get_download_url(client) - self.assertEqual( - download_url, "{}?userProject={}".format(media_link, user_project) - ) + self.assertEqual(download_url, f"{media_link}?userProject={user_project}") def test__get_download_url_on_the_fly(self): blob_name = "bzzz-fly.txt" @@ -1193,6 +1249,8 @@ def _do_download_helper_wo_chunks( extra_kwargs.update(timeout_kwarg) + retry = extra_kwargs.get("retry", DEFAULT_RETRY) + with patch as patched: if w_range: blob._do_download( @@ -1203,7 +1261,7 @@ def _do_download_helper_wo_chunks( start=1, end=3, raw_download=raw_download, - **extra_kwargs + **extra_kwargs, ) else: blob._do_download( @@ -1212,7 +1270,7 @@ def _do_download_helper_wo_chunks( download_url, headers, raw_download=raw_download, - **extra_kwargs + **extra_kwargs, ) if w_range: @@ -1222,7 +1280,8 @@ def _do_download_helper_wo_chunks( headers=headers, start=1, end=3, - checksum="md5", + checksum="auto", + retry=retry, ) else: patched.assert_called_once_with( @@ -1231,20 +1290,14 @@ def _do_download_helper_wo_chunks( headers=headers, start=None, end=None, - checksum="md5", + checksum="auto", + retry=retry, ) patched.return_value.consume.assert_called_once_with( transport, timeout=expected_timeout ) - retry_strategy = patched.return_value._retry_strategy - retry = extra_kwargs.get("retry", None) - if retry is None: - self.assertEqual(retry_strategy.max_retries, 0) - else: - self.assertEqual(retry_strategy.max_sleep, retry._maximum) - def test__do_download_wo_chunks_wo_range_wo_raw(self): self._do_download_helper_wo_chunks(w_range=False, 
raw_download=False) @@ -1341,7 +1394,7 @@ def side_effect(*args, **kwargs): end=3, raw_download=raw_download, checksum=checksum, - **timeout_kwarg + **timeout_kwarg, ) else: blob._do_download( @@ -1351,16 +1404,28 @@ def side_effect(*args, **kwargs): headers, raw_download=raw_download, checksum=checksum, - **timeout_kwarg + **timeout_kwarg, ) if w_range: patched.assert_called_once_with( - download_url, chunk_size, file_obj, headers=headers, start=1, end=3 + download_url, + chunk_size, + file_obj, + headers=headers, + start=1, + end=3, + retry=DEFAULT_RETRY, ) else: patched.assert_called_once_with( - download_url, chunk_size, file_obj, headers=headers, start=0, end=None + download_url, + chunk_size, + file_obj, + headers=headers, + start=0, + end=None, + retry=DEFAULT_RETRY, ) download.consume_next_chunk.assert_called_once_with( transport, timeout=expected_timeout @@ -1406,33 +1471,35 @@ def test_download_to_file_with_failure(self): blob_name = "blob-name" client = self._make_client() - client.download_blob_to_file.side_effect = NotFound("testing") bucket = _Bucket(client) blob = self._make_one(blob_name, bucket=bucket) file_obj = io.BytesIO() - with self.assertRaises(NotFound): - blob.download_to_file(file_obj) + with mock.patch.object(blob, "_prep_and_do_download"): + blob._prep_and_do_download.side_effect = NotFound("testing") - self.assertEqual(file_obj.tell(), 0) + with self.assertRaises(NotFound): + blob.download_to_file(file_obj) - expected_timeout = self._get_default_timeout() - client.download_blob_to_file.assert_called_once_with( - blob, - file_obj, - start=None, - end=None, - if_etag_match=None, - if_etag_not_match=None, - if_generation_match=None, - if_generation_not_match=None, - if_metageneration_match=None, - if_metageneration_not_match=None, - raw_download=False, - timeout=expected_timeout, - checksum="md5", - retry=DEFAULT_RETRY, - ) + self.assertEqual(file_obj.tell(), 0) + + expected_timeout = self._get_default_timeout() + blob._prep_and_do_download.assert_called_once_with( + file_obj, + client=None, + start=None, + end=None, + if_etag_match=None, + if_etag_not_match=None, + if_generation_match=None, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + raw_download=False, + timeout=expected_timeout, + checksum="auto", + retry=DEFAULT_RETRY, + ) def test_download_to_file_wo_media_link(self): blob_name = "blob-name" @@ -1441,28 +1508,29 @@ def test_download_to_file_wo_media_link(self): blob = self._make_one(blob_name, bucket=bucket) file_obj = io.BytesIO() - blob.download_to_file(file_obj) + with mock.patch.object(blob, "_prep_and_do_download"): + blob.download_to_file(file_obj) - # Make sure the media link is still unknown. - self.assertIsNone(blob.media_link) + # Make sure the media link is still unknown. 
+ self.assertIsNone(blob.media_link) - expected_timeout = self._get_default_timeout() - client.download_blob_to_file.assert_called_once_with( - blob, - file_obj, - start=None, - end=None, - if_etag_match=None, - if_etag_not_match=None, - if_generation_match=None, - if_generation_not_match=None, - if_metageneration_match=None, - if_metageneration_not_match=None, - raw_download=False, - timeout=expected_timeout, - checksum="md5", - retry=DEFAULT_RETRY, - ) + expected_timeout = self._get_default_timeout() + blob._prep_and_do_download.assert_called_once_with( + file_obj, + client=None, + start=None, + end=None, + if_etag_match=None, + if_etag_not_match=None, + if_generation_match=None, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + raw_download=False, + timeout=expected_timeout, + checksum="auto", + retry=DEFAULT_RETRY, + ) def test_download_to_file_w_etag_match(self): etag = "kittens" @@ -1470,25 +1538,26 @@ def test_download_to_file_w_etag_match(self): blob = self._make_one("blob-name", bucket=_Bucket(client)) file_obj = io.BytesIO() - blob.download_to_file(file_obj, if_etag_not_match=etag) + with mock.patch.object(blob, "_prep_and_do_download"): + blob.download_to_file(file_obj, if_etag_not_match=etag) - expected_timeout = self._get_default_timeout() - client.download_blob_to_file.assert_called_once_with( - blob, - file_obj, - start=None, - end=None, - if_etag_match=None, - if_etag_not_match=etag, - if_generation_match=None, - if_generation_not_match=None, - if_metageneration_match=None, - if_metageneration_not_match=None, - raw_download=False, - timeout=expected_timeout, - checksum="md5", - retry=DEFAULT_RETRY, - ) + expected_timeout = self._get_default_timeout() + blob._prep_and_do_download.assert_called_once_with( + file_obj, + client=None, + start=None, + end=None, + if_etag_match=None, + if_etag_not_match=etag, + if_generation_match=None, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + raw_download=False, + timeout=expected_timeout, + checksum="auto", + retry=DEFAULT_RETRY, + ) def test_download_to_file_w_generation_match(self): generation_number = 6 @@ -1496,25 +1565,26 @@ def test_download_to_file_w_generation_match(self): blob = self._make_one("blob-name", bucket=_Bucket(client)) file_obj = io.BytesIO() - blob.download_to_file(file_obj, if_generation_not_match=generation_number) + with mock.patch.object(blob, "_prep_and_do_download"): + blob.download_to_file(file_obj, if_generation_not_match=generation_number) - expected_timeout = self._get_default_timeout() - client.download_blob_to_file.assert_called_once_with( - blob, - file_obj, - start=None, - end=None, - if_etag_match=None, - if_etag_not_match=None, - if_generation_match=None, - if_generation_not_match=generation_number, - if_metageneration_match=None, - if_metageneration_not_match=None, - raw_download=False, - timeout=expected_timeout, - checksum="md5", - retry=DEFAULT_RETRY, - ) + expected_timeout = self._get_default_timeout() + blob._prep_and_do_download.assert_called_once_with( + file_obj, + client=None, + start=None, + end=None, + if_etag_match=None, + if_etag_not_match=None, + if_generation_match=None, + if_generation_not_match=generation_number, + if_metageneration_match=None, + if_metageneration_not_match=None, + raw_download=False, + timeout=expected_timeout, + checksum="auto", + retry=DEFAULT_RETRY, + ) def _download_to_file_helper( self, use_chunks, raw_download, timeout=None, **extra_kwargs @@ -1539,28 
+1609,30 @@ def _download_to_file_helper( extra_kwargs.update(timeout_kwarg) file_obj = io.BytesIO() - if raw_download: - blob.download_to_file(file_obj, raw_download=True, **extra_kwargs) - else: - blob.download_to_file(file_obj, **extra_kwargs) - expected_retry = extra_kwargs.get("retry", DEFAULT_RETRY) - client.download_blob_to_file.assert_called_once_with( - blob, - file_obj, - start=None, - end=None, - if_etag_match=None, - if_etag_not_match=None, - if_generation_match=None, - if_generation_not_match=None, - if_metageneration_match=None, - if_metageneration_not_match=None, - raw_download=raw_download, - timeout=expected_timeout, - checksum="md5", - retry=expected_retry, - ) + with mock.patch.object(blob, "_prep_and_do_download"): + if raw_download: + blob.download_to_file(file_obj, raw_download=True, **extra_kwargs) + else: + blob.download_to_file(file_obj, **extra_kwargs) + + expected_retry = extra_kwargs.get("retry", DEFAULT_RETRY) + blob._prep_and_do_download.assert_called_once_with( + file_obj, + client=None, + start=None, + end=None, + if_etag_match=None, + if_etag_not_match=None, + if_generation_match=None, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + raw_download=raw_download, + timeout=expected_timeout, + checksum="auto", + retry=expected_retry, + ) def test_download_to_file_wo_chunks_wo_raw(self): self._download_to_file_helper(use_chunks=False, raw_download=False) @@ -1597,48 +1669,51 @@ def _download_to_filename_helper( blob = self._make_one(blob_name, bucket=bucket, properties=properties) - with _NamedTemporaryFile() as temp: - if timeout is None: - blob.download_to_filename( - temp.name, raw_download=raw_download, **extra_kwargs - ) - else: - blob.download_to_filename( - temp.name, - raw_download=raw_download, - timeout=timeout, - **extra_kwargs - ) - - if updated is None: - self.assertIsNone(blob.updated) - else: - mtime = os.path.getmtime(temp.name) - updated_time = blob.updated.timestamp() - self.assertEqual(mtime, updated_time) - - expected_timeout = self._get_default_timeout() if timeout is None else timeout + with mock.patch.object(blob, "_prep_and_do_download"): + with _NamedTemporaryFile() as temp: + if timeout is None: + blob.download_to_filename( + temp.name, raw_download=raw_download, **extra_kwargs + ) + else: + blob.download_to_filename( + temp.name, + raw_download=raw_download, + timeout=timeout, + **extra_kwargs, + ) + + if updated is None: + self.assertIsNone(blob.updated) + else: + mtime = os.path.getmtime(temp.name) + updated_time = blob.updated.timestamp() + self.assertEqual(mtime, updated_time) + + expected_timeout = ( + self._get_default_timeout() if timeout is None else timeout + ) - expected_retry = extra_kwargs.get("retry", DEFAULT_RETRY) + expected_retry = extra_kwargs.get("retry", DEFAULT_RETRY) - client.download_blob_to_file.assert_called_once_with( - blob, - mock.ANY, - start=None, - end=None, - if_etag_match=None, - if_etag_not_match=None, - if_generation_match=None, - if_generation_not_match=None, - if_metageneration_match=None, - if_metageneration_not_match=None, - raw_download=raw_download, - timeout=expected_timeout, - checksum="md5", - retry=expected_retry, - ) - stream = client.download_blob_to_file.mock_calls[0].args[1] - self.assertEqual(stream.name, temp.name) + blob._prep_and_do_download.assert_called_once_with( + mock.ANY, + client=None, + start=None, + end=None, + raw_download=raw_download, + if_etag_match=None, + if_etag_not_match=None, + if_generation_match=None, + 
if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + timeout=expected_timeout, + checksum="auto", + retry=expected_retry, + ) + stream = blob._prep_and_do_download.mock_calls[0].args[0] + self.assertEqual(stream.name, temp.name) def test_download_to_filename_w_updated_wo_raw(self): updated = "2014-12-06T13:13:50.690Z" @@ -1672,28 +1747,29 @@ def test_download_to_filename_w_etag_match(self): client = self._make_client() blob = self._make_one("blob-name", bucket=_Bucket(client)) - with _NamedTemporaryFile() as temp: - blob.download_to_filename(temp.name, if_etag_match=etag) + with mock.patch.object(blob, "_prep_and_do_download"): + with _NamedTemporaryFile() as temp: + blob.download_to_filename(temp.name, if_etag_match=etag) - expected_timeout = self._get_default_timeout() - client.download_blob_to_file.assert_called_once_with( - blob, - mock.ANY, - start=None, - end=None, - if_etag_match=etag, - if_etag_not_match=None, - if_generation_match=None, - if_generation_not_match=None, - if_metageneration_match=None, - if_metageneration_not_match=None, - raw_download=False, - timeout=expected_timeout, - checksum="md5", - retry=DEFAULT_RETRY, - ) - stream = client.download_blob_to_file.mock_calls[0].args[1] - self.assertEqual(stream.name, temp.name) + expected_timeout = self._get_default_timeout() + blob._prep_and_do_download.assert_called_once_with( + mock.ANY, + client=None, + start=None, + end=None, + if_etag_match=etag, + if_etag_not_match=None, + if_generation_match=None, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + raw_download=False, + timeout=expected_timeout, + checksum="auto", + retry=DEFAULT_RETRY, + ) + stream = blob._prep_and_do_download.mock_calls[0].args[0] + self.assertEqual(stream.name, temp.name) def test_download_to_filename_w_generation_match(self): from google.cloud._testing import _NamedTemporaryFile @@ -1702,107 +1778,155 @@ def test_download_to_filename_w_generation_match(self): client = self._make_client() blob = self._make_one("blob-name", bucket=_Bucket(client)) - with _NamedTemporaryFile() as temp: - blob.download_to_filename(temp.name, if_generation_match=generation_number) + with mock.patch.object(blob, "_prep_and_do_download"): + with _NamedTemporaryFile() as temp: + blob.download_to_filename( + temp.name, if_generation_match=generation_number + ) - expected_timeout = self._get_default_timeout() - client.download_blob_to_file.assert_called_once_with( - blob, - mock.ANY, - start=None, - end=None, - if_etag_match=None, - if_etag_not_match=None, - if_generation_match=generation_number, - if_generation_not_match=None, - if_metageneration_match=None, - if_metageneration_not_match=None, - raw_download=False, - timeout=expected_timeout, - checksum="md5", - retry=DEFAULT_RETRY, - ) - stream = client.download_blob_to_file.mock_calls[0].args[1] - self.assertEqual(stream.name, temp.name) + expected_timeout = self._get_default_timeout() + blob._prep_and_do_download.assert_called_once_with( + mock.ANY, + client=None, + start=None, + end=None, + if_etag_match=None, + if_etag_not_match=None, + if_generation_match=generation_number, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + raw_download=False, + timeout=expected_timeout, + checksum="auto", + retry=DEFAULT_RETRY, + ) + stream = blob._prep_and_do_download.mock_calls[0].args[0] + self.assertEqual(stream.name, temp.name) def test_download_to_filename_corrupted(self): - from 
google.resumable_media import DataCorruption - blob_name = "blob-name" client = self._make_client() bucket = _Bucket(client) blob = self._make_one(blob_name, bucket=bucket) - client.download_blob_to_file.side_effect = DataCorruption("testing") - # Try to download into a temporary file (don't use - # `_NamedTemporaryFile` it will try to remove after the file is - # already removed) - filehandle, filename = tempfile.mkstemp() - os.close(filehandle) - self.assertTrue(os.path.exists(filename)) + with mock.patch.object(blob, "_prep_and_do_download"): + blob._prep_and_do_download.side_effect = DataCorruption("testing") - with self.assertRaises(DataCorruption): - blob.download_to_filename(filename) + # Try to download into a temporary file (don't use + # `_NamedTemporaryFile` it will try to remove after the file is + # already removed) + filehandle, filename = tempfile.mkstemp() + os.close(filehandle) + self.assertTrue(os.path.exists(filename)) - # Make sure the file was cleaned up. - self.assertFalse(os.path.exists(filename)) + with self.assertRaises(DataCorruption): + blob.download_to_filename(filename) - expected_timeout = self._get_default_timeout() - client.download_blob_to_file.assert_called_once_with( - blob, - mock.ANY, - start=None, - end=None, - if_etag_match=None, - if_etag_not_match=None, - if_generation_match=None, - if_generation_not_match=None, - if_metageneration_match=None, - if_metageneration_not_match=None, - raw_download=False, - timeout=expected_timeout, - checksum="md5", - retry=DEFAULT_RETRY, - ) - stream = client.download_blob_to_file.mock_calls[0].args[1] - self.assertEqual(stream.name, filename) + # Make sure the file was cleaned up. + self.assertFalse(os.path.exists(filename)) - def _download_as_bytes_helper(self, raw_download, timeout=None, **extra_kwargs): + expected_timeout = self._get_default_timeout() + blob._prep_and_do_download.assert_called_once_with( + mock.ANY, + client=None, + start=None, + end=None, + if_etag_match=None, + if_etag_not_match=None, + if_generation_match=None, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + raw_download=False, + timeout=expected_timeout, + checksum="auto", + retry=DEFAULT_RETRY, + ) + stream = blob._prep_and_do_download.mock_calls[0].args[0] + self.assertEqual(stream.name, filename) + + def test_download_to_filename_notfound(self): blob_name = "blob-name" client = self._make_client() bucket = _Bucket(client) blob = self._make_one(blob_name, bucket=bucket) - if timeout is None: + with mock.patch.object(blob, "_prep_and_do_download"): + blob._prep_and_do_download.side_effect = NotFound("testing") + + # Try to download into a temporary file (don't use + # `_NamedTemporaryFile` it will try to remove after the file is + # already removed) + filehandle, filename = tempfile.mkstemp() + os.close(filehandle) + self.assertTrue(os.path.exists(filename)) + + with self.assertRaises(NotFound): + blob.download_to_filename(filename) + + # Make sure the file was cleaned up. 
+ self.assertFalse(os.path.exists(filename)) + expected_timeout = self._get_default_timeout() - fetched = blob.download_as_bytes(raw_download=raw_download, **extra_kwargs) - else: - expected_timeout = timeout - fetched = blob.download_as_bytes( - raw_download=raw_download, timeout=timeout, **extra_kwargs + blob._prep_and_do_download.assert_called_once_with( + mock.ANY, + client=None, + start=None, + end=None, + if_etag_match=None, + if_etag_not_match=None, + if_generation_match=None, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + raw_download=False, + timeout=expected_timeout, + checksum="auto", + retry=DEFAULT_RETRY, ) - self.assertEqual(fetched, b"") + stream = blob._prep_and_do_download.mock_calls[0].args[0] + self.assertEqual(stream.name, filename) - expected_retry = extra_kwargs.get("retry", DEFAULT_RETRY) + def _download_as_bytes_helper(self, raw_download, timeout=None, **extra_kwargs): + blob_name = "blob-name" + client = self._make_client() + bucket = _Bucket(client) + blob = self._make_one(blob_name, bucket=bucket) - client.download_blob_to_file.assert_called_once_with( - blob, - mock.ANY, - start=None, - end=None, - if_etag_match=None, - if_etag_not_match=None, - if_generation_match=None, - if_generation_not_match=None, - if_metageneration_match=None, - if_metageneration_not_match=None, - raw_download=raw_download, - timeout=expected_timeout, - checksum="md5", - retry=expected_retry, - ) - stream = client.download_blob_to_file.mock_calls[0].args[1] - self.assertIsInstance(stream, io.BytesIO) + with mock.patch.object(blob, "_prep_and_do_download"): + if timeout is None: + expected_timeout = self._get_default_timeout() + fetched = blob.download_as_bytes( + raw_download=raw_download, **extra_kwargs + ) + else: + expected_timeout = timeout + fetched = blob.download_as_bytes( + raw_download=raw_download, timeout=timeout, **extra_kwargs + ) + self.assertEqual(fetched, b"") + + expected_retry = extra_kwargs.get("retry", DEFAULT_RETRY) + + blob._prep_and_do_download.assert_called_once_with( + mock.ANY, + client=None, + start=None, + end=None, + raw_download=raw_download, + if_etag_match=None, + if_etag_not_match=None, + if_generation_match=None, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + timeout=expected_timeout, + checksum="auto", + retry=expected_retry, + ) + stream = blob._prep_and_do_download.mock_calls[0].args[0] + self.assertIsInstance(stream, io.BytesIO) def test_download_as_bytes_w_custom_timeout(self): self._download_as_bytes_helper(raw_download=False, timeout=9.58) @@ -1815,14 +1939,14 @@ def test_download_as_bytes_w_etag_match(self): blob = self._make_one( "blob-name", bucket=_Bucket(client), properties={"mediaLink": MEDIA_LINK} ) - client.download_blob_to_file = mock.Mock() + blob._prep_and_do_download = mock.Mock() fetched = blob.download_as_bytes(if_etag_match=ETAG) self.assertEqual(fetched, b"") - client.download_blob_to_file.assert_called_once_with( - blob, + blob._prep_and_do_download.assert_called_once_with( mock.ANY, + client=None, start=None, end=None, raw_download=False, @@ -1833,7 +1957,7 @@ def test_download_as_bytes_w_etag_match(self): if_metageneration_match=None, if_metageneration_not_match=None, timeout=self._get_default_timeout(), - checksum="md5", + checksum="auto", retry=DEFAULT_RETRY, ) @@ -1845,14 +1969,14 @@ def test_download_as_bytes_w_generation_match(self): blob = self._make_one( "blob-name", bucket=_Bucket(client), properties={"mediaLink": 
MEDIA_LINK} ) - client.download_blob_to_file = mock.Mock() + blob._prep_and_do_download = mock.Mock() fetched = blob.download_as_bytes(if_generation_match=GENERATION_NUMBER) self.assertEqual(fetched, b"") - client.download_blob_to_file.assert_called_once_with( - blob, + blob._prep_and_do_download.assert_called_once_with( mock.ANY, + client=None, start=None, end=None, raw_download=False, @@ -1863,7 +1987,7 @@ def test_download_as_bytes_w_generation_match(self): if_metageneration_match=None, if_metageneration_not_match=None, timeout=self._get_default_timeout(), - checksum="md5", + checksum="auto", retry=DEFAULT_RETRY, ) @@ -1895,9 +2019,9 @@ def _download_as_text_helper( encoding=None, charset=None, no_charset=False, - expected_value=u"DEADBEEF", + expected_value="DEADBEEF", payload=None, - **extra_kwargs + **extra_kwargs, ): if payload is None: if encoding is not None: @@ -1911,7 +2035,7 @@ def _download_as_text_helper( properties = {} if charset is not None: - properties["contentType"] = "text/plain; charset={}".format(charset) + properties["contentType"] = f"text/plain; charset={charset}" elif no_charset: properties = {"contentType": "text/plain"} @@ -2001,22 +2125,26 @@ def test_download_as_text_w_custom_timeout(self): def test_download_as_text_w_if_etag_match_str(self): self._download_as_text_helper( - raw_download=False, if_etag_match="kittens", + raw_download=False, + if_etag_match="kittens", ) def test_download_as_text_w_if_etag_match_list(self): self._download_as_text_helper( - raw_download=False, if_etag_match=["kittens", "fluffy"], + raw_download=False, + if_etag_match=["kittens", "fluffy"], ) def test_download_as_text_w_if_etag_not_match_str(self): self._download_as_text_helper( - raw_download=False, if_etag_not_match="kittens", + raw_download=False, + if_etag_not_match="kittens", ) def test_download_as_text_w_if_etag_not_match_list(self): self._download_as_text_helper( - raw_download=False, if_etag_not_match=["kittens", "fluffy"], + raw_download=False, + if_etag_not_match=["kittens", "fluffy"], ) def test_download_as_text_w_if_generation_match(self): @@ -2034,16 +2162,18 @@ def test_download_as_text_w_if_metageneration_not_match(self): def test_download_as_text_w_encoding(self): encoding = "utf-16" self._download_as_text_helper( - raw_download=False, encoding=encoding, + raw_download=False, + encoding=encoding, ) def test_download_as_text_w_no_charset(self): self._download_as_text_helper( - raw_download=False, no_charset=True, + raw_download=False, + no_charset=True, ) def test_download_as_text_w_non_ascii_w_explicit_encoding(self): - expected_value = u"\x0AFe" + expected_value = "\x0AFe" encoding = "utf-16" charset = "latin1" payload = expected_value.encode(encoding) @@ -2056,7 +2186,7 @@ def test_download_as_text_w_non_ascii_w_explicit_encoding(self): ) def test_download_as_text_w_non_ascii_wo_explicit_encoding_w_charset(self): - expected_value = u"\x0AFe" + expected_value = "\x0AFe" charset = "utf-16" payload = expected_value.encode(charset) self._download_as_text_helper( @@ -2076,14 +2206,14 @@ def test_download_as_string(self, mock_warn): blob = self._make_one( "blob-name", bucket=_Bucket(client), properties={"mediaLink": MEDIA_LINK} ) - client.download_blob_to_file = mock.Mock() + blob._prep_and_do_download = mock.Mock() fetched = blob.download_as_string() self.assertEqual(fetched, b"") - client.download_blob_to_file.assert_called_once_with( - blob, + blob._prep_and_do_download.assert_called_once_with( mock.ANY, + client=None, start=None, end=None, raw_download=False, @@ 
-2094,12 +2224,14 @@ def test_download_as_string(self, mock_warn): if_metageneration_match=None, if_metageneration_not_match=None, timeout=self._get_default_timeout(), - checksum="md5", + checksum="auto", retry=DEFAULT_RETRY, ) - mock_warn.assert_called_once_with( - _DOWNLOAD_AS_STRING_DEPRECATED, PendingDeprecationWarning, stacklevel=2, + mock_warn.assert_any_call( + _DOWNLOAD_AS_STRING_DEPRECATED, + PendingDeprecationWarning, + stacklevel=2, ) @mock.patch("warnings.warn") @@ -2112,14 +2244,14 @@ def test_download_as_string_no_retry(self, mock_warn): blob = self._make_one( "blob-name", bucket=_Bucket(client), properties={"mediaLink": MEDIA_LINK} ) - client.download_blob_to_file = mock.Mock() + blob._prep_and_do_download = mock.Mock() fetched = blob.download_as_string(retry=None) self.assertEqual(fetched, b"") - client.download_blob_to_file.assert_called_once_with( - blob, + blob._prep_and_do_download.assert_called_once_with( mock.ANY, + client=None, start=None, end=None, raw_download=False, @@ -2130,42 +2262,44 @@ def test_download_as_string_no_retry(self, mock_warn): if_metageneration_match=None, if_metageneration_not_match=None, timeout=self._get_default_timeout(), - checksum="md5", + checksum="auto", retry=None, ) - mock_warn.assert_called_once_with( - _DOWNLOAD_AS_STRING_DEPRECATED, PendingDeprecationWarning, stacklevel=2, + mock_warn.assert_any_call( + _DOWNLOAD_AS_STRING_DEPRECATED, + PendingDeprecationWarning, + stacklevel=2, ) def test__get_content_type_explicit(self): - blob = self._make_one(u"blob-name", bucket=None) + blob = self._make_one("blob-name", bucket=None) - content_type = u"text/plain" + content_type = "text/plain" return_value = blob._get_content_type(content_type) self.assertEqual(return_value, content_type) def test__get_content_type_from_blob(self): - blob = self._make_one(u"blob-name", bucket=None) - blob.content_type = u"video/mp4" + blob = self._make_one("blob-name", bucket=None) + blob.content_type = "video/mp4" return_value = blob._get_content_type(None) self.assertEqual(return_value, blob.content_type) def test__get_content_type_from_filename(self): - blob = self._make_one(u"blob-name", bucket=None) + blob = self._make_one("blob-name", bucket=None) return_value = blob._get_content_type(None, filename="archive.tar") self.assertEqual(return_value, "application/x-tar") def test__get_content_type_default(self): - blob = self._make_one(u"blob-name", bucket=None) + blob = self._make_one("blob-name", bucket=None) return_value = blob._get_content_type(None) - self.assertEqual(return_value, u"application/octet-stream") + self.assertEqual(return_value, "application/octet-stream") def test__get_writable_metadata_no_changes(self): - name = u"blob-name" + name = "blob-name" blob = self._make_one(name, bucket=None) object_metadata = blob._get_writable_metadata() @@ -2173,7 +2307,7 @@ def test__get_writable_metadata_no_changes(self): self.assertEqual(object_metadata, expected) def test__get_writable_metadata_with_changes(self): - name = u"blob-name" + name = "blob-name" blob = self._make_one(name, bucket=None) blob.storage_class = "NEARLINE" blob.cache_control = "max-age=3600" @@ -2189,7 +2323,7 @@ def test__get_writable_metadata_with_changes(self): self.assertEqual(object_metadata, expected) def test__get_writable_metadata_unwritable_field(self): - name = u"blob-name" + name = "blob-name" properties = {"updated": "2016-10-16T18:18:18.181Z"} blob = self._make_one(name, bucket=None, properties=properties) # Fake that `updated` is in changes. 
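Editor's note: the hunks above move the blob download tests off the deprecated `download_as_string()` path and onto `_prep_and_do_download`, with `checksum="auto"` replacing `checksum="md5"`. As a minimal usage sketch only — not part of the patch, with "my-bucket" and "data/report.txt" as placeholder names and assuming default application credentials — the corresponding public-API call looks roughly like this:

# Hedged sketch: placeholder bucket/object names; assumes a google-cloud-storage
# version that accepts checksum="auto" (the default this patch migrates the tests toward).
from google.cloud import storage
from google.cloud.storage.retry import DEFAULT_RETRY

client = storage.Client()
blob = client.bucket("my-bucket").blob("data/report.txt")

# download_as_string() is deprecated; download_as_bytes() is the replacement.
payload = blob.download_as_bytes(checksum="auto", retry=DEFAULT_RETRY)
print(payload.decode("utf-8"))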
@@ -2200,7 +2334,7 @@ def test__get_writable_metadata_unwritable_field(self): self.assertEqual(object_metadata, expected) def test__set_metadata_to_none(self): - name = u"blob-name" + name = "blob-name" blob = self._make_one(name, bucket=None) blob.storage_class = "NEARLINE" blob.cache_control = "max-age=3600" @@ -2210,22 +2344,44 @@ def test__set_metadata_to_none(self): patch_prop.assert_called_once_with("metadata", None) def test__get_upload_arguments(self): - name = u"blob-name" + name = "blob-name" key = b"[pXw@,p@@AfBfrR3x-2b2SCHR,.?YwRO" + custom_headers = { + "x-goog-custom-audit-foo": "bar", + "x-goog-custom-audit-user": "baz", + } + client = mock.Mock(_connection=_Connection) + client._connection.user_agent = "testing 1.2.3" + client._extra_headers = custom_headers blob = self._make_one(name, bucket=None, encryption_key=key) blob.content_disposition = "inline" - content_type = u"image/jpeg" - info = blob._get_upload_arguments(content_type) + COMMAND = "tm.upload_many" + content_type = "image/jpeg" + with patch.object( + _helpers, "_get_invocation_id", return_value=GCCL_INVOCATION_TEST_CONST + ): + info = blob._get_upload_arguments(client, content_type, command=COMMAND) headers, object_metadata, new_content_type = info header_key_value = "W3BYd0AscEBAQWZCZnJSM3gtMmIyU0NIUiwuP1l3Uk8=" header_key_hash_value = "G0++dxF4q5rG4o9kE8gvEKn15RH6wLm0wXV1MgAlXOg=" - expected_headers = { - "X-Goog-Encryption-Algorithm": "AES256", - "X-Goog-Encryption-Key": header_key_value, - "X-Goog-Encryption-Key-Sha256": header_key_hash_value, - } + with patch.object( + _helpers, "_get_invocation_id", return_value=GCCL_INVOCATION_TEST_CONST + ): + expected_headers = { + **_get_default_headers( + client._connection.user_agent, content_type, command=COMMAND + ), + "X-Goog-Encryption-Algorithm": "AES256", + "X-Goog-Encryption-Key": header_key_value, + "X-Goog-Encryption-Key-Sha256": header_key_hash_value, + **custom_headers, + } + self.assertEqual( + headers["X-Goog-API-Client"], + f"{client._connection.user_agent} {GCCL_INVOCATION_TEST_CONST} gccl-gcs-cmd/{COMMAND}", + ) self.assertEqual(headers, expected_headers) expected_metadata = { "contentDisposition": blob.content_disposition, @@ -2247,7 +2403,6 @@ def _do_multipart_success( mock_get_boundary, client=None, size=None, - num_retries=None, user_project=None, predefined_acl=None, if_generation_match=None, @@ -2261,7 +2416,7 @@ def _do_multipart_success( retry=None, ): bucket = _Bucket(name="w00t", user_project=user_project) - blob = self._make_one(u"blob-name", bucket=bucket, kms_key_name=kms_key_name) + blob = self._make_one("blob-name", bucket=bucket, kms_key_name=kms_key_name) self.assertIsNone(blob.chunk_size) if metadata: self.assertIsNone(blob.metadata) @@ -2275,6 +2430,7 @@ def _do_multipart_success( client = mock.Mock(_http=transport, _connection=_Connection, spec=["_http"]) client._connection.API_BASE_URL = "https://storage.googleapis.com" + client._extra_headers = {} # Mock get_api_base_url_for_mtls function. 
mtls_url = "https://foo.mtls" @@ -2285,7 +2441,7 @@ def _do_multipart_success( data = b"data here hear hier" stream = io.BytesIO(data) - content_type = u"application/xml" + content_type = "application/xml" if timeout is None: expected_timeout = self._get_default_timeout() @@ -2294,20 +2450,23 @@ def _do_multipart_success( expected_timeout = timeout timeout_kwarg = {"timeout": timeout} - response = blob._do_multipart_upload( - client, - stream, - content_type, - size, - num_retries, - predefined_acl, - if_generation_match, - if_generation_not_match, - if_metageneration_match, - if_metageneration_not_match, - retry=retry, - **timeout_kwarg - ) + with patch.object( + _helpers, "_get_invocation_id", return_value=GCCL_INVOCATION_TEST_CONST + ): + response = blob._do_multipart_upload( + client, + stream, + content_type, + size, + predefined_acl, + if_generation_match, + if_generation_not_match, + if_metageneration_match, + if_metageneration_not_match, + checksum=None, + retry=retry, + **timeout_kwarg, + ) # Clean up the get_api_base_url_for_mtls mock. if mtls: @@ -2368,53 +2527,59 @@ def _do_multipart_success( + data_read + b"\r\n--==0==--" ) - headers = {"content-type": b'multipart/related; boundary="==0=="'} + with patch.object( + _helpers, "_get_invocation_id", return_value=GCCL_INVOCATION_TEST_CONST + ): + headers = { + **_get_default_headers( + client._connection.user_agent, + b'multipart/related; boundary="==0=="', + "application/xml", + ), + **client._extra_headers, + } client._http.request.assert_called_once_with( "POST", upload_url, data=payload, headers=headers, timeout=expected_timeout ) - @mock.patch(u"google.resumable_media._upload.get_boundary", return_value=b"==0==") + @mock.patch( + "google.cloud.storage._media._upload.get_boundary", return_value=b"==0==" + ) def test__do_multipart_upload_no_size(self, mock_get_boundary): self._do_multipart_success(mock_get_boundary, predefined_acl="private") - @mock.patch(u"google.resumable_media._upload.get_boundary", return_value=b"==0==") + @mock.patch( + "google.cloud.storage._media._upload.get_boundary", return_value=b"==0==" + ) def test__do_multipart_upload_no_size_retry(self, mock_get_boundary): self._do_multipart_success( mock_get_boundary, predefined_acl="private", retry=DEFAULT_RETRY ) - @mock.patch(u"google.resumable_media._upload.get_boundary", return_value=b"==0==") - def test__do_multipart_upload_no_size_num_retries(self, mock_get_boundary): - self._do_multipart_success( - mock_get_boundary, predefined_acl="private", num_retries=2 - ) - - @mock.patch(u"google.resumable_media._upload.get_boundary", return_value=b"==0==") - def test__do_multipart_upload_no_size_retry_conflict(self, mock_get_boundary): - with self.assertRaises(ValueError): - self._do_multipart_success( - mock_get_boundary, - predefined_acl="private", - num_retries=2, - retry=DEFAULT_RETRY, - ) - - @mock.patch(u"google.resumable_media._upload.get_boundary", return_value=b"==0==") + @mock.patch( + "google.cloud.storage._media._upload.get_boundary", return_value=b"==0==" + ) def test__do_multipart_upload_no_size_mtls(self, mock_get_boundary): self._do_multipart_success( mock_get_boundary, predefined_acl="private", mtls=True ) - @mock.patch(u"google.resumable_media._upload.get_boundary", return_value=b"==0==") + @mock.patch( + "google.cloud.storage._media._upload.get_boundary", return_value=b"==0==" + ) def test__do_multipart_upload_with_size(self, mock_get_boundary): self._do_multipart_success(mock_get_boundary, size=10) - 
@mock.patch(u"google.resumable_media._upload.get_boundary", return_value=b"==0==") + @mock.patch( + "google.cloud.storage._media._upload.get_boundary", return_value=b"==0==" + ) def test__do_multipart_upload_with_user_project(self, mock_get_boundary): user_project = "user-project-123" self._do_multipart_success(mock_get_boundary, user_project=user_project) - @mock.patch(u"google.resumable_media._upload.get_boundary", return_value=b"==0==") + @mock.patch( + "google.cloud.storage._media._upload.get_boundary", return_value=b"==0==" + ) def test__do_multipart_upload_with_kms(self, mock_get_boundary): kms_resource = ( "projects/test-project-123/" @@ -2424,7 +2589,9 @@ def test__do_multipart_upload_with_kms(self, mock_get_boundary): ) self._do_multipart_success(mock_get_boundary, kms_key_name=kms_resource) - @mock.patch(u"google.resumable_media._upload.get_boundary", return_value=b"==0==") + @mock.patch( + "google.cloud.storage._media._upload.get_boundary", return_value=b"==0==" + ) def test__do_multipart_upload_with_kms_with_version(self, mock_get_boundary): kms_resource = ( "projects/test-project-123/" @@ -2435,39 +2602,66 @@ def test__do_multipart_upload_with_kms_with_version(self, mock_get_boundary): ) self._do_multipart_success(mock_get_boundary, kms_key_name=kms_resource) - @mock.patch(u"google.resumable_media._upload.get_boundary", return_value=b"==0==") + @mock.patch( + "google.cloud.storage._media._upload.get_boundary", return_value=b"==0==" + ) def test__do_multipart_upload_with_retry(self, mock_get_boundary): self._do_multipart_success(mock_get_boundary, retry=DEFAULT_RETRY) - @mock.patch(u"google.resumable_media._upload.get_boundary", return_value=b"==0==") + @mock.patch( + "google.cloud.storage._media._upload.get_boundary", return_value=b"==0==" + ) def test__do_multipart_upload_with_generation_match(self, mock_get_boundary): self._do_multipart_success( mock_get_boundary, if_generation_match=4, if_metageneration_match=4 ) - @mock.patch(u"google.resumable_media._upload.get_boundary", return_value=b"==0==") + @mock.patch( + "google.cloud.storage._media._upload.get_boundary", return_value=b"==0==" + ) def test__do_multipart_upload_with_custom_timeout(self, mock_get_boundary): self._do_multipart_success(mock_get_boundary, timeout=9.58) - @mock.patch(u"google.resumable_media._upload.get_boundary", return_value=b"==0==") + @mock.patch( + "google.cloud.storage._media._upload.get_boundary", return_value=b"==0==" + ) def test__do_multipart_upload_with_generation_not_match(self, mock_get_boundary): self._do_multipart_success( mock_get_boundary, if_generation_not_match=4, if_metageneration_not_match=4 ) - @mock.patch(u"google.resumable_media._upload.get_boundary", return_value=b"==0==") + @mock.patch( + "google.cloud.storage._media._upload.get_boundary", return_value=b"==0==" + ) def test__do_multipart_upload_with_client(self, mock_get_boundary): transport = self._mock_transport(http.client.OK, {}) client = mock.Mock(_http=transport, _connection=_Connection, spec=["_http"]) client._connection.API_BASE_URL = "https://storage.googleapis.com" + client._extra_headers = {} + self._do_multipart_success(mock_get_boundary, client=client) + + @mock.patch( + "google.cloud.storage._media._upload.get_boundary", return_value=b"==0==" + ) + def test__do_multipart_upload_with_client_custom_headers(self, mock_get_boundary): + custom_headers = { + "x-goog-custom-audit-foo": "bar", + "x-goog-custom-audit-user": "baz", + } + transport = 
self._mock_transport(http.client.OK, {}) + client = mock.Mock(_http=transport, _connection=_Connection, spec=["_http"]) + client._connection.API_BASE_URL = "https://storage.googleapis.com" + client._extra_headers = custom_headers self._do_multipart_success(mock_get_boundary, client=client) - @mock.patch(u"google.resumable_media._upload.get_boundary", return_value=b"==0==") + @mock.patch( + "google.cloud.storage._media._upload.get_boundary", return_value=b"==0==" + ) def test__do_multipart_upload_with_metadata(self, mock_get_boundary): self._do_multipart_success(mock_get_boundary, metadata={"test": "test"}) def test__do_multipart_upload_bad_size(self): - blob = self._make_one(u"blob-name", bucket=None) + blob = self._make_one("blob-name", bucket=None) data = b"data here hear hier" stream = io.BytesIO(data) @@ -2489,7 +2683,6 @@ def _initiate_resumable_helper( size=None, extra_headers=None, chunk_size=None, - num_retries=None, user_project=None, predefined_acl=None, if_generation_match=None, @@ -2503,11 +2696,11 @@ def _initiate_resumable_helper( mtls=False, retry=None, ): - from google.resumable_media.requests import ResumableUpload + from google.cloud.storage._media.requests import ResumableUpload from google.cloud.storage.blob import _DEFAULT_CHUNKSIZE bucket = _Bucket(name="whammy", user_project=user_project) - blob = self._make_one(u"blob-name", bucket=bucket, kms_key_name=kms_key_name) + blob = self._make_one("blob-name", bucket=bucket, kms_key_name=kms_key_name) if metadata: self.assertIsNone(blob.metadata) blob._properties["metadata"] = metadata @@ -2535,10 +2728,9 @@ def _initiate_resumable_helper( transport = self._mock_transport(http.client.OK, response_headers) # Create some mock arguments and call the method under test. - client = mock.Mock( - _http=transport, _connection=_Connection, spec=[u"_http"] - ) + client = mock.Mock(_http=transport, _connection=_Connection, spec=["_http"]) client._connection.API_BASE_URL = "https://storage.googleapis.com" + client._extra_headers = {} # Mock get_api_base_url_for_mtls function. 
mtls_url = "https://foo.mtls" @@ -2549,7 +2741,7 @@ def _initiate_resumable_helper( data = b"hello hallo halo hi-low" stream = io.BytesIO(data) - content_type = u"text/plain" + content_type = "text/plain" if timeout is None: expected_timeout = self._get_default_timeout() @@ -2557,23 +2749,24 @@ def _initiate_resumable_helper( else: expected_timeout = timeout timeout_kwarg = {"timeout": timeout} - - upload, transport = blob._initiate_resumable_upload( - client, - stream, - content_type, - size, - num_retries, - extra_headers=extra_headers, - chunk_size=chunk_size, - predefined_acl=predefined_acl, - if_generation_match=if_generation_match, - if_generation_not_match=if_generation_not_match, - if_metageneration_match=if_metageneration_match, - if_metageneration_not_match=if_metageneration_not_match, - retry=retry, - **timeout_kwarg - ) + with patch.object( + _helpers, "_get_invocation_id", return_value=GCCL_INVOCATION_TEST_CONST + ): + upload, transport = blob._initiate_resumable_upload( + client, + stream, + content_type, + size, + extra_headers=extra_headers, + chunk_size=chunk_size, + predefined_acl=predefined_acl, + if_generation_match=if_generation_match, + if_generation_not_match=if_generation_not_match, + if_metageneration_match=if_metageneration_match, + if_metageneration_not_match=if_metageneration_not_match, + retry=retry, + **timeout_kwarg, + ) # Clean up the get_api_base_url_for_mtls mock. if mtls: @@ -2613,11 +2806,23 @@ def _initiate_resumable_helper( upload_url += "?" + urlencode(qs_params) self.assertEqual(upload.upload_url, upload_url) - if extra_headers is None: - self.assertEqual(upload._headers, {}) - else: - self.assertEqual(upload._headers, extra_headers) - self.assertIsNot(upload._headers, extra_headers) + with patch.object( + _helpers, "_get_invocation_id", return_value=GCCL_INVOCATION_TEST_CONST + ): + if extra_headers is None: + expected_headers = { + **_get_default_headers(client._connection.user_agent, content_type), + **client._extra_headers, + } + self.assertEqual(upload._headers, expected_headers) + else: + expected_headers = { + **_get_default_headers(client._connection.user_agent, content_type), + **client._extra_headers, + **extra_headers, + } + self.assertEqual(upload._headers, expected_headers) + self.assertIsNot(upload._headers, expected_headers) self.assertFalse(upload.finished) if chunk_size is None: if blob_chunk_size is None: @@ -2637,29 +2842,27 @@ def _initiate_resumable_helper( self.assertEqual(upload._content_type, content_type) self.assertEqual(upload.resumable_url, resumable_url) retry_strategy = upload._retry_strategy - self.assertFalse(num_retries is not None and retry is not None) - if num_retries is not None and retry is None: - self.assertEqual(retry_strategy.max_retries, num_retries) - elif retry is None: - self.assertEqual(retry_strategy.max_retries, 0) - else: - self.assertEqual(retry_strategy.max_sleep, 60.0) - self.assertEqual(retry_strategy.max_cumulative_retry, 120.0) - self.assertIsNone(retry_strategy.max_retries) + self.assertEqual(retry_strategy, retry) self.assertIs(client._http, transport) # Make sure we never read from the stream. self.assertEqual(stream.tell(), 0) if metadata: - object_metadata = {"name": u"blob-name", "metadata": metadata} + object_metadata = {"name": "blob-name", "metadata": metadata} else: # Check the mocks. 
blob._get_writable_metadata.assert_called_once_with() payload = json.dumps(object_metadata).encode("utf-8") - expected_headers = { - "content-type": "application/json; charset=UTF-8", - "x-upload-content-type": content_type, - } + + with patch.object( + _helpers, "_get_invocation_id", return_value=GCCL_INVOCATION_TEST_CONST + ): + expected_headers = { + **_get_default_headers( + client._connection.user_agent, x_upload_content_type=content_type + ), + **client._extra_headers, + } if size is not None: expected_headers["x-upload-content-length"] = str(size) if extra_headers is not None: @@ -2724,13 +2927,6 @@ def test__initiate_resumable_upload_with_extra_headers(self): def test__initiate_resumable_upload_with_retry(self): self._initiate_resumable_helper(retry=DEFAULT_RETRY) - def test__initiate_resumable_upload_w_num_retries(self): - self._initiate_resumable_helper(num_retries=11) - - def test__initiate_resumable_upload_with_retry_conflict(self): - with self.assertRaises(ValueError): - self._initiate_resumable_helper(retry=DEFAULT_RETRY, num_retries=2) - def test__initiate_resumable_upload_with_generation_match(self): self._initiate_resumable_helper( if_generation_match=4, if_metageneration_match=4 @@ -2749,24 +2945,37 @@ def test__initiate_resumable_upload_with_client(self): response_headers = {"location": resumable_url} transport = self._mock_transport(http.client.OK, response_headers) - client = mock.Mock(_http=transport, _connection=_Connection, spec=[u"_http"]) + client = mock.Mock(_http=transport, _connection=_Connection, spec=["_http"]) + client._connection.API_BASE_URL = "https://storage.googleapis.com" + client._extra_headers = {} + self._initiate_resumable_helper(client=client) + + def test__initiate_resumable_upload_with_client_custom_headers(self): + custom_headers = { + "x-goog-custom-audit-foo": "bar", + "x-goog-custom-audit-user": "baz", + } + resumable_url = "http://test.invalid?upload_id=hey-you" + response_headers = {"location": resumable_url} + transport = self._mock_transport(http.client.OK, response_headers) + + client = mock.Mock(_http=transport, _connection=_Connection, spec=["_http"]) client._connection.API_BASE_URL = "https://storage.googleapis.com" + client._extra_headers = custom_headers self._initiate_resumable_helper(client=client) def _make_resumable_transport( self, headers1, headers2, headers3, total_bytes, data_corruption=False ): - from google import resumable_media - fake_transport = mock.Mock(spec=["request"]) fake_response1 = self._mock_requests_response(http.client.OK, headers1) fake_response2 = self._mock_requests_response( - resumable_media.PERMANENT_REDIRECT, headers2 + http.client.PERMANENT_REDIRECT, headers2 ) - json_body = '{{"size": "{:d}"}}'.format(total_bytes) + json_body = f'{{"size": "{total_bytes:d}"}}' if data_corruption: - fake_response3 = resumable_media.DataCorruption(None) + fake_response3 = DataCorruption(None) else: fake_response3 = self._mock_requests_response( http.client.OK, headers3, content=json_body.encode("utf-8") @@ -2778,6 +2987,7 @@ def _make_resumable_transport( @staticmethod def _do_resumable_upload_call0( + client, blob, content_type, size=None, @@ -2795,11 +3005,10 @@ def _do_resumable_upload_call0( + "/o?uploadType=resumable" ) if predefined_acl is not None: - upload_url += "&predefinedAcl={}".format(predefined_acl) - expected_headers = { - "content-type": "application/json; charset=UTF-8", - 
"x-upload-content-type": content_type, - } + upload_url += f"&predefinedAcl={predefined_acl}" + expected_headers = _get_default_headers( + client._connection.user_agent, x_upload_content_type=content_type + ) if size is not None: expected_headers["x-upload-content-length"] = str(size) payload = json.dumps({"name": blob.name}).encode("utf-8") @@ -2809,6 +3018,7 @@ def _do_resumable_upload_call0( @staticmethod def _do_resumable_upload_call1( + client, blob, content_type, data, @@ -2823,11 +3033,14 @@ def _do_resumable_upload_call1( ): # Second mock transport.request() does sends first chunk. if size is None: - content_range = "bytes 0-{:d}/*".format(blob.chunk_size - 1) + content_range = f"bytes 0-{blob.chunk_size - 1:}/*" else: - content_range = "bytes 0-{:d}/{:d}".format(blob.chunk_size - 1, size) + content_range = f"bytes 0-{blob.chunk_size - 1}/{size}" expected_headers = { + **_get_default_headers( + client._connection.user_agent, x_upload_content_type=content_type + ), "content-type": content_type, "content-range": content_range, } @@ -2842,6 +3055,7 @@ def _do_resumable_upload_call1( @staticmethod def _do_resumable_upload_call2( + client, blob, content_type, data, @@ -2855,10 +3069,11 @@ def _do_resumable_upload_call2( timeout=None, ): # Third mock transport.request() does sends last chunk. - content_range = "bytes {:d}-{:d}/{:d}".format( - blob.chunk_size, total_bytes - 1, total_bytes - ) + content_range = f"bytes {blob.chunk_size:d}-{total_bytes - 1:d}/{total_bytes:d}" expected_headers = { + **_get_default_headers( + client._connection.user_agent, x_upload_content_type=content_type + ), "content-type": content_type, "content-range": content_range, } @@ -2874,7 +3089,6 @@ def _do_resumable_upload_call2( def _do_resumable_helper( self, use_size=False, - num_retries=None, predefined_acl=None, if_generation_match=None, if_generation_not_match=None, @@ -2884,13 +3098,11 @@ def _do_resumable_helper( data_corruption=False, retry=None, ): - bucket = _Bucket(name="yesterday") - blob = self._make_one(u"blob-name", bucket=bucket) - blob.chunk_size = blob._CHUNK_SIZE_MULTIPLE - self.assertIsNotNone(blob.chunk_size) - + CHUNK_SIZE = 256 * 1024 + USER_AGENT = "testing 1.2.3" + content_type = "text/html" # Data to be uploaded. - data = b"" + (b"A" * blob.chunk_size) + b"" + data = b"" + (b"A" * CHUNK_SIZE) + b"" total_bytes = len(data) if use_size: size = total_bytes @@ -2899,17 +3111,37 @@ def _do_resumable_helper( # Create mocks to be checked for doing transport. resumable_url = "http://test.invalid?upload_id=and-then-there-was-1" - headers1 = {"location": resumable_url} - headers2 = {"range": "bytes=0-{:d}".format(blob.chunk_size - 1)} - transport, responses = self._make_resumable_transport( - headers1, headers2, {}, total_bytes, data_corruption=data_corruption - ) + with patch.object( + _helpers, "_get_invocation_id", return_value=GCCL_INVOCATION_TEST_CONST + ): + headers1 = { + **_get_default_headers(USER_AGENT, content_type), + "location": resumable_url, + } + headers2 = { + **_get_default_headers(USER_AGENT, content_type), + "range": f"bytes=0-{CHUNK_SIZE - 1:d}", + } + headers3 = _get_default_headers(USER_AGENT, content_type) + transport, responses = self._make_resumable_transport( + headers1, + headers2, + headers3, + total_bytes, + data_corruption=data_corruption, + ) # Create some mock arguments and call the method under test. 
client = mock.Mock(_http=transport, _connection=_Connection, spec=["_http"]) client._connection.API_BASE_URL = "https://storage.googleapis.com" + client._connection.user_agent = USER_AGENT + client._extra_headers = {} stream = io.BytesIO(data) - content_type = u"text/html" + + bucket = _Bucket(name="yesterday") + blob = self._make_one("blob-name", bucket=bucket) + blob.chunk_size = blob._CHUNK_SIZE_MULTIPLE + self.assertIsNotNone(blob.chunk_size) if timeout is None: expected_timeout = self._get_default_timeout() @@ -2918,63 +3150,69 @@ def _do_resumable_helper( expected_timeout = timeout timeout_kwarg = {"timeout": timeout} - response = blob._do_resumable_upload( - client, - stream, - content_type, - size, - num_retries, - predefined_acl, - if_generation_match, - if_generation_not_match, - if_metageneration_match, - if_metageneration_not_match, - retry=retry, - **timeout_kwarg - ) + with patch.object( + _helpers, "_get_invocation_id", return_value=GCCL_INVOCATION_TEST_CONST + ): + response = blob._do_resumable_upload( + client, + stream, + content_type, + size, + predefined_acl, + if_generation_match, + if_generation_not_match, + if_metageneration_match, + if_metageneration_not_match, + checksum=None, + retry=retry, + **timeout_kwarg, + ) - # Check the returned values. - self.assertIs(response, responses[2]) - self.assertEqual(stream.tell(), total_bytes) + # Check the returned values. + self.assertIs(response, responses[2]) + self.assertEqual(stream.tell(), total_bytes) - # Check the mocks. - call0 = self._do_resumable_upload_call0( - blob, - content_type, - size=size, - predefined_acl=predefined_acl, - if_generation_match=if_generation_match, - if_generation_not_match=if_generation_not_match, - if_metageneration_match=if_metageneration_match, - if_metageneration_not_match=if_metageneration_not_match, - timeout=expected_timeout, - ) - call1 = self._do_resumable_upload_call1( - blob, - content_type, - data, - resumable_url, - size=size, - predefined_acl=predefined_acl, - if_generation_match=if_generation_match, - if_generation_not_match=if_generation_not_match, - if_metageneration_match=if_metageneration_match, - if_metageneration_not_match=if_metageneration_not_match, - timeout=expected_timeout, - ) - call2 = self._do_resumable_upload_call2( - blob, - content_type, - data, - resumable_url, - total_bytes, - predefined_acl=predefined_acl, - if_generation_match=if_generation_match, - if_generation_not_match=if_generation_not_match, - if_metageneration_match=if_metageneration_match, - if_metageneration_not_match=if_metageneration_not_match, - timeout=expected_timeout, - ) + # Check the mocks. 
+ call0 = self._do_resumable_upload_call0( + client, + blob, + content_type, + size=size, + predefined_acl=predefined_acl, + if_generation_match=if_generation_match, + if_generation_not_match=if_generation_not_match, + if_metageneration_match=if_metageneration_match, + if_metageneration_not_match=if_metageneration_not_match, + timeout=expected_timeout, + ) + call1 = self._do_resumable_upload_call1( + client, + blob, + content_type, + data, + resumable_url, + size=size, + predefined_acl=predefined_acl, + if_generation_match=if_generation_match, + if_generation_not_match=if_generation_not_match, + if_metageneration_match=if_metageneration_match, + if_metageneration_not_match=if_metageneration_not_match, + timeout=expected_timeout, + ) + call2 = self._do_resumable_upload_call2( + client, + blob, + content_type, + data, + resumable_url, + total_bytes, + predefined_acl=predefined_acl, + if_generation_match=if_generation_match, + if_generation_not_match=if_generation_not_match, + if_metageneration_match=if_metageneration_match, + if_metageneration_not_match=if_metageneration_not_match, + timeout=expected_timeout, + ) self.assertEqual(transport.request.mock_calls, [call0, call1, call2]) def test__do_resumable_upload_with_custom_timeout(self): @@ -2989,19 +3227,10 @@ def test__do_resumable_upload_with_size(self): def test__do_resumable_upload_with_retry(self): self._do_resumable_helper(retry=DEFAULT_RETRY) - def test__do_resumable_upload_w_num_retries(self): - self._do_resumable_helper(num_retries=8) - - def test__do_resumable_upload_with_retry_conflict(self): - with self.assertRaises(ValueError): - self._do_resumable_helper(num_retries=9, retry=DEFAULT_RETRY) - def test__do_resumable_upload_with_predefined_acl(self): self._do_resumable_helper(predefined_acl="private") def test__do_resumable_upload_with_data_corruption(self): - from google.resumable_media import DataCorruption - with mock.patch("google.cloud.storage.blob.Blob.delete") as patch: try: self._do_resumable_helper(data_corruption=True) @@ -3012,7 +3241,6 @@ def test__do_resumable_upload_with_data_corruption(self): def _do_upload_helper( self, chunk_size=None, - num_retries=None, predefined_acl=None, if_generation_match=None, if_generation_not_match=None, @@ -3024,10 +3252,10 @@ def _do_upload_helper( ): from google.cloud.storage.blob import _MAX_MULTIPART_SIZE - blob = self._make_one(u"blob-name", bucket=None) + blob = self._make_one("blob-name", bucket=None) # Create a fake response. - response = mock.Mock(spec=[u"json"]) + response = mock.Mock(spec=["json"]) response.json.return_value = mock.sentinel.json # Mock **both** helpers. 
blob._do_multipart_upload = mock.Mock(return_value=response, spec=[]) @@ -3041,7 +3269,7 @@ def _do_upload_helper( client = mock.sentinel.client stream = mock.sentinel.stream - content_type = u"video/mp4" + content_type = "video/mp4" if size is None: size = 12345654321 @@ -3058,14 +3286,14 @@ def _do_upload_helper( stream, content_type, size, - num_retries, predefined_acl, if_generation_match, if_generation_not_match, if_metageneration_match, if_metageneration_not_match, + checksum=None, retry=retry, - **timeout_kwarg + **timeout_kwarg, ) if retry is DEFAULT_RETRY_IF_GENERATION_SPECIFIED: @@ -3079,7 +3307,6 @@ def _do_upload_helper( stream, content_type, size, - num_retries, predefined_acl, if_generation_match, if_generation_not_match, @@ -3088,6 +3315,7 @@ def _do_upload_helper( timeout=expected_timeout, checksum=None, retry=retry, + command=None, ) blob._do_resumable_upload.assert_not_called() else: @@ -3097,7 +3325,6 @@ def _do_upload_helper( stream, content_type, size, - num_retries, predefined_acl, if_generation_match, if_generation_not_match, @@ -3106,6 +3333,7 @@ def _do_upload_helper( timeout=expected_timeout, checksum=None, retry=retry, + command=None, ) def test__do_upload_uses_multipart(self): @@ -3135,9 +3363,6 @@ def test__do_upload_uses_resumable_w_custom_timeout(self): def test__do_upload_with_retry(self): self._do_upload_helper(retry=DEFAULT_RETRY) - def test__do_upload_w_num_retries(self): - self._do_upload_helper(num_retries=2) - def test__do_upload_with_conditional_retry_success(self): self._do_upload_helper( retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED, if_generation_match=123456 @@ -3147,8 +3372,6 @@ def test__do_upload_with_conditional_retry_failure(self): self._do_upload_helper(retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED) def _upload_from_file_helper(self, side_effect=None, **kwargs): - from google.cloud._helpers import UTC - blob = self._make_one("blob-name", bucket=None) # Mock low-level upload helper on blob (it is tested elsewhere). created_json = {"updated": "2017-01-01T09:09:09.081Z"} @@ -3161,25 +3384,26 @@ def _upload_from_file_helper(self, side_effect=None, **kwargs): data = b"data is here" stream = io.BytesIO(data) stream.seek(2) # Not at zero. - content_type = u"font/woff" + content_type = "font/woff" client = mock.sentinel.client predefined_acl = kwargs.get("predefined_acl", None) if_generation_match = kwargs.get("if_generation_match", None) if_generation_not_match = kwargs.get("if_generation_not_match", None) if_metageneration_match = kwargs.get("if_metageneration_match", None) if_metageneration_not_match = kwargs.get("if_metageneration_not_match", None) - num_retries = kwargs.get("num_retries", None) - default_retry = ( - DEFAULT_RETRY_IF_GENERATION_SPECIFIED if not num_retries else None - ) - retry = kwargs.get("retry", default_retry) + retry = kwargs.get("retry", DEFAULT_RETRY) ret_val = blob.upload_from_file( - stream, size=len(data), content_type=content_type, client=client, **kwargs + stream, + size=len(data), + content_type=content_type, + client=client, + checksum=None, + **kwargs, ) # Check the response and side-effects. 
self.assertIsNone(ret_val) - new_updated = datetime.datetime(2017, 1, 1, 9, 9, 9, 81000, tzinfo=UTC) + new_updated = datetime.datetime(2017, 1, 1, 9, 9, 9, 81000, tzinfo=_UTC) self.assertEqual(blob.updated, new_updated) expected_timeout = kwargs.get("timeout", self._get_default_timeout()) @@ -3189,7 +3413,6 @@ def _upload_from_file_helper(self, side_effect=None, **kwargs): stream, content_type, len(data), - num_retries, predefined_acl, if_generation_match, if_generation_not_match, @@ -3198,6 +3421,7 @@ def _upload_from_file_helper(self, side_effect=None, **kwargs): timeout=expected_timeout, checksum=None, retry=retry, + command=None, ) return stream @@ -3208,29 +3432,6 @@ def test_upload_from_file_success(self): def test_upload_from_file_with_retry(self): self._upload_from_file_helper(retry=DEFAULT_RETRY) - @mock.patch("warnings.warn") - def test_upload_from_file_w_num_retries(self, mock_warn): - from google.cloud.storage._helpers import _NUM_RETRIES_MESSAGE - - self._upload_from_file_helper(num_retries=2) - - mock_warn.assert_called_once_with( - _NUM_RETRIES_MESSAGE, DeprecationWarning, stacklevel=2, - ) - - @mock.patch("warnings.warn") - def test_upload_from_file_with_retry_conflict(self, mock_warn): - from google.cloud.storage._helpers import _NUM_RETRIES_MESSAGE - - # Special case here: in a conflict this method should NOT raise an error - # as that's handled further downstream. It should pass both options - # through. - self._upload_from_file_helper(retry=DEFAULT_RETRY, num_retries=2) - - mock_warn.assert_called_once_with( - _NUM_RETRIES_MESSAGE, DeprecationWarning, stacklevel=2, - ) - def test_upload_from_file_with_rewind(self): stream = self._upload_from_file_helper(rewind=True) assert stream.tell() == 0 @@ -3241,7 +3442,6 @@ def test_upload_from_file_with_custom_timeout(self): def test_upload_from_file_failure(self): import requests - from google.resumable_media import InvalidResponse from google.cloud import exceptions message = "Someone is already in this spot." @@ -3263,65 +3463,38 @@ def _do_upload_mock_call_helper( content_type, size, timeout=None, - num_retries=None, retry=None, ): self.assertEqual(blob._do_upload.call_count, 1) mock_call = blob._do_upload.mock_calls[0] - call_name, pos_args, kwargs = mock_call - self.assertEqual(call_name, "") - self.assertEqual(len(pos_args), 10) - self.assertEqual(pos_args[0], client) - self.assertEqual(pos_args[2], content_type) - self.assertEqual(pos_args[3], size) - self.assertEqual(pos_args[4], num_retries) # num_retries - self.assertIsNone(pos_args[5]) # predefined_acl - self.assertIsNone(pos_args[6]) # if_generation_match - self.assertIsNone(pos_args[7]) # if_generation_not_match - self.assertIsNone(pos_args[8]) # if_metageneration_match - self.assertIsNone(pos_args[9]) # if_metageneration_not_match - - expected_timeout = self._get_default_timeout() if timeout is None else timeout - if not retry: - retry = DEFAULT_RETRY_IF_GENERATION_SPECIFIED if not num_retries else None - self.assertEqual( - kwargs, {"timeout": expected_timeout, "checksum": None, "retry": retry} - ) - - return pos_args[1] - - def test_upload_from_filename(self): - from google.cloud._testing import _NamedTemporaryFile - - blob = self._make_one("blob-name", bucket=None) - # Mock low-level upload helper on blob (it is tested elsewhere). - created_json = {"metadata": {"mint": "ice-cream"}} - blob._do_upload = mock.Mock(return_value=created_json, spec=[]) - # Make sure `metadata` is empty before the request. 
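With the num_retries tests removed above, retry behavior for uploads is configured only through the retry argument, which these helpers now default to DEFAULT_RETRY. A minimal usage sketch, assuming a bucket named "my-bucket" and a local file "report.csv" (both hypothetical):

from google.cloud import storage
from google.cloud.storage.retry import DEFAULT_RETRY

client = storage.Client()
blob = client.bucket("my-bucket").blob("report.csv")

# Pass a Retry object (or None to disable retries); the old num_retries keyword is removed here.
blob.upload_from_filename("report.csv", retry=DEFAULT_RETRY)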
- self.assertIsNone(blob.metadata) - - data = b"soooo much data" - content_type = u"image/svg+xml" - client = mock.sentinel.client - with _NamedTemporaryFile() as temp: - with open(temp.name, "wb") as file_obj: - file_obj.write(data) - - ret_val = blob.upload_from_filename( - temp.name, content_type=content_type, client=client - ) + call_name, pos_args, kwargs = mock_call + self.assertEqual(call_name, "") + self.assertEqual(len(pos_args), 9) + self.assertEqual(pos_args[0], client) + self.assertEqual(pos_args[2], content_type) + self.assertEqual(pos_args[3], size) + self.assertIsNone(pos_args[4]) # predefined_acl + self.assertIsNone(pos_args[5]) # if_generation_match + self.assertIsNone(pos_args[6]) # if_generation_not_match + self.assertIsNone(pos_args[7]) # if_metageneration_match + self.assertIsNone(pos_args[8]) # if_metageneration_not_match - # Check the response and side-effects. - self.assertIsNone(ret_val) - self.assertEqual(blob.metadata, created_json["metadata"]) + expected_timeout = self._get_default_timeout() if timeout is None else timeout + if not retry: + retry = DEFAULT_RETRY + self.assertEqual( + kwargs, + { + "timeout": expected_timeout, + "checksum": None, + "retry": retry, + "command": None, + }, + ) - # Check the mock. - stream = self._do_upload_mock_call_helper(blob, client, content_type, len(data)) - self.assertTrue(stream.closed) - self.assertEqual(stream.mode, "rb") - self.assertEqual(stream.name, temp.name) + return pos_args[1] - def test_upload_from_filename_with_retry(self): + def test_upload_from_filename(self): from google.cloud._testing import _NamedTemporaryFile blob = self._make_one("blob-name", bucket=None) @@ -3332,14 +3505,14 @@ def test_upload_from_filename_with_retry(self): self.assertIsNone(blob.metadata) data = b"soooo much data" - content_type = u"image/svg+xml" + content_type = "image/svg+xml" client = mock.sentinel.client with _NamedTemporaryFile() as temp: with open(temp.name, "wb") as file_obj: file_obj.write(data) ret_val = blob.upload_from_filename( - temp.name, content_type=content_type, client=client, retry=DEFAULT_RETRY + temp.name, content_type=content_type, client=client, checksum=None ) # Check the response and side-effects. @@ -3347,17 +3520,13 @@ def test_upload_from_filename_with_retry(self): self.assertEqual(blob.metadata, created_json["metadata"]) # Check the mock. - stream = self._do_upload_mock_call_helper( - blob, client, content_type, len(data), retry=DEFAULT_RETRY - ) + stream = self._do_upload_mock_call_helper(blob, client, content_type, len(data)) self.assertTrue(stream.closed) self.assertEqual(stream.mode, "rb") self.assertEqual(stream.name, temp.name) - @mock.patch("warnings.warn") - def test_upload_from_filename_w_num_retries(self, mock_warn): + def test_upload_from_filename_with_retry(self): from google.cloud._testing import _NamedTemporaryFile - from google.cloud.storage._helpers import _NUM_RETRIES_MESSAGE blob = self._make_one("blob-name", bucket=None) # Mock low-level upload helper on blob (it is tested elsewhere). 
@@ -3367,14 +3536,18 @@ def test_upload_from_filename_w_num_retries(self, mock_warn): self.assertIsNone(blob.metadata) data = b"soooo much data" - content_type = u"image/svg+xml" + content_type = "image/svg+xml" client = mock.sentinel.client with _NamedTemporaryFile() as temp: with open(temp.name, "wb") as file_obj: file_obj.write(data) ret_val = blob.upload_from_filename( - temp.name, content_type=content_type, client=client, num_retries=2 + temp.name, + content_type=content_type, + client=client, + retry=DEFAULT_RETRY, + checksum=None, ) # Check the response and side-effects. @@ -3383,16 +3556,12 @@ def test_upload_from_filename_w_num_retries(self, mock_warn): # Check the mock. stream = self._do_upload_mock_call_helper( - blob, client, content_type, len(data), num_retries=2 + blob, client, content_type, len(data), retry=DEFAULT_RETRY ) self.assertTrue(stream.closed) self.assertEqual(stream.mode, "rb") self.assertEqual(stream.name, temp.name) - mock_warn.assert_called_once_with( - _NUM_RETRIES_MESSAGE, DeprecationWarning, stacklevel=2, - ) - def test_upload_from_filename_w_custom_timeout(self): from google.cloud._testing import _NamedTemporaryFile @@ -3404,14 +3573,18 @@ def test_upload_from_filename_w_custom_timeout(self): self.assertIsNone(blob.metadata) data = b"soooo much data" - content_type = u"image/svg+xml" + content_type = "image/svg+xml" client = mock.sentinel.client with _NamedTemporaryFile() as temp: with open(temp.name, "wb") as file_obj: file_obj.write(data) blob.upload_from_filename( - temp.name, content_type=content_type, client=client, timeout=9.58 + temp.name, + content_type=content_type, + client=client, + timeout=9.58, + checksum=None, ) # Check the mock. @@ -3431,7 +3604,7 @@ def _upload_from_string_helper(self, data, **kwargs): self.assertIsNone(blob.component_count) client = mock.sentinel.client - ret_val = blob.upload_from_string(data, client=client, **kwargs) + ret_val = blob.upload_from_string(data, client=client, checksum=None, **kwargs) # Check the response and side-effects. self.assertIsNone(ret_val) @@ -3440,8 +3613,8 @@ def _upload_from_string_helper(self, data, **kwargs): extra_kwargs = {} if "retry" in kwargs: extra_kwargs["retry"] = kwargs["retry"] - if "num_retries" in kwargs: - extra_kwargs["num_retries"] = kwargs["num_retries"] + else: + extra_kwargs["retry"] = DEFAULT_RETRY # Check the mock. 
payload = _to_bytes(data, encoding="utf-8") stream = self._do_upload_mock_call_helper( @@ -3450,7 +3623,7 @@ def _upload_from_string_helper(self, data, **kwargs): "text/plain", len(payload), kwargs.get("timeout", self._get_default_timeout()), - **extra_kwargs + **extra_kwargs, ) self.assertIsInstance(stream, io.BytesIO) self.assertEqual(stream.getvalue(), payload) @@ -3464,72 +3637,72 @@ def test_upload_from_string_w_bytes(self): self._upload_from_string_helper(data) def test_upload_from_string_w_text(self): - data = u"\N{snowman} \N{sailboat}" + data = "\N{snowman} \N{sailboat}" self._upload_from_string_helper(data) def test_upload_from_string_w_text_w_retry(self): - data = u"\N{snowman} \N{sailboat}" + data = "\N{snowman} \N{sailboat}" self._upload_from_string_helper(data, retry=DEFAULT_RETRY) - @mock.patch("warnings.warn") - def test_upload_from_string_with_num_retries(self, mock_warn): - from google.cloud.storage._helpers import _NUM_RETRIES_MESSAGE - - data = u"\N{snowman} \N{sailboat}" - self._upload_from_string_helper(data, num_retries=2) - - mock_warn.assert_called_once_with( - _NUM_RETRIES_MESSAGE, DeprecationWarning, stacklevel=2, - ) - def _create_resumable_upload_session_helper( self, origin=None, side_effect=None, timeout=None, + predefined_acl=None, if_generation_match=None, if_generation_not_match=None, if_metageneration_match=None, if_metageneration_not_match=None, retry=None, + client=None, ): bucket = _Bucket(name="alex-trebek") blob = self._make_one("blob-name", bucket=bucket) chunk_size = 99 * blob._CHUNK_SIZE_MULTIPLE blob.chunk_size = chunk_size - - # Create mocks to be checked for doing transport. resumable_url = "http://test.invalid?upload_id=clean-up-everybody" - response_headers = {"location": resumable_url} - transport = self._mock_transport(http.client.OK, response_headers) - if side_effect is not None: - transport.request.side_effect = side_effect - - # Create some mock arguments and call the method under test. - content_type = u"text/plain" + content_type = "text/plain" size = 10000 - client = mock.Mock(_http=transport, _connection=_Connection, spec=[u"_http"]) - client._connection.API_BASE_URL = "https://storage.googleapis.com" + transport = None + + if not client: + # Create mocks to be checked for doing transport. + response_headers = {"location": resumable_url} + transport = self._mock_transport(http.client.OK, response_headers) + + # Create some mock arguments and call the method under test. 
+ client = mock.Mock(_http=transport, _connection=_Connection, spec=["_http"]) + client._connection.API_BASE_URL = "https://storage.googleapis.com" + client._connection.user_agent = "testing 1.2.3" + client._extra_headers = {} + if transport is None: + transport = client._http + if side_effect is not None: + transport.request.side_effect = side_effect if timeout is None: expected_timeout = self._get_default_timeout() timeout_kwarg = {} else: expected_timeout = timeout timeout_kwarg = {"timeout": timeout} - - new_url = blob.create_resumable_upload_session( - content_type=content_type, - size=size, - origin=origin, - client=client, - if_generation_match=if_generation_match, - if_generation_not_match=if_generation_not_match, - if_metageneration_match=if_metageneration_match, - if_metageneration_not_match=if_metageneration_not_match, - retry=retry, - **timeout_kwarg - ) + with patch.object( + _helpers, "_get_invocation_id", return_value=GCCL_INVOCATION_TEST_CONST + ): + new_url = blob.create_resumable_upload_session( + content_type=content_type, + size=size, + origin=origin, + client=client, + predefined_acl=predefined_acl, + if_generation_match=if_generation_match, + if_generation_not_match=if_generation_not_match, + if_metageneration_match=if_metageneration_match, + if_metageneration_not_match=if_metageneration_not_match, + retry=retry, + **timeout_kwarg, + ) # Check the returned value and (lack of) side-effect. self.assertEqual(new_url, resumable_url) @@ -3541,6 +3714,9 @@ def _create_resumable_upload_session_helper( ) qs_params = [("uploadType", "resumable")] + if predefined_acl is not None: + qs_params.append(("predefinedAcl", predefined_acl)) + if if_generation_match is not None: qs_params.append(("ifGenerationMatch", if_generation_match)) @@ -3555,11 +3731,17 @@ def _create_resumable_upload_session_helper( upload_url += "?" + urlencode(qs_params) payload = b'{"name": "blob-name"}' - expected_headers = { - "content-type": "application/json; charset=UTF-8", - "x-upload-content-length": str(size), - "x-upload-content-type": content_type, - } + with patch.object( + _helpers, "_get_invocation_id", return_value=GCCL_INVOCATION_TEST_CONST + ): + expected_headers = { + **_get_default_headers( + client._connection.user_agent, x_upload_content_type=content_type + ), + **client._extra_headers, + "x-upload-content-length": str(size), + "x-upload-content-type": content_type, + } if origin is not None: expected_headers["Origin"] = origin transport.request.assert_called_once_with( @@ -3579,6 +3761,9 @@ def test_create_resumable_upload_session_with_custom_timeout(self): def test_create_resumable_upload_session_with_origin(self): self._create_resumable_upload_session_helper(origin="http://google.com") + def test_create_resumable_upload_session_with_predefined_acl(self): + self._create_resumable_upload_session_helper(predefined_acl="private") + def test_create_resumable_upload_session_with_generation_match(self): self._create_resumable_upload_session_helper( if_generation_match=123456, if_metageneration_match=2 @@ -3600,7 +3785,6 @@ def test_create_resumable_upload_session_with_conditional_retry_failure(self): ) def test_create_resumable_upload_session_with_failure(self): - from google.resumable_media import InvalidResponse from google.cloud import exceptions message = "5-oh-3 woe is me." 
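The helper above now plumbs predefined_acl through create_resumable_upload_session as a query parameter. A short sketch of the corresponding call, with hypothetical bucket and object names:

from google.cloud import storage

client = storage.Client()
blob = client.bucket("my-bucket").blob("notes.txt")   # hypothetical names

session_url = blob.create_resumable_upload_session(
    content_type="text/plain",
    size=10000,
    origin="http://example.com",   # optional CORS origin for browser-based uploads
    predefined_acl="private",      # forwarded as the predefinedAcl query parameter
)
# The returned session URL can be handed to another process to upload the bytes.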
@@ -3615,6 +3799,28 @@ def test_create_resumable_upload_session_with_failure(self): self.assertIn(message, exc_info.exception.message) self.assertEqual(exc_info.exception.errors, []) + def test_create_resumable_upload_session_with_client(self): + resumable_url = "http://test.invalid?upload_id=clean-up-everybody" + response_headers = {"location": resumable_url} + transport = self._mock_transport(http.client.OK, response_headers) + client = mock.Mock(_http=transport, _connection=_Connection, spec=["_http"]) + client._connection.API_BASE_URL = "https://storage.googleapis.com" + client._extra_headers = {} + self._create_resumable_upload_session_helper(client=client) + + def test_create_resumable_upload_session_with_client_custom_headers(self): + custom_headers = { + "x-goog-custom-audit-foo": "bar", + "x-goog-custom-audit-user": "baz", + } + resumable_url = "http://test.invalid?upload_id=clean-up-everybody" + response_headers = {"location": resumable_url} + transport = self._mock_transport(http.client.OK, response_headers) + client = mock.Mock(_http=transport, _connection=_Connection, spec=["_http"]) + client._connection.API_BASE_URL = "https://storage.googleapis.com" + client._extra_headers = custom_headers + self._create_resumable_upload_session_helper(client=client) + def test_get_iam_policy_defaults(self): from google.cloud.storage.iam import STORAGE_OWNER_ROLE from google.cloud.storage.iam import STORAGE_EDITOR_ROLE @@ -3622,7 +3828,7 @@ def test_get_iam_policy_defaults(self): from google.api_core.iam import Policy blob_name = "blob-name" - path = "/b/name/o/%s" % (blob_name,) + path = f"/b/name/o/{blob_name}" etag = "DEADBEEF" version = 1 owner1 = "user:phred@example.com" @@ -3657,7 +3863,7 @@ def test_get_iam_policy_defaults(self): self.assertEqual(policy.version, api_response["version"]) self.assertEqual(dict(policy), expected_policy) - expected_path = "%s/iam" % (path,) + expected_path = f"{path}/iam" expected_query_params = {} client._get_resource.assert_called_once_with( expected_path, @@ -3673,7 +3879,7 @@ def test_get_iam_policy_w_user_project_w_timeout(self): blob_name = "blob-name" user_project = "user-project-123" timeout = 42 - path = "/b/name/o/%s" % (blob_name,) + path = f"/b/name/o/{blob_name}" etag = "DEADBEEF" version = 1 api_response = { @@ -3695,7 +3901,7 @@ def test_get_iam_policy_w_user_project_w_timeout(self): self.assertEqual(policy.version, api_response["version"]) self.assertEqual(dict(policy), expected_policy) - expected_path = "%s/iam" % (path,) + expected_path = f"{path}/iam" expected_query_params = {"userProject": user_project} client._get_resource.assert_called_once_with( expected_path, @@ -3709,7 +3915,7 @@ def test_get_iam_policy_w_requested_policy_version(self): from google.cloud.storage.iam import STORAGE_OWNER_ROLE blob_name = "blob-name" - path = "/b/name/o/%s" % (blob_name,) + path = f"/b/name/o/{blob_name}" etag = "DEADBEEF" version = 3 owner1 = "user:phred@example.com" @@ -3729,7 +3935,7 @@ def test_get_iam_policy_w_requested_policy_version(self): self.assertEqual(policy.version, version) - expected_path = "%s/iam" % (path,) + expected_path = f"{path}/iam" expected_query_params = {"optionsRequestedPolicyVersion": version} client._get_resource.assert_called_once_with( expected_path, @@ -3747,7 +3953,7 @@ def test_set_iam_policy(self): from google.api_core.iam import Policy blob_name 
= "blob-name" - path = "/b/name/o/%s" % (blob_name,) + path = f"/b/name/o/{blob_name}" etag = "DEADBEEF" version = 1 owner1 = "user:phred@example.com" @@ -3777,7 +3983,7 @@ def test_set_iam_policy(self): self.assertEqual(returned.version, version) self.assertEqual(dict(returned), dict(policy)) - expected_path = "%s/iam" % (path,) + expected_path = f"{path}/iam" expected_data = { "resourceId": path, "bindings": mock.ANY, @@ -3805,7 +4011,7 @@ def test_set_iam_policy_w_user_project_w_explicit_client_w_timeout_retry(self): blob_name = "blob-name" user_project = "user-project-123" - path = "/b/name/o/%s" % (blob_name,) + path = f"/b/name/o/{blob_name}" etag = "DEADBEEF" version = 1 bindings = [] @@ -3820,14 +4026,17 @@ def test_set_iam_policy_w_user_project_w_explicit_client_w_timeout_retry(self): retry = mock.Mock(spec=[]) returned = blob.set_iam_policy( - policy, client=client, timeout=timeout, retry=retry, + policy, + client=client, + timeout=timeout, + retry=retry, ) self.assertEqual(returned.etag, etag) self.assertEqual(returned.version, version) self.assertEqual(dict(returned), dict(policy)) - expected_path = "%s/iam" % (path,) + expected_path = f"{path}/iam" expected_data = { # bindings omitted "resourceId": path, } @@ -3863,7 +4072,7 @@ def test_test_iam_permissions_defaults(self): self.assertEqual(found, expected) - expected_path = "/b/name/o/%s/iam/testPermissions" % (blob_name,) + expected_path = f"/b/name/o/{blob_name}/iam/testPermissions" expected_query_params = {"permissions": permissions} client._get_resource.assert_called_once_with( expected_path, @@ -3898,7 +4107,7 @@ def test_test_iam_permissions_w_user_project_w_timeout_w_retry(self): self.assertEqual(found, expected) - expected_path = "/b/name/o/%s/iam/testPermissions" % (blob_name,) + expected_path = f"/b/name/o/{blob_name}/iam/testPermissions" expected_query_params = { "permissions": permissions, "userProject": user_project, @@ -3934,7 +4143,7 @@ def test_make_public_w_defaults(self): expected_patch_data, query_params=expected_query_params, timeout=self._get_default_timeout(), - retry=DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED, + retry=DEFAULT_RETRY, ) def test_make_public_w_timeout(self): @@ -3961,7 +4170,7 @@ def test_make_public_w_timeout(self): expected_patch_data, query_params=expected_query_params, timeout=timeout, - retry=DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED, + retry=DEFAULT_RETRY, ) def test_make_public_w_preconditions(self): @@ -3991,7 +4200,7 @@ def test_make_public_w_preconditions(self): expected_patch_data, query_params=expected_query_params, timeout=self._get_default_timeout(), - retry=DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED, + retry=DEFAULT_RETRY, ) def test_make_private_w_defaults(self): @@ -4015,7 +4224,7 @@ def test_make_private_w_defaults(self): expected_patch_data, query_params=expected_query_params, timeout=self._get_default_timeout(), - retry=DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED, + retry=DEFAULT_RETRY, ) def test_make_private_w_timeout(self): @@ -4040,7 +4249,7 @@ def test_make_private_w_timeout(self): expected_patch_data, query_params=expected_query_params, timeout=timeout, - retry=DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED, + retry=DEFAULT_RETRY, ) def test_make_private_w_preconditions(self): @@ -4068,7 +4277,7 @@ def test_make_private_w_preconditions(self): expected_patch_data, query_params=expected_query_params, timeout=self._get_default_timeout(), - retry=DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED, + retry=DEFAULT_RETRY, ) def test_compose_wo_content_type_set(self): @@ -4088,7 +4297,7 @@ 
def test_compose_wo_content_type_set(self): self.assertIsNone(destination.content_type) - expected_path = "/b/name/o/%s/compose" % destination_name + expected_path = f"/b/name/o/{destination_name}/compose" expected_data = { "sourceObjects": [ {"name": source_1.name, "generation": source_1.generation}, @@ -4125,7 +4334,7 @@ def test_compose_minimal_w_user_project_w_timeout(self): self.assertEqual(destination.etag, "DEADBEEF") - expected_path = "/b/name/o/%s/compose" % destination_name + expected_path = f"/b/name/o/{destination_name}/compose" expected_data = { "sourceObjects": [ {"name": source_1.name, "generation": source_1.generation}, @@ -4163,7 +4372,7 @@ def test_compose_w_additional_property_changes_w_retry(self): self.assertEqual(destination.etag, "DEADBEEF") - expected_path = "/b/name/o/%s/compose" % destination_name + expected_path = f"/b/name/o/{destination_name}/compose" expected_data = { "sourceObjects": [ {"name": source_1.name, "generation": source_1.generation}, @@ -4204,7 +4413,7 @@ def test_compose_w_source_generation_match(self): if_source_generation_match=source_generation_numbers, ) - expected_path = "/b/name/o/%s/compose" % destination_name + expected_path = f"/b/name/o/{destination_name}/compose" expected_data = { "sourceObjects": [ { @@ -4272,7 +4481,7 @@ def test_compose_w_source_generation_match_nones(self): if_source_generation_match=source_generation_numbers, ) - expected_path = "/b/name/o/%s/compose" % destination_name + expected_path = f"/b/name/o/{destination_name}/compose" expected_data = { "sourceObjects": [ { @@ -4310,10 +4519,11 @@ def test_compose_w_generation_match(self): destination = self._make_one(destination_name, bucket=bucket) destination.compose( - sources=[source_1, source_2], if_generation_match=generation_number, + sources=[source_1, source_2], + if_generation_match=generation_number, ) - expected_path = "/b/name/o/%s/compose" % destination_name + expected_path = f"/b/name/o/{destination_name}/compose" expected_data = { "sourceObjects": [ {"name": source_1.name, "generation": source_1.generation}, @@ -4349,10 +4559,11 @@ def test_compose_w_if_generation_match_list_w_warning(self, mock_warn): destination = self._make_one(destination_name, bucket=bucket) destination.compose( - sources=[source_1, source_2], if_generation_match=generation_numbers, + sources=[source_1, source_2], + if_generation_match=generation_numbers, ) - expected_path = "/b/name/o/%s/compose" % destination_name + expected_path = f"/b/name/o/{destination_name}/compose" expected_data = { "sourceObjects": [ { @@ -4382,8 +4593,10 @@ def test_compose_w_if_generation_match_list_w_warning(self, mock_warn): _target_object=destination, ) - mock_warn.assert_called_with( - _COMPOSE_IF_GENERATION_LIST_DEPRECATED, DeprecationWarning, stacklevel=2, + mock_warn.assert_any_call( + _COMPOSE_IF_GENERATION_LIST_DEPRECATED, + DeprecationWarning, + stacklevel=2, ) @mock.patch("warnings.warn") @@ -4410,8 +4623,10 @@ def test_compose_w_if_generation_match_and_if_s_generation_match(self, mock_warn client._post_resource.assert_not_called() - mock_warn.assert_called_with( - _COMPOSE_IF_GENERATION_LIST_DEPRECATED, DeprecationWarning, stacklevel=2, + mock_warn.assert_any_call( + _COMPOSE_IF_GENERATION_LIST_DEPRECATED, + DeprecationWarning, + stacklevel=2, ) @mock.patch("warnings.warn") @@ -4430,10 +4645,11 @@ def test_compose_w_if_metageneration_match_list_w_warning(self, mock_warn): destination = self._make_one(destination_name, bucket=bucket) destination.compose( - sources=[source_1, source_2], 
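As the deprecation warnings above indicate, list-valued if_generation_match and if_metageneration_match are deprecated for compose; a scalar applies to the destination object, and per-source preconditions go through if_source_generation_match. A hedged sketch reusing the destination and source blob names from these tests, with assumed generation numbers:

# Scalar precondition on the destination object only.
destination.compose(
    sources=[source_1, source_2],
    if_generation_match=123456,
)

# Per-source preconditions: one entry per source, in order.
destination.compose(
    sources=[source_1, source_2],
    if_source_generation_match=[5, 6],
)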
if_metageneration_match=metageneration_number, + sources=[source_1, source_2], + if_metageneration_match=metageneration_number, ) - expected_path = "/b/name/o/%s/compose" % destination_name + expected_path = f"/b/name/o/{destination_name}/compose" expected_data = { "sourceObjects": [ {"name": source_1_name, "generation": None}, @@ -4451,7 +4667,7 @@ def test_compose_w_if_metageneration_match_list_w_warning(self, mock_warn): _target_object=destination, ) - mock_warn.assert_called_with( + mock_warn.assert_any_call( _COMPOSE_IF_METAGENERATION_LIST_DEPRECATED, DeprecationWarning, stacklevel=2, @@ -4471,10 +4687,11 @@ def test_compose_w_metageneration_match(self): destination = self._make_one(destination_name, bucket=bucket) destination.compose( - sources=[source_1, source_2], if_metageneration_match=metageneration_number, + sources=[source_1, source_2], + if_metageneration_match=metageneration_number, ) - expected_path = "/b/name/o/%s/compose" % destination_name + expected_path = f"/b/name/o/{destination_name}/compose" expected_data = { "sourceObjects": [ {"name": source_1.name, "generation": source_1.generation}, @@ -4720,7 +4937,7 @@ def test_rewrite_same_name_no_old_key_new_key_done_w_user_project(self): self.assertEqual(rewritten, bytes_rewritten) self.assertEqual(size, object_size) - expected_path = "/b/name/o/%s/rewriteTo/b/name/o/%s" % (blob_name, blob_name) + expected_path = f"/b/name/o/{blob_name}/rewriteTo/b/name/o/{blob_name}" expected_query_params = {"userProject": user_project} expected_data = {} expected_headers = { @@ -4768,7 +4985,7 @@ def test_rewrite_same_name_no_key_new_key_w_token(self): self.assertEqual(rewritten, bytes_rewritten) self.assertEqual(size, object_size) - expected_path = "/b/name/o/%s/rewriteTo/b/name/o/%s" % (blob_name, blob_name) + expected_path = f"/b/name/o/{blob_name}/rewriteTo/b/name/o/{blob_name}" expected_data = {} expected_query_params = {"rewriteToken": previous_token} expected_headers = { @@ -4820,7 +5037,7 @@ def test_rewrite_same_name_w_old_key_new_kms_key(self): self.assertEqual(rewritten, bytes_rewritten) self.assertEqual(size, object_size) - expected_path = "/b/name/o/%s/rewriteTo/b/name/o/%s" % (blob_name, blob_name) + expected_path = f"/b/name/o/{blob_name}/rewriteTo/b/name/o/{blob_name}" expected_data = {"kmsKeyName": dest_kms_resource} expected_query_params = {"destinationKmsKeyName": dest_kms_resource} expected_headers = { @@ -4838,20 +5055,61 @@ def test_rewrite_same_name_w_old_key_new_kms_key(self): _target_object=dest, ) - def test_update_storage_class_invalid(self): - blob_name = "blob-name" - bucket = _Bucket() - blob = self._make_one(blob_name, bucket=bucket) - blob.rewrite = mock.Mock(spec=[]) + def test_rewrite_same_name_w_kms_key_w_version(self): + blob_name = "blob" + source_key = b"01234567890123456789012345678901" # 32 bytes + source_key_b64 = base64.b64encode(source_key).rstrip().decode("ascii") + source_key_hash = hashlib.sha256(source_key).digest() + source_key_hash_b64 = base64.b64encode(source_key_hash).rstrip().decode("ascii") + dest_kms_resource = ( + "projects/test-project-123/" + "locations/us/" + "keyRings/test-ring/" + "cryptoKeys/test-key" + "cryptoKeyVersions/1" + ) + bytes_rewritten = object_size = 42 + api_response = { + "totalBytesRewritten": bytes_rewritten, + "objectSize": object_size, + "done": True, + "resource": {"etag": "DEADBEEF"}, + } + client = mock.Mock(spec=["_post_resource"]) + client._post_resource.return_value = api_response + bucket = _Bucket(client=client) + source = self._make_one(blob_name, 
bucket=bucket, encryption_key=source_key) + dest = self._make_one(blob_name, bucket=bucket, kms_key_name=dest_kms_resource) - with self.assertRaises(ValueError): - blob.update_storage_class(u"BOGUS") + token, rewritten, size = dest.rewrite(source) + + self.assertIsNone(token) + self.assertEqual(rewritten, bytes_rewritten) + self.assertEqual(size, object_size) - blob.rewrite.assert_not_called() + expected_path = f"/b/name/o/{blob_name}/rewriteTo/b/name/o/{blob_name}" + expected_data = {"kmsKeyName": dest_kms_resource} + # The kmsKeyName version value can't be used in the rewrite request, + # so the client instead ignores it. + expected_query_params = {} + expected_headers = { + "X-Goog-Copy-Source-Encryption-Algorithm": "AES256", + "X-Goog-Copy-Source-Encryption-Key": source_key_b64, + "X-Goog-Copy-Source-Encryption-Key-Sha256": source_key_hash_b64, + } + client._post_resource.assert_called_once_with( + expected_path, + expected_data, + query_params=expected_query_params, + headers=expected_headers, + timeout=self._get_default_timeout(), + retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED, + _target_object=dest, + ) def _update_storage_class_multi_pass_helper(self, **kw): blob_name = "blob-name" - storage_class = u"NEARLINE" + storage_class = "NEARLINE" rewrite_token = "TOKEN" bytes_rewritten = 42 object_size = 84 @@ -4965,7 +5223,7 @@ def test_update_storage_class_multi_pass_w_retry(self): def _update_storage_class_single_pass_helper(self, **kw): blob_name = "blob-name" - storage_class = u"NEARLINE" + storage_class = "NEARLINE" object_size = 84 client = mock.Mock(spec=[]) bucket = _Bucket(client=client) @@ -5059,6 +5317,38 @@ def test_update_storage_class_single_pass_w_retry(self): retry = mock.Mock(spec=[]) self._update_storage_class_single_pass_helper(retry=retry) + def test_update_storage_class_invalid(self): + from google.cloud.exceptions import BadRequest + + storage_class = "BOGUS" + blob_name = "blob-name" + client = mock.Mock(spec=[]) + bucket = _Bucket(client=client) + blob = self._make_one(blob_name, bucket=bucket) + blob.rewrite = mock.Mock(spec=[]) + blob.rewrite.side_effect = BadRequest("Invalid storage class") + + with self.assertRaises(BadRequest): + blob.update_storage_class(storage_class) + + # Test that invalid classes are allowed without client side validation. + # Fall back to server side validation and errors. 
+ self.assertEqual(blob.storage_class, storage_class) + + blob.rewrite.assert_called_once_with( + blob, + if_generation_match=None, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + if_source_generation_match=None, + if_source_generation_not_match=None, + if_source_metageneration_match=None, + if_source_metageneration_not_match=None, + timeout=self._get_default_timeout(), + retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED, + ) + def test_cache_control_getter(self): BLOB_NAME = "blob-name" bucket = _Bucket() @@ -5339,11 +5629,10 @@ def test_owner(self): def test_retention_expiration_time(self): from google.cloud._helpers import _RFC3339_MICROS - from google.cloud._helpers import UTC BLOB_NAME = "blob-name" bucket = _Bucket() - TIMESTAMP = datetime.datetime(2014, 11, 5, 20, 34, 37, tzinfo=UTC) + TIMESTAMP = datetime.datetime(2014, 11, 5, 20, 34, 37, tzinfo=_UTC) TIME_CREATED = TIMESTAMP.strftime(_RFC3339_MICROS) properties = {"retentionExpirationTime": TIME_CREATED} blob = self._make_one(BLOB_NAME, bucket=bucket, properties=properties) @@ -5430,11 +5719,10 @@ def test_temporary_hold_setter(self): def test_time_deleted(self): from google.cloud._helpers import _RFC3339_MICROS - from google.cloud._helpers import UTC BLOB_NAME = "blob-name" bucket = _Bucket() - TIMESTAMP = datetime.datetime(2014, 11, 5, 20, 34, 37, tzinfo=UTC) + TIMESTAMP = datetime.datetime(2014, 11, 5, 20, 34, 37, tzinfo=_UTC) TIME_DELETED = TIMESTAMP.strftime(_RFC3339_MICROS) properties = {"timeDeleted": TIME_DELETED} blob = self._make_one(BLOB_NAME, bucket=bucket, properties=properties) @@ -5447,11 +5735,10 @@ def test_time_deleted_unset(self): def test_time_created(self): from google.cloud._helpers import _RFC3339_MICROS - from google.cloud._helpers import UTC BLOB_NAME = "blob-name" bucket = _Bucket() - TIMESTAMP = datetime.datetime(2014, 11, 5, 20, 34, 37, tzinfo=UTC) + TIMESTAMP = datetime.datetime(2014, 11, 5, 20, 34, 37, tzinfo=_UTC) TIME_CREATED = TIMESTAMP.strftime(_RFC3339_MICROS) properties = {"timeCreated": TIME_CREATED} blob = self._make_one(BLOB_NAME, bucket=bucket, properties=properties) @@ -5464,11 +5751,10 @@ def test_time_created_unset(self): def test_updated(self): from google.cloud._helpers import _RFC3339_MICROS - from google.cloud._helpers import UTC BLOB_NAME = "blob-name" bucket = _Bucket() - TIMESTAMP = datetime.datetime(2014, 11, 5, 20, 34, 37, tzinfo=UTC) + TIMESTAMP = datetime.datetime(2014, 11, 5, 20, 34, 37, tzinfo=_UTC) UPDATED = TIMESTAMP.strftime(_RFC3339_MICROS) properties = {"updated": UPDATED} blob = self._make_one(BLOB_NAME, bucket=bucket, properties=properties) @@ -5481,22 +5767,19 @@ def test_updated_unset(self): def test_custom_time_getter(self): from google.cloud._helpers import _RFC3339_MICROS - from google.cloud._helpers import UTC BLOB_NAME = "blob-name" bucket = _Bucket() - TIMESTAMP = datetime.datetime(2014, 11, 5, 20, 34, 37, tzinfo=UTC) + TIMESTAMP = datetime.datetime(2014, 11, 5, 20, 34, 37, tzinfo=_UTC) TIME_CREATED = TIMESTAMP.strftime(_RFC3339_MICROS) properties = {"customTime": TIME_CREATED} blob = self._make_one(BLOB_NAME, bucket=bucket, properties=properties) self.assertEqual(blob.custom_time, TIMESTAMP) def test_custom_time_setter(self): - from google.cloud._helpers import UTC - BLOB_NAME = "blob-name" bucket = _Bucket() - TIMESTAMP = datetime.datetime(2014, 11, 5, 20, 34, 37, tzinfo=UTC) + TIMESTAMP = datetime.datetime(2014, 11, 5, 20, 34, 37, tzinfo=_UTC) blob = self._make_one(BLOB_NAME, bucket=bucket) 
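As the test above notes, update_storage_class no longer validates the class name client-side, so an unknown value surfaces as a BadRequest from the API instead of a local ValueError. A minimal sketch, assuming an existing blob object:

from google.cloud.exceptions import BadRequest

blob.update_storage_class("NEARLINE")      # valid classes are applied via a rewrite call

try:
    blob.update_storage_class("BOGUS")     # sent to the server as-is
except BadRequest:
    pass                                   # the service, not the client, rejects it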
self.assertIsNone(blob.custom_time) blob.custom_time = TIMESTAMP @@ -5505,11 +5788,10 @@ def test_custom_time_setter(self): def test_custom_time_setter_none_value(self): from google.cloud._helpers import _RFC3339_MICROS - from google.cloud._helpers import UTC BLOB_NAME = "blob-name" bucket = _Bucket() - TIMESTAMP = datetime.datetime(2014, 11, 5, 20, 34, 37, tzinfo=UTC) + TIMESTAMP = datetime.datetime(2014, 11, 5, 20, 34, 37, tzinfo=_UTC) TIME_CREATED = TIMESTAMP.strftime(_RFC3339_MICROS) properties = {"customTime": TIME_CREATED} blob = self._make_one(BLOB_NAME, bucket=bucket, properties=properties) @@ -5522,38 +5804,97 @@ def test_custom_time_unset(self): blob = self._make_one("blob-name", bucket=BUCKET) self.assertIsNone(blob.custom_time) - def test_from_string_w_valid_uri(self): + def test_soft_hard_delete_time_getter(self): + from google.cloud._helpers import _RFC3339_MICROS + + BLOB_NAME = "blob-name" + bucket = _Bucket() + soft_timstamp = datetime.datetime(2024, 1, 5, 20, 34, 37, tzinfo=_UTC) + soft_delete = soft_timstamp.strftime(_RFC3339_MICROS) + hard_timstamp = datetime.datetime(2024, 1, 15, 20, 34, 37, tzinfo=_UTC) + hard_delete = hard_timstamp.strftime(_RFC3339_MICROS) + properties = { + "softDeleteTime": soft_delete, + "hardDeleteTime": hard_delete, + } + blob = self._make_one(BLOB_NAME, bucket=bucket, properties=properties) + self.assertEqual(blob.soft_delete_time, soft_timstamp) + self.assertEqual(blob.hard_delete_time, hard_timstamp) + + def test_soft_hard_delte_time_unset(self): + BUCKET = object() + blob = self._make_one("blob-name", bucket=BUCKET) + self.assertIsNone(blob.soft_delete_time) + self.assertIsNone(blob.hard_delete_time) + + def test_from_uri_w_valid_uri(self): from google.cloud.storage.blob import Blob client = self._make_client() - uri = "gs://BUCKET_NAME/b" - blob = Blob.from_string(uri, client) + basic_uri = "gs://bucket_name/b" + blob = Blob.from_uri(basic_uri, client) self.assertIsInstance(blob, Blob) self.assertIs(blob.client, client) self.assertEqual(blob.name, "b") - self.assertEqual(blob.bucket.name, "BUCKET_NAME") + self.assertEqual(blob.bucket.name, "bucket_name") + + nested_uri = "gs://bucket_name/path1/path2/b#name" + blob = Blob.from_uri(nested_uri, client) + + self.assertIsInstance(blob, Blob) + self.assertIs(blob.client, client) + self.assertEqual(blob.name, "path1/path2/b#name") + self.assertEqual(blob.bucket.name, "bucket_name") - def test_from_string_w_invalid_uri(self): + def test_from_uri_w_invalid_uri(self): from google.cloud.storage.blob import Blob client = self._make_client() - with pytest.raises(ValueError, match="URI scheme must be gs"): - Blob.from_string("http://bucket_name/b", client) + with pytest.raises(ValueError): + Blob.from_uri("http://bucket_name/b", client) - def test_from_string_w_domain_name_bucket(self): + def test_from_uri_w_domain_name_bucket(self): from google.cloud.storage.blob import Blob client = self._make_client() uri = "gs://buckets.example.com/b" - blob = Blob.from_string(uri, client) + blob = Blob.from_uri(uri, client) self.assertIsInstance(blob, Blob) self.assertIs(blob.client, client) self.assertEqual(blob.name, "b") self.assertEqual(blob.bucket.name, "buckets.example.com") + @mock.patch("warnings.warn") + def test_from_string(self, mock_warn): + from google.cloud.storage.blob import _FROM_STRING_DEPRECATED + from google.cloud.storage.blob import Blob + + client = self._make_client() + basic_uri = "gs://bucket_name/b" + 
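Blob.from_uri supersedes the now-deprecated Blob.from_string and, as the tests above show, keeps everything after the bucket (including "#" fragments) as part of the object name. A short sketch using the same URI as the test:

from google.cloud import storage
from google.cloud.storage.blob import Blob

client = storage.Client()
blob = Blob.from_uri("gs://bucket_name/path1/path2/b#name", client)

print(blob.bucket.name)   # bucket_name
print(blob.name)          # path1/path2/b#name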
blob = Blob.from_string(basic_uri, client) + + self.assertIsInstance(blob, Blob) + self.assertIs(blob.client, client) + self.assertEqual(blob.name, "b") + self.assertEqual(blob.bucket.name, "bucket_name") + + nested_uri = "gs://bucket_name/path1/path2/b#name" + blob = Blob.from_string(nested_uri, client) + + self.assertIsInstance(blob, Blob) + self.assertIs(blob.client, client) + self.assertEqual(blob.name, "path1/path2/b#name") + self.assertEqual(blob.bucket.name, "bucket_name") + + mock_warn.assert_any_call( + _FROM_STRING_DEPRECATED, + PendingDeprecationWarning, + stacklevel=2, + ) + def test_open(self): from io import TextIOWrapper from google.cloud.storage.fileio import BlobReader @@ -5596,6 +5937,108 @@ def test_open(self): with self.assertRaises(ValueError): blob.open("w", ignore_flush=False) + def test_downloads_w_client_custom_headers(self): + import google.auth.credentials + from google.cloud.storage import Client + + custom_headers = { + "x-goog-custom-audit-foo": "bar", + "x-goog-custom-audit-user": "baz", + } + credentials = mock.Mock( + spec=google.auth.credentials.Credentials, + universe_domain=_DEFAULT_UNIVERSE_DOMAIN, + ) + client = Client( + project="project", credentials=credentials, extra_headers=custom_headers + ) + blob = self._make_one("blob-name", bucket=_Bucket(client)) + file_obj = io.BytesIO() + + downloads = { + client.download_blob_to_file: (blob, file_obj), + blob.download_to_file: (file_obj,), + blob.download_as_bytes: (), + } + for method, args in downloads.items(): + with mock.patch.object(blob, "_do_download"): + method(*args) + blob._do_download.assert_called() + called_headers = blob._do_download.call_args.args[-4] + self.assertIsInstance(called_headers, dict) + self.assertLessEqual(custom_headers.items(), called_headers.items()) + + def test_object_lock_retention_configuration(self): + from google.cloud.storage.blob import Retention + + BLOB_NAME = "blob-name" + BUCKET = object() + blob = self._make_one(BLOB_NAME, bucket=BUCKET) + + retention = blob.retention + + self.assertIsInstance(retention, Retention) + self.assertIs(retention.blob, blob) + self.assertIsNone(retention.mode) + self.assertIsNone(retention.retain_until_time) + self.assertIsNone(retention.retention_expiration_time) + + def test_object_lock_retention_configuration_w_entry(self): + from google.cloud._helpers import _RFC3339_MICROS + from google.cloud.storage.blob import Retention + + now = _NOW(_UTC) + expiration_time = now + datetime.timedelta(hours=1) + expiration = expiration_time.strftime(_RFC3339_MICROS) + mode = "Locked" + properties = { + "retention": { + "mode": mode, + "retainUntilTime": expiration, + "retentionExpirationTime": expiration, + } + } + BLOB_NAME = "blob-name" + BUCKET = object() + blob = self._make_one(BLOB_NAME, bucket=BUCKET, properties=properties) + retention_config = Retention( + blob=blob, + mode=mode, + retain_until_time=expiration_time, + retention_expiration_time=expiration_time, + ) + + retention = blob.retention + + self.assertIsInstance(retention, Retention) + self.assertEqual(retention, retention_config) + self.assertIs(retention.blob, blob) + self.assertEqual(retention.mode, mode) + self.assertEqual(retention.retain_until_time, expiration_time) + self.assertEqual(retention.retention_expiration_time, expiration_time) + + def test_object_lock_retention_configuration_setter(self): + from google.cloud.storage.blob import Retention + + BLOB_NAME = "blob-name" + bucket = _Bucket() + blob = self._make_one(BLOB_NAME, bucket=bucket) + 
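The custom-audit-header test above exercises the extra_headers argument on Client, which attaches the given headers to subsequent requests such as downloads. A minimal sketch, assuming a bucket named "my-bucket" and object "my-object":

from google.cloud import storage

audit_headers = {
    "x-goog-custom-audit-foo": "bar",
    "x-goog-custom-audit-user": "baz",
}
client = storage.Client(extra_headers=audit_headers)
blob = client.bucket("my-bucket").blob("my-object")

data = blob.download_as_bytes()   # the audit headers ride along with the request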
self.assertIsInstance(blob.retention, Retention) + + mode = "Locked" + now = _NOW(_UTC) + expiration_time = now + datetime.timedelta(hours=1) + retention_config = Retention( + blob=blob, mode=mode, retain_until_time=expiration_time + ) + blob.retention.mode = mode + blob.retention.retain_until_time = expiration_time + self.assertEqual(blob.retention, retention_config) + self.assertIn("retention", blob._changes) + blob.retention.retain_until_time = None + self.assertIsNone(blob.retention.retain_until_time) + self.assertIn("retention", blob._changes) + class Test__quote(unittest.TestCase): @staticmethod @@ -5609,7 +6052,7 @@ def test_bytes(self): self.assertEqual(quoted, "%DE%AD%BE%EF") def test_unicode(self): - helicopter = u"\U0001f681" + helicopter = "\U0001f681" quoted = self._call_fut(helicopter) self.assertEqual(quoted, "%F0%9F%9A%81") @@ -5641,21 +6084,21 @@ def _call_fut(*args, **kwargs): return _maybe_rewind(*args, **kwargs) def test_default(self): - stream = mock.Mock(spec=[u"seek"]) + stream = mock.Mock(spec=["seek"]) ret_val = self._call_fut(stream) self.assertIsNone(ret_val) stream.seek.assert_not_called() def test_do_not_rewind(self): - stream = mock.Mock(spec=[u"seek"]) + stream = mock.Mock(spec=["seek"]) ret_val = self._call_fut(stream, rewind=False) self.assertIsNone(ret_val) stream.seek.assert_not_called() def test_do_rewind(self): - stream = mock.Mock(spec=[u"seek"]) + stream = mock.Mock(spec=["seek"]) ret_val = self._call_fut(stream, rewind=True) self.assertIsNone(ret_val) @@ -5672,7 +6115,6 @@ def _call_fut(error): def _helper(self, message, code=http.client.BAD_REQUEST, reason=None, args=()): import requests - from google.resumable_media import InvalidResponse from google.api_core import exceptions response = requests.Response() @@ -5689,7 +6131,7 @@ def _helper(self, message, code=http.client.BAD_REQUEST, reason=None, args=()): def test_default(self): message = "Failure" exc_info = self._helper(message) - expected = "GET http://example.com/: {}".format(message) + expected = f"GET http://example.com/: {message}" self.assertEqual(exc_info.exception.message, expected) self.assertEqual(exc_info.exception.errors, []) @@ -5721,24 +6163,20 @@ def test_w_empty_list(self): def test_wo_existing_qs(self): BASE_URL = "https://test.example.com/base" NV_LIST = [("one", "One"), ("two", "Two")] - expected = "&".join(["{}={}".format(name, value) for name, value in NV_LIST]) - self.assertEqual( - self._call_fut(BASE_URL, NV_LIST), "{}?{}".format(BASE_URL, expected) - ) + expected = "&".join([f"{name}={value}" for name, value in NV_LIST]) + self.assertEqual(self._call_fut(BASE_URL, NV_LIST), f"{BASE_URL}?{expected}") def test_w_existing_qs(self): BASE_URL = "https://test.example.com/base?one=Three" NV_LIST = [("one", "One"), ("two", "Two")] - expected = "&".join(["{}={}".format(name, value) for name, value in NV_LIST]) - self.assertEqual( - self._call_fut(BASE_URL, NV_LIST), "{}&{}".format(BASE_URL, expected) - ) + expected = "&".join([f"{name}={value}" for name, value in NV_LIST]) + self.assertEqual(self._call_fut(BASE_URL, NV_LIST), f"{BASE_URL}&{expected}") class _Connection(object): - API_BASE_URL = "http://example.com" USER_AGENT = "testing 1.2.3" + user_agent = "testing 1.2.3" credentials = object() diff --git a/tests/unit/test_bucket.py b/tests/unit/test_bucket.py index 
8bccee19c..7129232a0 100644 --- a/tests/unit/test_bucket.py +++ b/tests/unit/test_bucket.py @@ -25,6 +25,11 @@ from google.cloud.storage.constants import PUBLIC_ACCESS_PREVENTION_ENFORCED from google.cloud.storage.constants import PUBLIC_ACCESS_PREVENTION_INHERITED from google.cloud.storage.constants import PUBLIC_ACCESS_PREVENTION_UNSPECIFIED +from google.cloud.storage.constants import RPO_DEFAULT +from google.cloud.storage.constants import RPO_ASYNC_TURBO +from google.cloud.storage._helpers import _NOW +from google.cloud.storage._helpers import _UTC +from google.cloud.storage._helpers import _get_default_storage_base_url def _create_signing_credentials(): @@ -228,6 +233,28 @@ def test_ctor_w_noncurrent_time_before(self): self.assertEqual(conditions.number_of_newer_versions, 3) self.assertEqual(conditions.noncurrent_time_before, noncurrent_before) + def test_ctor_w_matches_prefix(self): + conditions = self._make_one(matches_prefix=["test-prefix"]) + expected = {"matchesPrefix": ["test-prefix"]} + self.assertEqual(dict(conditions), expected) + self.assertIsNone(conditions.age) + self.assertIsNone(conditions.created_before) + self.assertIsNone(conditions.is_live) + self.assertIsNone(conditions.matches_storage_class) + self.assertIsNone(conditions.matches_suffix) + self.assertEqual(conditions.matches_prefix, ["test-prefix"]) + + def test_ctor_w_matches_suffix(self): + conditions = self._make_one(matches_suffix=["test-suffix"]) + expected = {"matchesSuffix": ["test-suffix"]} + self.assertEqual(dict(conditions), expected) + self.assertIsNone(conditions.age) + self.assertIsNone(conditions.created_before) + self.assertIsNone(conditions.is_live) + self.assertIsNone(conditions.matches_storage_class) + self.assertIsNone(conditions.matches_prefix) + self.assertEqual(conditions.matches_suffix, ["test-suffix"]) + def test_from_api_repr(self): import datetime @@ -335,6 +362,43 @@ def test_from_api_repr(self): self.assertEqual(dict(rule), resource) +class Test_LifecycleRuleAbortIncompleteMultipartUpload(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.storage.bucket import ( + LifecycleRuleAbortIncompleteMultipartUpload, + ) + + return LifecycleRuleAbortIncompleteMultipartUpload + + def _make_one(self, **kw): + return self._get_target_class()(**kw) + + def test_ctor_wo_conditions(self): + with self.assertRaises(ValueError): + self._make_one() + + def test_ctor_w_condition(self): + rule = self._make_one(age=10) + expected = { + "action": {"type": "AbortIncompleteMultipartUpload"}, + "condition": {"age": 10}, + } + self.assertEqual(dict(rule), expected) + + def test_from_api_repr(self): + klass = self._get_target_class() + conditions = { + "age": 10, + } + resource = { + "action": {"type": "AbortIncompleteMultipartUpload"}, + "condition": conditions, + } + rule = klass.from_api_repr(resource) + self.assertEqual(dict(rule), resource) + + class Test_IAMConfiguration(unittest.TestCase): @staticmethod def _get_target_class(): @@ -368,11 +432,8 @@ def test_ctor_defaults(self): self.assertIsNone(config.bucket_policy_only_locked_time) def test_ctor_explicit_ubla(self): - import datetime - from google.cloud._helpers import UTC - bucket = self._make_bucket() - now = datetime.datetime.utcnow().replace(tzinfo=UTC) + now = _NOW(_UTC) config = self._make_one( bucket, @@ -390,7 +451,8 @@ def test_ctor_explicit_pap(self): bucket = self._make_bucket() config = self._make_one( - bucket, public_access_prevention=PUBLIC_ACCESS_PREVENTION_ENFORCED, + bucket, + 
public_access_prevention=PUBLIC_ACCESS_PREVENTION_ENFORCED, ) self.assertIs(config.bucket, bucket) @@ -407,11 +469,8 @@ def test_ctor_explicit_pap(self): ) def test_ctor_explicit_bpo(self): - import datetime - from google.cloud._helpers import UTC - bucket = self._make_bucket() - now = datetime.datetime.utcnow().replace(tzinfo=UTC) + now = _NOW(_UTC) config = pytest.deprecated_call( self._make_one, @@ -437,11 +496,8 @@ def test_ctor_ubla_and_bpo_enabled(self): ) def test_ctor_ubla_and_bpo_time(self): - import datetime - from google.cloud._helpers import UTC - bucket = self._make_bucket() - now = datetime.datetime.utcnow().replace(tzinfo=UTC) + now = _NOW(_UTC) with self.assertRaises(ValueError): self._make_one( @@ -485,13 +541,11 @@ def test_from_api_repr_w_disabled(self): self.assertIsNone(config.bucket_policy_only_locked_time) def test_from_api_repr_w_enabled(self): - import datetime - from google.cloud._helpers import UTC from google.cloud._helpers import _datetime_to_rfc3339 klass = self._get_target_class() bucket = self._make_bucket() - now = datetime.datetime.utcnow().replace(tzinfo=UTC) + now = _NOW(_UTC) resource = { "uniformBucketLevelAccess": { "enabled": True, @@ -546,18 +600,27 @@ def _get_default_timeout(): def _make_client(**kw): from google.cloud.storage.client import Client + kw["api_endpoint"] = kw.get("api_endpoint") or _get_default_storage_base_url() return mock.create_autospec(Client, instance=True, **kw) - def _make_one(self, client=None, name=None, properties=None, user_project=None): + def _make_one( + self, + client=None, + name=None, + properties=None, + user_project=None, + generation=None, + ): if client is None: client = self._make_client() if user_project is None: - bucket = self._get_target_class()(client, name=name) + bucket = self._get_target_class()(client, name=name, generation=generation) else: bucket = self._get_target_class()( - client, name=name, user_project=user_project + client, name=name, user_project=user_project, generation=generation ) - bucket._properties = properties or {} + if properties: + bucket._properties = {**bucket._properties, **properties} return bucket def test_ctor_w_invalid_name(self): @@ -578,6 +641,9 @@ def test_ctor(self): self.assertIs(bucket._default_object_acl.bucket, bucket) self.assertEqual(list(bucket._label_removals), []) self.assertIsNone(bucket.user_project) + self.assertEqual(bucket.generation, None) + self.assertEqual(bucket.soft_delete_time, None) + self.assertEqual(bucket.hard_delete_time, None) def test_ctor_w_user_project(self): NAME = "name" @@ -594,6 +660,31 @@ def test_ctor_w_user_project(self): self.assertEqual(list(bucket._label_removals), []) self.assertEqual(bucket.user_project, USER_PROJECT) + def test_ctor_w_generation_and_soft_delete_info(self): + from google.cloud._helpers import _RFC3339_MICROS + + NAME = "name" + generation = 12345 + + soft_timestamp = datetime.datetime(2024, 1, 5, 20, 34, 37, tzinfo=_UTC) + soft_delete = soft_timestamp.strftime(_RFC3339_MICROS) + hard_timestamp = datetime.datetime(2024, 1, 15, 20, 34, 37, tzinfo=_UTC) + hard_delete = hard_timestamp.strftime(_RFC3339_MICROS) + properties = {"softDeleteTime": soft_delete, "hardDeleteTime": hard_delete} + + bucket = self._make_one(name=NAME, generation=generation, properties=properties) + self.assertEqual(bucket.name, NAME) + self.assertEqual(list(bucket._changes), []) + self.assertFalse(bucket._acl.loaded) + self.assertIs(bucket._acl.bucket, bucket) + self.assertFalse(bucket._default_object_acl.loaded) + 
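# A minimal usage sketch of the bucket soft-delete metadata exercised by the
# surrounding tests (generation, soft_delete_time, hard_delete_time), assuming
# an existing project and an already soft-deleted bucket generation; the bucket
# name and generation below are hypothetical.
from google.cloud import storage

client = storage.Client()
bucket = client.bucket("example-bucket", generation=123456789)
bucket.reload(soft_deleted=True)  # fetches the soft-deleted generation's metadata
print(bucket.generation, bucket.soft_delete_time, bucket.hard_delete_time)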
self.assertIs(bucket._default_object_acl.bucket, bucket) + self.assertEqual(list(bucket._label_removals), []) + self.assertIsNone(bucket.user_project) + self.assertEqual(bucket.generation, generation) + self.assertEqual(bucket.soft_delete_time, soft_timestamp) + self.assertEqual(bucket.hard_delete_time, hard_timestamp) + def test_blob_wo_keys(self): from google.cloud.storage.blob import Blob @@ -862,7 +953,7 @@ def test_path_no_name(self): def test_path_w_name(self): NAME = "name" bucket = self._make_one(name=NAME) - self.assertEqual(bucket.path, "/b/%s" % NAME) + self.assertEqual(bucket.path, f"/b/{NAME}") def test_get_blob_miss_w_defaults(self): from google.cloud.exceptions import NotFound @@ -878,7 +969,7 @@ def test_get_blob_miss_w_defaults(self): self.assertIsNone(result) - expected_path = "/b/%s/o/%s" % (name, blob_name) + expected_path = f"/b/{name}/o/{blob_name}" expected_query_params = {"projection": "noAcl"} expected_headers = {} client._get_resource.assert_called_once_with( @@ -912,7 +1003,7 @@ def test_get_blob_hit_w_user_project(self): self.assertIs(blob.bucket, bucket) self.assertEqual(blob.name, blob_name) - expected_path = "/b/%s/o/%s" % (name, blob_name) + expected_path = f"/b/{name}/o/{blob_name}" expected_query_params = { "userProject": user_project, "projection": "noAcl", @@ -927,6 +1018,40 @@ def test_get_blob_hit_w_user_project(self): _target_object=blob, ) + def test_get_blob_hit_w_generation_w_soft_deleted(self): + from google.cloud.storage.blob import Blob + + name = "name" + blob_name = "blob-name" + generation = 1512565576797178 + api_response = {"name": blob_name, "generation": generation} + client = mock.Mock(spec=["_get_resource"]) + client._get_resource.return_value = api_response + bucket = self._make_one(client, name=name) + + blob = bucket.get_blob(blob_name, generation=generation, soft_deleted=True) + + self.assertIsInstance(blob, Blob) + self.assertIs(blob.bucket, bucket) + self.assertEqual(blob.name, blob_name) + self.assertEqual(blob.generation, generation) + + expected_path = f"/b/{name}/o/{blob_name}" + expected_query_params = { + "generation": generation, + "projection": "noAcl", + "softDeleted": True, + } + expected_headers = {} + client._get_resource.assert_called_once_with( + expected_path, + query_params=expected_query_params, + headers=expected_headers, + timeout=self._get_default_timeout(), + retry=DEFAULT_RETRY, + _target_object=blob, + ) + def test_get_blob_hit_w_generation_w_timeout(self): from google.cloud.storage.blob import Blob @@ -946,7 +1071,7 @@ def test_get_blob_hit_w_generation_w_timeout(self): self.assertEqual(blob.name, blob_name) self.assertEqual(blob.generation, generation) - expected_path = "/b/%s/o/%s" % (name, blob_name) + expected_path = f"/b/{name}/o/{blob_name}" expected_query_params = { "generation": generation, "projection": "noAcl", @@ -980,7 +1105,7 @@ def test_get_blob_w_etag_match_w_retry(self): self.assertEqual(blob.name, blob_name) self.assertEqual(blob.etag, etag) - expected_path = "/b/%s/o/%s" % (name, blob_name) + expected_path = f"/b/{name}/o/{blob_name}" expected_query_params = { "projection": "noAcl", } @@ -1015,7 +1140,7 @@ def test_get_blob_w_generation_match_w_retry(self): self.assertEqual(blob.name, blob_name) self.assertEqual(blob.generation, generation) - expected_path = "/b/%s/o/%s" % (name, blob_name) + expected_path = f"/b/{name}/o/{blob_name}" expected_query_params = { "ifGenerationMatch": generation, "projection": "noAcl", @@ -1053,7 +1178,7 @@ def 
test_get_blob_hit_with_kwargs_w_explicit_client(self): self.assertEqual(blob.chunk_size, chunk_size) self.assertEqual(blob._encryption_key, key) - expected_path = "/b/%s/o/%s" % (name, blob_name) + expected_path = f"/b/{name}/o/{blob_name}" expected_query_params = { "projection": "noAcl", } @@ -1081,12 +1206,16 @@ def test_list_blobs_w_defaults(self): expected_max_results = None expected_prefix = None expected_delimiter = None + expected_match_glob = None expected_start_offset = None expected_end_offset = None expected_include_trailing_delimiter = None expected_versions = None expected_projection = "noAcl" expected_fields = None + expected_include_folders_as_prefixes = None + soft_deleted = None + page_size = None client.list_blobs.assert_called_once_with( bucket, max_results=expected_max_results, @@ -1101,6 +1230,10 @@ def test_list_blobs_w_defaults(self): fields=expected_fields, timeout=self._get_default_timeout(), retry=DEFAULT_RETRY, + match_glob=expected_match_glob, + include_folders_as_prefixes=expected_include_folders_as_prefixes, + soft_deleted=soft_deleted, + page_size=page_size, ) def test_list_blobs_w_explicit(self): @@ -1109,10 +1242,14 @@ def test_list_blobs_w_explicit(self): page_token = "ABCD" prefix = "subfolder" delimiter = "/" + match_glob = "**txt" start_offset = "c" end_offset = "g" include_trailing_delimiter = True + include_folders_as_prefixes = True versions = True + soft_deleted = True + page_size = 2 projection = "full" fields = "items/contentLanguage,nextPageToken" bucket = self._make_one(client=None, name=name) @@ -1135,6 +1272,10 @@ def test_list_blobs_w_explicit(self): client=other_client, timeout=timeout, retry=retry, + match_glob=match_glob, + include_folders_as_prefixes=include_folders_as_prefixes, + soft_deleted=soft_deleted, + page_size=page_size, ) self.assertIs(iterator, other_client.list_blobs.return_value) @@ -1143,12 +1284,16 @@ def test_list_blobs_w_explicit(self): expected_max_results = max_results expected_prefix = prefix expected_delimiter = delimiter + expected_match_glob = match_glob expected_start_offset = start_offset expected_end_offset = end_offset expected_include_trailing_delimiter = include_trailing_delimiter expected_versions = versions expected_projection = projection expected_fields = fields + expected_include_folders_as_prefixes = include_folders_as_prefixes + expected_soft_deleted = soft_deleted + expected_page_size = page_size other_client.list_blobs.assert_called_once_with( bucket, max_results=expected_max_results, @@ -1163,6 +1308,10 @@ def test_list_blobs_w_explicit(self): fields=expected_fields, timeout=timeout, retry=retry, + match_glob=expected_match_glob, + include_folders_as_prefixes=expected_include_folders_as_prefixes, + soft_deleted=expected_soft_deleted, + page_size=expected_page_size, ) def test_list_notifications_w_defaults(self): @@ -1178,7 +1327,7 @@ def test_list_notifications_w_defaults(self): self.assertIs(iterator, client._list_resource.return_value) self.assertIs(iterator.bucket, bucket) - expected_path = "/b/{}/notificationConfigs".format(bucket_name) + expected_path = f"/b/{bucket_name}/notificationConfigs" expected_item_to_value = _item_to_notification client._list_resource.assert_called_once_with( expected_path, @@ -1198,16 +1347,21 @@ def test_list_notifications_w_explicit(self): retry = mock.Mock(spec=[]) iterator = bucket.list_notifications( - client=other_client, timeout=timeout, retry=retry, + client=other_client, + timeout=timeout, + retry=retry, ) self.assertIs(iterator, 
other_client._list_resource.return_value) self.assertIs(iterator.bucket, bucket) - expected_path = "/b/{}/notificationConfigs".format(bucket_name) + expected_path = f"/b/{bucket_name}/notificationConfigs" expected_item_to_value = _item_to_notification other_client._list_resource.assert_called_once_with( - expected_path, expected_item_to_value, timeout=timeout, retry=retry, + expected_path, + expected_item_to_value, + timeout=timeout, + retry=retry, ) def test_get_notification_miss_w_defaults(self): @@ -1225,7 +1379,7 @@ def test_get_notification_miss_w_defaults(self): with self.assertRaises(NotFound): bucket.get_notification(notification_id=notification_id) - expected_path = "/b/{}/notificationConfigs/{}".format(name, notification_id) + expected_path = f"/b/{name}/notificationConfigs/{notification_id}" expected_query_params = {} client._get_resource.assert_called_once_with( expected_path, @@ -1260,7 +1414,9 @@ def test_get_notification_hit_w_explicit_w_user_project(self): bucket = self._make_one(client=client, name=name, user_project=user_project) notification = bucket.get_notification( - notification_id=notification_id, timeout=timeout, retry=retry, + notification_id=notification_id, + timeout=timeout, + retry=retry, ) self.assertIsInstance(notification, BucketNotification) @@ -1272,7 +1428,7 @@ def test_get_notification_hit_w_explicit_w_user_project(self): self.assertIsNone(notification.blob_name_prefix) self.assertEqual(notification.payload_format, JSON_API_V1_PAYLOAD_FORMAT) - expected_path = "/b/{}/notificationConfigs/{}".format(name, notification_id) + expected_path = f"/b/{name}/notificationConfigs/{notification_id}" expected_query_params = {"userProject": user_project} client._get_resource.assert_called_once_with( expected_path, @@ -1309,7 +1465,8 @@ def test_delete_hit_w_metageneration_match_w_explicit_client(self): bucket = self._make_one(client=None, name=name) result = bucket.delete( - client=client, if_metageneration_match=metageneration_number, + client=client, + if_metageneration_match=metageneration_number, ) self.assertIsNone(result) @@ -1343,10 +1500,16 @@ def test_delete_hit_w_force_w_user_project_w_explicit_timeout_retry(self): client=client, timeout=timeout, retry=retry, + versions=True, ) bucket.delete_blobs.assert_called_once_with( - [], on_error=mock.ANY, client=client, timeout=timeout, retry=retry, + [], + on_error=mock.ANY, + client=client, + timeout=timeout, + retry=retry, + preserve_generation=True, ) expected_query_params = {"userProject": user_project} @@ -1376,6 +1539,7 @@ def test_delete_hit_w_force_delete_blobs(self): client=client, timeout=self._get_default_timeout(), retry=DEFAULT_RETRY, + versions=True, ) bucket.delete_blobs.assert_called_once_with( @@ -1384,6 +1548,7 @@ def test_delete_hit_w_force_delete_blobs(self): client=client, timeout=self._get_default_timeout(), retry=DEFAULT_RETRY, + preserve_generation=True, ) expected_query_params = {} @@ -1403,8 +1568,10 @@ def test_delete_w_force_w_user_project_w_miss_on_blob(self): client = mock.Mock(spec=["_delete_resource"]) client._delete_resource.return_value = None bucket = self._make_one(client=client, name=name) - blob = mock.Mock(spec=["name"]) + blob = mock.Mock(spec=["name", "generation"]) blob.name = blob_name + GEN = 1234 + blob.generation = GEN blobs = [blob] bucket.list_blobs = mock.Mock(return_value=iter(blobs)) bucket.delete_blob = mock.Mock(side_effect=NotFound("testing")) @@ -1416,6 +1583,7 @@ def test_delete_w_force_w_user_project_w_miss_on_blob(self): 
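# A minimal usage sketch of the object-level options exercised above and in the
# delete/restore tests elsewhere in this patch: soft-deleted lookups,
# glob-filtered listing, and restoring a soft-deleted generation. The bucket and
# object names, generation, and glob pattern are hypothetical; the keyword
# arguments mirror the calls asserted in the tests.
from google.cloud import storage

client = storage.Client()
bucket = client.bucket("example-bucket")

# Look up a specific soft-deleted generation of an object.
blob = bucket.get_blob("notes.txt", generation=1512565576797178, soft_deleted=True)

# List objects matching a glob, two results per page.
for item in bucket.list_blobs(match_glob="**txt", page_size=2):
    print(item.name)

# Bring a soft-deleted generation back as the live version.
restored = bucket.restore_blob("notes.txt", generation=1512565576797178)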
bucket.delete_blob.assert_called_once_with( blob_name, client=client, + generation=GEN, if_generation_match=None, if_generation_not_match=None, if_metageneration_match=None, @@ -1467,13 +1635,13 @@ def test_delete_blob_miss_w_defaults(self): with self.assertRaises(NotFound): bucket.delete_blob(blob_name) - expected_path = "/b/%s/o/%s" % (name, blob_name) + expected_path = f"/b/{name}/o/{blob_name}" expected_query_params = {} client._delete_resource.assert_called_once_with( expected_path, query_params=expected_query_params, timeout=self._get_default_timeout(), - retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED, + retry=DEFAULT_RETRY, _target_object=None, ) @@ -1490,13 +1658,13 @@ def test_delete_blob_hit_w_user_project_w_timeout(self): self.assertIsNone(result) - expected_path = "/b/%s/o/%s" % (name, blob_name) + expected_path = f"/b/{name}/o/{blob_name}" expected_query_params = {"userProject": user_project} client._delete_resource.assert_called_once_with( expected_path, query_params=expected_query_params, timeout=timeout, - retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED, + retry=DEFAULT_RETRY, _target_object=None, ) @@ -1513,7 +1681,7 @@ def test_delete_blob_hit_w_generation_w_retry(self): self.assertIsNone(result) - expected_path = "/b/%s/o/%s" % (name, blob_name) + expected_path = f"/b/{name}/o/{blob_name}" expected_query_params = {"generation": generation} client._delete_resource.assert_called_once_with( expected_path, @@ -1540,7 +1708,7 @@ def test_delete_blob_hit_w_generation_match(self): self.assertIsNone(result) - expected_path = "/b/%s/o/%s" % (name, blob_name) + expected_path = f"/b/{name}/o/{blob_name}" expected_query_params = { "ifGenerationMatch": generation, "ifMetagenerationMatch": metageneration, @@ -1549,7 +1717,7 @@ def test_delete_blob_hit_w_generation_match(self): expected_path, query_params=expected_query_params, timeout=self._get_default_timeout(), - retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED, + retry=DEFAULT_RETRY, _target_object=None, ) @@ -1575,12 +1743,13 @@ def test_delete_blobs_hit_w_explicit_client_w_timeout(self): bucket.delete_blob.assert_called_once_with( blob_name, client=client, + generation=None, if_generation_match=None, if_generation_not_match=None, if_metageneration_match=None, if_metageneration_not_match=None, timeout=timeout, - retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED, + retry=DEFAULT_RETRY, ) def test_delete_blobs_w_generation_match_wrong_len(self): @@ -1593,7 +1762,8 @@ def test_delete_blobs_w_generation_match_wrong_len(self): with self.assertRaises(ValueError): bucket.delete_blobs( - [blob_name, blob_name2], if_generation_not_match=[generation_number], + [blob_name, blob_name2], + if_generation_not_match=[generation_number], ) bucket.delete_blob.assert_not_called() @@ -1618,6 +1788,7 @@ def test_delete_blobs_w_generation_match_w_retry(self): call_1 = mock.call( blob_name, client=None, + generation=None, if_generation_match=generation_number, if_generation_not_match=None, if_metageneration_match=None, @@ -1628,6 +1799,7 @@ def test_delete_blobs_w_generation_match_w_retry(self): call_2 = mock.call( blob_name2, client=None, + generation=None, if_generation_match=generation_number2, if_generation_not_match=None, if_metageneration_match=None, @@ -1655,22 +1827,70 @@ def test_delete_blobs_w_generation_match_none(self): call_1 = mock.call( blob_name, client=None, + generation=None, if_generation_match=generation_number, if_generation_not_match=None, if_metageneration_match=None, if_metageneration_not_match=None, timeout=self._get_default_timeout(), - 
retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED, + retry=DEFAULT_RETRY, ) call_2 = mock.call( blob_name2, client=None, + generation=None, if_generation_match=None, if_generation_not_match=None, if_metageneration_match=None, if_metageneration_not_match=None, timeout=self._get_default_timeout(), - retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED, + retry=DEFAULT_RETRY, + ) + bucket.delete_blob.assert_has_calls([call_1, call_2]) + + def test_delete_blobs_w_preserve_generation(self): + name = "name" + blob_name = "blob-name" + blob_name2 = "blob-name2" + generation_number = 1234567890 + generation_number2 = 7890123456 + client = mock.Mock(spec=[]) + bucket = self._make_one(client=client, name=name) + blob = self._make_blob(bucket.name, blob_name) + blob.generation = generation_number + blob2 = self._make_blob(bucket.name, blob_name2) + blob2.generation = generation_number2 + bucket.delete_blob = mock.Mock() + retry = mock.Mock(spec=[]) + + # Test generation is propagated from list of blob instances + bucket.delete_blobs( + [blob, blob2], + preserve_generation=True, + retry=retry, + ) + + call_1 = mock.call( + blob_name, + client=None, + generation=generation_number, + if_generation_match=None, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + timeout=self._get_default_timeout(), + retry=retry, + ) + call_2 = mock.call( + blob_name2, + client=None, + generation=generation_number2, + if_generation_match=None, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + timeout=self._get_default_timeout(), + retry=retry, ) bucket.delete_blob.assert_has_calls([call_1, call_2]) @@ -1691,22 +1911,24 @@ def test_delete_blobs_miss_wo_on_error(self): call_1 = mock.call( blob_name, client=None, + generation=None, if_generation_match=None, if_generation_not_match=None, if_metageneration_match=None, if_metageneration_not_match=None, timeout=self._get_default_timeout(), - retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED, + retry=DEFAULT_RETRY, ) call_2 = mock.call( blob_name2, client=None, + generation=None, if_generation_match=None, if_generation_not_match=None, if_metageneration_match=None, if_metageneration_not_match=None, timeout=self._get_default_timeout(), - retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED, + retry=DEFAULT_RETRY, ) bucket.delete_blob.assert_has_calls([call_1, call_2]) @@ -1729,22 +1951,24 @@ def test_delete_blobs_miss_w_on_error(self): call_1 = mock.call( blob_name, client=None, + generation=None, if_generation_match=None, if_generation_not_match=None, if_metageneration_match=None, if_metageneration_not_match=None, timeout=self._get_default_timeout(), - retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED, + retry=DEFAULT_RETRY, ) call_2 = mock.call( blob_name2, client=None, + generation=None, if_generation_match=None, if_generation_not_match=None, if_metageneration_match=None, if_metageneration_not_match=None, timeout=self._get_default_timeout(), - retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED, + retry=DEFAULT_RETRY, ) bucket.delete_blob.assert_has_calls([call_1, call_2]) @@ -1758,7 +1982,7 @@ def test_reload_w_etag_match(self): bucket.reload(if_etag_match=etag) - expected_path = "/b/%s" % (name,) + expected_path = f"/b/{name}" expected_query_params = { "projection": "noAcl", } @@ -1784,7 +2008,7 @@ def test_reload_w_metageneration_match(self): bucket.reload(if_metageneration_match=metageneration_number) - expected_path = "/b/%s" % (name,) + expected_path = f"/b/{name}" expected_query_params = { "projection": "noAcl", 
"ifMetagenerationMatch": metageneration_number, @@ -1806,6 +2030,31 @@ def test_reload_w_generation_match(self): with self.assertRaises(TypeError): bucket.reload(if_generation_match=6) + def test_reload_w_soft_deleted(self): + name = "name" + api_response = {"name": name} + client = mock.Mock(spec=["_get_resource"]) + client._get_resource.return_value = api_response + bucket = self._make_one(client, name=name, generation=12345) + + bucket.reload(soft_deleted=True) + + expected_path = f"/b/{name}" + expected_query_params = { + "projection": "noAcl", + "softDeleted": True, + "generation": 12345, + } + expected_headers = {} + client._get_resource.assert_called_once_with( + expected_path, + query_params=expected_query_params, + headers=expected_headers, + timeout=self._get_default_timeout(), + retry=DEFAULT_RETRY, + _target_object=bucket, + ) + def test_update_w_metageneration_match(self): name = "name" metageneration_number = 9 @@ -1846,7 +2095,7 @@ def _make_blob(bucket_name, blob_name): blob = mock.create_autospec(Blob) blob.name = blob_name - blob.path = "/b/{}/o/{}".format(bucket_name, blob_name) + blob.path = f"/b/{bucket_name}/o/{blob_name}" return blob def test_copy_blobs_wo_name(self): @@ -1893,7 +2142,10 @@ def test_copy_blob_w_source_generation_w_timeout(self): timeout = 42 new_blob = source.copy_blob( - blob, dest, source_generation=generation, timeout=timeout, + blob, + dest, + source_generation=generation, + timeout=timeout, ) self.assertIs(new_blob.bucket, dest) @@ -1992,7 +2244,7 @@ def test_copy_blob_w_preserve_acl_false_w_explicit_client(self): _target_object=new_blob, ) - expected_patch_path = "/b/{}/o/{}".format(dest_name, new_name) + expected_patch_path = f"/b/{dest_name}/o/{new_name}" expected_patch_data = {"acl": []} expected_patch_query_params = {"projection": "full"} client._patch_resource.assert_called_once_with( @@ -2000,7 +2252,7 @@ def test_copy_blob_w_preserve_acl_false_w_explicit_client(self): expected_patch_data, query_params=expected_patch_query_params, timeout=self._get_default_timeout(), - retry=DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED, + retry=DEFAULT_RETRY, ) def test_copy_blob_w_name_and_user_project(self): @@ -2178,12 +2430,10 @@ def test_iam_configuration_policy_missing(self): self.assertIsNone(config.bucket_policy_only_locked_time) def test_iam_configuration_policy_w_entry(self): - import datetime - from google.cloud._helpers import UTC from google.cloud._helpers import _datetime_to_rfc3339 from google.cloud.storage.bucket import IAMConfiguration - now = datetime.datetime.utcnow().replace(tzinfo=UTC) + now = _NOW(_UTC) NAME = "name" properties = { "iamConfiguration": { @@ -2223,6 +2473,7 @@ def test_lifecycle_rules_getter(self): from google.cloud.storage.bucket import ( LifecycleRuleDelete, LifecycleRuleSetStorageClass, + LifecycleRuleAbortIncompleteMultipartUpload, ) NAME = "name" @@ -2231,7 +2482,11 @@ def test_lifecycle_rules_getter(self): "action": {"type": "SetStorageClass", "storageClass": "NEARLINE"}, "condition": {"isLive": False}, } - rules = [DELETE_RULE, SSC_RULE] + MULTIPART_RULE = { + "action": {"type": "AbortIncompleteMultipartUpload"}, + "condition": {"age": 42}, + } + rules = [DELETE_RULE, SSC_RULE, MULTIPART_RULE] properties = {"lifecycle": {"rule": rules}} bucket = self._make_one(name=NAME, properties=properties) @@ -2245,6 +2500,12 @@ def test_lifecycle_rules_getter(self): self.assertIsInstance(ssc_rule, LifecycleRuleSetStorageClass) self.assertEqual(dict(ssc_rule), SSC_RULE) + multipart_rule = found[2] + self.assertIsInstance( + 
multipart_rule, LifecycleRuleAbortIncompleteMultipartUpload + ) + self.assertEqual(dict(multipart_rule), MULTIPART_RULE) + def test_lifecycle_rules_setter_w_dicts(self): NAME = "name" DELETE_RULE = {"action": {"type": "Delete"}, "condition": {"age": 42}} @@ -2297,6 +2558,7 @@ def test_clear_lifecycle_rules(self): bucket._properties["lifecycle"] = {"rule": rules} self.assertEqual(list(bucket.lifecycle_rules), rules) + # This is a deprecated alias and will test both methods bucket.clear_lifecyle_rules() self.assertEqual(list(bucket.lifecycle_rules), []) @@ -2329,6 +2591,21 @@ def test_add_lifecycle_set_storage_class_rule(self): self.assertEqual([dict(rule) for rule in bucket.lifecycle_rules], rules) self.assertTrue("lifecycle" in bucket._changes) + def test_add_lifecycle_abort_incomplete_multipart_upload_rule(self): + NAME = "name" + AIMPU_RULE = { + "action": {"type": "AbortIncompleteMultipartUpload"}, + "condition": {"age": 42}, + } + rules = [AIMPU_RULE] + bucket = self._make_one(name=NAME) + self.assertEqual(list(bucket.lifecycle_rules), []) + + bucket.add_lifecycle_abort_incomplete_multipart_upload_rule(age=42) + + self.assertEqual([dict(rule) for rule in bucket.lifecycle_rules], rules) + self.assertTrue("lifecycle" in bucket._changes) + def test_cors_getter(self): NAME = "name" CORS_ENTRY = { @@ -2476,6 +2753,68 @@ def test_location_type_getter_set(self): bucket = self._make_one(properties=properties) self.assertEqual(bucket.location_type, REGION_LOCATION_TYPE) + def test_rpo_getter_and_setter(self): + bucket = self._make_one() + bucket.rpo = RPO_ASYNC_TURBO + self.assertEqual(bucket.rpo, RPO_ASYNC_TURBO) + bucket.rpo = RPO_DEFAULT + self.assertIn("rpo", bucket._changes) + self.assertEqual(bucket.rpo, RPO_DEFAULT) + + def test_autoclass_enabled_getter_and_setter(self): + properties = {"autoclass": {"enabled": True}} + bucket = self._make_one(properties=properties) + self.assertTrue(bucket.autoclass_enabled) + bucket.autoclass_enabled = False + self.assertIn("autoclass", bucket._changes) + self.assertFalse(bucket.autoclass_enabled) + + def test_autoclass_config_unset(self): + bucket = self._make_one() + self.assertIsNone(bucket.autoclass_toggle_time) + self.assertIsNone(bucket.autoclass_terminal_storage_class) + self.assertIsNone(bucket.autoclass_terminal_storage_class_update_time) + + properties = {"autoclass": {}} + bucket = self._make_one(properties=properties) + self.assertIsNone(bucket.autoclass_toggle_time) + self.assertIsNone(bucket.autoclass_terminal_storage_class) + self.assertIsNone(bucket.autoclass_terminal_storage_class_update_time) + + def test_autoclass_toggle_and_tsc_update_time(self): + from google.cloud._helpers import _datetime_to_rfc3339 + + effective_time = _NOW(_UTC) + properties = { + "autoclass": { + "enabled": True, + "toggleTime": _datetime_to_rfc3339(effective_time), + "terminalStorageClass": "NEARLINE", + "terminalStorageClassUpdateTime": _datetime_to_rfc3339(effective_time), + } + } + bucket = self._make_one(properties=properties) + self.assertEqual(bucket.autoclass_toggle_time, effective_time) + self.assertEqual( + bucket.autoclass_terminal_storage_class_update_time, effective_time + ) + + def test_autoclass_tsc_getter_and_setter(self): + from google.cloud.storage import constants + + properties = { + "autoclass": {"terminalStorageClass": constants.ARCHIVE_STORAGE_CLASS} + } + bucket = self._make_one(properties=properties) + self.assertEqual( + bucket.autoclass_terminal_storage_class, constants.ARCHIVE_STORAGE_CLASS + ) + 
bucket.autoclass_terminal_storage_class = constants.NEARLINE_STORAGE_CLASS + self.assertIn("autoclass", bucket._changes) + self.assertEqual( + bucket.autoclass_terminal_storage_class, constants.NEARLINE_STORAGE_CLASS + ) + def test_get_logging_w_prefix(self): NAME = "name" LOG_BUCKET = "logs" @@ -2568,11 +2907,9 @@ def test_retention_policy_effective_time_et_missing(self): self.assertIsNone(bucket.retention_policy_effective_time) def test_retention_policy_effective_time(self): - import datetime from google.cloud._helpers import _datetime_to_rfc3339 - from google.cloud._helpers import UTC - effective_time = datetime.datetime.utcnow().replace(tzinfo=UTC) + effective_time = _NOW(_UTC) properties = { "retentionPolicy": {"effectiveTime": _datetime_to_rfc3339(effective_time)} } @@ -2645,11 +2982,15 @@ def test_storage_class_getter(self): self.assertEqual(bucket.storage_class, NEARLINE_STORAGE_CLASS) def test_storage_class_setter_invalid(self): + invalid_class = "BOGUS" NAME = "name" bucket = self._make_one(name=NAME) - with self.assertRaises(ValueError): - bucket.storage_class = "BOGUS" - self.assertFalse("storageClass" in bucket._changes) + bucket.storage_class = invalid_class + + # Test that invalid classes are allowed without client side validation. + # Fall back to server side validation and errors. + self.assertEqual(bucket.storage_class, invalid_class) + self.assertTrue("storageClass" in bucket._changes) def test_storage_class_setter_STANDARD(self): from google.cloud.storage.constants import STANDARD_STORAGE_CLASS @@ -2720,9 +3061,8 @@ def test_storage_class_setter_DURABLE_REDUCED_AVAILABILITY(self): def test_time_created(self): from google.cloud._helpers import _RFC3339_MICROS - from google.cloud._helpers import UTC - TIMESTAMP = datetime.datetime(2014, 11, 5, 20, 34, 37, tzinfo=UTC) + TIMESTAMP = datetime.datetime(2014, 11, 5, 20, 34, 37, tzinfo=_UTC) TIME_CREATED = TIMESTAMP.strftime(_RFC3339_MICROS) properties = {"timeCreated": TIME_CREATED} bucket = self._make_one(properties=properties) @@ -2732,6 +3072,19 @@ def test_time_created_unset(self): bucket = self._make_one() self.assertIsNone(bucket.time_created) + def test_updated(self): + from google.cloud._helpers import _RFC3339_MICROS + + TIMESTAMP = datetime.datetime(2023, 11, 5, 20, 34, 37, tzinfo=_UTC) + UPDATED = TIMESTAMP.strftime(_RFC3339_MICROS) + properties = {"updated": UPDATED} + bucket = self._make_one(properties=properties) + self.assertEqual(bucket.updated, TIMESTAMP) + + def test_updated_unset(self): + bucket = self._make_one() + self.assertIsNone(bucket.updated) + def test_versioning_enabled_getter_missing(self): NAME = "name" bucket = self._make_one(name=NAME) @@ -2743,8 +3096,7 @@ def test_versioning_enabled_getter(self): bucket = self._make_one(name=NAME, properties=before) self.assertEqual(bucket.versioning_enabled, True) - @mock.patch("warnings.warn") - def test_create_w_defaults_deprecated(self, mock_warn): + def test_create_w_defaults(self): bucket_name = "bucket-name" api_response = {"name": bucket_name} client = mock.Mock(spec=["create_bucket"]) @@ -2760,25 +3112,19 @@ def test_create_w_defaults_deprecated(self, mock_warn): location=None, predefined_acl=None, predefined_default_object_acl=None, + enable_object_retention=False, timeout=self._get_default_timeout(), retry=DEFAULT_RETRY, ) - mock_warn.assert_called_with( - "Bucket.create() is deprecated and will be removed in future." 
- "Use Client.create_bucket() instead.", - PendingDeprecationWarning, - stacklevel=1, - ) - - @mock.patch("warnings.warn") - def test_create_w_explicit_deprecated(self, mock_warn): + def test_create_w_explicit(self): project = "PROJECT" location = "eu" user_project = "USER_PROJECT" bucket_name = "bucket-name" predefined_acl = "authenticatedRead" predefined_default_object_acl = "bucketOwnerFullControl" + enable_object_retention = True api_response = {"name": bucket_name} client = mock.Mock(spec=["create_bucket"]) client.create_bucket.return_value = api_response @@ -2793,6 +3139,7 @@ def test_create_w_explicit_deprecated(self, mock_warn): location=location, predefined_acl=predefined_acl, predefined_default_object_acl=predefined_default_object_acl, + enable_object_retention=enable_object_retention, timeout=timeout, retry=retry, ) @@ -2804,17 +3151,11 @@ def test_create_w_explicit_deprecated(self, mock_warn): location=location, predefined_acl=predefined_acl, predefined_default_object_acl=predefined_default_object_acl, + enable_object_retention=enable_object_retention, timeout=timeout, retry=retry, ) - mock_warn.assert_called_with( - "Bucket.create() is deprecated and will be removed in future." - "Use Client.create_bucket() instead.", - PendingDeprecationWarning, - stacklevel=1, - ) - def test_versioning_enabled_setter(self): NAME = "name" bucket = self._make_one(name=NAME) @@ -2840,6 +3181,62 @@ def test_requester_pays_setter(self): bucket.requester_pays = True self.assertTrue(bucket.requester_pays) + def test_object_retention_mode_getter(self): + bucket = self._make_one() + self.assertIsNone(bucket.object_retention_mode) + mode = "Enabled" + properties = {"objectRetention": {"mode": mode}} + bucket = self._make_one(properties=properties) + self.assertEqual(bucket.object_retention_mode, mode) + + def test_soft_delete_policy_getter_w_entry(self): + from google.cloud.storage.bucket import SoftDeletePolicy + from google.cloud._helpers import _datetime_to_rfc3339 + + seconds = 86400 * 10 # 10 days + effective_time = _NOW(_UTC) + properties = { + "softDeletePolicy": { + "retentionDurationSeconds": seconds, + "effectiveTime": _datetime_to_rfc3339(effective_time), + } + } + bucket = self._make_one(properties=properties) + + policy = SoftDeletePolicy( + bucket=bucket, + retention_duration_seconds=seconds, + effective_time=effective_time, + ) + self.assertIsInstance(bucket.soft_delete_policy, SoftDeletePolicy) + self.assertEqual(bucket.soft_delete_policy, policy) + self.assertEqual(bucket.soft_delete_policy.retention_duration_seconds, seconds) + self.assertEqual(bucket.soft_delete_policy.effective_time, effective_time) + + def test_soft_delete_policy_setter(self): + bucket = self._make_one() + policy = bucket.soft_delete_policy + self.assertIsNone(policy.retention_duration_seconds) + self.assertIsNone(policy.effective_time) + + seconds = 86400 * 10 # 10 days + bucket.soft_delete_policy.retention_duration_seconds = seconds + self.assertTrue("softDeletePolicy" in bucket._changes) + self.assertEqual(bucket.soft_delete_policy.retention_duration_seconds, seconds) + + def test_hierarchical_namespace_enabled_getter_and_setter(self): + # Test hierarchical_namespace configuration unset + bucket = self._make_one() + self.assertIsNone(bucket.hierarchical_namespace_enabled) + + # Test hierarchical_namespace configuration explicitly set + properties = {"hierarchicalNamespace": {"enabled": True}} + bucket = self._make_one(properties=properties) + self.assertTrue(bucket.hierarchical_namespace_enabled) + 
bucket.hierarchical_namespace_enabled = False + self.assertIn("hierarchicalNamespace", bucket._changes) + self.assertFalse(bucket.hierarchical_namespace_enabled) + def test_configure_website_defaults(self): NAME = "name" UNSET = {"website": {"mainPageSuffix": None, "notFoundPage": None}} @@ -2870,7 +3267,7 @@ def test_get_iam_policy_defaults(self): from google.api_core.iam import Policy bucket_name = "name" - path = "/b/%s" % (bucket_name,) + path = f"/b/{bucket_name}" etag = "DEADBEEF" version = 1 owner1 = "user:phred@example.com" @@ -2904,7 +3301,7 @@ def test_get_iam_policy_defaults(self): self.assertEqual(policy.version, api_response["version"]) self.assertEqual(dict(policy), expected_policy) - expected_path = "/b/%s/iam" % (bucket_name,) + expected_path = f"/b/{bucket_name}/iam" expected_query_params = {} client._get_resource.assert_called_once_with( expected_path, @@ -2920,7 +3317,7 @@ def test_get_iam_policy_w_user_project_w_timeout(self): bucket_name = "name" timeout = 42 user_project = "user-project-123" - path = "/b/%s" % (bucket_name,) + path = f"/b/{bucket_name}" etag = "DEADBEEF" version = 1 api_response = { @@ -2943,7 +3340,7 @@ def test_get_iam_policy_w_user_project_w_timeout(self): self.assertEqual(policy.version, api_response["version"]) self.assertEqual(dict(policy), expected_policy) - expected_path = "/b/%s/iam" % (bucket_name,) + expected_path = f"/b/{bucket_name}/iam" expected_query_params = {"userProject": user_project} client._get_resource.assert_called_once_with( expected_path, @@ -2957,7 +3354,7 @@ def test_get_iam_policy_w_requested_policy_version_w_retry(self): from google.cloud.storage.iam import STORAGE_OWNER_ROLE bucket_name = "name" - path = "/b/%s" % (bucket_name,) + path = f"/b/{bucket_name}" etag = "DEADBEEF" version = 3 owner1 = "user:phred@example.com" @@ -2977,7 +3374,7 @@ def test_get_iam_policy_w_requested_policy_version_w_retry(self): self.assertEqual(policy.version, version) - expected_path = "/b/%s/iam" % (bucket_name,) + expected_path = f"/b/{bucket_name}/iam" expected_query_params = {"optionsRequestedPolicyVersion": version} client._get_resource.assert_called_once_with( expected_path, @@ -3023,7 +3420,7 @@ def test_set_iam_policy_w_defaults(self): self.assertEqual(returned.version, version) self.assertEqual(dict(returned), dict(policy)) - expected_path = "%s/iam" % (bucket.path,) + expected_path = f"{bucket.path}/iam" expected_data = { "resourceId": bucket.path, "bindings": mock.ANY, @@ -3087,7 +3484,7 @@ def test_set_iam_policy_w_user_project_w_expl_client_w_timeout_retry(self): self.assertEqual(returned.version, version) self.assertEqual(dict(returned), dict(policy)) - expected_path = "%s/iam" % (bucket.path,) + expected_path = f"{bucket.path}/iam" expected_data = { "resourceId": bucket.path, "bindings": mock.ANY, @@ -3131,7 +3528,7 @@ def test_test_iam_permissions_defaults(self): self.assertEqual(found, expected) - expected_path = "/b/%s/iam/testPermissions" % (name,) + expected_path = f"/b/{name}/iam/testPermissions" expected_query_params = {} expected_query_params = {"permissions": permissions} client._get_resource.assert_called_once_with( @@ -3166,7 +3563,7 @@ def test_test_iam_permissions_w_user_project_w_timeout_w_retry(self): self.assertEqual(found, expected) - expected_path = "/b/%s/iam/testPermissions" % (name,) + expected_path = f"/b/{name}/iam/testPermissions" expected_query_params = { "permissions": permissions, "userProject": user_project, @@ -3279,7 +3676,7 @@ def _make_public_w_future_helper(self, default_object_acl_loaded=True): 
) if not default_object_acl_loaded: - expected_path = "/b/%s/defaultObjectAcl" % (name,) + expected_path = f"/b/{name}/defaultObjectAcl" expected_query_params = {} client._get_resource.assert_called_once_with( expected_path, @@ -3491,7 +3888,7 @@ def _make_private_w_future_helper(self, default_object_acl_loaded=True): ) if not default_object_acl_loaded: - expected_path = "/b/%s/defaultObjectAcl" % (name,) + expected_path = f"/b/{name}/defaultObjectAcl" expected_query_params = {} client._get_resource.assert_called_once_with( expected_path, @@ -3634,9 +4031,7 @@ def _generate_upload_policy_helper(self, **kwargs): break else: # pragma: NO COVER self.fail( - "Condition {} not found in {}".format( - expected_condition, policy_conditions - ) + f"Condition {expected_condition} not found in {policy_conditions}" ) return policy_fields, policy @@ -3741,7 +4136,7 @@ def test_lock_retention_policy_ok_w_timeout_w_retry(self): bucket.lock_retention_policy(timeout=timeout, retry=retry) - expected_path = "/b/{}/lockRetentionPolicy".format(name) + expected_path = f"/b/{name}/lockRetentionPolicy" expected_data = None expected_query_params = {"ifMetagenerationMatch": metageneration} client._post_resource.assert_called_once_with( @@ -3779,7 +4174,7 @@ def test_lock_retention_policy_w_user_project(self): bucket.lock_retention_policy() - expected_path = "/b/{}/lockRetentionPolicy".format(name) + expected_path = f"/b/{name}/lockRetentionPolicy" expected_data = None expected_query_params = { "ifMetagenerationMatch": metageneration, @@ -3794,6 +4189,109 @@ def test_lock_retention_policy_w_user_project(self): _target_object=bucket, ) + def test_restore_blob_w_defaults(self): + bucket_name = "restore_bucket" + blob_name = "restore_blob" + generation = 123456 + api_response = {"name": blob_name, "generation": generation} + client = mock.Mock(spec=["_post_resource"]) + client._post_resource.return_value = api_response + bucket = self._make_one(client=client, name=bucket_name) + + restored_blob = bucket.restore_blob(blob_name) + + self.assertIs(restored_blob.bucket, bucket) + self.assertEqual(restored_blob.name, blob_name) + expected_path = f"/b/{bucket_name}/o/{blob_name}/restore" + expected_data = None + expected_query_params = {} + client._post_resource.assert_called_once_with( + expected_path, + expected_data, + query_params=expected_query_params, + timeout=self._get_default_timeout(), + retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED, + ) + + def test_restore_blob_w_explicit(self): + user_project = "user-project-123" + bucket_name = "restore_bucket" + blob_name = "restore_blob" + generation = 123456 + api_response = {"name": blob_name, "generation": generation} + client = mock.Mock(spec=["_post_resource"]) + client._post_resource.return_value = api_response + bucket = self._make_one( + client=client, name=bucket_name, user_project=user_project + ) + if_generation_match = 123456 + if_generation_not_match = 654321 + if_metageneration_match = 1 + if_metageneration_not_match = 2 + projection = "noAcl" + + restored_blob = bucket.restore_blob( + blob_name, + client=client, + if_generation_match=if_generation_match, + if_generation_not_match=if_generation_not_match, + if_metageneration_match=if_metageneration_match, + if_metageneration_not_match=if_metageneration_not_match, + projection=projection, + ) + + self.assertEqual(restored_blob.name, blob_name) + self.assertEqual(restored_blob.bucket, bucket) + expected_path = f"/b/{bucket_name}/o/{blob_name}/restore" + expected_data = None + expected_query_params = { + 
"userProject": user_project, + "projection": projection, + "ifGenerationMatch": if_generation_match, + "ifGenerationNotMatch": if_generation_not_match, + "ifMetagenerationMatch": if_metageneration_match, + "ifMetagenerationNotMatch": if_metageneration_not_match, + } + client._post_resource.assert_called_once_with( + expected_path, + expected_data, + query_params=expected_query_params, + timeout=self._get_default_timeout(), + retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED, + ) + + def test_restore_blob_explicit_copy_source_acl(self): + bucket_name = "restore_bucket" + blob_name = "restore" + generation = 123456 + api_response = {"name": blob_name, "generation": generation} + client = mock.Mock(spec=["_post_resource"]) + client._post_resource.return_value = api_response + bucket = self._make_one(client=client, name=bucket_name) + copy_source_acl = False + + restored_blob = bucket.restore_blob( + blob_name, + copy_source_acl=copy_source_acl, + generation=generation, + ) + + self.assertEqual(restored_blob.name, blob_name) + self.assertEqual(restored_blob.bucket, bucket) + expected_path = f"/b/{bucket_name}/o/{blob_name}/restore" + expected_data = None + expected_query_params = { + "copySourceAcl": False, + "generation": generation, + } + client._post_resource.assert_called_once_with( + expected_path, + expected_data, + query_params=expected_query_params, + timeout=self._get_default_timeout(), + retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED, + ) + def test_generate_signed_url_w_invalid_version(self): expiration = "2014-10-16T20:34:37.000Z" client = self._make_client() @@ -3821,16 +4319,13 @@ def _generate_signed_url_helper( scheme="http", ): from urllib import parse - from google.cloud._helpers import UTC from google.cloud.storage._helpers import _bucket_bound_hostname_url - from google.cloud.storage.blob import _API_ACCESS_ENDPOINT - - api_access_endpoint = api_access_endpoint or _API_ACCESS_ENDPOINT + from google.cloud.storage._helpers import _get_default_storage_base_url delta = datetime.timedelta(hours=1) if expiration is None: - expiration = datetime.datetime.utcnow().replace(tzinfo=UTC) + delta + expiration = _NOW(_UTC) + delta client = self._make_client(_credentials=credentials) bucket = self._make_one(name=bucket_name, client=client) @@ -3873,8 +4368,10 @@ def _generate_signed_url_helper( bucket_bound_hostname, scheme ) else: - expected_api_access_endpoint = api_access_endpoint - expected_resource = "/{}".format(parse.quote(bucket_name)) + expected_api_access_endpoint = ( + api_access_endpoint or _get_default_storage_base_url() + ) + expected_resource = f"/{parse.quote(bucket_name)}" if virtual_hosted_style or bucket_bound_hostname: expected_resource = "/" @@ -3889,39 +4386,59 @@ def _generate_signed_url_helper( } signer.assert_called_once_with(expected_creds, **expected_kwargs) - def test_get_bucket_from_string_w_valid_uri(self): + def test_get_bucket_from_uri_w_valid_uri(self): from google.cloud.storage.bucket import Bucket client = self._make_client() BUCKET_NAME = "BUCKET_NAME" uri = "gs://" + BUCKET_NAME - bucket = Bucket.from_string(uri, client) + bucket = Bucket.from_uri(uri, client) self.assertIsInstance(bucket, Bucket) self.assertIs(bucket.client, client) self.assertEqual(bucket.name, BUCKET_NAME) - def test_get_bucket_from_string_w_invalid_uri(self): + def test_get_bucket_from_uri_w_invalid_uri(self): from google.cloud.storage.bucket import Bucket client = self._make_client() with pytest.raises(ValueError, match="URI scheme must be gs"): - 
Bucket.from_string("http://bucket_name", client) + Bucket.from_uri("http://bucket_name", client) - def test_get_bucket_from_string_w_domain_name_bucket(self): + def test_get_bucket_from_uri_w_domain_name_bucket(self): from google.cloud.storage.bucket import Bucket client = self._make_client() BUCKET_NAME = "buckets.example.com" uri = "gs://" + BUCKET_NAME + bucket = Bucket.from_uri(uri, client) + + self.assertIsInstance(bucket, Bucket) + self.assertIs(bucket.client, client) + self.assertEqual(bucket.name, BUCKET_NAME) + + @mock.patch("warnings.warn") + def test_get_bucket_from_string(self, mock_warn): + from google.cloud.storage.bucket import _FROM_STRING_MESSAGE + from google.cloud.storage.bucket import Bucket + + client = self._make_client() + BUCKET_NAME = "BUCKET_NAME" + uri = "gs://" + BUCKET_NAME + bucket = Bucket.from_string(uri, client) self.assertIsInstance(bucket, Bucket) self.assertIs(bucket.client, client) self.assertEqual(bucket.name, BUCKET_NAME) + mock_warn.assert_any_call( + _FROM_STRING_MESSAGE, + PendingDeprecationWarning, + stacklevel=2, + ) def test_generate_signed_url_no_version_passed_warning(self): self._generate_signed_url_helper() @@ -3934,9 +4451,7 @@ def test_generate_signed_url_v2_w_defaults(self): self._generate_signed_url_v2_helper() def test_generate_signed_url_v2_w_expiration(self): - from google.cloud._helpers import UTC - - expiration = datetime.datetime.utcnow().replace(tzinfo=UTC) + expiration = _NOW(_UTC) self._generate_signed_url_v2_helper(expiration=expiration) def test_generate_signed_url_v2_w_endpoint(self): @@ -4023,6 +4538,17 @@ def test_generate_signed_url_v4_w_bucket_bound_hostname_w_scheme(self): def test_generate_signed_url_v4_w_bucket_bound_hostname_w_bare_hostname(self): self._generate_signed_url_v4_helper(bucket_bound_hostname="cdn.example.com") + def test_generate_signed_url_v4_w_incompatible_params(self): + with self.assertRaises(ValueError): + self._generate_signed_url_v4_helper( + api_access_endpoint="example.com", + bucket_bound_hostname="cdn.example.com", + ) + with self.assertRaises(ValueError): + self._generate_signed_url_v4_helper( + virtual_hosted_style=True, bucket_bound_hostname="cdn.example.com" + ) + class Test__item_to_notification(unittest.TestCase): def _call_fut(self, iterator, item): diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index c7abf5b0d..0bef1ea91 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -16,6 +16,7 @@ import http.client import io import json +from unittest.mock import patch import mock import pytest import re @@ -23,15 +24,22 @@ import unittest import urllib - from google.api_core import exceptions - +from google.auth.credentials import AnonymousCredentials from google.oauth2.service_account import Credentials -from . 
import _read_local_json +from google.cloud.storage import _helpers +from google.cloud.storage._helpers import _NOW +from google.cloud.storage._helpers import _UTC +from google.cloud.storage._helpers import STORAGE_EMULATOR_ENV_VAR +from google.cloud.storage._helpers import _API_ENDPOINT_OVERRIDE_ENV_VAR +from google.cloud.storage._helpers import _get_default_headers +from google.cloud.storage._helpers import _DEFAULT_UNIVERSE_DOMAIN +from google.cloud.storage._http import Connection from google.cloud.storage.retry import DEFAULT_RETRY from google.cloud.storage.retry import DEFAULT_RETRY_IF_GENERATION_SPECIFIED - +from tests.unit.test__helpers import GCCL_INVOCATION_TEST_CONST +from . import _read_local_json _SERVICE_ACCOUNT_JSON = _read_local_json("url_signer_v4_test_account.json") _CONFORMANCE_TESTS = _read_local_json("url_signer_v4_test_data.json")[ @@ -41,13 +49,19 @@ _FAKE_CREDENTIALS = Credentials.from_service_account_info(_SERVICE_ACCOUNT_JSON) -def _make_credentials(project=None): +def _make_credentials(project=None, universe_domain=_DEFAULT_UNIVERSE_DOMAIN): import google.auth.credentials if project is not None: - return mock.Mock(spec=google.auth.credentials.Credentials, project_id=project) + return mock.Mock( + spec=google.auth.credentials.Credentials, + project_id=project, + universe_domain=universe_domain, + ) - return mock.Mock(spec=google.auth.credentials.Credentials) + return mock.Mock( + spec=google.auth.credentials.Credentials, universe_domain=universe_domain + ) def _create_signing_credentials(): @@ -58,7 +72,9 @@ class _SigningCredentials( ): pass - credentials = mock.Mock(spec=_SigningCredentials) + credentials = mock.Mock( + spec=_SigningCredentials, universe_domain=_DEFAULT_UNIVERSE_DOMAIN + ) credentials.sign_bytes = mock.Mock(return_value=b"Signature_bytes") credentials.signer_email = "test@mail.com" return credentials @@ -116,7 +132,6 @@ def _make_one(self, *args, **kw): def test_ctor_connection_type(self): from google.cloud._http import ClientInfo - from google.cloud.storage._http import Connection PROJECT = "PROJECT" credentials = _make_credentials() @@ -159,25 +174,76 @@ def test_ctor_w_client_options_dict(self): ) self.assertEqual(client._connection.API_BASE_URL, api_endpoint) + self.assertEqual(client.api_endpoint, api_endpoint) def test_ctor_w_client_options_object(self): from google.api_core.client_options import ClientOptions PROJECT = "PROJECT" credentials = _make_credentials() - client_options = ClientOptions(api_endpoint="https://www.foo-googleapis.com") + api_endpoint = "https://www.foo-googleapis.com" + client_options = ClientOptions(api_endpoint=api_endpoint) + + client = self._make_one( + project=PROJECT, credentials=credentials, client_options=client_options + ) + + self.assertEqual(client._connection.API_BASE_URL, api_endpoint) + self.assertEqual(client.api_endpoint, api_endpoint) + + def test_ctor_w_universe_domain_and_matched_credentials(self): + PROJECT = "PROJECT" + universe_domain = "example.com" + expected_api_endpoint = f"https://storage.{universe_domain}" + credentials = _make_credentials(universe_domain=universe_domain) + client_options = {"universe_domain": universe_domain} client = self._make_one( project=PROJECT, credentials=credentials, client_options=client_options ) + self.assertEqual(client._connection.API_BASE_URL, expected_api_endpoint) + self.assertEqual(client.api_endpoint, expected_api_endpoint) + 
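# A minimal sketch of constructing a client for a non-default universe domain,
# mirroring the expectations asserted in the surrounding constructor tests: the
# credentials' universe_domain must match the client option, and requests are
# routed to https://storage.<universe_domain>. The domain and project here are
# hypothetical.
import google.auth
from google.cloud import storage

credentials, _ = google.auth.default()  # assumed to carry universe_domain == "example.com"
client = storage.Client(
    project="example-project",
    credentials=credentials,
    client_options={"universe_domain": "example.com"},
)
print(client.api_endpoint)      # https://storage.example.com
print(client.universe_domain)   # example.com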
self.assertEqual(client.universe_domain, universe_domain) + + def test_ctor_w_universe_domain_and_mismatched_credentials(self): + PROJECT = "PROJECT" + universe_domain = "example.com" + credentials = _make_credentials() # default universe domain + client_options = {"universe_domain": universe_domain} + + with self.assertRaises(ValueError): + self._make_one( + project=PROJECT, credentials=credentials, client_options=client_options + ) + + def test_ctor_w_universe_domain_and_mtls(self): + PROJECT = "PROJECT" + universe_domain = "example.com" + client_options = {"universe_domain": universe_domain} + + credentials = _make_credentials( + project=PROJECT, universe_domain=universe_domain + ) + + environ = {"GOOGLE_API_USE_CLIENT_CERTIFICATE": "true"} + with mock.patch("os.environ", environ): + with self.assertRaises(ValueError): + self._make_one(credentials=credentials, client_options=client_options) + + def test_ctor_w_custom_headers(self): + PROJECT = "PROJECT" + credentials = _make_credentials() + custom_headers = {"x-goog-custom-audit-foo": "bar"} + client = self._make_one( + project=PROJECT, credentials=credentials, extra_headers=custom_headers + ) self.assertEqual( - client._connection.API_BASE_URL, "https://www.foo-googleapis.com" + client._connection.API_BASE_URL, client._connection.DEFAULT_API_ENDPOINT ) + self.assertEqual(client._connection.extra_headers, custom_headers) def test_ctor_wo_project(self): - from google.cloud.storage._http import Connection - PROJECT = "PROJECT" credentials = _make_credentials(project=PROJECT) @@ -190,8 +256,6 @@ def test_ctor_wo_project(self): self.assertEqual(list(client._batch_stack), []) def test_ctor_w_project_explicit_none(self): - from google.cloud.storage._http import Connection - credentials = _make_credentials() client = self._make_one(project=None, credentials=credentials) @@ -204,7 +268,6 @@ def test_ctor_w_project_explicit_none(self): def test_ctor_w_client_info(self): from google.cloud._http import ClientInfo - from google.cloud.storage._http import Connection credentials = _make_credentials() client_info = ClientInfo() @@ -236,10 +299,101 @@ def test_ctor_mtls(self): self.assertEqual(client._connection.ALLOW_AUTO_SWITCH_TO_MTLS_URL, False) self.assertEqual(client._connection.API_BASE_URL, "http://foo") - def test_create_anonymous_client(self): - from google.auth.credentials import AnonymousCredentials - from google.cloud.storage._http import Connection + def test_ctor_w_custom_endpoint_use_auth(self): + custom_endpoint = "storage-example.p.googleapis.com" + client = self._make_one(client_options={"api_endpoint": custom_endpoint}) + self.assertEqual(client._connection.API_BASE_URL, custom_endpoint) + self.assertIsNotNone(client.project) + self.assertIsInstance(client._connection, Connection) + self.assertIsNotNone(client._connection.credentials) + self.assertNotIsInstance(client._connection.credentials, AnonymousCredentials) + def test_ctor_w_custom_endpoint_bypass_auth(self): + custom_endpoint = "storage-example.p.googleapis.com" + client = self._make_one( + client_options={"api_endpoint": custom_endpoint}, + use_auth_w_custom_endpoint=False, + ) + self.assertEqual(client._connection.API_BASE_URL, custom_endpoint) + self.assertEqual(client.project, None) + self.assertIsInstance(client._connection, Connection) + self.assertIsInstance(client._connection.credentials, AnonymousCredentials) + + def test_ctor_w_custom_endpoint_w_credentials(self): + PROJECT = 
"PROJECT" + custom_endpoint = "storage-example.p.googleapis.com" + credentials = _make_credentials(project=PROJECT) + client = self._make_one( + credentials=credentials, client_options={"api_endpoint": custom_endpoint} + ) + self.assertEqual(client._connection.API_BASE_URL, custom_endpoint) + self.assertEqual(client.project, PROJECT) + self.assertIsInstance(client._connection, Connection) + self.assertIs(client._connection.credentials, credentials) + + def test_ctor_w_emulator_wo_project(self): + # bypasses authentication if STORAGE_EMULATOR_ENV_VAR is set + host = "http://localhost:8080" + environ = {STORAGE_EMULATOR_ENV_VAR: host} + with mock.patch("os.environ", environ): + client = self._make_one() + + self.assertIsNone(client.project) + self.assertEqual(client._connection.API_BASE_URL, host) + self.assertIsInstance(client._connection.credentials, AnonymousCredentials) + + def test_ctor_w_emulator_w_environ_project(self): + # bypasses authentication and infers the project from the environment + host = "http://localhost:8080" + environ_project = "environ-project" + environ = { + STORAGE_EMULATOR_ENV_VAR: host, + "GOOGLE_CLOUD_PROJECT": environ_project, + } + with mock.patch("os.environ", environ): + client = self._make_one() + + self.assertEqual(client.project, environ_project) + self.assertEqual(client._connection.API_BASE_URL, host) + self.assertIsInstance(client._connection.credentials, AnonymousCredentials) + + def test_ctor_w_emulator_w_project_arg(self): + # project argument overrides project set in the enviroment + host = "http://localhost:8080" + environ_project = "environ-project" + project = "my-test-project" + environ = { + STORAGE_EMULATOR_ENV_VAR: host, + "GOOGLE_CLOUD_PROJECT": environ_project, + } + with mock.patch("os.environ", environ): + client = self._make_one(project=project) + + self.assertEqual(client.project, project) + self.assertEqual(client._connection.API_BASE_URL, host) + self.assertIsInstance(client._connection.credentials, AnonymousCredentials) + + def test_ctor_w_emulator_w_credentials(self): + host = "http://localhost:8080" + environ = {STORAGE_EMULATOR_ENV_VAR: host} + credentials = _make_credentials() + with mock.patch("os.environ", environ): + client = self._make_one(credentials=credentials) + + self.assertEqual(client._connection.API_BASE_URL, host) + self.assertIs(client._connection.credentials, credentials) + + def test_ctor_w_api_endpoint_override(self): + host = "http://localhost:8080" + environ = {_API_ENDPOINT_OVERRIDE_ENV_VAR: host} + project = "my-test-project" + with mock.patch("os.environ", environ): + client = self._make_one(project=project) + + self.assertEqual(client.project, project) + self.assertEqual(client._connection.API_BASE_URL, host) + + def test_create_anonymous_client(self): klass = self._get_target_class() client = klass.create_anonymous_client() @@ -319,7 +473,7 @@ def test_get_service_account_email_wo_project(self): ) _, kwargs = http.request.call_args scheme, netloc, path, qs, _ = urllib.parse.urlsplit(kwargs.get("url")) - self.assertEqual("%s://%s" % (scheme, netloc), client._connection.API_BASE_URL) + self.assertEqual(f"{scheme}://{netloc}", client._connection.API_BASE_URL) self.assertEqual( path, "/".join( @@ -357,7 +511,7 @@ def test_get_service_account_email_w_project(self): ) _, kwargs = 
http.request.call_args scheme, netloc, path, qs, _ = urllib.parse.urlsplit(kwargs.get("url")) - self.assertEqual("%s://%s" % (scheme, netloc), client._connection.API_BASE_URL) + self.assertEqual(f"{scheme}://{netloc}", client._connection.API_BASE_URL) self.assertEqual( path, "/".join( @@ -378,13 +532,15 @@ def test_bucket(self): PROJECT = "PROJECT" CREDENTIALS = _make_credentials() BUCKET_NAME = "BUCKET_NAME" + GENERATION = 12345 client = self._make_one(project=PROJECT, credentials=CREDENTIALS) - bucket = client.bucket(BUCKET_NAME) + bucket = client.bucket(BUCKET_NAME, generation=GENERATION) self.assertIsInstance(bucket, Bucket) self.assertIs(bucket.client, client) self.assertEqual(bucket.name, BUCKET_NAME) self.assertIsNone(bucket.user_project) + self.assertEqual(bucket.generation, GENERATION) def test_bucket_w_user_project(self): from google.cloud.storage.bucket import Bucket @@ -482,7 +638,10 @@ def test__list_resource_w_defaults(self): client = self._make_one(project=project, credentials=credentials) connection = client._base_connection = _make_connection() - iterator = client._list_resource(path=path, item_to_value=item_to_value,) + iterator = client._list_resource( + path=path, + item_to_value=item_to_value, + ) self.assertIsInstance(iterator, HTTPIterator) self.assertIs(iterator.client, client) @@ -801,6 +960,20 @@ def test__bucket_arg_to_bucket_w_bucket_w_client(self): self.assertIs(found, bucket) self.assertIs(found.client, other_client) + def test__bucket_arg_to_bucket_raises_on_generation(self): + from google.cloud.storage.bucket import Bucket + + project = "PROJECT" + credentials = _make_credentials() + client = self._make_one(project=project, credentials=credentials) + other_client = mock.Mock(spec=[]) + bucket_name = "w_client" + + bucket = Bucket(other_client, name=bucket_name) + + with self.assertRaises(ValueError): + client._bucket_arg_to_bucket(bucket, generation=12345) + def test__bucket_arg_to_bucket_w_bucket_wo_client(self): from google.cloud.storage.bucket import Bucket @@ -820,14 +993,16 @@ def test__bucket_arg_to_bucket_w_bucket_name(self): from google.cloud.storage.bucket import Bucket project = "PROJECT" + generation = 12345 credentials = _make_credentials() client = self._make_one(project=project, credentials=credentials) bucket_name = "string-name" - found = client._bucket_arg_to_bucket(bucket_name) + found = client._bucket_arg_to_bucket(bucket_name, generation) self.assertIsInstance(found, Bucket) self.assertEqual(found.name, bucket_name) + self.assertEqual(found.generation, generation) self.assertIs(found.client, client) def test_get_bucket_miss_w_string_w_defaults(self): @@ -844,7 +1019,7 @@ def test_get_bucket_miss_w_string_w_defaults(self): with self.assertRaises(NotFound): client.get_bucket(bucket_name) - expected_path = "/b/%s" % (bucket_name,) + expected_path = f"/b/{bucket_name}" expected_query_params = {"projection": "noAcl"} expected_headers = {} client._get_resource.assert_called_once_with( @@ -876,7 +1051,7 @@ def test_get_bucket_hit_w_string_w_timeout(self): self.assertIsInstance(bucket, Bucket) self.assertEqual(bucket.name, bucket_name) - expected_path = "/b/%s" % (bucket_name,) + expected_path = f"/b/{bucket_name}" expected_query_params = {"projection": "noAcl"} expected_headers = {} client._get_resource.assert_called_once_with( @@ -888,6 +1063,41 @@ def test_get_bucket_hit_w_string_w_timeout(self): _target_object=bucket, ) + def test_get_bucket_hit_w_string_w_soft_deleted(self): + from google.cloud.storage.bucket import Bucket + + project = 
"PROJECT" + bucket_name = "bucket-name" + generation = 12345 + api_response = {"name": bucket_name, "generation": generation} + credentials = _make_credentials() + client = self._make_one(project=project, credentials=credentials) + client._get_resource = mock.Mock(return_value=api_response) + + bucket = client.get_bucket( + bucket_name, generation=generation, soft_deleted=True + ) + + self.assertIsInstance(bucket, Bucket) + self.assertEqual(bucket.name, bucket_name) + self.assertEqual(bucket.generation, generation) + + expected_path = f"/b/{bucket_name}" + expected_query_params = { + "generation": generation, + "projection": "noAcl", + "softDeleted": True, + } + expected_headers = {} + client._get_resource.assert_called_once_with( + expected_path, + query_params=expected_query_params, + headers=expected_headers, + timeout=60, + retry=DEFAULT_RETRY, + _target_object=bucket, + ) + def test_get_bucket_hit_w_string_w_metageneration_match(self): from google.cloud.storage.bucket import Bucket @@ -906,7 +1116,7 @@ def test_get_bucket_hit_w_string_w_metageneration_match(self): self.assertIsInstance(bucket, Bucket) self.assertEqual(bucket.name, bucket_name) - expected_path = "/b/%s" % (bucket_name,) + expected_path = f"/b/{bucket_name}" expected_query_params = { "projection": "noAcl", "ifMetagenerationMatch": metageneration_number, @@ -936,7 +1146,7 @@ def test_get_bucket_miss_w_object_w_retry(self): with self.assertRaises(NotFound): client.get_bucket(bucket_obj, retry=retry) - expected_path = "/b/%s" % (bucket_name,) + expected_path = f"/b/{bucket_name}" expected_query_params = {"projection": "noAcl"} expected_headers = {} client._get_resource.assert_called_once_with( @@ -968,7 +1178,7 @@ def test_get_bucket_hit_w_object_defaults(self): self.assertIsInstance(bucket, Bucket) self.assertEqual(bucket.name, bucket_name) - expected_path = "/b/%s" % (bucket_name,) + expected_path = f"/b/{bucket_name}" expected_query_params = {"projection": "noAcl"} expected_headers = {} client._get_resource.assert_called_once_with( @@ -996,7 +1206,7 @@ def test_get_bucket_hit_w_object_w_retry_none(self): self.assertIsInstance(bucket, Bucket) self.assertEqual(bucket.name, bucket_name) - expected_path = "/b/%s" % (bucket_name,) + expected_path = f"/b/{bucket_name}" expected_query_params = {"projection": "noAcl"} expected_headers = {} client._get_resource.assert_called_once_with( @@ -1022,7 +1232,7 @@ def test_lookup_bucket_miss_w_defaults(self): self.assertIsNone(bucket) - expected_path = "/b/%s" % (bucket_name,) + expected_path = f"/b/{bucket_name}" expected_query_params = {"projection": "noAcl"} expected_headers = {} client._get_resource.assert_called_once_with( @@ -1054,7 +1264,7 @@ def test_lookup_bucket_hit_w_timeout(self): self.assertIsInstance(bucket, Bucket) self.assertEqual(bucket.name, bucket_name) - expected_path = "/b/%s" % (bucket_name,) + expected_path = f"/b/{bucket_name}" expected_query_params = {"projection": "noAcl"} expected_headers = {} client._get_resource.assert_called_once_with( @@ -1084,7 +1294,7 @@ def test_lookup_bucket_hit_w_metageneration_match(self): self.assertIsInstance(bucket, Bucket) self.assertEqual(bucket.name, bucket_name) - expected_path = "/b/%s" % (bucket_name,) + expected_path = f"/b/{bucket_name}" expected_query_params = { "projection": "noAcl", "ifMetagenerationMatch": metageneration_number, @@ -1115,7 +1325,7 @@ def test_lookup_bucket_hit_w_retry(self): self.assertIsInstance(bucket, Bucket) self.assertEqual(bucket.name, bucket_name) - expected_path = "/b/%s" % (bucket_name,) + 
expected_path = f"/b/{bucket_name}" expected_query_params = {"projection": "noAcl"} expected_headers = {} client._get_resource.assert_called_once_with( @@ -1128,11 +1338,113 @@ def test_lookup_bucket_hit_w_retry(self): ) def test_create_bucket_w_missing_client_project(self): + from google.cloud.exceptions import BadRequest + credentials = _make_credentials() client = self._make_one(project=None, credentials=credentials) - with self.assertRaises(ValueError): - client.create_bucket("bucket") + client._post_resource = mock.Mock() + client._post_resource.side_effect = BadRequest("Required parameter: project") + + bucket_name = "bucket-name" + + with self.assertRaises(BadRequest): + client.create_bucket(bucket_name) + + expected_path = "/b" + expected_data = {"name": bucket_name} + # no required parameter: project + expected_query_params = {} + client._post_resource.assert_called_once_with( + expected_path, + expected_data, + query_params=expected_query_params, + timeout=self._get_default_timeout(), + retry=DEFAULT_RETRY, + _target_object=mock.ANY, + ) + + def test_create_bucket_w_missing_client_project_w_emulator(self): + # mock STORAGE_EMULATOR_ENV_VAR is set + host = "http://localhost:8080" + environ = {STORAGE_EMULATOR_ENV_VAR: host} + with mock.patch("os.environ", environ): + client = self._make_one() + + bucket_name = "bucket-name" + api_response = {"name": bucket_name} + client._post_resource = mock.Mock() + client._post_resource.return_value = api_response + + # mock STORAGE_EMULATOR_ENV_VAR is set + with mock.patch("os.environ", environ): + bucket = client.create_bucket(bucket_name) + + expected_path = "/b" + expected_data = api_response + expected_query_params = {"project": ""} + client._post_resource.assert_called_once_with( + expected_path, + expected_data, + query_params=expected_query_params, + timeout=self._get_default_timeout(), + retry=DEFAULT_RETRY, + _target_object=bucket, + ) + + def test_create_bucket_w_environ_project_w_emulator(self): + # mock STORAGE_EMULATOR_ENV_VAR is set + host = "http://localhost:8080" + environ_project = "environ-project" + environ = { + STORAGE_EMULATOR_ENV_VAR: host, + "GOOGLE_CLOUD_PROJECT": environ_project, + } + with mock.patch("os.environ", environ): + client = self._make_one() + + bucket_name = "bucket-name" + api_response = {"name": bucket_name} + client._post_resource = mock.Mock() + client._post_resource.return_value = api_response + + # mock STORAGE_EMULATOR_ENV_VAR is set + with mock.patch("os.environ", environ): + bucket = client.create_bucket(bucket_name) + + expected_path = "/b" + expected_data = api_response + expected_query_params = {"project": environ_project} + client._post_resource.assert_called_once_with( + expected_path, + expected_data, + query_params=expected_query_params, + timeout=self._get_default_timeout(), + retry=DEFAULT_RETRY, + _target_object=bucket, + ) + + def test_create_bucket_w_custom_endpoint(self): + custom_endpoint = "storage-example.p.googleapis.com" + client = self._make_one(client_options={"api_endpoint": custom_endpoint}) + bucket_name = "bucket-name" + api_response = {"name": bucket_name} + client._post_resource = mock.Mock() + client._post_resource.return_value = api_response + + bucket = client.create_bucket(bucket_name) + + expected_path = "/b" + expected_data = api_response + expected_query_params = {"project": client.project} + client._post_resource.assert_called_once_with( + expected_path, + expected_data, + 
query_params=expected_query_params, + timeout=self._get_default_timeout(), + retry=DEFAULT_RETRY, + _target_object=bucket, + ) def test_create_bucket_w_conflict_w_user_project(self): from google.cloud.exceptions import Conflict @@ -1226,7 +1538,9 @@ def test_create_bucket_w_predefined_acl_valid_w_timeout(self): timeout = 42 bucket = client.create_bucket( - bucket_name, predefined_acl="publicRead", timeout=timeout, + bucket_name, + predefined_acl="publicRead", + timeout=timeout, ) expected_path = "/b" @@ -1268,7 +1582,9 @@ def test_create_bucket_w_predefined_default_object_acl_valid_w_retry(self): retry = mock.Mock(spec=[]) bucket = client.create_bucket( - bucket_name, predefined_default_object_acl="publicRead", retry=retry, + bucket_name, + predefined_default_object_acl="publicRead", + retry=retry, ) expected_path = "/b" @@ -1312,6 +1628,44 @@ def test_create_bucket_w_explicit_location(self): _target_object=bucket, ) + def test_create_bucket_w_custom_dual_region(self): + project = "PROJECT" + bucket_name = "bucket-name" + location = "US" + data_locations = ["US-EAST1", "US-WEST1"] + api_response = { + "location": location, + "customPlacementConfig": {"dataLocations": data_locations}, + "name": bucket_name, + } + credentials = _make_credentials() + client = self._make_one(project=project, credentials=credentials) + client._post_resource = mock.Mock() + client._post_resource.return_value = api_response + + bucket = client.create_bucket( + bucket_name, location=location, data_locations=data_locations + ) + + self.assertEqual(bucket.location, location) + self.assertEqual(bucket.data_locations, data_locations) + + expected_path = "/b" + expected_data = { + "location": location, + "customPlacementConfig": {"dataLocations": data_locations}, + "name": bucket_name, + } + expected_query_params = {"project": project} + client._post_resource.assert_called_once_with( + expected_path, + expected_data, + query_params=expected_query_params, + timeout=self._get_default_timeout(), + retry=DEFAULT_RETRY, + _target_object=bucket, + ) + def test_create_bucket_w_explicit_project(self): project = "PROJECT" other_project = "other-project-123" @@ -1376,11 +1730,14 @@ def test_create_bucket_w_extra_properties(self): bucket.requester_pays = True bucket.labels = labels - client.create_bucket(bucket, location=location) + client.create_bucket(bucket, location=location, enable_object_retention=True) expected_path = "/b" expected_data = api_response - expected_query_params = {"project": project} + expected_query_params = { + "project": project, + "enableObjectRetention": True, + } client._post_resource.assert_called_once_with( expected_path, expected_data, @@ -1413,9 +1770,16 @@ def test_create_bucket_w_name_only(self): _target_object=bucket, ) - def test_download_blob_to_file_with_failure(self): - from google.resumable_media import InvalidResponse + @staticmethod + def _make_blob(*args, **kw): from google.cloud.storage.blob import Blob + + blob = Blob(*args, **kw) + + return blob + + def test_download_blob_to_file_with_failure(self): + from google.cloud.storage.exceptions import InvalidResponse from google.cloud.storage.constants import _DEFAULT_TIMEOUT project = "PROJECT" @@ -1426,19 +1790,24 @@ def test_download_blob_to_file_with_failure(self): grmp_response = InvalidResponse(raw_response) credentials = _make_credentials(project=project) client = self._make_one(credentials=credentials) - blob = mock.create_autospec(Blob) + blob = self._make_blob(name="blob_name", bucket=None) blob._encryption_key = None 
blob._get_download_url = mock.Mock() blob._do_download = mock.Mock() blob._do_download.side_effect = grmp_response file_obj = io.BytesIO() - with self.assertRaises(exceptions.NotFound): - client.download_blob_to_file(blob, file_obj) - - self.assertEqual(file_obj.tell(), 0) + with patch.object( + _helpers, "_get_invocation_id", return_value=GCCL_INVOCATION_TEST_CONST + ): + with self.assertRaises(exceptions.NotFound): + client.download_blob_to_file(blob, file_obj) - headers = {"accept-encoding": "gzip"} + self.assertEqual(file_obj.tell(), 0) + headers = { + **_get_default_headers(client._connection.user_agent), + "accept-encoding": "gzip", + } blob._do_download.assert_called_once_with( client._http, file_obj, @@ -1447,7 +1816,7 @@ def test_download_blob_to_file_with_failure(self): None, None, False, - checksum="md5", + checksum="auto", timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY, ) @@ -1458,18 +1827,26 @@ def test_download_blob_to_file_with_uri(self): project = "PROJECT" credentials = _make_credentials(project=project) client = self._make_one(project=project, credentials=credentials) - blob = mock.Mock() + blob = self._make_blob(name="blob_name", bucket=None) file_obj = io.BytesIO() blob._encryption_key = None blob._get_download_url = mock.Mock() blob._do_download = mock.Mock() - with mock.patch( - "google.cloud.storage.client.Blob.from_string", return_value=blob + with patch.object( + _helpers, "_get_invocation_id", return_value=GCCL_INVOCATION_TEST_CONST ): - client.download_blob_to_file("gs://bucket_name/path/to/object", file_obj) + with mock.patch( + "google.cloud.storage.client.Blob.from_uri", return_value=blob + ): + client.download_blob_to_file( + "gs://bucket_name/path/to/object", file_obj + ) - headers = {"accept-encoding": "gzip"} + headers = { + **_get_default_headers(client._connection.user_agent), + "accept-encoding": "gzip", + } blob._do_download.assert_called_once_with( client._http, file_obj, @@ -1478,7 +1855,7 @@ def test_download_blob_to_file_with_uri(self): None, None, False, - checksum="md5", + checksum="auto", timeout=_DEFAULT_TIMEOUT, retry=DEFAULT_RETRY, ) @@ -1489,7 +1866,7 @@ def test_download_blob_to_file_with_invalid_uri(self): client = self._make_one(project=project, credentials=credentials) file_obj = io.BytesIO() - with pytest.raises(ValueError, match="URI scheme must be gs"): + with pytest.raises(ValueError): client.download_blob_to_file("http://bucket_name/path/to/object", file_obj) def test_download_blob_to_file_w_no_retry(self): @@ -1499,7 +1876,10 @@ def test_download_blob_to_file_w_no_retry(self): def test_download_blob_to_file_w_conditional_etag_match_string(self): self._download_blob_to_file_helper( - use_chunks=True, raw_download=True, retry=None, if_etag_match="kittens", + use_chunks=True, + raw_download=True, + retry=None, + if_etag_match="kittens", ) def test_download_blob_to_file_w_conditional_etag_match_list(self): @@ -1512,7 +1892,10 @@ def test_download_blob_to_file_w_conditional_etag_match_list(self): def test_download_blob_to_file_w_conditional_etag_not_match_string(self): self._download_blob_to_file_helper( - use_chunks=True, raw_download=True, retry=None, if_etag_not_match="kittens", + use_chunks=True, + raw_download=True, + retry=None, + if_etag_not_match="kittens", ) def test_download_blob_to_file_w_conditional_etag_not_match_list(self): @@ -1542,27 +1925,28 @@ def test_download_blob_to_file_w_conditional_retry_fail(self): def _download_blob_to_file_helper( self, use_chunks, raw_download, 
expect_condition_fail=False, **extra_kwargs ): - from google.cloud.storage.blob import Blob from google.cloud.storage.constants import _DEFAULT_TIMEOUT project = "PROJECT" credentials = _make_credentials(project=project) client = self._make_one(credentials=credentials) - blob = mock.create_autospec(Blob) + blob = self._make_blob(name="blob_name", bucket=None) blob._encryption_key = None blob._get_download_url = mock.Mock() if use_chunks: blob._CHUNK_SIZE_MULTIPLE = 1 blob.chunk_size = 3 blob._do_download = mock.Mock() - file_obj = io.BytesIO() - if raw_download: - client.download_blob_to_file( - blob, file_obj, raw_download=True, **extra_kwargs - ) - else: - client.download_blob_to_file(blob, file_obj, **extra_kwargs) + with patch.object( + _helpers, "_get_invocation_id", return_value=GCCL_INVOCATION_TEST_CONST + ): + if raw_download: + client.download_blob_to_file( + blob, file_obj, raw_download=True, **extra_kwargs + ) + else: + client.download_blob_to_file(blob, file_obj, **extra_kwargs) expected_retry = extra_kwargs.get("retry", DEFAULT_RETRY) if ( @@ -1585,6 +1969,11 @@ def _download_blob_to_file_helper( if_etag_not_match = [if_etag_not_match] headers["If-None-Match"] = ", ".join(if_etag_not_match) + with patch.object( + _helpers, "_get_invocation_id", return_value=GCCL_INVOCATION_TEST_CONST + ): + headers = {**_get_default_headers(client._connection.user_agent), **headers} + blob._do_download.assert_called_once_with( client._http, file_obj, @@ -1593,7 +1982,7 @@ def _download_blob_to_file_helper( None, None, raw_download, - checksum="md5", + checksum="auto", timeout=_DEFAULT_TIMEOUT, retry=expected_retry, ) @@ -1610,6 +1999,23 @@ def test_download_blob_to_file_wo_chunks_w_raw(self): def test_download_blob_to_file_w_chunks_w_raw(self): self._download_blob_to_file_helper(use_chunks=True, raw_download=True) + def test_download_blob_have_different_uuid(self): + project = "PROJECT" + credentials = _make_credentials(project=project) + client = self._make_one(credentials=credentials) + blob = self._make_blob(name="blob_name", bucket=None) + blob._encryption_key = None + blob._do_download = mock.Mock() + blob._get_download_url = mock.Mock() + file_obj = io.BytesIO() + client.download_blob_to_file(blob, file_obj) + client.download_blob_to_file(blob, file_obj) + + self.assertNotEqual( + blob._do_download.call_args_list[0][0][3]["X-Goog-API-Client"], + blob._do_download.call_args_list[1][0][3]["X-Goog-API-Client"], + ) + def test_list_blobs_w_defaults_w_bucket_obj(self): from google.cloud.storage.bucket import Bucket from google.cloud.storage.bucket import _blobs_page_start @@ -1628,7 +2034,7 @@ def test_list_blobs_w_defaults_w_bucket_obj(self): self.assertIs(iterator.bucket, bucket) self.assertEqual(iterator.prefixes, set()) - expected_path = "/b/{}/o".format(bucket_name) + expected_path = f"/b/{bucket_name}/o" expected_item_to_value = _item_to_blob expected_page_token = None expected_max_results = None @@ -1658,9 +2064,12 @@ def test_list_blobs_w_explicit_w_user_project(self): page_token = "ABCD" prefix = "subfolder" delimiter = "/" + match_glob = "**txt" start_offset = "c" end_offset = "g" include_trailing_delimiter = True + include_folders_as_prefixes = True + soft_deleted = False versions = True projection = "full" page_size = 2 @@ -1672,7 +2081,7 @@ def test_list_blobs_w_explicit_w_user_project(self): bucket = client._bucket_arg_to_bucket.return_value = mock.Mock( spec=["path", "user_project"], ) - bucket.path = "/b/{}".format(bucket_name) + bucket.path = f"/b/{bucket_name}" 
bucket.user_project = user_project timeout = 42 retry = mock.Mock(spec=[]) @@ -1692,13 +2101,16 @@ def test_list_blobs_w_explicit_w_user_project(self): page_size=page_size, timeout=timeout, retry=retry, + match_glob=match_glob, + include_folders_as_prefixes=include_folders_as_prefixes, + soft_deleted=soft_deleted, ) self.assertIs(iterator, client._list_resource.return_value) self.assertIs(iterator.bucket, bucket) self.assertEqual(iterator.prefixes, set()) - expected_path = "/b/{}/o".format(bucket_name) + expected_path = f"/b/{bucket_name}/o" expected_item_to_value = _item_to_blob expected_page_token = page_token expected_max_results = max_results @@ -1706,12 +2118,15 @@ def test_list_blobs_w_explicit_w_user_project(self): "projection": projection, "prefix": prefix, "delimiter": delimiter, + "matchGlob": match_glob, "startOffset": start_offset, "endOffset": end_offset, "includeTrailingDelimiter": include_trailing_delimiter, "versions": versions, "fields": fields, "userProject": user_project, + "includeFoldersAsPrefixes": include_folders_as_prefixes, + "softDeleted": soft_deleted, } expected_page_start = _blobs_page_start expected_page_size = 2 @@ -1728,12 +2143,143 @@ def test_list_blobs_w_explicit_w_user_project(self): ) def test_list_buckets_wo_project(self): + from google.cloud.exceptions import BadRequest + from google.cloud.storage.client import _item_to_bucket + credentials = _make_credentials() client = self._make_one(project=None, credentials=credentials) - with self.assertRaises(ValueError): + client._list_resource = mock.Mock() + client._list_resource.side_effect = BadRequest("Required parameter: project") + + with self.assertRaises(BadRequest): client.list_buckets() + expected_path = "/b" + expected_item_to_value = _item_to_bucket + expected_page_token = None + expected_max_results = None + expected_page_size = None + # no required parameter: project + expected_extra_params = { + "projection": "noAcl", + } + client._list_resource.assert_called_once_with( + expected_path, + expected_item_to_value, + page_token=expected_page_token, + max_results=expected_max_results, + extra_params=expected_extra_params, + page_size=expected_page_size, + timeout=self._get_default_timeout(), + retry=DEFAULT_RETRY, + ) + + def test_list_buckets_wo_project_w_emulator(self): + from google.cloud.storage.client import _item_to_bucket + + # mock STORAGE_EMULATOR_ENV_VAR is set + host = "http://localhost:8080" + environ = {STORAGE_EMULATOR_ENV_VAR: host} + with mock.patch("os.environ", environ): + client = self._make_one() + + client._list_resource = mock.Mock(spec=[]) + + # mock STORAGE_EMULATOR_ENV_VAR is set + with mock.patch("os.environ", environ): + client.list_buckets() + + expected_path = "/b" + expected_item_to_value = _item_to_bucket + expected_page_token = None + expected_max_results = None + expected_page_size = None + expected_extra_params = { + "project": "", + "projection": "noAcl", + } + client._list_resource.assert_called_once_with( + expected_path, + expected_item_to_value, + page_token=expected_page_token, + max_results=expected_max_results, + extra_params=expected_extra_params, + page_size=expected_page_size, + timeout=self._get_default_timeout(), + retry=DEFAULT_RETRY, + ) + + def test_list_buckets_w_environ_project_w_emulator(self): + from google.cloud.storage.client import _item_to_bucket + + # mock STORAGE_EMULATOR_ENV_VAR is set + host = "http://localhost:8080" + environ_project = "environ-project" 
+ environ = { + STORAGE_EMULATOR_ENV_VAR: host, + "GOOGLE_CLOUD_PROJECT": environ_project, + } + with mock.patch("os.environ", environ): + client = self._make_one() + + client._list_resource = mock.Mock(spec=[]) + + # mock STORAGE_EMULATOR_ENV_VAR is set + with mock.patch("os.environ", environ): + client.list_buckets() + + expected_path = "/b" + expected_item_to_value = _item_to_bucket + expected_page_token = None + expected_max_results = None + expected_page_size = None + expected_extra_params = { + "project": environ_project, + "projection": "noAcl", + } + client._list_resource.assert_called_once_with( + expected_path, + expected_item_to_value, + page_token=expected_page_token, + max_results=expected_max_results, + extra_params=expected_extra_params, + page_size=expected_page_size, + timeout=self._get_default_timeout(), + retry=DEFAULT_RETRY, + ) + + def test_list_buckets_w_custom_endpoint(self): + from google.cloud.storage.client import _item_to_bucket + + custom_endpoint = "storage-example.p.googleapis.com" + client = self._make_one(client_options={"api_endpoint": custom_endpoint}) + client._list_resource = mock.Mock(spec=[]) + + iterator = client.list_buckets() + + self.assertIs(iterator, client._list_resource.return_value) + + expected_path = "/b" + expected_item_to_value = _item_to_bucket + expected_page_token = None + expected_max_results = None + expected_page_size = None + expected_extra_params = { + "project": client.project, + "projection": "noAcl", + } + client._list_resource.assert_called_once_with( + expected_path, + expected_item_to_value, + page_token=expected_page_token, + max_results=expected_max_results, + extra_params=expected_extra_params, + page_size=expected_page_size, + timeout=self._get_default_timeout(), + retry=DEFAULT_RETRY, + ) + def test_list_buckets_w_defaults(self): from google.cloud.storage.client import _item_to_bucket @@ -1766,6 +2312,39 @@ def test_list_buckets_w_defaults(self): retry=DEFAULT_RETRY, ) + def test_list_buckets_w_soft_deleted(self): + from google.cloud.storage.client import _item_to_bucket + + project = "PROJECT" + credentials = _make_credentials() + client = self._make_one(project=project, credentials=credentials) + client._list_resource = mock.Mock(spec=[]) + + iterator = client.list_buckets(soft_deleted=True) + + self.assertIs(iterator, client._list_resource.return_value) + + expected_path = "/b" + expected_item_to_value = _item_to_bucket + expected_page_token = None + expected_max_results = None + expected_page_size = None + expected_extra_params = { + "project": project, + "projection": "noAcl", + "softDeleted": True, + } + client._list_resource.assert_called_once_with( + expected_path, + expected_item_to_value, + page_token=expected_page_token, + max_results=expected_max_results, + extra_params=expected_extra_params, + page_size=expected_page_size, + timeout=self._get_default_timeout(), + retry=DEFAULT_RETRY, + ) + def test_list_buckets_w_explicit(self): from google.cloud.storage.client import _item_to_bucket @@ -1819,11 +2398,40 @@ def test_list_buckets_w_explicit(self): retry=retry, ) + def test_restore_bucket(self): + from google.cloud.storage.bucket import Bucket + + PROJECT = "PROJECT" + NAME = "my_deleted_bucket" + GENERATION = 12345 + + api_response = {"name": NAME} + credentials = _make_credentials() + client = self._make_one(project=PROJECT, credentials=credentials) + client._post_resource = mock.Mock(return_value=api_response) + + bucket = client.restore_bucket(NAME, GENERATION) + + self.assertIsInstance(bucket, Bucket) + 
self.assertEqual(bucket.name, NAME) + + expected_path = f"/b/{NAME}/restore" + expected_query_params = {"generation": 12345, "projection": "noAcl"} + client._post_resource.assert_called_once_with( + expected_path, + None, + query_params=expected_query_params, + timeout=self._get_default_timeout(), + retry=DEFAULT_RETRY, + ) + def _create_hmac_key_helper( - self, explicit_project=None, user_project=None, timeout=None, retry=None, + self, + explicit_project=None, + user_project=None, + timeout=None, + retry=None, ): - import datetime - from google.cloud._helpers import UTC from google.cloud.storage.hmac_key import HMACKeyMetadata project = "PROJECT" @@ -1831,8 +2439,8 @@ def _create_hmac_key_helper( credentials = _make_credentials() email = "storage-user-123@example.com" secret = "a" * 40 - now = datetime.datetime.utcnow().replace(tzinfo=UTC) - now_stamp = "{}Z".format(now.isoformat()) + now = _NOW(_UTC) + now_stamp = f"{now.isoformat()}Z" if explicit_project is not None: expected_project = explicit_project @@ -1844,7 +2452,7 @@ def _create_hmac_key_helper( "metadata": { "accessId": access_id, "etag": "ETAG", - "id": "projects/{}/hmacKeys/{}".format(project, access_id), + "id": f"projects/{project}/hmacKeys/{access_id}", "project": expected_project, "state": "ACTIVE", "serviceAccountEmail": email, @@ -1883,7 +2491,7 @@ def _create_hmac_key_helper( self.assertEqual(metadata._properties, api_response["metadata"]) self.assertEqual(secret, api_response["secret"]) - expected_path = "/projects/{}/hmacKeys".format(expected_project) + expected_path = f"/projects/{expected_project}/hmacKeys" expected_data = None expected_query_params = {"serviceAccountEmail": email} @@ -1925,7 +2533,7 @@ def test_list_hmac_keys_w_defaults(self): self.assertIs(iterator, client._list_resource.return_value) - expected_path = "/projects/{}/hmacKeys".format(project) + expected_path = f"/projects/{project}/hmacKeys" expected_item_to_value = _item_to_hmac_key_metadata expected_max_results = None expected_extra_params = {} @@ -1965,7 +2573,7 @@ def test_list_hmac_keys_w_explicit(self): self.assertIs(iterator, client._list_resource.return_value) - expected_path = "/projects/{}/hmacKeys".format(other_project) + expected_path = f"/projects/{other_project}/hmacKeys" expected_item_to_value = _item_to_hmac_key_metadata expected_max_results = max_results expected_extra_params = { @@ -2013,7 +2621,7 @@ def test_get_hmac_key_metadata_wo_project(self): ) _, kwargs = http.request.call_args scheme, netloc, path, qs, _ = urllib.parse.urlsplit(kwargs.get("url")) - self.assertEqual("%s://%s" % (scheme, netloc), client._connection.API_BASE_URL) + self.assertEqual(f"{scheme}://{netloc}", client._connection.API_BASE_URL) self.assertEqual( path, "/".join( @@ -2068,7 +2676,7 @@ def test_get_hmac_key_metadata_w_project(self): ) _, kwargs = http.request.call_args scheme, netloc, path, qs, _ = urllib.parse.urlsplit(kwargs.get("url")) - self.assertEqual("%s://%s" % (scheme, netloc), client._connection.API_BASE_URL) + self.assertEqual(f"{scheme}://{netloc}", client._connection.API_BASE_URL) self.assertEqual( path, "/".join( @@ -2228,7 +2836,7 @@ def test_get_signed_policy_v4_virtual_hosted_style(self): credentials=_create_signing_credentials(), ) self.assertEqual( - policy["url"], "https://{}.storage.googleapis.com/".format(BUCKET_NAME) + policy["url"], f"https://{BUCKET_NAME}.storage.googleapis.com/" ) def test_get_signed_policy_v4_bucket_bound_hostname(self): @@ -2247,7 +2855,26 @@ def test_get_signed_policy_v4_bucket_bound_hostname(self): 
bucket_bound_hostname="https://bucket.bound_hostname", credentials=_create_signing_credentials(), ) - self.assertEqual(policy["url"], "https://bucket.bound_hostname") + self.assertEqual(policy["url"], "https://bucket.bound_hostname/") + + def test_get_signed_policy_v4_with_conflicting_arguments(self): + import datetime + + project = "PROJECT" + credentials = _make_credentials(project=project) + client = self._make_one(credentials=credentials) + + dtstamps_patch, _, _ = _time_functions_patches() + with dtstamps_patch: + with self.assertRaises(ValueError): + client.generate_signed_post_policy_v4( + "bucket-name", + "object-name", + expiration=datetime.datetime(2020, 3, 12), + bucket_bound_hostname="https://bucket.bound_hostname", + virtual_hosted_style=True, + credentials=_create_signing_credentials(), + ) def test_get_signed_policy_v4_bucket_bound_hostname_with_scheme(self): import datetime @@ -2335,6 +2962,50 @@ def test_get_signed_policy_v4_with_access_token(self): self.assertEqual(fields["x-goog-signature"], EXPECTED_SIGN) self.assertEqual(fields["policy"], EXPECTED_POLICY) + def test_get_signed_policy_v4_with_access_token_sa_email(self): + import datetime + + BUCKET_NAME = "bucket-name" + BLOB_NAME = "object-name" + EXPECTED_SIGN = "0c4003044105" + EXPECTED_POLICY = "eyJjb25kaXRpb25zIjpbeyJidWNrZXQiOiJidWNrZXQtbmFtZSJ9LHsiYWNsIjoicHJpdmF0ZSJ9LFsic3RhcnRzLXdpdGgiLCIkQ29udGVudC1UeXBlIiwidGV4dC9wbGFpbiJdLHsiYnVja2V0IjoiYnVja2V0LW5hbWUifSx7ImtleSI6Im9iamVjdC1uYW1lIn0seyJ4LWdvb2ctZGF0ZSI6IjIwMjAwMzEyVDExNDcxNloifSx7IngtZ29vZy1jcmVkZW50aWFsIjoidGVzdEBtYWlsLmNvbS8yMDIwMDMxMi9hdXRvL3N0b3JhZ2UvZ29vZzRfcmVxdWVzdCJ9LHsieC1nb29nLWFsZ29yaXRobSI6IkdPT0c0LVJTQS1TSEEyNTYifV0sImV4cGlyYXRpb24iOiIyMDIwLTAzLTI2VDAwOjAwOjEwWiJ9" + + project = "PROJECT" + credentials = _make_credentials(project=project) + client = self._make_one(credentials=credentials) + + dtstamps_patch, now_patch, expire_secs_patch = _time_functions_patches() + with dtstamps_patch, now_patch, expire_secs_patch: + with mock.patch( + "google.cloud.storage.client._sign_message", return_value=b"DEADBEEF" + ): + policy = client.generate_signed_post_policy_v4( + BUCKET_NAME, + BLOB_NAME, + expiration=datetime.datetime(2020, 3, 12), + conditions=[ + {"bucket": BUCKET_NAME}, + {"acl": "private"}, + ["starts-with", "$Content-Type", "text/plain"], + ], + service_account_email="test@mail.com", + access_token="token", + ) + self.assertEqual( + policy["url"], "https://storage.googleapis.com/" + BUCKET_NAME + "/" + ) + fields = policy["fields"] + + self.assertEqual(fields["key"], BLOB_NAME) + self.assertEqual(fields["x-goog-algorithm"], "GOOG4-RSA-SHA256") + self.assertEqual(fields["x-goog-date"], "20200312T114716Z") + self.assertEqual( + fields["x-goog-credential"], + "test@mail.com/20200312/auto/storage/goog4_request", + ) + self.assertEqual(fields["x-goog-signature"], EXPECTED_SIGN) + self.assertEqual(fields["policy"], EXPECTED_POLICY) + class Test__item_to_bucket(unittest.TestCase): def _call_fut(self, iterator, item): @@ -2399,13 +3070,12 @@ def test_conformance_post_policy(test_data): client = Client(credentials=_FAKE_CREDENTIALS, project="PROJECT") # mocking time functions - with mock.patch("google.cloud.storage._signing.NOW", return_value=timestamp): + with mock.patch("google.cloud.storage._signing._NOW", return_value=timestamp): 
with mock.patch( "google.cloud.storage.client.get_expiration_seconds_v4", return_value=in_data["expiration"], ): with mock.patch("google.cloud.storage.client._NOW", return_value=timestamp): - policy = client.generate_signed_post_policy_v4( bucket_name=in_data["bucket"], blob_name=in_data["object"], diff --git a/tests/unit/test_exceptions.py b/tests/unit/test_exceptions.py new file mode 100644 index 000000000..beaa775bc --- /dev/null +++ b/tests/unit/test_exceptions.py @@ -0,0 +1,82 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from importlib import reload +from unittest.mock import Mock +from unittest.mock import sentinel +import sys + + +def test_exceptions_imports_correctly_in_base_case(): + try: + mock = Mock(spec=[]) + sys.modules["google.resumable_media"] = mock + + from google.cloud.storage import exceptions + + reload(exceptions) + invalid_response = exceptions.InvalidResponse(Mock()) + ir_base_names = [base.__name__ for base in invalid_response.__class__.__bases__] + assert ir_base_names == ["Exception"] + + data_corruption = exceptions.DataCorruption(Mock()) + dc_base_names = [base.__name__ for base in data_corruption.__class__.__bases__] + assert dc_base_names == ["Exception"] + finally: + del sys.modules["google.resumable_media"] + reload(exceptions) + + +def test_exceptions_imports_correctly_in_resumable_media_installed_case(): + try: + mock = Mock(spec=["InvalidResponse", "DataCorruption"]) + + class InvalidResponse(Exception): + def __init__(self, response, *args): + super().__init__(*args) + self.response = response + + class DataCorruption(Exception): + def __init__(self, response, *args): + super().__init__(*args) + self.response = response + + mock.InvalidResponse = InvalidResponse + mock.DataCorruption = DataCorruption + + sys.modules["google.resumable_media"] = mock + + from google.cloud.storage import exceptions + + reload(exceptions) + invalid_response = exceptions.InvalidResponse(Mock()) + ir_base_names = [base.__name__ for base in invalid_response.__class__.__bases__] + assert ir_base_names == ["InvalidResponse"] + + data_corruption = exceptions.DataCorruption(Mock()) + dc_base_names = [base.__name__ for base in data_corruption.__class__.__bases__] + assert dc_base_names == ["DataCorruption"] + finally: + del sys.modules["google.resumable_media"] + reload(exceptions) + + +def test_InvalidResponse(): + from google.cloud.storage import exceptions + + response = sentinel.response + error = exceptions.InvalidResponse(response, 1, "a", [b"m"], True) + + assert error.response is response + assert error.args == (1, "a", [b"m"], True) diff --git a/tests/unit/test_fileio.py b/tests/unit/test_fileio.py index d71103707..8da25d9e3 100644 --- a/tests/unit/test_fileio.py +++ b/tests/unit/test_fileio.py @@ -21,13 +21,14 @@ import mock from google.api_core.exceptions import RequestRangeNotSatisfiable +from google.cloud.storage.fileio import CHUNK_SIZE_MULTIPLE from google.cloud.storage.retry import 
DEFAULT_RETRY +from google.cloud.storage.retry import DEFAULT_RETRY_IF_GENERATION_SPECIFIED TEST_TEXT_DATA = string.ascii_lowercase + "\n" + string.ascii_uppercase + "\n" TEST_BINARY_DATA = TEST_TEXT_DATA.encode("utf-8") -TEST_MULTIBYTE_TEXT_DATA = u"あいうえおかきくけこさしすせそたちつてと" +TEST_MULTIBYTE_TEXT_DATA = "あいうえおかきくけこさしすせそたちつてと" PLAIN_CONTENT_TYPE = "text/plain" -NUM_RETRIES = 2 class _BlobReaderBase: @@ -247,11 +248,47 @@ def initialize_size(**_): reader.close() + def test_advanced_seek(self): + blob = mock.Mock() + + def read_from_fake_data(start=0, end=None, **_): + return TEST_BINARY_DATA[start:end] * 1024 + + blob.download_as_bytes = mock.Mock(side_effect=read_from_fake_data) + blob.size = None + download_kwargs = {"if_metageneration_match": 1} + reader = self._make_blob_reader(blob, chunk_size=1024, **download_kwargs) + + # Seek needs the blob size to work and should call reload() if the size + # is not known. Set a mock to initialize the size if reload() is called. + def initialize_size(**_): + blob.size = len(TEST_BINARY_DATA) * 1024 + + blob.reload = mock.Mock(side_effect=initialize_size) + + self.assertEqual(reader.tell(), 0) + # Mimic tarfile access pattern. Read tarinfo block. + reader.read(512) + self.assertEqual(reader.tell(), 512) + self.assertEqual(reader.seek(512), 512) + # Mimic read actual tar content. + reader.read(400) + self.assertEqual(reader.tell(), 912) + # Tarfile offsets are rounded up by block size + # A sanity seek/read is used to check for unexpected ends. + reader.seek(1023) + reader.read(1) + self.assertEqual(reader.tell(), 1024) + reader.read(512) + self.assertEqual(reader.tell(), 1536) + reader.close() + def test_close(self): blob = mock.Mock() reader = self._make_blob_reader(blob) reader.close() + self.assertTrue(reader.closed) with self.assertRaises(ValueError): reader.read() @@ -290,13 +327,6 @@ def test_attributes_explicit(self): self.assertEqual(writer._chunk_size, 512 * 1024) self.assertEqual(writer._retry, DEFAULT_RETRY) - def test_deprecated_text_mode_attribute(self): - blob = mock.Mock() - blob.chunk_size = 256 * 1024 - writer = self._make_blob_writer(blob, text_mode=True) - self.assertTrue(writer._ignore_flush) - writer.flush() # This should do nothing and not raise an error. - def test_reject_wrong_chunk_size(self): blob = mock.Mock() blob.chunk_size = 123 @@ -305,11 +335,10 @@ def test_reject_wrong_chunk_size(self): @mock.patch("warnings.warn") def test_write(self, mock_warn): - from google.cloud.storage._helpers import _NUM_RETRIES_MESSAGE - blob = mock.Mock() upload = mock.Mock() transport = mock.Mock() + timeout = 600 blob._initiate_resumable_upload.return_value = (upload, transport) @@ -318,26 +347,28 @@ def test_write(self, mock_warn): # arguments are used. # It would be normal to use a context manager here, but not doing so # gives us more control over close() for test purposes. - upload_kwargs = {"if_metageneration_match": 1} + upload_kwargs = { + "if_metageneration_match": 1, + "timeout": timeout, + } chunk_size = 8 # Note: Real upload requires a multiple of 256KiB. writer = self._make_blob_writer( blob, chunk_size=chunk_size, - num_retries=NUM_RETRIES, content_type=PLAIN_CONTENT_TYPE, **upload_kwargs ) # The transmit_next_chunk method must actually consume bytes from the # sliding buffer for the flush() feature to work properly. - upload.transmit_next_chunk.side_effect = lambda _: writer._buffer.read( + upload.transmit_next_chunk.side_effect = lambda _, timeout: writer._buffer.read( chunk_size ) # Write under chunk_size. 
This should be buffered and the upload not # initiated. writer.write(TEST_BINARY_DATA[0:4]) - blob.initiate_resumable_upload.assert_not_called() + blob._initiate_resumable_upload.assert_not_called() # Write over chunk_size. This should result in upload initialization # and multiple chunks uploaded. @@ -347,12 +378,11 @@ def test_write(self, mock_warn): writer._buffer, PLAIN_CONTENT_TYPE, None, - NUM_RETRIES, chunk_size=chunk_size, - retry=None, + retry=DEFAULT_RETRY, **upload_kwargs ) - upload.transmit_next_chunk.assert_called_with(transport) + upload.transmit_next_chunk.assert_called_with(transport, timeout=timeout) self.assertEqual(upload.transmit_next_chunk.call_count, 4) # Write another byte, finalize and close. @@ -361,9 +391,70 @@ def test_write(self, mock_warn): writer.close() self.assertEqual(upload.transmit_next_chunk.call_count, 5) - mock_warn.assert_called_once_with( - _NUM_RETRIES_MESSAGE, DeprecationWarning, stacklevel=2, - ) + def test_close_errors(self): + blob = mock.Mock(chunk_size=None) + + upload = mock.Mock() + transport = mock.Mock() + + blob._initiate_resumable_upload.return_value = (upload, transport) + + writer = self._make_blob_writer(blob) + + writer.close() + # Close a second time to verify it successfully does nothing. + writer.close() + + self.assertTrue(writer.closed) + # Try to write to closed file. + with self.assertRaises(ValueError): + writer.write(TEST_BINARY_DATA) + + def test_terminate_after_initiate(self): + blob = mock.Mock() + + upload = mock.Mock(upload_url="dummy") + transport = mock.Mock() + + blob._initiate_resumable_upload.return_value = (upload, transport) + + with self.assertRaises(RuntimeError): + with self._make_blob_writer(blob, chunk_size=CHUNK_SIZE_MULTIPLE) as writer: + writer.write(bytes(CHUNK_SIZE_MULTIPLE + 1)) # initiate upload + raise RuntimeError # should terminate the upload + blob._initiate_resumable_upload.assert_called_once() # upload initiated + self.assertTrue(writer.closed) # terminate called + transport.delete.assert_called_with("dummy") # resumable upload terminated + + def test_terminate_before_initiate(self): + blob = mock.Mock() + + upload = mock.Mock() + transport = mock.Mock() + + blob._initiate_resumable_upload.return_value = (upload, transport) + + with self.assertRaises(RuntimeError): + with self._make_blob_writer(blob, chunk_size=CHUNK_SIZE_MULTIPLE) as writer: + writer.write(bytes(CHUNK_SIZE_MULTIPLE - 1)) # upload not yet initiated + raise RuntimeError # there is no resumable upload to terminate + blob._initiate_resumable_upload.assert_not_called() # upload not yet initiated + self.assertTrue(writer.closed) # terminate called + transport.delete.assert_not_called() # there's no resumable upload to terminate + + def test_terminate_skipped(self): + blob = mock.Mock() + + upload = mock.Mock() + transport = mock.Mock() + + blob._initiate_resumable_upload.return_value = (upload, transport) + + with self._make_blob_writer(blob, chunk_size=CHUNK_SIZE_MULTIPLE) as writer: + writer.write(bytes(CHUNK_SIZE_MULTIPLE + 1)) # upload initiated + blob._initiate_resumable_upload.assert_called() # upload initiated + self.assertTrue(writer.closed) # close called + transport.delete.assert_not_called() # terminate not called def test_flush_fails(self): blob = mock.Mock(chunk_size=None) @@ -377,9 +468,9 @@ def test_seek_fails(self): writer = self._make_blob_writer(blob) with self.assertRaises(io.UnsupportedOperation): - writer.seek() + writer.seek(0) - def test_conditional_retry_failure(self): + def test_retry_enabled(self): blob = 
mock.Mock() upload = mock.Mock() @@ -393,7 +484,10 @@ def test_conditional_retry_failure(self): # gives us more control over close() for test purposes. chunk_size = 8 # Note: Real upload requires a multiple of 256KiB. writer = self._make_blob_writer( - blob, chunk_size=chunk_size, content_type=PLAIN_CONTENT_TYPE, + blob, + chunk_size=chunk_size, + content_type=PLAIN_CONTENT_TYPE, + if_generation_match=123456, ) # The transmit_next_chunk method must actually consume bytes from the @@ -405,20 +499,20 @@ def test_conditional_retry_failure(self): # Write under chunk_size. This should be buffered and the upload not # initiated. writer.write(TEST_BINARY_DATA[0:4]) - blob.initiate_resumable_upload.assert_not_called() + blob._initiate_resumable_upload.assert_not_called() # Write over chunk_size. This should result in upload initialization # and multiple chunks uploaded. - # Due to the condition not being fulfilled, retry should be None. + # Retry should be DEFAULT_RETRY. writer.write(TEST_BINARY_DATA[4:32]) blob._initiate_resumable_upload.assert_called_once_with( blob.bucket.client, writer._buffer, PLAIN_CONTENT_TYPE, None, # size - None, # num_retries chunk_size=chunk_size, - retry=None, + retry=DEFAULT_RETRY, + if_generation_match=123456, ) upload.transmit_next_chunk.assert_called_with(transport) self.assertEqual(upload.transmit_next_chunk.call_count, 4) @@ -428,7 +522,7 @@ def test_conditional_retry_failure(self): writer.close() self.assertEqual(upload.transmit_next_chunk.call_count, 5) - def test_conditional_retry_pass(self): + def test_forced_default_retry(self): blob = mock.Mock() upload = mock.Mock() @@ -445,7 +539,7 @@ def test_conditional_retry_pass(self): blob, chunk_size=chunk_size, content_type=PLAIN_CONTENT_TYPE, - if_generation_match=123456, + retry=DEFAULT_RETRY, ) # The transmit_next_chunk method must actually consume bytes from the @@ -457,21 +551,18 @@ def test_conditional_retry_pass(self): # Write under chunk_size. This should be buffered and the upload not # initiated. writer.write(TEST_BINARY_DATA[0:4]) - blob.initiate_resumable_upload.assert_not_called() + blob._initiate_resumable_upload.assert_not_called() # Write over chunk_size. This should result in upload initialization # and multiple chunks uploaded. - # Due to the condition being fulfilled, retry should be DEFAULT_RETRY. writer.write(TEST_BINARY_DATA[4:32]) blob._initiate_resumable_upload.assert_called_once_with( blob.bucket.client, writer._buffer, PLAIN_CONTENT_TYPE, None, # size - None, # num_retries chunk_size=chunk_size, retry=DEFAULT_RETRY, - if_generation_match=123456, ) upload.transmit_next_chunk.assert_called_with(transport) self.assertEqual(upload.transmit_next_chunk.call_count, 4) @@ -481,7 +572,13 @@ def test_conditional_retry_pass(self): writer.close() self.assertEqual(upload.transmit_next_chunk.call_count, 5) - def test_forced_default_retry(self): + def test_rejects_invalid_kwargs(self): + blob = mock.Mock() + with self.assertRaises(ValueError): + self._make_blob_writer(blob, invalid_kwarg=1) + + def test_conditional_retry_w_condition(self): + # Not the default, but still supported in the signature for compatibility. 
blob = mock.Mock() upload = mock.Mock() @@ -498,7 +595,8 @@ def test_forced_default_retry(self): blob, chunk_size=chunk_size, content_type=PLAIN_CONTENT_TYPE, - retry=DEFAULT_RETRY, + retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED, + if_generation_match=100, ) # The transmit_next_chunk method must actually consume bytes from the @@ -510,7 +608,7 @@ def test_forced_default_retry(self): # Write under chunk_size. This should be buffered and the upload not # initiated. writer.write(TEST_BINARY_DATA[0:4]) - blob.initiate_resumable_upload.assert_not_called() + blob._initiate_resumable_upload.assert_not_called() # Write over chunk_size. This should result in upload initialization # and multiple chunks uploaded. @@ -520,68 +618,15 @@ def test_forced_default_retry(self): writer._buffer, PLAIN_CONTENT_TYPE, None, # size - None, # num_retries chunk_size=chunk_size, retry=DEFAULT_RETRY, + if_generation_match=100, ) - upload.transmit_next_chunk.assert_called_with(transport) - self.assertEqual(upload.transmit_next_chunk.call_count, 4) - - # Write another byte, finalize and close. - writer.write(TEST_BINARY_DATA[32:33]) - writer.close() - self.assertEqual(upload.transmit_next_chunk.call_count, 5) - - @mock.patch("warnings.warn") - def test_num_retries_and_retry_conflict(self, mock_warn): - from google.cloud.storage._helpers import _NUM_RETRIES_MESSAGE + def test_conditional_retry_wo_condition(self): + # Not the default, but still supported in the signature for compatibility. blob = mock.Mock() - blob._initiate_resumable_upload.side_effect = ValueError - - with mock.patch("google.cloud.storage.fileio.CHUNK_SIZE_MULTIPLE", 1): - # Create a writer. - # It would be normal to use a context manager here, but not doing so - # gives us more control over close() for test purposes. - chunk_size = 8 # Note: Real upload requires a multiple of 256KiB. - writer = self._make_blob_writer( - blob, - chunk_size=chunk_size, - content_type=PLAIN_CONTENT_TYPE, - num_retries=2, - retry=DEFAULT_RETRY, - ) - - # Write under chunk_size. This should be buffered and the upload not - # initiated. - writer.write(TEST_BINARY_DATA[0:4]) - blob.initiate_resumable_upload.assert_not_called() - - # Write over chunk_size. The mock will raise a ValueError, simulating - # actual behavior when num_retries and retry are both specified. - with self.assertRaises(ValueError): - writer.write(TEST_BINARY_DATA[4:32]) - - blob._initiate_resumable_upload.assert_called_once_with( - blob.bucket.client, - writer._buffer, - PLAIN_CONTENT_TYPE, - None, # size - 2, # num_retries - chunk_size=chunk_size, - retry=DEFAULT_RETRY, - ) - - mock_warn.assert_called_once_with( - _NUM_RETRIES_MESSAGE, DeprecationWarning, stacklevel=2, - ) - - @mock.patch("warnings.warn") - def test_num_retries_only(self, mock_warn): - from google.cloud.storage._helpers import _NUM_RETRIES_MESSAGE - - blob = mock.Mock() upload = mock.Mock() transport = mock.Mock() @@ -596,7 +641,7 @@ def test_num_retries_only(self, mock_warn): blob, chunk_size=chunk_size, content_type=PLAIN_CONTENT_TYPE, - num_retries=2, + retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED, ) # The transmit_next_chunk method must actually consume bytes from the @@ -608,7 +653,7 @@ def test_num_retries_only(self, mock_warn): # Write under chunk_size. This should be buffered and the upload not # initiated. writer.write(TEST_BINARY_DATA[0:4]) - blob.initiate_resumable_upload.assert_not_called() + blob._initiate_resumable_upload.assert_not_called() # Write over chunk_size. 
This should result in upload initialization # and multiple chunks uploaded. @@ -618,26 +663,9 @@ def test_num_retries_only(self, mock_warn): writer._buffer, PLAIN_CONTENT_TYPE, None, # size - 2, # num_retries chunk_size=chunk_size, retry=None, ) - upload.transmit_next_chunk.assert_called_with(transport) - self.assertEqual(upload.transmit_next_chunk.call_count, 4) - - mock_warn.assert_called_once_with( - _NUM_RETRIES_MESSAGE, DeprecationWarning, stacklevel=2 - ) - - # Write another byte, finalize and close. - writer.write(TEST_BINARY_DATA[32:33]) - writer.close() - self.assertEqual(upload.transmit_next_chunk.call_count, 5) - - def test_rejects_invalid_kwargs(self): - blob = mock.Mock() - with self.assertRaises(ValueError): - self._make_blob_writer(blob, invalid_kwarg=1) class Test_SlidingBuffer(unittest.TestCase): @@ -709,6 +737,7 @@ def test_seek(self): def test_close(self): buff = self._make_sliding_buffer() buff.close() + self.assertTrue(buff.closed) with self.assertRaises(ValueError): buff.read() @@ -855,6 +884,7 @@ def test_close(self): reader = self._make_blob_reader(blob) reader.close() + self.assertTrue(reader.closed) with self.assertRaises(ValueError): reader.read() @@ -866,8 +896,6 @@ def test_close(self): class TestBlobWriterText(unittest.TestCase, _BlobWriterBase): @mock.patch("warnings.warn") def test_write(self, mock_warn): - from google.cloud.storage._helpers import _NUM_RETRIES_MESSAGE - blob = mock.Mock() upload = mock.Mock() transport = mock.Mock() @@ -883,7 +911,6 @@ def test_write(self, mock_warn): blob, chunk_size=chunk_size, ignore_flush=True, - num_retries=NUM_RETRIES, content_type=PLAIN_CONTENT_TYPE, ) @@ -891,14 +918,14 @@ def test_write(self, mock_warn): # The transmit_next_chunk method must actually consume bytes from the # sliding buffer for the flush() feature to work properly. - upload.transmit_next_chunk.side_effect = lambda _: unwrapped_writer._buffer.read( - chunk_size + upload.transmit_next_chunk.side_effect = ( + lambda _: unwrapped_writer._buffer.read(chunk_size) ) # Write under chunk_size. This should be buffered and the upload not # initiated. writer.write(TEST_MULTIBYTE_TEXT_DATA[0:2]) - blob.initiate_resumable_upload.assert_not_called() + blob._initiate_resumable_upload.assert_not_called() # Write all data and close. writer.write(TEST_MULTIBYTE_TEXT_DATA[2:]) @@ -909,12 +936,7 @@ def test_write(self, mock_warn): unwrapped_writer._buffer, PLAIN_CONTENT_TYPE, None, - NUM_RETRIES, chunk_size=chunk_size, - retry=None, + retry=DEFAULT_RETRY, ) upload.transmit_next_chunk.assert_called_with(transport) - - mock_warn.assert_called_once_with( - _NUM_RETRIES_MESSAGE, DeprecationWarning, stacklevel=2, - ) diff --git a/tests/unit/test_hmac_key.py b/tests/unit/test_hmac_key.py index 59a2b221f..941852d37 100644 --- a/tests/unit/test_hmac_key.py +++ b/tests/unit/test_hmac_key.py @@ -18,6 +18,8 @@ from google.cloud.storage.retry import DEFAULT_RETRY from google.cloud.storage.retry import DEFAULT_RETRY_IF_ETAG_IN_JSON +from google.cloud.storage._helpers import _NOW +from google.cloud.storage._helpers import _UTC class TestHMACKeyMetadata(unittest.TestCase): @@ -149,11 +151,12 @@ def test_state_getter(self): def test_state_setter_invalid_state(self): metadata = self._make_one() expected = "INVALID" + metadata.state = expected - with self.assertRaises(ValueError): - metadata.state = expected - - self.assertIsNone(metadata.state) + # Test that invalid states are allowed without client side validation. + # Fall back to server side validation and errors. 
+ self.assertEqual(metadata.state, expected) + self.assertEqual(metadata._properties["state"], expected) def test_state_setter_inactive(self): metadata = self._make_one() @@ -172,24 +175,18 @@ def test_state_setter_active(self): self.assertEqual(metadata._properties["state"], expected) def test_time_created_getter(self): - import datetime - from google.cloud._helpers import UTC - metadata = self._make_one() - now = datetime.datetime.utcnow() - now_stamp = "{}Z".format(now.isoformat()) + now = _NOW() + now_stamp = f"{now.isoformat()}Z" metadata._properties["timeCreated"] = now_stamp - self.assertEqual(metadata.time_created, now.replace(tzinfo=UTC)) + self.assertEqual(metadata.time_created, now.replace(tzinfo=_UTC)) def test_updated_getter(self): - import datetime - from google.cloud._helpers import UTC - metadata = self._make_one() - now = datetime.datetime.utcnow() - now_stamp = "{}Z".format(now.isoformat()) + now = _NOW() + now_stamp = f"{now.isoformat()}Z" metadata._properties["updated"] = now_stamp - self.assertEqual(metadata.updated, now.replace(tzinfo=UTC)) + self.assertEqual(metadata.updated, now.replace(tzinfo=_UTC)) def test_path_wo_access_id(self): metadata = self._make_one() @@ -203,9 +200,7 @@ def test_path_w_access_id_wo_project(self): metadata = self._make_one() metadata._properties["accessId"] = access_id - expected_path = "/projects/{}/hmacKeys/{}".format( - client.DEFAULT_PROJECT, access_id - ) + expected_path = f"/projects/{client.DEFAULT_PROJECT}/hmacKeys/{access_id}" self.assertEqual(metadata.path, expected_path) def test_path_w_access_id_w_explicit_project(self): @@ -215,7 +210,7 @@ def test_path_w_access_id_w_explicit_project(self): metadata._properties["accessId"] = access_id metadata._properties["projectId"] = project - expected_path = "/projects/{}/hmacKeys/{}".format(project, access_id) + expected_path = f"/projects/{project}/hmacKeys/{access_id}" self.assertEqual(metadata.path, expected_path) def test_exists_miss_w_defaults(self): @@ -231,7 +226,7 @@ def test_exists_miss_w_defaults(self): self.assertFalse(metadata.exists()) - expected_path = "/projects/{}/hmacKeys/{}".format(project, access_id) + expected_path = f"/projects/{project}/hmacKeys/{access_id}" expected_query_params = {} client._get_resource.assert_called_once_with( expected_path, @@ -260,7 +255,7 @@ def test_exists_hit_w_explicit_w_user_project(self): self.assertTrue(metadata.exists(timeout=timeout, retry=retry)) - expected_path = "/projects/{}/hmacKeys/{}".format(project, access_id) + expected_path = f"/projects/{project}/hmacKeys/{access_id}" expected_query_params = {"userProject": user_project} client._get_resource.assert_called_once_with( expected_path, @@ -283,7 +278,7 @@ def test_reload_miss_w_defaults(self): with self.assertRaises(NotFound): metadata.reload() - expected_path = "/projects/{}/hmacKeys/{}".format(project, access_id) + expected_path = f"/projects/{project}/hmacKeys/{access_id}" expected_query_params = {} client._get_resource.assert_called_once_with( expected_path, @@ -314,7 +309,7 @@ def test_reload_hit_w_project_set(self): self.assertEqual(metadata._properties, resource) - expected_path = "/projects/{}/hmacKeys/{}".format(project, access_id) + expected_path = f"/projects/{project}/hmacKeys/{access_id}" expected_query_params = {"userProject": user_project} client._get_resource.assert_called_once_with( expected_path, @@ -338,7 +333,7 @@ def test_update_miss_no_project_set_w_defaults(self): with self.assertRaises(NotFound): metadata.update() - expected_path = 
"/projects/{}/hmacKeys/{}".format(project, access_id) + expected_path = f"/projects/{project}/hmacKeys/{access_id}" expected_data = {"state": "INACTIVE"} expected_query_params = {} client._put_resource.assert_called_once_with( @@ -373,7 +368,7 @@ def test_update_hit_w_project_set_w_timeout_w_retry(self): self.assertEqual(metadata._properties, resource) - expected_path = "/projects/{}/hmacKeys/{}".format(project, access_id) + expected_path = f"/projects/{project}/hmacKeys/{access_id}" expected_data = {"state": "ACTIVE"} expected_query_params = {"userProject": user_project} client._put_resource.assert_called_once_with( @@ -411,7 +406,7 @@ def test_delete_miss_no_project_set_w_defaults(self): with self.assertRaises(NotFound): metadata.delete() - expected_path = "/projects/{}/hmacKeys/{}".format(client.project, access_id) + expected_path = f"/projects/{client.project}/hmacKeys/{access_id}" expected_query_params = {} client._delete_resource.assert_called_once_with( expected_path, @@ -436,7 +431,7 @@ def test_delete_hit_w_project_set_w_explicit_timeout_retry(self): metadata.delete(timeout=timeout, retry=retry) - expected_path = "/projects/{}/hmacKeys/{}".format(project, access_id) + expected_path = f"/projects/{project}/hmacKeys/{access_id}" expected_query_params = {"userProject": user_project} client._delete_resource.assert_called_once_with( expected_path, diff --git a/tests/unit/test_notification.py b/tests/unit/test_notification.py index cf4e15c13..d59444915 100644 --- a/tests/unit/test_notification.py +++ b/tests/unit/test_notification.py @@ -20,7 +20,6 @@ class TestBucketNotification(unittest.TestCase): - BUCKET_NAME = "test-bucket" BUCKET_PROJECT = "bucket-project-123" TOPIC_NAME = "test-topic" @@ -33,10 +32,8 @@ class TestBucketNotification(unittest.TestCase): NOTIFICATION_ID = "123" SELF_LINK = "https://example.com/notification/123" ETAG = "DEADBEEF" - CREATE_PATH = "/b/{}/notificationConfigs".format(BUCKET_NAME) - NOTIFICATION_PATH = "/b/{}/notificationConfigs/{}".format( - BUCKET_NAME, NOTIFICATION_ID - ) + CREATE_PATH = f"/b/{BUCKET_NAME}/notificationConfigs" + NOTIFICATION_PATH = f"/b/{BUCKET_NAME}/notificationConfigs/{NOTIFICATION_ID}" @staticmethod def event_types(): diff --git a/tests/unit/test_retry.py b/tests/unit/test_retry.py index b985e5c16..04581c06c 100644 --- a/tests/unit/test_retry.py +++ b/tests/unit/test_retry.py @@ -15,6 +15,7 @@ import unittest from google.cloud.storage import _helpers +from google.cloud.storage.exceptions import InvalidResponse import mock @@ -34,11 +35,16 @@ def test_w_retryable_transport_error(self): exc = eTransportError(caught_exc) self.assertTrue(retry._should_retry(exc)) - def test_w_wrapped_type(self): + def test_w_retryable_types(self): from google.cloud.storage import retry for exc_type in retry._RETRYABLE_TYPES: - exc = exc_type("testing") + # Some of the types need one positional argument, some two. + # The easiest way to accommodate both is just to use a try/except. 
+ try: + exc = exc_type("testing") + except TypeError: + exc = exc_type("testing", "testing") self.assertTrue(self._call_fut(exc)) def test_w_google_api_call_error_hit(self): @@ -55,26 +61,22 @@ def test_w_google_api_call_error_miss(self): exc.code = 999 self.assertFalse(self._call_fut(exc)) - def test_w_requests_connection_error(self): - import requests - - exc = requests.ConnectionError() + def test_w_InvalidResponse_hit(self): + response = mock.Mock() + response.status_code = 408 + exc = InvalidResponse(response, "testing") self.assertTrue(self._call_fut(exc)) - def test_w_requests_chunked_encoding_error(self): - import requests.exceptions - - exc = requests.exceptions.ChunkedEncodingError() - self.assertTrue(self._call_fut(exc)) + def test_w_InvalidResponse_miss(self): + response = mock.Mock() + response.status_code = 999 + exc = InvalidResponse(response, "testing") + self.assertFalse(self._call_fut(exc)) - def test_miss_w_stdlib_error(self): + def test_w_stdlib_error_miss(self): exc = ValueError("testing") self.assertFalse(self._call_fut(exc)) - def test_w_stdlib_connection_error(self): - exc = ConnectionError() - self.assertTrue(self._call_fut(exc)) - class TestConditionalRetryPolicy(unittest.TestCase): def _make_one(self, retry_policy, conditional_predicate, required_kwargs): diff --git a/tests/unit/test_transfer_manager.py b/tests/unit/test_transfer_manager.py new file mode 100644 index 000000000..151cd2877 --- /dev/null +++ b/tests/unit/test_transfer_manager.py @@ -0,0 +1,1181 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest + +from google.cloud.storage import Blob +from google.cloud.storage import Client +from google.cloud.storage import transfer_manager +from google.cloud.storage.retry import DEFAULT_RETRY + +from google.api_core import exceptions + +from google.cloud.storage.exceptions import DataCorruption + +import os +import tempfile +import mock +import pickle + +BLOB_TOKEN_STRING = "blob token" +FAKE_CONTENT_TYPE = "text/fake" +UPLOAD_KWARGS = {"content-type": FAKE_CONTENT_TYPE} +FAKE_RESULT = "nothing to see here" +FAKE_ENCODING = "fake_gzip" +DOWNLOAD_KWARGS = {"accept-encoding": FAKE_ENCODING} +CHUNK_SIZE = 8 +HOSTNAME = "https://example.com" +URL = "https://example.com/bucket/blob" +USER_AGENT = "agent" +EXPECTED_UPLOAD_KWARGS = { + "command": "tm.upload_many", + **UPLOAD_KWARGS, +} +EXPECTED_DOWNLOAD_KWARGS = { + "command": "tm.download_many", + **DOWNLOAD_KWARGS, +} + + +# Used in subprocesses only, so excluded from coverage +def _validate_blob_token_in_subprocess( + maybe_pickled_blob, method_name, path_or_file, **kwargs +): # pragma: NO COVER + assert pickle.loads(maybe_pickled_blob) == BLOB_TOKEN_STRING + assert "filename" in method_name + assert path_or_file.startswith("file") + assert kwargs == EXPECTED_UPLOAD_KWARGS or kwargs == EXPECTED_DOWNLOAD_KWARGS + return FAKE_RESULT + + +def test_upload_many_with_filenames(): + FILE_BLOB_PAIRS = [ + ("file_a.txt", mock.Mock(spec=Blob)), + ("file_b.txt", mock.Mock(spec=Blob)), + ] + expected_upload_kwargs = EXPECTED_UPLOAD_KWARGS.copy() + expected_upload_kwargs["if_generation_match"] = 0 + + for _, blob_mock in FILE_BLOB_PAIRS: + blob_mock._handle_filename_and_upload.return_value = FAKE_RESULT + + results = transfer_manager.upload_many( + FILE_BLOB_PAIRS, + skip_if_exists=True, + upload_kwargs=UPLOAD_KWARGS, + worker_type=transfer_manager.THREAD, + ) + for filename, mock_blob in FILE_BLOB_PAIRS: + mock_blob._handle_filename_and_upload.assert_any_call( + filename, **expected_upload_kwargs + ) + for result in results: + assert result == FAKE_RESULT + + +def test_upload_many_with_file_objs(): + FILE_BLOB_PAIRS = [ + (tempfile.TemporaryFile(), mock.Mock(spec=Blob)), + (tempfile.TemporaryFile(), mock.Mock(spec=Blob)), + ] + expected_upload_kwargs = EXPECTED_UPLOAD_KWARGS.copy() + expected_upload_kwargs["if_generation_match"] = 0 + + for _, blob_mock in FILE_BLOB_PAIRS: + blob_mock._prep_and_do_upload.return_value = FAKE_RESULT + + results = transfer_manager.upload_many( + FILE_BLOB_PAIRS, + skip_if_exists=True, + upload_kwargs=UPLOAD_KWARGS, + worker_type=transfer_manager.THREAD, + ) + for file, mock_blob in FILE_BLOB_PAIRS: + mock_blob._prep_and_do_upload.assert_any_call(file, **expected_upload_kwargs) + for result in results: + assert result == FAKE_RESULT + + +def test_upload_many_passes_concurrency_options(): + FILE_BLOB_PAIRS = [ + (tempfile.TemporaryFile(), mock.Mock(spec=Blob)), + (tempfile.TemporaryFile(), mock.Mock(spec=Blob)), + ] + MAX_WORKERS = 7 + DEADLINE = 10 + with mock.patch("concurrent.futures.ThreadPoolExecutor") as pool_patch, mock.patch( + "concurrent.futures.wait" + ) as wait_patch: + transfer_manager.upload_many( + FILE_BLOB_PAIRS, + deadline=DEADLINE, + worker_type=transfer_manager.THREAD, + max_workers=MAX_WORKERS, + ) + pool_patch.assert_called_with(max_workers=MAX_WORKERS) + wait_patch.assert_called_with(mock.ANY, timeout=DEADLINE, return_when=mock.ANY) + + +def test_threads_deprecation_with_upload(): + FILE_BLOB_PAIRS = [ + 
(tempfile.TemporaryFile(), mock.Mock(spec=Blob)), + (tempfile.TemporaryFile(), mock.Mock(spec=Blob)), + ] + MAX_WORKERS = 7 + DEADLINE = 10 + with mock.patch("concurrent.futures.ThreadPoolExecutor") as pool_patch, mock.patch( + "concurrent.futures.wait" + ) as wait_patch: + with pytest.warns(): + transfer_manager.upload_many( + FILE_BLOB_PAIRS, deadline=DEADLINE, threads=MAX_WORKERS + ) + pool_patch.assert_called_with(max_workers=MAX_WORKERS) + wait_patch.assert_called_with(mock.ANY, timeout=DEADLINE, return_when=mock.ANY) + + +def test_threads_deprecation_conflict_with_upload(): + FILE_BLOB_PAIRS = [ + (tempfile.TemporaryFile(), mock.Mock(spec=Blob)), + (tempfile.TemporaryFile(), mock.Mock(spec=Blob)), + ] + MAX_WORKERS = 7 + DEADLINE = 10 + with pytest.raises(ValueError): + transfer_manager.upload_many( + FILE_BLOB_PAIRS, + deadline=DEADLINE, + threads=5, + worker_type=transfer_manager.THREAD, + max_workers=MAX_WORKERS, + ) + + +def test_upload_many_suppresses_exceptions(): + FILE_BLOB_PAIRS = [ + ("file_a.txt", mock.Mock(spec=Blob)), + ("file_b.txt", mock.Mock(spec=Blob)), + ] + for _, mock_blob in FILE_BLOB_PAIRS: + mock_blob._handle_filename_and_upload.side_effect = ConnectionError() + + results = transfer_manager.upload_many( + FILE_BLOB_PAIRS, worker_type=transfer_manager.THREAD + ) + for result in results: + assert isinstance(result, ConnectionError) + + +def test_upload_many_raises_exceptions(): + FILE_BLOB_PAIRS = [ + ("file_a.txt", mock.Mock(spec=Blob)), + ("file_b.txt", mock.Mock(spec=Blob)), + ] + for _, mock_blob in FILE_BLOB_PAIRS: + mock_blob._handle_filename_and_upload.side_effect = ConnectionError() + + with pytest.raises(ConnectionError): + transfer_manager.upload_many( + FILE_BLOB_PAIRS, raise_exception=True, worker_type=transfer_manager.THREAD + ) + + +def test_upload_many_suppresses_412_with_skip_if_exists(): + FILE_BLOB_PAIRS = [ + ("file_a.txt", mock.Mock(spec=Blob)), + ("file_b.txt", mock.Mock(spec=Blob)), + ] + for _, mock_blob in FILE_BLOB_PAIRS: + mock_blob._handle_filename_and_upload.side_effect = ( + exceptions.PreconditionFailed("412") + ) + results = transfer_manager.upload_many( + FILE_BLOB_PAIRS, + skip_if_exists=True, + raise_exception=True, + worker_type=transfer_manager.THREAD, + ) + for result in results: + assert isinstance(result, exceptions.PreconditionFailed) + + +def test_upload_many_with_processes(): + # Mocks are not pickleable, so we send token strings over the wire. + FILE_BLOB_PAIRS = [ + ("file_a.txt", BLOB_TOKEN_STRING), + ("file_b.txt", BLOB_TOKEN_STRING), + ] + + with mock.patch( + "google.cloud.storage.transfer_manager._call_method_on_maybe_pickled_blob", + new=_validate_blob_token_in_subprocess, + ): + results = transfer_manager.upload_many( + FILE_BLOB_PAIRS, + upload_kwargs=UPLOAD_KWARGS, + worker_type=transfer_manager.PROCESS, + raise_exception=True, + ) + for result in results: + assert result == FAKE_RESULT + + +def test_upload_many_with_processes_rejects_file_obj(): + # Mocks are not pickleable, so we send token strings over the wire. 
+ FILE_BLOB_PAIRS = [ + ("file_a.txt", BLOB_TOKEN_STRING), + (tempfile.TemporaryFile(), BLOB_TOKEN_STRING), + ] + + with mock.patch( + "google.cloud.storage.transfer_manager._call_method_on_maybe_pickled_blob", + new=_validate_blob_token_in_subprocess, + ): + with pytest.raises(ValueError): + transfer_manager.upload_many( + FILE_BLOB_PAIRS, + upload_kwargs=UPLOAD_KWARGS, + worker_type=transfer_manager.PROCESS, + ) + + +def test_download_many_with_filenames(): + BLOB_FILE_PAIRS = [ + (mock.Mock(spec=Blob), "file_a.txt"), + (mock.Mock(spec=Blob), "file_b.txt"), + ] + + for blob_mock, _ in BLOB_FILE_PAIRS: + blob_mock._handle_filename_and_download.return_value = FAKE_RESULT + + results = transfer_manager.download_many( + BLOB_FILE_PAIRS, + download_kwargs=DOWNLOAD_KWARGS, + worker_type=transfer_manager.THREAD, + ) + for mock_blob, file in BLOB_FILE_PAIRS: + mock_blob._handle_filename_and_download.assert_any_call( + file, **EXPECTED_DOWNLOAD_KWARGS + ) + for result in results: + assert result == FAKE_RESULT + + +def test_download_many_with_skip_if_exists(): + with tempfile.NamedTemporaryFile() as tf: + BLOB_FILE_PAIRS = [ + (mock.Mock(spec=Blob), "file_a.txt"), + (mock.Mock(spec=Blob), tf.name), + ] + + for blob_mock, _ in BLOB_FILE_PAIRS: + blob_mock._handle_filename_and_download.return_value = FAKE_RESULT + + results = transfer_manager.download_many( + BLOB_FILE_PAIRS, + download_kwargs=DOWNLOAD_KWARGS, + worker_type=transfer_manager.THREAD, + skip_if_exists=True, + ) + mock_blob, file = BLOB_FILE_PAIRS[0] + mock_blob._handle_filename_and_download.assert_any_call( + file, **EXPECTED_DOWNLOAD_KWARGS + ) + mock_blob, _ = BLOB_FILE_PAIRS[1] + mock_blob._handle_filename_and_download.assert_not_called() + for result in results: + assert result == FAKE_RESULT + + +def test_download_many_with_file_objs(): + BLOB_FILE_PAIRS = [ + (mock.Mock(spec=Blob), tempfile.TemporaryFile()), + (mock.Mock(spec=Blob), tempfile.TemporaryFile()), + ] + + for blob_mock, _ in BLOB_FILE_PAIRS: + blob_mock._prep_and_do_download.return_value = FAKE_RESULT + + results = transfer_manager.download_many( + BLOB_FILE_PAIRS, + download_kwargs=DOWNLOAD_KWARGS, + worker_type=transfer_manager.THREAD, + ) + for mock_blob, file in BLOB_FILE_PAIRS: + mock_blob._prep_and_do_download.assert_any_call(file, **DOWNLOAD_KWARGS) + for result in results: + assert result == FAKE_RESULT + + +def test_download_many_passes_concurrency_options(): + BLOB_FILE_PAIRS = [ + (mock.Mock(spec=Blob), tempfile.TemporaryFile()), + (mock.Mock(spec=Blob), tempfile.TemporaryFile()), + ] + MAX_WORKERS = 7 + DEADLINE = 10 + with mock.patch("concurrent.futures.ThreadPoolExecutor") as pool_patch, mock.patch( + "concurrent.futures.wait" + ) as wait_patch: + transfer_manager.download_many( + BLOB_FILE_PAIRS, + deadline=DEADLINE, + worker_type=transfer_manager.THREAD, + max_workers=MAX_WORKERS, + ) + pool_patch.assert_called_with(max_workers=MAX_WORKERS) + wait_patch.assert_called_with(mock.ANY, timeout=DEADLINE, return_when=mock.ANY) + + +def test_download_many_suppresses_exceptions(): + BLOB_FILE_PAIRS = [ + (mock.Mock(spec=Blob), "file_a.txt"), + (mock.Mock(spec=Blob), "file_b.txt"), + ] + for mock_blob, _ in BLOB_FILE_PAIRS: + mock_blob._handle_filename_and_download.side_effect = ConnectionError() + + results = transfer_manager.download_many( + BLOB_FILE_PAIRS, worker_type=transfer_manager.THREAD + ) + for result in results: + assert isinstance(result, ConnectionError) + + +def test_download_many_raises_exceptions(): + BLOB_FILE_PAIRS = [ + 
(mock.Mock(spec=Blob), "file_a.txt"), + (mock.Mock(spec=Blob), "file_b.txt"), + ] + for mock_blob, _ in BLOB_FILE_PAIRS: + mock_blob._handle_filename_and_download.side_effect = ConnectionError() + + with pytest.raises(ConnectionError): + transfer_manager.download_many( + BLOB_FILE_PAIRS, raise_exception=True, worker_type=transfer_manager.THREAD + ) + + +def test_download_many_with_processes(): + # Mocks are not pickleable, so we send token strings over the wire. + BLOB_FILE_PAIRS = [ + (BLOB_TOKEN_STRING, "file_a.txt"), + (BLOB_TOKEN_STRING, "file_b.txt"), + ] + + with mock.patch( + "google.cloud.storage.transfer_manager._call_method_on_maybe_pickled_blob", + new=_validate_blob_token_in_subprocess, + ): + results = transfer_manager.download_many( + BLOB_FILE_PAIRS, + download_kwargs=DOWNLOAD_KWARGS, + worker_type=transfer_manager.PROCESS, + ) + for result in results: + assert result == FAKE_RESULT + + +def test_download_many_with_processes_rejects_file_obj(): + # Mocks are not pickleable, so we send token strings over the wire. + BLOB_FILE_PAIRS = [ + (BLOB_TOKEN_STRING, "file_a.txt"), + (BLOB_TOKEN_STRING, tempfile.TemporaryFile()), + ] + + with mock.patch( + "google.cloud.storage.transfer_manager._call_method_on_maybe_pickled_blob", + new=_validate_blob_token_in_subprocess, + ): + with pytest.raises(ValueError): + transfer_manager.download_many( + BLOB_FILE_PAIRS, + download_kwargs=DOWNLOAD_KWARGS, + worker_type=transfer_manager.PROCESS, + ) + + +def test_upload_many_from_filenames(): + bucket = mock.Mock() + + FILENAMES = ["file_a.txt", "file_b.txt"] + ROOT = "mypath/" + PREFIX = "myprefix/" + KEY_NAME = "keyname" + BLOB_CONSTRUCTOR_KWARGS = {"kms_key_name": KEY_NAME} + UPLOAD_KWARGS = {"content-type": "text/fake"} + MAX_WORKERS = 7 + DEADLINE = 10 + WORKER_TYPE = transfer_manager.THREAD + + EXPECTED_FILE_BLOB_PAIRS = [ + (os.path.join(ROOT, filename), mock.ANY) for filename in FILENAMES + ] + + with mock.patch( + "google.cloud.storage.transfer_manager.upload_many" + ) as mock_upload_many: + transfer_manager.upload_many_from_filenames( + bucket, + FILENAMES, + source_directory=ROOT, + blob_name_prefix=PREFIX, + skip_if_exists=True, + blob_constructor_kwargs=BLOB_CONSTRUCTOR_KWARGS, + upload_kwargs=UPLOAD_KWARGS, + deadline=DEADLINE, + raise_exception=True, + worker_type=WORKER_TYPE, + max_workers=MAX_WORKERS, + ) + + mock_upload_many.assert_called_once_with( + EXPECTED_FILE_BLOB_PAIRS, + skip_if_exists=True, + upload_kwargs=UPLOAD_KWARGS, + deadline=DEADLINE, + raise_exception=True, + worker_type=WORKER_TYPE, + max_workers=MAX_WORKERS, + ) + bucket.blob.assert_any_call(PREFIX + FILENAMES[0], **BLOB_CONSTRUCTOR_KWARGS) + bucket.blob.assert_any_call(PREFIX + FILENAMES[1], **BLOB_CONSTRUCTOR_KWARGS) + + +def test_upload_many_from_filenames_minimal_args(): + bucket = mock.Mock() + + FILENAMES = ["file_a.txt", "file_b.txt"] + + EXPECTED_FILE_BLOB_PAIRS = [(filename, mock.ANY) for filename in FILENAMES] + + with mock.patch( + "google.cloud.storage.transfer_manager.upload_many" + ) as mock_upload_many: + transfer_manager.upload_many_from_filenames( + bucket, + FILENAMES, + ) + + mock_upload_many.assert_called_once_with( + EXPECTED_FILE_BLOB_PAIRS, + skip_if_exists=False, + upload_kwargs=None, + deadline=None, + raise_exception=False, + worker_type=transfer_manager.PROCESS, + max_workers=8, + ) + bucket.blob.assert_any_call(FILENAMES[0]) + bucket.blob.assert_any_call(FILENAMES[1]) + + +def test_upload_many_from_filenames_additional_properties(): + bucket = mock.Mock() + blob = mock.Mock() + 
bucket_blob = mock.Mock(return_value=blob) + blob.cache_control = None + bucket.blob = bucket_blob + + FILENAME = "file_a.txt" + ADDITIONAL_BLOB_ATTRIBUTES = {"cache_control": "no-cache"} + EXPECTED_FILE_BLOB_PAIRS = [(FILENAME, mock.ANY)] + + with mock.patch( + "google.cloud.storage.transfer_manager.upload_many" + ) as mock_upload_many: + transfer_manager.upload_many_from_filenames( + bucket, [FILENAME], additional_blob_attributes=ADDITIONAL_BLOB_ATTRIBUTES + ) + + mock_upload_many.assert_called_once_with( + EXPECTED_FILE_BLOB_PAIRS, + skip_if_exists=False, + upload_kwargs=None, + deadline=None, + raise_exception=False, + worker_type=transfer_manager.PROCESS, + max_workers=8, + ) + + for attrib, value in ADDITIONAL_BLOB_ATTRIBUTES.items(): + assert getattr(blob, attrib) == value + + +def test_download_many_to_path(): + bucket = mock.Mock() + + BLOBNAMES = ["file_a.txt", "file_b.txt", "dir_a/file_c.txt"] + PATH_ROOT = "mypath/" + BLOB_NAME_PREFIX = "myprefix/" + DOWNLOAD_KWARGS = {"accept-encoding": "fake-gzip"} + MAX_WORKERS = 7 + DEADLINE = 10 + WORKER_TYPE = transfer_manager.THREAD + + EXPECTED_BLOB_FILE_PAIRS = [ + (mock.ANY, os.path.join(PATH_ROOT, blobname)) for blobname in BLOBNAMES + ] + + with mock.patch( + "google.cloud.storage.transfer_manager.download_many" + ) as mock_download_many: + transfer_manager.download_many_to_path( + bucket, + BLOBNAMES, + destination_directory=PATH_ROOT, + blob_name_prefix=BLOB_NAME_PREFIX, + download_kwargs=DOWNLOAD_KWARGS, + deadline=DEADLINE, + create_directories=False, + raise_exception=True, + max_workers=MAX_WORKERS, + worker_type=WORKER_TYPE, + skip_if_exists=True, + ) + + mock_download_many.assert_called_once_with( + EXPECTED_BLOB_FILE_PAIRS, + download_kwargs=DOWNLOAD_KWARGS, + deadline=DEADLINE, + raise_exception=True, + max_workers=MAX_WORKERS, + worker_type=WORKER_TYPE, + skip_if_exists=True, + ) + for blobname in BLOBNAMES: + bucket.blob.assert_any_call(BLOB_NAME_PREFIX + blobname) + + +def test_download_many_to_path_creates_directories(): + bucket = mock.Mock() + + with tempfile.TemporaryDirectory() as tempdir: + DIR_NAME = "dir_a/dir_b" + BLOBNAMES = [ + "file_a.txt", + "file_b.txt", + os.path.join(DIR_NAME, "file_c.txt"), + ] + + EXPECTED_BLOB_FILE_PAIRS = [ + (mock.ANY, os.path.join(tempdir, blobname)) for blobname in BLOBNAMES + ] + + with mock.patch( + "google.cloud.storage.transfer_manager.download_many" + ) as mock_download_many: + transfer_manager.download_many_to_path( + bucket, + BLOBNAMES, + destination_directory=tempdir, + create_directories=True, + raise_exception=True, + ) + + mock_download_many.assert_called_once_with( + EXPECTED_BLOB_FILE_PAIRS, + download_kwargs=None, + deadline=None, + raise_exception=True, + worker_type=transfer_manager.PROCESS, + max_workers=8, + skip_if_exists=False, + ) + for blobname in BLOBNAMES: + bucket.blob.assert_any_call(blobname) + + assert os.path.isdir(os.path.join(tempdir, DIR_NAME)) + + +def test_download_chunks_concurrently(): + blob_mock = mock.Mock(spec=Blob) + FILENAME = "file_a.txt" + MULTIPLE = 4 + blob_mock.size = CHUNK_SIZE * MULTIPLE + + expected_download_kwargs = EXPECTED_DOWNLOAD_KWARGS.copy() + expected_download_kwargs["command"] = "tm.download_sharded" + expected_download_kwargs["checksum"] = None + + with mock.patch("google.cloud.storage.transfer_manager.open", mock.mock_open()): + result = transfer_manager.download_chunks_concurrently( + blob_mock, + FILENAME, + chunk_size=CHUNK_SIZE, + download_kwargs=DOWNLOAD_KWARGS, + worker_type=transfer_manager.THREAD, + 
crc32c_checksum=False, + ) + for x in range(MULTIPLE): + blob_mock._prep_and_do_download.assert_any_call( + mock.ANY, + **expected_download_kwargs, + start=x * CHUNK_SIZE, + end=((x + 1) * CHUNK_SIZE) - 1, + ) + assert blob_mock._prep_and_do_download.call_count == 4 + assert result is None + + +def test_download_chunks_concurrently_with_crc32c(): + blob_mock = mock.Mock(spec=Blob) + FILENAME = "file_a.txt" + MULTIPLE = 4 + BLOB_CHUNK = b"abcdefgh" + BLOB_CONTENTS = BLOB_CHUNK * MULTIPLE + blob_mock.size = len(BLOB_CONTENTS) + blob_mock.crc32c = "eOVVVw==" + + def write_to_file(f, *args, **kwargs): + f.write(BLOB_CHUNK) + + blob_mock._prep_and_do_download.side_effect = write_to_file + + with mock.patch("google.cloud.storage.transfer_manager.open", mock.mock_open()): + transfer_manager.download_chunks_concurrently( + blob_mock, + FILENAME, + chunk_size=CHUNK_SIZE, + download_kwargs=DOWNLOAD_KWARGS, + worker_type=transfer_manager.THREAD, + crc32c_checksum=True, + ) + + +def test_download_chunks_concurrently_with_crc32c_failure(): + blob_mock = mock.Mock(spec=Blob) + FILENAME = "file_a.txt" + MULTIPLE = 4 + BLOB_CHUNK = b"abcdefgh" + BLOB_CONTENTS = BLOB_CHUNK * MULTIPLE + blob_mock.size = len(BLOB_CONTENTS) + blob_mock.crc32c = "invalid" + + def write_to_file(f, *args, **kwargs): + f.write(BLOB_CHUNK) + + blob_mock._prep_and_do_download.side_effect = write_to_file + + with mock.patch("google.cloud.storage.transfer_manager.open", mock.mock_open()): + with pytest.raises(DataCorruption): + transfer_manager.download_chunks_concurrently( + blob_mock, + FILENAME, + chunk_size=CHUNK_SIZE, + download_kwargs=DOWNLOAD_KWARGS, + worker_type=transfer_manager.THREAD, + crc32c_checksum=True, + ) + + +def test_download_chunks_concurrently_raises_on_invalid_kwargs(): + blob_mock = mock.Mock(spec=Blob) + FILENAME = "file_a.txt" + MULTIPLE = 4 + blob_mock.size = CHUNK_SIZE * MULTIPLE + + with mock.patch("google.cloud.storage.transfer_manager.open", mock.mock_open()): + with pytest.raises(ValueError): + transfer_manager.download_chunks_concurrently( + blob_mock, + FILENAME, + chunk_size=CHUNK_SIZE, + worker_type=transfer_manager.THREAD, + download_kwargs={ + "start": CHUNK_SIZE, + }, + ) + with pytest.raises(ValueError): + transfer_manager.download_chunks_concurrently( + blob_mock, + FILENAME, + chunk_size=CHUNK_SIZE, + worker_type=transfer_manager.THREAD, + download_kwargs={ + "end": (CHUNK_SIZE * (MULTIPLE - 1)) - 1, + }, + ) + with pytest.raises(ValueError): + transfer_manager.download_chunks_concurrently( + blob_mock, + FILENAME, + chunk_size=CHUNK_SIZE, + worker_type=transfer_manager.THREAD, + download_kwargs={ + "checksum": "crc32c", + }, + ) + + +def test_download_chunks_concurrently_passes_concurrency_options(): + blob_mock = mock.Mock(spec=Blob) + FILENAME = "file_a.txt" + MAX_WORKERS = 7 + DEADLINE = 10 + MULTIPLE = 4 + blob_mock.size = CHUNK_SIZE * MULTIPLE + + with mock.patch("concurrent.futures.ThreadPoolExecutor") as pool_patch, mock.patch( + "concurrent.futures.wait" + ) as wait_patch, mock.patch( + "google.cloud.storage.transfer_manager.open", mock.mock_open() + ): + transfer_manager.download_chunks_concurrently( + blob_mock, + FILENAME, + chunk_size=CHUNK_SIZE, + deadline=DEADLINE, + worker_type=transfer_manager.THREAD, + max_workers=MAX_WORKERS, + crc32c_checksum=False, + ) + pool_patch.assert_called_with(max_workers=MAX_WORKERS) + wait_patch.assert_called_with(mock.ANY, timeout=DEADLINE, return_when=mock.ANY) + + +def test_upload_chunks_concurrently(): + bucket = mock.Mock() + bucket.name = 
"bucket" + bucket.client = _PickleableMockClient(identify_as_client=True) + transport = bucket.client._http + bucket.user_project = None + + blob = Blob("blob", bucket) + blob.content_type = FAKE_CONTENT_TYPE + + FILENAME = "file_a.txt" + SIZE = 2048 + + container_mock = mock.Mock() + container_mock.upload_id = "abcd" + part_mock = mock.Mock() + ETAG = "efgh" + part_mock.etag = ETAG + + with mock.patch("os.path.getsize", return_value=SIZE), mock.patch( + "google.cloud.storage.transfer_manager.XMLMPUContainer", + return_value=container_mock, + ), mock.patch( + "google.cloud.storage.transfer_manager.XMLMPUPart", return_value=part_mock + ): + transfer_manager.upload_chunks_concurrently( + FILENAME, + blob, + chunk_size=SIZE // 2, + worker_type=transfer_manager.THREAD, + ) + container_mock.initiate.assert_called_once_with( + transport=transport, content_type=blob.content_type + ) + container_mock.register_part.assert_any_call(1, ETAG) + container_mock.register_part.assert_any_call(2, ETAG) + container_mock.finalize.assert_called_once_with(bucket.client._http) + + part_mock.upload.assert_called_with(transport) + + +def test_upload_chunks_concurrently_quotes_urls(): + bucket = mock.Mock() + bucket.name = "bucket" + bucket.client = _PickleableMockClient(identify_as_client=True) + transport = bucket.client._http + bucket.user_project = None + + blob = Blob(b"../wrongbucket/blob", bucket) + blob.content_type = FAKE_CONTENT_TYPE + quoted_url = "https://example.com/bucket/..%2Fwrongbucket%2Fblob" + + FILENAME = "file_a.txt" + SIZE = 2048 + + container_mock = mock.Mock() + container_mock.upload_id = "abcd" + part_mock = mock.Mock() + ETAG = "efgh" + part_mock.etag = ETAG + container_cls_mock = mock.Mock(return_value=container_mock) + + with mock.patch("os.path.getsize", return_value=SIZE), mock.patch( + "google.cloud.storage.transfer_manager.XMLMPUContainer", new=container_cls_mock + ), mock.patch( + "google.cloud.storage.transfer_manager.XMLMPUPart", return_value=part_mock + ): + transfer_manager.upload_chunks_concurrently( + FILENAME, + blob, + chunk_size=SIZE // 2, + worker_type=transfer_manager.THREAD, + ) + + container_mock.initiate.assert_called_once_with( + transport=transport, content_type=blob.content_type + ) + container_mock.register_part.assert_any_call(1, ETAG) + container_mock.register_part.assert_any_call(2, ETAG) + container_mock.finalize.assert_called_once_with(bucket.client._http) + + container_cls_mock.assert_called_once_with( + quoted_url, FILENAME, headers=mock.ANY, retry=DEFAULT_RETRY + ) + + part_mock.upload.assert_called_with(transport) + + +def test_upload_chunks_concurrently_passes_concurrency_options(): + bucket = mock.Mock() + bucket.name = "bucket" + bucket.client = _PickleableMockClient(identify_as_client=True) + transport = bucket.client._http + bucket.user_project = None + + blob = Blob("blob", bucket) + + FILENAME = "file_a.txt" + SIZE = 2048 + + container_mock = mock.Mock() + container_mock.upload_id = "abcd" + + MAX_WORKERS = 7 + DEADLINE = 10 + + with mock.patch("os.path.getsize", return_value=SIZE), mock.patch( + "google.cloud.storage.transfer_manager.XMLMPUContainer", + return_value=container_mock, + ), mock.patch("concurrent.futures.ThreadPoolExecutor") as pool_patch, mock.patch( + "concurrent.futures.wait" + ) as wait_patch: + try: + transfer_manager.upload_chunks_concurrently( + FILENAME, + blob, + chunk_size=SIZE // 2, + worker_type=transfer_manager.THREAD, + max_workers=MAX_WORKERS, + deadline=DEADLINE, + retry=None, 
+ ) + except ValueError: + pass # The futures don't actually work, so we expect this to abort. + # Conveniently, that gives us a chance to test the auto-delete + # exception handling feature. + container_mock.cancel.assert_called_once_with(transport) + + pool_patch.assert_called_with(max_workers=MAX_WORKERS) + wait_patch.assert_called_with(mock.ANY, timeout=DEADLINE, return_when=mock.ANY) + + +def test_upload_chunks_concurrently_with_metadata_and_encryption(): + import datetime + from google.cloud.storage._helpers import _UTC + from google.cloud._helpers import _RFC3339_MICROS + + now = datetime.datetime.now(_UTC) + now_str = now.strftime(_RFC3339_MICROS) + + custom_metadata = {"key_a": "value_a", "key_b": "value_b"} + encryption_key = "b23ff11bba187db8c37077e6af3b25b8" + kms_key_name = "sample_key_name" + custom_headers = { + "x-goog-custom-audit-foo": "bar", + } + + METADATA = { + "cache_control": "private", + "content_disposition": "inline", + "content_language": "en-US", + "custom_time": now, + "metadata": custom_metadata, + "storage_class": "NEARLINE", + } + + bucket = mock.Mock() + bucket.name = "bucket" + bucket.client = _PickleableMockClient( + identify_as_client=True, extra_headers=custom_headers + ) + transport = bucket.client._http + user_project = "my_project" + bucket.user_project = user_project + + blob = Blob("blob", bucket, kms_key_name=kms_key_name) + blob.content_type = FAKE_CONTENT_TYPE + + for key, value in METADATA.items(): + setattr(blob, key, value) + blob.metadata = {**custom_metadata} + blob.encryption_key = encryption_key + + FILENAME = "file_a.txt" + SIZE = 2048 + + container_mock = mock.Mock() + container_mock.upload_id = "abcd" + part_mock = mock.Mock() + ETAG = "efgh" + part_mock.etag = ETAG + container_cls_mock = mock.Mock(return_value=container_mock) + + invocation_id = "b9f8cbb0-6456-420c-819d-3f4ee3c0c455" + + with mock.patch("os.path.getsize", return_value=SIZE), mock.patch( + "google.cloud.storage.transfer_manager.XMLMPUContainer", new=container_cls_mock + ), mock.patch( + "google.cloud.storage.transfer_manager.XMLMPUPart", return_value=part_mock + ), mock.patch( + "google.cloud.storage._helpers._get_invocation_id", + return_value="gccl-invocation-id/" + invocation_id, + ): + transfer_manager.upload_chunks_concurrently( + FILENAME, + blob, + chunk_size=SIZE // 2, + worker_type=transfer_manager.THREAD, + ) + expected_headers = { + "Accept": "application/json", + "Accept-Encoding": "gzip, deflate", + "User-Agent": "agent", + "X-Goog-API-Client": f"agent gccl-invocation-id/{invocation_id} gccl-gcs-cmd/tm.upload_sharded", + "content-type": FAKE_CONTENT_TYPE, + "x-upload-content-type": FAKE_CONTENT_TYPE, + "X-Goog-Encryption-Algorithm": "AES256", + "X-Goog-Encryption-Key": "YjIzZmYxMWJiYTE4N2RiOGMzNzA3N2U2YWYzYjI1Yjg=", + "X-Goog-Encryption-Key-Sha256": "B25Y4hgVlNXDliAklsNz9ykLk7qvgqDrSbdds5iu8r4=", + "Cache-Control": "private", + "Content-Disposition": "inline", + "Content-Language": "en-US", + "x-goog-storage-class": "NEARLINE", + "x-goog-custom-time": now_str, + "x-goog-meta-key_a": "value_a", + "x-goog-meta-key_b": "value_b", + "x-goog-user-project": "my_project", + "x-goog-encryption-kms-key-name": "sample_key_name", + **custom_headers, + } + container_cls_mock.assert_called_once_with( + URL, FILENAME, headers=expected_headers, retry=DEFAULT_RETRY + ) + container_mock.initiate.assert_called_once_with( + transport=transport, content_type=blob.content_type + ) + container_mock.register_part.assert_any_call(1, ETAG) + 
container_mock.register_part.assert_any_call(2, ETAG) + container_mock.finalize.assert_called_once_with(transport) + part_mock.upload.assert_called_with(blob.client._http) + + +class _PickleableMockBlob: + def __init__( + self, + name="", + size=None, + generation=None, + size_after_reload=None, + generation_after_reload=None, + ): + self.name = name + self.size = size + self.generation = generation + self._size_after_reload = size_after_reload + self._generation_after_reload = generation_after_reload + self.client = _PickleableMockClient() + + def reload(self): + self.size = self._size_after_reload + self.generation = self._generation_after_reload + + def _prep_and_do_download(self, *args, **kwargs): + return "SUCCESS" + + +class _PickleableMockConnection: + @staticmethod + def get_api_base_url_for_mtls(): + return HOSTNAME + + user_agent = USER_AGENT + + +class _PickleableMockClient: + def __init__(self, identify_as_client=False, extra_headers={}): + self._http = "my_transport" # used as an identifier for "called_with" + self._connection = _PickleableMockConnection() + self.identify_as_client = identify_as_client + self._extra_headers = extra_headers + + @property + def __class__(self): + if self.identify_as_client: + return Client + else: + return _PickleableMockClient + + +# Used in subprocesses only, so excluded from coverage +def _validate_blob_token_in_subprocess_for_chunk( + maybe_pickled_blob, filename, **kwargs +): # pragma: NO COVER + blob = pickle.loads(maybe_pickled_blob) + assert isinstance(blob, _PickleableMockBlob) + assert filename.startswith("file") + return FAKE_RESULT + + +def test_download_chunks_concurrently_with_processes(): + blob = _PickleableMockBlob( + "file_a_blob", size_after_reload=24, generation_after_reload=100 + ) + FILENAME = "file_a.txt" + + with mock.patch( + "google.cloud.storage.transfer_manager._download_and_write_chunk_in_place", + new=_validate_blob_token_in_subprocess_for_chunk, + ), mock.patch("google.cloud.storage.transfer_manager.open", mock.mock_open()): + result = transfer_manager.download_chunks_concurrently( + blob, + FILENAME, + chunk_size=CHUNK_SIZE, + download_kwargs=DOWNLOAD_KWARGS, + worker_type=transfer_manager.PROCESS, + crc32c_checksum=False, + ) + assert result is None + + +def test__LazyClient(): + fake_cache = {} + MOCK_ID = 9999 + with mock.patch( + "google.cloud.storage.transfer_manager._cached_clients", new=fake_cache + ), mock.patch("google.cloud.storage.transfer_manager.Client"): + lazyclient = transfer_manager._LazyClient(MOCK_ID) + lazyclient_cached = transfer_manager._LazyClient(MOCK_ID) + assert lazyclient is lazyclient_cached + assert len(fake_cache) == 1 + + +def test__pickle_client(): + # This test nominally has coverage, but doesn't assert that the essential + # copyreg behavior in _pickle_client works. Unfortunately there doesn't seem + # to be a good way to check that without actually creating a Client, which + # will spin up HTTP connections undesirably. This is more fully checked in + # the system tests. 
+ pkl = transfer_manager._pickle_client(FAKE_RESULT) + assert pickle.loads(pkl) == FAKE_RESULT + + +def test__download_and_write_chunk_in_place(): + pickled_mock = pickle.dumps(_PickleableMockBlob()) + FILENAME = "file_a.txt" + with mock.patch("google.cloud.storage.transfer_manager.open", mock.mock_open()): + result = transfer_manager._download_and_write_chunk_in_place( + pickled_mock, FILENAME, 0, 8, {}, False + ) + assert result is not None + + +def test__upload_part(): + from google.cloud.storage.retry import DEFAULT_RETRY + + pickled_mock = pickle.dumps(_PickleableMockClient()) + FILENAME = "file_a.txt" + UPLOAD_ID = "abcd" + ETAG = "efgh" + + part = mock.Mock() + part.etag = ETAG + with mock.patch( + "google.cloud.storage.transfer_manager.XMLMPUPart", return_value=part + ): + result = transfer_manager._upload_part( + pickled_mock, + URL, + UPLOAD_ID, + FILENAME, + 0, + 256, + 1, + None, + {"key", "value"}, + retry=DEFAULT_RETRY, + ) + part.upload.assert_called_once() + + assert result == (1, ETAG) + + +def test__get_pool_class_and_requirements_error(): + with pytest.raises(ValueError): + transfer_manager._get_pool_class_and_requirements("garbage") + + +def test__reduce_client(): + fake_cache = {} + client = mock.Mock() + custom_headers = { + "x-goog-custom-audit-foo": "bar", + } + client._extra_headers = custom_headers + + with mock.patch( + "google.cloud.storage.transfer_manager._cached_clients", new=fake_cache + ), mock.patch("google.cloud.storage.transfer_manager.Client"): + replicated_client, kwargs = transfer_manager._reduce_client(client) + assert replicated_client is not None + assert custom_headers in kwargs + + +def test__call_method_on_maybe_pickled_blob(): + blob = mock.Mock(spec=Blob) + blob._prep_and_do_download.return_value = "SUCCESS" + result = transfer_manager._call_method_on_maybe_pickled_blob( + blob, "_prep_and_do_download" + ) + assert result == "SUCCESS" + + pickled_blob = pickle.dumps(_PickleableMockBlob()) + result = transfer_manager._call_method_on_maybe_pickled_blob( + pickled_blob, "_prep_and_do_download" + ) + assert result == "SUCCESS" + + +def test__ChecksummingSparseFileWrapper(): + FILENAME = "file_a.txt" + import google_crc32c + + with mock.patch( + "google.cloud.storage.transfer_manager.open", mock.mock_open() + ) as open_mock: + # test no checksumming + wrapper = transfer_manager._ChecksummingSparseFileWrapper(FILENAME, 0, False) + wrapper.write(b"abcdefgh") + handle = open_mock() + handle.write.assert_called_with(b"abcdefgh") + wrapper.write(b"ijklmnop") + assert wrapper.crc is None + handle.write.assert_called_with(b"ijklmnop") + + with mock.patch( + "google.cloud.storage.transfer_manager.open", mock.mock_open() + ) as open_mock: + wrapper = transfer_manager._ChecksummingSparseFileWrapper(FILENAME, 0, True) + wrapper.write(b"abcdefgh") + handle = open_mock() + handle.write.assert_called_with(b"abcdefgh") + wrapper.write(b"ijklmnop") + assert wrapper.crc == google_crc32c.value(b"abcdefghijklmnop") + handle.write.assert_called_with(b"ijklmnop") diff --git a/tests/unit/url_signer_v4_test_data.json b/tests/unit/url_signer_v4_test_data.json index f556c1fb0..4edcabc34 100644 --- a/tests/unit/url_signer_v4_test_data.json +++ b/tests/unit/url_signer_v4_test_data.json @@ -1,457 +1,603 @@ { "signingV4Tests": [ - { - "description": "Simple GET", - "bucket": "test-bucket", - "object": "test-object", - "method": "GET", - "expiration": 10, - "timestamp": "20190201T090000Z", - "expectedUrl": 
"https://storage.googleapis.com/test-bucket/test-object?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F20190201%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20190201T090000Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host&X-Goog-Signature=70aa331de284c2ca9afeee6a2c20db752edfd80e0adb29aa049f3c8a1eed68f5726a494e843d119f5bf17fa78affe1d55c6a248a5c1f6ca7b9a4dbe86e394338179a91a6aaef51f43f4bbed7802b9256425b37aae1dd8fb90ebb1f3f9112e99a62bc510a1ad7d183fe9a6fe1ffaee963699e21e163f0854c7baf0674ce0a611857fdb3fe80904e4c46db795e51c6b3725bd6a447e98f54fcefc97ea1926438602f26009905ee1657a48e1e2fb2b04201f1edff520f3d062a8571b8e476873fdf8ead8e15fb9a3237f635921681f483206e36456fc8a865c522f187f0e464b65a81833a1418b55733fd6e45eb1ddd856092e227cb1042fbb6fdf3b4d6d47978a1" - }, - - { - "description": "Simple PUT", - "bucket": "test-bucket", - "object": "test-object", - "method": "PUT", - "expiration": 10, - "timestamp": "20190201T090000Z", - "expectedUrl": "https://storage.googleapis.com/test-bucket/test-object?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F20190201%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20190201T090000Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host&X-Goog-Signature=5b1a52b42f7bdefc8948e91eaaa64955e98ea25ed3a1b48566497fda23d36019f184f98cbc54354cd1d6303ca198efcfaa0c270b6f0a2f7291de21a6ff9f27ed1bb316d986dc07fae0996349eb2024385d3e55312dd13cee67a610914749a45ff297235749ed53dead39ce8b71942294fecfa2c5c89234f51e2ff00d16c5ec7abb45f34f1b0fb6856e4bd6ea4fe43b0550e7a1e4eea919d9c065cc15b20f53632d9fcb9e2d21ae351912b3bb0b0fa9661740a7d69ce77083ede2f66cb160f1bd6285af7c8f8a616ae487d37373f176f32b2191defc699eb4df2db13b17a13e7a2a63b97b7d98dd801c871fc73ffec163c1a8784e31a250cd517e4529696e8693" - }, - - { - "description": "POST for resumable uploads", - "bucket": "test-bucket", - "object": "test-object", - "method": "POST", - "expiration": 10, - "headers": { - "x-goog-resumable": "start" - }, - "timestamp": "20190201T090000Z", - "expectedUrl": "https://storage.googleapis.com/test-bucket/test-object?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F20190201%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20190201T090000Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host%3Bx-goog-resumable&X-Goog-Signature=1d037090964e260c02e8a5e4c38c207c4bdf15828fc6261c2560f242f3f4bf1465feb06fc4ea4bc0d85aa550ee1d3d157486027df494563e3b2c08f89ea666b1ebafdb977b0a976d2c0f66c19502b5969f099b3c3bf8e8bae62f3c3f040a4ea736f339a412ab2aeb2351469144dcfe3a2ad20a5bacab1ff36b3de6d04a078679814a7061a652b8f77334ad6d6f4a020a9f5c35b85ba13a99e43e06622e42432a1e83fa940c85c34730568c611e23846f456764821afed00c54550e57c01f6aceecb4daf6e6b3e96f257cd7d88c04680b54174b453f2d2ee17eec93a22f731c16593b56fdf3144dd5deec6f0b3ae632f68c0f2da13debe36dc463ce5af6c58a97" - }, - - { - "description": "Vary expiration and timestamp", - "bucket": "test-bucket", - "object": "test-object", - "method": "GET", - "expiration": 20, - "timestamp": "20190301T090000Z", - "expectedUrl": 
"https://storage.googleapis.com/test-bucket/test-object?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F20190301%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20190301T090000Z&X-Goog-Expires=20&X-Goog-SignedHeaders=host&X-Goog-Signature=55a76a221957bc711a64c27bbf8ff716aaa35d2bbaa0aa652ba2a8e42aed4b727c53a5b54a96a58e872a2ef592aa97c0e050f14d3caeac118afbfcb58ec6694db48e331176f368b0948ee5086257832df5ead4ec17cd9d7317c5af71c3ebd539989424f2ae1f8450727587253f2d0f03c7c6cb2a85649ece988ffc591a17d157c3e39b355baab9ed1c8dacd8923cd0e31423d7c6cdcc11f5e32ced3fa2fc5fd28e64b18b99e848b7982ba3163d85be919f10b7a248df1533353ff2db6deb02e5282fa22ecbf71362e2324a242b17d185266940d1c3bb8815db0d0c8d8fac4f37b69da8ea5ebad4db4a5323be94d6e78a4b98d74a586baab476bba976aed8c72e" + { + "description": "Simple GET", + "bucket": "test-bucket", + "object": "test-object", + "method": "GET", + "expiration": 10, + "timestamp": "2019-02-01T09:00:00Z", + "expectedUrl": "https://storage.googleapis.com/test-bucket/test-object?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F2019-02-%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=2019-02-01T09%3A00%3A00Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host&X-Goog-Signature=8b83604b82216c1d048d156674589e1f85ac69749ddc7ce2cb0703bb34b8c49e65961b4b653bc03dbbdba6d1278b88350f905798fa4bca70e06a5020683d270d71e7ba14e03baaaea4bfa4ea1713123b1f3e62b355545a8b9e068a85a7f8eb370842e6b21982683bcaf2e1528e5189229337ef0fc1308714ede6c0ab507d0d5d14ca15ea7bf560451c970ed7223976bf811ef62cd15400fff35e24ca8ed7ce4804fc563ed87a31f0d4a976cb378be1da256ae262b0caed8628e61b4ab5cd2be4857cb27898edd3bc504bbf389cedfab962e331540c5a43220efdd694c11daac42864950b0885f18ff41ec3ec6c92754a04fd000de568f0741cda9ede48afe853", + "scheme": "https", + "expectedCanonicalRequest": "GET\n/test-bucket/test-object\nX-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F20190201%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20190201T090000Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host\nhost:storage.googleapis.com\n\nhost\nUNSIGNED-PAYLOAD", + "expectedStringToSign": "GOOG4-RSA-SHA256\n20190201T090000Z\n20190201/auto/storage/goog4_request\n00e2fb794ea93d7adb703edaebdd509821fcc7d4f1a79ac5c8d2b394df109320" + }, + { + "description": "Simple PUT", + "bucket": "test-bucket", + "object": "test-object", + "method": "PUT", + "expiration": 10, + "timestamp": "2019-02-01T09:00:00Z", + "expectedUrl": "https://storage.googleapis.com/test-bucket/test-object?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F2019-02-%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=2019-02-01T09%3A00%3A00Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host&X-Goog-Signature=1dac9a1eede0368413937b6a75cd18dbd98fe6a7f9c79392b513916cc312e22d0d79591b724ccee43b89b727a41a46e9375674b390b7c02bda94aca90aee4b744580aee58a692657c1364a8aff8786ab9167c87af26c0a154640bb948ae991fdc639a2a058264b49828c899c9a260dd66f74d02067955f16de6eb65dac0543297cb201707b50da2dcfe42f4c5be75c5588ac775910540057b1b5aca20988b176fba96ebafed66e76c09ccec45a144e742c5f2bba460b8df2ccefd9f2c39f0b398696b073bed554b3534c78dc52dc5c41ad24adbd7b0447e1b5b10315e7be19db323d38f0c7f523f5d8f4fbcd468117fdbd806c556a7a01cc2d5fe5f0e2a2c282", + "scheme": "https", + 
"expectedCanonicalRequest": "PUT\n/test-bucket/test-object\nX-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F20190201%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20190201T090000Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host\nhost:storage.googleapis.com\n\nhost\nUNSIGNED-PAYLOAD", + "expectedStringToSign": "GOOG4-RSA-SHA256\n20190201T090000Z\n20190201/auto/storage/goog4_request\n78742860705da91404222d5d66ff89850292471199c3c2808d116ad12e6177b4" + }, + { + "description": "POST for resumable uploads", + "bucket": "test-bucket", + "object": "test-object", + "method": "POST", + "expiration": 10, + "timestamp": "2019-02-01T09:00:00Z", + "expectedUrl": "https://storage.googleapis.com/test-bucket/test-object?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F2019-02-%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=2019-02-01T09%3A00%3A00Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host%3Bx-goog-resumable&X-Goog-Signature=883e07a9dc782db4ba322e5981c75f35d07b6864c9580222d1ca00623cf91d75614511835adf8a9737ff6e3b52f4de0600a55d366b77c6e6487676344a15a444ba145fcba318e9094038268ece8c46200363957bd9ccf5424e28b444d7e7982b02c5278c05d1140c654d49bb7fdb57d3d709741e1a2bc6af80d9a79b4ca59eafbbc943ec37f437e9c1b1ad41bdd17e890de0bfd3090674381e23c75f3878e4895867da7abe84c6e56d2e0baaa5ca5cb2a7098c0b662deef9bb2731f61be276c814fd41813dade52c348922a00487c0e9ae6b92c8c60d30f2c89cd5e549d4fea961abe82e905cd3e8677acad7c31a9cc22f4c24e79f33b8b3310c0dfc0f37a160", + "headers": { + "X-Goog-Resumable": "start" }, - - { - "description": "Vary bucket and object", - "bucket": "test-bucket2", - "object": "test-object2", - "method": "GET", - "expiration": 10, - "timestamp": "20190201T090000Z", - "expectedUrl": "https://storage.googleapis.com/test-bucket2/test-object2?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F20190201%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20190201T090000Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host&X-Goog-Signature=804095c3d06173dad8c138d6556737a6dfc20b2cc4f6d7b857928ade96fccab30be4eb6e467d3441670a6b5eb373d097f00d37a5fe423d2a370ac2ee0e52568b18231d6b98a25a647e5440fc75b10f1d3ad401b4b0d076a057c80a9b597ff6bad273672d4278a3966440767459c9d5d5ab1d5a39563bb559f45665658e7ba2f982adde68a7ff88d8de25f9568d56e24fad76ffde80852b22a3a07ac57a3af3aaffce39de64e0c4f3b82382d48b56abc8445e6480b4c130030481b3003abc831cebcb0f3b8086639891a99a2e7c8345331d59ed635b227987a5dddfd3fd71c3b5ae4391e472df8de0592ff830d385216264448a82ad4aef1ba2374d3226fd06bf" + "scheme": "https", + "expectedCanonicalRequest": "POST\n/test-bucket/test-object\nX-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F20190201%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20190201T090000Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host%3Bx-goog-resumable\nhost:storage.googleapis.com\nx-goog-resumable:start\n\nhost;x-goog-resumable\nUNSIGNED-PAYLOAD", + "expectedStringToSign": "GOOG4-RSA-SHA256\n20190201T090000Z\n20190201/auto/storage/goog4_request\n877f8b40179d2753296f2fd6de815ab40503c7a3c446a7b44aa4e74422ff4daf" + }, + { + "description": "Vary expiration and timestamp", + "bucket": "test-bucket", + "object": "test-object", + "method": "GET", + "expiration": 20, + "timestamp": "2019-03-01T09:00:00Z", + "expectedUrl": 
"https://storage.googleapis.com/test-bucket/test-object?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F2019-03-%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=2019-03-01T09%3A00%3A00Z&X-Goog-Expires=20&X-Goog-SignedHeaders=host&X-Goog-Signature=a8f699d7f9ce9d6960aa5715606dbfbdca31e5c514b69b27e11875b134c518396c5aba4318e303b38e6537509ef12d94332b39f80ead0274574016cb58d7d1d5e8508b28e9edbc8fe6392e16076e904aa8c64abb84a3e3554b9503b4395f1dbc4b9367e359f62f6a5c6d53659caab44c2e49595cf5a6800c251c16af163e0399174142880358576a28f392f9bdcf69a10a3ecf70331fefdb7e82dea03251d051ce48560d7606a2fce22a6548bb950da67b81737701448308d44346cabd829f2e9b1737516d15d9d905844e924fa9b3dac1a222b31fdbcf6a11006676915cf5282994a0d4dfe30ad7fe23686638dbbc79a983a698ad5c8d3eab51e5e2cb01e22c", + "scheme": "https", + "expectedCanonicalRequest": "GET\n/test-bucket/test-object\nX-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F20190301%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20190301T090000Z&X-Goog-Expires=20&X-Goog-SignedHeaders=host\nhost:storage.googleapis.com\n\nhost\nUNSIGNED-PAYLOAD", + "expectedStringToSign": "GOOG4-RSA-SHA256\n20190301T090000Z\n20190301/auto/storage/goog4_request\n779f19fdb6fd381390e2d5af04947cf21750277ee3c20e0c97b7e46a1dff8907" + }, + { + "description": "Vary bucket and object", + "bucket": "test-bucket2", + "object": "test-object2", + "method": "GET", + "expiration": 10, + "timestamp": "2019-02-01T09:00:00Z", + "expectedUrl": "https://storage.googleapis.com/test-bucket2/test-object2?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F2019-02-%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=2019-02-01T09%3A00%3A00Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host&X-Goog-Signature=82d96c99f8a1aac4d6d3002331ee4b2349f09346af2f468a559402338d3813cc8bd2d3057404bb5c2e840594a44612bf78302b7f1ebd9286475469fcdd4abddb64d2d1ac0606aeb2f120bf576a6b0ba470abf1548fda902a20fa955746b78a4756817821f1064e9746d0fffde714227323aefa8e0acd9b0ec29af28abf41c3c072e13cf1c739554c4c7b17f2b1da20a7290f8b386a83db39c2e4076a4507f08e562c065dea193ae54b3ffe6840e7de0403f97943189dc9fd312e74de0d87799ba415b1b98354fa0e51983989024eb6efef4f0b6f7c4ef2eb3c65874feb770db1aea33b86bcfd2d9db66ebfa69a568d359113c2c76d260ff56c9cac5b36ff5bbe", + "scheme": "https", + "expectedCanonicalRequest": "GET\n/test-bucket2/test-object2\nX-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F20190201%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20190201T090000Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host\nhost:storage.googleapis.com\n\nhost\nUNSIGNED-PAYLOAD", + "expectedStringToSign": "GOOG4-RSA-SHA256\n20190201T090000Z\n20190201/auto/storage/goog4_request\na139afbf35ac30e9864f63197f79609731ab1b0ca166e2a456dba156fcd3f9ce" + }, + { + "description": "Slashes in object name should not be URL encoded", + "bucket": "test-bucket", + "object": "path/with/slashes/under_score/amper&sand/file.ext", + "headers": { + "header/name/with/slash": "should-be-encoded" }, - - { - "description": "Simple headers", - "bucket": "test-bucket", - "object": "test-object", - "headers": { - "foo": "foo-value", - "BAR": "BAR-value" - }, - "method": "GET", - "expiration": 10, - "timestamp": "20190201T090000Z", - "expectedUrl": 
"https://storage.googleapis.com/test-bucket/test-object?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F20190201%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20190201T090000Z&X-Goog-Expires=10&X-Goog-SignedHeaders=bar%3Bfoo%3Bhost&X-Goog-Signature=1e53ba6ef0f0ea2adb437f0a32b3d7d7dc521d9a53c79ef224849c67d16e771bc04096de5b0d87c113069545ab8638256d055ca216fa062be74b7a7d03bac14a3bd1e30264e261ea6891ab58d567bbce6bd80951d00644d5dc222e3e55a6d015bf18184bed0ab71208bdd6b0c1433898dfe6cf2b9052a4bb9ed7610bc3acda3a592e8dcf5e4241ed9a0cd777d9abaa85e4770c0681c447a163fac430de64549ec45a8d8fac37af8aecc5ba0375da87c2e1040ed51879b2b6874e2381b259fe4b297b4ea0b3ea8a86332ff452a562a184afeb57fdf41ba075ddb3823650b47efa0238c73866a06cffe4d47c30783b5d4d78a9d499bd381dffb5386decdd02ef76" + "method": "GET", + "expiration": 10, + "timestamp": "2019-02-01T09:00:00Z", + "expectedUrl": "https://storage.googleapis.com/test-bucket/path/with/slashes/under_score/amper&sand/file.ext?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F2019-02-%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=2019-02-01T09%3A00%3A00Z&X-Goog-Expires=10&X-Goog-SignedHeaders=header%2Fname%2Fwith%2Fslash%3Bhost&X-Goog-Signature=3e4ba8232a55ea1f414b8495f0cf54fece1bcb926ae3af6ba9c533f0bae55fcf8d3dfa53ac2e648c8eee8e7487530798c13fee7f3e93cdf4d56cf48c562b0bc3767ea642fd23b406704ea879cf74d4e7ee38866e88dcfa7a1ac13fcaed6af0941bfb6f607699968fec9eddd94cb73b6d82867d990e19deee7b26679a150f3caf62bb651fd9a0bde1d9f795e39cb25bffd1635e8f419b7e4a6883f4ca6090f283666954dbe24bba8e2d082cc0704a9d8f6ac49312a16c7717b2f96f14fee0b8c1da371ff4b4b7cb297c00063361b6ab3efb0ce4feaa7e84402c7686dea67c882851a850837af6e03171577515121236507122cf5fce2bd52da083b27f965d8e8b", + "scheme": "https", + "expectedCanonicalRequest": "GET\n/test-bucket/path/with/slashes/under_score/amper%26sand/file.ext\nX-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F20190201%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20190201T090000Z&X-Goog-Expires=10&X-Goog-SignedHeaders=header%2Fname%2Fwith%2Fslash%3Bhost\nheader/name/with/slash:should-be-encoded\nhost:storage.googleapis.com\n\nheader/name/with/slash;host\nUNSIGNED-PAYLOAD", + "expectedStringToSign": "GOOG4-RSA-SHA256\n20190201T090000Z\n20190201/auto/storage/goog4_request\nf1d206dd8cbe1b892d4081ccddae0927d9f5fee5653fb2a2f43e7c20ed455cad" + }, + { + "description": "Forward Slashes should not be stripped", + "bucket": "test-bucket", + "object": "/path/with/slashes/under_score/amper&sand/file.ext", + "method": "GET", + "expiration": 10, + "timestamp": "2019-02-01T09:00:00Z", + "expectedUrl": 
"https://storage.googleapis.com/test-bucket//path/with/slashes/under_score/amper&sand/file.ext?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F2019-02-%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=2019-02-01T09%3A00%3A00Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host&X-Goog-Signature=a6e6955547ab5906e0ed8df7b84ca5912a1bdc3efca055855d328f365c8cd69441d06d10c7281ea17c4311c5bd255a41f3842730420bc01a87034647470da613709b1d8f777b8e977f3e66768fa544e3e1f0fa6a188c445583ec1f2a97cb91748afb5bec7c2207a1e6ba150f05cb2af93226b44c34554cab08d10bbbfe84daa1235a33c13fb42b89bfc4c2dac13e60aff4b9b60242a67809b9b4afd77fb909447defc86f70e2e63ebd65efeac3bf35d0ec5aaa066a9203f99b2fc834eb1fee54e8b7c68f9ed3d78dd8f512aaef55ed5b9ff2495a0274d45e1dfa0dfd848dd5be38a27784ce2276e44d40c063f9ad3804194609802883449f4b61d67ab3921b20", + "scheme": "https", + "expectedCanonicalRequest": "GET\n/test-bucket//path/with/slashes/under_score/amper%26sand/file.ext\nX-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F20190201%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20190201T090000Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host\nhost:storage.googleapis.com\n\nhost\nUNSIGNED-PAYLOAD", + "expectedStringToSign": "GOOG4-RSA-SHA256\n20190201T090000Z\n20190201/auto/storage/goog4_request\n63c601ecd6ccfec84f1113fc906609cbdf7651395f4300cecd96ddd2c35164f8" + }, + { + "description": "Simple headers", + "bucket": "test-bucket", + "object": "test-object", + "method": "GET", + "expiration": 10, + "timestamp": "2019-02-01T09:00:00Z", + "expectedUrl": "https://storage.googleapis.com/test-bucket/test-object?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F2019-02-%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=2019-02-01T09%3A00%3A00Z&X-Goog-Expires=10&X-Goog-SignedHeaders=bar%3Bfoo%3Bhost&X-Goog-Signature=3abfa4717ebc857380a6db5a9ef78082bc5410c2853d27d7b7df4cdcdf438119d1b00de07345f5fb8a968e47b948d4cd6ba8328f8a7b01991f0f4ae5b34a319a539e3dd451384d07da9bbf51ea120ceac189bb98700e95337aa410cb014facf19222d3e7eec08123049cde2878337427f7c72de6364cd306d6ba601034c0c54b4553f13d4c6a6cfb0b08f86935e914fb456549cbb2a57945bf331d75ec64166178fd080fedb90887e2228dde8bc1f63eb057525e1d844ee934bdb78e32880294c702f6d4841c335e79cd8fc796407bb00292906d58f5dabefcc47a470ef45cb40dde7d3b31ba78e753ad292b1a21001bc231994e7cf4c12e9202fb271a4e4b54", + "headers": { + "BAR": "BAR-value", + "foo": "foo-value" }, - - { - "description": "Headers should be trimmed", - "bucket": "test-bucket", - "object": "test-object", - "headers": { - "leading": " xyz", - "trailing": "abc ", - "collapsed": "abc def" - }, - "method": "GET", - "expiration": 10, - "timestamp": "20190201T090000Z", - "expectedUrl": 
"https://storage.googleapis.com/test-bucket/test-object?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F20190201%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20190201T090000Z&X-Goog-Expires=10&X-Goog-SignedHeaders=collapsed%3Bhost%3Bleading%3Btrailing&X-Goog-Signature=254b939a0becaf1a03b601286bd30cb9c8b796b20c6b950f50f246c21fe7577dc3771cd08fb1afd566df6fee12d64be3c7c66c79fe93aefb632e313a31acc48d873f324a49dc768408d4372c3cc597aa037c1ca03c7709408e9e3bea97def67257bce8cc09e5200235c1300353eb22f9ca5676f896972d38cfe3a39bf61575e9e42be9eba08b42524d4459c578e36a6b0e22ea5cf43a13c5156c7e948e07e211fa358f4d3ad7a3f03fb412ab62951a48efd1b53a356268b7242063bbe0f90f2fd9d3fcfbc8ae8691b15b2e02409ba5fa5d465a70a407d9c54b90dd11c99b81978fae28e49dfbda7d61a5d624f3a24483aaa8e7df6dbd75bfe09d854cd2cb98b9" + "scheme": "https", + "expectedCanonicalRequest": "GET\n/test-bucket/test-object\nX-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F20190201%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20190201T090000Z&X-Goog-Expires=10&X-Goog-SignedHeaders=bar%3Bfoo%3Bhost\nbar:BAR-value\nfoo:foo-value\nhost:storage.googleapis.com\n\nbar;foo;host\nUNSIGNED-PAYLOAD", + "expectedStringToSign": "GOOG4-RSA-SHA256\n20190201T090000Z\n20190201/auto/storage/goog4_request\n59c1ac1a6ee7d773d5c4487ecc861d60b71c4871dd18fc7d8485fac09df1d296" + }, + { + "description": "Headers with colons", + "bucket": "test-bucket", + "object": "test-object", + "method": "GET", + "expiration": 10, + "timestamp": "2019-02-01T09:00:00Z", + "expectedUrl": "https://storage.googleapis.com/test-bucket/test-object?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F2019-02-%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=2019-02-01T09%3A00%3A00Z&X-Goog-Expires=10&X-Goog-SignedHeaders=bar%3Bfoo%3Bhost&X-Goog-Signature=677a1ccd0c77c4f359436ab9133d78a0010e846018a9a2b42442be1a348b41fb6c4d74c3b94cd3fb2da70d302e3240bf12aceb14fdac10af556ec7164e4f5f49bcfaa7a3030d62b1ef3ee1cb1b702f632636afe68aa1902f2d48db79e4a7cf94490de59182c8487965c3d143b58bfa6e0628f0662c7da2d31d68cce9062f47cce6139b018946601ff649cfd7511c3d7fbcb4b500650ff7b02a6a09513c67b044e1cf7158046a17598fe84e21349d253d19d18da70796597e01821d6910a00ae587ae2563afd0f742a640d9f2868eb016c622abeff6449f3b39e1200f6007794a509ebe9fdb44ff1a894bac85712e5bab2c2b231c5a7ac24d01e86b278caac52d", + "headers": { + "BAR": "2023-02-10T03:", + "foo": "2023-02-10T02:00:00Z" }, - - { - "description": "Header value with multiple inline values", - "bucket": "test-bucket", - "object": "test-object", - "headers": { - "multiple": " xyz , abc, def , xyz " - }, - "method": "GET", - "expiration": 10, - "timestamp": "20190201T090000Z", - "expectedUrl": 
"https://storage.googleapis.com/test-bucket/test-object?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F20190201%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20190201T090000Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host%3Bmultiple&X-Goog-Signature=97d1edf312b9635950ffc53df4735611b73ea643719ec2818d94cfd3746c5c18441b44dc198a7baa6d315e31cd215832ca81ece555e28fa16f67aa1ea5a030e8b403fe680db6a6435d401b874612d84416ea6891b6bda09051651d4152220bfee341f398e75db5f80ce6fab0087beb12c800c7754e9ef489cae0dc26cdf91a7f2bce903e1caeab9c34fb632591e404ba1a55e20d4404686603694774211db85d2bc2d6007877a130a68cb52cd3b0a067700c1f627a0514a8c0dea2ece785bdffc0f5fdeb48b3a5209c3bc23d75a9e893a1b545506e505a2364fbb2374f602b0ad16aa6e7358243c5161bd4f88a14cab54b85229b8351199fd075c8ce41277ef5" + "scheme": "https", + "expectedCanonicalRequest": "GET\n/test-bucket/test-object\nX-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F20190201%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20190201T090000Z&X-Goog-Expires=10&X-Goog-SignedHeaders=bar%3Bfoo%3Bhost\nbar:2023-02-10T03:\nfoo:2023-02-10T02:00:00Z\nhost:storage.googleapis.com\n\nbar;foo;host\nUNSIGNED-PAYLOAD", + "expectedStringToSign": "GOOG4-RSA-SHA256\n20190201T090000Z\n20190201/auto/storage/goog4_request\na2a6df7e6bd818894e1f60ac3c393901b512ca1cf1061ba602dace3fb38c19a6" + }, + { + "description": "Headers should be trimmed", + "bucket": "test-bucket", + "object": "test-object", + "method": "GET", + "expiration": 10, + "timestamp": "2019-02-01T09:00:00Z", + "expectedUrl": "https://storage.googleapis.com/test-bucket/test-object?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F2019-02-%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=2019-02-01T09%3A00%3A00Z&X-Goog-Expires=10&X-Goog-SignedHeaders=collapsed%3Bhost%3Bleading%3Btabs%3Btrailing&X-Goog-Signature=2a98fdeedd8447c056d6d84b44a65ae8c9dccdbf1ad4075caf281708be3bbab1d44cfc43e2612dba593008ecb09838edd0e478a8033335bcecd7d576b7d5199333e137036513c59f7f16d89941c3e4e179b2d387c8c0ffbf056763c904a08d2f3695c298bde0be5917647d287bc50d902ee5c3c65bff2a499ce20dd917621ec703232a9c2c4594b45385da152053dc0fc4c8d57f924823085c1636f0c42ca0146760a4c805792a213a065e241fd13382df28f2945d515fcb4fb70fbde2702c8547bdd43e38b344fe18aa6f44f60bbd69554834e8347efefe9e7a1687b1ecdc86fb285df59b50303f1f1954991fba593dc8d5737d804edd4dda083aa5d3b9b9f9", + "headers": { + "collapsed": "abc def", + "leading": " xyz", + "trailing": "abc ", + "tabs": "\tabc\t\t\t\tdef\t" }, - - { - "description": "Customer-supplied encryption key", - "bucket": "test-bucket", - "object": "test-object", - "headers": - { - "X-Goog-Encryption-Key": "key", - "X-Goog-Encryption-Key-Sha256": "key-hash", - "X-Goog-Encryption-Algorithm": "AES256" - }, - "method": "GET", - "expiration": 10, - "timestamp": "20190201T090000Z", - "expectedUrl": 
"https://storage.googleapis.com/test-bucket/test-object?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F20190201%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20190201T090000Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host%3Bx-goog-encryption-algorithm%3Bx-goog-encryption-key%3Bx-goog-encryption-key-sha256&X-Goog-Signature=326568a3a4b033441f89dff2835ed7b7fd22be6a8959b7d318cc3c9be97c14361cea2135ba566127aa8350afe6c5a2ebcc8ddb5448a41ae6d942d1afdc15445ef001a68117e43493277cec1966b91f00c78c2a430f8e0c7ffbdbc685d13ee271b4ea0fe5dd36ab729b6e4bae119e4bc0ea0c17f080de58b222e9dfb7658fbcece56244bdcaf0e24d4c71e41ca792c1b232814ce4b6b2af7227c0e7a688d0b9e294522a68a5f7c85e789c15bde2313edff5e349347d2c4f2b866ae13a40b530c8d263067f7f6ffe8f1535d1633667905ee6eadb78d46fdd1398ee8fced29499fc4e163212b67d1c0bedd7881b12c7feceb359b8878f7c229826dbfff4fc986e33" + "scheme": "https", + "expectedCanonicalRequest": "GET\n/test-bucket/test-object\nX-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F20190201%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20190201T090000Z&X-Goog-Expires=10&X-Goog-SignedHeaders=collapsed%3Bhost%3Bleading%3Btabs%3Btrailing\ncollapsed:abc def\nhost:storage.googleapis.com\nleading:xyz\ntabs:abc def\ntrailing:abc\n\ncollapsed;host;leading;tabs;trailing\nUNSIGNED-PAYLOAD", + "expectedStringToSign": "GOOG4-RSA-SHA256\n20190201T090000Z\n20190201/auto/storage/goog4_request\n19153e83555808dbfeb8969043cc8ce8d5db0cce91dc11fb9df58b8130f09d42" + }, + { + "description": "Header value with multiple inline values", + "bucket": "test-bucket", + "object": "test-object", + "method": "GET", + "expiration": 10, + "timestamp": "2019-02-01T09:00:00Z", + "expectedUrl": "https://storage.googleapis.com/test-bucket/test-object?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F2019-02-%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=2019-02-01T09%3A00%3A00Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host%3Bmultiple&X-Goog-Signature=86b73c7b27f69d772d3d5a34f83fe2b73a484063103351c3080b3d345f5c6587f770bb09ee6d40283c2dcfb2607e362c2f9441be594ba87c8a84538b944d615366ab38d64e8bda9daf1495122109da9f94a903c64f1b158dd1168eeecf637ceeaffdc061f7109a396c0536c059e61690a6f0d5cb350b360e8e6de41731c912bb2b78b33760e343feaaaea842047b562a092185c66e006f2ce62b90b8e7b38466382e554ddc7dcaa4735c15545d101c7c247ae203e8d7200aa95a22626c7ea88b8ce874391dc7747bba3e24131eed78d7ef9f13d3fb21c65a8c721cf6ba90cf9cdbeecef7d84aabf59e62196607a336306d68a274d959a11eb034d35c1f260d4d", + "headers": { + "multiple": " xyz , abc, def , xyz " }, - - { - "description": "List Objects", - "bucket": "test-bucket", - "object": "", - "method": "GET", - "expiration": 10, - "timestamp": "20190201T090000Z", - "expectedUrl": 
"https://storage.googleapis.com/test-bucket?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F20190201%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20190201T090000Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host&X-Goog-Signature=73489345b256501979f5166c698ac45e53415fffe06fda3719c6a4a81b7bb99b391bb116e6728b4f555f65e954eb7b3a61c00bc05d13d8dbdb8f82e56f8a226e6b171e195dd9467851943c095ff18928c2351b24baac09757d9a81cf6a5847c11e706a24d0662662bb26438b332433994ddc19b5151d7e1716135a1400363cb3d5e10b854233ad34e351c53ff61914c14401d95d0d83fa379870024f243f7c70f85f491cafa3f9569f37c59c53379f1a9848d9a74264db6a50f36ea94d2bbaa2d26a2e3fe3930b5c65755bd69d1d024c8ece31da7ae7445ecd31b651814728402d3f771c0813e13133a59fb07f15d36cb623b6032e6776afb7a725c7e164d7ce" + "scheme": "https", + "expectedCanonicalRequest": "GET\n/test-bucket/test-object\nX-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F20190201%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20190201T090000Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host%3Bmultiple\nhost:storage.googleapis.com\nmultiple:xyz , abc, def , xyz\n\nhost;multiple\nUNSIGNED-PAYLOAD", + "expectedStringToSign": "GOOG4-RSA-SHA256\n20190201T090000Z\n20190201/auto/storage/goog4_request\n4df8e486146c31f1c8cd4e4c730554cde4326791ba48ec11fa969a3de064cd7f" + }, + { + "description": "Customer-supplied encryption key", + "bucket": "test-bucket", + "object": "test-object", + "method": "GET", + "expiration": 10, + "timestamp": "2019-02-01T09:00:00Z", + "expectedUrl": "https://storage.googleapis.com/test-bucket/test-object?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F2019-02-%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=2019-02-01T09%3A00%3A00Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host%3Bx-goog-encryption-algorithm%3Bx-goog-encryption-key%3Bx-goog-encryption-key-sha256&X-Goog-Signature=62e0aa8bee2140650fb578c91a5b65f776cae4ea69af35f790b23627fd468837ef44d6c7446aea1dc68b02d98abee1bc0f834fcac19076e3fe41aee7414c3d49faa58eea4c20ea8e9e0dd1ddef85aeacc1b009b40e59c65900bbf50719807236703d9751e83b72f46913e6fec83ccbcf7371e0af6e753a281df7a96db66e59715160b02affe7df8425a7e4b460e4f823a98cf7e6faa808b50b89374009fcfa36b541e6ad0dfbaf959f55673335c182a7f75325976eca7a214fb71d1963fba8c167c86b3782460ff6810526ce0deab4cba9546e4a5bca5acdbe807dc2b7c8cda9bad94c3ef81e1f04f22499e0f633f2b2946f6ffa8d63c71dc79585c74102ac54", + "headers": { + "X-Goog-Encryption-Algorithm": "AES256", + "X-Goog-Encryption-Key": "key", + "X-Goog-Encryption-Key-Sha256": "key-hash" }, - - { - "description": "HTTP Bucket Bound Hostname Support", + "scheme": "https", + "expectedCanonicalRequest": "GET\n/test-bucket/test-object\nX-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F20190201%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20190201T090000Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host%3Bx-goog-encryption-algorithm%3Bx-goog-encryption-key%3Bx-goog-encryption-key-sha256\nhost:storage.googleapis.com\nx-goog-encryption-algorithm:AES256\nx-goog-encryption-key:key\nx-goog-encryption-key-sha256:key-hash\n\nhost;x-goog-encryption-algorithm;x-goog-encryption-key;x-goog-encryption-key-sha256\nUNSIGNED-PAYLOAD", + "expectedStringToSign": 
"GOOG4-RSA-SHA256\n20190201T090000Z\n20190201/auto/storage/goog4_request\n66a45104eba8bdd9748723b45cbd54c3f0f6dba337a5deb9fb6a66334223dc06" + }, + { + "description": "List Objects", + "bucket": "test-bucket", + "method": "GET", + "expiration": 10, + "timestamp": "2019-02-01T09:00:00Z", + "expectedUrl": "https://storage.googleapis.com/test-bucket?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F2019-02-%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=2019-02-01T09%3A00%3A00Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host&X-Goog-Signature=9450a0b8e6acfbbd40b750ea482d9bcfd0219491ff943a6040e3f8597aa229742613d453d85ad67e5d0610b3104c4329c93256e58c69f60b87c1f0e490f44b6558ddf0dcbca689e8cd76bf2c0000e783bd8a07d60aedc45077faad86c2ba961c9f48c0667b7b561d457b3750de60fe4bb55c910382205c8998aa543d36cb4e74ad3df3ef8d9d4d8a383651cd0eb7f6c0974868591c1b02d891286562a4a9036bbbfc9b9a912d0e12141c292e06dbf2a1727831de4b75f6c48c61266b593d6be1cf4063c005ff506ee8125fafd67c179b842deb407f650a111f1f54133de2bf1dca18b8baf2db599b053d0b5edd4c8edbb00a9687741d02431317446fd5643951", + "scheme": "https", + "expectedCanonicalRequest": "GET\n/test-bucket\nX-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F20190201%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20190201T090000Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host\nhost:storage.googleapis.com\n\nhost\nUNSIGNED-PAYLOAD", + "expectedStringToSign": "GOOG4-RSA-SHA256\n20190201T090000Z\n20190201/auto/storage/goog4_request\n51a7426c2a6c6ab80f336855fc629461ff182fb1d2cb552ac68e5ce8e25db487" + }, + { + "description": "Query Parameter Encoding", "bucket": "test-bucket", "object": "test-object", "method": "GET", "expiration": 10, - "timestamp": "20190201T090000Z", - "expectedUrl": "http://mydomain.tld/test-object?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F20190201%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20190201T090000Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host&X-Goog-Signature=2394955666b2ca0f6a4953d730eea564762b7cb4fcbdc5a24305c4eedcfc45442cf863e2b5c6c8386371839adf34caad8d2fb4d291df12537bb310931ea76acbde3fc65a40b9729d7400c9d73ce7827043e31e218bf8280b4828aeccb0b7b10c025d7494c500db5b0f19af827ed64b9c70521fe4fa8248698ff6a55570120aee2cdbd35b2c9a6e7f6038b7c7d40f8497460c2435814ed9e8de5217db66ae0c374f17078d3d8c828dd6cc5eb278c9328e7c74dcc090f48a50a72c25b2dc4e90e8be5f630dc7df463f14d0c3f7c3313e5315d5d74286435f5b846d39f7444e75fa09f911cc9a9c96d843226c5c88f3e03b0a8a53f3800feee1c2c6123262148ba9", - "scheme": "http", - "urlStyle": "BUCKET_BOUND_HOSTNAME", - "bucketBoundHostname": "mydomain.tld" + "timestamp": "2019-02-01T09:00:00Z", + "expectedUrl": 
"https://storage.googleapis.com/test-bucket/test-object?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F2019-02-%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=2019-02-01T09%3A00%3A00Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host&aA0%C3%A9%2F%3D%25-_.~=~%20._-%25%3D%2F%C3%A90Aa&X-Goog-Signature=51959e717a8613a587fe507932d0501caa1f01bf879df6c019255c15ec652b7e64c61dc995814cd73886587ada3dfb3ac9ce2e88eb30e3165cdf8a74f1b57b050e0d9ce3d2549329d3895611d6788ed8d1cf622cd6c1e095695e1c84ef556b036253e504163a375d3a9768dad37aa0e784fc3238ddb6613c6b262cc361d20ef6c1832b8965f11231e510324766d5360c71fb7c3ad597544f1bf7b390fe86a32a12b765bbaa6edbf48ed706e31a2e32cc0b083d19f24332696f7049087b993339ac4f91cff8287dbf76ced628ae455af1b8803c1d04b0b2547a48a54395f3756aa6878bc906eeb35e04bb8595a100eb8cc6c189462d888a0700f3ce1548450877", + "queryParameters": { + "aA0é/=%-_.~": "~ ._-%=/é0Aa" }, - - { - "description": "HTTPS Bucket Bound Hostname Support", - "bucket": "test-bucket", - "object": "test-object", - "method": "GET", - "expiration": 10, - "timestamp": "20190201T090000Z", - "expectedUrl": "https://mydomain.tld/test-object?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F20190201%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20190201T090000Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host&X-Goog-Signature=2394955666b2ca0f6a4953d730eea564762b7cb4fcbdc5a24305c4eedcfc45442cf863e2b5c6c8386371839adf34caad8d2fb4d291df12537bb310931ea76acbde3fc65a40b9729d7400c9d73ce7827043e31e218bf8280b4828aeccb0b7b10c025d7494c500db5b0f19af827ed64b9c70521fe4fa8248698ff6a55570120aee2cdbd35b2c9a6e7f6038b7c7d40f8497460c2435814ed9e8de5217db66ae0c374f17078d3d8c828dd6cc5eb278c9328e7c74dcc090f48a50a72c25b2dc4e90e8be5f630dc7df463f14d0c3f7c3313e5315d5d74286435f5b846d39f7444e75fa09f911cc9a9c96d843226c5c88f3e03b0a8a53f3800feee1c2c6123262148ba9", - "scheme": "https", - "urlStyle": "BUCKET_BOUND_HOSTNAME", - "bucketBoundHostname": "mydomain.tld" + "scheme": "https", + "expectedCanonicalRequest": "GET\n/test-bucket/test-object\nX-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F20190201%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20190201T090000Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host&aA0%C3%A9%2F%3D%25-_.~=~%20._-%25%3D%2F%C3%A90Aa\nhost:storage.googleapis.com\n\nhost\nUNSIGNED-PAYLOAD", + "expectedStringToSign": "GOOG4-RSA-SHA256\n20190201T090000Z\n20190201/auto/storage/goog4_request\n448f96c23dafa8210900554e138b2b5fd55bc53ef53b8637cecc3edec45a8fcf" + }, + { + "description": "Query Parameter Ordering", + "bucket": "test-bucket", + "object": "test-object", + "method": "GET", + "expiration": 10, + "timestamp": "2019-02-01T09:00:00Z", + "expectedUrl": 
"https://storage.googleapis.com/test-bucket/test-object?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F2019-02-%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=2019-02-01T09%3A00%3A00Z&X-Goog-Expires=10&X-Goog-Meta-Foo=bar&X-Goog-SignedHeaders=host&prefix=%2Ffoo&X-Goog-Signature=99a55adc713a3daa0d066d29963c5b5bc3863a1555a7ae104999122242a441caf41f4aec83258d4d4fe8d44c650d9d5cae2ac36a89d9438401f3ff43ae424554be4e97bcb6ad76f1d3ce5c4af2c4b76f1a1197d0dd5ce4c27e4eb7b7bb94c8711ae5fe06d5064e38dc98d57b245ca963efbff3e6f59674e71072fdacf60b042229038636488b7f70b6a731b5e50915778498a59e43d744d7155fbb8dea72a716fd1b0b2b550e0e6fe62011642edf3bbe999fad59084e72ee94177153f0964c2745ff412c91ac5fafab101f591c4ccc99bc2a5aef42722893099469a7fc0250d114b90737f0bb0464b1be9d5780372d895edac979e7eb8f5df1bfb7105c754fd9", + "queryParameters": { + "prefix": "/foo", + "X-Goog-Meta-Foo": "bar" }, - - { - "description": "HTTP Bucket Bound Hostname Support", - "bucket": "test-bucket", - "method": "GET", - "expiration": 10, - "timestamp": "20190201T090000Z", - "expectedUrl": "http://mydomain.tld/?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F20190201%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20190201T090000Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host&X-Goog-Signature=13a256f7afda0a733cc21e9c804b1d2c37f64f9a18956fe11ccce57765115dda24c1d342d364b533e1ab95fcf5ca6b7646f2d5bb008ca9c731d01cdad932f56c21fe5528acfd9cc290c823316992fe791424c5c4b1a2d0b6857d90702be7ec05f3d585ffe1a58a7ab1782643512dad430581dafbeff8669e1b7ec3122c51dbf8c0509f9f746a6208c8d8847493476949959cacdbdc4e024c65055c9af2b51767d2bf8b4e95f10e9ecda3977e9a9cf47d4a4626da1711b79ee344dea82c459826f0e9c31350d8129dc0dc49b203ea301681ba5092e13e362bc657059fd07fd62f0751f6ced8ea50caeb5316c8ed8bca05c793d302853f2fe016305d258e1e212b", - "scheme": "http", - "urlStyle": "BUCKET_BOUND_HOSTNAME", - "bucketBoundHostname": "mydomain.tld" + "scheme": "https", + "expectedCanonicalRequest": "GET\n/test-bucket/test-object\nX-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F20190201%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20190201T090000Z&X-Goog-Expires=10&X-Goog-Meta-Foo=bar&X-Goog-SignedHeaders=host&prefix=%2Ffoo\nhost:storage.googleapis.com\n\nhost\nUNSIGNED-PAYLOAD", + "expectedStringToSign": "GOOG4-RSA-SHA256\n20190201T090000Z\n20190201/auto/storage/goog4_request\n4dafe74ad142f32b7c25fc4e6b38fd3b8a6339d7f112247573fb0066f637db6c" + }, + { + "description": "Header Ordering", + "bucket": "test-bucket", + "object": "test-object", + "method": "GET", + "expiration": 10, + "timestamp": "2019-02-01T09:00:00Z", + "expectedUrl": 
"https://storage.googleapis.com/test-bucket/test-object?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F2019-02-%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=2019-02-01T09%3A00%3A00Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host%3Bx-goog-date&X-Goog-Signature=1676df8f9b69ad3b0b644d86c3dba8dc50dc30a79c7656053496784a86bd254ad95f985808a91262e6717c269e0863d8d6c2de4a70b8127cca888bd0c7bbd628776ffc732ee56c351ec013c1c9542eb5a9cd8b9b1b7a9fad5e1a0dd00bee5535b0d7ba1445ee5d055c8c0cfa14186464f8bb4d31e7eda7530d76387f8d298561b64450893547f33f049215617b1cad3439009a7b2405894125d45dcc0694a544c28f3cfeb191a11954aa15067a3f2993bf7e10057b267f0899500ff24948310211d9ee68f3f5ec96341336ebd5d1b29ce36e1e32a3eb869ab9e2a63fda521fd9091834ddf60cfeebe8bd2300a8073a87811436f5ce09a517a54435450b641219", + "headers": { + "X-Goog-Date": "20190201T090000Z" }, - - { - "description": "HTTPS Bucket Bound Hostname Support", - "bucket": "test-bucket", - "method": "GET", - "expiration": 10, - "timestamp": "20190201T090000Z", - "expectedUrl": "https://mydomain.tld/?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F20190201%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20190201T090000Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host&X-Goog-Signature=13a256f7afda0a733cc21e9c804b1d2c37f64f9a18956fe11ccce57765115dda24c1d342d364b533e1ab95fcf5ca6b7646f2d5bb008ca9c731d01cdad932f56c21fe5528acfd9cc290c823316992fe791424c5c4b1a2d0b6857d90702be7ec05f3d585ffe1a58a7ab1782643512dad430581dafbeff8669e1b7ec3122c51dbf8c0509f9f746a6208c8d8847493476949959cacdbdc4e024c65055c9af2b51767d2bf8b4e95f10e9ecda3977e9a9cf47d4a4626da1711b79ee344dea82c459826f0e9c31350d8129dc0dc49b203ea301681ba5092e13e362bc657059fd07fd62f0751f6ced8ea50caeb5316c8ed8bca05c793d302853f2fe016305d258e1e212b", - "scheme": "https", - "urlStyle": "BUCKET_BOUND_HOSTNAME", - "bucketBoundHostname": "mydomain.tld" + "scheme": "https", + "expectedCanonicalRequest": "GET\n/test-bucket/test-object\nX-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F20190201%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20190201T090000Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host%3Bx-goog-date\nhost:storage.googleapis.com\nx-goog-date:20190201T090000Z\n\nhost;x-goog-date\nUNSIGNED-PAYLOAD", + "expectedStringToSign": "GOOG4-RSA-SHA256\n20190201T090000Z\n20190201/auto/storage/goog4_request\n4052143280d90d5f4a8c878ff7418be6fee5d34e50b1da28d8081a094b88fa61" + }, + { + "description": "Signed Payload Instead of UNSIGNED-PAYLOAD", + "bucket": "test-bucket", + "object": "test-object", + "method": "PUT", + "expiration": 10, + "timestamp": "2019-02-01T09:00:00Z", + "expectedUrl": 
"https://storage.googleapis.com/test-bucket/test-object?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F2019-02-%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=2019-02-01T09%3A00%3A00Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host%3Bx-goog-content-sha256%3Bx-testcasemetadata-payload-value&X-Goog-Signature=0609637b2365bef36a9c65e4da454674d7b08b7b49e4bf92710065039bff9fd75059f001d222f07c184210bd248c4b0a5045cfa600f296d5194aa7ff15234186fd9f4dd4985993b48d3c31c10c4a8561f839652cffbb8f53717753cd4dfca4d8e1bfa1e6a9e4d6cc74f131a81a1fe92da675f2c6ab8b4db0e68b11b0baedf1ae72ef35998fac27c826d95a3e0a60a0127d23809e91e5883622464a8e8fbb3d82ad329e5f94b93ca7f720927eddf9147edb80f5558688cff32ad23fab38d553341d2adf0e46661f24c86cc5e68087b2a5dd6568b9ac8fd088a753ae159a4a903491b89dbda731eb158b8eb5c180eef7907ce35269cb6243c3da0ed0b4ba0cc882", + "headers": { + "X-Goog-Content-SHA256": "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b982", + "X-TestCaseMetadata-Payload-Value": "hello" }, - - { + "scheme": "https", + "expectedCanonicalRequest": "PUT\n/test-bucket/test-object\nX-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F20190201%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20190201T090000Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host%3Bx-goog-content-sha256%3Bx-testcasemetadata-payload-value\nhost:storage.googleapis.com\nx-goog-content-sha256:2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b982\nx-testcasemetadata-payload-value:hello\n\nhost;x-goog-content-sha256;x-testcasemetadata-payload-value\n2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b982", + "expectedStringToSign": "GOOG4-RSA-SHA256\n20190201T090000Z\n20190201/auto/storage/goog4_request\nbe21a0841a897930ff5cf72e6e74ec5274efd76c3fe4cde6678f24a0a3d6dbec" + }, + { "description": "Virtual Hosted Style", "bucket": "test-bucket", "object": "test-object", "method": "GET", "expiration": 10, - "timestamp": "20190201T090000Z", - "expectedUrl": "https://test-bucket.storage.googleapis.com/test-object?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F20190201%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20190201T090000Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host&X-Goog-Signature=64884b89935fba49d88277eac77257cab8b5097b2ab1831e20e55ea3b25d1ee8e4cd7aeb0ab06322b38a2722187f1c88765856324f3d72591ccc4cc23bae5e596104490886894c1037f5b2fc8d6349fd19b92d5eaddeee7ffc464b9262298764d175fd6e9e0a3680458b164609af2a626bf2e1cace525d7446d305a5dfe815dd04e33b91ae3ba3d31394cb13824d3a9cb61f5d28b149079c17b8b82738267fcb76e621d4161132cde184d5193480a185308f026859c8913f660832a68a5e17e30f6894bf0c403cec1e4dea056bdfe4b85da59e555ff3ecbc872640636154b0dbf0d6ce74929ad49920cce2beb51513865c2875bce33baef08af806de79e860ca", + "timestamp": "2019-02-01T09:00:00Z", + "expectedUrl": 
"https://test-bucket.storage.googleapis.com/test-object?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F2019-02-%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=2019-02-01T09%3A00%3A00Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host&X-Goog-Signature=1b067f789addd86030589002285670ebde4c824bdc3e3684b67e0d9c3a13774c2403adbe72df199b72671c5da2edbe2c25aa6bfe73635676e64c67aff05acb7a04c7fb4e5fae33a4a05071425344c7632c6ee778fe3b2c1d71d7cdee4bc73d71252cc0da26c8662f824b16924328f927c7d74f719fd7ccf1ceea7a6700b68e2122737b4add68e9d8a2e52df012cab7afd5e903c8cc648d6ea18c0ce41dbd52eb1a5927a13c861ff4a967b04c7c9c396d35406009e1ed5cc5a46530d0dc028f611de5a8237d30ef8f1be697cea727a384c6a71dcbe81eeaebc95f9ec08374bf3d9c23009bff982284ad5fff6d6c9160cfa97c623e84f48ec2f32249f1b5e2c7f8", "scheme": "https", "urlStyle": "VIRTUAL_HOSTED_STYLE", "expectedCanonicalRequest": "GET\n/test-object\nX-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F20190201%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20190201T090000Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host\nhost:test-bucket.storage.googleapis.com\n\nhost\nUNSIGNED-PAYLOAD", "expectedStringToSign": "GOOG4-RSA-SHA256\n20190201T090000Z\n20190201/auto/storage/goog4_request\n89eeae48258eccdcb1f592fb908008e3f5d36a949c002c1e614c94356dc18fc6" - } + }, + { + "description": "Get Bucket with HTTP Bucket Bound Hostname Support", + "bucket": "test-bucket", + "method": "GET", + "expiration": 10, + "timestamp": "20190201T090000Z", + "expectedUrl": "http://mydomain.tld/?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F20190201%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20190201T090000Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host&X-Goog-Signature=13a256f7afda0a733cc21e9c804b1d2c37f64f9a18956fe11ccce57765115dda24c1d342d364b533e1ab95fcf5ca6b7646f2d5bb008ca9c731d01cdad932f56c21fe5528acfd9cc290c823316992fe791424c5c4b1a2d0b6857d90702be7ec05f3d585ffe1a58a7ab1782643512dad430581dafbeff8669e1b7ec3122c51dbf8c0509f9f746a6208c8d8847493476949959cacdbdc4e024c65055c9af2b51767d2bf8b4e95f10e9ecda3977e9a9cf47d4a4626da1711b79ee344dea82c459826f0e9c31350d8129dc0dc49b203ea301681ba5092e13e362bc657059fd07fd62f0751f6ced8ea50caeb5316c8ed8bca05c793d302853f2fe016305d258e1e212b", + "scheme": "http", + "urlStyle": "BUCKET_BOUND_HOSTNAME", + "bucketBoundHostname": "mydomain.tld" + }, + { + "description": "Get Bucket with HTTPS Bucket Bound Hostname Support", + "bucket": "test-bucket", + "method": "GET", + "expiration": 10, + "timestamp": "20190201T090000Z", + "expectedUrl": 
"https://mydomain.tld/?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F20190201%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20190201T090000Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host&X-Goog-Signature=13a256f7afda0a733cc21e9c804b1d2c37f64f9a18956fe11ccce57765115dda24c1d342d364b533e1ab95fcf5ca6b7646f2d5bb008ca9c731d01cdad932f56c21fe5528acfd9cc290c823316992fe791424c5c4b1a2d0b6857d90702be7ec05f3d585ffe1a58a7ab1782643512dad430581dafbeff8669e1b7ec3122c51dbf8c0509f9f746a6208c8d8847493476949959cacdbdc4e024c65055c9af2b51767d2bf8b4e95f10e9ecda3977e9a9cf47d4a4626da1711b79ee344dea82c459826f0e9c31350d8129dc0dc49b203ea301681ba5092e13e362bc657059fd07fd62f0751f6ced8ea50caeb5316c8ed8bca05c793d302853f2fe016305d258e1e212b", + "scheme": "https", + "urlStyle": "BUCKET_BOUND_HOSTNAME", + "bucketBoundHostname": "mydomain.tld" + }, + { + "description": "HTTP Bucket Bound Hostname Support", + "bucket": "test-bucket", + "object": "test-object", + "method": "GET", + "expiration": 10, + "timestamp": "2019-02-01T09:00:00Z", + "expectedUrl": "http://mydomain.tld/test-object?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F2019-02-%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=2019-02-01T09%3A00%3A00Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host&X-Goog-Signature=352cf27e2dae4545fd6c3eb62bb4852f9a2a41013a9279c2bdbb9a37a1de4cadd1cedb887eedd190131fb1fbae44eb4b340cde735176885aca75b46f251ba5017b97f0816d2750b80484ca64707d630172793e953da918e9fd8afcbe03f0cf380f53cc8117eff41584b5da5c19cdc4aee0736bdd446126da96c6373ad67e6dce79d4c72a502148d0814e7ff3a94fb3a7a891e35a180a32b468f28837f443bfa56aab9424451d5f8b010c2c08442204b1c1d99cb10b45a2418ffd965cf9bbc07f1a45f060d6a05d62edf4229d382af849e7b757e00526957e96358737a2855c4683fa3e3b405e7d423cae46a402b191c7c76e6a903d8a49fab7f63083fc0d5f0c", + "scheme": "http", + "urlStyle": "BUCKET_BOUND_HOSTNAME", + "bucketBoundHostname": "mydomain.tld", + "expectedCanonicalRequest": "GET\n/test-object\nX-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F20190201%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20190201T090000Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host\nhost:mydomain.tld\n\nhost\nUNSIGNED-PAYLOAD", + "expectedStringToSign": "GOOG4-RSA-SHA256\n20190201T090000Z\n20190201/auto/storage/goog4_request\nd6c309924b51a5abbe4d6356f7bf29c2120c6b14649b1e97b3bc9309adca7d4b" + }, + { + "description": "HTTPS Bucket Bound Hostname Support", + "bucket": "test-bucket", + "object": "test-object", + "method": "GET", + "expiration": 10, + "timestamp": "2019-02-01T09:00:00Z", + "expectedUrl": 
"https://mydomain.tld/test-object?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F2019-02-%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=2019-02-01T09%3A00%3A00Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host&X-Goog-Signature=352cf27e2dae4545fd6c3eb62bb4852f9a2a41013a9279c2bdbb9a37a1de4cadd1cedb887eedd190131fb1fbae44eb4b340cde735176885aca75b46f251ba5017b97f0816d2750b80484ca64707d630172793e953da918e9fd8afcbe03f0cf380f53cc8117eff41584b5da5c19cdc4aee0736bdd446126da96c6373ad67e6dce79d4c72a502148d0814e7ff3a94fb3a7a891e35a180a32b468f28837f443bfa56aab9424451d5f8b010c2c08442204b1c1d99cb10b45a2418ffd965cf9bbc07f1a45f060d6a05d62edf4229d382af849e7b757e00526957e96358737a2855c4683fa3e3b405e7d423cae46a402b191c7c76e6a903d8a49fab7f63083fc0d5f0c", + "scheme": "https", + "urlStyle": "BUCKET_BOUND_HOSTNAME", + "bucketBoundHostname": "mydomain.tld", + "expectedCanonicalRequest": "GET\n/test-object\nX-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=test-iam-credentials%40test-project-id.iam.gserviceaccount.com%2F20190201%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20190201T090000Z&X-Goog-Expires=10&X-Goog-SignedHeaders=host\nhost:mydomain.tld\n\nhost\nUNSIGNED-PAYLOAD", + "expectedStringToSign": "GOOG4-RSA-SHA256\n20190201T090000Z\n20190201/auto/storage/goog4_request\nd6c309924b51a5abbe4d6356f7bf29c2120c6b14649b1e97b3bc9309adca7d4b" + } ], - "postPolicyV4Tests": [ - { - "description": "POST Policy Simple", - "policyInput": { - "scheme": "https", - "bucket": "rsaposttest-1579902670-h3q7wvodjor6bc7y", - "object": "test-object", - "expiration": 10, - "timestamp": "2020-01-23T04:35:30Z" - }, - "policyOutput": { - "url": "https://storage.googleapis.com/rsaposttest-1579902670-h3q7wvodjor6bc7y/", - "fields": { - "key": "test-object", - "x-goog-algorithm": "GOOG4-RSA-SHA256", - "x-goog-credential": "test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request", - "x-goog-date": "20200123T043530Z", - "x-goog-signature": "14c84353d4b5ae3d210290ab4ab185a974db36f697ebbdc011e7deda08cd5ecb7bd2682f0ac87b06540ddbfd9a74c4dbcc355795bb6d9383f2a3b5acc45615e058647b94896c2a18abb1fab04fa099b6770a2836b2232a810471b1e48461f37906dad134756d075bbfb6cba28b1d0da70579a3365b2ba336b43d44da476a13eb21a45241b0c483aaaa7aa40d17812c24e125d16670d1accf6eae42007b7000a4ee51247c5f76f070c9b360611f8dc713fef027ffd38ac19f6d68140701a036b143a522bf3e4d2a3db44decb5f32ed1bf062ae87e576d50fee0adce4ee9aeb61fa6b2605cf1f63ea9d886ac5d75135fdbc102fcf8e320f38570eabe1697fefef9", - "policy": "eyJjb25kaXRpb25zIjpbeyJidWNrZXQiOiJyc2Fwb3N0dGVzdC0xNTc5OTAyNjcwLWgzcTd3dm9kam9yNmJjN3kifSx7ImtleSI6InRlc3Qtb2JqZWN0In0seyJ4LWdvb2ctZGF0ZSI6IjIwMjAwMTIzVDA0MzUzMFoifSx7IngtZ29vZy1jcmVkZW50aWFsIjoidGVzdC1pYW0tY3JlZGVudGlhbHNAZHVtbXktcHJvamVjdC1pZC5pYW0uZ3NlcnZpY2VhY2NvdW50LmNvbS8yMDIwMDEyMy9hdXRvL3N0b3JhZ2UvZ29vZzRfcmVxdWVzdCJ9LHsieC1nb29nLWFsZ29yaXRobSI6IkdPT0c0LVJTQS1TSEEyNTYifV0sImV4cGlyYXRpb24iOiIyMDIwLTAxLTIzVDA0OjM1OjQwWiJ9" - }, - "expectedDecodedPolicy": "{\"conditions\":[{\"bucket\":\"rsaposttest-1579902670-h3q7wvodjor6bc7y\"},{\"key\":\"test-object\"},{\"x-goog-date\":\"20200123T043530Z\"},{\"x-goog-credential\":\"test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request\"},{\"x-goog-algorithm\":\"GOOG4-RSA-SHA256\"}],\"expiration\":\"2020-01-23T04:35:40Z\"}" - } + { + "description": "POST Policy Simple", + "policyInput": { + "scheme": "https", + 
"bucket": "rsaposttest-1579902670-h3q7wvodjor6bc7y", + "object": "test-object", + "expiration": 10, + "timestamp": "2020-01-23T04:35:30Z" }, - - { - "description": "POST Policy Simple Virtual Hosted Style", - "policyInput": { - "scheme": "https", - "urlStyle": "VIRTUAL_HOSTED_STYLE", - "bucket": "rsaposttest-1579902670-h3q7wvodjor6bc7y", - "object": "test-object", - "expiration": 10, - "timestamp": "2020-01-23T04:35:30Z" - }, - "policyOutput": { - "url": "https://rsaposttest-1579902670-h3q7wvodjor6bc7y.storage.googleapis.com/", - "fields": { - "key": "test-object", - "x-goog-algorithm": "GOOG4-RSA-SHA256", - "x-goog-credential": "test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request", - "x-goog-date": "20200123T043530Z", - "x-goog-signature": "14c84353d4b5ae3d210290ab4ab185a974db36f697ebbdc011e7deda08cd5ecb7bd2682f0ac87b06540ddbfd9a74c4dbcc355795bb6d9383f2a3b5acc45615e058647b94896c2a18abb1fab04fa099b6770a2836b2232a810471b1e48461f37906dad134756d075bbfb6cba28b1d0da70579a3365b2ba336b43d44da476a13eb21a45241b0c483aaaa7aa40d17812c24e125d16670d1accf6eae42007b7000a4ee51247c5f76f070c9b360611f8dc713fef027ffd38ac19f6d68140701a036b143a522bf3e4d2a3db44decb5f32ed1bf062ae87e576d50fee0adce4ee9aeb61fa6b2605cf1f63ea9d886ac5d75135fdbc102fcf8e320f38570eabe1697fefef9", - "policy": "eyJjb25kaXRpb25zIjpbeyJidWNrZXQiOiJyc2Fwb3N0dGVzdC0xNTc5OTAyNjcwLWgzcTd3dm9kam9yNmJjN3kifSx7ImtleSI6InRlc3Qtb2JqZWN0In0seyJ4LWdvb2ctZGF0ZSI6IjIwMjAwMTIzVDA0MzUzMFoifSx7IngtZ29vZy1jcmVkZW50aWFsIjoidGVzdC1pYW0tY3JlZGVudGlhbHNAZHVtbXktcHJvamVjdC1pZC5pYW0uZ3NlcnZpY2VhY2NvdW50LmNvbS8yMDIwMDEyMy9hdXRvL3N0b3JhZ2UvZ29vZzRfcmVxdWVzdCJ9LHsieC1nb29nLWFsZ29yaXRobSI6IkdPT0c0LVJTQS1TSEEyNTYifV0sImV4cGlyYXRpb24iOiIyMDIwLTAxLTIzVDA0OjM1OjQwWiJ9" - }, - "expectedDecodedPolicy": "{\"conditions\":[{\"bucket\":\"rsaposttest-1579902670-h3q7wvodjor6bc7y\"},{\"key\":\"test-object\"},{\"x-goog-date\":\"20200123T043530Z\"},{\"x-goog-credential\":\"test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request\"},{\"x-goog-algorithm\":\"GOOG4-RSA-SHA256\"}],\"expiration\":\"2020-01-23T04:35:40Z\"}" - } + "policyOutput": { + "url": "https://storage.googleapis.com/rsaposttest-1579902670-h3q7wvodjor6bc7y/", + "fields": { + "key": "test-object", + "x-goog-algorithm": "GOOG4-RSA-SHA256", + "x-goog-credential": "test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request", + "x-goog-date": "20200123T043530Z", + "x-goog-signature": "14c84353d4b5ae3d210290ab4ab185a974db36f697ebbdc011e7deda08cd5ecb7bd2682f0ac87b06540ddbfd9a74c4dbcc355795bb6d9383f2a3b5acc45615e058647b94896c2a18abb1fab04fa099b6770a2836b2232a810471b1e48461f37906dad134756d075bbfb6cba28b1d0da70579a3365b2ba336b43d44da476a13eb21a45241b0c483aaaa7aa40d17812c24e125d16670d1accf6eae42007b7000a4ee51247c5f76f070c9b360611f8dc713fef027ffd38ac19f6d68140701a036b143a522bf3e4d2a3db44decb5f32ed1bf062ae87e576d50fee0adce4ee9aeb61fa6b2605cf1f63ea9d886ac5d75135fdbc102fcf8e320f38570eabe1697fefef9", + "policy": 
"eyJjb25kaXRpb25zIjpbeyJidWNrZXQiOiJyc2Fwb3N0dGVzdC0xNTc5OTAyNjcwLWgzcTd3dm9kam9yNmJjN3kifSx7ImtleSI6InRlc3Qtb2JqZWN0In0seyJ4LWdvb2ctZGF0ZSI6IjIwMjAwMTIzVDA0MzUzMFoifSx7IngtZ29vZy1jcmVkZW50aWFsIjoidGVzdC1pYW0tY3JlZGVudGlhbHNAZHVtbXktcHJvamVjdC1pZC5pYW0uZ3NlcnZpY2VhY2NvdW50LmNvbS8yMDIwMDEyMy9hdXRvL3N0b3JhZ2UvZ29vZzRfcmVxdWVzdCJ9LHsieC1nb29nLWFsZ29yaXRobSI6IkdPT0c0LVJTQS1TSEEyNTYifV0sImV4cGlyYXRpb24iOiIyMDIwLTAxLTIzVDA0OjM1OjQwWiJ9" + }, + "expectedDecodedPolicy": "{\"conditions\":[{\"bucket\":\"rsaposttest-1579902670-h3q7wvodjor6bc7y\"},{\"key\":\"test-object\"},{\"x-goog-date\":\"20200123T043530Z\"},{\"x-goog-credential\":\"test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request\"},{\"x-goog-algorithm\":\"GOOG4-RSA-SHA256\"}],\"expiration\":\"2020-01-23T04:35:40Z\"}" + } + }, + { + "description": "POST Policy Simple Virtual Hosted Style", + "policyInput": { + "scheme": "https", + "urlStyle": "VIRTUAL_HOSTED_STYLE", + "bucket": "rsaposttest-1579902670-h3q7wvodjor6bc7y", + "object": "test-object", + "expiration": 10, + "timestamp": "2020-01-23T04:35:30Z" }, - - { - "description": "POST Policy Simple Bucket Bound Hostname", - "policyInput": { - "scheme": "https", - "urlStyle": "BUCKET_BOUND_HOSTNAME", - "bucketBoundHostname": "mydomain.tld", - "bucket": "rsaposttest-1579902670-h3q7wvodjor6bc7y", - "object": "test-object", - "expiration": 10, - "timestamp": "2020-01-23T04:35:30Z" - }, - "policyOutput": { - "url": "https://mydomain.tld/", - "fields": { - "key": "test-object", - "x-goog-algorithm": "GOOG4-RSA-SHA256", - "x-goog-credential": "test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request", - "x-goog-date": "20200123T043530Z", - "x-goog-signature": "14c84353d4b5ae3d210290ab4ab185a974db36f697ebbdc011e7deda08cd5ecb7bd2682f0ac87b06540ddbfd9a74c4dbcc355795bb6d9383f2a3b5acc45615e058647b94896c2a18abb1fab04fa099b6770a2836b2232a810471b1e48461f37906dad134756d075bbfb6cba28b1d0da70579a3365b2ba336b43d44da476a13eb21a45241b0c483aaaa7aa40d17812c24e125d16670d1accf6eae42007b7000a4ee51247c5f76f070c9b360611f8dc713fef027ffd38ac19f6d68140701a036b143a522bf3e4d2a3db44decb5f32ed1bf062ae87e576d50fee0adce4ee9aeb61fa6b2605cf1f63ea9d886ac5d75135fdbc102fcf8e320f38570eabe1697fefef9", - "policy": "eyJjb25kaXRpb25zIjpbeyJidWNrZXQiOiJyc2Fwb3N0dGVzdC0xNTc5OTAyNjcwLWgzcTd3dm9kam9yNmJjN3kifSx7ImtleSI6InRlc3Qtb2JqZWN0In0seyJ4LWdvb2ctZGF0ZSI6IjIwMjAwMTIzVDA0MzUzMFoifSx7IngtZ29vZy1jcmVkZW50aWFsIjoidGVzdC1pYW0tY3JlZGVudGlhbHNAZHVtbXktcHJvamVjdC1pZC5pYW0uZ3NlcnZpY2VhY2NvdW50LmNvbS8yMDIwMDEyMy9hdXRvL3N0b3JhZ2UvZ29vZzRfcmVxdWVzdCJ9LHsieC1nb29nLWFsZ29yaXRobSI6IkdPT0c0LVJTQS1TSEEyNTYifV0sImV4cGlyYXRpb24iOiIyMDIwLTAxLTIzVDA0OjM1OjQwWiJ9" - }, - "expectedDecodedPolicy": "{\"conditions\":[{\"bucket\":\"rsaposttest-1579902670-h3q7wvodjor6bc7y\"},{\"key\":\"test-object\"},{\"x-goog-date\":\"20200123T043530Z\"},{\"x-goog-credential\":\"test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request\"},{\"x-goog-algorithm\":\"GOOG4-RSA-SHA256\"}],\"expiration\":\"2020-01-23T04:35:40Z\"}" - } + "policyOutput": { + "url": "https://rsaposttest-1579902670-h3q7wvodjor6bc7y.storage.googleapis.com/", + "fields": { + "key": "test-object", + "x-goog-algorithm": "GOOG4-RSA-SHA256", + "x-goog-credential": "test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request", + "x-goog-date": "20200123T043530Z", + 
"x-goog-signature": "14c84353d4b5ae3d210290ab4ab185a974db36f697ebbdc011e7deda08cd5ecb7bd2682f0ac87b06540ddbfd9a74c4dbcc355795bb6d9383f2a3b5acc45615e058647b94896c2a18abb1fab04fa099b6770a2836b2232a810471b1e48461f37906dad134756d075bbfb6cba28b1d0da70579a3365b2ba336b43d44da476a13eb21a45241b0c483aaaa7aa40d17812c24e125d16670d1accf6eae42007b7000a4ee51247c5f76f070c9b360611f8dc713fef027ffd38ac19f6d68140701a036b143a522bf3e4d2a3db44decb5f32ed1bf062ae87e576d50fee0adce4ee9aeb61fa6b2605cf1f63ea9d886ac5d75135fdbc102fcf8e320f38570eabe1697fefef9", + "policy": "eyJjb25kaXRpb25zIjpbeyJidWNrZXQiOiJyc2Fwb3N0dGVzdC0xNTc5OTAyNjcwLWgzcTd3dm9kam9yNmJjN3kifSx7ImtleSI6InRlc3Qtb2JqZWN0In0seyJ4LWdvb2ctZGF0ZSI6IjIwMjAwMTIzVDA0MzUzMFoifSx7IngtZ29vZy1jcmVkZW50aWFsIjoidGVzdC1pYW0tY3JlZGVudGlhbHNAZHVtbXktcHJvamVjdC1pZC5pYW0uZ3NlcnZpY2VhY2NvdW50LmNvbS8yMDIwMDEyMy9hdXRvL3N0b3JhZ2UvZ29vZzRfcmVxdWVzdCJ9LHsieC1nb29nLWFsZ29yaXRobSI6IkdPT0c0LVJTQS1TSEEyNTYifV0sImV4cGlyYXRpb24iOiIyMDIwLTAxLTIzVDA0OjM1OjQwWiJ9" + }, + "expectedDecodedPolicy": "{\"conditions\":[{\"bucket\":\"rsaposttest-1579902670-h3q7wvodjor6bc7y\"},{\"key\":\"test-object\"},{\"x-goog-date\":\"20200123T043530Z\"},{\"x-goog-credential\":\"test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request\"},{\"x-goog-algorithm\":\"GOOG4-RSA-SHA256\"}],\"expiration\":\"2020-01-23T04:35:40Z\"}" + } + }, + { + "description": "POST Policy Simple Bucket Bound Hostname", + "policyInput": { + "scheme": "https", + "urlStyle": "BUCKET_BOUND_HOSTNAME", + "bucketBoundHostname": "mydomain.tld", + "bucket": "rsaposttest-1579902670-h3q7wvodjor6bc7y", + "object": "test-object", + "expiration": 10, + "timestamp": "2020-01-23T04:35:30Z" }, - - { - "description": "POST Policy Simple Bucket Bound Hostname HTTP", - "policyInput": { - "scheme": "http", - "urlStyle": "BUCKET_BOUND_HOSTNAME", - "bucketBoundHostname": "mydomain.tld", - "bucket": "rsaposttest-1579902670-h3q7wvodjor6bc7y", - "object": "test-object", - "expiration": 10, - "timestamp": "2020-01-23T04:35:30Z" - }, - "policyOutput": { - "url": "http://mydomain.tld/", - "fields": { - "key": "test-object", - "x-goog-algorithm": "GOOG4-RSA-SHA256", - "x-goog-credential": "test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request", - "x-goog-date": "20200123T043530Z", - "x-goog-signature": "14c84353d4b5ae3d210290ab4ab185a974db36f697ebbdc011e7deda08cd5ecb7bd2682f0ac87b06540ddbfd9a74c4dbcc355795bb6d9383f2a3b5acc45615e058647b94896c2a18abb1fab04fa099b6770a2836b2232a810471b1e48461f37906dad134756d075bbfb6cba28b1d0da70579a3365b2ba336b43d44da476a13eb21a45241b0c483aaaa7aa40d17812c24e125d16670d1accf6eae42007b7000a4ee51247c5f76f070c9b360611f8dc713fef027ffd38ac19f6d68140701a036b143a522bf3e4d2a3db44decb5f32ed1bf062ae87e576d50fee0adce4ee9aeb61fa6b2605cf1f63ea9d886ac5d75135fdbc102fcf8e320f38570eabe1697fefef9", - "policy": "eyJjb25kaXRpb25zIjpbeyJidWNrZXQiOiJyc2Fwb3N0dGVzdC0xNTc5OTAyNjcwLWgzcTd3dm9kam9yNmJjN3kifSx7ImtleSI6InRlc3Qtb2JqZWN0In0seyJ4LWdvb2ctZGF0ZSI6IjIwMjAwMTIzVDA0MzUzMFoifSx7IngtZ29vZy1jcmVkZW50aWFsIjoidGVzdC1pYW0tY3JlZGVudGlhbHNAZHVtbXktcHJvamVjdC1pZC5pYW0uZ3NlcnZpY2VhY2NvdW50LmNvbS8yMDIwMDEyMy9hdXRvL3N0b3JhZ2UvZ29vZzRfcmVxdWVzdCJ9LHsieC1nb29nLWFsZ29yaXRobSI6IkdPT0c0LVJTQS1TSEEyNTYifV0sImV4cGlyYXRpb24iOiIyMDIwLTAxLTIzVDA0OjM1OjQwWiJ9" - }, - "expectedDecodedPolicy": 
"{\"conditions\":[{\"bucket\":\"rsaposttest-1579902670-h3q7wvodjor6bc7y\"},{\"key\":\"test-object\"},{\"x-goog-date\":\"20200123T043530Z\"},{\"x-goog-credential\":\"test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request\"},{\"x-goog-algorithm\":\"GOOG4-RSA-SHA256\"}],\"expiration\":\"2020-01-23T04:35:40Z\"}" - } + "policyOutput": { + "url": "https://mydomain.tld/", + "fields": { + "key": "test-object", + "x-goog-algorithm": "GOOG4-RSA-SHA256", + "x-goog-credential": "test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request", + "x-goog-date": "20200123T043530Z", + "x-goog-signature": "14c84353d4b5ae3d210290ab4ab185a974db36f697ebbdc011e7deda08cd5ecb7bd2682f0ac87b06540ddbfd9a74c4dbcc355795bb6d9383f2a3b5acc45615e058647b94896c2a18abb1fab04fa099b6770a2836b2232a810471b1e48461f37906dad134756d075bbfb6cba28b1d0da70579a3365b2ba336b43d44da476a13eb21a45241b0c483aaaa7aa40d17812c24e125d16670d1accf6eae42007b7000a4ee51247c5f76f070c9b360611f8dc713fef027ffd38ac19f6d68140701a036b143a522bf3e4d2a3db44decb5f32ed1bf062ae87e576d50fee0adce4ee9aeb61fa6b2605cf1f63ea9d886ac5d75135fdbc102fcf8e320f38570eabe1697fefef9", + "policy": "eyJjb25kaXRpb25zIjpbeyJidWNrZXQiOiJyc2Fwb3N0dGVzdC0xNTc5OTAyNjcwLWgzcTd3dm9kam9yNmJjN3kifSx7ImtleSI6InRlc3Qtb2JqZWN0In0seyJ4LWdvb2ctZGF0ZSI6IjIwMjAwMTIzVDA0MzUzMFoifSx7IngtZ29vZy1jcmVkZW50aWFsIjoidGVzdC1pYW0tY3JlZGVudGlhbHNAZHVtbXktcHJvamVjdC1pZC5pYW0uZ3NlcnZpY2VhY2NvdW50LmNvbS8yMDIwMDEyMy9hdXRvL3N0b3JhZ2UvZ29vZzRfcmVxdWVzdCJ9LHsieC1nb29nLWFsZ29yaXRobSI6IkdPT0c0LVJTQS1TSEEyNTYifV0sImV4cGlyYXRpb24iOiIyMDIwLTAxLTIzVDA0OjM1OjQwWiJ9" + }, + "expectedDecodedPolicy": "{\"conditions\":[{\"bucket\":\"rsaposttest-1579902670-h3q7wvodjor6bc7y\"},{\"key\":\"test-object\"},{\"x-goog-date\":\"20200123T043530Z\"},{\"x-goog-credential\":\"test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request\"},{\"x-goog-algorithm\":\"GOOG4-RSA-SHA256\"}],\"expiration\":\"2020-01-23T04:35:40Z\"}" + } + }, + { + "description": "POST Policy Simple Bucket Bound Hostname HTTP", + "policyInput": { + "scheme": "http", + "urlStyle": "BUCKET_BOUND_HOSTNAME", + "bucketBoundHostname": "mydomain.tld", + "bucket": "rsaposttest-1579902670-h3q7wvodjor6bc7y", + "object": "test-object", + "expiration": 10, + "timestamp": "2020-01-23T04:35:30Z" }, - - { - "description": "POST Policy ACL matching", - "policyInput": { - "scheme": "https", - "bucket": "rsaposttest-1579902662-x2kd7kjwh2w5izcw", - "object": "test-object", - "expiration": 10, - "timestamp": "2020-01-23T04:35:30Z", - "conditions": { - "startsWith": [ - "$acl", - "public" - ] - } - }, - "policyOutput": { - "url": "https://storage.googleapis.com/rsaposttest-1579902662-x2kd7kjwh2w5izcw/", - "fields": { - "key": "test-object", - "x-goog-algorithm": "GOOG4-RSA-SHA256", - "x-goog-credential": "test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request", - "x-goog-date": "20200123T043530Z", - "x-goog-signature": 
"8633cb542c81d25b2ee26fd017101649771023349a9812ca59f4162df275192c7193213ccff0dddd58c1007698d46e2cb3ab14b64fe52558252feda8d4f9b27d5d4fa8264d8b005e4cc7edcd6fd60ca5df5d6022cbff3b351de46d9e7f501b737f4d04233b7bd4df8f1a1740dcc6807a619801b71cc3c22d4c3aa5c1a4dafde9d5d87400fa04d54c569ba1cf6af66fcc6d257430d88502447c1ce65a99fe5f1370c3f40a843fe4bb9ce115605a96947e4660977765ffdf31ef0fbc3c2c89db48fbf1204be8bb47d46d38adb18bf36f3861ef4be393f4b0ad8ca72b13eda2d7e359bd379789c3e4801cc12f5374d6eb604fa36b3de9a640222f13e3ef8fdadbaf", - "policy": "eyJjb25kaXRpb25zIjpbWyJzdGFydHMtd2l0aCIsIiRhY2wiLCJwdWJsaWMiXSx7ImJ1Y2tldCI6InJzYXBvc3R0ZXN0LTE1Nzk5MDI2NjIteDJrZDdrandoMnc1aXpjdyJ9LHsia2V5IjoidGVzdC1vYmplY3QifSx7IngtZ29vZy1kYXRlIjoiMjAyMDAxMjNUMDQzNTMwWiJ9LHsieC1nb29nLWNyZWRlbnRpYWwiOiJ0ZXN0LWlhbS1jcmVkZW50aWFsc0BkdW1teS1wcm9qZWN0LWlkLmlhbS5nc2VydmljZWFjY291bnQuY29tLzIwMjAwMTIzL2F1dG8vc3RvcmFnZS9nb29nNF9yZXF1ZXN0In0seyJ4LWdvb2ctYWxnb3JpdGhtIjoiR09PRzQtUlNBLVNIQTI1NiJ9XSwiZXhwaXJhdGlvbiI6IjIwMjAtMDEtMjNUMDQ6MzU6NDBaIn0=" - }, - "expectedDecodedPolicy": "{\"conditions\":[[\"starts-with\",\"$acl\",\"public\"],{\"bucket\":\"rsaposttest-1579902662-x2kd7kjwh2w5izcw\"},{\"key\":\"test-object\"},{\"x-goog-date\":\"20200123T043530Z\"},{\"x-goog-credential\":\"test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request\"},{\"x-goog-algorithm\":\"GOOG4-RSA-SHA256\"}],\"expiration\":\"2020-01-23T04:35:40Z\"}" - } + "policyOutput": { + "url": "http://mydomain.tld/", + "fields": { + "key": "test-object", + "x-goog-algorithm": "GOOG4-RSA-SHA256", + "x-goog-credential": "test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request", + "x-goog-date": "20200123T043530Z", + "x-goog-signature": "14c84353d4b5ae3d210290ab4ab185a974db36f697ebbdc011e7deda08cd5ecb7bd2682f0ac87b06540ddbfd9a74c4dbcc355795bb6d9383f2a3b5acc45615e058647b94896c2a18abb1fab04fa099b6770a2836b2232a810471b1e48461f37906dad134756d075bbfb6cba28b1d0da70579a3365b2ba336b43d44da476a13eb21a45241b0c483aaaa7aa40d17812c24e125d16670d1accf6eae42007b7000a4ee51247c5f76f070c9b360611f8dc713fef027ffd38ac19f6d68140701a036b143a522bf3e4d2a3db44decb5f32ed1bf062ae87e576d50fee0adce4ee9aeb61fa6b2605cf1f63ea9d886ac5d75135fdbc102fcf8e320f38570eabe1697fefef9", + "policy": "eyJjb25kaXRpb25zIjpbeyJidWNrZXQiOiJyc2Fwb3N0dGVzdC0xNTc5OTAyNjcwLWgzcTd3dm9kam9yNmJjN3kifSx7ImtleSI6InRlc3Qtb2JqZWN0In0seyJ4LWdvb2ctZGF0ZSI6IjIwMjAwMTIzVDA0MzUzMFoifSx7IngtZ29vZy1jcmVkZW50aWFsIjoidGVzdC1pYW0tY3JlZGVudGlhbHNAZHVtbXktcHJvamVjdC1pZC5pYW0uZ3NlcnZpY2VhY2NvdW50LmNvbS8yMDIwMDEyMy9hdXRvL3N0b3JhZ2UvZ29vZzRfcmVxdWVzdCJ9LHsieC1nb29nLWFsZ29yaXRobSI6IkdPT0c0LVJTQS1TSEEyNTYifV0sImV4cGlyYXRpb24iOiIyMDIwLTAxLTIzVDA0OjM1OjQwWiJ9" + }, + "expectedDecodedPolicy": "{\"conditions\":[{\"bucket\":\"rsaposttest-1579902670-h3q7wvodjor6bc7y\"},{\"key\":\"test-object\"},{\"x-goog-date\":\"20200123T043530Z\"},{\"x-goog-credential\":\"test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request\"},{\"x-goog-algorithm\":\"GOOG4-RSA-SHA256\"}],\"expiration\":\"2020-01-23T04:35:40Z\"}" + } + }, + { + "description": "POST Policy ACL matching", + "policyInput": { + "scheme": "https", + "bucket": "rsaposttest-1579902662-x2kd7kjwh2w5izcw", + "object": "test-object", + "expiration": 10, + "timestamp": "2020-01-23T04:35:30Z", + "conditions": { + "startsWith": [ + "$acl", + "public" + ] + } }, - - { - "description": "POST Policy Within Content-Range", - "policyInput": { - "scheme": "https", - 
"bucket": "rsaposttest-1579902672-lpd47iogn6hx4sle", - "object": "test-object", - "expiration": 10, - "timestamp": "2020-01-23T04:35:30Z", - "conditions": { - "contentLengthRange": [ - 246, - 266 - ] - } - }, - "policyOutput": { - "url": "https://storage.googleapis.com/rsaposttest-1579902672-lpd47iogn6hx4sle/", - "fields": { - "key": "test-object", - "x-goog-algorithm": "GOOG4-RSA-SHA256", - "x-goog-credential": "test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request", - "x-goog-date": "20200123T043530Z", - "x-goog-signature": "1d045155adcf3d0fe063d7b78ea1a4f86cdc8361f58ea90b4fd724c563a84d9b0e02a8b01e7a5c7587b32eb40839e28cf279bc8b4eb1e9a6f1c9bae372e799cea10ef34baaf310f99acd9849785a89fb69533c2ba8db6b6b4f87a1dcbbdeea8316f822092e6cad18b80f9610c219f239a606d182a092ae439ccbaa3543709faae8cc3410e9eafb2885f6f74b9ec4eb5982dfe43492cc8c863330314616f5cd34d4b2a3ec6ad857a9a47d68381d714b010fc243e17fe68b3ccdfe205222ca63bc4d7d7177dd7ec4e9376e3d3ae05a5d629b9ceceab127628c2669f35fa735dc01a225e6a7c98db930694f6e6a77e20ec0c8e509d230cf73cc530cdc237c6f079d", - "policy": "eyJjb25kaXRpb25zIjpbWyJjb250ZW50LWxlbmd0aC1yYW5nZSIsMjQ2LDI2Nl0seyJidWNrZXQiOiJyc2Fwb3N0dGVzdC0xNTc5OTAyNjcyLWxwZDQ3aW9nbjZoeDRzbGUifSx7ImtleSI6InRlc3Qtb2JqZWN0In0seyJ4LWdvb2ctZGF0ZSI6IjIwMjAwMTIzVDA0MzUzMFoifSx7IngtZ29vZy1jcmVkZW50aWFsIjoidGVzdC1pYW0tY3JlZGVudGlhbHNAZHVtbXktcHJvamVjdC1pZC5pYW0uZ3NlcnZpY2VhY2NvdW50LmNvbS8yMDIwMDEyMy9hdXRvL3N0b3JhZ2UvZ29vZzRfcmVxdWVzdCJ9LHsieC1nb29nLWFsZ29yaXRobSI6IkdPT0c0LVJTQS1TSEEyNTYifV0sImV4cGlyYXRpb24iOiIyMDIwLTAxLTIzVDA0OjM1OjQwWiJ9" - }, - "expectedDecodedPolicy": "{\"conditions\":[[\"content-length-range\",246,266],{\"bucket\":\"rsaposttest-1579902672-lpd47iogn6hx4sle\"},{\"key\":\"test-object\"},{\"x-goog-date\":\"20200123T043530Z\"},{\"x-goog-credential\":\"test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request\"},{\"x-goog-algorithm\":\"GOOG4-RSA-SHA256\"}],\"expiration\":\"2020-01-23T04:35:40Z\"}" - } + "policyOutput": { + "url": "https://storage.googleapis.com/rsaposttest-1579902662-x2kd7kjwh2w5izcw/", + "fields": { + "key": "test-object", + "x-goog-algorithm": "GOOG4-RSA-SHA256", + "x-goog-credential": "test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request", + "x-goog-date": "20200123T043530Z", + "x-goog-signature": "8633cb542c81d25b2ee26fd017101649771023349a9812ca59f4162df275192c7193213ccff0dddd58c1007698d46e2cb3ab14b64fe52558252feda8d4f9b27d5d4fa8264d8b005e4cc7edcd6fd60ca5df5d6022cbff3b351de46d9e7f501b737f4d04233b7bd4df8f1a1740dcc6807a619801b71cc3c22d4c3aa5c1a4dafde9d5d87400fa04d54c569ba1cf6af66fcc6d257430d88502447c1ce65a99fe5f1370c3f40a843fe4bb9ce115605a96947e4660977765ffdf31ef0fbc3c2c89db48fbf1204be8bb47d46d38adb18bf36f3861ef4be393f4b0ad8ca72b13eda2d7e359bd379789c3e4801cc12f5374d6eb604fa36b3de9a640222f13e3ef8fdadbaf", + "policy": "eyJjb25kaXRpb25zIjpbWyJzdGFydHMtd2l0aCIsIiRhY2wiLCJwdWJsaWMiXSx7ImJ1Y2tldCI6InJzYXBvc3R0ZXN0LTE1Nzk5MDI2NjIteDJrZDdrandoMnc1aXpjdyJ9LHsia2V5IjoidGVzdC1vYmplY3QifSx7IngtZ29vZy1kYXRlIjoiMjAyMDAxMjNUMDQzNTMwWiJ9LHsieC1nb29nLWNyZWRlbnRpYWwiOiJ0ZXN0LWlhbS1jcmVkZW50aWFsc0BkdW1teS1wcm9qZWN0LWlkLmlhbS5nc2VydmljZWFjY291bnQuY29tLzIwMjAwMTIzL2F1dG8vc3RvcmFnZS9nb29nNF9yZXF1ZXN0In0seyJ4LWdvb2ctYWxnb3JpdGhtIjoiR09PRzQtUlNBLVNIQTI1NiJ9XSwiZXhwaXJhdGlvbiI6IjIwMjAtMDEtMjNUMDQ6MzU6NDBaIn0=" + }, + "expectedDecodedPolicy": 
"{\"conditions\":[[\"starts-with\",\"$acl\",\"public\"],{\"bucket\":\"rsaposttest-1579902662-x2kd7kjwh2w5izcw\"},{\"key\":\"test-object\"},{\"x-goog-date\":\"20200123T043530Z\"},{\"x-goog-credential\":\"test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request\"},{\"x-goog-algorithm\":\"GOOG4-RSA-SHA256\"}],\"expiration\":\"2020-01-23T04:35:40Z\"}" + } + }, + { + "description": "POST Policy Within Content-Range", + "policyInput": { + "scheme": "https", + "bucket": "rsaposttest-1579902672-lpd47iogn6hx4sle", + "object": "test-object", + "expiration": 10, + "timestamp": "2020-01-23T04:35:30Z", + "conditions": { + "contentLengthRange": [ + 246, + 266 + ] + } }, - - { - "description": "POST Policy Cache-Control File Header", - "policyInput": { - "scheme": "https", - "bucket": "rsaposttest-1579902669-nwk5s7vvfjgdjs62", - "object": "test-object", - "expiration": 10, - "timestamp": "2020-01-23T04:35:30Z", - "fields": { - "acl": "public-read", - "cache-control": "public,max-age=86400" - } - }, - "policyOutput": { - "url": "https://storage.googleapis.com/rsaposttest-1579902669-nwk5s7vvfjgdjs62/", - "fields": { - "key": "test-object", - "acl": "public-read", - "cache-control": "public,max-age=86400", - "x-goog-algorithm": "GOOG4-RSA-SHA256", - "x-goog-credential": "test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request", - "x-goog-date": "20200123T043530Z", - "x-goog-signature": "7a6747dc40f0a8ba1bb0e9140d4299e6f9fd083017bbd98ed8ac21e05e577c70cfefcf26d0a0d378052aaf9b5511ee85f04bff03ffb1044c847c2cf624a4536495079d12d0391cecfa28010a8ee7dc99f57e93203e11b1257dc2a2a17542f0defff102f2bd2dba0439678d35e3ee2a7fb146ab282f77dec6d01a4bb002f96ba33fd70dbbe89919012a3b9a9f4c8058bf1249a8b34d1988e9bba5c73b650653262d05d5fabecaef5aaa8d3a2e70512db297f1aca65fb574bebfda728ed4b5715916679f94873f9fa2c3702f1a9dc4aa7a7c440138a9a419503d0029559d62869e70851247075c561b219c62719582b0a8257e4ce5123d19f87482cdbfe5c185f2", - "policy": "eyJjb25kaXRpb25zIjpbeyJhY2wiOiJwdWJsaWMtcmVhZCJ9LHsiY2FjaGUtY29udHJvbCI6InB1YmxpYyxtYXgtYWdlPTg2NDAwIn0seyJidWNrZXQiOiJyc2Fwb3N0dGVzdC0xNTc5OTAyNjY5LW53azVzN3Z2ZmpnZGpzNjIifSx7ImtleSI6InRlc3Qtb2JqZWN0In0seyJ4LWdvb2ctZGF0ZSI6IjIwMjAwMTIzVDA0MzUzMFoifSx7IngtZ29vZy1jcmVkZW50aWFsIjoidGVzdC1pYW0tY3JlZGVudGlhbHNAZHVtbXktcHJvamVjdC1pZC5pYW0uZ3NlcnZpY2VhY2NvdW50LmNvbS8yMDIwMDEyMy9hdXRvL3N0b3JhZ2UvZ29vZzRfcmVxdWVzdCJ9LHsieC1nb29nLWFsZ29yaXRobSI6IkdPT0c0LVJTQS1TSEEyNTYifV0sImV4cGlyYXRpb24iOiIyMDIwLTAxLTIzVDA0OjM1OjQwWiJ9" - }, - "expectedDecodedPolicy": "{\"conditions\":[{\"acl\":\"public-read\"},{\"cache-control\":\"public,max-age=86400\"},{\"bucket\":\"rsaposttest-1579902669-nwk5s7vvfjgdjs62\"},{\"key\":\"test-object\"},{\"x-goog-date\":\"20200123T043530Z\"},{\"x-goog-credential\":\"test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request\"},{\"x-goog-algorithm\":\"GOOG4-RSA-SHA256\"}],\"expiration\":\"2020-01-23T04:35:40Z\"}" - } + "policyOutput": { + "url": "https://storage.googleapis.com/rsaposttest-1579902672-lpd47iogn6hx4sle/", + "fields": { + "key": "test-object", + "x-goog-algorithm": "GOOG4-RSA-SHA256", + "x-goog-credential": "test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request", + "x-goog-date": "20200123T043530Z", + "x-goog-signature": 
"1d045155adcf3d0fe063d7b78ea1a4f86cdc8361f58ea90b4fd724c563a84d9b0e02a8b01e7a5c7587b32eb40839e28cf279bc8b4eb1e9a6f1c9bae372e799cea10ef34baaf310f99acd9849785a89fb69533c2ba8db6b6b4f87a1dcbbdeea8316f822092e6cad18b80f9610c219f239a606d182a092ae439ccbaa3543709faae8cc3410e9eafb2885f6f74b9ec4eb5982dfe43492cc8c863330314616f5cd34d4b2a3ec6ad857a9a47d68381d714b010fc243e17fe68b3ccdfe205222ca63bc4d7d7177dd7ec4e9376e3d3ae05a5d629b9ceceab127628c2669f35fa735dc01a225e6a7c98db930694f6e6a77e20ec0c8e509d230cf73cc530cdc237c6f079d", + "policy": "eyJjb25kaXRpb25zIjpbWyJjb250ZW50LWxlbmd0aC1yYW5nZSIsMjQ2LDI2Nl0seyJidWNrZXQiOiJyc2Fwb3N0dGVzdC0xNTc5OTAyNjcyLWxwZDQ3aW9nbjZoeDRzbGUifSx7ImtleSI6InRlc3Qtb2JqZWN0In0seyJ4LWdvb2ctZGF0ZSI6IjIwMjAwMTIzVDA0MzUzMFoifSx7IngtZ29vZy1jcmVkZW50aWFsIjoidGVzdC1pYW0tY3JlZGVudGlhbHNAZHVtbXktcHJvamVjdC1pZC5pYW0uZ3NlcnZpY2VhY2NvdW50LmNvbS8yMDIwMDEyMy9hdXRvL3N0b3JhZ2UvZ29vZzRfcmVxdWVzdCJ9LHsieC1nb29nLWFsZ29yaXRobSI6IkdPT0c0LVJTQS1TSEEyNTYifV0sImV4cGlyYXRpb24iOiIyMDIwLTAxLTIzVDA0OjM1OjQwWiJ9" + }, + "expectedDecodedPolicy": "{\"conditions\":[[\"content-length-range\",246,266],{\"bucket\":\"rsaposttest-1579902672-lpd47iogn6hx4sle\"},{\"key\":\"test-object\"},{\"x-goog-date\":\"20200123T043530Z\"},{\"x-goog-credential\":\"test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request\"},{\"x-goog-algorithm\":\"GOOG4-RSA-SHA256\"}],\"expiration\":\"2020-01-23T04:35:40Z\"}" + } + }, + { + "description": "POST Policy Cache-Control File Header", + "policyInput": { + "scheme": "https", + "bucket": "rsaposttest-1579902669-nwk5s7vvfjgdjs62", + "object": "test-object", + "expiration": 10, + "timestamp": "2020-01-23T04:35:30Z", + "fields": { + "acl": "public-read", + "cache-control": "public,max-age=86400" + } + }, + "policyOutput": { + "url": "https://storage.googleapis.com/rsaposttest-1579902669-nwk5s7vvfjgdjs62/", + "fields": { + "key": "test-object", + "acl": "public-read", + "cache-control": "public,max-age=86400", + "x-goog-algorithm": "GOOG4-RSA-SHA256", + "x-goog-credential": "test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request", + "x-goog-date": "20200123T043530Z", + "x-goog-signature": "7a6747dc40f0a8ba1bb0e9140d4299e6f9fd083017bbd98ed8ac21e05e577c70cfefcf26d0a0d378052aaf9b5511ee85f04bff03ffb1044c847c2cf624a4536495079d12d0391cecfa28010a8ee7dc99f57e93203e11b1257dc2a2a17542f0defff102f2bd2dba0439678d35e3ee2a7fb146ab282f77dec6d01a4bb002f96ba33fd70dbbe89919012a3b9a9f4c8058bf1249a8b34d1988e9bba5c73b650653262d05d5fabecaef5aaa8d3a2e70512db297f1aca65fb574bebfda728ed4b5715916679f94873f9fa2c3702f1a9dc4aa7a7c440138a9a419503d0029559d62869e70851247075c561b219c62719582b0a8257e4ce5123d19f87482cdbfe5c185f2", + "policy": "eyJjb25kaXRpb25zIjpbeyJhY2wiOiJwdWJsaWMtcmVhZCJ9LHsiY2FjaGUtY29udHJvbCI6InB1YmxpYyxtYXgtYWdlPTg2NDAwIn0seyJidWNrZXQiOiJyc2Fwb3N0dGVzdC0xNTc5OTAyNjY5LW53azVzN3Z2ZmpnZGpzNjIifSx7ImtleSI6InRlc3Qtb2JqZWN0In0seyJ4LWdvb2ctZGF0ZSI6IjIwMjAwMTIzVDA0MzUzMFoifSx7IngtZ29vZy1jcmVkZW50aWFsIjoidGVzdC1pYW0tY3JlZGVudGlhbHNAZHVtbXktcHJvamVjdC1pZC5pYW0uZ3NlcnZpY2VhY2NvdW50LmNvbS8yMDIwMDEyMy9hdXRvL3N0b3JhZ2UvZ29vZzRfcmVxdWVzdCJ9LHsieC1nb29nLWFsZ29yaXRobSI6IkdPT0c0LVJTQS1TSEEyNTYifV0sImV4cGlyYXRpb24iOiIyMDIwLTAxLTIzVDA0OjM1OjQwWiJ9" + }, + "expectedDecodedPolicy": 
"{\"conditions\":[{\"acl\":\"public-read\"},{\"cache-control\":\"public,max-age=86400\"},{\"bucket\":\"rsaposttest-1579902669-nwk5s7vvfjgdjs62\"},{\"key\":\"test-object\"},{\"x-goog-date\":\"20200123T043530Z\"},{\"x-goog-credential\":\"test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request\"},{\"x-goog-algorithm\":\"GOOG4-RSA-SHA256\"}],\"expiration\":\"2020-01-23T04:35:40Z\"}" + } + }, + { + "description": "POST Policy Success With Status", + "policyInput": { + "scheme": "https", + "bucket": "rsaposttest-1579902678-pt5yms55j47r6qy4", + "object": "test-object", + "expiration": 10, + "timestamp": "2020-01-23T04:35:30Z", + "fields": { + "success_action_status": "200" + } }, - - { - "description": "POST Policy Success With Status", - "policyInput": { - "scheme": "https", - "bucket": "rsaposttest-1579902678-pt5yms55j47r6qy4", - "object": "test-object", - "expiration": 10, - "timestamp": "2020-01-23T04:35:30Z", - "fields": { - "success_action_status": "200" - } - }, - "policyOutput": { - "url": "https://storage.googleapis.com/rsaposttest-1579902678-pt5yms55j47r6qy4/", - "fields": { - "key": "test-object", - "success_action_status": "200", - "x-goog-algorithm": "GOOG4-RSA-SHA256", - "x-goog-credential": "test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request", - "x-goog-date": "20200123T043530Z", - "x-goog-signature": "715d3148bb583601983680441caef60a5b6c14b62e62672e8cd5b7ca970837259c573121fa127635432f10fc6321775d6c4fc0601004dc6708887356256f95f0b7ea02ec347f75ad9884f5b02d7cdfa99d777edb936d0334a07bcfd9742c67a2b025b2de9f2beec43461dc5d18ad84cd6d0f069e5ecacda4367e5035116560751978cfc5a2ebc459d92d3d48ee8b98a3f24f84a12bf3c64c52b731c4220b3ed787c7314eb604525c807abf259e0d9c50848c08e57d2eb12ce5fa45337f6466e78e0c2e0d19a6fa5b70d6347d49c654bd95ba544006625530bbf5e6d1f1e204f2b39396a3091edc30229ed64680768f37bfdac29c92b1274e94e929639159c664", - "policy": "eyJjb25kaXRpb25zIjpbeyJzdWNjZXNzX2FjdGlvbl9zdGF0dXMiOiIyMDAifSx7ImJ1Y2tldCI6InJzYXBvc3R0ZXN0LTE1Nzk5MDI2NzgtcHQ1eW1zNTVqNDdyNnF5NCJ9LHsia2V5IjoidGVzdC1vYmplY3QifSx7IngtZ29vZy1kYXRlIjoiMjAyMDAxMjNUMDQzNTMwWiJ9LHsieC1nb29nLWNyZWRlbnRpYWwiOiJ0ZXN0LWlhbS1jcmVkZW50aWFsc0BkdW1teS1wcm9qZWN0LWlkLmlhbS5nc2VydmljZWFjY291bnQuY29tLzIwMjAwMTIzL2F1dG8vc3RvcmFnZS9nb29nNF9yZXF1ZXN0In0seyJ4LWdvb2ctYWxnb3JpdGhtIjoiR09PRzQtUlNBLVNIQTI1NiJ9XSwiZXhwaXJhdGlvbiI6IjIwMjAtMDEtMjNUMDQ6MzU6NDBaIn0=" - }, - "expectedDecodedPolicy": "{\"conditions\":[{\"success_action_status\":\"200\"},{\"bucket\":\"rsaposttest-1579902678-pt5yms55j47r6qy4\"},{\"key\":\"test-object\"},{\"x-goog-date\":\"20200123T043530Z\"},{\"x-goog-credential\":\"test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request\"},{\"x-goog-algorithm\":\"GOOG4-RSA-SHA256\"}],\"expiration\":\"2020-01-23T04:35:40Z\"}" - } + "policyOutput": { + "url": "https://storage.googleapis.com/rsaposttest-1579902678-pt5yms55j47r6qy4/", + "fields": { + "key": "test-object", + "success_action_status": "200", + "x-goog-algorithm": "GOOG4-RSA-SHA256", + "x-goog-credential": "test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request", + "x-goog-date": "20200123T043530Z", + "x-goog-signature": 
"715d3148bb583601983680441caef60a5b6c14b62e62672e8cd5b7ca970837259c573121fa127635432f10fc6321775d6c4fc0601004dc6708887356256f95f0b7ea02ec347f75ad9884f5b02d7cdfa99d777edb936d0334a07bcfd9742c67a2b025b2de9f2beec43461dc5d18ad84cd6d0f069e5ecacda4367e5035116560751978cfc5a2ebc459d92d3d48ee8b98a3f24f84a12bf3c64c52b731c4220b3ed787c7314eb604525c807abf259e0d9c50848c08e57d2eb12ce5fa45337f6466e78e0c2e0d19a6fa5b70d6347d49c654bd95ba544006625530bbf5e6d1f1e204f2b39396a3091edc30229ed64680768f37bfdac29c92b1274e94e929639159c664", + "policy": "eyJjb25kaXRpb25zIjpbeyJzdWNjZXNzX2FjdGlvbl9zdGF0dXMiOiIyMDAifSx7ImJ1Y2tldCI6InJzYXBvc3R0ZXN0LTE1Nzk5MDI2NzgtcHQ1eW1zNTVqNDdyNnF5NCJ9LHsia2V5IjoidGVzdC1vYmplY3QifSx7IngtZ29vZy1kYXRlIjoiMjAyMDAxMjNUMDQzNTMwWiJ9LHsieC1nb29nLWNyZWRlbnRpYWwiOiJ0ZXN0LWlhbS1jcmVkZW50aWFsc0BkdW1teS1wcm9qZWN0LWlkLmlhbS5nc2VydmljZWFjY291bnQuY29tLzIwMjAwMTIzL2F1dG8vc3RvcmFnZS9nb29nNF9yZXF1ZXN0In0seyJ4LWdvb2ctYWxnb3JpdGhtIjoiR09PRzQtUlNBLVNIQTI1NiJ9XSwiZXhwaXJhdGlvbiI6IjIwMjAtMDEtMjNUMDQ6MzU6NDBaIn0=" + }, + "expectedDecodedPolicy": "{\"conditions\":[{\"success_action_status\":\"200\"},{\"bucket\":\"rsaposttest-1579902678-pt5yms55j47r6qy4\"},{\"key\":\"test-object\"},{\"x-goog-date\":\"20200123T043530Z\"},{\"x-goog-credential\":\"test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request\"},{\"x-goog-algorithm\":\"GOOG4-RSA-SHA256\"}],\"expiration\":\"2020-01-23T04:35:40Z\"}" + } + }, + { + "description": "POST Policy Success With Redirect", + "policyInput": { + "scheme": "https", + "bucket": "rsaposttest-1579902671-6ldm6caw4se52vrx", + "object": "test-object", + "expiration": 10, + "timestamp": "2020-01-23T04:35:30Z", + "fields": { + "success_action_redirect": "http://www.google.com/" + } }, - - { - "description": "POST Policy Success With Redirect", - "policyInput": { - "scheme": "https", - "bucket": "rsaposttest-1579902671-6ldm6caw4se52vrx", - "object": "test-object", - "expiration": 10, - "timestamp": "2020-01-23T04:35:30Z", - "fields": { - "success_action_redirect": "http://www.google.com/" - } - }, - "policyOutput": { - "url": "https://storage.googleapis.com/rsaposttest-1579902671-6ldm6caw4se52vrx/", - "fields": { - "key": "test-object", - "success_action_redirect": "http://www.google.com/", - "x-goog-algorithm": "GOOG4-RSA-SHA256", - "x-goog-credential": "test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request", - "x-goog-date": "20200123T043530Z", - "x-goog-signature": "81fafe1673360887b2cb2650c80e59681ad6792da7ebe1eb1d281df7e7beff257e82a1007096811ab36c93091f2ae623f0e90a27cf925a8002f8234ddb49315cc5968fa4f209aca80f1e9f214ff4d24522bb4a1904ea365e852bcd3a0bdb0ab5bacc3f82b70d04e30afc8f82e277c6837006ad6b5eaf08423d88400e88cba979f3474ed4dc8cf10181dfcb6dc9850096ee07b7180891cb806394d1b0c1f0708640474ace629b5fb75366ab370d909ebdcd30fe5d0f1c33947ca2c0f26c05543bc381fabe514772d9b6f1f2b6cf9ac40a0bd266fb52ebe9043e721e338f40cbd3f0d84838d29bece5c76e4fad115400017b5187dd5be3094a3f90865032776fc7", - "policy": 
"eyJjb25kaXRpb25zIjpbeyJzdWNjZXNzX2FjdGlvbl9yZWRpcmVjdCI6Imh0dHA6Ly93d3cuZ29vZ2xlLmNvbS8ifSx7ImJ1Y2tldCI6InJzYXBvc3R0ZXN0LTE1Nzk5MDI2NzEtNmxkbTZjYXc0c2U1MnZyeCJ9LHsia2V5IjoidGVzdC1vYmplY3QifSx7IngtZ29vZy1kYXRlIjoiMjAyMDAxMjNUMDQzNTMwWiJ9LHsieC1nb29nLWNyZWRlbnRpYWwiOiJ0ZXN0LWlhbS1jcmVkZW50aWFsc0BkdW1teS1wcm9qZWN0LWlkLmlhbS5nc2VydmljZWFjY291bnQuY29tLzIwMjAwMTIzL2F1dG8vc3RvcmFnZS9nb29nNF9yZXF1ZXN0In0seyJ4LWdvb2ctYWxnb3JpdGhtIjoiR09PRzQtUlNBLVNIQTI1NiJ9XSwiZXhwaXJhdGlvbiI6IjIwMjAtMDEtMjNUMDQ6MzU6NDBaIn0=" - }, - "expectedDecodedPolicy": "{\"conditions\":[{\"success_action_redirect\":\"http://www.google.com/\"},{\"bucket\":\"rsaposttest-1579902671-6ldm6caw4se52vrx\"},{\"key\":\"test-object\"},{\"x-goog-date\":\"20200123T043530Z\"},{\"x-goog-credential\":\"test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request\"},{\"x-goog-algorithm\":\"GOOG4-RSA-SHA256\"}],\"expiration\":\"2020-01-23T04:35:40Z\"}" - } + "policyOutput": { + "url": "https://storage.googleapis.com/rsaposttest-1579902671-6ldm6caw4se52vrx/", + "fields": { + "key": "test-object", + "success_action_redirect": "http://www.google.com/", + "x-goog-algorithm": "GOOG4-RSA-SHA256", + "x-goog-credential": "test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request", + "x-goog-date": "20200123T043530Z", + "x-goog-signature": "81fafe1673360887b2cb2650c80e59681ad6792da7ebe1eb1d281df7e7beff257e82a1007096811ab36c93091f2ae623f0e90a27cf925a8002f8234ddb49315cc5968fa4f209aca80f1e9f214ff4d24522bb4a1904ea365e852bcd3a0bdb0ab5bacc3f82b70d04e30afc8f82e277c6837006ad6b5eaf08423d88400e88cba979f3474ed4dc8cf10181dfcb6dc9850096ee07b7180891cb806394d1b0c1f0708640474ace629b5fb75366ab370d909ebdcd30fe5d0f1c33947ca2c0f26c05543bc381fabe514772d9b6f1f2b6cf9ac40a0bd266fb52ebe9043e721e338f40cbd3f0d84838d29bece5c76e4fad115400017b5187dd5be3094a3f90865032776fc7", + "policy": "eyJjb25kaXRpb25zIjpbeyJzdWNjZXNzX2FjdGlvbl9yZWRpcmVjdCI6Imh0dHA6Ly93d3cuZ29vZ2xlLmNvbS8ifSx7ImJ1Y2tldCI6InJzYXBvc3R0ZXN0LTE1Nzk5MDI2NzEtNmxkbTZjYXc0c2U1MnZyeCJ9LHsia2V5IjoidGVzdC1vYmplY3QifSx7IngtZ29vZy1kYXRlIjoiMjAyMDAxMjNUMDQzNTMwWiJ9LHsieC1nb29nLWNyZWRlbnRpYWwiOiJ0ZXN0LWlhbS1jcmVkZW50aWFsc0BkdW1teS1wcm9qZWN0LWlkLmlhbS5nc2VydmljZWFjY291bnQuY29tLzIwMjAwMTIzL2F1dG8vc3RvcmFnZS9nb29nNF9yZXF1ZXN0In0seyJ4LWdvb2ctYWxnb3JpdGhtIjoiR09PRzQtUlNBLVNIQTI1NiJ9XSwiZXhwaXJhdGlvbiI6IjIwMjAtMDEtMjNUMDQ6MzU6NDBaIn0=" + }, + "expectedDecodedPolicy": "{\"conditions\":[{\"success_action_redirect\":\"http://www.google.com/\"},{\"bucket\":\"rsaposttest-1579902671-6ldm6caw4se52vrx\"},{\"key\":\"test-object\"},{\"x-goog-date\":\"20200123T043530Z\"},{\"x-goog-credential\":\"test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request\"},{\"x-goog-algorithm\":\"GOOG4-RSA-SHA256\"}],\"expiration\":\"2020-01-23T04:35:40Z\"}" + } + }, + { + "description": "POST Policy Character Escaping", + "policyInput": { + "scheme": "https", + "bucket": "rsaposttest-1579902671-6ldm6caw4se52vrx", + "object": "$test-object-é", + "expiration": 10, + "timestamp": "2020-01-23T04:35:30Z", + "fields": { + "success_action_redirect": "http://www.google.com/", + "x-goog-meta-custom-1": "$test-object-é-metadata" + } + }, + "policyOutput": { + "url": 
"https://storage.googleapis.com/rsaposttest-1579902671-6ldm6caw4se52vrx/", + "fields": { + "key": "$test-object-é", + "success_action_redirect": "http://www.google.com/", + "x-goog-meta-custom-1": "$test-object-é-metadata", + "x-goog-algorithm": "GOOG4-RSA-SHA256", + "x-goog-credential": "test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request", + "x-goog-date": "20200123T043530Z", + "x-goog-signature": "5eaf9f931bc4ab76dbf2c95d1bc08843a5cfadc4d5de87b2503e8fb791c7b9b6948f77b0d85f9b336a9683abffc648879d0d92cf94c5781407b057a9049fb7bd17625171328acc9e7c0b094739ec992e7a834b1698a370dc2d7ad19abaf5a02c158a6d71a872ad60ae07ae0c3952c298d25106fc062902db33e91a49199ffc2eff0eab191dcb4339c4afb2d82cbb3871447c4fd9ef524d0571083bdbd041f99f4a8a35395b9e2ed04c8994cdd9c5bb7396115adfd2c433d0647f756e5cc4e5b9fd7a587d50c83dc8407b4d372450219b77bcf278d0cba6a8afdf4b38a4ed6caef422acd299e0477f292d7fa688a55080d5e0aa7fddb09d81e700ad986ae77908", + "policy": "eyJjb25kaXRpb25zIjpbeyJzdWNjZXNzX2FjdGlvbl9yZWRpcmVjdCI6Imh0dHA6Ly93d3cuZ29vZ2xlLmNvbS8ifSx7IngtZ29vZy1tZXRhLWN1c3RvbS0xIjoiJHRlc3Qtb2JqZWN0LVx1MDBlOS1tZXRhZGF0YSJ9LHsiYnVja2V0IjoicnNhcG9zdHRlc3QtMTU3OTkwMjY3MS02bGRtNmNhdzRzZTUydnJ4In0seyJrZXkiOiIkdGVzdC1vYmplY3QtXHUwMGU5In0seyJ4LWdvb2ctZGF0ZSI6IjIwMjAwMTIzVDA0MzUzMFoifSx7IngtZ29vZy1jcmVkZW50aWFsIjoidGVzdC1pYW0tY3JlZGVudGlhbHNAZHVtbXktcHJvamVjdC1pZC5pYW0uZ3NlcnZpY2VhY2NvdW50LmNvbS8yMDIwMDEyMy9hdXRvL3N0b3JhZ2UvZ29vZzRfcmVxdWVzdCJ9LHsieC1nb29nLWFsZ29yaXRobSI6IkdPT0c0LVJTQS1TSEEyNTYifV0sImV4cGlyYXRpb24iOiIyMDIwLTAxLTIzVDA0OjM1OjQwWiJ9" + }, + "expectedDecodedPolicy": "{\"conditions\":[{\"success_action_redirect\":\"http://www.google.com/\"},{\"x-goog-meta-custom-1\":\"$test-object-\u00e9-metadata\"},{\"bucket\":\"rsaposttest-1579902671-6ldm6caw4se52vrx\"},{\"key\":\"$test-object-\u00e9\"},{\"x-goog-date\":\"20200123T043530Z\"},{\"x-goog-credential\":\"test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request\"},{\"x-goog-algorithm\":\"GOOG4-RSA-SHA256\"}],\"expiration\":\"2020-01-23T04:35:40Z\"}" + } + }, + { + "description": "POST Policy With Additional Metadata", + "policyInput": { + "scheme": "https", + "bucket": "rsaposttest-1579902671-6ldm6caw4se52vrx", + "object": "test-object", + "expiration": 10, + "timestamp": "2020-01-23T04:35:30Z", + "fields": { + "content-disposition": "attachment; filename=\"~._-%=/é0Aa\"", + "content-encoding": "gzip", + "content-type": "text/plain", + "success_action_redirect": "http://www.google.com/" + } }, - - { - "description": "POST Policy Character Escaping", - "policyInput": { - "scheme": "https", - "bucket": "rsaposttest-1579902671-6ldm6caw4se52vrx", - "object": "$test-object-é", - "expiration": 10, - "timestamp": "2020-01-23T04:35:30Z", - "fields": { - "success_action_redirect": "http://www.google.com/", - "x-goog-meta-custom-1": "$test-object-é-metadata" - } - }, - "policyOutput": { - "url": "https://storage.googleapis.com/rsaposttest-1579902671-6ldm6caw4se52vrx/", - "fields": { - "key": "$test-object-é", - "success_action_redirect": "http://www.google.com/", - "x-goog-meta-custom-1": "$test-object-é-metadata", - "x-goog-algorithm": "GOOG4-RSA-SHA256", - "x-goog-credential": 
"test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request", - "x-goog-date": "20200123T043530Z", - "x-goog-signature": "5eaf9f931bc4ab76dbf2c95d1bc08843a5cfadc4d5de87b2503e8fb791c7b9b6948f77b0d85f9b336a9683abffc648879d0d92cf94c5781407b057a9049fb7bd17625171328acc9e7c0b094739ec992e7a834b1698a370dc2d7ad19abaf5a02c158a6d71a872ad60ae07ae0c3952c298d25106fc062902db33e91a49199ffc2eff0eab191dcb4339c4afb2d82cbb3871447c4fd9ef524d0571083bdbd041f99f4a8a35395b9e2ed04c8994cdd9c5bb7396115adfd2c433d0647f756e5cc4e5b9fd7a587d50c83dc8407b4d372450219b77bcf278d0cba6a8afdf4b38a4ed6caef422acd299e0477f292d7fa688a55080d5e0aa7fddb09d81e700ad986ae77908", - "policy": "eyJjb25kaXRpb25zIjpbeyJzdWNjZXNzX2FjdGlvbl9yZWRpcmVjdCI6Imh0dHA6Ly93d3cuZ29vZ2xlLmNvbS8ifSx7IngtZ29vZy1tZXRhLWN1c3RvbS0xIjoiJHRlc3Qtb2JqZWN0LVx1MDBlOS1tZXRhZGF0YSJ9LHsiYnVja2V0IjoicnNhcG9zdHRlc3QtMTU3OTkwMjY3MS02bGRtNmNhdzRzZTUydnJ4In0seyJrZXkiOiIkdGVzdC1vYmplY3QtXHUwMGU5In0seyJ4LWdvb2ctZGF0ZSI6IjIwMjAwMTIzVDA0MzUzMFoifSx7IngtZ29vZy1jcmVkZW50aWFsIjoidGVzdC1pYW0tY3JlZGVudGlhbHNAZHVtbXktcHJvamVjdC1pZC5pYW0uZ3NlcnZpY2VhY2NvdW50LmNvbS8yMDIwMDEyMy9hdXRvL3N0b3JhZ2UvZ29vZzRfcmVxdWVzdCJ9LHsieC1nb29nLWFsZ29yaXRobSI6IkdPT0c0LVJTQS1TSEEyNTYifV0sImV4cGlyYXRpb24iOiIyMDIwLTAxLTIzVDA0OjM1OjQwWiJ9" - }, - "expectedDecodedPolicy": "{\"conditions\":[{\"success_action_redirect\":\"http://www.google.com/\"},{\"x-goog-meta-custom-1\":\"$test-object-\u00e9-metadata\"},{\"bucket\":\"rsaposttest-1579902671-6ldm6caw4se52vrx\"},{\"key\":\"$test-object-\u00e9\"},{\"x-goog-date\":\"20200123T043530Z\"},{\"x-goog-credential\":\"test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request\"},{\"x-goog-algorithm\":\"GOOG4-RSA-SHA256\"}],\"expiration\":\"2020-01-23T04:35:40Z\"}" - } + "policyOutput": { + "url": "https://storage.googleapis.com/rsaposttest-1579902671-6ldm6caw4se52vrx/", + "fields": { + "content-disposition": "attachment; filename=\"~._-%=/é0Aa\"", + "content-encoding": "gzip", + "content-type": "text/plain", + "key": "test-object", + "success_action_redirect": "http://www.google.com/", + "x-goog-algorithm": "GOOG4-RSA-SHA256", + "x-goog-credential": "test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request", + "x-goog-date": "20200123T043530Z", + "x-goog-signature": "26d9a4e0d9eb5f48267b121b588b1ce8b27e2db7fc5b2a8c42ba6d72010a0876fe234c5acc939a9152b57bbce67f07424afb21030f214cab3ae3208f00026bb8b7eb92b961011afe2a109babc25d11db5b4059e982552ef100dc17adf787a26eaa5a7c80fd947f1565dbc1b513c436bfe3b9dd1a5a9a06c6436b12a7c78214190814ca263e2d90aa20bc2ff01167381dd0be22de1e70e7582e6dd404b666273746f4f535a2ed711d40a760ba699ddf6b5e1faff13cd691729824f65a2162cd3ffb95d171c2f6f5b403b28361cd2e91543c6e2acd2f18fe42baf42e2b415475c297ae82ea19924b380a1b389a6d4e44567a022efde15f2f8ba06ab4cc8dd77006", + "policy": 
"eyJjb25kaXRpb25zIjpbeyJjb250ZW50LWRpc3Bvc2l0aW9uIjoiYXR0YWNobWVudDsgZmlsZW5hbWU9XCJ+Ll8tJT0vXHUwMGU5MEFhXCIifSx7ImNvbnRlbnQtZW5jb2RpbmciOiJnemlwIn0seyJjb250ZW50LXR5cGUiOiJ0ZXh0L3BsYWluIn0seyJzdWNjZXNzX2FjdGlvbl9yZWRpcmVjdCI6Imh0dHA6Ly93d3cuZ29vZ2xlLmNvbS8ifSx7ImJ1Y2tldCI6InJzYXBvc3R0ZXN0LTE1Nzk5MDI2NzEtNmxkbTZjYXc0c2U1MnZyeCJ9LHsia2V5IjoidGVzdC1vYmplY3QifSx7IngtZ29vZy1kYXRlIjoiMjAyMDAxMjNUMDQzNTMwWiJ9LHsieC1nb29nLWNyZWRlbnRpYWwiOiJ0ZXN0LWlhbS1jcmVkZW50aWFsc0BkdW1teS1wcm9qZWN0LWlkLmlhbS5nc2VydmljZWFjY291bnQuY29tLzIwMjAwMTIzL2F1dG8vc3RvcmFnZS9nb29nNF9yZXF1ZXN0In0seyJ4LWdvb2ctYWxnb3JpdGhtIjoiR09PRzQtUlNBLVNIQTI1NiJ9XSwiZXhwaXJhdGlvbiI6IjIwMjAtMDEtMjNUMDQ6MzU6NDBaIn0=" + }, + "expectedDecodedPolicy": "{\"conditions\":[{\"content-disposition\":\"attachment; filename=\"~._-%=/é0Aa\"\"},{\"content-encoding\":\"gzip\"},{\"content-type\":\"text/plain\"},{\"success_action_redirect\":\"http://www.google.com/\"},{\"bucket\":\"rsaposttest-1579902671-6ldm6caw4se52vrx\"},{\"key\":\"test-object\"},{\"x-goog-date\":\"20200123T043530Z\"},{\"x-goog-credential\":\"test-iam-credentials@test-project-id.iam.gserviceaccount.com/20200123/auto/storage/goog4_request\"},{\"x-goog-algorithm\":\"GOOG4-RSA-SHA256\"}],\"expiration\":\"2020-01-23T04:35:40Z\"}" } + } ] -} \ No newline at end of file + } \ No newline at end of file