diff --git a/.repo-metadata.json b/.repo-metadata.json index cf0adc4a..47b2eb69 100644 --- a/.repo-metadata.json +++ b/.repo-metadata.json @@ -6,7 +6,7 @@ "issue_tracker": "https://issuetracker.google.com/savedsearches/559758", "release_level": "stable", "language": "python", - "library_type": "GAPIC_AUTO", + "library_type": "GAPIC_COMBO", "repo": "googleapis/python-speech", "distribution_name": "google-cloud-speech", "api_id": "speech.googleapis.com", diff --git a/CHANGELOG.md b/CHANGELOG.md index 4cf0230d..ea6819dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,26 @@ [1]: https://pypi.org/project/google-cloud-speech/#history +## [2.16.2](https://github.com/googleapis/python-speech/compare/v2.16.1...v2.16.2) (2022-10-07) + + +### Bug Fixes + +* **deps:** Allow protobuf 3.19.5 ([#456](https://github.com/googleapis/python-speech/issues/456)) ([01f7324](https://github.com/googleapis/python-speech/commit/01f732416cebc4ead860eea1e987afe136018f62)) + + +### Documentation + +* **samples:** Add create_recognizer code sample ([#450](https://github.com/googleapis/python-speech/issues/450)) ([4ea3edc](https://github.com/googleapis/python-speech/commit/4ea3edc7b62b1a815caa7af34383e9cd2eddf5fe)) +* **samples:** Add quickstart code sample for STT V2 ([#451](https://github.com/googleapis/python-speech/issues/451)) ([5b1ce98](https://github.com/googleapis/python-speech/commit/5b1ce983f812a63837c8f9338ed46b11e2c21ddc)) + +## [2.16.1](https://github.com/googleapis/python-speech/compare/v2.16.0...v2.16.1) (2022-10-05) + + +### Bug Fixes + +* Remove 
SpeechHelpers from STT V2 client library ([#452](https://github.com/googleapis/python-speech/issues/452)) ([acfb24f](https://github.com/googleapis/python-speech/commit/acfb24f69e25c369388675f08e41062f090b4817)) + ## [2.16.0](https://github.com/googleapis/python-speech/compare/v2.15.1...v2.16.0) (2022-10-03) diff --git a/google/cloud/speech_v2/__init__.py b/google/cloud/speech_v2/__init__.py index 2e218826..9cca1200 100644 --- a/google/cloud/speech_v2/__init__.py +++ b/google/cloud/speech_v2/__init__.py @@ -69,13 +69,6 @@ from .types.cloud_speech import UpdateRecognizerRequest from .types.cloud_speech import WordInfo -from google.cloud.speech_v1.helpers import SpeechHelpers - - -class SpeechClient(SpeechHelpers, SpeechClient): - __doc__ = SpeechClient.__doc__ - - __all__ = ( "SpeechAsyncClient", "AutoDetectDecodingConfig", diff --git a/owlbot.py b/owlbot.py index 4c196779..cda41e36 100644 --- a/owlbot.py +++ b/owlbot.py @@ -23,11 +23,12 @@ default_version = "v1" for library in s.get_staging_dirs(default_version): - # Add the manually written SpeechHelpers to v1 and v1p1beta1 - # See google/cloud/speech_v1/helpers.py for details - count = s.replace(library / f"google/cloud/speech_{library.name}/__init__.py", - """__all__ = \(""", - """from google.cloud.speech_v1.helpers import SpeechHelpers + if "v1" in library.name: + # Add the manually written SpeechHelpers to v1 and v1p1beta1 + # See google/cloud/speech_v1/helpers.py for details + count = s.replace(library / f"google/cloud/speech_{library.name}/__init__.py", + """__all__ = \(""", + """from google.cloud.speech_v1.helpers import SpeechHelpers class SpeechClient(SpeechHelpers, SpeechClient): __doc__ = SpeechClient.__doc__ @@ -35,6 +36,7 @@ class SpeechClient(SpeechHelpers, SpeechClient): __all__ = ( """, ) + assert count == 1 if library.name == "v1": # Import from speech_v1 to get the 
client with SpeechHelpers @@ -42,9 +44,8 @@ class SpeechClient(SpeechHelpers, SpeechClient): """from google\.cloud\.speech_v1\.services\.speech\.client import SpeechClient""", """from google.cloud.speech_v1 import SpeechClient""" ) + assert count == 1 - # Don't move over __init__.py, as we modify it to make the generated client - # use helpers.py. s.move(library, excludes=["setup.py"]) s.remove_staging_dirs() diff --git a/samples/microphone/requirements.txt b/samples/microphone/requirements.txt index 8a059dd5..4dfb57e0 100644 --- a/samples/microphone/requirements.txt +++ b/samples/microphone/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-speech==2.15.1 +google-cloud-speech==2.16.1 pyaudio==0.2.12 six==1.16.0 diff --git a/samples/snippets/adaptation_v2_custom_class_reference.py b/samples/snippets/adaptation_v2_custom_class_reference.py new file mode 100644 index 00000000..542b0d51 --- /dev/null +++ b/samples/snippets/adaptation_v2_custom_class_reference.py @@ -0,0 +1,92 @@ +# Copyright 2022 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +# [START speech_adaptation_v2_custom_class_reference] +import io + +from google.cloud.speech_v2 import SpeechClient +from google.cloud.speech_v2.types import cloud_speech + + +def adaptation_v2_custom_class_reference(project_id, recognizer_id, phrase_set_id, custom_class_id, audio_file): + # Instantiates a client + client = SpeechClient() + + request = cloud_speech.CreateRecognizerRequest( + parent=f"projects/{project_id}/locations/global", + recognizer_id=recognizer_id, + recognizer=cloud_speech.Recognizer( + language_codes=["en-US"], model="latest_short" + ), + ) + + # Creates a Recognizer + operation = client.create_recognizer(request=request) + recognizer = operation.result() + + # Reads a file as bytes + with io.open(audio_file, "rb") as f: + content = f.read() + + # Create a persistent CustomClass to reference in phrases + request = cloud_speech.CreateCustomClassRequest( + parent=f"projects/{project_id}/locations/global", + custom_class_id=custom_class_id, + custom_class=cloud_speech.CustomClass(items=[{"value": "Keem"}])) + + operation = client.create_custom_class(request=request) + custom_class = operation.result() + + # Create a persistent PhraseSet to reference in a recognition request + request = cloud_speech.CreatePhraseSetRequest( + parent=f"projects/{project_id}/locations/global", + phrase_set_id=phrase_set_id, + phrase_set=cloud_speech.PhraseSet(phrases=[{"value": f"${{{custom_class.name}}}", "boost": 20}])) + + operation = client.create_phrase_set(request=request) + phrase_set = operation.result() + + # Add a reference of the PhraseSet into the recognition request + adaptation = cloud_speech.SpeechAdaptation( + phrase_sets=[ + cloud_speech.SpeechAdaptation.AdaptationPhraseSet( + phrase_set=phrase_set.name + ) + ] + ) + config = cloud_speech.RecognitionConfig( + auto_decoding_config={}, adaptation=adaptation + ) + + print(custom_class) + print(phrase_set) + print(config) + + request = cloud_speech.RecognizeRequest( + recognizer=recognizer.name, 
config=config, content=content + ) + + # Transcribes the audio into text + response = client.recognize(request=request) + + for result in response.results: + print("Transcript: {}".format(result.alternatives[0].transcript)) + + return response +# [END speech_adaptation_v2_custom_class_reference] + + +if __name__ == "__main__": + adaptation_v2_custom_class_reference() diff --git a/samples/snippets/adaptation_v2_custom_class_reference_test.py b/samples/snippets/adaptation_v2_custom_class_reference_test.py new file mode 100644 index 00000000..a76aa6c9 --- /dev/null +++ b/samples/snippets/adaptation_v2_custom_class_reference_test.py @@ -0,0 +1,70 @@ +# Copyright 2022, Google, Inc. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import re +from uuid import uuid4 + +from google.cloud.speech_v2 import SpeechClient +from google.cloud.speech_v2.types import cloud_speech + +import adaptation_v2_custom_class_reference + +RESOURCES = os.path.join(os.path.dirname(__file__), "resources") + + +def delete_recognizer(name): + client = SpeechClient() + request = cloud_speech.DeleteRecognizerRequest(name=name) + client.delete_recognizer(request=request) + + +def delete_phrase_set(name): + client = SpeechClient() + request = cloud_speech.DeletePhraseSetRequest(name=name) + client.delete_phrase_set(request=request) + + +def delete_custom_class(name): + client = SpeechClient() + request = cloud_speech.DeleteCustomClassRequest(name=name) + client.delete_custom_class(request=request) + + +def test_adaptation_v2_custom_class_reference(capsys): + project_id = os.getenv("GOOGLE_CLOUD_PROJECT") + + recognizer_id = "recognizer-" + str(uuid4()) + phrase_set_id = "phrase-set-" + str(uuid4()) + custom_class_id = "custom-class-" + str(uuid4()) + response = adaptation_v2_custom_class_reference.adaptation_v2_custom_class_reference( + project_id, recognizer_id, phrase_set_id, custom_class_id, os.path.join(RESOURCES, "baby_keem.wav") + ) + + assert re.search( + r"play Baby Keem", + response.results[0].alternatives[0].transcript, + re.DOTALL | re.I, + ) + + delete_recognizer( + f"projects/{project_id}/locations/global/recognizers/{recognizer_id}" + ) + + delete_phrase_set( + f"projects/{project_id}/locations/global/phraseSets/{phrase_set_id}" + ) + + delete_custom_class( + f"projects/{project_id}/locations/global/customClasses/{custom_class_id}" + ) diff --git a/samples/snippets/adaptation_v2_inline_custom_class.py b/samples/snippets/adaptation_v2_inline_custom_class.py new file mode 100644 index 00000000..060a0a56 --- /dev/null +++ b/samples/snippets/adaptation_v2_inline_custom_class.py @@ -0,0 +1,73 @@ +# Copyright 2022 Google Inc. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# [START speech_adaptation_v2_inline_custom_class] +import io + +from google.cloud.speech_v2 import SpeechClient +from google.cloud.speech_v2.types import cloud_speech + + +def adaptation_v2_inline_custom_class(project_id, recognizer_id, audio_file): + # Instantiates a client + client = SpeechClient() + + request = cloud_speech.CreateRecognizerRequest( + parent=f"projects/{project_id}/locations/global", + recognizer_id=recognizer_id, + recognizer=cloud_speech.Recognizer( + language_codes=["en-US"], model="latest_short" + ), + ) + + # Creates a Recognizer + operation = client.create_recognizer(request=request) + recognizer = operation.result() + + # Reads a file as bytes + with io.open(audio_file, "rb") as f: + content = f.read() + + # Build inline phrase set to produce a more accurate transcript + phrase_set = cloud_speech.PhraseSet(phrases=[{"value": "${keem}", "boost": 20}]) + custom_class = cloud_speech.CustomClass(name="keem", items=[{"value": "Keem"}]) + adaptation = cloud_speech.SpeechAdaptation( + phrase_sets=[ + cloud_speech.SpeechAdaptation.AdaptationPhraseSet( + inline_phrase_set=phrase_set + ) + ], + custom_classes=[custom_class] + ) + config = cloud_speech.RecognitionConfig( + auto_decoding_config={}, adaptation=adaptation + ) + + request = cloud_speech.RecognizeRequest( + recognizer=recognizer.name, config=config, content=content + ) + + # Transcribes the audio 
into text + response = client.recognize(request=request) + + for result in response.results: + print("Transcript: {}".format(result.alternatives[0].transcript)) + + return response +# [END speech_adaptation_v2_inline_custom_class] + + +if __name__ == "__main__": + adaptation_v2_inline_custom_class() diff --git a/samples/snippets/adaptation_v2_inline_custom_class_test.py b/samples/snippets/adaptation_v2_inline_custom_class_test.py new file mode 100644 index 00000000..79cdf786 --- /dev/null +++ b/samples/snippets/adaptation_v2_inline_custom_class_test.py @@ -0,0 +1,48 @@ +# Copyright 2022, Google, Inc. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import re +from uuid import uuid4 + +from google.cloud.speech_v2 import SpeechClient +from google.cloud.speech_v2.types import cloud_speech + +import adaptation_v2_inline_custom_class + +RESOURCES = os.path.join(os.path.dirname(__file__), "resources") + + +def delete_recognizer(name): + client = SpeechClient() + request = cloud_speech.DeleteRecognizerRequest(name=name) + client.delete_recognizer(request=request) + + +def test_adaptation_v2_inline_custom_class(capsys): + project_id = os.getenv("GOOGLE_CLOUD_PROJECT") + + recognizer_id = "recognizer-" + str(uuid4()) + response = adaptation_v2_inline_custom_class.adaptation_v2_inline_custom_class( + project_id, recognizer_id, os.path.join(RESOURCES, "baby_keem.wav") + ) + + assert re.search( + r"play Baby Keem", + response.results[0].alternatives[0].transcript, + re.DOTALL | re.I, + ) + + delete_recognizer( + f"projects/{project_id}/locations/global/recognizers/{recognizer_id}" + ) diff --git a/samples/snippets/adaptation_v2_inline_phrase_set.py b/samples/snippets/adaptation_v2_inline_phrase_set.py new file mode 100644 index 00000000..de2939b9 --- /dev/null +++ b/samples/snippets/adaptation_v2_inline_phrase_set.py @@ -0,0 +1,71 @@ +# Copyright 2022 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +# [START speech_adaptation_v2_inline_phrase_set] +import io + +from google.cloud.speech_v2 import SpeechClient +from google.cloud.speech_v2.types import cloud_speech + + +def adaptation_v2_inline_phrase_set(project_id, recognizer_id, audio_file): + # Instantiates a client + client = SpeechClient() + + request = cloud_speech.CreateRecognizerRequest( + parent=f"projects/{project_id}/locations/global", + recognizer_id=recognizer_id, + recognizer=cloud_speech.Recognizer( + language_codes=["en-US"], model="latest_short" + ), + ) + + # Creates a Recognizer + operation = client.create_recognizer(request=request) + recognizer = operation.result() + + # Reads a file as bytes + with io.open(audio_file, "rb") as f: + content = f.read() + + # Build inline phrase set to produce a more accurate transcript + phrase_set = cloud_speech.PhraseSet(phrases=[{"value": "Keem", "boost": 10}]) + adaptation = cloud_speech.SpeechAdaptation( + phrase_sets=[ + cloud_speech.SpeechAdaptation.AdaptationPhraseSet( + inline_phrase_set=phrase_set + ) + ] + ) + config = cloud_speech.RecognitionConfig( + auto_decoding_config={}, adaptation=adaptation + ) + + request = cloud_speech.RecognizeRequest( + recognizer=recognizer.name, config=config, content=content + ) + + # Transcribes the audio into text + response = client.recognize(request=request) + + for result in response.results: + print("Transcript: {}".format(result.alternatives[0].transcript)) + + return response +# [END speech_adaptation_v2_inline_phrase_set] + + +if __name__ == "__main__": + adaptation_v2_inline_phrase_set() diff --git a/samples/snippets/adaptation_v2_inline_phrase_set_test.py b/samples/snippets/adaptation_v2_inline_phrase_set_test.py new file mode 100644 index 00000000..4254381c --- /dev/null +++ b/samples/snippets/adaptation_v2_inline_phrase_set_test.py @@ -0,0 +1,48 @@ +# Copyright 2022, Google, Inc. 
+# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import re +from uuid import uuid4 + +from google.cloud.speech_v2 import SpeechClient +from google.cloud.speech_v2.types import cloud_speech + +import adaptation_v2_inline_phrase_set + +RESOURCES = os.path.join(os.path.dirname(__file__), "resources") + + +def delete_recognizer(name): + client = SpeechClient() + request = cloud_speech.DeleteRecognizerRequest(name=name) + client.delete_recognizer(request=request) + + +def test_adaptation_v2_inline_phrase_set(capsys): + project_id = os.getenv("GOOGLE_CLOUD_PROJECT") + + recognizer_id = "recognizer-" + str(uuid4()) + response = adaptation_v2_inline_phrase_set.adaptation_v2_inline_phrase_set( + project_id, recognizer_id, os.path.join(RESOURCES, "baby_keem.wav") + ) + + assert re.search( + r"play Baby Keem", + response.results[0].alternatives[0].transcript, + re.DOTALL | re.I, + ) + + delete_recognizer( + f"projects/{project_id}/locations/global/recognizers/{recognizer_id}" + ) diff --git a/samples/snippets/adaptation_v2_phrase_set_reference.py b/samples/snippets/adaptation_v2_phrase_set_reference.py new file mode 100644 index 00000000..b89660d2 --- /dev/null +++ b/samples/snippets/adaptation_v2_phrase_set_reference.py @@ -0,0 +1,79 @@ +# Copyright 2022 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# [START speech_adaptation_v2_phrase_set_reference] +import io + +from google.cloud.speech_v2 import SpeechClient +from google.cloud.speech_v2.types import cloud_speech + + +def adaptation_v2_phrase_set_reference(project_id, recognizer_id, phrase_set_id, audio_file): + # Instantiates a client + client = SpeechClient() + + request = cloud_speech.CreateRecognizerRequest( + parent=f"projects/{project_id}/locations/global", + recognizer_id=recognizer_id, + recognizer=cloud_speech.Recognizer( + language_codes=["en-US"], model="latest_short" + ), + ) + + # Creates a Recognizer + operation = client.create_recognizer(request=request) + recognizer = operation.result() + + # Reads a file as bytes + with io.open(audio_file, "rb") as f: + content = f.read() + + # Create a persistent PhraseSet to reference in a recognition request + request = cloud_speech.CreatePhraseSetRequest( + parent=f"projects/{project_id}/locations/global", + phrase_set_id=phrase_set_id, + phrase_set=cloud_speech.PhraseSet(phrases=[{"value": "Keem", "boost": 10}])) + + operation = client.create_phrase_set(request=request) + phrase_set = operation.result() + + # Add a reference of the PhraseSet into the recognition request + adaptation = cloud_speech.SpeechAdaptation( + phrase_sets=[ + cloud_speech.SpeechAdaptation.AdaptationPhraseSet( + phrase_set=phrase_set.name + ) + ] + ) + config = cloud_speech.RecognitionConfig( + auto_decoding_config={}, adaptation=adaptation + ) + + request = cloud_speech.RecognizeRequest( + recognizer=recognizer.name, 
config=config, content=content + ) + + # Transcribes the audio into text + response = client.recognize(request=request) + + for result in response.results: + print("Transcript: {}".format(result.alternatives[0].transcript)) + + return response +# [END speech_adaptation_v2_phrase_set_reference] + + +if __name__ == "__main__": + adaptation_v2_phrase_set_reference() diff --git a/samples/snippets/adaptation_v2_phrase_set_reference_test.py b/samples/snippets/adaptation_v2_phrase_set_reference_test.py new file mode 100644 index 00000000..933d552a --- /dev/null +++ b/samples/snippets/adaptation_v2_phrase_set_reference_test.py @@ -0,0 +1,59 @@ +# Copyright 2022, Google, Inc. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import re +from uuid import uuid4 + +from google.cloud.speech_v2 import SpeechClient +from google.cloud.speech_v2.types import cloud_speech + +import adaptation_v2_phrase_set_reference + +RESOURCES = os.path.join(os.path.dirname(__file__), "resources") + + +def delete_recognizer(name): + client = SpeechClient() + request = cloud_speech.DeleteRecognizerRequest(name=name) + client.delete_recognizer(request=request) + + +def delete_phrase_set(name): + client = SpeechClient() + request = cloud_speech.DeletePhraseSetRequest(name=name) + client.delete_phrase_set(request=request) + + +def test_adaptation_v2_phrase_set_reference(capsys): + project_id = os.getenv("GOOGLE_CLOUD_PROJECT") + + recognizer_id = "recognizer-" + str(uuid4()) + phrase_set_id = "phrase-set-" + str(uuid4()) + response = adaptation_v2_phrase_set_reference.adaptation_v2_phrase_set_reference( + project_id, recognizer_id, phrase_set_id, os.path.join(RESOURCES, "baby_keem.wav") + ) + + assert re.search( + r"play Baby Keem", + response.results[0].alternatives[0].transcript, + re.DOTALL | re.I, + ) + + delete_recognizer( + f"projects/{project_id}/locations/global/recognizers/{recognizer_id}" + ) + + delete_phrase_set( + f"projects/{project_id}/locations/global/phraseSets/{phrase_set_id}" + ) diff --git a/samples/snippets/create_recognizer.py b/samples/snippets/create_recognizer.py new file mode 100644 index 00000000..986e7c5c --- /dev/null +++ b/samples/snippets/create_recognizer.py @@ -0,0 +1,42 @@ +# Copyright 2022 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# [START speech_create_recognizer] +from google.cloud.speech_v2 import SpeechClient +from google.cloud.speech_v2.types import cloud_speech + + +def create_recognizer(project_id, recognizer_id): + # Instantiates a client + client = SpeechClient() + + request = cloud_speech.CreateRecognizerRequest( + parent=f"projects/{project_id}/locations/global", + recognizer_id=recognizer_id, + recognizer=cloud_speech.Recognizer( + language_codes=["en-US"], model="latest_long" + ), + ) + + operation = client.create_recognizer(request=request) + recognizer = operation.result() + + print("Created Recognizer:", recognizer.name) + return recognizer +# [END speech_create_recognizer] + + +if __name__ == "__main__": + create_recognizer() diff --git a/samples/snippets/create_recognizer_test.py b/samples/snippets/create_recognizer_test.py new file mode 100644 index 00000000..01dbb366 --- /dev/null +++ b/samples/snippets/create_recognizer_test.py @@ -0,0 +1,35 @@ +# Copyright 2022, Google, Inc. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from uuid import uuid4 + +from google.cloud.speech_v2 import SpeechClient +from google.cloud.speech_v2.types import cloud_speech + +import create_recognizer + + +def delete_recognizer(name): + client = SpeechClient() + request = cloud_speech.DeleteRecognizerRequest(name=name) + client.delete_recognizer(request=request) + + +def test_create_recognizer(capsys): + project_id = os.getenv("GOOGLE_CLOUD_PROJECT") + + recognizer = create_recognizer.create_recognizer( + project_id, "recognizer-" + str(uuid4()) + ) + delete_recognizer(recognizer.name) diff --git a/samples/snippets/quickstart_v2.py b/samples/snippets/quickstart_v2.py new file mode 100644 index 00000000..d045c42c --- /dev/null +++ b/samples/snippets/quickstart_v2.py @@ -0,0 +1,60 @@ +# Copyright 2022 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +# [START speech_quickstart_v2] +import io + +from google.cloud.speech_v2 import SpeechClient +from google.cloud.speech_v2.types import cloud_speech + + +def quickstart_v2(project_id, recognizer_id, audio_file): + # Instantiates a client + client = SpeechClient() + + request = cloud_speech.CreateRecognizerRequest( + parent=f"projects/{project_id}/locations/global", + recognizer_id=recognizer_id, + recognizer=cloud_speech.Recognizer( + language_codes=["en-US"], model="latest_long" + ), + ) + + # Creates a Recognizer + operation = client.create_recognizer(request=request) + recognizer = operation.result() + + # Reads a file as bytes + with io.open(audio_file, "rb") as f: + content = f.read() + + config = cloud_speech.RecognitionConfig(auto_decoding_config={}) + + request = cloud_speech.RecognizeRequest( + recognizer=recognizer.name, config=config, content=content + ) + + # Transcribes the audio into text + response = client.recognize(request=request) + + for result in response.results: + print("Transcript: {}".format(result.alternatives[0].transcript)) + + return response +# [END speech_quickstart_v2] + + +if __name__ == "__main__": + quickstart_v2() diff --git a/samples/snippets/quickstart_v2_test.py b/samples/snippets/quickstart_v2_test.py new file mode 100644 index 00000000..5991c540 --- /dev/null +++ b/samples/snippets/quickstart_v2_test.py @@ -0,0 +1,48 @@ +# Copyright 2022, Google, Inc. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import re +from uuid import uuid4 + +from google.cloud.speech_v2 import SpeechClient +from google.cloud.speech_v2.types import cloud_speech + +import quickstart_v2 + +RESOURCES = os.path.join(os.path.dirname(__file__), "resources") + + +def delete_recognizer(name): + client = SpeechClient() + request = cloud_speech.DeleteRecognizerRequest(name=name) + client.delete_recognizer(request=request) + + +def test_quickstart_v2(capsys): + project_id = os.getenv("GOOGLE_CLOUD_PROJECT") + + recognizer_id = "recognizer-" + str(uuid4()) + response = quickstart_v2.quickstart_v2( + project_id, recognizer_id, os.path.join(RESOURCES, "audio.wav") + ) + + assert re.search( + r"how old is the Brooklyn Bridge", + response.results[0].alternatives[0].transcript, + re.DOTALL | re.I, + ) + + delete_recognizer( + f"projects/{project_id}/locations/global/recognizers/{recognizer_id}" + ) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 62c4df65..253d4535 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-speech==2.15.1 +google-cloud-speech==2.16.1 google-cloud-storage==2.5.0 diff --git a/samples/snippets/resources/audio.wav b/samples/snippets/resources/audio.wav new file mode 100644 index 00000000..140a3022 Binary files /dev/null and b/samples/snippets/resources/audio.wav differ diff --git a/samples/snippets/resources/audio_silence_padding.wav b/samples/snippets/resources/audio_silence_padding.wav new file mode 100644 index 00000000..db883c38 Binary files /dev/null and b/samples/snippets/resources/audio_silence_padding.wav differ diff --git a/samples/snippets/resources/baby_keem.wav b/samples/snippets/resources/baby_keem.wav new file mode 100644 index 00000000..4e7a5ca9 Binary files /dev/null and b/samples/snippets/resources/baby_keem.wav differ diff --git 
a/samples/snippets/transcribe_file_v2.py b/samples/snippets/transcribe_file_v2.py new file mode 100644 index 00000000..ef923051 --- /dev/null +++ b/samples/snippets/transcribe_file_v2.py @@ -0,0 +1,60 @@ +# Copyright 2022 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# [START speech_transcribe_file_v2] +import io + +from google.cloud.speech_v2 import SpeechClient +from google.cloud.speech_v2.types import cloud_speech + + +def transcribe_file_v2(project_id, recognizer_id, audio_file): + # Instantiates a client + client = SpeechClient() + + request = cloud_speech.CreateRecognizerRequest( + parent=f"projects/{project_id}/locations/global", + recognizer_id=recognizer_id, + recognizer=cloud_speech.Recognizer( + language_codes=["en-US"], model="latest_long" + ), + ) + + # Creates a Recognizer + operation = client.create_recognizer(request=request) + recognizer = operation.result() + + # Reads a file as bytes + with io.open(audio_file, "rb") as f: + content = f.read() + + config = cloud_speech.RecognitionConfig(auto_decoding_config={}) + + request = cloud_speech.RecognizeRequest( + recognizer=recognizer.name, config=config, content=content + ) + + # Transcribes the audio into text + response = client.recognize(request=request) + + for result in response.results: + print("Transcript: {}".format(result.alternatives[0].transcript)) + + return response +# [END speech_transcribe_file_v2] + + +if __name__ == "__main__": 
RESOURCES = os.path.join(os.path.dirname(__file__), "resources")


def delete_recognizer(name):
    """Send a delete request for the recognizer with resource name ``name``.

    The value returned by the client call is not inspected.
    """
    client = SpeechClient()
    request = cloud_speech.DeleteRecognizerRequest(name=name)
    client.delete_recognizer(request=request)


def test_transcribe_file_v2(capsys):
    """Transcribe the bundled WAV file and check the expected phrase appears."""
    project_id = os.getenv("GOOGLE_CLOUD_PROJECT")
    recognizer_id = "recognizer-" + str(uuid4())

    try:
        response = transcribe_file_v2.transcribe_file_v2(
            project_id, recognizer_id, os.path.join(RESOURCES, "audio.wav")
        )

        assert re.search(
            r"how old is the Brooklyn Bridge",
            response.results[0].alternatives[0].transcript,
            re.DOTALL | re.I,
        )
    finally:
        # Clean up even when the sample or the assertion fails; otherwise
        # every failed run leaves a recognizer behind in the project.
        delete_recognizer(
            f"projects/{project_id}/locations/global/recognizers/{recognizer_id}"
        )
def transcribe_gcs_v2(project_id, recognizer_id, gcs_uri):
    """Create a recognizer and transcribe audio referenced by a URI.

    Args:
        project_id: Project under which the recognizer is created (in the
            ``global`` location).
        recognizer_id: ID given to the newly created recognizer.
        gcs_uri: URI of the audio, passed as the ``uri`` field of the
            recognize request (e.g. a ``gs://`` path).

    Returns:
        The response returned by ``SpeechClient.recognize``.
    """
    # Instantiates a client
    client = SpeechClient()

    request = cloud_speech.CreateRecognizerRequest(
        parent=f"projects/{project_id}/locations/global",
        recognizer_id=recognizer_id,
        recognizer=cloud_speech.Recognizer(
            language_codes=["en-US"], model="latest_long"
        ),
    )

    # Creates a Recognizer and waits for the long-running operation to finish
    operation = client.create_recognizer(request=request)
    recognizer = operation.result()

    # An empty auto_decoding_config asks the service to detect the encoding
    config = cloud_speech.RecognitionConfig(auto_decoding_config={})

    request = cloud_speech.RecognizeRequest(
        recognizer=recognizer.name, config=config, uri=gcs_uri
    )

    # Transcribes the audio into text
    response = client.recognize(request=request)

    for result in response.results:
        print(f"Transcript: {result.alternatives[0].transcript}")

    return response
# [END speech_transcribe_gcs_v2]


if __name__ == "__main__":
    # Imported here so the published snippet between the region tags is
    # unaffected by the command-line plumbing.
    import argparse

    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument("project_id", help="project to create recognizer in")
    parser.add_argument("recognizer_id", help="name of recognizer to create")
    parser.add_argument("gcs_uri", help="URI of the audio to transcribe")
    args = parser.parse_args()
    # The function has three required parameters; calling it bare raised
    # TypeError when the sample was run as a script.
    transcribe_gcs_v2(args.project_id, args.recognizer_id, args.gcs_uri)
def delete_recognizer(name):
    """Send a delete request for the recognizer with resource name ``name``.

    The value returned by the client call is not inspected.
    """
    client = SpeechClient()
    request = cloud_speech.DeleteRecognizerRequest(name=name)
    client.delete_recognizer(request=request)


def test_transcribe_gcs_v2(capsys):
    """Transcribe the public sample FLAC and check the expected phrase appears."""
    project_id = os.getenv("GOOGLE_CLOUD_PROJECT")
    recognizer_id = "recognizer-" + str(uuid4())

    try:
        response = transcribe_gcs_v2.transcribe_gcs_v2(
            project_id, recognizer_id, "gs://cloud-samples-data/speech/audio.flac"
        )

        assert re.search(
            r"how old is the Brooklyn Bridge",
            response.results[0].alternatives[0].transcript,
            re.DOTALL | re.I,
        )
    finally:
        # Clean up even when the sample or the assertion fails; otherwise
        # every failed run leaves a recognizer behind in the project.
        delete_recognizer(
            f"projects/{project_id}/locations/global/recognizers/{recognizer_id}"
        )
def _split_audio(content, num_chunks=5):
    """Split ``content`` into consecutive slices of ``len(content) // num_chunks`` bytes."""
    # Clamp to 1: for inputs shorter than num_chunks the integer division
    # yields 0, and range() rejects a zero step with ValueError.
    chunk_length = max(1, len(content) // num_chunks)
    return [
        content[start : start + chunk_length]
        for start in range(0, len(content), chunk_length)
    ]


def transcribe_streaming_v2(project_id, recognizer_id, audio_file):
    """Create a recognizer and transcribe a local audio file via streaming.

    Args:
        project_id: Project under which the recognizer is created (in the
            ``global`` location).
        recognizer_id: ID given to the newly created recognizer.
        audio_file: Path of the local audio file that is read and sent in
            chunks.

    Returns:
        A list with every response yielded by ``streaming_recognize``.
    """
    # Instantiates a client
    client = SpeechClient()

    request = cloud_speech.CreateRecognizerRequest(
        parent=f"projects/{project_id}/locations/global",
        recognizer_id=recognizer_id,
        recognizer=cloud_speech.Recognizer(
            language_codes=["en-US"], model="latest_long"
        ),
    )

    # Creates a Recognizer and waits for the long-running operation to finish
    operation = client.create_recognizer(request=request)
    recognizer = operation.result()

    # Reads a file as bytes
    with io.open(audio_file, "rb") as f:
        content = f.read()

    # In practice, stream should be a generator yielding chunks of audio data
    stream = _split_audio(content)
    audio_requests = (
        cloud_speech.StreamingRecognizeRequest(audio=audio) for audio in stream
    )

    recognition_config = cloud_speech.RecognitionConfig(auto_decoding_config={})
    streaming_config = cloud_speech.StreamingRecognitionConfig(
        config=recognition_config
    )
    config_request = cloud_speech.StreamingRecognizeRequest(
        recognizer=recognizer.name, streaming_config=streaming_config
    )

    def requests(config, audio):
        # The configuration message is sent first, then the audio messages.
        yield config
        yield from audio

    # Transcribes the audio into text
    responses_iterator = client.streaming_recognize(
        requests=requests(config_request, audio_requests)
    )
    responses = []
    for response in responses_iterator:
        responses.append(response)
        for result in response.results:
            print(f"Transcript: {result.alternatives[0].transcript}")

    return responses
# [END speech_transcribe_streaming_v2]


if __name__ == "__main__":
    # Imported here so the published snippet between the region tags is
    # unaffected by the command-line plumbing.
    import argparse

    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument("project_id", help="project to create recognizer in")
    parser.add_argument("recognizer_id", help="name of recognizer to create")
    parser.add_argument("audio_file", help="audio file to stream")
    args = parser.parse_args()
    # The function has three required parameters; calling it bare raised
    # TypeError when the sample was run as a script.
    transcribe_streaming_v2(args.project_id, args.recognizer_id, args.audio_file)
RESOURCES = os.path.join(os.path.dirname(__file__), "resources")


def delete_recognizer(name):
    """Send a delete request for the recognizer with resource name ``name``.

    The value returned by the client call is not inspected.
    """
    client = SpeechClient()
    request = cloud_speech.DeleteRecognizerRequest(name=name)
    client.delete_recognizer(request=request)


def test_transcribe_streaming_v2(capsys):
    """Stream the bundled WAV file and check the combined transcript."""
    project_id = os.getenv("GOOGLE_CLOUD_PROJECT")
    recognizer_id = "recognizer-" + str(uuid4())

    try:
        responses = transcribe_streaming_v2.transcribe_streaming_v2(
            project_id, recognizer_id, os.path.join(RESOURCES, "audio.wav")
        )

        # Concatenate the top alternative of every result of every response.
        transcript = "".join(
            result.alternatives[0].transcript
            for response in responses
            for result in response.results
        )

        assert re.search(
            r"how old is the Brooklyn Bridge",
            transcript,
            re.DOTALL | re.I,
        )
    finally:
        # Clean up even when the sample or the assertion fails; otherwise
        # every failed run leaves a recognizer behind in the project.
        delete_recognizer(
            f"projects/{project_id}/locations/global/recognizers/{recognizer_id}"
        )
def _split_audio(content, num_chunks=5):
    """Split ``content`` into consecutive slices of ``len(content) // num_chunks`` bytes."""
    # Clamp to 1: for inputs shorter than num_chunks the integer division
    # yields 0, and range() rejects a zero step with ValueError.
    chunk_length = max(1, len(content) // num_chunks)
    return [
        content[start : start + chunk_length]
        for start in range(0, len(content), chunk_length)
    ]


def transcribe_streaming_voice_activity_events(project_id, recognizer_id, audio_file):
    """Stream a local audio file with voice activity events enabled.

    Prints a line when a response carries a speech-begin or speech-end
    event, and prints the top transcript of every result.

    Args:
        project_id: Project under which the recognizer is created (in the
            ``global`` location).
        recognizer_id: ID given to the newly created recognizer.
        audio_file: Path of the local audio file that is read and sent in
            chunks.

    Returns:
        A list with every response yielded by ``streaming_recognize``.
    """
    # Instantiates a client
    client = SpeechClient()

    request = cloud_speech.CreateRecognizerRequest(
        parent=f"projects/{project_id}/locations/global",
        recognizer_id=recognizer_id,
        recognizer=cloud_speech.Recognizer(
            language_codes=["en-US"], model="latest_long"
        ),
    )

    # Creates a Recognizer and waits for the long-running operation to finish
    operation = client.create_recognizer(request=request)
    recognizer = operation.result()

    # Reads a file as bytes
    with io.open(audio_file, "rb") as f:
        content = f.read()

    # In practice, stream should be a generator yielding chunks of audio data
    stream = _split_audio(content)
    audio_requests = (
        cloud_speech.StreamingRecognizeRequest(audio=audio) for audio in stream
    )

    recognition_config = cloud_speech.RecognitionConfig(auto_decoding_config={})

    # Sets the flag to enable voice activity events
    streaming_features = cloud_speech.StreamingRecognitionFeatures(
        enable_voice_activity_events=True
    )
    streaming_config = cloud_speech.StreamingRecognitionConfig(
        config=recognition_config,
        streaming_features=streaming_features,
    )

    config_request = cloud_speech.StreamingRecognizeRequest(
        recognizer=recognizer.name, streaming_config=streaming_config
    )

    def requests(config, audio):
        # The configuration message is sent first, then the audio messages.
        yield config
        yield from audio

    # Transcribes the audio into text
    responses_iterator = client.streaming_recognize(
        requests=requests(config_request, audio_requests)
    )

    # The enum lives on the nested SpeechEventType class; `speech_event_type`
    # is the name of the message field and is not usable for enum lookup.
    event_type = cloud_speech.StreamingRecognizeResponse.SpeechEventType

    responses = []
    for response in responses_iterator:
        responses.append(response)
        if response.speech_event_type == event_type.SPEECH_ACTIVITY_BEGIN:
            print("Speech started.")
        if response.speech_event_type == event_type.SPEECH_ACTIVITY_END:
            print("Speech ended.")
        for result in response.results:
            print(f"Transcript: {result.alternatives[0].transcript}")

    return responses


# [END speech_transcribe_streaming_voice_activity_events]


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument("project_id", help="project to create recognizer in")
    parser.add_argument("recognizer_id", help="name of recognizer to create")
    parser.add_argument("audio_file", help="audio file to stream")
    args = parser.parse_args()
    transcribe_streaming_voice_activity_events(
        args.project_id, args.recognizer_id, args.audio_file
    )
def _split_audio(content, num_chunks=5):
    """Split ``content`` into consecutive slices of ``len(content) // num_chunks`` bytes."""
    # Clamp to 1: for inputs shorter than num_chunks the integer division
    # yields 0, and range() rejects a zero step with ValueError.
    chunk_length = max(1, len(content) // num_chunks)
    return [
        content[start : start + chunk_length]
        for start in range(0, len(content), chunk_length)
    ]


def transcribe_streaming_voice_activity_timeouts(
    project_id, recognizer_id, speech_start_timeout, speech_end_timeout, audio_file
):
    """Stream a local audio file with voice activity events and timeouts.

    Prints a line when a response carries a speech-begin or speech-end
    event, and prints the top transcript of every result.

    Args:
        project_id: Project under which the recognizer is created (in the
            ``global`` location).
        recognizer_id: ID given to the newly created recognizer.
        speech_start_timeout: Number of seconds used for the
            ``speech_start_timeout`` duration.
        speech_end_timeout: Number of seconds used for the
            ``speech_end_timeout`` duration.
        audio_file: Path of the local audio file that is read and sent in
            chunks.

    Returns:
        A list with every response yielded by ``streaming_recognize``.
    """
    # Instantiates a client
    client = SpeechClient()

    request = cloud_speech.CreateRecognizerRequest(
        parent=f"projects/{project_id}/locations/global",
        recognizer_id=recognizer_id,
        recognizer=cloud_speech.Recognizer(
            language_codes=["en-US"], model="latest_long"
        ),
    )

    # Creates a Recognizer and waits for the long-running operation to finish
    operation = client.create_recognizer(request=request)
    recognizer = operation.result()

    # Reads a file as bytes
    with io.open(audio_file, "rb") as f:
        content = f.read()

    # In practice, stream should be a generator yielding chunks of audio data
    stream = _split_audio(content)
    audio_requests = (
        cloud_speech.StreamingRecognizeRequest(audio=audio) for audio in stream
    )

    recognition_config = cloud_speech.RecognitionConfig(auto_decoding_config={})

    # Sets the flag to enable voice activity events and timeout
    start_timeout = duration_pb2.Duration(seconds=speech_start_timeout)
    end_timeout = duration_pb2.Duration(seconds=speech_end_timeout)
    voice_activity_timeout = (
        cloud_speech.StreamingRecognitionFeatures.VoiceActivityTimeout(
            speech_start_timeout=start_timeout,
            speech_end_timeout=end_timeout,
        )
    )
    streaming_features = cloud_speech.StreamingRecognitionFeatures(
        enable_voice_activity_events=True,
        voice_activity_timeout=voice_activity_timeout,
    )

    streaming_config = cloud_speech.StreamingRecognitionConfig(
        config=recognition_config,
        streaming_features=streaming_features,
    )

    config_request = cloud_speech.StreamingRecognizeRequest(
        recognizer=recognizer.name, streaming_config=streaming_config
    )

    def requests(config, audio):
        # The configuration message is sent first, then the audio messages.
        yield config
        for message in audio:
            # NOTE(review): the pause presumably paces the upload so the
            # timeouts have wall-clock time to act on - confirm.
            sleep(0.5)
            yield message

    # Transcribes the audio into text
    responses_iterator = client.streaming_recognize(
        requests=requests(config_request, audio_requests)
    )

    # The enum lives on the nested SpeechEventType class; `speech_event_type`
    # is the name of the message field and is not usable for enum lookup.
    event_type = cloud_speech.StreamingRecognizeResponse.SpeechEventType

    responses = []
    for response in responses_iterator:
        responses.append(response)
        if response.speech_event_type == event_type.SPEECH_ACTIVITY_BEGIN:
            print("Speech started.")
        if response.speech_event_type == event_type.SPEECH_ACTIVITY_END:
            print("Speech ended.")
        for result in response.results:
            print(f"Transcript: {result.alternatives[0].transcript}")

    return responses


# [END speech_transcribe_streaming_voice_activity_timeouts]


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument("project_id", help="project to create recognizer in")
    parser.add_argument("recognizer_id", help="name of recognizer to create")
    # Parse the timeouts as integers: argparse yields strings by default and
    # Duration(seconds=...) does not accept a string.
    parser.add_argument(
        "speech_start_timeout",
        type=int,
        help="timeout in seconds for speech start",
    )
    parser.add_argument(
        "speech_end_timeout",
        type=int,
        help="timeout in seconds for speech end",
    )
    parser.add_argument("audio_file", help="audio file to stream")
    args = parser.parse_args()
    transcribe_streaming_voice_activity_timeouts(
        args.project_id,
        args.recognizer_id,
        args.speech_start_timeout,
        args.speech_end_timeout,
        args.audio_file,
    )
"google-api-core[grpc] >= 1.32.0, <3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*", "proto-plus >= 1.22.0, <2.0.0dev", - "protobuf >= 3.20.2, <5.0.0dev", + "protobuf>=3.19.5,<5.0.0dev,!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5", ] extras = {"libcst": "libcst >= 0.2.5"} diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index eb58105b..12b4d142 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -8,4 +8,4 @@ google-api-core==1.32.0 libcst==0.2.5 proto-plus==1.22.0 -protobuf==3.20.2 +protobuf==3.19.5