From 07b5203a15a186aab537442a4f4a4071aab3046f Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 10 Aug 2021 11:14:19 +0000 Subject: [PATCH] feat: add total_billed_time response field (#224) Committer: @cherba PiperOrigin-RevId: 389755489 Source-Link: https://github.com/googleapis/googleapis/commit/10185d07a4db1d76a888a119aeab1f2287b35105 Source-Link: https://github.com/googleapis/googleapis-gen/commit/27d21b1b5a0ca1ec55013da57c30c3ac1ac35449 --- .../speech_v1/services/speech/async_client.py | 3 +- .../cloud/speech_v1/services/speech/client.py | 3 +- google/cloud/speech_v1/types/cloud_speech.py | 66 ++++++++++++++----- tests/unit/gapic/speech_v1/test_speech.py | 1 + 4 files changed, 53 insertions(+), 20 deletions(-) diff --git a/google/cloud/speech_v1/services/speech/async_client.py b/google/cloud/speech_v1/services/speech/async_client.py index 2c06a667..83f54612 100644 --- a/google/cloud/speech_v1/services/speech/async_client.py +++ b/google/cloud/speech_v1/services/speech/async_client.py @@ -38,6 +38,7 @@ from google.api_core import operation # type: ignore from google.api_core import operation_async # type: ignore from google.cloud.speech_v1.types import cloud_speech +from google.protobuf import duration_pb2 # type: ignore from google.rpc import status_pb2 # type: ignore from .transports.base import SpeechTransport, DEFAULT_CLIENT_INFO from .transports.grpc_asyncio import SpeechGrpcAsyncIOTransport @@ -379,7 +380,7 @@ def streaming_recognize( single_utterance is set to false, then no messages are streamed back to the client. 
- Here's an example of a series of ten + Here's an example of a series of StreamingRecognizeResponses that might be returned while processing audio: diff --git a/google/cloud/speech_v1/services/speech/client.py b/google/cloud/speech_v1/services/speech/client.py index 8932e4c3..fe9a005f 100644 --- a/google/cloud/speech_v1/services/speech/client.py +++ b/google/cloud/speech_v1/services/speech/client.py @@ -43,6 +43,7 @@ from google.api_core import operation # type: ignore from google.api_core import operation_async # type: ignore from google.cloud.speech_v1.types import cloud_speech +from google.protobuf import duration_pb2 # type: ignore from google.rpc import status_pb2 # type: ignore from .transports.base import SpeechTransport, DEFAULT_CLIENT_INFO from .transports.grpc import SpeechGrpcTransport @@ -553,7 +554,7 @@ def streaming_recognize( single_utterance is set to false, then no messages are streamed back to the client. - Here's an example of a series of ten + Here's an example of a series of StreamingRecognizeResponses that might be returned while processing audio: diff --git a/google/cloud/speech_v1/types/cloud_speech.py b/google/cloud/speech_v1/types/cloud_speech.py index f1420b46..51cbdd63 100644 --- a/google/cloud/speech_v1/types/cloud_speech.py +++ b/google/cloud/speech_v1/types/cloud_speech.py @@ -138,6 +138,17 @@ class StreamingRecognitionConfig(proto.Message): ``END_OF_SINGLE_UTTERANCE`` event and cease recognition. It will return no more than one ``StreamingRecognitionResult`` with the ``is_final`` flag set to ``true``. + + The ``single_utterance`` field can only be used with + specified models, otherwise an error is thrown. The + ``model`` field in [``RecognitionConfig``][] must be set to: + + - ``command_and_search`` + - ``phone_call`` AND additional field + ``useEnhanced``\ =\ ``true`` + - The ``model`` field is left undefined. In this case the + API auto-selects a model based on any other parameters + that you set in ``RecognitionConfig``. 
interim_results (bool): If ``true``, interim results (tentative hypotheses) may be returned as they become available (these interim results are @@ -214,7 +225,7 @@ class RecognitionConfig(proto.Message): [SpeechContext][google.cloud.speech.v1.SpeechContext]. A means to provide context to assist the speech recognition. For more information, see `speech - adaptation <https://cloud.google.com/speech-to-text/docs/context-strength>`__. + adaptation <https://cloud.google.com/speech-to-text/docs/adaptation>`__. enable_word_time_offsets (bool): If ``true``, the top result includes a list of words and the start and end time offsets (timestamps) for those words. If @@ -226,11 +237,7 @@ class RecognitionConfig(proto.Message): available in select languages. Setting this for requests in other languages has no effect at all. The default 'false' value does not add - punctuation to result hypotheses. Note: This is - currently offered as an experimental service, - complimentary to all users. In the future this - may be exclusively available as a premium - feature. + punctuation to result hypotheses. diarization_config (google.cloud.speech_v1.types.SpeakerDiarizationConfig): Config to enable speaker diarization and set additional parameters to make diarization better @@ -270,7 +277,7 @@ class RecognitionConfig(proto.Message): video - Best for audio that originated from from video or includes multiple + Best for audio that originated from video or includes multiple speakers. Ideally the audio is recorded at a 16khz or greater sampling rate. This is a premium model that costs more than the standard rate. @@ -306,7 +313,7 @@ class AudioEncoding(proto.Enum): The accuracy of the speech recognition can be reduced if lossy codecs are used to capture or transmit audio, particularly if background noise is present. Lossy codecs include ``MULAW``, - ``AMR``, ``AMR_WB``, ``OGG_OPUS``, ``SPEEX_WITH_HEADER_BYTE``, and + ``AMR``, ``AMR_WB``, ``OGG_OPUS``, ``SPEEX_WITH_HEADER_BYTE``, ``MP3``. 
The ``FLAC`` and ``WAV`` audio file formats include a header that @@ -370,7 +377,7 @@ class SpeakerDiarizationConfig(proto.Message): automatically determine the correct number of speakers. If not set, the default value is 6. speaker_tag (int): - Unused. + Output only. Unused. """ enable_speaker_diarization = proto.Field(proto.BOOL, number=1,) @@ -531,11 +538,17 @@ class RecognizeResponse(proto.Message): results (Sequence[google.cloud.speech_v1.types.SpeechRecognitionResult]): Sequential list of transcription results corresponding to sequential portions of audio. + total_billed_time (google.protobuf.duration_pb2.Duration): + When available, billed audio seconds for the + corresponding request. """ results = proto.RepeatedField( proto.MESSAGE, number=2, message="SpeechRecognitionResult", ) + total_billed_time = proto.Field( + proto.MESSAGE, number=3, message=duration_pb2.Duration, + ) class LongRunningRecognizeResponse(proto.Message): @@ -550,11 +563,17 @@ class LongRunningRecognizeResponse(proto.Message): results (Sequence[google.cloud.speech_v1.types.SpeechRecognitionResult]): Sequential list of transcription results corresponding to sequential portions of audio. + total_billed_time (google.protobuf.duration_pb2.Duration): + When available, billed audio seconds for the + corresponding request. """ results = proto.RepeatedField( proto.MESSAGE, number=2, message="SpeechRecognitionResult", ) + total_billed_time = proto.Field( + proto.MESSAGE, number=3, message=duration_pb2.Duration, + ) class LongRunningRecognizeMetadata(proto.Message): @@ -572,6 +591,10 @@ class LongRunningRecognizeMetadata(proto.Message): Time when the request was received. last_update_time (google.protobuf.timestamp_pb2.Timestamp): Time of the most recent processing update. + uri (str): + Output only. The URI of the audio file being + transcribed. Empty if the audio was sent as byte + content. 
""" progress_percent = proto.Field(proto.INT32, number=1,) @@ -579,6 +602,7 @@ class LongRunningRecognizeMetadata(proto.Message): last_update_time = proto.Field( proto.MESSAGE, number=3, message=timestamp_pb2.Timestamp, ) + uri = proto.Field(proto.STRING, number=4,) class StreamingRecognizeResponse(proto.Message): @@ -588,9 +612,8 @@ class StreamingRecognizeResponse(proto.Message): client. If there is no recognizable audio, and ``single_utterance`` is set to false, then no messages are streamed back to the client. - Here's an example of a series of ten - ``StreamingRecognizeResponse``\ s that might be returned while - processing audio: + Here's an example of a series of ``StreamingRecognizeResponse``\ s + that might be returned while processing audio: 1. results { alternatives { transcript: "tube" } stability: 0.01 } @@ -648,6 +671,10 @@ class StreamingRecognizeResponse(proto.Message): ``is_final=false`` results (the interim results). speech_event_type (google.cloud.speech_v1.types.StreamingRecognizeResponse.SpeechEventType): Indicates the type of speech event. + total_billed_time (google.protobuf.duration_pb2.Duration): + When available, billed audio seconds for the + stream. Set only if this is the last response in + the stream. """ class SpeechEventType(proto.Enum): @@ -660,6 +687,9 @@ class SpeechEventType(proto.Enum): proto.MESSAGE, number=2, message="StreamingRecognitionResult", ) speech_event_type = proto.Field(proto.ENUM, number=4, enum=SpeechEventType,) + total_billed_time = proto.Field( + proto.MESSAGE, number=5, message=duration_pb2.Duration, + ) class StreamingRecognitionResult(proto.Message): @@ -784,12 +814,12 @@ class WordInfo(proto.Message): The word corresponding to this set of information. speaker_tag (int): - A distinct integer value is assigned for every speaker - within the audio. This field specifies which one of those - speakers was detected to have spoken this word. Value ranges - from '1' to diarization_speaker_count. 
speaker_tag is set if - enable_speaker_diarization = 'true' and only in the top - alternative. + Output only. A distinct integer value is assigned for every + speaker within the audio. This field specifies which one of + those speakers was detected to have spoken this word. Value + ranges from '1' to diarization_speaker_count. speaker_tag is + set if enable_speaker_diarization = 'true' and only in the + top alternative. """ start_time = proto.Field(proto.MESSAGE, number=1, message=duration_pb2.Duration,) diff --git a/tests/unit/gapic/speech_v1/test_speech.py b/tests/unit/gapic/speech_v1/test_speech.py index f07d10cd..23e6806d 100644 --- a/tests/unit/gapic/speech_v1/test_speech.py +++ b/tests/unit/gapic/speech_v1/test_speech.py @@ -41,6 +41,7 @@ from google.cloud.speech_v1.types import cloud_speech from google.longrunning import operations_pb2 from google.oauth2 import service_account +from google.protobuf import duration_pb2 # type: ignore from google.rpc import status_pb2 # type: ignore import google.auth