From b6bddbe46172debd962c3d8e566a7c410fb4f279 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Thu, 8 Apr 2021 07:49:50 -0700 Subject: [PATCH] feat: Support for spoken punctuation and spoken emojis (#143) --- google/cloud/speech_v1p1beta1/__init__.py | 4 +-- .../speech_v1p1beta1/proto/cloud_speech.proto | 26 +++++++++++--- .../services/speech/async_client.py | 2 +- .../services/speech/client.py | 2 +- .../speech_v1p1beta1/types/cloud_speech.py | 35 ++++++++++++++++--- synth.metadata | 6 ++-- .../gapic/speech_v1p1beta1/test_speech.py | 1 + 7 files changed, 60 insertions(+), 16 deletions(-) diff --git a/google/cloud/speech_v1p1beta1/__init__.py b/google/cloud/speech_v1p1beta1/__init__.py index a45dedd4..576dd5d0 100644 --- a/google/cloud/speech_v1p1beta1/__init__.py +++ b/google/cloud/speech_v1p1beta1/__init__.py @@ -60,6 +60,7 @@ class SpeechClient(SpeechHelpers, SpeechClient): __all__ = ( + "AdaptationClient", "CreateCustomClassRequest", "CreatePhraseSetRequest", "CustomClass", @@ -82,7 +83,6 @@ class SpeechClient(SpeechHelpers, SpeechClient): "RecognizeResponse", "SpeakerDiarizationConfig", "SpeechAdaptation", - "SpeechClient", "SpeechContext", "SpeechRecognitionAlternative", "SpeechRecognitionResult", @@ -94,5 +94,5 @@ class SpeechClient(SpeechHelpers, SpeechClient): "UpdateCustomClassRequest", "UpdatePhraseSetRequest", "WordInfo", - "AdaptationClient", + "SpeechClient", ) diff --git a/google/cloud/speech_v1p1beta1/proto/cloud_speech.proto b/google/cloud/speech_v1p1beta1/proto/cloud_speech.proto index 9a8e256f..cf183d01 100644 --- a/google/cloud/speech_v1p1beta1/proto/cloud_speech.proto +++ b/google/cloud/speech_v1p1beta1/proto/cloud_speech.proto @@ -1,3 +1,4 @@ + // Copyright 2021 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -24,6 +25,7 @@ import "google/longrunning/operations.proto"; import "google/protobuf/any.proto"; import "google/protobuf/duration.proto"; import "google/protobuf/timestamp.proto"; +import "google/protobuf/wrappers.proto"; import "google/rpc/status.proto"; option cc_enable_arenas = true; @@ -316,7 +318,7 @@ message RecognitionConfig { // Speech adaptation configuration improves the accuracy of speech // recognition. When speech adaptation is set it supersedes the // `speech_contexts` field. For more information, see the [speech - // adaptation](https://cloud.google.com/speech-to-text/docs/context-strength) + // adaptation](https://cloud.google.com/speech-to-text/docs/adaptation) // documentation. SpeechAdaptation adaptation = 20; @@ -324,7 +326,7 @@ message RecognitionConfig { // A means to provide context to assist the speech recognition. For more // information, see // [speech - // adaptation](https://cloud.google.com/speech-to-text/docs/context-strength). + // adaptation](https://cloud.google.com/speech-to-text/docs/adaptation). repeated SpeechContext speech_contexts = 6; // If `true`, the top result includes a list of words and @@ -344,6 +346,22 @@ message RecognitionConfig { // The default 'false' value does not add punctuation to result hypotheses. bool enable_automatic_punctuation = 11; + // The spoken punctuation behavior for the call + // If not set, uses default behavior based on model of choice + // e.g. command_and_search will enable spoken punctuation by default + // If 'true', replaces spoken punctuation with the corresponding symbols in + // the request. For example, "how are you question mark" becomes "how are + // you?". See https://cloud.google.com/speech-to-text/docs/spoken-punctuation + // for support. If 'false', spoken punctuation is not replaced. + google.protobuf.BoolValue enable_spoken_punctuation = 22; + + // The spoken emoji behavior for the call + // If not set, uses default behavior based on model of choice + // If 'true', adds spoken emoji formatting for the request. This will replace + // spoken emojis with the corresponding Unicode symbols in the final + // transcript. If 'false', spoken emojis are not replaced. + google.protobuf.BoolValue enable_spoken_emojis = 23; + // If 'true', enables speaker detection for each recognized word in // the top alternative of the recognition result using a speaker_tag provided // in the WordInfo. @@ -674,8 +692,8 @@ message LongRunningRecognizeMetadata { // audio, and `single_utterance` is set to false, then no messages are streamed // back to the client. // -// Here's an example of a series of ten `StreamingRecognizeResponse`s that might -// be returned while processing audio: +// Here's an example of a series of `StreamingRecognizeResponse`s that might be +// returned while processing audio: // // 1. results { alternatives { transcript: "tube" } stability: 0.01 } // diff --git a/google/cloud/speech_v1p1beta1/services/speech/async_client.py b/google/cloud/speech_v1p1beta1/services/speech/async_client.py index f9ae9342..41200ed8 100644 --- a/google/cloud/speech_v1p1beta1/services/speech/async_client.py +++ b/google/cloud/speech_v1p1beta1/services/speech/async_client.py @@ -394,7 +394,7 @@ def streaming_recognize( single_utterance is set to false, then no messages are streamed back to the client. - Here's an example of a series of ten + Here's an example of a series of StreamingRecognizeResponses that might be returned while processing audio: diff --git a/google/cloud/speech_v1p1beta1/services/speech/client.py b/google/cloud/speech_v1p1beta1/services/speech/client.py index e4280c93..382b448e 100644 --- a/google/cloud/speech_v1p1beta1/services/speech/client.py +++ b/google/cloud/speech_v1p1beta1/services/speech/client.py @@ -584,7 +584,7 @@ def streaming_recognize( single_utterance is set to false, then no messages are streamed back to the client. - Here's an example of a series of ten + Here's an example of a series of StreamingRecognizeResponses that might be returned while processing audio: diff --git a/google/cloud/speech_v1p1beta1/types/cloud_speech.py b/google/cloud/speech_v1p1beta1/types/cloud_speech.py index 3af3ae02..02b49c25 100644 --- a/google/cloud/speech_v1p1beta1/types/cloud_speech.py +++ b/google/cloud/speech_v1p1beta1/types/cloud_speech.py @@ -21,6 +21,7 @@ from google.cloud.speech_v1p1beta1.types import resource from google.protobuf import duration_pb2 as duration # type: ignore from google.protobuf import timestamp_pb2 as timestamp # type: ignore +from google.protobuf import wrappers_pb2 as wrappers # type: ignore from google.rpc import status_pb2 as status # type: ignore @@ -271,14 +272,14 @@ class RecognitionConfig(proto.Message): speech recognition. When speech adaptation is set it supersedes the ``speech_contexts`` field. For more information, see the `speech - adaptation `__ + adaptation `__ documentation. speech_contexts (Sequence[google.cloud.speech_v1p1beta1.types.SpeechContext]): Array of [SpeechContext][google.cloud.speech.v1p1beta1.SpeechContext]. A means to provide context to assist the speech recognition. For more information, see `speech - adaptation `__. + adaptation `__. enable_word_time_offsets (bool): If ``true``, the top result includes a list of words and the start and end time offsets (timestamps) for those words. If @@ -296,6 +297,23 @@ class RecognitionConfig(proto.Message): requests in other languages has no effect at all. The default 'false' value does not add punctuation to result hypotheses. + enable_spoken_punctuation (google.protobuf.wrappers_pb2.BoolValue): + The spoken punctuation behavior for the call If not set, + uses default behavior based on model of choice e.g. + command_and_search will enable spoken punctuation by default + If 'true', replaces spoken punctuation with the + corresponding symbols in the request. For example, "how are + you question mark" becomes "how are you?". See + https://cloud.google.com/speech-to-text/docs/spoken-punctuation + for support. If 'false', spoken punctuation is not replaced. + enable_spoken_emojis (google.protobuf.wrappers_pb2.BoolValue): + The spoken emoji behavior for the call + If not set, uses default behavior based on model + of choice If 'true', adds spoken emoji + formatting for the request. This will replace + spoken emojis with the corresponding Unicode + symbols in the final transcript. If 'false', + spoken emojis are not replaced. enable_speaker_diarization (bool): If 'true', enables speaker detection for each recognized word in the top alternative of the recognition result using @@ -436,6 +454,14 @@ class AudioEncoding(proto.Enum): enable_automatic_punctuation = proto.Field(proto.BOOL, number=11) + enable_spoken_punctuation = proto.Field( + proto.MESSAGE, number=22, message=wrappers.BoolValue, + ) + + enable_spoken_emojis = proto.Field( + proto.MESSAGE, number=23, message=wrappers.BoolValue, + ) + enable_speaker_diarization = proto.Field(proto.BOOL, number=16) diarization_speaker_count = proto.Field(proto.INT32, number=17) @@ -749,9 +775,8 @@ class StreamingRecognizeResponse(proto.Message): client. If there is no recognizable audio, and ``single_utterance`` is set to false, then no messages are streamed back to the client. - Here's an example of a series of ten - ``StreamingRecognizeResponse``\ s that might be returned while - processing audio: + Here's an example of a series of ``StreamingRecognizeResponse``\ s + that might be returned while processing audio: 1. results { alternatives { transcript: "tube" } stability: 0.01 } diff --git a/synth.metadata b/synth.metadata index c00909ec..a5f8e43d 100644 --- a/synth.metadata +++ b/synth.metadata @@ -4,15 +4,15 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-speech.git", - "sha": "5da4b5590e092c993688f9a048efd08ff2e65407" + "sha": "cc9cc3ecdef32a8bf1198aa2ff8561398bf359f8" } }, { "git": { "name": "googleapis", "remote": "https://github.com/googleapis/googleapis.git", - "sha": "149a3a84c29c9b8189576c7442ccb6dcf6a8f95b", - "internalRef": "364411656" + "sha": "847464c110e3cb6a5078b6b15086c73c4b622938", + "internalRef": "367346981" } }, { diff --git a/tests/unit/gapic/speech_v1p1beta1/test_speech.py b/tests/unit/gapic/speech_v1p1beta1/test_speech.py index 09d260c1..a09a0f61 100644 --- a/tests/unit/gapic/speech_v1p1beta1/test_speech.py +++ b/tests/unit/gapic/speech_v1p1beta1/test_speech.py @@ -42,6 +42,7 @@ from google.cloud.speech_v1p1beta1.types import resource from google.longrunning import operations_pb2 from google.oauth2 import service_account +from google.protobuf import wrappers_pb2 as wrappers # type: ignore from google.rpc import status_pb2 as status # type: ignore