From b6bddbe46172debd962c3d8e566a7c410fb4f279 Mon Sep 17 00:00:00 2001
From: Yoshi Automation Bot <yoshi-automation@google.com>
Date: Thu, 8 Apr 2021 07:49:50 -0700
Subject: [PATCH] feat: Support for spoken punctuation and spoken emojis (#143)

---
 google/cloud/speech_v1p1beta1/__init__.py     |  4 +--
 .../speech_v1p1beta1/proto/cloud_speech.proto | 26 +++++++++++---
 .../services/speech/async_client.py           |  2 +-
 .../services/speech/client.py                 |  2 +-
 .../speech_v1p1beta1/types/cloud_speech.py    | 35 ++++++++++++++++---
 synth.metadata                                |  6 ++--
 .../gapic/speech_v1p1beta1/test_speech.py     |  1 +
 7 files changed, 60 insertions(+), 16 deletions(-)

diff --git a/google/cloud/speech_v1p1beta1/__init__.py b/google/cloud/speech_v1p1beta1/__init__.py
index a45dedd4..576dd5d0 100644
--- a/google/cloud/speech_v1p1beta1/__init__.py
+++ b/google/cloud/speech_v1p1beta1/__init__.py
@@ -60,6 +60,7 @@ class SpeechClient(SpeechHelpers, SpeechClient):
 
 
 __all__ = (
+    "AdaptationClient",
     "CreateCustomClassRequest",
     "CreatePhraseSetRequest",
     "CustomClass",
@@ -82,7 +83,6 @@ class SpeechClient(SpeechHelpers, SpeechClient):
     "RecognizeResponse",
     "SpeakerDiarizationConfig",
     "SpeechAdaptation",
-    "SpeechClient",
     "SpeechContext",
     "SpeechRecognitionAlternative",
     "SpeechRecognitionResult",
@@ -94,5 +94,5 @@ class SpeechClient(SpeechHelpers, SpeechClient):
     "UpdateCustomClassRequest",
     "UpdatePhraseSetRequest",
     "WordInfo",
-    "AdaptationClient",
+    "SpeechClient",
 )
diff --git a/google/cloud/speech_v1p1beta1/proto/cloud_speech.proto b/google/cloud/speech_v1p1beta1/proto/cloud_speech.proto
index 9a8e256f..cf183d01 100644
--- a/google/cloud/speech_v1p1beta1/proto/cloud_speech.proto
+++ b/google/cloud/speech_v1p1beta1/proto/cloud_speech.proto
@@ -1,3 +1,4 @@
+
 // Copyright 2021 Google LLC
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
@@ -24,6 +25,7 @@ import "google/longrunning/operations.proto";
 import "google/protobuf/any.proto";
 import "google/protobuf/duration.proto";
 import "google/protobuf/timestamp.proto";
+import "google/protobuf/wrappers.proto";
 import "google/rpc/status.proto";
 
 option cc_enable_arenas = true;
@@ -316,7 +318,7 @@ message RecognitionConfig {
   // Speech adaptation configuration improves the accuracy of speech
   // recognition. When speech adaptation is set it supersedes the
   // `speech_contexts` field. For more information, see the [speech
-  // adaptation](https://cloud.google.com/speech-to-text/docs/context-strength)
+  // adaptation](https://cloud.google.com/speech-to-text/docs/adaptation)
   // documentation.
   SpeechAdaptation adaptation = 20;
 
@@ -324,7 +326,7 @@ message RecognitionConfig {
   // A means to provide context to assist the speech recognition. For more
   // information, see
   // [speech
-  // adaptation](https://cloud.google.com/speech-to-text/docs/context-strength).
+  // adaptation](https://cloud.google.com/speech-to-text/docs/adaptation).
   repeated SpeechContext speech_contexts = 6;
 
   // If `true`, the top result includes a list of words and
@@ -344,6 +346,22 @@ message RecognitionConfig {
   // The default 'false' value does not add punctuation to result hypotheses.
   bool enable_automatic_punctuation = 11;
 
+  // The spoken punctuation behavior for the call.
+  // If not set, uses default behavior based on model of choice,
+  // e.g. command_and_search will enable spoken punctuation by default.
+  // If 'true', replaces spoken punctuation with the corresponding symbols in
+  // the request. For example, "how are you question mark" becomes "how are
+  // you?". See https://cloud.google.com/speech-to-text/docs/spoken-punctuation
+  // for support. If 'false', spoken punctuation is not replaced.
+  google.protobuf.BoolValue enable_spoken_punctuation = 22;
+
+  // The spoken emoji behavior for the call.
+  // If not set, uses default behavior based on model of choice.
+  // If 'true', adds spoken emoji formatting for the request. This will replace
+  // spoken emojis with the corresponding Unicode symbols in the final
+  // transcript. If 'false', spoken emojis are not replaced.
+  google.protobuf.BoolValue enable_spoken_emojis = 23;
+
   // If 'true', enables speaker detection for each recognized word in
   // the top alternative of the recognition result using a speaker_tag provided
   // in the WordInfo.
@@ -674,8 +692,8 @@ message LongRunningRecognizeMetadata {
 // audio, and `single_utterance` is set to false, then no messages are streamed
 // back to the client.
 //
-// Here's an example of a series of ten `StreamingRecognizeResponse`s that might
-// be returned while processing audio:
+// Here's an example of a series of `StreamingRecognizeResponse`s that might be
+// returned while processing audio:
 //
 // 1. results { alternatives { transcript: "tube" } stability: 0.01 }
 //
diff --git a/google/cloud/speech_v1p1beta1/services/speech/async_client.py b/google/cloud/speech_v1p1beta1/services/speech/async_client.py
index f9ae9342..41200ed8 100644
--- a/google/cloud/speech_v1p1beta1/services/speech/async_client.py
+++ b/google/cloud/speech_v1p1beta1/services/speech/async_client.py
@@ -394,7 +394,7 @@ def streaming_recognize(
                    single_utterance is set to false, then no messages
                    are streamed back to the client.
 
-                   Here's an example of a series of ten
+                   Here's an example of a series of
                    StreamingRecognizeResponses that might be returned
                    while processing audio:
 
diff --git a/google/cloud/speech_v1p1beta1/services/speech/client.py b/google/cloud/speech_v1p1beta1/services/speech/client.py
index e4280c93..382b448e 100644
--- a/google/cloud/speech_v1p1beta1/services/speech/client.py
+++ b/google/cloud/speech_v1p1beta1/services/speech/client.py
@@ -584,7 +584,7 @@ def streaming_recognize(
                    single_utterance is set to false, then no messages
                    are streamed back to the client.
 
-                   Here's an example of a series of ten
+                   Here's an example of a series of
                    StreamingRecognizeResponses that might be returned
                    while processing audio:
 
diff --git a/google/cloud/speech_v1p1beta1/types/cloud_speech.py b/google/cloud/speech_v1p1beta1/types/cloud_speech.py
index 3af3ae02..02b49c25 100644
--- a/google/cloud/speech_v1p1beta1/types/cloud_speech.py
+++ b/google/cloud/speech_v1p1beta1/types/cloud_speech.py
@@ -21,6 +21,7 @@
 from google.cloud.speech_v1p1beta1.types import resource
 from google.protobuf import duration_pb2 as duration  # type: ignore
 from google.protobuf import timestamp_pb2 as timestamp  # type: ignore
+from google.protobuf import wrappers_pb2 as wrappers  # type: ignore
 from google.rpc import status_pb2 as status  # type: ignore
 
 
@@ -271,14 +272,14 @@ class RecognitionConfig(proto.Message):
             speech recognition. When speech adaptation is set it
             supersedes the ``speech_contexts`` field. For more
             information, see the `speech
-            adaptation <https://cloud.google.com/speech-to-text/docs/context-strength>`__
+            adaptation <https://cloud.google.com/speech-to-text/docs/adaptation>`__
             documentation.
         speech_contexts (Sequence[google.cloud.speech_v1p1beta1.types.SpeechContext]):
             Array of
             [SpeechContext][google.cloud.speech.v1p1beta1.SpeechContext].
             A means to provide context to assist the speech recognition.
             For more information, see `speech
-            adaptation <https://cloud.google.com/speech-to-text/docs/context-strength>`__.
+            adaptation <https://cloud.google.com/speech-to-text/docs/adaptation>`__.
         enable_word_time_offsets (bool):
             If ``true``, the top result includes a list of words and the
             start and end time offsets (timestamps) for those words. If
@@ -296,6 +297,23 @@ class RecognitionConfig(proto.Message):
             requests in other languages has no effect at
             all. The default 'false' value does not add
             punctuation to result hypotheses.
+        enable_spoken_punctuation (google.protobuf.wrappers_pb2.BoolValue):
+            The spoken punctuation behavior for the call. If not set,
+            uses default behavior based on model of choice, e.g.
+            command_and_search will enable spoken punctuation by default.
+            If 'true', replaces spoken punctuation with the
+            corresponding symbols in the request. For example, "how are
+            you question mark" becomes "how are you?". See
+            https://cloud.google.com/speech-to-text/docs/spoken-punctuation
+            for support. If 'false', spoken punctuation is not replaced.
+        enable_spoken_emojis (google.protobuf.wrappers_pb2.BoolValue):
+            The spoken emoji behavior for the call.
+            If not set, uses default behavior based on model
+            of choice. If 'true', adds spoken emoji
+            formatting for the request. This will replace
+            spoken emojis with the corresponding Unicode
+            symbols in the final transcript. If 'false',
+            spoken emojis are not replaced.
         enable_speaker_diarization (bool):
             If 'true', enables speaker detection for each recognized
             word in the top alternative of the recognition result using
@@ -436,6 +454,14 @@ class AudioEncoding(proto.Enum):
 
     enable_automatic_punctuation = proto.Field(proto.BOOL, number=11)
 
+    enable_spoken_punctuation = proto.Field(
+        proto.MESSAGE, number=22, message=wrappers.BoolValue,
+    )
+
+    enable_spoken_emojis = proto.Field(
+        proto.MESSAGE, number=23, message=wrappers.BoolValue,
+    )
+
     enable_speaker_diarization = proto.Field(proto.BOOL, number=16)
 
     diarization_speaker_count = proto.Field(proto.INT32, number=17)
@@ -749,9 +775,8 @@ class StreamingRecognizeResponse(proto.Message):
     client. If there is no recognizable audio, and ``single_utterance``
     is set to false, then no messages are streamed back to the client.
 
-    Here's an example of a series of ten
-    ``StreamingRecognizeResponse``\ s that might be returned while
-    processing audio:
+    Here's an example of a series of ``StreamingRecognizeResponse``\ s
+    that might be returned while processing audio:
 
     1. results { alternatives { transcript: "tube" } stability: 0.01 }
 
diff --git a/synth.metadata b/synth.metadata
index c00909ec..a5f8e43d 100644
--- a/synth.metadata
+++ b/synth.metadata
@@ -4,15 +4,15 @@
       "git": {
         "name": ".",
         "remote": "https://github.com/googleapis/python-speech.git",
-        "sha": "5da4b5590e092c993688f9a048efd08ff2e65407"
+        "sha": "cc9cc3ecdef32a8bf1198aa2ff8561398bf359f8"
       }
     },
     {
       "git": {
         "name": "googleapis",
         "remote": "https://github.com/googleapis/googleapis.git",
-        "sha": "149a3a84c29c9b8189576c7442ccb6dcf6a8f95b",
-        "internalRef": "364411656"
+        "sha": "847464c110e3cb6a5078b6b15086c73c4b622938",
+        "internalRef": "367346981"
       }
     },
     {
diff --git a/tests/unit/gapic/speech_v1p1beta1/test_speech.py b/tests/unit/gapic/speech_v1p1beta1/test_speech.py
index 09d260c1..a09a0f61 100644
--- a/tests/unit/gapic/speech_v1p1beta1/test_speech.py
+++ b/tests/unit/gapic/speech_v1p1beta1/test_speech.py
@@ -42,6 +42,7 @@
 from google.cloud.speech_v1p1beta1.types import resource
 from google.longrunning import operations_pb2
 from google.oauth2 import service_account
+from google.protobuf import wrappers_pb2 as wrappers  # type: ignore
 from google.rpc import status_pb2 as status  # type: ignore