feat: Add transcript normalization (#235)

- [ ] Regenerate this pull request now.

Committer: @jameszhao
PiperOrigin-RevId: 391603958
Source-Link: googleapis/googleapis@76bed90
Source-Link: googleapis/googleapis-gen@c7aaaa7
googleapis · Aug 19, 2021 · f76effc · f76effc
1 parent 51e7caf
commit f76effc
Show file tree

Hide file tree

Showing 4 changed files with 56 additions and 1 deletion.
diff --git a/google/cloud/speech_v1p1beta1/__init__.py b/google/cloud/speech_v1p1beta1/__init__.py
@@ -52,6 +52,7 @@
 from .types.resource import CustomClass
 from .types.resource import PhraseSet
 from .types.resource import SpeechAdaptation
+from .types.resource import TranscriptNormalization
 
 from google.cloud.speech_v1.helpers import SpeechHelpers
 
@@ -94,6 +95,7 @@ class SpeechClient(SpeechHelpers, SpeechClient):
     "StreamingRecognitionResult",
     "StreamingRecognizeRequest",
     "StreamingRecognizeResponse",
+    "TranscriptNormalization",
     "TranscriptOutputConfig",
     "UpdateCustomClassRequest",
     "UpdatePhraseSetRequest",

diff --git a/google/cloud/speech_v1p1beta1/types/__init__.py b/google/cloud/speech_v1p1beta1/types/__init__.py
@@ -51,6 +51,7 @@
     CustomClass,
     PhraseSet,
     SpeechAdaptation,
+    TranscriptNormalization,
 )
 
 __all__ = (
@@ -87,4 +88,5 @@
     "CustomClass",
     "PhraseSet",
     "SpeechAdaptation",
+    "TranscriptNormalization",
 )
diff --git a/google/cloud/speech_v1p1beta1/types/cloud_speech.py b/google/cloud/speech_v1p1beta1/types/cloud_speech.py
@@ -265,6 +265,13 @@ class RecognitionConfig(proto.Message):
             information, see the `speech
             adaptation <https://cloud.google.com/speech-to-text/docs/adaptation>`__
             documentation.
+        transcript_normalization (google.cloud.speech_v1p1beta1.types.TranscriptNormalization):
+            Use transcription normalization to
+            automatically replace parts of the transcript
+            with phrases of your choosing. For
+            StreamingRecognize, this normalization only
+            applies to stable partial transcripts (stability
+            > 0.8) and final transcripts.
         speech_contexts (Sequence[google.cloud.speech_v1p1beta1.types.SpeechContext]):
             Array of
             [SpeechContext][google.cloud.speech.v1p1beta1.SpeechContext].
@@ -427,6 +434,9 @@ class AudioEncoding(proto.Enum):
     adaptation = proto.Field(
         proto.MESSAGE, number=20, message=resource.SpeechAdaptation,
     )
+    transcript_normalization = proto.Field(
+        proto.MESSAGE, number=24, message=resource.TranscriptNormalization,
+    )
     speech_contexts = proto.RepeatedField(
         proto.MESSAGE, number=6, message="SpeechContext",
     )

diff --git a/google/cloud/speech_v1p1beta1/types/resource.py b/google/cloud/speech_v1p1beta1/types/resource.py
@@ -18,7 +18,12 @@
 
 __protobuf__ = proto.module(
     package="google.cloud.speech.v1p1beta1",
-    manifest={"CustomClass", "PhraseSet", "SpeechAdaptation",},
+    manifest={
+        "CustomClass",
+        "PhraseSet",
+        "SpeechAdaptation",
+        "TranscriptNormalization",
+    },
 )
 
 
@@ -150,4 +155,40 @@ class SpeechAdaptation(proto.Message):
     )
 
 
+class TranscriptNormalization(proto.Message):
+    r"""Transcription normalization configuration.
+
+    Use transcription normalization to automatically replace parts
+    of the transcript with phrases of your choosing. For
+    StreamingRecognize, this normalization only applies to stable
+    partial transcripts (stability > 0.8) and final transcripts.
+
+    Attributes:
+        entries (Sequence[google.cloud.speech_v1p1beta1.types.TranscriptNormalization.Entry]):
+            A list of replacement entries. We will perform replacement
+            with one entry at a time. For example, the second entry in
+            ["cat" => "dog", "mountain cat" => "mountain dog"] will
+            never be applied because we will always process the first
+            entry before it. At most 100 entries.
+    """
+
+    class Entry(proto.Message):
+        r"""A single replacement configuration.
+
+        Attributes:
+            search (str):
+                What to replace. Max length is 100 characters.
+            replace (str):
+                What to replace with. Max length is 100 characters.
+            case_sensitive (bool):
+                Whether the search is case sensitive.
+        """
+
+        search = proto.Field(proto.STRING, number=1,)
+        replace = proto.Field(proto.STRING, number=2,)
+        case_sensitive = proto.Field(proto.BOOL, number=3,)
+
+    entries = proto.RepeatedField(proto.MESSAGE, number=1, message=Entry,)
+
+
 __all__ = tuple(sorted(__protobuf__.manifest))