From f76effc3c5477db858571d89c5b52078e85ff94e Mon Sep 17 00:00:00 2001
From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com>
Date: Thu, 19 Aug 2021 20:32:25 +0000
Subject: [PATCH] feat: Add transcript normalization (#235)

- [ ] Regenerate this pull request now.

Committer: @jameszhao
PiperOrigin-RevId: 391603958

Source-Link: https://github.com/googleapis/googleapis/commit/76bed90afcd459c0345b1bd0ab3e5cd53260e04d

Source-Link: https://github.com/googleapis/googleapis-gen/commit/c7aaaa72a271bfc79f7631cd91bf4a944ffb202f
---
Review notes (commentary placed after the "---" separator; it is not
part of the commit message):
  * types/resource.py: adds the TranscriptNormalization message with a
    nested Entry message (search, replace, case_sensitive) and a
    repeated `entries` field, and lists the new name in the module
    manifest.
  * types/cloud_speech.py: adds RecognitionConfig.transcript_normalization
    as proto field number 24 and documents it in the class docstring.
  * __init__.py and types/__init__.py: import TranscriptNormalization and
    add it to the exported-name tuples.
  * NOTE(review): the line layout of this patch was restored from the
    hunk counts and "+" markers; indentation of context lines is
    presumed from the Python structure -- confirm against the target
    files before applying.
  * NOTE(review): the context line "adaptation `__" in the first
    cloud_speech.py hunk looks like it lost its hyperlink target in
    this copy; if so, that hunk will not match the target file until
    the link is restored -- TODO confirm against upstream.

 google/cloud/speech_v1p1beta1/__init__.py    |  2 +
 .../cloud/speech_v1p1beta1/types/__init__.py |  2 +
 .../speech_v1p1beta1/types/cloud_speech.py   | 10 +++++
 .../cloud/speech_v1p1beta1/types/resource.py | 43 ++++++++++++++++++-
 4 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/google/cloud/speech_v1p1beta1/__init__.py b/google/cloud/speech_v1p1beta1/__init__.py
index 4b6f5cbe..ec90e3e0 100644
--- a/google/cloud/speech_v1p1beta1/__init__.py
+++ b/google/cloud/speech_v1p1beta1/__init__.py
@@ -52,6 +52,7 @@
 from .types.resource import CustomClass
 from .types.resource import PhraseSet
 from .types.resource import SpeechAdaptation
+from .types.resource import TranscriptNormalization
 
 from google.cloud.speech_v1.helpers import SpeechHelpers
 
@@ -94,6 +95,7 @@ class SpeechClient(SpeechHelpers, SpeechClient):
     "StreamingRecognitionResult",
     "StreamingRecognizeRequest",
     "StreamingRecognizeResponse",
+    "TranscriptNormalization",
     "TranscriptOutputConfig",
     "UpdateCustomClassRequest",
     "UpdatePhraseSetRequest",
diff --git a/google/cloud/speech_v1p1beta1/types/__init__.py b/google/cloud/speech_v1p1beta1/types/__init__.py
index 1274808d..fd172826 100644
--- a/google/cloud/speech_v1p1beta1/types/__init__.py
+++ b/google/cloud/speech_v1p1beta1/types/__init__.py
@@ -51,6 +51,7 @@
     CustomClass,
     PhraseSet,
     SpeechAdaptation,
+    TranscriptNormalization,
 )
 
 __all__ = (
@@ -87,4 +88,5 @@
     "CustomClass",
     "PhraseSet",
     "SpeechAdaptation",
+    "TranscriptNormalization",
 )
diff --git a/google/cloud/speech_v1p1beta1/types/cloud_speech.py b/google/cloud/speech_v1p1beta1/types/cloud_speech.py
index ee343023..9dd978f0 100644
--- a/google/cloud/speech_v1p1beta1/types/cloud_speech.py
+++ b/google/cloud/speech_v1p1beta1/types/cloud_speech.py
@@ -265,6 +265,13 @@ class RecognitionConfig(proto.Message):
             information, see the `speech
             adaptation `__
             documentation.
+        transcript_normalization (google.cloud.speech_v1p1beta1.types.TranscriptNormalization):
+            Use transcription normalization to
+            automatically replace parts of the transcript
+            with phrases of your choosing. For
+            StreamingRecognize, this normalization only
+            applies to stable partial transcripts (stability
+            > 0.8) and final transcripts.
         speech_contexts (Sequence[google.cloud.speech_v1p1beta1.types.SpeechContext]):
             Array of
             [SpeechContext][google.cloud.speech.v1p1beta1.SpeechContext].
@@ -427,6 +434,9 @@ class AudioEncoding(proto.Enum):
     adaptation = proto.Field(
         proto.MESSAGE, number=20, message=resource.SpeechAdaptation,
     )
+    transcript_normalization = proto.Field(
+        proto.MESSAGE, number=24, message=resource.TranscriptNormalization,
+    )
     speech_contexts = proto.RepeatedField(
         proto.MESSAGE, number=6, message="SpeechContext",
     )
diff --git a/google/cloud/speech_v1p1beta1/types/resource.py b/google/cloud/speech_v1p1beta1/types/resource.py
index 1e709d8e..822a4210 100644
--- a/google/cloud/speech_v1p1beta1/types/resource.py
+++ b/google/cloud/speech_v1p1beta1/types/resource.py
@@ -18,7 +18,12 @@
 
 __protobuf__ = proto.module(
     package="google.cloud.speech.v1p1beta1",
-    manifest={"CustomClass", "PhraseSet", "SpeechAdaptation",},
+    manifest={
+        "CustomClass",
+        "PhraseSet",
+        "SpeechAdaptation",
+        "TranscriptNormalization",
+    },
 )
 
 
@@ -150,4 +155,40 @@ class SpeechAdaptation(proto.Message):
     )
 
 
+class TranscriptNormalization(proto.Message):
+    r"""Transcription normalization configuration. Use transcription
+    normalization to automatically replace parts of the transcript
+    with phrases of your choosing. For StreamingRecognize, this
+    normalization only applies to stable partial transcripts
+    (stability > 0.8) and final transcripts.
+
+    Attributes:
+        entries (Sequence[google.cloud.speech_v1p1beta1.types.TranscriptNormalization.Entry]):
+            A list of replacement entries. We will perform replacement
+            with one entry at a time. For example, the second entry in
+            ["cat" => "dog", "mountain cat" => "mountain dog"] will
+            never be applied because we will always process the first
+            entry before it. At most 100 entries.
+    """
+
+    class Entry(proto.Message):
+        r"""A single replacement configuration.
+        Attributes:
+            search (str):
+                What to replace. Max length is 100
+                characters.
+            replace (str):
+                What to replace with. Max length is 100
+                characters.
+            case_sensitive (bool):
+                Whether the search is case sensitive.
+        """
+
+        search = proto.Field(proto.STRING, number=1,)
+        replace = proto.Field(proto.STRING, number=2,)
+        case_sensitive = proto.Field(proto.BOOL, number=3,)
+
+    entries = proto.RepeatedField(proto.MESSAGE, number=1, message=Entry,)
+
+
 __all__ = tuple(sorted(__protobuf__.manifest))