Skip to content
This repository has been archived by the owner on Apr 20, 2024. It is now read-only.

Commit

Permalink
feat: Add transcript normalization (#235)
Browse files Browse the repository at this point in the history
- [ ] Regenerate this pull request now.

Committer: @jameszhao
PiperOrigin-RevId: 391603958

Source-Link: googleapis/googleapis@76bed90

Source-Link: googleapis/googleapis-gen@c7aaaa7
  • Loading branch information
gcf-owl-bot[bot] committed Aug 19, 2021
1 parent 51e7caf commit f76effc
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 1 deletion.
2 changes: 2 additions & 0 deletions google/cloud/speech_v1p1beta1/__init__.py
Expand Up @@ -52,6 +52,7 @@
from .types.resource import CustomClass
from .types.resource import PhraseSet
from .types.resource import SpeechAdaptation
from .types.resource import TranscriptNormalization

from google.cloud.speech_v1.helpers import SpeechHelpers

Expand Down Expand Up @@ -94,6 +95,7 @@ class SpeechClient(SpeechHelpers, SpeechClient):
"StreamingRecognitionResult",
"StreamingRecognizeRequest",
"StreamingRecognizeResponse",
"TranscriptNormalization",
"TranscriptOutputConfig",
"UpdateCustomClassRequest",
"UpdatePhraseSetRequest",
Expand Down
2 changes: 2 additions & 0 deletions google/cloud/speech_v1p1beta1/types/__init__.py
Expand Up @@ -51,6 +51,7 @@
CustomClass,
PhraseSet,
SpeechAdaptation,
TranscriptNormalization,
)

__all__ = (
Expand Down Expand Up @@ -87,4 +88,5 @@
"CustomClass",
"PhraseSet",
"SpeechAdaptation",
"TranscriptNormalization",
)
10 changes: 10 additions & 0 deletions google/cloud/speech_v1p1beta1/types/cloud_speech.py
Expand Up @@ -265,6 +265,13 @@ class RecognitionConfig(proto.Message):
information, see the `speech
adaptation <https://cloud.google.com/speech-to-text/docs/adaptation>`__
documentation.
transcript_normalization (google.cloud.speech_v1p1beta1.types.TranscriptNormalization):
Use transcription normalization to
automatically replace parts of the transcript
with phrases of your choosing. For
StreamingRecognize, this normalization only
applies to stable partial transcripts (stability
> 0.8) and final transcripts.
speech_contexts (Sequence[google.cloud.speech_v1p1beta1.types.SpeechContext]):
Array of
[SpeechContext][google.cloud.speech.v1p1beta1.SpeechContext].
Expand Down Expand Up @@ -427,6 +434,9 @@ class AudioEncoding(proto.Enum):
adaptation = proto.Field(
proto.MESSAGE, number=20, message=resource.SpeechAdaptation,
)
transcript_normalization = proto.Field(
proto.MESSAGE, number=24, message=resource.TranscriptNormalization,
)
speech_contexts = proto.RepeatedField(
proto.MESSAGE, number=6, message="SpeechContext",
)
Expand Down
43 changes: 42 additions & 1 deletion google/cloud/speech_v1p1beta1/types/resource.py
Expand Up @@ -18,7 +18,12 @@

__protobuf__ = proto.module(
package="google.cloud.speech.v1p1beta1",
manifest={"CustomClass", "PhraseSet", "SpeechAdaptation",},
manifest={
"CustomClass",
"PhraseSet",
"SpeechAdaptation",
"TranscriptNormalization",
},
)


Expand Down Expand Up @@ -150,4 +155,40 @@ class SpeechAdaptation(proto.Message):
)


class TranscriptNormalization(proto.Message):
    r"""Configuration for transcript normalization.

    Transcript normalization automatically substitutes parts of the
    transcript with phrases chosen by the caller. For
    StreamingRecognize, the normalization is applied only to stable
    partial transcripts (stability > 0.8) and to final transcripts.

    Attributes:
        entries (Sequence[google.cloud.speech_v1p1beta1.types.TranscriptNormalization.Entry]):
            Replacement entries. Replacement is performed with one
            entry at a time. For example, in
            ["cat" => "dog", "mountain cat" => "mountain dog"] the
            second entry is never applied, because the first entry
            is always processed before it. At most 100 entries.
    """

    class Entry(proto.Message):
        r"""One search-and-replace rule.

        Attributes:
            search (str):
                Text to be replaced. Max length is 100
                characters.
            replace (str):
                Text to substitute in. Max length is 100
                characters.
            case_sensitive (bool):
                Whether the search is case sensitive.
        """

        search = proto.Field(
            proto.STRING,
            number=1,
        )
        replace = proto.Field(
            proto.STRING,
            number=2,
        )
        case_sensitive = proto.Field(
            proto.BOOL,
            number=3,
        )

    entries = proto.RepeatedField(
        proto.MESSAGE,
        number=1,
        message=Entry,
    )


# Public names of this module: the entries of the proto module manifest,
# collected into a sorted tuple.
__all__ = tuple(sorted(__protobuf__.manifest))

0 comments on commit f76effc

Please sign in to comment.