This repository has been archived by the owner on Sep 5, 2023. It is now read-only.
/
media_translation.py
242 lines (198 loc) · 9.48 KB
/
media_translation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
# -*- coding: utf-8 -*-
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import proto # type: ignore
from google.rpc import status_pb2 # type: ignore
# proto-plus module descriptor: names the proto package these messages
# belong to and lists the top-level message classes defined below.
__protobuf__ = proto.module(
    package="google.cloud.mediatranslation.v1beta1",
    manifest={
        "TranslateSpeechConfig",
        "StreamingTranslateSpeechConfig",
        "StreamingTranslateSpeechRequest",
        "StreamingTranslateSpeechResult",
        "StreamingTranslateSpeechResponse",
    },
)
class TranslateSpeechConfig(proto.Message):
    r"""Configuration telling the speech translation service how to
    process a request.

    Attributes:
        audio_encoding (str):
            Required. Encoding of the audio data. Supported formats:

            -  ``linear16``

               Uncompressed 16-bit signed little-endian samples
               (Linear PCM).

            -  ``flac``

               ``flac`` (Free Lossless Audio Codec) is the recommended
               encoding: it is lossless, so recognition is not
               compromised, and it needs only about half the bandwidth
               of ``linear16``.

            -  ``mulaw``

               8-bit samples that compand 14-bit audio samples using
               G.711 PCMU/mu-law.

            -  ``amr``

               Adaptive Multi-Rate Narrowband codec.
               ``sample_rate_hertz`` must be 8000.

            -  ``amr-wb``

               Adaptive Multi-Rate Wideband codec.
               ``sample_rate_hertz`` must be 16000.

            -  ``ogg-opus``

               Opus encoded audio frames in an
               `Ogg <https://wikipedia.org/wiki/Ogg>`__ container.
               ``sample_rate_hertz`` must be one of 8000, 12000,
               16000, 24000, or 48000.

            -  ``mp3``

               MP3 audio. Supports all standard MP3 bitrates (which
               range from 32-320 kbps). When using this encoding,
               ``sample_rate_hertz`` has to match the sample rate of
               the file being used.
        source_language_code (str):
            Required. Source language code (BCP-47) of the input
            audio.
        target_language_code (str):
            Required. Target language code (BCP-47) of the output.
        sample_rate_hertz (int):
            Optional. Sample rate in Hertz of the audio data. Valid
            values are: 8000-48000. 16000 is optimal. For best
            results, set the sampling rate of the audio source to
            16000 Hz. If that's not possible, use the native sample
            rate of the audio source (instead of re-sampling).
        model (str):
            Optional. ``google-provided-model/video`` and
            ``google-provided-model/enhanced-phone-call`` are premium
            models. ``google-provided-model/phone-call`` is not a
            premium model.
    """

    audio_encoding = proto.Field(
        proto.STRING,
        number=1,
    )
    source_language_code = proto.Field(
        proto.STRING,
        number=2,
    )
    target_language_code = proto.Field(
        proto.STRING,
        number=3,
    )
    sample_rate_hertz = proto.Field(
        proto.INT32,
        number=4,
    )
    model = proto.Field(
        proto.STRING,
        number=5,
    )
class StreamingTranslateSpeechConfig(proto.Message):
    r"""Configuration for a streaming translation session.

    Attributes:
        audio_config (google.cloud.mediatranslation_v1beta1.types.TranslateSpeechConfig):
            Required. The common config for all the following audio
            contents.
        single_utterance (bool):
            Optional. If ``false`` or omitted, the system performs
            continuous translation (continuing to wait for and
            process audio even if the user pauses speaking) until the
            client closes the input stream (gRPC API) or until the
            maximum time limit has been reached. May return multiple
            ``StreamingTranslateSpeechResult``\ s with the
            ``is_final`` flag set to ``true``.

            If ``true``, the speech translator will detect a single
            spoken utterance. When it detects that the user has
            paused or stopped speaking, it will return an
            ``END_OF_SINGLE_UTTERANCE`` event and cease translation.
            When the client receives the 'END_OF_SINGLE_UTTERANCE'
            event, the client should stop sending requests. However,
            clients should keep receiving the remaining responses
            until the stream is terminated. To construct the complete
            sentence in a streaming way, one should override (if
            'is_final' of the previous response is false), or append
            (if 'is_final' of the previous response is true).
    """

    audio_config = proto.Field(
        proto.MESSAGE,
        number=1,
        message="TranslateSpeechConfig",
    )
    single_utterance = proto.Field(
        proto.BOOL,
        number=2,
    )
class StreamingTranslateSpeechRequest(proto.Message):
    r"""Top-level message the client sends for the
    ``StreamingTranslateSpeech`` method.

    Multiple ``StreamingTranslateSpeechRequest`` messages are sent.
    The first message must contain a ``streaming_config`` message and
    must not contain ``audio_content`` data. All subsequent messages
    must contain ``audio_content`` data and must not contain a
    ``streaming_config`` message.

    Attributes:
        streaming_config (google.cloud.mediatranslation_v1beta1.types.StreamingTranslateSpeechConfig):
            Provides information to the recognizer that specifies how
            to process the request. The first
            ``StreamingTranslateSpeechRequest`` message must contain
            a ``streaming_config`` message.
        audio_content (bytes):
            The audio data to be translated. Sequential chunks of
            audio data are sent in sequential
            ``StreamingTranslateSpeechRequest`` messages. The first
            ``StreamingTranslateSpeechRequest`` message must not
            contain ``audio_content`` data and all subsequent
            ``StreamingTranslateSpeechRequest`` messages must contain
            ``audio_content`` data. The audio bytes must be encoded
            as specified in ``StreamingTranslateSpeechConfig``. Note:
            as with all bytes fields, protobuffers use a pure binary
            representation (not base64).
    """

    # Both fields are declared in the "streaming_request" oneof group.
    streaming_config = proto.Field(
        proto.MESSAGE,
        number=1,
        oneof="streaming_request",
        message="StreamingTranslateSpeechConfig",
    )
    audio_content = proto.Field(
        proto.BYTES,
        number=2,
        oneof="streaming_request",
    )
class StreamingTranslateSpeechResult(proto.Message):
    r"""Streaming speech translation result for the portion of the
    audio that is currently being processed.

    Attributes:
        text_translation_result (google.cloud.mediatranslation_v1beta1.types.StreamingTranslateSpeechResult.TextTranslationResult):
            Text translation result.
    """

    class TextTranslationResult(proto.Message):
        r"""Text translation result.

        Attributes:
            translation (str):
                Output only. The translated sentence.
            is_final (bool):
                Output only. If ``false``, this
                ``StreamingTranslateSpeechResult`` represents an
                interim result that may change. If ``true``, this is
                the final time the translation service will return
                this particular ``StreamingTranslateSpeechResult``;
                the streaming translator will not return any further
                hypotheses for this portion of the transcript and
                corresponding audio.
        """

        translation = proto.Field(
            proto.STRING,
            number=1,
        )
        is_final = proto.Field(
            proto.BOOL,
            number=2,
        )

    # Declared in the "result" oneof group.
    text_translation_result = proto.Field(
        proto.MESSAGE,
        number=1,
        oneof="result",
        message=TextTranslationResult,
    )
class StreamingTranslateSpeechResponse(proto.Message):
    r"""Streaming speech translation response for the portion of the
    audio currently processed.

    Attributes:
        error (google.rpc.status_pb2.Status):
            Output only. If set, returns a
            [google.rpc.Status][google.rpc.Status] message that
            specifies the error for the operation.
        result (google.cloud.mediatranslation_v1beta1.types.StreamingTranslateSpeechResult):
            Output only. The translation result that is currently
            being processed (is_final could be true or false).
        speech_event_type (google.cloud.mediatranslation_v1beta1.types.StreamingTranslateSpeechResponse.SpeechEventType):
            Output only. Indicates the type of speech event.
    """

    class SpeechEventType(proto.Enum):
        r"""Indicates the type of speech event."""

        SPEECH_EVENT_TYPE_UNSPECIFIED = 0
        END_OF_SINGLE_UTTERANCE = 1

    error = proto.Field(
        proto.MESSAGE,
        number=1,
        message=status_pb2.Status,
    )
    result = proto.Field(
        proto.MESSAGE,
        number=2,
        message="StreamingTranslateSpeechResult",
    )
    speech_event_type = proto.Field(
        proto.ENUM,
        number=3,
        enum=SpeechEventType,
    )
# Public API of this module: the names listed in the manifest above, sorted
# so the export order does not depend on the manifest's iteration order.
__all__ = tuple(sorted(__protobuf__.manifest))