Skip to content
This repository was archived by the owner on Dec 19, 2023. It is now read-only.

Commit 1f184a3

Browse files
yoshi-automation authored and callmehiphop committed
fix: add proto field behavior options (#265)
1 parent 89c4e90 commit 1f184a3

File tree

10 files changed

+485
-326
lines changed

10 files changed

+485
-326
lines changed

protos/google/cloud/texttospeech/v1/cloud_tts.proto

Lines changed: 113 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2018 Google LLC.
1+
// Copyright 2019 Google LLC.
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -18,6 +18,8 @@ syntax = "proto3";
1818
package google.cloud.texttospeech.v1;
1919

2020
import "google/api/annotations.proto";
21+
import "google/api/client.proto";
22+
import "google/api/field_behavior.proto";
2123

2224
option cc_enable_arenas = true;
2325
option csharp_namespace = "Google.Cloud.TextToSpeech.V1";
@@ -29,35 +31,79 @@ option php_namespace = "Google\\Cloud\\TextToSpeech\\V1";
2931

3032
// Service that implements Google Cloud Text-to-Speech API.
3133
service TextToSpeech {
34+
option (google.api.default_host) = "texttospeech.googleapis.com";
35+
option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";
36+
3237
// Returns a list of voices supported for synthesis.
3338
rpc ListVoices(ListVoicesRequest) returns (ListVoicesResponse) {
3439
option (google.api.http) = {
3540
get: "/v1/voices"
3641
};
42+
option (google.api.method_signature) = "language_code";
3743
}
3844

3945
// Synthesizes speech synchronously: receive results after all text input
4046
// has been processed.
41-
rpc SynthesizeSpeech(SynthesizeSpeechRequest)
42-
returns (SynthesizeSpeechResponse) {
47+
rpc SynthesizeSpeech(SynthesizeSpeechRequest) returns (SynthesizeSpeechResponse) {
4348
option (google.api.http) = {
4449
post: "/v1/text:synthesize"
4550
body: "*"
4651
};
52+
option (google.api.method_signature) = "input,voice,audio_config";
4753
}
4854
}
4955

5056
// The top-level message sent by the client for the `ListVoices` method.
5157
message ListVoicesRequest {
52-
// Optional (but recommended)
58+
// Optional. Recommended.
5359
// [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag. If
5460
// specified, the ListVoices call will only return voices that can be used to
5561
// synthesize this language_code. E.g. when specifying "en-NZ", you will get
5662
// supported "en-*" voices; when specifying "no", you will get supported
5763
// "no-*" (Norwegian) and "nb-*" (Norwegian Bokmal) voices; specifying "zh"
5864
// will also get supported "cmn-*" voices; specifying "zh-hk" will also get
5965
// supported "yue-*" voices.
60-
string language_code = 1;
66+
string language_code = 1 [(google.api.field_behavior) = OPTIONAL];
67+
}
68+
69+
// Gender of the voice as described in
70+
// [SSML voice element](https://www.w3.org/TR/speech-synthesis11/#edef_voice).
71+
enum SsmlVoiceGender {
72+
// An unspecified gender.
73+
// In VoiceSelectionParams, this means that the client doesn't care which
74+
// gender the selected voice will have. In the Voice field of
75+
// ListVoicesResponse, this may mean that the voice doesn't fit any of the
76+
// other categories in this enum, or that the gender of the voice isn't known.
77+
SSML_VOICE_GENDER_UNSPECIFIED = 0;
78+
79+
// A male voice.
80+
MALE = 1;
81+
82+
// A female voice.
83+
FEMALE = 2;
84+
85+
// A gender-neutral voice.
86+
NEUTRAL = 3;
87+
}
88+
89+
// Configuration to set up audio encoder. The encoding determines the output
90+
// audio format that we'd like.
91+
enum AudioEncoding {
92+
// Not specified. Will return result [google.rpc.Code.INVALID_ARGUMENT][].
93+
AUDIO_ENCODING_UNSPECIFIED = 0;
94+
95+
// Uncompressed 16-bit signed little-endian samples (Linear PCM).
96+
// Audio content returned as LINEAR16 also contains a WAV header.
97+
LINEAR16 = 1;
98+
99+
// MP3 audio at 32kbps.
100+
MP3 = 2;
101+
102+
// Opus encoded audio wrapped in an ogg container. The result will be a
103+
// file which can be played natively on Android, and in browsers (at least
104+
// Chrome and Firefox). The quality of the encoding is considerably higher
105+
// than MP3 while using approximately the same bitrate.
106+
OGG_OPUS = 3;
61107
}
62108

63109
// The message returned to the client by the `ListVoices` method.
@@ -86,13 +132,13 @@ message Voice {
86132
// The top-level message sent by the client for the `SynthesizeSpeech` method.
87133
message SynthesizeSpeechRequest {
88134
// Required. The Synthesizer requires either plain text or SSML as input.
89-
SynthesisInput input = 1;
135+
SynthesisInput input = 1 [(google.api.field_behavior) = REQUIRED];
90136

91137
// Required. The desired voice of the synthesized audio.
92-
VoiceSelectionParams voice = 2;
138+
VoiceSelectionParams voice = 2 [(google.api.field_behavior) = REQUIRED];
93139

94140
// Required. The configuration of the synthesized audio.
95-
AudioConfig audio_config = 3;
141+
AudioConfig audio_config = 3 [(google.api.field_behavior) = REQUIRED];
96142
}
97143

98144
// Contains text input to be synthesized. Either `text` or `ssml` must be
@@ -115,9 +161,9 @@ message SynthesisInput {
115161

116162
// Description of which voice to use for a synthesis request.
117163
message VoiceSelectionParams {
118-
// The language (and optionally also the region) of the voice expressed as a
164+
// Required. The language (and potentially also the region) of the voice expressed as a
119165
// [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag, e.g.
120-
// "en-US". Required. This should not include a script tag (e.g. use
166+
// "en-US". This should not include a script tag (e.g. use
121167
// "cmn-cn" rather than "cmn-Hant-cn"), because the script will be inferred
122168
// from the input provided in the SynthesisInput. The TTS service
123169
// will use this parameter to help choose an appropriate voice. Note that
@@ -126,13 +172,13 @@ message VoiceSelectionParams {
126172
// (e.g. using en-US rather than en-CA if there isn't a Canadian voice
127173
// available), or even a different language, e.g. using "nb" (Norwegian
128174
// Bokmal) instead of "no" (Norwegian).
129-
string language_code = 1;
175+
string language_code = 1 [(google.api.field_behavior) = REQUIRED];
130176

131-
// The name of the voice. Optional; if not set, the service will choose a
177+
// The name of the voice. If not set, the service will choose a
132178
// voice based on the other parameters such as language_code and gender.
133179
string name = 2;
134180

135-
// The preferred gender of the voice. Optional; if not set, the service will
181+
// The preferred gender of the voice. If not set, the service will
136182
// choose a voice based on the other parameters such as language_code and
137183
// name. Note that this is only a preference, not requirement; if a
138184
// voice of the appropriate gender is not available, the synthesizer should
@@ -142,94 +188,66 @@ message VoiceSelectionParams {
142188

143189
// Description of audio data to be synthesized.
144190
message AudioConfig {
145-
// Required. The format of the requested audio byte stream.
146-
AudioEncoding audio_encoding = 1;
147-
148-
// Optional speaking rate/speed, in the range [0.25, 4.0]. 1.0 is the normal
149-
// native speed supported by the specific voice. 2.0 is twice as fast, and
150-
// 0.5 is half as fast. If unset(0.0), defaults to the native 1.0 speed. Any
151-
// other values < 0.25 or > 4.0 will return an error.
152-
double speaking_rate = 2;
153-
154-
// Optional speaking pitch, in the range [-20.0, 20.0]. 20 means increase 20
155-
// semitones from the original pitch. -20 means decrease 20 semitones from the
156-
// original pitch.
157-
double pitch = 3;
158-
159-
// Optional volume gain (in dB) of the normal native volume supported by the
160-
// specific voice, in the range [-96.0, 16.0]. If unset, or set to a value of
161-
// 0.0 (dB), will play at normal native signal amplitude. A value of -6.0 (dB)
162-
// will play at approximately half the amplitude of the normal native signal
163-
// amplitude. A value of +6.0 (dB) will play at approximately twice the
164-
// amplitude of the normal native signal amplitude. Strongly recommend not to
165-
// exceed +10 (dB) as there's usually no effective increase in loudness for
166-
// any value greater than that.
167-
double volume_gain_db = 4;
168-
169-
// The synthesis sample rate (in hertz) for this audio. Optional. If this is
170-
// different from the voice's natural sample rate, then the synthesizer will
171-
// honor this request by converting to the desired sample rate (which might
172-
// result in worse audio quality), unless the specified sample rate is not
173-
// supported for the encoding chosen, in which case it will fail the request
174-
// and return [google.rpc.Code.INVALID_ARGUMENT][].
175-
int32 sample_rate_hertz = 5;
176-
177-
// An identifier which selects 'audio effects' profiles that are applied on
178-
// (post synthesized) text to speech.
179-
// Effects are applied on top of each other in the order they are given.
180-
// See
181-
//
182-
// [audio-profiles](https:
183-
// //cloud.google.com/text-to-speech/docs/audio-profiles)
184-
// for current supported profile ids.
185-
repeated string effects_profile_id = 6;
191+
// Required. The format of the audio byte stream.
192+
AudioEncoding audio_encoding = 1 [(google.api.field_behavior) = REQUIRED];
193+
194+
// Optional. Input only. Speaking rate/speed, in the range [0.25, 4.0]. 1.0 is
195+
// the normal native speed supported by the specific voice. 2.0 is twice as
196+
// fast, and 0.5 is half as fast. If unset (0.0), defaults to the native 1.0
197+
// speed. Any other values < 0.25 or > 4.0 will return an error.
198+
double speaking_rate = 2 [
199+
(google.api.field_behavior) = INPUT_ONLY,
200+
(google.api.field_behavior) = OPTIONAL
201+
];
202+
203+
// Optional. Input only. Speaking pitch, in the range [-20.0, 20.0]. 20 means
204+
// increase 20 semitones from the original pitch. -20 means decrease 20
205+
// semitones from the original pitch.
206+
double pitch = 3 [
207+
(google.api.field_behavior) = INPUT_ONLY,
208+
(google.api.field_behavior) = OPTIONAL
209+
];
210+
211+
// Optional. Input only. Volume gain (in dB) of the normal native volume
212+
// supported by the specific voice, in the range [-96.0, 16.0]. If unset, or
213+
// set to a value of 0.0 (dB), will play at normal native signal amplitude. A
214+
// value of -6.0 (dB) will play at approximately half the amplitude of the
215+
// normal native signal amplitude. A value of +6.0 (dB) will play at
216+
// approximately twice the amplitude of the normal native signal amplitude.
217+
// Strongly recommend not to exceed +10 (dB) as there's usually no effective
218+
// increase in loudness for any value greater than that.
219+
double volume_gain_db = 4 [
220+
(google.api.field_behavior) = INPUT_ONLY,
221+
(google.api.field_behavior) = OPTIONAL
222+
];
223+
224+
// Optional. The synthesis sample rate (in hertz) for this audio. When this is
225+
// specified in SynthesizeSpeechRequest, if this is different from the voice's
226+
// natural sample rate, then the synthesizer will honor this request by
227+
// converting to the desired sample rate (which might result in worse audio
228+
// quality), unless the specified sample rate is not supported for the
229+
// encoding chosen, in which case it will fail the request and return
230+
// [google.rpc.Code.INVALID_ARGUMENT][].
231+
int32 sample_rate_hertz = 5 [(google.api.field_behavior) = OPTIONAL];
232+
233+
// Optional. Input only. An identifier which selects 'audio effects' profiles
234+
// that are applied on (post synthesized) text to speech. Effects are applied
235+
// on top of each other in the order they are given. See
236+
// [audio
237+
// profiles](https://cloud.google.com/text-to-speech/docs/audio-profiles) for
238+
// current supported profile ids.
239+
repeated string effects_profile_id = 6 [
240+
(google.api.field_behavior) = INPUT_ONLY,
241+
(google.api.field_behavior) = OPTIONAL
242+
];
186243
}
187244

188245
// The message returned to the client by the `SynthesizeSpeech` method.
189246
message SynthesizeSpeechResponse {
190247
// The audio data bytes encoded as specified in the request, including the
191-
// header (For LINEAR16 audio, we include the WAV header). Note: as
248+
// header for encodings that are wrapped in containers (e.g. MP3, OGG_OPUS).
249+
// For LINEAR16 audio, we include the WAV header. Note: as
192250
// with all bytes fields, protocol buffers use a pure binary representation,
193251
// whereas JSON representations use base64.
194252
bytes audio_content = 1;
195253
}
196-
197-
// Gender of the voice as described in
198-
// [SSML voice element](https://www.w3.org/TR/speech-synthesis11/#edef_voice).
199-
enum SsmlVoiceGender {
200-
// An unspecified gender.
201-
// In VoiceSelectionParams, this means that the client doesn't care which
202-
// gender the selected voice will have. In the Voice field of
203-
// ListVoicesResponse, this may mean that the voice doesn't fit any of the
204-
// other categories in this enum, or that the gender of the voice isn't known.
205-
SSML_VOICE_GENDER_UNSPECIFIED = 0;
206-
207-
// A male voice.
208-
MALE = 1;
209-
210-
// A female voice.
211-
FEMALE = 2;
212-
213-
// A gender-neutral voice.
214-
NEUTRAL = 3;
215-
}
216-
217-
// Configuration to set up audio encoder. The encoding determines the output
218-
// audio format that we'd like.
219-
enum AudioEncoding {
220-
// Not specified. Will return result [google.rpc.Code.INVALID_ARGUMENT][].
221-
AUDIO_ENCODING_UNSPECIFIED = 0;
222-
223-
// Uncompressed 16-bit signed little-endian samples (Linear PCM).
224-
// Audio content returned as LINEAR16 also contains a WAV header.
225-
LINEAR16 = 1;
226-
227-
// MP3 audio.
228-
MP3 = 2;
229-
230-
// Opus encoded audio wrapped in an ogg container. The result will be a
231-
// file which can be played natively on Android, and in browsers (at least
232-
// Chrome and Firefox). The quality of the encoding is considerably higher
233-
// than MP3 while using approximately the same bitrate.
234-
OGG_OPUS = 3;
235-
}

0 commit comments

Comments
 (0)