googleapis
diff --git a/protos/google/cloud/texttospeech/v1/cloud_tts.proto b/protos/google/cloud/texttospeech/v1/cloud_tts.proto
Lines changed: 113 additions & 95 deletions
@@ -1,4 +1,4 @@
-// Copyright 2018 Google LLC.
+// Copyright 2019 Google LLC.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -18,6 +18,8 @@ syntax = "proto3";
 package google.cloud.texttospeech.v1;
 
 import "google/api/annotations.proto";
+import "google/api/client.proto";
+import "google/api/field_behavior.proto";
 
 option cc_enable_arenas = true;
 option csharp_namespace = "Google.Cloud.TextToSpeech.V1";
@@ -29,35 +31,79 @@ option php_namespace = "Google\\Cloud\\TextToSpeech\\V1";
 
 // Service that implements Google Cloud Text-to-Speech API.
 service TextToSpeech {
+  option (google.api.default_host) = "texttospeech.googleapis.com";
+  option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";
+
   // Returns a list of Voice supported for synthesis.
   rpc ListVoices(ListVoicesRequest) returns (ListVoicesResponse) {
     option (google.api.http) = {
       get: "/v1/voices"
     };
+    option (google.api.method_signature) = "language_code";
   }
 
   // Synthesizes speech synchronously: receive results after all text input
   // has been processed.
-  rpc SynthesizeSpeech(SynthesizeSpeechRequest)
-      returns (SynthesizeSpeechResponse) {
+  rpc SynthesizeSpeech(SynthesizeSpeechRequest) returns (SynthesizeSpeechResponse) {
     option (google.api.http) = {
       post: "/v1/text:synthesize"
       body: "*"
     };
+    option (google.api.method_signature) = "input,voice,audio_config";
   }
 }
 
 // The top-level message sent by the client for the `ListVoices` method.
 message ListVoicesRequest {
-  // Optional (but recommended)
+  // Optional. Recommended.
   // [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag. If
   // specified, the ListVoices call will only return voices that can be used to
   // synthesize this language_code. E.g. when specifying "en-NZ", you will get
   // supported "en-*" voices; when specifying "no", you will get supported
   // "no-*" (Norwegian) and "nb-*" (Norwegian Bokmal) voices; specifying "zh"
   // will also get supported "cmn-*" voices; specifying "zh-hk" will also get
   // supported "yue-*" voices.
-  string language_code = 1;
+  string language_code = 1 [(google.api.field_behavior) = OPTIONAL];
+}
+
+// Gender of the voice as described in
+// [SSML voice element](https://www.w3.org/TR/speech-synthesis11/#edef_voice).
+enum SsmlVoiceGender {
+  // An unspecified gender.
+  // In VoiceSelectionParams, this means that the client doesn't care which
+  // gender the selected voice will have. In the Voice field of
+  // ListVoicesResponse, this may mean that the voice doesn't fit any of the
+  // other categories in this enum, or that the gender of the voice isn't known.
+  SSML_VOICE_GENDER_UNSPECIFIED = 0;
+
+  // A male voice.
+  MALE = 1;
+
+  // A female voice.
+  FEMALE = 2;
+
+  // A gender-neutral voice.
+  NEUTRAL = 3;
+}
+
+// Configuration to set up audio encoder. The encoding determines the output
+// audio format that we'd like.
+enum AudioEncoding {
+  // Not specified. Will return result [google.rpc.Code.INVALID_ARGUMENT][].
+  AUDIO_ENCODING_UNSPECIFIED = 0;
+
+  // Uncompressed 16-bit signed little-endian samples (Linear PCM).
+  // Audio content returned as LINEAR16 also contains a WAV header.
+  LINEAR16 = 1;
+
+  // MP3 audio at 32kbps.
+  MP3 = 2;
+
+  // Opus encoded audio wrapped in an ogg container. The result will be a
+  // file which can be played natively on Android, and in browsers (at least
+  // Chrome and Firefox). The quality of the encoding is considerably higher
+  // than MP3 while using approximately the same bitrate.
+  OGG_OPUS = 3;
 }
 
 // The message returned to the client by the `ListVoices` method.
@@ -86,13 +132,13 @@ message Voice {
 // The top-level message sent by the client for the `SynthesizeSpeech` method.
 message SynthesizeSpeechRequest {
   // Required. The Synthesizer requires either plain text or SSML as input.
-  SynthesisInput input = 1;
+  SynthesisInput input = 1 [(google.api.field_behavior) = REQUIRED];
 
   // Required. The desired voice of the synthesized audio.
-  VoiceSelectionParams voice = 2;
+  VoiceSelectionParams voice = 2 [(google.api.field_behavior) = REQUIRED];
 
   // Required. The configuration of the synthesized audio.
-  AudioConfig audio_config = 3;
+  AudioConfig audio_config = 3 [(google.api.field_behavior) = REQUIRED];
 }
 
 // Contains text input to be synthesized. Either `text` or `ssml` must be
@@ -115,9 +161,9 @@ message SynthesisInput {
 
 // Description of which voice to use for a synthesis request.
 message VoiceSelectionParams {
-  // The language (and optionally also the region) of the voice expressed as a
+  // Required. The language (and potentially also the region) of the voice expressed as a
   // [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag, e.g.
-  // "en-US". Required. This should not include a script tag (e.g. use
+  // "en-US". This should not include a script tag (e.g. use
   // "cmn-cn" rather than "cmn-Hant-cn"), because the script will be inferred
   // from the input provided in the SynthesisInput.  The TTS service
   // will use this parameter to help choose an appropriate voice.  Note that
@@ -126,13 +172,13 @@ message VoiceSelectionParams {
   // (e.g. using en-US rather than en-CA if there isn't a Canadian voice
   // available), or even a different language, e.g. using "nb" (Norwegian
   // Bokmal) instead of "no" (Norwegian)".
-  string language_code = 1;
+  string language_code = 1 [(google.api.field_behavior) = REQUIRED];
 
-  // The name of the voice. Optional; if not set, the service will choose a
+  // The name of the voice. If not set, the service will choose a
   // voice based on the other parameters such as language_code and gender.
   string name = 2;
 
-  // The preferred gender of the voice. Optional; if not set, the service will
+  // The preferred gender of the voice. If not set, the service will
   // choose a voice based on the other parameters such as language_code and
   // name. Note that this is only a preference, not requirement; if a
   // voice of the appropriate gender is not available, the synthesizer should
@@ -142,94 +188,66 @@ message VoiceSelectionParams {
 
 // Description of audio data to be synthesized.
 message AudioConfig {
-  // Required. The format of the requested audio byte stream.
-  AudioEncoding audio_encoding = 1;
-
-  // Optional speaking rate/speed, in the range [0.25, 4.0]. 1.0 is the normal
-  // native speed supported by the specific voice. 2.0 is twice as fast, and
-  // 0.5 is half as fast. If unset(0.0), defaults to the native 1.0 speed. Any
-  // other values < 0.25 or > 4.0 will return an error.
-  double speaking_rate = 2;
-
-  // Optional speaking pitch, in the range [-20.0, 20.0]. 20 means increase 20
-  // semitones from the original pitch. -20 means decrease 20 semitones from the
-  // original pitch.
-  double pitch = 3;
-
-  // Optional volume gain (in dB) of the normal native volume supported by the
-  // specific voice, in the range [-96.0, 16.0]. If unset, or set to a value of
-  // 0.0 (dB), will play at normal native signal amplitude. A value of -6.0 (dB)
-  // will play at approximately half the amplitude of the normal native signal
-  // amplitude. A value of +6.0 (dB) will play at approximately twice the
-  // amplitude of the normal native signal amplitude. Strongly recommend not to
-  // exceed +10 (dB) as there's usually no effective increase in loudness for
-  // any value greater than that.
-  double volume_gain_db = 4;
-
-  // The synthesis sample rate (in hertz) for this audio. Optional.  If this is
-  // different from the voice's natural sample rate, then the synthesizer will
-  // honor this request by converting to the desired sample rate (which might
-  // result in worse audio quality), unless the specified sample rate is not
-  // supported for the encoding chosen, in which case it will fail the request
-  // and return [google.rpc.Code.INVALID_ARGUMENT][].
-  int32 sample_rate_hertz = 5;
-
-  // An identifier which selects 'audio effects' profiles that are applied on
-  // (post synthesized) text to speech.
-  // Effects are applied on top of each other in the order they are given.
-  // See
-  //
-  // [audio-profiles](https:
-  // //cloud.google.com/text-to-speech/docs/audio-profiles)
-  // for current supported profile ids.
-  repeated string effects_profile_id = 6;
+  // Required. The format of the audio byte stream.
+  AudioEncoding audio_encoding = 1 [(google.api.field_behavior) = REQUIRED];
+
+  // Optional. Input only. Speaking rate/speed, in the range [0.25, 4.0]. 1.0 is
+  // the normal native speed supported by the specific voice. 2.0 is twice as
+  // fast, and 0.5 is half as fast. If unset (0.0), defaults to the native 1.0
+  // speed. Any other values < 0.25 or > 4.0 will return an error.
+  double speaking_rate = 2 [
+    (google.api.field_behavior) = INPUT_ONLY,
+    (google.api.field_behavior) = OPTIONAL
+  ];
+
+  // Optional. Input only. Speaking pitch, in the range [-20.0, 20.0]. 20 means
+  // increase 20 semitones from the original pitch. -20 means decrease 20
+  // semitones from the original pitch.
+  double pitch = 3 [
+    (google.api.field_behavior) = INPUT_ONLY,
+    (google.api.field_behavior) = OPTIONAL
+  ];
+
+  // Optional. Input only. Volume gain (in dB) of the normal native volume
+  // supported by the specific voice, in the range [-96.0, 16.0]. If unset, or
+  // set to a value of 0.0 (dB), will play at normal native signal amplitude. A
+  // value of -6.0 (dB) will play at approximately half the amplitude of the
+  // normal native signal amplitude. A value of +6.0 (dB) will play at
+  // approximately twice the amplitude of the normal native signal amplitude.
+  // We strongly recommend not exceeding +10 (dB) as there's usually no
+  // effective increase in loudness for any value greater than that.
+  double volume_gain_db = 4 [
+    (google.api.field_behavior) = INPUT_ONLY,
+    (google.api.field_behavior) = OPTIONAL
+  ];
+
+  // Optional. The synthesis sample rate (in hertz) for this audio. When this is
+  // specified in SynthesizeSpeechRequest, if this is different from the voice's
+  // natural sample rate, then the synthesizer will honor this request by
+  // converting to the desired sample rate (which might result in worse audio
+  // quality), unless the specified sample rate is not supported for the
+  // encoding chosen, in which case it will fail the request and return
+  // [google.rpc.Code.INVALID_ARGUMENT][].
+  int32 sample_rate_hertz = 5 [(google.api.field_behavior) = OPTIONAL];
+
+  // Optional. Input only. An identifier which selects 'audio effects' profiles
+  // that are applied on (post synthesized) text to speech. Effects are applied
+  // on top of each other in the order they are given. See
+  // [audio
+  // profiles](https://cloud.google.com/text-to-speech/docs/audio-profiles) for
+  // currently supported profile IDs.
+  repeated string effects_profile_id = 6 [
+    (google.api.field_behavior) = INPUT_ONLY,
+    (google.api.field_behavior) = OPTIONAL
+  ];
 }
 
 // The message returned to the client by the `SynthesizeSpeech` method.
 message SynthesizeSpeechResponse {
   // The audio data bytes encoded as specified in the request, including the
-  // header (For LINEAR16 audio, we include the WAV header). Note: as
+  // header for encodings that are wrapped in containers (e.g. MP3, OGG_OPUS).
+  // For LINEAR16 audio, we include the WAV header. Note: as
   // with all bytes fields, protobuffers use a pure binary representation,
   // whereas JSON representations use base64.
   bytes audio_content = 1;
 }
-
-// Gender of the voice as described in
-// [SSML voice element](https://www.w3.org/TR/speech-synthesis11/#edef_voice).
-enum SsmlVoiceGender {
-  // An unspecified gender.
-  // In VoiceSelectionParams, this means that the client doesn't care which
-  // gender the selected voice will have. In the Voice field of
-  // ListVoicesResponse, this may mean that the voice doesn't fit any of the
-  // other categories in this enum, or that the gender of the voice isn't known.
-  SSML_VOICE_GENDER_UNSPECIFIED = 0;
-
-  // A male voice.
-  MALE = 1;
-
-  // A female voice.
-  FEMALE = 2;
-
-  // A gender-neutral voice.
-  NEUTRAL = 3;
-}
-
-// Configuration to set up audio encoder. The encoding determines the output
-// audio format that we'd like.
-enum AudioEncoding {
-  // Not specified. Will return result [google.rpc.Code.INVALID_ARGUMENT][].
-  AUDIO_ENCODING_UNSPECIFIED = 0;
-
-  // Uncompressed 16-bit signed little-endian samples (Linear PCM).
-  // Audio content returned as LINEAR16 also contains a WAV header.
-  LINEAR16 = 1;
-
-  // MP3 audio.
-  MP3 = 2;
-
-  // Opus encoded audio wrapped in an ogg container. The result will be a
-  // file which can be played natively on Android, and in browsers (at least
-  // Chrome and Firefox). The quality of the encoding is considerably higher
-  // than MP3 while using approximately the same bitrate.
-  OGG_OPUS = 3;
-}