This repository has been archived by the owner on Apr 20, 2024. It is now read-only.

Commit
feat: Support output transcript to Google Cloud Storage for LongRunningRecognize (#128)


* chore: upgrade gapic-generator-python to 0.42.2

PiperOrigin-RevId: 361662015

Source-Author: Google APIs <noreply@google.com>
Source-Date: Mon Mar 8 14:47:18 2021 -0800
Source-Repo: googleapis/googleapis
Source-Sha: 28a591963253d52ce3a25a918cafbdd9928de8cf
Source-Link: googleapis/googleapis@28a5919

* feat: Support output transcript to GCS for LongRunningRecognize.

PiperOrigin-RevId: 362294447

Source-Author: Google APIs <noreply@google.com>
Source-Date: Thu Mar 11 08:07:37 2021 -0800
Source-Repo: googleapis/googleapis
Source-Sha: b6ebac16c3aecb798d4f25443d96df2f42a965ca
Source-Link: googleapis/googleapis@b6ebac1

* feat: Support output transcript to GCS for LongRunningRecognize.

PiperOrigin-RevId: 362934100

Source-Author: Google APIs <noreply@google.com>
Source-Date: Mon Mar 15 07:18:03 2021 -0700
Source-Repo: googleapis/googleapis
Source-Sha: 72326861be446be27d53af95c87e6e313367c371
Source-Link: googleapis/googleapis@7232686
yoshi-automation committed Mar 19, 2021
1 parent a3bfd74 commit 5974564
Showing 11 changed files with 469 additions and 101 deletions.
44 changes: 22 additions & 22 deletions google/cloud/speech_v1/types/__init__.py
@@ -16,41 +16,41 @@
#

from .cloud_speech import (
RecognizeRequest,
LongRunningRecognizeMetadata,
LongRunningRecognizeRequest,
StreamingRecognizeRequest,
StreamingRecognitionConfig,
LongRunningRecognizeResponse,
RecognitionAudio,
RecognitionConfig,
SpeakerDiarizationConfig,
RecognitionMetadata,
SpeechContext,
RecognitionAudio,
RecognizeRequest,
RecognizeResponse,
LongRunningRecognizeResponse,
LongRunningRecognizeMetadata,
StreamingRecognizeResponse,
StreamingRecognitionResult,
SpeechRecognitionResult,
SpeakerDiarizationConfig,
SpeechContext,
SpeechRecognitionAlternative,
SpeechRecognitionResult,
StreamingRecognitionConfig,
StreamingRecognitionResult,
StreamingRecognizeRequest,
StreamingRecognizeResponse,
WordInfo,
)

__all__ = (
"RecognizeRequest",
"LongRunningRecognizeMetadata",
"LongRunningRecognizeRequest",
"StreamingRecognizeRequest",
"StreamingRecognitionConfig",
"LongRunningRecognizeResponse",
"RecognitionAudio",
"RecognitionConfig",
"SpeakerDiarizationConfig",
"RecognitionMetadata",
"SpeechContext",
"RecognitionAudio",
"RecognizeRequest",
"RecognizeResponse",
"LongRunningRecognizeResponse",
"LongRunningRecognizeMetadata",
"StreamingRecognizeResponse",
"StreamingRecognitionResult",
"SpeechRecognitionResult",
"SpeakerDiarizationConfig",
"SpeechContext",
"SpeechRecognitionAlternative",
"SpeechRecognitionResult",
"StreamingRecognitionConfig",
"StreamingRecognitionResult",
"StreamingRecognizeRequest",
"StreamingRecognizeResponse",
"WordInfo",
)
6 changes: 4 additions & 2 deletions google/cloud/speech_v1p1beta1/__init__.py
@@ -33,6 +33,7 @@
from .types.cloud_speech import StreamingRecognitionResult
from .types.cloud_speech import StreamingRecognizeRequest
from .types.cloud_speech import StreamingRecognizeResponse
from .types.cloud_speech import TranscriptOutputConfig
from .types.cloud_speech import WordInfo
from .types.cloud_speech_adaptation import CreateCustomClassRequest
from .types.cloud_speech_adaptation import CreatePhraseSetRequest
@@ -59,7 +60,6 @@ class SpeechClient(SpeechHelpers, SpeechClient):


__all__ = (
"AdaptationClient",
"CreateCustomClassRequest",
"CreatePhraseSetRequest",
"CustomClass",
@@ -82,15 +82,17 @@ class SpeechClient(SpeechHelpers, SpeechClient):
"RecognizeResponse",
"SpeakerDiarizationConfig",
"SpeechAdaptation",
"SpeechClient",
"SpeechContext",
"SpeechRecognitionAlternative",
"SpeechRecognitionResult",
"StreamingRecognitionConfig",
"StreamingRecognitionResult",
"StreamingRecognizeRequest",
"StreamingRecognizeResponse",
"TranscriptOutputConfig",
"UpdateCustomClassRequest",
"UpdatePhraseSetRequest",
"WordInfo",
"SpeechClient",
"AdaptationClient",
)
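
The v1p1beta1 package now re-exports the new TranscriptOutputConfig message at the package level. As a minimal sketch, not part of this commit, assuming a google-cloud-speech release that includes this change; the Cloud Storage URI is a placeholder:

```python
# Minimal sketch (not part of this commit): construct the newly exported
# TranscriptOutputConfig. The gs:// URI below is a placeholder.
from google.cloud import speech_v1p1beta1 as speech

output_config = speech.TranscriptOutputConfig(
    gcs_uri="gs://example-bucket/transcripts/result.json"
)
print(output_config)
```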
75 changes: 46 additions & 29 deletions google/cloud/speech_v1p1beta1/proto/cloud_speech.proto
@@ -19,7 +19,6 @@ package google.cloud.speech.v1p1beta1;
import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/speech/v1p1beta1/resource.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/any.proto";
@@ -37,8 +36,7 @@ option objc_class_prefix = "GCS";
// Service that implements Google Cloud Speech API.
service Speech {
option (google.api.default_host) = "speech.googleapis.com";
option (google.api.oauth_scopes) =
"https://www.googleapis.com/auth/cloud-platform";
option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";

// Performs synchronous speech recognition: receive results after all audio
// has been sent and processed.
@@ -56,8 +54,7 @@ service Speech {
// a `LongRunningRecognizeResponse` message.
// For more information on asynchronous speech recognition, see the
// [how-to](https://cloud.google.com/speech-to-text/docs/async-recognize).
rpc LongRunningRecognize(LongRunningRecognizeRequest)
returns (google.longrunning.Operation) {
rpc LongRunningRecognize(LongRunningRecognizeRequest) returns (google.longrunning.Operation) {
option (google.api.http) = {
post: "/v1p1beta1/speech:longrunningrecognize"
body: "*"
@@ -71,8 +68,8 @@ service Speech {

// Performs bidirectional streaming speech recognition: receive results while
// sending audio. This method is only available via the gRPC API (not REST).
rpc StreamingRecognize(stream StreamingRecognizeRequest)
returns (stream StreamingRecognizeResponse) {}
rpc StreamingRecognize(stream StreamingRecognizeRequest) returns (stream StreamingRecognizeResponse) {
}
}

// The top-level message sent by the client for the `Recognize` method.
@@ -94,6 +91,19 @@ message LongRunningRecognizeRequest {

// Required. The audio data to be recognized.
RecognitionAudio audio = 2 [(google.api.field_behavior) = REQUIRED];

// Optional. Specifies an optional destination for the recognition results.
TranscriptOutputConfig output_config = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Specifies an optional destination for the recognition results.
message TranscriptOutputConfig {
oneof output_type {
// Specifies a Cloud Storage URI for the recognition results. Must be
// specified in the format: `gs://bucket_name/object_name`, and the bucket
// must already exist.
string gcs_uri = 1;
}
}

// The top-level message sent by the client for the `StreamingRecognize` method.
@@ -171,7 +181,7 @@ message RecognitionConfig {
// a lossless encoding (`FLAC` or `LINEAR16`). The accuracy of the speech
// recognition can be reduced if lossy codecs are used to capture or transmit
// audio, particularly if background noise is present. Lossy codecs include
// `MULAW`, `AMR`, `AMR_WB`, `OGG_OPUS`, `SPEEX_WITH_HEADER_BYTE`, and `MP3`.
// `MULAW`, `AMR`, `AMR_WB`, `OGG_OPUS`, `SPEEX_WITH_HEADER_BYTE`, `MP3`.
//
// The `FLAC` and `WAV` audio file formats include a header that describes the
// included audio content. You can request recognition for `WAV` files that
@@ -182,8 +192,7 @@ message RecognitionConfig {
// an `AudioEncoding` when you send send `FLAC` or `WAV` audio, the
// encoding configuration must match the encoding described in the audio
// header; otherwise the request returns an
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error
// code.
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error code.
enum AudioEncoding {
// Not specified.
ENCODING_UNSPECIFIED = 0;
@@ -237,8 +246,7 @@ message RecognitionConfig {

// Encoding of audio data sent in all `RecognitionAudio` messages.
// This field is optional for `FLAC` and `WAV` audio files and required
// for all other audio formats. For details, see
// [AudioEncoding][google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding].
// for all other audio formats. For details, see [AudioEncoding][google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding].
AudioEncoding encoding = 1;

// Sample rate in Hertz of the audio data sent in all
@@ -247,8 +255,7 @@ message RecognitionConfig {
// source to 16000 Hz. If that's not possible, use the native sample rate of
// the audio source (instead of re-sampling).
// This field is optional for FLAC and WAV audio files, but is
// required for all other audio formats. For details, see
// [AudioEncoding][google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding].
// required for all other audio formats. For details, see [AudioEncoding][google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding].
int32 sample_rate_hertz = 2;

// The number of channels in the input audio data.
@@ -424,8 +431,10 @@ message SpeakerDiarizationConfig {
int32 max_speaker_count = 3;

// Output only. Unused.
int32 speaker_tag = 5
[deprecated = true, (google.api.field_behavior) = OUTPUT_ONLY];
int32 speaker_tag = 5 [
deprecated = true,
(google.api.field_behavior) = OUTPUT_ONLY
];
}

// Description of audio data to be recognized.
@@ -589,8 +598,8 @@ message SpeechContext {

// Contains audio data in the encoding specified in the `RecognitionConfig`.
// Either `content` or `uri` must be supplied. Supplying both or neither
// returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
// See [content limits](https://cloud.google.com/speech-to-text/quotas#content).
// returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. See
// [content limits](https://cloud.google.com/speech-to-text/quotas#content).
message RecognitionAudio {
// The audio source, which is either inline content or a Google Cloud
// Storage uri.
@@ -605,9 +614,8 @@ message RecognitionAudio {
// Currently, only Google Cloud Storage URIs are
// supported, which must be specified in the following format:
// `gs://bucket_name/object_name` (other URI formats return
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]).
// For more information, see [Request
// URIs](https://cloud.google.com/storage/docs/reference-uris).
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more information, see
// [Request URIs](https://cloud.google.com/storage/docs/reference-uris).
string uri = 2;
}
}
@@ -630,6 +638,12 @@ message LongRunningRecognizeResponse {
// Sequential list of transcription results corresponding to
// sequential portions of audio.
repeated SpeechRecognitionResult results = 2;

// Original output config if present in the request.
TranscriptOutputConfig output_config = 6;

// If the transcript output fails this field contains the relevant error.
google.rpc.Status output_error = 7;
}

// Describes the progress of a long-running `LongRunningRecognize` call. It is
@@ -646,9 +660,12 @@ message LongRunningRecognizeMetadata {
// Time of the most recent processing update.
google.protobuf.Timestamp last_update_time = 3;

// Output only. The URI of the audio file being transcribed. Empty if the
// audio was sent as byte content.
// Output only. The URI of the audio file being transcribed. Empty if the audio was sent
// as byte content.
string uri = 4 [(google.api.field_behavior) = OUTPUT_ONLY];

// Output only. A copy of the TranscriptOutputConfig if it was set in the request.
TranscriptOutputConfig output_config = 5 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// `StreamingRecognizeResponse` is the only message returned to the client by
@@ -762,9 +779,9 @@ message StreamingRecognitionResult {
// For audio_channel_count = N, its output values can range from '1' to 'N'.
int32 channel_tag = 5;

// Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
// language tag of the language in this result. This language code was
// detected to have the most likelihood of being spoken in the audio.
// Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag
// of the language in this result. This language code was detected to have
// the most likelihood of being spoken in the audio.
string language_code = 6 [(google.api.field_behavior) = OUTPUT_ONLY];
}

@@ -781,9 +798,9 @@ message SpeechRecognitionResult {
// For audio_channel_count = N, its output values can range from '1' to 'N'.
int32 channel_tag = 2;

// Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
// language tag of the language in this result. This language code was
// detected to have the most likelihood of being spoken in the audio.
// Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag
// of the language in this result. This language code was detected to have
// the most likelihood of being spoken in the audio.
string language_code = 5 [(google.api.field_behavior) = OUTPUT_ONLY];
}

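Taken together, the new output_config request field and the output_config/output_error response fields let a LongRunningRecognize call write its transcript to Cloud Storage and report the outcome. The following is a hedged usage sketch, not part of this commit, assuming a google-cloud-speech release containing this change; the audio URI and output bucket are placeholders:

```python
# Usage sketch (assumption, not part of this commit): run LongRunningRecognize
# with the new output_config and inspect the echoed config and any write error.
from google.cloud import speech_v1p1beta1 as speech

client = speech.SpeechClient()

config = speech.RecognitionConfig(
    encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz=16000,
    language_code="en-US",
)
audio = speech.RecognitionAudio(uri="gs://example-bucket/audio.wav")  # placeholder
output_config = speech.TranscriptOutputConfig(
    gcs_uri="gs://example-bucket/transcripts/audio.json"  # placeholder; bucket must already exist
)

operation = client.long_running_recognize(
    request={
        "config": config,
        "audio": audio,
        "output_config": output_config,
    }
)
response = operation.result()

# Per the proto above, the response echoes the output config and surfaces any
# transcript-write failure in output_error.
print(response.output_config)
print(response.output_error)
```
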
@@ -69,6 +69,7 @@ service Adaptation {
patch: "/v1p1beta1/{phrase_set.name=projects/*/locations/*/phraseSets/*}"
body: "phrase_set"
};
option (google.api.method_signature) = "phrase_set,update_mask";
}

// Delete a phrase set.
@@ -110,6 +111,7 @@ service Adaptation {
patch: "/v1p1beta1/{custom_class.name=projects/*/locations/*/customClasses/*}"
body: "custom_class"
};
option (google.api.method_signature) = "custom_class,update_mask";
}

// Delete a custom class.
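
The method_signature options added to UpdatePhraseSet and UpdateCustomClass allow the generated client to expose flattened keyword arguments. A hedged sketch, not part of this commit, of calling the flattened phrase-set update through AdaptationClient; the project and resource names are placeholders:

```python
# Sketch (assumption, not part of this commit): flattened update_phrase_set
# call implied by the new method_signature option. Resource names are placeholders.
from google.cloud import speech_v1p1beta1 as speech
from google.protobuf import field_mask_pb2

adaptation_client = speech.AdaptationClient()

phrase_set = speech.PhraseSet(
    name="projects/example-project/locations/global/phraseSets/example-set",
    phrases=[speech.PhraseSet.Phrase(value="Cloud Speech", boost=10.0)],
)
update_mask = field_mask_pb2.FieldMask(paths=["phrases"])

updated = adaptation_client.update_phrase_set(
    phrase_set=phrase_set, update_mask=update_mask
)
print(updated.name)
```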
