This repository has been archived by the owner on Apr 20, 2024. It is now read-only.

Commit
feat: Support output transcript to Google Cloud Storage for LongRunningRecognize (#128)


* chore: upgrade gapic-generator-python to 0.42.2

PiperOrigin-RevId: 361662015

Source-Author: Google APIs <noreply@google.com>
Source-Date: Mon Mar 8 14:47:18 2021 -0800
Source-Repo: googleapis/googleapis
Source-Sha: 28a591963253d52ce3a25a918cafbdd9928de8cf
Source-Link: googleapis/googleapis@28a5919

* feat: Support output transcript to GCS for LongRunningRecognize.

PiperOrigin-RevId: 362294447

Source-Author: Google APIs <noreply@google.com>
Source-Date: Thu Mar 11 08:07:37 2021 -0800
Source-Repo: googleapis/googleapis
Source-Sha: b6ebac16c3aecb798d4f25443d96df2f42a965ca
Source-Link: googleapis/googleapis@b6ebac1

* feat: Support output transcript to GCS for LongRunningRecognize.

PiperOrigin-RevId: 362934100

Source-Author: Google APIs <noreply@google.com>
Source-Date: Mon Mar 15 07:18:03 2021 -0700
Source-Repo: googleapis/googleapis
Source-Sha: 72326861be446be27d53af95c87e6e313367c371
Source-Link: googleapis/googleapis@7232686
yoshi-automation committed Mar 19, 2021
1 parent a3bfd74 commit 5974564
Showing 11 changed files with 469 additions and 101 deletions.
44 changes: 22 additions & 22 deletions google/cloud/speech_v1/types/__init__.py
@@ -16,41 +16,41 @@
#

from .cloud_speech import (
RecognizeRequest,
LongRunningRecognizeMetadata,
LongRunningRecognizeRequest,
StreamingRecognizeRequest,
StreamingRecognitionConfig,
LongRunningRecognizeResponse,
RecognitionAudio,
RecognitionConfig,
SpeakerDiarizationConfig,
RecognitionMetadata,
SpeechContext,
RecognitionAudio,
RecognizeRequest,
RecognizeResponse,
LongRunningRecognizeResponse,
LongRunningRecognizeMetadata,
StreamingRecognizeResponse,
StreamingRecognitionResult,
SpeechRecognitionResult,
SpeakerDiarizationConfig,
SpeechContext,
SpeechRecognitionAlternative,
SpeechRecognitionResult,
StreamingRecognitionConfig,
StreamingRecognitionResult,
StreamingRecognizeRequest,
StreamingRecognizeResponse,
WordInfo,
)

__all__ = (
"RecognizeRequest",
"LongRunningRecognizeMetadata",
"LongRunningRecognizeRequest",
"StreamingRecognizeRequest",
"StreamingRecognitionConfig",
"LongRunningRecognizeResponse",
"RecognitionAudio",
"RecognitionConfig",
"SpeakerDiarizationConfig",
"RecognitionMetadata",
"SpeechContext",
"RecognitionAudio",
"RecognizeRequest",
"RecognizeResponse",
"LongRunningRecognizeResponse",
"LongRunningRecognizeMetadata",
"StreamingRecognizeResponse",
"StreamingRecognitionResult",
"SpeechRecognitionResult",
"SpeakerDiarizationConfig",
"SpeechContext",
"SpeechRecognitionAlternative",
"SpeechRecognitionResult",
"StreamingRecognitionConfig",
"StreamingRecognitionResult",
"StreamingRecognizeRequest",
"StreamingRecognizeResponse",
"WordInfo",
)
6 changes: 4 additions & 2 deletions google/cloud/speech_v1p1beta1/__init__.py
@@ -33,6 +33,7 @@
from .types.cloud_speech import StreamingRecognitionResult
from .types.cloud_speech import StreamingRecognizeRequest
from .types.cloud_speech import StreamingRecognizeResponse
from .types.cloud_speech import TranscriptOutputConfig
from .types.cloud_speech import WordInfo
from .types.cloud_speech_adaptation import CreateCustomClassRequest
from .types.cloud_speech_adaptation import CreatePhraseSetRequest
@@ -59,7 +60,6 @@ class SpeechClient(SpeechHelpers, SpeechClient):


__all__ = (
"AdaptationClient",
"CreateCustomClassRequest",
"CreatePhraseSetRequest",
"CustomClass",
@@ -82,15 +82,17 @@ class SpeechClient(SpeechHelpers, SpeechClient):
"RecognizeResponse",
"SpeakerDiarizationConfig",
"SpeechAdaptation",
"SpeechClient",
"SpeechContext",
"SpeechRecognitionAlternative",
"SpeechRecognitionResult",
"StreamingRecognitionConfig",
"StreamingRecognitionResult",
"StreamingRecognizeRequest",
"StreamingRecognizeResponse",
"TranscriptOutputConfig",
"UpdateCustomClassRequest",
"UpdatePhraseSetRequest",
"WordInfo",
"SpeechClient",
"AdaptationClient",
)
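
The v1p1beta1 package now re-exports the new TranscriptOutputConfig message at the package level. As a minimal sketch, not part of this commit, assuming a google-cloud-speech release that includes this change; the Cloud Storage URI is a placeholder:

```python
# Minimal sketch (not part of this commit): construct the newly exported
# TranscriptOutputConfig. The gs:// URI below is a placeholder.
from google.cloud import speech_v1p1beta1 as speech

output_config = speech.TranscriptOutputConfig(
    gcs_uri="gs://example-bucket/transcripts/result.json"
)
print(output_config)
```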
75 changes: 46 additions & 29 deletions google/cloud/speech_v1p1beta1/proto/cloud_speech.proto
@@ -19,7 +19,6 @@ package google.cloud.speech.v1p1beta1;
import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/speech/v1p1beta1/resource.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/any.proto";
@@ -37,8 +36,7 @@ option objc_class_prefix = "GCS";
// Service that implements Google Cloud Speech API.
service Speech {
option (google.api.default_host) = "speech.googleapis.com";
option (google.api.oauth_scopes) =
"https://www.googleapis.com/auth/cloud-platform";
option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";

// Performs synchronous speech recognition: receive results after all audio
// has been sent and processed.
@@ -56,8 +54,7 @@ service Speech {
// a `LongRunningRecognizeResponse` message.
// For more information on asynchronous speech recognition, see the
// [how-to](https://cloud.google.com/speech-to-text/docs/async-recognize).
rpc LongRunningRecognize(LongRunningRecognizeRequest)
returns (google.longrunning.Operation) {
rpc LongRunningRecognize(LongRunningRecognizeRequest) returns (google.longrunning.Operation) {
option (google.api.http) = {
post: "/v1p1beta1/speech:longrunningrecognize"
body: "*"
@@ -71,8 +68,8 @@ service Speech {

// Performs bidirectional streaming speech recognition: receive results while
// sending audio. This method is only available via the gRPC API (not REST).
rpc StreamingRecognize(stream StreamingRecognizeRequest)
returns (stream StreamingRecognizeResponse) {}
rpc StreamingRecognize(stream StreamingRecognizeRequest) returns (stream StreamingRecognizeResponse) {
}
}

// The top-level message sent by the client for the `Recognize` method.
@@ -94,6 +91,19 @@ message LongRunningRecognizeRequest {

// Required. The audio data to be recognized.
RecognitionAudio audio = 2 [(google.api.field_behavior) = REQUIRED];

// Optional. Specifies an optional destination for the recognition results.
TranscriptOutputConfig output_config = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Specifies an optional destination for the recognition results.
message TranscriptOutputConfig {
oneof output_type {
// Specifies a Cloud Storage URI for the recognition results. Must be
// specified in the format: `gs://bucket_name/object_name`, and the bucket
// must already exist.
string gcs_uri = 1;
}
}

// The top-level message sent by the client for the `StreamingRecognize` method.
@@ -171,7 +181,7 @@ message RecognitionConfig {
// a lossless encoding (`FLAC` or `LINEAR16`). The accuracy of the speech
// recognition can be reduced if lossy codecs are used to capture or transmit
// audio, particularly if background noise is present. Lossy codecs include
// `MULAW`, `AMR`, `AMR_WB`, `OGG_OPUS`, `SPEEX_WITH_HEADER_BYTE`, and `MP3`.
// `MULAW`, `AMR`, `AMR_WB`, `OGG_OPUS`, `SPEEX_WITH_HEADER_BYTE`, `MP3`.
//
// The `FLAC` and `WAV` audio file formats include a header that describes the
// included audio content. You can request recognition for `WAV` files that
@@ -182,8 +192,7 @@ message RecognitionConfig {
// an `AudioEncoding` when you send send `FLAC` or `WAV` audio, the
// encoding configuration must match the encoding described in the audio
// header; otherwise the request returns an
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error
// code.
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error code.
enum AudioEncoding {
// Not specified.
ENCODING_UNSPECIFIED = 0;
@@ -237,8 +246,7 @@ message RecognitionConfig {

// Encoding of audio data sent in all `RecognitionAudio` messages.
// This field is optional for `FLAC` and `WAV` audio files and required
// for all other audio formats. For details, see
// [AudioEncoding][google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding].
// for all other audio formats. For details, see [AudioEncoding][google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding].
AudioEncoding encoding = 1;

// Sample rate in Hertz of the audio data sent in all
@@ -247,8 +255,7 @@ message RecognitionConfig {
// source to 16000 Hz. If that's not possible, use the native sample rate of
// the audio source (instead of re-sampling).
// This field is optional for FLAC and WAV audio files, but is
// required for all other audio formats. For details, see
// [AudioEncoding][google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding].
// required for all other audio formats. For details, see [AudioEncoding][google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding].
int32 sample_rate_hertz = 2;

// The number of channels in the input audio data.
@@ -424,8 +431,10 @@ message SpeakerDiarizationConfig {
int32 max_speaker_count = 3;

// Output only. Unused.
int32 speaker_tag = 5
[deprecated = true, (google.api.field_behavior) = OUTPUT_ONLY];
int32 speaker_tag = 5 [
deprecated = true,
(google.api.field_behavior) = OUTPUT_ONLY
];
}

// Description of audio data to be recognized.
@@ -589,8 +598,8 @@ message SpeechContext {

// Contains audio data in the encoding specified in the `RecognitionConfig`.
// Either `content` or `uri` must be supplied. Supplying both or neither
// returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
// See [content limits](https://cloud.google.com/speech-to-text/quotas#content).
// returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. See
// [content limits](https://cloud.google.com/speech-to-text/quotas#content).
message RecognitionAudio {
// The audio source, which is either inline content or a Google Cloud
// Storage uri.
@@ -605,9 +614,8 @@ message RecognitionAudio {
// Currently, only Google Cloud Storage URIs are
// supported, which must be specified in the following format:
// `gs://bucket_name/object_name` (other URI formats return
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]).
// For more information, see [Request
// URIs](https://cloud.google.com/storage/docs/reference-uris).
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more information, see
// [Request URIs](https://cloud.google.com/storage/docs/reference-uris).
string uri = 2;
}
}
@@ -630,6 +638,12 @@ message LongRunningRecognizeResponse {
// Sequential list of transcription results corresponding to
// sequential portions of audio.
repeated SpeechRecognitionResult results = 2;

// Original output config if present in the request.
TranscriptOutputConfig output_config = 6;

// If the transcript output fails this field contains the relevant error.
google.rpc.Status output_error = 7;
}

// Describes the progress of a long-running `LongRunningRecognize` call. It is
@@ -646,9 +660,12 @@ message LongRunningRecognizeMetadata {
// Time of the most recent processing update.
google.protobuf.Timestamp last_update_time = 3;

// Output only. The URI of the audio file being transcribed. Empty if the
// audio was sent as byte content.
// Output only. The URI of the audio file being transcribed. Empty if the audio was sent
// as byte content.
string uri = 4 [(google.api.field_behavior) = OUTPUT_ONLY];

// Output only. A copy of the TranscriptOutputConfig if it was set in the request.
TranscriptOutputConfig output_config = 5 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// `StreamingRecognizeResponse` is the only message returned to the client by
@@ -762,9 +779,9 @@ message StreamingRecognitionResult {
// For audio_channel_count = N, its output values can range from '1' to 'N'.
int32 channel_tag = 5;

// Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
// language tag of the language in this result. This language code was
// detected to have the most likelihood of being spoken in the audio.
// Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag
// of the language in this result. This language code was detected to have
// the most likelihood of being spoken in the audio.
string language_code = 6 [(google.api.field_behavior) = OUTPUT_ONLY];
}

@@ -781,9 +798,9 @@ message SpeechRecognitionResult {
// For audio_channel_count = N, its output values can range from '1' to 'N'.
int32 channel_tag = 2;

// Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
// language tag of the language in this result. This language code was
// detected to have the most likelihood of being spoken in the audio.
// Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag
// of the language in this result. This language code was detected to have
// the most likelihood of being spoken in the audio.
string language_code = 5 [(google.api.field_behavior) = OUTPUT_ONLY];
}

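Taken together, the new output_config request field and the output_config/output_error response fields let a LongRunningRecognize call write its transcript to Cloud Storage and report the outcome. The following is a hedged usage sketch, not part of this commit, assuming a google-cloud-speech release containing this change; the audio URI and output bucket are placeholders:

```python
# Usage sketch (assumption, not part of this commit): run LongRunningRecognize
# with the new output_config and inspect the echoed config and any write error.
from google.cloud import speech_v1p1beta1 as speech

client = speech.SpeechClient()

config = speech.RecognitionConfig(
    encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz=16000,
    language_code="en-US",
)
audio = speech.RecognitionAudio(uri="gs://example-bucket/audio.wav")  # placeholder
output_config = speech.TranscriptOutputConfig(
    gcs_uri="gs://example-bucket/transcripts/audio.json"  # placeholder; bucket must already exist
)

operation = client.long_running_recognize(
    request={
        "config": config,
        "audio": audio,
        "output_config": output_config,
    }
)
response = operation.result()

# Per the proto above, the response echoes the output config and surfaces any
# transcript-write failure in output_error.
print(response.output_config)
print(response.output_error)
```
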
@@ -69,6 +69,7 @@ service Adaptation {
patch: "/v1p1beta1/{phrase_set.name=projects/*/locations/*/phraseSets/*}"
body: "phrase_set"
};
option (google.api.method_signature) = "phrase_set,update_mask";
}

// Delete a phrase set.
@@ -110,6 +111,7 @@ service Adaptation {
patch: "/v1p1beta1/{custom_class.name=projects/*/locations/*/customClasses/*}"
body: "custom_class"
};
option (google.api.method_signature) = "custom_class,update_mask";
}

// Delete a custom class.
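
The method_signature options added to UpdatePhraseSet and UpdateCustomClass allow the generated client to expose flattened keyword arguments. A hedged sketch, not part of this commit, of calling the flattened phrase-set update through AdaptationClient; the project and resource names are placeholders:

```python
# Sketch (assumption, not part of this commit): flattened update_phrase_set
# call implied by the new method_signature option. Resource names are placeholders.
from google.cloud import speech_v1p1beta1 as speech
from google.protobuf import field_mask_pb2

adaptation_client = speech.AdaptationClient()

phrase_set = speech.PhraseSet(
    name="projects/example-project/locations/global/phraseSets/example-set",
    phrases=[speech.PhraseSet.Phrase(value="Cloud Speech", boost=10.0)],
)
update_mask = field_mask_pb2.FieldMask(paths=["phrases"])

updated = adaptation_client.update_phrase_set(
    phrase_set=phrase_set, update_mask=update_mask
)
print(updated.name)
```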
