From 3ccf3fe57e5edef83f11efb13f4d44305b810f26 Mon Sep 17 00:00:00 2001
From: Yoshi Automation Bot
Date: Tue, 12 Nov 2019 10:57:25 -0800
Subject: [PATCH] feat: add speaker_tag to WordInfo (#40)

* [CHANGE ME] Re-generated to pick up changes in the API or client library
  generator.

* fix: allow proto interface changes
---
 .github/ISSUE_TEMPLATE/bug_report.md          |   2 +-
 .kokoro/continuous/propose_release.sh         |   2 +-
 .kokoro/release/bump_snapshot.sh              |   2 +-
 .kokoro/release/drop.cfg                      |   3 -
 .kokoro/release/promote.cfg                   |   4 -
 .kokoro/release/publish_javadoc.sh            |   2 +-
 .../cloud/speech/v1/SpeechSmokeTest.java      |   2 +-
 .../speech/v1p1beta1/SpeechSmokeTest.java     |   2 +-
 .../clirr-ignored-differences.xml             |  19 ++
 .../speech/v1/SpeakerDiarizationConfig.java   |  40 ++---
 .../v1/SpeakerDiarizationConfigOrBuilder.java |  10 +-
 .../google/cloud/speech/v1/SpeechProto.java   | 163 +++++++++---------
 .../com/google/cloud/speech/v1/WordInfo.java  |  95 ++++++++++
 .../cloud/speech/v1/WordInfoOrBuilder.java    |  15 ++
 .../google/cloud/speech/v1/cloud_speech.proto |  16 +-
 .../v1/test/speech_transcribe_async.test.yaml |  28 +++
 .../speech_transcribe_async_gcs.test.yaml     |  28 +++
 ...ribe_async_word_time_offsets_gcs.test.yaml |  37 ++++
 ...speech_transcribe_enhanced_model.test.yaml |  29 ++++
 ...peech_transcribe_model_selection.test.yaml |  52 ++++++
 ...h_transcribe_model_selection_gcs.test.yaml |  52 ++++++
 .../speech_transcribe_multichannel.test.yaml  |  31 ++++
 ...eech_transcribe_multichannel_gcs.test.yaml |  32 ++++
 .../v1/test/speech_transcribe_sync.test.yaml  |  28 +++
 .../test/speech_transcribe_sync_gcs.test.yaml |  28 +++
 .../test/speech_adaptation_beta.test.yaml     |  11 ++
 .../speech_contexts_classes_beta.test.yaml    |  11 ++
 .../test/speech_quickstart_beta.test.yaml     |  11 ++
 ...transcribe_auto_punctuation_beta.test.yaml |  28 +++
 ...eech_transcribe_diarization_beta.test.yaml |  40 +++++
 ...ch_transcribe_multilanguage_beta.test.yaml |  33 ++++
 ...scribe_recognition_metadata_beta.test.yaml |  27 +++
 ...cribe_word_level_confidence_beta.test.yaml |  35 ++++
 synth.metadata                                |  10 +-
 34 files changed, 793 insertions(+), 135 deletions(-)
 create mode 100644 proto-google-cloud-speech-v1/clirr-ignored-differences.xml
 create mode 100644 samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_async.test.yaml
 create mode 100644 samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_async_gcs.test.yaml
 create mode 100644 samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_async_word_time_offsets_gcs.test.yaml
 create mode 100644 samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_enhanced_model.test.yaml
 create mode 100644 samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_model_selection.test.yaml
 create mode 100644 samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_model_selection_gcs.test.yaml
 create mode 100644 samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_multichannel.test.yaml
 create mode 100644 samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_multichannel_gcs.test.yaml
 create mode 100644 samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_sync.test.yaml
 create mode 100644 samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_sync_gcs.test.yaml
 create mode 100644 samples/src/main/java/com/google/cloud/examples/speech/v1p1beta1/test/speech_adaptation_beta.test.yaml
 create mode 100644 samples/src/main/java/com/google/cloud/examples/speech/v1p1beta1/test/speech_contexts_classes_beta.test.yaml
 create mode 100644 samples/src/main/java/com/google/cloud/examples/speech/v1p1beta1/test/speech_quickstart_beta.test.yaml
 create mode 100644 samples/src/main/java/com/google/cloud/examples/speech/v1p1beta1/test/speech_transcribe_auto_punctuation_beta.test.yaml
 create mode 100644 samples/src/main/java/com/google/cloud/examples/speech/v1p1beta1/test/speech_transcribe_diarization_beta.test.yaml
 create mode 100644 samples/src/main/java/com/google/cloud/examples/speech/v1p1beta1/test/speech_transcribe_multilanguage_beta.test.yaml
 create mode 100644 samples/src/main/java/com/google/cloud/examples/speech/v1p1beta1/test/speech_transcribe_recognition_metadata_beta.test.yaml
 create mode 100644 samples/src/main/java/com/google/cloud/examples/speech/v1p1beta1/test/speech_transcribe_word_level_confidence_beta.test.yaml

diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
index baec78fae..ec92f4336 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -21,7 +21,7 @@ If you are still having issues, please include as much information as possible:
    General, Core, and Other are also allowed as types
 2. OS type and version:
 3. Java version:
-4. google-cloud-speect version(s):
+4. google-cloud-speech version(s):
 
 #### Steps to reproduce
 
diff --git a/.kokoro/continuous/propose_release.sh b/.kokoro/continuous/propose_release.sh
index 7a276238c..18f3485b8 100755
--- a/.kokoro/continuous/propose_release.sh
+++ b/.kokoro/continuous/propose_release.sh
@@ -22,7 +22,7 @@ if [ -f ${KOKORO_KEYSTORE_DIR}/73713_github-magic-proxy-url-release-please ]; th
   # Groom the release PR as new commits are merged.
   npx release-please release-pr --token=${KOKORO_KEYSTORE_DIR}/73713_github-magic-proxy-token-release-please \
     --repo-url=googleapis/java-speech \
-    --package-name="google-cloud-speect" \
+    --package-name="google-cloud-speech" \
     --api-url=${KOKORO_KEYSTORE_DIR}/73713_github-magic-proxy-url-release-please \
     --proxy-key=${KOKORO_KEYSTORE_DIR}/73713_github-magic-proxy-key-release-please \
     --release-type=java-yoshi
diff --git a/.kokoro/release/bump_snapshot.sh b/.kokoro/release/bump_snapshot.sh
index b35ffa15a..aa468f49d 100755
--- a/.kokoro/release/bump_snapshot.sh
+++ b/.kokoro/release/bump_snapshot.sh
@@ -22,7 +22,7 @@ if [ -f ${KOKORO_KEYSTORE_DIR}/73713_github-magic-proxy-url-release-please ]; th
   # Groom the snapshot release PR immediately after publishing a release
   npx release-please release-pr --token=${KOKORO_KEYSTORE_DIR}/73713_github-magic-proxy-token-release-please \
     --repo-url=googleapis/java-speech \
-    --package-name="google-cloud-speect" \
+    --package-name="google-cloud-speech" \
     --api-url=${KOKORO_KEYSTORE_DIR}/73713_github-magic-proxy-url-release-please \
     --proxy-key=${KOKORO_KEYSTORE_DIR}/73713_github-magic-proxy-key-release-please \
     --snapshot \
diff --git a/.kokoro/release/drop.cfg b/.kokoro/release/drop.cfg
index 68e9f359c..456d2afff 100644
--- a/.kokoro/release/drop.cfg
+++ b/.kokoro/release/drop.cfg
@@ -4,6 +4,3 @@ env_vars: {
   key: "TRAMPOLINE_BUILD_FILE"
   value: "github/java-speech/.kokoro/release/drop.sh"
 }
-
-# Download staging properties file.
-gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/java/releases/java-speech"
\ No newline at end of file
diff --git a/.kokoro/release/promote.cfg b/.kokoro/release/promote.cfg
index c7106f56c..bec881a41 100644
--- a/.kokoro/release/promote.cfg
+++ b/.kokoro/release/promote.cfg
@@ -4,7 +4,3 @@ env_vars: {
   key: "TRAMPOLINE_BUILD_FILE"
   value: "github/java-speech/.kokoro/release/promote.sh"
 }
-
-# Download staging properties file.
-gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/java/releases/java-speech"
-
diff --git a/.kokoro/release/publish_javadoc.sh b/.kokoro/release/publish_javadoc.sh
index 69af68507..193feaf7a 100755
--- a/.kokoro/release/publish_javadoc.sh
+++ b/.kokoro/release/publish_javadoc.sh
@@ -33,7 +33,7 @@ python3 -m pip install gcp-docuploader
 # compile all packages
 mvn clean install -B -DskipTests=true
 
-NAME=google-cloud-speect
+NAME=google-cloud-speech
 VERSION=$(grep ${NAME}: versions.txt | cut -d: -f3)
 
 # build the docs
diff --git a/google-cloud-speech/src/test/java/com/google/cloud/speech/v1/SpeechSmokeTest.java b/google-cloud-speech/src/test/java/com/google/cloud/speech/v1/SpeechSmokeTest.java
index 5a71258c0..fb9584578 100644
--- a/google-cloud-speech/src/test/java/com/google/cloud/speech/v1/SpeechSmokeTest.java
+++ b/google-cloud-speech/src/test/java/com/google/cloud/speech/v1/SpeechSmokeTest.java
@@ -53,7 +53,7 @@ public static void executeNoCatch() throws Exception {
             .setSampleRateHertz(sampleRateHertz)
             .setEncoding(encoding)
             .build();
-    String uri = "gs://gapic-toolkit/hello.flac";
+    String uri = "gs://cloud-samples-data/speech/brooklyn_bridge.flac";
     RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(uri).build();
 
     RecognizeResponse response = client.recognize(config, audio);
diff --git a/google-cloud-speech/src/test/java/com/google/cloud/speech/v1p1beta1/SpeechSmokeTest.java b/google-cloud-speech/src/test/java/com/google/cloud/speech/v1p1beta1/SpeechSmokeTest.java
index b79277cd9..b82699a82 100644
--- a/google-cloud-speech/src/test/java/com/google/cloud/speech/v1p1beta1/SpeechSmokeTest.java
+++ b/google-cloud-speech/src/test/java/com/google/cloud/speech/v1p1beta1/SpeechSmokeTest.java
@@ -53,7 +53,7 @@ public static void executeNoCatch() throws Exception {
             .setSampleRateHertz(sampleRateHertz)
             .setEncoding(encoding)
             .build();
-    String uri = "gs://gapic-toolkit/hello.flac";
+    String uri = "gs://cloud-samples-data/speech/brooklyn_bridge.flac";
     RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(uri).build();
 
     RecognizeResponse response = client.recognize(config, audio);
diff --git a/proto-google-cloud-speech-v1/clirr-ignored-differences.xml b/proto-google-cloud-speech-v1/clirr-ignored-differences.xml
new file mode 100644
index 000000000..83a043328
--- /dev/null
+++ b/proto-google-cloud-speech-v1/clirr-ignored-differences.xml
@@ -0,0 +1,19 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- see http://www.mojohaus.org/clirr-maven-plugin/examples/ignored-differences.html -->
+<differences>
+  <difference>
+    <differenceType>7012</differenceType>
+    <className>com/google/cloud/speech/v1/*OrBuilder</className>
+    <method>* get*(*)</method>
+  </difference>
+  <difference>
+    <differenceType>7012</differenceType>
+    <className>com/google/cloud/speech/v1/*OrBuilder</className>
+    <method>boolean contains*(*)</method>
+  </difference>
+  <difference>
+    <differenceType>7012</differenceType>
+    <className>com/google/cloud/speech/v1/*OrBuilder</className>
+    <method>boolean has*(*)</method>
+  </difference>
+</differences>
diff --git a/proto-google-cloud-speech-v1/src/main/java/com/google/cloud/speech/v1/SpeakerDiarizationConfig.java b/proto-google-cloud-speech-v1/src/main/java/com/google/cloud/speech/v1/SpeakerDiarizationConfig.java
index 33504da02..45ddb844d 100644
--- a/proto-google-cloud-speech-v1/src/main/java/com/google/cloud/speech/v1/SpeakerDiarizationConfig.java
+++ b/proto-google-cloud-speech-v1/src/main/java/com/google/cloud/speech/v1/SpeakerDiarizationConfig.java
@@ -174,15 +174,13 @@ public int getMaxSpeakerCount() {
    *
    *
    * <pre>
-   * A distinct integer value is assigned for every speaker within
-   * the audio. This field specifies which one of those speakers was detected to
-   * have spoken this word. Value ranges from '1' to diarization_speaker_count.
-   * speaker_tag is set if enable_speaker_diarization = 'true' and only in the
-   * top alternative.
+   * Unused.
    * </pre>
    *
-   * <code>int32 speaker_tag = 5 [(.google.api.field_behavior) = OUTPUT_ONLY];</code>
+   * <code>int32 speaker_tag = 5 [deprecated = true, (.google.api.field_behavior) = OUTPUT_ONLY];
+   * </code>
    */
+  @java.lang.Deprecated
   public int getSpeakerTag() {
     return speakerTag_;
   }
@@ -704,15 +702,13 @@ public Builder clearMaxSpeakerCount() {
     *
     *
     * <pre>
-     * A distinct integer value is assigned for every speaker within
-     * the audio. This field specifies which one of those speakers was detected to
-     * have spoken this word. Value ranges from '1' to diarization_speaker_count.
-     * speaker_tag is set if enable_speaker_diarization = 'true' and only in the
-     * top alternative.
+     * Unused.
      * </pre>
     *
-     * <code>int32 speaker_tag = 5 [(.google.api.field_behavior) = OUTPUT_ONLY];</code>
+     * <code>int32 speaker_tag = 5 [deprecated = true, (.google.api.field_behavior) = OUTPUT_ONLY];
+     * </code>
     */
+    @java.lang.Deprecated
    public int getSpeakerTag() {
      return speakerTag_;
    }
@@ -720,15 +716,13 @@ public int getSpeakerTag() {
     *
     *
     * <pre>
-     * A distinct integer value is assigned for every speaker within
-     * the audio. This field specifies which one of those speakers was detected to
-     * have spoken this word. Value ranges from '1' to diarization_speaker_count.
-     * speaker_tag is set if enable_speaker_diarization = 'true' and only in the
-     * top alternative.
+     * Unused.
      * </pre>
     *
-     * <code>int32 speaker_tag = 5 [(.google.api.field_behavior) = OUTPUT_ONLY];</code>
+     * <code>int32 speaker_tag = 5 [deprecated = true, (.google.api.field_behavior) = OUTPUT_ONLY];
+     * </code>
     */
+    @java.lang.Deprecated
    public Builder setSpeakerTag(int value) {
 
      speakerTag_ = value;
@@ -739,15 +733,13 @@ public Builder setSpeakerTag(int value) {
     *
     *
     * <pre>
-     * A distinct integer value is assigned for every speaker within
-     * the audio. This field specifies which one of those speakers was detected to
-     * have spoken this word. Value ranges from '1' to diarization_speaker_count.
-     * speaker_tag is set if enable_speaker_diarization = 'true' and only in the
-     * top alternative.
+     * Unused.
      * </pre>
     *
-     * <code>int32 speaker_tag = 5 [(.google.api.field_behavior) = OUTPUT_ONLY];</code>
+     * <code>int32 speaker_tag = 5 [deprecated = true, (.google.api.field_behavior) = OUTPUT_ONLY];
+     * </code>
     */
+    @java.lang.Deprecated
    public Builder clearSpeakerTag() {
 
      speakerTag_ = 0;
diff --git a/proto-google-cloud-speech-v1/src/main/java/com/google/cloud/speech/v1/SpeakerDiarizationConfigOrBuilder.java b/proto-google-cloud-speech-v1/src/main/java/com/google/cloud/speech/v1/SpeakerDiarizationConfigOrBuilder.java
index 4fabb7a0b..d3aaeacaf 100644
--- a/proto-google-cloud-speech-v1/src/main/java/com/google/cloud/speech/v1/SpeakerDiarizationConfigOrBuilder.java
+++ b/proto-google-cloud-speech-v1/src/main/java/com/google/cloud/speech/v1/SpeakerDiarizationConfigOrBuilder.java
@@ -66,14 +66,12 @@ public interface SpeakerDiarizationConfigOrBuilder
    *
    *
    * <pre>
-   * A distinct integer value is assigned for every speaker within
-   * the audio. This field specifies which one of those speakers was detected to
-   * have spoken this word. Value ranges from '1' to diarization_speaker_count.
-   * speaker_tag is set if enable_speaker_diarization = 'true' and only in the
-   * top alternative.
+   * Unused.
    * </pre>
* - * int32 speaker_tag = 5 [(.google.api.field_behavior) = OUTPUT_ONLY]; + * int32 speaker_tag = 5 [deprecated = true, (.google.api.field_behavior) = OUTPUT_ONLY]; + * */ + @java.lang.Deprecated int getSpeakerTag(); } diff --git a/proto-google-cloud-speech-v1/src/main/java/com/google/cloud/speech/v1/SpeechProto.java b/proto-google-cloud-speech-v1/src/main/java/com/google/cloud/speech/v1/SpeechProto.java index 39323e613..607071767 100644 --- a/proto-google-cloud-speech-v1/src/main/java/com/google/cloud/speech/v1/SpeechProto.java +++ b/proto-google-cloud-speech-v1/src/main/java/com/google/cloud/speech/v1/SpeechProto.java @@ -145,89 +145,90 @@ public static com.google.protobuf.Descriptors.FileDescriptor getDescriptor() { + "ioEncoding\022\030\n\024ENCODING_UNSPECIFIED\020\000\022\014\n\010" + "LINEAR16\020\001\022\010\n\004FLAC\020\002\022\t\n\005MULAW\020\003\022\007\n\003AMR\020\004" + "\022\n\n\006AMR_WB\020\005\022\014\n\010OGG_OPUS\020\006\022\032\n\026SPEEX_WITH" - + "_HEADER_BYTE\020\007\"\216\001\n\030SpeakerDiarizationCon" + + "_HEADER_BYTE\020\007\"\220\001\n\030SpeakerDiarizationCon" + "fig\022\"\n\032enable_speaker_diarization\030\001 \001(\010\022" + "\031\n\021min_speaker_count\030\002 \001(\005\022\031\n\021max_speake" - + "r_count\030\003 \001(\005\022\030\n\013speaker_tag\030\005 \001(\005B\003\340A\003\"" - + "\240\010\n\023RecognitionMetadata\022U\n\020interaction_t" - + "ype\030\001 \001(\0162;.google.cloud.speech.v1.Recog" - + "nitionMetadata.InteractionType\022$\n\034indust" - + "ry_naics_code_of_audio\030\003 \001(\r\022[\n\023micropho" - + "ne_distance\030\004 \001(\0162>.google.cloud.speech." - + "v1.RecognitionMetadata.MicrophoneDistanc" - + "e\022Z\n\023original_media_type\030\005 \001(\0162=.google." - + "cloud.speech.v1.RecognitionMetadata.Orig" - + "inalMediaType\022^\n\025recording_device_type\030\006" - + " \001(\0162?.google.cloud.speech.v1.Recognitio" - + "nMetadata.RecordingDeviceType\022\035\n\025recordi" - + "ng_device_name\030\007 \001(\t\022\032\n\022original_mime_ty" - + "pe\030\010 \001(\t\022\023\n\013audio_topic\030\n \001(\t\"\305\001\n\017Intera" - + "ctionType\022 \n\034INTERACTION_TYPE_UNSPECIFIE" - + "D\020\000\022\016\n\nDISCUSSION\020\001\022\020\n\014PRESENTATION\020\002\022\016\n" - + "\nPHONE_CALL\020\003\022\r\n\tVOICEMAIL\020\004\022\033\n\027PROFESSI" - + "ONALLY_PRODUCED\020\005\022\020\n\014VOICE_SEARCH\020\006\022\021\n\rV" - + "OICE_COMMAND\020\007\022\r\n\tDICTATION\020\010\"d\n\022Microph" - + "oneDistance\022#\n\037MICROPHONE_DISTANCE_UNSPE" - + "CIFIED\020\000\022\r\n\tNEARFIELD\020\001\022\014\n\010MIDFIELD\020\002\022\014\n" - + "\010FARFIELD\020\003\"N\n\021OriginalMediaType\022#\n\037ORIG" - + "INAL_MEDIA_TYPE_UNSPECIFIED\020\000\022\t\n\005AUDIO\020\001" - + "\022\t\n\005VIDEO\020\002\"\244\001\n\023RecordingDeviceType\022%\n!R" - + "ECORDING_DEVICE_TYPE_UNSPECIFIED\020\000\022\016\n\nSM" - + "ARTPHONE\020\001\022\006\n\002PC\020\002\022\016\n\nPHONE_LINE\020\003\022\013\n\007VE" - + "HICLE\020\004\022\030\n\024OTHER_OUTDOOR_DEVICE\020\005\022\027\n\023OTH" - + "ER_INDOOR_DEVICE\020\006\" \n\rSpeechContext\022\017\n\007p" - + "hrases\030\001 \003(\t\"D\n\020RecognitionAudio\022\021\n\007cont" - + "ent\030\001 \001(\014H\000\022\r\n\003uri\030\002 \001(\tH\000B\016\n\014audio_sour" - + "ce\"U\n\021RecognizeResponse\022@\n\007results\030\002 \003(\013" - + "2/.google.cloud.speech.v1.SpeechRecognit" - + "ionResult\"`\n\034LongRunningRecognizeRespons" - + "e\022@\n\007results\030\002 \003(\0132/.google.cloud.speech" - + 
".v1.SpeechRecognitionResult\"\236\001\n\034LongRunn" - + "ingRecognizeMetadata\022\030\n\020progress_percent" - + "\030\001 \001(\005\022.\n\nstart_time\030\002 \001(\0132\032.google.prot" - + "obuf.Timestamp\0224\n\020last_update_time\030\003 \001(\013" - + "2\032.google.protobuf.Timestamp\"\261\002\n\032Streami" - + "ngRecognizeResponse\022!\n\005error\030\001 \001(\0132\022.goo" - + "gle.rpc.Status\022C\n\007results\030\002 \003(\01322.google" - + ".cloud.speech.v1.StreamingRecognitionRes" - + "ult\022]\n\021speech_event_type\030\004 \001(\0162B.google." - + "cloud.speech.v1.StreamingRecognizeRespon" - + "se.SpeechEventType\"L\n\017SpeechEventType\022\034\n" - + "\030SPEECH_EVENT_UNSPECIFIED\020\000\022\033\n\027END_OF_SI" - + "NGLE_UTTERANCE\020\001\"\362\001\n\032StreamingRecognitio" - + "nResult\022J\n\014alternatives\030\001 \003(\01324.google.c" - + "loud.speech.v1.SpeechRecognitionAlternat" - + "ive\022\020\n\010is_final\030\002 \001(\010\022\021\n\tstability\030\003 \001(\002" - + "\0222\n\017result_end_time\030\004 \001(\0132\031.google.proto" - + "buf.Duration\022\023\n\013channel_tag\030\005 \001(\005\022\032\n\rlan" - + "guage_code\030\006 \001(\tB\003\340A\003\"z\n\027SpeechRecogniti" - + "onResult\022J\n\014alternatives\030\001 \003(\01324.google." - + "cloud.speech.v1.SpeechRecognitionAlterna" - + "tive\022\023\n\013channel_tag\030\002 \001(\005\"w\n\034SpeechRecog" - + "nitionAlternative\022\022\n\ntranscript\030\001 \001(\t\022\022\n" - + "\nconfidence\030\002 \001(\002\022/\n\005words\030\003 \003(\0132 .googl" - + "e.cloud.speech.v1.WordInfo\"t\n\010WordInfo\022-" - + "\n\nstart_time\030\001 \001(\0132\031.google.protobuf.Dur" - + "ation\022+\n\010end_time\030\002 \001(\0132\031.google.protobu" - + "f.Duration\022\014\n\004word\030\003 \001(\t2\321\004\n\006Speech\022\220\001\n\t" - + "Recognize\022(.google.cloud.speech.v1.Recog" - + "nizeRequest\032).google.cloud.speech.v1.Rec" - + "ognizeResponse\".\202\323\344\223\002\031\"\024/v1/speech:recog" - + "nize:\001*\332A\014config,audio\022\344\001\n\024LongRunningRe" - + "cognize\0223.google.cloud.speech.v1.LongRun" - + "ningRecognizeRequest\032\035.google.longrunnin" - + "g.Operation\"x\202\323\344\223\002$\"\037/v1/speech:longrunn" - + "ingrecognize:\001*\332A\014config,audio\312A<\n\034LongR" - + "unningRecognizeResponse\022\034LongRunningReco" - + "gnizeMetadata\022\201\001\n\022StreamingRecognize\0221.g" + + "r_count\030\003 \001(\005\022\032\n\013speaker_tag\030\005 \001(\005B\005\030\001\340A" + + "\003\"\240\010\n\023RecognitionMetadata\022U\n\020interaction" + + "_type\030\001 \001(\0162;.google.cloud.speech.v1.Rec" + + "ognitionMetadata.InteractionType\022$\n\034indu" + + "stry_naics_code_of_audio\030\003 \001(\r\022[\n\023microp" + + "hone_distance\030\004 \001(\0162>.google.cloud.speec" + + "h.v1.RecognitionMetadata.MicrophoneDista" + + "nce\022Z\n\023original_media_type\030\005 \001(\0162=.googl" + + "e.cloud.speech.v1.RecognitionMetadata.Or" + + "iginalMediaType\022^\n\025recording_device_type" + + "\030\006 \001(\0162?.google.cloud.speech.v1.Recognit" + + "ionMetadata.RecordingDeviceType\022\035\n\025recor" + + "ding_device_name\030\007 \001(\t\022\032\n\022original_mime_" + + "type\030\010 \001(\t\022\023\n\013audio_topic\030\n \001(\t\"\305\001\n\017Inte" + + "ractionType\022 \n\034INTERACTION_TYPE_UNSPECIF" + + "IED\020\000\022\016\n\nDISCUSSION\020\001\022\020\n\014PRESENTATION\020\002\022" + + "\016\n\nPHONE_CALL\020\003\022\r\n\tVOICEMAIL\020\004\022\033\n\027PROFES" + + 
"SIONALLY_PRODUCED\020\005\022\020\n\014VOICE_SEARCH\020\006\022\021\n" + + "\rVOICE_COMMAND\020\007\022\r\n\tDICTATION\020\010\"d\n\022Micro" + + "phoneDistance\022#\n\037MICROPHONE_DISTANCE_UNS" + + "PECIFIED\020\000\022\r\n\tNEARFIELD\020\001\022\014\n\010MIDFIELD\020\002\022" + + "\014\n\010FARFIELD\020\003\"N\n\021OriginalMediaType\022#\n\037OR" + + "IGINAL_MEDIA_TYPE_UNSPECIFIED\020\000\022\t\n\005AUDIO" + + "\020\001\022\t\n\005VIDEO\020\002\"\244\001\n\023RecordingDeviceType\022%\n" + + "!RECORDING_DEVICE_TYPE_UNSPECIFIED\020\000\022\016\n\n" + + "SMARTPHONE\020\001\022\006\n\002PC\020\002\022\016\n\nPHONE_LINE\020\003\022\013\n\007" + + "VEHICLE\020\004\022\030\n\024OTHER_OUTDOOR_DEVICE\020\005\022\027\n\023O" + + "THER_INDOOR_DEVICE\020\006\" \n\rSpeechContext\022\017\n" + + "\007phrases\030\001 \003(\t\"D\n\020RecognitionAudio\022\021\n\007co" + + "ntent\030\001 \001(\014H\000\022\r\n\003uri\030\002 \001(\tH\000B\016\n\014audio_so" + + "urce\"U\n\021RecognizeResponse\022@\n\007results\030\002 \003" + + "(\0132/.google.cloud.speech.v1.SpeechRecogn" + + "itionResult\"`\n\034LongRunningRecognizeRespo" + + "nse\022@\n\007results\030\002 \003(\0132/.google.cloud.spee" + + "ch.v1.SpeechRecognitionResult\"\236\001\n\034LongRu" + + "nningRecognizeMetadata\022\030\n\020progress_perce" + + "nt\030\001 \001(\005\022.\n\nstart_time\030\002 \001(\0132\032.google.pr" + + "otobuf.Timestamp\0224\n\020last_update_time\030\003 \001" + + "(\0132\032.google.protobuf.Timestamp\"\261\002\n\032Strea" + + "mingRecognizeResponse\022!\n\005error\030\001 \001(\0132\022.g" + + "oogle.rpc.Status\022C\n\007results\030\002 \003(\01322.goog" + + "le.cloud.speech.v1.StreamingRecognitionR" + + "esult\022]\n\021speech_event_type\030\004 \001(\0162B.googl" + + "e.cloud.speech.v1.StreamingRecognizeResp" + + "onse.SpeechEventType\"L\n\017SpeechEventType\022" + + "\034\n\030SPEECH_EVENT_UNSPECIFIED\020\000\022\033\n\027END_OF_" + + "SINGLE_UTTERANCE\020\001\"\362\001\n\032StreamingRecognit" + + "ionResult\022J\n\014alternatives\030\001 \003(\01324.google" + + ".cloud.speech.v1.SpeechRecognitionAltern" + + "ative\022\020\n\010is_final\030\002 \001(\010\022\021\n\tstability\030\003 \001" + + "(\002\0222\n\017result_end_time\030\004 \001(\0132\031.google.pro" + + "tobuf.Duration\022\023\n\013channel_tag\030\005 \001(\005\022\032\n\rl" + + "anguage_code\030\006 \001(\tB\003\340A\003\"z\n\027SpeechRecogni" + + "tionResult\022J\n\014alternatives\030\001 \003(\01324.googl" + + "e.cloud.speech.v1.SpeechRecognitionAlter" + + "native\022\023\n\013channel_tag\030\002 \001(\005\"w\n\034SpeechRec" + + "ognitionAlternative\022\022\n\ntranscript\030\001 \001(\t\022" + + "\022\n\nconfidence\030\002 \001(\002\022/\n\005words\030\003 \003(\0132 .goo" + + "gle.cloud.speech.v1.WordInfo\"\216\001\n\010WordInf" + + "o\022-\n\nstart_time\030\001 \001(\0132\031.google.protobuf." 
+ + "Duration\022+\n\010end_time\030\002 \001(\0132\031.google.prot" + + "obuf.Duration\022\014\n\004word\030\003 \001(\t\022\030\n\013speaker_t" + + "ag\030\005 \001(\005B\003\340A\0032\321\004\n\006Speech\022\220\001\n\tRecognize\022(" + + ".google.cloud.speech.v1.RecognizeRequest" + + "\032).google.cloud.speech.v1.RecognizeRespo" + + "nse\".\202\323\344\223\002\031\"\024/v1/speech:recognize:\001*\332A\014c" + + "onfig,audio\022\344\001\n\024LongRunningRecognize\0223.g" + + "oogle.cloud.speech.v1.LongRunningRecogni" + + "zeRequest\032\035.google.longrunning.Operation" + + "\"x\202\323\344\223\002$\"\037/v1/speech:longrunningrecogniz" + + "e:\001*\332A\014config,audio\312A<\n\034LongRunningRecog" + + "nizeResponse\022\034LongRunningRecognizeMetada" + + "ta\022\201\001\n\022StreamingRecognize\0221.google.cloud" + + ".speech.v1.StreamingRecognizeRequest\0322.g" + "oogle.cloud.speech.v1.StreamingRecognize" - + "Request\0322.google.cloud.speech.v1.Streami" - + "ngRecognizeResponse\"\000(\0010\001\032I\312A\025speech.goo" - + "gleapis.com\322A.https://www.googleapis.com" - + "/auth/cloud-platformBr\n\032com.google.cloud" - + ".speech.v1B\013SpeechProtoP\001Z + * A distinct integer value is assigned for every speaker within + * the audio. This field specifies which one of those speakers was detected to + * have spoken this word. Value ranges from '1' to diarization_speaker_count. + * speaker_tag is set if enable_speaker_diarization = 'true' and only in the + * top alternative. + * + * + * int32 speaker_tag = 5 [(.google.api.field_behavior) = OUTPUT_ONLY]; + */ + public int getSpeakerTag() { + return speakerTag_; + } + private byte memoizedIsInitialized = -1; @java.lang.Override @@ -310,6 +334,9 @@ public void writeTo(com.google.protobuf.CodedOutputStream output) throws java.io if (!getWordBytes().isEmpty()) { com.google.protobuf.GeneratedMessageV3.writeString(output, 3, word_); } + if (speakerTag_ != 0) { + output.writeInt32(5, speakerTag_); + } unknownFields.writeTo(output); } @@ -328,6 +355,9 @@ public int getSerializedSize() { if (!getWordBytes().isEmpty()) { size += com.google.protobuf.GeneratedMessageV3.computeStringSize(3, word_); } + if (speakerTag_ != 0) { + size += com.google.protobuf.CodedOutputStream.computeInt32Size(5, speakerTag_); + } size += unknownFields.getSerializedSize(); memoizedSize = size; return size; @@ -352,6 +382,7 @@ public boolean equals(final java.lang.Object obj) { if (!getEndTime().equals(other.getEndTime())) return false; } if (!getWord().equals(other.getWord())) return false; + if (getSpeakerTag() != other.getSpeakerTag()) return false; if (!unknownFields.equals(other.unknownFields)) return false; return true; } @@ -373,6 +404,8 @@ public int hashCode() { } hash = (37 * hash) + WORD_FIELD_NUMBER; hash = (53 * hash) + getWord().hashCode(); + hash = (37 * hash) + SPEAKER_TAG_FIELD_NUMBER; + hash = (53 * hash) + getSpeakerTag(); hash = (29 * hash) + unknownFields.hashCode(); memoizedHashCode = hash; return hash; @@ -531,6 +564,8 @@ public Builder clear() { } word_ = ""; + speakerTag_ = 0; + return this; } @@ -568,6 +603,7 @@ public com.google.cloud.speech.v1.WordInfo buildPartial() { result.endTime_ = endTimeBuilder_.build(); } result.word_ = word_; + result.speakerTag_ = speakerTag_; onBuilt(); return result; } @@ -627,6 +663,9 @@ public Builder mergeFrom(com.google.cloud.speech.v1.WordInfo other) { word_ = other.word_; onChanged(); } + if (other.getSpeakerTag() != 0) { + setSpeakerTag(other.getSpeakerTag()); + } this.mergeUnknownFields(other.unknownFields); 
      onChanged();
      return this;
@@ -1190,6 +1229,62 @@ public Builder setWordBytes(com.google.protobuf.ByteString value) {
       return this;
     }
 
+    private int speakerTag_;
+    /**
+     *
+     *
+     * <pre>
+     * A distinct integer value is assigned for every speaker within
+     * the audio. This field specifies which one of those speakers was detected to
+     * have spoken this word. Value ranges from '1' to diarization_speaker_count.
+     * speaker_tag is set if enable_speaker_diarization = 'true' and only in the
+     * top alternative.
+     * </pre>
+     *
+     * <code>int32 speaker_tag = 5 [(.google.api.field_behavior) = OUTPUT_ONLY];</code>
+     */
+    public int getSpeakerTag() {
+      return speakerTag_;
+    }
+    /**
+     *
+     *
+     * <pre>
+     * A distinct integer value is assigned for every speaker within
+     * the audio. This field specifies which one of those speakers was detected to
+     * have spoken this word. Value ranges from '1' to diarization_speaker_count.
+     * speaker_tag is set if enable_speaker_diarization = 'true' and only in the
+     * top alternative.
+     * </pre>
+     *
+     * <code>int32 speaker_tag = 5 [(.google.api.field_behavior) = OUTPUT_ONLY];</code>
+     */
+    public Builder setSpeakerTag(int value) {
+
+      speakerTag_ = value;
+      onChanged();
+      return this;
+    }
+    /**
+     *
+     *
+     * <pre>
+     * A distinct integer value is assigned for every speaker within
+     * the audio. This field specifies which one of those speakers was detected to
+     * have spoken this word. Value ranges from '1' to diarization_speaker_count.
+     * speaker_tag is set if enable_speaker_diarization = 'true' and only in the
+     * top alternative.
+     * </pre>
+     *
+     * <code>int32 speaker_tag = 5 [(.google.api.field_behavior) = OUTPUT_ONLY];</code>
+     */
+    public Builder clearSpeakerTag() {
+
+      speakerTag_ = 0;
+      onChanged();
+      return this;
+    }
+
   @java.lang.Override
   public final Builder setUnknownFields(final com.google.protobuf.UnknownFieldSet unknownFields) {
     return super.setUnknownFields(unknownFields);
diff --git a/proto-google-cloud-speech-v1/src/main/java/com/google/cloud/speech/v1/WordInfoOrBuilder.java b/proto-google-cloud-speech-v1/src/main/java/com/google/cloud/speech/v1/WordInfoOrBuilder.java
index c2c671c52..edaea25bd 100644
--- a/proto-google-cloud-speech-v1/src/main/java/com/google/cloud/speech/v1/WordInfoOrBuilder.java
+++ b/proto-google-cloud-speech-v1/src/main/java/com/google/cloud/speech/v1/WordInfoOrBuilder.java
@@ -135,4 +135,19 @@ public interface WordInfoOrBuilder
    * <code>string word = 3;</code>
    */
   com.google.protobuf.ByteString getWordBytes();
+
+  /**
+   *
+   *
+   * <pre>
+   * A distinct integer value is assigned for every speaker within
+   * the audio. This field specifies which one of those speakers was detected to
+   * have spoken this word. Value ranges from '1' to diarization_speaker_count.
+   * speaker_tag is set if enable_speaker_diarization = 'true' and only in the
+   * top alternative.
+   * </pre>
+   *
+   * <code>int32 speaker_tag = 5 [(.google.api.field_behavior) = OUTPUT_ONLY];</code>
+   */
+  int getSpeakerTag();
 }
diff --git a/proto-google-cloud-speech-v1/src/main/proto/google/cloud/speech/v1/cloud_speech.proto b/proto-google-cloud-speech-v1/src/main/proto/google/cloud/speech/v1/cloud_speech.proto
index 0887915a1..f343fa21d 100644
--- a/proto-google-cloud-speech-v1/src/main/proto/google/cloud/speech/v1/cloud_speech.proto
+++ b/proto-google-cloud-speech-v1/src/main/proto/google/cloud/speech/v1/cloud_speech.proto
@@ -367,13 +367,9 @@ message SpeakerDiarizationConfig {
   // number of speakers. If not set, the default value is 6.
   int32 max_speaker_count = 3;
 
-  // A distinct integer value is assigned for every speaker within
-  // the audio. This field specifies which one of those speakers was detected to
-  // have spoken this word. Value ranges from '1' to diarization_speaker_count.
-  // speaker_tag is set if enable_speaker_diarization = 'true' and only in the
-  // top alternative.
+  // Unused.
   int32 speaker_tag = 5
-      [(google.api.field_behavior) = OUTPUT_ONLY];
+      [(google.api.field_behavior) = OUTPUT_ONLY, deprecated = true];
 }
 
 // Description of audio data to be recognized.
@@ -752,4 +748,12 @@ message WordInfo {
 
   // The word corresponding to this set of information.
   string word = 3;
+
+  // A distinct integer value is assigned for every speaker within
+  // the audio. This field specifies which one of those speakers was detected to
+  // have spoken this word. Value ranges from '1' to diarization_speaker_count.
+  // speaker_tag is set if enable_speaker_diarization = 'true' and only in the
+  // top alternative.
+  int32 speaker_tag = 5
+      [(google.api.field_behavior) = OUTPUT_ONLY];
 }
diff --git a/samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_async.test.yaml b/samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_async.test.yaml
new file mode 100644
index 000000000..f26cfbabc
--- /dev/null
+++ b/samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_async.test.yaml
@@ -0,0 +1,28 @@
+type: test/samples
+schema_version: 1
+test:
+  suites:
+  - name: Transcribe Audio File using Long Running Operation (Local File) (LRO)
+    cases:
+
+    # This sample should default to using brooklyn_bridge.raw
+    # with explicitly configured sample_rate_hertz and encoding
+    - name: speech_transcribe_async (no arguments)
+      spec:
+      - call:
+          sample: speech_transcribe_async
+      - assert_contains:
+        - literal: "how old is the Brooklyn Bridge"
+
+    # Confirm that another file can be transcribed (use another .raw PCM file)
+    - name: speech_transcribe_async (--local_file_path)
+      spec:
+      - call:
+          sample: speech_transcribe_async
+          params:
+            local_file_path:
+              literal: "resources/hello.raw"
+      - assert_contains:
+        - literal: "hello"
+      - assert_not_contains:
+        - literal: "how old is the Brooklyn Bridge"
diff --git a/samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_async_gcs.test.yaml b/samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_async_gcs.test.yaml
new file mode 100644
index 000000000..d3d83133e
--- /dev/null
+++ b/samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_async_gcs.test.yaml
@@ -0,0 +1,28 @@
+type: test/samples
+schema_version: 1
+test:
+  suites:
+  - name: Transcribe Audio File using Long Running Operation (Cloud Storage) (LRO)
+    cases:
+
+    # This sample should default to using gs://cloud-samples-data/speech/brooklyn_bridge.raw
+    # with explicitly configured sample_rate_hertz and encoding
+    - name: speech_transcribe_async_gcs (no arguments)
+      spec:
+      - call:
+          sample: speech_transcribe_async_gcs
+      - assert_contains:
+        - literal: "how old is the Brooklyn Bridge"
+
+    # Confirm that another file can be transcribed (use another .raw PCM file)
+    - name: speech_transcribe_async_gcs (--storage_uri)
+      spec:
+      - call:
+          sample: speech_transcribe_async_gcs
+          params:
+            storage_uri:
+              literal: "gs://cloud-samples-data/speech/hello.raw"
+      - assert_contains:
+        - literal: "hello"
+      - assert_not_contains:
+        - literal: "how old is the Brooklyn Bridge"
diff --git a/samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_async_word_time_offsets_gcs.test.yaml b/samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_async_word_time_offsets_gcs.test.yaml
new file mode 100644
index 000000000..11784726d
--- /dev/null
+++ b/samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_async_word_time_offsets_gcs.test.yaml
@@ -0,0 +1,37 @@
+type: test/samples
+schema_version: 1
+test:
+  suites:
+  - name: Getting word timestamps (Cloud Storage) (LRO)
+    cases:
+
+    # This sample should default to using gs://cloud-samples-data/speech/brooklyn_bridge.flac
+    - name: speech_transcribe_async_word_time_offsets_gcs (no arguments)
+      spec:
+      - call:
+          sample: speech_transcribe_async_word_time_offsets_gcs
+      - assert_contains:
+        - literal: "how old is the Brooklyn Bridge"
+        - literal: "Word: how"
+        - literal: "Word: old"
+        - literal: "Word: is"
+        - literal: "Start time: 0 seconds"
+        - literal: "End time: 1 seconds"
+
+    # Confirm that another file can be transcribed (use another file)
+    - name: speech_transcribe_async_word_time_offsets_gcs (--storage_uri)
+      spec:
+      - call:
+          sample: speech_transcribe_async_word_time_offsets_gcs
+          params:
+            storage_uri:
+              literal: "gs://cloud-samples-data/speech/multi.flac"
+      - assert_contains:
+        - literal: "how are you doing"
+        - literal: "Word: how"
+        - literal: "Word: are"
+        - literal: "Word: you"
+        - literal: "Start time: 0 seconds"
+        - literal: "End time: 1 seconds"
+      - assert_not_contains:
+        - literal: "how old is the Brooklyn Bridge"
diff --git a/samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_enhanced_model.test.yaml b/samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_enhanced_model.test.yaml
new file mode 100644
index 000000000..6eab33b52
--- /dev/null
+++ b/samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_enhanced_model.test.yaml
@@ -0,0 +1,29 @@
+type: test/samples
+schema_version: 1
+test:
+  suites:
+  - name: Using Enhanced Models (Local File)
+    cases:
+
+    # This sample should default to using hello.wav
+    # and the phone_call model (only currently available enhanced model)
+    #
+    # Note: if the project used to run these tests isn't eligible for
+    # enhanced models, you will receive an error.
+    - name: speech_transcribe_enhanced_model (no arguments)
+      spec:
+      - call:
+          sample: speech_transcribe_enhanced_model
+      - assert_contains:
+        - literal: "hello"
+
+    # Confirm that another file can be transcribed (use another .wav file)
+    - name: speech_transcribe_enhanced_model (--local_file_path)
+      spec:
+      - call:
+          sample: speech_transcribe_enhanced_model
+          params:
+            local_file_path:
+              literal: "resources/commercial_mono.wav"
+      - assert_contains:
+        - literal: "Chrome"
diff --git a/samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_model_selection.test.yaml b/samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_model_selection.test.yaml
new file mode 100644
index 000000000..b5ec2d902
--- /dev/null
+++ b/samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_model_selection.test.yaml
@@ -0,0 +1,52 @@
+type: test/samples
+schema_version: 1
+test:
+  suites:
+  - name: Selecting a Transcription Model (Local File)
+    cases:
+
+    # This sample should default to using hello.wav
+    # and the phone_call model
+    - name: speech_transcribe_model_selection (no arguments)
+      spec:
+      - call:
+          sample: speech_transcribe_model_selection
+      - assert_contains:
+        - literal: "Hello"
+
+    # Confirm that another file can be transcribed (use another .wav file)
+    - name: speech_transcribe_model_selection (--local_file_path)
+      spec:
+      - call:
+          sample: speech_transcribe_model_selection
+          params:
+            local_file_path:
+              literal: "resources/commercial_mono.wav"
+      - assert_contains:
+        - literal: "Chrome"
+
+    # Confirm that --model can be specified and the sample does not blow up
+    #
+    # Note: we are not using example audio files which result in deterministically
+    # different results when using different models, so we simply test
+    # that regular transcription continues to work.
+    - name: speech_transcribe_model_selection (--model)
+      spec:
+      - call:
+          sample: speech_transcribe_model_selection
+          params:
+            model:
+              literal: video
+      - assert_contains:
+        - literal: "hello"
+
+    # Confirm that --model is being passed through by providing an invalid model
+    - name: speech_transcribe_model_selection (invalid --model)
+      spec:
+      - call_may_fail:
+          sample: speech_transcribe_model_selection
+          params:
+            model:
+              literal: I_DONT_EXIST
+      - assert_contains:
+        - literal: "Incorrect model specified"
diff --git a/samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_model_selection_gcs.test.yaml b/samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_model_selection_gcs.test.yaml
new file mode 100644
index 000000000..60c45c975
--- /dev/null
+++ b/samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_model_selection_gcs.test.yaml
@@ -0,0 +1,52 @@
+type: test/samples
+schema_version: 1
+test:
+  suites:
+  - name: Selecting a Transcription Model (Cloud Storage)
+    cases:
+
+    # This sample should default to using gs://cloud-samples-data/speech/hello.wav
+    # and the phone_call model
+    - name: speech_transcribe_model_selection_gcs (no arguments)
+      spec:
+      - call:
+          sample: speech_transcribe_model_selection_gcs
+      - assert_contains:
+        - literal: "Hello"
+
+    # Confirm that another file can be transcribed (use another .wav file)
+    - name: speech_transcribe_model_selection_gcs (--storage_uri)
+      spec:
+      - call:
+          sample: speech_transcribe_model_selection_gcs
+          params:
+            storage_uri:
+              literal: "gs://cloud-samples-data/speech/commercial_mono.wav"
+      - assert_contains:
+        - literal: "Chrome"
+
+    # Confirm that --model can be specified and the sample does not blow up
+    #
+    # Note: we are not using example audio files which result in deterministically
+    # different results when using different models, so we simply test
+    # that regular transcription continues to work.
+    - name: speech_transcribe_model_selection_gcs (--model)
+      spec:
+      - call:
+          sample: speech_transcribe_model_selection_gcs
+          params:
+            model:
+              literal: video
+      - assert_contains:
+        - literal: "hello"
+
+    # Confirm that --model is being passed through by providing an invalid model
+    - name: speech_transcribe_model_selection_gcs (invalid --model)
+      spec:
+      - call_may_fail:
+          sample: speech_transcribe_model_selection_gcs
+          params:
+            model:
+              literal: I_DONT_EXIST
+      - assert_contains:
+        - literal: "Incorrect model specified"
diff --git a/samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_multichannel.test.yaml b/samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_multichannel.test.yaml
new file mode 100644
index 000000000..9d5379dc9
--- /dev/null
+++ b/samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_multichannel.test.yaml
@@ -0,0 +1,31 @@
+type: test/samples
+schema_version: 1
+test:
+  suites:
+  - name: Multi-Channel Audio Transcription (Local File)
+    cases:
+
+    # This sample should default to using multi.wav (2 channels)
+    - name: speech_transcribe_multichannel (no arguments)
+      spec:
+      - call:
+          sample: speech_transcribe_multichannel
+      - assert_contains:
+        - literal: "Channel tag: 1"
+        - literal: "Channel tag: 2"
+        - literal: "how are you doing"
+
+    # Confirm that another file can be transcribed (use another 2 channel .wav file)
+    - name: speech_transcribe_multichannel (--local_file_path)
+      spec:
+      - call:
+          sample: speech_transcribe_multichannel
+          params:
+            local_file_path:
+              literal: "resources/brooklyn_bridge.wav"
+      - assert_contains:
+        # Only one channel of data is present in brooklyn_bridge.wav
+        - literal: "Channel tag:"
+        - literal: "how old is the Brooklyn Bridge"
+      - assert_not_contains:
+        - literal: "how are you doing"
diff --git a/samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_multichannel_gcs.test.yaml b/samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_multichannel_gcs.test.yaml
new file mode 100644
index 000000000..64c9340ce
--- /dev/null
+++ b/samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_multichannel_gcs.test.yaml
@@ -0,0 +1,32 @@
+type: test/samples
+schema_version: 1
+test:
+  suites:
+  - name: Multi-Channel Audio Transcription (Cloud Storage)
+    cases:
+
+    # This sample should default to using gs://cloud-samples-data/speech/multi.wav
+    # with 2 audio channels of data
+    - name: speech_transcribe_multichannel_gcs (no arguments)
+      spec:
+      - call:
+          sample: speech_transcribe_multichannel_gcs
+      - assert_contains:
+        - literal: "Channel tag: 1"
+        - literal: "Channel tag: 2"
+        - literal: "how are you doing"
+
+    # Confirm that another file can be transcribed (use another 2 channel .wav file)
+    - name: speech_transcribe_multichannel_gcs (--storage_uri)
+      spec:
+      - call:
+          sample: speech_transcribe_multichannel_gcs
+          params:
+            storage_uri:
+              literal: "gs://cloud-samples-data/speech/brooklyn_bridge.wav"
+      - assert_contains:
+        # Only one channel of data is present in brooklyn_bridge.wav
+        - literal: "Channel tag:"
+        - literal: "how old is the Brooklyn Bridge"
+      - assert_not_contains:
+        - literal: "how are you doing"
diff --git a/samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_sync.test.yaml b/samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_sync.test.yaml
new file mode 100644
index 000000000..47cc8c1a4
--- /dev/null
+++ b/samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_sync.test.yaml
@@ -0,0 +1,28 @@
+type: test/samples
+schema_version: 1
+test:
+  suites:
+  - name: Transcribe Audio File (Local File)
+    cases:
+
+    # This sample should default to using brooklyn_bridge.raw
+    # with explicitly configured sample_rate_hertz and encoding
+    - name: speech_transcribe_sync (no arguments)
+      spec:
+      - call:
+          sample: speech_transcribe_sync
+      - assert_contains:
+        - literal: "how old is the Brooklyn Bridge"
+
+    # Confirm that another file can be transcribed (use another .raw PCM file)
+    - name: speech_transcribe_sync (--local_file_path)
+      spec:
+      - call:
+          sample: speech_transcribe_sync
+          params:
+            local_file_path:
+              literal: "resources/hello.raw"
+      - assert_contains:
+        - literal: "hello"
+      - assert_not_contains:
+        - literal: "how old is the Brooklyn Bridge"
diff --git a/samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_sync_gcs.test.yaml b/samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_sync_gcs.test.yaml
new file mode 100644
index 000000000..3defdf289
--- /dev/null
+++ b/samples/src/main/java/com/google/cloud/examples/speech/v1/test/speech_transcribe_sync_gcs.test.yaml
@@ -0,0 +1,28 @@
+type: test/samples
+schema_version: 1
+test:
+  suites:
+  - name: Transcribe Audio File (Cloud Storage)
+    cases:
+
+    # This sample should default to using gs://cloud-samples-data/speech/brooklyn_bridge.raw
+    # with explicitly configured sample_rate_hertz and encoding
+    - name: speech_transcribe_sync_gcs (no arguments)
+      spec:
+      - call:
+          sample: speech_transcribe_sync_gcs
+      - assert_contains:
+        - literal: "how old is the Brooklyn Bridge"
+
+    # Confirm that another file can be transcribed (use another .raw PCM file)
+    - name: speech_transcribe_sync_gcs (--storage_uri)
+      spec:
+      - call:
+          sample: speech_transcribe_sync_gcs
+          params:
+            storage_uri:
+              literal: "gs://cloud-samples-data/speech/hello.raw"
+      - assert_contains:
+        - literal: "hello"
+      - assert_not_contains:
+        - literal: "how old is the Brooklyn Bridge"
diff --git a/samples/src/main/java/com/google/cloud/examples/speech/v1p1beta1/test/speech_adaptation_beta.test.yaml b/samples/src/main/java/com/google/cloud/examples/speech/v1p1beta1/test/speech_adaptation_beta.test.yaml
new file mode 100644
index 000000000..4efe8e83f
--- /dev/null
+++ b/samples/src/main/java/com/google/cloud/examples/speech/v1p1beta1/test/speech_adaptation_beta.test.yaml
@@ -0,0 +1,11 @@
+type: test/samples
+schema_version: 1
+test:
+  suites:
+  - name: Speech-to-Text Sample Tests For Speech Adaptation
+    cases:
+    - name: speech_adaptation_beta
+      spec:
+      - call: {sample: speech_adaptation_beta}
+      - assert_contains:
+        - literal: "how old is the Brooklyn Bridge"
diff --git a/samples/src/main/java/com/google/cloud/examples/speech/v1p1beta1/test/speech_contexts_classes_beta.test.yaml b/samples/src/main/java/com/google/cloud/examples/speech/v1p1beta1/test/speech_contexts_classes_beta.test.yaml
new file mode 100644
index 000000000..b6dccfc71
--- /dev/null
+++ b/samples/src/main/java/com/google/cloud/examples/speech/v1p1beta1/test/speech_contexts_classes_beta.test.yaml
@@ -0,0 +1,11 @@
+type: test/samples
+schema_version: 1
+test:
+  suites:
+  - name: Speech-to-Text Sample Tests For Speech Contexts Static Classes
+    cases:
+    - name: speech_contexts_classes_beta
+      spec:
+      - call: {sample: speech_contexts_classes_beta}
+      - assert_contains:
+        - literal: "the time is 5:45 p.m."
diff --git a/samples/src/main/java/com/google/cloud/examples/speech/v1p1beta1/test/speech_quickstart_beta.test.yaml b/samples/src/main/java/com/google/cloud/examples/speech/v1p1beta1/test/speech_quickstart_beta.test.yaml
new file mode 100644
index 000000000..bd5bf6700
--- /dev/null
+++ b/samples/src/main/java/com/google/cloud/examples/speech/v1p1beta1/test/speech_quickstart_beta.test.yaml
@@ -0,0 +1,11 @@
+type: test/samples
+schema_version: 1
+test:
+  suites:
+  - name: Speech-to-Text Sample Tests For Quickstart
+    cases:
+    - name: speech_quickstart_beta
+      spec:
+      - call: {sample: speech_quickstart_beta}
+      - assert_contains:
+        - literal: "how old is the Brooklyn Bridge"
diff --git a/samples/src/main/java/com/google/cloud/examples/speech/v1p1beta1/test/speech_transcribe_auto_punctuation_beta.test.yaml b/samples/src/main/java/com/google/cloud/examples/speech/v1p1beta1/test/speech_transcribe_auto_punctuation_beta.test.yaml
new file mode 100644
index 000000000..1ab5f79a0
--- /dev/null
+++ b/samples/src/main/java/com/google/cloud/examples/speech/v1p1beta1/test/speech_transcribe_auto_punctuation_beta.test.yaml
@@ -0,0 +1,28 @@
+type: test/samples
+schema_version: 1
+test:
+  suites:
+  - name: Getting punctuation in results (Local File) (Beta)
+    cases:
+
+    # This sample should default to using commercial_mono.wav
+    - name: speech_transcribe_auto_punctuation_beta (no arguments)
+      spec:
+      - call:
+          sample: speech_transcribe_auto_punctuation_beta
+      - assert_contains:
+        # Simply assert that actual punctuation is present from commercial_mono.wav
+        - literal: "?"
+        - literal: ","
+        - literal: ""
+
+    # Confirm that another file can be transcribed (use another file)
+    - name: speech_transcribe_auto_punctuation_beta (--local_file_path)
+      spec:
+      - call:
+          sample: speech_transcribe_auto_punctuation_beta
+          params:
+            local_file_path:
+              literal: "resources/brooklyn_bridge.flac"
+      - assert_contains:
+        - literal: "How old is the Brooklyn Bridge?"
diff --git a/samples/src/main/java/com/google/cloud/examples/speech/v1p1beta1/test/speech_transcribe_diarization_beta.test.yaml b/samples/src/main/java/com/google/cloud/examples/speech/v1p1beta1/test/speech_transcribe_diarization_beta.test.yaml
new file mode 100644
index 000000000..409e4b54b
--- /dev/null
+++ b/samples/src/main/java/com/google/cloud/examples/speech/v1p1beta1/test/speech_transcribe_diarization_beta.test.yaml
@@ -0,0 +1,40 @@
+type: test/samples
+schema_version: 1
+test:
+  suites:
+  - name: Separating different speakers (Local File) (LRO) (Beta)
+    cases:
+
+    # This sample should default to using commercial_mono.wav
+    - name: speech_transcribe_diarization_beta (no arguments)
+      spec:
+      - call:
+          sample: speech_transcribe_diarization_beta
+      - assert_contains:
+        - literal: "Word: Chrome"
+        # The identifier given to each speaker is non-deterministic.
+        # For two speakers, it can be 0 and 1, 0 and 2, or other variations.
+        #
+        # The example audio file has two speakers, but this test is
+        # not verifying that greater than one speaker is detected.
+        - literal: "Speaker tag:"
+
+    # Confirm that another file can be transcribed (use another .flac file)
+    - name: speech_transcribe_diarization_beta (--local_file_path)
+      spec:
+      - call:
+          sample: speech_transcribe_diarization_beta
+          params:
+            local_file_path:
+              literal: "resources/multi.flac"
+      - assert_contains:
+        - literal: "how are you doing"
+        - literal: "Word: doing"
+        # The identifier given to each speaker is non-deterministic.
+        # For two speakers, it can be 0 and 1, 0 and 2, or other variations.
+        #
+        # The example audio file has two speakers, but this test is
+        # not verifying that greater than one speaker is detected.
+        - literal: "Speaker tag:"
+      - assert_not_contains:
+        - literal: "Chrome"
diff --git a/samples/src/main/java/com/google/cloud/examples/speech/v1p1beta1/test/speech_transcribe_multilanguage_beta.test.yaml b/samples/src/main/java/com/google/cloud/examples/speech/v1p1beta1/test/speech_transcribe_multilanguage_beta.test.yaml
new file mode 100644
index 000000000..d9f2d7109
--- /dev/null
+++ b/samples/src/main/java/com/google/cloud/examples/speech/v1p1beta1/test/speech_transcribe_multilanguage_beta.test.yaml
@@ -0,0 +1,33 @@
+type: test/samples
+schema_version: 1
+test:
+  suites:
+  - name: Detecting language spoken automatically (Local File) (Beta)
+    cases:
+
+    # This sample should default to using brooklyn_bridge.flac
+    - name: speech_transcribe_multilanguage_beta (no arguments)
+      spec:
+      - call:
+          sample: speech_transcribe_multilanguage_beta
+      - assert_contains:
+        - literal: "how old is the Brooklyn Bridge"
+        # Note: the primary language_code given was not English, but
+        # English was provided in the list of alternative_language_codes
+        - literal: "Detected language: en-us"
+
+    # Confirm that another file can be transcribed (use another .flac file)
+    - name: speech_transcribe_multilanguage_beta (--local_file_path)
+      spec:
+      - call:
+          sample: speech_transcribe_multilanguage_beta
+          params:
+            local_file_path:
+              literal: "resources/multi.flac"
+      - assert_contains:
+        - literal: "how are you doing"
+        # Note: the primary language_code given was not English, but
+        # English was provided in the list of alternative_language_codes
+        - literal: "Detected language: en-us"
+      - assert_not_contains:
+        - literal: "how old is the Brooklyn Bridge"
diff --git a/samples/src/main/java/com/google/cloud/examples/speech/v1p1beta1/test/speech_transcribe_recognition_metadata_beta.test.yaml b/samples/src/main/java/com/google/cloud/examples/speech/v1p1beta1/test/speech_transcribe_recognition_metadata_beta.test.yaml
new file mode 100644
index 000000000..57cf24a1d
--- /dev/null
+++ b/samples/src/main/java/com/google/cloud/examples/speech/v1p1beta1/test/speech_transcribe_recognition_metadata_beta.test.yaml
@@ -0,0 +1,27 @@
+type: test/samples
+schema_version: 1
+test:
+  suites:
+  - name: Adding recognition metadata (Local File) (Beta)
+    cases:
+
+    # This sample should default to using commercial_mono.wav
+    - name: speech_transcribe_recognition_metadata_beta (no arguments)
+      spec:
+      - call:
+          sample: speech_transcribe_recognition_metadata_beta
+      - assert_contains:
+        - literal: "Chrome"
+
+    # Confirm that another file can be transcribed (use another file)
+    - name: speech_transcribe_recognition_metadata_beta (--local_file_path)
+      spec:
+      - call:
+          sample: speech_transcribe_recognition_metadata_beta
+          params:
+            local_file_path:
+              literal: "resources/brooklyn_bridge.flac"
+      - assert_contains:
+        - literal: "how old is the Brooklyn Bridge"
+      - assert_not_contains:
+        - literal: "Chrome"
diff --git a/samples/src/main/java/com/google/cloud/examples/speech/v1p1beta1/test/speech_transcribe_word_level_confidence_beta.test.yaml b/samples/src/main/java/com/google/cloud/examples/speech/v1p1beta1/test/speech_transcribe_word_level_confidence_beta.test.yaml
new file mode 100644
index 000000000..0d7d60570
--- /dev/null
+++ b/samples/src/main/java/com/google/cloud/examples/speech/v1p1beta1/test/speech_transcribe_word_level_confidence_beta.test.yaml
@@ -0,0 +1,35 @@
+type: test/samples
+schema_version: 1
+test:
+  suites:
+  - name: Enabling word-level confidence (Local File) (Beta)
+    cases:
+
+    # This sample should default to using brooklyn_bridge.flac
+    - name: speech_transcribe_word_level_confidence_beta (no arguments)
+      spec:
+      - call:
+          sample: speech_transcribe_word_level_confidence_beta
+      - assert_contains:
+        - literal: "how old is the Brooklyn Bridge"
+        - literal: "Word: how"
+        - literal: "Word: old"
+        - literal: "Word: is"
+        - literal: "Confidence: 0."
+
+    # Confirm that another file can be transcribed (use another .flac file)
+    - name: speech_transcribe_word_level_confidence_beta (--local_file_path)
+      spec:
+      - call:
+          sample: speech_transcribe_word_level_confidence_beta
+          params:
+            local_file_path:
+              literal: "resources/multi.flac"
+      - assert_contains:
+        - literal: "how are you doing"
+        - literal: "Word: how"
+        - literal: "Word: are"
+        - literal: "Word: you"
+        - literal: "Confidence: 0."
+      - assert_not_contains:
+        - literal: "how old is the Brooklyn Bridge"
diff --git a/synth.metadata b/synth.metadata
index 7ecbffcf6..49db30968 100644
--- a/synth.metadata
+++ b/synth.metadata
@@ -1,19 +1,19 @@
 {
-  "updateTime": "2019-10-19T07:55:16.522871Z",
+  "updateTime": "2019-11-11T21:20:21.991041Z",
   "sources": [
     {
       "generator": {
         "name": "artman",
-        "version": "0.40.1",
-        "dockerImage": "googleapis/artman@sha256:168646efbffe41e8f8fa86a60fa0d5724fab67fa37f35082cf6cfc85cedce3c7"
+        "version": "0.41.1",
+        "dockerImage": "googleapis/artman@sha256:545c758c76c3f779037aa259023ec3d1ef2d57d2c8cd00a222cb187d63ceac5e"
       }
     },
     {
       "git": {
         "name": "googleapis",
         "remote": "https://github.com/googleapis/googleapis.git",
-        "sha": "0e9a6d15fcb944ed40921ba0aad2082ee1bc7edd",
-        "internalRef": "275543900"
+        "sha": "f69562be0608904932bdcfbc5ad8b9a22d9dceb8",
+        "internalRef": "279774957"
       }
     },
     {
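
Note on consuming this change: the per-word speaker label now lives on the new, output-only `WordInfo.speaker_tag` getter, while `SpeakerDiarizationConfig.speaker_tag` is deprecated and documented as unused. Below is a minimal sketch of how a caller might read the new field once this version of proto-google-cloud-speech-v1 is on the classpath. It is illustrative only, not part of this patch: the audio URI, sample rate, and speaker counts are assumptions modeled on the commercial_mono.wav fixture used by the diarization test above.

import com.google.cloud.speech.v1.RecognitionAudio;
import com.google.cloud.speech.v1.RecognitionConfig;
import com.google.cloud.speech.v1.RecognitionConfig.AudioEncoding;
import com.google.cloud.speech.v1.RecognizeResponse;
import com.google.cloud.speech.v1.SpeakerDiarizationConfig;
import com.google.cloud.speech.v1.SpeechClient;
import com.google.cloud.speech.v1.SpeechRecognitionResult;
import com.google.cloud.speech.v1.WordInfo;

public class SpeakerTagExample {
  public static void main(String[] args) throws Exception {
    // Assumed two-speaker, 8 kHz LINEAR16 sample; any similar file works.
    String uri = "gs://cloud-samples-data/speech/commercial_mono.wav";
    try (SpeechClient client = SpeechClient.create()) {
      SpeakerDiarizationConfig diarization =
          SpeakerDiarizationConfig.newBuilder()
              .setEnableSpeakerDiarization(true)
              .setMinSpeakerCount(2)
              .setMaxSpeakerCount(2)
              .build();
      RecognitionConfig config =
          RecognitionConfig.newBuilder()
              .setEncoding(AudioEncoding.LINEAR16)
              .setSampleRateHertz(8000)
              .setLanguageCode("en-US")
              .setDiarizationConfig(diarization)
              .build();
      RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(uri).build();
      RecognizeResponse response = client.recognize(config, audio);
      // With diarization enabled, the final result carries the full word list,
      // and speaker tags are populated only in the top alternative.
      SpeechRecognitionResult result =
          response.getResults(response.getResultsCount() - 1);
      for (WordInfo word : result.getAlternatives(0).getWordsList()) {
        System.out.printf("Word: %s, Speaker tag: %d%n", word.getWord(), word.getSpeakerTag());
      }
    }
  }
}

As the regenerated proto comments state, tag values range from 1 to the detected speaker count but are otherwise non-deterministic (the diarization test above asserts only on the "Speaker tag:" prefix for exactly this reason), so callers should group words by tag rather than rely on specific tag numbers.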