Skip to content
This repository was archived by the owner on Feb 18, 2024. It is now read-only.

Commit 61349c0

Browse files
yoshi-automation authored and alexander-fenster committed
fix: allow calls with no request, add JSON proto
1 parent 678e134 commit 61349c0

File tree

7 files changed

+1802
-19
lines changed

7 files changed

+1802
-19
lines changed

protos/google/cloud/speech/v1p1beta1/cloud_speech.proto

Lines changed: 36 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -306,19 +306,24 @@ message RecognitionConfig {
306306
// *Optional* If 'true', enables speaker detection for each recognized word in
307307
// the top alternative of the recognition result using a speaker_tag provided
308308
// in the WordInfo.
309-
// Note: When this is true, we send all the words from the beginning of the
309+
// Note: Use diarization_config instead.
310+
bool enable_speaker_diarization = 16 [deprecated = true];
311+
312+
// *Optional*
313+
// If set, specifies the estimated number of speakers in the conversation.
314+
// Defaults to '2'. Ignored unless enable_speaker_diarization is set to true.
315+
// Note: Use diarization_config instead.
316+
int32 diarization_speaker_count = 17 [deprecated = true];
317+
318+
// *Optional* Config to enable speaker diarization and set additional
319+
// parameters to make diarization better suited for your application.
320+
// Note: When this is enabled, we send all the words from the beginning of the
310321
// audio for the top alternative in every consecutive STREAMING response.
311322
// This is done in order to improve our speaker tags as our models learn to
312323
// identify the speakers in the conversation over time.
313324
// For non-streaming requests, the diarization results will be provided only
314325
// in the top alternative of the FINAL SpeechRecognitionResult.
315-
bool enable_speaker_diarization = 16;
316-
317-
// *Optional*
318-
// If set, specifies the estimated number of speakers in the conversation.
319-
// If not set, defaults to '2'.
320-
// Ignored unless enable_speaker_diarization is set to true."
321-
int32 diarization_speaker_count = 17;
326+
SpeakerDiarizationConfig diarization_config = 19;
322327

323328
// *Optional* Metadata regarding this request.
324329
RecognitionMetadata metadata = 9;
@@ -368,6 +373,29 @@ message RecognitionConfig {
368373
bool use_enhanced = 14;
369374
}
370375

376+
// Speaker diarization settings: an enable flag plus min/max speaker counts.
377+
message SpeakerDiarizationConfig {
378+
// *Optional* If 'true', enables speaker detection for each recognized word in
379+
// the top alternative of the recognition result using a speaker_tag provided
380+
// in the WordInfo.
381+
bool enable_speaker_diarization = 1;
382+
383+
// Note: To fix the number of speakers to be detected in the audio, set
384+
// min_speaker_count equal to max_speaker_count.
385+
386+
// *Optional*
387+
// Minimum number of speakers in the conversation. With max_speaker_count,
388+
// this forms a range within which the system automatically determines the
389+
// correct number of speakers. If not set, the default value is 2.
390+
int32 min_speaker_count = 2;
391+
392+
// *Optional*
393+
// Maximum number of speakers in the conversation. With min_speaker_count,
394+
// this forms a range within which the system automatically determines the
395+
// correct number of speakers. If not set, the default value is 6.
396+
int32 max_speaker_count = 3;
397+
}
398+
371399
// Description of audio data to be recognized.
372400
message RecognitionMetadata {
373401
// Use case categories that the audio recognition request can be described

0 commit comments

Comments
 (0)