Skip to content

Commit

Permalink
Fix incorrect type when using verbose_json as the whisper transcripti…
Browse files Browse the repository at this point in the history
…on response_format, fixes openai#702
  • Loading branch information
wrogati committed Mar 24, 2024
1 parent d4673f1 commit ea2de12
Showing 1 changed file with 106 additions and 1 deletion.
107 changes: 106 additions & 1 deletion src/resources/audio/transcriptions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ export class Transcriptions extends APIResource {
/**
* Transcribes audio into the input language.
*/
create(body: TranscriptionCreateParams, options?: Core.RequestOptions): Core.APIPromise<Transcription> {
create(body: TranscriptionCreateParams, options?: Core.RequestOptions): Core.APIPromise<Transcription | TranscriptionVerboseJson> {
return this._client.post('/audio/transcriptions', multipartFormRequestOptions({ body, ...options }));
}
}
Expand All @@ -25,6 +25,111 @@ export interface Transcription {
text: string;
}

/**
 * Verbose JSON form of a transcription response: the transcribed text plus
 * metadata about the input audio and, when present, per-word and per-segment
 * detail.
 */
export interface TranscriptionVerboseJson {
  /**
   * The transcribed text.
   */
  text: string;

  /**
   * The language of the input audio.
   */
  language: string;

  /**
   * The duration of the input audio.
   * NOTE(review): unit is presumably seconds, matching the word/segment
   * timestamps — confirm against the API reference.
   */
  duration: number;

  /**
   * Segments of the transcribed text and their corresponding details.
   * Optional: may be absent from the response.
   */
  segments?: Array<VerboseJsonSegment>;

  /**
   * Extracted words and their corresponding timestamps.
   * Optional: may be absent from the response.
   */
  words?: Array<VerboseJsonWord>;
}

/**
 * A single word of a verbose JSON transcription, with its timing.
 *
 * Exported so that callers can name the element type of
 * `TranscriptionVerboseJson.words` (e.g. in their own function signatures).
 */
export interface VerboseJsonWord {
  /**
   * The text content of the word.
   */
  word: string;

  /**
   * Start time of the word in seconds.
   */
  start: number;

  /**
   * End time of the word in seconds.
   */
  end: number;
}

/**
 * A single segment of a verbose JSON transcription, with its timing, tokens
 * and decoding statistics.
 *
 * Exported so that callers can name the element type of
 * `TranscriptionVerboseJson.segments` (e.g. in their own function signatures).
 */
export interface VerboseJsonSegment {
  /**
   * Unique identifier of the segment.
   */
  id: number;

  /**
   * Seek offset of the segment.
   */
  seek: number;

  /**
   * Start time of the segment in seconds.
   */
  start: number;

  /**
   * End time of the segment in seconds.
   */
  end: number;

  /**
   * Text content of the segment.
   */
  text: string;

  /**
   * Array of token IDs for the text content.
   */
  tokens: number[];

  /**
   * Temperature parameter used for generating the segment.
   */
  temperature: number;

  /**
   * Average logprob of the segment. If the value is lower than -1, consider
   * the logprobs failed.
   */
  avg_logprob: number;

  /**
   * Compression ratio of the segment. If the value is greater than 2.4,
   * consider the compression failed.
   */
  compression_ratio: number;

  /**
   * Probability of no speech in the segment. If the value is higher than 1.0
   * and the avg_logprob is below -1, consider this segment silent.
   *
   * NOTE(review): a probability cannot exceed 1.0, so the threshold as worded
   * can never be met. The intended cut-off is presumably lower — confirm
   * against the upstream API reference before relying on it.
   */
  no_speech_prob: number;
}

export interface TranscriptionCreateParams {
/**
* The audio file object (not file name) to transcribe, in one of these formats:
Expand Down

0 comments on commit ea2de12

Please sign in to comment.