diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index 29102aeb5..934f2cc03 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
{
- ".": "4.3.0"
+ ".": "4.4.0"
}
\ No newline at end of file
diff --git a/.stats.yml b/.stats.yml
index e68631a07..09d2eb1de 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
configured_endpoints: 135
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-e66e85fb7f72477256dca1acb6b23396989d381c5c1b318de564195436bcb93f.yml
-openapi_spec_hash: 0a4bbb5aa0ae532a072bd6b3854e70b1
-config_hash: 89bf7bb3a1f9439ffc6ea0e7dc57ba9b
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-104cced8f4c7436a76eea02e26307828166405ccfb296faffb008b72772c11a7.yml
+openapi_spec_hash: fdc03ed84a65a31b80da909255e53924
+config_hash: 03b48e9b8c7231a902403210dbd7dfa0
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d7db72593..ed4eba82a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,13 @@
# Changelog
+## 4.4.0 (2025-10-16)
+
+Full Changelog: [v4.3.0...v4.4.0](https://github.com/openai/openai-java/compare/v4.3.0...v4.4.0)
+
+### Features
+
+* **api:** Add support for gpt-4o-transcribe-diarize on audio/transcriptions endpoint ([dc2c297](https://github.com/openai/openai-java/commit/dc2c297526f2ebe4003642dfe8b1c2499d0a4b56))
+
## 4.3.0 (2025-10-10)
Full Changelog: [v4.2.0...v4.3.0](https://github.com/openai/openai-java/compare/v4.2.0...v4.3.0)
diff --git a/README.md b/README.md
index ebc8b4491..223eb98c0 100644
--- a/README.md
+++ b/README.md
@@ -2,8 +2,8 @@
-[](https://central.sonatype.com/artifact/com.openai/openai-java/4.3.0)
-[](https://javadoc.io/doc/com.openai/openai-java/4.3.0)
+[](https://central.sonatype.com/artifact/com.openai/openai-java/4.4.0)
+[](https://javadoc.io/doc/com.openai/openai-java/4.4.0)
@@ -11,7 +11,7 @@ The OpenAI Java SDK provides convenient access to the [OpenAI REST API](https://
-The REST API documentation can be found on [platform.openai.com](https://platform.openai.com/docs). Javadocs are available on [javadoc.io](https://javadoc.io/doc/com.openai/openai-java/4.3.0).
+The REST API documentation can be found on [platform.openai.com](https://platform.openai.com/docs). Javadocs are available on [javadoc.io](https://javadoc.io/doc/com.openai/openai-java/4.4.0).
@@ -24,7 +24,7 @@ The REST API documentation can be found on [platform.openai.com](https://platfor
### Gradle
```kotlin
-implementation("com.openai:openai-java:4.3.0")
+implementation("com.openai:openai-java:4.4.0")
```
### Maven
@@ -33,7 +33,7 @@ implementation("com.openai:openai-java:4.3.0")
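<dependency>
<groupId>com.openai</groupId>
<artifactId>openai-java</artifactId>
- <version>4.3.0</version>
+ <version>4.4.0</version>
</dependency>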
```
@@ -1342,7 +1342,7 @@ If you're using Spring Boot, then you can use the SDK's [Spring Boot starter](ht
#### Gradle
```kotlin
-implementation("com.openai:openai-java-spring-boot-starter:4.3.0")
+implementation("com.openai:openai-java-spring-boot-starter:4.4.0")
```
#### Maven
@@ -1351,7 +1351,7 @@ implementation("com.openai:openai-java-spring-boot-starter:4.3.0")
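<dependency>
<groupId>com.openai</groupId>
<artifactId>openai-java-spring-boot-starter</artifactId>
- <version>4.3.0</version>
+ <version>4.4.0</version>
</dependency>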
```
diff --git a/build.gradle.kts b/build.gradle.kts
index 99e11009d..702079380 100644
--- a/build.gradle.kts
+++ b/build.gradle.kts
@@ -8,7 +8,7 @@ repositories {
allprojects {
group = "com.openai"
- version = "4.3.0" // x-release-please-version
+ version = "4.4.0" // x-release-please-version
}
subprojects {
diff --git a/openai-java-core/src/main/kotlin/com/openai/models/audio/AudioModel.kt b/openai-java-core/src/main/kotlin/com/openai/models/audio/AudioModel.kt
index 9e8b8c7ad..096cb82a5 100644
--- a/openai-java-core/src/main/kotlin/com/openai/models/audio/AudioModel.kt
+++ b/openai-java-core/src/main/kotlin/com/openai/models/audio/AudioModel.kt
@@ -26,6 +26,8 @@ class AudioModel @JsonCreator private constructor(private val value: JsonField Value.WHISPER_1
GPT_4O_TRANSCRIBE -> Value.GPT_4O_TRANSCRIBE
GPT_4O_MINI_TRANSCRIBE -> Value.GPT_4O_MINI_TRANSCRIBE
+ GPT_4O_TRANSCRIBE_DIARIZE -> Value.GPT_4O_TRANSCRIBE_DIARIZE
else -> Value._UNKNOWN
}
@@ -81,6 +86,7 @@ class AudioModel @JsonCreator private constructor(private val value: JsonField Known.WHISPER_1
GPT_4O_TRANSCRIBE -> Known.GPT_4O_TRANSCRIBE
GPT_4O_MINI_TRANSCRIBE -> Known.GPT_4O_MINI_TRANSCRIBE
+ GPT_4O_TRANSCRIBE_DIARIZE -> Known.GPT_4O_TRANSCRIBE_DIARIZE
else -> throw OpenAIInvalidDataException("Unknown AudioModel: $value")
}
diff --git a/openai-java-core/src/main/kotlin/com/openai/models/audio/AudioResponseFormat.kt b/openai-java-core/src/main/kotlin/com/openai/models/audio/AudioResponseFormat.kt
index 34b72bab1..96cb4e728 100644
--- a/openai-java-core/src/main/kotlin/com/openai/models/audio/AudioResponseFormat.kt
+++ b/openai-java-core/src/main/kotlin/com/openai/models/audio/AudioResponseFormat.kt
@@ -8,8 +8,10 @@ import com.openai.core.JsonField
import com.openai.errors.OpenAIInvalidDataException
/**
- * The format of the output, in one of these options: `json`, `text`, `srt`, `verbose_json`, or
- * `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`, the only supported format is `json`.
+ * The format of the output, in one of these options: `json`, `text`, `srt`, `verbose_json`, `vtt`,
+ * or `diarized_json`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`, the only supported
+ * format is `json`. For `gpt-4o-transcribe-diarize`, the supported formats are `json`, `text`, and
+ * `diarized_json`, with `diarized_json` required to receive speaker annotations.
*/
class AudioResponseFormat @JsonCreator private constructor(private val value: JsonField) :
Enum {
@@ -35,6 +37,8 @@ class AudioResponseFormat @JsonCreator private constructor(private val value: Js
@JvmField val VTT = of("vtt")
+ @JvmField val DIARIZED_JSON = of("diarized_json")
+
@JvmStatic fun of(value: String) = AudioResponseFormat(JsonField.of(value))
}
@@ -45,6 +49,7 @@ class AudioResponseFormat @JsonCreator private constructor(private val value: Js
SRT,
VERBOSE_JSON,
VTT,
+ DIARIZED_JSON,
}
/**
@@ -62,6 +67,7 @@ class AudioResponseFormat @JsonCreator private constructor(private val value: Js
SRT,
VERBOSE_JSON,
VTT,
+ DIARIZED_JSON,
/**
* An enum member indicating that [AudioResponseFormat] was instantiated with an unknown
* value.
@@ -83,6 +89,7 @@ class AudioResponseFormat @JsonCreator private constructor(private val value: Js
SRT -> Value.SRT
VERBOSE_JSON -> Value.VERBOSE_JSON
VTT -> Value.VTT
+ DIARIZED_JSON -> Value.DIARIZED_JSON
else -> Value._UNKNOWN
}
@@ -101,6 +108,7 @@ class AudioResponseFormat @JsonCreator private constructor(private val value: Js
SRT -> Known.SRT
VERBOSE_JSON -> Known.VERBOSE_JSON
VTT -> Known.VTT
+ DIARIZED_JSON -> Known.DIARIZED_JSON
else -> throw OpenAIInvalidDataException("Unknown AudioResponseFormat: $value")
}
diff --git a/openai-java-core/src/main/kotlin/com/openai/models/audio/transcriptions/TranscriptionCreateParams.kt b/openai-java-core/src/main/kotlin/com/openai/models/audio/transcriptions/TranscriptionCreateParams.kt
index 8c34a011e..5bfdbf7dd 100644
--- a/openai-java-core/src/main/kotlin/com/openai/models/audio/transcriptions/TranscriptionCreateParams.kt
+++ b/openai-java-core/src/main/kotlin/com/openai/models/audio/transcriptions/TranscriptionCreateParams.kt
@@ -52,8 +52,9 @@ private constructor(
fun file(): InputStream = body.file()
/**
- * ID of the model to use. The options are `gpt-4o-transcribe`, `gpt-4o-mini-transcribe`, and
- * `whisper-1` (which is powered by our open source Whisper V2 model).
+ * ID of the model to use. The options are `gpt-4o-transcribe`, `gpt-4o-mini-transcribe`,
+ * `whisper-1` (which is powered by our open source Whisper V2 model), and
+ * `gpt-4o-transcribe-diarize`.
*
* @throws OpenAIInvalidDataException if the JSON field has an unexpected type or is
* unexpectedly missing or null (e.g. if the server responded with an unexpected value).
@@ -64,7 +65,8 @@ private constructor(
* Controls how the audio is cut into chunks. When set to `"auto"`, the server first normalizes
* loudness and then uses voice activity detection (VAD) to choose boundaries. `server_vad`
* object can be provided to tweak VAD detection parameters manually. If unset, the audio is
- * transcribed as a single block.
+ * transcribed as a single block. Required when using `gpt-4o-transcribe-diarize` for inputs
+ * longer than 30 seconds.
*
* @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
* server responded with an unexpected value).
@@ -75,13 +77,36 @@ private constructor(
* Additional information to include in the transcription response. `logprobs` will return the
* log probabilities of the tokens in the response to understand the model's confidence in the
* transcription. `logprobs` only works with response_format set to `json` and only with the
- * models `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`.
+ * models `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`. This field is not supported when
+ * using `gpt-4o-transcribe-diarize`.
*
* @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
* server responded with an unexpected value).
*/
fun include(): Optional> = body.include()
+ /**
+ * Optional list of speaker names that correspond to the audio samples provided in
+ * `known_speaker_references[]`. Each entry should be a short identifier (for example `customer`
+ * or `agent`). Up to 4 speakers are supported.
+ *
+ * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
+ * server responded with an unexpected value).
+ */
+ fun knownSpeakerNames(): Optional> = body.knownSpeakerNames()
+
+ /**
+ * Optional list of audio samples (as
+ * [data URLs](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs)) that
+ * contain known speaker references matching `known_speaker_names[]`. Each sample must be
+ * between 2 and 10 seconds, and can use any of the same input audio formats supported by
+ * `file`.
+ *
+ * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
+ * server responded with an unexpected value).
+ */
+ fun knownSpeakerReferences(): Optional> = body.knownSpeakerReferences()
+
/**
* The language of the input audio. Supplying the input language in
* [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format will
@@ -95,7 +120,7 @@ private constructor(
/**
* An optional text to guide the model's style or continue a previous audio segment. The
* [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) should match the
- * audio language.
+ * audio language. This field is not supported when using `gpt-4o-transcribe-diarize`.
*
* @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
* server responded with an unexpected value).
@@ -103,9 +128,11 @@ private constructor(
fun prompt(): Optional = body.prompt()
/**
- * The format of the output, in one of these options: `json`, `text`, `srt`, `verbose_json`, or
- * `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`, the only supported format is
- * `json`.
+ * The format of the output, in one of these options: `json`, `text`, `srt`, `verbose_json`,
+ * `vtt`, or `diarized_json`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`, the only
+ * supported format is `json`. For `gpt-4o-transcribe-diarize`, the supported formats are
+ * `json`, `text`, and `diarized_json`, with `diarized_json` required to receive speaker
+ * annotations.
*
* @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
* server responded with an unexpected value).
@@ -127,7 +154,8 @@ private constructor(
* The timestamp granularities to populate for this transcription. `response_format` must be set
* `verbose_json` to use timestamp granularities. Either or both of these options are supported:
* `word`, or `segment`. Note: There is no additional latency for segment timestamps, but
- * generating word timestamps incurs additional latency.
+ * generating word timestamps incurs additional latency. This option is not available for
+ * `gpt-4o-transcribe-diarize`.
*
* @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
* server responded with an unexpected value).
@@ -164,6 +192,22 @@ private constructor(
*/
fun _include(): MultipartField> = body._include()
+ /**
+ * Returns the raw multipart value of [knownSpeakerNames].
+ *
+ * Unlike [knownSpeakerNames], this method doesn't throw if the multipart field has an
+ * unexpected type.
+ */
+ fun _knownSpeakerNames(): MultipartField> = body._knownSpeakerNames()
+
+ /**
+ * Returns the raw multipart value of [knownSpeakerReferences].
+ *
+ * Unlike [knownSpeakerReferences], this method doesn't throw if the multipart field has an
+ * unexpected type.
+ */
+ fun _knownSpeakerReferences(): MultipartField> = body._knownSpeakerReferences()
+
/**
* Returns the raw multipart value of [language].
*
@@ -250,7 +294,7 @@ private constructor(
* - [model]
* - [chunkingStrategy]
* - [include]
- * - [language]
+ * - [knownSpeakerNames]
* - etc.
*/
fun body(body: Body) = apply { this.body = body.toBuilder() }
@@ -284,7 +328,8 @@ private constructor(
/**
* ID of the model to use. The options are `gpt-4o-transcribe`, `gpt-4o-mini-transcribe`,
- * and `whisper-1` (which is powered by our open source Whisper V2 model).
+ * `whisper-1` (which is powered by our open source Whisper V2 model), and
+ * `gpt-4o-transcribe-diarize`.
*/
fun model(model: AudioModel) = apply { body.model(model) }
@@ -309,7 +354,8 @@ private constructor(
* Controls how the audio is cut into chunks. When set to `"auto"`, the server first
* normalizes loudness and then uses voice activity detection (VAD) to choose boundaries.
* `server_vad` object can be provided to tweak VAD detection parameters manually. If unset,
- * the audio is transcribed as a single block.
+ * the audio is transcribed as a single block. Required when using
+ * `gpt-4o-transcribe-diarize` for inputs longer than 30 seconds.
*/
fun chunkingStrategy(chunkingStrategy: ChunkingStrategy?) = apply {
body.chunkingStrategy(chunkingStrategy)
@@ -342,7 +388,8 @@ private constructor(
* Additional information to include in the transcription response. `logprobs` will return
* the log probabilities of the tokens in the response to understand the model's confidence
* in the transcription. `logprobs` only works with response_format set to `json` and only
- * with the models `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`.
+ * with the models `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`. This field is not
+ * supported when using `gpt-4o-transcribe-diarize`.
*/
fun include(include: List) = apply { body.include(include) }
@@ -364,6 +411,66 @@ private constructor(
*/
fun addInclude(include: TranscriptionInclude) = apply { body.addInclude(include) }
+ /**
+ * Optional list of speaker names that correspond to the audio samples provided in
+ * `known_speaker_references[]`. Each entry should be a short identifier (for example
+ * `customer` or `agent`). Up to 4 speakers are supported.
+ */
+ fun knownSpeakerNames(knownSpeakerNames: List) = apply {
+ body.knownSpeakerNames(knownSpeakerNames)
+ }
+
+ /**
+ * Sets [Builder.knownSpeakerNames] to an arbitrary multipart value.
+ *
+ * You should usually call [Builder.knownSpeakerNames] with a well-typed `List<String>`
+ * value instead. This method is primarily for setting the field to an undocumented or not
+ * yet supported value.
+ */
+ fun knownSpeakerNames(knownSpeakerNames: MultipartField>) = apply {
+ body.knownSpeakerNames(knownSpeakerNames)
+ }
+
+ /**
+ * Adds a single [String] to [knownSpeakerNames].
+ *
+ * @throws IllegalStateException if the field was previously set to a non-list.
+ */
+ fun addKnownSpeakerName(knownSpeakerName: String) = apply {
+ body.addKnownSpeakerName(knownSpeakerName)
+ }
+
+ /**
+ * Optional list of audio samples (as
+ * [data URLs](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs))
+ * that contain known speaker references matching `known_speaker_names[]`. Each sample must
+ * be between 2 and 10 seconds, and can use any of the same input audio formats supported by
+ * `file`.
+ */
+ fun knownSpeakerReferences(knownSpeakerReferences: List) = apply {
+ body.knownSpeakerReferences(knownSpeakerReferences)
+ }
+
+ /**
+ * Sets [Builder.knownSpeakerReferences] to an arbitrary multipart value.
+ *
+ * You should usually call [Builder.knownSpeakerReferences] with a well-typed `List<String>`
+ * value instead. This method is primarily for setting the field to an undocumented or not
+ * yet supported value.
+ */
+ fun knownSpeakerReferences(knownSpeakerReferences: MultipartField>) = apply {
+ body.knownSpeakerReferences(knownSpeakerReferences)
+ }
+
+ /**
+ * Adds a single [String] to [knownSpeakerReferences].
+ *
+ * @throws IllegalStateException if the field was previously set to a non-list.
+ */
+ fun addKnownSpeakerReference(knownSpeakerReference: String) = apply {
+ body.addKnownSpeakerReference(knownSpeakerReference)
+ }
+
/**
* The language of the input audio. Supplying the input language in
* [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format
@@ -382,7 +489,7 @@ private constructor(
/**
* An optional text to guide the model's style or continue a previous audio segment. The
* [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) should match
- * the audio language.
+ * the audio language. This field is not supported when using `gpt-4o-transcribe-diarize`.
*/
fun prompt(prompt: String) = apply { body.prompt(prompt) }
@@ -396,8 +503,10 @@ private constructor(
/**
* The format of the output, in one of these options: `json`, `text`, `srt`, `verbose_json`,
- * or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`, the only supported format
- * is `json`.
+ * `vtt`, or `diarized_json`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`, the only
+ * supported format is `json`. For `gpt-4o-transcribe-diarize`, the supported formats are
+ * `json`, `text`, and `diarized_json`, with `diarized_json` required to receive speaker
+ * annotations.
*/
fun responseFormat(responseFormat: AudioResponseFormat) = apply {
body.responseFormat(responseFormat)
@@ -438,7 +547,8 @@ private constructor(
* The timestamp granularities to populate for this transcription. `response_format` must be
* set `verbose_json` to use timestamp granularities. Either or both of these options are
* supported: `word`, or `segment`. Note: There is no additional latency for segment
- * timestamps, but generating word timestamps incurs additional latency.
+ * timestamps, but generating word timestamps incurs additional latency. This option is not
+ * available for `gpt-4o-transcribe-diarize`.
*/
fun timestampGranularities(timestampGranularities: List) = apply {
body.timestampGranularities(timestampGranularities)
@@ -608,6 +718,8 @@ private constructor(
"model" to _model(),
"chunking_strategy" to _chunkingStrategy(),
"include" to _include(),
+ "known_speaker_names" to _knownSpeakerNames(),
+ "known_speaker_references" to _knownSpeakerReferences(),
"language" to _language(),
"prompt" to _prompt(),
"response_format" to _responseFormat(),
@@ -626,6 +738,8 @@ private constructor(
private val model: MultipartField,
private val chunkingStrategy: MultipartField,
private val include: MultipartField>,
+ private val knownSpeakerNames: MultipartField>,
+ private val knownSpeakerReferences: MultipartField>,
private val language: MultipartField,
private val prompt: MultipartField,
private val responseFormat: MultipartField,
@@ -645,7 +759,8 @@ private constructor(
/**
* ID of the model to use. The options are `gpt-4o-transcribe`, `gpt-4o-mini-transcribe`,
- * and `whisper-1` (which is powered by our open source Whisper V2 model).
+ * `whisper-1` (which is powered by our open source Whisper V2 model), and
+ * `gpt-4o-transcribe-diarize`.
*
* @throws OpenAIInvalidDataException if the JSON field has an unexpected type or is
* unexpectedly missing or null (e.g. if the server responded with an unexpected value).
@@ -656,7 +771,8 @@ private constructor(
* Controls how the audio is cut into chunks. When set to `"auto"`, the server first
* normalizes loudness and then uses voice activity detection (VAD) to choose boundaries.
* `server_vad` object can be provided to tweak VAD detection parameters manually. If unset,
- * the audio is transcribed as a single block.
+ * the audio is transcribed as a single block. Required when using
+ * `gpt-4o-transcribe-diarize` for inputs longer than 30 seconds.
*
* @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
* server responded with an unexpected value).
@@ -668,13 +784,38 @@ private constructor(
* Additional information to include in the transcription response. `logprobs` will return
* the log probabilities of the tokens in the response to understand the model's confidence
* in the transcription. `logprobs` only works with response_format set to `json` and only
- * with the models `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`.
+ * with the models `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`. This field is not
+ * supported when using `gpt-4o-transcribe-diarize`.
*
* @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
* server responded with an unexpected value).
*/
fun include(): Optional> = include.value.getOptional("include")
+ /**
+ * Optional list of speaker names that correspond to the audio samples provided in
+ * `known_speaker_references[]`. Each entry should be a short identifier (for example
+ * `customer` or `agent`). Up to 4 speakers are supported.
+ *
+ * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
+ * server responded with an unexpected value).
+ */
+ fun knownSpeakerNames(): Optional> =
+ knownSpeakerNames.value.getOptional("known_speaker_names")
+
+ /**
+ * Optional list of audio samples (as
+ * [data URLs](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs))
+ * that contain known speaker references matching `known_speaker_names[]`. Each sample must
+ * be between 2 and 10 seconds, and can use any of the same input audio formats supported by
+ * `file`.
+ *
+ * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
+ * server responded with an unexpected value).
+ */
+ fun knownSpeakerReferences(): Optional> =
+ knownSpeakerReferences.value.getOptional("known_speaker_references")
+
/**
* The language of the input audio. Supplying the input language in
* [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format
@@ -688,7 +829,7 @@ private constructor(
/**
* An optional text to guide the model's style or continue a previous audio segment. The
* [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) should match
- * the audio language.
+ * the audio language. This field is not supported when using `gpt-4o-transcribe-diarize`.
*
* @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
* server responded with an unexpected value).
@@ -697,8 +838,10 @@ private constructor(
/**
* The format of the output, in one of these options: `json`, `text`, `srt`, `verbose_json`,
- * or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`, the only supported format
- * is `json`.
+ * `vtt`, or `diarized_json`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`, the only
+ * supported format is `json`. For `gpt-4o-transcribe-diarize`, the supported formats are
+ * `json`, `text`, and `diarized_json`, with `diarized_json` required to receive speaker
+ * annotations.
*
* @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
* server responded with an unexpected value).
@@ -722,7 +865,8 @@ private constructor(
* The timestamp granularities to populate for this transcription. `response_format` must be
* set `verbose_json` to use timestamp granularities. Either or both of these options are
* supported: `word`, or `segment`. Note: There is no additional latency for segment
- * timestamps, but generating word timestamps incurs additional latency.
+ * timestamps, but generating word timestamps incurs additional latency. This option is not
+ * available for `gpt-4o-transcribe-diarize`.
*
* @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
* server responded with an unexpected value).
@@ -764,6 +908,26 @@ private constructor(
@ExcludeMissing
fun _include(): MultipartField> = include
+ /**
+ * Returns the raw multipart value of [knownSpeakerNames].
+ *
+ * Unlike [knownSpeakerNames], this method doesn't throw if the multipart field has an
+ * unexpected type.
+ */
+ @JsonProperty("known_speaker_names")
+ @ExcludeMissing
+ fun _knownSpeakerNames(): MultipartField> = knownSpeakerNames
+
+ /**
+ * Returns the raw multipart value of [knownSpeakerReferences].
+ *
+ * Unlike [knownSpeakerReferences], this method doesn't throw if the multipart field has an
+ * unexpected type.
+ */
+ @JsonProperty("known_speaker_references")
+ @ExcludeMissing
+ fun _knownSpeakerReferences(): MultipartField> = knownSpeakerReferences
+
/**
* Returns the raw multipart value of [language].
*
@@ -843,6 +1007,8 @@ private constructor(
private var model: MultipartField? = null
private var chunkingStrategy: MultipartField = MultipartField.of(null)
private var include: MultipartField>? = null
+ private var knownSpeakerNames: MultipartField>? = null
+ private var knownSpeakerReferences: MultipartField>? = null
private var language: MultipartField = MultipartField.of(null)
private var prompt: MultipartField = MultipartField.of(null)
private var responseFormat: MultipartField =
@@ -858,6 +1024,8 @@ private constructor(
model = body.model
chunkingStrategy = body.chunkingStrategy
include = body.include.map { it.toMutableList() }
+ knownSpeakerNames = body.knownSpeakerNames.map { it.toMutableList() }
+ knownSpeakerReferences = body.knownSpeakerReferences.map { it.toMutableList() }
language = body.language
prompt = body.prompt
responseFormat = body.responseFormat
@@ -901,8 +1069,8 @@ private constructor(
/**
* ID of the model to use. The options are `gpt-4o-transcribe`,
- * `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
- * Whisper V2 model).
+ * `gpt-4o-mini-transcribe`, `whisper-1` (which is powered by our open source Whisper V2
+ * model), and `gpt-4o-transcribe-diarize`.
*/
fun model(model: AudioModel) = model(MultipartField.of(model))
@@ -928,7 +1096,8 @@ private constructor(
* Controls how the audio is cut into chunks. When set to `"auto"`, the server first
* normalizes loudness and then uses voice activity detection (VAD) to choose
* boundaries. `server_vad` object can be provided to tweak VAD detection parameters
- * manually. If unset, the audio is transcribed as a single block.
+ * manually. If unset, the audio is transcribed as a single block. Required when using
+ * `gpt-4o-transcribe-diarize` for inputs longer than 30 seconds.
*/
fun chunkingStrategy(chunkingStrategy: ChunkingStrategy?) =
chunkingStrategy(MultipartField.of(chunkingStrategy))
@@ -964,6 +1133,7 @@ private constructor(
* return the log probabilities of the tokens in the response to understand the model's
* confidence in the transcription. `logprobs` only works with response_format set to
* `json` and only with the models `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`.
+ * This field is not supported when using `gpt-4o-transcribe-diarize`.
*/
fun include(include: List) = include(MultipartField.of(include))
@@ -990,6 +1160,71 @@ private constructor(
}
}
+ /**
+ * Optional list of speaker names that correspond to the audio samples provided in
+ * `known_speaker_references[]`. Each entry should be a short identifier (for example
+ * `customer` or `agent`). Up to 4 speakers are supported.
+ */
+ fun knownSpeakerNames(knownSpeakerNames: List) =
+ knownSpeakerNames(MultipartField.of(knownSpeakerNames))
+
+ /**
+ * Sets [Builder.knownSpeakerNames] to an arbitrary multipart value.
+ *
+ * You should usually call [Builder.knownSpeakerNames] with a well-typed `List<String>`
+ * value instead. This method is primarily for setting the field to an undocumented or
+ * not yet supported value.
+ */
+ fun knownSpeakerNames(knownSpeakerNames: MultipartField>) = apply {
+ this.knownSpeakerNames = knownSpeakerNames.map { it.toMutableList() }
+ }
+
+ /**
+ * Adds a single [String] to [knownSpeakerNames].
+ *
+ * @throws IllegalStateException if the field was previously set to a non-list.
+ */
+ fun addKnownSpeakerName(knownSpeakerName: String) = apply {
+ knownSpeakerNames =
+ (knownSpeakerNames ?: MultipartField.of(mutableListOf())).also {
+ checkKnown("knownSpeakerNames", it).add(knownSpeakerName)
+ }
+ }
+
+ /**
+ * Optional list of audio samples (as
+ * [data URLs](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs))
+ * that contain known speaker references matching `known_speaker_names[]`. Each sample
+ * must be between 2 and 10 seconds, and can use any of the same input audio formats
+ * supported by `file`.
+ */
+ fun knownSpeakerReferences(knownSpeakerReferences: List) =
+ knownSpeakerReferences(MultipartField.of(knownSpeakerReferences))
+
+ /**
+ * Sets [Builder.knownSpeakerReferences] to an arbitrary multipart value.
+ *
+ * You should usually call [Builder.knownSpeakerReferences] with a well-typed
+ * `List<String>` value instead. This method is primarily for setting the field to an
+ * undocumented or not yet supported value.
+ */
+ fun knownSpeakerReferences(knownSpeakerReferences: MultipartField>) =
+ apply {
+ this.knownSpeakerReferences = knownSpeakerReferences.map { it.toMutableList() }
+ }
+
+ /**
+ * Adds a single [String] to [knownSpeakerReferences].
+ *
+ * @throws IllegalStateException if the field was previously set to a non-list.
+ */
+ fun addKnownSpeakerReference(knownSpeakerReference: String) = apply {
+ knownSpeakerReferences =
+ (knownSpeakerReferences ?: MultipartField.of(mutableListOf())).also {
+ checkKnown("knownSpeakerReferences", it).add(knownSpeakerReference)
+ }
+ }
+
/**
* The language of the input audio. Supplying the input language in
* [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format
@@ -1009,7 +1244,8 @@ private constructor(
/**
* An optional text to guide the model's style or continue a previous audio segment. The
* [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) should
- * match the audio language.
+ * match the audio language. This field is not supported when using
+ * `gpt-4o-transcribe-diarize`.
*/
fun prompt(prompt: String) = prompt(MultipartField.of(prompt))
@@ -1024,8 +1260,10 @@ private constructor(
/**
* The format of the output, in one of these options: `json`, `text`, `srt`,
- * `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`, the
- * only supported format is `json`.
+ * `verbose_json`, `vtt`, or `diarized_json`. For `gpt-4o-transcribe` and
+ * `gpt-4o-mini-transcribe`, the only supported format is `json`. For
+ * `gpt-4o-transcribe-diarize`, the supported formats are `json`, `text`, and
+ * `diarized_json`, with `diarized_json` required to receive speaker annotations.
*/
fun responseFormat(responseFormat: AudioResponseFormat) =
responseFormat(MultipartField.of(responseFormat))
@@ -1065,7 +1303,8 @@ private constructor(
* The timestamp granularities to populate for this transcription. `response_format`
* must be set `verbose_json` to use timestamp granularities. Either or both of these
* options are supported: `word`, or `segment`. Note: There is no additional latency for
- * segment timestamps, but generating word timestamps incurs additional latency.
+ * segment timestamps, but generating word timestamps incurs additional latency. This
+ * option is not available for `gpt-4o-transcribe-diarize`.
*/
fun timestampGranularities(timestampGranularities: List) =
timestampGranularities(MultipartField.of(timestampGranularities))
@@ -1133,6 +1372,8 @@ private constructor(
checkRequired("model", model),
chunkingStrategy,
(include ?: MultipartField.of(null)).map { it.toImmutable() },
+ (knownSpeakerNames ?: MultipartField.of(null)).map { it.toImmutable() },
+ (knownSpeakerReferences ?: MultipartField.of(null)).map { it.toImmutable() },
language,
prompt,
responseFormat,
@@ -1153,6 +1394,8 @@ private constructor(
model().validate()
chunkingStrategy().ifPresent { it.validate() }
include().ifPresent { it.forEach { it.validate() } }
+ knownSpeakerNames()
+ knownSpeakerReferences()
language()
prompt()
responseFormat().ifPresent { it.validate() }
@@ -1179,6 +1422,8 @@ private constructor(
model == other.model &&
chunkingStrategy == other.chunkingStrategy &&
include == other.include &&
+ knownSpeakerNames == other.knownSpeakerNames &&
+ knownSpeakerReferences == other.knownSpeakerReferences &&
language == other.language &&
prompt == other.prompt &&
responseFormat == other.responseFormat &&
@@ -1193,6 +1438,8 @@ private constructor(
model,
chunkingStrategy,
include,
+ knownSpeakerNames,
+ knownSpeakerReferences,
language,
prompt,
responseFormat,
@@ -1205,14 +1452,15 @@ private constructor(
override fun hashCode(): Int = hashCode
override fun toString() =
- "Body{file=$file, model=$model, chunkingStrategy=$chunkingStrategy, include=$include, language=$language, prompt=$prompt, responseFormat=$responseFormat, temperature=$temperature, timestampGranularities=$timestampGranularities, additionalProperties=$additionalProperties}"
+ "Body{file=$file, model=$model, chunkingStrategy=$chunkingStrategy, include=$include, knownSpeakerNames=$knownSpeakerNames, knownSpeakerReferences=$knownSpeakerReferences, language=$language, prompt=$prompt, responseFormat=$responseFormat, temperature=$temperature, timestampGranularities=$timestampGranularities, additionalProperties=$additionalProperties}"
}
/**
* Controls how the audio is cut into chunks. When set to `"auto"`, the server first normalizes
* loudness and then uses voice activity detection (VAD) to choose boundaries. `server_vad`
* object can be provided to tweak VAD detection parameters manually. If unset, the audio is
- * transcribed as a single block.
+ * transcribed as a single block. Required when using `gpt-4o-transcribe-diarize` for inputs
+ * longer than 30 seconds.
*/
@JsonSerialize(using = ChunkingStrategy.Serializer::class)
class ChunkingStrategy
diff --git a/openai-java-core/src/main/kotlin/com/openai/models/audio/transcriptions/TranscriptionCreateResponse.kt b/openai-java-core/src/main/kotlin/com/openai/models/audio/transcriptions/TranscriptionCreateResponse.kt
index 74ec83ffe..e68d73158 100644
--- a/openai-java-core/src/main/kotlin/com/openai/models/audio/transcriptions/TranscriptionCreateResponse.kt
+++ b/openai-java-core/src/main/kotlin/com/openai/models/audio/transcriptions/TranscriptionCreateResponse.kt
@@ -24,6 +24,7 @@ import java.util.Optional
class TranscriptionCreateResponse
private constructor(
private val transcription: Transcription? = null,
+ private val diarized: TranscriptionDiarized? = null,
private val verbose: TranscriptionVerbose? = null,
private val _json: JsonValue? = null,
) {
@@ -31,6 +32,12 @@ private constructor(
/** Represents a transcription response returned by model, based on the provided input. */
fun transcription(): Optional<Transcription> = Optional.ofNullable(transcription)
+ /**
+ * Represents a diarized transcription response returned by the model, including the combined
+ * transcript and speaker-segment annotations.
+ */
+ fun diarized(): Optional<TranscriptionDiarized> = Optional.ofNullable(diarized)
+
/**
* Represents a verbose json transcription response returned by model, based on the provided
* input.
@@ -39,11 +46,19 @@ private constructor(
fun isTranscription(): Boolean = transcription != null
+ fun isDiarized(): Boolean = diarized != null
+
fun isVerbose(): Boolean = verbose != null
/** Represents a transcription response returned by model, based on the provided input. */
fun asTranscription(): Transcription = transcription.getOrThrow("transcription")
+ /**
+ * Represents a diarized transcription response returned by the model, including the combined
+ * transcript and speaker-segment annotations.
+ */
+ fun asDiarized(): TranscriptionDiarized = diarized.getOrThrow("diarized")
+
/**
* Represents a verbose json transcription response returned by model, based on the provided
* input.
@@ -55,6 +70,7 @@ private constructor(
fun <T> accept(visitor: Visitor<T>): T =
when {
transcription != null -> visitor.visitTranscription(transcription)
+ diarized != null -> visitor.visitDiarized(diarized)
verbose != null -> visitor.visitVerbose(verbose)
else -> visitor.unknown(_json)
}
@@ -72,6 +88,10 @@ private constructor(
transcription.validate()
}
+ override fun visitDiarized(diarized: TranscriptionDiarized) {
+ diarized.validate()
+ }
+
override fun visitVerbose(verbose: TranscriptionVerbose) {
verbose.validate()
}
@@ -100,6 +120,8 @@ private constructor(
override fun visitTranscription(transcription: Transcription) =
transcription.validity()
+ override fun visitDiarized(diarized: TranscriptionDiarized) = diarized.validity()
+
override fun visitVerbose(verbose: TranscriptionVerbose) = verbose.validity()
override fun unknown(json: JsonValue?) = 0
@@ -113,14 +135,16 @@ private constructor(
return other is TranscriptionCreateResponse &&
transcription == other.transcription &&
+ diarized == other.diarized &&
verbose == other.verbose
}
- override fun hashCode(): Int = Objects.hash(transcription, verbose)
+ override fun hashCode(): Int = Objects.hash(transcription, diarized, verbose)
override fun toString(): String =
when {
transcription != null -> "TranscriptionCreateResponse{transcription=$transcription}"
+ diarized != null -> "TranscriptionCreateResponse{diarized=$diarized}"
verbose != null -> "TranscriptionCreateResponse{verbose=$verbose}"
_json != null -> "TranscriptionCreateResponse{_unknown=$_json}"
else -> throw IllegalStateException("Invalid TranscriptionCreateResponse")
@@ -133,6 +157,14 @@ private constructor(
fun ofTranscription(transcription: Transcription) =
TranscriptionCreateResponse(transcription = transcription)
+ /**
+ * Represents a diarized transcription response returned by the model, including the
+ * combined transcript and speaker-segment annotations.
+ */
+ @JvmStatic
+ fun ofDiarized(diarized: TranscriptionDiarized) =
+ TranscriptionCreateResponse(diarized = diarized)
+
/**
* Represents a verbose json transcription response returned by model, based on the provided
* input.
@@ -151,6 +183,12 @@ private constructor(
/** Represents a transcription response returned by model, based on the provided input. */
fun visitTranscription(transcription: Transcription): T
+ /**
+ * Represents a diarized transcription response returned by the model, including the
+ * combined transcript and speaker-segment annotations.
+ */
+ fun visitDiarized(diarized: TranscriptionDiarized): T
+
/**
* Represents a verbose json transcription response returned by model, based on the provided
* input.
@@ -183,6 +221,9 @@ private constructor(
tryDeserialize(node, jacksonTypeRef<Transcription>())?.let {
TranscriptionCreateResponse(transcription = it, _json = json)
},
+ tryDeserialize(node, jacksonTypeRef<TranscriptionDiarized>())?.let {
+ TranscriptionCreateResponse(diarized = it, _json = json)
+ },
tryDeserialize(node, jacksonTypeRef<TranscriptionVerbose>())?.let {
TranscriptionCreateResponse(verbose = it, _json = json)
},
@@ -212,6 +253,7 @@ private constructor(
) {
when {
value.transcription != null -> generator.writeObject(value.transcription)
+ value.diarized != null -> generator.writeObject(value.diarized)
value.verbose != null -> generator.writeObject(value.verbose)
value._json != null -> generator.writeObject(value._json)
else -> throw IllegalStateException("Invalid TranscriptionCreateResponse")
diff --git a/openai-java-core/src/main/kotlin/com/openai/models/audio/transcriptions/TranscriptionDiarized.kt b/openai-java-core/src/main/kotlin/com/openai/models/audio/transcriptions/TranscriptionDiarized.kt
new file mode 100644
index 000000000..1f2dcbc68
--- /dev/null
+++ b/openai-java-core/src/main/kotlin/com/openai/models/audio/transcriptions/TranscriptionDiarized.kt
@@ -0,0 +1,1324 @@
+// File generated from our OpenAPI spec by Stainless.
+
+package com.openai.models.audio.transcriptions
+
+import com.fasterxml.jackson.annotation.JsonAnyGetter
+import com.fasterxml.jackson.annotation.JsonAnySetter
+import com.fasterxml.jackson.annotation.JsonCreator
+import com.fasterxml.jackson.annotation.JsonProperty
+import com.fasterxml.jackson.core.JsonGenerator
+import com.fasterxml.jackson.core.ObjectCodec
+import com.fasterxml.jackson.databind.JsonNode
+import com.fasterxml.jackson.databind.SerializerProvider
+import com.fasterxml.jackson.databind.annotation.JsonDeserialize
+import com.fasterxml.jackson.databind.annotation.JsonSerialize
+import com.fasterxml.jackson.module.kotlin.jacksonTypeRef
+import com.openai.core.BaseDeserializer
+import com.openai.core.BaseSerializer
+import com.openai.core.ExcludeMissing
+import com.openai.core.JsonField
+import com.openai.core.JsonMissing
+import com.openai.core.JsonValue
+import com.openai.core.checkKnown
+import com.openai.core.checkRequired
+import com.openai.core.getOrThrow
+import com.openai.core.toImmutable
+import com.openai.errors.OpenAIInvalidDataException
+import java.util.Collections
+import java.util.Objects
+import java.util.Optional
+import kotlin.jvm.optionals.getOrNull
+
+/**
+ * Represents a diarized transcription response returned by the model, including the combined
+ * transcript and speaker-segment annotations.
+ */
+class TranscriptionDiarized
+@JsonCreator(mode = JsonCreator.Mode.DISABLED)
+private constructor(
+ private val duration: JsonField<Double>,
+ private val segments: JsonField<List<TranscriptionDiarizedSegment>>,
+ private val task: JsonValue,
+ private val text: JsonField<String>,
+ private val usage: JsonField<Usage>,
+ private val additionalProperties: MutableMap<String, JsonValue>,
+) {
+
+ @JsonCreator
+ private constructor(
+ @JsonProperty("duration") @ExcludeMissing duration: JsonField<Double> = JsonMissing.of(),
+ @JsonProperty("segments")
+ @ExcludeMissing
+ segments: JsonField<List<TranscriptionDiarizedSegment>> = JsonMissing.of(),
+ @JsonProperty("task") @ExcludeMissing task: JsonValue = JsonMissing.of(),
+ @JsonProperty("text") @ExcludeMissing text: JsonField<String> = JsonMissing.of(),
+ @JsonProperty("usage") @ExcludeMissing usage: JsonField<Usage> = JsonMissing.of(),
+ ) : this(duration, segments, task, text, usage, mutableMapOf())
+
+ /**
+ * Duration of the input audio in seconds.
+ *
+ * @throws OpenAIInvalidDataException if the JSON field has an unexpected type or is
+ * unexpectedly missing or null (e.g. if the server responded with an unexpected value).
+ */
+ fun duration(): Double = duration.getRequired("duration")
+
+ /**
+ * Segments of the transcript annotated with timestamps and speaker labels.
+ *
+ * @throws OpenAIInvalidDataException if the JSON field has an unexpected type or is
+ * unexpectedly missing or null (e.g. if the server responded with an unexpected value).
+ */
+ fun segments(): List<TranscriptionDiarizedSegment> = segments.getRequired("segments")
+
+ /**
+ * The type of task that was run. Always `transcribe`.
+ *
+ * Expected to always return the following:
+ * ```java
+ * JsonValue.from("transcribe")
+ * ```
+ *
+ * However, this method can be useful for debugging and logging (e.g. if the server responded
+ * with an unexpected value).
+ */
+ @JsonProperty("task") @ExcludeMissing fun _task(): JsonValue = task
+
+ /**
+ * The concatenated transcript text for the entire audio input.
+ *
+ * @throws OpenAIInvalidDataException if the JSON field has an unexpected type or is
+ * unexpectedly missing or null (e.g. if the server responded with an unexpected value).
+ */
+ fun text(): String = text.getRequired("text")
+
+ /**
+ * Token or duration usage statistics for the request.
+ *
+ * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
+ * server responded with an unexpected value).
+ */
+ fun usage(): Optional<Usage> = usage.getOptional("usage")
+
+ /**
+ * Returns the raw JSON value of [duration].
+ *
+ * Unlike [duration], this method doesn't throw if the JSON field has an unexpected type.
+ */
+ @JsonProperty("duration") @ExcludeMissing fun _duration(): JsonField<Double> = duration
+
+ /**
+ * Returns the raw JSON value of [segments].
+ *
+ * Unlike [segments], this method doesn't throw if the JSON field has an unexpected type.
+ */
+ @JsonProperty("segments")
+ @ExcludeMissing
+ fun _segments(): JsonField<List<TranscriptionDiarizedSegment>> = segments
+
+ /**
+ * Returns the raw JSON value of [text].
+ *
+ * Unlike [text], this method doesn't throw if the JSON field has an unexpected type.
+ */
+ @JsonProperty("text") @ExcludeMissing fun _text(): JsonField<String> = text
+
+ /**
+ * Returns the raw JSON value of [usage].
+ *
+ * Unlike [usage], this method doesn't throw if the JSON field has an unexpected type.
+ */
+ @JsonProperty("usage") @ExcludeMissing fun _usage(): JsonField<Usage> = usage
+
+ @JsonAnySetter
+ private fun putAdditionalProperty(key: String, value: JsonValue) {
+ additionalProperties.put(key, value)
+ }
+
+ @JsonAnyGetter
+ @ExcludeMissing
+ fun _additionalProperties(): Map<String, JsonValue> =
+ Collections.unmodifiableMap(additionalProperties)
+
+ fun toBuilder() = Builder().from(this)
+
+ companion object {
+
+ /**
+ * Returns a mutable builder for constructing an instance of [TranscriptionDiarized].
+ *
+ * The following fields are required:
+ * ```java
+ * .duration()
+ * .segments()
+ * .text()
+ * ```
+ */
+ @JvmStatic fun builder() = Builder()
+ }
+
+ /** A builder for [TranscriptionDiarized]. */
+ class Builder internal constructor() {
+
+ private var duration: JsonField<Double>? = null
+ private var segments: JsonField<MutableList<TranscriptionDiarizedSegment>>? = null
+ private var task: JsonValue = JsonValue.from("transcribe")
+ private var text: JsonField<String>? = null
+ private var usage: JsonField<Usage> = JsonMissing.of()
+ private var additionalProperties: MutableMap<String, JsonValue> = mutableMapOf()
+
+ @JvmSynthetic
+ internal fun from(transcriptionDiarized: TranscriptionDiarized) = apply {
+ duration = transcriptionDiarized.duration
+ segments = transcriptionDiarized.segments.map { it.toMutableList() }
+ task = transcriptionDiarized.task
+ text = transcriptionDiarized.text
+ usage = transcriptionDiarized.usage
+ additionalProperties = transcriptionDiarized.additionalProperties.toMutableMap()
+ }
+
+ /** Duration of the input audio in seconds. */
+ fun duration(duration: Double) = duration(JsonField.of(duration))
+
+ /**
+ * Sets [Builder.duration] to an arbitrary JSON value.
+ *
+ * You should usually call [Builder.duration] with a well-typed [Double] value instead. This
+ * method is primarily for setting the field to an undocumented or not yet supported value.
+ */
+ fun duration(duration: JsonField<Double>) = apply { this.duration = duration }
+
+ /** Segments of the transcript annotated with timestamps and speaker labels. */
+ fun segments(segments: List<TranscriptionDiarizedSegment>) =
+ segments(JsonField.of(segments))
+
+ /**
+ * Sets [Builder.segments] to an arbitrary JSON value.
+ *
+ * You should usually call [Builder.segments] with a well-typed
+ * `List<TranscriptionDiarizedSegment>` value instead. This method is primarily for setting
+ * the field to an undocumented or not yet supported value.
+ */
+ fun segments(segments: JsonField<List<TranscriptionDiarizedSegment>>) = apply {
+ this.segments = segments.map { it.toMutableList() }
+ }
+
+ /**
+ * Adds a single [TranscriptionDiarizedSegment] to [segments].
+ *
+ * @throws IllegalStateException if the field was previously set to a non-list.
+ */
+ fun addSegment(segment: TranscriptionDiarizedSegment) = apply {
+ segments =
+ (segments ?: JsonField.of(mutableListOf())).also {
+ checkKnown("segments", it).add(segment)
+ }
+ }
+
+ /**
+ * Sets the field to an arbitrary JSON value.
+ *
+ * It is usually unnecessary to call this method because the field defaults to the
+ * following:
+ * ```java
+ * JsonValue.from("transcribe")
+ * ```
+ *
+ * This method is primarily for setting the field to an undocumented or not yet supported
+ * value.
+ */
+ fun task(task: JsonValue) = apply { this.task = task }
+
+ /** The concatenated transcript text for the entire audio input. */
+ fun text(text: String) = text(JsonField.of(text))
+
+ /**
+ * Sets [Builder.text] to an arbitrary JSON value.
+ *
+ * You should usually call [Builder.text] with a well-typed [String] value instead. This
+ * method is primarily for setting the field to an undocumented or not yet supported value.
+ */
+ fun text(text: JsonField<String>) = apply { this.text = text }
+
+ /** Token or duration usage statistics for the request. */
+ fun usage(usage: Usage) = usage(JsonField.of(usage))
+
+ /**
+ * Sets [Builder.usage] to an arbitrary JSON value.
+ *
+ * You should usually call [Builder.usage] with a well-typed [Usage] value instead. This
+ * method is primarily for setting the field to an undocumented or not yet supported value.
+ */
+ fun usage(usage: JsonField<Usage>) = apply { this.usage = usage }
+
+ /** Alias for calling [usage] with `Usage.ofTokens(tokens)`. */
+ fun usage(tokens: Usage.Tokens) = usage(Usage.ofTokens(tokens))
+
+ /** Alias for calling [usage] with `Usage.ofDuration(duration)`. */
+ fun usage(duration: Usage.Duration) = usage(Usage.ofDuration(duration))
+
+ /**
+ * Alias for calling [usage] with the following:
+ * ```java
+ * Usage.Duration.builder()
+ * .seconds(seconds)
+ * .build()
+ * ```
+ */
+ fun durationUsage(seconds: Double) =
+ usage(Usage.Duration.builder().seconds(seconds).build())
+
+ fun additionalProperties(additionalProperties: Map<String, JsonValue>) = apply {
+ this.additionalProperties.clear()
+ putAllAdditionalProperties(additionalProperties)
+ }
+
+ fun putAdditionalProperty(key: String, value: JsonValue) = apply {
+ additionalProperties.put(key, value)
+ }
+
+ fun putAllAdditionalProperties(additionalProperties: Map<String, JsonValue>) = apply {
+ this.additionalProperties.putAll(additionalProperties)
+ }
+
+ fun removeAdditionalProperty(key: String) = apply { additionalProperties.remove(key) }
+
+ fun removeAllAdditionalProperties(keys: Set<String>) = apply {
+ keys.forEach(::removeAdditionalProperty)
+ }
+
+ /**
+ * Returns an immutable instance of [TranscriptionDiarized].
+ *
+ * Further updates to this [Builder] will not mutate the returned instance.
+ *
+ * The following fields are required:
+ * ```java
+ * .duration()
+ * .segments()
+ * .text()
+ * ```
+ *
+ * @throws IllegalStateException if any required field is unset.
+ */
+ fun build(): TranscriptionDiarized =
+ TranscriptionDiarized(
+ checkRequired("duration", duration),
+ checkRequired("segments", segments).map { it.toImmutable() },
+ task,
+ checkRequired("text", text),
+ usage,
+ additionalProperties.toMutableMap(),
+ )
+ }
+
+ private var validated: Boolean = false
+
+ fun validate(): TranscriptionDiarized = apply {
+ if (validated) {
+ return@apply
+ }
+
+ duration()
+ segments().forEach { it.validate() }
+ _task().let {
+ if (it != JsonValue.from("transcribe")) {
+ throw OpenAIInvalidDataException("'task' is invalid, received $it")
+ }
+ }
+ text()
+ usage().ifPresent { it.validate() }
+ validated = true
+ }
+
+ fun isValid(): Boolean =
+ try {
+ validate()
+ true
+ } catch (e: OpenAIInvalidDataException) {
+ false
+ }
+
+ /**
+ * Returns a score indicating how many valid values are contained in this object recursively.
+ *
+ * Used for best match union deserialization.
+ */
+ @JvmSynthetic
+ internal fun validity(): Int =
+ (if (duration.asKnown().isPresent) 1 else 0) +
+ (segments.asKnown().getOrNull()?.sumOf { it.validity().toInt() } ?: 0) +
+ task.let { if (it == JsonValue.from("transcribe")) 1 else 0 } +
+ (if (text.asKnown().isPresent) 1 else 0) +
+ (usage.asKnown().getOrNull()?.validity() ?: 0)
+
+ /** Token or duration usage statistics for the request. */
+ @JsonDeserialize(using = Usage.Deserializer::class)
+ @JsonSerialize(using = Usage.Serializer::class)
+ class Usage
+ private constructor(
+ private val tokens: Tokens? = null,
+ private val duration: Duration? = null,
+ private val _json: JsonValue? = null,
+ ) {
+
+ /** Usage statistics for models billed by token usage. */
+ fun tokens(): Optional<Tokens> = Optional.ofNullable(tokens)
+
+ /** Usage statistics for models billed by audio input duration. */
+ fun duration(): Optional<Duration> = Optional.ofNullable(duration)
+
+ fun isTokens(): Boolean = tokens != null
+
+ fun isDuration(): Boolean = duration != null
+
+ /** Usage statistics for models billed by token usage. */
+ fun asTokens(): Tokens = tokens.getOrThrow("tokens")
+
+ /** Usage statistics for models billed by audio input duration. */
+ fun asDuration(): Duration = duration.getOrThrow("duration")
+
+ fun _json(): Optional<JsonValue> = Optional.ofNullable(_json)
+
+ fun <T> accept(visitor: Visitor<T>): T =
+ when {
+ tokens != null -> visitor.visitTokens(tokens)
+ duration != null -> visitor.visitDuration(duration)
+ else -> visitor.unknown(_json)
+ }
+
+ private var validated: Boolean = false
+
+ fun validate(): Usage = apply {
+ if (validated) {
+ return@apply
+ }
+
+ accept(
+ object : Visitor<Unit> {
+ override fun visitTokens(tokens: Tokens) {
+ tokens.validate()
+ }
+
+ override fun visitDuration(duration: Duration) {
+ duration.validate()
+ }
+ }
+ )
+ validated = true
+ }
+
+ fun isValid(): Boolean =
+ try {
+ validate()
+ true
+ } catch (e: OpenAIInvalidDataException) {
+ false
+ }
+
+ /**
+ * Returns a score indicating how many valid values are contained in this object
+ * recursively.
+ *
+ * Used for best match union deserialization.
+ */
+ @JvmSynthetic
+ internal fun validity(): Int =
+ accept(
+ object : Visitor<Int> {
+ override fun visitTokens(tokens: Tokens) = tokens.validity()
+
+ override fun visitDuration(duration: Duration) = duration.validity()
+
+ override fun unknown(json: JsonValue?) = 0
+ }
+ )
+
+ override fun equals(other: Any?): Boolean {
+ if (this === other) {
+ return true
+ }
+
+ return other is Usage && tokens == other.tokens && duration == other.duration
+ }
+
+ override fun hashCode(): Int = Objects.hash(tokens, duration)
+
+ override fun toString(): String =
+ when {
+ tokens != null -> "Usage{tokens=$tokens}"
+ duration != null -> "Usage{duration=$duration}"
+ _json != null -> "Usage{_unknown=$_json}"
+ else -> throw IllegalStateException("Invalid Usage")
+ }
+
+ companion object {
+
+ /** Usage statistics for models billed by token usage. */
+ @JvmStatic fun ofTokens(tokens: Tokens) = Usage(tokens = tokens)
+
+ /** Usage statistics for models billed by audio input duration. */
+ @JvmStatic fun ofDuration(duration: Duration) = Usage(duration = duration)
+ }
+
+ /** An interface that defines how to map each variant of [Usage] to a value of type [T]. */
+ interface Visitor<out T> {
+
+ /** Usage statistics for models billed by token usage. */
+ fun visitTokens(tokens: Tokens): T
+
+ /** Usage statistics for models billed by audio input duration. */
+ fun visitDuration(duration: Duration): T
+
+ /**
+ * Maps an unknown variant of [Usage] to a value of type [T].
+ *
+ * An instance of [Usage] can contain an unknown variant if it was deserialized from
+ * data that doesn't match any known variant. For example, if the SDK is on an older
+ * version than the API, then the API may respond with new variants that the SDK is
+ * unaware of.
+ *
+ * @throws OpenAIInvalidDataException in the default implementation.
+ */
+ fun unknown(json: JsonValue?): T {
+ throw OpenAIInvalidDataException("Unknown Usage: $json")
+ }
+ }
+
+ internal class Deserializer : BaseDeserializer<Usage>(Usage::class) {
+
+ override fun ObjectCodec.deserialize(node: JsonNode): Usage {
+ val json = JsonValue.fromJsonNode(node)
+ val type = json.asObject().getOrNull()?.get("type")?.asString()?.getOrNull()
+
+ when (type) {
+ "tokens" -> {
+ return tryDeserialize(node, jacksonTypeRef<Tokens>())?.let {
+ Usage(tokens = it, _json = json)
+ } ?: Usage(_json = json)
+ }
+ "duration" -> {
+ return tryDeserialize(node, jacksonTypeRef<Duration>())?.let {
+ Usage(duration = it, _json = json)
+ } ?: Usage(_json = json)
+ }
+ }
+
+ return Usage(_json = json)
+ }
+ }
+
+ internal class Serializer : BaseSerializer<Usage>(Usage::class) {
+
+ override fun serialize(
+ value: Usage,
+ generator: JsonGenerator,
+ provider: SerializerProvider,
+ ) {
+ when {
+ value.tokens != null -> generator.writeObject(value.tokens)
+ value.duration != null -> generator.writeObject(value.duration)
+ value._json != null -> generator.writeObject(value._json)
+ else -> throw IllegalStateException("Invalid Usage")
+ }
+ }
+ }
+
+ /** Usage statistics for models billed by token usage. */
+ class Tokens
+ @JsonCreator(mode = JsonCreator.Mode.DISABLED)
+ private constructor(
+ private val inputTokens: JsonField<Long>,
+ private val outputTokens: JsonField<Long>,
+ private val totalTokens: JsonField<Long>,
+ private val type: JsonValue,
+ private val inputTokenDetails: JsonField<InputTokenDetails>,
+ private val additionalProperties: MutableMap<String, JsonValue>,
+ ) {
+
+ @JsonCreator
+ private constructor(
+ @JsonProperty("input_tokens")
+ @ExcludeMissing
+ inputTokens: JsonField<Long> = JsonMissing.of(),
+ @JsonProperty("output_tokens")
+ @ExcludeMissing
+ outputTokens: JsonField<Long> = JsonMissing.of(),
+ @JsonProperty("total_tokens")
+ @ExcludeMissing
+ totalTokens: JsonField<Long> = JsonMissing.of(),
+ @JsonProperty("type") @ExcludeMissing type: JsonValue = JsonMissing.of(),
+ @JsonProperty("input_token_details")
+ @ExcludeMissing
+ inputTokenDetails: JsonField<InputTokenDetails> = JsonMissing.of(),
+ ) : this(
+ inputTokens,
+ outputTokens,
+ totalTokens,
+ type,
+ inputTokenDetails,
+ mutableMapOf(),
+ )
+
+ /**
+ * Number of input tokens billed for this request.
+ *
+ * @throws OpenAIInvalidDataException if the JSON field has an unexpected type or is
+ * unexpectedly missing or null (e.g. if the server responded with an unexpected
+ * value).
+ */
+ fun inputTokens(): Long = inputTokens.getRequired("input_tokens")
+
+ /**
+ * Number of output tokens generated.
+ *
+ * @throws OpenAIInvalidDataException if the JSON field has an unexpected type or is
+ * unexpectedly missing or null (e.g. if the server responded with an unexpected
+ * value).
+ */
+ fun outputTokens(): Long = outputTokens.getRequired("output_tokens")
+
+ /**
+ * Total number of tokens used (input + output).
+ *
+ * @throws OpenAIInvalidDataException if the JSON field has an unexpected type or is
+ * unexpectedly missing or null (e.g. if the server responded with an unexpected
+ * value).
+ */
+ fun totalTokens(): Long = totalTokens.getRequired("total_tokens")
+
+ /**
+ * The type of the usage object. Always `tokens` for this variant.
+ *
+ * Expected to always return the following:
+ * ```java
+ * JsonValue.from("tokens")
+ * ```
+ *
+ * However, this method can be useful for debugging and logging (e.g. if the server
+ * responded with an unexpected value).
+ */
+ @JsonProperty("type") @ExcludeMissing fun _type(): JsonValue = type
+
+ /**
+ * Details about the input tokens billed for this request.
+ *
+ * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if
+ * the server responded with an unexpected value).
+ */
+ fun inputTokenDetails(): Optional<InputTokenDetails> =
+ inputTokenDetails.getOptional("input_token_details")
+
+ /**
+ * Returns the raw JSON value of [inputTokens].
+ *
+ * Unlike [inputTokens], this method doesn't throw if the JSON field has an unexpected
+ * type.
+ */
+ @JsonProperty("input_tokens")
+ @ExcludeMissing
+ fun _inputTokens(): JsonField<Long> = inputTokens
+
+ /**
+ * Returns the raw JSON value of [outputTokens].
+ *
+ * Unlike [outputTokens], this method doesn't throw if the JSON field has an unexpected
+ * type.
+ */
+ @JsonProperty("output_tokens")
+ @ExcludeMissing
+ fun _outputTokens(): JsonField<Long> = outputTokens
+
+ /**
+ * Returns the raw JSON value of [totalTokens].
+ *
+ * Unlike [totalTokens], this method doesn't throw if the JSON field has an unexpected
+ * type.
+ */
+ @JsonProperty("total_tokens")
+ @ExcludeMissing
+ fun _totalTokens(): JsonField<Long> = totalTokens
+
+ /**
+ * Returns the raw JSON value of [inputTokenDetails].
+ *
+ * Unlike [inputTokenDetails], this method doesn't throw if the JSON field has an
+ * unexpected type.
+ */
+ @JsonProperty("input_token_details")
+ @ExcludeMissing
+ fun _inputTokenDetails(): JsonField<InputTokenDetails> = inputTokenDetails
+
+ @JsonAnySetter
+ private fun putAdditionalProperty(key: String, value: JsonValue) {
+ additionalProperties.put(key, value)
+ }
+
+ @JsonAnyGetter
+ @ExcludeMissing
+ fun _additionalProperties(): Map<String, JsonValue> =
+ Collections.unmodifiableMap(additionalProperties)
+
+ fun toBuilder() = Builder().from(this)
+
+ companion object {
+
+ /**
+ * Returns a mutable builder for constructing an instance of [Tokens].
+ *
+ * The following fields are required:
+ * ```java
+ * .inputTokens()
+ * .outputTokens()
+ * .totalTokens()
+ * ```
+ */
+ @JvmStatic fun builder() = Builder()
+ }
+
+ /** A builder for [Tokens]. */
+ class Builder internal constructor() {
+
+ private var inputTokens: JsonField<Long>? = null
+ private var outputTokens: JsonField<Long>? = null
+ private var totalTokens: JsonField<Long>? = null
+ private var type: JsonValue = JsonValue.from("tokens")
+ private var inputTokenDetails: JsonField<InputTokenDetails> = JsonMissing.of()
+ private var additionalProperties: MutableMap<String, JsonValue> = mutableMapOf()
+
+ @JvmSynthetic
+ internal fun from(tokens: Tokens) = apply {
+ inputTokens = tokens.inputTokens
+ outputTokens = tokens.outputTokens
+ totalTokens = tokens.totalTokens
+ type = tokens.type
+ inputTokenDetails = tokens.inputTokenDetails
+ additionalProperties = tokens.additionalProperties.toMutableMap()
+ }
+
+ /** Number of input tokens billed for this request. */
+ fun inputTokens(inputTokens: Long) = inputTokens(JsonField.of(inputTokens))
+
+ /**
+ * Sets [Builder.inputTokens] to an arbitrary JSON value.
+ *
+ * You should usually call [Builder.inputTokens] with a well-typed [Long] value
+ * instead. This method is primarily for setting the field to an undocumented or not
+ * yet supported value.
+ */
+ fun inputTokens(inputTokens: JsonField<Long>) = apply {
+ this.inputTokens = inputTokens
+ }
+
+ /** Number of output tokens generated. */
+ fun outputTokens(outputTokens: Long) = outputTokens(JsonField.of(outputTokens))
+
+ /**
+ * Sets [Builder.outputTokens] to an arbitrary JSON value.
+ *
+ * You should usually call [Builder.outputTokens] with a well-typed [Long] value
+ * instead. This method is primarily for setting the field to an undocumented or not
+ * yet supported value.
+ */
+ fun outputTokens(outputTokens: JsonField<Long>) = apply {
+ this.outputTokens = outputTokens
+ }
+
+ /** Total number of tokens used (input + output). */
+ fun totalTokens(totalTokens: Long) = totalTokens(JsonField.of(totalTokens))
+
+ /**
+ * Sets [Builder.totalTokens] to an arbitrary JSON value.
+ *
+ * You should usually call [Builder.totalTokens] with a well-typed [Long] value
+ * instead. This method is primarily for setting the field to an undocumented or not
+ * yet supported value.
+ */
+ fun totalTokens(totalTokens: JsonField<Long>) = apply {
+ this.totalTokens = totalTokens
+ }
+
+ /**
+ * Sets the field to an arbitrary JSON value.
+ *
+ * It is usually unnecessary to call this method because the field defaults to the
+ * following:
+ * ```java
+ * JsonValue.from("tokens")
+ * ```
+ *
+ * This method is primarily for setting the field to an undocumented or not yet
+ * supported value.
+ */
+ fun type(type: JsonValue) = apply { this.type = type }
+
+ /** Details about the input tokens billed for this request. */
+ fun inputTokenDetails(inputTokenDetails: InputTokenDetails) =
+ inputTokenDetails(JsonField.of(inputTokenDetails))
+
+ /**
+ * Sets [Builder.inputTokenDetails] to an arbitrary JSON value.
+ *
+ * You should usually call [Builder.inputTokenDetails] with a well-typed
+ * [InputTokenDetails] value instead. This method is primarily for setting the field
+ * to an undocumented or not yet supported value.
+ */
+ fun inputTokenDetails(inputTokenDetails: JsonField<InputTokenDetails>) = apply {
+ this.inputTokenDetails = inputTokenDetails
+ }
+
+ fun additionalProperties(additionalProperties: Map<String, JsonValue>) = apply {
+ this.additionalProperties.clear()
+ putAllAdditionalProperties(additionalProperties)
+ }
+
+ fun putAdditionalProperty(key: String, value: JsonValue) = apply {
+ additionalProperties.put(key, value)
+ }
+
+ fun putAllAdditionalProperties(additionalProperties: Map<String, JsonValue>) =
+ apply {
+ this.additionalProperties.putAll(additionalProperties)
+ }
+
+ fun removeAdditionalProperty(key: String): Builder = also {
+     it.additionalProperties.remove(key)
+ }
+
+ fun removeAllAdditionalProperties(keys: Set<String>) = apply {
+     keys.forEach(::removeAdditionalProperty)
+ }
+
+ /**
+ * Returns an immutable instance of [Tokens].
+ *
+ * Further updates to this [Builder] will not mutate the returned instance.
+ *
+ * The following fields are required:
+ * ```java
+ * .inputTokens()
+ * .outputTokens()
+ * .totalTokens()
+ * ```
+ *
+ * @throws IllegalStateException if any required field is unset.
+ */
+ fun build(): Tokens {
+     val requiredInput = checkRequired("inputTokens", inputTokens)
+     val requiredOutput = checkRequired("outputTokens", outputTokens)
+     val requiredTotal = checkRequired("totalTokens", totalTokens)
+     val extras = additionalProperties.toMutableMap()
+     return Tokens(
+         requiredInput, requiredOutput, requiredTotal, type, inputTokenDetails, extras,
+     )
+ }
+ }
+
+ private var validated: Boolean = false
+
+ fun validate(): Tokens = apply {
+     if (!validated) {
+         // Invoke each typed getter for its checks; the returned values are discarded.
+         inputTokens()
+         outputTokens()
+         totalTokens()
+         _type().let {
+             if (it != JsonValue.from("tokens")) {
+                 throw OpenAIInvalidDataException("'type' is invalid, received $it")
+             }
+         }
+         inputTokenDetails().ifPresent { it.validate() }
+         // Remember the success so repeated calls skip the checks above.
+         validated = true
+     }
+ }
+
+ fun isValid(): Boolean {
+     try {
+         validate()
+     } catch (invalid: OpenAIInvalidDataException) {
+         return false
+     }
+     return true
+ }
+
+ /**
+ * Returns a score indicating how many valid values are contained in this object
+ * recursively.
+ *
+ * Used for best match union deserialization.
+ */
+ @JvmSynthetic
+ internal fun validity(): Int {
+     val known = listOf(inputTokens, outputTokens, totalTokens).count { it.asKnown().isPresent }
+     val typeScore = if (type == JsonValue.from("tokens")) 1 else 0
+     val detailScore = inputTokenDetails.asKnown().getOrNull()?.validity() ?: 0
+     return known + typeScore + detailScore
+ }
+
+ /** Details about the input tokens billed for this request. */
+ class InputTokenDetails
+ @JsonCreator(mode = JsonCreator.Mode.DISABLED)
+ private constructor(
+     private val audioTokens: JsonField<Long>,
+     private val textTokens: JsonField<Long>,
+     private val additionalProperties: MutableMap<String, JsonValue>,
+ ) {
+
+     @JsonCreator
+     private constructor(
+         @JsonProperty("audio_tokens")
+         @ExcludeMissing
+         audioTokens: JsonField<Long> = JsonMissing.of(),
+         @JsonProperty("text_tokens")
+         @ExcludeMissing
+         textTokens: JsonField<Long> = JsonMissing.of(),
+     ) : this(audioTokens, textTokens, mutableMapOf())
+
+     /**
+      * Number of audio tokens billed for this request.
+      *
+      * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g.
+      * if the server responded with an unexpected value).
+      */
+     fun audioTokens(): Optional<Long> = audioTokens.getOptional("audio_tokens")
+
+     /**
+      * Number of text tokens billed for this request.
+      *
+      * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g.
+      * if the server responded with an unexpected value).
+      */
+     fun textTokens(): Optional<Long> = textTokens.getOptional("text_tokens")
+
+     /**
+      * Returns the raw JSON value of [audioTokens].
+      *
+      * Unlike [audioTokens], this method doesn't throw if the JSON field has an
+      * unexpected type.
+      */
+     @JsonProperty("audio_tokens")
+     @ExcludeMissing
+     fun _audioTokens(): JsonField<Long> = audioTokens
+
+     /**
+      * Returns the raw JSON value of [textTokens].
+      *
+      * Unlike [textTokens], this method doesn't throw if the JSON field has an
+      * unexpected type.
+      */
+     @JsonProperty("text_tokens")
+     @ExcludeMissing
+     fun _textTokens(): JsonField<Long> = textTokens
+
+     @JsonAnySetter
+     private fun putAdditionalProperty(key: String, value: JsonValue) {
+         additionalProperties.put(key, value)
+     }
+
+     @JsonAnyGetter
+     @ExcludeMissing
+     fun _additionalProperties(): Map<String, JsonValue> =
+         Collections.unmodifiableMap(additionalProperties)
+
+     fun toBuilder() = Builder().from(this)
+
+     companion object {
+
+         /**
+          * Returns a mutable builder for constructing an instance of
+          * [InputTokenDetails].
+          */
+         @JvmStatic fun builder() = Builder()
+     }
+
+     /** A builder for [InputTokenDetails]. */
+     class Builder internal constructor() {
+
+         private var audioTokens: JsonField<Long> = JsonMissing.of()
+         private var textTokens: JsonField<Long> = JsonMissing.of()
+         private var additionalProperties: MutableMap<String, JsonValue> = mutableMapOf()
+
+         @JvmSynthetic
+         internal fun from(inputTokenDetails: InputTokenDetails) = apply {
+             audioTokens = inputTokenDetails.audioTokens
+             textTokens = inputTokenDetails.textTokens
+             additionalProperties = inputTokenDetails.additionalProperties.toMutableMap()
+         }
+
+         /** Number of audio tokens billed for this request. */
+         fun audioTokens(audioTokens: Long) = audioTokens(JsonField.of(audioTokens))
+
+         /**
+          * Sets [Builder.audioTokens] to an arbitrary JSON value.
+          *
+          * You should usually call [Builder.audioTokens] with a well-typed [Long] value
+          * instead. This method is primarily for setting the field to an undocumented or
+          * not yet supported value.
+          */
+         fun audioTokens(audioTokens: JsonField<Long>) = apply {
+             this.audioTokens = audioTokens
+         }
+
+         /** Number of text tokens billed for this request. */
+         fun textTokens(textTokens: Long) = textTokens(JsonField.of(textTokens))
+
+         /**
+          * Sets [Builder.textTokens] to an arbitrary JSON value.
+          *
+          * You should usually call [Builder.textTokens] with a well-typed [Long] value
+          * instead. This method is primarily for setting the field to an undocumented or
+          * not yet supported value.
+          */
+         fun textTokens(textTokens: JsonField<Long>) = apply {
+             this.textTokens = textTokens
+         }
+
+         fun additionalProperties(additionalProperties: Map<String, JsonValue>) = apply {
+             this.additionalProperties.clear() // replaces the current entries rather than merging
+             putAllAdditionalProperties(additionalProperties)
+         }
+
+         fun putAdditionalProperty(key: String, value: JsonValue) = apply {
+             additionalProperties.put(key, value)
+         }
+
+         fun putAllAdditionalProperties(additionalProperties: Map<String, JsonValue>) =
+             apply {
+                 this.additionalProperties.putAll(additionalProperties)
+             }
+
+         fun removeAdditionalProperty(key: String) = apply {
+             additionalProperties.remove(key)
+         }
+
+         fun removeAllAdditionalProperties(keys: Set<String>) = apply {
+             keys.forEach(::removeAdditionalProperty)
+         }
+
+         /**
+          * Returns an immutable instance of [InputTokenDetails].
+          *
+          * Further updates to this [Builder] will not mutate the returned instance.
+          */
+         fun build(): InputTokenDetails =
+             InputTokenDetails(
+                 audioTokens,
+                 textTokens,
+                 additionalProperties.toMutableMap(), // copy keeps the instance detached
+             )
+     }
+
+     private var validated: Boolean = false // set once validate() has completed
+
+     fun validate(): InputTokenDetails = apply {
+         if (validated) {
+             return@apply
+         }
+
+         audioTokens()
+         textTokens()
+         validated = true
+     }
+
+     fun isValid(): Boolean =
+         try {
+             validate()
+             true
+         } catch (e: OpenAIInvalidDataException) {
+             false
+         }
+
+     /**
+      * Returns a score indicating how many valid values are contained in this object
+      * recursively.
+      *
+      * Used for best match union deserialization.
+      */
+     @JvmSynthetic
+     internal fun validity(): Int =
+         (if (audioTokens.asKnown().isPresent) 1 else 0) +
+             (if (textTokens.asKnown().isPresent) 1 else 0)
+
+     override fun equals(other: Any?): Boolean {
+         if (this === other) {
+             return true
+         }
+
+         return other is InputTokenDetails &&
+             audioTokens == other.audioTokens &&
+             textTokens == other.textTokens &&
+             additionalProperties == other.additionalProperties
+     }
+
+     private val hashCode: Int by lazy {
+         Objects.hash(audioTokens, textTokens, additionalProperties)
+     }
+
+     override fun hashCode(): Int = hashCode
+
+     override fun toString() =
+         "InputTokenDetails{audioTokens=$audioTokens, textTokens=$textTokens, additionalProperties=$additionalProperties}"
+ }
+
+ override fun equals(other: Any?): Boolean =
+     when {
+         this === other -> true
+         other !is Tokens -> false
+         // Past this point `other` is smart-cast, so fields compare directly.
+         else ->
+             inputTokens == other.inputTokens &&
+                 outputTokens == other.outputTokens &&
+                 totalTokens == other.totalTokens &&
+                 type == other.type &&
+                 inputTokenDetails == other.inputTokenDetails &&
+                 additionalProperties == other.additionalProperties
+     }
+
+ // Memoized: the first hashCode() call computes the value, later calls reuse it.
+ private val hashCode: Int by lazy(LazyThreadSafetyMode.SYNCHRONIZED) {
+     Objects.hash(
+         inputTokens,
+         outputTokens,
+         totalTokens,
+         type,
+         inputTokenDetails, additionalProperties,
+     )
+ }
+
+ override fun hashCode(): Int = hashCode
+
+ override fun toString(): String =
+     "Tokens{inputTokens=$inputTokens, outputTokens=$outputTokens, totalTokens=$totalTokens, type=$type, inputTokenDetails=$inputTokenDetails, additionalProperties=$additionalProperties}"
+ }
+
+ /** Usage statistics for models billed by audio input duration. */
+ class Duration
+ @JsonCreator(mode = JsonCreator.Mode.DISABLED)
+ private constructor(
+     private val seconds: JsonField<Double>,
+     private val type: JsonValue,
+     private val additionalProperties: MutableMap<String, JsonValue>,
+ ) {
+
+     @JsonCreator
+     private constructor(
+         @JsonProperty("seconds")
+         @ExcludeMissing
+         seconds: JsonField<Double> = JsonMissing.of(),
+         @JsonProperty("type") @ExcludeMissing type: JsonValue = JsonMissing.of(),
+     ) : this(seconds, type, mutableMapOf())
+
+     /**
+      * Duration of the input audio in seconds.
+      *
+      * @throws OpenAIInvalidDataException if the JSON field has an unexpected type or is
+      * unexpectedly missing or null (e.g. if the server responded with an unexpected
+      * value).
+      */
+     fun seconds(): Double = seconds.getRequired("seconds")
+
+     /**
+      * The type of the usage object. Always `duration` for this variant.
+      *
+      * Expected to always return the following:
+      * ```java
+      * JsonValue.from("duration")
+      * ```
+      *
+      * However, this method can be useful for debugging and logging (e.g. if the server
+      * responded with an unexpected value).
+      */
+     @JsonProperty("type") @ExcludeMissing fun _type(): JsonValue = type
+
+     /**
+      * Returns the raw JSON value of [seconds].
+      *
+      * Unlike [seconds], this method doesn't throw if the JSON field has an unexpected type.
+      */
+     @JsonProperty("seconds") @ExcludeMissing fun _seconds(): JsonField<Double> = seconds
+
+     @JsonAnySetter
+     private fun putAdditionalProperty(key: String, value: JsonValue) {
+         additionalProperties.put(key, value)
+     }
+
+     @JsonAnyGetter
+     @ExcludeMissing
+     fun _additionalProperties(): Map<String, JsonValue> =
+         Collections.unmodifiableMap(additionalProperties)
+
+     fun toBuilder() = Builder().from(this)
+
+     companion object {
+
+         /**
+          * Returns a mutable builder for constructing an instance of [Duration].
+          *
+          * The following fields are required:
+          * ```java
+          * .seconds()
+          * ```
+          */
+         @JvmStatic fun builder() = Builder()
+     }
+
+     /** A builder for [Duration]. */
+     class Builder internal constructor() {
+
+         private var seconds: JsonField<Double>? = null
+         private var type: JsonValue = JsonValue.from("duration")
+         private var additionalProperties: MutableMap<String, JsonValue> = mutableMapOf()
+
+         @JvmSynthetic
+         internal fun from(duration: Duration) = apply {
+             seconds = duration.seconds
+             type = duration.type
+             additionalProperties = duration.additionalProperties.toMutableMap()
+         }
+
+         /** Duration of the input audio in seconds. */
+         fun seconds(seconds: Double) = seconds(JsonField.of(seconds))
+
+         /**
+          * Sets [Builder.seconds] to an arbitrary JSON value.
+          *
+          * You should usually call [Builder.seconds] with a well-typed [Double] value
+          * instead. This method is primarily for setting the field to an undocumented or not
+          * yet supported value.
+          */
+         fun seconds(seconds: JsonField<Double>) = apply { this.seconds = seconds }
+
+         /**
+          * Sets the field to an arbitrary JSON value.
+          *
+          * It is usually unnecessary to call this method because the field defaults to the
+          * following:
+          * ```java
+          * JsonValue.from("duration")
+          * ```
+          *
+          * This method is primarily for setting the field to an undocumented or not yet
+          * supported value.
+          */
+         fun type(type: JsonValue) = apply { this.type = type }
+
+         fun additionalProperties(additionalProperties: Map<String, JsonValue>) = apply {
+             this.additionalProperties.clear() // replaces the current entries rather than merging
+             putAllAdditionalProperties(additionalProperties)
+         }
+
+         fun putAdditionalProperty(key: String, value: JsonValue) = apply {
+             additionalProperties.put(key, value)
+         }
+
+         fun putAllAdditionalProperties(additionalProperties: Map<String, JsonValue>) =
+             apply {
+                 this.additionalProperties.putAll(additionalProperties)
+             }
+
+         fun removeAdditionalProperty(key: String) = apply {
+             additionalProperties.remove(key)
+         }
+
+         fun removeAllAdditionalProperties(keys: Set<String>) = apply {
+             keys.forEach(::removeAdditionalProperty)
+         }
+
+         /**
+          * Returns an immutable instance of [Duration].
+          *
+          * Further updates to this [Builder] will not mutate the returned instance.
+          *
+          * The following fields are required:
+          * ```java
+          * .seconds()
+          * ```
+          *
+          * @throws IllegalStateException if any required field is unset.
+          */
+         fun build(): Duration =
+             Duration(
+                 checkRequired("seconds", seconds),
+                 type,
+                 additionalProperties.toMutableMap(), // copy keeps the instance detached
+             )
+     }
+
+     private var validated: Boolean = false // set once validate() has completed
+
+     fun validate(): Duration = apply {
+         if (validated) {
+             return@apply
+         }
+
+         seconds()
+         _type().let {
+             if (it != JsonValue.from("duration")) {
+                 throw OpenAIInvalidDataException("'type' is invalid, received $it")
+             }
+         }
+         validated = true
+     }
+
+     fun isValid(): Boolean =
+         try {
+             validate()
+             true
+         } catch (e: OpenAIInvalidDataException) {
+             false
+         }
+
+     /**
+      * Returns a score indicating how many valid values are contained in this object
+      * recursively.
+      *
+      * Used for best match union deserialization.
+      */
+     @JvmSynthetic
+     internal fun validity(): Int =
+         (if (seconds.asKnown().isPresent) 1 else 0) +
+             type.let { if (it == JsonValue.from("duration")) 1 else 0 }
+
+     override fun equals(other: Any?): Boolean {
+         if (this === other) {
+             return true
+         }
+
+         return other is Duration &&
+             seconds == other.seconds &&
+             type == other.type &&
+             additionalProperties == other.additionalProperties
+     }
+
+     private val hashCode: Int by lazy { Objects.hash(seconds, type, additionalProperties) }
+
+     override fun hashCode(): Int = hashCode
+
+     override fun toString() =
+         "Duration{seconds=$seconds, type=$type, additionalProperties=$additionalProperties}"
+ }
+ }
+
+ override fun equals(other: Any?): Boolean =
+     when {
+         this === other -> true
+         other !is TranscriptionDiarized -> false
+         // Past this point `other` is smart-cast, so fields compare directly.
+         else ->
+             duration == other.duration &&
+                 segments == other.segments &&
+                 task == other.task &&
+                 text == other.text &&
+                 usage == other.usage &&
+                 additionalProperties == other.additionalProperties
+     }
+
+ private val hashCode: Int by lazy(LazyThreadSafetyMode.SYNCHRONIZED) { // computed on first use
+     Objects.hash(duration, segments, task, text, usage, additionalProperties)
+ }
+
+ override fun hashCode(): Int = hashCode // returns the lazily computed value
+
+ override fun toString(): String =
+     "TranscriptionDiarized{duration=$duration, segments=$segments, task=$task, text=$text, usage=$usage, additionalProperties=$additionalProperties}"
+}
diff --git a/openai-java-core/src/main/kotlin/com/openai/models/audio/transcriptions/TranscriptionDiarizedSegment.kt b/openai-java-core/src/main/kotlin/com/openai/models/audio/transcriptions/TranscriptionDiarizedSegment.kt
new file mode 100644
index 000000000..8bad9f52b
--- /dev/null
+++ b/openai-java-core/src/main/kotlin/com/openai/models/audio/transcriptions/TranscriptionDiarizedSegment.kt
@@ -0,0 +1,367 @@
+// File generated from our OpenAPI spec by Stainless.
+
+package com.openai.models.audio.transcriptions
+
+import com.fasterxml.jackson.annotation.JsonAnyGetter
+import com.fasterxml.jackson.annotation.JsonAnySetter
+import com.fasterxml.jackson.annotation.JsonCreator
+import com.fasterxml.jackson.annotation.JsonProperty
+import com.openai.core.ExcludeMissing
+import com.openai.core.JsonField
+import com.openai.core.JsonMissing
+import com.openai.core.JsonValue
+import com.openai.core.checkRequired
+import com.openai.errors.OpenAIInvalidDataException
+import java.util.Collections
+import java.util.Objects
+
+/** A segment of diarized transcript text with speaker metadata. */
+class TranscriptionDiarizedSegment
+@JsonCreator(mode = JsonCreator.Mode.DISABLED)
+private constructor(
+    private val id: JsonField<String>,
+    private val end: JsonField<Float>,
+    private val speaker: JsonField<String>,
+    private val start: JsonField<Float>,
+    private val text: JsonField<String>,
+    private val type: JsonValue,
+    private val additionalProperties: MutableMap<String, JsonValue>,
+) {
+
+    @JsonCreator
+    private constructor(
+        @JsonProperty("id") @ExcludeMissing id: JsonField<String> = JsonMissing.of(),
+        @JsonProperty("end") @ExcludeMissing end: JsonField<Float> = JsonMissing.of(),
+        @JsonProperty("speaker") @ExcludeMissing speaker: JsonField<String> = JsonMissing.of(),
+        @JsonProperty("start") @ExcludeMissing start: JsonField<Float> = JsonMissing.of(),
+        @JsonProperty("text") @ExcludeMissing text: JsonField<String> = JsonMissing.of(),
+        @JsonProperty("type") @ExcludeMissing type: JsonValue = JsonMissing.of(),
+    ) : this(id, end, speaker, start, text, type, mutableMapOf())
+
+    /**
+     * Unique identifier for the segment.
+     *
+     * @throws OpenAIInvalidDataException if the JSON field has an unexpected type or is
+     * unexpectedly missing or null (e.g. if the server responded with an unexpected value).
+     */
+    fun id(): String = id.getRequired("id")
+
+    /**
+     * End timestamp of the segment in seconds.
+     *
+     * @throws OpenAIInvalidDataException if the JSON field has an unexpected type or is
+     * unexpectedly missing or null (e.g. if the server responded with an unexpected value).
+     */
+    fun end(): Float = end.getRequired("end")
+
+    /**
+     * Speaker label for this segment. When known speakers are provided, the label matches
+     * `known_speaker_names[]`. Otherwise speakers are labeled sequentially using capital letters
+     * (`A`, `B`, ...).
+     *
+     * @throws OpenAIInvalidDataException if the JSON field has an unexpected type or is
+     * unexpectedly missing or null (e.g. if the server responded with an unexpected value).
+     */
+    fun speaker(): String = speaker.getRequired("speaker")
+
+    /**
+     * Start timestamp of the segment in seconds.
+     *
+     * @throws OpenAIInvalidDataException if the JSON field has an unexpected type or is
+     * unexpectedly missing or null (e.g. if the server responded with an unexpected value).
+     */
+    fun start(): Float = start.getRequired("start")
+
+    /**
+     * Transcript text for this segment.
+     *
+     * @throws OpenAIInvalidDataException if the JSON field has an unexpected type or is
+     * unexpectedly missing or null (e.g. if the server responded with an unexpected value).
+     */
+    fun text(): String = text.getRequired("text")
+
+    /**
+     * The type of the segment. Always `transcript.text.segment`.
+     *
+     * Expected to always return the following:
+     * ```java
+     * JsonValue.from("transcript.text.segment")
+     * ```
+     *
+     * However, this method can be useful for debugging and logging (e.g. if the server responded
+     * with an unexpected value).
+     */
+    @JsonProperty("type") @ExcludeMissing fun _type(): JsonValue = type
+
+    /**
+     * Returns the raw JSON value of [id].
+     *
+     * Unlike [id], this method doesn't throw if the JSON field has an unexpected type.
+     */
+    @JsonProperty("id") @ExcludeMissing fun _id(): JsonField<String> = id
+
+    /**
+     * Returns the raw JSON value of [end].
+     *
+     * Unlike [end], this method doesn't throw if the JSON field has an unexpected type.
+     */
+    @JsonProperty("end") @ExcludeMissing fun _end(): JsonField<Float> = end
+
+    /**
+     * Returns the raw JSON value of [speaker].
+     *
+     * Unlike [speaker], this method doesn't throw if the JSON field has an unexpected type.
+     */
+    @JsonProperty("speaker") @ExcludeMissing fun _speaker(): JsonField<String> = speaker
+
+    /**
+     * Returns the raw JSON value of [start].
+     *
+     * Unlike [start], this method doesn't throw if the JSON field has an unexpected type.
+     */
+    @JsonProperty("start") @ExcludeMissing fun _start(): JsonField<Float> = start
+
+    /**
+     * Returns the raw JSON value of [text].
+     *
+     * Unlike [text], this method doesn't throw if the JSON field has an unexpected type.
+     */
+    @JsonProperty("text") @ExcludeMissing fun _text(): JsonField<String> = text
+
+    @JsonAnySetter
+    private fun putAdditionalProperty(key: String, value: JsonValue) {
+        additionalProperties.put(key, value)
+    }
+
+    @JsonAnyGetter
+    @ExcludeMissing
+    fun _additionalProperties(): Map<String, JsonValue> =
+        Collections.unmodifiableMap(additionalProperties)
+
+    fun toBuilder() = Builder().from(this)
+
+    companion object {
+
+        /**
+         * Returns a mutable builder for constructing an instance of [TranscriptionDiarizedSegment].
+         *
+         * The following fields are required:
+         * ```java
+         * .id()
+         * .end()
+         * .speaker()
+         * .start()
+         * .text()
+         * ```
+         */
+        @JvmStatic fun builder() = Builder()
+    }
+
+    /** A builder for [TranscriptionDiarizedSegment]. */
+    class Builder internal constructor() {
+
+        private var id: JsonField<String>? = null
+        private var end: JsonField<Float>? = null
+        private var speaker: JsonField<String>? = null
+        private var start: JsonField<Float>? = null
+        private var text: JsonField<String>? = null
+        private var type: JsonValue = JsonValue.from("transcript.text.segment")
+        private var additionalProperties: MutableMap<String, JsonValue> = mutableMapOf()
+
+        @JvmSynthetic
+        internal fun from(transcriptionDiarizedSegment: TranscriptionDiarizedSegment) = apply {
+            id = transcriptionDiarizedSegment.id
+            end = transcriptionDiarizedSegment.end
+            speaker = transcriptionDiarizedSegment.speaker
+            start = transcriptionDiarizedSegment.start
+            text = transcriptionDiarizedSegment.text
+            type = transcriptionDiarizedSegment.type
+            additionalProperties = transcriptionDiarizedSegment.additionalProperties.toMutableMap()
+        }
+
+        /** Unique identifier for the segment. */
+        fun id(id: String) = id(JsonField.of(id))
+
+        /**
+         * Sets [Builder.id] to an arbitrary JSON value.
+         *
+         * You should usually call [Builder.id] with a well-typed [String] value instead. This
+         * method is primarily for setting the field to an undocumented or not yet supported value.
+         */
+        fun id(id: JsonField<String>) = apply { this.id = id }
+
+        /** End timestamp of the segment in seconds. */
+        fun end(end: Float) = end(JsonField.of(end))
+
+        /**
+         * Sets [Builder.end] to an arbitrary JSON value.
+         *
+         * You should usually call [Builder.end] with a well-typed [Float] value instead. This
+         * method is primarily for setting the field to an undocumented or not yet supported value.
+         */
+        fun end(end: JsonField<Float>) = apply { this.end = end }
+
+        /**
+         * Speaker label for this segment. When known speakers are provided, the label matches
+         * `known_speaker_names[]`. Otherwise speakers are labeled sequentially using capital
+         * letters (`A`, `B`, ...).
+         */
+        fun speaker(speaker: String) = speaker(JsonField.of(speaker))
+
+        /**
+         * Sets [Builder.speaker] to an arbitrary JSON value.
+         *
+         * You should usually call [Builder.speaker] with a well-typed [String] value instead. This
+         * method is primarily for setting the field to an undocumented or not yet supported value.
+         */
+        fun speaker(speaker: JsonField<String>) = apply { this.speaker = speaker }
+
+        /** Start timestamp of the segment in seconds. */
+        fun start(start: Float) = start(JsonField.of(start))
+
+        /**
+         * Sets [Builder.start] to an arbitrary JSON value.
+         *
+         * You should usually call [Builder.start] with a well-typed [Float] value instead. This
+         * method is primarily for setting the field to an undocumented or not yet supported value.
+         */
+        fun start(start: JsonField<Float>) = apply { this.start = start }
+
+        /** Transcript text for this segment. */
+        fun text(text: String) = text(JsonField.of(text))
+
+        /**
+         * Sets [Builder.text] to an arbitrary JSON value.
+         *
+         * You should usually call [Builder.text] with a well-typed [String] value instead. This
+         * method is primarily for setting the field to an undocumented or not yet supported value.
+         */
+        fun text(text: JsonField<String>) = apply { this.text = text }
+
+        /**
+         * Sets the field to an arbitrary JSON value.
+         *
+         * It is usually unnecessary to call this method because the field defaults to the
+         * following:
+         * ```java
+         * JsonValue.from("transcript.text.segment")
+         * ```
+         *
+         * This method is primarily for setting the field to an undocumented or not yet supported
+         * value.
+         */
+        fun type(type: JsonValue) = apply { this.type = type }
+
+        fun additionalProperties(additionalProperties: Map<String, JsonValue>) = apply {
+            this.additionalProperties.clear() // replaces the current entries rather than merging
+            putAllAdditionalProperties(additionalProperties)
+        }
+
+        fun putAdditionalProperty(key: String, value: JsonValue) = apply {
+            additionalProperties.put(key, value)
+        }
+
+        fun putAllAdditionalProperties(additionalProperties: Map<String, JsonValue>) = apply {
+            this.additionalProperties.putAll(additionalProperties)
+        }
+
+        fun removeAdditionalProperty(key: String) = apply { additionalProperties.remove(key) }
+
+        fun removeAllAdditionalProperties(keys: Set<String>) = apply {
+            keys.forEach(::removeAdditionalProperty)
+        }
+
+        /**
+         * Returns an immutable instance of [TranscriptionDiarizedSegment].
+         *
+         * Further updates to this [Builder] will not mutate the returned instance.
+         *
+         * The following fields are required:
+         * ```java
+         * .id()
+         * .end()
+         * .speaker()
+         * .start()
+         * .text()
+         * ```
+         *
+         * @throws IllegalStateException if any required field is unset.
+         */
+        fun build(): TranscriptionDiarizedSegment =
+            TranscriptionDiarizedSegment(
+                checkRequired("id", id),
+                checkRequired("end", end),
+                checkRequired("speaker", speaker),
+                checkRequired("start", start),
+                checkRequired("text", text),
+                type,
+                additionalProperties.toMutableMap(), // copy keeps the instance detached
+            )
+    }
+
+    private var validated: Boolean = false // set once validate() has completed
+
+    fun validate(): TranscriptionDiarizedSegment = apply {
+        if (validated) {
+            return@apply
+        }
+
+        id()
+        end()
+        speaker()
+        start()
+        text()
+        _type().let {
+            if (it != JsonValue.from("transcript.text.segment")) {
+                throw OpenAIInvalidDataException("'type' is invalid, received $it")
+            }
+        }
+        validated = true
+    }
+
+    fun isValid(): Boolean =
+        try {
+            validate()
+            true
+        } catch (e: OpenAIInvalidDataException) {
+            false
+        }
+
+    /**
+     * Returns a score indicating how many valid values are contained in this object recursively.
+     *
+     * Used for best match union deserialization.
+     */
+    @JvmSynthetic
+    internal fun validity(): Int =
+        (if (id.asKnown().isPresent) 1 else 0) +
+            (if (end.asKnown().isPresent) 1 else 0) +
+            (if (speaker.asKnown().isPresent) 1 else 0) +
+            (if (start.asKnown().isPresent) 1 else 0) +
+            (if (text.asKnown().isPresent) 1 else 0) +
+            type.let { if (it == JsonValue.from("transcript.text.segment")) 1 else 0 }
+
+    override fun equals(other: Any?): Boolean {
+        if (this === other) {
+            return true
+        }
+
+        return other is TranscriptionDiarizedSegment &&
+            id == other.id &&
+            end == other.end &&
+            speaker == other.speaker &&
+            start == other.start &&
+            text == other.text &&
+            type == other.type &&
+            additionalProperties == other.additionalProperties
+    }
+
+    private val hashCode: Int by lazy {
+        Objects.hash(id, end, speaker, start, text, type, additionalProperties)
+    }
+
+    override fun hashCode(): Int = hashCode
+
+    override fun toString() =
+        "TranscriptionDiarizedSegment{id=$id, end=$end, speaker=$speaker, start=$start, text=$text, type=$type, additionalProperties=$additionalProperties}"
+}
diff --git a/openai-java-core/src/main/kotlin/com/openai/models/audio/transcriptions/TranscriptionStreamEvent.kt b/openai-java-core/src/main/kotlin/com/openai/models/audio/transcriptions/TranscriptionStreamEvent.kt
index 9ebaaed4b..4a34a8d21 100644
--- a/openai-java-core/src/main/kotlin/com/openai/models/audio/transcriptions/TranscriptionStreamEvent.kt
+++ b/openai-java-core/src/main/kotlin/com/openai/models/audio/transcriptions/TranscriptionStreamEvent.kt
@@ -19,20 +19,30 @@ import java.util.Optional
import kotlin.jvm.optionals.getOrNull
/**
- * Emitted when there is an additional text delta. This is also the first event emitted when the
- * transcription starts. Only emitted when you
+ * A single event from a streamed transcription: a diarized segment, a text delta, or a text done
+ * event. Only emitted when you
* [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription)
- * with the `Stream` parameter set to `true`.
+ * with `stream` set to `true`; segment events also need `response_format` set to `diarized_json`.
*/
@JsonDeserialize(using = TranscriptionStreamEvent.Deserializer::class)
@JsonSerialize(using = TranscriptionStreamEvent.Serializer::class)
class TranscriptionStreamEvent
private constructor(
+ private val transcriptTextSegment: TranscriptionTextSegmentEvent? = null,
private val transcriptTextDelta: TranscriptionTextDeltaEvent? = null,
private val transcriptTextDone: TranscriptionTextDoneEvent? = null,
private val _json: JsonValue? = null,
) {
+ /**
+ * Emitted when a diarized transcription returns a completed segment with speaker information.
+ * Only emitted when you
+ * [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription)
+ * with `stream` set to `true` and `response_format` set to `diarized_json`.
+ */
+ fun transcriptTextSegment(): Optional<TranscriptionTextSegmentEvent> =
+     Optional.ofNullable(transcriptTextSegment) // empty when this event holds another variant
+
/**
* Emitted when there is an additional text delta. This is also the first event emitted when the
* transcription starts. Only emitted when you
@@ -51,10 +61,21 @@ private constructor(
fun transcriptTextDone(): Optional =
Optional.ofNullable(transcriptTextDone)
+ fun isTranscriptTextSegment(): Boolean = transcriptTextSegment().isPresent
+
fun isTranscriptTextDelta(): Boolean = transcriptTextDelta != null
fun isTranscriptTextDone(): Boolean = transcriptTextDone != null
+ /**
+ * Emitted when a diarized transcription returns a completed segment with speaker information.
+ * Only emitted when you
+ * [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription)
+ * with `stream` set to `true` and `response_format` set to `diarized_json`.
+ */
+ fun asTranscriptTextSegment(): TranscriptionTextSegmentEvent =
+     transcriptTextSegment.let { it.getOrThrow("transcriptTextSegment") }
+
/**
* Emitted when there is an additional text delta. This is also the first event emitted when the
* transcription starts. Only emitted when you
@@ -77,6 +98,8 @@ private constructor(
fun accept(visitor: Visitor): T =
when {
+ transcriptTextSegment != null ->
+ visitor.visitTranscriptTextSegment(transcriptTextSegment)
transcriptTextDelta != null -> visitor.visitTranscriptTextDelta(transcriptTextDelta)
transcriptTextDone != null -> visitor.visitTranscriptTextDone(transcriptTextDone)
else -> visitor.unknown(_json)
@@ -91,6 +114,12 @@ private constructor(
accept(
object : Visitor {
+ override fun visitTranscriptTextSegment(
+ transcriptTextSegment: TranscriptionTextSegmentEvent
+ ) {
+ transcriptTextSegment.validate()
+ }
+
override fun visitTranscriptTextDelta(
transcriptTextDelta: TranscriptionTextDeltaEvent
) {
@@ -124,6 +153,10 @@ private constructor(
internal fun validity(): Int =
accept(
object : Visitor {
+ override fun visitTranscriptTextSegment(
+ transcriptTextSegment: TranscriptionTextSegmentEvent
+ ) = transcriptTextSegment.validity()
+
override fun visitTranscriptTextDelta(
transcriptTextDelta: TranscriptionTextDeltaEvent
) = transcriptTextDelta.validity()
@@ -142,14 +175,18 @@ private constructor(
}
return other is TranscriptionStreamEvent &&
+ transcriptTextSegment == other.transcriptTextSegment &&
transcriptTextDelta == other.transcriptTextDelta &&
transcriptTextDone == other.transcriptTextDone
}
- override fun hashCode(): Int = Objects.hash(transcriptTextDelta, transcriptTextDone)
+ override fun hashCode(): Int =
+ Objects.hash(transcriptTextSegment, transcriptTextDelta, transcriptTextDone)
override fun toString(): String =
when {
+ transcriptTextSegment != null ->
+ "TranscriptionStreamEvent{transcriptTextSegment=$transcriptTextSegment}"
transcriptTextDelta != null ->
"TranscriptionStreamEvent{transcriptTextDelta=$transcriptTextDelta}"
transcriptTextDone != null ->
@@ -160,6 +197,16 @@ private constructor(
companion object {
+ /**
+ * Emitted when a diarized transcription returns a completed segment with speaker
+ * information. Only emitted when you
+ * [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription)
+ * with `stream` set to `true` and `response_format` set to `diarized_json`.
+ */
+ @JvmStatic
+ fun ofTranscriptTextSegment(transcriptTextSegment: TranscriptionTextSegmentEvent) =
+     transcriptTextSegment.let { TranscriptionStreamEvent(transcriptTextSegment = it) }
+
/**
* Emitted when there is an additional text delta. This is also the first event emitted when
* the transcription starts. Only emitted when you
@@ -187,6 +234,14 @@ private constructor(
*/
interface Visitor<out T> {
+ /**
+ * Emitted when a diarized transcription returns a completed segment with speaker
+ * information. Only emitted when you
+ * [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription)
+ * with `stream` set to `true` and `response_format` set to `diarized_json`.
+ */
+ fun visitTranscriptTextSegment(transcriptTextSegment: TranscriptionTextSegmentEvent): T
+
/**
* Emitted when there is an additional text delta. This is also the first event emitted when
* the transcription starts. Only emitted when you
@@ -226,6 +281,11 @@ private constructor(
val type = json.asObject().getOrNull()?.get("type")?.asString()?.getOrNull()
when (type) {
+ "transcript.text.segment" -> {
+ return tryDeserialize(node, jacksonTypeRef<TranscriptionTextSegmentEvent>())
+ ?.let { TranscriptionStreamEvent(transcriptTextSegment = it, _json = json) }
+ ?: TranscriptionStreamEvent(_json = json)
+ }
"transcript.text.delta" -> {
return tryDeserialize(node, jacksonTypeRef<TranscriptionTextDeltaEvent>())
?.let { TranscriptionStreamEvent(transcriptTextDelta = it, _json = json) }
@@ -251,6 +311,8 @@ private constructor(
provider: SerializerProvider,
) {
when {
+ value.transcriptTextSegment != null ->
+ generator.writeObject(value.transcriptTextSegment)
value.transcriptTextDelta != null ->
generator.writeObject(value.transcriptTextDelta)
value.transcriptTextDone != null -> generator.writeObject(value.transcriptTextDone)
diff --git a/openai-java-core/src/main/kotlin/com/openai/models/audio/transcriptions/TranscriptionTextDeltaEvent.kt b/openai-java-core/src/main/kotlin/com/openai/models/audio/transcriptions/TranscriptionTextDeltaEvent.kt
index 8ad8feba9..7c367169a 100644
--- a/openai-java-core/src/main/kotlin/com/openai/models/audio/transcriptions/TranscriptionTextDeltaEvent.kt
+++ b/openai-java-core/src/main/kotlin/com/openai/models/audio/transcriptions/TranscriptionTextDeltaEvent.kt
@@ -31,6 +31,7 @@ private constructor(
private val delta: JsonField<String>,
private val type: JsonValue,
private val logprobs: JsonField<List<Logprob>>,
+ private val segmentId: JsonField<String>,
private val additionalProperties: MutableMap<String, JsonValue>,
) {
@@ -41,7 +42,8 @@ private constructor(
@JsonProperty("logprobs")
@ExcludeMissing
logprobs: JsonField<List<Logprob>> = JsonMissing.of(),
- ) : this(delta, type, logprobs, mutableMapOf())
+ @JsonProperty("segment_id") @ExcludeMissing segmentId: JsonField<String> = JsonMissing.of(),
+ ) : this(delta, type, logprobs, segmentId, mutableMapOf())
/**
* The text delta that was additionally transcribed.
@@ -74,6 +76,15 @@ private constructor(
*/
fun logprobs(): Optional<List<Logprob>> = logprobs.getOptional("logprobs")
+ /**
+ * Identifier of the diarized segment that this delta belongs to. Only present when using
+ * `gpt-4o-transcribe-diarize`.
+ *
+ * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
+ * server responded with an unexpected value).
+ */
+ fun segmentId(): Optional<String> = segmentId.getOptional("segment_id")
+
/**
* Returns the raw JSON value of [delta].
*
@@ -88,6 +99,13 @@ private constructor(
*/
@JsonProperty("logprobs") @ExcludeMissing fun _logprobs(): JsonField<List<Logprob>> = logprobs
+ /**
+ * Returns the raw JSON value of [segmentId].
+ *
+ * Unlike [segmentId], this method doesn't throw if the JSON field has an unexpected type.
+ */
+ @JsonProperty("segment_id") @ExcludeMissing fun _segmentId(): JsonField<String> = segmentId
+
@JsonAnySetter
private fun putAdditionalProperty(key: String, value: JsonValue) {
additionalProperties.put(key, value)
@@ -119,6 +137,7 @@ private constructor(
private var delta: JsonField<String>? = null
private var type: JsonValue = JsonValue.from("transcript.text.delta")
private var logprobs: JsonField<MutableList<Logprob>>? = null
+ private var segmentId: JsonField<String> = JsonMissing.of()
private var additionalProperties: MutableMap<String, JsonValue> = mutableMapOf()
@JvmSynthetic
@@ -126,6 +145,7 @@ private constructor(
delta = transcriptionTextDeltaEvent.delta
type = transcriptionTextDeltaEvent.type
logprobs = transcriptionTextDeltaEvent.logprobs.map { it.toMutableList() }
+ segmentId = transcriptionTextDeltaEvent.segmentId
additionalProperties = transcriptionTextDeltaEvent.additionalProperties.toMutableMap()
}
@@ -184,6 +204,21 @@ private constructor(
}
}
+ /**
+ * Identifier of the diarized segment that this delta belongs to. Only present when using
+ * `gpt-4o-transcribe-diarize`.
+ */
+ fun segmentId(segmentId: String) = segmentId(JsonField.of(segmentId))
+
+ /**
+ * Sets [Builder.segmentId] to an arbitrary JSON value.
+ *
+ * You should usually call [Builder.segmentId] with a well-typed [String] value instead.
+ * This method is primarily for setting the field to an undocumented or not yet supported
+ * value.
+ */
+ fun segmentId(segmentId: JsonField<String>) = apply { this.segmentId = segmentId }
+
fun additionalProperties(additionalProperties: Map<String, JsonValue>) = apply {
this.additionalProperties.clear()
putAllAdditionalProperties(additionalProperties)
@@ -220,6 +255,7 @@ private constructor(
checkRequired("delta", delta),
type,
(logprobs ?: JsonMissing.of()).map { it.toImmutable() },
+ segmentId,
additionalProperties.toMutableMap(),
)
}
@@ -238,6 +274,7 @@ private constructor(
}
}
logprobs().ifPresent { it.forEach { it.validate() } }
+ segmentId()
validated = true
}
@@ -258,7 +295,8 @@ private constructor(
internal fun validity(): Int =
(if (delta.asKnown().isPresent) 1 else 0) +
type.let { if (it == JsonValue.from("transcript.text.delta")) 1 else 0 } +
- (logprobs.asKnown().getOrNull()?.sumOf { it.validity().toInt() } ?: 0)
+ (logprobs.asKnown().getOrNull()?.sumOf { it.validity().toInt() } ?: 0) +
+ (if (segmentId.asKnown().isPresent) 1 else 0)
class Logprob
@JsonCreator(mode = JsonCreator.Mode.DISABLED)
@@ -502,13 +540,16 @@ private constructor(
delta == other.delta &&
type == other.type &&
logprobs == other.logprobs &&
+ segmentId == other.segmentId &&
additionalProperties == other.additionalProperties
}
- private val hashCode: Int by lazy { Objects.hash(delta, type, logprobs, additionalProperties) }
+ private val hashCode: Int by lazy {
+ Objects.hash(delta, type, logprobs, segmentId, additionalProperties)
+ }
override fun hashCode(): Int = hashCode
override fun toString() =
- "TranscriptionTextDeltaEvent{delta=$delta, type=$type, logprobs=$logprobs, additionalProperties=$additionalProperties}"
+ "TranscriptionTextDeltaEvent{delta=$delta, type=$type, logprobs=$logprobs, segmentId=$segmentId, additionalProperties=$additionalProperties}"
}
diff --git a/openai-java-core/src/main/kotlin/com/openai/models/audio/transcriptions/TranscriptionTextSegmentEvent.kt b/openai-java-core/src/main/kotlin/com/openai/models/audio/transcriptions/TranscriptionTextSegmentEvent.kt
new file mode 100644
index 000000000..e98206787
--- /dev/null
+++ b/openai-java-core/src/main/kotlin/com/openai/models/audio/transcriptions/TranscriptionTextSegmentEvent.kt
@@ -0,0 +1,367 @@
+// File generated from our OpenAPI spec by Stainless.
+
+package com.openai.models.audio.transcriptions
+
+import com.fasterxml.jackson.annotation.JsonAnyGetter
+import com.fasterxml.jackson.annotation.JsonAnySetter
+import com.fasterxml.jackson.annotation.JsonCreator
+import com.fasterxml.jackson.annotation.JsonProperty
+import com.openai.core.ExcludeMissing
+import com.openai.core.JsonField
+import com.openai.core.JsonMissing
+import com.openai.core.JsonValue
+import com.openai.core.checkRequired
+import com.openai.errors.OpenAIInvalidDataException
+import java.util.Collections
+import java.util.Objects
+
+/**
+ * Emitted when a diarized transcription returns a completed segment with speaker information. Only
+ * emitted when you
+ * [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription)
+ * with `stream` set to `true` and `response_format` set to `diarized_json`.
+ */
+class TranscriptionTextSegmentEvent
+@JsonCreator(mode = JsonCreator.Mode.DISABLED)
+private constructor(
+ private val id: JsonField<String>,
+ private val end: JsonField<Float>,
+ private val speaker: JsonField<String>,
+ private val start: JsonField<Float>,
+ private val text: JsonField<String>,
+ private val type: JsonValue,
+ private val additionalProperties: MutableMap<String, JsonValue>,
+) {
+
+ @JsonCreator
+ private constructor(
+ @JsonProperty("id") @ExcludeMissing id: JsonField<String> = JsonMissing.of(),
+ @JsonProperty("end") @ExcludeMissing end: JsonField<Float> = JsonMissing.of(),
+ @JsonProperty("speaker") @ExcludeMissing speaker: JsonField<String> = JsonMissing.of(),
+ @JsonProperty("start") @ExcludeMissing start: JsonField<Float> = JsonMissing.of(),
+ @JsonProperty("text") @ExcludeMissing text: JsonField<String> = JsonMissing.of(),
+ @JsonProperty("type") @ExcludeMissing type: JsonValue = JsonMissing.of(),
+ ) : this(id, end, speaker, start, text, type, mutableMapOf())
+
+ /**
+ * Unique identifier for the segment.
+ *
+ * @throws OpenAIInvalidDataException if the JSON field has an unexpected type or is
+ * unexpectedly missing or null (e.g. if the server responded with an unexpected value).
+ */
+ fun id(): String = id.getRequired("id")
+
+ /**
+ * End timestamp of the segment in seconds.
+ *
+ * @throws OpenAIInvalidDataException if the JSON field has an unexpected type or is
+ * unexpectedly missing or null (e.g. if the server responded with an unexpected value).
+ */
+ fun end(): Float = end.getRequired("end")
+
+ /**
+ * Speaker label for this segment.
+ *
+ * @throws OpenAIInvalidDataException if the JSON field has an unexpected type or is
+ * unexpectedly missing or null (e.g. if the server responded with an unexpected value).
+ */
+ fun speaker(): String = speaker.getRequired("speaker")
+
+ /**
+ * Start timestamp of the segment in seconds.
+ *
+ * @throws OpenAIInvalidDataException if the JSON field has an unexpected type or is
+ * unexpectedly missing or null (e.g. if the server responded with an unexpected value).
+ */
+ fun start(): Float = start.getRequired("start")
+
+ /**
+ * Transcript text for this segment.
+ *
+ * @throws OpenAIInvalidDataException if the JSON field has an unexpected type or is
+ * unexpectedly missing or null (e.g. if the server responded with an unexpected value).
+ */
+ fun text(): String = text.getRequired("text")
+
+ /**
+ * The type of the event. Always `transcript.text.segment`.
+ *
+ * Expected to always return the following:
+ * ```java
+ * JsonValue.from("transcript.text.segment")
+ * ```
+ *
+ * However, this method can be useful for debugging and logging (e.g. if the server responded
+ * with an unexpected value).
+ */
+ @JsonProperty("type") @ExcludeMissing fun _type(): JsonValue = type
+
+ /**
+ * Returns the raw JSON value of [id].
+ *
+ * Unlike [id], this method doesn't throw if the JSON field has an unexpected type.
+ */
+ @JsonProperty("id") @ExcludeMissing fun _id(): JsonField<String> = id
+
+ /**
+ * Returns the raw JSON value of [end].
+ *
+ * Unlike [end], this method doesn't throw if the JSON field has an unexpected type.
+ */
+ @JsonProperty("end") @ExcludeMissing fun _end(): JsonField<Float> = end
+
+ /**
+ * Returns the raw JSON value of [speaker].
+ *
+ * Unlike [speaker], this method doesn't throw if the JSON field has an unexpected type.
+ */
+ @JsonProperty("speaker") @ExcludeMissing fun _speaker(): JsonField<String> = speaker
+
+ /**
+ * Returns the raw JSON value of [start].
+ *
+ * Unlike [start], this method doesn't throw if the JSON field has an unexpected type.
+ */
+ @JsonProperty("start") @ExcludeMissing fun _start(): JsonField<Float> = start
+
+ /**
+ * Returns the raw JSON value of [text].
+ *
+ * Unlike [text], this method doesn't throw if the JSON field has an unexpected type.
+ */
+ @JsonProperty("text") @ExcludeMissing fun _text(): JsonField<String> = text
+
+ @JsonAnySetter
+ private fun putAdditionalProperty(key: String, value: JsonValue) {
+ additionalProperties.put(key, value)
+ }
+
+ @JsonAnyGetter
+ @ExcludeMissing
+ fun _additionalProperties(): Map<String, JsonValue> =
+ Collections.unmodifiableMap(additionalProperties)
+
+ fun toBuilder() = Builder().from(this)
+
+ companion object {
+
+ /**
+ * Returns a mutable builder for constructing an instance of
+ * [TranscriptionTextSegmentEvent].
+ *
+ * The following fields are required:
+ * ```java
+ * .id()
+ * .end()
+ * .speaker()
+ * .start()
+ * .text()
+ * ```
+ */
+ @JvmStatic fun builder() = Builder()
+ }
+
+ /** A builder for [TranscriptionTextSegmentEvent]. */
+ class Builder internal constructor() {
+
+ private var id: JsonField<String>? = null
+ private var end: JsonField<Float>? = null
+ private var speaker: JsonField<String>? = null
+ private var start: JsonField<Float>? = null
+ private var text: JsonField<String>? = null
+ private var type: JsonValue = JsonValue.from("transcript.text.segment")
+ private var additionalProperties: MutableMap<String, JsonValue> = mutableMapOf()
+
+ @JvmSynthetic
+ internal fun from(transcriptionTextSegmentEvent: TranscriptionTextSegmentEvent) = apply {
+ id = transcriptionTextSegmentEvent.id
+ end = transcriptionTextSegmentEvent.end
+ speaker = transcriptionTextSegmentEvent.speaker
+ start = transcriptionTextSegmentEvent.start
+ text = transcriptionTextSegmentEvent.text
+ type = transcriptionTextSegmentEvent.type
+ additionalProperties = transcriptionTextSegmentEvent.additionalProperties.toMutableMap()
+ }
+
+ /** Unique identifier for the segment. */
+ fun id(id: String) = id(JsonField.of(id))
+
+ /**
+ * Sets [Builder.id] to an arbitrary JSON value.
+ *
+ * You should usually call [Builder.id] with a well-typed [String] value instead. This
+ * method is primarily for setting the field to an undocumented or not yet supported value.
+ */
+ fun id(id: JsonField<String>) = apply { this.id = id }
+
+ /** End timestamp of the segment in seconds. */
+ fun end(end: Float) = end(JsonField.of(end))
+
+ /**
+ * Sets [Builder.end] to an arbitrary JSON value.
+ *
+ * You should usually call [Builder.end] with a well-typed [Float] value instead. This
+ * method is primarily for setting the field to an undocumented or not yet supported value.
+ */
+ fun end(end: JsonField<Float>) = apply { this.end = end }
+
+ /** Speaker label for this segment. */
+ fun speaker(speaker: String) = speaker(JsonField.of(speaker))
+
+ /**
+ * Sets [Builder.speaker] to an arbitrary JSON value.
+ *
+ * You should usually call [Builder.speaker] with a well-typed [String] value instead. This
+ * method is primarily for setting the field to an undocumented or not yet supported value.
+ */
+ fun speaker(speaker: JsonField<String>) = apply { this.speaker = speaker }
+
+ /** Start timestamp of the segment in seconds. */
+ fun start(start: Float) = start(JsonField.of(start))
+
+ /**
+ * Sets [Builder.start] to an arbitrary JSON value.
+ *
+ * You should usually call [Builder.start] with a well-typed [Float] value instead. This
+ * method is primarily for setting the field to an undocumented or not yet supported value.
+ */
+ fun start(start: JsonField<Float>) = apply { this.start = start }
+
+ /** Transcript text for this segment. */
+ fun text(text: String) = text(JsonField.of(text))
+
+ /**
+ * Sets [Builder.text] to an arbitrary JSON value.
+ *
+ * You should usually call [Builder.text] with a well-typed [String] value instead. This
+ * method is primarily for setting the field to an undocumented or not yet supported value.
+ */
+ fun text(text: JsonField<String>) = apply { this.text = text }
+
+ /**
+ * Sets the field to an arbitrary JSON value.
+ *
+ * It is usually unnecessary to call this method because the field defaults to the
+ * following:
+ * ```java
+ * JsonValue.from("transcript.text.segment")
+ * ```
+ *
+ * This method is primarily for setting the field to an undocumented or not yet supported
+ * value.
+ */
+ fun type(type: JsonValue) = apply { this.type = type }
+
+ fun additionalProperties(additionalProperties: Map<String, JsonValue>) = apply {
+ this.additionalProperties.clear()
+ putAllAdditionalProperties(additionalProperties)
+ }
+
+ fun putAdditionalProperty(key: String, value: JsonValue) = apply {
+ additionalProperties.put(key, value)
+ }
+
+ fun putAllAdditionalProperties(additionalProperties: Map<String, JsonValue>) = apply {
+ this.additionalProperties.putAll(additionalProperties)
+ }
+
+ fun removeAdditionalProperty(key: String) = apply { additionalProperties.remove(key) }
+
+ fun removeAllAdditionalProperties(keys: Set<String>) = apply {
+ keys.forEach(::removeAdditionalProperty)
+ }
+
+ /**
+ * Returns an immutable instance of [TranscriptionTextSegmentEvent].
+ *
+ * Further updates to this [Builder] will not mutate the returned instance.
+ *
+ * The following fields are required:
+ * ```java
+ * .id()
+ * .end()
+ * .speaker()
+ * .start()
+ * .text()
+ * ```
+ *
+ * @throws IllegalStateException if any required field is unset.
+ */
+ fun build(): TranscriptionTextSegmentEvent =
+ TranscriptionTextSegmentEvent(
+ checkRequired("id", id),
+ checkRequired("end", end),
+ checkRequired("speaker", speaker),
+ checkRequired("start", start),
+ checkRequired("text", text),
+ type,
+ additionalProperties.toMutableMap(),
+ )
+ }
+
+ private var validated: Boolean = false
+
+ fun validate(): TranscriptionTextSegmentEvent = apply {
+ if (validated) {
+ return@apply
+ }
+
+ id()
+ end()
+ speaker()
+ start()
+ text()
+ _type().let {
+ if (it != JsonValue.from("transcript.text.segment")) {
+ throw OpenAIInvalidDataException("'type' is invalid, received $it")
+ }
+ }
+ validated = true
+ }
+
+ fun isValid(): Boolean =
+ try {
+ validate()
+ true
+ } catch (e: OpenAIInvalidDataException) {
+ false
+ }
+
+ /**
+ * Returns a score indicating how many valid values are contained in this object recursively.
+ *
+ * Used for best match union deserialization.
+ */
+ @JvmSynthetic
+ internal fun validity(): Int =
+ (if (id.asKnown().isPresent) 1 else 0) +
+ (if (end.asKnown().isPresent) 1 else 0) +
+ (if (speaker.asKnown().isPresent) 1 else 0) +
+ (if (start.asKnown().isPresent) 1 else 0) +
+ (if (text.asKnown().isPresent) 1 else 0) +
+ type.let { if (it == JsonValue.from("transcript.text.segment")) 1 else 0 }
+
+ override fun equals(other: Any?): Boolean {
+ if (this === other) {
+ return true
+ }
+
+ return other is TranscriptionTextSegmentEvent &&
+ id == other.id &&
+ end == other.end &&
+ speaker == other.speaker &&
+ start == other.start &&
+ text == other.text &&
+ type == other.type &&
+ additionalProperties == other.additionalProperties
+ }
+
+ private val hashCode: Int by lazy {
+ Objects.hash(id, end, speaker, start, text, type, additionalProperties)
+ }
+
+ override fun hashCode(): Int = hashCode
+
+ override fun toString() =
+ "TranscriptionTextSegmentEvent{id=$id, end=$end, speaker=$speaker, start=$start, text=$text, type=$type, additionalProperties=$additionalProperties}"
+}
diff --git a/openai-java-core/src/main/kotlin/com/openai/models/realtime/AudioTranscription.kt b/openai-java-core/src/main/kotlin/com/openai/models/realtime/AudioTranscription.kt
index 3e1544452..cc8da45af 100644
--- a/openai-java-core/src/main/kotlin/com/openai/models/realtime/AudioTranscription.kt
+++ b/openai-java-core/src/main/kotlin/com/openai/models/realtime/AudioTranscription.kt
@@ -45,7 +45,8 @@ private constructor(
/**
* The model to use for transcription. Current options are `whisper-1`,
- * `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, and `gpt-4o-transcribe`.
+ * `gpt-4o-mini-transcribe`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use
+ * `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.
*
* @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
* server responded with an unexpected value).
@@ -56,8 +57,8 @@ private constructor(
* An optional text to guide the model's style or continue a previous audio segment. For
* `whisper-1`, the
* [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting).
- * For `gpt-4o-transcribe` models, the prompt is a free text string, for example "expect words
- * related to technology".
+ * For `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free
+ * text string, for example "expect words related to technology".
*
* @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
* server responded with an unexpected value).
@@ -136,7 +137,8 @@ private constructor(
/**
* The model to use for transcription. Current options are `whisper-1`,
- * `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, and `gpt-4o-transcribe`.
+ * `gpt-4o-mini-transcribe`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use
+ * `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.
*/
fun model(model: Model) = model(JsonField.of(model))
@@ -152,8 +154,8 @@ private constructor(
* An optional text to guide the model's style or continue a previous audio segment. For
* `whisper-1`, the
* [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting).
- * For `gpt-4o-transcribe` models, the prompt is a free text string, for example "expect
- * words related to technology".
+ * For `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a
+ * free text string, for example "expect words related to technology".
*/
fun prompt(prompt: String) = prompt(JsonField.of(prompt))
@@ -227,7 +229,8 @@ private constructor(
/**
* The model to use for transcription. Current options are `whisper-1`,
- * `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, and `gpt-4o-transcribe`.
+ * `gpt-4o-mini-transcribe`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use
+ * `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.
*/
class Model @JsonCreator private constructor(private val value: JsonField<String>) : Enum {
@@ -245,21 +248,21 @@ private constructor(
@JvmField val WHISPER_1 = of("whisper-1")
- @JvmField val GPT_4O_TRANSCRIBE_LATEST = of("gpt-4o-transcribe-latest")
-
@JvmField val GPT_4O_MINI_TRANSCRIBE = of("gpt-4o-mini-transcribe")
@JvmField val GPT_4O_TRANSCRIBE = of("gpt-4o-transcribe")
+ @JvmField val GPT_4O_TRANSCRIBE_DIARIZE = of("gpt-4o-transcribe-diarize")
+
@JvmStatic fun of(value: String) = Model(JsonField.of(value))
}
/** An enum containing [Model]'s known values. */
enum class Known {
WHISPER_1,
- GPT_4O_TRANSCRIBE_LATEST,
GPT_4O_MINI_TRANSCRIBE,
GPT_4O_TRANSCRIBE,
+ GPT_4O_TRANSCRIBE_DIARIZE,
}
/**
@@ -273,9 +276,9 @@ private constructor(
*/
enum class Value {
WHISPER_1,
- GPT_4O_TRANSCRIBE_LATEST,
GPT_4O_MINI_TRANSCRIBE,
GPT_4O_TRANSCRIBE,
+ GPT_4O_TRANSCRIBE_DIARIZE,
/** An enum member indicating that [Model] was instantiated with an unknown value. */
_UNKNOWN,
}
@@ -290,9 +293,9 @@ private constructor(
fun value(): Value =
when (this) {
WHISPER_1 -> Value.WHISPER_1
- GPT_4O_TRANSCRIBE_LATEST -> Value.GPT_4O_TRANSCRIBE_LATEST
GPT_4O_MINI_TRANSCRIBE -> Value.GPT_4O_MINI_TRANSCRIBE
GPT_4O_TRANSCRIBE -> Value.GPT_4O_TRANSCRIBE
+ GPT_4O_TRANSCRIBE_DIARIZE -> Value.GPT_4O_TRANSCRIBE_DIARIZE
else -> Value._UNKNOWN
}
@@ -308,9 +311,9 @@ private constructor(
fun known(): Known =
when (this) {
WHISPER_1 -> Known.WHISPER_1
- GPT_4O_TRANSCRIBE_LATEST -> Known.GPT_4O_TRANSCRIBE_LATEST
GPT_4O_MINI_TRANSCRIBE -> Known.GPT_4O_MINI_TRANSCRIBE
GPT_4O_TRANSCRIBE -> Known.GPT_4O_TRANSCRIBE
+ GPT_4O_TRANSCRIBE_DIARIZE -> Known.GPT_4O_TRANSCRIBE_DIARIZE
else -> throw OpenAIInvalidDataException("Unknown Model: $value")
}
diff --git a/openai-java-core/src/main/kotlin/com/openai/models/vectorstores/VectorStoreCreateParams.kt b/openai-java-core/src/main/kotlin/com/openai/models/vectorstores/VectorStoreCreateParams.kt
index 6994f6f67..14fba5722 100644
--- a/openai-java-core/src/main/kotlin/com/openai/models/vectorstores/VectorStoreCreateParams.kt
+++ b/openai-java-core/src/main/kotlin/com/openai/models/vectorstores/VectorStoreCreateParams.kt
@@ -39,6 +39,14 @@ private constructor(
*/
fun chunkingStrategy(): Optional = body.chunkingStrategy()
+ /**
+ * A description for the vector store. Can be used to describe the vector store's purpose.
+ *
+ * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
+ * server responded with an unexpected value).
+ */
+ fun description(): Optional<String> = body.description()
+
/**
* The expiration policy for a vector store.
*
@@ -85,6 +93,13 @@ private constructor(
*/
fun _chunkingStrategy(): JsonField = body._chunkingStrategy()
+ /**
+ * Returns the raw JSON value of [description].
+ *
+ * Unlike [description], this method doesn't throw if the JSON field has an unexpected type.
+ */
+ fun _description(): JsonField<String> = body._description()
+
/**
* Returns the raw JSON value of [expiresAfter].
*
@@ -151,10 +166,10 @@ private constructor(
* This is generally only useful if you are already constructing the body separately.
* Otherwise, it's more convenient to use the top-level setters instead:
* - [chunkingStrategy]
+ * - [description]
* - [expiresAfter]
* - [fileIds]
* - [metadata]
- * - [name]
* - etc.
*/
fun body(body: Body) = apply { this.body = body.toBuilder() }
@@ -202,6 +217,20 @@ private constructor(
body.staticChunkingStrategy(static_)
}
+ /**
+ * A description for the vector store. Can be used to describe the vector store's purpose.
+ */
+ fun description(description: String) = apply { body.description(description) }
+
+ /**
+ * Sets [Builder.description] to an arbitrary JSON value.
+ *
+ * You should usually call [Builder.description] with a well-typed [String] value instead.
+ * This method is primarily for setting the field to an undocumented or not yet supported
+ * value.
+ */
+ fun description(description: JsonField<String>) = apply { body.description(description) }
+
/** The expiration policy for a vector store. */
fun expiresAfter(expiresAfter: ExpiresAfter) = apply { body.expiresAfter(expiresAfter) }
@@ -411,6 +440,7 @@ private constructor(
@JsonCreator(mode = JsonCreator.Mode.DISABLED)
private constructor(
private val chunkingStrategy: JsonField,
+ private val description: JsonField<String>,
private val expiresAfter: JsonField<ExpiresAfter>,
private val fileIds: JsonField>,
private val metadata: JsonField,
@@ -423,6 +453,9 @@ private constructor(
@JsonProperty("chunking_strategy")
@ExcludeMissing
chunkingStrategy: JsonField = JsonMissing.of(),
+ @JsonProperty("description")
+ @ExcludeMissing
+ description: JsonField<String> = JsonMissing.of(),
@JsonProperty("expires_after")
@ExcludeMissing
expiresAfter: JsonField<ExpiresAfter> = JsonMissing.of(),
@@ -433,7 +466,15 @@ private constructor(
@ExcludeMissing
metadata: JsonField