diff --git a/models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiAudioTranscriptionModel.java b/models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiAudioTranscriptionModel.java index 1d1e4afd941..0c7bb81a31b 100644 --- a/models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiAudioTranscriptionModel.java +++ b/models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiAudioTranscriptionModel.java @@ -23,10 +23,8 @@ import org.springframework.ai.audio.transcription.AudioTranscription; import org.springframework.ai.audio.transcription.AudioTranscriptionPrompt; import org.springframework.ai.audio.transcription.AudioTranscriptionResponse; +import org.springframework.ai.audio.transcription.metadata.StructuredResponse; import org.springframework.ai.azure.openai.AzureOpenAiAudioTranscriptionOptions.GranularityType; -import org.springframework.ai.azure.openai.AzureOpenAiAudioTranscriptionOptions.StructuredResponse; -import org.springframework.ai.azure.openai.AzureOpenAiAudioTranscriptionOptions.StructuredResponse.Segment; -import org.springframework.ai.azure.openai.AzureOpenAiAudioTranscriptionOptions.StructuredResponse.Word; import org.springframework.ai.azure.openai.AzureOpenAiAudioTranscriptionOptions.TranscriptResponseFormat; import org.springframework.ai.azure.openai.metadata.AzureOpenAiAudioTranscriptionResponseMetadata; import org.springframework.ai.model.Model; @@ -38,6 +36,8 @@ import java.io.IOException; import java.util.List; +import static org.springframework.ai.audio.transcription.metadata.StructuredResponse.*; + /** * AzureOpenAI audio transcription client implementation for backed by * {@link OpenAIClient}. 
You provide as input the audio file you want to transcribe and diff --git a/models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiAudioTranscriptionOptions.java b/models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiAudioTranscriptionOptions.java index bd80aace91a..ed9f4ef6b98 100644 --- a/models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiAudioTranscriptionOptions.java +++ b/models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiAudioTranscriptionOptions.java @@ -242,99 +242,24 @@ public String getValue() { } - /** - * @param language The language of the transcribed text. - * @param duration The duration of the audio in seconds. - * @param text The transcribed text. - * @param words The extracted words and their timestamps. - * @param segments The segments of the transcribed text and their corresponding - * details. - */ - @JsonInclude(Include.NON_NULL) - public record StructuredResponse( - // @formatter:off - @JsonProperty("language") String language, - @JsonProperty("duration") Float duration, - @JsonProperty("text") String text, - @JsonProperty("words") List words, - @JsonProperty("segments") List segments) { - // @formatter:on - - /** - * Extracted word and it's corresponding timestamps. - * - * @param word The text content of the word. - * @param start The start time of the word in seconds. - * @param end The end time of the word in seconds. - */ - @JsonInclude(Include.NON_NULL) - public record Word( - // @formatter:off - @JsonProperty("word") String word, - @JsonProperty("start") Float start, - @JsonProperty("end") Float end) { - // @formatter:on - } - - /** - * Segment of the transcribed text and its corresponding details. - * - * @param id Unique identifier of the segment. - * @param seek Seek offset of the segment. - * @param start Start time of the segment in seconds. 
- * @param end End time of the segment in seconds. - * @param text The text content of the segment. - * @param tokens Array of token IDs for the text content. - * @param temperature Temperature parameter used for generating the segment. - * @param avgLogprob Average logprob of the segment. If the value is lower than - * -1, consider the logprobs failed. - * @param compressionRatio Compression ratio of the segment. If the value is - * greater than 2.4, consider the compression failed. - * @param noSpeechProb Probability of no speech in the segment. If the value is - * higher than 1.0 and the avg_logprob is below -1, consider this segment silent. - */ - @JsonInclude(Include.NON_NULL) - public record Segment( - // @formatter:off - @JsonProperty("id") Integer id, - @JsonProperty("seek") Integer seek, - @JsonProperty("start") Float start, - @JsonProperty("end") Float end, - @JsonProperty("text") String text, - @JsonProperty("tokens") List tokens, - @JsonProperty("temperature") Float temperature, - @JsonProperty("avg_logprob") Float avgLogprob, - @JsonProperty("compression_ratio") Float compressionRatio, - @JsonProperty("no_speech_prob") Float noSpeechProb) { - // @formatter:on - } - } - public enum TranscriptResponseFormat { // @formatter:off - @JsonProperty("json") JSON(AudioTranscriptionFormat.JSON, StructuredResponse.class), - @JsonProperty("text") TEXT(AudioTranscriptionFormat.TEXT, String.class), - @JsonProperty("srt") SRT(AudioTranscriptionFormat.SRT, String.class), - @JsonProperty("verbose_json") VERBOSE_JSON(AudioTranscriptionFormat.VERBOSE_JSON, StructuredResponse.class), - @JsonProperty("vtt") VTT(AudioTranscriptionFormat.VTT, String.class); + @JsonProperty("json") JSON(AudioTranscriptionFormat.JSON), + @JsonProperty("text") TEXT(AudioTranscriptionFormat.TEXT), + @JsonProperty("srt") SRT(AudioTranscriptionFormat.SRT), + @JsonProperty("verbose_json") VERBOSE_JSON(AudioTranscriptionFormat.VERBOSE_JSON), + @JsonProperty("vtt") VTT(AudioTranscriptionFormat.VTT); 
public final AudioTranscriptionFormat value; - public final Class responseType; - - TranscriptResponseFormat(AudioTranscriptionFormat value, Class responseType) { + TranscriptResponseFormat(AudioTranscriptionFormat value) { this.value = value; - this.responseType = responseType; } public AudioTranscriptionFormat getValue() { return this.value; } - - public Class getResponseType() { - return this.responseType; - } } public enum GranularityType { diff --git a/models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/metadata/AzureOpenAiAudioTranscriptionResponseMetadata.java b/models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/metadata/AzureOpenAiAudioTranscriptionResponseMetadata.java index f64a805a146..fc762ddb3cc 100644 --- a/models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/metadata/AzureOpenAiAudioTranscriptionResponseMetadata.java +++ b/models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/metadata/AzureOpenAiAudioTranscriptionResponseMetadata.java @@ -16,7 +16,8 @@ package org.springframework.ai.azure.openai.metadata; import org.springframework.ai.audio.transcription.AudioTranscriptionResponseMetadata; -import org.springframework.ai.azure.openai.AzureOpenAiAudioTranscriptionOptions; +import org.springframework.ai.audio.transcription.metadata.StructuredResponse; +import org.springframework.lang.Nullable; import org.springframework.util.Assert; /** @@ -28,13 +29,9 @@ public class AzureOpenAiAudioTranscriptionResponseMetadata extends AudioTranscri protected static final String AI_METADATA_STRING = "{ @type: %1$s }"; - public static final AzureOpenAiAudioTranscriptionResponseMetadata NULL = new AzureOpenAiAudioTranscriptionResponseMetadata() { - }; - - public static AzureOpenAiAudioTranscriptionResponseMetadata from( - AzureOpenAiAudioTranscriptionOptions.StructuredResponse result) { - Assert.notNull(result, "AzureOpenAI Transcription must not be null"); - 
return new AzureOpenAiAudioTranscriptionResponseMetadata(); + public static AzureOpenAiAudioTranscriptionResponseMetadata from(StructuredResponse structuredResponse) { + Assert.notNull(structuredResponse, "AzureOpenAI Transcription must not be null"); + return new AzureOpenAiAudioTranscriptionResponseMetadata(structuredResponse); } public static AzureOpenAiAudioTranscriptionResponseMetadata from(String result) { @@ -42,7 +39,19 @@ public static AzureOpenAiAudioTranscriptionResponseMetadata from(String result) return new AzureOpenAiAudioTranscriptionResponseMetadata(); } + private final StructuredResponse structuredResponse; + protected AzureOpenAiAudioTranscriptionResponseMetadata() { + this(null); + } + + public AzureOpenAiAudioTranscriptionResponseMetadata(StructuredResponse structuredResponse) { + this.structuredResponse = structuredResponse; + } + + @Nullable + public StructuredResponse getStructuredResponse() { + return structuredResponse; } @Override diff --git a/spring-ai-core/src/main/java/org/springframework/ai/audio/transcription/metadata/StructuredResponse.java b/spring-ai-core/src/main/java/org/springframework/ai/audio/transcription/metadata/StructuredResponse.java new file mode 100644 index 00000000000..1be153ece62 --- /dev/null +++ b/spring-ai-core/src/main/java/org/springframework/ai/audio/transcription/metadata/StructuredResponse.java @@ -0,0 +1,43 @@ +package org.springframework.ai.audio.transcription.metadata; + +import org.springframework.lang.Nullable; + +import java.util.List; + +/** + * @author Piotr Olaszewski + */ +public record StructuredResponse(String language, Float duration, String text, @Nullable List words, + @Nullable List segments) { + + /** + * Extracted word and its corresponding timestamps. + * + * @param word The text content of the word. + * @param start The start time of the word in seconds. + * @param end The end time of the word in seconds.
+ */ + public record Word(String word, Float start, Float end) { + } + + /** + * Segment of the transcribed text and its corresponding details. + * + * @param id Unique identifier of the segment. + * @param seek Seek offset of the segment. + * @param start Start time of the segment in seconds. + * @param end End time of the segment in seconds. + * @param text The text content of the segment. + * @param tokens Array of token IDs for the text content. + * @param temperature Temperature parameter used for generating the segment. + * @param avgLogprob Average logprob of the segment. If the value is lower than -1, + * consider the logprobs failed. + * @param compressionRatio Compression ratio of the segment. If the value is greater + * than 2.4, consider the compression failed. + * @param noSpeechProb Probability of no speech in the segment. If the value is higher + * than 1.0 and the avg_logprob is below -1, consider this segment silent. + */ + public record Segment(Integer id, Integer seek, Float start, Float end, String text, List tokens, + Float temperature, Float avgLogprob, Float compressionRatio, Float noSpeechProb) { + } +} diff --git a/spring-ai-core/src/main/java/org/springframework/ai/model/AbstractResponseMetadata.java b/spring-ai-core/src/main/java/org/springframework/ai/model/AbstractResponseMetadata.java index 42bd8678e8e..bb102f1be5a 100644 --- a/spring-ai-core/src/main/java/org/springframework/ai/model/AbstractResponseMetadata.java +++ b/spring-ai-core/src/main/java/org/springframework/ai/model/AbstractResponseMetadata.java @@ -1,7 +1,7 @@ package org.springframework.ai.model; -import io.micrometer.common.lang.NonNull; -import io.micrometer.common.lang.Nullable; +import org.springframework.lang.NonNull; +import org.springframework.lang.Nullable; import java.util.Collections; import java.util.Map; diff --git a/spring-ai-core/src/main/java/org/springframework/ai/model/MutableResponseMetadata.java
b/spring-ai-core/src/main/java/org/springframework/ai/model/MutableResponseMetadata.java index ac0c9254e75..f0b17e11f8b 100644 --- a/spring-ai-core/src/main/java/org/springframework/ai/model/MutableResponseMetadata.java +++ b/spring-ai-core/src/main/java/org/springframework/ai/model/MutableResponseMetadata.java @@ -1,7 +1,7 @@ package org.springframework.ai.model; -import io.micrometer.common.lang.NonNull; -import io.micrometer.common.lang.Nullable; +import org.springframework.lang.NonNull; +import org.springframework.lang.Nullable; import java.util.Collections; import java.util.Map; diff --git a/spring-ai-core/src/main/java/org/springframework/ai/model/ResponseMetadata.java b/spring-ai-core/src/main/java/org/springframework/ai/model/ResponseMetadata.java index 24e544d4f29..288c5d66382 100644 --- a/spring-ai-core/src/main/java/org/springframework/ai/model/ResponseMetadata.java +++ b/spring-ai-core/src/main/java/org/springframework/ai/model/ResponseMetadata.java @@ -15,8 +15,8 @@ */ package org.springframework.ai.model; -import io.micrometer.common.lang.NonNull; -import io.micrometer.common.lang.Nullable; +import org.springframework.lang.NonNull; +import org.springframework.lang.Nullable; import java.util.Map; import java.util.Set;