@@ -0,0 +1,26 @@
-- Add the GPT-OSS-120B foundation model to the LLMModel table
-- This migration registers the openai/gpt-oss-120b model (served via IBM watsonx) and seeds its initial pricing

INSERT INTO "LLMModel" ("modelKey", "displayName", "provider", "isActive", "createdAt", "updatedAt") VALUES
('gpt-oss-120b', 'GPT-OSS-120B', 'OpenAI', true, NOW(), NOW());

-- Add initial pricing data for the new model
-- Note: these are estimated prices; adjust once actual pricing for this model is available
WITH new_models AS (
SELECT id, "modelKey" FROM "LLMModel" WHERE "modelKey" IN ('gpt-oss-120b')
)
INSERT INTO "LLMPricing" ("modelId", "inputTokenPrice", "outputTokenPrice", "effectiveDate", "source", "isActive", "createdAt", "updatedAt")
SELECT
m.id,
CASE
WHEN m."modelKey" = 'gpt-oss-120b' THEN 0.00000015
END,
CASE
WHEN m."modelKey" = 'gpt-oss-120b' THEN 0.0000006
END,
NOW(),
'MANUAL',
true,
NOW(),
NOW()
FROM new_models m;
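
For context, the seeded per-token prices translate into request cost as sketched below. This is a minimal illustration: the estimateCostUsd helper is not an existing API, and the tokenUsage shape simply mirrors the LlmResponse interface used by the services in this PR.

// Rough sketch of how the seeded per-token prices translate into request cost.
// The literal prices below mirror the INSERT above; the helper is hypothetical.
interface TokenUsage {
  input: number;
  output: number;
}

function estimateCostUsd(
  usage: TokenUsage,
  inputTokenPrice: number,
  outputTokenPrice: number,
): number {
  return usage.input * inputTokenPrice + usage.output * outputTokenPrice;
}

// Example: 1,200 input tokens and 300 output tokens at the gpt-oss-120b prices above
// => 1200 * 0.00000015 + 300 * 0.0000006 = 0.00036 USD.
const cost = estimateCostUsd({ input: 1200, output: 300 }, 0.00000015, 0.0000006);
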
@@ -24,7 +24,9 @@ export interface ILlmProvider {
options?: LlmRequestOptions,
): Promise<LlmResponse>;
readonly key: string;
}

export interface IMultimodalLlmProvider extends ILlmProvider {
/**
* Send a request with image content to the LLM
*/
197 changes: 197 additions & 0 deletions apps/api/src/api/llm/core/services/gpt-oss-120b-llm-service.ts
@@ -0,0 +1,197 @@
import { WatsonxLLM } from "@langchain/community/llms/ibm";
import { HumanMessage } from "@langchain/core/messages";
import { Inject, Injectable } from "@nestjs/common";
import { WINSTON_MODULE_PROVIDER } from "nest-winston";
import { Logger } from "winston";
import { TOKEN_COUNTER } from "../../llm.constants";
import {
IMultimodalLlmProvider,
LlmRequestOptions,
LlmResponse,
} from "../interfaces/llm-provider.interface";
import { ITokenCounter } from "../interfaces/token-counter.interface";

@Injectable()
export class GptOss120bLlmService implements IMultimodalLlmProvider {
private readonly logger: Logger;
static readonly DEFAULT_MODEL = "openai/gpt-oss-120b";
readonly key = "gpt-oss-120b";

constructor(
@Inject(TOKEN_COUNTER) private readonly tokenCounter: ITokenCounter,
@Inject(WINSTON_MODULE_PROVIDER) parentLogger: Logger,
) {
this.logger = parentLogger.child({ context: GptOss120bLlmService.name });
}

private createChatModel(options?: LlmRequestOptions): WatsonxLLM {
return new WatsonxLLM({
version: "2024-05-31",
serviceUrl: "https://us-south.ml.cloud.ibm.com",
projectId: process.env.WATSONX_PROJECT_ID_LLAMA || "",
watsonxAIAuthType: "iam",
watsonxAIApikey: process.env.WATSONX_AI_API_KEY_LLAMA || "", // pragma: allowlist secret
model: options?.modelName ?? GptOss120bLlmService.DEFAULT_MODEL,
temperature: options?.temperature ?? 0.5,
maxNewTokens: options?.maxTokens ?? 1000,
});
}

async invoke(
messages: HumanMessage[],
options?: LlmRequestOptions,
): Promise<LlmResponse> {
const model = this.createChatModel(options);

const inputText = messages
.map((m) =>
typeof m.content === "string" ? m.content : JSON.stringify(m.content),
)
.join("\n");
const inputTokens = this.tokenCounter.countTokens(inputText);

this.logger.debug(`Invoking WatsonX LLM with ${inputTokens} input tokens`);

try {
console.log(`Invoking WatsonX LLM with input: ${inputText}`);
const result = await model.invoke(inputText);
console.log(`WatsonX LLM response: ${result}`);
const rawResponse = typeof result === "string" ? result : String(result);

// Extract JSON from the response if it contains additional text
const responseContent = this.extractJSONFromResponse(rawResponse);
const outputTokens = this.tokenCounter.countTokens(responseContent);

this.logger.debug(
`WatsonX LLM responded with ${outputTokens} output tokens`,
);

return {
content: responseContent,
tokenUsage: {
input: inputTokens,
output: outputTokens,
},
};
} catch (error) {
this.logger.error(
`WatsonX LLM API error: ${
error instanceof Error ? error.message : "Unknown error"
}`,
);
throw error;
}
}

async invokeWithImage(
textContent: string,
imageData: string,
options?: LlmRequestOptions,
): Promise<LlmResponse> {
this.logger.warn(
"WatsonX LLM does not support multimodal (text + image) inputs. Processing text only.",
);

const inputTokens = this.tokenCounter.countTokens(textContent);

this.logger.debug(
`Invoking WatsonX LLM with text only (${inputTokens} input tokens) - image data ignored`,
);

const model = this.createChatModel(options);

try {
const result = await model.invoke(textContent);
const rawResponse = typeof result === "string" ? result : String(result);
const responseContent = this.extractJSONFromResponse(rawResponse);
const outputTokens = this.tokenCounter.countTokens(responseContent);

this.logger.debug(
`WatsonX LLM responded with ${outputTokens} output tokens`,
);

return {
content: responseContent,
tokenUsage: {
input: inputTokens,
output: outputTokens,
},
};
} catch (error) {
this.logger.error(
`WatsonX LLM API error: ${
error instanceof Error ? error.message : "Unknown error"
}`,
);
throw error;
}
}

/**
* Extract JSON from WatsonX response that may contain additional text
*/
private extractJSONFromResponse(response: string): string {
try {
// First, try to parse the response as-is in case it's already clean JSON
JSON.parse(response);
return response;
} catch {
// If that fails, try to extract JSON from markdown code blocks
const jsonBlockMatch = response.match(/```json\s*([\S\s]*?)\s*```/);
if (jsonBlockMatch) {
const jsonContent = jsonBlockMatch[1].trim();
try {
JSON.parse(jsonContent);
return jsonContent;
} catch {
// Fall through to other extraction methods
}
}

// Try to find JSON object patterns in the response
const jsonObjectMatch = response.match(/{[\S\s]*}/);
if (jsonObjectMatch) {
const jsonContent = jsonObjectMatch[0];
try {
JSON.parse(jsonContent);
return jsonContent;
} catch {
// Fall through
}
}

// If no valid JSON found, return the original response
this.logger.warn(
"Could not extract valid JSON from WatsonX response, returning original",
);
return response;
}
}

/**
* Normalize image data to ensure it has the correct format
* Note: WatsonX LLM does not support image inputs, but keeping this method for potential future use
*/
private normalizeImageData(imageData: string): string {
if (!imageData) {
throw new Error("Image data is empty or null");
}

if (imageData.startsWith("data:")) {
return imageData;
}

let mimeType = "image/jpeg";
if (imageData.startsWith("/9j/")) {
mimeType = "image/jpeg";
} else if (imageData.startsWith("iVBORw0KGgo")) {
mimeType = "image/png";
} else if (imageData.startsWith("R0lGOD")) {
mimeType = "image/gif";
} else if (imageData.startsWith("UklGR")) {
mimeType = "image/webp";
}

return `data:${mimeType};base64,${imageData}`;
}
}
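
A rough usage sketch of the new provider follows. In the app both constructor dependencies are injected by Nest and requests flow through LlmRouter; the stand-in token counter assumes ITokenCounter is just { countTokens(text: string): number }, and the snippet assumes WATSONX_AI_API_KEY_LLAMA and WATSONX_PROJECT_ID_LLAMA are set in the environment.

import { HumanMessage } from "@langchain/core/messages";
import { createLogger } from "winston";
import { GptOss120bLlmService } from "./gpt-oss-120b-llm-service";

async function main() {
  // Hypothetical stand-in for the injected ITokenCounter; the real implementation is provided by Nest.
  const tokenCounter = { countTokens: (text: string) => Math.ceil(text.length / 4) };
  const service = new GptOss120bLlmService(tokenCounter, createLogger());

  // Plain text request; extractJSONFromResponse passes the reply through unchanged
  // unless the model wraps its JSON in extra prose or a fenced json block.
  const response = await service.invoke(
    [new HumanMessage('Return {"status":"ok"} as JSON.')],
    { temperature: 0.2, maxTokens: 200 },
  );
  console.log(response.content, response.tokenUsage);

  // invokeWithImage accepts base64 image data, but as the warning in the service notes,
  // the WatsonxLLM call is text-only here, so the image is ignored.
  const textOnly = await service.invokeWithImage("Describe the attached chart.", "iVBORw0KGgo...");
  console.log(textOnly.content);
}

main();
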
5 changes: 2 additions & 3 deletions apps/api/src/api/llm/core/services/gpt5-llm.service.ts
@@ -5,7 +5,7 @@ import { WINSTON_MODULE_PROVIDER } from "nest-winston";
import { Logger } from "winston";
import { TOKEN_COUNTER } from "../../llm.constants";
import {
ILlmProvider,
IMultimodalLlmProvider,
LlmRequestOptions,
LlmResponse,
} from "../interfaces/llm-provider.interface";
@@ -17,7 +17,7 @@ import { ITokenCounter } from "../interfaces/token-counter.interface";
* enhanced capabilities and performance compared to GPT-4 models.
*/
@Injectable()
export class Gpt5LlmService implements ILlmProvider {
export class Gpt5LlmService implements IMultimodalLlmProvider {
private readonly logger: Logger;
static readonly DEFAULT_MODEL = "gpt-5";
readonly key = "gpt-5";
@@ -34,7 +34,6 @@ export class Gpt5LlmService implements ILlmProvider {
*/
private createChatModel(options?: LlmRequestOptions): ChatOpenAI {
return new ChatOpenAI({
temperature: options?.temperature ?? 0.5,
modelName: options?.modelName ?? Gpt5LlmService.DEFAULT_MODEL,
maxCompletionTokens: options?.maxTokens,
});
4 changes: 2 additions & 2 deletions apps/api/src/api/llm/core/services/gpt5-mini-llm.service.ts
@@ -5,7 +5,7 @@ import { WINSTON_MODULE_PROVIDER } from "nest-winston";
import { Logger } from "winston";
import { TOKEN_COUNTER } from "../../llm.constants";
import {
ILlmProvider,
IMultimodalLlmProvider,
LlmRequestOptions,
LlmResponse,
} from "../interfaces/llm-provider.interface";
@@ -17,7 +17,7 @@ import { ITokenCounter } from "../interfaces/token-counter.interface";
* and cost-effectiveness for simpler tasks.
*/
@Injectable()
export class Gpt5MiniLlmService implements ILlmProvider {
export class Gpt5MiniLlmService implements IMultimodalLlmProvider {
private readonly logger: Logger;
static readonly DEFAULT_MODEL = "gpt-5-mini";
readonly key = "gpt-5-mini";
4 changes: 2 additions & 2 deletions apps/api/src/api/llm/core/services/gpt5-nano-llm.service.ts
@@ -5,7 +5,7 @@ import { WINSTON_MODULE_PROVIDER } from "nest-winston";
import { Logger } from "winston";
import { TOKEN_COUNTER } from "../../llm.constants";
import {
ILlmProvider,
IMultimodalLlmProvider,
LlmRequestOptions,
LlmResponse,
} from "../interfaces/llm-provider.interface";
@@ -17,7 +17,7 @@ import { ITokenCounter } from "../interfaces/token-counter.interface";
* cost-effectiveness, ideal for basic text processing and quick responses.
*/
@Injectable()
export class Gpt5NanoLlmService implements ILlmProvider {
export class Gpt5NanoLlmService implements IMultimodalLlmProvider {
private readonly logger: Logger;
static readonly DEFAULT_MODEL = "gpt-5-nano";
readonly key = "gpt-5-nano";
9 changes: 7 additions & 2 deletions apps/api/src/api/llm/core/services/llm-resolver.service.ts
@@ -24,7 +24,7 @@ export class LLMResolverService {
string,
{ modelKey: string; cachedAt: number }
>();
private readonly CACHE_TTL = 5 * 60 * 1000; // 5 minutes cache
private readonly CACHE_TTL = 1;

constructor(
@Inject(LLM_ASSIGNMENT_SERVICE)
@@ -49,6 +49,9 @@
// Get assigned model from service
const modelKey =
await this.assignmentService.getAssignedModel(featureKey);
console.log(
`Resolving model for feature ${featureKey}, assigned: ${modelKey}`,
);

if (modelKey) {
// Cache the result
@@ -144,7 +147,9 @@
fallbackModel = "gpt-4o-mini",
): Promise<string> {
const resolvedModel = await this.resolveModelForFeature(featureKey);

console.log(
`Resolving model for feature ${featureKey}, resolved: ${resolvedModel}`,
);
if (resolvedModel) {
return resolvedModel;
}
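
For context, the cache lookup itself is not visible in this hunk; a minimal sketch of the freshness check implied by the cache shape above follows (assumption: cachedAt stores a Date.now() timestamp, and isFresh is a hypothetical helper name). Note that with CACHE_TTL = 1 (one millisecond) effectively every lookup misses the cache and falls through to assignmentService.getAssignedModel.

// Sketch of a TTL freshness check consistent with the cache entry shape above.
function isFresh(entry: { modelKey: string; cachedAt: number }, ttlMs: number): boolean {
  return Date.now() - entry.cachedAt < ttlMs;
}
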
14 changes: 7 additions & 7 deletions apps/api/src/api/llm/core/services/llm-router.service.ts
@@ -1,30 +1,30 @@
import { Inject, Injectable, Logger } from "@nestjs/common";
import { ALL_LLM_PROVIDERS, LLM_RESOLVER_SERVICE } from "../../llm.constants";
import { ILlmProvider } from "../interfaces/llm-provider.interface";
import { IMultimodalLlmProvider } from "../interfaces/llm-provider.interface";
import { LLMResolverService } from "./llm-resolver.service";

@Injectable()
export class LlmRouter {
private readonly logger = new Logger(LlmRouter.name);
private readonly map: Map<string, ILlmProvider>;
private readonly map: Map<string, IMultimodalLlmProvider>;

constructor(
@Inject(ALL_LLM_PROVIDERS) providers: ILlmProvider[],
@Inject(ALL_LLM_PROVIDERS) providers: IMultimodalLlmProvider[],
@Inject(LLM_RESOLVER_SERVICE)
private readonly resolverService: LLMResolverService,
) {
this.map = new Map(providers.map((p) => [p.key, p]));
}

/** Return provider by key, or throw if it doesn't exist */
get(key: string): ILlmProvider {
get(key: string): IMultimodalLlmProvider {
const found = this.map.get(key);
if (!found) throw new Error(`No LLM provider registered for key "${key}"`);
return found;
}

/** Get provider for a specific AI feature (uses dynamic assignment) */
async getForFeature(featureKey: string): Promise<ILlmProvider> {
async getForFeature(featureKey: string): Promise<IMultimodalLlmProvider> {
try {
const assignedModelKey =
await this.resolverService.resolveModelForFeature(featureKey);
@@ -62,7 +62,7 @@ export class LlmRouter {
async getForFeatureWithFallback(
featureKey: string,
fallbackModelKey = "gpt-4o-mini",
): Promise<ILlmProvider> {
): Promise<IMultimodalLlmProvider> {
try {
const assignedModelKey =
await this.resolverService.getModelKeyWithFallback(
@@ -90,7 +90,7 @@ export class LlmRouter {
}

/** Convenience default (first registered) */
getDefault(): ILlmProvider {
getDefault(): IMultimodalLlmProvider {
return this.map.values().next().value;
}

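
A rough consumer-side sketch of the router API above: the "summary-generation" feature key is hypothetical and used purely for illustration, and the import path assumes a sibling module.

import { HumanMessage } from "@langchain/core/messages";
import { LlmRouter } from "./llm-router.service";

// Resolve the provider assigned to a feature, falling back to gpt-4o-mini if nothing is assigned.
async function summarize(llmRouter: LlmRouter, text: string): Promise<string> {
  const provider = await llmRouter.getForFeatureWithFallback("summary-generation", "gpt-4o-mini");
  const response = await provider.invoke([new HumanMessage(`Summarize:\n${text}`)]);
  return response.content;
}
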
@@ -5,7 +5,7 @@ import { WINSTON_MODULE_PROVIDER } from "nest-winston";
import { Logger } from "winston";
import { TOKEN_COUNTER } from "../../llm.constants";
import {
ILlmProvider,
IMultimodalLlmProvider,
LlmRequestOptions,
LlmResponse,
} from "../interfaces/llm-provider.interface";
@@ -16,7 +16,7 @@ import { ITokenCounter } from "../interfaces/token-counter.interface";
* Usage is identical to the full-size OpenAiLlmService.
*/
@Injectable()
export class OpenAiLlmMiniService implements ILlmProvider {
export class OpenAiLlmMiniService implements IMultimodalLlmProvider {
private readonly logger: Logger;
static readonly DEFAULT_MODEL = "gpt-4o-mini";
readonly key = "gpt-4o-mini";