Commit 00714e2
Merge branch 'feat/gpt-oss-capability'
2 parents: 09ba2f6 + 7c6452e

22 files changed: +333 / -59 lines
Lines changed: 26 additions & 0 deletions

@@ -0,0 +1,26 @@
+-- Add IBM foundation model variant to the LLMModel table
+-- This migration adds the IBM foundation model GPT-oss-120b
+
+INSERT INTO "LLMModel" ("modelKey", "displayName", "provider", "isActive", "createdAt", "updatedAt") VALUES
+('gpt-oss-120b', 'GPT-OSS-120B', 'OpenAI', true, NOW(), NOW());
+
+-- Add initial pricing data for the new GPT model
+-- Note: These are estimated prices, adjust based on actual OpenAI pricing when available
+WITH new_models AS (
+  SELECT id, "modelKey" FROM "LLMModel" WHERE "modelKey" IN ('gpt-oss-120b')
+)
+INSERT INTO "LLMPricing" ("modelId", "inputTokenPrice", "outputTokenPrice", "effectiveDate", "source", "isActive", "createdAt", "updatedAt")
+SELECT
+  m.id,
+  CASE
+    WHEN m."modelKey" = 'gpt-oss-120b' THEN 0.00000015
+  END,
+  CASE
+    WHEN m."modelKey" = 'gpt-oss-120b' THEN 0.0000006
+  END,
+  NOW(),
+  'MANUAL',
+  true,
+  NOW(),
+  NOW()
+FROM new_models m;
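For scale: "inputTokenPrice" and "outputTokenPrice" are per-token rates, so 0.00000015 and 0.0000006 work out to roughly $0.15 per million input tokens and $0.60 per million output tokens (assuming USD, which the schema shown here does not state).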

apps/api/src/api/llm/core/interfaces/llm-provider.interface.ts

Lines changed: 2 additions & 0 deletions

@@ -24,7 +24,9 @@ export interface ILlmProvider {
     options?: LlmRequestOptions,
   ): Promise<LlmResponse>;
   readonly key: string;
+}
 
+export interface IMultimodalLlmProvider extends ILlmProvider {
   /**
    * Send a request with image content to the LLM
    */
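Read together with the hunk's context lines and the implementations later in this commit, the new interface presumably amounts to the following. This is a reconstruction for orientation, with the method signature taken from GptOss120bLlmService.invokeWithImage below, not the file's verbatim contents:

export interface IMultimodalLlmProvider extends ILlmProvider {
  /**
   * Send a request with image content to the LLM
   */
  invokeWithImage(
    textContent: string,
    imageData: string,
    options?: LlmRequestOptions,
  ): Promise<LlmResponse>;
}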
Lines changed: 197 additions & 0 deletions

@@ -0,0 +1,197 @@
+import { WatsonxLLM } from "@langchain/community/llms/ibm";
+import { HumanMessage } from "@langchain/core/messages";
+import { Inject, Injectable } from "@nestjs/common";
+import { WINSTON_MODULE_PROVIDER } from "nest-winston";
+import { Logger } from "winston";
+import { TOKEN_COUNTER } from "../../llm.constants";
+import {
+  IMultimodalLlmProvider,
+  LlmRequestOptions,
+  LlmResponse,
+} from "../interfaces/llm-provider.interface";
+import { ITokenCounter } from "../interfaces/token-counter.interface";
+
+@Injectable()
+export class GptOss120bLlmService implements IMultimodalLlmProvider {
+  private readonly logger: Logger;
+  static readonly DEFAULT_MODEL = "openai/gpt-oss-120b";
+  readonly key = "gpt-oss-120b";
+
+  constructor(
+    @Inject(TOKEN_COUNTER) private readonly tokenCounter: ITokenCounter,
+    @Inject(WINSTON_MODULE_PROVIDER) parentLogger: Logger,
+  ) {
+    this.logger = parentLogger.child({ context: GptOss120bLlmService.name });
+  }
+
+  private createChatModel(options?: LlmRequestOptions): WatsonxLLM {
+    return new WatsonxLLM({
+      version: "2024-05-31",
+      serviceUrl: "https://us-south.ml.cloud.ibm.com",
+      projectId: process.env.WATSONX_PROJECT_ID_LLAMA || "",
+      watsonxAIAuthType: "iam",
+      watsonxAIApikey: process.env.WATSONX_AI_API_KEY_LLAMA || "", // pragma: allowlist secret
+      model: options?.modelName ?? GptOss120bLlmService.DEFAULT_MODEL,
+      temperature: options?.temperature ?? 0.5,
+      maxNewTokens: options?.maxTokens ?? 1000,
+    });
+  }
+
+  async invoke(
+    messages: HumanMessage[],
+    options?: LlmRequestOptions,
+  ): Promise<LlmResponse> {
+    const model = this.createChatModel(options);
+
+    const inputText = messages
+      .map((m) =>
+        typeof m.content === "string" ? m.content : JSON.stringify(m.content),
+      )
+      .join("\n");
+    const inputTokens = this.tokenCounter.countTokens(inputText);
+
+    this.logger.debug(`Invoking WatsonX LLM with ${inputTokens} input tokens`);
+
+    try {
+      console.log(`Invoking WatsonX LLM with input: ${inputText}`);
+      const result = await model.invoke(inputText);
+      console.log(`WatsonX LLM response: ${result}`);
+      const rawResponse = typeof result === "string" ? result : String(result);
+
+      // Extract JSON from the response if it contains additional text
+      const responseContent = this.extractJSONFromResponse(rawResponse);
+      const outputTokens = this.tokenCounter.countTokens(responseContent);
+
+      this.logger.debug(
+        `WatsonX LLM responded with ${outputTokens} output tokens`,
+      );
+
+      return {
+        content: responseContent,
+        tokenUsage: {
+          input: inputTokens,
+          output: outputTokens,
+        },
+      };
+    } catch (error) {
+      this.logger.error(
+        `WatsonX LLM API error: ${
+          error instanceof Error ? error.message : "Unknown error"
+        }`,
+      );
+      throw error;
+    }
+  }
+
+  async invokeWithImage(
+    textContent: string,
+    imageData: string,
+    options?: LlmRequestOptions,
+  ): Promise<LlmResponse> {
+    this.logger.warn(
+      "WatsonX LLM does not support multimodal (text + image) inputs. Processing text only.",
+    );
+
+    const inputTokens = this.tokenCounter.countTokens(textContent);
+
+    this.logger.debug(
+      `Invoking WatsonX LLM with text only (${inputTokens} input tokens) - image data ignored`,
+    );
+
+    const model = this.createChatModel(options);
+
+    try {
+      const result = await model.invoke(textContent);
+      const rawResponse = typeof result === "string" ? result : String(result);
+      const responseContent = this.extractJSONFromResponse(rawResponse);
+      const outputTokens = this.tokenCounter.countTokens(responseContent);
+
+      this.logger.debug(
+        `WatsonX LLM responded with ${outputTokens} output tokens`,
+      );
+
+      return {
+        content: responseContent,
+        tokenUsage: {
+          input: inputTokens,
+          output: outputTokens,
+        },
+      };
+    } catch (error) {
+      this.logger.error(
+        `WatsonX LLM API error: ${
+          error instanceof Error ? error.message : "Unknown error"
+        }`,
+      );
+      throw error;
+    }
+  }
+
+  /**
+   * Extract JSON from WatsonX response that may contain additional text
+   */
+  private extractJSONFromResponse(response: string): string {
+    try {
+      // First, try to parse the response as-is in case it's already clean JSON
+      JSON.parse(response);
+      return response;
+    } catch {
+      // If that fails, try to extract JSON from markdown code blocks
+      const jsonBlockMatch = response.match(/```json\s*([\S\s]*?)\s*```/);
+      if (jsonBlockMatch) {
+        const jsonContent = jsonBlockMatch[1].trim();
+        try {
+          JSON.parse(jsonContent);
+          return jsonContent;
+        } catch {
+          // Fall through to other extraction methods
+        }
+      }
+
+      // Try to find JSON object patterns in the response
+      const jsonObjectMatch = response.match(/{[\S\s]*}/);
+      if (jsonObjectMatch) {
+        const jsonContent = jsonObjectMatch[0];
+        try {
+          JSON.parse(jsonContent);
+          return jsonContent;
+        } catch {
+          // Fall through
+        }
+      }
+
+      // If no valid JSON found, return the original response
+      this.logger.warn(
+        "Could not extract valid JSON from WatsonX response, returning original",
+      );
+      return response;
+    }
+  }
+
+  /**
+   * Normalize image data to ensure it has the correct format
+   * Note: WatsonX LLM does not support image inputs, but keeping this method for potential future use
+   */
+  private normalizeImageData(imageData: string): string {
+    if (!imageData) {
+      throw new Error("Image data is empty or null");
+    }
+
+    if (imageData.startsWith("data:")) {
+      return imageData;
+    }
+
+    let mimeType = "image/jpeg";
+    if (imageData.startsWith("/9j/")) {
+      mimeType = "image/jpeg";
+    } else if (imageData.startsWith("iVBORw0KGgo")) {
+      mimeType = "image/png";
+    } else if (imageData.startsWith("R0lGOD")) {
+      mimeType = "image/gif";
+    } else if (imageData.startsWith("UklGR")) {
+      mimeType = "image/webp";
+    }
+
+    return `data:${mimeType};base64,${imageData}`;
+  }
+}
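A hypothetical call site for the new provider, assuming it is resolved from the Nest injection context as `gptOss` and run inside an async function; the option fields are the ones read by createChatModel above:

import { HumanMessage } from "@langchain/core/messages";

// Sketch only: `gptOss` is an injected GptOss120bLlmService instance.
const res = await gptOss.invoke(
  [new HumanMessage('Reply with JSON: {"status": "ok"}')],
  { temperature: 0.2, maxTokens: 256 },
);
console.log(res.content);    // JSON string after extractJSONFromResponse
console.log(res.tokenUsage); // { input, output } token counts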

apps/api/src/api/llm/core/services/gpt5-llm.service.ts

Lines changed: 2 additions & 3 deletions

@@ -5,7 +5,7 @@ import { WINSTON_MODULE_PROVIDER } from "nest-winston";
 import { Logger } from "winston";
 import { TOKEN_COUNTER } from "../../llm.constants";
 import {
-  ILlmProvider,
+  IMultimodalLlmProvider,
   LlmRequestOptions,
   LlmResponse,
 } from "../interfaces/llm-provider.interface";
@@ -17,7 +17,7 @@ import { ITokenCounter } from "../interfaces/token-counter.interface";
  * enhanced capabilities and performance compared to GPT-4 models.
  */
 @Injectable()
-export class Gpt5LlmService implements ILlmProvider {
+export class Gpt5LlmService implements IMultimodalLlmProvider {
   private readonly logger: Logger;
   static readonly DEFAULT_MODEL = "gpt-5";
   readonly key = "gpt-5";
@@ -34,7 +34,6 @@ export class Gpt5LlmService implements ILlmProvider {
    */
   private createChatModel(options?: LlmRequestOptions): ChatOpenAI {
     return new ChatOpenAI({
-      temperature: options?.temperature ?? 0.5,
       modelName: options?.modelName ?? Gpt5LlmService.DEFAULT_MODEL,
       maxCompletionTokens: options?.maxTokens,
     });
apps/api/src/api/llm/core/services/gpt5-mini-llm.service.ts

Lines changed: 2 additions & 2 deletions

@@ -5,7 +5,7 @@ import { WINSTON_MODULE_PROVIDER } from "nest-winston";
 import { Logger } from "winston";
 import { TOKEN_COUNTER } from "../../llm.constants";
 import {
-  ILlmProvider,
+  IMultimodalLlmProvider,
   LlmRequestOptions,
   LlmResponse,
 } from "../interfaces/llm-provider.interface";
@@ -17,7 +17,7 @@ import { ITokenCounter } from "../interfaces/token-counter.interface";
  * and cost-effectiveness for simpler tasks.
  */
 @Injectable()
-export class Gpt5MiniLlmService implements ILlmProvider {
+export class Gpt5MiniLlmService implements IMultimodalLlmProvider {
   private readonly logger: Logger;
   static readonly DEFAULT_MODEL = "gpt-5-mini";
   readonly key = "gpt-5-mini";

apps/api/src/api/llm/core/services/gpt5-nano-llm.service.ts

Lines changed: 2 additions & 2 deletions

@@ -5,7 +5,7 @@ import { WINSTON_MODULE_PROVIDER } from "nest-winston";
 import { Logger } from "winston";
 import { TOKEN_COUNTER } from "../../llm.constants";
 import {
-  ILlmProvider,
+  IMultimodalLlmProvider,
   LlmRequestOptions,
   LlmResponse,
 } from "../interfaces/llm-provider.interface";
@@ -17,7 +17,7 @@ import { ITokenCounter } from "../interfaces/token-counter.interface";
  * cost-effectiveness, ideal for basic text processing and quick responses.
  */
 @Injectable()
-export class Gpt5NanoLlmService implements ILlmProvider {
+export class Gpt5NanoLlmService implements IMultimodalLlmProvider {
   private readonly logger: Logger;
   static readonly DEFAULT_MODEL = "gpt-5-nano";
   readonly key = "gpt-5-nano";

apps/api/src/api/llm/core/services/llm-resolver.service.ts

Lines changed: 7 additions & 2 deletions

@@ -24,7 +24,7 @@ export class LLMResolverService {
     string,
     { modelKey: string; cachedAt: number }
   >();
-  private readonly CACHE_TTL = 5 * 60 * 1000; // 5 minutes cache
+  private readonly CACHE_TTL = 1;
 
   constructor(
     @Inject(LLM_ASSIGNMENT_SERVICE)
@@ -49,6 +49,9 @@
     // Get assigned model from service
     const modelKey =
       await this.assignmentService.getAssignedModel(featureKey);
+    console.log(
+      `Resolving model for feature ${featureKey}, assigned: ${modelKey}`,
+    );
 
     if (modelKey) {
       // Cache the result
@@ -144,7 +147,9 @@
     fallbackModel = "gpt-4o-mini",
   ): Promise<string> {
     const resolvedModel = await this.resolveModelForFeature(featureKey);
-
+    console.log(
+      `Resolving model for feature ${featureKey}, resolved: ${resolvedModel}`,
+    );
    if (resolvedModel) {
      return resolvedModel;
    }
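For context, the visible cache type `Map<string, { modelKey: string; cachedAt: number }>` implies a freshness check roughly like the sketch below; the field name `modelCache` is assumed (the declaration's name sits outside the hunk), and the check itself is inferred, not quoted:

// Assumed shape of the TTL lookup, inferred from the cache entry type.
const cached = this.modelCache.get(featureKey);
if (cached && Date.now() - cached.cachedAt < this.CACHE_TTL) {
  return cached.modelKey; // still fresh: skip the assignment service
}
// With CACHE_TTL = 1 (one millisecond), entries go stale almost
// immediately, so effectively every lookup bypasses the cache.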

apps/api/src/api/llm/core/services/llm-router.service.ts

Lines changed: 7 additions & 7 deletions

@@ -1,30 +1,30 @@
 import { Inject, Injectable, Logger } from "@nestjs/common";
 import { ALL_LLM_PROVIDERS, LLM_RESOLVER_SERVICE } from "../../llm.constants";
-import { ILlmProvider } from "../interfaces/llm-provider.interface";
+import { IMultimodalLlmProvider } from "../interfaces/llm-provider.interface";
 import { LLMResolverService } from "./llm-resolver.service";
 
 @Injectable()
 export class LlmRouter {
   private readonly logger = new Logger(LlmRouter.name);
-  private readonly map: Map<string, ILlmProvider>;
+  private readonly map: Map<string, IMultimodalLlmProvider>;
 
   constructor(
-    @Inject(ALL_LLM_PROVIDERS) providers: ILlmProvider[],
+    @Inject(ALL_LLM_PROVIDERS) providers: IMultimodalLlmProvider[],
     @Inject(LLM_RESOLVER_SERVICE)
     private readonly resolverService: LLMResolverService,
   ) {
     this.map = new Map(providers.map((p) => [p.key, p]));
   }
 
   /** Return provider by key, or throw if it doesn't exist */
-  get(key: string): ILlmProvider {
+  get(key: string): IMultimodalLlmProvider {
     const found = this.map.get(key);
     if (!found) throw new Error(`No LLM provider registered for key "${key}"`);
     return found;
   }
 
   /** Get provider for a specific AI feature (uses dynamic assignment) */
-  async getForFeature(featureKey: string): Promise<ILlmProvider> {
+  async getForFeature(featureKey: string): Promise<IMultimodalLlmProvider> {
     try {
       const assignedModelKey =
         await this.resolverService.resolveModelForFeature(featureKey);
@@ -62,7 +62,7 @@
   async getForFeatureWithFallback(
     featureKey: string,
     fallbackModelKey = "gpt-4o-mini",
-  ): Promise<ILlmProvider> {
+  ): Promise<IMultimodalLlmProvider> {
     try {
       const assignedModelKey =
         await this.resolverService.getModelKeyWithFallback(
@@ -90,7 +90,7 @@
   }
 
   /** Convenience default (first registered) */
-  getDefault(): ILlmProvider {
+  getDefault(): IMultimodalLlmProvider {
     return this.map.values().next().value;
   }

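A hypothetical call site tying the router changes together; `llmRouter` is an injected LlmRouter, the snippet runs inside an async function, and "summary-generation" is an assumed feature key, not one defined in this commit:

import { HumanMessage } from "@langchain/core/messages";

// Sketch only: resolve the provider assigned to a feature, with fallback.
const provider = await llmRouter.getForFeatureWithFallback("summary-generation");
const response = await provider.invoke([new HumanMessage("Summarize: ...")]);
// Because providers are now typed as IMultimodalLlmProvider,
// provider.invokeWithImage(...) is also available here, even for
// backends that log a warning and ignore the image.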
apps/api/src/api/llm/core/services/openai-llm-mini.service.ts

Lines changed: 2 additions & 2 deletions

@@ -5,7 +5,7 @@ import { WINSTON_MODULE_PROVIDER } from "nest-winston";
 import { Logger } from "winston";
 import { TOKEN_COUNTER } from "../../llm.constants";
 import {
-  ILlmProvider,
+  IMultimodalLlmProvider,
   LlmRequestOptions,
   LlmResponse,
 } from "../interfaces/llm-provider.interface";
@@ -16,7 +16,7 @@ import { ITokenCounter } from "../interfaces/token-counter.interface";
  * Usage is identical to the full-size OpenAiLlmService.
  */
 @Injectable()
-export class OpenAiLlmMiniService implements ILlmProvider {
+export class OpenAiLlmMiniService implements IMultimodalLlmProvider {
   private readonly logger: Logger;
   static readonly DEFAULT_MODEL = "gpt-4o-mini";
   readonly key = "gpt-4o-mini";
