@@ -0,0 +1,26 @@
-- Add the GPT-OSS-120B foundation model to the LLMModel table
-- This migration registers the openai/gpt-oss-120b model (served via IBM watsonx) and seeds its initial pricing

INSERT INTO "LLMModel" ("modelKey", "displayName", "provider", "isActive", "createdAt", "updatedAt") VALUES
('gpt-oss-120b', 'GPT-OSS-120B', 'OpenAI', true, NOW(), NOW());

-- Add initial pricing data for the new model
-- Note: these are estimated prices; adjust once actual pricing for this model is available
WITH new_models AS (
SELECT id, "modelKey" FROM "LLMModel" WHERE "modelKey" IN ('gpt-oss-120b')
)
INSERT INTO "LLMPricing" ("modelId", "inputTokenPrice", "outputTokenPrice", "effectiveDate", "source", "isActive", "createdAt", "updatedAt")
SELECT
m.id,
CASE
WHEN m."modelKey" = 'gpt-oss-120b' THEN 0.00000015
END,
CASE
WHEN m."modelKey" = 'gpt-oss-120b' THEN 0.0000006
END,
NOW(),
'MANUAL',
true,
NOW(),
NOW()
FROM new_models m;
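
For context, the seeded per-token prices translate into request cost as sketched below. This is a minimal illustration: the estimateCostUsd helper is not an existing API, and the tokenUsage shape simply mirrors the LlmResponse interface used by the services in this PR.

// Rough sketch of how the seeded per-token prices translate into request cost.
// The literal prices below mirror the INSERT above; the helper is hypothetical.
interface TokenUsage {
  input: number;
  output: number;
}

function estimateCostUsd(
  usage: TokenUsage,
  inputTokenPrice: number,
  outputTokenPrice: number,
): number {
  return usage.input * inputTokenPrice + usage.output * outputTokenPrice;
}

// Example: 1,200 input tokens and 300 output tokens at the gpt-oss-120b prices above
// => 1200 * 0.00000015 + 300 * 0.0000006 = 0.00036 USD.
const cost = estimateCostUsd({ input: 1200, output: 300 }, 0.00000015, 0.0000006);
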
@@ -24,7 +24,9 @@ export interface ILlmProvider {
options?: LlmRequestOptions,
): Promise<LlmResponse>;
readonly key: string;
}

export interface IMultimodalLlmProvider extends ILlmProvider {
/**
* Send a request with image content to the LLM
*/
197 changes: 197 additions & 0 deletions apps/api/src/api/llm/core/services/gpt-oss-120b-llm-service.ts
@@ -0,0 +1,197 @@
import { WatsonxLLM } from "@langchain/community/llms/ibm";
import { HumanMessage } from "@langchain/core/messages";
import { Inject, Injectable } from "@nestjs/common";
import { WINSTON_MODULE_PROVIDER } from "nest-winston";
import { Logger } from "winston";
import { TOKEN_COUNTER } from "../../llm.constants";
import {
IMultimodalLlmProvider,
LlmRequestOptions,
LlmResponse,
} from "../interfaces/llm-provider.interface";
import { ITokenCounter } from "../interfaces/token-counter.interface";

@Injectable()
export class GptOss120bLlmService implements IMultimodalLlmProvider {
private readonly logger: Logger;
static readonly DEFAULT_MODEL = "openai/gpt-oss-120b";
readonly key = "gpt-oss-120b";

constructor(
@Inject(TOKEN_COUNTER) private readonly tokenCounter: ITokenCounter,
@Inject(WINSTON_MODULE_PROVIDER) parentLogger: Logger,
) {
this.logger = parentLogger.child({ context: GptOss120bLlmService.name });
}

private createChatModel(options?: LlmRequestOptions): WatsonxLLM {
return new WatsonxLLM({
version: "2024-05-31",
serviceUrl: "https://us-south.ml.cloud.ibm.com",
projectId: process.env.WATSONX_PROJECT_ID_LLAMA || "",
watsonxAIAuthType: "iam",
watsonxAIApikey: process.env.WATSONX_AI_API_KEY_LLAMA || "", // pragma: allowlist secret
model: options?.modelName ?? GptOss120bLlmService.DEFAULT_MODEL,
temperature: options?.temperature ?? 0.5,
maxNewTokens: options?.maxTokens ?? 1000,
});
}

async invoke(
messages: HumanMessage[],
options?: LlmRequestOptions,
): Promise<LlmResponse> {
const model = this.createChatModel(options);

const inputText = messages
.map((m) =>
typeof m.content === "string" ? m.content : JSON.stringify(m.content),
)
.join("\n");
const inputTokens = this.tokenCounter.countTokens(inputText);

this.logger.debug(`Invoking WatsonX LLM with ${inputTokens} input tokens`);

try {
console.log(`Invoking WatsonX LLM with input: ${inputText}`);
const result = await model.invoke(inputText);
console.log(`WatsonX LLM response: ${result}`);
const rawResponse = typeof result === "string" ? result : String(result);

// Extract JSON from the response if it contains additional text
const responseContent = this.extractJSONFromResponse(rawResponse);
const outputTokens = this.tokenCounter.countTokens(responseContent);

this.logger.debug(
`WatsonX LLM responded with ${outputTokens} output tokens`,
);

return {
content: responseContent,
tokenUsage: {
input: inputTokens,
output: outputTokens,
},
};
} catch (error) {
this.logger.error(
`WatsonX LLM API error: ${
error instanceof Error ? error.message : "Unknown error"
}`,
);
throw error;
}
}

async invokeWithImage(
textContent: string,
imageData: string,
options?: LlmRequestOptions,
): Promise<LlmResponse> {
this.logger.warn(
"WatsonX LLM does not support multimodal (text + image) inputs. Processing text only.",
);

const inputTokens = this.tokenCounter.countTokens(textContent);

this.logger.debug(
`Invoking WatsonX LLM with text only (${inputTokens} input tokens) - image data ignored`,
);

const model = this.createChatModel(options);

try {
const result = await model.invoke(textContent);
const rawResponse = typeof result === "string" ? result : String(result);
const responseContent = this.extractJSONFromResponse(rawResponse);
const outputTokens = this.tokenCounter.countTokens(responseContent);

this.logger.debug(
`WatsonX LLM responded with ${outputTokens} output tokens`,
);

return {
content: responseContent,
tokenUsage: {
input: inputTokens,
output: outputTokens,
},
};
} catch (error) {
this.logger.error(
`WatsonX LLM API error: ${
error instanceof Error ? error.message : "Unknown error"
}`,
);
throw error;
}
}

/**
* Extract JSON from WatsonX response that may contain additional text
*/
private extractJSONFromResponse(response: string): string {
try {
// First, try to parse the response as-is in case it's already clean JSON
JSON.parse(response);
return response;
} catch {
// If that fails, try to extract JSON from markdown code blocks
const jsonBlockMatch = response.match(/```json\s*([\S\s]*?)\s*```/);
if (jsonBlockMatch) {
const jsonContent = jsonBlockMatch[1].trim();
try {
JSON.parse(jsonContent);
return jsonContent;
} catch {
// Fall through to other extraction methods
}
}

// Try to find JSON object patterns in the response
const jsonObjectMatch = response.match(/{[\S\s]*}/);
if (jsonObjectMatch) {
const jsonContent = jsonObjectMatch[0];
try {
JSON.parse(jsonContent);
return jsonContent;
} catch {
// Fall through
}
}

// If no valid JSON found, return the original response
this.logger.warn(
"Could not extract valid JSON from WatsonX response, returning original",
);
return response;
}
}

/**
* Normalize image data to ensure it has the correct format
* Note: WatsonX LLM does not support image inputs, but keeping this method for potential future use
*/
private normalizeImageData(imageData: string): string {
if (!imageData) {
throw new Error("Image data is empty or null");
}

if (imageData.startsWith("data:")) {
return imageData;
}

let mimeType = "image/jpeg";
if (imageData.startsWith("/9j/")) {
mimeType = "image/jpeg";
} else if (imageData.startsWith("iVBORw0KGgo")) {
mimeType = "image/png";
} else if (imageData.startsWith("R0lGOD")) {
mimeType = "image/gif";
} else if (imageData.startsWith("UklGR")) {
mimeType = "image/webp";
}

return `data:${mimeType};base64,${imageData}`;
}
}
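
A rough usage sketch of the new provider follows. In the app both constructor dependencies are injected by Nest and requests flow through LlmRouter; the stand-in token counter assumes ITokenCounter is just { countTokens(text: string): number }, and the snippet assumes WATSONX_AI_API_KEY_LLAMA and WATSONX_PROJECT_ID_LLAMA are set in the environment.

import { HumanMessage } from "@langchain/core/messages";
import { createLogger } from "winston";
import { GptOss120bLlmService } from "./gpt-oss-120b-llm-service";

async function main() {
  // Hypothetical stand-in for the injected ITokenCounter; the real implementation is provided by Nest.
  const tokenCounter = { countTokens: (text: string) => Math.ceil(text.length / 4) };
  const service = new GptOss120bLlmService(tokenCounter, createLogger());

  // Plain text request; extractJSONFromResponse passes the reply through unchanged
  // unless the model wraps its JSON in extra prose or a fenced json block.
  const response = await service.invoke(
    [new HumanMessage('Return {"status":"ok"} as JSON.')],
    { temperature: 0.2, maxTokens: 200 },
  );
  console.log(response.content, response.tokenUsage);

  // invokeWithImage accepts base64 image data, but as the warning in the service notes,
  // the WatsonxLLM call is text-only here, so the image is ignored.
  const textOnly = await service.invokeWithImage("Describe the attached chart.", "iVBORw0KGgo...");
  console.log(textOnly.content);
}

main();
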
5 changes: 2 additions & 3 deletions apps/api/src/api/llm/core/services/gpt5-llm.service.ts
@@ -5,7 +5,7 @@ import { WINSTON_MODULE_PROVIDER } from "nest-winston";
import { Logger } from "winston";
import { TOKEN_COUNTER } from "../../llm.constants";
import {
ILlmProvider,
IMultimodalLlmProvider,
LlmRequestOptions,
LlmResponse,
} from "../interfaces/llm-provider.interface";
@@ -17,7 +17,7 @@ import { ITokenCounter } from "../interfaces/token-counter.interface";
* enhanced capabilities and performance compared to GPT-4 models.
*/
@Injectable()
export class Gpt5LlmService implements ILlmProvider {
export class Gpt5LlmService implements IMultimodalLlmProvider {
private readonly logger: Logger;
static readonly DEFAULT_MODEL = "gpt-5";
readonly key = "gpt-5";
@@ -34,7 +34,6 @@ export class Gpt5LlmService implements ILlmProvider {
*/
private createChatModel(options?: LlmRequestOptions): ChatOpenAI {
return new ChatOpenAI({
temperature: options?.temperature ?? 0.5,
modelName: options?.modelName ?? Gpt5LlmService.DEFAULT_MODEL,
maxCompletionTokens: options?.maxTokens,
});
4 changes: 2 additions & 2 deletions apps/api/src/api/llm/core/services/gpt5-mini-llm.service.ts
@@ -5,7 +5,7 @@ import { WINSTON_MODULE_PROVIDER } from "nest-winston";
import { Logger } from "winston";
import { TOKEN_COUNTER } from "../../llm.constants";
import {
ILlmProvider,
IMultimodalLlmProvider,
LlmRequestOptions,
LlmResponse,
} from "../interfaces/llm-provider.interface";
@@ -17,7 +17,7 @@ import { ITokenCounter } from "../interfaces/token-counter.interface";
* and cost-effectiveness for simpler tasks.
*/
@Injectable()
export class Gpt5MiniLlmService implements ILlmProvider {
export class Gpt5MiniLlmService implements IMultimodalLlmProvider {
private readonly logger: Logger;
static readonly DEFAULT_MODEL = "gpt-5-mini";
readonly key = "gpt-5-mini";
4 changes: 2 additions & 2 deletions apps/api/src/api/llm/core/services/gpt5-nano-llm.service.ts
@@ -5,7 +5,7 @@ import { WINSTON_MODULE_PROVIDER } from "nest-winston";
import { Logger } from "winston";
import { TOKEN_COUNTER } from "../../llm.constants";
import {
ILlmProvider,
IMultimodalLlmProvider,
LlmRequestOptions,
LlmResponse,
} from "../interfaces/llm-provider.interface";
@@ -17,7 +17,7 @@ import { ITokenCounter } from "../interfaces/token-counter.interface";
* cost-effectiveness, ideal for basic text processing and quick responses.
*/
@Injectable()
export class Gpt5NanoLlmService implements ILlmProvider {
export class Gpt5NanoLlmService implements IMultimodalLlmProvider {
private readonly logger: Logger;
static readonly DEFAULT_MODEL = "gpt-5-nano";
readonly key = "gpt-5-nano";
9 changes: 7 additions & 2 deletions apps/api/src/api/llm/core/services/llm-resolver.service.ts
@@ -24,7 +24,7 @@ export class LLMResolverService {
string,
{ modelKey: string; cachedAt: number }
>();
private readonly CACHE_TTL = 5 * 60 * 1000; // 5 minutes cache
private readonly CACHE_TTL = 1;

constructor(
@Inject(LLM_ASSIGNMENT_SERVICE)
@@ -49,6 +49,9 @@
// Get assigned model from service
const modelKey =
await this.assignmentService.getAssignedModel(featureKey);
console.log(
`Resolving model for feature ${featureKey}, assigned: ${modelKey}`,
);

if (modelKey) {
// Cache the result
@@ -144,7 +147,9 @@
fallbackModel = "gpt-4o-mini",
): Promise<string> {
const resolvedModel = await this.resolveModelForFeature(featureKey);

console.log(
`Resolving model for feature ${featureKey}, resolved: ${resolvedModel}`,
);
if (resolvedModel) {
return resolvedModel;
}
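
For context, the cache lookup itself is not visible in this hunk; a minimal sketch of the freshness check implied by the cache shape above follows (assumption: cachedAt stores a Date.now() timestamp, and isFresh is a hypothetical helper name). Note that with CACHE_TTL = 1 (one millisecond) effectively every lookup misses the cache and falls through to assignmentService.getAssignedModel.

// Sketch of a TTL freshness check consistent with the cache entry shape above.
function isFresh(entry: { modelKey: string; cachedAt: number }, ttlMs: number): boolean {
  return Date.now() - entry.cachedAt < ttlMs;
}
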
14 changes: 7 additions & 7 deletions apps/api/src/api/llm/core/services/llm-router.service.ts
@@ -1,30 +1,30 @@
import { Inject, Injectable, Logger } from "@nestjs/common";
import { ALL_LLM_PROVIDERS, LLM_RESOLVER_SERVICE } from "../../llm.constants";
import { ILlmProvider } from "../interfaces/llm-provider.interface";
import { IMultimodalLlmProvider } from "../interfaces/llm-provider.interface";
import { LLMResolverService } from "./llm-resolver.service";

@Injectable()
export class LlmRouter {
private readonly logger = new Logger(LlmRouter.name);
private readonly map: Map<string, ILlmProvider>;
private readonly map: Map<string, IMultimodalLlmProvider>;

constructor(
@Inject(ALL_LLM_PROVIDERS) providers: ILlmProvider[],
@Inject(ALL_LLM_PROVIDERS) providers: IMultimodalLlmProvider[],
@Inject(LLM_RESOLVER_SERVICE)
private readonly resolverService: LLMResolverService,
) {
this.map = new Map(providers.map((p) => [p.key, p]));
}

/** Return provider by key, or throw if it doesn't exist */
get(key: string): ILlmProvider {
get(key: string): IMultimodalLlmProvider {
const found = this.map.get(key);
if (!found) throw new Error(`No LLM provider registered for key "${key}"`);
return found;
}

/** Get provider for a specific AI feature (uses dynamic assignment) */
async getForFeature(featureKey: string): Promise<ILlmProvider> {
async getForFeature(featureKey: string): Promise<IMultimodalLlmProvider> {
try {
const assignedModelKey =
await this.resolverService.resolveModelForFeature(featureKey);
@@ -62,7 +62,7 @@ export class LlmRouter {
async getForFeatureWithFallback(
featureKey: string,
fallbackModelKey = "gpt-4o-mini",
): Promise<ILlmProvider> {
): Promise<IMultimodalLlmProvider> {
try {
const assignedModelKey =
await this.resolverService.getModelKeyWithFallback(
@@ -90,7 +90,7 @@ export class LlmRouter {
}

/** Convenience default (first registered) */
getDefault(): ILlmProvider {
getDefault(): IMultimodalLlmProvider {
return this.map.values().next().value;
}

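
A rough consumer-side sketch of the router API above: the "summary-generation" feature key is hypothetical and used purely for illustration, and the import path assumes a sibling module.

import { HumanMessage } from "@langchain/core/messages";
import { LlmRouter } from "./llm-router.service";

// Resolve the provider assigned to a feature, falling back to gpt-4o-mini if nothing is assigned.
async function summarize(llmRouter: LlmRouter, text: string): Promise<string> {
  const provider = await llmRouter.getForFeatureWithFallback("summary-generation", "gpt-4o-mini");
  const response = await provider.invoke([new HumanMessage(`Summarize:\n${text}`)]);
  return response.content;
}
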
@@ -5,7 +5,7 @@ import { WINSTON_MODULE_PROVIDER } from "nest-winston";
import { Logger } from "winston";
import { TOKEN_COUNTER } from "../../llm.constants";
import {
ILlmProvider,
IMultimodalLlmProvider,
LlmRequestOptions,
LlmResponse,
} from "../interfaces/llm-provider.interface";
@@ -16,7 +16,7 @@ import { ITokenCounter } from "../interfaces/token-counter.interface";
* Usage is identical to the full-size OpenAiLlmService.
*/
@Injectable()
export class OpenAiLlmMiniService implements ILlmProvider {
export class OpenAiLlmMiniService implements IMultimodalLlmProvider {
private readonly logger: Logger;
static readonly DEFAULT_MODEL = "gpt-4o-mini";
readonly key = "gpt-4o-mini";