From 355bd254d15587ab6924fa52a0b1e8f5e4db5e72 Mon Sep 17 00:00:00 2001 From: MmagdyhafezZ Date: Wed, 1 Oct 2025 14:25:25 -0600 Subject: [PATCH] Behzad changes --- .../migration.sql | 26 +++ .../core/interfaces/llm-provider.interface.ts | 2 + .../core/services/gpt-oss-120b-llm-service.ts | 197 ++++++++++++++++++ .../api/llm/core/services/gpt5-llm.service.ts | 5 +- .../core/services/gpt5-mini-llm.service.ts | 4 +- .../core/services/gpt5-nano-llm.service.ts | 4 +- .../llm/core/services/llm-resolver.service.ts | 9 +- .../llm/core/services/llm-router.service.ts | 14 +- .../core/services/openai-llm-mini.service.ts | 4 +- .../services/openai-llm-vision.service.ts | 4 +- .../llm/core/services/openai-llm.service.ts | 4 +- .../core/services/prompt-processor.service.ts | 30 ++- .../grading/services/file-grading.service.ts | 21 +- .../grading/services/grading-judge.service.ts | 3 +- .../services/presentation-grading.service.ts | 6 +- .../grading/services/url-grading.service.ts | 18 +- .../grading/services/video-grading.service.ts | 3 +- .../services/question-generation.service.ts | 12 +- .../services/question-validator.service.ts | 3 +- .../rubric/services/rubric.service.ts | 7 +- .../services/translation.service.ts | 8 +- apps/api/src/api/llm/llm.module.ts | 8 +- 22 files changed, 333 insertions(+), 59 deletions(-) create mode 100644 apps/api/prisma/migrations/20250915144814_add_ibm_foundation_models/migration.sql create mode 100644 apps/api/src/api/llm/core/services/gpt-oss-120b-llm-service.ts diff --git a/apps/api/prisma/migrations/20250915144814_add_ibm_foundation_models/migration.sql b/apps/api/prisma/migrations/20250915144814_add_ibm_foundation_models/migration.sql new file mode 100644 index 00000000..cdb7e21e --- /dev/null +++ b/apps/api/prisma/migrations/20250915144814_add_ibm_foundation_models/migration.sql @@ -0,0 +1,26 @@ +-- Add IBM foundation model variant to the LLMModel table +-- This migration adds the IBM foundation model GPT-oss-120b + +INSERT INTO "LLMModel" ("modelKey", "displayName", "provider", "isActive", "createdAt", "updatedAt") VALUES +('gpt-oss-120b', 'GPT-OSS-120B', 'OpenAI', true, NOW(), NOW()); + +-- Add initial pricing data for the new GPT model +-- Note: These are estimated prices, adjust based on actual OpenAI pricing when available +WITH new_models AS ( + SELECT id, "modelKey" FROM "LLMModel" WHERE "modelKey" IN ('gpt-oss-120b') +) +INSERT INTO "LLMPricing" ("modelId", "inputTokenPrice", "outputTokenPrice", "effectiveDate", "source", "isActive", "createdAt", "updatedAt") +SELECT + m.id, + CASE + WHEN m."modelKey" = 'gpt-oss-120b' THEN 0.00000015 + END, + CASE + WHEN m."modelKey" = 'gpt-oss-120b' THEN 0.0000006 + END, + NOW(), + 'MANUAL', + true, + NOW(), + NOW() +FROM new_models m; \ No newline at end of file diff --git a/apps/api/src/api/llm/core/interfaces/llm-provider.interface.ts b/apps/api/src/api/llm/core/interfaces/llm-provider.interface.ts index 0b5b88df..d00b2219 100644 --- a/apps/api/src/api/llm/core/interfaces/llm-provider.interface.ts +++ b/apps/api/src/api/llm/core/interfaces/llm-provider.interface.ts @@ -24,7 +24,9 @@ export interface ILlmProvider { options?: LlmRequestOptions, ): Promise; readonly key: string; +} +export interface IMultimodalLlmProvider extends ILlmProvider { /** * Send a request with image content to the LLM */ diff --git a/apps/api/src/api/llm/core/services/gpt-oss-120b-llm-service.ts b/apps/api/src/api/llm/core/services/gpt-oss-120b-llm-service.ts new file mode 100644 index 00000000..a5a82281 --- /dev/null +++ 
b/apps/api/src/api/llm/core/services/gpt-oss-120b-llm-service.ts @@ -0,0 +1,197 @@ +import { WatsonxLLM } from "@langchain/community/llms/ibm"; +import { HumanMessage } from "@langchain/core/messages"; +import { Inject, Injectable } from "@nestjs/common"; +import { WINSTON_MODULE_PROVIDER } from "nest-winston"; +import { Logger } from "winston"; +import { TOKEN_COUNTER } from "../../llm.constants"; +import { + IMultimodalLlmProvider, + LlmRequestOptions, + LlmResponse, +} from "../interfaces/llm-provider.interface"; +import { ITokenCounter } from "../interfaces/token-counter.interface"; + +@Injectable() +export class GptOss120bLlmService implements IMultimodalLlmProvider { + private readonly logger: Logger; + static readonly DEFAULT_MODEL = "openai/gpt-oss-120b"; + readonly key = "gpt-oss-120b"; + + constructor( + @Inject(TOKEN_COUNTER) private readonly tokenCounter: ITokenCounter, + @Inject(WINSTON_MODULE_PROVIDER) parentLogger: Logger, + ) { + this.logger = parentLogger.child({ context: GptOss120bLlmService.name }); + } + + private createChatModel(options?: LlmRequestOptions): WatsonxLLM { + return new WatsonxLLM({ + version: "2024-05-31", + serviceUrl: "https://us-south.ml.cloud.ibm.com", + projectId: process.env.WATSONX_PROJECT_ID_LLAMA || "", + watsonxAIAuthType: "iam", + watsonxAIApikey: process.env.WATSONX_AI_API_KEY_LLAMA || "", // pragma: allowlist secret + model: options?.modelName ?? GptOss120bLlmService.DEFAULT_MODEL, + temperature: options?.temperature ?? 0.5, + maxNewTokens: options?.maxTokens ?? 1000, + }); + } + + async invoke( + messages: HumanMessage[], + options?: LlmRequestOptions, + ): Promise { + const model = this.createChatModel(options); + + const inputText = messages + .map((m) => + typeof m.content === "string" ? m.content : JSON.stringify(m.content), + ) + .join("\n"); + const inputTokens = this.tokenCounter.countTokens(inputText); + + this.logger.debug(`Invoking WatsonX LLM with ${inputTokens} input tokens`); + + try { + console.log(`Invoking WatsonX LLM with input: ${inputText}`); + const result = await model.invoke(inputText); + console.log(`WatsonX LLM response: ${result}`); + const rawResponse = typeof result === "string" ? result : String(result); + + // Extract JSON from the response if it contains additional text + const responseContent = this.extractJSONFromResponse(rawResponse); + const outputTokens = this.tokenCounter.countTokens(responseContent); + + this.logger.debug( + `WatsonX LLM responded with ${outputTokens} output tokens`, + ); + + return { + content: responseContent, + tokenUsage: { + input: inputTokens, + output: outputTokens, + }, + }; + } catch (error) { + this.logger.error( + `WatsonX LLM API error: ${ + error instanceof Error ? error.message : "Unknown error" + }`, + ); + throw error; + } + } + + async invokeWithImage( + textContent: string, + imageData: string, + options?: LlmRequestOptions, + ): Promise { + this.logger.warn( + "WatsonX LLM does not support multimodal (text + image) inputs. Processing text only.", + ); + + const inputTokens = this.tokenCounter.countTokens(textContent); + + this.logger.debug( + `Invoking WatsonX LLM with text only (${inputTokens} input tokens) - image data ignored`, + ); + + const model = this.createChatModel(options); + + try { + const result = await model.invoke(textContent); + const rawResponse = typeof result === "string" ? 
result : String(result); + const responseContent = this.extractJSONFromResponse(rawResponse); + const outputTokens = this.tokenCounter.countTokens(responseContent); + + this.logger.debug( + `WatsonX LLM responded with ${outputTokens} output tokens`, + ); + + return { + content: responseContent, + tokenUsage: { + input: inputTokens, + output: outputTokens, + }, + }; + } catch (error) { + this.logger.error( + `WatsonX LLM API error: ${ + error instanceof Error ? error.message : "Unknown error" + }`, + ); + throw error; + } + } + + /** + * Extract JSON from WatsonX response that may contain additional text + */ + private extractJSONFromResponse(response: string): string { + try { + // First, try to parse the response as-is in case it's already clean JSON + JSON.parse(response); + return response; + } catch { + // If that fails, try to extract JSON from markdown code blocks + const jsonBlockMatch = response.match(/```json\s*([\S\s]*?)\s*```/); + if (jsonBlockMatch) { + const jsonContent = jsonBlockMatch[1].trim(); + try { + JSON.parse(jsonContent); + return jsonContent; + } catch { + // Fall through to other extraction methods + } + } + + // Try to find JSON object patterns in the response + const jsonObjectMatch = response.match(/{[\S\s]*}/); + if (jsonObjectMatch) { + const jsonContent = jsonObjectMatch[0]; + try { + JSON.parse(jsonContent); + return jsonContent; + } catch { + // Fall through + } + } + + // If no valid JSON found, return the original response + this.logger.warn( + "Could not extract valid JSON from WatsonX response, returning original", + ); + return response; + } + } + + /** + * Normalize image data to ensure it has the correct format + * Note: WatsonX LLM does not support image inputs, but keeping this method for potential future use + */ + private normalizeImageData(imageData: string): string { + if (!imageData) { + throw new Error("Image data is empty or null"); + } + + if (imageData.startsWith("data:")) { + return imageData; + } + + let mimeType = "image/jpeg"; + if (imageData.startsWith("/9j/")) { + mimeType = "image/jpeg"; + } else if (imageData.startsWith("iVBORw0KGgo")) { + mimeType = "image/png"; + } else if (imageData.startsWith("R0lGOD")) { + mimeType = "image/gif"; + } else if (imageData.startsWith("UklGR")) { + mimeType = "image/webp"; + } + + return `data:${mimeType};base64,${imageData}`; + } +} diff --git a/apps/api/src/api/llm/core/services/gpt5-llm.service.ts b/apps/api/src/api/llm/core/services/gpt5-llm.service.ts index da6f408b..45425b0b 100644 --- a/apps/api/src/api/llm/core/services/gpt5-llm.service.ts +++ b/apps/api/src/api/llm/core/services/gpt5-llm.service.ts @@ -5,7 +5,7 @@ import { WINSTON_MODULE_PROVIDER } from "nest-winston"; import { Logger } from "winston"; import { TOKEN_COUNTER } from "../../llm.constants"; import { - ILlmProvider, + IMultimodalLlmProvider, LlmRequestOptions, LlmResponse, } from "../interfaces/llm-provider.interface"; @@ -17,7 +17,7 @@ import { ITokenCounter } from "../interfaces/token-counter.interface"; * enhanced capabilities and performance compared to GPT-4 models. */ @Injectable() -export class Gpt5LlmService implements ILlmProvider { +export class Gpt5LlmService implements IMultimodalLlmProvider { private readonly logger: Logger; static readonly DEFAULT_MODEL = "gpt-5"; readonly key = "gpt-5"; @@ -34,7 +34,6 @@ export class Gpt5LlmService implements ILlmProvider { */ private createChatModel(options?: LlmRequestOptions): ChatOpenAI { return new ChatOpenAI({ - temperature: options?.temperature ?? 
0.5, modelName: options?.modelName ?? Gpt5LlmService.DEFAULT_MODEL, maxCompletionTokens: options?.maxTokens, }); diff --git a/apps/api/src/api/llm/core/services/gpt5-mini-llm.service.ts b/apps/api/src/api/llm/core/services/gpt5-mini-llm.service.ts index b2e799bf..8adf397f 100644 --- a/apps/api/src/api/llm/core/services/gpt5-mini-llm.service.ts +++ b/apps/api/src/api/llm/core/services/gpt5-mini-llm.service.ts @@ -5,7 +5,7 @@ import { WINSTON_MODULE_PROVIDER } from "nest-winston"; import { Logger } from "winston"; import { TOKEN_COUNTER } from "../../llm.constants"; import { - ILlmProvider, + IMultimodalLlmProvider, LlmRequestOptions, LlmResponse, } from "../interfaces/llm-provider.interface"; @@ -17,7 +17,7 @@ import { ITokenCounter } from "../interfaces/token-counter.interface"; * and cost-effectiveness for simpler tasks. */ @Injectable() -export class Gpt5MiniLlmService implements ILlmProvider { +export class Gpt5MiniLlmService implements IMultimodalLlmProvider { private readonly logger: Logger; static readonly DEFAULT_MODEL = "gpt-5-mini"; readonly key = "gpt-5-mini"; diff --git a/apps/api/src/api/llm/core/services/gpt5-nano-llm.service.ts b/apps/api/src/api/llm/core/services/gpt5-nano-llm.service.ts index c17bb2ee..c2120705 100644 --- a/apps/api/src/api/llm/core/services/gpt5-nano-llm.service.ts +++ b/apps/api/src/api/llm/core/services/gpt5-nano-llm.service.ts @@ -5,7 +5,7 @@ import { WINSTON_MODULE_PROVIDER } from "nest-winston"; import { Logger } from "winston"; import { TOKEN_COUNTER } from "../../llm.constants"; import { - ILlmProvider, + IMultimodalLlmProvider, LlmRequestOptions, LlmResponse, } from "../interfaces/llm-provider.interface"; @@ -17,7 +17,7 @@ import { ITokenCounter } from "../interfaces/token-counter.interface"; * cost-effectiveness, ideal for basic text processing and quick responses. 
*/ @Injectable() -export class Gpt5NanoLlmService implements ILlmProvider { +export class Gpt5NanoLlmService implements IMultimodalLlmProvider { private readonly logger: Logger; static readonly DEFAULT_MODEL = "gpt-5-nano"; readonly key = "gpt-5-nano"; diff --git a/apps/api/src/api/llm/core/services/llm-resolver.service.ts b/apps/api/src/api/llm/core/services/llm-resolver.service.ts index ed2891e8..2017afee 100644 --- a/apps/api/src/api/llm/core/services/llm-resolver.service.ts +++ b/apps/api/src/api/llm/core/services/llm-resolver.service.ts @@ -24,7 +24,7 @@ export class LLMResolverService { string, { modelKey: string; cachedAt: number } >(); - private readonly CACHE_TTL = 5 * 60 * 1000; // 5 minutes cache + private readonly CACHE_TTL = 1; constructor( @Inject(LLM_ASSIGNMENT_SERVICE) @@ -49,6 +49,9 @@ export class LLMResolverService { // Get assigned model from service const modelKey = await this.assignmentService.getAssignedModel(featureKey); + console.log( + `Resolving model for feature ${featureKey}, assigned: ${modelKey}`, + ); if (modelKey) { // Cache the result @@ -144,7 +147,9 @@ export class LLMResolverService { fallbackModel = "gpt-4o-mini", ): Promise { const resolvedModel = await this.resolveModelForFeature(featureKey); - + console.log( + `Resolving model for feature ${featureKey}, resolved: ${resolvedModel}`, + ); if (resolvedModel) { return resolvedModel; } diff --git a/apps/api/src/api/llm/core/services/llm-router.service.ts b/apps/api/src/api/llm/core/services/llm-router.service.ts index c814145f..f7fbdaf3 100644 --- a/apps/api/src/api/llm/core/services/llm-router.service.ts +++ b/apps/api/src/api/llm/core/services/llm-router.service.ts @@ -1,15 +1,15 @@ import { Inject, Injectable, Logger } from "@nestjs/common"; import { ALL_LLM_PROVIDERS, LLM_RESOLVER_SERVICE } from "../../llm.constants"; -import { ILlmProvider } from "../interfaces/llm-provider.interface"; +import { IMultimodalLlmProvider } from "../interfaces/llm-provider.interface"; import { LLMResolverService } from "./llm-resolver.service"; @Injectable() export class LlmRouter { private readonly logger = new Logger(LlmRouter.name); - private readonly map: Map; + private readonly map: Map; constructor( - @Inject(ALL_LLM_PROVIDERS) providers: ILlmProvider[], + @Inject(ALL_LLM_PROVIDERS) providers: IMultimodalLlmProvider[], @Inject(LLM_RESOLVER_SERVICE) private readonly resolverService: LLMResolverService, ) { @@ -17,14 +17,14 @@ export class LlmRouter { } /** Return provider by key, or throw if it doesn't exist */ - get(key: string): ILlmProvider { + get(key: string): IMultimodalLlmProvider { const found = this.map.get(key); if (!found) throw new Error(`No LLM provider registered for key "${key}"`); return found; } /** Get provider for a specific AI feature (uses dynamic assignment) */ - async getForFeature(featureKey: string): Promise { + async getForFeature(featureKey: string): Promise { try { const assignedModelKey = await this.resolverService.resolveModelForFeature(featureKey); @@ -62,7 +62,7 @@ export class LlmRouter { async getForFeatureWithFallback( featureKey: string, fallbackModelKey = "gpt-4o-mini", - ): Promise { + ): Promise { try { const assignedModelKey = await this.resolverService.getModelKeyWithFallback( @@ -90,7 +90,7 @@ export class LlmRouter { } /** Convenience default (first registered) */ - getDefault(): ILlmProvider { + getDefault(): IMultimodalLlmProvider { return this.map.values().next().value; } diff --git a/apps/api/src/api/llm/core/services/openai-llm-mini.service.ts 
b/apps/api/src/api/llm/core/services/openai-llm-mini.service.ts index bb9bbda8..e5260afb 100644 --- a/apps/api/src/api/llm/core/services/openai-llm-mini.service.ts +++ b/apps/api/src/api/llm/core/services/openai-llm-mini.service.ts @@ -5,7 +5,7 @@ import { WINSTON_MODULE_PROVIDER } from "nest-winston"; import { Logger } from "winston"; import { TOKEN_COUNTER } from "../../llm.constants"; import { - ILlmProvider, + IMultimodalLlmProvider, LlmRequestOptions, LlmResponse, } from "../interfaces/llm-provider.interface"; @@ -16,7 +16,7 @@ import { ITokenCounter } from "../interfaces/token-counter.interface"; * Usage is identical to the full-size OpenAiLlmService. */ @Injectable() -export class OpenAiLlmMiniService implements ILlmProvider { +export class OpenAiLlmMiniService implements IMultimodalLlmProvider { private readonly logger: Logger; static readonly DEFAULT_MODEL = "gpt-4o-mini"; readonly key = "gpt-4o-mini"; diff --git a/apps/api/src/api/llm/core/services/openai-llm-vision.service.ts b/apps/api/src/api/llm/core/services/openai-llm-vision.service.ts index cffd47de..53387410 100644 --- a/apps/api/src/api/llm/core/services/openai-llm-vision.service.ts +++ b/apps/api/src/api/llm/core/services/openai-llm-vision.service.ts @@ -5,14 +5,14 @@ import { WINSTON_MODULE_PROVIDER } from "nest-winston"; import { Logger } from "winston"; import { TOKEN_COUNTER } from "../../llm.constants"; import { - ILlmProvider, + IMultimodalLlmProvider, LlmRequestOptions, LlmResponse, } from "../interfaces/llm-provider.interface"; import { ITokenCounter } from "../interfaces/token-counter.interface"; @Injectable() -export class Gpt4VisionPreviewLlmService implements ILlmProvider { +export class Gpt4VisionPreviewLlmService implements IMultimodalLlmProvider { private readonly logger: Logger; static readonly DEFAULT_MODEL = "gpt-4.1-mini"; readonly key = "gpt-4.1-mini"; diff --git a/apps/api/src/api/llm/core/services/openai-llm.service.ts b/apps/api/src/api/llm/core/services/openai-llm.service.ts index 1ed080fc..e6aa613e 100644 --- a/apps/api/src/api/llm/core/services/openai-llm.service.ts +++ b/apps/api/src/api/llm/core/services/openai-llm.service.ts @@ -5,14 +5,14 @@ import { WINSTON_MODULE_PROVIDER } from "nest-winston"; import { Logger } from "winston"; import { TOKEN_COUNTER } from "../../llm.constants"; import { - ILlmProvider, + IMultimodalLlmProvider, LlmRequestOptions, LlmResponse, } from "../interfaces/llm-provider.interface"; import { ITokenCounter } from "../interfaces/token-counter.interface"; @Injectable() -export class OpenAiLlmService implements ILlmProvider { +export class OpenAiLlmService implements IMultimodalLlmProvider { private readonly logger: Logger; static readonly DEFAULT_MODEL = "gpt-4o"; readonly key = "gpt-4o"; diff --git a/apps/api/src/api/llm/core/services/prompt-processor.service.ts b/apps/api/src/api/llm/core/services/prompt-processor.service.ts index 1f03cc34..ba92f47c 100644 --- a/apps/api/src/api/llm/core/services/prompt-processor.service.ts +++ b/apps/api/src/api/llm/core/services/prompt-processor.service.ts @@ -41,6 +41,10 @@ export class PromptProcessorService implements IPromptProcessor { featureKey, fallbackModel, ); + console.log( + `Processing prompt for feature ${featureKey} with model ${llm.key}`, + ); + return await this._processPromptWithProvider( prompt, assignmentId, @@ -68,6 +72,7 @@ export class PromptProcessorService implements IPromptProcessor { ): Promise { try { const llm = this.router.get(llmKey ?? 
"gpt-4o"); + return await this._processPromptWithProvider( prompt, assignmentId, @@ -146,18 +151,21 @@ export class PromptProcessorService implements IPromptProcessor { throw formatError; } - const result = await llm.invoke([new HumanMessage(input)]); - const response = this.cleanResponse(result.content); - - await this.usageTracker.trackUsage( - assignmentId, - usageType, - result.tokenUsage.input, - result.tokenUsage.output, - llm.key, - ); + try { + const result = await llm.invoke([new HumanMessage(input)]); + const response = this.cleanResponse(result.content); + await this.usageTracker.trackUsage( + assignmentId, + usageType, + result.tokenUsage.input, + result.tokenUsage.output, + llm.key, + ); - return response; + return response; + } catch (error) { + console.log("The issue is:", error); + } } /** diff --git a/apps/api/src/api/llm/features/grading/services/file-grading.service.ts b/apps/api/src/api/llm/features/grading/services/file-grading.service.ts index 00e68ff7..1934bfd4 100644 --- a/apps/api/src/api/llm/features/grading/services/file-grading.service.ts +++ b/apps/api/src/api/llm/features/grading/services/file-grading.service.ts @@ -182,7 +182,9 @@ export class FileGradingService implements IFileGradingService { ); } catch (retryError) { this.logger.error( - `All LLM retry attempts failed: ${retryError instanceof Error ? retryError.message : String(retryError)}`, + `All LLM retry attempts failed: ${ + retryError instanceof Error ? retryError.message : String(retryError) + }`, ); return this.createFallbackResponse( maxTotalPoints, @@ -298,10 +300,11 @@ export class FileGradingService implements IFileGradingService { `LLM attempt ${attempt}/${maxRetries} with model ${primaryModel}`, ); - const response = await this.promptProcessor.processPrompt( + const response = await this.promptProcessor.processPromptForFeature( prompt, assignmentId, AIUsageType.ASSIGNMENT_GRADING, + "file_grading", primaryModel, ); @@ -316,7 +319,10 @@ export class FileGradingService implements IFileGradingService { } this.logger.warn( - `LLM returned invalid response on attempt ${attempt}/${maxRetries}: "${response?.slice(0, 100)}..."`, + `LLM returned invalid response on attempt ${attempt}/${maxRetries}: "${response?.slice( + 0, + 100, + )}..."`, ); lastError = new Error( `Invalid LLM response: ${response?.slice(0, 100)}`, @@ -344,10 +350,11 @@ export class FileGradingService implements IFileGradingService { `Primary model ${primaryModel} failed after ${maxRetries} attempts, trying fallback model ${fallbackModel}`, ); - const response = await this.promptProcessor.processPrompt( + const response = await this.promptProcessor.processPromptForFeature( prompt, assignmentId, AIUsageType.ASSIGNMENT_GRADING, + "file_grading", fallbackModel, ); @@ -361,7 +368,11 @@ export class FileGradingService implements IFileGradingService { ); } catch (fallbackError) { this.logger.error( - `Fallback model also failed: ${fallbackError instanceof Error ? fallbackError.message : String(fallbackError)}`, + `Fallback model also failed: ${ + fallbackError instanceof Error + ? 
fallbackError.message + : String(fallbackError) + }`, ); } diff --git a/apps/api/src/api/llm/features/grading/services/grading-judge.service.ts b/apps/api/src/api/llm/features/grading/services/grading-judge.service.ts index a6628134..7e6a05ee 100644 --- a/apps/api/src/api/llm/features/grading/services/grading-judge.service.ts +++ b/apps/api/src/api/llm/features/grading/services/grading-judge.service.ts @@ -139,10 +139,11 @@ export class GradingJudgeService implements IGradingJudgeService { ); const response = await this.processWithTimeout( - this.promptProcessor.processPrompt( + this.promptProcessor.processPromptForFeature( prompt, input.assignmentId, AIUsageType.GRADING_VALIDATION, + "content_moderation", selectedModel, ), this.maxJudgeTimeout, diff --git a/apps/api/src/api/llm/features/grading/services/presentation-grading.service.ts b/apps/api/src/api/llm/features/grading/services/presentation-grading.service.ts index cccca3f1..68f78640 100644 --- a/apps/api/src/api/llm/features/grading/services/presentation-grading.service.ts +++ b/apps/api/src/api/llm/features/grading/services/presentation-grading.service.ts @@ -152,10 +152,11 @@ export class PresentationGradingService implements IPresentationGradingService { }); // Process the prompt through the LLM - const response = await this.promptProcessor.processPrompt( + const response = await this.promptProcessor.processPromptForFeature( prompt, assignmentId, AIUsageType.ASSIGNMENT_GRADING, + "presentation_grading", ); try { @@ -265,10 +266,11 @@ ${parsedResponse.guidance} try { // Process the prompt through the LLM - const response = await this.promptProcessor.processPrompt( + const response = await this.promptProcessor.processPromptForFeature( prompt, assignmentId, AIUsageType.LIVE_RECORDING_FEEDBACK, + "live_recording_feedback", ); // Parse the response diff --git a/apps/api/src/api/llm/features/grading/services/url-grading.service.ts b/apps/api/src/api/llm/features/grading/services/url-grading.service.ts index 4c65cbc1..d7b713fc 100644 --- a/apps/api/src/api/llm/features/grading/services/url-grading.service.ts +++ b/apps/api/src/api/llm/features/grading/services/url-grading.service.ts @@ -107,7 +107,9 @@ export class UrlGradingService implements IUrlGradingService { ); } catch (retryError) { this.logger.error( - `All URL grading LLM retry attempts failed: ${retryError instanceof Error ? retryError.message : String(retryError)}`, + `All URL grading LLM retry attempts failed: ${ + retryError instanceof Error ? 
retryError.message : String(retryError) + }`, ); return this.createFallbackUrlResponse( totalPoints, @@ -212,10 +214,11 @@ export class UrlGradingService implements IUrlGradingService { try { this.logger.debug(`URL grading LLM attempt ${attempt}/${maxRetries}`); - const response = await this.promptProcessor.processPrompt( + const response = await this.promptProcessor.processPromptForFeature( prompt, assignmentId, AIUsageType.ASSIGNMENT_GRADING, + "url_grading", ); // Check if response is valid @@ -229,7 +232,10 @@ export class UrlGradingService implements IUrlGradingService { } this.logger.warn( - `URL grading LLM returned invalid response on attempt ${attempt}/${maxRetries}: "${response?.slice(0, 100)}..."`, + `URL grading LLM returned invalid response on attempt ${attempt}/${maxRetries}: "${response?.slice( + 0, + 100, + )}..."`, ); lastError = new Error( `Invalid LLM response: ${response?.slice(0, 100)}`, @@ -257,7 +263,11 @@ export class UrlGradingService implements IUrlGradingService { throw lastError || new Error("All URL grading LLM attempts failed"); } catch (fallbackError) { this.logger.error( - `URL grading fallback also failed: ${fallbackError instanceof Error ? fallbackError.message : String(fallbackError)}`, + `URL grading fallback also failed: ${ + fallbackError instanceof Error + ? fallbackError.message + : String(fallbackError) + }`, ); throw lastError || new Error("All URL grading LLM attempts failed"); } diff --git a/apps/api/src/api/llm/features/grading/services/video-grading.service.ts b/apps/api/src/api/llm/features/grading/services/video-grading.service.ts index b169d754..6071d50d 100644 --- a/apps/api/src/api/llm/features/grading/services/video-grading.service.ts +++ b/apps/api/src/api/llm/features/grading/services/video-grading.service.ts @@ -95,10 +95,11 @@ export class VideoPresentationGradingService }, }); - const response = await this.promptProcessor.processPrompt( + const response = await this.promptProcessor.processPromptForFeature( prompt, assignmentId, AIUsageType.ASSIGNMENT_GRADING, + "video_grading", ); try { diff --git a/apps/api/src/api/llm/features/question-generation/services/question-generation.service.ts b/apps/api/src/api/llm/features/question-generation/services/question-generation.service.ts index 95d0106b..32f69ce1 100644 --- a/apps/api/src/api/llm/features/question-generation/services/question-generation.service.ts +++ b/apps/api/src/api/llm/features/question-generation/services/question-generation.service.ts @@ -250,10 +250,11 @@ export class QuestionGenerationService implements IQuestionGenerationService { this.logger.debug( `Generating questions for assignment ID: ${assignmentId}`, ); - const response = await this.promptProcessor.processPrompt( + const response = await this.promptProcessor.processPromptForFeature( prompt, assignmentId, AIUsageType.ASSIGNMENT_GENERATION, + "question_generation", ); // Parse response @@ -854,7 +855,7 @@ FORMAT INSTRUCTIONS: {formatInstructions} `; - const response = await this.promptProcessor.processPrompt( + const response = await this.promptProcessor.processPromptForFeature( new PromptTemplate({ template, inputVariables: [], @@ -867,6 +868,7 @@ FORMAT INSTRUCTIONS: }), assignmentId, AIUsageType.ASSIGNMENT_GENERATION, + "question_generation", ); const parsedResponse = await parser.parse(response); if (parsedResponse.scoring) { @@ -2167,10 +2169,11 @@ FORMAT INSTRUCTIONS: while (attemptsLeft > 0 && !success) { try { - response = await this.promptProcessor.processPrompt( + response = await 
this.promptProcessor.processPromptForFeature( prompt, assignmentId, AIUsageType.ASSIGNMENT_GENERATION, + "question_generation", ); // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment @@ -2327,7 +2330,7 @@ FORMAT INSTRUCTIONS: while (attemptsLeft > 0 && !success) { try { - response = await this.promptProcessor.processPrompt( + response = await this.promptProcessor.processPromptForFeature( new PromptTemplate({ template, inputVariables: [], @@ -2338,6 +2341,7 @@ FORMAT INSTRUCTIONS: }), assignmentId, AIUsageType.ASSIGNMENT_GENERATION, + "question_generation", ); const parsedResponse = await parser.parse(response); diff --git a/apps/api/src/api/llm/features/question-generation/services/question-validator.service.ts b/apps/api/src/api/llm/features/question-generation/services/question-validator.service.ts index 5d54c7bb..da79c97d 100644 --- a/apps/api/src/api/llm/features/question-generation/services/question-validator.service.ts +++ b/apps/api/src/api/llm/features/question-generation/services/question-validator.service.ts @@ -150,10 +150,11 @@ export class QuestionValidatorService implements IQuestionValidatorService { }); try { - const response = await this.promptProcessor.processPrompt( + const response = await this.promptProcessor.processPromptForFeature( prompt, assignmentId, AIUsageType.ASSIGNMENT_GENERATION, + "question_generation", "gpt-4o-mini", ); diff --git a/apps/api/src/api/llm/features/rubric/services/rubric.service.ts b/apps/api/src/api/llm/features/rubric/services/rubric.service.ts index b43a2b4a..49c6a0e0 100644 --- a/apps/api/src/api/llm/features/rubric/services/rubric.service.ts +++ b/apps/api/src/api/llm/features/rubric/services/rubric.service.ts @@ -118,10 +118,12 @@ export class RubricService implements IRubricService { }); try { - const response = await this.promptProcessor.processPrompt( + const response = await this.promptProcessor.processPromptForFeature( prompt, assignmentId, AIUsageType.QUESTION_GENERATION, + "rubric_generation", + "gpt-4o-mini", ); let parsed: @@ -517,10 +519,11 @@ export class RubricService implements IRubricService { }); try { - const response = await this.promptProcessor.processPrompt( + const response = await this.promptProcessor.processPromptForFeature( prompt, assignmentId, AIUsageType.QUESTION_GENERATION, + "rubric_generation", ); let parsed: diff --git a/apps/api/src/api/llm/features/translation/services/translation.service.ts b/apps/api/src/api/llm/features/translation/services/translation.service.ts index 859021cd..788eea73 100644 --- a/apps/api/src/api/llm/features/translation/services/translation.service.ts +++ b/apps/api/src/api/llm/features/translation/services/translation.service.ts @@ -242,7 +242,7 @@ INSTRUCTIONS: }); try { - const response = await this.promptProcessor.processPrompt( + const response = await this.promptProcessor.processPromptForFeature( prompt, assignmentId, AIUsageType.TRANSLATION, @@ -361,7 +361,7 @@ INSTRUCTIONS: }); try { - const response = await this.promptProcessor.processPrompt( + const response = await this.promptProcessor.processPromptForFeature( prompt, assignmentId, AIUsageType.TRANSLATION, @@ -430,7 +430,7 @@ INSTRUCTIONS: }); try { - const response = await this.promptProcessor.processPrompt( + const response = await this.promptProcessor.processPromptForFeature( prompt, assignmentId, AIUsageType.TRANSLATION, @@ -792,7 +792,7 @@ INSTRUCTIONS: }); try { - const response = await this.promptProcessor.processPrompt( + const response = await this.promptProcessor.processPromptForFeature( prompt, 
assignmentId, AIUsageType.TRANSLATION, diff --git a/apps/api/src/api/llm/llm.module.ts b/apps/api/src/api/llm/llm.module.ts index 22a8bfc5..2abf0ac3 100644 --- a/apps/api/src/api/llm/llm.module.ts +++ b/apps/api/src/api/llm/llm.module.ts @@ -4,6 +4,7 @@ import { S3Service } from "../files/services/s3.service"; import { Gpt5LlmService } from "./core/services/gpt5-llm.service"; import { Gpt5MiniLlmService } from "./core/services/gpt5-mini-llm.service"; import { Gpt5NanoLlmService } from "./core/services/gpt5-nano-llm.service"; +import { GptOss120bLlmService } from "./core/services/gpt-oss-120b-llm-service"; import { LLMAssignmentService } from "./core/services/llm-assignment.service"; import { LLMPricingService } from "./core/services/llm-pricing.service"; import { LLMResolverService } from "./core/services/llm-resolver.service"; @@ -63,6 +64,7 @@ import { Gpt5LlmService, Gpt5MiniLlmService, Gpt5NanoLlmService, + GptOss120bLlmService, // LlamaLlmService, LlmRouter, { @@ -74,9 +76,10 @@ import { p4: Gpt5LlmService, p5: Gpt5MiniLlmService, p6: Gpt5NanoLlmService, - // p7: LlamaLlmService, + p7: GptOss120bLlmService, + // p8: LlamaLlmService, ) => { - return [p1, p2, p3, p4, p5, p6]; + return [p1, p2, p3, p4, p5, p6, p7]; }, inject: [ OpenAiLlmService, @@ -85,6 +88,7 @@ import { Gpt5LlmService, Gpt5MiniLlmService, Gpt5NanoLlmService, + GptOss120bLlmService, // LlamaLlmService, ], },
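Note on the pricing rows inserted by the migration: 0.00000015 USD per input token and 0.0000006 USD per output token correspond to roughly $0.15 per million input tokens and $0.60 per million output tokens. A minimal sketch of how such per-token rates turn into a request cost, assuming the usual linear input/output billing model (the real calculation belongs to LLMPricingService, which this patch does not touch):

// Illustrative only: the rates are copied from the migration; the helper and
// its linear cost model are assumptions, not code from this repository.
const GPT_OSS_120B_INPUT_PRICE = 0.00000015; // USD per input token
const GPT_OSS_120B_OUTPUT_PRICE = 0.0000006; // USD per output token

function estimateCostUsd(inputTokens: number, outputTokens: number): number {
  return (
    inputTokens * GPT_OSS_120B_INPUT_PRICE +
    outputTokens * GPT_OSS_120B_OUTPUT_PRICE
  );
}

// Example: 4,000 input tokens and 1,000 output tokens
// 4000 * 0.00000015 + 1000 * 0.0000006 = 0.0006 + 0.0006 = 0.0012 USD
estimateCostUsd(4000, 1000);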
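A minimal usage sketch for the newly registered provider, based on the LlmRouter methods and the invoke signature shown in this diff; the surrounding NestJS injection and the import paths are assumed rather than taken from the patch:

import { HumanMessage } from "@langchain/core/messages";
import { LlmRouter } from "./core/services/llm-router.service";
import { LlmResponse } from "./core/interfaces/llm-provider.interface";

// `router` would normally be injected by Nest; "file_grading" is one of the
// feature keys threaded through the grading services in this patch.
async function gradeWithConfiguredModel(router: LlmRouter): Promise<LlmResponse> {
  // Resolves whatever model is assigned to the feature, falling back to
  // gpt-4o-mini; router.get("gpt-oss-120b") would instead address the
  // WatsonX-hosted model directly by its registry key.
  const provider = await router.getForFeatureWithFallback("file_grading");

  return provider.invoke([new HumanMessage('Return {"ok": true} as JSON.')], {
    temperature: 0.2, // GptOss120bLlmService defaults to 0.5
    maxTokens: 256, // mapped to maxNewTokens on the WatsonxLLM client
  });
}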
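Most feature services in this patch move from processPrompt(...) to processPromptForFeature(...), which threads a feature key through to LLMResolverService so per-feature model assignments take effect. The diff does not show the method's declaration; the shape below is inferred from the call sites (prompt, assignment id, usage type, feature key, optional model override) and should be read as an assumption rather than the canonical interface:

import { PromptTemplate } from "@langchain/core/prompts";

// Inferred from call sites such as file-grading.service.ts and
// rubric.service.ts; parameter names and the assignmentId type are guesses.
type AIUsageType = string; // stands in for the Prisma enum used by the services

interface IPromptProcessorSketch {
  processPromptForFeature(
    prompt: PromptTemplate,
    assignmentId: number,
    usageType: AIUsageType,
    featureKey: string, // e.g. "file_grading", "rubric_generation"
    modelOverride?: string, // e.g. "gpt-4o-mini"
  ): Promise<string>;
}

// Typical call, mirroring rubric.service.ts:
// await promptProcessor.processPromptForFeature(
//   prompt,
//   assignmentId,
//   AIUsageType.QUESTION_GENERATION,
//   "rubric_generation",
//   "gpt-4o-mini",
// );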