From 355bd254d15587ab6924fa52a0b1e8f5e4db5e72 Mon Sep 17 00:00:00 2001 From: MmagdyhafezZ Date: Wed, 1 Oct 2025 14:25:25 -0600 Subject: [PATCH] Behzad changes --- .../migration.sql | 26 +++ .../core/interfaces/llm-provider.interface.ts | 2 + .../core/services/gpt-oss-120b-llm-service.ts | 197 ++++++++++++++++++ .../api/llm/core/services/gpt5-llm.service.ts | 5 +- .../core/services/gpt5-mini-llm.service.ts | 4 +- .../core/services/gpt5-nano-llm.service.ts | 4 +- .../llm/core/services/llm-resolver.service.ts | 9 +- .../llm/core/services/llm-router.service.ts | 14 +- .../core/services/openai-llm-mini.service.ts | 4 +- .../services/openai-llm-vision.service.ts | 4 +- .../llm/core/services/openai-llm.service.ts | 4 +- .../core/services/prompt-processor.service.ts | 30 ++- .../grading/services/file-grading.service.ts | 21 +- .../grading/services/grading-judge.service.ts | 3 +- .../services/presentation-grading.service.ts | 6 +- .../grading/services/url-grading.service.ts | 18 +- .../grading/services/video-grading.service.ts | 3 +- .../services/question-generation.service.ts | 12 +- .../services/question-validator.service.ts | 3 +- .../rubric/services/rubric.service.ts | 7 +- .../services/translation.service.ts | 8 +- apps/api/src/api/llm/llm.module.ts | 8 +- 22 files changed, 333 insertions(+), 59 deletions(-) create mode 100644 apps/api/prisma/migrations/20250915144814_add_ibm_foundation_models/migration.sql create mode 100644 apps/api/src/api/llm/core/services/gpt-oss-120b-llm-service.ts diff --git a/apps/api/prisma/migrations/20250915144814_add_ibm_foundation_models/migration.sql b/apps/api/prisma/migrations/20250915144814_add_ibm_foundation_models/migration.sql new file mode 100644 index 00000000..cdb7e21e --- /dev/null +++ b/apps/api/prisma/migrations/20250915144814_add_ibm_foundation_models/migration.sql @@ -0,0 +1,26 @@ +-- Add IBM foundation model variant to the LLMModel table +-- This migration adds the IBM foundation model GPT-oss-120b + +INSERT INTO "LLMModel" ("modelKey", "displayName", "provider", "isActive", "createdAt", "updatedAt") VALUES +('gpt-oss-120b', 'GPT-OSS-120B', 'OpenAI', true, NOW(), NOW()); + +-- Add initial pricing data for the new GPT model +-- Note: These are estimated prices, adjust based on actual OpenAI pricing when available +WITH new_models AS ( + SELECT id, "modelKey" FROM "LLMModel" WHERE "modelKey" IN ('gpt-oss-120b') +) +INSERT INTO "LLMPricing" ("modelId", "inputTokenPrice", "outputTokenPrice", "effectiveDate", "source", "isActive", "createdAt", "updatedAt") +SELECT + m.id, + CASE + WHEN m."modelKey" = 'gpt-oss-120b' THEN 0.00000015 + END, + CASE + WHEN m."modelKey" = 'gpt-oss-120b' THEN 0.0000006 + END, + NOW(), + 'MANUAL', + true, + NOW(), + NOW() +FROM new_models m; \ No newline at end of file diff --git a/apps/api/src/api/llm/core/interfaces/llm-provider.interface.ts b/apps/api/src/api/llm/core/interfaces/llm-provider.interface.ts index 0b5b88df..d00b2219 100644 --- a/apps/api/src/api/llm/core/interfaces/llm-provider.interface.ts +++ b/apps/api/src/api/llm/core/interfaces/llm-provider.interface.ts @@ -24,7 +24,9 @@ export interface ILlmProvider { options?: LlmRequestOptions, ): Promise; readonly key: string; +} +export interface IMultimodalLlmProvider extends ILlmProvider { /** * Send a request with image content to the LLM */ diff --git a/apps/api/src/api/llm/core/services/gpt-oss-120b-llm-service.ts b/apps/api/src/api/llm/core/services/gpt-oss-120b-llm-service.ts new file mode 100644 index 00000000..a5a82281 --- /dev/null +++ 
b/apps/api/src/api/llm/core/services/gpt-oss-120b-llm-service.ts @@ -0,0 +1,197 @@ +import { WatsonxLLM } from "@langchain/community/llms/ibm"; +import { HumanMessage } from "@langchain/core/messages"; +import { Inject, Injectable } from "@nestjs/common"; +import { WINSTON_MODULE_PROVIDER } from "nest-winston"; +import { Logger } from "winston"; +import { TOKEN_COUNTER } from "../../llm.constants"; +import { + IMultimodalLlmProvider, + LlmRequestOptions, + LlmResponse, +} from "../interfaces/llm-provider.interface"; +import { ITokenCounter } from "../interfaces/token-counter.interface"; + +@Injectable() +export class GptOss120bLlmService implements IMultimodalLlmProvider { + private readonly logger: Logger; + static readonly DEFAULT_MODEL = "openai/gpt-oss-120b"; + readonly key = "gpt-oss-120b"; + + constructor( + @Inject(TOKEN_COUNTER) private readonly tokenCounter: ITokenCounter, + @Inject(WINSTON_MODULE_PROVIDER) parentLogger: Logger, + ) { + this.logger = parentLogger.child({ context: GptOss120bLlmService.name }); + } + + private createChatModel(options?: LlmRequestOptions): WatsonxLLM { + return new WatsonxLLM({ + version: "2024-05-31", + serviceUrl: "https://us-south.ml.cloud.ibm.com", + projectId: process.env.WATSONX_PROJECT_ID_LLAMA || "", + watsonxAIAuthType: "iam", + watsonxAIApikey: process.env.WATSONX_AI_API_KEY_LLAMA || "", // pragma: allowlist secret + model: options?.modelName ?? GptOss120bLlmService.DEFAULT_MODEL, + temperature: options?.temperature ?? 0.5, + maxNewTokens: options?.maxTokens ?? 1000, + }); + } + + async invoke( + messages: HumanMessage[], + options?: LlmRequestOptions, + ): Promise { + const model = this.createChatModel(options); + + const inputText = messages + .map((m) => + typeof m.content === "string" ? m.content : JSON.stringify(m.content), + ) + .join("\n"); + const inputTokens = this.tokenCounter.countTokens(inputText); + + this.logger.debug(`Invoking WatsonX LLM with ${inputTokens} input tokens`); + + try { + console.log(`Invoking WatsonX LLM with input: ${inputText}`); + const result = await model.invoke(inputText); + console.log(`WatsonX LLM response: ${result}`); + const rawResponse = typeof result === "string" ? result : String(result); + + // Extract JSON from the response if it contains additional text + const responseContent = this.extractJSONFromResponse(rawResponse); + const outputTokens = this.tokenCounter.countTokens(responseContent); + + this.logger.debug( + `WatsonX LLM responded with ${outputTokens} output tokens`, + ); + + return { + content: responseContent, + tokenUsage: { + input: inputTokens, + output: outputTokens, + }, + }; + } catch (error) { + this.logger.error( + `WatsonX LLM API error: ${ + error instanceof Error ? error.message : "Unknown error" + }`, + ); + throw error; + } + } + + async invokeWithImage( + textContent: string, + imageData: string, + options?: LlmRequestOptions, + ): Promise { + this.logger.warn( + "WatsonX LLM does not support multimodal (text + image) inputs. Processing text only.", + ); + + const inputTokens = this.tokenCounter.countTokens(textContent); + + this.logger.debug( + `Invoking WatsonX LLM with text only (${inputTokens} input tokens) - image data ignored`, + ); + + const model = this.createChatModel(options); + + try { + const result = await model.invoke(textContent); + const rawResponse = typeof result === "string" ? 
result : String(result); + const responseContent = this.extractJSONFromResponse(rawResponse); + const outputTokens = this.tokenCounter.countTokens(responseContent); + + this.logger.debug( + `WatsonX LLM responded with ${outputTokens} output tokens`, + ); + + return { + content: responseContent, + tokenUsage: { + input: inputTokens, + output: outputTokens, + }, + }; + } catch (error) { + this.logger.error( + `WatsonX LLM API error: ${ + error instanceof Error ? error.message : "Unknown error" + }`, + ); + throw error; + } + } + + /** + * Extract JSON from WatsonX response that may contain additional text + */ + private extractJSONFromResponse(response: string): string { + try { + // First, try to parse the response as-is in case it's already clean JSON + JSON.parse(response); + return response; + } catch { + // If that fails, try to extract JSON from markdown code blocks + const jsonBlockMatch = response.match(/```json\s*([\S\s]*?)\s*```/); + if (jsonBlockMatch) { + const jsonContent = jsonBlockMatch[1].trim(); + try { + JSON.parse(jsonContent); + return jsonContent; + } catch { + // Fall through to other extraction methods + } + } + + // Try to find JSON object patterns in the response + const jsonObjectMatch = response.match(/{[\S\s]*}/); + if (jsonObjectMatch) { + const jsonContent = jsonObjectMatch[0]; + try { + JSON.parse(jsonContent); + return jsonContent; + } catch { + // Fall through + } + } + + // If no valid JSON found, return the original response + this.logger.warn( + "Could not extract valid JSON from WatsonX response, returning original", + ); + return response; + } + } + + /** + * Normalize image data to ensure it has the correct format + * Note: WatsonX LLM does not support image inputs, but keeping this method for potential future use + */ + private normalizeImageData(imageData: string): string { + if (!imageData) { + throw new Error("Image data is empty or null"); + } + + if (imageData.startsWith("data:")) { + return imageData; + } + + let mimeType = "image/jpeg"; + if (imageData.startsWith("/9j/")) { + mimeType = "image/jpeg"; + } else if (imageData.startsWith("iVBORw0KGgo")) { + mimeType = "image/png"; + } else if (imageData.startsWith("R0lGOD")) { + mimeType = "image/gif"; + } else if (imageData.startsWith("UklGR")) { + mimeType = "image/webp"; + } + + return `data:${mimeType};base64,${imageData}`; + } +} diff --git a/apps/api/src/api/llm/core/services/gpt5-llm.service.ts b/apps/api/src/api/llm/core/services/gpt5-llm.service.ts index da6f408b..45425b0b 100644 --- a/apps/api/src/api/llm/core/services/gpt5-llm.service.ts +++ b/apps/api/src/api/llm/core/services/gpt5-llm.service.ts @@ -5,7 +5,7 @@ import { WINSTON_MODULE_PROVIDER } from "nest-winston"; import { Logger } from "winston"; import { TOKEN_COUNTER } from "../../llm.constants"; import { - ILlmProvider, + IMultimodalLlmProvider, LlmRequestOptions, LlmResponse, } from "../interfaces/llm-provider.interface"; @@ -17,7 +17,7 @@ import { ITokenCounter } from "../interfaces/token-counter.interface"; * enhanced capabilities and performance compared to GPT-4 models. */ @Injectable() -export class Gpt5LlmService implements ILlmProvider { +export class Gpt5LlmService implements IMultimodalLlmProvider { private readonly logger: Logger; static readonly DEFAULT_MODEL = "gpt-5"; readonly key = "gpt-5"; @@ -34,7 +34,6 @@ export class Gpt5LlmService implements ILlmProvider { */ private createChatModel(options?: LlmRequestOptions): ChatOpenAI { return new ChatOpenAI({ - temperature: options?.temperature ?? 
0.5, modelName: options?.modelName ?? Gpt5LlmService.DEFAULT_MODEL, maxCompletionTokens: options?.maxTokens, }); diff --git a/apps/api/src/api/llm/core/services/gpt5-mini-llm.service.ts b/apps/api/src/api/llm/core/services/gpt5-mini-llm.service.ts index b2e799bf..8adf397f 100644 --- a/apps/api/src/api/llm/core/services/gpt5-mini-llm.service.ts +++ b/apps/api/src/api/llm/core/services/gpt5-mini-llm.service.ts @@ -5,7 +5,7 @@ import { WINSTON_MODULE_PROVIDER } from "nest-winston"; import { Logger } from "winston"; import { TOKEN_COUNTER } from "../../llm.constants"; import { - ILlmProvider, + IMultimodalLlmProvider, LlmRequestOptions, LlmResponse, } from "../interfaces/llm-provider.interface"; @@ -17,7 +17,7 @@ import { ITokenCounter } from "../interfaces/token-counter.interface"; * and cost-effectiveness for simpler tasks. */ @Injectable() -export class Gpt5MiniLlmService implements ILlmProvider { +export class Gpt5MiniLlmService implements IMultimodalLlmProvider { private readonly logger: Logger; static readonly DEFAULT_MODEL = "gpt-5-mini"; readonly key = "gpt-5-mini"; diff --git a/apps/api/src/api/llm/core/services/gpt5-nano-llm.service.ts b/apps/api/src/api/llm/core/services/gpt5-nano-llm.service.ts index c17bb2ee..c2120705 100644 --- a/apps/api/src/api/llm/core/services/gpt5-nano-llm.service.ts +++ b/apps/api/src/api/llm/core/services/gpt5-nano-llm.service.ts @@ -5,7 +5,7 @@ import { WINSTON_MODULE_PROVIDER } from "nest-winston"; import { Logger } from "winston"; import { TOKEN_COUNTER } from "../../llm.constants"; import { - ILlmProvider, + IMultimodalLlmProvider, LlmRequestOptions, LlmResponse, } from "../interfaces/llm-provider.interface"; @@ -17,7 +17,7 @@ import { ITokenCounter } from "../interfaces/token-counter.interface"; * cost-effectiveness, ideal for basic text processing and quick responses. 
*/ @Injectable() -export class Gpt5NanoLlmService implements ILlmProvider { +export class Gpt5NanoLlmService implements IMultimodalLlmProvider { private readonly logger: Logger; static readonly DEFAULT_MODEL = "gpt-5-nano"; readonly key = "gpt-5-nano"; diff --git a/apps/api/src/api/llm/core/services/llm-resolver.service.ts b/apps/api/src/api/llm/core/services/llm-resolver.service.ts index ed2891e8..2017afee 100644 --- a/apps/api/src/api/llm/core/services/llm-resolver.service.ts +++ b/apps/api/src/api/llm/core/services/llm-resolver.service.ts @@ -24,7 +24,7 @@ export class LLMResolverService { string, { modelKey: string; cachedAt: number } >(); - private readonly CACHE_TTL = 5 * 60 * 1000; // 5 minutes cache + private readonly CACHE_TTL = 1; constructor( @Inject(LLM_ASSIGNMENT_SERVICE) @@ -49,6 +49,9 @@ export class LLMResolverService { // Get assigned model from service const modelKey = await this.assignmentService.getAssignedModel(featureKey); + console.log( + `Resolving model for feature ${featureKey}, assigned: ${modelKey}`, + ); if (modelKey) { // Cache the result @@ -144,7 +147,9 @@ export class LLMResolverService { fallbackModel = "gpt-4o-mini", ): Promise { const resolvedModel = await this.resolveModelForFeature(featureKey); - + console.log( + `Resolving model for feature ${featureKey}, resolved: ${resolvedModel}`, + ); if (resolvedModel) { return resolvedModel; } diff --git a/apps/api/src/api/llm/core/services/llm-router.service.ts b/apps/api/src/api/llm/core/services/llm-router.service.ts index c814145f..f7fbdaf3 100644 --- a/apps/api/src/api/llm/core/services/llm-router.service.ts +++ b/apps/api/src/api/llm/core/services/llm-router.service.ts @@ -1,15 +1,15 @@ import { Inject, Injectable, Logger } from "@nestjs/common"; import { ALL_LLM_PROVIDERS, LLM_RESOLVER_SERVICE } from "../../llm.constants"; -import { ILlmProvider } from "../interfaces/llm-provider.interface"; +import { IMultimodalLlmProvider } from "../interfaces/llm-provider.interface"; import { LLMResolverService } from "./llm-resolver.service"; @Injectable() export class LlmRouter { private readonly logger = new Logger(LlmRouter.name); - private readonly map: Map; + private readonly map: Map; constructor( - @Inject(ALL_LLM_PROVIDERS) providers: ILlmProvider[], + @Inject(ALL_LLM_PROVIDERS) providers: IMultimodalLlmProvider[], @Inject(LLM_RESOLVER_SERVICE) private readonly resolverService: LLMResolverService, ) { @@ -17,14 +17,14 @@ export class LlmRouter { } /** Return provider by key, or throw if it doesn't exist */ - get(key: string): ILlmProvider { + get(key: string): IMultimodalLlmProvider { const found = this.map.get(key); if (!found) throw new Error(`No LLM provider registered for key "${key}"`); return found; } /** Get provider for a specific AI feature (uses dynamic assignment) */ - async getForFeature(featureKey: string): Promise { + async getForFeature(featureKey: string): Promise { try { const assignedModelKey = await this.resolverService.resolveModelForFeature(featureKey); @@ -62,7 +62,7 @@ export class LlmRouter { async getForFeatureWithFallback( featureKey: string, fallbackModelKey = "gpt-4o-mini", - ): Promise { + ): Promise { try { const assignedModelKey = await this.resolverService.getModelKeyWithFallback( @@ -90,7 +90,7 @@ export class LlmRouter { } /** Convenience default (first registered) */ - getDefault(): ILlmProvider { + getDefault(): IMultimodalLlmProvider { return this.map.values().next().value; } diff --git a/apps/api/src/api/llm/core/services/openai-llm-mini.service.ts 
b/apps/api/src/api/llm/core/services/openai-llm-mini.service.ts index bb9bbda8..e5260afb 100644 --- a/apps/api/src/api/llm/core/services/openai-llm-mini.service.ts +++ b/apps/api/src/api/llm/core/services/openai-llm-mini.service.ts @@ -5,7 +5,7 @@ import { WINSTON_MODULE_PROVIDER } from "nest-winston"; import { Logger } from "winston"; import { TOKEN_COUNTER } from "../../llm.constants"; import { - ILlmProvider, + IMultimodalLlmProvider, LlmRequestOptions, LlmResponse, } from "../interfaces/llm-provider.interface"; @@ -16,7 +16,7 @@ import { ITokenCounter } from "../interfaces/token-counter.interface"; * Usage is identical to the full-size OpenAiLlmService. */ @Injectable() -export class OpenAiLlmMiniService implements ILlmProvider { +export class OpenAiLlmMiniService implements IMultimodalLlmProvider { private readonly logger: Logger; static readonly DEFAULT_MODEL = "gpt-4o-mini"; readonly key = "gpt-4o-mini"; diff --git a/apps/api/src/api/llm/core/services/openai-llm-vision.service.ts b/apps/api/src/api/llm/core/services/openai-llm-vision.service.ts index cffd47de..53387410 100644 --- a/apps/api/src/api/llm/core/services/openai-llm-vision.service.ts +++ b/apps/api/src/api/llm/core/services/openai-llm-vision.service.ts @@ -5,14 +5,14 @@ import { WINSTON_MODULE_PROVIDER } from "nest-winston"; import { Logger } from "winston"; import { TOKEN_COUNTER } from "../../llm.constants"; import { - ILlmProvider, + IMultimodalLlmProvider, LlmRequestOptions, LlmResponse, } from "../interfaces/llm-provider.interface"; import { ITokenCounter } from "../interfaces/token-counter.interface"; @Injectable() -export class Gpt4VisionPreviewLlmService implements ILlmProvider { +export class Gpt4VisionPreviewLlmService implements IMultimodalLlmProvider { private readonly logger: Logger; static readonly DEFAULT_MODEL = "gpt-4.1-mini"; readonly key = "gpt-4.1-mini"; diff --git a/apps/api/src/api/llm/core/services/openai-llm.service.ts b/apps/api/src/api/llm/core/services/openai-llm.service.ts index 1ed080fc..e6aa613e 100644 --- a/apps/api/src/api/llm/core/services/openai-llm.service.ts +++ b/apps/api/src/api/llm/core/services/openai-llm.service.ts @@ -5,14 +5,14 @@ import { WINSTON_MODULE_PROVIDER } from "nest-winston"; import { Logger } from "winston"; import { TOKEN_COUNTER } from "../../llm.constants"; import { - ILlmProvider, + IMultimodalLlmProvider, LlmRequestOptions, LlmResponse, } from "../interfaces/llm-provider.interface"; import { ITokenCounter } from "../interfaces/token-counter.interface"; @Injectable() -export class OpenAiLlmService implements ILlmProvider { +export class OpenAiLlmService implements IMultimodalLlmProvider { private readonly logger: Logger; static readonly DEFAULT_MODEL = "gpt-4o"; readonly key = "gpt-4o"; diff --git a/apps/api/src/api/llm/core/services/prompt-processor.service.ts b/apps/api/src/api/llm/core/services/prompt-processor.service.ts index 1f03cc34..ba92f47c 100644 --- a/apps/api/src/api/llm/core/services/prompt-processor.service.ts +++ b/apps/api/src/api/llm/core/services/prompt-processor.service.ts @@ -41,6 +41,10 @@ export class PromptProcessorService implements IPromptProcessor { featureKey, fallbackModel, ); + console.log( + `Processing prompt for feature ${featureKey} with model ${llm.key}`, + ); + return await this._processPromptWithProvider( prompt, assignmentId, @@ -68,6 +72,7 @@ export class PromptProcessorService implements IPromptProcessor { ): Promise { try { const llm = this.router.get(llmKey ?? 
"gpt-4o"); + return await this._processPromptWithProvider( prompt, assignmentId, @@ -146,18 +151,21 @@ export class PromptProcessorService implements IPromptProcessor { throw formatError; } - const result = await llm.invoke([new HumanMessage(input)]); - const response = this.cleanResponse(result.content); - - await this.usageTracker.trackUsage( - assignmentId, - usageType, - result.tokenUsage.input, - result.tokenUsage.output, - llm.key, - ); + try { + const result = await llm.invoke([new HumanMessage(input)]); + const response = this.cleanResponse(result.content); + await this.usageTracker.trackUsage( + assignmentId, + usageType, + result.tokenUsage.input, + result.tokenUsage.output, + llm.key, + ); - return response; + return response; + } catch (error) { + console.log("The issue is:", error); + } } /** diff --git a/apps/api/src/api/llm/features/grading/services/file-grading.service.ts b/apps/api/src/api/llm/features/grading/services/file-grading.service.ts index 00e68ff7..1934bfd4 100644 --- a/apps/api/src/api/llm/features/grading/services/file-grading.service.ts +++ b/apps/api/src/api/llm/features/grading/services/file-grading.service.ts @@ -182,7 +182,9 @@ export class FileGradingService implements IFileGradingService { ); } catch (retryError) { this.logger.error( - `All LLM retry attempts failed: ${retryError instanceof Error ? retryError.message : String(retryError)}`, + `All LLM retry attempts failed: ${ + retryError instanceof Error ? retryError.message : String(retryError) + }`, ); return this.createFallbackResponse( maxTotalPoints, @@ -298,10 +300,11 @@ export class FileGradingService implements IFileGradingService { `LLM attempt ${attempt}/${maxRetries} with model ${primaryModel}`, ); - const response = await this.promptProcessor.processPrompt( + const response = await this.promptProcessor.processPromptForFeature( prompt, assignmentId, AIUsageType.ASSIGNMENT_GRADING, + "file_grading", primaryModel, ); @@ -316,7 +319,10 @@ export class FileGradingService implements IFileGradingService { } this.logger.warn( - `LLM returned invalid response on attempt ${attempt}/${maxRetries}: "${response?.slice(0, 100)}..."`, + `LLM returned invalid response on attempt ${attempt}/${maxRetries}: "${response?.slice( + 0, + 100, + )}..."`, ); lastError = new Error( `Invalid LLM response: ${response?.slice(0, 100)}`, @@ -344,10 +350,11 @@ export class FileGradingService implements IFileGradingService { `Primary model ${primaryModel} failed after ${maxRetries} attempts, trying fallback model ${fallbackModel}`, ); - const response = await this.promptProcessor.processPrompt( + const response = await this.promptProcessor.processPromptForFeature( prompt, assignmentId, AIUsageType.ASSIGNMENT_GRADING, + "file_grading", fallbackModel, ); @@ -361,7 +368,11 @@ export class FileGradingService implements IFileGradingService { ); } catch (fallbackError) { this.logger.error( - `Fallback model also failed: ${fallbackError instanceof Error ? fallbackError.message : String(fallbackError)}`, + `Fallback model also failed: ${ + fallbackError instanceof Error + ? 
fallbackError.message + : String(fallbackError) + }`, ); } diff --git a/apps/api/src/api/llm/features/grading/services/grading-judge.service.ts b/apps/api/src/api/llm/features/grading/services/grading-judge.service.ts index a6628134..7e6a05ee 100644 --- a/apps/api/src/api/llm/features/grading/services/grading-judge.service.ts +++ b/apps/api/src/api/llm/features/grading/services/grading-judge.service.ts @@ -139,10 +139,11 @@ export class GradingJudgeService implements IGradingJudgeService { ); const response = await this.processWithTimeout( - this.promptProcessor.processPrompt( + this.promptProcessor.processPromptForFeature( prompt, input.assignmentId, AIUsageType.GRADING_VALIDATION, + "content_moderation", selectedModel, ), this.maxJudgeTimeout, diff --git a/apps/api/src/api/llm/features/grading/services/presentation-grading.service.ts b/apps/api/src/api/llm/features/grading/services/presentation-grading.service.ts index cccca3f1..68f78640 100644 --- a/apps/api/src/api/llm/features/grading/services/presentation-grading.service.ts +++ b/apps/api/src/api/llm/features/grading/services/presentation-grading.service.ts @@ -152,10 +152,11 @@ export class PresentationGradingService implements IPresentationGradingService { }); // Process the prompt through the LLM - const response = await this.promptProcessor.processPrompt( + const response = await this.promptProcessor.processPromptForFeature( prompt, assignmentId, AIUsageType.ASSIGNMENT_GRADING, + "presentation_grading", ); try { @@ -265,10 +266,11 @@ ${parsedResponse.guidance} try { // Process the prompt through the LLM - const response = await this.promptProcessor.processPrompt( + const response = await this.promptProcessor.processPromptForFeature( prompt, assignmentId, AIUsageType.LIVE_RECORDING_FEEDBACK, + "live_recording_feedback", ); // Parse the response diff --git a/apps/api/src/api/llm/features/grading/services/url-grading.service.ts b/apps/api/src/api/llm/features/grading/services/url-grading.service.ts index 4c65cbc1..d7b713fc 100644 --- a/apps/api/src/api/llm/features/grading/services/url-grading.service.ts +++ b/apps/api/src/api/llm/features/grading/services/url-grading.service.ts @@ -107,7 +107,9 @@ export class UrlGradingService implements IUrlGradingService { ); } catch (retryError) { this.logger.error( - `All URL grading LLM retry attempts failed: ${retryError instanceof Error ? retryError.message : String(retryError)}`, + `All URL grading LLM retry attempts failed: ${ + retryError instanceof Error ? 
retryError.message : String(retryError) + }`, ); return this.createFallbackUrlResponse( totalPoints, @@ -212,10 +214,11 @@ export class UrlGradingService implements IUrlGradingService { try { this.logger.debug(`URL grading LLM attempt ${attempt}/${maxRetries}`); - const response = await this.promptProcessor.processPrompt( + const response = await this.promptProcessor.processPromptForFeature( prompt, assignmentId, AIUsageType.ASSIGNMENT_GRADING, + "url_grading", ); // Check if response is valid @@ -229,7 +232,10 @@ export class UrlGradingService implements IUrlGradingService { } this.logger.warn( - `URL grading LLM returned invalid response on attempt ${attempt}/${maxRetries}: "${response?.slice(0, 100)}..."`, + `URL grading LLM returned invalid response on attempt ${attempt}/${maxRetries}: "${response?.slice( + 0, + 100, + )}..."`, ); lastError = new Error( `Invalid LLM response: ${response?.slice(0, 100)}`, @@ -257,7 +263,11 @@ export class UrlGradingService implements IUrlGradingService { throw lastError || new Error("All URL grading LLM attempts failed"); } catch (fallbackError) { this.logger.error( - `URL grading fallback also failed: ${fallbackError instanceof Error ? fallbackError.message : String(fallbackError)}`, + `URL grading fallback also failed: ${ + fallbackError instanceof Error + ? fallbackError.message + : String(fallbackError) + }`, ); throw lastError || new Error("All URL grading LLM attempts failed"); } diff --git a/apps/api/src/api/llm/features/grading/services/video-grading.service.ts b/apps/api/src/api/llm/features/grading/services/video-grading.service.ts index b169d754..6071d50d 100644 --- a/apps/api/src/api/llm/features/grading/services/video-grading.service.ts +++ b/apps/api/src/api/llm/features/grading/services/video-grading.service.ts @@ -95,10 +95,11 @@ export class VideoPresentationGradingService }, }); - const response = await this.promptProcessor.processPrompt( + const response = await this.promptProcessor.processPromptForFeature( prompt, assignmentId, AIUsageType.ASSIGNMENT_GRADING, + "video_grading", ); try { diff --git a/apps/api/src/api/llm/features/question-generation/services/question-generation.service.ts b/apps/api/src/api/llm/features/question-generation/services/question-generation.service.ts index 95d0106b..32f69ce1 100644 --- a/apps/api/src/api/llm/features/question-generation/services/question-generation.service.ts +++ b/apps/api/src/api/llm/features/question-generation/services/question-generation.service.ts @@ -250,10 +250,11 @@ export class QuestionGenerationService implements IQuestionGenerationService { this.logger.debug( `Generating questions for assignment ID: ${assignmentId}`, ); - const response = await this.promptProcessor.processPrompt( + const response = await this.promptProcessor.processPromptForFeature( prompt, assignmentId, AIUsageType.ASSIGNMENT_GENERATION, + "question_generation", ); // Parse response @@ -854,7 +855,7 @@ FORMAT INSTRUCTIONS: {formatInstructions} `; - const response = await this.promptProcessor.processPrompt( + const response = await this.promptProcessor.processPromptForFeature( new PromptTemplate({ template, inputVariables: [], @@ -867,6 +868,7 @@ FORMAT INSTRUCTIONS: }), assignmentId, AIUsageType.ASSIGNMENT_GENERATION, + "question_generation", ); const parsedResponse = await parser.parse(response); if (parsedResponse.scoring) { @@ -2167,10 +2169,11 @@ FORMAT INSTRUCTIONS: while (attemptsLeft > 0 && !success) { try { - response = await this.promptProcessor.processPrompt( + response = await 
this.promptProcessor.processPromptForFeature( prompt, assignmentId, AIUsageType.ASSIGNMENT_GENERATION, + "question_generation", ); // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment @@ -2327,7 +2330,7 @@ FORMAT INSTRUCTIONS: while (attemptsLeft > 0 && !success) { try { - response = await this.promptProcessor.processPrompt( + response = await this.promptProcessor.processPromptForFeature( new PromptTemplate({ template, inputVariables: [], @@ -2338,6 +2341,7 @@ FORMAT INSTRUCTIONS: }), assignmentId, AIUsageType.ASSIGNMENT_GENERATION, + "question_generation", ); const parsedResponse = await parser.parse(response); diff --git a/apps/api/src/api/llm/features/question-generation/services/question-validator.service.ts b/apps/api/src/api/llm/features/question-generation/services/question-validator.service.ts index 5d54c7bb..da79c97d 100644 --- a/apps/api/src/api/llm/features/question-generation/services/question-validator.service.ts +++ b/apps/api/src/api/llm/features/question-generation/services/question-validator.service.ts @@ -150,10 +150,11 @@ export class QuestionValidatorService implements IQuestionValidatorService { }); try { - const response = await this.promptProcessor.processPrompt( + const response = await this.promptProcessor.processPromptForFeature( prompt, assignmentId, AIUsageType.ASSIGNMENT_GENERATION, + "question_generation", "gpt-4o-mini", ); diff --git a/apps/api/src/api/llm/features/rubric/services/rubric.service.ts b/apps/api/src/api/llm/features/rubric/services/rubric.service.ts index b43a2b4a..49c6a0e0 100644 --- a/apps/api/src/api/llm/features/rubric/services/rubric.service.ts +++ b/apps/api/src/api/llm/features/rubric/services/rubric.service.ts @@ -118,10 +118,12 @@ export class RubricService implements IRubricService { }); try { - const response = await this.promptProcessor.processPrompt( + const response = await this.promptProcessor.processPromptForFeature( prompt, assignmentId, AIUsageType.QUESTION_GENERATION, + "rubric_generation", + "gpt-4o-mini", ); let parsed: @@ -517,10 +519,11 @@ export class RubricService implements IRubricService { }); try { - const response = await this.promptProcessor.processPrompt( + const response = await this.promptProcessor.processPromptForFeature( prompt, assignmentId, AIUsageType.QUESTION_GENERATION, + "rubric_generation", ); let parsed: diff --git a/apps/api/src/api/llm/features/translation/services/translation.service.ts b/apps/api/src/api/llm/features/translation/services/translation.service.ts index 859021cd..788eea73 100644 --- a/apps/api/src/api/llm/features/translation/services/translation.service.ts +++ b/apps/api/src/api/llm/features/translation/services/translation.service.ts @@ -242,7 +242,7 @@ INSTRUCTIONS: }); try { - const response = await this.promptProcessor.processPrompt( + const response = await this.promptProcessor.processPromptForFeature( prompt, assignmentId, AIUsageType.TRANSLATION, @@ -361,7 +361,7 @@ INSTRUCTIONS: }); try { - const response = await this.promptProcessor.processPrompt( + const response = await this.promptProcessor.processPromptForFeature( prompt, assignmentId, AIUsageType.TRANSLATION, @@ -430,7 +430,7 @@ INSTRUCTIONS: }); try { - const response = await this.promptProcessor.processPrompt( + const response = await this.promptProcessor.processPromptForFeature( prompt, assignmentId, AIUsageType.TRANSLATION, @@ -792,7 +792,7 @@ INSTRUCTIONS: }); try { - const response = await this.promptProcessor.processPrompt( + const response = await this.promptProcessor.processPromptForFeature( prompt, 
assignmentId, AIUsageType.TRANSLATION, diff --git a/apps/api/src/api/llm/llm.module.ts b/apps/api/src/api/llm/llm.module.ts index 22a8bfc5..2abf0ac3 100644 --- a/apps/api/src/api/llm/llm.module.ts +++ b/apps/api/src/api/llm/llm.module.ts @@ -4,6 +4,7 @@ import { S3Service } from "../files/services/s3.service"; import { Gpt5LlmService } from "./core/services/gpt5-llm.service"; import { Gpt5MiniLlmService } from "./core/services/gpt5-mini-llm.service"; import { Gpt5NanoLlmService } from "./core/services/gpt5-nano-llm.service"; +import { GptOss120bLlmService } from "./core/services/gpt-oss-120b-llm-service"; import { LLMAssignmentService } from "./core/services/llm-assignment.service"; import { LLMPricingService } from "./core/services/llm-pricing.service"; import { LLMResolverService } from "./core/services/llm-resolver.service"; @@ -63,6 +64,7 @@ import { Gpt5LlmService, Gpt5MiniLlmService, Gpt5NanoLlmService, + GptOss120bLlmService, // LlamaLlmService, LlmRouter, { @@ -74,9 +76,10 @@ import { p4: Gpt5LlmService, p5: Gpt5MiniLlmService, p6: Gpt5NanoLlmService, - // p7: LlamaLlmService, + p7: GptOss120bLlmService, + // p8: LlamaLlmService, ) => { - return [p1, p2, p3, p4, p5, p6]; + return [p1, p2, p3, p4, p5, p6, p7]; }, inject: [ OpenAiLlmService, @@ -85,6 +88,7 @@ import { Gpt5LlmService, Gpt5MiniLlmService, Gpt5NanoLlmService, + GptOss120bLlmService, // LlamaLlmService, ], },
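Note on the pricing rows inserted by the migration: 0.00000015 USD per input token and 0.0000006 USD per output token correspond to roughly $0.15 per million input tokens and $0.60 per million output tokens. A minimal sketch of how such per-token rates turn into a request cost, assuming the usual linear input/output billing model (the real calculation belongs to LLMPricingService, which this patch does not touch):

// Illustrative only: the rates are copied from the migration; the helper and
// its linear cost model are assumptions, not code from this repository.
const GPT_OSS_120B_INPUT_PRICE = 0.00000015; // USD per input token
const GPT_OSS_120B_OUTPUT_PRICE = 0.0000006; // USD per output token

function estimateCostUsd(inputTokens: number, outputTokens: number): number {
  return (
    inputTokens * GPT_OSS_120B_INPUT_PRICE +
    outputTokens * GPT_OSS_120B_OUTPUT_PRICE
  );
}

// Example: 4,000 input tokens and 1,000 output tokens
// 4000 * 0.00000015 + 1000 * 0.0000006 = 0.0006 + 0.0006 = 0.0012 USD
estimateCostUsd(4000, 1000);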
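A minimal usage sketch for the newly registered provider, based on the LlmRouter methods and the invoke signature shown in this diff; the surrounding NestJS injection and the import paths are assumed rather than taken from the patch:

import { HumanMessage } from "@langchain/core/messages";
import { LlmRouter } from "./core/services/llm-router.service";
import { LlmResponse } from "./core/interfaces/llm-provider.interface";

// `router` would normally be injected by Nest; "file_grading" is one of the
// feature keys threaded through the grading services in this patch.
async function gradeWithConfiguredModel(router: LlmRouter): Promise<LlmResponse> {
  // Resolves whatever model is assigned to the feature, falling back to
  // gpt-4o-mini; router.get("gpt-oss-120b") would instead address the
  // WatsonX-hosted model directly by its registry key.
  const provider = await router.getForFeatureWithFallback("file_grading");

  return provider.invoke([new HumanMessage('Return {"ok": true} as JSON.')], {
    temperature: 0.2, // GptOss120bLlmService defaults to 0.5
    maxTokens: 256, // mapped to maxNewTokens on the WatsonxLLM client
  });
}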
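Most feature services in this patch move from processPrompt(...) to processPromptForFeature(...), which threads a feature key through to LLMResolverService so per-feature model assignments take effect. The diff does not show the method's declaration; the shape below is inferred from the call sites (prompt, assignment id, usage type, feature key, optional model override) and should be read as an assumption rather than the canonical interface:

import { PromptTemplate } from "@langchain/core/prompts";

// Inferred from call sites such as file-grading.service.ts and
// rubric.service.ts; parameter names and the assignmentId type are guesses.
type AIUsageType = string; // stands in for the Prisma enum used by the services

interface IPromptProcessorSketch {
  processPromptForFeature(
    prompt: PromptTemplate,
    assignmentId: number,
    usageType: AIUsageType,
    featureKey: string, // e.g. "file_grading", "rubric_generation"
    modelOverride?: string, // e.g. "gpt-4o-mini"
  ): Promise<string>;
}

// Typical call, mirroring rubric.service.ts:
// await promptProcessor.processPromptForFeature(
//   prompt,
//   assignmentId,
//   AIUsageType.QUESTION_GENERATION,
//   "rubric_generation",
//   "gpt-4o-mini",
// );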