diff --git a/lib/AppInfo/Application.php b/lib/AppInfo/Application.php
index 02c27003..2097212e 100644
--- a/lib/AppInfo/Application.php
+++ b/lib/AppInfo/Application.php
@@ -118,6 +118,10 @@ public function register(IRegistrationContext $context): void {
 			if (class_exists('OCP\\TaskProcessing\\TaskTypes\\TextToTextProofread')) {
 				$context->registerTaskProcessingProvider(\OCA\OpenAi\TaskProcessing\ProofreadProvider::class);
 			}
+			if (!class_exists('OCP\\TaskProcessing\\TaskTypes\\AnalyzeImages')) {
+				$context->registerTaskProcessingTaskType(\OCA\OpenAi\TaskProcessing\AnalyzeImagesTaskType::class);
+			}
+			$context->registerTaskProcessingProvider(\OCA\OpenAi\TaskProcessing\AnalyzeImagesProvider::class);
 		}
 		if (!class_exists('OCP\\TaskProcessing\\TaskTypes\\TextToSpeech')) {
 			$context->registerTaskProcessingTaskType(\OCA\OpenAi\TaskProcessing\TextToSpeechTaskType::class);
diff --git a/lib/TaskProcessing/AnalyzeImagesProvider.php b/lib/TaskProcessing/AnalyzeImagesProvider.php
new file mode 100644
index 00000000..324f885c
--- /dev/null
+++ b/lib/TaskProcessing/AnalyzeImagesProvider.php
@@ -0,0 +1,231 @@
+openAiAPIService->getServiceName();
+	}
+
+	public function getTaskTypeId(): string {
+		// Use the OCP task type when that class exists, otherwise the one defined by this app.
+		if (class_exists('OCP\\TaskProcessing\\TaskTypes\\AnalyzeImages')) {
+			return \OCP\TaskProcessing\TaskTypes\AnalyzeImages::ID;
+		}
+		return AnalyzeImagesTaskType::ID;
+	}
+
+	public function getExpectedRuntime(): int {
+		return $this->openAiAPIService->getExpTextProcessingTime();
+	}
+
+	public function getInputShapeEnumValues(): array {
+		return [];
+	}
+
+	public function getInputShapeDefaults(): array {
+		return [];
+	}
+
+
+	public function getOptionalInputShape(): array {
+		return [
+			'max_tokens' => new ShapeDescriptor(
+				$this->l->t('Maximum output words'),
+				$this->l->t('The maximum number of words/tokens that can be generated in the output.'),
+				EShapeType::Number
+			),
+			'model' => new ShapeDescriptor(
+				$this->l->t('Model'),
+				$this->l->t('The model used to generate the output'),
+				EShapeType::Enum
+			),
+		];
+	}
+
+	public function getOptionalInputShapeEnumValues(): array {
+		return [
+			'model' => $this->openAiAPIService->getModelEnumValues($this->userId),
+		];
+	}
+
+	public function getOptionalInputShapeDefaults(): array {
+		// With OpenAI an empty or unset model setting falls back to Application::DEFAULT_MODEL_ID; other backends get the stored value as is.
+		$adminModel = $this->openAiAPIService->isUsingOpenAi()
+			? ($this->appConfig->getValueString(Application::APP_ID, 'default_completion_model_id', Application::DEFAULT_MODEL_ID) ?: Application::DEFAULT_MODEL_ID)
+			: $this->appConfig->getValueString(Application::APP_ID, 'default_completion_model_id');
+		return [
+			'max_tokens' => 1000,
+			'model' => $adminModel,
+		];
+	}
+
+	public function getOutputShapeEnumValues(): array {
+		return [];
+	}
+
+	public function getOptionalOutputShape(): array {
+		return [];
+	}
+
+	public function getOptionalOutputShapeEnumValues(): array {
+		return [];
+	}
+
+	/**
+	 * Answer a question about a list of images through the chat completion endpoint.
+	 *
+	 * Every image must be a readable File with an image MIME type. It is sent as a
+	 * base64 data URL in a user message of its own.
+	 *
+	 * @param string|null $userId User ID that is passed on to the API service
+	 * @param array $input Requires 'images' (list of File) and 'input' (prompt string); 'model' (string) and 'max_tokens' (int) are optional
+	 * @param callable $reportProgress Progress callback, not used by this provider
+	 * @return array The last completion message under the 'output' key
+	 * @throws RuntimeException If the input is invalid or the completion request fails
+	 */
+	public function process(?string $userId, array $input, callable $reportProgress): array {
+		$usingOpenAi = $this->openAiAPIService->isUsingOpenAi();
+		if (!$usingOpenAi && !$this->openAiSettingsService->getChatEndpointEnabled()) {
+			throw new RuntimeException('Must support chat completion endpoint');
+		}
+
+		if (!isset($input['images']) || !is_array($input['images'])) {
+			throw new RuntimeException('Invalid file list');
+		}
+		// Maximum file count for openai is 500. Seems reasonable enough to enforce for all apis though (https://platform.openai.com/docs/guides/images-vision?api-mode=responses&format=url#image-input-requirements)
+		if (count($input['images']) > 500) {
+			throw new RuntimeException('Too many files given. Max is 500');
+		}
+		// Validate the prompt before any file is read so that invalid requests fail cheaply.
+		if (!isset($input['input']) || !is_string($input['input'])) {
+			throw new RuntimeException('Invalid prompt');
+		}
+		$prompt = $input['input'];
+
+		// Image types accepted by OpenAI; other backends only require an image/* MIME type.
+		$openAiFileTypes = [
+			'image/jpeg',
+			'image/png',
+			'image/gif',
+			'image/webp',
+		];
+		$history = [];
+		$totalSize = 0;
+		foreach ($input['images'] as $image) {
+			if (!$image instanceof File || !$image->isReadable()) {
+				throw new RuntimeException('Invalid input file');
+			}
+			$totalSize += intval($image->getSize());
+			// The limit applies to the combined size of all images: the maximum for openai is 50MB. Seems reasonable enough to enforce for all apis though. (https://platform.openai.com/docs/guides/images-vision?api-mode=responses&format=url#image-input-requirements)
+			if ($totalSize > 50 * 1000 * 1000) {
+				throw new RuntimeException('Filesize of input files too large. Max is 50MB');
+			}
+			// Check the type before reading so that rejected files are never loaded into memory.
+			$fileType = $image->getMimeType();
+			if (!str_starts_with($fileType, 'image/')) {
+				throw new RuntimeException('Invalid input file type ' . $fileType);
+			}
+			if ($usingOpenAi && !in_array($fileType, $openAiFileTypes, true)) {
+				throw new RuntimeException('Invalid input file type for OpenAI ' . $fileType);
+			}
+			$history[] = json_encode([
+				'role' => 'user',
+				'content' => [
+					[
+						'type' => 'image_url',
+						'image_url' => [
+							'url' => 'data:' . $fileType . ';base64,' . $this->readImageAsBase64($image),
+						],
+					],
+				],
+			]);
+		}
+
+		// NOTE(review): getOptionalInputShapeDefaults() falls back to Application::DEFAULT_MODEL_ID while this uses DEFAULT_COMPLETION_MODEL_ID - confirm which one is intended.
+		if (isset($input['model']) && is_string($input['model'])) {
+			$model = $input['model'];
+		} else {
+			$model = $this->appConfig->getValueString(Application::APP_ID, 'default_completion_model_id', Application::DEFAULT_COMPLETION_MODEL_ID) ?: Application::DEFAULT_COMPLETION_MODEL_ID;
+		}
+
+		$maxTokens = null;
+		if (isset($input['max_tokens']) && is_int($input['max_tokens'])) {
+			$maxTokens = $input['max_tokens'];
+		}
+
+		try {
+			$systemPrompt = 'Take the user\'s question and answer it based on the provided images. Ensure that the answer matches the language of the user\'s question.';
+			$completion = $this->openAiAPIService->createChatCompletion($userId, $model, $prompt, $systemPrompt, $history, 1, $maxTokens);
+			$completion = $completion['messages'];
+
+			if (count($completion) > 0) {
+				return ['output' => array_pop($completion)];
+			}
+
+			throw new RuntimeException('No result in OpenAI/LocalAI response.');
+		} catch (\Exception $e) {
+			$this->logger->warning('OpenAI/LocalAI\'s image question generation failed with: ' . $e->getMessage(), ['exception' => $e]);
+			// Keep the caught exception as "previous" so its type and stack trace are not lost.
+			throw new RuntimeException('OpenAI/LocalAI\'s image question generation failed with: ' . $e->getMessage(), 0, $e);
+		}
+	}
+
+	/**
+	 * Read the full content of an image file and return it base64-encoded.
+	 *
+	 * @param File $image File to read
+	 * @return string Base64-encoded file content
+	 * @throws RuntimeException If the file cannot be opened or read
+	 */
+	private function readImageAsBase64(File $image): string {
+		$handle = $image->fopen('rb');
+		if (!is_resource($handle)) {
+			throw new RuntimeException('Could not open input file');
+		}
+		try {
+			$content = stream_get_contents($handle);
+		} finally {
+			// Always release the stream handle.
+			fclose($handle);
+		}
+		if ($content === false) {
+			throw new RuntimeException('Could not read input file');
+		}
+		return base64_encode($content);
+	}
+}
diff --git a/lib/TaskProcessing/AnalyzeImagesTaskType.php b/lib/TaskProcessing/AnalyzeImagesTaskType.php
new file mode 100644
index 00000000..13e12a53
--- /dev/null
+++ b/lib/TaskProcessing/AnalyzeImagesTaskType.php
@@ -0,0 +1,77 @@
+l->t('Analyze images');
+	}
+
+	/**
+	 * @inheritDoc
+	 */
+	public function getDescription(): string {
+		return $this->l->t('Ask a question about the given images.');
+	}
+
+	/**
+	 * @return string
+	 */
+	public function getId(): string {
+		return self::ID;
+	}
+
+	/**
+	 * @return ShapeDescriptor[]
+	 */
+	public function getInputShape(): array {
+		return [
+			'images' => new ShapeDescriptor(
+				$this->l->t('Images'),
+				$this->l->t('Images to ask a question about'),
+				EShapeType::ListOfImages,
+			),
+			'input' => new ShapeDescriptor(
+				$this->l->t('Question'),
+				$this->l->t('What to ask about the image.'),
+				EShapeType::Text,
+			),
+		];
+	}
+
+	/**
+	 * @return ShapeDescriptor[]
+	 */
+	public function getOutputShape(): array {
+		return [
+			'output' => new ShapeDescriptor(
+				$this->l->t('Generated response'),
+				$this->l->t('The answer to the question'),
+				EShapeType::Text
+			),
+		];
+	}
+}