diff --git a/contrib/aifun/Makefile b/contrib/aifun/Makefile new file mode 100644 index 00000000000..8e5247557b9 --- /dev/null +++ b/contrib/aifun/Makefile @@ -0,0 +1,34 @@ +EXTENSION = aifun +DATA = aifun--1.0.sql +CONTROL = aifun.control + +# Python 3 executable +PYTHON3=$(shell which python3) + +# Python code directories +PYTHON_DIR = $(srcdir)/aifun +PG_PYTHON_DIR = $(libdir)/python/aifun + +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) + +# ------------------------------------------------------------------ +# Custom install section for Python handler files +# ------------------------------------------------------------------ + +install: python-install + +python-install: + @echo "Installing Python handler files to $(DESTDIR)$(PG_PYTHON_DIR)" + mkdir -p '$(DESTDIR)$(PG_PYTHON_DIR)' + cp -a $(PYTHON_DIR)/*.py '$(DESTDIR)$(PG_PYTHON_DIR)/' + cp -a $(PYTHON_DIR)/providers '$(DESTDIR)$(PG_PYTHON_DIR)/' + cp $(srcdir)/requirements.txt '$(DESTDIR)$(PG_PYTHON_DIR)/' + $(PYTHON3) -m pip install --user --upgrade -r '$(DESTDIR)$(PG_PYTHON_DIR)/requirements.txt' + +uninstall: python-uninstall + +python-uninstall: + @echo "Removing Python handler files from $(DESTDIR)$(PG_PYTHON_DIR)" + rm -rf '$(DESTDIR)$(PG_PYTHON_DIR)' diff --git a/contrib/aifun/README.md b/contrib/aifun/README.md new file mode 100644 index 00000000000..7227d1d7ba3 --- /dev/null +++ b/contrib/aifun/README.md @@ -0,0 +1,379 @@ +# AIFun PostgreSQL Extension + +An Apache Cloudberry/PostgreSQL extension that provides AI-powered functions that can be used with various LLM providers (OpenAI, Anthropic, Google Gemini, AWS Bedrock, any third-party providers that follow the OpenAI API specification, and locally hosted models using vLLM or SGLang). 
+ +## Features + +- **Multiple AI Providers**: Support for OpenAI, Anthropic, Google Gemini, and AWS Bedrock +- **User Isolation**: Each user manages their own API keys and providers +- **Row Level Security**: PostgreSQL RLS ensures users can only access their own configurations +- **AI Functions**: Ask questions, generate embeddings, classify text, extract structured data, summarize, translate, and more +- **Simple Setup**: No complex encryption keys, users directly manage their API keys + +## Installation + +```bash +# Install the extension +make install + +# In PostgreSQL +CREATE EXTENSION aifun; +``` + +## Quick Start + +### 1. Add Your AI Provider + +```sql +-- Add OpenAI-compatible provider +SELECT + aifun.add_provider( + p_id => 'local_llm', + p_type => 'openai', + p_api_key => 'abc', + p_metadata => '{ + "endpoint": "http://10.14.10.1:8800/vllm/v1" + }'::JSONB + ); + +-- Add OpenAI provider +SELECT + aifun.add_provider( + p_id => 'my_openai', + p_type => 'openai', + p_api_key => 'api-key-openai' + ); + +-- Add Anthropic provider +SELECT + aifun.add_provider( + p_id => 'my_anthropic', + p_type => 'anthropic', + p_api_key => 'api-key-anthropic' + ); + +-- Add Google Gemini provider +SELECT + aifun.add_provider( + p_id => 'my_gemini', + p_type => 'gemini', + p_api_key => 'api-key-gemini' + ); +``` + +### 2. Use AI Functions + +```sql +-- Ask a question +SELECT + aifun.ask( + provider => 'local_llm', + model => 'zhipu/glm4-9b-chat', + prompt => 'What is PostgreSQL?' + ); + +-- Ask a question with context +SELECT + aifun.ask( + provider => 'local_llm', + model => 'zhipu/glm4-9b-chat', + prompt => 'What is the main advantage?', + context => '{"context": "PostgreSQL is a relational database with ACID compliance and extensive extensibility."}'::jsonb + ); + +-- Have a conversation with the AI +SELECT + aifun.chat( + provider => 'local_llm', + model => 'zhipu/glm4-9b-chat', + messages => '[ + { + "role": "system", + "content": "You are a helpful assistant." 
+ }, + { + "role": "user", + "content": "What is PostgreSQL?" + }, + { + "role": "assistant", + "content": "PostgreSQL is an open-source relational database management system." + }, + { + "role": "user", + "content": "What are its main features?" + } + ]'::jsonb + ); + +-- Classify text +SELECT + aifun.classify( + provider => 'local_llm', + model => 'zhipu/glm4-9b-chat', + text_to_classify => 'I love this product!', + labels => ARRAY['positive', 'negative', 'neutral'] + ); + +-- Extract structured data +SELECT + aifun.extract( + provider => 'local_llm', + model => 'zhipu/glm4-9b-chat', + text_to_parse => 'John is 30 years old and works as a developer', + json_schema => '{ + "name": "string", + "age": "number", + "job": "string" + }'::JSONB + ); + +-- Summarize text +SELECT + aifun.summarize( + provider => 'local_llm', + model => 'zhipu/glm4-9b-chat', + text_to_summarize => 'aifun is a PostgreSQL extension that provides AI-powered functions using various LLM providers (OpenAI, Anthropic, Google Gemini, AWS Bedrock). + It allows users to ask questions, generate embeddings, classify text, extract structured data, summarize, translate, and more. + ' +); + +-- Translate text +SELECT + aifun.translate( + provider => 'local_llm', + model => 'zhipu/glm4-9b-chat', + text_to_translate => 'Hello world', + target_language => 'Spanish' + ); + +-- Visual Q&A +SELECT + aifun.visual_qa( + provider => 'local_vlm', + model => 'paddleocr-vl', + image => 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAIAAAACUFjqAAAAE0lEQVR4nGP8z4APMOGVZRip0gBBLAETee26JgAAAABJRU5ErkJggg==', + question => 'What is the color of the shape?' 
+ ); + +-- Generate embeddings +SELECT + aifun.embed( + provider => 'local_embedding', + model => 'jina', + text_to_embed => 'PostgreSQL is awesome' + ); + +-- Multimodal embeddings with JSON content +SELECT + aifun.multimodal_embed( + provider => 'local_multimodal_embedding', + model => 'bge', + content => '{"text": "A red square", "image": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAIAAAACUFjqAAAAE0lEQVR4nGP8z4APMOGVZRip0gBBLAETee26JgAAAABJRU5ErkJggg=="}'::jsonb + ); + +-- Multimodal embeddings with text and image +SELECT + aifun.multimodal_embed( + provider => 'local_multimodal_embedding', + model => 'bge', + text => 'A red square', + image => 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAIAAAACUFjqAAAAE0lEQVR4nGP8z4APMOGVZRip0gBBLAETee26JgAAAABJRU5ErkJggg==' + ); + +-- Parse PDF (Replace with actual base64-encoded PDF content) +SELECT aifun.parse_pdf( + file_content_base64 => 'data:application/pdf;base64,JVBERi0xLjQKJdPr6eUQyAAAAAA==' +); + +-- Parse DOCX (Replace with actual base64-encoded DOCX content) +SELECT aifun.parse_docx( + file_content_base64 => 'data:application/vnd.openxmlformats-officedocument.wordprocessingml.document;base64,UEsDBBQACAgIAQwCAAQAAQAAAgICAgICAgAAAAAA==' +); + +-- Parse PPTX (Replace with actual base64-encoded PPTX content) +SELECT aifun.parse_pptx( + file_content_base64 => 'data:application/vnd.openxmlformats-officedocument.presentationml.presentation;base64,UEsDBBQACAgIAQwCAAQAAQAAAgICAgICAgAAAAAA==' +); + +-- Parse PDF with VLM +SELECT + aifun.parse_pdf_with_vlm( + provider => 'local_vlm', + model => 'paddleocr-vl', + file_content_base64 => 'data:application/pdf;base64,JVBERi0xLjQKJdPr6eUQyAAAAAA==' + ); + +-- Parse document with specified format (Replace with actual base64-encoded content) +SELECT aifun.parse_document( + file_content_base64 => 'data:application/pdf;base64,JVBERi0xLjQKJdPr6eUQyAAAAAA==', + file_extension => 'pdf' +); + +-- Parse document with specified format (Replace with actual 
base64-encoded content) +SELECT aifun.parse_document( + file_content_base64 => 'data:application/vnd.openxmlformats-officedocument.wordprocessingml.document;base64,UEsDBBQACAgIAQwCAAQAAQAAAgICAgICAgAAAAAA==', + file_extension => 'docx' +); + +-- Parse document with specified format (Replace with actual base64-encoded content) +SELECT aifun.parse_document( + file_content_base64 => 'data:application/vnd.openxmlformats-officedocument.presentationml.presentation;base64,UEsDBBQACAgIAQwCAAQAAQAAAgICAgICAgAAAAAA==', + file_extension => 'pptx' +); +``` + +### 3. Use AI Functions with Tables + +```sql +CREATE TABLE IF NOT EXISTS test ( + id SERIAL PRIMARY KEY, + content TEXT +); + +-- Insert some test data +INSERT INTO test (content) +VALUES ('PostgreSQL is a powerful open-source database system.'), + ('I love using Anthropic for AI-powered chatbots.'), + ('AIFun is a PostgreSQL extension that provides AI-powered functions using various LLM providers.'); + +CREATE TABLE test_translation AS +SELECT + id, + aifun.translate( + provider => 'local_llm', + model => 'zhipu/glm4-9b-chat', + text_to_translate => content, + target_language => 'zh' + ) AS translation +FROM test; + +CREATE TABLE test_embedding AS +SELECT + id, + aifun.embed( + provider => 'local_llm', + model => 'jina', + text_to_embed => content + ) AS embedding +FROM test; +``` + +### 4. 
Use AI Functions with Directory Tables +```sql +-- Create a directory table +CREATE DIRECTORY TABLE test_dirt; + +-- Parse PDF files +SELECT + aifun.parse_pdf( + file_content_bytea => content + ) +FROM directory_table('test_dirt') +WHERE relative_path LIKE '%.pdf'; + +-- Parse DOCX files +SELECT + aifun.parse_docx( + file_content_bytea => content + ) +FROM directory_table('test_dirt') +WHERE relative_path LIKE '%.docx'; + +-- Parse PPTX files +SELECT + aifun.parse_pptx( + file_content_bytea => content + ) +FROM directory_table('test_dirt') +WHERE relative_path LIKE '%.pptx'; + +-- Parse all document types +SELECT + aifun.parse_document( + file_content_bytea => content, + file_extension => SPLIT_PART(relative_path, '.', -1) + ) +FROM directory_table('test_dirt'); +``` + +### 5. Manage Your Providers + +```sql +-- List your providers +SELECT * FROM aifun.my_providers; + +-- Update API key +SELECT aifun.update_api_key('my_openai', 'new-api-key'); + +-- Remove provider +SELECT aifun.remove_provider('my_openai'); + +-- Check if provider exists +SELECT aifun.has_provider('my_openai'); +``` + +## Security Model + +This extension uses **Row Level Security (RLS)** to ensure complete user isolation: + +- **User Isolation**: Each user can only access their own provider configurations +- **No Shared Secrets**: No master encryption keys that could be compromised +- **Direct API Keys**: Users manage their own API keys directly +- **PostgreSQL Security**: Leverages PostgreSQL's built-in RLS for access control + +### How It Works + +1. Each provider configuration is tagged with the owner's username +2. RLS policies ensure users can only see/modify their own data +3. API keys are stored as plain text (user's own keys) +4. 
No central encryption/decryption overhead + +## Supported Providers + +| Provider | Type | Models | +|----------|------|---------| +| OpenAI | `openai` | gpt-4, gpt-3.5-turbo, text-embedding-ada-002, or compatible models | +| Anthropic | `anthropic` | claude-3-sonnet, claude-3-haiku | +| Google Gemini | `gemini` | gemini-pro, gemini-pro-vision | +| AWS Bedrock | `aws_bedrock` | Various models via AWS Bedrock | + +## Appendix - List of Functions + +| Schema | Name | Result data type | Argument data types | Type | +|--------|------|------------------|---------------------|------| +| aifun | _get_provider_key | text | p_provider_id text | func | +| aifun | add_provider | void | p_id text, p_type text, p_api_key text, p_metadata jsonb DEFAULT NULL::jsonb | func | +| aifun | ask | text | provider text, model text, prompt text | func | +| aifun | ask | text | provider text, model text, prompt text, context text | func | +| aifun | chat | text | provider text, model text, messages jsonb | func | +| aifun | chunk | text[] | text text, chunk_size integer DEFAULT 1000, overlap integer DEFAULT 200 | func | +| aifun | classify | text | provider text, model text, text_to_classify text, labels text[] | func | +| aifun | embed | vector | provider text, model text, text_to_embed text | func | +| aifun | extract | jsonb | provider text, model text, text_to_parse text, json_schema jsonb | func | +| aifun | extract_keywords | text[] | provider text, model text, text text, num_keywords integer DEFAULT 5 | func | +| aifun | fix_grammar | text | provider text, model text, text text | func | +| aifun | has_provider | boolean | p_provider_id text | func | +| aifun | help | text | p_function_name text | func | +| aifun | list_all | TABLE(function_name text, function_description text) | | func | +| aifun | multimodal_embed | vector | provider text, model text, content jsonb | func | +| aifun | multimodal_embed | vector | provider text, model text, text text DEFAULT NULL::text, image bytea 
DEFAULT NULL::bytea | func | +| aifun | multimodal_embed | vector | provider text, model text, text text DEFAULT NULL::text, image text DEFAULT NULL::text | func | +| aifun | parse_document | jsonb | file_content_base64 text, file_extension text | func | +| aifun | parse_document | jsonb | file_content_bytea bytea, file_extension text | func | +| aifun | parse_docx | jsonb | file_content_base64 text | func | +| aifun | parse_docx | jsonb | file_content_bytea bytea | func | +| aifun | parse_pdf | jsonb | file_content_base64 text | func | +| aifun | parse_pdf | jsonb | file_content_bytea bytea | func | +| aifun | parse_pdf_with_vlm | jsonb | provider text, model text, file_content_base64 text, prompt text DEFAULT 'Extract all text and describe any images, charts, or visual elements in this PDF.'::text | func | +| aifun | parse_pdf_with_vlm | jsonb | provider text, model text, file_content_bytea bytea, prompt text DEFAULT 'Extract all text and describe any images, charts, or visual elements in this PDF.'::text | func | +| aifun | parse_pptx | jsonb | file_content_base64 text | func | +| aifun | parse_pptx | jsonb | file_content_bytea bytea | func | +| aifun | remove_provider | void | p_id text | func | +| aifun | rerank | text[] | provider text, model text, query text, documents text[] | func | +| aifun | similarity | double precision | provider text, model text, text1 text, text2 text | func | +| aifun | summarize | text | provider text, model text, text_to_summarize text, length integer DEFAULT 50 | func | +| aifun | translate | text | provider text, model text, text_to_translate text, target_language text | func | +| aifun | update_api_key | void | p_provider_id text, p_api_key text | func | +| aifun | visual_qa | text | provider text, model text, image bytea, question text | func | +| aifun | visual_qa | text | provider text, model text, image text, question text | func | \ No newline at end of file diff --git a/contrib/aifun/aifun--1.0.sql 
b/contrib/aifun/aifun--1.0.sql new file mode 100644 index 00000000000..d32d0fa2e0f --- /dev/null +++ b/contrib/aifun/aifun--1.0.sql @@ -0,0 +1,876 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + + +\echo Use "CREATE EXTENSION aifun" to load this file. \quit + +-- SECURITY WARNING: This extension handles sensitive API keys and credentials. +-- Row Level Security (RLS) is applied to ensure proper isolation. +-- Each user can only access their own provider configurations. 
+ +CREATE SCHEMA IF NOT EXISTS aifun; + +-- Table to store provider configurations for each user +-- This table uses Row Level Security (RLS) to ensure users can only access their own providers +CREATE TABLE IF NOT EXISTS aifun.providers ( + owner_role TEXT NOT NULL DEFAULT current_user, + provider_id TEXT NOT NULL, + provider_type TEXT NOT NULL, + api_key TEXT NOT NULL, + metadata JSONB, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + PRIMARY KEY (owner_role, provider_id) +) DISTRIBUTED REPLICATED; + +-- Enable Row Level Security on the providers table +ALTER TABLE aifun.providers ENABLE ROW LEVEL SECURITY; + +-- Create RLS policy to ensure users can only access their own providers +CREATE POLICY user_isolation_policy ON aifun.providers + FOR ALL TO public + USING (owner_role = current_user) + WITH CHECK (owner_role = current_user); + +-- Add or update a provider for the current user +CREATE OR REPLACE FUNCTION aifun.add_provider( + p_id TEXT, + p_type TEXT, + p_api_key TEXT, + p_metadata JSONB DEFAULT NULL +) RETURNS void AS $$ +BEGIN + INSERT INTO aifun.providers (owner_role, provider_id, provider_type, api_key, metadata) + VALUES ( + current_user, + p_id, + p_type, + p_api_key, + p_metadata + ) + ON CONFLICT (owner_role, provider_id) DO UPDATE + SET provider_type = EXCLUDED.provider_type, + api_key = EXCLUDED.api_key, + metadata = EXCLUDED.metadata; +END; +$$ LANGUAGE plpgsql; + +COMMENT ON FUNCTION aifun.add_provider(TEXT, TEXT, TEXT, JSONB) IS 'Adds a new AI provider configuration or updates an existing one for the current user. + +Parameters: +- p_id: Unique provider identifier +- p_type: Provider type (e.g., "openai", "anthropic", "gemini", etc.) 
+- p_api_key: API key for authentication +- p_metadata: Optional JSON with additional configuration + +Each user can only access their own providers due to Row Level Security.'; + +-- Function to remove a provider configuration for the current user +CREATE OR REPLACE FUNCTION aifun.remove_provider(p_id TEXT) +RETURNS void AS $$ +BEGIN + DELETE FROM aifun.providers + WHERE owner_role = current_user AND provider_id = p_id; +END; +$$ LANGUAGE plpgsql; + +COMMENT ON FUNCTION aifun.remove_provider(TEXT) IS 'Removes an AI provider configuration for the current user. + +Parameters: +- p_id: The unique identifier of the provider to remove + +Users can only remove their own providers due to Row Level Security. This action is irreversible.'; + +-- Function to get API key for a provider (internal use only) +CREATE OR REPLACE FUNCTION aifun._get_provider_key(p_provider_id TEXT) +RETURNS TEXT AS $$ +DECLARE + v_api_key TEXT; +BEGIN + SELECT api_key INTO v_api_key + FROM aifun.providers + WHERE owner_role = current_user AND provider_id = p_provider_id; + + IF v_api_key IS NULL THEN + RAISE EXCEPTION 'Provider not found or access denied'; + END IF; + + RETURN v_api_key; +END; +$$ LANGUAGE plpgsql; + +COMMENT ON FUNCTION aifun._get_provider_key(TEXT) IS 'Internal function that retrieves the API key for a specific provider. + +Parameters: +- p_provider_id: The unique identifier of the provider + +This function is restricted to the current user''s providers due to Row Level Security. 
Raises an exception if the provider is not found or access is denied.'; + +-- Function to update provider API key +CREATE OR REPLACE FUNCTION aifun.update_api_key(p_provider_id TEXT, p_api_key TEXT) +RETURNS void AS $$ +BEGIN + UPDATE aifun.providers + SET api_key = p_api_key + WHERE owner_role = current_user AND provider_id = p_provider_id; + + IF NOT FOUND THEN + RAISE EXCEPTION 'Provider not found or access denied'; + END IF; +END; +$$ LANGUAGE plpgsql; + +COMMENT ON FUNCTION aifun.update_api_key(TEXT, TEXT) IS 'Updates the API key for an existing AI provider configuration. + +Parameters: +- p_provider_id: The unique identifier of the provider +- p_api_key: The new API key + +Users can only update their own providers due to Row Level Security. Raises an exception if the provider is not found.'; + +-- Function to check if a provider exists for current user +CREATE OR REPLACE FUNCTION aifun.has_provider(p_provider_id TEXT) +RETURNS BOOLEAN AS $$ +BEGIN + RETURN EXISTS ( + SELECT 1 + FROM aifun.providers + WHERE owner_role = current_user AND provider_id = p_provider_id + ); +END; +$$ LANGUAGE plpgsql; + +COMMENT ON FUNCTION aifun.has_provider(TEXT) IS 'Checks if a specific AI provider configuration exists for the current user. + +Parameters: +- p_provider_id: The unique identifier of the provider to check + +Returns true if the provider exists and is accessible, false otherwise. This function respects Row Level Security.'; + +-- Function to ask a question using a specified provider and model +CREATE OR REPLACE FUNCTION aifun.ask(provider TEXT, model TEXT, prompt TEXT) +RETURNS TEXT AS $$ + try: + from aifun.llm_handler import ask + return ask(provider, model, prompt) + except Exception as e: + plpy.error(f"Error in AI function: {e}") +$$ LANGUAGE plpython3u; + +COMMENT ON FUNCTION aifun.ask(TEXT, TEXT, TEXT) IS 'Sends a question or prompt to an AI model and returns the response. 
+ +Parameters: +- provider: The unique identifier of the provider +- model: The specific model to use +- prompt: The question or prompt to send + +Returns the AI model''s response as text. Raises an exception if the provider or model is not available.'; + +-- Function to ask a question with context using a specified provider and model +CREATE OR REPLACE FUNCTION aifun.ask(provider TEXT, model TEXT, prompt TEXT, context TEXT) +RETURNS TEXT AS $$ + try: + from aifun.llm_handler import ask + + if context: + enhanced_prompt = f"Context: {context}\n\nQuestion: {prompt}" + else: + enhanced_prompt = prompt + + return ask(provider, model, enhanced_prompt) + except Exception as e: + plpy.error(f"Error in AI function: {e}") +$$ LANGUAGE plpython3u; + +COMMENT ON FUNCTION aifun.ask(TEXT, TEXT, TEXT, TEXT) IS 'Sends a question or prompt with context to an AI model and returns the response. + +Parameters: +- provider: The unique identifier of the provider +- model: The specific model to use +- prompt: The question or prompt to send +- context: Additional context information as plain text + +Returns the AI model''s response as text. The context is provided to the model to help with generating more relevant responses. 
Raises an exception if the provider or model is not available.'; + +-- Function to have a conversation with an AI model using a specified provider and model +CREATE OR REPLACE FUNCTION aifun.chat(provider TEXT, model TEXT, messages JSONB) +RETURNS TEXT AS $$ + try: + from aifun.llm_handler import ask + import json + + # Convert messages to a list of dictionaries + message_list = [] + if messages is not None: + message_list = list(messages) + + # Format messages for the AI model + formatted_messages = [] + for msg in message_list: + if isinstance(msg, dict) and 'role' in msg and 'content' in msg: + formatted_messages.append(msg) + + # Build conversation prompt + conversation_prompt = "" + system_messages = [msg for msg in formatted_messages if msg['role'] == 'system'] + if system_messages: + conversation_prompt += f"System: {system_messages[0]['content']}\n\n" + + for msg in formatted_messages: + if msg['role'] == 'user': + conversation_prompt += f"User: {msg['content']}\n" + elif msg['role'] == 'assistant': + conversation_prompt += f"Assistant: {msg['content']}\n" + + conversation_prompt += "Assistant: " + + return ask(provider, model, conversation_prompt) + except Exception as e: + plpy.error(f"Error in AI function: {e}") +$$ LANGUAGE plpython3u; + +COMMENT ON FUNCTION aifun.chat(TEXT, TEXT, JSONB) IS 'Has a conversation with an AI model using a specified provider and model. + +Parameters: +- provider: The unique identifier of the provider +- model: The specific model to use +- messages: Array of message objects in JSONB format, each with ''role'' and ''content'' fields + +Returns the AI model''s response as text. The messages should be an array of objects with role (system/user/assistant) and content fields. 
Raises an exception if the provider or model is not available.'; + +-- Function to generate an embedding for a given text using a specified provider and model +CREATE OR REPLACE FUNCTION aifun.embed(provider TEXT, model TEXT, text_to_embed TEXT) +RETURNS vector AS $$ + try: + from aifun.llm_handler import embed + return embed(provider, model, text_to_embed) + except Exception as e: + plpy.error(f"Error in AI function: {e}") +$$ LANGUAGE plpython3u; + +COMMENT ON FUNCTION aifun.embed(TEXT, TEXT, TEXT) IS 'Generates a vector embedding for the given text using an AI model. + +Parameters: +- provider: The unique identifier of the provider +- model: The specific embedding model to use +- text_to_embed: The text to embed + +Returns the embedding as a vector of floating-point numbers. Raises an exception if the provider or model is not available.'; + +-- Function to generate an embedding for multimodal content (text, images, etc.) using a specified provider and model +CREATE OR REPLACE FUNCTION aifun.multimodal_embed(provider TEXT, model TEXT, content JSONB) +RETURNS vector AS $$ + try: + import json + from aifun.llm_handler import multimodal_embed + + return multimodal_embed( + provider, + model, + json.loads(content) + ) + except Exception as e: + plpy.error(f"Error in AI function: {e}") +$$ LANGUAGE plpython3u; + +COMMENT ON FUNCTION aifun.multimodal_embed(TEXT, TEXT, JSONB) IS 'Generates a vector embedding for multimodal content (text, images, etc.) using an AI model. + +Parameters: +- provider: The unique identifier of the provider +- model: The specific embedding model to use +- content: JSONB object containing the multimodal content to embed, + expected format: {"text": "...", "image": "data:image/png;base64,..."} + +Returns the embedding as a vector of floating-point numbers. Raises an exception if the provider or model is not available.'; + +-- Function to generate an embedding for multimodal content (text, images, etc.) 
using a specified provider and model +CREATE OR REPLACE FUNCTION aifun.multimodal_embed(provider TEXT, model TEXT, text TEXT DEFAULT NULL, image TEXT DEFAULT NULL) +RETURNS vector AS $$ + try: + import json + from aifun.llm_handler import multimodal_embed + + return multimodal_embed( + provider, + model, + { + "text": text, + "image": image + } + ) + except Exception as e: + plpy.error(f"Error in AI function: {e}") +$$ LANGUAGE plpython3u; + +COMMENT ON FUNCTION aifun.multimodal_embed(TEXT, TEXT, TEXT, TEXT) IS 'Generates a vector embedding for multimodal content (text, images, etc.) using an AI model. + +Parameters: +- provider: The unique identifier of the provider +- model: The specific embedding model to use +- text: The text to embed (optional) +- image: The image to embed (optional, base64-encoded), expected format: "data:image/png;base64,..." + +Returns the embedding as a vector of floating-point numbers. Raises an exception if the provider or model is not available.'; + +-- Function to generate an embedding for multimodal content (text, images, etc.) using a specified provider and model +CREATE OR REPLACE FUNCTION aifun.multimodal_embed(provider TEXT, model TEXT, text TEXT DEFAULT NULL, image BYTEA DEFAULT NULL) +RETURNS vector AS $$ + try: + import io + import json + import base64 + from PIL import Image + from aifun.llm_handler import multimodal_embed + + base64_image = None + if image is not None: + pil_image = Image.open(io.BytesIO(image)) + buf = io.BytesIO() + pil_image.save(buf, format='PNG') + base64_image = base64.b64encode(buf.getvalue()).decode('utf-8') + base64_image = f"data:image/png;base64,{base64_image}" + + return multimodal_embed( + provider, + model, + { + "text": text, + "image": base64_image + } + ) + except Exception as e: + plpy.error(f"Error in AI function: {e}") +$$ LANGUAGE plpython3u; + +COMMENT ON FUNCTION aifun.multimodal_embed(TEXT, TEXT, TEXT, BYTEA) IS 'Generates a vector embedding for multimodal content (text, images, etc.) 
using an AI model. + +Parameters: +- provider: The unique identifier of the provider +- model: The specific embedding model to use +- text: The text to embed (optional) +- image: The image to embed (optional, binary data), expected format: BYTEA + +Returns the embedding as a vector of floating-point numbers. Raises an exception if the provider or model is not available.'; + + +-- Function to classify a text using a specified provider and model +CREATE OR REPLACE FUNCTION aifun.classify(provider TEXT, model TEXT, text_to_classify TEXT, labels TEXT[]) +RETURNS TEXT AS $$ + try: + from aifun.llm_handler import classify + return classify(provider, model, text_to_classify, labels) + except Exception as e: + plpy.error(f"Error in AI function: {e}") +$$ LANGUAGE plpython3u; + +COMMENT ON FUNCTION aifun.classify(TEXT, TEXT, TEXT, TEXT[]) IS 'Classifies text into predefined categories using an AI model. + +Parameters: +- provider: The unique identifier of the provider +- model: The specific model to use +- text_to_classify: The text to classify +- labels: Array of category labels to classify into + +Returns the most likely category as text. Raises an exception if the provider or model is not available.'; + +-- Function to extract structured information from a text using a specified provider and model +CREATE OR REPLACE FUNCTION aifun.extract(provider TEXT, model TEXT, text_to_parse TEXT, json_schema JSONB) +RETURNS JSONB AS $$ + import json + try: + from aifun.llm_handler import extract + data = extract(provider, model, text_to_parse, json_schema) + return json.dumps(data) + except Exception as e: + plpy.error(f"Error in AI function: {e}") +$$ LANGUAGE plpython3u; + +COMMENT ON FUNCTION aifun.extract(TEXT, TEXT, TEXT, JSONB) IS 'Extracts structured information from a text using a specified provider and model. 
+ +Parameters: +- provider: The unique identifier of the provider +- model: The specific extraction model to use +- text_to_extract: The text to extract information from +- p_options: Optional JSON with additional parameters + +Returns a JSON object with the extracted information.'; + +-- Function to summarize a text using a specified provider and model +CREATE OR REPLACE FUNCTION aifun.summarize(provider TEXT, model TEXT, text_to_summarize TEXT, length INTEGER DEFAULT 50) +RETURNS TEXT AS $$ + try: + from aifun.llm_handler import summarize + return summarize(provider, model, text_to_summarize, length) + except Exception as e: + plpy.error(f"Error in AI function: {e}") +$$ LANGUAGE plpython3u; + +COMMENT ON FUNCTION aifun.summarize(TEXT, TEXT, TEXT, INTEGER) IS 'Generates a concise summary of the given text using an AI model. + +Parameters: +- provider: The unique identifier of the provider +- model: The specific model to use +- text_to_summarize: The text to summarize +- length: Optional length of the summary (default is 50) + +Returns the summary as text. Raises an exception if the provider or model is not available.'; + +-- Function to translate a text using a specified provider and model +CREATE OR REPLACE FUNCTION aifun.translate(provider TEXT, model TEXT, text_to_translate TEXT, target_language TEXT) +RETURNS TEXT AS $$ + try: + from aifun.llm_handler import translate + return translate(provider, model, text_to_translate, target_language) + except Exception as e: + plpy.error(f"Error in AI function: {e}") +$$ LANGUAGE plpython3u; + +COMMENT ON FUNCTION aifun.translate(TEXT, TEXT, TEXT, TEXT) IS 'Translates text from one language to another using an AI model. + +Parameters: +- provider: The unique identifier of the provider +- model: The specific model to use +- text_to_translate: The text to translate +- target_language: The target language code or name + +Returns the translated text. 
Raises an exception if the provider or model is not available.'; + +-- Function to calculate the similarity between two texts using a specified provider and model +CREATE OR REPLACE FUNCTION aifun.similarity(provider TEXT, model TEXT, text1 TEXT, text2 TEXT) +RETURNS float8 AS $$ + try: + from aifun.llm_handler import similarity + return similarity(provider, model, text1, text2) + except Exception as e: + plpy.error(f"Error in AI function: {e}") +$$ LANGUAGE plpython3u; + +COMMENT ON FUNCTION aifun.similarity(TEXT, TEXT, TEXT, TEXT) IS 'Calculates the similarity between two texts using a specified provider and model. + +Parameters: +- provider: The unique identifier of the provider +- model: The specific similarity model to use +- text1: The first text to compare +- text2: The second text to compare + +Returns a similarity score between 0 and 1.'; + +-- Function to fix grammar in a text using a specified provider and model +CREATE OR REPLACE FUNCTION aifun.fix_grammar(provider TEXT, model TEXT, text TEXT) +RETURNS TEXT AS $$ + try: + from aifun.llm_handler import fix_grammar + return fix_grammar(provider, model, text) + except Exception as e: + plpy.error(f"Error in AI function: {e}") +$$ LANGUAGE plpython3u; + +COMMENT ON FUNCTION aifun.fix_grammar(TEXT, TEXT, TEXT) IS 'Fixes grammar in a text using a specified provider and model. 
+ +Parameters: +- provider: The unique identifier of the provider +- model: The specific grammar correction model to use +- text_to_fix: The text with grammar to fix + +Returns the text with corrected grammar.'; + +-- Function to chunk a text into smaller parts using a specified chunk size and overlap +CREATE OR REPLACE FUNCTION aifun.chunk(text TEXT, chunk_size INTEGER DEFAULT 1000, overlap INTEGER DEFAULT 200) +RETURNS TEXT[] AS $$ + try: + from aifun.llm_handler import chunk + return chunk(text, chunk_size, overlap) + except Exception as e: + plpy.error(f"Error in AI function: {e}") +$$ LANGUAGE plpython3u; + +COMMENT ON FUNCTION aifun.chunk(TEXT, INTEGER, INTEGER) IS 'Splits a long text into smaller, overlapping chunks for processing. + +Parameters: +- text_to_chunk: The text to chunk +- chunk_size: The size of each chunk +- overlap: The overlap between chunks + +Returns an array of text chunks. Useful for processing long documents with models that have context length limitations.'; + +-- Function to rerank documents based on a query using a specified provider and model +CREATE OR REPLACE FUNCTION aifun.rerank(provider TEXT, model TEXT, query TEXT, documents TEXT[]) +RETURNS TEXT[] AS $$ + try: + from aifun.llm_handler import rerank + return rerank(provider, model, query, documents) + except Exception as e: + plpy.error(f"Error in AI function: {e}") +$$ LANGUAGE plpython3u; + +COMMENT ON FUNCTION aifun.rerank(TEXT, TEXT, TEXT, TEXT[]) IS 'Reranks documents based on their relevance to a query using an AI model. + +Parameters: +- provider: The unique identifier of the provider +- model: The specific model to use +- query: The search query +- documents: Array of documents to rerank + +Returns an array of document indices sorted by relevance. 
Raises an exception if the provider or model is not available.'; + +-- Function to extract keywords from a text using a specified provider and model +-- This function uses to ensure proper access control +CREATE OR REPLACE FUNCTION aifun.extract_keywords(provider TEXT, model TEXT, text TEXT, num_keywords INTEGER DEFAULT 5) +RETURNS TEXT[] AS $$ + try: + from aifun.llm_handler import extract_keywords + return extract_keywords(provider, model, text, num_keywords) + except Exception as e: + plpy.error(f"Error in AI function: {e}") +$$ LANGUAGE plpython3u; + +COMMENT ON FUNCTION aifun.extract_keywords(TEXT, TEXT, TEXT, INTEGER) IS 'Extracts keywords from a text using a specified provider and model. + +Parameters: +- provider: The unique identifier of the provider +- model: The specific keyword extraction model to use +- text_to_extract: The text to extract keywords from +- num_keywords: The number of keywords to extract (default: 5) + +Returns an array of keywords.'; + + +-- Function to parse PDF file content and extract text (TEXT version) +CREATE OR REPLACE FUNCTION aifun.parse_pdf(file_content_base64 TEXT) +RETURNS JSONB AS $$ + try: + import json + from aifun.parser import parse_pdf + return json.dumps(parse_pdf(file_content_base64)) + except Exception as e: + plpy.error(f"Error parsing PDF file: {e}") +$$ LANGUAGE plpython3u; + +COMMENT ON FUNCTION aifun.parse_pdf(TEXT) IS 'Extracts text from a base64-encoded PDF file. + +Parameters: +- file_content_base64: Base64-encoded PDF file content + +Returns the extracted pages from the PDF file as a JSON array. Uses PyPDF2 library for PDF parsing. 
+Raises an exception if the file is not a valid PDF or if any error occurs during processing.'; + +-- Function to parse PDF file content and extract text (BYTEA version) +CREATE OR REPLACE FUNCTION aifun.parse_pdf(file_content_bytea BYTEA) +RETURNS JSONB AS $$ + try: + import json + from aifun.parser import parse_pdf + return json.dumps(parse_pdf(file_content_bytea)) + except Exception as e: + plpy.error(f"Error parsing PDF file from bytea: {e}") +$$ LANGUAGE plpython3u; + + +COMMENT ON FUNCTION aifun.parse_pdf(BYTEA) IS 'Extracts text from a binary PDF file (BYTEA). + +Parameters: +- file_content_bytea: Binary PDF file content + +Returns the extracted pages from the PDF file as a JSON array. Uses PyPDF2 library for PDF parsing. +Raises an exception if the file is not a valid PDF or if any error occurs during processing.'; + + +-- Function to parse DOCX file content and extract text +CREATE OR REPLACE FUNCTION aifun.parse_docx(file_content_base64 TEXT) +RETURNS JSONB AS $$ + try: + import json + from aifun.parser import parse_docx + return json.dumps(parse_docx(file_content_base64)) + except Exception as e: + plpy.error(f"Error parsing DOCX file: {e}") +$$ LANGUAGE plpython3u; + +COMMENT ON FUNCTION aifun.parse_docx(TEXT) IS 'Extracts text from a base64-encoded DOCX file. + +Parameters: +- file_content_base64: Base64-encoded DOCX file content + +Returns the extracted pages from the DOCX file as a JSON array. Uses python-docx library for DOCX parsing. 
+Raises an exception if the file is not a valid DOCX or if any error occurs during processing.'; + +-- Function to parse DOCX file content and extract text (BYTEA version) +CREATE OR REPLACE FUNCTION aifun.parse_docx(file_content_bytea BYTEA) +RETURNS JSONB AS $$ + try: + import json + from aifun.parser import parse_docx + return json.dumps(parse_docx(file_content_bytea)) + except Exception as e: + plpy.error(f"Error parsing DOCX file from bytea: {e}") +$$ LANGUAGE plpython3u; + +COMMENT ON FUNCTION aifun.parse_docx(BYTEA) IS 'Extracts text from a binary DOCX file (BYTEA). + +Parameters: +- file_content_bytea: Binary DOCX file content + +Returns the extracted pages from the DOCX file as a JSON array. Uses python-docx library for DOCX parsing. +Raises an exception if the file is not a valid DOCX or if any error occurs during processing.'; + +-- Function to parse PPTX file content and extract text +CREATE OR REPLACE FUNCTION aifun.parse_pptx(file_content_base64 TEXT) +RETURNS JSONB AS $$ + try: + import json + from aifun.parser import parse_pptx + return json.dumps(parse_pptx(file_content_base64)) + except Exception as e: + plpy.error(f"Error parsing PPTX file from base64: {e}") +$$ LANGUAGE plpython3u; + +COMMENT ON FUNCTION aifun.parse_pptx(TEXT) IS 'Extracts text from a base64-encoded PPTX file. + +Parameters: +- file_content_base64: Base64-encoded PPTX file content + +Returns the extracted slides from the PPTX file as a JSON array. Uses python-pptx library for PPTX parsing. 
+Raises an exception if the file is not a valid PPTX or if any error occurs during processing.'; + + +-- Function to parse PPTX file content and extract text (BYTEA version) +CREATE OR REPLACE FUNCTION aifun.parse_pptx(file_content_bytea BYTEA) +RETURNS JSONB AS $$ + try: + import json + from aifun.parser import parse_pptx + return json.dumps(parse_pptx(file_content_bytea)) + except Exception as e: + plpy.error(f"Error parsing PPTX file from bytea: {e}") +$$ LANGUAGE plpython3u; + +COMMENT ON FUNCTION aifun.parse_pptx(BYTEA) IS 'Extracts text from a binary PPTX file (BYTEA). + +Parameters: +- file_content_bytea: Binary PPTX file content + +Returns the extracted slides from the PPTX file as a JSON array. Uses python-pptx library for PPTX parsing. +Raises an exception if the file is not a valid PPTX or if any error occurs during processing.'; + +-- Universal document parser function +CREATE OR REPLACE FUNCTION aifun.parse_document(file_content_base64 TEXT, file_extension TEXT) +RETURNS JSONB AS $$ + try: + import json + from aifun.parser import parse_document + return json.dumps(parse_document(file_content_base64, file_extension)) + except Exception as e: + plpy.error(f"Error parsing document: {e}") +$$ LANGUAGE plpython3u; + +COMMENT ON FUNCTION aifun.parse_document(TEXT, TEXT) IS 'Universal document parser that handles different file formats. + +Parameters: +- file_content_base64: Base64-encoded file content +- file_extension: File extension (pdf, docx, pptx, png, etc.) + +Automatically detects the file type based on extension and calls the appropriate parser function. +Returns the extracted data from the document. 
+Raises an exception if the file format is not supported or if any error occurs during processing.'; + +-- Function to parse document content and extract text (BYTEA version) +CREATE OR REPLACE FUNCTION aifun.parse_document(file_content_bytea BYTEA, file_extension TEXT) +RETURNS JSONB AS $$ + try: + import json + from aifun.parser import parse_document + return json.dumps(parse_document(file_content_bytea, file_extension)) + except Exception as e: + plpy.error(f"Error parsing document from bytea: {e}") +$$ LANGUAGE plpython3u; + +COMMENT ON FUNCTION aifun.parse_document(BYTEA, TEXT) IS 'Universal document parser that handles different file formats. + +Parameters: +- file_content_bytea: Binary file content +- file_extension: File extension (pdf, docx, pptx, png, etc.) + +Automatically detects the file type based on extension and calls the appropriate parser function. +Returns the extracted data from the document. +Raises an exception if the file format is not supported or if any error occurs during processing.'; + +-- Function to get help information for a UDF by reading its comment +CREATE OR REPLACE FUNCTION aifun.help( + p_function_name TEXT +) +RETURNS TEXT +LANGUAGE plpgsql +AS $$ +DECLARE + v_comment TEXT; +BEGIN + SELECT pg_description.description INTO v_comment + FROM pg_description + JOIN pg_proc ON pg_description.objoid = pg_proc.oid + JOIN pg_namespace ON pg_proc.pronamespace = pg_namespace.oid + WHERE pg_description.objsubid = 0 + AND pg_namespace.nspname = 'aifun' + AND pg_proc.proname = p_function_name; + + IF v_comment IS NULL THEN + RETURN 'No help information available for function: aifun.' || p_function_name; + ELSE + RETURN v_comment; + END IF; +END; +$$; + +COMMENT ON FUNCTION aifun.help(TEXT) IS 'Retrieves help information for a specified aifun function. + +Parameters: +- p_function_name: The name of the function without the "aifun." prefix + +Returns the comment/documentation for the function. 
Example: SELECT aifun.help(''ask''); to get help for the aifun.ask function.'; + +-- Function to list all available aifun functions +CREATE OR REPLACE FUNCTION aifun.list_all() +RETURNS TABLE( + function_name TEXT, + function_description TEXT +) +LANGUAGE plpgsql +AS $$ +BEGIN + RETURN QUERY + SELECT + pg_proc.proname::TEXT AS function_name, + pg_description.description::TEXT AS function_description + FROM pg_proc + JOIN pg_namespace ON pg_proc.pronamespace = pg_namespace.oid + LEFT JOIN pg_description ON pg_description.objoid = pg_proc.oid AND pg_description.objsubid = 0 + WHERE pg_namespace.nspname = 'aifun' + AND pg_proc.prokind = 'f' + ORDER BY pg_proc.proname; +END; +$$; + +COMMENT ON FUNCTION aifun.list_all() IS 'Lists all available aifun functions. + +Returns a table of all available aifun functions with their names and descriptions.'; + +-- VLM-based PDF parsing function (TEXT version) +CREATE OR REPLACE FUNCTION aifun.parse_pdf_with_vlm(provider TEXT, model TEXT, file_content_base64 TEXT, prompt TEXT DEFAULT 'Extract all text and describe any images, charts, or visual elements in this PDF.') +RETURNS JSONB AS $$ + try: + import json + from aifun.llm_handler import vlm_parse_pdf + + result = vlm_parse_pdf(provider, model, file_content_base64, prompt) + return json.dumps(result) + except Exception as e: + plpy.error(f"Error parsing PDF with VLM: {e}") +$$ LANGUAGE plpython3u; + +COMMENT ON FUNCTION aifun.parse_pdf_with_vlm(TEXT, TEXT, TEXT, TEXT) IS 'Parse PDF using Vision Language Model for enhanced text extraction and visual analysis. + +This function converts PDF pages to images and uses a VLM to extract text and analyze visual elements. +It provides more accurate OCR and can describe charts, diagrams, and other visual content. 
+ +Parameters: +- provider: The unique identifier of the VLM provider +- model: The vision model to use (e.g., "paddleocr-vl") +- file_content_base64: Base64-encoded PDF file content +- prompt: Custom prompt for analysis (optional) + +Returns JSON array with extracted text and visual analysis for each page. +Raises an exception if VLM processing fails or if PDF conversion fails.'; + +-- VLM-based PDF parsing function (BYTEA version) +CREATE OR REPLACE FUNCTION aifun.parse_pdf_with_vlm(provider TEXT, model TEXT, file_content_bytea BYTEA, prompt TEXT DEFAULT 'Extract all text and describe any images, charts, or visual elements in this PDF.') +RETURNS JSONB AS $$ + try: + import json + from aifun.llm_handler import vlm_parse_pdf + + result = vlm_parse_pdf(provider, model, file_content_bytea, prompt) + return json.dumps(result) + except Exception as e: + plpy.error(f"Error parsing PDF with VLM from bytea: {e}") +$$ LANGUAGE plpython3u; + +COMMENT ON FUNCTION aifun.parse_pdf_with_vlm(TEXT, TEXT, BYTEA, TEXT) IS 'Parse PDF using Vision Language Model for enhanced text extraction and visual analysis. + +This function converts PDF pages to images and uses a VLM to extract text and analyze visual elements. +It provides more accurate OCR and can describe charts, diagrams, and other visual content. + +Parameters: +- provider: The unique identifier of the VLM provider +- model: The vision model to use (e.g., "paddleocr-vl") +- file_content_bytea: Binary PDF file content (BYTEA) +- prompt: Custom prompt for analysis (optional) + +Returns JSON array with extracted text and visual analysis for each page. 
+Raises an exception if VLM processing fails or if PDF conversion fails.'; + +-- Visual Question Answering function (TEXT version) +CREATE OR REPLACE FUNCTION aifun.visual_qa(provider TEXT, model TEXT, image TEXT, question TEXT) +RETURNS TEXT AS $$ + try: + from aifun.llm_handler import visual_qa + return visual_qa(provider, model, image, question) + except Exception as e: + plpy.error(f"Error performing visual QA: {e}") +$$ LANGUAGE plpython3u; + +COMMENT ON FUNCTION aifun.visual_qa(TEXT, TEXT, TEXT, TEXT) IS 'Perform visual question answering on an image. + +This function allows asking specific questions about image content and getting + detailed answers based on visual analysis of the image. + +Parameters: +- provider: The unique identifier of the VLM provider +- model: The vision model to use (e.g., "paddleocr-vl") +- image: Base64-encoded image file content, including the prefix "data:image/png;base64," +- question: Question to ask about the image + +Returns the answer to the question based on image analysis. +Raises an exception if VLM processing fails or if image processing fails.'; + +-- Visual Question Answering function (BYTEA version) +CREATE OR REPLACE FUNCTION aifun.visual_qa(provider TEXT, model TEXT, image BYTEA, question TEXT) +RETURNS TEXT AS $$ + try: + from aifun.llm_handler import visual_qa + return visual_qa(provider, model, image, question) + except Exception as e: + plpy.error(f"Error performing visual QA from bytea: {e}") +$$ LANGUAGE plpython3u; + +COMMENT ON FUNCTION aifun.visual_qa(TEXT, TEXT, BYTEA, TEXT) IS 'Perform visual question answering on an image. + +This function allows asking specific questions about image content and getting +detailed answers based on visual analysis of the image. 
+ +Parameters: +- provider: The unique identifier of the VLM provider +- model: The vision model to use (e.g., "paddleocr-vl") +- file_content_bytea: Binary image file content (BYTEA) +- question: Question to ask about the image + +Returns the answer to the question based on image analysis. +Raises an exception if VLM processing fails or if image processing fails.'; + +-- Grant usage on schema aifun to public +GRANT USAGE ON SCHEMA aifun TO public; + +GRANT ALL ON aifun.providers TO public; diff --git a/contrib/aifun/aifun.control b/contrib/aifun/aifun.control new file mode 100644 index 00000000000..04bb5312922 --- /dev/null +++ b/contrib/aifun/aifun.control @@ -0,0 +1,4 @@ +comment = 'AI Functions for Apache Cloudberry/PostgreSQL' +default_version = '1.0' +relocatable = true +requires = 'plpython3u, vector' diff --git a/contrib/aifun/aifun/__init__.py b/contrib/aifun/aifun/__init__.py new file mode 100644 index 00000000000..649f57f41ee --- /dev/null +++ b/contrib/aifun/aifun/__init__.py @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from .utils import cosine_similarity, parse_jsonb, retry_with_backoff \ No newline at end of file diff --git a/contrib/aifun/aifun/llm_handler.py b/contrib/aifun/aifun/llm_handler.py new file mode 100644 index 00000000000..e2a5ece18b2 --- /dev/null +++ b/contrib/aifun/aifun/llm_handler.py @@ -0,0 +1,616 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Union +import json + +from .utils import get_plpy, cosine_similarity, CACHE, parse_jsonb +from .providers import get_provider_class + +plpy = get_plpy() + +def _get_provider_instance(provider_id: str): + """ + Get a provider instance by ID + + This is an internal factory function that handles database lookups and provider instantiation. + Uses Row Level Security (RLS) to ensure users can only access their own providers. + Caches instances per user per provider_id for performance. 
+ + Args: + provider_id (str): The unique identifier of the provider + + Returns: + object: An instance of the provider class, configured with API key and metadata + + Raises: + Error: If the provider is not found for the current user + Error: If the provider type is invalid + """ + current_user = plpy.execute("SELECT current_user")[0]['current_user'] + cache_key = f"{current_user}::{provider_id}" + + if cache_key in CACHE: + return CACHE[cache_key] + + plan = plpy.prepare(""" + SELECT + provider_type, + api_key, + metadata + FROM + aifun.providers + WHERE + owner_role = current_user AND provider_id = $1 + """, ["text"]) + results = plpy.execute(plan, [provider_id]) + + if not results: + plpy.error(f"Provider '{provider_id}' not found for current user.") + + config = results[0] + provider_type = config['provider_type'] + api_key = config['api_key'] + metadata = json.loads(config['metadata']) if config['metadata'] else {} + + try: + ProviderClass = get_provider_class(provider_type) + # Create credentials object with API key for provider compatibility + credentials = {"api_key": api_key} + instance = ProviderClass(credentials, metadata) + CACHE[cache_key] = instance + return instance + except ValueError as e: + plpy.error(str(e)) + + +# --- Public Functions (called by PostgreSQL) --- + +def ask(provider_id: str, model: str, prompt: str) -> str: + """ + Send a question to an AI model and get a response + + Uses the specified provider and model to send a question to an AI and get the model's response. + Uses a default temperature parameter of 0 to ensure consistency in responses. 
+ + Args: + provider_id (str): The unique identifier of the provider + model (str): The name of the AI model to use + prompt (str): The question or prompt to send to the AI + + Returns: + str: The response text from the AI model + + Example: + answer = ask("openai_provider", "gpt-4", "What is artificial intelligence?") + """ + provider = _get_provider_instance(provider_id) + return provider.ask(model, prompt, temperature=0) + +def embed(provider_id: str, model: str, text_to_embed: str) -> list[float]: + """ + Generate vector embeddings for text + + Converts text to vector representation using the specified provider and model. + Can be used for text similarity comparison, clustering, and other tasks. + + Args: + provider_id (str): The unique identifier of the provider + model (str): The name of the model to use for generating embeddings + text_to_embed (str): The text to generate vector embeddings for + + Returns: + list[float]: The vector embedding representation of the text + + Example: + vector = embed("openai_provider", "text-embedding-ada-002", "This is a sample text") + """ + provider = _get_provider_instance(provider_id) + return provider.embed(model, text_to_embed) + +def multimodal_embed(provider_id: str, model: str, content: dict) -> list[float]: + """ + Generate vector embeddings for multimodal content + + Generates a unified vector representation for content containing multiple modalities such as text and images. + Supports combinations of different content types for multimodal search, comparison, and other scenarios. 
+ + Args: + provider_id (str): The unique identifier of the provider + model (str): The name of the model to use for generating multimodal embeddings + content (dict): The content to embed, must be a dictionary + Expected format: {"text": "...", "image": "..."} + Image should be base64-encoded strings with mime type prefix (e.g., "data:image/png;base64,") + + Returns: + list[float]: The vector embedding representation of the multimodal content + + Raises: + Error: If the content is not in valid JSON format + + Example: + content = {"text": "A cat sitting on a sofa", "image": "base64-encoded image data"} + vector = multimodal_embed("openai_provider", "clip-embeddings", content) + """ + provider = _get_provider_instance(provider_id) + + if "image" in content and content["image"] and not content["image"].startswith("data:image/"): + plpy.error("Image content must include mime type prefix (e.g., 'data:image/png;base64,')") + + return provider.multimodal_embed(model, content) + +def classify(provider_id: str, model: str, text_to_classify: str, labels: list[str]) -> str: + """ + Classify text into predefined categories + + Uses an AI model to classify given text into one of the specified categories. + The model will be prompted to return only the category name without any other content. 
+ + Args: + provider_id (str): The unique identifier of the provider + model (str): The name of the model to use for classification + text_to_classify (str): The text content to classify + labels (list): A list of predefined categories + + Returns: + str: The classification result, which is one of the labels + + Example: + categories = ["technology", "sports", "entertainment"] + result = classify("openai_provider", "gpt-4", "The latest iPhone has been released", categories) + # Returns "technology" + """ + provider = _get_provider_instance(provider_id) + label_list_str = ", ".join([f"'{label}'" for label in labels]) + prompt = f"""Given the following text, classify it into one of the following categories: {label_list_str}. +Respond with only the chosen category name and nothing else. +Text: {text_to_classify} +Category:""" + + result = provider.ask(model, prompt, max_tokens=50, temperature=0) + stripped_result = result.strip().strip("'\"") + if stripped_result in labels: + return stripped_result + return result + +def extract(provider_id: str, model: str, text_to_parse: str, json_schema: str) -> dict: + """ + Extract structured information from text according to a JSON schema + + Uses an AI model to extract information from unstructured text and format the output according to a specified JSON schema. + The function validates that the returned JSON is valid to ensure the output meets the expected format. 
+ + Args: + provider_id (str): The unique identifier of the provider + model (str): The name of the model to use for information extraction + text_to_parse (str): The text content to extract information from + json_schema (str): The JSON schema that defines the output format + + Returns: + dict: The extraction result formatted according to json_schema + + Raises: + Error: If the model returns invalid JSON + Error: If there is an error processing the JSON response + + Example: + schema = '{"name": "string", "age": "number", "email": "string"}' + text = "John is 25 years old and his email is john@example.com" + result = extract("openai_provider", "gpt-4", text, schema) + # Returns {"name": "John", "age": 25, "email": "john@example.com"} + """ + provider = _get_provider_instance(provider_id) + prompt = f"""Extract information from the following text according to the JSON schema. +Respond with only the extracted JSON object. +Schema: {json_schema} +Text: {text_to_parse} +JSON:""" + + # Parse and validate JSON to ensure it's properly formatted + try: + result = provider.ask(model, prompt, temperature=0) + parsed_json = parse_jsonb(result) + return parsed_json + except json.JSONDecodeError as e: + plpy.error(f"LLM returned invalid JSON: {result}. Error: {str(e)}") + except Exception as e: + plpy.error(f"Error processing JSON response: {str(e)}") + +def summarize(provider_id: str, model: str, text_to_summarize: str, length: int = 50) -> str: + """ + Generate a concise summary of text + + Uses an AI model to generate a concise summary of the given text. + Uses a medium temperature parameter (0.5) to maintain some creativity while ensuring content accuracy. 
+ + Args: + provider_id (str): The unique identifier of the provider + model (str): The name of the model to use for generating summaries + text_to_summarize (str): The text content to summarize + length (int, optional): The desired length of the summary (default is 50) + + Returns: + str: The summary content of the text + + Example: + long_text = "This is a long text content..." + summary = summarize("openai_provider", "gpt-4", long_text) + """ + provider = _get_provider_instance(provider_id) + prompt = f"Provide a concise summary of the following text. The summary should be {length} words or less.\nText: {text_to_summarize}\nSummary:" + return provider.ask(model, prompt, temperature=0.3) + +def translate(provider_id: str, model: str, text_to_translate: str, target_language: str) -> str: + """ + Translate text to a target language + + Uses an AI model to translate text to the specified target language. + The model will be prompted to return only the translated text without any other content. + + Args: + provider_id (str): The unique identifier of the provider + model (str): The name of the model to use for translation + text_to_translate (str): The text content to translate + target_language (str): The name of the target language + + Returns: + str: The translated text + + Example: + translated = translate("openai_provider", "gpt-4", "Hello, world!", "Chinese") + # Returns "你好,世界!" + """ + provider = _get_provider_instance(provider_id) + prompt = f"Translate the following text into {target_language}. Respond with only the translated text.\nText: {text_to_translate}\nTranslation:" + return provider.ask(model, prompt, temperature=0) + +def similarity(provider_id: str, model: str, text1: str, text2: str) -> float: + """ + Calculate the similarity between two texts + + Evaluates the semantic similarity between two texts by converting them to vector embeddings and then calculating cosine similarity. 
+ The return value ranges from 0 to 1, where 1 indicates complete similarity and 0 indicates no similarity. + + Args: + provider_id (str): The unique identifier of the provider + model (str): The name of the model to use for generating embeddings + text1 (str): The first text + text2 (str): The second text + + Returns: + float: The similarity score between the two texts, ranging from 0 to 1 + + Raises: + Error: If unable to parse the embedding string returned by the provider + + Example: + score = similarity("openai_provider", "text-embedding-ada-002", + "Artificial intelligence is the future", "AI will change the world") + # Returns a similarity score between 0 and 1 + """ + provider = _get_provider_instance(provider_id) + embedding_str1 = provider.embed(model, text1) + embedding_str2 = provider.embed(model, text2) + try: + vec1 = json.loads(embedding_str1) + vec2 = json.loads(embedding_str2) + except (json.JSONDecodeError, TypeError): + plpy.error("Failed to parse embedding string from provider.") + return cosine_similarity(vec1, vec2) + +def fix_grammar(provider_id: str, model: str, text: str) -> str: + """ + Correct grammar and spelling errors in text + + Uses an AI model to detect and correct grammar and spelling errors in text. + The model will be prompted to return only the corrected text without any other content. + + Args: + provider_id (str): The unique identifier of the provider + model (str): The name of the model to use for grammar correction + text (str): The text to correct for grammar and spelling + + Returns: + str: The corrected text + + Example: + corrected = fix_grammar("openai_provider", "gpt-4", "I has a apple.") + # Returns "I have an apple." + """ + provider = _get_provider_instance(provider_id) + prompt = f"Correct the grammar and spelling of the following text. 
Respond with only the corrected text and nothing else.\n\nText: \"{text}\"\n\nCorrected:" + return provider.ask(model, prompt, temperature=0) + +def chunk(text: str, chunk_size: int = 1000, overlap: int = 200) -> list[str]: + """ + Split text into chunks of specified size with optional overlap + + Splits long text into smaller chunks for easier processing and analysis. Can specify the size of each chunk and the overlap between chunks, + which helps maintain contextual continuity. + + Args: + text (str): The text content to split + chunk_size (int, optional): The size of each chunk, default is 1000 characters + overlap (int, optional): The number of overlapping characters between chunks, default is 200 + + Returns: + list: A list containing the text chunks + + Raises: + Error: If input text is not a string + Error: If chunk_size is not a positive integer + Error: If overlap is not a non-negative integer + Error: If overlap is greater than or equal to chunk_size + + Example: + long_text = "This is a long text content..." 
+ chunks = chunk(long_text, chunk_size=500, overlap=100) + # Returns a list containing multiple text chunks + """ + if not isinstance(text, str): + plpy.error("Input 'text' must be a string.") + if not isinstance(chunk_size, int) or chunk_size <= 0: + plpy.error("'chunk_size' must be a positive integer.") + if not isinstance(overlap, int) or overlap < 0: + plpy.error("'overlap' must be a non-negative integer.") + if overlap >= chunk_size: + plpy.error("'overlap' must be less than 'chunk_size'.") + + chunks = [] + start = 0 + while start < len(text): + end = start + chunk_size + chunk_content = text[start:end] + chunks.append(chunk_content) + start += (chunk_size - overlap) + if start >= len(text) - overlap and start < len(text): # Ensure last chunk is not just overlap + if len(text) - (start + overlap) > 0: # If there's still significant text left + pass # The loop will handle the last chunk + else: + break # Avoid creating tiny chunks at the very end if only overlap is left + + return chunks + +def rerank(provider_id: str, model: str, query: str, documents: list[str]) -> list[str]: + """ + Rerank a list of documents based on a query + + Uses an AI model to evaluate the relevance of each document in a list to a given query and reorder them from most to least relevant. + The model will be prompted to return a JSON array containing the reranked documents. 
+ + Args: + provider_id (str): The unique identifier of the provider + model (str): The name of the model to use for reranking + query (str): The query text to evaluate document relevance against + documents (list): A list of documents to rerank, each element is a string + + Returns: + list: A list of documents ordered from most to least relevant + + Raises: + Error: If input query is not a string + Error: If input documents is not a list of strings + Error: If an error occurs during the reranking process + + Example: + docs = ["Article about machine learning", "Cooking recipe", "AI development trends"] + ranked = rerank("openai_provider", "gpt-4", "AI technology", docs) + # Returns a reranked list of documents with AI-related documents at the front + """ + if not isinstance(documents, list) or not all(isinstance(d, str) for d in documents): + plpy.error("Input 'documents' must be a list of strings.") + + provider = _get_provider_instance(provider_id) + + # Construct a prompt to instruct the LLM to rerank + documents_str = "\n".join([f"Document {i+1}: {doc}" for i, doc in enumerate(documents)]) + prompt = f"""You are an expert document reranker. Your task is to reorder a list of documents based on their relevance to a given query. +Return a JSON array where each element is one of the original documents, ordered from most relevant to least relevant. +Do NOT include any other text or explanation, just the JSON array. 
+ +Query: "{query}" + +Documents to rerank: +--- +{documents_str} +--- + +Reranked Documents (JSON array):""" + + try: + result = provider.ask(model, prompt, temperature=0) + # Attempt to parse the result as JSON + reranked_list = parse_jsonb(result) + if not isinstance(reranked_list, list) or not all(isinstance(d, str) for d in reranked_list): + plpy.warning(f"LLM returned non-list or non-string elements for reranking: {result}") + return documents # Fallback to original order if parsing fails + + # Basic validation: ensure all returned documents were part of the original set + # This is a heuristic, LLMs can hallucinate. + if all(doc in documents for doc in reranked_list): + return reranked_list + else: + plpy.warning(f"LLM returned documents not in original set during reranking. Returning original order. LLM output: {result}") + return documents # Fallback to original order if validation fails + + except json.JSONDecodeError: + plpy.warning(f"LLM returned non-JSON response for reranking. Returning original order. LLM output: {result}") + return documents # Fallback to original order if parsing fails + except Exception as e: + plpy.error(f"Failed to rerank documents: {e}") + return documents # Fallback in case of any other error + +def extract_keywords(provider_id: str, model: str, text: str, num_keywords: int = 5) -> list[str]: + """ + Extract a specified number of keywords from text + + Uses an AI model to extract the most important keywords or phrases from a given text. + Can specify the number of keywords to extract, and the model will return a JSON array containing these keywords. 
+ + Args: + provider_id (str): The unique identifier of the provider + model (str): The name of the model to use for keyword extraction + text (str): The text content to extract keywords from + num_keywords (int, optional): The number of keywords to extract, default is 5 + + Returns: + list: A list containing the extracted keywords + + Raises: + Error: If input text is not a string + Error: If num_keywords is not a positive integer + Error: If an error occurs during the keyword extraction process + + Example: + article = "Artificial intelligence is a branch of computer science that aims to create systems capable of performing tasks that typically require human intelligence." + keywords = extract_keywords("openai_provider", "gpt-4", article, 3) + # Might return ["artificial intelligence", "computer science", "intelligent tasks"] + """ + if not isinstance(num_keywords, int) or num_keywords <= 0: + plpy.error("'num_keywords' must be a positive integer.") + + provider = _get_provider_instance(provider_id) + + prompt = f"""Extract exactly {num_keywords} keywords or key phrases from the following text. +Return them as a JSON array of strings. Do NOT include any other text or explanation, just the JSON array. + +Text: "{text}" + +Keywords (JSON array):""" + + try: + result = provider.ask(model, prompt, temperature=0) + keywords_list = parse_jsonb(result) + if not isinstance(keywords_list, list) or not all(isinstance(k, str) for k in keywords_list): + plpy.warning(f"LLM returned non-list or non-string elements for keywords: {result}") + return [] + return keywords_list + except json.JSONDecodeError: + plpy.warning(f"LLM returned non-JSON response for keywords. 
LLM output: {result}") + return [] + except Exception as e: + plpy.error(f"Failed to extract keywords: {e}") + return [] + + +def vlm_parse_pdf( + provider_id: str, + model: str, + file_content: Union[str, bytes], + prompt: str = "Extract all text and describe any images, charts, or visual elements in this PDF.", +) -> list[str]: + """ + Parse PDF using Vision Language Model for enhanced text extraction and visual analysis + + This function converts PDF pages to images and uses a VLM to extract text and analyze visual elements. + It provides more accurate OCR and can describe charts, diagrams, and other visual content. + + Args: + provider_id (str): The unique identifier of the provider + model (str): The name of the model to use for VLM processing + file_content (Union[str, bytes]): PDF file content (base64 string or bytes) + prompt (str): Custom prompt for text extraction and analysis + + Returns: + list: List of extracted text/analysis for each page + + Raises: + Error: If VLM processing fails + Error: If PDF conversion fails + """ + plpy = get_plpy() + provider = _get_provider_instance(provider_id) + + try: + import io + import pypdfium2 as pdfium + import base64 + + if isinstance(file_content, str): + if file_content.startswith("data:application/pdf;base64,"): + file_content = file_content.split(",")[1] + file_content = base64.b64decode(file_content) + + pdf = pdfium.PdfDocument(file_content) + page_nums = len(pdf) + results = [] + for i in range(page_nums): + page = pdf[i] + bitmap = page.render(scale=2) + pil_image = bitmap.to_pil() + + buf = io.BytesIO() + pil_image.save(buf, format="PNG") + base64_image = f"data:image/png;base64,{base64.b64encode(buf.getvalue()).decode('utf-8')}" + + parsed_content = provider.vlm_ask( + model=model, + prompt=prompt, + image=base64_image + ) + + results.append({ + "page_num": i + 1, + "content": parsed_content + }) + pdf.close() + + return { + "metadata": { + "pageCount": page_nums + }, + "pages": results + } + + except 
Exception as e: + plpy.error(f"Error processing PDF with VLM: {e}") + + +def visual_qa(provider_id: str, model: str, image: Union[str, bytes], question: str): + """ + Perform visual question answering on an image + + This function allows asking specific questions about image content and getting + detailed answers based on visual analysis of the image. + + Args: + provider_id (str): The unique identifier of the provider + model (str): The name of the vision model to use + image (Union[str, bytes]): Base64 encoded image content or bytes, if string, should include mime type prefix (e.g., "data:image/png;base64,") + question (str): Question to ask about the image + + Returns: + str: Answer to the question based on image analysis + """ + provider = _get_provider_instance(provider_id) + + if isinstance(image, str): + if not image.startswith("data:image/"): + raise ValueError("Image string must include mime type prefix (e.g., 'data:image/png;base64,')") + + return provider.vlm_ask(model, question, image) + + from PIL import Image + import io + import base64 + + image = Image.open(io.BytesIO(image)) + buf = io.BytesIO() + image.save(buf, format="PNG") + image = f"data:image/png;base64,{base64.b64encode(buf.getvalue()).decode('utf-8')}" + + return provider.vlm_ask(model, question, image) diff --git a/contrib/aifun/aifun/parser.py b/contrib/aifun/aifun/parser.py new file mode 100644 index 00000000000..1ba29e6068c --- /dev/null +++ b/contrib/aifun/aifun/parser.py @@ -0,0 +1,289 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Union +import base64 +import io + +from .utils import get_plpy + +def parse_pdf(file_content: Union[str, bytes]): + """ + Parse PDF file content and extract text + + Extracts text from a base64-encoded PDF file or bytes. This function decodes the base64 content, + uses PyPDF2 to extract text from each page, and concatenates the results. + + Args: + file_content (Union[str, bytes]): Base64-encoded PDF file content or bytes + + Returns: + str: Extracted text from the PDF file + + Raises: + Warning: If input is not a string + Warning: If the base64 decoding fails + Warning: If the file is not a valid PDF + Warning: If any error occurs during PDF processing + + Example: + pdf_base64 = "base64_encoded_pdf_content" + text = parse_pdf(pdf_base64) + # Returns the extracted text from the PDF + """ + plpy = get_plpy() + + try: + import PyPDF2 + + if isinstance(file_content, str): + if file_content.startswith("data:application/pdf;base64,"): + file_content = file_content.split(",")[1] + file_content = base64.b64decode(file_content) + + pdf_file = io.BytesIO(file_content) + + pdf_reader = PyPDF2.PdfReader(pdf_file) + pages = [] + + for page_num in range(len(pdf_reader.pages)): + page = pdf_reader.pages[page_num] + pages.append({ + "page_num": page_num, + "text": page.extract_text() or "" + }) + + return pages + except base64.binascii.Error: + plpy.warning("Failed to decode base64 content. Please check the input format.") + return { + "error": "Failed to decode base64 content. Please check the input format." 
+ } + except PyPDF2.errors.PdfReadError: + plpy.warning("Failed to read PDF file. The content may not be a valid PDF.") + return { + "error": "Failed to read PDF file. The content may not be a valid PDF." + } + except Exception as e: + plpy.warning(f"Error processing PDF file: {e}") + return { + "error": f"Error processing PDF file: {e}" + } + + +def parse_docx(file_content: Union[str, bytes]): + """ + Parse DOCX file content and extract text + + Extracts text from a DOCX file or bytes. This function decodes the base64 content, + uses python-docx to extract text from the document, and returns the concatenated text. + + Args: + file_content (Union[str, bytes]): DOCX file content or bytes + + Returns: + str: Extracted text from the DOCX file + + Raises: + Warning: If input is not a string + Warning: If the base64 decoding fails + Warning: If the file is not a valid DOCX + Warning: If any error occurs during DOCX processing + + Example: + docx_base64 = "base64_encoded_docx_content" + text = parse_docx(docx_base64) + # Returns the extracted text from the DOCX + """ + plpy = get_plpy() + + try: + from docx import Document + + if isinstance(file_content, str): + if file_content.startswith("data:application/vnd.openxmlformats-officedocument.wordprocessingml.document;base64,"): + file_content = file_content.split(",")[1] + file_content = base64.b64decode(file_content) + + docx_file = io.BytesIO(file_content) + + doc = Document(docx_file) + paragraphs = [] + + for para_num, para in enumerate(doc.paragraphs): + paragraphs.append({ + "para_num": para_num + 1, + "text": para.text + }) + + tables = [] + # Extract text from tables + for table_num, table in enumerate(doc.tables): + # Store table as a list of rows, each row being a list of cell texts + table_data = [] + for row_num, row in enumerate(table.rows): + row_cells = [] + for cell_num, cell in enumerate(row.cells): + row_cells.append({ + "cell_num": cell_num + 1, + "text": cell.text + }) + table_data.append({ + "row_num": 
row_num + 1, + "cells": row_cells + }) + + # Generate a plain text representation of the table for backward compatibility + plain_text_rows = [] + for row in table_data: + row_text = "\t".join(cell["text"] for cell in row["cells"]) + plain_text_rows.append(row_text) + + tables.append({ + "table_num": table_num + 1, + "data": table_data, # Structured table data with rows and columns + "text": "\n".join(plain_text_rows), # Tab-separated plain text + "num_rows": len(table_data), + "num_columns": len(table_data[0]["cells"]) if table_data else 0 + }) + + return { + "paragraphs": paragraphs, + "tables": tables + } + except base64.binascii.Error: + plpy.warning("Failed to decode base64 content. Please check the input format.") + return { + "error": "Failed to decode base64 content. Please check the input format." + } + except Exception as e: + plpy.warning(f"Error processing DOCX file: {e}") + return { + "error": f"Error processing DOCX file: {e}" + } + + +def parse_pptx(file_content: Union[str, bytes]): + """ + Parse PPTX file content and extract text + + Extracts text from a PPTX file or bytes. This function decodes the base64 content, + uses python-pptx to extract text from slides, and returns the concatenated text. 
+ + Args: + file_content (Union[str, bytes]): PPTX file content or bytes + + Returns: + dict: Extracted text from the PPTX file, with slides as a list of strings + + Raises: + Warning: If input is not a string or bytes + Warning: If the base64 decoding fails + Warning: If the file is not a valid PPTX + Warning: If any error occurs during PPTX processing + + Example: + pptx_base64 = "base64_encoded_pptx_content" + text = parse_pptx(pptx_base64) + # Returns the extracted text from the PPTX + """ + plpy = get_plpy() + + try: + from pptx import Presentation + + if isinstance(file_content, str): + if file_content.startswith("data:application/vnd.openxmlformats-officedocument.presentationml.presentation;base64,"): + file_content = file_content.split(",")[1] + file_content = base64.b64decode(file_content) + + pptx_file = io.BytesIO(file_content) + prs = Presentation(pptx_file) + + slides = [] + for slide_idx, slide in enumerate(prs.slides, 1): + slide_text = [] + for shape in slide.shapes: + if hasattr(shape, 'text') and shape.text.strip(): + slide_text.append(shape.text) + + if len(slide_text) > 1: + slides.append({ + "slide_idx": slide_idx, + "text": "\n".join(slide_text) + } + ) + + return slides + + except base64.binascii.Error: + plpy.warning("Failed to decode base64 content. Please check the input format.") + return { + "error": "Failed to decode base64 content. Please check the input format." + } + except Exception as e: + plpy.warning(f"Error processing PPTX file: {e}") + return { + "error": f"Error processing PPTX file: {e}" + } + +def parse_document(file_content: Union[str, bytes], file_extension: str): + """ + Universal document parser that handles different file formats + + Automatically detects the file type based on extension and calls the appropriate parser function. + + Args: + file_content (Union[str, bytes]): File content or bytes + file_extension (str): File extension (pdf, docx, pptx, png, etc.) 
+ provider_id (str, optional): Provider ID for AI-based processing (required for some formats) + model (str, optional): Model name for AI-based processing (required for some formats) + + Returns: + str: Extracted text from the document + + Raises: + Warning: If input parameters are invalid + Warning: If the file format is not supported + Warning: If any error occurs during document processing + + Example: + doc_base64 = "base64_encoded_document_content" + text = parse_document(doc_base64, "pdf") + # Returns the extracted text from the document + """ + plpy = get_plpy() + + if not isinstance(file_extension, str) or not file_extension: + plpy.warning("Input 'file_extension' must be a non-empty string.") + return { + "error": "Input 'file_extension' must be a non-empty string." + } + + file_extension = file_extension.lower().lstrip('.') + + if file_extension == 'pdf': + return parse_pdf(file_content) + elif file_extension == 'docx': + return parse_docx(file_content) + elif file_extension == 'pptx': + return parse_pptx(file_content) + else: + plpy.warning(f"Unsupported file format: {file_extension}") + return { + "error": f"Unsupported file format: {file_extension}" + } diff --git a/contrib/aifun/aifun/providers/__init__.py b/contrib/aifun/aifun/providers/__init__.py new file mode 100644 index 00000000000..7ea5172ef4c --- /dev/null +++ b/contrib/aifun/aifun/providers/__init__.py @@ -0,0 +1,40 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from .openai import OpenAIProvider +from .anthropic import AnthropicProvider +from .gemini import GeminiProvider +from .aws_bedrock import AWSBedrockProvider + +# The central mapping of provider type names to their implementation classes +PROVIDER_MAP = { + "openai": OpenAIProvider, + "anthropic": AnthropicProvider, + "google": GeminiProvider, + "aws_bedrock": AWSBedrockProvider, +} + +def get_provider_class(provider_type): + """ + Factory function to get the provider class based on its type name. + """ + provider_class = PROVIDER_MAP.get(provider_type) + if not provider_class: + # This error will be caught by the calling function in handler.py + # and reported as a plpy.error + raise ValueError(f"Unsupported provider type: {provider_type}") + return provider_class diff --git a/contrib/aifun/aifun/providers/anthropic.py b/contrib/aifun/aifun/providers/anthropic.py new file mode 100644 index 00000000000..4f185882203 --- /dev/null +++ b/contrib/aifun/aifun/providers/anthropic.py @@ -0,0 +1,134 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from .base import RequestsBasedProvider + +class AnthropicProvider(RequestsBasedProvider): + """ + Provider implementation for Anthropic's Claude API. + + This class implements the Anthropic API endpoints for text generation + using Claude models through the Anthropic REST API. + """ + + def _get_endpoint(self): + """ + Get the endpoint URL for Anthropic's messages API. + + Returns: + str: The complete URL for the messages API endpoint + """ + return self.metadata.get("endpoint", "https://api.anthropic.com/v1/messages") + + def _get_headers(self): + """ + Get the required headers for Anthropic API requests. + + Returns: + dict: Dictionary containing the required headers including API key and version + """ + return { + "x-api-key": self.credentials['api_key'], + "anthropic-version": self.metadata.get("anthropic_version", "2023-06-01"), + "Content-Type": "application/json" + } + + def ask(self, model: str, prompt: str, **kwargs): + """ + Generate a text response using Anthropic's Claude API. + + Args: + model (str): The model identifier (e.g., "claude-3-opus-20240229") + prompt (str): The input prompt for text generation + **kwargs: Additional parameters for the Anthropic API (max_tokens, temperature, etc.) 
+ + Returns: + str: The generated text response + + Raises: + requests.HTTPError: If the API request fails + """ + payload = { + "model": model, + "messages": [ + { + "role": "user", + "content": prompt + } + ], + "max_tokens": 4096, + **kwargs + } + data = self._make_request( + "post", + self._get_endpoint(), + self._get_headers(), + payload + ) + return data["content"][0]["text"].strip() + + def vlm_ask(self, model: str, prompt: str, image: str, **kwargs): + """ + Generate a text response using Anthropic's Claude API for visual language models. + + Args: + model (str): The model identifier (e.g., "claude-3-opus-20240229") + prompt (str): The input prompt for text generation + image (str): The base64-encoded image string for visual context, + including the mime type prefix (e.g., "data:image/png;base64,") + **kwargs: Additional parameters for the Anthropic API + + Returns: + str: The generated text response + + Raises: + requests.HTTPError: If the API request fails + """ + mime_type = image.split(";")[0].split(":")[-1] + image = image.split(',')[1] + + payload = { + "model": model, + "messages": [ + { + "role": "user", + "content": [ + { + "type": "image", + "image": { + "data": image, + "mime_type": mime_type + } + }, + { + "type": "text", + "text": prompt + } + ] + } + ], + "max_tokens": 4096, + **kwargs + } + data = self._make_request( + "post", + self._get_endpoint(), + self._get_headers(), + payload + ) + return data["content"][0]["text"].strip() + \ No newline at end of file diff --git a/contrib/aifun/aifun/providers/aws_bedrock.py b/contrib/aifun/aifun/providers/aws_bedrock.py new file mode 100644 index 00000000000..9f663dac767 --- /dev/null +++ b/contrib/aifun/aifun/providers/aws_bedrock.py @@ -0,0 +1,243 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import json +import boto3 + +from .base import BaseProvider +from ..utils import retry_with_backoff + + +class AWSBedrockProvider(BaseProvider): + """ + Provider implementation for AWS Bedrock service. + + This class implements the AWS Bedrock API endpoints for text generation, + embeddings, and multimodal embeddings using the boto3 SDK. + """ + + def __init__(self, credentials: dict, metadata: dict = None): + """ + Initialize the AWS Bedrock provider with credentials and metadata. + + Args: + credentials (dict): AWS credentials including access key, secret key, and session token + metadata (dict, optional): Additional metadata including AWS region. Defaults to None. + + Raises: + Exception: If boto3 is not installed + """ + super().__init__(credentials, metadata) + if boto3 is None: + self.plpy.error("boto3 is not installed. Please run 'pip install boto3' to use AWS Bedrock.") + + self.client = boto3.client( + "bedrock-runtime", + aws_access_key_id=self.credentials.get("aws_access_key_id"), + aws_secret_access_key=self.credentials.get("aws_secret_access_key"), + aws_session_token=self.credentials.get("aws_session_token"), + region_name=self.metadata.get("region") + ) + + @retry_with_backoff() + def _invoke_model(self, model_id: str, body: str): + """ + Invoke a model with the given body using AWS Bedrock. 
+ + Args: + model_id (str): The model identifier to invoke + body (str): JSON string containing the request body + + Returns: + dict: The response from the model invocation + + Raises: + Exception: If the model invocation fails + """ + response = self.client.invoke_model( + body=body, + modelId=model_id, + accept="application/json", + contentType="application/json" + ) + return json.loads(response.get('body').read()) + + def ask(self, model: str, prompt: str, **kwargs): + """ + Generate a text response using AWS Bedrock models. + + Args: + model (str): The model identifier (e.g., "anthropic.claude-3-opus-20240229") + prompt (str): The input prompt for text generation + **kwargs: Additional parameters specific to the model + + Returns: + str: The generated text response + + Raises: + Exception: If the model is not supported or the request fails + """ + body = {} + if "anthropic.claude" in model: + if "v3" in model: + body = json.dumps({ + "messages": [ + { + "role": "user", + "content": prompt + } + ], + "max_tokens": kwargs.pop("max_tokens", 4096), + "anthropic_version": "bedrock-2023-05-31", + **kwargs + }) + response_body = self._invoke_model(model, body) + return response_body.get('content')[0].get('text') + else: # Claude v1/v2 + body = json.dumps({ + "prompt": f"\n\nHuman: {prompt}\n\nAssistant:", + "max_tokens_to_sample": kwargs.pop("max_tokens_to_sample", 4096), + **kwargs + }) + response_body = self._invoke_model(model, body) + return response_body.get('completion') + elif "amazon.titan" in model: + body = json.dumps({ + "inputText": prompt, + "textGenerationConfig": {"maxTokenCount": kwargs.pop("maxTokenCount", 4096), **kwargs} + }) + response_body = self._invoke_model(model, body) + return response_body.get('results')[0].get('outputText') + else: + self.plpy.error(f"Model '{model}' is not currently supported by the AWSBedrockProvider.") + + def embed(self, model: str, text: str, **kwargs): + """ + Generate text embeddings using AWS Bedrock models. 
+ + Args: + model (str): The model identifier (e.g., "amazon.titan-embed-text-v1") + text (str): The input text to generate embeddings for + **kwargs: Additional parameters specific to the model + + Returns: + str: A string representation of the embedding vector + + Raises: + Exception: If the model is not supported for embeddings or the request fails + """ + body = {} + if "amazon.titan" in model: + body = json.dumps({"inputText": text}) + response_body = self._invoke_model(model, body) + return str(response_body.get('embedding')) + else: + self.plpy.error(f"Model '{model}' is not currently supported for embeddings by the AWSBedrockProvider.") + + def multimodal_embed(self, model, content, **kwargs): + """ + Generate embeddings for multimodal content using AWS Bedrock. + Supports text and image content for models like Amazon Titan Multimodal Embeddings. + + Args: + model (str): The model identifier (e.g., "amazon.titan-embed-image-v1") + content (dict): Dictionary with text and/or image data + Format: {"text": "some text", "image": "base64_encoded_image"} + **kwargs: Additional parameters for the embedding model + + Notes: + - The input_image should include the mime type prefix (e.g., "data:image/png;base64,") + + Returns: + str: A string representation of the embedding vector + + Raises: + Exception: If the model is not supported for multimodal embeddings or the request fails + """ + if "amazon.titan-embed-image" in model: + # Extract text and image from content + input_text = content.get("text", "") + input_image = content.get("image", "") + + # Build the request body for Titan Multimodal Embeddings + body = { + "inputImage": input_image, + "inputText": input_text, + "embeddingConfig": { + "outputEmbeddingLength": kwargs.get("embedding_length", 1024) + } + } + + response_body = self._invoke_model(model, json.dumps(body)) + return str(response_body.get('embedding')) + else: + self.plpy.error(f"Model '{model}' is not currently supported for multimodal embeddings 
by the AWSBedrockProvider.") + + def vlm_ask(self, model, prompt, image, **kwargs): + """ + Generate a text response using AWS Bedrock models for visual language models. + + Args: + model (str): The model identifier (e.g., "anthropic.claude-3-opus-20240229") + prompt (str): The input prompt for text generation + image (str): The base64-encoded image string for visual context, + including the mime type prefix (e.g., "data:image/png;base64,") + **kwargs: Additional parameters specific to the model + + Returns: + str: The generated text response + + Raises: + Exception: If the model is not supported or the request fails + """ + body = {} + if "anthropic.claude" in model: + if "v3" in model: + body = json.dumps({ + "messages": [ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": { + "url": image + } + }, + { + "type": "text", + "text": prompt + } + ] + } + ], + "max_tokens": kwargs.pop("max_tokens", 4096), + **kwargs + }) + response_body = self._invoke_model(model, body) + return response_body.get('content')[0].get('text') + else: # Claude v1/v2 + body = json.dumps({ + "prompt": f"\n\nHuman: {prompt}\n\nAssistant:", + "max_tokens_to_sample": 4096, + **kwargs + }) + response_body = self._invoke_model(model, body) + return response_body.get('completion') + else: + self.plpy.error(f"Model '{model}' is not currently supported for visual language models by the AWSBedrockProvider.") + \ No newline at end of file diff --git a/contrib/aifun/aifun/providers/base.py b/contrib/aifun/aifun/providers/base.py new file mode 100644 index 00000000000..55b64f0c4f8 --- /dev/null +++ b/contrib/aifun/aifun/providers/base.py @@ -0,0 +1,145 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
# The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import requests

from ..utils import get_plpy, retry_with_backoff

class BaseProvider:
    """
    Common interface shared by every AI provider implementation.

    Concrete providers override whichever capability methods below they
    actually support; any method left un-overridden reports itself as
    unsupported by raising NotImplementedError.

    Attributes:
        credentials (dict): Authentication credentials for the provider.
        metadata (dict): Extra provider configuration (endpoints, options).
        plpy: PostgreSQL PL/Python interface used for error reporting.
    """

    def __init__(self, credentials: dict, metadata: dict = None):
        """
        Store credentials/metadata and grab the PL/Python handle.

        Args:
            credentials (dict): Authentication credentials for the provider.
            metadata (dict, optional): Extra provider configuration.
                Defaults to None (treated as an empty dict).
        """
        self.credentials = credentials
        self.metadata = metadata or {}
        self.plpy = get_plpy()

    def ask(self, model: str, prompt: str, **kwargs) -> str:
        """
        Produce a text completion for the given prompt.

        Args:
            model (str): Model identifier to generate with.
            prompt (str): Input prompt.
            **kwargs: Provider/model specific options.

        Returns:
            str: The generated text.

        Raises:
            NotImplementedError: When the provider lacks this capability.
        """
        raise NotImplementedError(f"{self.__class__.__name__} does not support 'ask'")

    def embed(self, model: str, text: str, **kwargs) -> str:
        """
        Produce an embedding vector for the given text.

        Args:
            model (str): Model identifier to embed with.
            text (str): Input text.
            **kwargs: Provider/model specific options.

        Returns:
            str: String representation of the embedding vector.

        Raises:
            NotImplementedError: When the provider lacks this capability.
        """
        raise NotImplementedError(f"{self.__class__.__name__} does not support 'embedding'")

    def multimodal_embed(self, model: str, content: dict, **kwargs) -> str:
        """
        Produce an embedding vector for multimodal content (text, images, ...).

        Args:
            model (str): Model identifier to embed with.
            content (dict): Content to embed, e.g. {"text": "...", "image": "..."}.
            **kwargs: Provider/model specific options.

        Returns:
            str: String representation of the embedding vector.

        Raises:
            NotImplementedError: When the provider lacks this capability.
        """
        raise NotImplementedError(f"{self.__class__.__name__} does not support 'multimodal_embedding'")

    def vlm_ask(self, model: str, prompt: str, image: str, **kwargs) -> str:
        """
        Produce a text completion for a prompt plus an image, via a
        visual language model.

        Args:
            model (str): Model identifier to generate with.
            prompt (str): Input prompt.
            image (str): Base64-encoded image providing visual context.
            **kwargs: Provider/model specific options.

        Returns:
            str: The generated text.

        Raises:
            NotImplementedError: When the provider lacks this capability.
        """
        raise NotImplementedError(f"{self.__class__.__name__} does not support 'vlm_ask'")

class RequestsBasedProvider(BaseProvider):
    """
    Base class for providers whose APIs are reached over plain HTTP.

    Supplies a single retried request helper so concrete providers only
    build URLs, headers, and payloads.
    """

    @retry_with_backoff()
    def _make_request(self, method, url, headers, json_payload):
        """
        Issue one HTTP request, retried with backoff on transient failures.

        Args:
            method (str): HTTP verb, e.g. "post".
            url (str): Target endpoint URL.
            headers (dict): Request headers.
            json_payload (dict): Body to serialize as JSON.

        Returns:
            dict: Decoded JSON response.

        Raises:
            requests.HTTPError: For non-2xx responses.
        """
        resp = requests.request(
            method,
            url,
            headers=headers,
            json=json_payload,
            timeout=60,  # hard 60-second cap per attempt, same as before
        )
        resp.raise_for_status()
        return resp.json()
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from .base import RequestsBasedProvider

class GeminiProvider(RequestsBasedProvider):
    """
    Provider implementation for Google's Gemini API.

    Implements text generation, embeddings, multimodal embeddings, and
    vision prompts against the Google Generative AI REST API.
    """

    def _get_endpoint(self, model: str, task: str):
        """
        Get the endpoint URL for a specific model and task.

        Args:
            model (str): The model identifier
            task (str): The task type (e.g., "generateContent", "embedContent")

        Returns:
            str: The complete URL for the API endpoint (API key in query string)
        """
        base_url = self.metadata.get("endpoint", "https://generativelanguage.googleapis.com/v1beta/models")
        return f"{base_url}/{model}:{task}?key={self.credentials['api_key']}"

    def _get_headers(self):
        """
        Get the required headers for Gemini API requests.

        Returns:
            dict: Dictionary containing the required headers
        """
        return {"Content-Type": "application/json"}

    def ask(self, model: str, prompt: str, **kwargs):
        """
        Generate a text response using Google's Gemini API.

        Args:
            model (str): The model identifier (e.g., "gemini-pro")
            prompt (str): The input prompt for text generation
            **kwargs: Additional parameters ("temperature", "max_output_tokens")

        Returns:
            str: The generated text response

        Raises:
            requests.HTTPError: If the API request fails
        """
        payload = {
            "contents": [
                {
                    "parts": [
                        {
                            "text": prompt
                        }
                    ]
                }
            ]
        }
        generation_config = {}
        if "temperature" in kwargs:
            generation_config["temperature"] = kwargs["temperature"]
        if "max_output_tokens" in kwargs:
            # BUGFIX: the REST generationConfig field is camelCase
            # ("maxOutputTokens"); forwarding the snake_case key verbatim
            # meant the limit never reached the API.  The snake_case kwarg
            # is still accepted for backward compatibility.
            generation_config["maxOutputTokens"] = kwargs["max_output_tokens"]
        if generation_config:
            payload["generationConfig"] = generation_config
        endpoint = self._get_endpoint(model, "generateContent")
        data = self._make_request(
            "post",
            endpoint,
            self._get_headers(),
            payload
        )
        return data["candidates"][0]["content"]["parts"][0]["text"].strip()

    def embed(self, model: str, text: str, **kwargs):
        """
        Generate text embeddings using Google's Gemini API.

        Args:
            model (str): The model identifier (e.g., "embedding-001")
            text (str): The input text to generate embeddings for
            **kwargs: Additional parameters for the Gemini API

        Returns:
            str: A string representation of the embedding vector

        Raises:
            requests.HTTPError: If the API request fails
        """
        payload = {
            "model": f"models/{model}",
            "content": {
                "parts": [
                    {
                        "text": text
                    }
                ]
            }
        }
        endpoint = self._get_endpoint(model, "embedContent")
        data = self._make_request(
            "post",
            endpoint,
            self._get_headers(),
            payload
        )
        return str(data["embedding"]["values"])

    def multimodal_embed(self, model: str, content: dict, **kwargs):
        """
        Generate embeddings for multimodal content using Google Gemini.
        Supports text and image content for Gemini models.

        Args:
            model (str): The model identifier (e.g., "multimodalembedding")
            content (dict): Dictionary with text and/or image data
                            Format: {"text": "some text", "image": "base64_encoded_image"}
            **kwargs: Additional parameters for the embedding model

        Note:
            - The input image must be a base64-encoded string including the
              mime type prefix (e.g., "data:image/png;base64,"); the prefix
              is parsed to fill in inline_data.mime_type.

        Returns:
            str: A string representation of the embedding vector

        Raises:
            requests.HTTPError: If the API request fails
            Exception: If content doesn't contain at least text or image data
        """
        input_text = content.get("text", "")
        input_image = content.get("image", "")

        parts = []

        if input_text:
            parts.append({"text": input_text})

        if input_image:
            # NOTE(review): assumes a well-formed data URI with a comma
            # separator; a bare base64 string would raise IndexError here —
            # confirm callers always send the prefix.
            mime_type = input_image.split(";")[0].split(":")[-1]
            image = input_image.split(',')[1]
            parts.append({
                "inline_data": {
                    "mime_type": mime_type,
                    "data": image
                }
            })

        if not parts:
            self.plpy.error("Content must contain at least text or image data.")

        payload = {
            "model": f"models/{model}",
            "content": {"parts": parts}
        }

        endpoint = self._get_endpoint(model, "embedContent")
        data = self._make_request(
            "post",
            endpoint,
            self._get_headers(),
            payload
        )
        return str(data["embedding"]["values"])

    def vlm_ask(self, model: str, prompt: str, image: str, **kwargs):
        """
        Generate a text response using Google's Gemini API for visual language models.

        Args:
            model (str): The model identifier (e.g., "gemini-pro-vision")
            prompt (str): The input prompt for text generation
            image (str): The base64-encoded image string for visual context,
                         including the mime type prefix (e.g., "data:image/png;base64,")
            **kwargs: Additional parameters for the Gemini API

        Returns:
            str: The generated text response

        Raises:
            requests.HTTPError: If the API request fails
        """
        mime_type = image.split(";")[0].split(":")[-1]
        image = image.split(',')[1]

        # CONSISTENCY FIX: generateContent already encodes the model in the
        # endpoint URL (see ask()); the stray top-level "model" key that was
        # sent here has been dropped so both generation paths agree.
        payload = {
            "contents": [
                {
                    "parts": [
                        {
                            "text": prompt
                        },
                        {
                            "inline_data": {
                                "mime_type": mime_type,
                                "data": image
                            }
                        }
                    ]
                }
            ]
        }

        endpoint = self._get_endpoint(model, "generateContent")
        data = self._make_request(
            "post",
            endpoint,
            self._get_headers(),
            payload
        )
        return data["candidates"][0]["content"]["parts"][0]["text"].strip()
from .base import RequestsBasedProvider

class OpenAIProvider(RequestsBasedProvider):
    """
    Provider implementation for the OpenAI API (and OpenAI-compatible servers).

    Implements text generation, embeddings, multimodal embeddings, and
    vision prompts via the OpenAI REST API.  The base URL can be overridden
    through metadata["endpoint"] for compatible third-party servers.
    """

    def _get_endpoint(self, path: str) -> str:
        """
        Get the full endpoint URL for a given API path.

        Args:
            path (str): The API path (e.g., "/chat/completions")

        Returns:
            str: The complete URL for the API endpoint
        """
        base_url = self.metadata.get("endpoint", "https://api.openai.com/v1")
        return f"{base_url}{path}"

    def _get_headers(self) -> dict:
        """
        Get the required headers for OpenAI API requests.

        Returns:
            dict: Dictionary containing the required headers
        """
        return {
            "Authorization": f"Bearer {self.credentials['api_key']}",
            "Content-Type": "application/json"
        }

    def ask(self, model: str, prompt: str, **kwargs) -> str:
        """
        Generate a text response using OpenAI's chat completion API.

        Args:
            model (str): The model identifier (e.g., "gpt-4", "gpt-3.5-turbo")
            prompt (str): The input prompt for text generation
            **kwargs: Additional parameters for the OpenAI API (temperature, max_tokens, etc.)

        Returns:
            str: The generated text response

        Raises:
            requests.HTTPError: If the API request fails
        """
        payload = {
            "model": model,
            "messages": [
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            **kwargs
        }
        data = self._make_request(
            "post",
            self._get_endpoint("/chat/completions"),
            self._get_headers(),
            payload
        )
        return data["choices"][0]["message"]["content"].strip()

    # BUGFIX: the return annotation said `list` but the method has always
    # returned `str(...)` (matching the BaseProvider contract); annotation
    # and docstring corrected to match actual behavior.
    def embed(self, model: str, text: str, **kwargs) -> str:
        """
        Generate text embeddings using OpenAI's embeddings API.

        Args:
            model (str): The model identifier (e.g., "text-embedding-ada-002")
            text (str): The input text to generate embeddings for
            **kwargs: Additional parameters for the OpenAI API

        Returns:
            str: A string representation of the embedding vector

        Raises:
            requests.HTTPError: If the API request fails
        """
        payload = {
            "model": model,
            "input": text,
            **kwargs
        }
        data = self._make_request(
            "post",
            self._get_endpoint("/embeddings"),
            self._get_headers(),
            payload
        )
        return str(data["data"][0]["embedding"])

    def multimodal_embed(self, model: str, content: dict, **kwargs) -> str:
        """
        Generate embeddings for multimodal content.

        Args:
            model (str): The model identifier
            content (dict): Dictionary with text and/or image data
                            Format: {"text": "some text", "image": "base64_encoded_image"}
            **kwargs: Additional parameters for the embedding model

        Note:
            - The input image should be a base64-encoded string including the
              mime type prefix (e.g., "data:image/png;base64,")
            - NOTE(review): the stock OpenAI /embeddings endpoint does not
              accept image inputs; this payload shape presumably targets an
              OpenAI-compatible server configured via metadata["endpoint"] —
              confirm against the deployed backend.

        Returns:
            str: A string representation of the embedding vector

        Raises:
            requests.HTTPError: If the API request fails
        """
        input_text = content.get("text", "")
        input_image = content.get("image", "")

        # Build the input list: text part first when present, then the image.
        # When neither is supplied this degenerates to a single empty text
        # part, matching the previous branch-by-branch construction.
        inputs = []
        if input_text:
            inputs.append({"text": input_text})
        if input_image:
            inputs.append({"image": input_image})
        if not inputs:
            inputs = [{"text": ""}]

        payload = {
            "model": model,
            "input": inputs,
            **kwargs
        }

        data = self._make_request("post", self._get_endpoint("/embeddings"), self._get_headers(), payload)
        return str(data["data"][0]["embedding"])

    def vlm_ask(self, model: str, prompt: str, image: str, **kwargs) -> str:
        """
        Generate a text response using OpenAI's visual language model API.

        Args:
            model (str): The model identifier (e.g., "gpt-4-vision-preview")
            prompt (str): The input prompt for text generation
            image (str): The base64-encoded image string (including the mime
                         type prefix, e.g., "data:image/png;base64,") for visual context
            **kwargs: Additional parameters for the OpenAI API

        Returns:
            str: The generated text response

        Raises:
            requests.HTTPError: If the API request fails
        """
        payload = {
            "model": model,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": image
                            }
                        },
                        {
                            "type": "text",
                            "text": prompt
                        }
                    ]
                }
            ],
            **kwargs
        }

        data = self._make_request(
            "post",
            self._get_endpoint("/chat/completions"),
            self._get_headers(),
            payload
        )

        return data["choices"][0]["message"]["content"].strip()
import json
import math
import time
from functools import wraps

# Simple process-wide cache for the plpy handle and provider instances.
# Provider entries are keyed as `user::provider_id` to ensure user isolation.
CACHE = {}

def get_plpy():
    """Return the PL/Python `plpy` module, caching it after the first lookup.

    Raises:
        ImportError: If not running inside a PostgreSQL PL/Python environment.
    """
    if "plpy" in CACHE:
        return CACHE["plpy"]

    try:
        import plpy
        CACHE["plpy"] = plpy
        return plpy
    except ImportError:
        raise ImportError("plpy module not available. This code must be run within PostgreSQL PL/Python environment.")

def cosine_similarity(vec1, vec2):
    """Return the cosine similarity of two equal-length numeric vectors.

    Returns:
        float: dot(vec1, vec2) / (|vec1| * |vec2|), or 0.0 when either
        vector has zero magnitude (avoids division by zero).
    """
    dot_product = sum(p * q for p, q in zip(vec1, vec2))
    magnitude = math.sqrt(sum(val ** 2 for val in vec1)) * math.sqrt(sum(val ** 2 for val in vec2))
    if not magnitude:
        return 0.0
    return dot_product / magnitude


def parse_jsonb(jsonb_str):
    """Parse a JSON string that may be wrapped in markdown code fences.

    LLM responses often arrive as ```json ... ``` (or plain ``` ... ```)
    blocks, or with trailing fenced noise; the fences are stripped before
    parsing.

    Returns:
        The decoded object, or None (with a plpy warning) on parse failure.
    """
    text = jsonb_str.strip()
    if text.startswith("```json"):
        text = text[len("```json"):].split("```", 1)[0]
    elif text.startswith("```"):
        # BUGFIX: a bare leading ``` fence used to make split("```", 1)[0]
        # return the empty string *before* the fence, so valid fenced JSON
        # failed to parse.  Strip the opening fence, then cut at the closer.
        text = text[3:].split("```", 1)[0]
    else:
        # No leading fence: keep everything before any trailing fence.
        text = text.split("```", 1)[0]

    try:
        return json.loads(text)
    except json.JSONDecodeError:
        get_plpy().warning(f"Error parsing JSONB string: {jsonb_str}")
        return None

def retry_with_backoff(retries=3, backoff_in_seconds=1):
    """Decorator factory retrying transient HTTP/AWS errors with exponential backoff.

    Only exceptions from the requests/botocore stacks are retried; all other
    exceptions propagate immediately.

    Args:
        retries (int): Maximum number of retries after the first failure.
        backoff_in_seconds (int | float): Base delay; doubles on each retry.
    """
    def rwb(f):
        @wraps(f)
        def wrapper(*args, **kwargs):
            attempt = 0
            while True:
                try:
                    return f(*args, **kwargs)
                except Exception as e:
                    # A more robust implementation would check for specific
                    # boto3/requests transient error classes; module-name
                    # matching keeps both libraries optional at import time.
                    module = type(e).__module__ or ""
                    transient = module.startswith("botocore.exceptions") or module.startswith("requests.exceptions")
                    if not transient:
                        raise
                    if attempt < retries:
                        attempt += 1
                        sleep_time = backoff_in_seconds * 2 ** (attempt - 1)
                        # BUGFIX: plpy is now looked up lazily, only when a
                        # warning must be emitted — the old eager lookup at
                        # the top of wrapper() made every successful call
                        # depend on the PL/Python environment.
                        get_plpy().warning(f"API call failed, retrying in {sleep_time}s... ({attempt}/{retries})")
                        time.sleep(sleep_time)
                    else:
                        get_plpy().warning(f"API call failed after {retries} retries.")
                        raise e
        return wrapper
    return rwb