README generated by AI
A comprehensive Retrieval-Augmented Generation (RAG) service built in Go that enables intelligent document storage, processing, and vector search capabilities. Ragnar provides a complete solution for ingesting various document formats, converting them to markdown, chunking content, generating embeddings, and performing semantic search.
- Multi-format Document Support: Supports PDF, DOCX, ODT, HTML, JSON, plain text, and markdown files
- Intelligent Document Processing: Automatically converts documents to markdown using Pandoc and pdftotext
- Document Chunking: Smart text chunking with configurable strategies
- Vector Embeddings: Generate embeddings using various AI models via Bellman AI platform
- Semantic Search: Perform vector-based similarity search across document chunks
- Access Control: Token-based authentication with fine-grained permissions
- Storage Flexibility: Uses MinIO-compatible object storage for scalable file storage
- Processing Pipeline: Asynchronous document processing with status tracking
- RESTful API: Complete REST API with OpenAPI/Swagger documentation
Ragnar consists of several key components:
- Web API: RESTful HTTP server with comprehensive endpoints
- Document Parser: Converts various file formats to markdown
- Chunker: Splits documents into manageable chunks for processing
- AI Integration: Embedding generation via Bellman AI platform
- Storage Layer: MinIO/S3-compatible object storage
- Database: PostgreSQL for metadata and chunk storage
- Processing Pipeline: Background job processing for document workflows
- Clone the repository:
git clone https://github.com/modfin/ragnar.git
cd ragnar
- Start all services:
docker-compose up -d
This will start:
- Ragnar API server on port 7100
- PostgreSQL database on port 6789
- MinIO object storage on port 9000 (console on 9191)
- Access the API:
- API: http://localhost:7100
- MinIO Console: http://localhost:9191 (admin/admin)
Ragnar provides a comprehensive Go client library for easy integration.
go get github.com/modfin/ragnar

package main
import (
"context"
"fmt"
"strings"
"github.com/modfin/ragnar"
)
func main() {
// Initialize the client
client := ragnar.NewClient(ragnar.ClientConfig{
BaseURL: "http://localhost:7100",
AccessKey: "your-access-key",
})
ctx := context.Background()
// Create a new tub (document collection)
tub, err := client.CreateTub(ctx, ragnar.Tub{
TubName: "my-documents",
})
if err != nil {
panic(err)
}
fmt.Printf("Created tub: %s\n", tub.TubId)
}

// List all available tubs
tubs, err := client.GetTubs(ctx)
if err != nil {
log.Fatal(err)
}
// Get specific tub
tub, err := client.GetTub(ctx, "my-documents")
if err != nil {
log.Fatal(err)
}
// Update tub with required headers
updatedTub := tub.WithRequiredDocumentHeaders("project-id", "department")
result, err := client.UpdateTub(ctx, updatedTub)
if err != nil {
log.Fatal(err)
}
// Delete tub
deletedTub, err := client.DeleteTub(ctx, "my-documents")
if err != nil {
log.Fatal(err)
}

// Upload a text document
content := strings.NewReader("This is my document content")
headers := map[string]string{
"Content-Type": "text/plain",
"x-ragnar-project-id": "proj-123", // Custom header
}
doc, err := client.CreateTubDocument(ctx, "my-documents", content, headers)
if err != nil {
log.Fatal(err)
}
fmt.Printf("Document uploaded: %s\n", doc.DocumentId)

// Upload with pre-processed markdown and chunks
fileContent := strings.NewReader("<h1>Title</h1><p>Content</p>")
markdownContent := strings.NewReader("# Title\n\nContent")
chunks := []ragnar.Chunk{
{ChunkId: 0, Content: "Title"},
{ChunkId: 1, Content: "Content"},
}
doc, err := client.CreateTubDocumentWithOptionals(
ctx, "my-documents", fileContent, markdownContent, chunks, headers)
if err != nil {
log.Fatal(err)
}

// Download original document
reader, err := client.DownloadTubDocument(ctx, "my-documents", doc.DocumentId)
if err != nil {
log.Fatal(err)
}
defer reader.Close()
// Download converted markdown
mdReader, err := client.DownloadTubDocumentMarkdown(ctx, "my-documents", doc.DocumentId)
if err != nil {
log.Fatal(err)
}
defer mdReader.Close()

// Update existing document
newContent := strings.NewReader("Updated content")
updatedHeaders := map[string]string{
"Content-Type": "text/plain",
"x-ragnar-filename": "updated.txt",
}
updatedDoc, err := client.UpdateTubDocument(ctx, "my-documents",
doc.DocumentId, newContent, updatedHeaders)
if err != nil {
log.Fatal(err)
}

// Check processing status
status, err := client.GetTubDocumentStatus(ctx, "my-documents", doc.DocumentId)
if err != nil {
log.Fatal(err)
}
fmt.Printf("Status: %s\n", status.Status) // "pending", "processing", "completed", "failed"
// Wait for completion (example helper function)
func waitForCompletion(client ragnar.Client, tub, docId string) error {
for {
status, err := client.GetTubDocumentStatus(ctx, tub, docId)
if err != nil {
return err
}
switch status.Status {
case "completed":
return nil
case "failed":
return fmt.Errorf("document processing failed")
default:
time.Sleep(5 * time.Second)
}
}
}

// Get all chunks from a document
chunks, err := client.GetTubDocumentChunks(ctx, "my-documents", doc.DocumentId, 50, 0)
if err != nil {
log.Fatal(err)
}
for i, chunk := range chunks {
fmt.Printf("Chunk %d: %s\n", i, chunk.Content[:100]) // First 100 chars
}
// Get specific chunk by index
chunk, err := client.GetTubDocumentChunk(ctx, "my-documents", doc.DocumentId, 0)
if err != nil {
log.Fatal(err)
}
fmt.Printf("First chunk: %s\n", chunk.Content)

// Perform semantic search across document chunks
query := "What is the project timeline?"
searchResults, err := client.SearchTubDocumentChunks(
ctx, "my-documents", query, nil, 10, 0)
if err != nil {
log.Fatal(err)
}
fmt.Printf("Found %d matching chunks:\n", len(searchResults))
for i, chunk := range searchResults {
fmt.Printf("%d. %s (Doc: %s)\n", i+1, chunk.Content, chunk.DocumentId)
}
// Search with document filtering
filter := map[string]any{
"project-id": []string{"proj-123", "proj-456"},
"department": "engineering",
}
filteredResults, err := client.SearchTubDocumentChunks(
ctx, "my-documents", query, filter, 5, 0)
if err != nil {
log.Fatal(err)
}

// List all documents in a tub
docs, err := client.GetTubDocuments(ctx, "my-documents", nil, 20, 0)
if err != nil {
log.Fatal(err)
}
// Filter documents by headers
filter := map[string]any{
"project-id": "proj-123",
"content-type": []string{"application/pdf", "text/plain"},
}
filteredDocs, err := client.GetTubDocuments(ctx, "my-documents", filter, 10, 0)
if err != nil {
log.Fatal(err)
}
// Get specific document metadata
doc, err := client.GetTubDocument(ctx, "my-documents", documentId)
if err != nil {
log.Fatal(err)
}
fmt.Printf("Document: %s, Created: %v\n", doc.DocumentId, doc.CreatedAt)

// The client returns descriptive HTTP errors
doc, err := client.GetTubDocument(ctx, "nonexistent", "doc-id")
if err != nil {
if strings.Contains(err.Error(), "HTTP 404") {
fmt.Println("Document not found")
} else if strings.Contains(err.Error(), "HTTP 401") {
fmt.Println("Unauthorized - check your access key")
} else {
fmt.Printf("Other error: %v\n", err)
}
}

Ragnar uses Bearer token authentication. Access tokens control permissions:
// Access tokens have specific permissions
type AccessToken struct {
AccessKeyId string
TokenName string
AllowCreateTubs bool // Can create new tubs
AllowReadTubs bool // Can list tubs
CreatedAt time.Time
UpdatedAt time.Time
}

Documents can include custom headers for metadata and filtering:
headers := map[string]string{
"Content-Type": "application/pdf",
"x-ragnar-filename": "report.pdf",
"x-ragnar-project-id": "proj-123",
"x-ragnar-department": "research",
"x-ragnar-author": "john.doe",
"x-ragnar-version": "1.0",
}

Headers prefixed with `x-ragnar-` are automatically stored and can be used for filtering.
# Database
RAGNAR_DB_URI="postgres://user:pass@localhost/ragnar?sslmode=disable"
# Storage (MinIO/S3)
RAGNAR_S3_ENDPOINT="localhost:9000"
RAGNAR_S3_BUCKET="ragnar-documents"
RAGNAR_S3_ACCESS_KEY="access-key"
RAGNAR_S3_SECRET_KEY="secret-key"
# AI/Bellman Integration
RAGNAR_BELLMAN_URI="https://bellman.example.com"
RAGNAR_BELLMAN_NAME="ragnar-instance"
RAGNAR_BELLMAN_KEY="bellman-api-key"
# Server
RAGNAR_HTTP_PORT=8080
RAGNAR_PRODUCTION=false

The service provides a complete REST API:
- `GET /tubs` - List tubs
- `POST /tubs` - Create tub
- `GET /tubs/{tub}` - Get tub info
- `PUT /tubs/{tub}` - Update tub
- `DELETE /tubs/{tub}` - Delete tub
- `GET /tubs/{tub}/documents` - List documents
- `POST /tubs/{tub}/documents` - Upload document
- `GET /tubs/{tub}/documents/{id}` - Get document
- `PUT /tubs/{tub}/documents/{id}` - Update document
- `DELETE /tubs/{tub}/documents/{id}` - Delete document
- `GET /tubs/{tub}/documents/{id}/download` - Download original
- `GET /tubs/{tub}/documents/{id}/download/markdown` - Download markdown
- `GET /tubs/{tub}/documents/{id}/status` - Processing status
- `GET /tubs/{tub}/documents/{id}/chunks` - Get chunks
- `GET /search/xnn/{tub}` - Vector search
OpenAPI documentation is available at `/.well-known/openapi.json`.