
Commit 55ace1c

Merge branch 'docker-uv' into chunking

2 parents 4fc4c56 + f8fc3fd

File tree

8 files changed: +199 -58 lines changed

.dockerignore

Lines changed: 6 additions & 1 deletion

```diff
@@ -57,4 +57,9 @@ build/
 
 # Model cache and downloads
 model_cache/
-downloads/
+downloads/
+
+*.pyc
+*.pyo
+*.pyd
+.env
```
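With `.env` now excluded from the build context, secrets no longer get baked into the image and must be injected when the container starts. A minimal sketch, assuming a `.env` file in the working directory (`REDIS_HOST` comes from this repo's compose files; any other variable name would be hypothetical):

```bash
# .env stays out of the image; docker reads it only at run time.
docker run --rm --env-file .env converter-cpu-image \
  uv run python -c 'import os; print(os.environ.get("REDIS_HOST", "unset"))'
```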

Dockerfile

Lines changed: 62 additions & 38 deletions

```diff
@@ -1,26 +1,44 @@
-# First, build the application and install dependencies
 FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim AS builder
+ARG CPU_ONLY=false
 
 WORKDIR /app
 
-# Download models in builder stage
+# Install build dependencies
 RUN apt-get update && \
-    apt-get install -y libgl1 libglib2.0-0 && \
-    apt-get clean
+    apt-get install -y --no-install-recommends libgl1 libglib2.0-0 && \
+    rm -rf /var/lib/apt/lists/*
 
 # Copy only dependency files and create a dummy README
 COPY pyproject.toml uv.lock ./
 # Create a dummy README.md file to satisfy package requirements
 RUN echo "# Placeholder README" > README.md
 
-# Create venv and install project for model downloads
-RUN python -m venv /app/.venv && \
-    . /app/.venv/bin/activate && \
-    uv pip install -e .
+# Install dependencies but not the project itself
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv sync --frozen --no-install-project
+
+# Copy the rest of the project
+COPY . .
 
-# Set up cache directories and download models
-ENV HF_HOME=/app/.cache/huggingface \
-    TORCH_HOME=/app/.cache/torch
+# Better GPU detection: Check both architecture and if NVIDIA is available
+RUN ARCH=$(uname -m) && \
+    if [ "$CPU_ONLY" = "true" ] || [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ] || ! command -v nvidia-smi >/dev/null 2>&1; then \
+        USE_GPU=false; \
+    else \
+        USE_GPU=true; \
+    fi && \
+    echo "Detected GPU availability: $USE_GPU" && \
+    # For PyTorch installation with architecture detection
+    uv pip uninstall -y torch torchvision torchaudio || true && \
+    if [ "$USE_GPU" = "false" ]; then \
+        # For CPU or ARM architectures or no NVIDIA
+        echo "Installing PyTorch for CPU" && \
+        uv pip install --no-cache-dir torch torchvision --extra-index-url https://download.pytorch.org/whl/cpu; \
+    else \
+        # For x86_64 with GPU support
+        echo "Installing PyTorch with CUDA support" && \
+        uv pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121; \
+    fi
 
 # Download models
 RUN . /app/.venv/bin/activate && \
@@ -29,16 +47,29 @@ RUN . /app/.venv/bin/activate && \
     python -c 'import easyocr; reader = easyocr.Reader(["fr", "de", "es", "en", "it", "pt"], gpu=True); print("EasyOCR models downloaded successfully")' && \
     python -c 'from chonkie import SDPMChunker; chunker = SDPMChunker(embedding_model="minishlab/potion-base-8M"); print("Chonkie models downloaded successfully")'
 
-# Final stage with CUDA support
-FROM python:3.12-slim-bookworm AS runtime
+# Download models for the pipeline
+RUN uv run python -c "from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline; artifacts_path = StandardPdfPipeline.download_models_hf(force=True)"
 
-ARG CPU_ONLY=false
+# Pre-download EasyOCR models with better GPU detection
+RUN ARCH=$(uname -m) && \
+    if [ "$CPU_ONLY" = "true" ] || [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ] || ! command -v nvidia-smi >/dev/null 2>&1; then \
+        echo "Downloading EasyOCR models for CPU" && \
+        uv run python -c "import easyocr; reader = easyocr.Reader(['fr', 'de', 'es', 'en', 'it', 'pt'], gpu=False); print('EasyOCR CPU models downloaded successfully')"; \
+    else \
+        echo "Downloading EasyOCR models with GPU support" && \
+        uv run python -c "import easyocr; reader = easyocr.Reader(['fr', 'de', 'es', 'en', 'it', 'pt'], gpu=True); print('EasyOCR GPU models downloaded successfully')"; \
+    fi
+
+RUN uv run python -c 'from chonkie import SDPMChunker; chunker = SDPMChunker(embedding_model="minishlab/potion-base-8M"); print("Chonkie models downloaded successfully")'
+
+# Production stage
+FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim
 WORKDIR /app
 
 # Install runtime dependencies
 RUN apt-get update && \
-    apt-get install -y redis-server libgl1 libglib2.0-0 && \
-    apt-get clean
+    apt-get install -y --no-install-recommends redis-server libgl1 libglib2.0-0 curl && \
+    rm -rf /var/lib/apt/lists/*
 
 # Copy model cache from builder - this rarely changes
 COPY --from=builder --chown=app:app /app/.cache /app/.cache/
@@ -57,31 +88,24 @@ COPY --chown=app:app main.py ./
 ENV PYTHONPATH=/app \
     HF_HOME=/app/.cache/huggingface \
     TORCH_HOME=/app/.cache/torch \
-    OMP_NUM_THREADS=4
+    PYTHONPATH=/app \
+    OMP_NUM_THREADS=4 \
+    UV_COMPILE_BYTECODE=1
 
-# Create app user
-RUN useradd -m app && \
-    chown -R app:app /app /tmp && \
-    python -m venv /app/.venv && \
-    chown -R app:app /app/.venv
+# Create a non-root user
+RUN useradd --create-home app && \
+    mkdir -p /app && \
+    chown -R app:app /app /tmp
 
-USER app
+# Copy the virtual environment from the builder stage
+COPY --from=builder --chown=app:app /app/.venv /app/.venv
+ENV PATH="/app/.venv/bin:$PATH"
 
-# Install dependencies and project
-RUN . /app/.venv/bin/activate && \
-    cd /app && \
-    pip install -e .
+# Copy necessary files for the application
+COPY --chown=app:app . .
 
-# Install PyTorch with CUDA support
-RUN . /app/.venv/bin/activate && \
-    if [ "$CPU_ONLY" = "true" ]; then \
-        pip install --no-cache-dir torch torchvision --extra-index-url https://download.pytorch.org/whl/cpu; \
-    else \
-        pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121; \
-    fi
-
-ENV PATH="/app/.venv/bin:$PATH"
+# Switch to non-root user
+USER app
 
 EXPOSE 8080
-
-CMD ["python", "-m", "uvicorn", "--port", "8080", "--host", "0.0.0.0", "main:app"]
+CMD ["uvicorn", "main:app", "--port", "8080", "--host", "0.0.0.0"]
```

Makefile

Lines changed: 12 additions & 0 deletions

```diff
@@ -20,6 +20,7 @@ help:
 	@echo "Docker:"
 	@echo " docker-build-cpu - Build Docker image (CPU version)"
 	@echo " docker-build-gpu - Build Docker image (GPU version)"
+	@echo " docker-start - Auto-detect system and start appropriate container (CPU/GPU)"
 	@echo " docker-start-cpu - Start services in CPU mode"
 	@echo " docker-start-gpu - Start services in GPU mode"
 	@echo " docker-stop - Stop all Docker services"
@@ -79,6 +80,17 @@
 docker-start-gpu:
 	$(DOCKER_GPU_COMPOSE) up --build --scale celery_worker=3
 
+# Auto-detect architecture and start appropriate container
+docker-start:
+	@echo "Auto-detecting system architecture..."
+	@if [ "$(shell uname -m)" = "arm64" ] || [ "$(shell uname -m)" = "aarch64" ] || ! command -v nvidia-smi >/dev/null 2>&1; then \
+		echo "ARM architecture or NVIDIA drivers not detected. Using CPU mode."; \
+		$(MAKE) docker-start-cpu; \
+	else \
+		echo "NVIDIA GPU detected. Using GPU mode."; \
+		$(MAKE) docker-start-gpu; \
+	fi
+
 docker-stop:
 	$(DOCKER_CPU_COMPOSE) down
 	$(DOCKER_GPU_COMPOSE) down
```
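In day-to-day use the new target means one command regardless of host; for example:

```bash
# Let make probe uname -m and nvidia-smi, then pick the CPU or GPU stack
make docker-start

# The explicit targets remain available when you want to force a mode
make docker-start-cpu
make docker-start-gpu
```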

README.md

Lines changed: 2 additions & 2 deletions

````diff
@@ -154,7 +154,7 @@ The project includes a Makefile for convenient management of Docker operations:
 ```bash
 # Build and run in CPU mode with 1 worker
 make docker-build-cpu
-make docker-run-cpu
+make docker-start-cpu
 
 # Or build and run with multiple workers
 make docker-run-cpu WORKER_COUNT=3
@@ -164,7 +164,7 @@ make docker-run-cpu WORKER_COUNT=3
 ```bash
 # Build and run in GPU mode with 1 worker
 make docker-build-gpu
-make docker-run-gpu
+make docker-start-gpu
 
 # Or build and run with multiple workers
 make docker-run-gpu WORKER_COUNT=3
````

detect_gpu.sh

Lines changed: 76 additions & 0 deletions

```diff
@@ -0,0 +1,76 @@
+#!/bin/bash
+
+# Script to detect GPU and select the appropriate Docker Compose file
+
+# Check if nvidia-smi exists and can be executed
+if command -v nvidia-smi >/dev/null 2>&1; then
+    # Try to run nvidia-smi to check if drivers are loaded correctly
+    if nvidia-smi >/dev/null 2>&1; then
+        echo "NVIDIA GPU detected with working drivers."
+        GPU_AVAILABLE=true
+
+        # Check CUDA version
+        CUDA_VERSION=$(nvidia-smi --query-gpu=driver_version --format=csv,noheader | cut -d'.' -f1)
+        echo "CUDA compatible driver version: $CUDA_VERSION"
+
+        # Check if the detected CUDA version is compatible with our requirements (CUDA 11+)
+        if [ -n "$CUDA_VERSION" ] && [ "$CUDA_VERSION" -ge 11 ]; then
+            echo "Using GPU configuration (CUDA $CUDA_VERSION detected)"
+            COMPOSE_FILE="docker-compose.gpu.yml"
+            DOCKER_BUILDKIT=1
+            DOCKER_BUILD_ARGS="--build-arg CPU_ONLY=false"
+            # Pass GPU capabilities to docker build
+            export DOCKER_BUILDKIT=1
+            export DOCKER_DEFAULT_PLATFORM=linux/amd64
+            export DOCKER_CLI_EXPERIMENTAL=enabled
+        else
+            echo "NVIDIA GPU detected but CUDA version ($CUDA_VERSION) is too old. Minimum required: 11"
+            echo "Falling back to CPU configuration."
+            GPU_AVAILABLE=false
+            COMPOSE_FILE="docker-compose.cpu.yml"
+            DOCKER_BUILD_ARGS="--build-arg CPU_ONLY=true"
+        fi
+    else
+        echo "NVIDIA GPU software detected but drivers may not be properly installed."
+        GPU_AVAILABLE=false
+        COMPOSE_FILE="docker-compose.cpu.yml"
+        DOCKER_BUILD_ARGS="--build-arg CPU_ONLY=true"
+    fi
+else
+    echo "No NVIDIA GPU detected. Using CPU configuration."
+    GPU_AVAILABLE=false
+    COMPOSE_FILE="docker-compose.cpu.yml"
+    DOCKER_BUILD_ARGS="--build-arg CPU_ONLY=true"
+fi
+
+# Check architecture
+ARCH=$(uname -m)
+if [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then
+    echo "ARM architecture detected. Forcing CPU mode regardless of GPU availability."
+    GPU_AVAILABLE=false
+    COMPOSE_FILE="docker-compose.cpu.yml"
+    DOCKER_BUILD_ARGS="--build-arg CPU_ONLY=true"
+fi
+
+# Export for other scripts to use
+export GPU_AVAILABLE
+export COMPOSE_FILE
+export DOCKER_BUILD_ARGS
+
+echo "Selected configuration: $COMPOSE_FILE"
+echo "Build arguments: $DOCKER_BUILD_ARGS"
+echo "GPU_AVAILABLE=$GPU_AVAILABLE"
+
+# If this script is being sourced, don't execute docker-compose
+if [[ "${BASH_SOURCE[0]}" != "${0}" ]]; then
+    return 0
+fi
+
+# If passed arguments, run docker-compose with them
+if [ $# -gt 0 ]; then
+    echo "Running: docker-compose -f $COMPOSE_FILE $@"
+    docker-compose -f $COMPOSE_FILE $@
+else
+    echo "Usage: $0 [docker-compose commands]"
+    echo "or source this script to export the variables"
+fi
```
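Both invocation styles from the script's own usage message, as a short sketch (the final build line is an assumption about how the exported variables are meant to be combined):

```bash
chmod +x detect_gpu.sh

# Style 1: forward docker-compose arguments through whichever file the script selects
./detect_gpu.sh up --build

# Style 2: source it, then reuse the exported variables yourself
source ./detect_gpu.sh
docker-compose -f "$COMPOSE_FILE" build $DOCKER_BUILD_ARGS
```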

docker-compose.cpu.yml

Lines changed: 4 additions & 7 deletions

```diff
@@ -4,17 +4,16 @@ services:
       context: .
       args:
         CPU_ONLY: "true"
-      target: runtime
     image: converter-cpu-image
-    command: /app/.venv/bin/python -m celery -A worker.celery_config worker --pool=solo -n worker_primary --loglevel=info
+    command: uv run celery -A worker.celery_config worker --pool=solo -n worker_primary --loglevel=info
     volumes:
       - ./worker:/app/worker
     environment:
       - REDIS_HOST=${REDIS_HOST}
       - ENV=production
     restart: on-failure
     healthcheck:
-      test: [ "CMD", "/app/.venv/bin/celery", "-A", "worker.celery_config", "inspect", "ping", "-d", "celery@worker_primary" ]
+      test: [ "CMD", "uv", "run", "celery", "-A", "worker.celery_config", "inspect", "ping", "-d", "celery@worker_primary" ]
       interval: 30s
       timeout: 10s
       retries: 3
@@ -28,11 +27,10 @@ services:
       context: .
       args:
         CPU_ONLY: "true"
-      target: runtime
     cache_from:
       - converter-cpu-image
     image: converter-cpu-image
-    command: /app/.venv/bin/python -m uvicorn main:app --port 8080 --host 0.0.0.0 --workers 4 --proxy-headers
+    command: uv run uvicorn main:app --port 8080 --host 0.0.0.0 --workers 4 --proxy-headers
     environment:
       - REDIS_HOST=${REDIS_HOST}
       - ENV=production
@@ -71,11 +69,10 @@ services:
       context: .
       args:
         CPU_ONLY: "true"
-      target: runtime
     cache_from:
       - converter-cpu-image
     image: converter-cpu-image
-    command: /app/.venv/bin/python -m celery -A worker.celery_config flower --port=5555
+    command: uv run celery -A worker.celery_config flower --port=5555
     ports:
       - "5556:5555"
     volumes:
```
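To spot-check the reworked `uv run` healthcheck by hand, something like the following should work (the `celery_worker` service name is taken from the Makefile's `--scale celery_worker=3` flag; adjust if the compose file names the service differently):

```bash
docker compose -f docker-compose.cpu.yml up -d
# Issue the same ping the healthcheck runs inside the container
docker compose -f docker-compose.cpu.yml exec celery_worker \
  uv run celery -A worker.celery_config inspect ping -d celery@worker_primary
```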
