GrapeCity-AI
diff --git a/‎README.md‎
Lines changed: 31 additions & 31 deletions b/‎README.md‎
Lines changed: 31 additions & 31 deletions
diff --git a/‎sources/gc-qa-rag-etl/deploy/docker-compose.dockerhub.example.yml‎
Lines changed: 46 additions & 0 deletions b/‎sources/gc-qa-rag-etl/deploy/docker-compose.dockerhub.example.yml‎
Lines changed: 46 additions & 0 deletions
diff --git a/‎sources/gc-qa-rag-etl/deploy/docker-compose.dockerhub.yml‎
Lines changed: 7 additions & 0 deletions b/‎sources/gc-qa-rag-etl/deploy/docker-compose.dockerhub.yml‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎sources/gc-qa-rag-etl/env.example‎
Lines changed: 33 additions & 0 deletions b/‎sources/gc-qa-rag-etl/env.example‎
Lines changed: 33 additions & 0 deletions
diff --git a/‎sources/gc-qa-rag-etl/etlapp/common/config.py‎
Lines changed: 65 additions & 27 deletions b/‎sources/gc-qa-rag-etl/etlapp/common/config.py‎
Lines changed: 65 additions & 27 deletions
@@ -88,56 +88,47 @@ GC-QA-RAG 是一个**企业级的检索增强生成（RAG）系统**。我们通
 git clone https://github.com/GrapeCity-AI/gc-qa-rag.git
 cd gc-qa-rag
 
-# 2. 配置API密钥 (必须！)
-# 编辑 sources/gc-qa-rag-etl/.config.production.json
-# 编辑 sources/gc-qa-rag-server/.config.production.json
-# 填入您的API密钥
+# 2. 编辑 ETL 配置
+cd sources/gc-qa-rag-etl/deploy
+# 编辑 docker-compose.dockerhub.yml，取消注释并填入你的API密钥：
+# GC_QA_RAG_LLM_API_KEY: "your_llm_api_key_here"
+# GC_QA_RAG_EMBEDDING_API_KEY: "your_embedding_api_key_here"
 
-# 3. 进入部署目录
+# 3. 启动 ETL 服务
+docker compose -f docker-compose.dockerhub.yml up -d
+
+# 4. 编辑 RAG 配置（先从 ETL 的 deploy 目录返回项目根目录）
+cd ../../..
 cd sources/gc-qa-rag-server/deploy
+# 编辑 docker-compose.dockerhub.yml，取消注释并填入你的API密钥：
+# GC_QA_RAG_LLM_DEFAULT_API_KEY: "your_llm_api_key_here"
+# GC_QA_RAG_EMBEDDING_API_KEY: "your_embedding_api_key_here"
 
-# 4. 使用 Docker Hub 镜像启动服务
+# 5. 启动 RAG 服务
 docker compose -f docker-compose.dockerhub.yml up -d
 ```
 
-#### 方法二：本地构建镜像
+#### 方法二：本地手动构建镜像
 
 ```bash
 # 1. 克隆项目
 git clone https://github.com/GrapeCity-AI/gc-qa-rag.git
 cd gc-qa-rag
 
 # 2. 配置API密钥 (必须！)
-# 编辑 sources/gc-qa-rag-etl/.config.production.json
-# 编辑 sources/gc-qa-rag-server/.config.production.json
+# 编辑 sources/gc-qa-rag-etl/.config.production.json 或 .env 文件
+# 编辑 sources/gc-qa-rag-server/.config.production.json 或 .env 文件
 # 填入您的API密钥
 
-# 3. 进入部署目录
-cd sources/gc-qa-rag-server/deploy
-
-# 4. 启动所有服务
-docker compose up -d --build
-```
-
-ETL 管理后台部署：
-
-#### 方法一：使用 Docker Hub 镜像（推荐）
-
-```bash
-# 1. 进入 ETL 目录
+# 3. 进入 ETL 目录
 cd sources/gc-qa-rag-etl/deploy
 
-# 2. 使用 Docker Hub 镜像启动服务
-docker compose -f docker-compose.dockerhub.yml up -d
-```
-
-#### 方法二：本地构建镜像
+# 4. 构建 ETL 镜像，并启动服务
+docker compose up -d --build
 
-```bash
-# 1. 进入 ETL 目录
-cd sources/gc-qa-rag-etl/deploy
+# 5. 进入 RAG 目录
+cd ../../gc-qa-rag-server/deploy
 
-# 2. 构建 Docker 镜像
+# 6. 构建 RAG 服务镜像，并启动服务
 docker compose up -d --build
 ```
 
@@ -172,6 +163,15 @@ docker compose up -d --build
 -   MySQL
 -   Qdrant
 
+### 📋 配置说明
+
+使用官方 Docker 镜像时，需要在 docker-compose.dockerhub.yml 文件中配置环境变量，并传递给容器。配置来源的优先级为：**Docker 环境变量 > .env 文件 > JSON 配置文件**。
+
+**完整的环境变量列表请查看：**
+
+-   RAG 服务：[`sources/gc-qa-rag-server/env.example`](./sources/gc-qa-rag-server/env.example)
+-   ETL 服务：[`sources/gc-qa-rag-etl/env.example`](./sources/gc-qa-rag-etl/env.example)
+
 **重要**：无论选择哪种部署方式，都需要先配置 API 密钥！
 
 详细步骤请参考我们的[《快速开始指南》](./quickstart.md)。
 
@@ -0,0 +1,46 @@
+version: "3.8"
+
+# 这是一个完整的环境变量配置示例
+# 复制此文件为 docker-compose.dockerhub.yml 并填入你的API密钥（README 中的启动命令使用 -f docker-compose.dockerhub.yml）
+
+services:
+    rag-etl:
+        image: grapecitysoftware/gc-qa-rag-etl:latest
+        container_name: rag_etl_container
+        restart: on-failure
+        ports:
+            - "8001:8001"
+        environment:
+            # 基础配置
+            GC_QA_RAG_ENV: production
+
+            # === 必需配置 ===
+            # 大语言模型API密钥（必需）
+            GC_QA_RAG_LLM_API_KEY: "your_llm_api_key_here"
+            # 嵌入模型API密钥（必需）
+            GC_QA_RAG_EMBEDDING_API_KEY: "your_embedding_api_key_here"
+
+            # === 可选配置 ===
+            # LLM设置
+            GC_QA_RAG_LLM_API_BASE: "https://dashscope.aliyuncs.com/compatible-mode/v1"
+            GC_QA_RAG_LLM_MODEL_NAME: "qwen-plus"
+            GC_QA_RAG_LLM_MAX_RPM: "100"
+
+            # 向量数据库配置
+            GC_QA_RAG_VECTOR_DB_HOST: "http://host.docker.internal:6333"
+
+            # 存储配置
+            GC_QA_RAG_ROOT_PATH: "/app/.rag-cache"
+            GC_QA_RAG_LOG_PATH: "/app/logs"
+
+            # DAS配置（可选，用于文档爬取）
+            # GC_QA_RAG_DAS_BASE_URL_PAGE: ""
+            # GC_QA_RAG_DAS_BASE_URL_THREAD: ""
+            # GC_QA_RAG_DAS_TOKEN: ""
+        volumes:
+            - rag-etl-cache:/app/.rag-cache
+            - rag-etl-logs:/app/logs
+
+volumes:
+    rag-etl-cache:
+    rag-etl-logs:
@@ -8,6 +8,13 @@ services:
         restart: on-failure
         environment:
             GC_QA_RAG_ENV: production
+            # === 必需配置 - 请填入你的API密钥 ===
+            # GC_QA_RAG_LLM_API_KEY: "your_llm_api_key_here"
+            # GC_QA_RAG_EMBEDDING_API_KEY: "your_embedding_api_key_here"
+            # === 可选配置 ===
+            # GC_QA_RAG_LLM_API_BASE: "https://dashscope.aliyuncs.com/compatible-mode/v1"
+            # GC_QA_RAG_LLM_MODEL_NAME: "qwen-plus"
+            # GC_QA_RAG_VECTOR_DB_HOST: "http://host.docker.internal:6333"
         ports:
             - "8001:8001"
         volumes:
 
@@ -0,0 +1,33 @@
+# GC-QA-RAG ETL Service Environment Variables
+# Copy this file to .env and fill in your values
+# Or set these as Docker environment variables
+
+# Environment (optional, defaults to "production")
+GC_QA_RAG_ENV=production
+
+# === LLM & Embedding Configuration (API keys are required) ===
+# LLM API base URL (optional, defaults to Alibaba DashScope)
+GC_QA_RAG_LLM_API_BASE=https://dashscope.aliyuncs.com/compatible-mode/v1
+# LLM model name (optional, defaults to qwen-plus)
+GC_QA_RAG_LLM_MODEL_NAME=qwen-plus
+# LLM API key (required)
+GC_QA_RAG_LLM_API_KEY=your_llm_api_key_here
+
+# Embedding API key (required)
+GC_QA_RAG_EMBEDDING_API_KEY=your_embedding_api_key_here
+
+# === Optional Configuration ===
+# Max requests per minute (optional, defaults to 100)
+GC_QA_RAG_LLM_MAX_RPM=100
+
+# Vector database host (optional, defaults to http://host.docker.internal:6333)
+GC_QA_RAG_VECTOR_DB_HOST=http://host.docker.internal:6333
+
+# Storage paths (optional; when unset, the per-user cache and log directories are used)
+GC_QA_RAG_ROOT_PATH=./.rag-cache
+GC_QA_RAG_LOG_PATH=./
+
+# === DAS Configuration (Optional for document crawling) ===
+GC_QA_RAG_DAS_BASE_URL_PAGE=
+GC_QA_RAG_DAS_BASE_URL_THREAD=
+GC_QA_RAG_DAS_TOKEN=
@@ -1,6 +1,6 @@
 import os
 import json
-from typing import Optional
+from typing import Optional, Union
 from dataclasses import dataclass
 from pathlib import Path
 from dotenv import load_dotenv
@@ -32,6 +32,45 @@ class VectorDbConfig:
     host: str
 
 
+def _lookup_json_path(node, parts):
+    """Resolve ``parts`` against a nested dict, allowing keys that contain underscores.
+
+    Adjacent segments may be joined with ``_`` to form a single JSON key, so
+    ``["llm", "api", "key"]`` matches ``{"llm": {"api_key": ...}}`` and
+    ``["vector", "db", "host"]`` matches ``{"vector_db": {"host": ...}}``.
+    Shorter keys are tried first, so a layout with one segment per nesting
+    level still resolves the same way as a plain segment-by-segment walk.
+
+    Returns the resolved value, or None when no path matches (a JSON ``null``
+    is treated the same as a missing key).
+    """
+    if not parts:
+        return node
+    if not isinstance(node, dict):
+        return None
+    for end in range(1, len(parts) + 1):
+        child = node.get("_".join(parts[:end]))
+        if child is None:
+            continue
+        found = _lookup_json_path(child, parts[end:])
+        if found is not None:
+            return found
+    return None
+
+
+def _get_config_value(key: str, config_raw: dict, default: Optional[str] = None) -> str:
+    """Get configuration value with priority: ENV > .env > JSON.
+
+    Args:
+        key: Environment variable name, e.g. ``GC_QA_RAG_LLM_API_KEY``.
+        config_raw: Parsed JSON configuration (may be empty).
+        default: Value returned when neither the environment nor the JSON
+            configuration provides one.
+
+    Returns:
+        The configuration value as a string.
+
+    Raises:
+        ValueError: If the value is not found and no default is given.
+    """
+    # Process environment first; it also carries values loaded from .env.
+    env_value = os.getenv(key)
+    if env_value is not None:
+        return env_value
+
+    # Map the variable name onto the JSON structure, dropping the
+    # 'gc_qa_rag' prefix: GC_QA_RAG_LLM_API_KEY -> llm / api_key.
+    parts = key.lower().split('_')
+    if len(parts) >= 4 and parts[:3] == ['gc', 'qa', 'rag']:
+        parts = parts[3:]
+
+    # JSON keys such as "api_key" or "vector_db" contain underscores, so the
+    # lookup must try joined segments rather than one segment per level.
+    json_value = _lookup_json_path(config_raw, parts)
+    if json_value is not None:
+        return str(json_value)
+
+    # Return default if provided
+    if default is not None:
+        return default
+
+    raise ValueError(f"Configuration value not found for key: {key}")
+
+
+def _get_config_int(key: str, config_raw: dict, default: Optional[int] = None) -> int:
+    """Read a configuration value via ``_get_config_value`` and convert it to int.
+
+    The default, when given, is passed to the lookup as a string. If the
+    resolved text cannot be converted, the default is returned when one was
+    given; otherwise a ValueError is raised.
+    """
+    fallback_text = None if default is None else str(default)
+    raw_text = _get_config_value(key, config_raw, fallback_text)
+    try:
+        parsed = int(raw_text)
+    except (ValueError, TypeError):
+        if default is None:
+            raise ValueError(f"Invalid integer value for key {key}: {raw_text}")
+        return default
+    return parsed
+
+
 @dataclass
 class Config:
     environment: str
@@ -45,47 +84,46 @@ class Config:
     @classmethod
     def from_environment(cls, environment: str) -> "Config":
         """Create a Config instance from environment name."""
+        # Load .env file first (lower priority than direct env vars)
+        load_dotenv()
+        
+        # Try to load JSON config, but make it optional
+        config_raw = {}
         config_path = Path(f".config.{environment}.json")
-        if not config_path.exists():
-            raise FileNotFoundError(f"Configuration file not found: {config_path}")
-
-        try:
-            with open(config_path) as f:
-                config_raw = json.load(f)
-        except json.JSONDecodeError as e:
-            raise ValueError(f"Invalid JSON in configuration file: {e}")
+        if config_path.exists():
+            try:
+                with open(config_path) as f:
+                    config_raw = json.load(f)
+            except json.JSONDecodeError as e:
+                print(f"Warning: Invalid JSON in configuration file: {e}")
 
         return cls(
             environment=environment,
             das=DasConfig(
-                base_url_page=config_raw["das"]["base_url_page"],
-                base_url_thread=config_raw["das"]["base_url_thread"],
-                token=config_raw["das"]["token"],
+                base_url_page=_get_config_value("GC_QA_RAG_DAS_BASE_URL_PAGE", config_raw, ""),
+                base_url_thread=_get_config_value("GC_QA_RAG_DAS_BASE_URL_THREAD", config_raw, ""),
+                token=_get_config_value("GC_QA_RAG_DAS_TOKEN", config_raw, ""),
             ),
             llm=LlmConfig(
-                api_key=config_raw["llm"]["api_key"],
-                api_base=config_raw["llm"]["api_base"],
-                model_name=config_raw["llm"]["model_name"],
-                max_rpm=config_raw["llm"].get("max_rpm", 100),
+                api_key=_get_config_value("GC_QA_RAG_LLM_API_KEY", config_raw),
+                api_base=_get_config_value("GC_QA_RAG_LLM_API_BASE", config_raw, "https://dashscope.aliyuncs.com/compatible-mode/v1"),
+                model_name=_get_config_value("GC_QA_RAG_LLM_MODEL_NAME", config_raw, "qwen-plus"),
+                max_rpm=_get_config_int("GC_QA_RAG_LLM_MAX_RPM", config_raw, 100),
             ),
-            embedding=EmbeddingConfig(api_key=config_raw["embedding"]["api_key"]),
-            vector_db=VectorDbConfig(host=config_raw["vector_db"]["host"]),
-            root_path=config_raw.get(
-                "root_path", user_cache_dir("gc-qa-rag", ensure_exists=True)
+            embedding=EmbeddingConfig(
+                api_key=_get_config_value("GC_QA_RAG_EMBEDDING_API_KEY", config_raw)
             ),
-            log_path=config_raw.get(
-                "log_path", user_log_dir("gc-qa-rag", ensure_exists=True)
+            vector_db=VectorDbConfig(
+                host=_get_config_value("GC_QA_RAG_VECTOR_DB_HOST", config_raw, "http://host.docker.internal:6333")
             ),
+            root_path=_get_config_value("GC_QA_RAG_ROOT_PATH", config_raw, user_cache_dir("gc-qa-rag", ensure_exists=True)),
+            log_path=_get_config_value("GC_QA_RAG_LOG_PATH", config_raw, user_log_dir("gc-qa-rag", ensure_exists=True)),
         )
 
 
 def get_config() -> Config:
     """Get the application configuration."""
-    load_dotenv()
-    environment = os.getenv("GC_QA_RAG_ENV")
-    if not environment:
-        raise ValueError("GC_QA_RAG_ENV environment variable is not set")
-
+    environment = os.getenv("GC_QA_RAG_ENV", "production")
     return Config.from_environment(environment)