|
5 | 5 | "colab": { |
6 | 6 | "private_outputs": true, |
7 | 7 | "provenance": [], |
8 | | - "authorship_tag": "ABX9TyPJuZcVawsmdIfuIMLuRx2y", |
| 8 | + "authorship_tag": "ABX9TyPoHH519BuqGSnR/HON75UP", |
9 | 9 | "include_colab_link": true |
10 | 10 | }, |
11 | 11 | "kernelspec": { |
|
24 | 24 | "colab_type": "text" |
25 | 25 | }, |
26 | 26 | "source": [ |
27 | | - "<a href=\"https://colab.research.google.com/github/OneFineStarstuff/AGI-Pipeline/blob/main/agi_pipeline.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" |
| 27 | + "<a href=\"https://colab.research.google.com/github/OneFineStarstuff/OneFineStarstuff.github.io/blob/main/agi_pipeline.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" |
28 | 28 | ] |
29 | 29 | }, |
30 | 30 | { |
31 | 31 | "cell_type": "code", |
32 | 32 | "source": [ |
| 33 | + "# === Imports ===\n", |
33 | 34 | "import os\n", |
34 | 35 | "import asyncio\n", |
| 36 | + "import time\n", |
| 37 | + "from typing import List\n", |
35 | 38 | "import torch\n", |
36 | 39 | "from transformers import T5Tokenizer, T5ForConditionalGeneration\n", |
37 | 40 | "from PIL import Image\n", |
38 | | - "from fastapi import FastAPI, UploadFile, Depends, HTTPException\n", |
| 41 | + "from fastapi import FastAPI, UploadFile, Depends, HTTPException, Request\n", |
39 | 42 | "from fastapi.security import OAuth2PasswordBearer\n", |
40 | | - "from pydantic import BaseModel\n", |
| 43 | + "from pydantic import BaseModel, SecretStr\n", |
41 | 44 | "import whisper\n", |
42 | 45 | "from ultralytics import YOLO\n", |
43 | 46 | "import pyttsx3\n", |
|
48 | 51 | "\n", |
49 | 52 | "# === Logging Setup ===\n", |
50 | 53 | "logger.add(\"pipeline_{time}.log\", rotation=\"1 MB\", level=\"DEBUG\", enqueue=True, backtrace=True, diagnose=True)\n", |
| 54 | + "logger.info(\"Application startup\")\n", |
51 | 55 | "\n", |
52 | | - "# === Environment Variables and Authentication ===\n", |
53 | | - "SECURE_TOKEN = os.getenv(\"SECURE_TOKEN\", \"my_secure_token\")\n", |
| 56 | + "# === Security Enhancement: Environment Variable for Secure Token ===\n", |
| 57 | + "SECURE_TOKEN = SecretStr(os.getenv(\"SECURE_TOKEN\", \"YvZz9Hni0hWJPh_UWW4dQYf9rhIe9nNYcC5ZQTTZz0Q\"))\n", |
| 58 | + "\n", |
| 59 | + "# === OAuth2PasswordBearer for Authentication ===\n", |
54 | 60 | "oauth2_scheme = OAuth2PasswordBearer(tokenUrl=\"token\")\n", |
55 | 61 | "\n", |
| 62 | + "# === Authentication Function ===\n", |
56 | 63 | "def authenticate_user(token: str = Depends(oauth2_scheme)):\n", |
57 | | - " if token != SECURE_TOKEN:\n", |
| 64 | + " if token != SECURE_TOKEN.get_secret_value():\n", |
58 | 65 | " logger.warning(\"Authentication failed.\")\n", |
59 | 66 | " raise HTTPException(status_code=401, detail=\"Invalid token\")\n", |
60 | 67 | "\n", |
61 | | - "# === Request and Response Models ===\n", |
| 68 | + "# === Request and Response Models (Pydantic) ===\n", |
62 | 69 | "class TextRequest(BaseModel):\n", |
63 | 70 | " text: str\n", |
64 | 71 | "\n", |
65 | 72 | "class TextResponse(BaseModel):\n", |
66 | 73 | " response: str\n", |
67 | 74 | "\n", |
68 | | - "# === NLP Module ===\n", |
| 75 | + "# === NLP Module (T5 Transformer) ===\n", |
69 | 76 | "class NLPModule:\n", |
70 | 77 | " def __init__(self):\n", |
71 | 78 | " model_name = \"google/flan-t5-small\"\n", |
72 | | - " try:\n", |
73 | | - " self.tokenizer = T5Tokenizer.from_pretrained(model_name)\n", |
74 | | - " self.model = T5ForConditionalGeneration.from_pretrained(model_name)\n", |
75 | | - " logger.info(\"NLP model loaded successfully.\")\n", |
76 | | - " except Exception as e:\n", |
77 | | - " logger.error(f\"Failed to load NLP model: {e}\")\n", |
78 | | - " raise RuntimeError(\"Failed to load NLP model.\")\n", |
| 79 | + " self.tokenizer = T5Tokenizer.from_pretrained(model_name)\n", |
| 80 | + " self.model = T5ForConditionalGeneration.from_pretrained(model_name)\n", |
| 81 | + " logger.info(\"NLP model loaded successfully.\")\n", |
79 | 82 | "\n", |
80 | 83 | " def generate_text(self, prompt: str) -> str:\n", |
81 | 84 | " if not prompt.strip():\n", |
82 | 85 | " raise ValueError(\"Prompt cannot be empty.\")\n", |
83 | 86 | " logger.debug(f\"Generating text for prompt: {prompt}\")\n", |
84 | | - " try:\n", |
85 | | - " inputs = self.tokenizer(prompt, return_tensors=\"pt\")\n", |
86 | | - " outputs = self.model.generate(inputs[\"input_ids\"], max_length=100)\n", |
87 | | - " response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)\n", |
88 | | - " logger.info(f\"Generated response: {response}\")\n", |
89 | | - " return response\n", |
90 | | - " except Exception as e:\n", |
91 | | - " logger.error(f\"Error in text generation: {e}\")\n", |
92 | | - " raise RuntimeError(\"Text generation failed.\")\n", |
93 | | - "\n", |
94 | | - "# === CV Module with Object Detection ===\n", |
| 87 | + " inputs = self.tokenizer(prompt, return_tensors=\"pt\")\n", |
| 88 | + " outputs = self.model.generate(inputs[\"input_ids\"], max_length=100)\n", |
| 89 | + " response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)\n", |
| 90 | + " logger.info(f\"Generated response: {response}\")\n", |
| 91 | + " return response\n", |
| 92 | + "\n", |
| 93 | + "# === CV Module (YOLOv8 for Object Detection) ===\n", |
95 | 94 | "class CVModule:\n", |
96 | 95 | " def __init__(self):\n", |
97 | | - " try:\n", |
98 | | - " self.device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", |
99 | | - " self.model = YOLO('yolov5su.pt').to(self.device)\n", |
100 | | - " logger.info(\"CV model loaded successfully.\")\n", |
101 | | - " except Exception as e:\n", |
102 | | - " logger.error(f\"Failed to load CV model: {e}\")\n", |
103 | | - " raise RuntimeError(\"Failed to load CV model.\")\n", |
| 96 | + " self.device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", |
| 97 | + " self.model = YOLO('yolov8n.pt').to(self.device)\n", |
| 98 | + " logger.info(\"CV model loaded successfully.\")\n", |
104 | 99 | "\n", |
105 | 100 | " def detect_objects(self, image: Image.Image) -> str:\n", |
106 | 101 | " logger.debug(\"Detecting objects in the image.\")\n", |
107 | | - " try:\n", |
108 | | - " results = self.model(image)\n", |
109 | | - " return results.pandas().xyxy[0].to_json()\n", |
110 | | - " except Exception as e:\n", |
111 | | - " logger.error(f\"Object detection failed: {e}\")\n", |
112 | | - " raise ValueError(\"Object detection error.\")\n", |
113 | | - "\n", |
114 | | - "# === Speech Processor ===\n", |
| 102 | + " results = self.model(image)\n", |
| 103 | + " return results.pandas().xyxy[0].to_json()\n", |
| 104 | + "\n", |
| 105 | + "# === Speech Processor (Whisper for Speech-to-Text, PyTTSX3 for Text-to-Speech) ===\n", |
115 | 106 | "class SpeechProcessor:\n", |
116 | 107 | " def __init__(self):\n", |
117 | | - " try:\n", |
118 | | - " self.whisper_model = whisper.load_model(\"base\")\n", |
119 | | - " self.tts = pyttsx3.init()\n", |
120 | | - " logger.info(\"Speech processor initialized successfully.\")\n", |
121 | | - " except Exception as e:\n", |
122 | | - " logger.error(f\"Failed to initialize speech processor: {e}\")\n", |
123 | | - " raise RuntimeError(\"Failed to initialize speech processor.\")\n", |
| 108 | + " self.whisper_model = whisper.load_model(\"base\")\n", |
| 109 | + " self.tts = pyttsx3.init()\n", |
| 110 | + " logger.info(\"Speech processor initialized successfully.\")\n", |
124 | 111 | "\n", |
125 | 112 | " def speech_to_text(self, audio_file: UploadFile) -> str:\n", |
126 | | - " logger.debug(\"Processing speech-to-text.\")\n", |
127 | | - " try:\n", |
128 | | - " with audio_file.file as audio_data:\n", |
129 | | - " result = self.whisper_model.transcribe(audio_data)\n", |
| 113 | + " with audio_file.file as audio_data:\n", |
| 114 | + " result = self.whisper_model.transcribe(audio_data)\n", |
130 | 115 | " return result['text']\n", |
131 | | - " except Exception as e:\n", |
132 | | - " logger.error(f\"Speech-to-text failed: {e}\")\n", |
133 | | - " raise ValueError(\"Speech-to-text error.\")\n", |
134 | 116 | "\n", |
135 | 117 | " def text_to_speech(self, text: str) -> None:\n", |
136 | 118 | " if not text.strip():\n", |
137 | 119 | " raise ValueError(\"Text cannot be empty.\")\n", |
138 | | - " logger.debug(\"Processing text-to-speech.\")\n", |
139 | | - " try:\n", |
140 | | - " self.tts.say(text)\n", |
141 | | - " self.tts.runAndWait()\n", |
142 | | - " except Exception as e:\n", |
143 | | - " logger.error(f\"Text-to-speech failed: {e}\")\n", |
144 | | - " raise RuntimeError(\"Text-to-speech error.\")\n", |
| 120 | + " self.tts.say(text)\n", |
| 121 | + " self.tts.runAndWait()\n", |
145 | 122 | "\n", |
146 | 123 | " def __del__(self):\n", |
147 | 124 | " self.tts.stop()\n", |
|
154 | 131 | " self.speech_processor = SpeechProcessor()\n", |
155 | 132 | "\n", |
156 | 133 | " async def process_nlp(self, text: str) -> str:\n", |
157 | | - " return self.nlp.generate_text(text)\n", |
| 134 | + " return await asyncio.to_thread(self.nlp.generate_text, text)\n", |
158 | 135 | "\n", |
159 | 136 | " async def process_cv(self, image: Image.Image) -> str:\n", |
160 | 137 | " return await asyncio.to_thread(self.cv.detect_objects, image)\n", |
|
167 | 144 | "\n", |
168 | 145 | "# === FastAPI Application ===\n", |
169 | 146 | "app = FastAPI()\n", |
| 147 | + "\n", |
170 | 148 | "pipeline = EnhancedAGIPipeline()\n", |
171 | 149 | "\n", |
| 150 | + "# === Endpoints ===\n", |
172 | 151 | "@app.post(\"/process-nlp/\", response_model=TextResponse, dependencies=[Depends(authenticate_user)])\n", |
173 | 152 | "async def process_nlp(request: TextRequest):\n", |
174 | | - " try:\n", |
175 | | - " response = await pipeline.process_nlp(request.text)\n", |
176 | | - " logger.info(\"NLP processed successfully.\")\n", |
177 | | - " return {\"response\": response}\n", |
178 | | - " except Exception as e:\n", |
179 | | - " logger.error(f\"NLP processing failed: {e}\")\n", |
180 | | - " raise HTTPException(status_code=500, detail=\"NLP processing error.\")\n", |
| 153 | + " response = await pipeline.process_nlp(request.text)\n", |
| 154 | + " return {\"response\": response}\n", |
181 | 155 | "\n", |
182 | 156 | "@app.post(\"/process-cv-detection/\", dependencies=[Depends(authenticate_user)])\n", |
183 | 157 | "async def process_cv_detection(file: UploadFile):\n", |
184 | | - " try:\n", |
| 158 | + " image = Image.open(io.BytesIO(await file.read()))\n", |
| 159 | + " response = await pipeline.process_cv(image)\n", |
| 160 | + " return {\"detections\": response}\n", |
| 161 | + "\n", |
| 162 | + "@app.post(\"/batch-cv-detection/\", dependencies=[Depends(authenticate_user)])\n", |
| 163 | + "async def batch_cv_detection(files: List[UploadFile]):\n", |
| 164 | + " responses = []\n", |
| 165 | + " for file in files:\n", |
185 | 166 | " image = Image.open(io.BytesIO(await file.read()))\n", |
186 | 167 | " response = await pipeline.process_cv(image)\n", |
187 | | - " logger.info(\"Object detection processed successfully.\")\n", |
188 | | - " return {\"detections\": response}\n", |
189 | | - " except Exception as e:\n", |
190 | | - " logger.error(f\"Object detection failed: {e}\")\n", |
191 | | - " raise HTTPException(status_code=500, detail=\"Object detection error.\")\n", |
| 168 | + " responses.append(response)\n", |
| 169 | + " return {\"batch_detections\": responses}\n", |
192 | 170 | "\n", |
193 | 171 | "@app.post(\"/speech-to-text/\", response_model=TextResponse, dependencies=[Depends(authenticate_user)])\n", |
194 | 172 | "async def speech_to_text(file: UploadFile):\n", |
195 | | - " try:\n", |
196 | | - " response = await pipeline.process_speech_to_text(file)\n", |
197 | | - " logger.info(\"Speech-to-text processed successfully.\")\n", |
198 | | - " return {\"response\": response}\n", |
199 | | - " except Exception as e:\n", |
200 | | - " logger.error(f\"Speech-to-text failed: {e}\")\n", |
201 | | - " raise HTTPException(status_code=500, detail=\"Speech-to-text error.\")\n", |
| 173 | + " response = await pipeline.process_speech_to_text(file)\n", |
| 174 | + " return {\"response\": response}\n", |
202 | 175 | "\n", |
203 | 176 | "@app.post(\"/text-to-speech/\", dependencies=[Depends(authenticate_user)])\n", |
204 | 177 | "async def text_to_speech(request: TextRequest):\n", |
205 | | - " try:\n", |
206 | | - " await pipeline.process_text_to_speech(request.text)\n", |
207 | | - " logger.info(\"Text-to-speech processed successfully.\")\n", |
208 | | - " return {\"response\": \"Speech synthesis complete.\"}\n", |
209 | | - " except Exception as e:\n", |
210 | | - " logger.error(f\"Text-to-speech failed: {e}\")\n", |
211 | | - " raise HTTPException(status_code=500, detail=\"Text-to-speech error.\")\n", |
212 | | - "\n", |
213 | | - "# === Run the Application with HTTPS ===\n", |
| 178 | + " await pipeline.process_text_to_speech(request.text)\n", |
| 179 | + " return {\"response\": \"Speech synthesis complete.\"}\n", |
| 180 | + "\n", |
| 181 | + "# === Run the Application with HTTPS (uvicorn) ===\n", |
214 | 182 | "if __name__ == \"__main__\":\n", |
215 | 183 | " nest_asyncio.apply()\n", |
216 | 184 | " config = uvicorn.Config(app, host=\"0.0.0.0\", port=8000)\n", |
217 | 185 | " server = uvicorn.Server(config)\n", |
218 | 186 | " asyncio.run(server.serve())" |
219 | 187 | ], |
220 | 188 | "metadata": { |
221 | | - "id": "3yRf_BMYqzHJ" |
| 189 | + "id": "UgUAMujBWqGS" |
222 | 190 | }, |
223 | 191 | "execution_count": null, |
224 | 192 | "outputs": [] |
|