add docker support #33

Open · wants to merge 6 commits into base: master
1 change: 1 addition & 0 deletions .gitignore
@@ -0,0 +1 @@
.github/
48 changes: 48 additions & 0 deletions Dockerfile
@@ -0,0 +1,48 @@
FROM nvidia/cuda:12.6.3-cudnn-devel-ubuntu22.04

# Install system dependencies
RUN apt-get update && apt-get install -y \
git \
wget \
curl \
build-essential \
software-properties-common \
&& add-apt-repository ppa:deadsnakes/ppa -y \
&& apt-get update && apt-get install -y \
python3.10 \
python3.10-dev \
python3.10-venv \
git-lfs \
ffmpeg \
&& ln -sf /usr/bin/python3.10 /usr/bin/python \
&& curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10 \
&& rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /app

# Clone the ThinkSound repository
RUN git clone https://github.com/liuhuadai/ThinkSound.git .

RUN pip install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 --index-url https://download.pytorch.org/whl/cu126
RUN pip install -r requirements.txt

# Create a non-root user
RUN useradd -m -u 1000 thinksound && \
chown -R thinksound:thinksound /app
USER thinksound

RUN chmod +x scripts/demo.sh

# Expose port for Gradio web interface
EXPOSE 7860

# Set default command to launch the web interface
CMD ["python", "app.py"]

# Alternative commands (uncomment as needed):
# For interactive bash session:
# CMD ["/bin/bash"]

# For running demo script (requires arguments):
# ENTRYPOINT ["./scripts/demo.sh"]
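
As a note on the alternatives above: the `CMD` can also be overridden at run time instead of editing the Dockerfile. A minimal sketch, assuming the image is built as `thinksound:latest` per the README instructions below:

```bash
# Start an interactive shell in the container instead of launching the Gradio app;
# the trailing argument overrides the image's default CMD.
docker run --gpus all -it --rm thinksound:latest /bin/bash
```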
66 changes: 66 additions & 0 deletions README.md
@@ -102,6 +102,72 @@ git clone https://huggingface.co/liuhuadai/ThinkSound ckpts

### ▶️ Run the Demo

#### **Docker for WSL/Ubuntu**

Use this if you run ThinkSound from your own workspace that already contains a clone of this repository plus the models.

Prerequisites

1. Download all required models. Warning: these downloads are large.

```bash
sudo apt install git-lfs
git lfs install  # make sure the Git LFS filters are configured so the model weights are actually downloaded

git clone https://huggingface.co/facebook/metaclip-h14-fullcc2.5b
git clone https://huggingface.co/google/t5-v1_1-xl
git clone https://huggingface.co/liuhuadai/ThinkSound ckpts
```

2. Move all of the downloaded models to the root of this repository (see the example below).
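
For illustration, a minimal sketch assuming the three model repositories above were cloned next to your ThinkSound checkout; adjust the paths to wherever you actually cloned them:

```bash
# Hypothetical layout: the three model clones sit alongside the ThinkSound checkout.
mv metaclip-h14-fullcc2.5b t5-v1_1-xl ckpts ThinkSound/
```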

Pull the prebuilt Docker image

1. If your GPU supports CUDA 12.6.x, you can pull this image:

```bash
docker pull sasuketaichou/sajenakcube:thinksound
```

Note: If you pull the prebuilt image, you can skip the Build locally step and go straight to the Run Docker step.
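
Optionally, confirm that Docker can see your GPU before launching the full app. This sketch assumes the NVIDIA Container Toolkit is installed on the host and uses the image pulled above:

```bash
# If GPU passthrough works, nvidia-smi prints the usual device table from inside the container.
docker run --rm --gpus all sasuketaichou/sajenakcube:thinksound nvidia-smi
```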

Build locally

Note: Check which CUDA version your NVIDIA driver supports and adjust the `FROM` base image in the Dockerfile accordingly.
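
If you are unsure which version to target, the driver reports the highest CUDA version it supports; the tag below is only an example of the format:

```bash
# The "CUDA Version" field in the header is the newest CUDA runtime this driver can handle.
nvidia-smi
# Then pick a matching base-image tag in the Dockerfile, e.g.:
# FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04
```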

1. Run the following at the root of this repository:

```bash
docker build -t thinksound:latest .
```

Run Docker

1. Mount your local ThinkSound workspace, including the models we just downloaded, into the container (this is what start_docker.sh does; a sketch of such a script follows these steps).

2. To attach the ThinkSound folder via the script, move to its parent directory:

```bash
cd ..
ls  # make sure the ThinkSound folder is visible
```

3. Run the container:

If you pulled the prebuilt image:

```bash
docker run --gpus all -it -v $(pwd)/ThinkSound:/app --rm -p 7860:7860 --net=host sasuketaichou/sajenakcube:thinksound
```

If you built locally:

```bash
docker run --gpus all -it -v $(pwd)/ThinkSound:/app --rm -p 7860:7860 --net=host thinksound:latest
```

Test it in a browser at `localhost:7860`.
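
The start_docker.sh referenced above is not part of this diff, so the following is only a guess at what such a wrapper might contain, assembled from the documented run command for the prebuilt image; treat the details as assumptions:

```bash
#!/usr/bin/env bash
# Hypothetical start_docker.sh: simply wraps the run command documented above; not included in this PR.
set -euo pipefail

# Expects to be run from the directory that contains the ThinkSound checkout.
# Note: with --net=host the -p mapping is redundant on Linux; both are kept to mirror the README command.
docker run --gpus all -it --rm \
  -v "$(pwd)/ThinkSound:/app" \
  -p 7860:7860 --net=host \
  sasuketaichou/sajenakcube:thinksound
```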

#### **Linux/macOS**

2 changes: 1 addition & 1 deletion extract_latents.py
@@ -52,7 +52,7 @@ def main(args):
dataloader = DataLoader(
dataset,
batch_size=2,
num_workers=2,
# num_workers=2,
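# (num_workers now falls back to the DataLoader default of 0, so batches load in the main process)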
pin_memory=False,
drop_last=False,
collate_fn=error_avoidance_collate