From 0db6ab0309ca423d3737662db610191964a9a740 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Tue, 3 Jun 2025 11:24:06 +0900 Subject: [PATCH 01/37] add Dockerfile and confirm docker build successfully finished. --- .dockerignore | 27 +++++++++++++++++++++++++++ Dockerfile | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100644 .dockerignore create mode 100644 Dockerfile diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..628f753 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,27 @@ +# Ignore the following files and directories when building the Docker image +*.pyc +__pycache__/ +*.ipynb_checkpoints +*.log +*.csv +*.tsv +*.h5 +*.pth +*.pt +*.zip +*.tar.gz +*.egg-info/ +dist/ +build/ +.env +venv/ +.env.local +*.DS_Store +*.egg +*.whl +*.pkl +*.json +*.yaml +*.yml +assets/ +submodules/ \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..ce675db --- /dev/null +++ b/Dockerfile @@ -0,0 +1,50 @@ +FROM nvidia/cuda:12.1.1-devel-ubuntu22.04 + +# Set the working directory +WORKDIR /EDGS + +# Install system dependencies first, including git, build-essential, and cmake +RUN apt-get update && apt-get install -y \ + git \ + wget \ + build-essential \ + cmake \ + ninja-build \ + && rm -rf /var/lib/apt/lists/* + +# Copy only essential files for cloning submodules first (e.g., .gitmodules) +# Or, if submodules are public, you might not need to copy anything specific for this step +# For simplicity, we'll copy everything, but this could be optimized +COPY . . + +# Initialize and update submodules +RUN git submodule init && git submodule update --recursive + +# Install Miniconda +RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda.sh && \ + bash /tmp/miniconda.sh -b -p /opt/conda && \ + rm /tmp/miniconda.sh +ENV PATH="/opt/conda/bin:${PATH}" + +# Create the conda environment and install dependencies +RUN conda create -y -n edgs python=3.10 pip && \ + conda clean -afy && \ + echo "source activate edgs" > ~/.bashrc + +# Set CUDA architectures to compile for +ENV TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;8.9;9.0+PTX" + +# Activate the environment and install Python dependencies +RUN /bin/bash -c "source activate edgs && \ + pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121 && \ + pip install -e ./submodules/gaussian-splatting/submodules/diff-gaussian-rasterization && \ + pip install -e ./submodules/gaussian-splatting/submodules/simple-knn && \ + pip install pycolmap wandb hydra-core tqdm torchmetrics lpips matplotlib rich plyfile imageio imageio-ffmpeg && \ + pip install -e ./submodules/RoMa && \ + pip install gradio plotly scikit-learn moviepy==2.1.1 ffmpeg open3d" + +# Expose the port for Gradio +EXPOSE 7862 + +# Command to run the Gradio demo +CMD ["bash", "-c", "source activate edgs && python gradio_demo.py --port 7862"] \ No newline at end of file From f215ce8b85e7a4c57aed431e14fb1d4e349957eb Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Tue, 3 Jun 2025 13:24:15 +0900 Subject: [PATCH 02/37] add docker compose file and confimed it works well --- Dockerfile | 2 ++ docker-compose.yml | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 docker-compose.yml diff --git a/Dockerfile b/Dockerfile index ce675db..9719c23 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,6 +10,8 @@ RUN apt-get update && apt-get install -y \ build-essential \ cmake \ ninja-build \ + libgl1-mesa-glx 
\ + libglib2.0-0 \ && rm -rf /var/lib/apt/lists/* # Copy only essential files for cloning submodules first (e.g., .gitmodules) diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..6801517 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,16 @@ +services: + edgs-app: + build: . # Instructs Docker Compose to build using the Dockerfile in the current directory + image: edgs-app # This is the name of the image you built + ports: + - "7862:7862" # Map port 7862 on the host to port 7862 in the container + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all # Use all available GPUs + capabilities: [gpu] # Request GPU capabilities + volumes: + - ./data:/EDGS/data # Example: map a local 'data' folder to '/EDGS/data' in the container + - ./output:/EDGS/output # Example: map a local 'output' folder \ No newline at end of file From db084aff053981a903032b236c39906fd3799a61 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Tue, 3 Jun 2025 13:53:12 +0900 Subject: [PATCH 03/37] fix asset file not found error --- .dockerignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.dockerignore b/.dockerignore index 628f753..b6c1da7 100644 --- a/.dockerignore +++ b/.dockerignore @@ -23,5 +23,4 @@ venv/ *.json *.yaml *.yml -assets/ submodules/ \ No newline at end of file From a3f90013531934e0eb9e169876d6bd2f890e6160 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Tue, 3 Jun 2025 13:58:43 +0900 Subject: [PATCH 04/37] update readme to use docker compose --- README.md | 39 ++++----------------------------------- 1 file changed, 4 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index 8c40427..c6a5307 100644 --- a/README.md +++ b/README.md @@ -69,45 +69,14 @@ Alternatively, check our [Colab notebook](https://colab.research.google.com/gith ## đŸ› ī¸ Installation -You can either run `install.sh` or manually install using the following: +You can install it just: ```bash -git clone git@github.com:CompVis/EDGS.git --recursive -cd EDGS -git submodule update --init --recursive - -conda create -y -n edgs python=3.10 pip -conda activate edgs - -# Set up path to your CUDA. In our experience similar versions like 12.2 also work well -export CUDA_HOME=/usr/local/cuda-12.1 -export LD_LIBRARY_PATH=$CUDA_HOME/lib64:$LD_LIBRARY_PATH -export PATH=$CUDA_HOME/bin:$PATH - -conda install pytorch torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia -y -conda install nvidia/label/cuda-12.1.0::cuda-toolkit -y - -pip install -e submodules/gaussian-splatting/submodules/diff-gaussian-rasterization -pip install -e submodules/gaussian-splatting/submodules/simple-knn - -# For COLMAP and pycolmap -# Optionally install original colmap but probably pycolmap suffices -# conda install conda-forge/label/colmap_dev::colmap -pip install pycolmap - - -pip install wandb hydra-core tqdm torchmetrics lpips matplotlib rich plyfile imageio imageio-ffmpeg -conda install numpy=1.26.4 -y -c conda-forge --override-channels - -pip install -e submodules/RoMa -conda install anaconda::jupyter --yes - -# Stuff necessary for gradio and visualizations -pip install gradio -pip install plotly scikit-learn moviepy==2.1.1 ffmpeg -pip install open3d +docker compose up ``` +or you can install with running `install.sh`. + ## đŸ“Ļ Data From 55fbbde3c4241282cdd99053d7487029b237c904 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Tue, 3 Jun 2025 17:55:38 +0900 Subject: [PATCH 05/37] add command line EDGS python func, and let Dockerfile can choose gradle or command line. 
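
A usage sketch for the new command-line path (the example video is the
script's default argument; the edgs-app service name comes from
docker-compose.yml):

    docker compose exec edgs-app bash
    cd notebooks
    python fit_model_to_scene_full.py --video_path ../assets/examples/video_fruits.mp4
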
--- .gitignore | 3 + Dockerfile | 2 +- notebooks/fit_model_to_scene_full.py | 203 +++++++++++++++++++++++++++ 3 files changed, 207 insertions(+), 1 deletion(-) create mode 100644 notebooks/fit_model_to_scene_full.py diff --git a/.gitignore b/.gitignore index a05a2b7..c38a86d 100644 --- a/.gitignore +++ b/.gitignore @@ -158,3 +158,6 @@ dmypy.json # Pyre type checker .pyre/ learnableearthparser/fast_sampler/_sampler.c + +# data +data/ diff --git a/Dockerfile b/Dockerfile index 9719c23..c69f166 100644 --- a/Dockerfile +++ b/Dockerfile @@ -49,4 +49,4 @@ RUN /bin/bash -c "source activate edgs && \ EXPOSE 7862 # Command to run the Gradio demo -CMD ["bash", "-c", "source activate edgs && python gradio_demo.py --port 7862"] \ No newline at end of file +CMD ["bash"] \ No newline at end of file diff --git a/notebooks/fit_model_to_scene_full.py b/notebooks/fit_model_to_scene_full.py new file mode 100644 index 0000000..a73ab04 --- /dev/null +++ b/notebooks/fit_model_to_scene_full.py @@ -0,0 +1,203 @@ +#!/usr/bin/env python +# coding: utf-8 + +# # EDGS: Eliminating Densification for Gaussian Splatting +# EDGS improves 3D Gaussian Splatting by removing the need for densification. It starts from a dense point cloud initialization based on 2D correspondences, leading to: +# - ⚡ Faster convergence (only 25% of training time) +# - 🌀 Higher rendering quality +# - 💡 No need for progressive densification + +# ## 2. Import libraries +import argparse +import os +import random +import sys + +import hydra +import numpy as np +import omegaconf +import torch +import wandb +from hydra import compose, initialize +from matplotlib import pyplot as plt +from omegaconf import OmegaConf + +sys.path.append("../") +sys.path.append("../submodules/gaussian-splatting") +from source.trainer import EDGSTrainer +from source.utils_aux import set_seed + +# --- Add argument parsing --- +parser = argparse.ArgumentParser( + description="Fit EDGS model to a scene, optionally from a video." +) +parser.add_argument( + "--video_path", + type=str, + default="../assets/examples/video_fruits.mp4", + help="Path to the input video file.", +) +args = parser.parse_args() +# --- End argument parsing --- + +with initialize(config_path="../configs", version_base="1.1"): + cfg = compose(config_name="train") +print(OmegaConf.to_yaml(cfg)) + + +# # 3. Init input parameters + +# ## 3.1 Optionally preprocess video +PATH_TO_VIDEO = args.video_path +num_ref_views = 16 # how many frames you want to extract from video and colmap + +# Update the config with your settings +cfg.wandb.name = "EDGS.demo.scene" +cfg.wandb.mode = "disabled" # "online" +cfg.gs.dataset.model_path = ( + "./scene_edgsed/" # "change this to your path to the processed scene" +) +cfg.gs.dataset.source_path = "../assets/scene_colmaped/" # "change this to your path" +# Optionally for video processed +# cfg.gs.dataset.source_path="../assets/video_colmaped/" +cfg.gs.dataset.images = "images" +cfg.gs.opt.TEST_CAM_IDX_TO_LOG = 12 +cfg.train.gs_epochs = 30000 +cfg.gs.opt.opacity_reset_interval = 1_000_000 +cfg.train.no_densify = True +cfg.init_wC.matches_per_ref = 15_000 +cfg.init_wC.nns_per_ref = 3 +cfg.init_wC.num_refs = 180 +cfg.init_wC.roma_model = "outdoors" + + +# # 4. 
Initilize model and logger +_ = wandb.init( + entity=cfg.wandb.entity, + project=cfg.wandb.project, + config=omegaconf.OmegaConf.to_container(cfg, resolve=True, throw_on_missing=True), + name=cfg.wandb.name, + mode=cfg.wandb.mode, +) +omegaconf.OmegaConf.resolve(cfg) +set_seed(cfg.seed) +# Init output folder +print("Output folder: {}".format(cfg.gs.dataset.model_path)) +os.makedirs(cfg.gs.dataset.model_path, exist_ok=True) +# Init gs model +gs = hydra.utils.instantiate(cfg.gs) +trainer = EDGSTrainer(GS=gs, training_config=cfg.gs.opt, device=cfg.device) + + +# # 5. Init with matchings +trainer.timer.start() +trainer.init_with_corr(cfg.init_wC) +trainer.timer.pause() + + +# ### Visualize a few initial viewpoints +with torch.no_grad(): + viewpoint_stack = trainer.GS.scene.getTrainCameras() + viewpoint_cams_to_viz = random.sample(trainer.GS.scene.getTrainCameras(), 4) + for viewpoint_cam in viewpoint_cams_to_viz: + render_pkg = trainer.GS(viewpoint_cam) + image = render_pkg["render"] + + image_np = image.clone().detach().cpu().numpy().transpose(1, 2, 0) + image_gt_np = ( + viewpoint_cam.original_image.clone() + .detach() + .cpu() + .numpy() + .transpose(1, 2, 0) + ) + + # Clip values to be in the range [0, 1] + image_np = np.clip(image_np * 255, 0, 255).astype(np.uint8) + image_gt_np = np.clip(image_gt_np * 255, 0, 255).astype(np.uint8) + + fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12, 6)) + ax[0].imshow(image_gt_np) + ax[0].axis("off") + ax[1].imshow(image_np) + ax[1].axis("off") + plt.tight_layout() + plt.show() + + +# # 6.Optimize scene +# Optimize first briefly for 5k steps and visualize results. We also disable saving of pretrained models. Train function can be changed for any other method +trainer.saving_iterations = [] +cfg.train.gs_epochs = 5_000 +trainer.train(cfg.train) + + +# ### Visualize same viewpoints +with torch.no_grad(): + for viewpoint_cam in viewpoint_cams_to_viz: + render_pkg = trainer.GS(viewpoint_cam) + image = render_pkg["render"] + + image_np = image.clone().detach().cpu().numpy().transpose(1, 2, 0) + image_gt_np = ( + viewpoint_cam.original_image.clone() + .detach() + .cpu() + .numpy() + .transpose(1, 2, 0) + ) + + # Clip values to be in the range [0, 1] + image_np = np.clip(image_np * 255, 0, 255).astype(np.uint8) + image_gt_np = np.clip(image_gt_np * 255, 0, 255).astype(np.uint8) + + fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12, 6)) + ax[0].imshow(image_gt_np) + ax[0].axis("off") + ax[1].imshow(image_np) + ax[1].axis("off") + plt.tight_layout() + plt.show() + + +# ### Save model +with torch.no_grad(): + trainer.save_model() + + +# # 7. 
Continue training until we reach total 30K training steps +cfg.train.gs_epochs = 25_000 +trainer.train(cfg.train) + + +# ### Visualize same viewpoints +with torch.no_grad(): + for viewpoint_cam in viewpoint_cams_to_viz: + render_pkg = trainer.GS(viewpoint_cam) + image = render_pkg["render"] + + image_np = image.clone().detach().cpu().numpy().transpose(1, 2, 0) + image_gt_np = ( + viewpoint_cam.original_image.clone() + .detach() + .cpu() + .numpy() + .transpose(1, 2, 0) + ) + + # Clip values to be in the range [0, 1] + image_np = np.clip(image_np * 255, 0, 255).astype(np.uint8) + image_gt_np = np.clip(image_gt_np * 255, 0, 255).astype(np.uint8) + + fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12, 6)) + ax[0].imshow(image_gt_np) + ax[0].axis("off") + ax[1].imshow(image_np) + ax[1].axis("off") + plt.tight_layout() + plt.show() + + +# ### Save model +with torch.no_grad(): + trainer.save_model() From 56aed1217476498ba5d3418836b65e3cc658cafe Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Tue, 3 Jun 2025 19:29:56 +0900 Subject: [PATCH 06/37] fix to runnable --- Dockerfile | 4 ++-- README.md | 18 +++++++++++++- notebooks/fit_model_to_scene_full.py | 36 +++++++++++++++++++++------- 3 files changed, 47 insertions(+), 11 deletions(-) diff --git a/Dockerfile b/Dockerfile index c69f166..d6782de 100644 --- a/Dockerfile +++ b/Dockerfile @@ -48,5 +48,5 @@ RUN /bin/bash -c "source activate edgs && \ # Expose the port for Gradio EXPOSE 7862 -# Command to run the Gradio demo -CMD ["bash"] \ No newline at end of file +# Keep the container running in detached mode +CMD ["tail", "-f", "/dev/null"] \ No newline at end of file diff --git a/README.md b/README.md index c6a5307..06e9740 100644 --- a/README.md +++ b/README.md @@ -72,7 +72,7 @@ Alternatively, check our [Colab notebook](https://colab.research.google.com/gith You can install it just: ```bash -docker compose up +docker compose up -d ``` or you can install with running `install.sh`. @@ -87,6 +87,22 @@ We evaluated on the following datasets: ### Using Your Own Dataset +#### Option A +Use gradle demo; +``` +docker compose exec edgs-app bash +python gradio_demo.py --port 7862 +``` + +#### Option B +From command line; +``` +docker compose exec edgs-app bash +cd notebooks +python fit_model_to_scene_full.py --video_path +``` + +#### Option C You can use the same data format as the [3DGS project](https://github.com/graphdeco-inria/gaussian-splatting?tab=readme-ov-file#processing-your-own-scenes). Please follow their guide to prepare your scene. Expected folder structure: diff --git a/notebooks/fit_model_to_scene_full.py b/notebooks/fit_model_to_scene_full.py index a73ab04..bc29cd7 100644 --- a/notebooks/fit_model_to_scene_full.py +++ b/notebooks/fit_model_to_scene_full.py @@ -24,6 +24,7 @@ sys.path.append("../") sys.path.append("../submodules/gaussian-splatting") +from gradio_demo import preprocess_input from source.trainer import EDGSTrainer from source.utils_aux import set_seed @@ -51,15 +52,34 @@ PATH_TO_VIDEO = args.video_path num_ref_views = 16 # how many frames you want to extract from video and colmap +# process the input video +if True: + print("Starting video preprocessing...") + # Ensure num_corrs is defined. Using cfg.init_wC.matches_per_ref as likely intended. + num_corrs = cfg.init_wC.matches_per_ref + try: + images, scene_dir = preprocess_input(PATH_TO_VIDEO, num_ref_views, num_corrs) + print(f"Video preprocessed. 
Scene directory: {scene_dir}") + cfg.gs.dataset.source_path = scene_dir + # Define a model_path, e.g., in a subdirectory of the scene_dir or a dedicated output folder + cfg.gs.dataset.model_path = os.path.join( + os.path.dirname(scene_dir), os.path.basename(scene_dir) + "_edgs_model" + ) + print(f"Set dataset.source_path to: {cfg.gs.dataset.source_path}") + print(f"Set dataset.model_path to: {cfg.gs.dataset.model_path}") + except Exception as e: + print(f"Error during video preprocessing: {e}") + sys.exit(1) +else: + # This block will be used if video preprocessing is skipped. + # Ensure these paths are valid if this branch is taken. + print("Skipping video preprocessing. Using pre-configured paths.") + cfg.gs.dataset.model_path = "./scene_edgsed/" + cfg.gs.dataset.source_path = ( + "../assets/scene_colmaped/" # Ensure this is a valid COLMAP scene + ) + # Update the config with your settings -cfg.wandb.name = "EDGS.demo.scene" -cfg.wandb.mode = "disabled" # "online" -cfg.gs.dataset.model_path = ( - "./scene_edgsed/" # "change this to your path to the processed scene" -) -cfg.gs.dataset.source_path = "../assets/scene_colmaped/" # "change this to your path" -# Optionally for video processed -# cfg.gs.dataset.source_path="../assets/video_colmaped/" cfg.gs.dataset.images = "images" cfg.gs.opt.TEST_CAM_IDX_TO_LOG = 12 cfg.train.gs_epochs = 30000 From 3fd1a7c409209a8bdae273bc4c99825f7cc574dc Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Tue, 3 Jun 2025 20:53:47 +0900 Subject: [PATCH 07/37] debug fit_model_to_scene_full.py by creating util functions. --- gradio_demo.py | 423 ++++++++++++++++++--------- notebooks/fit_model_to_scene_full.py | 37 +-- source/utils_preprocess.py | 240 +++++++++++---- 3 files changed, 483 insertions(+), 217 deletions(-) diff --git a/gradio_demo.py b/gradio_demo.py index 9c55e44..d91841c 100644 --- a/gradio_demo.py +++ b/gradio_demo.py @@ -1,27 +1,34 @@ -import torch +import argparse +import contextlib +import io import os import shutil +import sys import tempfile -import argparse +import time + import gradio as gr -import sys -import io -from PIL import Image -import numpy as np -from source.utils_aux import set_seed -from source.utils_preprocess import read_video_frames, preprocess_frames, select_optimal_frames, save_frames_to_scene_dir, run_colmap_on_scene -from source.trainer import EDGSTrainer -from hydra import initialize, compose import hydra -import time -from source.visualization import generate_circular_camera_path, save_numpy_frames_as_mp4, generate_fully_smooth_cameras_with_tsp, put_text_on_image -import contextlib -import base64 +import numpy as np +import torch +from hydra import compose, initialize +from source.trainer import EDGSTrainer +from source.utils_aux import set_seed +from source.utils_preprocess import ( + orchestrate_video_to_colmap_scene, # Import the new/refactored function + run_colmap_on_scene, +) +from source.visualization import ( + generate_circular_camera_path, + generate_fully_smooth_cameras_with_tsp, + put_text_on_image, + save_numpy_frames_as_mp4, +) # Init RoMA model: -sys.path.append('../submodules/RoMa') -from romatch import roma_outdoor, roma_indoor +sys.path.append("../submodules/RoMa") +from romatch import roma_indoor roma_model = roma_indoor(device="cuda:0") roma_model.upsample_preds = False @@ -33,6 +40,7 @@ trainer = None + class Tee(io.TextIOBase): def __init__(self, *streams): self.streams = streams @@ -46,6 +54,7 @@ def flush(self): for stream in self.streams: stream.flush() + def capture_logs(func, *args, 
**kwargs): log_capture_string = io.StringIO() tee = Tee(sys.__stdout__, log_capture_string) @@ -53,12 +62,15 @@ def capture_logs(func, *args, **kwargs): result = func(*args, **kwargs) return result, log_capture_string.getvalue() + # Training Pipeline -def run_training_pipeline(scene_dir, - num_ref_views=16, - num_corrs_per_view=20000, - num_steps=1_000, - mode_toggle="Ours (EDGS)"): +def run_training_pipeline( + scene_dir, + num_ref_views=16, + num_corrs_per_view=20000, + num_steps=1_000, + mode_toggle="Ours (EDGS)", +): with initialize(config_path="./configs", version_base="1.1"): cfg = compose(config_name="train") @@ -72,8 +84,8 @@ def run_training_pipeline(scene_dir, cfg.gs.opt.TEST_CAM_IDX_TO_LOG = 12 cfg.train.gs_epochs = 30000 - - if mode_toggle=="Ours (EDGS)": + + if mode_toggle == "Ours (EDGS)": cfg.gs.opt.opacity_reset_interval = 1_000_000 cfg.train.reduce_opacity = True cfg.train.no_densify = True @@ -84,15 +96,20 @@ def run_training_pipeline(scene_dir, cfg.init_wC.nns_per_ref = 1 cfg.init_wC.num_refs = num_ref_views cfg.init_wC.add_SfM_init = False - cfg.init_wC.scaling_factor = 0.00077 * 2. - + cfg.init_wC.scaling_factor = 0.00077 * 2.0 + set_seed(cfg.seed) os.makedirs(cfg.gs.dataset.model_path, exist_ok=True) global trainer global MODEL_PATH generator3dgs = hydra.utils.instantiate(cfg.gs, do_train_test_split=False) - trainer = EDGSTrainer(GS=generator3dgs, training_config=cfg.gs.opt, device=cfg.device, log_wandb=cfg.wandb.mode != 'disabled') + trainer = EDGSTrainer( + GS=generator3dgs, + training_config=cfg.gs.opt, + device=cfg.device, + log_wandb=cfg.wandb.mode != "disabled", + ) # Disable evaluation and saving trainer.saving_iterations = [] @@ -102,13 +119,15 @@ def run_training_pipeline(scene_dir, trainer.timer.start() start_time = time.time() trainer.init_with_corr(cfg.init_wC, roma_model=roma_model) - time_for_init = time.time()-start_time + time_for_init = time.time() - start_time viewpoint_cams = trainer.GS.scene.getTrainCameras() - path_cameras = generate_fully_smooth_cameras_with_tsp(existing_cameras=viewpoint_cams, - n_selected=6, # 8 - n_points_per_segment=30, # 30 - closed=False) + path_cameras = generate_fully_smooth_cameras_with_tsp( + existing_cameras=viewpoint_cams, + n_selected=6, # 8 + n_points_per_segment=30, # 30 + closed=False, + ) path_cameras = path_cameras + path_cameras[::-1] path_renderings = [] @@ -122,13 +141,24 @@ def run_training_pipeline(scene_dir, image = render_pkg["render"] image_np = np.clip(image.detach().cpu().numpy().transpose(1, 2, 0), 0, 1) image_np = (image_np * 255).astype(np.uint8) - path_renderings.append(put_text_on_image(img=image_np, - text=f"Init stage.\nTime:{time_for_init:.3f}s. ")) - path_renderings = path_renderings + [put_text_on_image(img=image_np, text=f"Start fitting.\nTime:{time_for_init:.3f}s. ")]*30 - + path_renderings.append( + put_text_on_image( + img=image_np, text=f"Init stage.\nTime:{time_for_init:.3f}s. " + ) + ) + path_renderings = ( + path_renderings + + [ + put_text_on_image( + img=image_np, text=f"Start fitting.\nTime:{time_for_init:.3f}s. " + ) + ] + * 30 + ) + # Train and save visualizations during training. 
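    # Sketch of the loop below: each iteration renders one preview frame from
    # path_cameras (advancing idx and overlaying the elapsed time), then runs
    # trainer.train for 10 gs_epochs, so one video frame is captured roughly
    # every 10 optimization steps until num_steps is reached.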
start_time = time.time() - for _ in range(int(num_steps//10)): + for _ in range(int(num_steps // 10)): with torch.no_grad(): viewpoint_cam = path_cameras[idx] idx = (idx + 1) % len(path_cameras) @@ -136,20 +166,27 @@ def run_training_pipeline(scene_dir, image = render_pkg["render"] image_np = np.clip(image.detach().cpu().numpy().transpose(1, 2, 0), 0, 1) image_np = (image_np * 255).astype(np.uint8) - path_renderings.append(put_text_on_image( - img=image_np, - text=f"Fitting stage.\nTime:{time_for_init + time.time()-start_time:.3f}s. ")) - + path_renderings.append( + put_text_on_image( + img=image_np, + text=f"Fitting stage.\nTime:{time_for_init + time.time() - start_time:.3f}s. ", + ) + ) + cfg.train.gs_epochs = 10 trainer.train(cfg.train) - print(f"Time elapsed: {(time_for_init + time.time()-start_time):.2f}s.") + print(f"Time elapsed: {(time_for_init + time.time() - start_time):.2f}s.") # if (cfg.init_wC.use == False) and (time_for_init + time.time()-start_time) > 60: # break final_time = time.time() - + # Add static frame. To highlight we're done - path_renderings += [put_text_on_image( - img=image_np, text=f"Done.\nTime:{time_for_init + final_time -start_time:.3f}s. ")]*30 + path_renderings += [ + put_text_on_image( + img=image_np, + text=f"Done.\nTime:{time_for_init + final_time - start_time:.3f}s. ", + ) + ] * 30 # Final rendering at the end. for _ in range(len(path_cameras)): with torch.no_grad(): @@ -159,37 +196,56 @@ def run_training_pipeline(scene_dir, image = render_pkg["render"] image_np = np.clip(image.detach().cpu().numpy().transpose(1, 2, 0), 0, 1) image_np = (image_np * 255).astype(np.uint8) - path_renderings.append(put_text_on_image(img=image_np, - text=f"Final result.\nTime:{time_for_init + final_time -start_time:.3f}s. ")) + path_renderings.append( + put_text_on_image( + img=image_np, + text=f"Final result.\nTime:{time_for_init + final_time - start_time:.3f}s. 
", + ) + ) trainer.save_model() - final_video_path = os.path.join(STATIC_FILE_SERVING_FOLDER, f"{scene_name}_final.mp4") - save_numpy_frames_as_mp4(frames=path_renderings, output_path=final_video_path, fps=30, center_crop=0.85) + final_video_path = os.path.join( + STATIC_FILE_SERVING_FOLDER, f"{scene_name}_final.mp4" + ) + save_numpy_frames_as_mp4( + frames=path_renderings, output_path=final_video_path, fps=30, center_crop=0.85 + ) MODEL_PATH = cfg.gs.dataset.model_path - ply_path = os.path.join(cfg.gs.dataset.model_path, f"point_cloud/iteration_{trainer.gs_step}/point_cloud.ply") - shutil.copy(ply_path, os.path.join(STATIC_FILE_SERVING_FOLDER, "point_cloud_final.ply")) + ply_path = os.path.join( + cfg.gs.dataset.model_path, + f"point_cloud/iteration_{trainer.gs_step}/point_cloud.ply", + ) + shutil.copy( + ply_path, os.path.join(STATIC_FILE_SERVING_FOLDER, "point_cloud_final.ply") + ) return final_video_path, ply_path + # Gradio Interface def gradio_interface(input_path, num_ref_views, num_corrs, num_steps): - images, scene_dir = run_full_pipeline(input_path, num_ref_views, num_corrs, max_size=1024) - shutil.copytree(scene_dir, STATIC_FILE_SERVING_FOLDER+'/scene_colmaped', dirs_exist_ok=True) - (final_video_path, ply_path), log_output = capture_logs(run_training_pipeline, - scene_dir, - num_ref_views, - num_corrs, - num_steps) + images, scene_dir = run_full_pipeline( + input_path, num_ref_views, num_corrs, max_size=1024 + ) + shutil.copytree( + scene_dir, STATIC_FILE_SERVING_FOLDER + "/scene_colmaped", dirs_exist_ok=True + ) + (final_video_path, ply_path), log_output = capture_logs( + run_training_pipeline, scene_dir, num_ref_views, num_corrs, num_steps + ) images_rgb = [img[:, :, ::-1] for img in images] return images_rgb, final_video_path, scene_dir, ply_path, log_output + # Dummy Render Functions def render_all_views(scene_dir): viewpoint_cams = trainer.GS.scene.getTrainCameras() - path_cameras = generate_fully_smooth_cameras_with_tsp(existing_cameras=viewpoint_cams, - n_selected=8, - n_points_per_segment=60, - closed=False) + path_cameras = generate_fully_smooth_cameras_with_tsp( + existing_cameras=viewpoint_cams, + n_selected=8, + n_points_per_segment=60, + closed=False, + ) path_cameras = path_cameras + path_cameras[::-1] path_renderings = [] @@ -200,19 +256,21 @@ def render_all_views(scene_dir): image_np = np.clip(image.detach().cpu().numpy().transpose(1, 2, 0), 0, 1) image_np = (image_np * 255).astype(np.uint8) path_renderings.append(image_np) - save_numpy_frames_as_mp4(frames=path_renderings, - output_path=os.path.join(STATIC_FILE_SERVING_FOLDER, "render_all_views.mp4"), - fps=30, - center_crop=0.85) - + save_numpy_frames_as_mp4( + frames=path_renderings, + output_path=os.path.join(STATIC_FILE_SERVING_FOLDER, "render_all_views.mp4"), + fps=30, + center_crop=0.85, + ) + return os.path.join(STATIC_FILE_SERVING_FOLDER, "render_all_views.mp4") + def render_circular_path(scene_dir): viewpoint_cams = trainer.GS.scene.getTrainCameras() - path_cameras = generate_circular_camera_path(existing_cameras=viewpoint_cams, - N=240, - radius_scale=0.65, - d=0) + path_cameras = generate_circular_camera_path( + existing_cameras=viewpoint_cams, N=240, radius_scale=0.65, d=0 + ) path_renderings = [] with torch.no_grad(): @@ -222,22 +280,29 @@ def render_circular_path(scene_dir): image_np = np.clip(image.detach().cpu().numpy().transpose(1, 2, 0), 0, 1) image_np = (image_np * 255).astype(np.uint8) path_renderings.append(image_np) - save_numpy_frames_as_mp4(frames=path_renderings, - 
output_path=os.path.join(STATIC_FILE_SERVING_FOLDER, "render_circular_path.mp4"), - fps=30, - center_crop=0.85) - + save_numpy_frames_as_mp4( + frames=path_renderings, + output_path=os.path.join( + STATIC_FILE_SERVING_FOLDER, "render_circular_path.mp4" + ), + fps=30, + center_crop=0.85, + ) + return os.path.join(STATIC_FILE_SERVING_FOLDER, "render_circular_path.mp4") + # Download Functions def download_cameras(): path = os.path.join(MODEL_PATH, "cameras.json") return f"[đŸ“Ĩ Download Cameras.json](file={path})" + def download_model(): path = os.path.join(STATIC_FILE_SERVING_FOLDER, "point_cloud_final.ply") return f"[đŸ“Ĩ Download Pretrained Model (.ply)](file={path})" + # Full pipeline helpers def run_full_pipeline(input_path, num_ref_views, num_corrs, max_size=1024): tmpdirname = tempfile.mkdtemp() @@ -249,51 +314,27 @@ def run_full_pipeline(input_path, num_ref_views, num_corrs, max_size=1024): return selected_frames, scene_dir -# Preprocess Input -def process_input(input_path, num_ref_views, output_dir, max_size=1024): - if isinstance(input_path, (str, os.PathLike)): - if os.path.isdir(input_path): - frames = [] - for img_file in sorted(os.listdir(input_path)): - if img_file.lower().endswith(('jpg', 'jpeg', 'png')): - img = Image.open(os.path.join(output_dir, img_file)).convert('RGB') - img.thumbnail((1024, 1024)) - frames.append(np.array(img)) - else: - frames = read_video_frames(video_input=input_path, max_size=max_size) - else: - frames = read_video_frames(video_input=input_path, max_size=max_size) - - frames_scores = preprocess_frames(frames) - selected_frames_indices = select_optimal_frames(scores=frames_scores, - k=min(num_ref_views, len(frames))) - selected_frames = [frames[frame_idx] for frame_idx in selected_frames_indices] - - save_frames_to_scene_dir(frames=selected_frames, scene_dir=output_dir) - return selected_frames - -def preprocess_input(input_path, num_ref_views, max_size=1024): - tmpdirname = tempfile.mkdtemp() - scene_dir = os.path.join(tmpdirname, "scene") - os.makedirs(scene_dir, exist_ok=True) - selected_frames = process_input(input_path, num_ref_views, scene_dir, max_size) - run_colmap_on_scene(scene_dir) - return selected_frames, scene_dir def start_training(scene_dir, num_ref_views, num_corrs, num_steps): - return capture_logs(run_training_pipeline, scene_dir, num_ref_views, num_corrs, num_steps) - + return capture_logs( + run_training_pipeline, scene_dir, num_ref_views, num_corrs, num_steps + ) + # Gradio App with gr.Blocks() as demo: with gr.Row(): with gr.Column(scale=6): - gr.Markdown(""" + gr.Markdown( + """ ## 📄 EDGS: Eliminating Densification for Efficient Convergence of 3DGS 🔗 Project Page - """, elem_id="header") + """, + elem_id="header", + ) - gr.Markdown(""" + gr.Markdown( + """ ### đŸ› ī¸ How to Use This Demo 1. Upload a **front-facing video** or **a folder of images** of a **static** scene. @@ -306,37 +347,52 @@ def start_training(scene_dir, num_ref_views, num_corrs, num_steps): ✅ Best for scenes with small camera motion. ❗ For full 360° or large-scale scenes, we recommend the Colab version (see project page). 
- """, elem_id="quickstart") - + """, + elem_id="quickstart", + ) scene_dir_state = gr.State() ply_model_state = gr.State() with gr.Row(): with gr.Column(scale=2): - input_file = gr.File(label="Upload Video or Images", - file_types=[".mp4", ".avi", ".mov", ".png", ".jpg", ".jpeg"], - file_count="multiple") + input_file = gr.File( + label="Upload Video or Images", + file_types=[".mp4", ".avi", ".mov", ".png", ".jpg", ".jpeg"], + file_count="multiple", + ) gr.Examples( - examples = [ + examples=[ [["assets/examples/video_bakery.mp4"]], [["assets/examples/video_flowers.mp4"]], [["assets/examples/video_fruits.mp4"]], [["assets/examples/video_plant.mp4"]], [["assets/examples/video_salad.mp4"]], [["assets/examples/video_tram.mp4"]], - [["assets/examples/video_tulips.mp4"]] - ], + [["assets/examples/video_tulips.mp4"]], + ], inputs=[input_file], label="đŸŽžī¸ ALternatively, try an Example Video", - examples_per_page=4 + examples_per_page=4, + ) + ref_slider = gr.Slider( + 4, 32, value=16, step=1, label="Number of Reference Views" + ) + corr_slider = gr.Slider( + 5000, + 30000, + value=20000, + step=1000, + label="Correspondences per Reference View", + ) + fit_steps_slider = gr.Slider( + 100, 5000, value=400, step=100, label="Number of optimization steps" ) - ref_slider = gr.Slider(4, 32, value=16, step=1, label="Number of Reference Views") - corr_slider = gr.Slider(5000, 30000, value=20000, step=1000, label="Correspondences per Reference View") - fit_steps_slider = gr.Slider(100, 5000, value=400, step=100, label="Number of optimization steps") preprocess_button = gr.Button("📸 Preprocess Input") start_button = gr.Button("🚀 Start Reconstruction", interactive=False) - gallery = gr.Gallery(label="Selected Reference Views", columns=4, height=300) + gallery = gr.Gallery( + label="Selected Reference Views", columns=4, height=300 + ) with gr.Column(scale=3): gr.Markdown("### đŸ‹ī¸ Training Visualization") @@ -351,43 +407,118 @@ def start_training(scene_dir, num_ref_views, num_corrs, num_steps): gr.Markdown("### đŸ“Ļ Output Files") with gr.Row(height=50): with gr.Column(): - #gr.Markdown(value=f"[đŸ“Ĩ Download .ply](file/point_cloud_final.ply)") + # gr.Markdown(value=f"[đŸ“Ĩ Download .ply](file/point_cloud_final.ply)") download_cameras_button = gr.Button("đŸ“Ĩ Download Cameras.json") download_cameras_file = gr.File(label="📄 Cameras.json") with gr.Column(): - download_model_button = gr.Button("đŸ“Ĩ Download Pretrained Model (.ply)") + download_model_button = gr.Button( + "đŸ“Ĩ Download Pretrained Model (.ply)" + ) download_model_file = gr.File(label="📄 Pretrained Model (.ply)") log_output_box = gr.Textbox(label="đŸ–Ĩī¸ Log", lines=10, interactive=False) - def on_preprocess_click(input_file, num_ref_views): - images, scene_dir = preprocess_input(input_file, num_ref_views) - return gr.update(value=[x[...,::-1] for x in images]), scene_dir, gr.update(interactive=True) + def on_preprocess_click( + input_file_obj, num_ref_views_val + ): # input_file_obj is from gr.File + # 'input_file_obj' from gr.File is a tempfile._TemporaryFileWrapper object + # It has a .name attribute which is the path to the temporary file + if input_file_obj is None: + gr.Warning("Please upload a file or select an example.") + return None, None, gr.update(interactive=False) + + # Handle single file vs. 
list of files (if file_count="multiple") + actual_input_path = None + if isinstance( + input_file_obj, list + ): # If file_count="multiple" and multiple files are uploaded + if not input_file_obj: + gr.Warning("No file provided in the list.") + return None, None, gr.update(interactive=False) + actual_input_path = input_file_obj[ + 0 + ].name # Process the first file for simplicity, or adapt + # If you expect a folder of images, you might need to handle this differently, + # as Gradio's gr.File with file_count="multiple" gives a list of temp file objects. + # The original process_input had logic for os.path.isdir(input_path). + # If users are meant to upload a folder, gr.File might not be the best component, + # or you'd need to zip/unzip. For now, assuming single video or first of multiple. + elif hasattr(input_file_obj, "name"): # Single file object + actual_input_path = input_file_obj.name + else: + gr.Warning("Invalid input file.") + return None, None, gr.update(interactive=False) + + # Use the refactored preprocessing function + # The first return value 'images_data' is a list of numpy arrays (the frame pixel data) + images_data, scene_dir_val = orchestrate_video_to_colmap_scene( + actual_input_path, # Pass the path of the uploaded temp file + num_ref_views_val, + max_size=1024, # Or get from a Gradio component + base_work_dir="./gradio_processed_scenes", # Store Gradio outputs in a specific place + ) + if not scene_dir_val: + gr.Error("Preprocessing failed. Check logs.") + return None, None, gr.update(interactive=False) + + # Convert numpy arrays (BGR from OpenCV) to RGB for Gradio gallery + gallery_images = [] + if images_data: + for img_data_np in images_data: + if isinstance(img_data_np, np.ndarray): + # Assuming frames from read_video_frames are BGR, convert to RGB for PIL/Gradio + gallery_images.append( + Image.fromarray(cv2.cvtColor(img_data_np, cv2.COLOR_BGR2RGB)) + ) + else: # If images_data contains PIL Images already + gallery_images.append(img_data_np) + + return ( + gr.update(value=gallery_images), + scene_dir_val, + gr.update(interactive=True), + ) def on_start_click(scene_dir, num_ref_views, num_corrs, num_steps): - (video_path, ply_path), logs = start_training(scene_dir, num_ref_views, num_corrs, num_steps) + (video_path, ply_path), logs = start_training( + scene_dir, num_ref_views, num_corrs, num_steps + ) return video_path, ply_path, logs preprocess_button.click( fn=on_preprocess_click, inputs=[input_file, ref_slider], - outputs=[gallery, scene_dir_state, start_button] + outputs=[gallery, scene_dir_state, start_button], ) start_button.click( fn=on_start_click, inputs=[scene_dir_state, ref_slider, corr_slider, fit_steps_slider], - outputs=[video_output, model3d_viewer, log_output_box] + outputs=[video_output, model3d_viewer, log_output_box], ) - render_all_views_button.click(fn=render_all_views, inputs=[scene_dir_state], outputs=[rendered_video_output]) - render_circular_path_button.click(fn=render_circular_path, inputs=[scene_dir_state], outputs=[rendered_video_output]) - - download_cameras_button.click(fn=lambda: os.path.join(MODEL_PATH, "cameras.json"), inputs=[], outputs=[download_cameras_file]) - download_model_button.click(fn=lambda: os.path.join(STATIC_FILE_SERVING_FOLDER, "point_cloud_final.ply"), inputs=[], outputs=[download_model_file]) + render_all_views_button.click( + fn=render_all_views, inputs=[scene_dir_state], outputs=[rendered_video_output] + ) + render_circular_path_button.click( + fn=render_circular_path, + inputs=[scene_dir_state], + 
outputs=[rendered_video_output], + ) + download_cameras_button.click( + fn=lambda: os.path.join(MODEL_PATH, "cameras.json"), + inputs=[], + outputs=[download_cameras_file], + ) + download_model_button.click( + fn=lambda: os.path.join(STATIC_FILE_SERVING_FOLDER, "point_cloud_final.ply"), + inputs=[], + outputs=[download_model_file], + ) - gr.Markdown(""" + gr.Markdown( + """ --- ### 📖 Detailed Overview @@ -413,12 +544,22 @@ def on_start_click(scene_dir, num_ref_views, num_corrs, num_steps): --- Preloaded models coming soon. (TODO) - """, elem_id="details") + """, + elem_id="details", + ) if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Launch Gradio demo for EDGS preprocessing and 3D viewing.") - parser.add_argument("--port", type=int, default=7860, help="Port to launch the Gradio app on.") - parser.add_argument("--no_share", action='store_true', help="Disable Gradio sharing and assume local access (default: share=True)") + parser = argparse.ArgumentParser( + description="Launch Gradio demo for EDGS preprocessing and 3D viewing." + ) + parser.add_argument( + "--port", type=int, default=7860, help="Port to launch the Gradio app on." + ) + parser.add_argument( + "--no_share", + action="store_true", + help="Disable Gradio sharing and assume local access (default: share=True)", + ) args = parser.parse_args() demo.launch(server_name="0.0.0.0", server_port=args.port, share=not args.no_share) diff --git a/notebooks/fit_model_to_scene_full.py b/notebooks/fit_model_to_scene_full.py index bc29cd7..0b977e5 100644 --- a/notebooks/fit_model_to_scene_full.py +++ b/notebooks/fit_model_to_scene_full.py @@ -24,9 +24,11 @@ sys.path.append("../") sys.path.append("../submodules/gaussian-splatting") -from gradio_demo import preprocess_input from source.trainer import EDGSTrainer from source.utils_aux import set_seed +from source.utils_preprocess import ( + orchestrate_video_to_colmap_scene, # Use the refactored function +) # --- Add argument parsing --- parser = argparse.ArgumentParser( @@ -53,31 +55,24 @@ num_ref_views = 16 # how many frames you want to extract from video and colmap # process the input video -if True: - print("Starting video preprocessing...") - # Ensure num_corrs is defined. Using cfg.init_wC.matches_per_ref as likely intended. - num_corrs = cfg.init_wC.matches_per_ref +if PATH_TO_VIDEO and os.path.exists(PATH_TO_VIDEO): + print(f"Starting video processing for: {PATH_TO_VIDEO}") try: - images, scene_dir = preprocess_input(PATH_TO_VIDEO, num_ref_views, num_corrs) - print(f"Video preprocessed. Scene directory: {scene_dir}") - cfg.gs.dataset.source_path = scene_dir - # Define a model_path, e.g., in a subdirectory of the scene_dir or a dedicated output folder - cfg.gs.dataset.model_path = os.path.join( - os.path.dirname(scene_dir), os.path.basename(scene_dir) + "_edgs_model" + # The first return value 'images_data' might not be directly used by the trainer + # if the Scene object loads everything from the COLMAP directory. + _, scene_dir = orchestrate_video_to_colmap_scene( + PATH_TO_VIDEO, + args.num_ref_views, # Assuming you added this arg + max_size=1024, # Or make it an arg + base_work_dir=args.processed_scenes_dir, # Assuming you added this arg ) - print(f"Set dataset.source_path to: {cfg.gs.dataset.source_path}") - print(f"Set dataset.model_path to: {cfg.gs.dataset.model_path}") + if scene_dir is None: + print(f"Failed to process video {PATH_TO_VIDEO}. 
Exiting.") + sys.exit(1) except Exception as e: print(f"Error during video preprocessing: {e}") sys.exit(1) -else: - # This block will be used if video preprocessing is skipped. - # Ensure these paths are valid if this branch is taken. - print("Skipping video preprocessing. Using pre-configured paths.") - cfg.gs.dataset.model_path = "./scene_edgsed/" - cfg.gs.dataset.source_path = ( - "../assets/scene_colmaped/" # Ensure this is a valid COLMAP scene - ) + # Update the config with your settings cfg.gs.dataset.images = "images" diff --git a/source/utils_preprocess.py b/source/utils_preprocess.py index 7c6dab3..d90a03f 100644 --- a/source/utils_preprocess.py +++ b/source/utils_preprocess.py @@ -1,17 +1,15 @@ # This file contains function for video or image collection preprocessing. # For video we do the preprocessing and select k sharpest frames. -# Afterwards scene is constructed +# Afterwards scene is constructed +import os +import time + import cv2 import numpy as np -from tqdm import tqdm import pycolmap -import os -import time -import tempfile -from moviepy import VideoFileClip from matplotlib import pyplot as plt +from moviepy import VideoFileClip from PIL import Image -import cv2 from tqdm import tqdm WORKDIR = "../outputs/" @@ -22,20 +20,25 @@ def get_rotation_moviepy(video_path): rotation = 0 try: - displaymatrix = clip.reader.infos['inputs'][0]['streams'][2]['metadata'].get('displaymatrix', '') - if 'rotation of' in displaymatrix: - angle = float(displaymatrix.strip().split('rotation of')[-1].split('degrees')[0]) + displaymatrix = clip.reader.infos["inputs"][0]["streams"][2]["metadata"].get( + "displaymatrix", "" + ) + if "rotation of" in displaymatrix: + angle = float( + displaymatrix.strip().split("rotation of")[-1].split("degrees")[0] + ) rotation = int(angle) % 360 - + except Exception as e: print(f"No displaymatrix rotation found: {e}") clip.reader.close() - #if clip.audio: + # if clip.audio: # clip.audio.reader.close_proc() return rotation + def resize_max_side(frame, max_size): h, w = frame.shape[:2] scale = max_size / max(h, w) @@ -43,6 +46,7 @@ def resize_max_side(frame, max_size): frame = cv2.resize(frame, (int(w * scale), int(h * scale))) return frame + def read_video_frames(video_input, k=1, max_size=1024): """ Extracts every k-th frame from a video or list of images, resizes to max size, and returns frames as list. @@ -58,7 +62,9 @@ def read_video_frames(video_input, k=1, max_size=1024): # Handle list of image files (not single video in a list) if isinstance(video_input, list): # If it's a single video in a list, treat it as video - if len(video_input) == 1 and video_input[0].name.endswith(('.mp4', '.avi', '.mov')): + if len(video_input) == 1 and video_input[0].name.endswith( + (".mp4", ".avi", ".mov") + ): video_input = video_input[0] # unwrap single video file else: # Treat as list of images @@ -66,18 +72,19 @@ def read_video_frames(video_input, k=1, max_size=1024): for img_file in video_input: img = Image.open(img_file.name).convert("RGB") img.thumbnail((max_size, max_size)) - frames.append(np.array(img)[...,::-1]) + frames.append(np.array(img)[..., ::-1]) return frames # Handle file-like or path - if hasattr(video_input, 'name'): + if hasattr(video_input, "name"): video_path = video_input.name elif isinstance(video_input, (str, os.PathLike)): video_path = str(video_input) else: - raise ValueError("Unsupported video input type. Must be a filepath, file-like object, or list of images.") + raise ValueError( + "Unsupported video input type. 
Must be a filepath, file-like object, or list of images." + ) - cap = cv2.VideoCapture(video_path) if not cap.isOpened(): raise ValueError(f"Error: Could not open video {video_path}.") @@ -97,20 +104,21 @@ def read_video_frames(video_input, k=1, max_size=1024): scale = max(h, w) / max_size if scale > 1: frame = cv2.resize(frame, (int(w / scale), int(h / scale))) - frames.append(frame[...,[2,1,0]]) + frames.append(frame[..., [2, 1, 0]]) pbar.update(1) frame_count += 1 cap.release() return frames + def resize_max_side(frame, max_size): """ Resizes the frame so that its largest side equals max_size, maintaining aspect ratio. """ height, width = frame.shape[:2] max_dim = max(height, width) - + if max_dim <= max_size: return frame # No need to resize @@ -118,41 +126,47 @@ def resize_max_side(frame, max_size): new_width = int(width * scale) new_height = int(height * scale) - resized_frame = cv2.resize(frame, (new_width, new_height), interpolation=cv2.INTER_AREA) + resized_frame = cv2.resize( + frame, (new_width, new_height), interpolation=cv2.INTER_AREA + ) return resized_frame - def variance_of_laplacian(image): - # compute the Laplacian of the image and then return the focus - # measure, which is simply the variance of the Laplacian - return cv2.Laplacian(image, cv2.CV_64F).var() - -def process_all_frames(IMG_FOLDER = '/scratch/datasets/hq_data/night2_all_frames', - to_visualize=False, - save_images=True): + # compute the Laplacian of the image and then return the focus + # measure, which is simply the variance of the Laplacian + return cv2.Laplacian(image, cv2.CV_64F).var() + + +def process_all_frames( + IMG_FOLDER="/scratch/datasets/hq_data/night2_all_frames", + to_visualize=False, + save_images=True, +): dict_scores = {} - for idx, img_name in tqdm(enumerate(sorted([x for x in os.listdir(IMG_FOLDER) if '.png' in x]))): - - img = cv2.imread(os.path.join(IMG_FOLDER, img_name))#[250:, 100:] + for idx, img_name in tqdm( + enumerate(sorted([x for x in os.listdir(IMG_FOLDER) if ".png" in x])) + ): + img = cv2.imread(os.path.join(IMG_FOLDER, img_name)) # [250:, 100:] gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - fm = variance_of_laplacian(gray) + \ - variance_of_laplacian(cv2.resize(gray, (0,0), fx=0.75, fy=0.75)) + \ - variance_of_laplacian(cv2.resize(gray, (0,0), fx=0.5, fy=0.5)) + \ - variance_of_laplacian(cv2.resize(gray, (0,0), fx=0.25, fy=0.25)) + fm = ( + variance_of_laplacian(gray) + + variance_of_laplacian(cv2.resize(gray, (0, 0), fx=0.75, fy=0.75)) + + variance_of_laplacian(cv2.resize(gray, (0, 0), fx=0.5, fy=0.5)) + + variance_of_laplacian(cv2.resize(gray, (0, 0), fx=0.25, fy=0.25)) + ) if to_visualize: plt.figure() plt.title(f"Laplacian score: {fm:.2f}") - plt.imshow(img[..., [2,1,0]]) + plt.imshow(img[..., [2, 1, 0]]) plt.show() - dict_scores[idx] = {"idx" : idx, - "img_name" : img_name, - "score" : fm} + dict_scores[idx] = {"idx": idx, "img_name": img_name, "score": fm} if save_images: dict_scores[idx]["img"] = img - + return dict_scores + def select_optimal_frames(scores, k): """ Selects a minimal subset of frames while ensuring no gaps exceed k. @@ -165,12 +179,14 @@ def select_optimal_frames(scores, k): list of int: Indices of selected frames. 
""" n = len(scores) - selected = [0, n-1] + selected = [0, n - 1] i = 0 # Start at the first frame while i < n: # Find the best frame to select within the next k frames - best_idx = max(range(i, min(i + k + 1, n)), key=lambda x: scores[x], default=None) + best_idx = max( + range(i, min(i + k + 1, n)), key=lambda x: scores[x], default=None + ) if best_idx is None: break # No more frames left @@ -187,6 +203,7 @@ def variance_of_laplacian(image): """ return cv2.Laplacian(image, cv2.CV_64F).var() + def preprocess_frames(frames, verbose=False): """ Compute sharpness scores for a list of frames using multi-scale Laplacian variance. @@ -204,12 +221,12 @@ def preprocess_frames(frames, verbose=False): gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) fm = ( - variance_of_laplacian(gray) + - variance_of_laplacian(cv2.resize(gray, (0, 0), fx=0.75, fy=0.75)) + - variance_of_laplacian(cv2.resize(gray, (0, 0), fx=0.5, fy=0.5)) + - variance_of_laplacian(cv2.resize(gray, (0, 0), fx=0.25, fy=0.25)) + variance_of_laplacian(gray) + + variance_of_laplacian(cv2.resize(gray, (0, 0), fx=0.75, fy=0.75)) + + variance_of_laplacian(cv2.resize(gray, (0, 0), fx=0.5, fy=0.5)) + + variance_of_laplacian(cv2.resize(gray, (0, 0), fx=0.25, fy=0.25)) ) - + if verbose: print(f"Frame {idx}: Sharpness Score = {fm:.2f}") @@ -217,6 +234,7 @@ def preprocess_frames(frames, verbose=False): return scores + def select_optimal_frames(scores, k): """ Selects k frames by splitting into k segments and picking the sharpest frame from each. @@ -226,7 +244,7 @@ def select_optimal_frames(scores, k): k (int): Number of frames to select. Returns: - list of int: Indices of selected frames. + list of int: Indices of selected frames. """ n = len(scores) selected_indices = [] @@ -236,15 +254,16 @@ def select_optimal_frames(scores, k): start = i * segment_size end = (i + 1) * segment_size if i < k - 1 else n # Last chunk may be larger segment_scores = scores[start:end] - + if len(segment_scores) == 0: continue # Safety check if some segment is empty - + best_in_segment = start + np.argmax(segment_scores) selected_indices.append(best_in_segment) return sorted(selected_indices) + def save_frames_to_scene_dir(frames, scene_dir): """ Saves a list of frames into the target scene directory under 'images/' subfolder. @@ -257,7 +276,9 @@ def save_frames_to_scene_dir(frames, scene_dir): os.makedirs(images_dir, exist_ok=True) for idx, frame in enumerate(frames): - filename = os.path.join(images_dir, f"{idx:08d}.png") # 00000000.png, 00000001.png, etc. + filename = os.path.join( + images_dir, f"{idx:08d}.png" + ) # 00000000.png, 00000001.png, etc. cv2.imwrite(filename, frame) print(f"Saved {len(frames)} frames to {images_dir}") @@ -269,7 +290,7 @@ def run_colmap_on_scene(scene_dir): Args: scene_dir (str): Path to scene directory containing 'images' folder. 
- + TODO: if the function hasn't managed to match all the frames either increase image size, increase number of features or just remove those frames from the folder scene_dir/images """ @@ -280,7 +301,7 @@ def run_colmap_on_scene(scene_dir): database_path = os.path.join(scene_dir, "database.db") sparse_path = os.path.join(scene_dir, "sparse") image_dir = os.path.join(scene_dir, "images") - + # Make sure output directories exist os.makedirs(sparse_path, exist_ok=True) @@ -291,7 +312,7 @@ def run_colmap_on_scene(scene_dir): sift_options={ "max_num_features": 512 * 2, "max_image_size": 512 * 1, - } + }, ) print(f"Finished feature extraction in {(time.time() - start_time):.2f}s.") @@ -325,10 +346,119 @@ def run_colmap_on_scene(scene_dir): reconstruction = pycolmap.Reconstruction(recon_path) for cam in reconstruction.cameras.values(): - cam.model = 'SIMPLE_PINHOLE' + cam.model = "SIMPLE_PINHOLE" cam.params = cam.params[:3] # Keep only [f, cx, cy] reconstruction.write(recon_path) print(f"Total pipeline time: {(time.time() - start_time):.2f}s.") + +def process_input_for_colmap(input_path, num_ref_views, output_dir, max_size=1024): + """ + Helper function to read frames from video or image folder, select optimal ones, + and save them to the output_dir/images. + This is based on process_input from gradio_demo.py. + Renamed to avoid potential confusion if 'process_input' is too generic. + """ + frames_to_save_in_scene_dir = [] + if isinstance(input_path, (str, os.PathLike)): # If input_path is a path string + if os.path.isdir(input_path): # If it's a directory of images + print(f"Processing image directory: {input_path}") + raw_frames = [] + image_files = sorted( + [ + f + for f in os.listdir(input_path) + if f.lower().endswith(("jpg", "jpeg", "png")) + ] + ) + for img_file in image_files: + img = Image.open(os.path.join(input_path, img_file)).convert("RGB") + # Resize if necessary, similar to video frames + width, height = img.size + if max(width, height) > max_size: + scale = max_size / max(width, height) + new_width = int(width * scale) + new_height = int(height * scale) + img = img.resize((new_width, new_height), Image.LANCZOS) + raw_frames.append(np.array(img)) + else: # If it's a single video file path + print(f"Processing video file: {input_path}") + raw_frames = read_video_frames(video_input=input_path, max_size=max_size) + elif hasattr( + input_path, "name" + ): # If input_path is a file-like object (e.g., from Gradio upload) + print(f"Processing uploaded video file: {input_path.name}") + raw_frames = read_video_frames(video_input=input_path.name, max_size=max_size) + else: + raise ValueError(f"Unsupported input_path type: {type(input_path)}") + + if not raw_frames: + print("No frames extracted or read.") + return [] + + frames_scores = preprocess_frames( + raw_frames + ) # Assuming preprocess_frames takes list of numpy arrays + selected_frames_indices = select_optimal_frames( + scores=frames_scores, k=min(num_ref_views, len(raw_frames)) + ) + frames_to_save_in_scene_dir = [ + raw_frames[frame_idx] for frame_idx in selected_frames_indices + ] + + # The 'output_dir' here is the scene_dir where 'images' subfolder will be created + save_frames_to_scene_dir(frames=frames_to_save_in_scene_dir, scene_dir=output_dir) + return frames_to_save_in_scene_dir # Returns the list of selected frame data (numpy arrays) + + +def orchestrate_video_to_colmap_scene( + input_path, + num_ref_views, + max_size=1024, + base_work_dir="../outputs/processed_scenes_util", +): + """ + Orchestrates the full video/image 
folder preprocessing pipeline: + 1. Creates a temporary scene directory. + 2. Reads frames, selects optimal ones, saves them. + 3. Runs COLMAP on the scene. + Returns the list of selected frame image data and the path to the COLMAP processed scene directory. + This is based on preprocess_input from gradio_demo.py. + """ + # Create a unique scene directory + # If input_path is a file object, use its name. If a path string, use its basename. + input_name_part = "" + if hasattr(input_path, "name") and isinstance(input_path.name, str): + input_name_part = os.path.splitext(os.path.basename(input_path.name))[0] + elif isinstance(input_path, (str, os.PathLike)): + input_name_part = os.path.splitext(os.path.basename(input_path))[0] + else: # Fallback for other types or if name is not available + input_name_part = "temp_scene" + + # Using a structured output directory instead of pure tempfile.mkdtemp for easier inspection + # scene_dir_parent = tempfile.mkdtemp() # Original approach + + # Ensure base_work_dir exists + os.makedirs(base_work_dir, exist_ok=True) + # Create a unique subdirectory within base_work_dir + timestamp = time.strftime("%Y%m%d-%H%M%S") + scene_dir = os.path.join(base_work_dir, f"{input_name_part}_{timestamp}") + + os.makedirs(scene_dir, exist_ok=True) + print(f"Created scene directory for COLMAP: {scene_dir}") + + selected_frames_data = process_input_for_colmap( + input_path, num_ref_views, scene_dir, max_size + ) + if not selected_frames_data: + print(f"Frame processing failed for {input_path}. Aborting COLMAP.") + # Optionally clean up scene_dir if it's truly temporary and processing failed + # shutil.rmtree(scene_dir) + return [], None + + run_colmap_on_scene(scene_dir) # This function should create scene_dir/sparse/0 + + print(f"COLMAP processing complete for {scene_dir}") + return selected_frames_data, scene_dir From 9fc32c2ec4bbc5966b014b6c7e4bd24af93818ee Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Wed, 4 Jun 2025 09:33:21 +0900 Subject: [PATCH 08/37] fix Namespace object has no attribute num_ref_views error --- docker-compose.yml | 4 +- gradio_demo.py | 88 ++++++++++++---------------- notebooks/fit_model_to_scene_full.py | 24 +++++--- source/utils_preprocess.py | 14 +++-- 4 files changed, 65 insertions(+), 65 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 6801517..2f36e2c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -13,4 +13,6 @@ services: capabilities: [gpu] # Request GPU capabilities volumes: - ./data:/EDGS/data # Example: map a local 'data' folder to '/EDGS/data' in the container - - ./output:/EDGS/output # Example: map a local 'output' folder \ No newline at end of file + - ./output:/EDGS/output # Example: map a local 'output' folder + - ./scripts:/EDGS/scripts # Example: map a local 'scripts' folder + - ./sources:/EDGS/sources # Example: map a local 'sources' folder \ No newline at end of file diff --git a/gradio_demo.py b/gradio_demo.py index d91841c..3ca0078 100644 --- a/gradio_demo.py +++ b/gradio_demo.py @@ -418,65 +418,51 @@ def start_training(scene_dir, num_ref_views, num_corrs, num_steps): log_output_box = gr.Textbox(label="đŸ–Ĩī¸ Log", lines=10, interactive=False) - def on_preprocess_click( - input_file_obj, num_ref_views_val - ): # input_file_obj is from gr.File - # 'input_file_obj' from gr.File is a tempfile._TemporaryFileWrapper object - # It has a .name attribute which is the path to the temporary file + def on_preprocess_click(input_file_obj, num_ref_views_val): + """ + Handles the preprocess button 
click. + Calls the main preprocessing orchestrator and updates the UI. + """ if input_file_obj is None: + # Handles case where no file is uploaded if input_file component is not required + # or if the user clears the selection. + # For gr.Examples, input_file_obj will be a list containing a list of paths. + # For direct upload with file_count="multiple", it's a list of file objects. + # For direct upload with file_count="single", it's a single file object. + # orchestrate_video_to_colmap_scene should be robust to these. gr.Warning("Please upload a file or select an example.") return None, None, gr.update(interactive=False) - # Handle single file vs. list of files (if file_count="multiple") - actual_input_path = None - if isinstance( - input_file_obj, list - ): # If file_count="multiple" and multiple files are uploaded - if not input_file_obj: - gr.Warning("No file provided in the list.") - return None, None, gr.update(interactive=False) - actual_input_path = input_file_obj[ - 0 - ].name # Process the first file for simplicity, or adapt - # If you expect a folder of images, you might need to handle this differently, - # as Gradio's gr.File with file_count="multiple" gives a list of temp file objects. - # The original process_input had logic for os.path.isdir(input_path). - # If users are meant to upload a folder, gr.File might not be the best component, - # or you'd need to zip/unzip. For now, assuming single video or first of multiple. - elif hasattr(input_file_obj, "name"): # Single file object - actual_input_path = input_file_obj.name - else: - gr.Warning("Invalid input file.") - return None, None, gr.update(interactive=False) - - # Use the refactored preprocessing function - # The first return value 'images_data' is a list of numpy arrays (the frame pixel data) - images_data, scene_dir_val = orchestrate_video_to_colmap_scene( - actual_input_path, # Pass the path of the uploaded temp file - num_ref_views_val, - max_size=1024, # Or get from a Gradio component - base_work_dir="./gradio_processed_scenes", # Store Gradio outputs in a specific place + selected_bgr_frames, scene_dir = orchestrate_video_to_colmap_scene( + gradio_input_obj=input_file_obj, # Pass the raw Gradio file object(s) + num_ref_views=num_ref_views_val, + max_size=1024, + base_work_dir="./gradio_processed_scenes", # Or configure as needed ) - if not scene_dir_val: - gr.Error("Preprocessing failed. Check logs.") - return None, None, gr.update(interactive=False) - # Convert numpy arrays (BGR from OpenCV) to RGB for Gradio gallery - gallery_images = [] - if images_data: - for img_data_np in images_data: - if isinstance(img_data_np, np.ndarray): - # Assuming frames from read_video_frames are BGR, convert to RGB for PIL/Gradio - gallery_images.append( - Image.fromarray(cv2.cvtColor(img_data_np, cv2.COLOR_BGR2RGB)) - ) - else: # If images_data contains PIL Images already - gallery_images.append(img_data_np) + if not scene_dir: # Indicates preprocessing failed + gr.Error("Preprocessing failed. Please check the logs or input file.") + return ( + None, + None, + gr.update(interactive=False), + ) # Keep gallery empty, scene_dir None, button disabled + + # Convert BGR numpy arrays to RGB for Gradio gallery. + # gr.Gallery can display a list of NumPy arrays (H, W, C) or PIL Images. + # Assuming selected_bgr_frames contains BGR NumPy arrays. 
+ gallery_display_images = [] + if selected_bgr_frames: + gallery_display_images = [ + frame[..., ::-1] + for frame in selected_bgr_frames + if isinstance(frame, np.ndarray) + ] return ( - gr.update(value=gallery_images), - scene_dir_val, - gr.update(interactive=True), + gr.update(value=gallery_display_images), + scene_dir, # Update the scene_dir_state + gr.update(interactive=True), # Enable the 'Start Reconstruction' button ) def on_start_click(scene_dir, num_ref_views, num_corrs, num_steps): diff --git a/notebooks/fit_model_to_scene_full.py b/notebooks/fit_model_to_scene_full.py index 0b977e5..f56a976 100644 --- a/notebooks/fit_model_to_scene_full.py +++ b/notebooks/fit_model_to_scene_full.py @@ -40,6 +40,18 @@ default="../assets/examples/video_fruits.mp4", help="Path to the input video file.", ) +parser.add_argument( # Add this argument + "--num_ref_views", + type=int, + default=16, # Or any other sensible default + help="Number of reference views to extract from video for COLMAP.", +) +parser.add_argument( # Add this argument based on your previous script structure + "--processed_scenes_dir", + type=str, + default="../output/processed_scenes", # Or any other sensible default + help="Base directory where processed COLMAP scenes will be stored.", +) args = parser.parse_args() # --- End argument parsing --- @@ -51,29 +63,25 @@ # # 3. Init input parameters # ## 3.1 Optionally preprocess video -PATH_TO_VIDEO = args.video_path -num_ref_views = 16 # how many frames you want to extract from video and colmap - # process the input video -if PATH_TO_VIDEO and os.path.exists(PATH_TO_VIDEO): - print(f"Starting video processing for: {PATH_TO_VIDEO}") +if os.path.exists(args.video_path): + print(f"Starting video processing for: {args.video_path}") try: # The first return value 'images_data' might not be directly used by the trainer # if the Scene object loads everything from the COLMAP directory. _, scene_dir = orchestrate_video_to_colmap_scene( - PATH_TO_VIDEO, + args.video_path, args.num_ref_views, # Assuming you added this arg max_size=1024, # Or make it an arg base_work_dir=args.processed_scenes_dir, # Assuming you added this arg ) if scene_dir is None: - print(f"Failed to process video {PATH_TO_VIDEO}. Exiting.") + print(f"Failed to process video {args.video_path}. Exiting.") sys.exit(1) except Exception as e: print(f"Error during video preprocessing: {e}") sys.exit(1) - # Update the config with your settings cfg.gs.dataset.images = "images" cfg.gs.opt.TEST_CAM_IDX_TO_LOG = 12 diff --git a/source/utils_preprocess.py b/source/utils_preprocess.py index d90a03f..cf0717b 100644 --- a/source/utils_preprocess.py +++ b/source/utils_preprocess.py @@ -12,8 +12,6 @@ from PIL import Image from tqdm import tqdm -WORKDIR = "../outputs/" - def get_rotation_moviepy(video_path): clip = VideoFileClip(video_path) @@ -417,15 +415,21 @@ def orchestrate_video_to_colmap_scene( input_path, num_ref_views, max_size=1024, - base_work_dir="../outputs/processed_scenes_util", + base_work_dir="../output/processed_scenes", ): """ Orchestrates the full video/image folder preprocessing pipeline: 1. Creates a temporary scene directory. 2. Reads frames, selects optimal ones, saves them. 3. Runs COLMAP on the scene. - Returns the list of selected frame image data and the path to the COLMAP processed scene directory. - This is based on preprocess_input from gradio_demo.py. + Args: + input_path (str or file-like): Path to video file or directory of images. + num_ref_views (int): Number of reference views to select. 
+ max_size (int): Maximum size for width or height after resizing. + base_work_dir (str): Base directory for temporary scene directories. + Returns: + the list of selected frame image data and the path to the COLMAP processed scene directory. + This is based on preprocess_input from gradio_demo.py. """ # Create a unique scene directory # If input_path is a file object, use its name. If a path string, use its basename. From 6d7a5c03175d7d240819a0c0f3f043419992c2ff Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Fri, 6 Jun 2025 10:55:48 +0900 Subject: [PATCH 09/37] fix error comes from wrong argument name gradio_obj --- gradio_demo.py | 2 +- source/utils_preprocess.py | 39 +++++++++++++++++++++++++++++++------- 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/gradio_demo.py b/gradio_demo.py index 3ca0078..b804fcc 100644 --- a/gradio_demo.py +++ b/gradio_demo.py @@ -434,7 +434,7 @@ def on_preprocess_click(input_file_obj, num_ref_views_val): return None, None, gr.update(interactive=False) selected_bgr_frames, scene_dir = orchestrate_video_to_colmap_scene( - gradio_input_obj=input_file_obj, # Pass the raw Gradio file object(s) + input_path=input_file_obj, # Pass the raw Gradio file object(s) num_ref_views=num_ref_views_val, max_size=1024, base_work_dir="./gradio_processed_scenes", # Or configure as needed diff --git a/source/utils_preprocess.py b/source/utils_preprocess.py index cf0717b..7d26d88 100644 --- a/source/utils_preprocess.py +++ b/source/utils_preprocess.py @@ -431,15 +431,40 @@ def orchestrate_video_to_colmap_scene( the list of selected frame image data and the path to the COLMAP processed scene directory. This is based on preprocess_input from gradio_demo.py. """ - # Create a unique scene directory - # If input_path is a file object, use its name. If a path string, use its basename. 
- input_name_part = "" - if hasattr(input_path, "name") and isinstance(input_path.name, str): + actual_input_path_str = None + input_name_part = "temp_scene" # Default + + if hasattr(input_path, "name") and isinstance( + input_path.name, str + ): # Gradio file object + actual_input_path_str = input_path.name input_name_part = os.path.splitext(os.path.basename(input_path.name))[0] - elif isinstance(input_path, (str, os.PathLike)): + elif isinstance(input_path, (str, os.PathLike)): # Direct path string + actual_input_path_str = str(input_path) input_name_part = os.path.splitext(os.path.basename(input_path))[0] - else: # Fallback for other types or if name is not available - input_name_part = "temp_scene" + elif ( + isinstance(input_path, list) and input_path + ): # List of Gradio file objects or paths (from gr.Examples) + # Handle list, e.g., take the first item + first_item = input_path[0] + if hasattr(first_item, "name") and isinstance(first_item.name, str): + actual_input_path_str = first_item.name + input_name_part = os.path.splitext(os.path.basename(first_item.name))[0] + elif isinstance(first_item, (str, os.PathLike)): + actual_input_path_str = str(first_item) + input_name_part = os.path.splitext(os.path.basename(first_item))[0] + else: + print(f"Warning: Unsupported item type in input list: {type(first_item)}") + return [], None + else: + print(f"Error: Unsupported input_path type: {type(input_path)}") + return [], None + + if not actual_input_path_str: + print("Error: Could not determine a valid input file path.") + return [], None + + print(f"Orchestrating COLMAP scene from: {actual_input_path_str}") # Using a structured output directory instead of pure tempfile.mkdtemp for easier inspection # scene_dir_parent = tempfile.mkdtemp() # Original approach From c8f1af8c06e69a541b45b5c31ee61e7b47f63d59 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Fri, 6 Jun 2025 11:05:57 +0900 Subject: [PATCH 10/37] move script files to script/ --- README.md | 4 ++-- docker-compose.yml | 4 ++-- full_eval.py => script/full_eval.py | 0 gradio_demo.py => script/gradio_demo.py | 0 install.sh => script/install.sh | 0 metrics.py => script/metrics.py | 0 train.py => script/train.py | 0 7 files changed, 4 insertions(+), 4 deletions(-) rename full_eval.py => script/full_eval.py (100%) rename gradio_demo.py => script/gradio_demo.py (100%) rename install.sh => script/install.sh (100%) rename metrics.py => script/metrics.py (100%) rename train.py => script/train.py (100%) diff --git a/README.md b/README.md index 06e9740..4996bc0 100644 --- a/README.md +++ b/README.md @@ -75,7 +75,7 @@ You can install it just: docker compose up -d ``` -or you can install with running `install.sh`. +or you can install with running `script/install.sh`. 
## đŸ“Ļ Data @@ -91,7 +91,7 @@ We evaluated on the following datasets: Use gradle demo; ``` docker compose exec edgs-app bash -python gradio_demo.py --port 7862 +python script/gradio_demo.py --port 7862 ``` #### Option B diff --git a/docker-compose.yml b/docker-compose.yml index 2f36e2c..f258c6d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -14,5 +14,5 @@ services: volumes: - ./data:/EDGS/data # Example: map a local 'data' folder to '/EDGS/data' in the container - ./output:/EDGS/output # Example: map a local 'output' folder - - ./scripts:/EDGS/scripts # Example: map a local 'scripts' folder - - ./sources:/EDGS/sources # Example: map a local 'sources' folder \ No newline at end of file + - ./script:/EDGS/script # Example: map a local 'scripts' folder + - ./source:/EDGS/source # Example: map a local 'sources' folder \ No newline at end of file diff --git a/full_eval.py b/script/full_eval.py similarity index 100% rename from full_eval.py rename to script/full_eval.py diff --git a/gradio_demo.py b/script/gradio_demo.py similarity index 100% rename from gradio_demo.py rename to script/gradio_demo.py diff --git a/install.sh b/script/install.sh similarity index 100% rename from install.sh rename to script/install.sh diff --git a/metrics.py b/script/metrics.py similarity index 100% rename from metrics.py rename to script/metrics.py diff --git a/train.py b/script/train.py similarity index 100% rename from train.py rename to script/train.py From cb17a2552ad49ed8769ca68dda7ce3e6b8834614 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Fri, 6 Jun 2025 11:51:37 +0900 Subject: [PATCH 11/37] fix import in gradio_demo.py --- .../fit_model_to_scene_full.py | 0 script/gradio_demo.py | 6 ++ script/train.py | 80 +++++++++++-------- 3 files changed, 51 insertions(+), 35 deletions(-) rename {notebooks => script}/fit_model_to_scene_full.py (100%) diff --git a/notebooks/fit_model_to_scene_full.py b/script/fit_model_to_scene_full.py similarity index 100% rename from notebooks/fit_model_to_scene_full.py rename to script/fit_model_to_scene_full.py diff --git a/script/gradio_demo.py b/script/gradio_demo.py index b804fcc..edc7240 100644 --- a/script/gradio_demo.py +++ b/script/gradio_demo.py @@ -13,6 +13,12 @@ import torch from hydra import compose, initialize +# Add the project root directory to sys.path +# so that modules from 'source' can be imported. +project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) +if project_root not in sys.path: + sys.path.insert(0, project_root) + from source.trainer import EDGSTrainer from source.utils_aux import set_seed from source.utils_preprocess import ( diff --git a/script/train.py b/script/train.py index 646409a..e95cb63 100644 --- a/script/train.py +++ b/script/train.py @@ -1,63 +1,73 @@ import os -from source.trainer import EDGSTrainer -from source.utils_aux import set_seed +import sys +from argparse import Namespace + +import hydra import omegaconf import wandb -import hydra -from argparse import Namespace -from omegaconf import OmegaConf + +# Add the project root directory to sys.path +# so that modules from 'source' can be imported. 
+project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) +if project_root not in sys.path: + sys.path.insert(0, project_root) + +from source.trainer import EDGSTrainer +from source.utils_aux import set_seed @hydra.main(config_path="configs", config_name="train", version_base="1.2") def main(cfg: omegaconf.DictConfig): - _ = wandb.init(entity=cfg.wandb.entity, - project=cfg.wandb.project, - config=omegaconf.OmegaConf.to_container(cfg, resolve=True, throw_on_missing=True), - tags=[cfg.wandb.tag], - name = cfg.wandb.name, - mode = cfg.wandb.mode) + _ = wandb.init( + entity=cfg.wandb.entity, + project=cfg.wandb.project, + config=omegaconf.OmegaConf.to_container( + cfg, resolve=True, throw_on_missing=True + ), + tags=[cfg.wandb.tag], + name=cfg.wandb.name, + mode=cfg.wandb.mode, + ) omegaconf.OmegaConf.resolve(cfg) set_seed(cfg.seed) # Init output folder print("Output folder: {}".format(cfg.gs.dataset.model_path)) os.makedirs(cfg.gs.dataset.model_path, exist_ok=True) - with open(os.path.join(cfg.gs.dataset.model_path, "cfg_args"), 'w') as cfg_log_f: + with open(os.path.join(cfg.gs.dataset.model_path, "cfg_args"), "w") as cfg_log_f: params = { - "sh_degree": 3, - "source_path": cfg.gs.dataset.source_path, - "model_path": cfg.gs.dataset.model_path, - "images": cfg.gs.dataset.images, - "depths": "", - "resolution": -1, - "_white_background": cfg.gs.dataset.white_background, - "train_test_exp": False, - "data_device": cfg.gs.dataset.data_device, - "eval": False, - "convert_SHs_python": False, - "compute_cov3D_python": False, - "debug": False, - "antialiasing": False - } + "sh_degree": 3, + "source_path": cfg.gs.dataset.source_path, + "model_path": cfg.gs.dataset.model_path, + "images": cfg.gs.dataset.images, + "depths": "", + "resolution": -1, + "_white_background": cfg.gs.dataset.white_background, + "train_test_exp": False, + "data_device": cfg.gs.dataset.data_device, + "eval": False, + "convert_SHs_python": False, + "compute_cov3D_python": False, + "debug": False, + "antialiasing": False, + } cfg_log_f.write(str(Namespace(**params))) # Init both agents - gs = hydra.utils.instantiate(cfg.gs) + gs = hydra.utils.instantiate(cfg.gs) # Init trainer and launch training - trainer = EDGSTrainer(GS=gs, - training_config=cfg.gs.opt, - device=cfg.device) - + trainer = EDGSTrainer(GS=gs, training_config=cfg.gs.opt, device=cfg.device) + trainer.load_checkpoints(cfg.load) trainer.timer.start() - trainer.init_with_corr(cfg.init_wC) + trainer.init_with_corr(cfg.init_wC) trainer.train(cfg.train) - + # All done wandb.finish() print("\nTraining complete.") + if __name__ == "__main__": main() - From 6f45cef34dc9b4dfc67b2bc47cf95e3cddc71c6b Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Fri, 6 Jun 2025 12:02:29 +0900 Subject: [PATCH 12/37] fix process colmap error --- source/utils_preprocess.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/source/utils_preprocess.py b/source/utils_preprocess.py index 7d26d88..c3b47f9 100644 --- a/source/utils_preprocess.py +++ b/source/utils_preprocess.py @@ -423,7 +423,7 @@ def orchestrate_video_to_colmap_scene( 2. Reads frames, selects optimal ones, saves them. 3. Runs COLMAP on the scene. Args: - input_path (str or file-like): Path to video file or directory of images. + input_path (str or file-like): Path string, a Gradio file object, or a list (e.g., from gr.Examples). num_ref_views (int): Number of reference views to select. max_size (int): Maximum size for width or height after resizing. 
base_work_dir (str): Base directory for temporary scene directories. @@ -444,13 +444,23 @@ def orchestrate_video_to_colmap_scene( input_name_part = os.path.splitext(os.path.basename(input_path))[0] elif ( isinstance(input_path, list) and input_path - ): # List of Gradio file objects or paths (from gr.Examples) - # Handle list, e.g., take the first item - first_item = input_path[0] - if hasattr(first_item, "name") and isinstance(first_item.name, str): + ): # Handle list: take the first item. + # gr.Examples often wraps the path in another list, e.g., [['path/to/example.mp4']] + # So, we might need to unwrap it. + first_item_candidate = input_path[0] + if ( + isinstance(first_item_candidate, list) and first_item_candidate + ): # Check for nested list + first_item = first_item_candidate[0] + else: + first_item = first_item_candidate + + if hasattr(first_item, "name") and isinstance( + first_item.name, str + ): # Gradio file object in list actual_input_path_str = first_item.name input_name_part = os.path.splitext(os.path.basename(first_item.name))[0] - elif isinstance(first_item, (str, os.PathLike)): + elif isinstance(first_item, (str, os.PathLike)): # Path string in list actual_input_path_str = str(first_item) input_name_part = os.path.splitext(os.path.basename(first_item))[0] else: @@ -479,7 +489,7 @@ def orchestrate_video_to_colmap_scene( print(f"Created scene directory for COLMAP: {scene_dir}") selected_frames_data = process_input_for_colmap( - input_path, num_ref_views, scene_dir, max_size + actual_input_path_str, num_ref_views, scene_dir, max_size ) if not selected_frames_data: print(f"Frame processing failed for {input_path}. Aborting COLMAP.") From fa2feac0130b76a743336f6ae2d6cd6af072268b Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Fri, 6 Jun 2025 13:29:57 +0900 Subject: [PATCH 13/37] fix relative directory in gradle_demo.py --- docker-compose.yml | 2 +- script/fit_model_to_scene_full.py | 2 +- script/gradio_demo.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index f258c6d..ca1d0a8 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -13,6 +13,6 @@ services: capabilities: [gpu] # Request GPU capabilities volumes: - ./data:/EDGS/data # Example: map a local 'data' folder to '/EDGS/data' in the container - - ./output:/EDGS/output # Example: map a local 'output' folder + - ./outputs:/EDGS/outputs # Example: map a local 'output' folder - ./script:/EDGS/script # Example: map a local 'scripts' folder - ./source:/EDGS/source # Example: map a local 'sources' folder \ No newline at end of file diff --git a/script/fit_model_to_scene_full.py b/script/fit_model_to_scene_full.py index f56a976..602c880 100644 --- a/script/fit_model_to_scene_full.py +++ b/script/fit_model_to_scene_full.py @@ -49,7 +49,7 @@ parser.add_argument( # Add this argument based on your previous script structure "--processed_scenes_dir", type=str, - default="../output/processed_scenes", # Or any other sensible default + default="../outputs/processed_scenes", # Or any other sensible default help="Base directory where processed COLMAP scenes will be stored.", ) args = parser.parse_args() diff --git a/script/gradio_demo.py b/script/gradio_demo.py index edc7240..50fabdd 100644 --- a/script/gradio_demo.py +++ b/script/gradio_demo.py @@ -77,11 +77,11 @@ def run_training_pipeline( num_steps=1_000, mode_toggle="Ours (EDGS)", ): - with initialize(config_path="./configs", version_base="1.1"): + with initialize(config_path="../configs", version_base="1.1"): 
cfg = compose(config_name="train") scene_name = os.path.basename(scene_dir) - model_output_dir = f"./outputs/{scene_name}_trained" + model_output_dir = f"../outputs/{scene_name}_trained" cfg.wandb.mode = "disabled" cfg.gs.dataset.model_path = model_output_dir From 88b356ac26e78ce8321277512df805fe393cfcba Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Fri, 6 Jun 2025 15:02:38 +0900 Subject: [PATCH 14/37] debug gradle_demoo to pass accessible copied result. --- script/gradio_demo.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/script/gradio_demo.py b/script/gradio_demo.py index 50fabdd..07ec2ca 100644 --- a/script/gradio_demo.py +++ b/script/gradio_demo.py @@ -217,15 +217,20 @@ def run_training_pipeline( frames=path_renderings, output_path=final_video_path, fps=30, center_crop=0.85 ) MODEL_PATH = cfg.gs.dataset.model_path - ply_path = os.path.join( + original_ply_path = os.path.join( # Renamed for clarity cfg.gs.dataset.model_path, f"point_cloud/iteration_{trainer.gs_step}/point_cloud.ply", ) - shutil.copy( - ply_path, os.path.join(STATIC_FILE_SERVING_FOLDER, "point_cloud_final.ply") + # This is the path to the copied file in an allowed directory + copied_ply_path_for_serving = os.path.join( + STATIC_FILE_SERVING_FOLDER, "point_cloud_final.ply" ) + shutil.copy(original_ply_path, copied_ply_path_for_serving) - return final_video_path, ply_path + return ( + final_video_path, + copied_ply_path_for_serving, + ) # Return the path to the copied .ply file # Gradio Interface From 4b6bade78a2462f5d80868fd043f8edf731833c9 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Fri, 6 Jun 2025 15:18:06 +0900 Subject: [PATCH 15/37] fix process_input() to process_input_for_colmap() --- script/gradio_demo.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/script/gradio_demo.py b/script/gradio_demo.py index 07ec2ca..8684e9e 100644 --- a/script/gradio_demo.py +++ b/script/gradio_demo.py @@ -23,6 +23,7 @@ from source.utils_aux import set_seed from source.utils_preprocess import ( orchestrate_video_to_colmap_scene, # Import the new/refactored function + process_input_for_colmap, run_colmap_on_scene, ) from source.visualization import ( @@ -320,7 +321,9 @@ def run_full_pipeline(input_path, num_ref_views, num_corrs, max_size=1024): scene_dir = os.path.join(tmpdirname, "scene") os.makedirs(scene_dir, exist_ok=True) - selected_frames = process_input(input_path, num_ref_views, scene_dir, max_size) + selected_frames = process_input_for_colmap( + input_path, num_ref_views, scene_dir, max_size + ) run_colmap_on_scene(scene_dir) return selected_frames, scene_dir From dfd3bb4f4d9ca3f56cdc156ee47341c8f5f5d62f Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Fri, 6 Jun 2025 16:16:00 +0900 Subject: [PATCH 16/37] fix using abs path to be able to run fit_model_to_scene_full.py script from anywhere. 
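A minimal sketch of the path bootstrap this patch applies (it mirrors the hunk below; the only assumption is that the script lives one level below the repository root, e.g. in `script/`):

```python
import os
import sys

# Resolve the repository root from this file's location so imports from
# `source` work no matter which directory the script is launched from.
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
if project_root not in sys.path:
    sys.path.insert(0, project_root)
```

Inserting at position 0, rather than appending, lets the in-repo `source` package take precedence over any identically named installed package.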
--- README.md | 3 +-- script/fit_model_to_scene_full.py | 22 +++++++++++++++------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 4996bc0..1207e3e 100644 --- a/README.md +++ b/README.md @@ -98,8 +98,7 @@ python script/gradio_demo.py --port 7862 From command line; ``` docker compose exec edgs-app bash -cd notebooks -python fit_model_to_scene_full.py --video_path +python script/fit_model_to_scene_full.py --video_path ``` #### Option C diff --git a/script/fit_model_to_scene_full.py b/script/fit_model_to_scene_full.py index 602c880..63d6f70 100644 --- a/script/fit_model_to_scene_full.py +++ b/script/fit_model_to_scene_full.py @@ -22,8 +22,12 @@ from matplotlib import pyplot as plt from omegaconf import OmegaConf -sys.path.append("../") -sys.path.append("../submodules/gaussian-splatting") +# Add the project root directory to sys.path +# so that modules from 'source' can be imported. +project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) +if project_root not in sys.path: + sys.path.insert(0, project_root) +# sys.path.append("../submodules/gaussian-splatting") from source.trainer import EDGSTrainer from source.utils_aux import set_seed from source.utils_preprocess import ( @@ -37,19 +41,23 @@ parser.add_argument( "--video_path", type=str, - default="../assets/examples/video_fruits.mp4", + default=os.path.join( + project_root, "assets", "examples", "video_fruits.mp4" + ), # Use project_root help="Path to the input video file.", ) -parser.add_argument( # Add this argument +parser.add_argument( "--num_ref_views", type=int, - default=16, # Or any other sensible default + default=16, help="Number of reference views to extract from video for COLMAP.", ) -parser.add_argument( # Add this argument based on your previous script structure +parser.add_argument( "--processed_scenes_dir", type=str, - default="../outputs/processed_scenes", # Or any other sensible default + default=os.path.join( + project_root, "outputs", "processed_scenes" + ), # Use project_root help="Base directory where processed COLMAP scenes will be stored.", ) args = parser.parse_args() From ebf25904a3e5e5d090efb190d6b9702f4853715c Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Tue, 10 Jun 2025 11:58:43 +0900 Subject: [PATCH 17/37] let it be able to open jupyter notebook from docker --- Dockerfile | 2 +- docker-compose.yml | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index d6782de..c5e0776 100644 --- a/Dockerfile +++ b/Dockerfile @@ -43,7 +43,7 @@ RUN /bin/bash -c "source activate edgs && \ pip install -e ./submodules/gaussian-splatting/submodules/simple-knn && \ pip install pycolmap wandb hydra-core tqdm torchmetrics lpips matplotlib rich plyfile imageio imageio-ffmpeg && \ pip install -e ./submodules/RoMa && \ - pip install gradio plotly scikit-learn moviepy==2.1.1 ffmpeg open3d" + pip install gradio plotly scikit-learn moviepy==2.1.1 ffmpeg open3 jupyterlab matplotlib" # Expose the port for Gradio EXPOSE 7862 diff --git a/docker-compose.yml b/docker-compose.yml index ca1d0a8..236adde 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,7 +3,8 @@ services: build: . 
# Instructs Docker Compose to build using the Dockerfile in the current directory image: edgs-app # This is the name of the image you built ports: - - "7862:7862" # Map port 7862 on the host to port 7862 in the container + - "7862:7862" # For Gradio, if you still use it + - "8888:8888" # Map port 8888 for JupyterLab deploy: resources: reservations: @@ -15,4 +16,13 @@ services: - ./data:/EDGS/data # Example: map a local 'data' folder to '/EDGS/data' in the container - ./outputs:/EDGS/outputs # Example: map a local 'output' folder - ./script:/EDGS/script # Example: map a local 'scripts' folder - - ./source:/EDGS/source # Example: map a local 'sources' folder \ No newline at end of file + - ./source:/EDGS/source # Example: map a local 'sources' folder + # Command to start JupyterLab + # --ip=0.0.0.0 makes it accessible from outside the container + # --allow-root is often needed when running in Docker + # --no-browser prevents it from trying to open a browser inside the container + # --notebook-dir specifies the directory JupyterLab should open in + command: > + sh -c "jupyter lab --ip=0.0.0.0 --port=8888 --no-browser --allow-root --notebook-dir=/EDGS/notebooks" + stdin_open: true # Keep STDIN open for interactive processes + tty: true # Allocate a TTY \ No newline at end of file From 221d8b9cb2b7ac8af5c9177f5cf74666efba9195 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Tue, 10 Jun 2025 18:14:38 +0900 Subject: [PATCH 18/37] let it be able to run A,B,C options in README --- Dockerfile | 2 +- README.md | 21 +++- docker-compose.yml | 9 +- notebooks/fit_model_to_scene_full.ipynb | 142 ++++++++++++++++++++++-- 4 files changed, 156 insertions(+), 18 deletions(-) diff --git a/Dockerfile b/Dockerfile index c5e0776..fc1ad50 100644 --- a/Dockerfile +++ b/Dockerfile @@ -43,7 +43,7 @@ RUN /bin/bash -c "source activate edgs && \ pip install -e ./submodules/gaussian-splatting/submodules/simple-knn && \ pip install pycolmap wandb hydra-core tqdm torchmetrics lpips matplotlib rich plyfile imageio imageio-ffmpeg && \ pip install -e ./submodules/RoMa && \ - pip install gradio plotly scikit-learn moviepy==2.1.1 ffmpeg open3 jupyterlab matplotlib" + pip install gradio plotly scikit-learn moviepy==2.1.1 ffmpeg open3d jupyterlab matplotlib" # Expose the port for Gradio EXPOSE 7862 diff --git a/README.md b/README.md index 1207e3e..803f139 100644 --- a/README.md +++ b/README.md @@ -95,13 +95,32 @@ python script/gradio_demo.py --port 7862 ``` #### Option B -From command line; +From command line. +First you need to create wandb account. +Then, edit configs/trainlyaml; wandb's "entity" for your user name, "project" for your created project name. + ``` docker compose exec edgs-app bash python script/fit_model_to_scene_full.py --video_path ``` #### Option C +Using Jupyter lab. +First edit configs/trainlyaml's wandb part. +You need to create wandb account and set it to the config file. +``` +docker compose up edgs-app bash +``` +And in the terminal in the docker container, +``` +jupyter lab --ip=0.0.0.0 --port=8888 --no-browser --allow-root --notebook-dir=notebooks +``` +After JupyterLab starts, it will print URLs to the terminal. Look for a URL containing a token, like: + `http://127.0.0.1:8888/lab?token=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx` +Open `http://localhost:8888` (or `http://127.0.0.1:8888`) in your host browser. +When prompted for a "Password or token", paste the `xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx` part from the URL in step 4 into the field and log in. 
Alternatively, you can paste the full URL from step 4 directly into your browser. + +#### Option D You can use the same data format as the [3DGS project](https://github.com/graphdeco-inria/gaussian-splatting?tab=readme-ov-file#processing-your-own-scenes). Please follow their guide to prepare your scene. Expected folder structure: diff --git a/docker-compose.yml b/docker-compose.yml index 236adde..3bc5aec 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -17,12 +17,7 @@ services: - ./outputs:/EDGS/outputs # Example: map a local 'output' folder - ./script:/EDGS/script # Example: map a local 'scripts' folder - ./source:/EDGS/source # Example: map a local 'sources' folder - # Command to start JupyterLab - # --ip=0.0.0.0 makes it accessible from outside the container - # --allow-root is often needed when running in Docker - # --no-browser prevents it from trying to open a browser inside the container - # --notebook-dir specifies the directory JupyterLab should open in - command: > - sh -c "jupyter lab --ip=0.0.0.0 --port=8888 --no-browser --allow-root --notebook-dir=/EDGS/notebooks" + - ./config:/EDGS/config # Example: map a local 'config' folder + - ./notebooks:/EDGS/notebooks # Map a local 'notebooks' folder for JupyterLab stdin_open: true # Keep STDIN open for interactive processes tty: true # Allocate a TTY \ No newline at end of file diff --git a/notebooks/fit_model_to_scene_full.ipynb b/notebooks/fit_model_to_scene_full.ipynb index 09e6323..1d0b6c6 100644 --- a/notebooks/fit_model_to_scene_full.ipynb +++ b/notebooks/fit_model_to_scene_full.ipynb @@ -67,10 +67,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "59c64632-e31a-4ead-98a5-4ab0f295e54d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "xFormers not available\n" + ] + } + ], "source": [ "import torch\n", "import numpy as np\n", @@ -96,10 +104,96 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "925adfa3-c311-44b6-a8c4-a31fb7426947", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "gs:\n", + " _target_: source.networks.Warper3DGS\n", + " verbose: true\n", + " viewpoint_stack: null\n", + " sh_degree: 3\n", + " opt:\n", + " iterations: 30000\n", + " position_lr_init: 0.00016\n", + " position_lr_final: 1.6e-06\n", + " position_lr_delay_mult: 0.01\n", + " position_lr_max_steps: 30000\n", + " feature_lr: 0.0025\n", + " opacity_lr: 0.025\n", + " scaling_lr: 0.005\n", + " rotation_lr: 0.001\n", + " percent_dense: 0.01\n", + " lambda_dssim: 0.2\n", + " densification_interval: 100\n", + " opacity_reset_interval: 30000\n", + " densify_from_iter: 500\n", + " densify_until_iter: 15000\n", + " densify_grad_threshold: 0.0002\n", + " random_background: false\n", + " save_iterations:\n", + " - 3000\n", + " - 7000\n", + " - 15000\n", + " - 30000\n", + " batch_size: 64\n", + " exposure_lr_init: 0.01\n", + " exposure_lr_final: 0.0001\n", + " exposure_lr_delay_steps: 0\n", + " exposure_lr_delay_mult: 0.0\n", + " TRAIN_CAM_IDX_TO_LOG: 50\n", + " TEST_CAM_IDX_TO_LOG: 10\n", + " pipe:\n", + " convert_SHs_python: false\n", + " compute_cov3D_python: false\n", + " debug: false\n", + " antialiasing: false\n", + " dataset:\n", + " densify_until_iter: 15000\n", + " source_path: ''\n", + " model_path: ''\n", + " images: images\n", + " resolution: -1\n", + " white_background: false\n", + " data_device: cuda\n", + " eval: false\n", + " depths: ''\n", 
+ " train_test_exp: false\n", + "seed: 228\n", + "wandb:\n", + " mode: online\n", + " entity: m-ogawa-sensyn\n", + " project: Adv3DGS\n", + " group: null\n", + " name: null\n", + " tag: debug\n", + "train:\n", + " gs_epochs: 0\n", + " reduce_opacity: true\n", + " no_densify: false\n", + " max_lr: true\n", + "load:\n", + " gs: null\n", + " gs_step: null\n", + "device: cuda:0\n", + "verbose: true\n", + "init_wC:\n", + " use: true\n", + " matches_per_ref: 15000\n", + " num_refs: 180\n", + " nns_per_ref: 3\n", + " scaling_factor: 0.001\n", + " proj_err_tolerance: 0.01\n", + " roma_model: outdoors\n", + " add_SfM_init: false\n", + "\n" + ] + } + ], "source": [ "with initialize(config_path=\"../configs\", version_base=\"1.1\"):\n", " cfg = compose(config_name=\"train\")\n", @@ -124,7 +218,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "07e4ca51", "metadata": {}, "outputs": [], @@ -177,10 +271,40 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "2056ee6f-dbb6-4ce8-86f0-5b4f9721d093", - "metadata": {}, - "outputs": [], + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Output folder: ./scene_edgsed/\n" + ] + }, + { + "ename": "InstantiationException", + "evalue": "Error in call to target 'source.networks.Warper3DGS':\nAssertionError('Could not recognize scene type!')\nfull_key: gs", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m/opt/conda/envs/edgs/lib/python3.10/site-packages/hydra/_internal/instantiate/_instantiate2.py:92\u001b[0m, in \u001b[0;36m_call_target\u001b[0;34m(_target_, _partial_, args, kwargs, full_key)\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 92\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_target_\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 93\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n", + "File \u001b[0;32m/EDGS/notebooks/../source/networks.py:26\u001b[0m, in \u001b[0;36mWarper3DGS.__init__\u001b[0;34m(self, sh_degree, opt, pipe, dataset, viewpoint_stack, verbose, do_train_test_split)\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpipe \u001b[38;5;241m=\u001b[39m pipe\n\u001b[0;32m---> 26\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mscene \u001b[38;5;241m=\u001b[39m \u001b[43mScene\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdataset\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgaussians\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mshuffle\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 27\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m do_train_test_split:\n", + "File \u001b[0;32m/EDGS/notebooks/../submodules/gaussian-splatting/scene/__init__.py:49\u001b[0m, in \u001b[0;36mScene.__init__\u001b[0;34m(self, args, gaussians, load_iteration, shuffle, 
resolution_scales)\u001b[0m\n\u001b[1;32m 48\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m---> 49\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28;01mFalse\u001b[39;00m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCould not recognize scene type!\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 51\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mloaded_iter:\n", + "\u001b[0;31mAssertionError\u001b[0m: Could not recognize scene type!", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[0;31mInstantiationException\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[5], line 12\u001b[0m\n\u001b[1;32m 10\u001b[0m os\u001b[38;5;241m.\u001b[39mmakedirs(cfg\u001b[38;5;241m.\u001b[39mgs\u001b[38;5;241m.\u001b[39mdataset\u001b[38;5;241m.\u001b[39mmodel_path, exist_ok\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 11\u001b[0m \u001b[38;5;66;03m# Init gs model\u001b[39;00m\n\u001b[0;32m---> 12\u001b[0m gs \u001b[38;5;241m=\u001b[39m \u001b[43mhydra\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mutils\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minstantiate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcfg\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgs\u001b[49m\u001b[43m)\u001b[49m \n\u001b[1;32m 13\u001b[0m trainer \u001b[38;5;241m=\u001b[39m EDGSTrainer(GS\u001b[38;5;241m=\u001b[39mgs,\n\u001b[1;32m 14\u001b[0m training_config\u001b[38;5;241m=\u001b[39mcfg\u001b[38;5;241m.\u001b[39mgs\u001b[38;5;241m.\u001b[39mopt,\n\u001b[1;32m 15\u001b[0m device\u001b[38;5;241m=\u001b[39mcfg\u001b[38;5;241m.\u001b[39mdevice)\n", + "File \u001b[0;32m/opt/conda/envs/edgs/lib/python3.10/site-packages/hydra/_internal/instantiate/_instantiate2.py:226\u001b[0m, in \u001b[0;36minstantiate\u001b[0;34m(config, *args, **kwargs)\u001b[0m\n\u001b[1;32m 223\u001b[0m _convert_ \u001b[38;5;241m=\u001b[39m config\u001b[38;5;241m.\u001b[39mpop(_Keys\u001b[38;5;241m.\u001b[39mCONVERT, ConvertMode\u001b[38;5;241m.\u001b[39mNONE)\n\u001b[1;32m 224\u001b[0m _partial_ \u001b[38;5;241m=\u001b[39m config\u001b[38;5;241m.\u001b[39mpop(_Keys\u001b[38;5;241m.\u001b[39mPARTIAL, \u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[0;32m--> 226\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43minstantiate_node\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 227\u001b[0m \u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrecursive\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_recursive_\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_convert_\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpartial\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_partial_\u001b[49m\n\u001b[1;32m 228\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 229\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m OmegaConf\u001b[38;5;241m.\u001b[39mis_list(config):\n\u001b[1;32m 230\u001b[0m \u001b[38;5;66;03m# Finalize config (convert targets to strings, merge with kwargs)\u001b[39;00m\n\u001b[1;32m 231\u001b[0m config_copy \u001b[38;5;241m=\u001b[39m copy\u001b[38;5;241m.\u001b[39mdeepcopy(config)\n", + "File 
\u001b[0;32m/opt/conda/envs/edgs/lib/python3.10/site-packages/hydra/_internal/instantiate/_instantiate2.py:347\u001b[0m, in \u001b[0;36minstantiate_node\u001b[0;34m(node, convert, recursive, partial, *args)\u001b[0m\n\u001b[1;32m 342\u001b[0m value \u001b[38;5;241m=\u001b[39m instantiate_node(\n\u001b[1;32m 343\u001b[0m value, convert\u001b[38;5;241m=\u001b[39mconvert, recursive\u001b[38;5;241m=\u001b[39mrecursive\n\u001b[1;32m 344\u001b[0m )\n\u001b[1;32m 345\u001b[0m kwargs[key] \u001b[38;5;241m=\u001b[39m _convert_node(value, convert)\n\u001b[0;32m--> 347\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_call_target\u001b[49m\u001b[43m(\u001b[49m\u001b[43m_target_\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpartial\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfull_key\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 348\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 349\u001b[0m \u001b[38;5;66;03m# If ALL or PARTIAL non structured or OBJECT non structured,\u001b[39;00m\n\u001b[1;32m 350\u001b[0m \u001b[38;5;66;03m# instantiate in dict and resolve interpolations eagerly.\u001b[39;00m\n\u001b[1;32m 351\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m convert \u001b[38;5;241m==\u001b[39m ConvertMode\u001b[38;5;241m.\u001b[39mALL \u001b[38;5;129;01mor\u001b[39;00m (\n\u001b[1;32m 352\u001b[0m convert \u001b[38;5;129;01min\u001b[39;00m (ConvertMode\u001b[38;5;241m.\u001b[39mPARTIAL, ConvertMode\u001b[38;5;241m.\u001b[39mOBJECT)\n\u001b[1;32m 353\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m node\u001b[38;5;241m.\u001b[39m_metadata\u001b[38;5;241m.\u001b[39mobject_type \u001b[38;5;129;01min\u001b[39;00m (\u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;28mdict\u001b[39m)\n\u001b[1;32m 354\u001b[0m ):\n", + "File \u001b[0;32m/opt/conda/envs/edgs/lib/python3.10/site-packages/hydra/_internal/instantiate/_instantiate2.py:97\u001b[0m, in \u001b[0;36m_call_target\u001b[0;34m(_target_, _partial_, args, kwargs, full_key)\u001b[0m\n\u001b[1;32m 95\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m full_key:\n\u001b[1;32m 96\u001b[0m msg \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mfull_key: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfull_key\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m---> 97\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m InstantiationException(msg) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01me\u001b[39;00m\n", + "\u001b[0;31mInstantiationException\u001b[0m: Error in call to target 'source.networks.Warper3DGS':\nAssertionError('Could not recognize scene type!')\nfull_key: gs" + ] + } + ], "source": [ "_ = wandb.init(entity=cfg.wandb.entity,\n", " project=cfg.wandb.project,\n", @@ -1781,7 +1905,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.21" + "version": "3.10.18" } }, "nbformat": 4, From d66a1bae6663fa8aac0d63b699da6c6acdcdb783 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Wed, 18 Jun 2025 08:24:19 +0900 Subject: [PATCH 19/37] update config to change name --- configs/train.yaml | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/configs/train.yaml b/configs/train.yaml index e40ec5b..de53b9d 100644 --- a/configs/train.yaml +++ b/configs/train.yaml @@ 
-1,22 +1,22 @@ defaults: - gs: base - - _self_ + - _self_ seed: 228 wandb: mode: "online" # "disabled" for no logging - entity: "3dcorrespondence" + entity: "m-ogawa-sensyn" project: "Adv3DGS" group: null name: null tag: "debug" - + train: gs_epochs: 0 # number of 3dgs iterations - reduce_opacity: True + reduce_opacity: True no_densify: False # if True, the model will not be densified - max_lr: True + max_lr: True load: gs: null #path to 3dgs checkpoint @@ -33,6 +33,4 @@ init_wC: scaling_factor: 0.001 proj_err_tolerance: 0.01 roma_model: "outdoors" # you can change this to "indoors" or "outdoors" - add_SfM_init : False - - + add_SfM_init: False From 5eac9d56cbe604b65c78d464f3b920baabee8b84 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Wed, 18 Jun 2025 08:26:50 +0900 Subject: [PATCH 20/37] update README to run option A --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 803f139..4bc8242 100644 --- a/README.md +++ b/README.md @@ -88,7 +88,8 @@ We evaluated on the following datasets: ### Using Your Own Dataset #### Option A -Use gradle demo; +Use gradle demo. +After running `docker compose up -d`, ``` docker compose exec edgs-app bash python script/gradio_demo.py --port 7862 From de7b415d21530f75127ca9623873cefac4ae5937 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Wed, 18 Jun 2025 08:32:18 +0900 Subject: [PATCH 21/37] revert config/train --- configs/train.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/train.yaml b/configs/train.yaml index de53b9d..98e257c 100644 --- a/configs/train.yaml +++ b/configs/train.yaml @@ -5,8 +5,8 @@ defaults: seed: 228 wandb: - mode: "online" # "disabled" for no logging - entity: "m-ogawa-sensyn" + mode: "disabled" # "disabled" for no logging + entity: "3dcorrespondence" project: "Adv3DGS" group: null name: null From b120ed97096aca22a851a4eb7446784b286123c0 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Wed, 18 Jun 2025 08:35:45 +0900 Subject: [PATCH 22/37] fix readme optionC --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4bc8242..6491f49 100644 --- a/README.md +++ b/README.md @@ -110,7 +110,7 @@ Using Jupyter lab. First edit configs/trainlyaml's wandb part. You need to create wandb account and set it to the config file. ``` -docker compose up edgs-app bash +docker compose exec edgs-app bash ``` And in the terminal in the docker container, ``` From c08c9e41b86b7c0d167e6df73d45eb222cbcbc12 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Wed, 18 Jun 2025 08:44:59 +0900 Subject: [PATCH 23/37] disable wandb if config/train.yaml specify it. --- script/fit_model_to_scene_full.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/script/fit_model_to_scene_full.py b/script/fit_model_to_scene_full.py index 63d6f70..f6dd8c8 100644 --- a/script/fit_model_to_scene_full.py +++ b/script/fit_model_to_scene_full.py @@ -103,13 +103,20 @@ # # 4. 
Initilize model and logger -_ = wandb.init( - entity=cfg.wandb.entity, - project=cfg.wandb.project, - config=omegaconf.OmegaConf.to_container(cfg, resolve=True, throw_on_missing=True), - name=cfg.wandb.name, - mode=cfg.wandb.mode, -) +if cfg.wandb.mode != "disabled": + _ = wandb.init( + entity=cfg.wandb.entity, + project=cfg.wandb.project, + config=omegaconf.OmegaConf.to_container( + cfg, resolve=True, throw_on_missing=True + ), + name=cfg.wandb.name, + mode=cfg.wandb.mode, + ) +else: + print( + "wandb logging is disabled (mode=disabled). Results will only be saved locally." + ) omegaconf.OmegaConf.resolve(cfg) set_seed(cfg.seed) # Init output folder From ca39f9bccd016e363fedf728e2a4d203849aa042 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Wed, 18 Jun 2025 09:18:02 +0900 Subject: [PATCH 24/37] fix docker-compose config to configs --- docker-compose.yml | 2 +- script/fit_model_to_scene_full.py | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 3bc5aec..be6c810 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -17,7 +17,7 @@ services: - ./outputs:/EDGS/outputs # Example: map a local 'output' folder - ./script:/EDGS/script # Example: map a local 'scripts' folder - ./source:/EDGS/source # Example: map a local 'sources' folder - - ./config:/EDGS/config # Example: map a local 'config' folder + - ./configs:/EDGS/configs # Example: map a local 'config' folder - ./notebooks:/EDGS/notebooks # Map a local 'notebooks' folder for JupyterLab stdin_open: true # Keep STDIN open for interactive processes tty: true # Allocate a TTY \ No newline at end of file diff --git a/script/fit_model_to_scene_full.py b/script/fit_model_to_scene_full.py index f6dd8c8..54774ba 100644 --- a/script/fit_model_to_scene_full.py +++ b/script/fit_model_to_scene_full.py @@ -9,6 +9,7 @@ # ## 2. Import libraries import argparse +import logging import os import random import sys @@ -34,6 +35,12 @@ orchestrate_video_to_colmap_scene, # Use the refactored function ) +# Initialize logging +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", +) + # --- Add argument parsing --- parser = argparse.ArgumentParser( description="Fit EDGS model to a scene, optionally from a video." @@ -104,6 +111,11 @@ # # 4. Initilize model and logger if cfg.wandb.mode != "disabled": + logging.info( + "wandb logging is enabled (mode={}). Results will be logged to wandb.".format( + cfg.wandb.mode + ) + ) _ = wandb.init( entity=cfg.wandb.entity, project=cfg.wandb.project, @@ -114,8 +126,10 @@ mode=cfg.wandb.mode, ) else: - print( - "wandb logging is disabled (mode=disabled). Results will only be saved locally." + logging.info( + "wandb logging is disabled (mode={}). Results will not be logged to wandb.".format( + cfg.wandb.mode + ) ) omegaconf.OmegaConf.resolve(cfg) set_seed(cfg.seed) From cd4c30ee0fa71a844117194f99de6951737fed4c Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Wed, 18 Jun 2025 10:32:56 +0900 Subject: [PATCH 25/37] fix No such file or directory error for output models --- script/fit_model_to_scene_full.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/script/fit_model_to_scene_full.py b/script/fit_model_to_scene_full.py index 54774ba..cc50817 100644 --- a/script/fit_model_to_scene_full.py +++ b/script/fit_model_to_scene_full.py @@ -93,6 +93,9 @@ if scene_dir is None: print(f"Failed to process video {args.video_path}. 
Exiting.") sys.exit(1) + cfg.gs.dataset.model_path = os.path.join(scene_dir, "models") + print(f"Set model_path to: {cfg.gs.dataset.model_path}") + os.makedirs(cfg.gs.dataset.model_path, exist_ok=True) except Exception as e: print(f"Error during video preprocessing: {e}") sys.exit(1) From f64d37fca760b97e0441459dc2ae7966e753eaad Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Wed, 18 Jun 2025 10:45:16 +0900 Subject: [PATCH 26/37] fix could not recongnize scene type error, by adding source_path for video. --- script/fit_model_to_scene_full.py | 1 + 1 file changed, 1 insertion(+) diff --git a/script/fit_model_to_scene_full.py b/script/fit_model_to_scene_full.py index cc50817..9eb676c 100644 --- a/script/fit_model_to_scene_full.py +++ b/script/fit_model_to_scene_full.py @@ -93,6 +93,7 @@ if scene_dir is None: print(f"Failed to process video {args.video_path}. Exiting.") sys.exit(1) + cfg.gs.dataset.source_path = scene_dir cfg.gs.dataset.model_path = os.path.join(scene_dir, "models") print(f"Set model_path to: {cfg.gs.dataset.model_path}") os.makedirs(cfg.gs.dataset.model_path, exist_ok=True) From 6fa2dde42948afe274cc950f60d6ef425c16b8a4 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Wed, 18 Jun 2025 10:52:20 +0900 Subject: [PATCH 27/37] fix You must call wandb.init() before wandb.log error by setting logwandb option to EDGSTrainer() --- script/fit_model_to_scene_full.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/script/fit_model_to_scene_full.py b/script/fit_model_to_scene_full.py index 9eb676c..6a22d95 100644 --- a/script/fit_model_to_scene_full.py +++ b/script/fit_model_to_scene_full.py @@ -142,7 +142,12 @@ os.makedirs(cfg.gs.dataset.model_path, exist_ok=True) # Init gs model gs = hydra.utils.instantiate(cfg.gs) -trainer = EDGSTrainer(GS=gs, training_config=cfg.gs.opt, device=cfg.device) +trainer = EDGSTrainer( + GS=gs, + training_config=cfg.gs.opt, + device=cfg.device, + log_wandb=(cfg.wandb.mode != "disabled"), +) # # 5. Init with matchings From 94e9db7082453ae3b427ef027d8275b088326d9c Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Wed, 18 Jun 2025 13:59:58 +0900 Subject: [PATCH 28/37] fix no cuda gpus are available error --- docker-compose.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker-compose.yml b/docker-compose.yml index be6c810..24975eb 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -12,6 +12,8 @@ services: - driver: nvidia count: all # Use all available GPUs capabilities: [gpu] # Request GPU capabilities + environment: + - NVIDIA_VISIBLE_DEVICES=all volumes: - ./data:/EDGS/data # Example: map a local 'data' folder to '/EDGS/data' in the container - ./outputs:/EDGS/outputs # Example: map a local 'output' folder From 16526ebde181724a27ec7f3ef23f65a2ed79c049 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Thu, 19 Jun 2025 09:34:11 +0900 Subject: [PATCH 29/37] remove wandb edit part from readme --- README.md | 5 ----- configs/train.yaml | 2 +- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/README.md b/README.md index 6491f49..1aac793 100644 --- a/README.md +++ b/README.md @@ -97,9 +97,6 @@ python script/gradio_demo.py --port 7862 #### Option B From command line. -First you need to create wandb account. -Then, edit configs/trainlyaml; wandb's "entity" for your user name, "project" for your created project name. 
- ``` docker compose exec edgs-app bash python script/fit_model_to_scene_full.py --video_path @@ -107,8 +104,6 @@ python script/fit_model_to_scene_full.py --video_path #### Option C Using Jupyter lab. -First edit configs/trainlyaml's wandb part. -You need to create wandb account and set it to the config file. ``` docker compose exec edgs-app bash ``` diff --git a/configs/train.yaml b/configs/train.yaml index 98e257c..b4cabcc 100644 --- a/configs/train.yaml +++ b/configs/train.yaml @@ -5,7 +5,7 @@ defaults: seed: 228 wandb: - mode: "disabled" # "disabled" for no logging + mode: "disabled" # "online" or "disabled" entity: "3dcorrespondence" project: "Adv3DGS" group: null From 1019c23ba5ad3b88490931a68459c762fb22c727 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Thu, 19 Jun 2025 10:11:36 +0900 Subject: [PATCH 30/37] remove unnecessary parameter change inside python file. --- README.md | 2 +- configs/train.yaml | 2 +- script/fit_model_to_scene_full.py | 19 +------------------ source/utils_preprocess.py | 2 +- 4 files changed, 4 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 1aac793..4c66881 100644 --- a/README.md +++ b/README.md @@ -99,7 +99,7 @@ python script/gradio_demo.py --port 7862 From command line. ``` docker compose exec edgs-app bash -python script/fit_model_to_scene_full.py --video_path +python script/fit_model_to_scene_full.py --video_path [--processed_scenes_dir ] ``` #### Option C diff --git a/configs/train.yaml b/configs/train.yaml index b4cabcc..585102e 100644 --- a/configs/train.yaml +++ b/configs/train.yaml @@ -13,7 +13,7 @@ wandb: tag: "debug" train: - gs_epochs: 0 # number of 3dgs iterations + gs_epochs: 10 # number of 3dgs iterations reduce_opacity: True no_densify: False # if True, the model will not be densified max_lr: True diff --git a/script/fit_model_to_scene_full.py b/script/fit_model_to_scene_full.py index 6a22d95..2c0f2fd 100644 --- a/script/fit_model_to_scene_full.py +++ b/script/fit_model_to_scene_full.py @@ -53,12 +53,6 @@ ), # Use project_root help="Path to the input video file.", ) -parser.add_argument( - "--num_ref_views", - type=int, - default=16, - help="Number of reference views to extract from video for COLMAP.", -) parser.add_argument( "--processed_scenes_dir", type=str, @@ -86,7 +80,7 @@ # if the Scene object loads everything from the COLMAP directory. _, scene_dir = orchestrate_video_to_colmap_scene( args.video_path, - args.num_ref_views, # Assuming you added this arg + cfg.init_wC.num_refs, # Assuming you added this arg max_size=1024, # Or make it an arg base_work_dir=args.processed_scenes_dir, # Assuming you added this arg ) @@ -101,17 +95,6 @@ print(f"Error during video preprocessing: {e}") sys.exit(1) -# Update the config with your settings -cfg.gs.dataset.images = "images" -cfg.gs.opt.TEST_CAM_IDX_TO_LOG = 12 -cfg.train.gs_epochs = 30000 -cfg.gs.opt.opacity_reset_interval = 1_000_000 -cfg.train.no_densify = True -cfg.init_wC.matches_per_ref = 15_000 -cfg.init_wC.nns_per_ref = 3 -cfg.init_wC.num_refs = 180 -cfg.init_wC.roma_model = "outdoors" - # # 4. 
Initilize model and logger if cfg.wandb.mode != "disabled": diff --git a/source/utils_preprocess.py b/source/utils_preprocess.py index c3b47f9..94a5195 100644 --- a/source/utils_preprocess.py +++ b/source/utils_preprocess.py @@ -415,7 +415,7 @@ def orchestrate_video_to_colmap_scene( input_path, num_ref_views, max_size=1024, - base_work_dir="../output/processed_scenes", + base_work_dir="../outputs/processed_scenes", ): """ Orchestrates the full video/image folder preprocessing pipeline: From da1317d263230344da14a9a8a5889c2197cd56be Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Thu, 19 Jun 2025 11:21:06 +0900 Subject: [PATCH 31/37] set similar iteration with gradio_demo to fit_model_to_scene_full.py --- configs/train.yaml | 2 +- script/fit_model_to_scene_full.py | 75 ++++++++++++++++--------------- 2 files changed, 41 insertions(+), 36 deletions(-) diff --git a/configs/train.yaml b/configs/train.yaml index 585102e..fd3cac2 100644 --- a/configs/train.yaml +++ b/configs/train.yaml @@ -28,7 +28,7 @@ verbose: true init_wC: use: True # use EDGS matches_per_ref: 15_000 # number of matches per reference - num_refs: 180 # number of reference images + num_refs: 18 # number of reference images nns_per_ref: 3 # number of nearest neighbors per reference scaling_factor: 0.001 proj_err_tolerance: 0.01 diff --git a/script/fit_model_to_scene_full.py b/script/fit_model_to_scene_full.py index 2c0f2fd..ad51f70 100644 --- a/script/fit_model_to_scene_full.py +++ b/script/fit_model_to_scene_full.py @@ -54,11 +54,9 @@ help="Path to the input video file.", ) parser.add_argument( - "--processed_scenes_dir", + "--outputs_dir", type=str, - default=os.path.join( - project_root, "outputs", "processed_scenes" - ), # Use project_root + default=os.path.join(project_root, "outputs"), # Use project_root help="Base directory where processed COLMAP scenes will be stored.", ) args = parser.parse_args() @@ -82,7 +80,7 @@ args.video_path, cfg.init_wC.num_refs, # Assuming you added this arg max_size=1024, # Or make it an arg - base_work_dir=args.processed_scenes_dir, # Assuming you added this arg + base_work_dir=args.outputs_dir, # Assuming you added this arg ) if scene_dir is None: print(f"Failed to process video {args.video_path}. Exiting.") @@ -143,7 +141,7 @@ with torch.no_grad(): viewpoint_stack = trainer.GS.scene.getTrainCameras() viewpoint_cams_to_viz = random.sample(trainer.GS.scene.getTrainCameras(), 4) - for viewpoint_cam in viewpoint_cams_to_viz: + for idx, viewpoint_cam in enumerate(viewpoint_cams_to_viz): render_pkg = trainer.GS(viewpoint_cam) image = render_pkg["render"] @@ -166,13 +164,20 @@ ax[1].imshow(image_np) ax[1].axis("off") plt.tight_layout() + plt.savefig( + os.path.join( + cfg.gs.dataset.model_path, + f"viewpoint_{idx}_initial.png", + ) + ) plt.show() + plt.close(fig) # # 6.Optimize scene # Optimize first briefly for 5k steps and visualize results. We also disable saving of pretrained models. Train function can be changed for any other method trainer.saving_iterations = [] -cfg.train.gs_epochs = 5_000 +# cfg.train.gs_epochs = 5_000 trainer.train(cfg.train) @@ -209,39 +214,39 @@ trainer.save_model() -# # 7. Continue training until we reach total 30K training steps -cfg.train.gs_epochs = 25_000 -trainer.train(cfg.train) +# # # 7. 
Continue training until we reach total 30K training steps +# cfg.train.gs_epochs = 25_000 +# trainer.train(cfg.train) -# ### Visualize same viewpoints -with torch.no_grad(): - for viewpoint_cam in viewpoint_cams_to_viz: - render_pkg = trainer.GS(viewpoint_cam) - image = render_pkg["render"] +# # ### Visualize same viewpoints +# with torch.no_grad(): +# for viewpoint_cam in viewpoint_cams_to_viz: +# render_pkg = trainer.GS(viewpoint_cam) +# image = render_pkg["render"] - image_np = image.clone().detach().cpu().numpy().transpose(1, 2, 0) - image_gt_np = ( - viewpoint_cam.original_image.clone() - .detach() - .cpu() - .numpy() - .transpose(1, 2, 0) - ) +# image_np = image.clone().detach().cpu().numpy().transpose(1, 2, 0) +# image_gt_np = ( +# viewpoint_cam.original_image.clone() +# .detach() +# .cpu() +# .numpy() +# .transpose(1, 2, 0) +# ) - # Clip values to be in the range [0, 1] - image_np = np.clip(image_np * 255, 0, 255).astype(np.uint8) - image_gt_np = np.clip(image_gt_np * 255, 0, 255).astype(np.uint8) +# # Clip values to be in the range [0, 1] +# image_np = np.clip(image_np * 255, 0, 255).astype(np.uint8) +# image_gt_np = np.clip(image_gt_np * 255, 0, 255).astype(np.uint8) - fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12, 6)) - ax[0].imshow(image_gt_np) - ax[0].axis("off") - ax[1].imshow(image_np) - ax[1].axis("off") - plt.tight_layout() - plt.show() +# fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12, 6)) +# ax[0].imshow(image_gt_np) +# ax[0].axis("off") +# ax[1].imshow(image_np) +# ax[1].axis("off") +# plt.tight_layout() +# plt.show() # ### Save model -with torch.no_grad(): - trainer.save_model() +# with torch.no_grad(): +# trainer.save_model() From 9ab2a765d03ef1e6376d03786a41aebe12553c9a Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Thu, 19 Jun 2025 16:51:36 +0900 Subject: [PATCH 32/37] add same with gradio demo setting option --- configs/train.yaml | 2 +- script/fit_model_to_scene_full.py | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/configs/train.yaml b/configs/train.yaml index fd3cac2..6e776d0 100644 --- a/configs/train.yaml +++ b/configs/train.yaml @@ -13,7 +13,7 @@ wandb: tag: "debug" train: - gs_epochs: 10 # number of 3dgs iterations + gs_epochs: 1000 # number of 3dgs iterations reduce_opacity: True no_densify: False # if True, the model will not be densified max_lr: True diff --git a/script/fit_model_to_scene_full.py b/script/fit_model_to_scene_full.py index ad51f70..6a0db04 100644 --- a/script/fit_model_to_scene_full.py +++ b/script/fit_model_to_scene_full.py @@ -64,6 +64,22 @@ with initialize(config_path="../configs", version_base="1.1"): cfg = compose(config_name="train") + +SAME_WITH_GRADIO_DEMO = True +if SAME_WITH_GRADIO_DEMO: + cfg.gs.opt.opacity_reset_interval = 1_000_000 + cfg.train.reduce_opacity = True + cfg.train.no_densify = True + cfg.train.max_lr = True + cfg.train.gs_epochs = 1000 + + cfg.init_wC.use = True + cfg.init_wC.nns_per_ref = 1 + cfg.init_wC.add_SfM_init = False + cfg.init_wC.scaling_factor = 0.00077 * 2.0 + cfg.init_wC.num_refs = 16 + cfg.init_wC.matches_per_ref = 20000 + print(OmegaConf.to_yaml(cfg)) From d965e3ec95ccbabdbae57a03669e42cbb98f0a77 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Wed, 16 Jul 2025 11:17:21 +0900 Subject: [PATCH 33/37] fix CondaToSNonInteractiveError caused by latest conda update --- Dockerfile | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Dockerfile b/Dockerfile index fc1ad50..aa2d20a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -29,6 +29,14 @@ RUN 
wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86 ENV PATH="/opt/conda/bin:${PATH}" # Create the conda environment and install dependencies +# Accept Anaconda TOS before using conda +RUN conda init bash && \ + conda config --set always_yes yes --set changeps1 no && \ + conda config --add channels defaults && \ + conda config --set channel_priority strict && \ + conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/main && \ + conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/r +# Now you can safely create your environment RUN conda create -y -n edgs python=3.10 pip && \ conda clean -afy && \ echo "source activate edgs" > ~/.bashrc From ec447206738e6512ce6ac862a11747c99e77edb2 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Wed, 16 Jul 2025 18:13:59 +0900 Subject: [PATCH 34/37] fix train.py config path --- script/train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/train.py b/script/train.py index e95cb63..895c1e3 100644 --- a/script/train.py +++ b/script/train.py @@ -16,7 +16,7 @@ from source.utils_aux import set_seed -@hydra.main(config_path="configs", config_name="train", version_base="1.2") +@hydra.main(config_path="../configs", config_name="train", version_base="1.2") def main(cfg: omegaconf.DictConfig): _ = wandb.init( entity=cfg.wandb.entity, From 68496226d2571d4734e1fc30ba3a5fca86b04e66 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Thu, 24 Jul 2025 10:55:23 +0900 Subject: [PATCH 35/37] fix option D in readme. --- README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 4c66881..e944857 100644 --- a/README.md +++ b/README.md @@ -133,6 +133,11 @@ scene_folder |---points3D.bin ``` +``` +docker compose exec edgs-app bash +``` +Then run training command as described below section. + Nerf synthetic format is also acceptable. You can also use functions provided in our code to convert a collection of images or a sinlge video into a desired format. However, this may requre tweaking and processing time can be large for large collection of images with little overlap. @@ -143,7 +148,7 @@ You can also use functions provided in our code to convert a collection of image To optimize on a single scene in COLMAP format use this code. ```bash -python train.py \ +python script/train.py \ train.gs_epochs=30000 \ train.no_densify=True \ gs.dataset.source_path= \ From f3f2ff1e33ae9aafcdf7807eb091294dd2851a57 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Fri, 25 Jul 2025 16:00:05 +0900 Subject: [PATCH 36/37] fix memory overflow error --- source/utils_preprocess.py | 294 +++++++++++++++++++++++++++++++++---- 1 file changed, 265 insertions(+), 29 deletions(-) diff --git a/source/utils_preprocess.py b/source/utils_preprocess.py index 94a5195..b9406c2 100644 --- a/source/utils_preprocess.py +++ b/source/utils_preprocess.py @@ -45,18 +45,23 @@ def resize_max_side(frame, max_size): return frame -def read_video_frames(video_input, k=1, max_size=1024): +def extract_video_frames_to_disk(video_input, output_dir, k=1, max_size=1024): """ - Extracts every k-th frame from a video or list of images, resizes to max size, and returns frames as list. + Extracts every k-th frame from a video using ffmpeg, saves to disk to avoid memory overflow. Parameters: video_input (str, file-like, or list): Path to video file, file-like object, or list of image files. + output_dir (str): Directory to save extracted frames. 
k (int): Interval for frame extraction (every k-th frame). max_size (int): Maximum size for width or height after resizing. Returns: - frames (list): List of resized frames (numpy arrays). + frame_paths (list): List of paths to extracted frame files. """ + import subprocess + import tempfile + import shutil + # Handle list of image files (not single video in a list) if isinstance(video_input, list): # If it's a single video in a list, treat it as video @@ -65,13 +70,22 @@ def read_video_frames(video_input, k=1, max_size=1024): ): video_input = video_input[0] # unwrap single video file else: - # Treat as list of images - frames = [] - for img_file in video_input: + # Treat as list of images - copy and resize them + frame_paths = [] + for idx, img_file in enumerate(video_input): img = Image.open(img_file.name).convert("RGB") - img.thumbnail((max_size, max_size)) - frames.append(np.array(img)[..., ::-1]) - return frames + # Resize if necessary + width, height = img.size + if max(width, height) > max_size: + scale = max_size / max(width, height) + new_width = int(width * scale) + new_height = int(height * scale) + img = img.resize((new_width, new_height), Image.LANCZOS) + + output_path = os.path.join(output_dir, f"frame_{idx:08d}.jpg") + img.save(output_path, "JPEG", quality=95) + frame_paths.append(output_path) + return frame_paths # Handle file-like or path if hasattr(video_input, "name"): @@ -83,31 +97,84 @@ def read_video_frames(video_input, k=1, max_size=1024): "Unsupported video input type. Must be a filepath, file-like object, or list of images." ) + # Create output directory + os.makedirs(output_dir, exist_ok=True) + + # Use ffmpeg to extract frames + print(f"Extracting frames from video using ffmpeg...") + try: + # First, get video info to calculate frame interval + result = subprocess.run([ + 'ffprobe', '-v', 'quiet', '-count_frames', '-select_streams', 'v:0', + '-show_entries', 'stream=nb_frames', '-of', 'csv=p=0', video_path + ], capture_output=True, text=True, check=True) + + total_frames = int(result.stdout.strip()) + print(f"Total frames in video: {total_frames}") + + # Extract every k-th frame using ffmpeg with scaling + ffmpeg_cmd = [ + 'ffmpeg', '-i', video_path, '-y', + '-vf', f'select=not(mod(n\\,{k})),scale=w=min({max_size}\\,iw):h=min({max_size}\\,ih):force_original_aspect_ratio=decrease', + '-q:v', '2', # High quality + os.path.join(output_dir, 'frame_%08d.jpg') + ] + + subprocess.run(ffmpeg_cmd, check=True, capture_output=True) + + # Get list of extracted frame paths + frame_paths = sorted([ + os.path.join(output_dir, f) for f in os.listdir(output_dir) + if f.startswith('frame_') and f.endswith('.jpg') + ]) + + print(f"Extracted {len(frame_paths)} frames to {output_dir}") + return frame_paths + + except subprocess.CalledProcessError as e: + print(f"ffmpeg failed: {e}") + # Fallback to opencv if ffmpeg fails + return extract_video_frames_fallback(video_path, output_dir, k, max_size) + except FileNotFoundError: + print("ffmpeg not found, using opencv fallback") + return extract_video_frames_fallback(video_path, output_dir, k, max_size) + + +def extract_video_frames_fallback(video_path, output_dir, k=1, max_size=1024): + """ + Fallback method using opencv, but saves frames to disk instead of memory. 
+ """ cap = cv2.VideoCapture(video_path) if not cap.isOpened(): raise ValueError(f"Error: Could not open video {video_path}.") total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) frame_count = 0 - frames = [] + frame_paths = [] + + os.makedirs(output_dir, exist_ok=True) - with tqdm(total=total_frames // k, desc="Processing Video", unit="frame") as pbar: + with tqdm(total=total_frames // k, desc="Extracting Video Frames", unit="frame") as pbar: while True: ret, frame = cap.read() if not ret: break if frame_count % k == 0: - frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + # Resize frame h, w = frame.shape[:2] scale = max(h, w) / max_size if scale > 1: frame = cv2.resize(frame, (int(w / scale), int(h / scale))) - frames.append(frame[..., [2, 1, 0]]) + + # Save frame to disk + frame_path = os.path.join(output_dir, f"frame_{len(frame_paths):08d}.jpg") + cv2.imwrite(frame_path, frame, [cv2.IMWRITE_JPEG_QUALITY, 95]) + frame_paths.append(frame_path) pbar.update(1) frame_count += 1 cap.release() - return frames + return frame_paths def resize_max_side(frame, max_size): @@ -202,9 +269,48 @@ def variance_of_laplacian(image): return cv2.Laplacian(image, cv2.CV_64F).var() +def preprocess_frame_paths(frame_paths, verbose=False): + """ + Compute sharpness scores for a list of frame files using multi-scale Laplacian variance. + + Args: + frame_paths (list of str): List of paths to frame image files. + verbose (bool): If True, print scores. + + Returns: + list of float: Sharpness scores for each frame. + """ + scores = [] + + for idx, frame_path in enumerate(tqdm(frame_paths, desc="Scoring frames")): + # Load frame from disk + frame = cv2.imread(frame_path) + if frame is None: + print(f"Warning: Could not load frame {frame_path}") + scores.append(0.0) + continue + + gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + + fm = ( + variance_of_laplacian(gray) + + variance_of_laplacian(cv2.resize(gray, (0, 0), fx=0.75, fy=0.75)) + + variance_of_laplacian(cv2.resize(gray, (0, 0), fx=0.5, fy=0.5)) + + variance_of_laplacian(cv2.resize(gray, (0, 0), fx=0.25, fy=0.25)) + ) + + if verbose: + print(f"Frame {idx} ({os.path.basename(frame_path)}): Sharpness Score = {fm:.2f}") + + scores.append(fm) + + return scores + + def preprocess_frames(frames, verbose=False): """ Compute sharpness scores for a list of frames using multi-scale Laplacian variance. + DEPRECATED: Use preprocess_frame_paths instead to avoid memory issues. Args: frames (list of np.ndarray): List of frames (BGR images). @@ -262,9 +368,32 @@ def select_optimal_frames(scores, k): return sorted(selected_indices) +def copy_selected_frames_to_scene_dir(selected_frame_paths, scene_dir): + """ + Copies selected frame files into the target scene directory under 'images/' subfolder. + + Args: + selected_frame_paths (list of str): List of paths to selected frame files. + scene_dir (str): Target path where 'images/' subfolder will be created. + """ + import shutil + + images_dir = os.path.join(scene_dir, "images") + os.makedirs(images_dir, exist_ok=True) + + for idx, frame_path in enumerate(selected_frame_paths): + filename = os.path.join( + images_dir, f"{idx:08d}.jpg" + ) # 00000000.jpg, 00000001.jpg, etc. + shutil.copy2(frame_path, filename) + + print(f"Copied {len(selected_frame_paths)} selected frames to {images_dir}") + + def save_frames_to_scene_dir(frames, scene_dir): """ Saves a list of frames into the target scene directory under 'images/' subfolder. + DEPRECATED: Use copy_selected_frames_to_scene_dir to avoid memory issues. 
Args: frames (list of np.ndarray): List of frames (BGR images) to save. @@ -282,12 +411,14 @@ def save_frames_to_scene_dir(frames, scene_dir): print(f"Saved {len(frames)} frames to {images_dir}") -def run_colmap_on_scene(scene_dir): +def run_colmap_on_scene(scene_dir, force_pinhole=True): """ Runs feature extraction, matching, and mapping on all images inside scene_dir/images using pycolmap. + Forces PINHOLE camera model to avoid distortion issues. Args: scene_dir (str): Path to scene directory containing 'images' folder. + force_pinhole (bool): If True, forces PINHOLE camera model during reconstruction. TODO: if the function hasn't managed to match all the frames either increase image size, increase number of features or just remove those frames from the folder scene_dir/images @@ -318,7 +449,7 @@ def run_colmap_on_scene(scene_dir): pycolmap.match_exhaustive(database_path) print(f"Finished feature matching in {(time.time() - start_time):.2f}s.") - # Step 3: Mapping + # Step 3: Mapping with PINHOLE camera model pipeline_options = pycolmap.IncrementalPipelineOptions() pipeline_options.min_num_matches = 15 pipeline_options.multiple_models = True @@ -330,6 +461,8 @@ def run_colmap_on_scene(scene_dir): pipeline_options.mapper.init_min_num_inliers = 30 pipeline_options.mapper.init_max_error = 8.0 pipeline_options.mapper.init_min_tri_angle = 5.0 + + # Note: force_pinhole will be applied after reconstruction reconstruction = pycolmap.incremental_mapping( database_path=database_path, @@ -339,21 +472,121 @@ def run_colmap_on_scene(scene_dir): ) print(f"Finished incremental mapping in {(time.time() - start_time):.2f}s.") - # Step 4: Post-process Cameras to SIMPLE_PINHOLE + # Step 4: Ensure cameras are PINHOLE (double-check) recon_path = os.path.join(sparse_path, "0") - reconstruction = pycolmap.Reconstruction(recon_path) - - for cam in reconstruction.cameras.values(): - cam.model = "SIMPLE_PINHOLE" - cam.params = cam.params[:3] # Keep only [f, cx, cy] - - reconstruction.write(recon_path) + if os.path.exists(recon_path): + reconstruction = pycolmap.Reconstruction(recon_path) + + for cam in reconstruction.cameras.values(): + if force_pinhole and cam.model != "PINHOLE": + print(f"Converting camera {cam.camera_id} from {cam.model} to PINHOLE") + cam.model = "PINHOLE" + # Ensure we have exactly 4 parameters [fx, fy, cx, cy] + if len(cam.params) >= 4: + cam.params = cam.params[:4] + elif len(cam.params) >= 3: + # Duplicate focal length if we only have 3 params + f, cx, cy = cam.params[:3] + cam.params = [f, f, cx, cy] + else: + # Default values if params are insufficient + focal = max(cam.width, cam.height) + cam.params = [focal, focal, cam.width/2, cam.height/2] + + reconstruction.write(recon_path) + print(f"Saved reconstruction with PINHOLE cameras to {recon_path}") print(f"Total pipeline time: {(time.time() - start_time):.2f}s.") def process_input_for_colmap(input_path, num_ref_views, output_dir, max_size=1024): """ + Memory-efficient helper function to process video/images, select optimal frames, + and save them to the output_dir/images without loading all frames into memory. 
+ """ + import tempfile + import shutil + + # Create temporary directory for extracted frames + temp_frames_dir = tempfile.mkdtemp(prefix="edgs_frames_") + + try: + if isinstance(input_path, (str, os.PathLike)): # If input_path is a path string + if os.path.isdir(input_path): # If it's a directory of images + print(f"Processing image directory: {input_path}") + # Copy and resize images to temp directory + frame_paths = [] + image_files = sorted([ + f for f in os.listdir(input_path) + if f.lower().endswith(("jpg", "jpeg", "png")) + ]) + + for idx, img_file in enumerate(image_files): + img = Image.open(os.path.join(input_path, img_file)).convert("RGB") + # Resize if necessary + width, height = img.size + if max(width, height) > max_size: + scale = max_size / max(width, height) + new_width = int(width * scale) + new_height = int(height * scale) + img = img.resize((new_width, new_height), Image.LANCZOS) + + output_path = os.path.join(temp_frames_dir, f"frame_{idx:08d}.jpg") + img.save(output_path, "JPEG", quality=95) + frame_paths.append(output_path) + + else: # If it's a single video file path + print(f"Processing video file: {input_path}") + frame_paths = extract_video_frames_to_disk( + video_input=input_path, + output_dir=temp_frames_dir, + max_size=max_size + ) + elif hasattr(input_path, "name"): # File-like object (e.g., from Gradio upload) + print(f"Processing uploaded video file: {input_path.name}") + frame_paths = extract_video_frames_to_disk( + video_input=input_path, + output_dir=temp_frames_dir, + max_size=max_size + ) + else: + raise ValueError(f"Unsupported input_path type: {type(input_path)}") + + if not frame_paths: + print("No frames extracted or read.") + return [] + + # Score frames without loading them all into memory + print(f"Scoring {len(frame_paths)} frames...") + frames_scores = preprocess_frame_paths(frame_paths) + + # Select optimal frames + selected_frames_indices = select_optimal_frames( + scores=frames_scores, k=min(num_ref_views, len(frame_paths)) + ) + + # Get paths to selected frames + selected_frame_paths = [frame_paths[idx] for idx in selected_frames_indices] + + print(f"Selected {len(selected_frame_paths)} optimal frames out of {len(frame_paths)}") + + # Copy selected frames to scene directory + copy_selected_frames_to_scene_dir(selected_frame_paths, output_dir) + + # Return empty list since we're not loading frames into memory anymore + # The actual frames are saved to disk in the scene directory + return [] + + finally: + # Clean up temporary directory + if os.path.exists(temp_frames_dir): + shutil.rmtree(temp_frames_dir) + print(f"Cleaned up temporary frame directory: {temp_frames_dir}") + + +def process_input_for_colmap_legacy(input_path, num_ref_views, output_dir, max_size=1024): + """ + DEPRECATED: Original memory-intensive version. Helper function to read frames from video or image folder, select optimal ones, and save them to the output_dir/images. This is based on process_input from gradio_demo.py. @@ -488,16 +721,19 @@ def orchestrate_video_to_colmap_scene( os.makedirs(scene_dir, exist_ok=True) print(f"Created scene directory for COLMAP: {scene_dir}") + # Process video/images to extract and select optimal frames selected_frames_data = process_input_for_colmap( actual_input_path_str, num_ref_views, scene_dir, max_size ) - if not selected_frames_data: - print(f"Frame processing failed for {input_path}. 
Aborting COLMAP.") - # Optionally clean up scene_dir if it's truly temporary and processing failed - # shutil.rmtree(scene_dir) + + # Check if images were saved to scene directory + images_dir = os.path.join(scene_dir, "images") + if not os.path.exists(images_dir) or not os.listdir(images_dir): + print(f"Frame processing failed for {input_path}. No images found in {images_dir}. Aborting COLMAP.") return [], None - run_colmap_on_scene(scene_dir) # This function should create scene_dir/sparse/0 + # Run COLMAP with PINHOLE camera model enforced + run_colmap_on_scene(scene_dir, force_pinhole=True) # Force PINHOLE to avoid distortion print(f"COLMAP processing complete for {scene_dir}") return selected_frames_data, scene_dir From 9f3884d2ee4a69ca65a85fa9931a123c8b270cb6 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Fri, 25 Jul 2025 17:25:34 +0900 Subject: [PATCH 37/37] fix memory error. and fix pinhole enforcement. --- script/fit_model_to_scene_full.py | 8 +- source/corr_init.py | 5 + source/utils_preprocess.py | 179 ++++++++++++++++++++++++++---- 3 files changed, 169 insertions(+), 23 deletions(-) diff --git a/script/fit_model_to_scene_full.py b/script/fit_model_to_scene_full.py index 6a0db04..bffd51b 100644 --- a/script/fit_model_to_scene_full.py +++ b/script/fit_model_to_scene_full.py @@ -73,11 +73,11 @@ cfg.train.max_lr = True cfg.train.gs_epochs = 1000 - cfg.init_wC.use = True + cfg.init_wC.use = False # Disable for fallback cases cfg.init_wC.nns_per_ref = 1 cfg.init_wC.add_SfM_init = False cfg.init_wC.scaling_factor = 0.00077 * 2.0 - cfg.init_wC.num_refs = 16 + cfg.init_wC.num_refs = 2 # Reduce to minimum since COLMAP only found 2 cameras cfg.init_wC.matches_per_ref = 20000 print(OmegaConf.to_yaml(cfg)) @@ -156,7 +156,9 @@ # ### Visualize a few initial viewpoints with torch.no_grad(): viewpoint_stack = trainer.GS.scene.getTrainCameras() - viewpoint_cams_to_viz = random.sample(trainer.GS.scene.getTrainCameras(), 4) + available_cams = trainer.GS.scene.getTrainCameras() + num_cams_to_viz = min(4, len(available_cams)) + viewpoint_cams_to_viz = random.sample(available_cams, num_cams_to_viz) for idx, viewpoint_cam in enumerate(viewpoint_cams_to_viz): render_pkg = trainer.GS(viewpoint_cam) image = render_pkg["render"] diff --git a/source/corr_init.py b/source/corr_init.py index 09c94a2..bfd6985 100644 --- a/source/corr_init.py +++ b/source/corr_init.py @@ -787,6 +787,11 @@ def init_gaussians_with_corr_fast(gaussians, scene, cfg, device, verbose=False, # Dummy first pass to initialize model with torch.no_grad(): + if len(viewpoint_stack) < 2: + print(f"âš ī¸ Warning: Only {len(viewpoint_stack)} viewpoints available. Need at least 2 for correspondence initialization.") + print("Skipping correspondence initialization - using SfM points only.") + return scene.train_cameras[3:], [], {} + viewpoint_cam1 = viewpoint_stack[0] viewpoint_cam2 = viewpoint_stack[1] imA = viewpoint_cam1.original_image.detach().cpu().numpy().transpose(1, 2, 0) diff --git a/source/utils_preprocess.py b/source/utils_preprocess.py index b9406c2..14da72b 100644 --- a/source/utils_preprocess.py +++ b/source/utils_preprocess.py @@ -411,6 +411,71 @@ def save_frames_to_scene_dir(frames, scene_dir): print(f"Saved {len(frames)} frames to {images_dir}") +def create_fallback_reconstruction(image_dir, sparse_path): + """ + Create a minimal fallback reconstruction when COLMAP fails completely. + Creates a simple linear camera trajectory for the available images. 
+ """ + # No need to import colmap_loader - we'll create text files directly + + print("🔧 Creating fallback reconstruction with assumed camera positions...") + + # Get list of images + image_files = sorted([f for f in os.listdir(image_dir) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]) + + if len(image_files) < 1: + raise RuntimeError("No images found for fallback reconstruction") + + # Read first image to get dimensions + first_img_path = os.path.join(image_dir, image_files[0]) + from PIL import Image + img = Image.open(first_img_path) + width, height = img.size + + # Create minimal reconstruction directory + fallback_dir = os.path.join(sparse_path, "0") + os.makedirs(fallback_dir, exist_ok=True) + + # Create cameras.txt with simple pinhole model + cameras_txt = os.path.join(fallback_dir, "cameras.txt") + focal = max(width, height) # Simple focal length estimation + with open(cameras_txt, 'w') as f: + f.write("# Camera list with one line of data per camera:\n") + f.write("# CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[]\n") + f.write(f"1 PINHOLE {width} {height} {focal} {focal} {width/2} {height/2}\n") + + # Create images.txt with linear trajectory + images_txt = os.path.join(fallback_dir, "images.txt") + with open(images_txt, 'w') as f: + f.write("# Image list with two lines of data per image:\n") + f.write("# IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME\n") + f.write("# POINTS2D[] as (X, Y, POINT3D_ID)\n") + + for i, img_file in enumerate(image_files): + # Simple linear trajectory along Z-axis + tx, ty, tz = 0.0, 0.0, -i * 0.5 + # Identity quaternion (no rotation) + qw, qx, qy, qz = 1.0, 0.0, 0.0, 0.0 + + f.write(f"{i+1} {qw} {qx} {qy} {qz} {tx} {ty} {tz} 1 {img_file}\n") + f.write("\n") # Empty line for points2D + + # Create minimal points3D.txt with a few dummy points + points_txt = os.path.join(fallback_dir, "points3D.txt") + with open(points_txt, 'w') as f: + f.write("# 3D point list with one line of data per point:\n") + f.write("# POINT3D_ID, X, Y, Z, R, G, B, ERROR, TRACK[] as (IMAGE_ID, POINT2D_IDX)\n") + # Add some dummy 3D points for basic initialization + for i in range(10): + x, y, z = i * 0.1, 0.0, -1.0 # Simple grid of points + r, g, b = 128, 128, 128 # Gray color + error = 1.0 + f.write(f"{i+1} {x} {y} {z} {r} {g} {b} {error}\n") + + print(f"✅ Created fallback reconstruction with {len(image_files)} cameras at {fallback_dir}") + print("âš ī¸ Note: This is a basic reconstruction with assumed camera positions. Results may be limited.") + + def run_colmap_on_scene(scene_dir, force_pinhole=True): """ Runs feature extraction, matching, and mapping on all images inside scene_dir/images using pycolmap. 
@@ -434,48 +499,121 @@ def run_colmap_on_scene(scene_dir, force_pinhole=True): # Make sure output directories exist os.makedirs(sparse_path, exist_ok=True) - # Step 1: Feature Extraction + # Step 1: Feature Extraction with more aggressive settings pycolmap.extract_features( database_path, image_dir, sift_options={ - "max_num_features": 512 * 2, - "max_image_size": 512 * 1, + "max_num_features": 8192, # Much higher feature count + "max_image_size": 1600, # Higher resolution + "first_octave": -1, # More detailed features + "num_octaves": 4, + "octave_resolution": 3, + "peak_threshold": 0.005, # More lenient peak detection + "edge_threshold": 20, # More lenient edge threshold }, ) print(f"Finished feature extraction in {(time.time() - start_time):.2f}s.") - # Step 2: Feature Matching - pycolmap.match_exhaustive(database_path) + # Step 2: Feature Matching with correct API + sift_matching_options = pycolmap.SiftMatchingOptions() + sift_matching_options.max_ratio = 0.9 + sift_matching_options.max_distance = 0.8 + sift_matching_options.cross_check = True + + pycolmap.match_exhaustive(database_path, sift_options=sift_matching_options) print(f"Finished feature matching in {(time.time() - start_time):.2f}s.") - # Step 3: Mapping with PINHOLE camera model + # Step 3: Mapping with more lenient parameters for challenging videos pipeline_options = pycolmap.IncrementalPipelineOptions() - pipeline_options.min_num_matches = 15 + pipeline_options.min_num_matches = 8 # Lower minimum matches pipeline_options.multiple_models = True pipeline_options.max_num_models = 50 pipeline_options.max_model_overlap = 20 - pipeline_options.min_model_size = 10 + pipeline_options.min_model_size = 3 # Allow smaller models pipeline_options.extract_colors = True pipeline_options.num_threads = 8 - pipeline_options.mapper.init_min_num_inliers = 30 - pipeline_options.mapper.init_max_error = 8.0 - pipeline_options.mapper.init_min_tri_angle = 5.0 + + # More lenient mapper options + pipeline_options.mapper.init_min_num_inliers = 15 # Lower inlier threshold + pipeline_options.mapper.init_max_error = 12.0 # Higher error tolerance + pipeline_options.mapper.init_min_tri_angle = 2.0 # Lower triangulation angle + pipeline_options.mapper.abs_pose_min_num_inliers = 15 + pipeline_options.mapper.abs_pose_max_error = 12.0 + pipeline_options.mapper.filter_max_reproj_error = 8.0 + pipeline_options.mapper.filter_min_tri_angle = 1.5 # Note: force_pinhole will be applied after reconstruction - reconstruction = pycolmap.incremental_mapping( - database_path=database_path, - image_path=image_dir, - output_path=sparse_path, - options=pipeline_options, - ) - print(f"Finished incremental mapping in {(time.time() - start_time):.2f}s.") + try: + reconstruction = pycolmap.incremental_mapping( + database_path=database_path, + image_path=image_dir, + output_path=sparse_path, + options=pipeline_options, + ) + print(f"Finished incremental mapping in {(time.time() - start_time):.2f}s.") + except Exception as e: + print(f"âš ī¸ Initial reconstruction failed: {e}") + print("🔄 Trying with even more lenient settings...") + + # Try with ultra-lenient settings as fallback + pipeline_options.min_num_matches = 5 + pipeline_options.min_model_size = 2 + pipeline_options.mapper.init_min_num_inliers = 10 + pipeline_options.mapper.init_max_error = 20.0 + pipeline_options.mapper.init_min_tri_angle = 1.0 + + try: + reconstruction = pycolmap.incremental_mapping( + database_path=database_path, + image_path=image_dir, + output_path=sparse_path, + options=pipeline_options, + ) + 
print(f"✅ Fallback reconstruction succeeded in {(time.time() - start_time):.2f}s.") + except Exception as e2: + print(f"❌ Both reconstruction attempts failed: {e2}") + raise RuntimeError("COLMAP reconstruction failed. The video might have insufficient overlap or features.") - # Step 4: Ensure cameras are PINHOLE (double-check) + # Step 4: Check if reconstruction was successful recon_path = os.path.join(sparse_path, "0") + if not os.path.exists(recon_path): + # Check for other reconstruction indices + reconstructions_found = [] + for i in range(10): # Check indices 0-9 + alt_path = os.path.join(sparse_path, str(i)) + if os.path.exists(alt_path) and any(os.path.exists(os.path.join(alt_path, f)) + for f in ["cameras.bin", "images.bin", "points3D.bin"]): + reconstructions_found.append(i) + + if reconstructions_found: + # Use the largest reconstruction + best_idx = max(reconstructions_found) + recon_path = os.path.join(sparse_path, str(best_idx)) + print(f"â„šī¸ Using reconstruction {best_idx} instead of 0") + + # Move to index 0 for compatibility + target_path = os.path.join(sparse_path, "0") + if not os.path.exists(target_path): + import shutil + shutil.move(recon_path, target_path) + recon_path = target_path + print(f"📁 Moved reconstruction to sparse/0/") + else: + print("❌ COLMAP reconstruction failed - creating minimal fallback reconstruction") + return create_fallback_reconstruction(image_dir, sparse_path) + + # Step 5: Convert cameras to PINHOLE if needed if os.path.exists(recon_path): reconstruction = pycolmap.Reconstruction(recon_path) + + if len(reconstruction.cameras) == 0: + raise RuntimeError("❌ Reconstruction contains no cameras") + if len(reconstruction.images) == 0: + raise RuntimeError("❌ Reconstruction contains no images") + if len(reconstruction.points3D) == 0: + print("âš ī¸ Warning: Reconstruction contains no 3D points") for cam in reconstruction.cameras.values(): if force_pinhole and cam.model != "PINHOLE": @@ -494,7 +632,8 @@ def run_colmap_on_scene(scene_dir, force_pinhole=True): cam.params = [focal, focal, cam.width/2, cam.height/2] reconstruction.write(recon_path) - print(f"Saved reconstruction with PINHOLE cameras to {recon_path}") + print(f"✅ Saved reconstruction with PINHOLE cameras to {recon_path}") + print(f"📊 Reconstruction stats: {len(reconstruction.cameras)} cameras, {len(reconstruction.images)} images, {len(reconstruction.points3D)} points") print(f"Total pipeline time: {(time.time() - start_time):.2f}s.")