From 0db6ab0309ca423d3737662db610191964a9a740 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Tue, 3 Jun 2025 11:24:06 +0900 Subject: [PATCH 01/37] add Dockerfile and confirm docker build successfully finished. --- .dockerignore | 27 +++++++++++++++++++++++++++ Dockerfile | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100644 .dockerignore create mode 100644 Dockerfile diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..628f753 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,27 @@ +# Ignore the following files and directories when building the Docker image +*.pyc +__pycache__/ +*.ipynb_checkpoints +*.log +*.csv +*.tsv +*.h5 +*.pth +*.pt +*.zip +*.tar.gz +*.egg-info/ +dist/ +build/ +.env +venv/ +.env.local +*.DS_Store +*.egg +*.whl +*.pkl +*.json +*.yaml +*.yml +assets/ +submodules/ \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..ce675db --- /dev/null +++ b/Dockerfile @@ -0,0 +1,50 @@ +FROM nvidia/cuda:12.1.1-devel-ubuntu22.04 + +# Set the working directory +WORKDIR /EDGS + +# Install system dependencies first, including git, build-essential, and cmake +RUN apt-get update && apt-get install -y \ + git \ + wget \ + build-essential \ + cmake \ + ninja-build \ + && rm -rf /var/lib/apt/lists/* + +# Copy only essential files for cloning submodules first (e.g., .gitmodules) +# Or, if submodules are public, you might not need to copy anything specific for this step +# For simplicity, we'll copy everything, but this could be optimized +COPY . . + +# Initialize and update submodules +RUN git submodule init && git submodule update --recursive + +# Install Miniconda +RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda.sh && \ + bash /tmp/miniconda.sh -b -p /opt/conda && \ + rm /tmp/miniconda.sh +ENV PATH="/opt/conda/bin:${PATH}" + +# Create the conda environment and install dependencies +RUN conda create -y -n edgs python=3.10 pip && \ + conda clean -afy && \ + echo "source activate edgs" > ~/.bashrc + +# Set CUDA architectures to compile for +ENV TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;8.9;9.0+PTX" + +# Activate the environment and install Python dependencies +RUN /bin/bash -c "source activate edgs && \ + pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121 && \ + pip install -e ./submodules/gaussian-splatting/submodules/diff-gaussian-rasterization && \ + pip install -e ./submodules/gaussian-splatting/submodules/simple-knn && \ + pip install pycolmap wandb hydra-core tqdm torchmetrics lpips matplotlib rich plyfile imageio imageio-ffmpeg && \ + pip install -e ./submodules/RoMa && \ + pip install gradio plotly scikit-learn moviepy==2.1.1 ffmpeg open3d" + +# Expose the port for Gradio +EXPOSE 7862 + +# Command to run the Gradio demo +CMD ["bash", "-c", "source activate edgs && python gradio_demo.py --port 7862"] \ No newline at end of file From f215ce8b85e7a4c57aed431e14fb1d4e349957eb Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Tue, 3 Jun 2025 13:24:15 +0900 Subject: [PATCH 02/37] add docker compose file and confimed it works well --- Dockerfile | 2 ++ docker-compose.yml | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 docker-compose.yml diff --git a/Dockerfile b/Dockerfile index ce675db..9719c23 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,6 +10,8 @@ RUN apt-get update && apt-get install -y \ build-essential \ cmake \ ninja-build \ + libgl1-mesa-glx 
\ + libglib2.0-0 \ && rm -rf /var/lib/apt/lists/* # Copy only essential files for cloning submodules first (e.g., .gitmodules) diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..6801517 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,16 @@ +services: + edgs-app: + build: . # Instructs Docker Compose to build using the Dockerfile in the current directory + image: edgs-app # This is the name of the image you built + ports: + - "7862:7862" # Map port 7862 on the host to port 7862 in the container + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all # Use all available GPUs + capabilities: [gpu] # Request GPU capabilities + volumes: + - ./data:/EDGS/data # Example: map a local 'data' folder to '/EDGS/data' in the container + - ./output:/EDGS/output # Example: map a local 'output' folder \ No newline at end of file From db084aff053981a903032b236c39906fd3799a61 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Tue, 3 Jun 2025 13:53:12 +0900 Subject: [PATCH 03/37] fix asset file not found error --- .dockerignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.dockerignore b/.dockerignore index 628f753..b6c1da7 100644 --- a/.dockerignore +++ b/.dockerignore @@ -23,5 +23,4 @@ venv/ *.json *.yaml *.yml -assets/ submodules/ \ No newline at end of file From a3f90013531934e0eb9e169876d6bd2f890e6160 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Tue, 3 Jun 2025 13:58:43 +0900 Subject: [PATCH 04/37] update readme to use docker compose --- README.md | 39 ++++----------------------------------- 1 file changed, 4 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index 8c40427..c6a5307 100644 --- a/README.md +++ b/README.md @@ -69,45 +69,14 @@ Alternatively, check our [Colab notebook](https://colab.research.google.com/gith ## đŸ› ī¸ Installation -You can either run `install.sh` or manually install using the following: +You can install it just: ```bash -git clone git@github.com:CompVis/EDGS.git --recursive -cd EDGS -git submodule update --init --recursive - -conda create -y -n edgs python=3.10 pip -conda activate edgs - -# Set up path to your CUDA. In our experience similar versions like 12.2 also work well -export CUDA_HOME=/usr/local/cuda-12.1 -export LD_LIBRARY_PATH=$CUDA_HOME/lib64:$LD_LIBRARY_PATH -export PATH=$CUDA_HOME/bin:$PATH - -conda install pytorch torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia -y -conda install nvidia/label/cuda-12.1.0::cuda-toolkit -y - -pip install -e submodules/gaussian-splatting/submodules/diff-gaussian-rasterization -pip install -e submodules/gaussian-splatting/submodules/simple-knn - -# For COLMAP and pycolmap -# Optionally install original colmap but probably pycolmap suffices -# conda install conda-forge/label/colmap_dev::colmap -pip install pycolmap - - -pip install wandb hydra-core tqdm torchmetrics lpips matplotlib rich plyfile imageio imageio-ffmpeg -conda install numpy=1.26.4 -y -c conda-forge --override-channels - -pip install -e submodules/RoMa -conda install anaconda::jupyter --yes - -# Stuff necessary for gradio and visualizations -pip install gradio -pip install plotly scikit-learn moviepy==2.1.1 ffmpeg -pip install open3d +docker compose up ``` +or you can install with running `install.sh`. + ## đŸ“Ļ Data From 55fbbde3c4241282cdd99053d7487029b237c904 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Tue, 3 Jun 2025 17:55:38 +0900 Subject: [PATCH 05/37] add command line EDGS python func, and let Dockerfile can choose gradle or command line. 
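
A usage sketch for the new command-line path (the example video is the
script's default argument; the edgs-app service name comes from
docker-compose.yml):

    docker compose exec edgs-app bash
    cd notebooks
    python fit_model_to_scene_full.py --video_path ../assets/examples/video_fruits.mp4
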
--- .gitignore | 3 + Dockerfile | 2 +- notebooks/fit_model_to_scene_full.py | 203 +++++++++++++++++++++++++++ 3 files changed, 207 insertions(+), 1 deletion(-) create mode 100644 notebooks/fit_model_to_scene_full.py diff --git a/.gitignore b/.gitignore index a05a2b7..c38a86d 100644 --- a/.gitignore +++ b/.gitignore @@ -158,3 +158,6 @@ dmypy.json # Pyre type checker .pyre/ learnableearthparser/fast_sampler/_sampler.c + +# data +data/ diff --git a/Dockerfile b/Dockerfile index 9719c23..c69f166 100644 --- a/Dockerfile +++ b/Dockerfile @@ -49,4 +49,4 @@ RUN /bin/bash -c "source activate edgs && \ EXPOSE 7862 # Command to run the Gradio demo -CMD ["bash", "-c", "source activate edgs && python gradio_demo.py --port 7862"] \ No newline at end of file +CMD ["bash"] \ No newline at end of file diff --git a/notebooks/fit_model_to_scene_full.py b/notebooks/fit_model_to_scene_full.py new file mode 100644 index 0000000..a73ab04 --- /dev/null +++ b/notebooks/fit_model_to_scene_full.py @@ -0,0 +1,203 @@ +#!/usr/bin/env python +# coding: utf-8 + +# # EDGS: Eliminating Densification for Gaussian Splatting +# EDGS improves 3D Gaussian Splatting by removing the need for densification. It starts from a dense point cloud initialization based on 2D correspondences, leading to: +# - ⚡ Faster convergence (only 25% of training time) +# - 🌀 Higher rendering quality +# - 💡 No need for progressive densification + +# ## 2. Import libraries +import argparse +import os +import random +import sys + +import hydra +import numpy as np +import omegaconf +import torch +import wandb +from hydra import compose, initialize +from matplotlib import pyplot as plt +from omegaconf import OmegaConf + +sys.path.append("../") +sys.path.append("../submodules/gaussian-splatting") +from source.trainer import EDGSTrainer +from source.utils_aux import set_seed + +# --- Add argument parsing --- +parser = argparse.ArgumentParser( + description="Fit EDGS model to a scene, optionally from a video." +) +parser.add_argument( + "--video_path", + type=str, + default="../assets/examples/video_fruits.mp4", + help="Path to the input video file.", +) +args = parser.parse_args() +# --- End argument parsing --- + +with initialize(config_path="../configs", version_base="1.1"): + cfg = compose(config_name="train") +print(OmegaConf.to_yaml(cfg)) + + +# # 3. Init input parameters + +# ## 3.1 Optionally preprocess video +PATH_TO_VIDEO = args.video_path +num_ref_views = 16 # how many frames you want to extract from video and colmap + +# Update the config with your settings +cfg.wandb.name = "EDGS.demo.scene" +cfg.wandb.mode = "disabled" # "online" +cfg.gs.dataset.model_path = ( + "./scene_edgsed/" # "change this to your path to the processed scene" +) +cfg.gs.dataset.source_path = "../assets/scene_colmaped/" # "change this to your path" +# Optionally for video processed +# cfg.gs.dataset.source_path="../assets/video_colmaped/" +cfg.gs.dataset.images = "images" +cfg.gs.opt.TEST_CAM_IDX_TO_LOG = 12 +cfg.train.gs_epochs = 30000 +cfg.gs.opt.opacity_reset_interval = 1_000_000 +cfg.train.no_densify = True +cfg.init_wC.matches_per_ref = 15_000 +cfg.init_wC.nns_per_ref = 3 +cfg.init_wC.num_refs = 180 +cfg.init_wC.roma_model = "outdoors" + + +# # 4. 
Initilize model and logger +_ = wandb.init( + entity=cfg.wandb.entity, + project=cfg.wandb.project, + config=omegaconf.OmegaConf.to_container(cfg, resolve=True, throw_on_missing=True), + name=cfg.wandb.name, + mode=cfg.wandb.mode, +) +omegaconf.OmegaConf.resolve(cfg) +set_seed(cfg.seed) +# Init output folder +print("Output folder: {}".format(cfg.gs.dataset.model_path)) +os.makedirs(cfg.gs.dataset.model_path, exist_ok=True) +# Init gs model +gs = hydra.utils.instantiate(cfg.gs) +trainer = EDGSTrainer(GS=gs, training_config=cfg.gs.opt, device=cfg.device) + + +# # 5. Init with matchings +trainer.timer.start() +trainer.init_with_corr(cfg.init_wC) +trainer.timer.pause() + + +# ### Visualize a few initial viewpoints +with torch.no_grad(): + viewpoint_stack = trainer.GS.scene.getTrainCameras() + viewpoint_cams_to_viz = random.sample(trainer.GS.scene.getTrainCameras(), 4) + for viewpoint_cam in viewpoint_cams_to_viz: + render_pkg = trainer.GS(viewpoint_cam) + image = render_pkg["render"] + + image_np = image.clone().detach().cpu().numpy().transpose(1, 2, 0) + image_gt_np = ( + viewpoint_cam.original_image.clone() + .detach() + .cpu() + .numpy() + .transpose(1, 2, 0) + ) + + # Clip values to be in the range [0, 1] + image_np = np.clip(image_np * 255, 0, 255).astype(np.uint8) + image_gt_np = np.clip(image_gt_np * 255, 0, 255).astype(np.uint8) + + fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12, 6)) + ax[0].imshow(image_gt_np) + ax[0].axis("off") + ax[1].imshow(image_np) + ax[1].axis("off") + plt.tight_layout() + plt.show() + + +# # 6.Optimize scene +# Optimize first briefly for 5k steps and visualize results. We also disable saving of pretrained models. Train function can be changed for any other method +trainer.saving_iterations = [] +cfg.train.gs_epochs = 5_000 +trainer.train(cfg.train) + + +# ### Visualize same viewpoints +with torch.no_grad(): + for viewpoint_cam in viewpoint_cams_to_viz: + render_pkg = trainer.GS(viewpoint_cam) + image = render_pkg["render"] + + image_np = image.clone().detach().cpu().numpy().transpose(1, 2, 0) + image_gt_np = ( + viewpoint_cam.original_image.clone() + .detach() + .cpu() + .numpy() + .transpose(1, 2, 0) + ) + + # Clip values to be in the range [0, 1] + image_np = np.clip(image_np * 255, 0, 255).astype(np.uint8) + image_gt_np = np.clip(image_gt_np * 255, 0, 255).astype(np.uint8) + + fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12, 6)) + ax[0].imshow(image_gt_np) + ax[0].axis("off") + ax[1].imshow(image_np) + ax[1].axis("off") + plt.tight_layout() + plt.show() + + +# ### Save model +with torch.no_grad(): + trainer.save_model() + + +# # 7. 
Continue training until we reach total 30K training steps +cfg.train.gs_epochs = 25_000 +trainer.train(cfg.train) + + +# ### Visualize same viewpoints +with torch.no_grad(): + for viewpoint_cam in viewpoint_cams_to_viz: + render_pkg = trainer.GS(viewpoint_cam) + image = render_pkg["render"] + + image_np = image.clone().detach().cpu().numpy().transpose(1, 2, 0) + image_gt_np = ( + viewpoint_cam.original_image.clone() + .detach() + .cpu() + .numpy() + .transpose(1, 2, 0) + ) + + # Clip values to be in the range [0, 1] + image_np = np.clip(image_np * 255, 0, 255).astype(np.uint8) + image_gt_np = np.clip(image_gt_np * 255, 0, 255).astype(np.uint8) + + fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12, 6)) + ax[0].imshow(image_gt_np) + ax[0].axis("off") + ax[1].imshow(image_np) + ax[1].axis("off") + plt.tight_layout() + plt.show() + + +# ### Save model +with torch.no_grad(): + trainer.save_model() From 56aed1217476498ba5d3418836b65e3cc658cafe Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Tue, 3 Jun 2025 19:29:56 +0900 Subject: [PATCH 06/37] fix to runnable --- Dockerfile | 4 ++-- README.md | 18 +++++++++++++- notebooks/fit_model_to_scene_full.py | 36 +++++++++++++++++++++------- 3 files changed, 47 insertions(+), 11 deletions(-) diff --git a/Dockerfile b/Dockerfile index c69f166..d6782de 100644 --- a/Dockerfile +++ b/Dockerfile @@ -48,5 +48,5 @@ RUN /bin/bash -c "source activate edgs && \ # Expose the port for Gradio EXPOSE 7862 -# Command to run the Gradio demo -CMD ["bash"] \ No newline at end of file +# Keep the container running in detached mode +CMD ["tail", "-f", "/dev/null"] \ No newline at end of file diff --git a/README.md b/README.md index c6a5307..06e9740 100644 --- a/README.md +++ b/README.md @@ -72,7 +72,7 @@ Alternatively, check our [Colab notebook](https://colab.research.google.com/gith You can install it just: ```bash -docker compose up +docker compose up -d ``` or you can install with running `install.sh`. @@ -87,6 +87,22 @@ We evaluated on the following datasets: ### Using Your Own Dataset +#### Option A +Use gradle demo; +``` +docker compose exec edgs-app bash +python gradio_demo.py --port 7862 +``` + +#### Option B +From command line; +``` +docker compose exec edgs-app bash +cd notebooks +python fit_model_to_scene_full.py --video_path +``` + +#### Option C You can use the same data format as the [3DGS project](https://github.com/graphdeco-inria/gaussian-splatting?tab=readme-ov-file#processing-your-own-scenes). Please follow their guide to prepare your scene. Expected folder structure: diff --git a/notebooks/fit_model_to_scene_full.py b/notebooks/fit_model_to_scene_full.py index a73ab04..bc29cd7 100644 --- a/notebooks/fit_model_to_scene_full.py +++ b/notebooks/fit_model_to_scene_full.py @@ -24,6 +24,7 @@ sys.path.append("../") sys.path.append("../submodules/gaussian-splatting") +from gradio_demo import preprocess_input from source.trainer import EDGSTrainer from source.utils_aux import set_seed @@ -51,15 +52,34 @@ PATH_TO_VIDEO = args.video_path num_ref_views = 16 # how many frames you want to extract from video and colmap +# process the input video +if True: + print("Starting video preprocessing...") + # Ensure num_corrs is defined. Using cfg.init_wC.matches_per_ref as likely intended. + num_corrs = cfg.init_wC.matches_per_ref + try: + images, scene_dir = preprocess_input(PATH_TO_VIDEO, num_ref_views, num_corrs) + print(f"Video preprocessed. 
Scene directory: {scene_dir}") + cfg.gs.dataset.source_path = scene_dir + # Define a model_path, e.g., in a subdirectory of the scene_dir or a dedicated output folder + cfg.gs.dataset.model_path = os.path.join( + os.path.dirname(scene_dir), os.path.basename(scene_dir) + "_edgs_model" + ) + print(f"Set dataset.source_path to: {cfg.gs.dataset.source_path}") + print(f"Set dataset.model_path to: {cfg.gs.dataset.model_path}") + except Exception as e: + print(f"Error during video preprocessing: {e}") + sys.exit(1) +else: + # This block will be used if video preprocessing is skipped. + # Ensure these paths are valid if this branch is taken. + print("Skipping video preprocessing. Using pre-configured paths.") + cfg.gs.dataset.model_path = "./scene_edgsed/" + cfg.gs.dataset.source_path = ( + "../assets/scene_colmaped/" # Ensure this is a valid COLMAP scene + ) + # Update the config with your settings -cfg.wandb.name = "EDGS.demo.scene" -cfg.wandb.mode = "disabled" # "online" -cfg.gs.dataset.model_path = ( - "./scene_edgsed/" # "change this to your path to the processed scene" -) -cfg.gs.dataset.source_path = "../assets/scene_colmaped/" # "change this to your path" -# Optionally for video processed -# cfg.gs.dataset.source_path="../assets/video_colmaped/" cfg.gs.dataset.images = "images" cfg.gs.opt.TEST_CAM_IDX_TO_LOG = 12 cfg.train.gs_epochs = 30000 From 3fd1a7c409209a8bdae273bc4c99825f7cc574dc Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Tue, 3 Jun 2025 20:53:47 +0900 Subject: [PATCH 07/37] debug fit_model_to_scene_full.py by creating util functions. --- gradio_demo.py | 423 ++++++++++++++++++--------- notebooks/fit_model_to_scene_full.py | 37 +-- source/utils_preprocess.py | 240 +++++++++++---- 3 files changed, 483 insertions(+), 217 deletions(-) diff --git a/gradio_demo.py b/gradio_demo.py index 9c55e44..d91841c 100644 --- a/gradio_demo.py +++ b/gradio_demo.py @@ -1,27 +1,34 @@ -import torch +import argparse +import contextlib +import io import os import shutil +import sys import tempfile -import argparse +import time + import gradio as gr -import sys -import io -from PIL import Image -import numpy as np -from source.utils_aux import set_seed -from source.utils_preprocess import read_video_frames, preprocess_frames, select_optimal_frames, save_frames_to_scene_dir, run_colmap_on_scene -from source.trainer import EDGSTrainer -from hydra import initialize, compose import hydra -import time -from source.visualization import generate_circular_camera_path, save_numpy_frames_as_mp4, generate_fully_smooth_cameras_with_tsp, put_text_on_image -import contextlib -import base64 +import numpy as np +import torch +from hydra import compose, initialize +from source.trainer import EDGSTrainer +from source.utils_aux import set_seed +from source.utils_preprocess import ( + orchestrate_video_to_colmap_scene, # Import the new/refactored function + run_colmap_on_scene, +) +from source.visualization import ( + generate_circular_camera_path, + generate_fully_smooth_cameras_with_tsp, + put_text_on_image, + save_numpy_frames_as_mp4, +) # Init RoMA model: -sys.path.append('../submodules/RoMa') -from romatch import roma_outdoor, roma_indoor +sys.path.append("../submodules/RoMa") +from romatch import roma_indoor roma_model = roma_indoor(device="cuda:0") roma_model.upsample_preds = False @@ -33,6 +40,7 @@ trainer = None + class Tee(io.TextIOBase): def __init__(self, *streams): self.streams = streams @@ -46,6 +54,7 @@ def flush(self): for stream in self.streams: stream.flush() + def capture_logs(func, *args, 
**kwargs): log_capture_string = io.StringIO() tee = Tee(sys.__stdout__, log_capture_string) @@ -53,12 +62,15 @@ def capture_logs(func, *args, **kwargs): result = func(*args, **kwargs) return result, log_capture_string.getvalue() + # Training Pipeline -def run_training_pipeline(scene_dir, - num_ref_views=16, - num_corrs_per_view=20000, - num_steps=1_000, - mode_toggle="Ours (EDGS)"): +def run_training_pipeline( + scene_dir, + num_ref_views=16, + num_corrs_per_view=20000, + num_steps=1_000, + mode_toggle="Ours (EDGS)", +): with initialize(config_path="./configs", version_base="1.1"): cfg = compose(config_name="train") @@ -72,8 +84,8 @@ def run_training_pipeline(scene_dir, cfg.gs.opt.TEST_CAM_IDX_TO_LOG = 12 cfg.train.gs_epochs = 30000 - - if mode_toggle=="Ours (EDGS)": + + if mode_toggle == "Ours (EDGS)": cfg.gs.opt.opacity_reset_interval = 1_000_000 cfg.train.reduce_opacity = True cfg.train.no_densify = True @@ -84,15 +96,20 @@ def run_training_pipeline(scene_dir, cfg.init_wC.nns_per_ref = 1 cfg.init_wC.num_refs = num_ref_views cfg.init_wC.add_SfM_init = False - cfg.init_wC.scaling_factor = 0.00077 * 2. - + cfg.init_wC.scaling_factor = 0.00077 * 2.0 + set_seed(cfg.seed) os.makedirs(cfg.gs.dataset.model_path, exist_ok=True) global trainer global MODEL_PATH generator3dgs = hydra.utils.instantiate(cfg.gs, do_train_test_split=False) - trainer = EDGSTrainer(GS=generator3dgs, training_config=cfg.gs.opt, device=cfg.device, log_wandb=cfg.wandb.mode != 'disabled') + trainer = EDGSTrainer( + GS=generator3dgs, + training_config=cfg.gs.opt, + device=cfg.device, + log_wandb=cfg.wandb.mode != "disabled", + ) # Disable evaluation and saving trainer.saving_iterations = [] @@ -102,13 +119,15 @@ def run_training_pipeline(scene_dir, trainer.timer.start() start_time = time.time() trainer.init_with_corr(cfg.init_wC, roma_model=roma_model) - time_for_init = time.time()-start_time + time_for_init = time.time() - start_time viewpoint_cams = trainer.GS.scene.getTrainCameras() - path_cameras = generate_fully_smooth_cameras_with_tsp(existing_cameras=viewpoint_cams, - n_selected=6, # 8 - n_points_per_segment=30, # 30 - closed=False) + path_cameras = generate_fully_smooth_cameras_with_tsp( + existing_cameras=viewpoint_cams, + n_selected=6, # 8 + n_points_per_segment=30, # 30 + closed=False, + ) path_cameras = path_cameras + path_cameras[::-1] path_renderings = [] @@ -122,13 +141,24 @@ def run_training_pipeline(scene_dir, image = render_pkg["render"] image_np = np.clip(image.detach().cpu().numpy().transpose(1, 2, 0), 0, 1) image_np = (image_np * 255).astype(np.uint8) - path_renderings.append(put_text_on_image(img=image_np, - text=f"Init stage.\nTime:{time_for_init:.3f}s. ")) - path_renderings = path_renderings + [put_text_on_image(img=image_np, text=f"Start fitting.\nTime:{time_for_init:.3f}s. ")]*30 - + path_renderings.append( + put_text_on_image( + img=image_np, text=f"Init stage.\nTime:{time_for_init:.3f}s. " + ) + ) + path_renderings = ( + path_renderings + + [ + put_text_on_image( + img=image_np, text=f"Start fitting.\nTime:{time_for_init:.3f}s. " + ) + ] + * 30 + ) + # Train and save visualizations during training. 
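    # Sketch of the loop below: each iteration renders one preview frame from
    # path_cameras (advancing idx and overlaying the elapsed time), then runs
    # trainer.train for 10 gs_epochs, so one video frame is captured roughly
    # every 10 optimization steps until num_steps is reached.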
start_time = time.time() - for _ in range(int(num_steps//10)): + for _ in range(int(num_steps // 10)): with torch.no_grad(): viewpoint_cam = path_cameras[idx] idx = (idx + 1) % len(path_cameras) @@ -136,20 +166,27 @@ def run_training_pipeline(scene_dir, image = render_pkg["render"] image_np = np.clip(image.detach().cpu().numpy().transpose(1, 2, 0), 0, 1) image_np = (image_np * 255).astype(np.uint8) - path_renderings.append(put_text_on_image( - img=image_np, - text=f"Fitting stage.\nTime:{time_for_init + time.time()-start_time:.3f}s. ")) - + path_renderings.append( + put_text_on_image( + img=image_np, + text=f"Fitting stage.\nTime:{time_for_init + time.time() - start_time:.3f}s. ", + ) + ) + cfg.train.gs_epochs = 10 trainer.train(cfg.train) - print(f"Time elapsed: {(time_for_init + time.time()-start_time):.2f}s.") + print(f"Time elapsed: {(time_for_init + time.time() - start_time):.2f}s.") # if (cfg.init_wC.use == False) and (time_for_init + time.time()-start_time) > 60: # break final_time = time.time() - + # Add static frame. To highlight we're done - path_renderings += [put_text_on_image( - img=image_np, text=f"Done.\nTime:{time_for_init + final_time -start_time:.3f}s. ")]*30 + path_renderings += [ + put_text_on_image( + img=image_np, + text=f"Done.\nTime:{time_for_init + final_time - start_time:.3f}s. ", + ) + ] * 30 # Final rendering at the end. for _ in range(len(path_cameras)): with torch.no_grad(): @@ -159,37 +196,56 @@ def run_training_pipeline(scene_dir, image = render_pkg["render"] image_np = np.clip(image.detach().cpu().numpy().transpose(1, 2, 0), 0, 1) image_np = (image_np * 255).astype(np.uint8) - path_renderings.append(put_text_on_image(img=image_np, - text=f"Final result.\nTime:{time_for_init + final_time -start_time:.3f}s. ")) + path_renderings.append( + put_text_on_image( + img=image_np, + text=f"Final result.\nTime:{time_for_init + final_time - start_time:.3f}s. 
", + ) + ) trainer.save_model() - final_video_path = os.path.join(STATIC_FILE_SERVING_FOLDER, f"{scene_name}_final.mp4") - save_numpy_frames_as_mp4(frames=path_renderings, output_path=final_video_path, fps=30, center_crop=0.85) + final_video_path = os.path.join( + STATIC_FILE_SERVING_FOLDER, f"{scene_name}_final.mp4" + ) + save_numpy_frames_as_mp4( + frames=path_renderings, output_path=final_video_path, fps=30, center_crop=0.85 + ) MODEL_PATH = cfg.gs.dataset.model_path - ply_path = os.path.join(cfg.gs.dataset.model_path, f"point_cloud/iteration_{trainer.gs_step}/point_cloud.ply") - shutil.copy(ply_path, os.path.join(STATIC_FILE_SERVING_FOLDER, "point_cloud_final.ply")) + ply_path = os.path.join( + cfg.gs.dataset.model_path, + f"point_cloud/iteration_{trainer.gs_step}/point_cloud.ply", + ) + shutil.copy( + ply_path, os.path.join(STATIC_FILE_SERVING_FOLDER, "point_cloud_final.ply") + ) return final_video_path, ply_path + # Gradio Interface def gradio_interface(input_path, num_ref_views, num_corrs, num_steps): - images, scene_dir = run_full_pipeline(input_path, num_ref_views, num_corrs, max_size=1024) - shutil.copytree(scene_dir, STATIC_FILE_SERVING_FOLDER+'/scene_colmaped', dirs_exist_ok=True) - (final_video_path, ply_path), log_output = capture_logs(run_training_pipeline, - scene_dir, - num_ref_views, - num_corrs, - num_steps) + images, scene_dir = run_full_pipeline( + input_path, num_ref_views, num_corrs, max_size=1024 + ) + shutil.copytree( + scene_dir, STATIC_FILE_SERVING_FOLDER + "/scene_colmaped", dirs_exist_ok=True + ) + (final_video_path, ply_path), log_output = capture_logs( + run_training_pipeline, scene_dir, num_ref_views, num_corrs, num_steps + ) images_rgb = [img[:, :, ::-1] for img in images] return images_rgb, final_video_path, scene_dir, ply_path, log_output + # Dummy Render Functions def render_all_views(scene_dir): viewpoint_cams = trainer.GS.scene.getTrainCameras() - path_cameras = generate_fully_smooth_cameras_with_tsp(existing_cameras=viewpoint_cams, - n_selected=8, - n_points_per_segment=60, - closed=False) + path_cameras = generate_fully_smooth_cameras_with_tsp( + existing_cameras=viewpoint_cams, + n_selected=8, + n_points_per_segment=60, + closed=False, + ) path_cameras = path_cameras + path_cameras[::-1] path_renderings = [] @@ -200,19 +256,21 @@ def render_all_views(scene_dir): image_np = np.clip(image.detach().cpu().numpy().transpose(1, 2, 0), 0, 1) image_np = (image_np * 255).astype(np.uint8) path_renderings.append(image_np) - save_numpy_frames_as_mp4(frames=path_renderings, - output_path=os.path.join(STATIC_FILE_SERVING_FOLDER, "render_all_views.mp4"), - fps=30, - center_crop=0.85) - + save_numpy_frames_as_mp4( + frames=path_renderings, + output_path=os.path.join(STATIC_FILE_SERVING_FOLDER, "render_all_views.mp4"), + fps=30, + center_crop=0.85, + ) + return os.path.join(STATIC_FILE_SERVING_FOLDER, "render_all_views.mp4") + def render_circular_path(scene_dir): viewpoint_cams = trainer.GS.scene.getTrainCameras() - path_cameras = generate_circular_camera_path(existing_cameras=viewpoint_cams, - N=240, - radius_scale=0.65, - d=0) + path_cameras = generate_circular_camera_path( + existing_cameras=viewpoint_cams, N=240, radius_scale=0.65, d=0 + ) path_renderings = [] with torch.no_grad(): @@ -222,22 +280,29 @@ def render_circular_path(scene_dir): image_np = np.clip(image.detach().cpu().numpy().transpose(1, 2, 0), 0, 1) image_np = (image_np * 255).astype(np.uint8) path_renderings.append(image_np) - save_numpy_frames_as_mp4(frames=path_renderings, - 
output_path=os.path.join(STATIC_FILE_SERVING_FOLDER, "render_circular_path.mp4"), - fps=30, - center_crop=0.85) - + save_numpy_frames_as_mp4( + frames=path_renderings, + output_path=os.path.join( + STATIC_FILE_SERVING_FOLDER, "render_circular_path.mp4" + ), + fps=30, + center_crop=0.85, + ) + return os.path.join(STATIC_FILE_SERVING_FOLDER, "render_circular_path.mp4") + # Download Functions def download_cameras(): path = os.path.join(MODEL_PATH, "cameras.json") return f"[đŸ“Ĩ Download Cameras.json](file={path})" + def download_model(): path = os.path.join(STATIC_FILE_SERVING_FOLDER, "point_cloud_final.ply") return f"[đŸ“Ĩ Download Pretrained Model (.ply)](file={path})" + # Full pipeline helpers def run_full_pipeline(input_path, num_ref_views, num_corrs, max_size=1024): tmpdirname = tempfile.mkdtemp() @@ -249,51 +314,27 @@ def run_full_pipeline(input_path, num_ref_views, num_corrs, max_size=1024): return selected_frames, scene_dir -# Preprocess Input -def process_input(input_path, num_ref_views, output_dir, max_size=1024): - if isinstance(input_path, (str, os.PathLike)): - if os.path.isdir(input_path): - frames = [] - for img_file in sorted(os.listdir(input_path)): - if img_file.lower().endswith(('jpg', 'jpeg', 'png')): - img = Image.open(os.path.join(output_dir, img_file)).convert('RGB') - img.thumbnail((1024, 1024)) - frames.append(np.array(img)) - else: - frames = read_video_frames(video_input=input_path, max_size=max_size) - else: - frames = read_video_frames(video_input=input_path, max_size=max_size) - - frames_scores = preprocess_frames(frames) - selected_frames_indices = select_optimal_frames(scores=frames_scores, - k=min(num_ref_views, len(frames))) - selected_frames = [frames[frame_idx] for frame_idx in selected_frames_indices] - - save_frames_to_scene_dir(frames=selected_frames, scene_dir=output_dir) - return selected_frames - -def preprocess_input(input_path, num_ref_views, max_size=1024): - tmpdirname = tempfile.mkdtemp() - scene_dir = os.path.join(tmpdirname, "scene") - os.makedirs(scene_dir, exist_ok=True) - selected_frames = process_input(input_path, num_ref_views, scene_dir, max_size) - run_colmap_on_scene(scene_dir) - return selected_frames, scene_dir def start_training(scene_dir, num_ref_views, num_corrs, num_steps): - return capture_logs(run_training_pipeline, scene_dir, num_ref_views, num_corrs, num_steps) - + return capture_logs( + run_training_pipeline, scene_dir, num_ref_views, num_corrs, num_steps + ) + # Gradio App with gr.Blocks() as demo: with gr.Row(): with gr.Column(scale=6): - gr.Markdown(""" + gr.Markdown( + """ ## 📄 EDGS: Eliminating Densification for Efficient Convergence of 3DGS 🔗 Project Page - """, elem_id="header") + """, + elem_id="header", + ) - gr.Markdown(""" + gr.Markdown( + """ ### đŸ› ī¸ How to Use This Demo 1. Upload a **front-facing video** or **a folder of images** of a **static** scene. @@ -306,37 +347,52 @@ def start_training(scene_dir, num_ref_views, num_corrs, num_steps): ✅ Best for scenes with small camera motion. ❗ For full 360° or large-scale scenes, we recommend the Colab version (see project page). 
- """, elem_id="quickstart") - + """, + elem_id="quickstart", + ) scene_dir_state = gr.State() ply_model_state = gr.State() with gr.Row(): with gr.Column(scale=2): - input_file = gr.File(label="Upload Video or Images", - file_types=[".mp4", ".avi", ".mov", ".png", ".jpg", ".jpeg"], - file_count="multiple") + input_file = gr.File( + label="Upload Video or Images", + file_types=[".mp4", ".avi", ".mov", ".png", ".jpg", ".jpeg"], + file_count="multiple", + ) gr.Examples( - examples = [ + examples=[ [["assets/examples/video_bakery.mp4"]], [["assets/examples/video_flowers.mp4"]], [["assets/examples/video_fruits.mp4"]], [["assets/examples/video_plant.mp4"]], [["assets/examples/video_salad.mp4"]], [["assets/examples/video_tram.mp4"]], - [["assets/examples/video_tulips.mp4"]] - ], + [["assets/examples/video_tulips.mp4"]], + ], inputs=[input_file], label="đŸŽžī¸ ALternatively, try an Example Video", - examples_per_page=4 + examples_per_page=4, + ) + ref_slider = gr.Slider( + 4, 32, value=16, step=1, label="Number of Reference Views" + ) + corr_slider = gr.Slider( + 5000, + 30000, + value=20000, + step=1000, + label="Correspondences per Reference View", + ) + fit_steps_slider = gr.Slider( + 100, 5000, value=400, step=100, label="Number of optimization steps" ) - ref_slider = gr.Slider(4, 32, value=16, step=1, label="Number of Reference Views") - corr_slider = gr.Slider(5000, 30000, value=20000, step=1000, label="Correspondences per Reference View") - fit_steps_slider = gr.Slider(100, 5000, value=400, step=100, label="Number of optimization steps") preprocess_button = gr.Button("📸 Preprocess Input") start_button = gr.Button("🚀 Start Reconstruction", interactive=False) - gallery = gr.Gallery(label="Selected Reference Views", columns=4, height=300) + gallery = gr.Gallery( + label="Selected Reference Views", columns=4, height=300 + ) with gr.Column(scale=3): gr.Markdown("### đŸ‹ī¸ Training Visualization") @@ -351,43 +407,118 @@ def start_training(scene_dir, num_ref_views, num_corrs, num_steps): gr.Markdown("### đŸ“Ļ Output Files") with gr.Row(height=50): with gr.Column(): - #gr.Markdown(value=f"[đŸ“Ĩ Download .ply](file/point_cloud_final.ply)") + # gr.Markdown(value=f"[đŸ“Ĩ Download .ply](file/point_cloud_final.ply)") download_cameras_button = gr.Button("đŸ“Ĩ Download Cameras.json") download_cameras_file = gr.File(label="📄 Cameras.json") with gr.Column(): - download_model_button = gr.Button("đŸ“Ĩ Download Pretrained Model (.ply)") + download_model_button = gr.Button( + "đŸ“Ĩ Download Pretrained Model (.ply)" + ) download_model_file = gr.File(label="📄 Pretrained Model (.ply)") log_output_box = gr.Textbox(label="đŸ–Ĩī¸ Log", lines=10, interactive=False) - def on_preprocess_click(input_file, num_ref_views): - images, scene_dir = preprocess_input(input_file, num_ref_views) - return gr.update(value=[x[...,::-1] for x in images]), scene_dir, gr.update(interactive=True) + def on_preprocess_click( + input_file_obj, num_ref_views_val + ): # input_file_obj is from gr.File + # 'input_file_obj' from gr.File is a tempfile._TemporaryFileWrapper object + # It has a .name attribute which is the path to the temporary file + if input_file_obj is None: + gr.Warning("Please upload a file or select an example.") + return None, None, gr.update(interactive=False) + + # Handle single file vs. 
list of files (if file_count="multiple") + actual_input_path = None + if isinstance( + input_file_obj, list + ): # If file_count="multiple" and multiple files are uploaded + if not input_file_obj: + gr.Warning("No file provided in the list.") + return None, None, gr.update(interactive=False) + actual_input_path = input_file_obj[ + 0 + ].name # Process the first file for simplicity, or adapt + # If you expect a folder of images, you might need to handle this differently, + # as Gradio's gr.File with file_count="multiple" gives a list of temp file objects. + # The original process_input had logic for os.path.isdir(input_path). + # If users are meant to upload a folder, gr.File might not be the best component, + # or you'd need to zip/unzip. For now, assuming single video or first of multiple. + elif hasattr(input_file_obj, "name"): # Single file object + actual_input_path = input_file_obj.name + else: + gr.Warning("Invalid input file.") + return None, None, gr.update(interactive=False) + + # Use the refactored preprocessing function + # The first return value 'images_data' is a list of numpy arrays (the frame pixel data) + images_data, scene_dir_val = orchestrate_video_to_colmap_scene( + actual_input_path, # Pass the path of the uploaded temp file + num_ref_views_val, + max_size=1024, # Or get from a Gradio component + base_work_dir="./gradio_processed_scenes", # Store Gradio outputs in a specific place + ) + if not scene_dir_val: + gr.Error("Preprocessing failed. Check logs.") + return None, None, gr.update(interactive=False) + + # Convert numpy arrays (BGR from OpenCV) to RGB for Gradio gallery + gallery_images = [] + if images_data: + for img_data_np in images_data: + if isinstance(img_data_np, np.ndarray): + # Assuming frames from read_video_frames are BGR, convert to RGB for PIL/Gradio + gallery_images.append( + Image.fromarray(cv2.cvtColor(img_data_np, cv2.COLOR_BGR2RGB)) + ) + else: # If images_data contains PIL Images already + gallery_images.append(img_data_np) + + return ( + gr.update(value=gallery_images), + scene_dir_val, + gr.update(interactive=True), + ) def on_start_click(scene_dir, num_ref_views, num_corrs, num_steps): - (video_path, ply_path), logs = start_training(scene_dir, num_ref_views, num_corrs, num_steps) + (video_path, ply_path), logs = start_training( + scene_dir, num_ref_views, num_corrs, num_steps + ) return video_path, ply_path, logs preprocess_button.click( fn=on_preprocess_click, inputs=[input_file, ref_slider], - outputs=[gallery, scene_dir_state, start_button] + outputs=[gallery, scene_dir_state, start_button], ) start_button.click( fn=on_start_click, inputs=[scene_dir_state, ref_slider, corr_slider, fit_steps_slider], - outputs=[video_output, model3d_viewer, log_output_box] + outputs=[video_output, model3d_viewer, log_output_box], ) - render_all_views_button.click(fn=render_all_views, inputs=[scene_dir_state], outputs=[rendered_video_output]) - render_circular_path_button.click(fn=render_circular_path, inputs=[scene_dir_state], outputs=[rendered_video_output]) - - download_cameras_button.click(fn=lambda: os.path.join(MODEL_PATH, "cameras.json"), inputs=[], outputs=[download_cameras_file]) - download_model_button.click(fn=lambda: os.path.join(STATIC_FILE_SERVING_FOLDER, "point_cloud_final.ply"), inputs=[], outputs=[download_model_file]) + render_all_views_button.click( + fn=render_all_views, inputs=[scene_dir_state], outputs=[rendered_video_output] + ) + render_circular_path_button.click( + fn=render_circular_path, + inputs=[scene_dir_state], + 
outputs=[rendered_video_output], + ) + download_cameras_button.click( + fn=lambda: os.path.join(MODEL_PATH, "cameras.json"), + inputs=[], + outputs=[download_cameras_file], + ) + download_model_button.click( + fn=lambda: os.path.join(STATIC_FILE_SERVING_FOLDER, "point_cloud_final.ply"), + inputs=[], + outputs=[download_model_file], + ) - gr.Markdown(""" + gr.Markdown( + """ --- ### 📖 Detailed Overview @@ -413,12 +544,22 @@ def on_start_click(scene_dir, num_ref_views, num_corrs, num_steps): --- Preloaded models coming soon. (TODO) - """, elem_id="details") + """, + elem_id="details", + ) if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Launch Gradio demo for EDGS preprocessing and 3D viewing.") - parser.add_argument("--port", type=int, default=7860, help="Port to launch the Gradio app on.") - parser.add_argument("--no_share", action='store_true', help="Disable Gradio sharing and assume local access (default: share=True)") + parser = argparse.ArgumentParser( + description="Launch Gradio demo for EDGS preprocessing and 3D viewing." + ) + parser.add_argument( + "--port", type=int, default=7860, help="Port to launch the Gradio app on." + ) + parser.add_argument( + "--no_share", + action="store_true", + help="Disable Gradio sharing and assume local access (default: share=True)", + ) args = parser.parse_args() demo.launch(server_name="0.0.0.0", server_port=args.port, share=not args.no_share) diff --git a/notebooks/fit_model_to_scene_full.py b/notebooks/fit_model_to_scene_full.py index bc29cd7..0b977e5 100644 --- a/notebooks/fit_model_to_scene_full.py +++ b/notebooks/fit_model_to_scene_full.py @@ -24,9 +24,11 @@ sys.path.append("../") sys.path.append("../submodules/gaussian-splatting") -from gradio_demo import preprocess_input from source.trainer import EDGSTrainer from source.utils_aux import set_seed +from source.utils_preprocess import ( + orchestrate_video_to_colmap_scene, # Use the refactored function +) # --- Add argument parsing --- parser = argparse.ArgumentParser( @@ -53,31 +55,24 @@ num_ref_views = 16 # how many frames you want to extract from video and colmap # process the input video -if True: - print("Starting video preprocessing...") - # Ensure num_corrs is defined. Using cfg.init_wC.matches_per_ref as likely intended. - num_corrs = cfg.init_wC.matches_per_ref +if PATH_TO_VIDEO and os.path.exists(PATH_TO_VIDEO): + print(f"Starting video processing for: {PATH_TO_VIDEO}") try: - images, scene_dir = preprocess_input(PATH_TO_VIDEO, num_ref_views, num_corrs) - print(f"Video preprocessed. Scene directory: {scene_dir}") - cfg.gs.dataset.source_path = scene_dir - # Define a model_path, e.g., in a subdirectory of the scene_dir or a dedicated output folder - cfg.gs.dataset.model_path = os.path.join( - os.path.dirname(scene_dir), os.path.basename(scene_dir) + "_edgs_model" + # The first return value 'images_data' might not be directly used by the trainer + # if the Scene object loads everything from the COLMAP directory. + _, scene_dir = orchestrate_video_to_colmap_scene( + PATH_TO_VIDEO, + args.num_ref_views, # Assuming you added this arg + max_size=1024, # Or make it an arg + base_work_dir=args.processed_scenes_dir, # Assuming you added this arg ) - print(f"Set dataset.source_path to: {cfg.gs.dataset.source_path}") - print(f"Set dataset.model_path to: {cfg.gs.dataset.model_path}") + if scene_dir is None: + print(f"Failed to process video {PATH_TO_VIDEO}. 
Exiting.") + sys.exit(1) except Exception as e: print(f"Error during video preprocessing: {e}") sys.exit(1) -else: - # This block will be used if video preprocessing is skipped. - # Ensure these paths are valid if this branch is taken. - print("Skipping video preprocessing. Using pre-configured paths.") - cfg.gs.dataset.model_path = "./scene_edgsed/" - cfg.gs.dataset.source_path = ( - "../assets/scene_colmaped/" # Ensure this is a valid COLMAP scene - ) + # Update the config with your settings cfg.gs.dataset.images = "images" diff --git a/source/utils_preprocess.py b/source/utils_preprocess.py index 7c6dab3..d90a03f 100644 --- a/source/utils_preprocess.py +++ b/source/utils_preprocess.py @@ -1,17 +1,15 @@ # This file contains function for video or image collection preprocessing. # For video we do the preprocessing and select k sharpest frames. -# Afterwards scene is constructed +# Afterwards scene is constructed +import os +import time + import cv2 import numpy as np -from tqdm import tqdm import pycolmap -import os -import time -import tempfile -from moviepy import VideoFileClip from matplotlib import pyplot as plt +from moviepy import VideoFileClip from PIL import Image -import cv2 from tqdm import tqdm WORKDIR = "../outputs/" @@ -22,20 +20,25 @@ def get_rotation_moviepy(video_path): rotation = 0 try: - displaymatrix = clip.reader.infos['inputs'][0]['streams'][2]['metadata'].get('displaymatrix', '') - if 'rotation of' in displaymatrix: - angle = float(displaymatrix.strip().split('rotation of')[-1].split('degrees')[0]) + displaymatrix = clip.reader.infos["inputs"][0]["streams"][2]["metadata"].get( + "displaymatrix", "" + ) + if "rotation of" in displaymatrix: + angle = float( + displaymatrix.strip().split("rotation of")[-1].split("degrees")[0] + ) rotation = int(angle) % 360 - + except Exception as e: print(f"No displaymatrix rotation found: {e}") clip.reader.close() - #if clip.audio: + # if clip.audio: # clip.audio.reader.close_proc() return rotation + def resize_max_side(frame, max_size): h, w = frame.shape[:2] scale = max_size / max(h, w) @@ -43,6 +46,7 @@ def resize_max_side(frame, max_size): frame = cv2.resize(frame, (int(w * scale), int(h * scale))) return frame + def read_video_frames(video_input, k=1, max_size=1024): """ Extracts every k-th frame from a video or list of images, resizes to max size, and returns frames as list. @@ -58,7 +62,9 @@ def read_video_frames(video_input, k=1, max_size=1024): # Handle list of image files (not single video in a list) if isinstance(video_input, list): # If it's a single video in a list, treat it as video - if len(video_input) == 1 and video_input[0].name.endswith(('.mp4', '.avi', '.mov')): + if len(video_input) == 1 and video_input[0].name.endswith( + (".mp4", ".avi", ".mov") + ): video_input = video_input[0] # unwrap single video file else: # Treat as list of images @@ -66,18 +72,19 @@ def read_video_frames(video_input, k=1, max_size=1024): for img_file in video_input: img = Image.open(img_file.name).convert("RGB") img.thumbnail((max_size, max_size)) - frames.append(np.array(img)[...,::-1]) + frames.append(np.array(img)[..., ::-1]) return frames # Handle file-like or path - if hasattr(video_input, 'name'): + if hasattr(video_input, "name"): video_path = video_input.name elif isinstance(video_input, (str, os.PathLike)): video_path = str(video_input) else: - raise ValueError("Unsupported video input type. Must be a filepath, file-like object, or list of images.") + raise ValueError( + "Unsupported video input type. 
Must be a filepath, file-like object, or list of images." + ) - cap = cv2.VideoCapture(video_path) if not cap.isOpened(): raise ValueError(f"Error: Could not open video {video_path}.") @@ -97,20 +104,21 @@ def read_video_frames(video_input, k=1, max_size=1024): scale = max(h, w) / max_size if scale > 1: frame = cv2.resize(frame, (int(w / scale), int(h / scale))) - frames.append(frame[...,[2,1,0]]) + frames.append(frame[..., [2, 1, 0]]) pbar.update(1) frame_count += 1 cap.release() return frames + def resize_max_side(frame, max_size): """ Resizes the frame so that its largest side equals max_size, maintaining aspect ratio. """ height, width = frame.shape[:2] max_dim = max(height, width) - + if max_dim <= max_size: return frame # No need to resize @@ -118,41 +126,47 @@ def resize_max_side(frame, max_size): new_width = int(width * scale) new_height = int(height * scale) - resized_frame = cv2.resize(frame, (new_width, new_height), interpolation=cv2.INTER_AREA) + resized_frame = cv2.resize( + frame, (new_width, new_height), interpolation=cv2.INTER_AREA + ) return resized_frame - def variance_of_laplacian(image): - # compute the Laplacian of the image and then return the focus - # measure, which is simply the variance of the Laplacian - return cv2.Laplacian(image, cv2.CV_64F).var() - -def process_all_frames(IMG_FOLDER = '/scratch/datasets/hq_data/night2_all_frames', - to_visualize=False, - save_images=True): + # compute the Laplacian of the image and then return the focus + # measure, which is simply the variance of the Laplacian + return cv2.Laplacian(image, cv2.CV_64F).var() + + +def process_all_frames( + IMG_FOLDER="/scratch/datasets/hq_data/night2_all_frames", + to_visualize=False, + save_images=True, +): dict_scores = {} - for idx, img_name in tqdm(enumerate(sorted([x for x in os.listdir(IMG_FOLDER) if '.png' in x]))): - - img = cv2.imread(os.path.join(IMG_FOLDER, img_name))#[250:, 100:] + for idx, img_name in tqdm( + enumerate(sorted([x for x in os.listdir(IMG_FOLDER) if ".png" in x])) + ): + img = cv2.imread(os.path.join(IMG_FOLDER, img_name)) # [250:, 100:] gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - fm = variance_of_laplacian(gray) + \ - variance_of_laplacian(cv2.resize(gray, (0,0), fx=0.75, fy=0.75)) + \ - variance_of_laplacian(cv2.resize(gray, (0,0), fx=0.5, fy=0.5)) + \ - variance_of_laplacian(cv2.resize(gray, (0,0), fx=0.25, fy=0.25)) + fm = ( + variance_of_laplacian(gray) + + variance_of_laplacian(cv2.resize(gray, (0, 0), fx=0.75, fy=0.75)) + + variance_of_laplacian(cv2.resize(gray, (0, 0), fx=0.5, fy=0.5)) + + variance_of_laplacian(cv2.resize(gray, (0, 0), fx=0.25, fy=0.25)) + ) if to_visualize: plt.figure() plt.title(f"Laplacian score: {fm:.2f}") - plt.imshow(img[..., [2,1,0]]) + plt.imshow(img[..., [2, 1, 0]]) plt.show() - dict_scores[idx] = {"idx" : idx, - "img_name" : img_name, - "score" : fm} + dict_scores[idx] = {"idx": idx, "img_name": img_name, "score": fm} if save_images: dict_scores[idx]["img"] = img - + return dict_scores + def select_optimal_frames(scores, k): """ Selects a minimal subset of frames while ensuring no gaps exceed k. @@ -165,12 +179,14 @@ def select_optimal_frames(scores, k): list of int: Indices of selected frames. 
""" n = len(scores) - selected = [0, n-1] + selected = [0, n - 1] i = 0 # Start at the first frame while i < n: # Find the best frame to select within the next k frames - best_idx = max(range(i, min(i + k + 1, n)), key=lambda x: scores[x], default=None) + best_idx = max( + range(i, min(i + k + 1, n)), key=lambda x: scores[x], default=None + ) if best_idx is None: break # No more frames left @@ -187,6 +203,7 @@ def variance_of_laplacian(image): """ return cv2.Laplacian(image, cv2.CV_64F).var() + def preprocess_frames(frames, verbose=False): """ Compute sharpness scores for a list of frames using multi-scale Laplacian variance. @@ -204,12 +221,12 @@ def preprocess_frames(frames, verbose=False): gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) fm = ( - variance_of_laplacian(gray) + - variance_of_laplacian(cv2.resize(gray, (0, 0), fx=0.75, fy=0.75)) + - variance_of_laplacian(cv2.resize(gray, (0, 0), fx=0.5, fy=0.5)) + - variance_of_laplacian(cv2.resize(gray, (0, 0), fx=0.25, fy=0.25)) + variance_of_laplacian(gray) + + variance_of_laplacian(cv2.resize(gray, (0, 0), fx=0.75, fy=0.75)) + + variance_of_laplacian(cv2.resize(gray, (0, 0), fx=0.5, fy=0.5)) + + variance_of_laplacian(cv2.resize(gray, (0, 0), fx=0.25, fy=0.25)) ) - + if verbose: print(f"Frame {idx}: Sharpness Score = {fm:.2f}") @@ -217,6 +234,7 @@ def preprocess_frames(frames, verbose=False): return scores + def select_optimal_frames(scores, k): """ Selects k frames by splitting into k segments and picking the sharpest frame from each. @@ -226,7 +244,7 @@ def select_optimal_frames(scores, k): k (int): Number of frames to select. Returns: - list of int: Indices of selected frames. + list of int: Indices of selected frames. """ n = len(scores) selected_indices = [] @@ -236,15 +254,16 @@ def select_optimal_frames(scores, k): start = i * segment_size end = (i + 1) * segment_size if i < k - 1 else n # Last chunk may be larger segment_scores = scores[start:end] - + if len(segment_scores) == 0: continue # Safety check if some segment is empty - + best_in_segment = start + np.argmax(segment_scores) selected_indices.append(best_in_segment) return sorted(selected_indices) + def save_frames_to_scene_dir(frames, scene_dir): """ Saves a list of frames into the target scene directory under 'images/' subfolder. @@ -257,7 +276,9 @@ def save_frames_to_scene_dir(frames, scene_dir): os.makedirs(images_dir, exist_ok=True) for idx, frame in enumerate(frames): - filename = os.path.join(images_dir, f"{idx:08d}.png") # 00000000.png, 00000001.png, etc. + filename = os.path.join( + images_dir, f"{idx:08d}.png" + ) # 00000000.png, 00000001.png, etc. cv2.imwrite(filename, frame) print(f"Saved {len(frames)} frames to {images_dir}") @@ -269,7 +290,7 @@ def run_colmap_on_scene(scene_dir): Args: scene_dir (str): Path to scene directory containing 'images' folder. 
- + TODO: if the function hasn't managed to match all the frames either increase image size, increase number of features or just remove those frames from the folder scene_dir/images """ @@ -280,7 +301,7 @@ def run_colmap_on_scene(scene_dir): database_path = os.path.join(scene_dir, "database.db") sparse_path = os.path.join(scene_dir, "sparse") image_dir = os.path.join(scene_dir, "images") - + # Make sure output directories exist os.makedirs(sparse_path, exist_ok=True) @@ -291,7 +312,7 @@ def run_colmap_on_scene(scene_dir): sift_options={ "max_num_features": 512 * 2, "max_image_size": 512 * 1, - } + }, ) print(f"Finished feature extraction in {(time.time() - start_time):.2f}s.") @@ -325,10 +346,119 @@ def run_colmap_on_scene(scene_dir): reconstruction = pycolmap.Reconstruction(recon_path) for cam in reconstruction.cameras.values(): - cam.model = 'SIMPLE_PINHOLE' + cam.model = "SIMPLE_PINHOLE" cam.params = cam.params[:3] # Keep only [f, cx, cy] reconstruction.write(recon_path) print(f"Total pipeline time: {(time.time() - start_time):.2f}s.") + +def process_input_for_colmap(input_path, num_ref_views, output_dir, max_size=1024): + """ + Helper function to read frames from video or image folder, select optimal ones, + and save them to the output_dir/images. + This is based on process_input from gradio_demo.py. + Renamed to avoid potential confusion if 'process_input' is too generic. + """ + frames_to_save_in_scene_dir = [] + if isinstance(input_path, (str, os.PathLike)): # If input_path is a path string + if os.path.isdir(input_path): # If it's a directory of images + print(f"Processing image directory: {input_path}") + raw_frames = [] + image_files = sorted( + [ + f + for f in os.listdir(input_path) + if f.lower().endswith(("jpg", "jpeg", "png")) + ] + ) + for img_file in image_files: + img = Image.open(os.path.join(input_path, img_file)).convert("RGB") + # Resize if necessary, similar to video frames + width, height = img.size + if max(width, height) > max_size: + scale = max_size / max(width, height) + new_width = int(width * scale) + new_height = int(height * scale) + img = img.resize((new_width, new_height), Image.LANCZOS) + raw_frames.append(np.array(img)) + else: # If it's a single video file path + print(f"Processing video file: {input_path}") + raw_frames = read_video_frames(video_input=input_path, max_size=max_size) + elif hasattr( + input_path, "name" + ): # If input_path is a file-like object (e.g., from Gradio upload) + print(f"Processing uploaded video file: {input_path.name}") + raw_frames = read_video_frames(video_input=input_path.name, max_size=max_size) + else: + raise ValueError(f"Unsupported input_path type: {type(input_path)}") + + if not raw_frames: + print("No frames extracted or read.") + return [] + + frames_scores = preprocess_frames( + raw_frames + ) # Assuming preprocess_frames takes list of numpy arrays + selected_frames_indices = select_optimal_frames( + scores=frames_scores, k=min(num_ref_views, len(raw_frames)) + ) + frames_to_save_in_scene_dir = [ + raw_frames[frame_idx] for frame_idx in selected_frames_indices + ] + + # The 'output_dir' here is the scene_dir where 'images' subfolder will be created + save_frames_to_scene_dir(frames=frames_to_save_in_scene_dir, scene_dir=output_dir) + return frames_to_save_in_scene_dir # Returns the list of selected frame data (numpy arrays) + + +def orchestrate_video_to_colmap_scene( + input_path, + num_ref_views, + max_size=1024, + base_work_dir="../outputs/processed_scenes_util", +): + """ + Orchestrates the full video/image 
folder preprocessing pipeline: + 1. Creates a temporary scene directory. + 2. Reads frames, selects optimal ones, saves them. + 3. Runs COLMAP on the scene. + Returns the list of selected frame image data and the path to the COLMAP processed scene directory. + This is based on preprocess_input from gradio_demo.py. + """ + # Create a unique scene directory + # If input_path is a file object, use its name. If a path string, use its basename. + input_name_part = "" + if hasattr(input_path, "name") and isinstance(input_path.name, str): + input_name_part = os.path.splitext(os.path.basename(input_path.name))[0] + elif isinstance(input_path, (str, os.PathLike)): + input_name_part = os.path.splitext(os.path.basename(input_path))[0] + else: # Fallback for other types or if name is not available + input_name_part = "temp_scene" + + # Using a structured output directory instead of pure tempfile.mkdtemp for easier inspection + # scene_dir_parent = tempfile.mkdtemp() # Original approach + + # Ensure base_work_dir exists + os.makedirs(base_work_dir, exist_ok=True) + # Create a unique subdirectory within base_work_dir + timestamp = time.strftime("%Y%m%d-%H%M%S") + scene_dir = os.path.join(base_work_dir, f"{input_name_part}_{timestamp}") + + os.makedirs(scene_dir, exist_ok=True) + print(f"Created scene directory for COLMAP: {scene_dir}") + + selected_frames_data = process_input_for_colmap( + input_path, num_ref_views, scene_dir, max_size + ) + if not selected_frames_data: + print(f"Frame processing failed for {input_path}. Aborting COLMAP.") + # Optionally clean up scene_dir if it's truly temporary and processing failed + # shutil.rmtree(scene_dir) + return [], None + + run_colmap_on_scene(scene_dir) # This function should create scene_dir/sparse/0 + + print(f"COLMAP processing complete for {scene_dir}") + return selected_frames_data, scene_dir From 9fc32c2ec4bbc5966b014b6c7e4bd24af93818ee Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Wed, 4 Jun 2025 09:33:21 +0900 Subject: [PATCH 08/37] fix Namespace object has no attribute num_ref_views error --- docker-compose.yml | 4 +- gradio_demo.py | 88 ++++++++++++---------------- notebooks/fit_model_to_scene_full.py | 24 +++++--- source/utils_preprocess.py | 14 +++-- 4 files changed, 65 insertions(+), 65 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 6801517..2f36e2c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -13,4 +13,6 @@ services: capabilities: [gpu] # Request GPU capabilities volumes: - ./data:/EDGS/data # Example: map a local 'data' folder to '/EDGS/data' in the container - - ./output:/EDGS/output # Example: map a local 'output' folder \ No newline at end of file + - ./output:/EDGS/output # Example: map a local 'output' folder + - ./scripts:/EDGS/scripts # Example: map a local 'scripts' folder + - ./sources:/EDGS/sources # Example: map a local 'sources' folder \ No newline at end of file diff --git a/gradio_demo.py b/gradio_demo.py index d91841c..3ca0078 100644 --- a/gradio_demo.py +++ b/gradio_demo.py @@ -418,65 +418,51 @@ def start_training(scene_dir, num_ref_views, num_corrs, num_steps): log_output_box = gr.Textbox(label="đŸ–Ĩī¸ Log", lines=10, interactive=False) - def on_preprocess_click( - input_file_obj, num_ref_views_val - ): # input_file_obj is from gr.File - # 'input_file_obj' from gr.File is a tempfile._TemporaryFileWrapper object - # It has a .name attribute which is the path to the temporary file + def on_preprocess_click(input_file_obj, num_ref_views_val): + """ + Handles the preprocess button 
click. + Calls the main preprocessing orchestrator and updates the UI. + """ if input_file_obj is None: + # Handles case where no file is uploaded if input_file component is not required + # or if the user clears the selection. + # For gr.Examples, input_file_obj will be a list containing a list of paths. + # For direct upload with file_count="multiple", it's a list of file objects. + # For direct upload with file_count="single", it's a single file object. + # orchestrate_video_to_colmap_scene should be robust to these. gr.Warning("Please upload a file or select an example.") return None, None, gr.update(interactive=False) - # Handle single file vs. list of files (if file_count="multiple") - actual_input_path = None - if isinstance( - input_file_obj, list - ): # If file_count="multiple" and multiple files are uploaded - if not input_file_obj: - gr.Warning("No file provided in the list.") - return None, None, gr.update(interactive=False) - actual_input_path = input_file_obj[ - 0 - ].name # Process the first file for simplicity, or adapt - # If you expect a folder of images, you might need to handle this differently, - # as Gradio's gr.File with file_count="multiple" gives a list of temp file objects. - # The original process_input had logic for os.path.isdir(input_path). - # If users are meant to upload a folder, gr.File might not be the best component, - # or you'd need to zip/unzip. For now, assuming single video or first of multiple. - elif hasattr(input_file_obj, "name"): # Single file object - actual_input_path = input_file_obj.name - else: - gr.Warning("Invalid input file.") - return None, None, gr.update(interactive=False) - - # Use the refactored preprocessing function - # The first return value 'images_data' is a list of numpy arrays (the frame pixel data) - images_data, scene_dir_val = orchestrate_video_to_colmap_scene( - actual_input_path, # Pass the path of the uploaded temp file - num_ref_views_val, - max_size=1024, # Or get from a Gradio component - base_work_dir="./gradio_processed_scenes", # Store Gradio outputs in a specific place + selected_bgr_frames, scene_dir = orchestrate_video_to_colmap_scene( + gradio_input_obj=input_file_obj, # Pass the raw Gradio file object(s) + num_ref_views=num_ref_views_val, + max_size=1024, + base_work_dir="./gradio_processed_scenes", # Or configure as needed ) - if not scene_dir_val: - gr.Error("Preprocessing failed. Check logs.") - return None, None, gr.update(interactive=False) - # Convert numpy arrays (BGR from OpenCV) to RGB for Gradio gallery - gallery_images = [] - if images_data: - for img_data_np in images_data: - if isinstance(img_data_np, np.ndarray): - # Assuming frames from read_video_frames are BGR, convert to RGB for PIL/Gradio - gallery_images.append( - Image.fromarray(cv2.cvtColor(img_data_np, cv2.COLOR_BGR2RGB)) - ) - else: # If images_data contains PIL Images already - gallery_images.append(img_data_np) + if not scene_dir: # Indicates preprocessing failed + gr.Error("Preprocessing failed. Please check the logs or input file.") + return ( + None, + None, + gr.update(interactive=False), + ) # Keep gallery empty, scene_dir None, button disabled + + # Convert BGR numpy arrays to RGB for Gradio gallery. + # gr.Gallery can display a list of NumPy arrays (H, W, C) or PIL Images. + # Assuming selected_bgr_frames contains BGR NumPy arrays. 
+ gallery_display_images = [] + if selected_bgr_frames: + gallery_display_images = [ + frame[..., ::-1] + for frame in selected_bgr_frames + if isinstance(frame, np.ndarray) + ] return ( - gr.update(value=gallery_images), - scene_dir_val, - gr.update(interactive=True), + gr.update(value=gallery_display_images), + scene_dir, # Update the scene_dir_state + gr.update(interactive=True), # Enable the 'Start Reconstruction' button ) def on_start_click(scene_dir, num_ref_views, num_corrs, num_steps): diff --git a/notebooks/fit_model_to_scene_full.py b/notebooks/fit_model_to_scene_full.py index 0b977e5..f56a976 100644 --- a/notebooks/fit_model_to_scene_full.py +++ b/notebooks/fit_model_to_scene_full.py @@ -40,6 +40,18 @@ default="../assets/examples/video_fruits.mp4", help="Path to the input video file.", ) +parser.add_argument( # Add this argument + "--num_ref_views", + type=int, + default=16, # Or any other sensible default + help="Number of reference views to extract from video for COLMAP.", +) +parser.add_argument( # Add this argument based on your previous script structure + "--processed_scenes_dir", + type=str, + default="../output/processed_scenes", # Or any other sensible default + help="Base directory where processed COLMAP scenes will be stored.", +) args = parser.parse_args() # --- End argument parsing --- @@ -51,29 +63,25 @@ # # 3. Init input parameters # ## 3.1 Optionally preprocess video -PATH_TO_VIDEO = args.video_path -num_ref_views = 16 # how many frames you want to extract from video and colmap - # process the input video -if PATH_TO_VIDEO and os.path.exists(PATH_TO_VIDEO): - print(f"Starting video processing for: {PATH_TO_VIDEO}") +if os.path.exists(args.video_path): + print(f"Starting video processing for: {args.video_path}") try: # The first return value 'images_data' might not be directly used by the trainer # if the Scene object loads everything from the COLMAP directory. _, scene_dir = orchestrate_video_to_colmap_scene( - PATH_TO_VIDEO, + args.video_path, args.num_ref_views, # Assuming you added this arg max_size=1024, # Or make it an arg base_work_dir=args.processed_scenes_dir, # Assuming you added this arg ) if scene_dir is None: - print(f"Failed to process video {PATH_TO_VIDEO}. Exiting.") + print(f"Failed to process video {args.video_path}. Exiting.") sys.exit(1) except Exception as e: print(f"Error during video preprocessing: {e}") sys.exit(1) - # Update the config with your settings cfg.gs.dataset.images = "images" cfg.gs.opt.TEST_CAM_IDX_TO_LOG = 12 diff --git a/source/utils_preprocess.py b/source/utils_preprocess.py index d90a03f..cf0717b 100644 --- a/source/utils_preprocess.py +++ b/source/utils_preprocess.py @@ -12,8 +12,6 @@ from PIL import Image from tqdm import tqdm -WORKDIR = "../outputs/" - def get_rotation_moviepy(video_path): clip = VideoFileClip(video_path) @@ -417,15 +415,21 @@ def orchestrate_video_to_colmap_scene( input_path, num_ref_views, max_size=1024, - base_work_dir="../outputs/processed_scenes_util", + base_work_dir="../output/processed_scenes", ): """ Orchestrates the full video/image folder preprocessing pipeline: 1. Creates a temporary scene directory. 2. Reads frames, selects optimal ones, saves them. 3. Runs COLMAP on the scene. - Returns the list of selected frame image data and the path to the COLMAP processed scene directory. - This is based on preprocess_input from gradio_demo.py. + Args: + input_path (str or file-like): Path to video file or directory of images. + num_ref_views (int): Number of reference views to select. 
+ max_size (int): Maximum size for width or height after resizing. + base_work_dir (str): Base directory for temporary scene directories. + Returns: + the list of selected frame image data and the path to the COLMAP processed scene directory. + This is based on preprocess_input from gradio_demo.py. """ # Create a unique scene directory # If input_path is a file object, use its name. If a path string, use its basename. From 6d7a5c03175d7d240819a0c0f3f043419992c2ff Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Fri, 6 Jun 2025 10:55:48 +0900 Subject: [PATCH 09/37] fix error comes from wrong argument name gradio_obj --- gradio_demo.py | 2 +- source/utils_preprocess.py | 39 +++++++++++++++++++++++++++++++------- 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/gradio_demo.py b/gradio_demo.py index 3ca0078..b804fcc 100644 --- a/gradio_demo.py +++ b/gradio_demo.py @@ -434,7 +434,7 @@ def on_preprocess_click(input_file_obj, num_ref_views_val): return None, None, gr.update(interactive=False) selected_bgr_frames, scene_dir = orchestrate_video_to_colmap_scene( - gradio_input_obj=input_file_obj, # Pass the raw Gradio file object(s) + input_path=input_file_obj, # Pass the raw Gradio file object(s) num_ref_views=num_ref_views_val, max_size=1024, base_work_dir="./gradio_processed_scenes", # Or configure as needed diff --git a/source/utils_preprocess.py b/source/utils_preprocess.py index cf0717b..7d26d88 100644 --- a/source/utils_preprocess.py +++ b/source/utils_preprocess.py @@ -431,15 +431,40 @@ def orchestrate_video_to_colmap_scene( the list of selected frame image data and the path to the COLMAP processed scene directory. This is based on preprocess_input from gradio_demo.py. """ - # Create a unique scene directory - # If input_path is a file object, use its name. If a path string, use its basename. 
- input_name_part = "" - if hasattr(input_path, "name") and isinstance(input_path.name, str): + actual_input_path_str = None + input_name_part = "temp_scene" # Default + + if hasattr(input_path, "name") and isinstance( + input_path.name, str + ): # Gradio file object + actual_input_path_str = input_path.name input_name_part = os.path.splitext(os.path.basename(input_path.name))[0] - elif isinstance(input_path, (str, os.PathLike)): + elif isinstance(input_path, (str, os.PathLike)): # Direct path string + actual_input_path_str = str(input_path) input_name_part = os.path.splitext(os.path.basename(input_path))[0] - else: # Fallback for other types or if name is not available - input_name_part = "temp_scene" + elif ( + isinstance(input_path, list) and input_path + ): # List of Gradio file objects or paths (from gr.Examples) + # Handle list, e.g., take the first item + first_item = input_path[0] + if hasattr(first_item, "name") and isinstance(first_item.name, str): + actual_input_path_str = first_item.name + input_name_part = os.path.splitext(os.path.basename(first_item.name))[0] + elif isinstance(first_item, (str, os.PathLike)): + actual_input_path_str = str(first_item) + input_name_part = os.path.splitext(os.path.basename(first_item))[0] + else: + print(f"Warning: Unsupported item type in input list: {type(first_item)}") + return [], None + else: + print(f"Error: Unsupported input_path type: {type(input_path)}") + return [], None + + if not actual_input_path_str: + print("Error: Could not determine a valid input file path.") + return [], None + + print(f"Orchestrating COLMAP scene from: {actual_input_path_str}") # Using a structured output directory instead of pure tempfile.mkdtemp for easier inspection # scene_dir_parent = tempfile.mkdtemp() # Original approach From c8f1af8c06e69a541b45b5c31ee61e7b47f63d59 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Fri, 6 Jun 2025 11:05:57 +0900 Subject: [PATCH 10/37] move script files to script/ --- README.md | 4 ++-- docker-compose.yml | 4 ++-- full_eval.py => script/full_eval.py | 0 gradio_demo.py => script/gradio_demo.py | 0 install.sh => script/install.sh | 0 metrics.py => script/metrics.py | 0 train.py => script/train.py | 0 7 files changed, 4 insertions(+), 4 deletions(-) rename full_eval.py => script/full_eval.py (100%) rename gradio_demo.py => script/gradio_demo.py (100%) rename install.sh => script/install.sh (100%) rename metrics.py => script/metrics.py (100%) rename train.py => script/train.py (100%) diff --git a/README.md b/README.md index 06e9740..4996bc0 100644 --- a/README.md +++ b/README.md @@ -75,7 +75,7 @@ You can install it just: docker compose up -d ``` -or you can install with running `install.sh`. +or you can install with running `script/install.sh`. 
## đŸ“Ļ Data @@ -91,7 +91,7 @@ We evaluated on the following datasets: Use gradle demo; ``` docker compose exec edgs-app bash -python gradio_demo.py --port 7862 +python script/gradio_demo.py --port 7862 ``` #### Option B diff --git a/docker-compose.yml b/docker-compose.yml index 2f36e2c..f258c6d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -14,5 +14,5 @@ services: volumes: - ./data:/EDGS/data # Example: map a local 'data' folder to '/EDGS/data' in the container - ./output:/EDGS/output # Example: map a local 'output' folder - - ./scripts:/EDGS/scripts # Example: map a local 'scripts' folder - - ./sources:/EDGS/sources # Example: map a local 'sources' folder \ No newline at end of file + - ./script:/EDGS/script # Example: map a local 'scripts' folder + - ./source:/EDGS/source # Example: map a local 'sources' folder \ No newline at end of file diff --git a/full_eval.py b/script/full_eval.py similarity index 100% rename from full_eval.py rename to script/full_eval.py diff --git a/gradio_demo.py b/script/gradio_demo.py similarity index 100% rename from gradio_demo.py rename to script/gradio_demo.py diff --git a/install.sh b/script/install.sh similarity index 100% rename from install.sh rename to script/install.sh diff --git a/metrics.py b/script/metrics.py similarity index 100% rename from metrics.py rename to script/metrics.py diff --git a/train.py b/script/train.py similarity index 100% rename from train.py rename to script/train.py From cb17a2552ad49ed8769ca68dda7ce3e6b8834614 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Fri, 6 Jun 2025 11:51:37 +0900 Subject: [PATCH 11/37] fix import in gradio_demo.py --- .../fit_model_to_scene_full.py | 0 script/gradio_demo.py | 6 ++ script/train.py | 80 +++++++++++-------- 3 files changed, 51 insertions(+), 35 deletions(-) rename {notebooks => script}/fit_model_to_scene_full.py (100%) diff --git a/notebooks/fit_model_to_scene_full.py b/script/fit_model_to_scene_full.py similarity index 100% rename from notebooks/fit_model_to_scene_full.py rename to script/fit_model_to_scene_full.py diff --git a/script/gradio_demo.py b/script/gradio_demo.py index b804fcc..edc7240 100644 --- a/script/gradio_demo.py +++ b/script/gradio_demo.py @@ -13,6 +13,12 @@ import torch from hydra import compose, initialize +# Add the project root directory to sys.path +# so that modules from 'source' can be imported. +project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) +if project_root not in sys.path: + sys.path.insert(0, project_root) + from source.trainer import EDGSTrainer from source.utils_aux import set_seed from source.utils_preprocess import ( diff --git a/script/train.py b/script/train.py index 646409a..e95cb63 100644 --- a/script/train.py +++ b/script/train.py @@ -1,63 +1,73 @@ import os -from source.trainer import EDGSTrainer -from source.utils_aux import set_seed +import sys +from argparse import Namespace + +import hydra import omegaconf import wandb -import hydra -from argparse import Namespace -from omegaconf import OmegaConf + +# Add the project root directory to sys.path +# so that modules from 'source' can be imported. 
+project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) +if project_root not in sys.path: + sys.path.insert(0, project_root) + +from source.trainer import EDGSTrainer +from source.utils_aux import set_seed @hydra.main(config_path="configs", config_name="train", version_base="1.2") def main(cfg: omegaconf.DictConfig): - _ = wandb.init(entity=cfg.wandb.entity, - project=cfg.wandb.project, - config=omegaconf.OmegaConf.to_container(cfg, resolve=True, throw_on_missing=True), - tags=[cfg.wandb.tag], - name = cfg.wandb.name, - mode = cfg.wandb.mode) + _ = wandb.init( + entity=cfg.wandb.entity, + project=cfg.wandb.project, + config=omegaconf.OmegaConf.to_container( + cfg, resolve=True, throw_on_missing=True + ), + tags=[cfg.wandb.tag], + name=cfg.wandb.name, + mode=cfg.wandb.mode, + ) omegaconf.OmegaConf.resolve(cfg) set_seed(cfg.seed) # Init output folder print("Output folder: {}".format(cfg.gs.dataset.model_path)) os.makedirs(cfg.gs.dataset.model_path, exist_ok=True) - with open(os.path.join(cfg.gs.dataset.model_path, "cfg_args"), 'w') as cfg_log_f: + with open(os.path.join(cfg.gs.dataset.model_path, "cfg_args"), "w") as cfg_log_f: params = { - "sh_degree": 3, - "source_path": cfg.gs.dataset.source_path, - "model_path": cfg.gs.dataset.model_path, - "images": cfg.gs.dataset.images, - "depths": "", - "resolution": -1, - "_white_background": cfg.gs.dataset.white_background, - "train_test_exp": False, - "data_device": cfg.gs.dataset.data_device, - "eval": False, - "convert_SHs_python": False, - "compute_cov3D_python": False, - "debug": False, - "antialiasing": False - } + "sh_degree": 3, + "source_path": cfg.gs.dataset.source_path, + "model_path": cfg.gs.dataset.model_path, + "images": cfg.gs.dataset.images, + "depths": "", + "resolution": -1, + "_white_background": cfg.gs.dataset.white_background, + "train_test_exp": False, + "data_device": cfg.gs.dataset.data_device, + "eval": False, + "convert_SHs_python": False, + "compute_cov3D_python": False, + "debug": False, + "antialiasing": False, + } cfg_log_f.write(str(Namespace(**params))) # Init both agents - gs = hydra.utils.instantiate(cfg.gs) + gs = hydra.utils.instantiate(cfg.gs) # Init trainer and launch training - trainer = EDGSTrainer(GS=gs, - training_config=cfg.gs.opt, - device=cfg.device) - + trainer = EDGSTrainer(GS=gs, training_config=cfg.gs.opt, device=cfg.device) + trainer.load_checkpoints(cfg.load) trainer.timer.start() - trainer.init_with_corr(cfg.init_wC) + trainer.init_with_corr(cfg.init_wC) trainer.train(cfg.train) - + # All done wandb.finish() print("\nTraining complete.") + if __name__ == "__main__": main() - From 6f45cef34dc9b4dfc67b2bc47cf95e3cddc71c6b Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Fri, 6 Jun 2025 12:02:29 +0900 Subject: [PATCH 12/37] fix process colmap error --- source/utils_preprocess.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/source/utils_preprocess.py b/source/utils_preprocess.py index 7d26d88..c3b47f9 100644 --- a/source/utils_preprocess.py +++ b/source/utils_preprocess.py @@ -423,7 +423,7 @@ def orchestrate_video_to_colmap_scene( 2. Reads frames, selects optimal ones, saves them. 3. Runs COLMAP on the scene. Args: - input_path (str or file-like): Path to video file or directory of images. + input_path (str or file-like): Path string, a Gradio file object, or a list (e.g., from gr.Examples). num_ref_views (int): Number of reference views to select. max_size (int): Maximum size for width or height after resizing. 
base_work_dir (str): Base directory for temporary scene directories. @@ -444,13 +444,23 @@ def orchestrate_video_to_colmap_scene( input_name_part = os.path.splitext(os.path.basename(input_path))[0] elif ( isinstance(input_path, list) and input_path - ): # List of Gradio file objects or paths (from gr.Examples) - # Handle list, e.g., take the first item - first_item = input_path[0] - if hasattr(first_item, "name") and isinstance(first_item.name, str): + ): # Handle list: take the first item. + # gr.Examples often wraps the path in another list, e.g., [['path/to/example.mp4']] + # So, we might need to unwrap it. + first_item_candidate = input_path[0] + if ( + isinstance(first_item_candidate, list) and first_item_candidate + ): # Check for nested list + first_item = first_item_candidate[0] + else: + first_item = first_item_candidate + + if hasattr(first_item, "name") and isinstance( + first_item.name, str + ): # Gradio file object in list actual_input_path_str = first_item.name input_name_part = os.path.splitext(os.path.basename(first_item.name))[0] - elif isinstance(first_item, (str, os.PathLike)): + elif isinstance(first_item, (str, os.PathLike)): # Path string in list actual_input_path_str = str(first_item) input_name_part = os.path.splitext(os.path.basename(first_item))[0] else: @@ -479,7 +489,7 @@ def orchestrate_video_to_colmap_scene( print(f"Created scene directory for COLMAP: {scene_dir}") selected_frames_data = process_input_for_colmap( - input_path, num_ref_views, scene_dir, max_size + actual_input_path_str, num_ref_views, scene_dir, max_size ) if not selected_frames_data: print(f"Frame processing failed for {input_path}. Aborting COLMAP.") From fa2feac0130b76a743336f6ae2d6cd6af072268b Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Fri, 6 Jun 2025 13:29:57 +0900 Subject: [PATCH 13/37] fix relative directory in gradle_demo.py --- docker-compose.yml | 2 +- script/fit_model_to_scene_full.py | 2 +- script/gradio_demo.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index f258c6d..ca1d0a8 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -13,6 +13,6 @@ services: capabilities: [gpu] # Request GPU capabilities volumes: - ./data:/EDGS/data # Example: map a local 'data' folder to '/EDGS/data' in the container - - ./output:/EDGS/output # Example: map a local 'output' folder + - ./outputs:/EDGS/outputs # Example: map a local 'output' folder - ./script:/EDGS/script # Example: map a local 'scripts' folder - ./source:/EDGS/source # Example: map a local 'sources' folder \ No newline at end of file diff --git a/script/fit_model_to_scene_full.py b/script/fit_model_to_scene_full.py index f56a976..602c880 100644 --- a/script/fit_model_to_scene_full.py +++ b/script/fit_model_to_scene_full.py @@ -49,7 +49,7 @@ parser.add_argument( # Add this argument based on your previous script structure "--processed_scenes_dir", type=str, - default="../output/processed_scenes", # Or any other sensible default + default="../outputs/processed_scenes", # Or any other sensible default help="Base directory where processed COLMAP scenes will be stored.", ) args = parser.parse_args() diff --git a/script/gradio_demo.py b/script/gradio_demo.py index edc7240..50fabdd 100644 --- a/script/gradio_demo.py +++ b/script/gradio_demo.py @@ -77,11 +77,11 @@ def run_training_pipeline( num_steps=1_000, mode_toggle="Ours (EDGS)", ): - with initialize(config_path="./configs", version_base="1.1"): + with initialize(config_path="../configs", version_base="1.1"): 
cfg = compose(config_name="train") scene_name = os.path.basename(scene_dir) - model_output_dir = f"./outputs/{scene_name}_trained" + model_output_dir = f"../outputs/{scene_name}_trained" cfg.wandb.mode = "disabled" cfg.gs.dataset.model_path = model_output_dir From 88b356ac26e78ce8321277512df805fe393cfcba Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Fri, 6 Jun 2025 15:02:38 +0900 Subject: [PATCH 14/37] debug gradle_demoo to pass accessible copied result. --- script/gradio_demo.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/script/gradio_demo.py b/script/gradio_demo.py index 50fabdd..07ec2ca 100644 --- a/script/gradio_demo.py +++ b/script/gradio_demo.py @@ -217,15 +217,20 @@ def run_training_pipeline( frames=path_renderings, output_path=final_video_path, fps=30, center_crop=0.85 ) MODEL_PATH = cfg.gs.dataset.model_path - ply_path = os.path.join( + original_ply_path = os.path.join( # Renamed for clarity cfg.gs.dataset.model_path, f"point_cloud/iteration_{trainer.gs_step}/point_cloud.ply", ) - shutil.copy( - ply_path, os.path.join(STATIC_FILE_SERVING_FOLDER, "point_cloud_final.ply") + # This is the path to the copied file in an allowed directory + copied_ply_path_for_serving = os.path.join( + STATIC_FILE_SERVING_FOLDER, "point_cloud_final.ply" ) + shutil.copy(original_ply_path, copied_ply_path_for_serving) - return final_video_path, ply_path + return ( + final_video_path, + copied_ply_path_for_serving, + ) # Return the path to the copied .ply file # Gradio Interface From 4b6bade78a2462f5d80868fd043f8edf731833c9 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Fri, 6 Jun 2025 15:18:06 +0900 Subject: [PATCH 15/37] fix process_input() to process_input_for_colmap() --- script/gradio_demo.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/script/gradio_demo.py b/script/gradio_demo.py index 07ec2ca..8684e9e 100644 --- a/script/gradio_demo.py +++ b/script/gradio_demo.py @@ -23,6 +23,7 @@ from source.utils_aux import set_seed from source.utils_preprocess import ( orchestrate_video_to_colmap_scene, # Import the new/refactored function + process_input_for_colmap, run_colmap_on_scene, ) from source.visualization import ( @@ -320,7 +321,9 @@ def run_full_pipeline(input_path, num_ref_views, num_corrs, max_size=1024): scene_dir = os.path.join(tmpdirname, "scene") os.makedirs(scene_dir, exist_ok=True) - selected_frames = process_input(input_path, num_ref_views, scene_dir, max_size) + selected_frames = process_input_for_colmap( + input_path, num_ref_views, scene_dir, max_size + ) run_colmap_on_scene(scene_dir) return selected_frames, scene_dir From dfd3bb4f4d9ca3f56cdc156ee47341c8f5f5d62f Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Fri, 6 Jun 2025 16:16:00 +0900 Subject: [PATCH 16/37] fix using abs path to be able to run fit_model_to_scene_full.py script from anywhere. 
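A minimal sketch of the path bootstrap this patch applies (it mirrors the hunk below; the only assumption is that the script lives one level below the repository root, e.g. in `script/`):

```python
import os
import sys

# Resolve the repository root from this file's location so imports from
# `source` work no matter which directory the script is launched from.
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
if project_root not in sys.path:
    sys.path.insert(0, project_root)
```

Inserting at position 0, rather than appending, lets the in-repo `source` package take precedence over any identically named installed package.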
--- README.md | 3 +-- script/fit_model_to_scene_full.py | 22 +++++++++++++++------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 4996bc0..1207e3e 100644 --- a/README.md +++ b/README.md @@ -98,8 +98,7 @@ python script/gradio_demo.py --port 7862 From command line; ``` docker compose exec edgs-app bash -cd notebooks -python fit_model_to_scene_full.py --video_path +python script/fit_model_to_scene_full.py --video_path ``` #### Option C diff --git a/script/fit_model_to_scene_full.py b/script/fit_model_to_scene_full.py index 602c880..63d6f70 100644 --- a/script/fit_model_to_scene_full.py +++ b/script/fit_model_to_scene_full.py @@ -22,8 +22,12 @@ from matplotlib import pyplot as plt from omegaconf import OmegaConf -sys.path.append("../") -sys.path.append("../submodules/gaussian-splatting") +# Add the project root directory to sys.path +# so that modules from 'source' can be imported. +project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) +if project_root not in sys.path: + sys.path.insert(0, project_root) +# sys.path.append("../submodules/gaussian-splatting") from source.trainer import EDGSTrainer from source.utils_aux import set_seed from source.utils_preprocess import ( @@ -37,19 +41,23 @@ parser.add_argument( "--video_path", type=str, - default="../assets/examples/video_fruits.mp4", + default=os.path.join( + project_root, "assets", "examples", "video_fruits.mp4" + ), # Use project_root help="Path to the input video file.", ) -parser.add_argument( # Add this argument +parser.add_argument( "--num_ref_views", type=int, - default=16, # Or any other sensible default + default=16, help="Number of reference views to extract from video for COLMAP.", ) -parser.add_argument( # Add this argument based on your previous script structure +parser.add_argument( "--processed_scenes_dir", type=str, - default="../outputs/processed_scenes", # Or any other sensible default + default=os.path.join( + project_root, "outputs", "processed_scenes" + ), # Use project_root help="Base directory where processed COLMAP scenes will be stored.", ) args = parser.parse_args() From ebf25904a3e5e5d090efb190d6b9702f4853715c Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Tue, 10 Jun 2025 11:58:43 +0900 Subject: [PATCH 17/37] let it be able to open jupyter notebook from docker --- Dockerfile | 2 +- docker-compose.yml | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index d6782de..c5e0776 100644 --- a/Dockerfile +++ b/Dockerfile @@ -43,7 +43,7 @@ RUN /bin/bash -c "source activate edgs && \ pip install -e ./submodules/gaussian-splatting/submodules/simple-knn && \ pip install pycolmap wandb hydra-core tqdm torchmetrics lpips matplotlib rich plyfile imageio imageio-ffmpeg && \ pip install -e ./submodules/RoMa && \ - pip install gradio plotly scikit-learn moviepy==2.1.1 ffmpeg open3d" + pip install gradio plotly scikit-learn moviepy==2.1.1 ffmpeg open3 jupyterlab matplotlib" # Expose the port for Gradio EXPOSE 7862 diff --git a/docker-compose.yml b/docker-compose.yml index ca1d0a8..236adde 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,7 +3,8 @@ services: build: . 
# Instructs Docker Compose to build using the Dockerfile in the current directory image: edgs-app # This is the name of the image you built ports: - - "7862:7862" # Map port 7862 on the host to port 7862 in the container + - "7862:7862" # For Gradio, if you still use it + - "8888:8888" # Map port 8888 for JupyterLab deploy: resources: reservations: @@ -15,4 +16,13 @@ services: - ./data:/EDGS/data # Example: map a local 'data' folder to '/EDGS/data' in the container - ./outputs:/EDGS/outputs # Example: map a local 'output' folder - ./script:/EDGS/script # Example: map a local 'scripts' folder - - ./source:/EDGS/source # Example: map a local 'sources' folder \ No newline at end of file + - ./source:/EDGS/source # Example: map a local 'sources' folder + # Command to start JupyterLab + # --ip=0.0.0.0 makes it accessible from outside the container + # --allow-root is often needed when running in Docker + # --no-browser prevents it from trying to open a browser inside the container + # --notebook-dir specifies the directory JupyterLab should open in + command: > + sh -c "jupyter lab --ip=0.0.0.0 --port=8888 --no-browser --allow-root --notebook-dir=/EDGS/notebooks" + stdin_open: true # Keep STDIN open for interactive processes + tty: true # Allocate a TTY \ No newline at end of file From 221d8b9cb2b7ac8af5c9177f5cf74666efba9195 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Tue, 10 Jun 2025 18:14:38 +0900 Subject: [PATCH 18/37] let it be able to run A,B,C options in README --- Dockerfile | 2 +- README.md | 21 +++- docker-compose.yml | 9 +- notebooks/fit_model_to_scene_full.ipynb | 142 ++++++++++++++++++++++-- 4 files changed, 156 insertions(+), 18 deletions(-) diff --git a/Dockerfile b/Dockerfile index c5e0776..fc1ad50 100644 --- a/Dockerfile +++ b/Dockerfile @@ -43,7 +43,7 @@ RUN /bin/bash -c "source activate edgs && \ pip install -e ./submodules/gaussian-splatting/submodules/simple-knn && \ pip install pycolmap wandb hydra-core tqdm torchmetrics lpips matplotlib rich plyfile imageio imageio-ffmpeg && \ pip install -e ./submodules/RoMa && \ - pip install gradio plotly scikit-learn moviepy==2.1.1 ffmpeg open3 jupyterlab matplotlib" + pip install gradio plotly scikit-learn moviepy==2.1.1 ffmpeg open3d jupyterlab matplotlib" # Expose the port for Gradio EXPOSE 7862 diff --git a/README.md b/README.md index 1207e3e..803f139 100644 --- a/README.md +++ b/README.md @@ -95,13 +95,32 @@ python script/gradio_demo.py --port 7862 ``` #### Option B -From command line; +From command line. +First you need to create wandb account. +Then, edit configs/trainlyaml; wandb's "entity" for your user name, "project" for your created project name. + ``` docker compose exec edgs-app bash python script/fit_model_to_scene_full.py --video_path ``` #### Option C +Using Jupyter lab. +First edit configs/trainlyaml's wandb part. +You need to create wandb account and set it to the config file. +``` +docker compose up edgs-app bash +``` +And in the terminal in the docker container, +``` +jupyter lab --ip=0.0.0.0 --port=8888 --no-browser --allow-root --notebook-dir=notebooks +``` +After JupyterLab starts, it will print URLs to the terminal. Look for a URL containing a token, like: + `http://127.0.0.1:8888/lab?token=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx` +Open `http://localhost:8888` (or `http://127.0.0.1:8888`) in your host browser. +When prompted for a "Password or token", paste the `xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx` part from the URL in step 4 into the field and log in. 
Alternatively, you can paste the full URL from step 4 directly into your browser. + +#### Option D You can use the same data format as the [3DGS project](https://github.com/graphdeco-inria/gaussian-splatting?tab=readme-ov-file#processing-your-own-scenes). Please follow their guide to prepare your scene. Expected folder structure: diff --git a/docker-compose.yml b/docker-compose.yml index 236adde..3bc5aec 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -17,12 +17,7 @@ services: - ./outputs:/EDGS/outputs # Example: map a local 'output' folder - ./script:/EDGS/script # Example: map a local 'scripts' folder - ./source:/EDGS/source # Example: map a local 'sources' folder - # Command to start JupyterLab - # --ip=0.0.0.0 makes it accessible from outside the container - # --allow-root is often needed when running in Docker - # --no-browser prevents it from trying to open a browser inside the container - # --notebook-dir specifies the directory JupyterLab should open in - command: > - sh -c "jupyter lab --ip=0.0.0.0 --port=8888 --no-browser --allow-root --notebook-dir=/EDGS/notebooks" + - ./config:/EDGS/config # Example: map a local 'config' folder + - ./notebooks:/EDGS/notebooks # Map a local 'notebooks' folder for JupyterLab stdin_open: true # Keep STDIN open for interactive processes tty: true # Allocate a TTY \ No newline at end of file diff --git a/notebooks/fit_model_to_scene_full.ipynb b/notebooks/fit_model_to_scene_full.ipynb index 09e6323..1d0b6c6 100644 --- a/notebooks/fit_model_to_scene_full.ipynb +++ b/notebooks/fit_model_to_scene_full.ipynb @@ -67,10 +67,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "59c64632-e31a-4ead-98a5-4ab0f295e54d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "xFormers not available\n" + ] + } + ], "source": [ "import torch\n", "import numpy as np\n", @@ -96,10 +104,96 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "925adfa3-c311-44b6-a8c4-a31fb7426947", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "gs:\n", + " _target_: source.networks.Warper3DGS\n", + " verbose: true\n", + " viewpoint_stack: null\n", + " sh_degree: 3\n", + " opt:\n", + " iterations: 30000\n", + " position_lr_init: 0.00016\n", + " position_lr_final: 1.6e-06\n", + " position_lr_delay_mult: 0.01\n", + " position_lr_max_steps: 30000\n", + " feature_lr: 0.0025\n", + " opacity_lr: 0.025\n", + " scaling_lr: 0.005\n", + " rotation_lr: 0.001\n", + " percent_dense: 0.01\n", + " lambda_dssim: 0.2\n", + " densification_interval: 100\n", + " opacity_reset_interval: 30000\n", + " densify_from_iter: 500\n", + " densify_until_iter: 15000\n", + " densify_grad_threshold: 0.0002\n", + " random_background: false\n", + " save_iterations:\n", + " - 3000\n", + " - 7000\n", + " - 15000\n", + " - 30000\n", + " batch_size: 64\n", + " exposure_lr_init: 0.01\n", + " exposure_lr_final: 0.0001\n", + " exposure_lr_delay_steps: 0\n", + " exposure_lr_delay_mult: 0.0\n", + " TRAIN_CAM_IDX_TO_LOG: 50\n", + " TEST_CAM_IDX_TO_LOG: 10\n", + " pipe:\n", + " convert_SHs_python: false\n", + " compute_cov3D_python: false\n", + " debug: false\n", + " antialiasing: false\n", + " dataset:\n", + " densify_until_iter: 15000\n", + " source_path: ''\n", + " model_path: ''\n", + " images: images\n", + " resolution: -1\n", + " white_background: false\n", + " data_device: cuda\n", + " eval: false\n", + " depths: ''\n", 
+ " train_test_exp: false\n", + "seed: 228\n", + "wandb:\n", + " mode: online\n", + " entity: m-ogawa-sensyn\n", + " project: Adv3DGS\n", + " group: null\n", + " name: null\n", + " tag: debug\n", + "train:\n", + " gs_epochs: 0\n", + " reduce_opacity: true\n", + " no_densify: false\n", + " max_lr: true\n", + "load:\n", + " gs: null\n", + " gs_step: null\n", + "device: cuda:0\n", + "verbose: true\n", + "init_wC:\n", + " use: true\n", + " matches_per_ref: 15000\n", + " num_refs: 180\n", + " nns_per_ref: 3\n", + " scaling_factor: 0.001\n", + " proj_err_tolerance: 0.01\n", + " roma_model: outdoors\n", + " add_SfM_init: false\n", + "\n" + ] + } + ], "source": [ "with initialize(config_path=\"../configs\", version_base=\"1.1\"):\n", " cfg = compose(config_name=\"train\")\n", @@ -124,7 +218,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "07e4ca51", "metadata": {}, "outputs": [], @@ -177,10 +271,40 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "2056ee6f-dbb6-4ce8-86f0-5b4f9721d093", - "metadata": {}, - "outputs": [], + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Output folder: ./scene_edgsed/\n" + ] + }, + { + "ename": "InstantiationException", + "evalue": "Error in call to target 'source.networks.Warper3DGS':\nAssertionError('Could not recognize scene type!')\nfull_key: gs", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m/opt/conda/envs/edgs/lib/python3.10/site-packages/hydra/_internal/instantiate/_instantiate2.py:92\u001b[0m, in \u001b[0;36m_call_target\u001b[0;34m(_target_, _partial_, args, kwargs, full_key)\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 92\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_target_\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 93\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n", + "File \u001b[0;32m/EDGS/notebooks/../source/networks.py:26\u001b[0m, in \u001b[0;36mWarper3DGS.__init__\u001b[0;34m(self, sh_degree, opt, pipe, dataset, viewpoint_stack, verbose, do_train_test_split)\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpipe \u001b[38;5;241m=\u001b[39m pipe\n\u001b[0;32m---> 26\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mscene \u001b[38;5;241m=\u001b[39m \u001b[43mScene\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdataset\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgaussians\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mshuffle\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 27\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m do_train_test_split:\n", + "File \u001b[0;32m/EDGS/notebooks/../submodules/gaussian-splatting/scene/__init__.py:49\u001b[0m, in \u001b[0;36mScene.__init__\u001b[0;34m(self, args, gaussians, load_iteration, shuffle, 
resolution_scales)\u001b[0m\n\u001b[1;32m 48\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m---> 49\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28;01mFalse\u001b[39;00m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCould not recognize scene type!\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 51\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mloaded_iter:\n", + "\u001b[0;31mAssertionError\u001b[0m: Could not recognize scene type!", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[0;31mInstantiationException\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[5], line 12\u001b[0m\n\u001b[1;32m 10\u001b[0m os\u001b[38;5;241m.\u001b[39mmakedirs(cfg\u001b[38;5;241m.\u001b[39mgs\u001b[38;5;241m.\u001b[39mdataset\u001b[38;5;241m.\u001b[39mmodel_path, exist_ok\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 11\u001b[0m \u001b[38;5;66;03m# Init gs model\u001b[39;00m\n\u001b[0;32m---> 12\u001b[0m gs \u001b[38;5;241m=\u001b[39m \u001b[43mhydra\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mutils\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minstantiate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcfg\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgs\u001b[49m\u001b[43m)\u001b[49m \n\u001b[1;32m 13\u001b[0m trainer \u001b[38;5;241m=\u001b[39m EDGSTrainer(GS\u001b[38;5;241m=\u001b[39mgs,\n\u001b[1;32m 14\u001b[0m training_config\u001b[38;5;241m=\u001b[39mcfg\u001b[38;5;241m.\u001b[39mgs\u001b[38;5;241m.\u001b[39mopt,\n\u001b[1;32m 15\u001b[0m device\u001b[38;5;241m=\u001b[39mcfg\u001b[38;5;241m.\u001b[39mdevice)\n", + "File \u001b[0;32m/opt/conda/envs/edgs/lib/python3.10/site-packages/hydra/_internal/instantiate/_instantiate2.py:226\u001b[0m, in \u001b[0;36minstantiate\u001b[0;34m(config, *args, **kwargs)\u001b[0m\n\u001b[1;32m 223\u001b[0m _convert_ \u001b[38;5;241m=\u001b[39m config\u001b[38;5;241m.\u001b[39mpop(_Keys\u001b[38;5;241m.\u001b[39mCONVERT, ConvertMode\u001b[38;5;241m.\u001b[39mNONE)\n\u001b[1;32m 224\u001b[0m _partial_ \u001b[38;5;241m=\u001b[39m config\u001b[38;5;241m.\u001b[39mpop(_Keys\u001b[38;5;241m.\u001b[39mPARTIAL, \u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[0;32m--> 226\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43minstantiate_node\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 227\u001b[0m \u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrecursive\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_recursive_\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_convert_\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpartial\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_partial_\u001b[49m\n\u001b[1;32m 228\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 229\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m OmegaConf\u001b[38;5;241m.\u001b[39mis_list(config):\n\u001b[1;32m 230\u001b[0m \u001b[38;5;66;03m# Finalize config (convert targets to strings, merge with kwargs)\u001b[39;00m\n\u001b[1;32m 231\u001b[0m config_copy \u001b[38;5;241m=\u001b[39m copy\u001b[38;5;241m.\u001b[39mdeepcopy(config)\n", + "File 
\u001b[0;32m/opt/conda/envs/edgs/lib/python3.10/site-packages/hydra/_internal/instantiate/_instantiate2.py:347\u001b[0m, in \u001b[0;36minstantiate_node\u001b[0;34m(node, convert, recursive, partial, *args)\u001b[0m\n\u001b[1;32m 342\u001b[0m value \u001b[38;5;241m=\u001b[39m instantiate_node(\n\u001b[1;32m 343\u001b[0m value, convert\u001b[38;5;241m=\u001b[39mconvert, recursive\u001b[38;5;241m=\u001b[39mrecursive\n\u001b[1;32m 344\u001b[0m )\n\u001b[1;32m 345\u001b[0m kwargs[key] \u001b[38;5;241m=\u001b[39m _convert_node(value, convert)\n\u001b[0;32m--> 347\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_call_target\u001b[49m\u001b[43m(\u001b[49m\u001b[43m_target_\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpartial\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfull_key\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 348\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 349\u001b[0m \u001b[38;5;66;03m# If ALL or PARTIAL non structured or OBJECT non structured,\u001b[39;00m\n\u001b[1;32m 350\u001b[0m \u001b[38;5;66;03m# instantiate in dict and resolve interpolations eagerly.\u001b[39;00m\n\u001b[1;32m 351\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m convert \u001b[38;5;241m==\u001b[39m ConvertMode\u001b[38;5;241m.\u001b[39mALL \u001b[38;5;129;01mor\u001b[39;00m (\n\u001b[1;32m 352\u001b[0m convert \u001b[38;5;129;01min\u001b[39;00m (ConvertMode\u001b[38;5;241m.\u001b[39mPARTIAL, ConvertMode\u001b[38;5;241m.\u001b[39mOBJECT)\n\u001b[1;32m 353\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m node\u001b[38;5;241m.\u001b[39m_metadata\u001b[38;5;241m.\u001b[39mobject_type \u001b[38;5;129;01min\u001b[39;00m (\u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;28mdict\u001b[39m)\n\u001b[1;32m 354\u001b[0m ):\n", + "File \u001b[0;32m/opt/conda/envs/edgs/lib/python3.10/site-packages/hydra/_internal/instantiate/_instantiate2.py:97\u001b[0m, in \u001b[0;36m_call_target\u001b[0;34m(_target_, _partial_, args, kwargs, full_key)\u001b[0m\n\u001b[1;32m 95\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m full_key:\n\u001b[1;32m 96\u001b[0m msg \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mfull_key: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfull_key\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m---> 97\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m InstantiationException(msg) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01me\u001b[39;00m\n", + "\u001b[0;31mInstantiationException\u001b[0m: Error in call to target 'source.networks.Warper3DGS':\nAssertionError('Could not recognize scene type!')\nfull_key: gs" + ] + } + ], "source": [ "_ = wandb.init(entity=cfg.wandb.entity,\n", " project=cfg.wandb.project,\n", @@ -1781,7 +1905,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.21" + "version": "3.10.18" } }, "nbformat": 4, From d66a1bae6663fa8aac0d63b699da6c6acdcdb783 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Wed, 18 Jun 2025 08:24:19 +0900 Subject: [PATCH 19/37] update config to change name --- configs/train.yaml | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/configs/train.yaml b/configs/train.yaml index e40ec5b..de53b9d 100644 --- a/configs/train.yaml +++ b/configs/train.yaml @@ 
-1,22 +1,22 @@ defaults: - gs: base - - _self_ + - _self_ seed: 228 wandb: mode: "online" # "disabled" for no logging - entity: "3dcorrespondence" + entity: "m-ogawa-sensyn" project: "Adv3DGS" group: null name: null tag: "debug" - + train: gs_epochs: 0 # number of 3dgs iterations - reduce_opacity: True + reduce_opacity: True no_densify: False # if True, the model will not be densified - max_lr: True + max_lr: True load: gs: null #path to 3dgs checkpoint @@ -33,6 +33,4 @@ init_wC: scaling_factor: 0.001 proj_err_tolerance: 0.01 roma_model: "outdoors" # you can change this to "indoors" or "outdoors" - add_SfM_init : False - - + add_SfM_init: False From 5eac9d56cbe604b65c78d464f3b920baabee8b84 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Wed, 18 Jun 2025 08:26:50 +0900 Subject: [PATCH 20/37] update README to run option A --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 803f139..4bc8242 100644 --- a/README.md +++ b/README.md @@ -88,7 +88,8 @@ We evaluated on the following datasets: ### Using Your Own Dataset #### Option A -Use gradle demo; +Use gradle demo. +After running `docker compose up -d`, ``` docker compose exec edgs-app bash python script/gradio_demo.py --port 7862 From de7b415d21530f75127ca9623873cefac4ae5937 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Wed, 18 Jun 2025 08:32:18 +0900 Subject: [PATCH 21/37] revert config/train --- configs/train.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/train.yaml b/configs/train.yaml index de53b9d..98e257c 100644 --- a/configs/train.yaml +++ b/configs/train.yaml @@ -5,8 +5,8 @@ defaults: seed: 228 wandb: - mode: "online" # "disabled" for no logging - entity: "m-ogawa-sensyn" + mode: "disabled" # "disabled" for no logging + entity: "3dcorrespondence" project: "Adv3DGS" group: null name: null From b120ed97096aca22a851a4eb7446784b286123c0 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Wed, 18 Jun 2025 08:35:45 +0900 Subject: [PATCH 22/37] fix readme optionC --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4bc8242..6491f49 100644 --- a/README.md +++ b/README.md @@ -110,7 +110,7 @@ Using Jupyter lab. First edit configs/trainlyaml's wandb part. You need to create wandb account and set it to the config file. ``` -docker compose up edgs-app bash +docker compose exec edgs-app bash ``` And in the terminal in the docker container, ``` From c08c9e41b86b7c0d167e6df73d45eb222cbcbc12 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Wed, 18 Jun 2025 08:44:59 +0900 Subject: [PATCH 23/37] disable wandb if config/train.yaml specify it. --- script/fit_model_to_scene_full.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/script/fit_model_to_scene_full.py b/script/fit_model_to_scene_full.py index 63d6f70..f6dd8c8 100644 --- a/script/fit_model_to_scene_full.py +++ b/script/fit_model_to_scene_full.py @@ -103,13 +103,20 @@ # # 4. 
Initilize model and logger -_ = wandb.init( - entity=cfg.wandb.entity, - project=cfg.wandb.project, - config=omegaconf.OmegaConf.to_container(cfg, resolve=True, throw_on_missing=True), - name=cfg.wandb.name, - mode=cfg.wandb.mode, -) +if cfg.wandb.mode != "disabled": + _ = wandb.init( + entity=cfg.wandb.entity, + project=cfg.wandb.project, + config=omegaconf.OmegaConf.to_container( + cfg, resolve=True, throw_on_missing=True + ), + name=cfg.wandb.name, + mode=cfg.wandb.mode, + ) +else: + print( + "wandb logging is disabled (mode=disabled). Results will only be saved locally." + ) omegaconf.OmegaConf.resolve(cfg) set_seed(cfg.seed) # Init output folder From ca39f9bccd016e363fedf728e2a4d203849aa042 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Wed, 18 Jun 2025 09:18:02 +0900 Subject: [PATCH 24/37] fix docker-compose config to configs --- docker-compose.yml | 2 +- script/fit_model_to_scene_full.py | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 3bc5aec..be6c810 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -17,7 +17,7 @@ services: - ./outputs:/EDGS/outputs # Example: map a local 'output' folder - ./script:/EDGS/script # Example: map a local 'scripts' folder - ./source:/EDGS/source # Example: map a local 'sources' folder - - ./config:/EDGS/config # Example: map a local 'config' folder + - ./configs:/EDGS/configs # Example: map a local 'config' folder - ./notebooks:/EDGS/notebooks # Map a local 'notebooks' folder for JupyterLab stdin_open: true # Keep STDIN open for interactive processes tty: true # Allocate a TTY \ No newline at end of file diff --git a/script/fit_model_to_scene_full.py b/script/fit_model_to_scene_full.py index f6dd8c8..54774ba 100644 --- a/script/fit_model_to_scene_full.py +++ b/script/fit_model_to_scene_full.py @@ -9,6 +9,7 @@ # ## 2. Import libraries import argparse +import logging import os import random import sys @@ -34,6 +35,12 @@ orchestrate_video_to_colmap_scene, # Use the refactored function ) +# Initialize logging +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", +) + # --- Add argument parsing --- parser = argparse.ArgumentParser( description="Fit EDGS model to a scene, optionally from a video." @@ -104,6 +111,11 @@ # # 4. Initilize model and logger if cfg.wandb.mode != "disabled": + logging.info( + "wandb logging is enabled (mode={}). Results will be logged to wandb.".format( + cfg.wandb.mode + ) + ) _ = wandb.init( entity=cfg.wandb.entity, project=cfg.wandb.project, @@ -114,8 +126,10 @@ mode=cfg.wandb.mode, ) else: - print( - "wandb logging is disabled (mode=disabled). Results will only be saved locally." + logging.info( + "wandb logging is disabled (mode={}). Results will not be logged to wandb.".format( + cfg.wandb.mode + ) ) omegaconf.OmegaConf.resolve(cfg) set_seed(cfg.seed) From cd4c30ee0fa71a844117194f99de6951737fed4c Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Wed, 18 Jun 2025 10:32:56 +0900 Subject: [PATCH 25/37] fix No such file or directory error for output models --- script/fit_model_to_scene_full.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/script/fit_model_to_scene_full.py b/script/fit_model_to_scene_full.py index 54774ba..cc50817 100644 --- a/script/fit_model_to_scene_full.py +++ b/script/fit_model_to_scene_full.py @@ -93,6 +93,9 @@ if scene_dir is None: print(f"Failed to process video {args.video_path}. 
Exiting.") sys.exit(1) + cfg.gs.dataset.model_path = os.path.join(scene_dir, "models") + print(f"Set model_path to: {cfg.gs.dataset.model_path}") + os.makedirs(cfg.gs.dataset.model_path, exist_ok=True) except Exception as e: print(f"Error during video preprocessing: {e}") sys.exit(1) From f64d37fca760b97e0441459dc2ae7966e753eaad Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Wed, 18 Jun 2025 10:45:16 +0900 Subject: [PATCH 26/37] fix could not recongnize scene type error, by adding source_path for video. --- script/fit_model_to_scene_full.py | 1 + 1 file changed, 1 insertion(+) diff --git a/script/fit_model_to_scene_full.py b/script/fit_model_to_scene_full.py index cc50817..9eb676c 100644 --- a/script/fit_model_to_scene_full.py +++ b/script/fit_model_to_scene_full.py @@ -93,6 +93,7 @@ if scene_dir is None: print(f"Failed to process video {args.video_path}. Exiting.") sys.exit(1) + cfg.gs.dataset.source_path = scene_dir cfg.gs.dataset.model_path = os.path.join(scene_dir, "models") print(f"Set model_path to: {cfg.gs.dataset.model_path}") os.makedirs(cfg.gs.dataset.model_path, exist_ok=True) From 6fa2dde42948afe274cc950f60d6ef425c16b8a4 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Wed, 18 Jun 2025 10:52:20 +0900 Subject: [PATCH 27/37] fix You must call wandb.init() before wandb.log error by setting logwandb option to EDGSTrainer() --- script/fit_model_to_scene_full.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/script/fit_model_to_scene_full.py b/script/fit_model_to_scene_full.py index 9eb676c..6a22d95 100644 --- a/script/fit_model_to_scene_full.py +++ b/script/fit_model_to_scene_full.py @@ -142,7 +142,12 @@ os.makedirs(cfg.gs.dataset.model_path, exist_ok=True) # Init gs model gs = hydra.utils.instantiate(cfg.gs) -trainer = EDGSTrainer(GS=gs, training_config=cfg.gs.opt, device=cfg.device) +trainer = EDGSTrainer( + GS=gs, + training_config=cfg.gs.opt, + device=cfg.device, + log_wandb=(cfg.wandb.mode != "disabled"), +) # # 5. Init with matchings From 94e9db7082453ae3b427ef027d8275b088326d9c Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Wed, 18 Jun 2025 13:59:58 +0900 Subject: [PATCH 28/37] fix no cuda gpus are available error --- docker-compose.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker-compose.yml b/docker-compose.yml index be6c810..24975eb 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -12,6 +12,8 @@ services: - driver: nvidia count: all # Use all available GPUs capabilities: [gpu] # Request GPU capabilities + environment: + - NVIDIA_VISIBLE_DEVICES=all volumes: - ./data:/EDGS/data # Example: map a local 'data' folder to '/EDGS/data' in the container - ./outputs:/EDGS/outputs # Example: map a local 'output' folder From 16526ebde181724a27ec7f3ef23f65a2ed79c049 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Thu, 19 Jun 2025 09:34:11 +0900 Subject: [PATCH 29/37] remove wandb edit part from readme --- README.md | 5 ----- configs/train.yaml | 2 +- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/README.md b/README.md index 6491f49..1aac793 100644 --- a/README.md +++ b/README.md @@ -97,9 +97,6 @@ python script/gradio_demo.py --port 7862 #### Option B From command line. -First you need to create wandb account. -Then, edit configs/trainlyaml; wandb's "entity" for your user name, "project" for your created project name. 
- ``` docker compose exec edgs-app bash python script/fit_model_to_scene_full.py --video_path @@ -107,8 +104,6 @@ python script/fit_model_to_scene_full.py --video_path #### Option C Using Jupyter lab. -First edit configs/trainlyaml's wandb part. -You need to create wandb account and set it to the config file. ``` docker compose exec edgs-app bash ``` diff --git a/configs/train.yaml b/configs/train.yaml index 98e257c..b4cabcc 100644 --- a/configs/train.yaml +++ b/configs/train.yaml @@ -5,7 +5,7 @@ defaults: seed: 228 wandb: - mode: "disabled" # "disabled" for no logging + mode: "disabled" # "online" or "disabled" entity: "3dcorrespondence" project: "Adv3DGS" group: null From 1019c23ba5ad3b88490931a68459c762fb22c727 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Thu, 19 Jun 2025 10:11:36 +0900 Subject: [PATCH 30/37] remove unnecessary parameter change inside python file. --- README.md | 2 +- configs/train.yaml | 2 +- script/fit_model_to_scene_full.py | 19 +------------------ source/utils_preprocess.py | 2 +- 4 files changed, 4 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 1aac793..4c66881 100644 --- a/README.md +++ b/README.md @@ -99,7 +99,7 @@ python script/gradio_demo.py --port 7862 From command line. ``` docker compose exec edgs-app bash -python script/fit_model_to_scene_full.py --video_path +python script/fit_model_to_scene_full.py --video_path [--processed_scenes_dir ] ``` #### Option C diff --git a/configs/train.yaml b/configs/train.yaml index b4cabcc..585102e 100644 --- a/configs/train.yaml +++ b/configs/train.yaml @@ -13,7 +13,7 @@ wandb: tag: "debug" train: - gs_epochs: 0 # number of 3dgs iterations + gs_epochs: 10 # number of 3dgs iterations reduce_opacity: True no_densify: False # if True, the model will not be densified max_lr: True diff --git a/script/fit_model_to_scene_full.py b/script/fit_model_to_scene_full.py index 6a22d95..2c0f2fd 100644 --- a/script/fit_model_to_scene_full.py +++ b/script/fit_model_to_scene_full.py @@ -53,12 +53,6 @@ ), # Use project_root help="Path to the input video file.", ) -parser.add_argument( - "--num_ref_views", - type=int, - default=16, - help="Number of reference views to extract from video for COLMAP.", -) parser.add_argument( "--processed_scenes_dir", type=str, @@ -86,7 +80,7 @@ # if the Scene object loads everything from the COLMAP directory. _, scene_dir = orchestrate_video_to_colmap_scene( args.video_path, - args.num_ref_views, # Assuming you added this arg + cfg.init_wC.num_refs, # Assuming you added this arg max_size=1024, # Or make it an arg base_work_dir=args.processed_scenes_dir, # Assuming you added this arg ) @@ -101,17 +95,6 @@ print(f"Error during video preprocessing: {e}") sys.exit(1) -# Update the config with your settings -cfg.gs.dataset.images = "images" -cfg.gs.opt.TEST_CAM_IDX_TO_LOG = 12 -cfg.train.gs_epochs = 30000 -cfg.gs.opt.opacity_reset_interval = 1_000_000 -cfg.train.no_densify = True -cfg.init_wC.matches_per_ref = 15_000 -cfg.init_wC.nns_per_ref = 3 -cfg.init_wC.num_refs = 180 -cfg.init_wC.roma_model = "outdoors" - # # 4. 
Initilize model and logger if cfg.wandb.mode != "disabled": diff --git a/source/utils_preprocess.py b/source/utils_preprocess.py index c3b47f9..94a5195 100644 --- a/source/utils_preprocess.py +++ b/source/utils_preprocess.py @@ -415,7 +415,7 @@ def orchestrate_video_to_colmap_scene( input_path, num_ref_views, max_size=1024, - base_work_dir="../output/processed_scenes", + base_work_dir="../outputs/processed_scenes", ): """ Orchestrates the full video/image folder preprocessing pipeline: From da1317d263230344da14a9a8a5889c2197cd56be Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Thu, 19 Jun 2025 11:21:06 +0900 Subject: [PATCH 31/37] set similar iteration with gradio_demo to fit_model_to_scene_full.py --- configs/train.yaml | 2 +- script/fit_model_to_scene_full.py | 75 ++++++++++++++++--------------- 2 files changed, 41 insertions(+), 36 deletions(-) diff --git a/configs/train.yaml b/configs/train.yaml index 585102e..fd3cac2 100644 --- a/configs/train.yaml +++ b/configs/train.yaml @@ -28,7 +28,7 @@ verbose: true init_wC: use: True # use EDGS matches_per_ref: 15_000 # number of matches per reference - num_refs: 180 # number of reference images + num_refs: 18 # number of reference images nns_per_ref: 3 # number of nearest neighbors per reference scaling_factor: 0.001 proj_err_tolerance: 0.01 diff --git a/script/fit_model_to_scene_full.py b/script/fit_model_to_scene_full.py index 2c0f2fd..ad51f70 100644 --- a/script/fit_model_to_scene_full.py +++ b/script/fit_model_to_scene_full.py @@ -54,11 +54,9 @@ help="Path to the input video file.", ) parser.add_argument( - "--processed_scenes_dir", + "--outputs_dir", type=str, - default=os.path.join( - project_root, "outputs", "processed_scenes" - ), # Use project_root + default=os.path.join(project_root, "outputs"), # Use project_root help="Base directory where processed COLMAP scenes will be stored.", ) args = parser.parse_args() @@ -82,7 +80,7 @@ args.video_path, cfg.init_wC.num_refs, # Assuming you added this arg max_size=1024, # Or make it an arg - base_work_dir=args.processed_scenes_dir, # Assuming you added this arg + base_work_dir=args.outputs_dir, # Assuming you added this arg ) if scene_dir is None: print(f"Failed to process video {args.video_path}. Exiting.") @@ -143,7 +141,7 @@ with torch.no_grad(): viewpoint_stack = trainer.GS.scene.getTrainCameras() viewpoint_cams_to_viz = random.sample(trainer.GS.scene.getTrainCameras(), 4) - for viewpoint_cam in viewpoint_cams_to_viz: + for idx, viewpoint_cam in enumerate(viewpoint_cams_to_viz): render_pkg = trainer.GS(viewpoint_cam) image = render_pkg["render"] @@ -166,13 +164,20 @@ ax[1].imshow(image_np) ax[1].axis("off") plt.tight_layout() + plt.savefig( + os.path.join( + cfg.gs.dataset.model_path, + f"viewpoint_{idx}_initial.png", + ) + ) plt.show() + plt.close(fig) # # 6.Optimize scene # Optimize first briefly for 5k steps and visualize results. We also disable saving of pretrained models. Train function can be changed for any other method trainer.saving_iterations = [] -cfg.train.gs_epochs = 5_000 +# cfg.train.gs_epochs = 5_000 trainer.train(cfg.train) @@ -209,39 +214,39 @@ trainer.save_model() -# # 7. Continue training until we reach total 30K training steps -cfg.train.gs_epochs = 25_000 -trainer.train(cfg.train) +# # # 7. 
Continue training until we reach total 30K training steps +# cfg.train.gs_epochs = 25_000 +# trainer.train(cfg.train) -# ### Visualize same viewpoints -with torch.no_grad(): - for viewpoint_cam in viewpoint_cams_to_viz: - render_pkg = trainer.GS(viewpoint_cam) - image = render_pkg["render"] +# # ### Visualize same viewpoints +# with torch.no_grad(): +# for viewpoint_cam in viewpoint_cams_to_viz: +# render_pkg = trainer.GS(viewpoint_cam) +# image = render_pkg["render"] - image_np = image.clone().detach().cpu().numpy().transpose(1, 2, 0) - image_gt_np = ( - viewpoint_cam.original_image.clone() - .detach() - .cpu() - .numpy() - .transpose(1, 2, 0) - ) +# image_np = image.clone().detach().cpu().numpy().transpose(1, 2, 0) +# image_gt_np = ( +# viewpoint_cam.original_image.clone() +# .detach() +# .cpu() +# .numpy() +# .transpose(1, 2, 0) +# ) - # Clip values to be in the range [0, 1] - image_np = np.clip(image_np * 255, 0, 255).astype(np.uint8) - image_gt_np = np.clip(image_gt_np * 255, 0, 255).astype(np.uint8) +# # Clip values to be in the range [0, 1] +# image_np = np.clip(image_np * 255, 0, 255).astype(np.uint8) +# image_gt_np = np.clip(image_gt_np * 255, 0, 255).astype(np.uint8) - fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12, 6)) - ax[0].imshow(image_gt_np) - ax[0].axis("off") - ax[1].imshow(image_np) - ax[1].axis("off") - plt.tight_layout() - plt.show() +# fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12, 6)) +# ax[0].imshow(image_gt_np) +# ax[0].axis("off") +# ax[1].imshow(image_np) +# ax[1].axis("off") +# plt.tight_layout() +# plt.show() # ### Save model -with torch.no_grad(): - trainer.save_model() +# with torch.no_grad(): +# trainer.save_model() From 9ab2a765d03ef1e6376d03786a41aebe12553c9a Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Thu, 19 Jun 2025 16:51:36 +0900 Subject: [PATCH 32/37] add same with gradio demo setting option --- configs/train.yaml | 2 +- script/fit_model_to_scene_full.py | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/configs/train.yaml b/configs/train.yaml index fd3cac2..6e776d0 100644 --- a/configs/train.yaml +++ b/configs/train.yaml @@ -13,7 +13,7 @@ wandb: tag: "debug" train: - gs_epochs: 10 # number of 3dgs iterations + gs_epochs: 1000 # number of 3dgs iterations reduce_opacity: True no_densify: False # if True, the model will not be densified max_lr: True diff --git a/script/fit_model_to_scene_full.py b/script/fit_model_to_scene_full.py index ad51f70..6a0db04 100644 --- a/script/fit_model_to_scene_full.py +++ b/script/fit_model_to_scene_full.py @@ -64,6 +64,22 @@ with initialize(config_path="../configs", version_base="1.1"): cfg = compose(config_name="train") + +SAME_WITH_GRADIO_DEMO = True +if SAME_WITH_GRADIO_DEMO: + cfg.gs.opt.opacity_reset_interval = 1_000_000 + cfg.train.reduce_opacity = True + cfg.train.no_densify = True + cfg.train.max_lr = True + cfg.train.gs_epochs = 1000 + + cfg.init_wC.use = True + cfg.init_wC.nns_per_ref = 1 + cfg.init_wC.add_SfM_init = False + cfg.init_wC.scaling_factor = 0.00077 * 2.0 + cfg.init_wC.num_refs = 16 + cfg.init_wC.matches_per_ref = 20000 + print(OmegaConf.to_yaml(cfg)) From d965e3ec95ccbabdbae57a03669e42cbb98f0a77 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Wed, 16 Jul 2025 11:17:21 +0900 Subject: [PATCH 33/37] fix CondaToSNonInteractiveError caused by latest conda update --- Dockerfile | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Dockerfile b/Dockerfile index fc1ad50..aa2d20a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -29,6 +29,14 @@ RUN 
wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86 ENV PATH="/opt/conda/bin:${PATH}" # Create the conda environment and install dependencies +# Accept Anaconda TOS before using conda +RUN conda init bash && \ + conda config --set always_yes yes --set changeps1 no && \ + conda config --add channels defaults && \ + conda config --set channel_priority strict && \ + conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/main && \ + conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/r +# Now you can safely create your environment RUN conda create -y -n edgs python=3.10 pip && \ conda clean -afy && \ echo "source activate edgs" > ~/.bashrc From ec447206738e6512ce6ac862a11747c99e77edb2 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Wed, 16 Jul 2025 18:13:59 +0900 Subject: [PATCH 34/37] fix train.py config path --- script/train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/train.py b/script/train.py index e95cb63..895c1e3 100644 --- a/script/train.py +++ b/script/train.py @@ -16,7 +16,7 @@ from source.utils_aux import set_seed -@hydra.main(config_path="configs", config_name="train", version_base="1.2") +@hydra.main(config_path="../configs", config_name="train", version_base="1.2") def main(cfg: omegaconf.DictConfig): _ = wandb.init( entity=cfg.wandb.entity, From 68496226d2571d4734e1fc30ba3a5fca86b04e66 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Thu, 24 Jul 2025 10:55:23 +0900 Subject: [PATCH 35/37] fix option D in readme. --- README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 4c66881..e944857 100644 --- a/README.md +++ b/README.md @@ -133,6 +133,11 @@ scene_folder |---points3D.bin ``` +``` +docker compose exec edgs-app bash +``` +Then run training command as described below section. + Nerf synthetic format is also acceptable. You can also use functions provided in our code to convert a collection of images or a sinlge video into a desired format. However, this may requre tweaking and processing time can be large for large collection of images with little overlap. @@ -143,7 +148,7 @@ You can also use functions provided in our code to convert a collection of image To optimize on a single scene in COLMAP format use this code. ```bash -python train.py \ +python script/train.py \ train.gs_epochs=30000 \ train.no_densify=True \ gs.dataset.source_path= \ From f3f2ff1e33ae9aafcdf7807eb091294dd2851a57 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Fri, 25 Jul 2025 16:00:05 +0900 Subject: [PATCH 36/37] fix memory overflow error --- source/utils_preprocess.py | 294 +++++++++++++++++++++++++++++++++---- 1 file changed, 265 insertions(+), 29 deletions(-) diff --git a/source/utils_preprocess.py b/source/utils_preprocess.py index 94a5195..b9406c2 100644 --- a/source/utils_preprocess.py +++ b/source/utils_preprocess.py @@ -45,18 +45,23 @@ def resize_max_side(frame, max_size): return frame -def read_video_frames(video_input, k=1, max_size=1024): +def extract_video_frames_to_disk(video_input, output_dir, k=1, max_size=1024): """ - Extracts every k-th frame from a video or list of images, resizes to max size, and returns frames as list. + Extracts every k-th frame from a video using ffmpeg, saves to disk to avoid memory overflow. Parameters: video_input (str, file-like, or list): Path to video file, file-like object, or list of image files. + output_dir (str): Directory to save extracted frames. 
k (int): Interval for frame extraction (every k-th frame). max_size (int): Maximum size for width or height after resizing. Returns: - frames (list): List of resized frames (numpy arrays). + frame_paths (list): List of paths to extracted frame files. """ + import subprocess + import tempfile + import shutil + # Handle list of image files (not single video in a list) if isinstance(video_input, list): # If it's a single video in a list, treat it as video @@ -65,13 +70,22 @@ def read_video_frames(video_input, k=1, max_size=1024): ): video_input = video_input[0] # unwrap single video file else: - # Treat as list of images - frames = [] - for img_file in video_input: + # Treat as list of images - copy and resize them + frame_paths = [] + for idx, img_file in enumerate(video_input): img = Image.open(img_file.name).convert("RGB") - img.thumbnail((max_size, max_size)) - frames.append(np.array(img)[..., ::-1]) - return frames + # Resize if necessary + width, height = img.size + if max(width, height) > max_size: + scale = max_size / max(width, height) + new_width = int(width * scale) + new_height = int(height * scale) + img = img.resize((new_width, new_height), Image.LANCZOS) + + output_path = os.path.join(output_dir, f"frame_{idx:08d}.jpg") + img.save(output_path, "JPEG", quality=95) + frame_paths.append(output_path) + return frame_paths # Handle file-like or path if hasattr(video_input, "name"): @@ -83,31 +97,84 @@ def read_video_frames(video_input, k=1, max_size=1024): "Unsupported video input type. Must be a filepath, file-like object, or list of images." ) + # Create output directory + os.makedirs(output_dir, exist_ok=True) + + # Use ffmpeg to extract frames + print(f"Extracting frames from video using ffmpeg...") + try: + # First, get video info to calculate frame interval + result = subprocess.run([ + 'ffprobe', '-v', 'quiet', '-count_frames', '-select_streams', 'v:0', + '-show_entries', 'stream=nb_frames', '-of', 'csv=p=0', video_path + ], capture_output=True, text=True, check=True) + + total_frames = int(result.stdout.strip()) + print(f"Total frames in video: {total_frames}") + + # Extract every k-th frame using ffmpeg with scaling + ffmpeg_cmd = [ + 'ffmpeg', '-i', video_path, '-y', + '-vf', f'select=not(mod(n\\,{k})),scale=w=min({max_size}\\,iw):h=min({max_size}\\,ih):force_original_aspect_ratio=decrease', + '-q:v', '2', # High quality + os.path.join(output_dir, 'frame_%08d.jpg') + ] + + subprocess.run(ffmpeg_cmd, check=True, capture_output=True) + + # Get list of extracted frame paths + frame_paths = sorted([ + os.path.join(output_dir, f) for f in os.listdir(output_dir) + if f.startswith('frame_') and f.endswith('.jpg') + ]) + + print(f"Extracted {len(frame_paths)} frames to {output_dir}") + return frame_paths + + except subprocess.CalledProcessError as e: + print(f"ffmpeg failed: {e}") + # Fallback to opencv if ffmpeg fails + return extract_video_frames_fallback(video_path, output_dir, k, max_size) + except FileNotFoundError: + print("ffmpeg not found, using opencv fallback") + return extract_video_frames_fallback(video_path, output_dir, k, max_size) + + +def extract_video_frames_fallback(video_path, output_dir, k=1, max_size=1024): + """ + Fallback method using opencv, but saves frames to disk instead of memory. 
+ """ cap = cv2.VideoCapture(video_path) if not cap.isOpened(): raise ValueError(f"Error: Could not open video {video_path}.") total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) frame_count = 0 - frames = [] + frame_paths = [] + + os.makedirs(output_dir, exist_ok=True) - with tqdm(total=total_frames // k, desc="Processing Video", unit="frame") as pbar: + with tqdm(total=total_frames // k, desc="Extracting Video Frames", unit="frame") as pbar: while True: ret, frame = cap.read() if not ret: break if frame_count % k == 0: - frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + # Resize frame h, w = frame.shape[:2] scale = max(h, w) / max_size if scale > 1: frame = cv2.resize(frame, (int(w / scale), int(h / scale))) - frames.append(frame[..., [2, 1, 0]]) + + # Save frame to disk + frame_path = os.path.join(output_dir, f"frame_{len(frame_paths):08d}.jpg") + cv2.imwrite(frame_path, frame, [cv2.IMWRITE_JPEG_QUALITY, 95]) + frame_paths.append(frame_path) pbar.update(1) frame_count += 1 cap.release() - return frames + return frame_paths def resize_max_side(frame, max_size): @@ -202,9 +269,48 @@ def variance_of_laplacian(image): return cv2.Laplacian(image, cv2.CV_64F).var() +def preprocess_frame_paths(frame_paths, verbose=False): + """ + Compute sharpness scores for a list of frame files using multi-scale Laplacian variance. + + Args: + frame_paths (list of str): List of paths to frame image files. + verbose (bool): If True, print scores. + + Returns: + list of float: Sharpness scores for each frame. + """ + scores = [] + + for idx, frame_path in enumerate(tqdm(frame_paths, desc="Scoring frames")): + # Load frame from disk + frame = cv2.imread(frame_path) + if frame is None: + print(f"Warning: Could not load frame {frame_path}") + scores.append(0.0) + continue + + gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + + fm = ( + variance_of_laplacian(gray) + + variance_of_laplacian(cv2.resize(gray, (0, 0), fx=0.75, fy=0.75)) + + variance_of_laplacian(cv2.resize(gray, (0, 0), fx=0.5, fy=0.5)) + + variance_of_laplacian(cv2.resize(gray, (0, 0), fx=0.25, fy=0.25)) + ) + + if verbose: + print(f"Frame {idx} ({os.path.basename(frame_path)}): Sharpness Score = {fm:.2f}") + + scores.append(fm) + + return scores + + def preprocess_frames(frames, verbose=False): """ Compute sharpness scores for a list of frames using multi-scale Laplacian variance. + DEPRECATED: Use preprocess_frame_paths instead to avoid memory issues. Args: frames (list of np.ndarray): List of frames (BGR images). @@ -262,9 +368,32 @@ def select_optimal_frames(scores, k): return sorted(selected_indices) +def copy_selected_frames_to_scene_dir(selected_frame_paths, scene_dir): + """ + Copies selected frame files into the target scene directory under 'images/' subfolder. + + Args: + selected_frame_paths (list of str): List of paths to selected frame files. + scene_dir (str): Target path where 'images/' subfolder will be created. + """ + import shutil + + images_dir = os.path.join(scene_dir, "images") + os.makedirs(images_dir, exist_ok=True) + + for idx, frame_path in enumerate(selected_frame_paths): + filename = os.path.join( + images_dir, f"{idx:08d}.jpg" + ) # 00000000.jpg, 00000001.jpg, etc. + shutil.copy2(frame_path, filename) + + print(f"Copied {len(selected_frame_paths)} selected frames to {images_dir}") + + def save_frames_to_scene_dir(frames, scene_dir): """ Saves a list of frames into the target scene directory under 'images/' subfolder. + DEPRECATED: Use copy_selected_frames_to_scene_dir to avoid memory issues. 
Args: frames (list of np.ndarray): List of frames (BGR images) to save. @@ -282,12 +411,14 @@ def save_frames_to_scene_dir(frames, scene_dir): print(f"Saved {len(frames)} frames to {images_dir}") -def run_colmap_on_scene(scene_dir): +def run_colmap_on_scene(scene_dir, force_pinhole=True): """ Runs feature extraction, matching, and mapping on all images inside scene_dir/images using pycolmap. + Forces PINHOLE camera model to avoid distortion issues. Args: scene_dir (str): Path to scene directory containing 'images' folder. + force_pinhole (bool): If True, forces PINHOLE camera model during reconstruction. TODO: if the function hasn't managed to match all the frames either increase image size, increase number of features or just remove those frames from the folder scene_dir/images @@ -318,7 +449,7 @@ def run_colmap_on_scene(scene_dir): pycolmap.match_exhaustive(database_path) print(f"Finished feature matching in {(time.time() - start_time):.2f}s.") - # Step 3: Mapping + # Step 3: Mapping with PINHOLE camera model pipeline_options = pycolmap.IncrementalPipelineOptions() pipeline_options.min_num_matches = 15 pipeline_options.multiple_models = True @@ -330,6 +461,8 @@ def run_colmap_on_scene(scene_dir): pipeline_options.mapper.init_min_num_inliers = 30 pipeline_options.mapper.init_max_error = 8.0 pipeline_options.mapper.init_min_tri_angle = 5.0 + + # Note: force_pinhole will be applied after reconstruction reconstruction = pycolmap.incremental_mapping( database_path=database_path, @@ -339,21 +472,121 @@ def run_colmap_on_scene(scene_dir): ) print(f"Finished incremental mapping in {(time.time() - start_time):.2f}s.") - # Step 4: Post-process Cameras to SIMPLE_PINHOLE + # Step 4: Ensure cameras are PINHOLE (double-check) recon_path = os.path.join(sparse_path, "0") - reconstruction = pycolmap.Reconstruction(recon_path) - - for cam in reconstruction.cameras.values(): - cam.model = "SIMPLE_PINHOLE" - cam.params = cam.params[:3] # Keep only [f, cx, cy] - - reconstruction.write(recon_path) + if os.path.exists(recon_path): + reconstruction = pycolmap.Reconstruction(recon_path) + + for cam in reconstruction.cameras.values(): + if force_pinhole and cam.model != "PINHOLE": + print(f"Converting camera {cam.camera_id} from {cam.model} to PINHOLE") + cam.model = "PINHOLE" + # Ensure we have exactly 4 parameters [fx, fy, cx, cy] + if len(cam.params) >= 4: + cam.params = cam.params[:4] + elif len(cam.params) >= 3: + # Duplicate focal length if we only have 3 params + f, cx, cy = cam.params[:3] + cam.params = [f, f, cx, cy] + else: + # Default values if params are insufficient + focal = max(cam.width, cam.height) + cam.params = [focal, focal, cam.width/2, cam.height/2] + + reconstruction.write(recon_path) + print(f"Saved reconstruction with PINHOLE cameras to {recon_path}") print(f"Total pipeline time: {(time.time() - start_time):.2f}s.") def process_input_for_colmap(input_path, num_ref_views, output_dir, max_size=1024): """ + Memory-efficient helper function to process video/images, select optimal frames, + and save them to the output_dir/images without loading all frames into memory. 
+ """ + import tempfile + import shutil + + # Create temporary directory for extracted frames + temp_frames_dir = tempfile.mkdtemp(prefix="edgs_frames_") + + try: + if isinstance(input_path, (str, os.PathLike)): # If input_path is a path string + if os.path.isdir(input_path): # If it's a directory of images + print(f"Processing image directory: {input_path}") + # Copy and resize images to temp directory + frame_paths = [] + image_files = sorted([ + f for f in os.listdir(input_path) + if f.lower().endswith(("jpg", "jpeg", "png")) + ]) + + for idx, img_file in enumerate(image_files): + img = Image.open(os.path.join(input_path, img_file)).convert("RGB") + # Resize if necessary + width, height = img.size + if max(width, height) > max_size: + scale = max_size / max(width, height) + new_width = int(width * scale) + new_height = int(height * scale) + img = img.resize((new_width, new_height), Image.LANCZOS) + + output_path = os.path.join(temp_frames_dir, f"frame_{idx:08d}.jpg") + img.save(output_path, "JPEG", quality=95) + frame_paths.append(output_path) + + else: # If it's a single video file path + print(f"Processing video file: {input_path}") + frame_paths = extract_video_frames_to_disk( + video_input=input_path, + output_dir=temp_frames_dir, + max_size=max_size + ) + elif hasattr(input_path, "name"): # File-like object (e.g., from Gradio upload) + print(f"Processing uploaded video file: {input_path.name}") + frame_paths = extract_video_frames_to_disk( + video_input=input_path, + output_dir=temp_frames_dir, + max_size=max_size + ) + else: + raise ValueError(f"Unsupported input_path type: {type(input_path)}") + + if not frame_paths: + print("No frames extracted or read.") + return [] + + # Score frames without loading them all into memory + print(f"Scoring {len(frame_paths)} frames...") + frames_scores = preprocess_frame_paths(frame_paths) + + # Select optimal frames + selected_frames_indices = select_optimal_frames( + scores=frames_scores, k=min(num_ref_views, len(frame_paths)) + ) + + # Get paths to selected frames + selected_frame_paths = [frame_paths[idx] for idx in selected_frames_indices] + + print(f"Selected {len(selected_frame_paths)} optimal frames out of {len(frame_paths)}") + + # Copy selected frames to scene directory + copy_selected_frames_to_scene_dir(selected_frame_paths, output_dir) + + # Return empty list since we're not loading frames into memory anymore + # The actual frames are saved to disk in the scene directory + return [] + + finally: + # Clean up temporary directory + if os.path.exists(temp_frames_dir): + shutil.rmtree(temp_frames_dir) + print(f"Cleaned up temporary frame directory: {temp_frames_dir}") + + +def process_input_for_colmap_legacy(input_path, num_ref_views, output_dir, max_size=1024): + """ + DEPRECATED: Original memory-intensive version. Helper function to read frames from video or image folder, select optimal ones, and save them to the output_dir/images. This is based on process_input from gradio_demo.py. @@ -488,16 +721,19 @@ def orchestrate_video_to_colmap_scene( os.makedirs(scene_dir, exist_ok=True) print(f"Created scene directory for COLMAP: {scene_dir}") + # Process video/images to extract and select optimal frames selected_frames_data = process_input_for_colmap( actual_input_path_str, num_ref_views, scene_dir, max_size ) - if not selected_frames_data: - print(f"Frame processing failed for {input_path}. 
Aborting COLMAP.") - # Optionally clean up scene_dir if it's truly temporary and processing failed - # shutil.rmtree(scene_dir) + + # Check if images were saved to scene directory + images_dir = os.path.join(scene_dir, "images") + if not os.path.exists(images_dir) or not os.listdir(images_dir): + print(f"Frame processing failed for {input_path}. No images found in {images_dir}. Aborting COLMAP.") return [], None - run_colmap_on_scene(scene_dir) # This function should create scene_dir/sparse/0 + # Run COLMAP with PINHOLE camera model enforced + run_colmap_on_scene(scene_dir, force_pinhole=True) # Force PINHOLE to avoid distortion print(f"COLMAP processing complete for {scene_dir}") return selected_frames_data, scene_dir From 9f3884d2ee4a69ca65a85fa9931a123c8b270cb6 Mon Sep 17 00:00:00 2001 From: Masahiro Ogawa Date: Fri, 25 Jul 2025 17:25:34 +0900 Subject: [PATCH 37/37] fix memory error. and fix pinhole enforcement. --- script/fit_model_to_scene_full.py | 8 +- source/corr_init.py | 5 + source/utils_preprocess.py | 179 ++++++++++++++++++++++++++---- 3 files changed, 169 insertions(+), 23 deletions(-) diff --git a/script/fit_model_to_scene_full.py b/script/fit_model_to_scene_full.py index 6a0db04..bffd51b 100644 --- a/script/fit_model_to_scene_full.py +++ b/script/fit_model_to_scene_full.py @@ -73,11 +73,11 @@ cfg.train.max_lr = True cfg.train.gs_epochs = 1000 - cfg.init_wC.use = True + cfg.init_wC.use = False # Disable for fallback cases cfg.init_wC.nns_per_ref = 1 cfg.init_wC.add_SfM_init = False cfg.init_wC.scaling_factor = 0.00077 * 2.0 - cfg.init_wC.num_refs = 16 + cfg.init_wC.num_refs = 2 # Reduce to minimum since COLMAP only found 2 cameras cfg.init_wC.matches_per_ref = 20000 print(OmegaConf.to_yaml(cfg)) @@ -156,7 +156,9 @@ # ### Visualize a few initial viewpoints with torch.no_grad(): viewpoint_stack = trainer.GS.scene.getTrainCameras() - viewpoint_cams_to_viz = random.sample(trainer.GS.scene.getTrainCameras(), 4) + available_cams = trainer.GS.scene.getTrainCameras() + num_cams_to_viz = min(4, len(available_cams)) + viewpoint_cams_to_viz = random.sample(available_cams, num_cams_to_viz) for idx, viewpoint_cam in enumerate(viewpoint_cams_to_viz): render_pkg = trainer.GS(viewpoint_cam) image = render_pkg["render"] diff --git a/source/corr_init.py b/source/corr_init.py index 09c94a2..bfd6985 100644 --- a/source/corr_init.py +++ b/source/corr_init.py @@ -787,6 +787,11 @@ def init_gaussians_with_corr_fast(gaussians, scene, cfg, device, verbose=False, # Dummy first pass to initialize model with torch.no_grad(): + if len(viewpoint_stack) < 2: + print(f"âš ī¸ Warning: Only {len(viewpoint_stack)} viewpoints available. Need at least 2 for correspondence initialization.") + print("Skipping correspondence initialization - using SfM points only.") + return scene.train_cameras[3:], [], {} + viewpoint_cam1 = viewpoint_stack[0] viewpoint_cam2 = viewpoint_stack[1] imA = viewpoint_cam1.original_image.detach().cpu().numpy().transpose(1, 2, 0) diff --git a/source/utils_preprocess.py b/source/utils_preprocess.py index b9406c2..14da72b 100644 --- a/source/utils_preprocess.py +++ b/source/utils_preprocess.py @@ -411,6 +411,71 @@ def save_frames_to_scene_dir(frames, scene_dir): print(f"Saved {len(frames)} frames to {images_dir}") +def create_fallback_reconstruction(image_dir, sparse_path): + """ + Create a minimal fallback reconstruction when COLMAP fails completely. + Creates a simple linear camera trajectory for the available images. 
+ """ + # No need to import colmap_loader - we'll create text files directly + + print("🔧 Creating fallback reconstruction with assumed camera positions...") + + # Get list of images + image_files = sorted([f for f in os.listdir(image_dir) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]) + + if len(image_files) < 1: + raise RuntimeError("No images found for fallback reconstruction") + + # Read first image to get dimensions + first_img_path = os.path.join(image_dir, image_files[0]) + from PIL import Image + img = Image.open(first_img_path) + width, height = img.size + + # Create minimal reconstruction directory + fallback_dir = os.path.join(sparse_path, "0") + os.makedirs(fallback_dir, exist_ok=True) + + # Create cameras.txt with simple pinhole model + cameras_txt = os.path.join(fallback_dir, "cameras.txt") + focal = max(width, height) # Simple focal length estimation + with open(cameras_txt, 'w') as f: + f.write("# Camera list with one line of data per camera:\n") + f.write("# CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[]\n") + f.write(f"1 PINHOLE {width} {height} {focal} {focal} {width/2} {height/2}\n") + + # Create images.txt with linear trajectory + images_txt = os.path.join(fallback_dir, "images.txt") + with open(images_txt, 'w') as f: + f.write("# Image list with two lines of data per image:\n") + f.write("# IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME\n") + f.write("# POINTS2D[] as (X, Y, POINT3D_ID)\n") + + for i, img_file in enumerate(image_files): + # Simple linear trajectory along Z-axis + tx, ty, tz = 0.0, 0.0, -i * 0.5 + # Identity quaternion (no rotation) + qw, qx, qy, qz = 1.0, 0.0, 0.0, 0.0 + + f.write(f"{i+1} {qw} {qx} {qy} {qz} {tx} {ty} {tz} 1 {img_file}\n") + f.write("\n") # Empty line for points2D + + # Create minimal points3D.txt with a few dummy points + points_txt = os.path.join(fallback_dir, "points3D.txt") + with open(points_txt, 'w') as f: + f.write("# 3D point list with one line of data per point:\n") + f.write("# POINT3D_ID, X, Y, Z, R, G, B, ERROR, TRACK[] as (IMAGE_ID, POINT2D_IDX)\n") + # Add some dummy 3D points for basic initialization + for i in range(10): + x, y, z = i * 0.1, 0.0, -1.0 # Simple grid of points + r, g, b = 128, 128, 128 # Gray color + error = 1.0 + f.write(f"{i+1} {x} {y} {z} {r} {g} {b} {error}\n") + + print(f"✅ Created fallback reconstruction with {len(image_files)} cameras at {fallback_dir}") + print("âš ī¸ Note: This is a basic reconstruction with assumed camera positions. Results may be limited.") + + def run_colmap_on_scene(scene_dir, force_pinhole=True): """ Runs feature extraction, matching, and mapping on all images inside scene_dir/images using pycolmap. 
@@ -434,48 +499,121 @@ def run_colmap_on_scene(scene_dir, force_pinhole=True): # Make sure output directories exist os.makedirs(sparse_path, exist_ok=True) - # Step 1: Feature Extraction + # Step 1: Feature Extraction with more aggressive settings pycolmap.extract_features( database_path, image_dir, sift_options={ - "max_num_features": 512 * 2, - "max_image_size": 512 * 1, + "max_num_features": 8192, # Much higher feature count + "max_image_size": 1600, # Higher resolution + "first_octave": -1, # More detailed features + "num_octaves": 4, + "octave_resolution": 3, + "peak_threshold": 0.005, # More lenient peak detection + "edge_threshold": 20, # More lenient edge threshold }, ) print(f"Finished feature extraction in {(time.time() - start_time):.2f}s.") - # Step 2: Feature Matching - pycolmap.match_exhaustive(database_path) + # Step 2: Feature Matching with correct API + sift_matching_options = pycolmap.SiftMatchingOptions() + sift_matching_options.max_ratio = 0.9 + sift_matching_options.max_distance = 0.8 + sift_matching_options.cross_check = True + + pycolmap.match_exhaustive(database_path, sift_options=sift_matching_options) print(f"Finished feature matching in {(time.time() - start_time):.2f}s.") - # Step 3: Mapping with PINHOLE camera model + # Step 3: Mapping with more lenient parameters for challenging videos pipeline_options = pycolmap.IncrementalPipelineOptions() - pipeline_options.min_num_matches = 15 + pipeline_options.min_num_matches = 8 # Lower minimum matches pipeline_options.multiple_models = True pipeline_options.max_num_models = 50 pipeline_options.max_model_overlap = 20 - pipeline_options.min_model_size = 10 + pipeline_options.min_model_size = 3 # Allow smaller models pipeline_options.extract_colors = True pipeline_options.num_threads = 8 - pipeline_options.mapper.init_min_num_inliers = 30 - pipeline_options.mapper.init_max_error = 8.0 - pipeline_options.mapper.init_min_tri_angle = 5.0 + + # More lenient mapper options + pipeline_options.mapper.init_min_num_inliers = 15 # Lower inlier threshold + pipeline_options.mapper.init_max_error = 12.0 # Higher error tolerance + pipeline_options.mapper.init_min_tri_angle = 2.0 # Lower triangulation angle + pipeline_options.mapper.abs_pose_min_num_inliers = 15 + pipeline_options.mapper.abs_pose_max_error = 12.0 + pipeline_options.mapper.filter_max_reproj_error = 8.0 + pipeline_options.mapper.filter_min_tri_angle = 1.5 # Note: force_pinhole will be applied after reconstruction - reconstruction = pycolmap.incremental_mapping( - database_path=database_path, - image_path=image_dir, - output_path=sparse_path, - options=pipeline_options, - ) - print(f"Finished incremental mapping in {(time.time() - start_time):.2f}s.") + try: + reconstruction = pycolmap.incremental_mapping( + database_path=database_path, + image_path=image_dir, + output_path=sparse_path, + options=pipeline_options, + ) + print(f"Finished incremental mapping in {(time.time() - start_time):.2f}s.") + except Exception as e: + print(f"âš ī¸ Initial reconstruction failed: {e}") + print("🔄 Trying with even more lenient settings...") + + # Try with ultra-lenient settings as fallback + pipeline_options.min_num_matches = 5 + pipeline_options.min_model_size = 2 + pipeline_options.mapper.init_min_num_inliers = 10 + pipeline_options.mapper.init_max_error = 20.0 + pipeline_options.mapper.init_min_tri_angle = 1.0 + + try: + reconstruction = pycolmap.incremental_mapping( + database_path=database_path, + image_path=image_dir, + output_path=sparse_path, + options=pipeline_options, + ) + 
print(f"✅ Fallback reconstruction succeeded in {(time.time() - start_time):.2f}s.") + except Exception as e2: + print(f"❌ Both reconstruction attempts failed: {e2}") + raise RuntimeError("COLMAP reconstruction failed. The video might have insufficient overlap or features.") - # Step 4: Ensure cameras are PINHOLE (double-check) + # Step 4: Check if reconstruction was successful recon_path = os.path.join(sparse_path, "0") + if not os.path.exists(recon_path): + # Check for other reconstruction indices + reconstructions_found = [] + for i in range(10): # Check indices 0-9 + alt_path = os.path.join(sparse_path, str(i)) + if os.path.exists(alt_path) and any(os.path.exists(os.path.join(alt_path, f)) + for f in ["cameras.bin", "images.bin", "points3D.bin"]): + reconstructions_found.append(i) + + if reconstructions_found: + # Use the largest reconstruction + best_idx = max(reconstructions_found) + recon_path = os.path.join(sparse_path, str(best_idx)) + print(f"â„šī¸ Using reconstruction {best_idx} instead of 0") + + # Move to index 0 for compatibility + target_path = os.path.join(sparse_path, "0") + if not os.path.exists(target_path): + import shutil + shutil.move(recon_path, target_path) + recon_path = target_path + print(f"📁 Moved reconstruction to sparse/0/") + else: + print("❌ COLMAP reconstruction failed - creating minimal fallback reconstruction") + return create_fallback_reconstruction(image_dir, sparse_path) + + # Step 5: Convert cameras to PINHOLE if needed if os.path.exists(recon_path): reconstruction = pycolmap.Reconstruction(recon_path) + + if len(reconstruction.cameras) == 0: + raise RuntimeError("❌ Reconstruction contains no cameras") + if len(reconstruction.images) == 0: + raise RuntimeError("❌ Reconstruction contains no images") + if len(reconstruction.points3D) == 0: + print("âš ī¸ Warning: Reconstruction contains no 3D points") for cam in reconstruction.cameras.values(): if force_pinhole and cam.model != "PINHOLE": @@ -494,7 +632,8 @@ def run_colmap_on_scene(scene_dir, force_pinhole=True): cam.params = [focal, focal, cam.width/2, cam.height/2] reconstruction.write(recon_path) - print(f"Saved reconstruction with PINHOLE cameras to {recon_path}") + print(f"✅ Saved reconstruction with PINHOLE cameras to {recon_path}") + print(f"📊 Reconstruction stats: {len(reconstruction.cameras)} cameras, {len(reconstruction.images)} images, {len(reconstruction.points3D)} points") print(f"Total pipeline time: {(time.time() - start_time):.2f}s.")