From 269a64c301ef185ed451731593c469794fd6019d Mon Sep 17 00:00:00 2001
From: IasonTheodorou <iasonth95@gmail.com>
Date: Thu, 3 Jul 2025 15:14:16 +0200
Subject: [PATCH 01/28] Initial refactoring

---
 .gitignore            | 14 +++++++++
 CMakeLists.txt        |  7 +++--
 inc/segmentation.h    | 11 ++++++++
 src/main.cpp          | 66 ++++++++-----------------------------------
 src/sam_inference.cpp |  2 +-
 src/segmentation.cpp  | 53 ++++++++++++++++++++++++++++++++++
 6 files changed, 94 insertions(+), 59 deletions(-)
 create mode 100644 .gitignore
 create mode 100644 inc/segmentation.h
 create mode 100644 src/segmentation.cpp
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ddd4b43
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,14 @@
+build/
+images/*
+onnxruntime*/
+onnxruntime/*
+docker/*
+CMakefile
+CMakeCache.txt
+CMakeFiles/*
+cmake_install.cmake
+Makefile
+SPEED-SAM-C-TENSORRT/
+sam_inference/model/FastSAM-x.onnx
+mask*
+segmentation_results*
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8036a89..d1e926c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,7 +15,7 @@ include_directories(${OpenCV_INCLUDE_DIRS})
 
 # -------------- ONNXRuntime  ------------------#
 set(ONNXRUNTIME_VERSION 1.21.0)
-set(ONNXRUNTIME_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/../onnxruntime-linux-x64-gpu-1.21.1")
+set(ONNXRUNTIME_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/../hero_sam/onnxruntime-linux-x64-gpu-1.21.1")
 include_directories(${ONNXRUNTIME_ROOT}/include)
 
 # -------------- Cuda ------------------#
@@ -25,6 +25,7 @@ include_directories(/usr/local/cuda/include)
 set(PROJECT_SOURCES
         src/main.cpp
         src/sam_inference.cpp
+        src/segmentation.cpp
         src/utils.cpp
 )
 
@@ -44,10 +45,10 @@ endif ()
 
 # Download https://raw.githubusercontent.com/ultralytics/ultralytics/main/ultralytics/cfg/datasets/coco.yaml
 # and put it in the same folder of the executable file
-configure_file(./model/SAM_mask_decoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_mask_decoder.onnx  COPYONLY)
+configure_file(../hero_sam/sam_inference/model/SAM_mask_decoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_mask_decoder.onnx  COPYONLY)
 
 # Copy yolov8n.onnx file to the same folder of the executable file
-configure_file(./model/SAM_encoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_encoder.onnx COPYONLY)
+configure_file(../hero_sam/sam_inference/model/SAM_encoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_encoder.onnx COPYONLY)
 
 # Create folder name images in the same folder of the executable file
 add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD
diff --git a/inc/segmentation.h b/inc/segmentation.h
new file mode 100644
index 0000000..9617001
--- /dev/null
+++ b/inc/segmentation.h
@@ -0,0 +1,11 @@
+#include <iostream>
+#include <iomanip>
+#include <filesystem>
+#include <fstream>
+#include <random>
+#include <tuple>
+
+#include "sam_inference.h"
+
+std::tuple<std::vector<std::unique_ptr<SAM>>, SEG::_DL_INIT_PARAM, SEG::_DL_INIT_PARAM> Initializer();
+void SegmentAnything(std::vector<std::unique_ptr<SAM>>& samSegmentors, SEG::_DL_INIT_PARAM& params_encoder, SEG::_DL_INIT_PARAM& params_decoder, cv::Mat& img);
\ No newline at end of file
diff --git a/src/main.cpp b/src/main.cpp
index 7481567..3905ead 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -1,67 +1,23 @@
-#include <iostream>
-#include <iomanip>
-#include "sam_inference.h"
-#include <filesystem>
-#include <fstream>
-#include <random>
+#include "segmentation.h"
 
-
-
-void SegmentAnything() {
-
-    SAM* samSegmentor = new SAM;
-    SEG::DL_INIT_PARAM params;
-    SEG::DL_INIT_PARAM params1;
-
-    params.rectConfidenceThreshold = 0.1;
-    params.iouThreshold = 0.5;
-    params.modelPath = "SAM_encoder.onnx";
-    params.imgSize = { 1024, 1024 };
-
-    params1 = params;
-    params1.modelType = SEG::SAM_SEGMENT_DECODER;
-    params1.modelPath = "SAM_mask_decoder.onnx";
-
-
-    #ifdef USE_CUDA
-    params.cudaEnable = true;
-    #else
-    params.cudaEnable = false;
-    #endif
-
-
-
-    //Running inference
+int main()
+{
+     //Running inference
+    std::vector<std::unique_ptr<SAM>> samSegmentors;
+    SEG::DL_INIT_PARAM params_encoder;
+    SEG::DL_INIT_PARAM params_decoder;
+    std::tie(samSegmentors, params_encoder, params_decoder) = Initializer();
     std::filesystem::path current_path = std::filesystem::current_path();
-    std::filesystem::path imgs_path = current_path / "../../pipeline/build/images";
-    std::vector<SEG::DL_RESULT> resSam;
+    std::filesystem::path imgs_path = current_path / "../../hero_sam/pipeline/build/images";
     for (auto& i : std::filesystem::directory_iterator(imgs_path))
     {
         if (i.path().extension() == ".jpg" || i.path().extension() == ".png" || i.path().extension() == ".jpeg")
         {
             std::string img_path = i.path().string();
             cv::Mat img = cv::imread(img_path);
+            SegmentAnything(samSegmentors, params_encoder, params_decoder, img);
 
-            SEG::DL_RESULT res;
-            samSegmentor->CreateSession(params);
-            SEG::MODEL_TYPE modelTypeRef = params.modelType;
-            samSegmentor->RunSession(img, resSam, modelTypeRef, res);
-
-
-
-
-            samSegmentor->CreateSession(params1);
-            modelTypeRef = params1.modelType;
-            samSegmentor->RunSession(img, resSam, modelTypeRef, res);
-            std::cout << "Press any key to exit" << std::endl;
-            cv::imshow("Result of Detection", img);
-            cv::waitKey(0);
-            cv::destroyAllWindows();
         }
     }
-}
-
-int main()
-{
-    SegmentAnything();
+    return 0;
 }
\ No newline at end of file
diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp
index 3820338..69671ec 100644
--- a/src/sam_inference.cpp
+++ b/src/sam_inference.cpp
@@ -4,7 +4,7 @@
 #include <typeinfo>
 
 #define benchmark
-//#define ROI
+#define ROI
 // #define min(a,b)            (((a) < (b)) ? (a) : (b))
 
 SAM::SAM() {
diff --git a/src/segmentation.cpp b/src/segmentation.cpp
new file mode 100644
index 0000000..4b2c022
--- /dev/null
+++ b/src/segmentation.cpp
@@ -0,0 +1,53 @@
+#include "segmentation.h"
+
+std::tuple<std::vector<std::unique_ptr<SAM>>, SEG::DL_INIT_PARAM, SEG::DL_INIT_PARAM> Initializer()
+{
+    std::vector<std::unique_ptr<SAM>> samSegmentors;
+    samSegmentors.push_back(std::make_unique<SAM>());
+    samSegmentors.push_back(std::make_unique<SAM>());
+
+    std::unique_ptr<SAM> samSegmentorEncoder = std::make_unique<SAM>();
+    std::unique_ptr<SAM> samSegmentorDecoder = std::make_unique<SAM>();
+    SEG::DL_INIT_PARAM params_encoder;
+    SEG::DL_INIT_PARAM params_decoder;
+
+    params_encoder.rectConfidenceThreshold = 0.1;
+    params_encoder.iouThreshold = 0.5;
+    params_encoder.modelPath = "SAM_encoder.onnx";
+    params_encoder.imgSize = { 1024, 1024 };
+
+    params_decoder = params_encoder;
+    params_decoder.modelType = SEG::SAM_SEGMENT_DECODER;
+    params_decoder.modelPath = "SAM_mask_decoder.onnx";
+
+
+
+    #ifdef USE_CUDA
+    params_encoder.cudaEnable = true;
+    #else
+    params_encoder.cudaEnable = false;
+    #endif
+
+    samSegmentorEncoder->CreateSession(params_encoder);
+    samSegmentorDecoder->CreateSession(params_decoder);
+    samSegmentors[0] = std::move(samSegmentorEncoder);
+    samSegmentors[1] = std::move(samSegmentorDecoder);
+    return {std::move(samSegmentors), params_encoder, params_decoder};
+}
+
+void SegmentAnything(std::vector<std::unique_ptr<SAM>>& samSegmentors, SEG::DL_INIT_PARAM& params_encoder, SEG::DL_INIT_PARAM& params_decoder, cv::Mat& img) {
+
+    std::vector<SEG::DL_RESULT> resSam;
+    SEG::DL_RESULT res;
+
+    SEG::MODEL_TYPE modelTypeRef = params_encoder.modelType;
+    samSegmentors[0]->RunSession(img, resSam, modelTypeRef, res);
+
+
+    modelTypeRef = params_decoder.modelType;
+    samSegmentors[1]->RunSession(img, resSam, modelTypeRef, res);
+    std::cout << "Press any key to exit" << std::endl;
+    cv::imshow("Result of Detection", img);
+    cv::waitKey(0);
+    cv::destroyAllWindows();
+}

From 3f0aa166208a0ecb4d18a356e1d0582c5d09bbcc Mon Sep 17 00:00:00 2001
From: IasonTheodorou <iasonth95@gmail.com>
Date: Fri, 4 Jul 2025 10:02:37 +0200
Subject: [PATCH 02/28] create catkin package

---
 package.xml | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 package.xml

diff --git a/package.xml b/package.xml
new file mode 100644
index 0000000..cde009a
--- /dev/null
+++ b/package.xml
@@ -0,0 +1,29 @@
+<?xml version="1.0"?>
+<?xml-model
+  href="http://download.ros.org/schema/package_format3.xsd"
+  schematypens="http://www.w3.org/2001/XMLSchema"?>
+<package format="3">
+  <name>sam_onnx_ros</name>
+  <version>0.0.0</version>
+  <description>Segment Anything Model (SAM) segmentation</description>
+
+  <maintainer email="iasonth95@gmail.com">Iason Theodorou</maintainer>
+
+  <license>ToDo</license>
+
+  <buildtool_depend>catkin</buildtool_depend>
+
+  <build_depend>libopencv-dev</build_depend>
+  <exec_depend>libopencv-dev</exec_depend>
+  <build_depend>onnxruntime_ros</build_depend>
+  <exec_depend>onnxruntime_ros</exec_depend>
+
+  <test_depend>catkin_lint_cmake</test_depend>
+
+  <doc_depend>doxygen</doc_depend>
+
+  <export>
+    <rosdoc config="rosdoc.yaml" />
+  </export>
+
+</package>
\ No newline at end of file

From fe7719659e32123483c5163f9122ccfb7ae58ac5 Mon Sep 17 00:00:00 2001
From: IasonTheodorou <iasonth95@gmail.com>
Date: Fri, 4 Jul 2025 10:03:43 +0200
Subject: [PATCH 03/28] included CI tests

---
 .github/workflows/main.yml | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100644 .github/workflows/main.yml

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
new file mode 100644
index 0000000..21edb06
--- /dev/null
+++ b/.github/workflows/main.yml
@@ -0,0 +1,13 @@
+name: CI
+
+on: [push, pull_request]
+
+jobs:
+  tue-ci:
+    name: TUe CI - ${{ github.event_name }}
+    runs-on: ubuntu-latest
+    steps:
+      - name: TUe CI
+        uses: tue-robotics/tue-env/ci/main@master
+        with:
+          package: ${{ github.event.repository.name }}
\ No newline at end of file

From 17bd100227eb9e88c1d1d1901744152befdcda04 Mon Sep 17 00:00:00 2001
From: IasonTheodorou <iasonth95@gmail.com>
Date: Fri, 4 Jul 2025 10:04:18 +0200
Subject: [PATCH 04/28] Update CMakeLists to include some initially needed
 components

---
 CMakeLists.txt | 53 +++++++++++++++++++++++++++++++++++---------------
 1 file changed, 37 insertions(+), 16 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index d1e926c..9e7ed33 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,13 +1,17 @@
 cmake_minimum_required(VERSION 3.5)
 
 set(PROJECT_NAME SAMOnnxRuntimeCPPInference)
-project(${PROJECT_NAME} VERSION 0.0.1 LANGUAGES CXX)
+project(sam_onnx_ros)
+
+# -------------- CMake Policies ------------------#
+#add_compile_options(-Wall -Werror=all)
+#add_compile_options(-Wextra -Werror=extra)
 
 # -------------- Support C++17 for using filesystem  ------------------#
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_CXX_EXTENSIONS ON)
-set(CMAKE_INCLUDE_CURRENT_DIR ON)
+#set(CMAKE_INCLUDE_CURRENT_DIR ON)
 
 # -------------- OpenCV  ------------------#
 find_package(OpenCV REQUIRED)
@@ -22,6 +26,33 @@ include_directories(${ONNXRUNTIME_ROOT}/include)
 add_definitions(-DUSE_CUDA=1)
 include_directories(/usr/local/cuda/include)
 
+# find_package(catkin REQUIRED
+#   COMPONENTS
+#   onnxruntime_ros
+# )
+
+# ------------------------------------------------------------------------------------------------
+#                                        CATKIN EXPORT
+# ------------------------------------------------------------------------------------------------
+
+# catkin_package(
+#   INCLUDE_DIRS include
+#   LIBRARIES ${PROJECT_NAME}
+#   CATKIN_DEPENDS
+#   DEPENDS OpenCV
+# )
+
+# ------------------------------------------------------------------------------------------------
+#                                           BUILD
+# ------------------------------------------------------------------------------------------------
+
+include_directories(
+  include
+  SYSTEM
+  ${OpenCV_INCLUDE_DIRS}
+  ${catkin_INCLUDE_DIRS}
+)
+
 set(PROJECT_SOURCES
         src/main.cpp
         src/sam_inference.cpp
@@ -33,21 +64,11 @@ add_executable(${PROJECT_NAME} ${PROJECT_SOURCES})
 include_directories(${CMAKE_CURRENT_SOURCE_DIR}/inc)
 
 # Link OpenCV libraries along with ONNX Runtime
-target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS} ${ONNXRUNTIME_ROOT}/lib/libonnxruntime.so)
-
-# For Windows system, copy onnxruntime.dll to the same folder of the executable file
-if (WIN32)
-    add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD
-            COMMAND ${CMAKE_COMMAND} -E copy_if_different
-            "${ONNXRUNTIME_ROOT}/lib/onnxruntime.dll"
-            $<TARGET_FILE_DIR:${PROJECT_NAME}>)
-endif ()
-
-# Download https://raw.githubusercontent.com/ultralytics/ultralytics/main/ultralytics/cfg/datasets/coco.yaml
-# and put it in the same folder of the executable file
-configure_file(../hero_sam/sam_inference/model/SAM_mask_decoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_mask_decoder.onnx  COPYONLY)
+target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS} ${catkin_LIBRARIES} ${ONNXRUNTIME_ROOT}/lib/libonnxruntime.so)
 
-# Copy yolov8n.onnx file to the same folder of the executable file
+
+# Copy sam_<model>.onnx file to the same folder of the executable file
+configure_file(../hero_sam/sam_inference/model/SAM_mask_decoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_mask_decoder.onnx  COPYONLY)
 configure_file(../hero_sam/sam_inference/model/SAM_encoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_encoder.onnx COPYONLY)
 
 # Create folder name images in the same folder of the executable file

From 2ef2fc4e9b7d11bfcd774d7c7b40d96945f55eb5 Mon Sep 17 00:00:00 2001
From: IasonTheodorou <iasonth95@gmail.com>
Date: Fri, 4 Jul 2025 13:36:46 +0200
Subject: [PATCH 05/28] Return the masks from the SegmentAnything function
 (not working properly)

---
 inc/dl_types.h       |  1 +
 inc/segmentation.h   |  2 +-
 src/main.cpp         | 11 +++++++++--
 src/segmentation.cpp | 15 +++++++++++----
 4 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/inc/dl_types.h b/inc/dl_types.h
index 54bd60f..72bd1fe 100644
--- a/inc/dl_types.h
+++ b/inc/dl_types.h
@@ -29,6 +29,7 @@ typedef struct _DL_INIT_PARAM
     int intraOpNumThreads = 1;
     //std::vector<cv::Rect> boxes; // For SAM encoder model, this will be filled with detected boxes
 
+    // Overloaded output operator for _DL_INIT_PARAM to print its contents
     friend std::ostream& operator<<(std::ostream& os, _DL_INIT_PARAM& param)
     {
         os << "modelPath: " << param.modelPath << "\n";
diff --git a/inc/segmentation.h b/inc/segmentation.h
index 9617001..46e954e 100644
--- a/inc/segmentation.h
+++ b/inc/segmentation.h
@@ -8,4 +8,4 @@
 #include "sam_inference.h"
 
 std::tuple<std::vector<std::unique_ptr<SAM>>, SEG::_DL_INIT_PARAM, SEG::_DL_INIT_PARAM> Initializer();
-void SegmentAnything(std::vector<std::unique_ptr<SAM>>& samSegmentors, SEG::_DL_INIT_PARAM& params_encoder, SEG::_DL_INIT_PARAM& params_decoder, cv::Mat& img);
\ No newline at end of file
+std::vector<cv::Mat> SegmentAnything(std::vector<std::unique_ptr<SAM>>& samSegmentors, SEG::_DL_INIT_PARAM& params_encoder, SEG::_DL_INIT_PARAM& params_decoder, cv::Mat& img);
\ No newline at end of file
diff --git a/src/main.cpp b/src/main.cpp
index 3905ead..5c22108 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -15,8 +15,15 @@ int main()
         {
             std::string img_path = i.path().string();
             cv::Mat img = cv::imread(img_path);
-            SegmentAnything(samSegmentors, params_encoder, params_decoder, img);
-
+            std::vector<cv::Mat> masks;
+            masks = SegmentAnything(samSegmentors, params_encoder, params_decoder, img);
+            for (int j = 0; j < masks.size(); j++)
+            {
+                std::cout << "Press any key to exit" << std::endl;
+                cv::imshow("Result of MASKS", masks[j]);
+                cv::waitKey(0);
+                cv::destroyAllWindows();
+            }
         }
     }
     return 0;
diff --git a/src/segmentation.cpp b/src/segmentation.cpp
index 4b2c022..2962563 100644
--- a/src/segmentation.cpp
+++ b/src/segmentation.cpp
@@ -35,7 +35,7 @@ std::tuple<std::vector<std::unique_ptr<SAM>>, SEG::DL_INIT_PARAM, SEG::DL_INIT_P
     return {std::move(samSegmentors), params_encoder, params_decoder};
 }
 
-void SegmentAnything(std::vector<std::unique_ptr<SAM>>& samSegmentors, SEG::DL_INIT_PARAM& params_encoder, SEG::DL_INIT_PARAM& params_decoder, cv::Mat& img) {
+std::vector<cv::Mat> SegmentAnything(std::vector<std::unique_ptr<SAM>>& samSegmentors, SEG::DL_INIT_PARAM& params_encoder, SEG::DL_INIT_PARAM& params_decoder, cv::Mat& img) {
 
     std::vector<SEG::DL_RESULT> resSam;
     SEG::DL_RESULT res;
@@ -46,8 +46,15 @@ void SegmentAnything(std::vector<std::unique_ptr<SAM>>& samSegmentors, SEG::DL_I
 
     modelTypeRef = params_decoder.modelType;
     samSegmentors[1]->RunSession(img, resSam, modelTypeRef, res);
-    std::cout << "Press any key to exit" << std::endl;
-    cv::imshow("Result of Detection", img);
-    cv::waitKey(0);
+
+    //cv::destroyAllWindows();
+    cv::Mat finalMask = res.masks[0];
+    std::cout << "Final mask size: " << finalMask.size() << std::endl;
+
+    for (const auto& mask : res.masks) {
+        cv::imshow("Mask", mask);
+        cv::waitKey(0);
+    }
     cv::destroyAllWindows();
+    return std::move(res.masks);
 }

From 70fd58af017182389c1b0385fe2c1a3695513e97 Mon Sep 17 00:00:00 2001
From: IasonTheodorou <iasonth95@gmail.com>
Date: Tue, 19 Aug 2025 21:19:57 +0200
Subject: [PATCH 06/28] Updated CMake and removed unneeded parts of the code

---
 CMakeLists.txt                   | 24 +++++++++++---------
 {inc => include}/dl_types.h      |  0
 {inc => include}/sam_inference.h |  0
 {inc => include}/segmentation.h  |  0
 {inc => include}/utils.h         |  6 -----
 src/sam_inference.cpp            | 39 +-------------------------------
 src/utils.cpp                    |  7 ++----
 7 files changed, 16 insertions(+), 60 deletions(-)
 rename {inc => include}/dl_types.h (100%)
 rename {inc => include}/sam_inference.h (100%)
 rename {inc => include}/segmentation.h (100%)
 rename {inc => include}/utils.h (96%)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9e7ed33..1270d93 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,6 +2,7 @@ cmake_minimum_required(VERSION 3.5)
 
 set(PROJECT_NAME SAMOnnxRuntimeCPPInference)
 project(sam_onnx_ros)
+project(${PROJECT_NAME} VERSION 0.0.1 LANGUAGES CXX)
 
 # -------------- CMake Policies ------------------#
 #add_compile_options(-Wall -Werror=all)
@@ -17,6 +18,7 @@ set(CMAKE_CXX_EXTENSIONS ON)
 find_package(OpenCV REQUIRED)
 include_directories(${OpenCV_INCLUDE_DIRS})
 
+
 # -------------- ONNXRuntime  ------------------#
 set(ONNXRUNTIME_VERSION 1.21.0)
 set(ONNXRUNTIME_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/../hero_sam/onnxruntime-linux-x64-gpu-1.21.1")
@@ -26,21 +28,21 @@ include_directories(${ONNXRUNTIME_ROOT}/include)
 add_definitions(-DUSE_CUDA=1)
 include_directories(/usr/local/cuda/include)
 
-# find_package(catkin REQUIRED
-#   COMPONENTS
-#   onnxruntime_ros
-# )
+find_package(catkin REQUIRED
+  COMPONENTS
+  #onnxruntime_ros
+)
 
 # ------------------------------------------------------------------------------------------------
 #                                        CATKIN EXPORT
 # ------------------------------------------------------------------------------------------------
 
-# catkin_package(
-#   INCLUDE_DIRS include
-#   LIBRARIES ${PROJECT_NAME}
-#   CATKIN_DEPENDS
-#   DEPENDS OpenCV
-# )
+catkin_package(
+  INCLUDE_DIRS include
+  LIBRARIES ${PROJECT_NAME}
+  CATKIN_DEPENDS
+  DEPENDS OpenCV
+)
 
 # ------------------------------------------------------------------------------------------------
 #                                           BUILD
@@ -61,7 +63,7 @@ set(PROJECT_SOURCES
 )
 
 add_executable(${PROJECT_NAME} ${PROJECT_SOURCES})
-include_directories(${CMAKE_CURRENT_SOURCE_DIR}/inc)
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include)
 
 # Link OpenCV libraries along with ONNX Runtime
 target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS} ${catkin_LIBRARIES} ${ONNXRUNTIME_ROOT}/lib/libonnxruntime.so)
diff --git a/inc/dl_types.h b/include/dl_types.h
similarity index 100%
rename from inc/dl_types.h
rename to include/dl_types.h
diff --git a/inc/sam_inference.h b/include/sam_inference.h
similarity index 100%
rename from inc/sam_inference.h
rename to include/sam_inference.h
diff --git a/inc/segmentation.h b/include/segmentation.h
similarity index 100%
rename from inc/segmentation.h
rename to include/segmentation.h
diff --git a/inc/utils.h b/include/utils.h
similarity index 96%
rename from inc/utils.h
rename to include/utils.h
index 0e7a8d7..1bded56 100644
--- a/inc/utils.h
+++ b/include/utils.h
@@ -2,12 +2,6 @@
 
 #define    RET_OK nullptr
 
-#ifdef _WIN32
-#include <Windows.h>
-#include <direct.h>
-#include <io.h>
-#endif
-
 #include <string>
 #include <vector>
 #include <cstdio>
diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp
index 69671ec..77cef8e 100644
--- a/src/sam_inference.cpp
+++ b/src/sam_inference.cpp
@@ -71,26 +71,12 @@ const char* SAM::CreateSession(SEG::DL_INIT_PARAM& iParams) {
             sessionOption.AppendExecutionProvider_CUDA(cudaOption);
         }
 
-        //OrtTensorRTProviderOptions trtOptions{};
-        //trtOptions.device_id = 0;
-        //trtOptions.trt_fp16_enable = true;
-        //sessionOption.AppendExecutionProvider_TensorRT(trtOptions);
-
         sessionOption.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
         sessionOption.SetIntraOpNumThreads(iParams.intraOpNumThreads);
         sessionOption.SetLogSeverityLevel(iParams.logSeverityLevel);
 
-#ifdef _WIN32
-        int ModelPathSize = MultiByteToWideChar(CP_UTF8, 0, iParams.modelPath.c_str(), static_cast<int>(iParams.modelPath.length()), nullptr, 0);
-        wchar_t* wide_cstr = new wchar_t[ModelPathSize + 1];
-        MultiByteToWideChar(CP_UTF8, 0, iParams.modelPath.c_str(), static_cast<int>(iParams.modelPath.length()), wide_cstr, ModelPathSize);
-        wide_cstr[ModelPathSize] = L'\0';
-        const wchar_t* modelPath = wide_cstr;
-#else
         const char* modelPath = iParams.modelPath.c_str();
-#endif // _WIN32
 
-        //session = new Ort::Session(env, modelPath, sessionOption);
         session = std::make_unique<Ort::Session>(env, modelPath, sessionOption);
         Ort::AllocatorWithDefaultOptions allocator;
         size_t inputNodesNum = session->GetInputCount();
@@ -111,14 +97,6 @@ const char* SAM::CreateSession(SEG::DL_INIT_PARAM& iParams) {
         }
         options = Ort::RunOptions{ nullptr };
 
-        //std::vector<long int> input_shape;
-        //std::vector<long int> output_shape;
-        //size_t input_tensor_size = 0;
-        //size_t output_tensor_size = 0;
-        //Get input and output tensor size
-
-        //auto input_tensor_size = session->GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetElementCount();
-        //auto output_tensor_size = session->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetElementCount();
         auto input_shape = session->GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
         auto output_shape = session->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
         auto output_type = session->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetElementType();
@@ -159,13 +137,7 @@ const char* SAM::RunSession(const cv::Mat& iImg, std::vector<SEG::DL_RESULT>& oR
             }
             else if (modelType == SEG::SAM_SEGMENT_DECODER)
             {
-                // For SAM decoder model, the input size is different
-                // Assuming the input size is 236x64x64 for the decoder
-                // You can adjust this based on your actual model requirements
-                // For example, if the input size is 1x3x236x64, you can set it as follows:
-                // inputNodeDims = { 1, 3, 236, 64 };
-                // But here we are using 1x236x64x64 as per your original code
-
+                // Input size or SAM decoder model is 256x64x64 for the decoder
                 inputNodeDims = { 1, 256, 64, 64 };
             }
             TensorProcess(starttime_1, iImg, blob, inputNodeDims, modelType, oResult, utilities, result);
@@ -209,7 +181,6 @@ const char* SAM::RunSession(const cv::Mat& iImg, std::vector<SEG::DL_RESULT>& oR
             auto tensor_info = typeInfo.GetTensorTypeAndShapeInfo();
             std::vector<int64_t> outputNodeDims = tensor_info.GetShape();
             auto output = outputTensor.front().GetTensorMutableData<typename std::remove_pointer<N>::type>();
-            //std::vector<int64_t> outputNodeDims = outputTensor.front().GetTensorTypeAndShapeInfo().GetShape();
             delete[] blob;
 
             int embeddingSize = outputNodeDims[1] * outputNodeDims[2] * outputNodeDims[3]; // Flattened size
@@ -234,14 +205,12 @@ const char* SAM::RunSession(const cv::Mat& iImg, std::vector<SEG::DL_RESULT>& oR
             break;
         }
         case SEG::SAM_SEGMENT_DECODER:
-        //case <OTHER MODEL>:
         {
             // Use embeddings from the last result
             std::vector<float> embeddings = result.embeddings;
             // Create tensor for decoder
             std::vector<int64_t> decoderInputDims = { 1, 256, 64, 64 }; // Adjust based on your decoder's requirements
 
-
             // Create  point coordinates and labels
     #ifdef ROI
 
@@ -258,8 +227,6 @@ const char* SAM::RunSession(const cv::Mat& iImg, std::vector<SEG::DL_RESULT>& oR
                 return "[SAM]: NO valid Box.";
             }
 
-            //cv::Rect bbox1(138, 29, 170, 301);
-
             std::vector<cv::Rect> boundingBoxes;
             boundingBoxes.push_back(bbox);
     #endif // ROI
@@ -344,10 +311,6 @@ const char* SAM::RunSession(const cv::Mat& iImg, std::vector<SEG::DL_RESULT>& oR
 
 
                 utilities.overlay(output_tensors, iImg, imgSize, result);
-                //std::cout << "Press any key to exit" << std::endl;
-                //cv::imshow("Result of INTERMEDIATE Detection", iImg);
-                //cv::waitKey(0);
-                //cv::destroyAllWindows();
             }
             // Add the result to oResult
             oResult.push_back(result);
diff --git a/src/utils.cpp b/src/utils.cpp
index ce75a0b..153c0ac 100644
--- a/src/utils.cpp
+++ b/src/utils.cpp
@@ -172,7 +172,7 @@ void Utils::overlay(std::vector<Ort::Value>& output_tensors, const cv::Mat& iImg
                     }
                 }
 
-                // 1. Calculate the dimensions the image had during preprocessing
+            // 1. Calculate the dimensions the image had during preprocessing
             float scale;
             int processedWidth, processedHeight;
             if (iImg.cols >= iImg.rows) {
@@ -184,9 +184,6 @@ void Utils::overlay(std::vector<Ort::Value>& output_tensors, const cv::Mat& iImg
                 processedWidth = int(iImg.cols * scale);
                 processedHeight = imgSize[1];
             }
-            // 2. Resize mask to match the SAM input dimensions
-            //cv::Mat resizedMask;
-            //cv::resize(mask, resizedMask, cv::Size(256, 256));
 
             // 3. Extract the portion that corresponds to the actual image (no padding)
             int cropWidth = std::min(256, int(256 * processedWidth / (float)imgSize[0]));
@@ -207,7 +204,7 @@ void Utils::overlay(std::vector<Ort::Value>& output_tensors, const cv::Mat& iImg
             }
 
             // Apply the Guided Filter
-            // cv::Mat filteredMask;
+            cv::Mat filteredMask;
             int radius = 2;
             double eps = 0.01;
             cv::ximgproc::guidedFilter(iImg, finalMask, finalMask, radius, eps);

From 6c5c097d9f2752c2ae675bdc159095cd55ac97a1 Mon Sep 17 00:00:00 2001
From: IasonTheodorou <iasonth95@gmail.com>
Date: Tue, 26 Aug 2025 20:34:24 +0200
Subject: [PATCH 07/28] Updated code format

---
 include/dl_types.h      | 109 +++++-----
 include/sam_inference.h |  32 +--
 include/utils.h         |  68 +++---
 src/main.cpp            |   4 +-
 src/sam_inference.cpp   | 447 ++++++++++++++++++++--------------------
 src/segmentation.cpp    |  19 +-
 src/utils.cpp           | 207 ++++++++++---------
 7 files changed, 433 insertions(+), 453 deletions(-)

diff --git a/include/dl_types.h b/include/dl_types.h
index 72bd1fe..632c7c6 100644
--- a/include/dl_types.h
+++ b/include/dl_types.h
@@ -1,69 +1,66 @@
 #pragma once
 namespace SEG
 {
-enum MODEL_TYPE
-{
-    //FLOAT32 MODEL
-    SAM_SEGMENT_ENCODER = 1,
-    SAM_SEGMENT_DECODER = 2,
-    //YOLO_CLS = 3,
-
-    //FLOAT16 MODEL
-    //YOLO_DETECT_V8_HALF = 4,
-    //YOLO_POSE_V8_HALF = 5,
-    //YOLO_CLS_HALF = 6
-};
-
+    enum MODEL_TYPE
+    {
+        // FLOAT32 MODEL
+        SAM_SEGMENT_ENCODER = 1,
+        SAM_SEGMENT_DECODER = 2,
+        // YOLO_CLS = 3,
 
-typedef struct _DL_INIT_PARAM
-{
-    // Yolo & Common Part
-    std::string modelPath;
-    MODEL_TYPE modelType = SAM_SEGMENT_ENCODER;
-    std::vector<int> imgSize = { 640, 640 };
-    float rectConfidenceThreshold = 0.6;
-    float iouThreshold = 0.5;
-    int	keyPointsNum = 2; //Note:kpt number for pose
-    bool cudaEnable = false;
-    int logSeverityLevel = 3;
-    int intraOpNumThreads = 1;
-    //std::vector<cv::Rect> boxes; // For SAM encoder model, this will be filled with detected boxes
+        // FLOAT16 MODEL
+        // YOLO_DETECT_V8_HALF = 4,
+        // YOLO_POSE_V8_HALF = 5,
+        // YOLO_CLS_HALF = 6
+    };
 
-    // Overloaded output operator for _DL_INIT_PARAM to print its contents
-    friend std::ostream& operator<<(std::ostream& os, _DL_INIT_PARAM& param)
+    typedef struct _DL_INIT_PARAM
     {
-        os << "modelPath: " << param.modelPath << "\n";
-        os << "modelType: " << param.modelType << "\n";
-        os << "imgSize: ";
-        for (const auto& size : param.imgSize)
-            os << size << " ";
-        os << "\n";
-        os << "rectConfidenceThreshold: " << param.rectConfidenceThreshold << "\n";
-        os << "iouThreshold: " << param.iouThreshold << "\n";
-        os << "keyPointsNum: " << param.keyPointsNum << "\n";
-        os << "cudaEnable: " << (param.cudaEnable ? "true" : "false") << "\n";
-        os << "logSeverityLevel: " << param.logSeverityLevel << "\n";
-        os << "intraOpNumThreads: " << param.intraOpNumThreads;
-        return os;
-    }
-
-} DL_INIT_PARAM;
+        // Yolo & Common Part
+        std::string modelPath;
+        MODEL_TYPE modelType = SAM_SEGMENT_ENCODER;
+        std::vector<int> imgSize = {640, 640};
+        float rectConfidenceThreshold = 0.6;
+        float iouThreshold = 0.5;
+        int keyPointsNum = 2; // Note:kpt number for pose
+        bool cudaEnable = false;
+        int logSeverityLevel = 3;
+        int intraOpNumThreads = 1;
+        // std::vector<cv::Rect> boxes; // For SAM encoder model, this will be filled with detected boxes
 
+        // Overloaded output operator for _DL_INIT_PARAM to print its contents
+        friend std::ostream &operator<<(std::ostream &os, _DL_INIT_PARAM &param)
+        {
+            os << "modelPath: " << param.modelPath << "\n";
+            os << "modelType: " << param.modelType << "\n";
+            os << "imgSize: ";
+            for (const auto &size : param.imgSize)
+                os << size << " ";
+            os << "\n";
+            os << "rectConfidenceThreshold: " << param.rectConfidenceThreshold << "\n";
+            os << "iouThreshold: " << param.iouThreshold << "\n";
+            os << "keyPointsNum: " << param.keyPointsNum << "\n";
+            os << "cudaEnable: " << (param.cudaEnable ? "true" : "false") << "\n";
+            os << "logSeverityLevel: " << param.logSeverityLevel << "\n";
+            os << "intraOpNumThreads: " << param.intraOpNumThreads;
+            return os;
+        }
 
-typedef struct _DL_RESULT
-{
+    } DL_INIT_PARAM;
 
-    //Yolo Part
-    int classId;
-    float confidence;
-    std::vector<cv::Rect> boxes; // For SAM encoder model, this will be filled with detected boxes
-    std::vector<cv::Point2f> keyPoints;
+    typedef struct _DL_RESULT
+    {
 
-    // Sam Part
-    std::vector<float> embeddings;
-    // Masks for SAM decoder model output
-    std::vector<cv::Mat> masks; // Each cv::Mat represents a mask
+        // Yolo Part
+        int classId;
+        float confidence;
+        std::vector<cv::Rect> boxes; // For SAM encoder model, this will be filled with detected boxes
+        std::vector<cv::Point2f> keyPoints;
 
+        // Sam Part
+        std::vector<float> embeddings;
+        // Masks for SAM decoder model output
+        std::vector<cv::Mat> masks; // Each cv::Mat represents a mask
 
-} DL_RESULT;
+    } DL_RESULT;
 } // namespace SEG
\ No newline at end of file
diff --git a/include/sam_inference.h b/include/sam_inference.h
index 8a15c38..8910bda 100644
--- a/include/sam_inference.h
+++ b/include/sam_inference.h
@@ -1,12 +1,6 @@
 #pragma once
 
-#define    RET_OK nullptr
-
-#ifdef _WIN32
-#include <Windows.h>
-#include <direct.h>
-#include <io.h>
-#endif
+#define RET_OK nullptr
 
 #include <string>
 #include <vector>
@@ -18,10 +12,6 @@
 #include <cuda_fp16.h>
 #endif
 
-
-
-
-
 class SAM
 {
 public:
@@ -30,18 +20,15 @@ class SAM
     ~SAM();
 
 public:
+    const char *CreateSession(SEG::DL_INIT_PARAM &iParams);
 
-    const char* CreateSession(SEG::DL_INIT_PARAM& iParams);
-
-    const char* RunSession(const cv::Mat& iImg, std::vector<SEG::DL_RESULT>& oResult, SEG::MODEL_TYPE modelType, SEG::DL_RESULT& result);
-
-    char* WarmUpSession(SEG::MODEL_TYPE modelType);
-
-    template<typename N>
-    char* TensorProcess(clock_t& starttime_1, const cv::Mat& iImg, N& blob, std::vector<int64_t>& inputNodeDims,
-        SEG::MODEL_TYPE modelType, std::vector<SEG::DL_RESULT>& oResult, Utils& utilities, SEG::DL_RESULT& result);
+    const char *RunSession(const cv::Mat &iImg, std::vector<SEG::DL_RESULT> &oResult, SEG::MODEL_TYPE modelType, SEG::DL_RESULT &result);
 
+    char *WarmUpSession(SEG::MODEL_TYPE modelType);
 
+    template <typename N>
+    char *TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, N &blob, std::vector<int64_t> &inputNodeDims,
+                        SEG::MODEL_TYPE modelType, std::vector<SEG::DL_RESULT> &oResult, Utils &utilities, SEG::DL_RESULT &result);
 
     std::vector<std::string> classes{};
 
@@ -50,12 +37,11 @@ class SAM
     std::unique_ptr<Ort::Session> session;
     bool cudaEnable;
     Ort::RunOptions options;
-    std::vector<const char*> inputNodeNames;
-    std::vector<const char*> outputNodeNames;
+    std::vector<const char *> inputNodeNames;
+    std::vector<const char *> outputNodeNames;
 
     SEG::MODEL_TYPE modelType;
     std::vector<int> imgSize;
     float rectConfidenceThreshold;
     float iouThreshold;
-
 };
\ No newline at end of file
diff --git a/include/utils.h b/include/utils.h
index 1bded56..7ff7f9c 100644
--- a/include/utils.h
+++ b/include/utils.h
@@ -1,6 +1,6 @@
 #pragma once
 
-#define    RET_OK nullptr
+#define RET_OK nullptr
 
 #include <string>
 #include <vector>
@@ -14,42 +14,42 @@
 
 class Utils
 {
-    public:
-        Utils();
-        ~Utils();
-
-        void overlay(std::vector<Ort::Value>& output_tensors, const cv::Mat& iImg, std::vector<int> iImgSize, SEG::DL_RESULT& result);
-        char* PreProcess(const cv::Mat& iImg, std::vector<int> iImgSize, cv::Mat& oImg);
-        void ScaleBboxPoints(const cv::Mat& iImg, std::vector<int> iImgSize, std::vector<float>& pointCoords, std::vector<float>& PointsCoordsScaled);
-
-        std::vector<Ort::Value> PrepareInputTensor(Ort::Value& decoderInputTensor, std::vector<float>& pointCoordsScaled, std::vector<int64_t> pointCoordsDims,
-                                                    std::vector<float>& pointLabels, std::vector<int64_t> pointLabelsDims, std::vector<float>& maskInput,
-                                                    std::vector<int64_t> maskInputDims, std::vector<float>& hasMaskInput, std::vector<int64_t> hasMaskInputDims);
-
-        // Definition: Flattened image to blob (and normalizaed) for deep learning inference. Also reorganize from HWC to CHW.
-        // Note: Code in header file since it is used outside of this utils src code.
-        template<typename T>
-        char* BlobFromImage(const cv::Mat& iImg, T& iBlob) {
-            int channels = iImg.channels();
-            int imgHeight = iImg.rows;
-            int imgWidth = iImg.cols;
-
-            for (int c = 0; c < channels; c++)
+public:
+    Utils();
+    ~Utils();
+
+    void overlay(std::vector<Ort::Value> &output_tensors, const cv::Mat &iImg, std::vector<int> iImgSize, SEG::DL_RESULT &result);
+    char *PreProcess(const cv::Mat &iImg, std::vector<int> iImgSize, cv::Mat &oImg);
+    void ScaleBboxPoints(const cv::Mat &iImg, std::vector<int> iImgSize, std::vector<float> &pointCoords, std::vector<float> &PointsCoordsScaled);
+
+    std::vector<Ort::Value> PrepareInputTensor(Ort::Value &decoderInputTensor, std::vector<float> &pointCoordsScaled, std::vector<int64_t> pointCoordsDims,
+                                               std::vector<float> &pointLabels, std::vector<int64_t> pointLabelsDims, std::vector<float> &maskInput,
+                                               std::vector<int64_t> maskInputDims, std::vector<float> &hasMaskInput, std::vector<int64_t> hasMaskInputDims);
+
+    // Definition: Flattens the image into a blob (normalized by dividing by 255) for deep learning inference. Also reorganizes it from HWC to CHW.
+    // Note: Code in header file since it is used outside of this utils src code.
+    template <typename T>
+    char *BlobFromImage(const cv::Mat &iImg, T &iBlob)
+    {
+        int channels = iImg.channels();
+        int imgHeight = iImg.rows;
+        int imgWidth = iImg.cols;
+
+        for (int c = 0; c < channels; c++)
+        {
+            for (int h = 0; h < imgHeight; h++)
             {
-                for (int h = 0; h < imgHeight; h++)
+                for (int w = 0; w < imgWidth; w++)
                 {
-                    for (int w = 0; w < imgWidth; w++)
-                    {
-                        iBlob[c * imgWidth * imgHeight + h * imgWidth + w] = typename std::remove_pointer<T>::type(
-                            (iImg.at<cv::Vec3b>(h, w)[c]) / 255.0f);
-                    }
+                    iBlob[c * imgWidth * imgHeight + h * imgWidth + w] = typename std::remove_pointer<T>::type(
+                        (iImg.at<cv::Vec3b>(h, w)[c]) / 255.0f);
                 }
             }
-            return RET_OK;
         }
-        private:
-            float resizeScales;
-            float resizeScalesBbox; //letterbox scale
+        return RET_OK;
+    }
 
-
-    };
+private:
+    float resizeScales;
+    float resizeScalesBbox; // letterbox scale
+};
diff --git a/src/main.cpp b/src/main.cpp
index 5c22108..3c8091d 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -2,14 +2,14 @@
 
 int main()
 {
-     //Running inference
+    // Running inference
     std::vector<std::unique_ptr<SAM>> samSegmentors;
     SEG::DL_INIT_PARAM params_encoder;
     SEG::DL_INIT_PARAM params_decoder;
     std::tie(samSegmentors, params_encoder, params_decoder) = Initializer();
     std::filesystem::path current_path = std::filesystem::current_path();
     std::filesystem::path imgs_path = current_path / "../../hero_sam/pipeline/build/images";
-    for (auto& i : std::filesystem::directory_iterator(imgs_path))
+    for (auto &i : std::filesystem::directory_iterator(imgs_path))
     {
         if (i.path().extension() == ".jpg" || i.path().extension() == ".png" || i.path().extension() == ".jpeg")
         {
diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp
index 77cef8e..8a07b6b 100644
--- a/src/sam_inference.cpp
+++ b/src/sam_inference.cpp
@@ -5,19 +5,20 @@
 
 #define benchmark
 #define ROI
-// #define min(a,b)            (((a) < (b)) ? (a) : (b))
-
-SAM::SAM() {
 
+SAM::SAM()
+{
 }
 
-
-SAM::~SAM() {
+SAM::~SAM()
+{
     // Clean up input/output node names
-    for (auto& name : inputNodeNames) {
+    for (auto &name : inputNodeNames)
+    {
         delete[] name;
     }
-    for (auto& name : outputNodeNames) {
+    for (auto &name : outputNodeNames)
+    {
         delete[] name;
     }
 }
@@ -25,24 +26,30 @@ SAM::~SAM() {
 #ifdef USE_CUDA
 namespace Ort
 {
-    template<>
-    struct TypeToTensorType<half> { static constexpr ONNXTensorElementDataType type = ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16; };
+    template <>
+    struct TypeToTensorType<half>
+    {
+        static constexpr ONNXTensorElementDataType type = ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16;
+    };
 }
 #endif
 
-
-const char* SAM::CreateSession(SEG::DL_INIT_PARAM& iParams) {
-    const char* Ret = RET_OK;
-    if (session) {
+const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams)
+{
+    const char *Ret = RET_OK;
+    if (session)
+    {
         session.reset(); // Release previous session
 
         // Clear node names
-        for (auto& name : inputNodeNames) {
+        for (auto &name : inputNodeNames)
+        {
             delete[] name;
         }
         inputNodeNames.clear();
 
-        for (auto& name : outputNodeNames) {
+        for (auto &name : outputNodeNames)
+        {
             delete[] name;
         }
         outputNodeNames.clear();
@@ -75,7 +82,7 @@ const char* SAM::CreateSession(SEG::DL_INIT_PARAM& iParams) {
         sessionOption.SetIntraOpNumThreads(iParams.intraOpNumThreads);
         sessionOption.SetLogSeverityLevel(iParams.logSeverityLevel);
 
-        const char* modelPath = iParams.modelPath.c_str();
+        const char *modelPath = iParams.modelPath.c_str();
 
         session = std::make_unique<Ort::Session>(env, modelPath, sessionOption);
         Ort::AllocatorWithDefaultOptions allocator;
@@ -83,7 +90,7 @@ const char* SAM::CreateSession(SEG::DL_INIT_PARAM& iParams) {
         for (size_t i = 0; i < inputNodesNum; i++)
         {
             Ort::AllocatedStringPtr input_node_name = session->GetInputNameAllocated(i, allocator);
-            char* temp_buf = new char[50];
+            char *temp_buf = new char[50];
             strcpy(temp_buf, input_node_name.get());
             inputNodeNames.push_back(temp_buf);
         }
@@ -91,11 +98,11 @@ const char* SAM::CreateSession(SEG::DL_INIT_PARAM& iParams) {
         for (size_t i = 0; i < OutputNodesNum; i++)
         {
             Ort::AllocatedStringPtr output_node_name = session->GetOutputNameAllocated(i, allocator);
-            char* temp_buf = new char[10];
+            char *temp_buf = new char[10];
             strcpy(temp_buf, output_node_name.get());
             outputNodeNames.push_back(temp_buf);
         }
-        options = Ort::RunOptions{ nullptr };
+        options = Ort::RunOptions{nullptr};
 
         auto input_shape = session->GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
         auto output_shape = session->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
@@ -104,78 +111,79 @@ const char* SAM::CreateSession(SEG::DL_INIT_PARAM& iParams) {
         WarmUpSession(modelType);
         return RET_OK;
     }
-    catch (const std::exception& e)
+    catch (const std::exception &e)
     {
-        const char* str1 = "[SAM]:";
-        const char* str2 = e.what();
+        const char *str1 = "[SAM]:";
+        const char *str2 = e.what();
         std::string result = std::string(str1) + std::string(str2);
-        char* merged = new char[result.length() + 1];
+        char *merged = new char[result.length() + 1];
         std::strcpy(merged, result.c_str());
         std::cout << merged << std::endl;
         delete[] merged;
         return "[SAM]:Create session failed.";
     }
-
 }
 
-const char* SAM::RunSession(const cv::Mat& iImg, std::vector<SEG::DL_RESULT>& oResult, SEG::MODEL_TYPE modelType, SEG::DL_RESULT& result) {
-    #ifdef benchmark
-        clock_t starttime_1 = clock();
-    #endif // benchmark
-        Utils utilities;
-        const char* Ret = RET_OK;
-        cv::Mat processedImg;
-        utilities.PreProcess(iImg, imgSize, processedImg);
-        if (modelType < 4)
+const char *SAM::RunSession(const cv::Mat &iImg, std::vector<SEG::DL_RESULT> &oResult, SEG::MODEL_TYPE modelType, SEG::DL_RESULT &result)
+{
+#ifdef benchmark
+    clock_t starttime_1 = clock();
+#endif // benchmark
+    Utils utilities;
+    const char *Ret = RET_OK;
+    cv::Mat processedImg;
+    utilities.PreProcess(iImg, imgSize, processedImg);
+    if (modelType < 4)
+    {
+        float *blob = new float[processedImg.total() * 3];
+        utilities.BlobFromImage(processedImg, blob);
+        std::vector<int64_t> inputNodeDims;
+        if (modelType == SEG::SAM_SEGMENT_ENCODER)
         {
-            float* blob = new float[processedImg.total() * 3];
-            utilities.BlobFromImage(processedImg, blob);
-            std::vector<int64_t> inputNodeDims;
-            if (modelType == SEG::SAM_SEGMENT_ENCODER)
-            {
-                inputNodeDims = { 1, 3, imgSize.at(0), imgSize.at(1) };
-            }
-            else if (modelType == SEG::SAM_SEGMENT_DECODER)
-            {
-                // Input size or SAM decoder model is 256x64x64 for the decoder
-                inputNodeDims = { 1, 256, 64, 64 };
-            }
-            TensorProcess(starttime_1, iImg, blob, inputNodeDims, modelType, oResult, utilities, result);
+            inputNodeDims = {1, 3, imgSize.at(0), imgSize.at(1)};
         }
-        else
+        else if (modelType == SEG::SAM_SEGMENT_DECODER)
         {
-    #ifdef USE_CUDA
-            half* blob = new half[processedImg.total() * 3];
-            utilities.BlobFromImage(processedImg, blob);
-            std::vector<int64_t> inputNodeDims = { 1,3,imgSize.at(0),imgSize.at(1) };
-            TensorProcess(starttime_1, iImg, blob, inputNodeDims, modelType, oResult, utilities, result);
-    #endif
+            // Input size of the SAM decoder model is 256x64x64
+            inputNodeDims = {1, 256, 64, 64};
         }
-
-        return Ret;
+        TensorProcess(starttime_1, iImg, blob, inputNodeDims, modelType, oResult, utilities, result);
+    }
+    else
+    {
+#ifdef USE_CUDA
+        half *blob = new half[processedImg.total() * 3];
+        utilities.BlobFromImage(processedImg, blob);
+        std::vector<int64_t> inputNodeDims = {1, 3, imgSize.at(0), imgSize.at(1)};
+        TensorProcess(starttime_1, iImg, blob, inputNodeDims, modelType, oResult, utilities, result);
+#endif
     }
 
-    template<typename N>
-    char* SAM::TensorProcess(clock_t& starttime_1, const cv::Mat& iImg, N& blob, std::vector<int64_t>& inputNodeDims,
-        SEG::MODEL_TYPE modelType, std::vector<SEG::DL_RESULT>& oResult, Utils& utilities, SEG::DL_RESULT& result) {
+    return Ret;
+}
 
-        switch (modelType)
-        {
-        case SEG::SAM_SEGMENT_ENCODER:
+template <typename N>
+char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, N &blob, std::vector<int64_t> &inputNodeDims,
+                         SEG::MODEL_TYPE modelType, std::vector<SEG::DL_RESULT> &oResult, Utils &utilities, SEG::DL_RESULT &result)
+{
+
+    switch (modelType)
+    {
+    case SEG::SAM_SEGMENT_ENCODER:
         // case OTHER_SAM_MODEL:
         {
 
             Ort::Value inputTensor = Ort::Value::CreateTensor<typename std::remove_pointer<N>::type>(
                 Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1),
                 inputNodeDims.data(), inputNodeDims.size());
-        #ifdef benchmark
+#ifdef benchmark
             clock_t starttime_2 = clock();
-        #endif // benchmark
+#endif // benchmark
             auto outputTensor = session->Run(options, inputNodeNames.data(), &inputTensor, 1, outputNodeNames.data(),
-                outputNodeNames.size());
-        #ifdef benchmark
+                                             outputNodeNames.size());
+#ifdef benchmark
             clock_t starttime_3 = clock();
-        #endif // benchmark
+#endif // benchmark
 
             Ort::TypeInfo typeInfo = outputTensor.front().GetTypeInfo();
             auto tensor_info = typeInfo.GetTensorTypeAndShapeInfo();
@@ -184,10 +192,9 @@ const char* SAM::RunSession(const cv::Mat& iImg, std::vector<SEG::DL_RESULT>& oR
             delete[] blob;
 
             int embeddingSize = outputNodeDims[1] * outputNodeDims[2] * outputNodeDims[3]; // Flattened size
-            result.embeddings.assign(output, output + embeddingSize); // Save embeddings
+            result.embeddings.assign(output, output + embeddingSize);                      // Save embeddings
 
-
-    #ifdef benchmark
+#ifdef benchmark
             clock_t starttime_4 = clock();
             double pre_process_time = (double)(starttime_2 - starttime_1) / CLOCKS_PER_SEC * 1000;
             double process_time = (double)(starttime_3 - starttime_2) / CLOCKS_PER_SEC * 1000;
@@ -200,149 +207,140 @@ const char* SAM::RunSession(const cv::Mat& iImg, std::vector<SEG::DL_RESULT>& oR
             {
                 std::cout << "[SAM(CPU)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl;
             }
-    #endif // benchmark
+#endif // benchmark
 
             break;
         }
-        case SEG::SAM_SEGMENT_DECODER:
-        {
-            // Use embeddings from the last result
-            std::vector<float> embeddings = result.embeddings;
-            // Create tensor for decoder
-            std::vector<int64_t> decoderInputDims = { 1, 256, 64, 64 }; // Adjust based on your decoder's requirements
-
-            // Create  point coordinates and labels
-    #ifdef ROI
-
-            // Create a window for user interaction
-            namedWindow("Select and View Result", cv::WINDOW_AUTOSIZE);
-
-            // Let the user select the bounding box
-            cv::Rect bbox = selectROI("Select and View Result", iImg, false, false);
-
-            // Check if a valid bounding box was selected
-            if (bbox.width == 0 || bbox.height == 0)
-            {
-                std::cerr << "No valid bounding box selected." << std::endl;
-                return "[SAM]: NO valid Box.";
-            }
-
-            std::vector<cv::Rect> boundingBoxes;
-            boundingBoxes.push_back(bbox);
-    #endif // ROI
-            //boundingBoxes.push_back(bbox1);
-            // Declare timing variables BEFORE the loop
-            #ifdef benchmark
-            clock_t starttime_2 = 0;
-            clock_t starttime_3 = 0;
-            #endif // benchmark
-
-        #ifdef ROI
-            for (const auto &bbox : boundingBoxes)
-        #else
-            for (const auto &bbox : result.boxes)
-        #endif // ROI
-            {
-                Ort::Value decoderInputTensor = Ort::Value::CreateTensor<float>(
-                    Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU),
-                    embeddings.data(), // Use the embeddings from the encoder
-                    embeddings.size(), // Total number of elements
-                    decoderInputDims.data(),
-                    decoderInputDims.size()
-                );
-                // Use center of bounding box as foreground point
-                float centerX = bbox.x + bbox.width/2;
-                float centerY = bbox.y + bbox.height/2;
-
-                // Convert bounding box to points
-                std::vector<float> pointCoords = {
-                    (float)bbox.x, (float)bbox.y,                              // Top-left
-                    (float)(bbox.x + bbox.width), (float)(bbox.y + bbox.height) // Bottom-right
-                };
-
-
-                std::vector<float> pointCoordsScaled;
-
-                std::vector<int64_t> pointCoordsDims = {1, 2, 2}; // 2 points, each with (x, y)
-
-                // Labels for the points
-                std::vector<float> pointLabels = {2.0f, 3.0f}; // Box prompt labels
-                std::vector<int64_t> pointLabelsDims = {1, 2};
-
-                // Create dummy mask_input and has_mask_input
-                std::vector<float> maskInput(256 * 256, 0.0f); // Fill with zeros
-                std::vector<int64_t> maskInputDims = {1, 1, 256, 256};
-
-
-                std::vector<float> hasMaskInput = {0.0f}; // No mask provided
-                std::vector<int64_t> hasMaskInputDims = {1};
-
-                utilities.ScaleBboxPoints(iImg, imgSize, pointCoords, pointCoordsScaled);
+    case SEG::SAM_SEGMENT_DECODER:
+    {
+        // Use embeddings from the last result
+        std::vector<float> embeddings = result.embeddings;
+        // Create tensor for decoder
+        std::vector<int64_t> decoderInputDims = {1, 256, 64, 64}; // Adjust based on your decoder's requirements
 
+        // Create  point coordinates and labels
+#ifdef ROI
 
+        // Create a window for user interaction
+        namedWindow("Select and View Result", cv::WINDOW_AUTOSIZE);
 
+        // Let the user select the bounding box
+        cv::Rect bbox = selectROI("Select and View Result", iImg, false, false);
 
-                std::vector<Ort::Value> inputTensors  = utilities.PrepareInputTensor(
-                    decoderInputTensor,
-                    pointCoordsScaled,
-                    pointCoordsDims,
-                    pointLabels,
-                    pointLabelsDims,
-                    maskInput,
-                    maskInputDims,
-                    hasMaskInput,
-                    hasMaskInputDims
-                );
-
-            #ifdef benchmark
-                starttime_2 = clock();
-            #endif // benchmark
-                auto output_tensors = session->Run(
-                    options,
-                    inputNodeNames.data(),
-                    inputTensors.data(),
-                    inputTensors.size(),
-                    outputNodeNames.data(),
-                    outputNodeNames.size());
+        // Check if a valid bounding box was selected
+        if (bbox.width == 0 || bbox.height == 0)
+        {
+            std::cerr << "No valid bounding box selected." << std::endl;
+            return "[SAM]: NO valid Box.";
+        }
 
-            #ifdef benchmark
-                starttime_3 = clock();
-            #endif // benchmark
+        std::vector<cv::Rect> boundingBoxes;
+        boundingBoxes.push_back(bbox);
+#endif // ROI
+       // boundingBoxes.push_back(bbox1);
+       // Declare timing variables BEFORE the loop
+#ifdef benchmark
+        clock_t starttime_2 = 0;
+        clock_t starttime_3 = 0;
+#endif // benchmark
+
+#ifdef ROI
+        for (const auto &bbox : boundingBoxes)
+#else
+        for (const auto &bbox : result.boxes)
+#endif // ROI
+        {
+            Ort::Value decoderInputTensor = Ort::Value::CreateTensor<float>(
+                Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU),
+                embeddings.data(), // Use the embeddings from the encoder
+                embeddings.size(), // Total number of elements
+                decoderInputDims.data(),
+                decoderInputDims.size());
+            // Use center of bounding box as foreground point
+            float centerX = bbox.x + bbox.width / 2;
+            float centerY = bbox.y + bbox.height / 2;
+
+            // Convert bounding box to points
+            std::vector<float> pointCoords = {
+                (float)bbox.x, (float)bbox.y,                               // Top-left
+                (float)(bbox.x + bbox.width), (float)(bbox.y + bbox.height) // Bottom-right
+            };
 
+            std::vector<float> pointCoordsScaled;
+
+            std::vector<int64_t> pointCoordsDims = {1, 2, 2}; // 2 points, each with (x, y)
+
+            // Labels for the points
+            std::vector<float> pointLabels = {2.0f, 3.0f}; // Box prompt labels
+            std::vector<int64_t> pointLabelsDims = {1, 2};
+
+            // Create dummy mask_input and has_mask_input
+            std::vector<float> maskInput(256 * 256, 0.0f); // Fill with zeros
+            std::vector<int64_t> maskInputDims = {1, 1, 256, 256};
+
+            std::vector<float> hasMaskInput = {0.0f}; // No mask provided
+            std::vector<int64_t> hasMaskInputDims = {1};
+
+            utilities.ScaleBboxPoints(iImg, imgSize, pointCoords, pointCoordsScaled);
+
+            std::vector<Ort::Value> inputTensors = utilities.PrepareInputTensor(
+                decoderInputTensor,
+                pointCoordsScaled,
+                pointCoordsDims,
+                pointLabels,
+                pointLabelsDims,
+                maskInput,
+                maskInputDims,
+                hasMaskInput,
+                hasMaskInputDims);
+
+#ifdef benchmark
+            starttime_2 = clock();
+#endif // benchmark
+            auto output_tensors = session->Run(
+                options,
+                inputNodeNames.data(),
+                inputTensors.data(),
+                inputTensors.size(),
+                outputNodeNames.data(),
+                outputNodeNames.size());
 
-                utilities.overlay(output_tensors, iImg, imgSize, result);
-            }
-            // Add the result to oResult
-            oResult.push_back(result);
+#ifdef benchmark
+            starttime_3 = clock();
+#endif // benchmark
 
-            delete[] blob;
+            utilities.overlay(output_tensors, iImg, imgSize, result);
+        }
+        // Add the result to oResult
+        oResult.push_back(result);
 
-        #ifdef benchmark
-            clock_t starttime_4 = clock();
-            double pre_process_time = (double)(starttime_2 - starttime_1) / CLOCKS_PER_SEC * 1000;
-            double process_time = (double)(starttime_3 - starttime_2) / CLOCKS_PER_SEC * 1000;
-            double post_process_time = (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000;
-            if (cudaEnable)
-            {
-                std::cout << "[SAM(CUDA)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl;
-            }
-            else
-            {
-                std::cout << "[SAM(CPU)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl;
-            }
-        #endif // benchmark
-                break;
-            }
+        delete[] blob;
 
-        default:
-            std::cout << "[SAM]: " << "Not support model type." << std::endl;
+#ifdef benchmark
+        clock_t starttime_4 = clock();
+        double pre_process_time = (double)(starttime_2 - starttime_1) / CLOCKS_PER_SEC * 1000;
+        double process_time = (double)(starttime_3 - starttime_2) / CLOCKS_PER_SEC * 1000;
+        double post_process_time = (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000;
+        if (cudaEnable)
+        {
+            std::cout << "[SAM(CUDA)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl;
         }
-        return RET_OK;
-
+        else
+        {
+            std::cout << "[SAM(CPU)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl;
+        }
+#endif // benchmark
+        break;
     }
 
+    default:
+        std::cout << "[SAM]: " << "Not support model type." << std::endl;
+    }
+    return RET_OK;
+}
 
-char* SAM::WarmUpSession(SEG::MODEL_TYPE modelType) {
+char *SAM::WarmUpSession(SEG::MODEL_TYPE modelType)
+{
     clock_t starttime_1 = clock();
     Utils utilities;
     cv::Mat iImg = cv::Mat(cv::Size(imgSize.at(0), imgSize.at(1)), CV_8UC3);
@@ -350,17 +348,18 @@ char* SAM::WarmUpSession(SEG::MODEL_TYPE modelType) {
     utilities.PreProcess(iImg, imgSize, processedImg);
     if (modelType < 4)
     {
-        float* blob = new float[iImg.total() * 3];
+        float *blob = new float[iImg.total() * 3];
         utilities.BlobFromImage(processedImg, blob);
-        std::vector<int64_t> SAM_input_node_dims = { 1, 3, imgSize.at(0), imgSize.at(1) };
+        std::vector<int64_t> SAM_input_node_dims = {1, 3, imgSize.at(0), imgSize.at(1)};
         switch (modelType)
         {
-        case SEG::SAM_SEGMENT_ENCODER: {
+        case SEG::SAM_SEGMENT_ENCODER:
+        {
             Ort::Value input_tensor = Ort::Value::CreateTensor<float>(
                 Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1),
                 SAM_input_node_dims.data(), SAM_input_node_dims.size());
             auto output_tensors = session->Run(options, inputNodeNames.data(), &input_tensor, 1, outputNodeNames.data(),
-                outputNodeNames.size());
+                                               outputNodeNames.size());
             delete[] blob;
             clock_t starttime_4 = clock();
             double post_process_time = (double)(starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000;
@@ -371,36 +370,36 @@ char* SAM::WarmUpSession(SEG::MODEL_TYPE modelType) {
             break;
         }
 
-        case SEG::SAM_SEGMENT_DECODER: {
-            std::vector<int64_t> inputNodeDims = { 1, 256, 64, 64 }; // BUG: That was 236 instead of 256
+        case SEG::SAM_SEGMENT_DECODER:
+        {
+            std::vector<int64_t> inputNodeDims = {1, 256, 64, 64}; // BUG: That was 236 instead of 256
             // Use embeddings from the last result
-            std::vector<float> dummyEmbeddings(256 * 64 * 64, 1.0f); // Fill with zeros or any dummy values
-            std::vector<int64_t> decoderInputDims = { 1, 256, 64, 64 }; // Adjust based on your decoder's requirements
-
+            std::vector<float> dummyEmbeddings(256 * 64 * 64, 1.0f);  // Fill with ones as dummy values
+            std::vector<int64_t> decoderInputDims = {1, 256, 64, 64}; // Adjust based on your decoder's requirements
 
             // Create dummy point coordinates and labels
             std::vector<cv::Rect> boundingBoxes = {
                 cv::Rect(0, 0, 100, 100), // Example bounding box with (x, y, width, height)
-                //cv::Rect(0, 0, 473, 359) // Another example bounding box
+                // cv::Rect(0, 0, 473, 359) // Another example bounding box
             };
-            for (const auto& bbox : boundingBoxes) {
+            for (const auto &bbox : boundingBoxes)
+            {
                 Ort::Value decoderInputTensor = Ort::Value::CreateTensor<float>(
                     Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU),
                     dummyEmbeddings.data(), // Use the embeddings from the encoder
                     dummyEmbeddings.size(), // Total number of elements
                     decoderInputDims.data(),
-                    decoderInputDims.size()
-                );
+                    decoderInputDims.size());
                 // Convert bounding box to points
                 // Use center of bounding box as foreground point
-                float centerX = bbox.x + bbox.width/2;
-                float centerY = bbox.y + bbox.height/2;
+                float centerX = bbox.x + bbox.width / 2;
+                float centerY = bbox.y + bbox.height / 2;
 
                 std::vector<float> pointCoords = {
-                    centerX, centerY  // Center point (foreground)
+                    centerX, centerY // Center point (foreground)
                 };
 
-                std::vector<int64_t> pointCoordsDims = { 1, 1, 2 }; // 2 points, each with (x, y)
+                std::vector<int64_t> pointCoordsDims = {1, 1, 2}; // 1 point with (x, y)
 
                 std::vector<float> pointCoordsScaled;
 
@@ -408,14 +407,14 @@ char* SAM::WarmUpSession(SEG::MODEL_TYPE modelType) {
 
                 // Labels for the points
                 std::vector<float> pointLabels = {1.0f}; // All points are foreground
-                std::vector<int64_t> pointLabelsDims = { 1, 1};
+                std::vector<int64_t> pointLabelsDims = {1, 1};
                 // Create dummy mask_input and has_mask_input
                 std::vector<float> maskInput(256 * 256, 0.0f); // Fill with zeros
-                std::vector<int64_t> maskInputDims = { 1, 1, 256, 256 };
-                std::vector<float> hasMaskInput = { 0.0f }; // No mask provided
-                std::vector<int64_t> hasMaskInputDims = { 1 };
+                std::vector<int64_t> maskInputDims = {1, 1, 256, 256};
+                std::vector<float> hasMaskInput = {0.0f}; // No mask provided
+                std::vector<int64_t> hasMaskInputDims = {1};
 
-                std::vector<Ort::Value> inputTensors  = utilities.PrepareInputTensor(
+                std::vector<Ort::Value> inputTensors = utilities.PrepareInputTensor(
                     decoderInputTensor,
                     pointCoordsScaled,
                     pointCoordsDims,
@@ -424,8 +423,7 @@ char* SAM::WarmUpSession(SEG::MODEL_TYPE modelType) {
                     maskInput,
                     maskInputDims,
                     hasMaskInput,
-                    hasMaskInputDims
-                );
+                    hasMaskInputDims);
 
                 auto output_tensors = session->Run(
                     options,
@@ -433,8 +431,8 @@ char* SAM::WarmUpSession(SEG::MODEL_TYPE modelType) {
                     inputTensors.data(),
                     inputTensors.size(),
                     outputNodeNames.data(),
-                    outputNodeNames.size()
-                ); }
+                    outputNodeNames.size());
+            }
 
             outputNodeNames.size();
             delete[] blob;
@@ -447,15 +445,14 @@ char* SAM::WarmUpSession(SEG::MODEL_TYPE modelType) {
 
             break;
         }
-    }
-
+        }
     }
     else
     {
 #ifdef USE_CUDA
-        half* blob = new half[iImg.total() * 3];
+        half *blob = new half[iImg.total() * 3];
         utilities.BlobFromImage(processedImg, blob);
-        std::vector<int64_t> SAM_input_node_dims = { 1,3,imgSize.at(0),imgSize.at(1) };
+        std::vector<int64_t> SAM_input_node_dims = {1, 3, imgSize.at(0), imgSize.at(1)};
         Ort::Value input_tensor = Ort::Value::CreateTensor<half>(Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1), SAM_input_node_dims.data(), SAM_input_node_dims.size());
         auto output_tensors = session->Run(options, inputNodeNames.data(), &input_tensor, 1, outputNodeNames.data(), outputNodeNames.size());
         delete[] blob;
diff --git a/src/segmentation.cpp b/src/segmentation.cpp
index 2962563..8b5338c 100644
--- a/src/segmentation.cpp
+++ b/src/segmentation.cpp
@@ -14,19 +14,17 @@ std::tuple<std::vector<std::unique_ptr<SAM>>, SEG::DL_INIT_PARAM, SEG::DL_INIT_P
     params_encoder.rectConfidenceThreshold = 0.1;
     params_encoder.iouThreshold = 0.5;
     params_encoder.modelPath = "SAM_encoder.onnx";
-    params_encoder.imgSize = { 1024, 1024 };
+    params_encoder.imgSize = {1024, 1024};
 
     params_decoder = params_encoder;
     params_decoder.modelType = SEG::SAM_SEGMENT_DECODER;
     params_decoder.modelPath = "SAM_mask_decoder.onnx";
 
-
-
-    #ifdef USE_CUDA
+#ifdef USE_CUDA
     params_encoder.cudaEnable = true;
-    #else
+#else
     params_encoder.cudaEnable = false;
-    #endif
+#endif
 
     samSegmentorEncoder->CreateSession(params_encoder);
     samSegmentorDecoder->CreateSession(params_decoder);
@@ -35,7 +33,8 @@ std::tuple<std::vector<std::unique_ptr<SAM>>, SEG::DL_INIT_PARAM, SEG::DL_INIT_P
     return {std::move(samSegmentors), params_encoder, params_decoder};
 }
 
-std::vector<cv::Mat> SegmentAnything(std::vector<std::unique_ptr<SAM>>& samSegmentors, SEG::DL_INIT_PARAM& params_encoder, SEG::DL_INIT_PARAM& params_decoder, cv::Mat& img) {
+std::vector<cv::Mat> SegmentAnything(std::vector<std::unique_ptr<SAM>> &samSegmentors, SEG::DL_INIT_PARAM &params_encoder, SEG::DL_INIT_PARAM &params_decoder, cv::Mat &img)
+{
 
     std::vector<SEG::DL_RESULT> resSam;
     SEG::DL_RESULT res;
@@ -43,15 +42,15 @@ std::vector<cv::Mat> SegmentAnything(std::vector<std::unique_ptr<SAM>>& samSegme
     SEG::MODEL_TYPE modelTypeRef = params_encoder.modelType;
     samSegmentors[0]->RunSession(img, resSam, modelTypeRef, res);
 
-
     modelTypeRef = params_decoder.modelType;
     samSegmentors[1]->RunSession(img, resSam, modelTypeRef, res);
 
-    //cv::destroyAllWindows();
+    // cv::destroyAllWindows();
     cv::Mat finalMask = res.masks[0];
     std::cout << "Final mask size: " << finalMask.size() << std::endl;
 
-    for (const auto& mask : res.masks) {
+    for (const auto &mask : res.masks)
+    {
         cv::imshow("Mask", mask);
         cv::waitKey(0);
     }
diff --git a/src/utils.cpp b/src/utils.cpp
index 153c0ac..8d76ac1 100644
--- a/src/utils.cpp
+++ b/src/utils.cpp
@@ -2,15 +2,16 @@
 #include <opencv2/ximgproc/edge_filter.hpp> // for guided filter
 
 // Constructor
-Utils::Utils(){
-
+Utils::Utils()
+{
 }
 
 // Destructor
-Utils::~Utils(){
+Utils::~Utils()
+{
 }
 
-char* Utils::PreProcess(const cv::Mat& iImg, std::vector<int> iImgSize, cv::Mat& oImg)
+char *Utils::PreProcess(const cv::Mat &iImg, std::vector<int> iImgSize, cv::Mat &oImg)
 {
     if (iImg.channels() == 3)
     {
@@ -22,41 +23,46 @@ char* Utils::PreProcess(const cv::Mat& iImg, std::vector<int> iImgSize, cv::Mat&
         cv::cvtColor(iImg, oImg, cv::COLOR_GRAY2RGB);
     }
 
-
-        if (iImg.cols >= iImg.rows)
-        {
-            resizeScales = iImg.cols / (float)iImgSize.at(0);
-            cv::resize(oImg, oImg, cv::Size(iImgSize.at(0), int(iImg.rows / resizeScales)));
-        }
-        else
-        {
-            resizeScales = iImg.rows / (float)iImgSize.at(0);
-            cv::resize(oImg, oImg, cv::Size(int(iImg.cols / resizeScales), iImgSize.at(1)));
-        }
-        cv::Mat tempImg = cv::Mat::zeros(iImgSize.at(0), iImgSize.at(1), CV_8UC3);
-        oImg.copyTo(tempImg(cv::Rect(0, 0, oImg.cols, oImg.rows)));
-        oImg = tempImg;
+    if (iImg.cols >= iImg.rows)
+    {
+        resizeScales = iImg.cols / (float)iImgSize.at(0);
+        cv::resize(oImg, oImg, cv::Size(iImgSize.at(0), int(iImg.rows / resizeScales)));
+    }
+    else
+    {
+        resizeScales = iImg.rows / (float)iImgSize.at(0);
+        cv::resize(oImg, oImg, cv::Size(int(iImg.cols / resizeScales), iImgSize.at(1)));
+    }
+    cv::Mat tempImg = cv::Mat::zeros(iImgSize.at(0), iImgSize.at(1), CV_8UC3);
+    oImg.copyTo(tempImg(cv::Rect(0, 0, oImg.cols, oImg.rows)));
+    oImg = tempImg;
 
     return RET_OK;
 }
 
-void Utils::ScaleBboxPoints(const cv::Mat& iImg, std::vector<int> imgSize, std::vector<float>& pointCoords, std::vector<float>& pointCoordsScaled){
+void Utils::ScaleBboxPoints(const cv::Mat &iImg, std::vector<int> imgSize, std::vector<float> &pointCoords, std::vector<float> &pointCoordsScaled)
+{
 
     pointCoordsScaled.clear();
 
     // Calculate same scale as preprocessing
     float scale;
-    if (iImg.cols >= iImg.rows) {
+    if (iImg.cols >= iImg.rows)
+    {
         scale = imgSize[0] / (float)iImg.cols;
         resizeScalesBbox = iImg.cols / (float)imgSize[0];
-    } else {
+    }
+    else
+    {
         scale = imgSize[1] / (float)iImg.rows;
         resizeScalesBbox = iImg.rows / (float)imgSize[1];
     }
 
-    // TOP-LEFT placement (matching PreProcess)
-    for (size_t i = 0; i < pointCoords.size(); i += 2) {
-        if (i + 1 < pointCoords.size()) {
+    // Top-Left placement (matching PreProcess)
+    for (size_t i = 0; i < pointCoords.size(); i += 2)
+    {
+        if (i + 1 < pointCoords.size())
+        {
             float x = pointCoords[i];
             float y = pointCoords[i + 1];
 
@@ -70,68 +76,59 @@ void Utils::ScaleBboxPoints(const cv::Mat& iImg, std::vector<int> imgSize, std::
     }
 }
 
-std::vector<Ort::Value> Utils::PrepareInputTensor(Ort::Value& decoderInputTensor, std::vector<float>& pointCoordsScaled, std::vector<int64_t> pointCoordsDims, std::vector<float>& pointLabels,
-    std::vector<int64_t> pointLabelsDims, std::vector<float>& maskInput, std::vector<int64_t> maskInputDims, std::vector<float>& hasMaskInput, std::vector<int64_t> hasMaskInputDims){
-
-Ort::Value pointCoordsTensor = Ort::Value::CreateTensor<float>(
-    Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU),
-    pointCoordsScaled.data(),
-    pointCoordsScaled.size(),
-    pointCoordsDims.data(),
-    pointCoordsDims.size()
-);
-
-
-
-Ort::Value pointLabelsTensor = Ort::Value::CreateTensor<float>(
-    Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU),
-    pointLabels.data(),
-    pointLabels.size(),
-    pointLabelsDims.data(),
-    pointLabelsDims.size()
-);
-
-
-
-Ort::Value maskInputTensor = Ort::Value::CreateTensor<float>(
-    Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU),
-    maskInput.data(),
-    maskInput.size(),
-    maskInputDims.data(),
-    maskInputDims.size()
-);
-
-
-
-Ort::Value hasMaskInputTensor = Ort::Value::CreateTensor<float>(
-    Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU),
-    hasMaskInput.data(),
-    hasMaskInput.size(),
-    hasMaskInputDims.data(),
-    hasMaskInputDims.size()
-);
-
-// Pass all inputs to the decoder
-std::vector<Ort::Value> inputTensors;
-inputTensors.push_back(std::move(decoderInputTensor));
-inputTensors.push_back(std::move(pointCoordsTensor));
-inputTensors.push_back(std::move(pointLabelsTensor));
-inputTensors.push_back(std::move(maskInputTensor));
-inputTensors.push_back(std::move(hasMaskInputTensor));
+std::vector<Ort::Value> Utils::PrepareInputTensor(Ort::Value &decoderInputTensor, std::vector<float> &pointCoordsScaled, std::vector<int64_t> pointCoordsDims, std::vector<float> &pointLabels,
+                                                  std::vector<int64_t> pointLabelsDims, std::vector<float> &maskInput, std::vector<int64_t> maskInputDims, std::vector<float> &hasMaskInput, std::vector<int64_t> hasMaskInputDims)
+{
 
-return inputTensors;
+    Ort::Value pointCoordsTensor = Ort::Value::CreateTensor<float>(
+        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU),
+        pointCoordsScaled.data(),
+        pointCoordsScaled.size(),
+        pointCoordsDims.data(),
+        pointCoordsDims.size());
+
+    Ort::Value pointLabelsTensor = Ort::Value::CreateTensor<float>(
+        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU),
+        pointLabels.data(),
+        pointLabels.size(),
+        pointLabelsDims.data(),
+        pointLabelsDims.size());
+
+    Ort::Value maskInputTensor = Ort::Value::CreateTensor<float>(
+        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU),
+        maskInput.data(),
+        maskInput.size(),
+        maskInputDims.data(),
+        maskInputDims.size());
+
+    Ort::Value hasMaskInputTensor = Ort::Value::CreateTensor<float>(
+        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU),
+        hasMaskInput.data(),
+        hasMaskInput.size(),
+        hasMaskInputDims.data(),
+        hasMaskInputDims.size());
+
+    // Pass all inputs to the decoder
+    std::vector<Ort::Value> inputTensors;
+    inputTensors.push_back(std::move(decoderInputTensor));
+    inputTensors.push_back(std::move(pointCoordsTensor));
+    inputTensors.push_back(std::move(pointLabelsTensor));
+    inputTensors.push_back(std::move(maskInputTensor));
+    inputTensors.push_back(std::move(hasMaskInputTensor));
+
+    return inputTensors;
 }
-void Utils::overlay(std::vector<Ort::Value>& output_tensors, const cv::Mat& iImg, std::vector<int> imgSize, SEG::DL_RESULT& result){
+void Utils::overlay(std::vector<Ort::Value> &output_tensors, const cv::Mat &iImg, std::vector<int> imgSize, SEG::DL_RESULT &result)
+{
     // Process decoder output (masks)
     if (output_tensors.size() > 0)
     {
         // Get the masks from the output tensor
-        auto scoresTensor = std::move(output_tensors[0]);  // IoU scores
-        auto masksTensor = std::move(output_tensors[1]); // First output should be the masks PROBABLY WRONG
+        auto scoresTensor = std::move(output_tensors[0]); // IoU scores
+        auto masksTensor = std::move(output_tensors[1]);  // Second output is assumed to hold the masks (TODO: confirm output order)
         auto masksInfo = masksTensor.GetTensorTypeAndShapeInfo();
         auto masksShape = masksInfo.GetShape();
 
-
         if (masksShape.size() == 4)
         {
             auto masksData = masksTensor.GetTensorMutableData<float>();
@@ -142,7 +139,6 @@ void Utils::overlay(std::vector<Ort::Value>& output_tensors, const cv::Mat& iImg
             size_t height = masksShape[2];    // Height of mask
             size_t width = masksShape[3];     // Width of mask
 
-
             // Find the best mask (highest IoU score)
             float bestScore = -1;
             size_t bestMaskIndex = 0;
@@ -152,34 +148,39 @@ void Utils::overlay(std::vector<Ort::Value>& output_tensors, const cv::Mat& iImg
 
                 float score = scoresData[i];
 
-                if (score > bestScore) {
+                if (score > bestScore)
+                {
                     bestScore = score;
                     bestMaskIndex = i;
                 }
             }
-            std::cout << "Best mask index: " << bestMaskIndex << ", Score: " << bestScore << std::endl;
-                // Create OpenCV Mat for the mask
-                cv::Mat mask = cv::Mat::zeros(height, width, CV_8UC1);
+            // std::cout << "Best mask index: " << bestMaskIndex << ", Score: " << bestScore << std::endl;
+
+            // Create OpenCV Mat for the mask
+            cv::Mat mask = cv::Mat::zeros(height, width, CV_8UC1);
 
-                // Convert float mask to binary mask
-                for (size_t h = 0; h < height; ++h)
+            // Convert float mask to binary mask
+            for (size_t h = 0; h < height; ++h)
+            {
+                for (size_t w = 0; w < width; ++w)
                 {
-                    for (size_t w = 0; w < width; ++w)
-                    {
-                        size_t idx = (bestMaskIndex * height * width) + (h * width) + w;
-                        float value = masksData[idx];
-                        mask.at<uchar>(h, w) = (value > 0.5f) ? 255 : 0; // Threshold at 0.5
-                    }
+                    size_t idx = (bestMaskIndex * height * width) + (h * width) + w;
+                    float value = masksData[idx];
+                    mask.at<uchar>(h, w) = (value > 0.5f) ? 255 : 0; // Threshold at 0.5
                 }
+            }
 
             // 1. Calculate the dimensions the image had during preprocessing
             float scale;
             int processedWidth, processedHeight;
-            if (iImg.cols >= iImg.rows) {
+            if (iImg.cols >= iImg.rows)
+            {
                 scale = (float)imgSize[0] / iImg.cols;
                 processedWidth = imgSize[0];
                 processedHeight = int(iImg.rows * scale);
-            } else {
+            }
+            else
+            {
                 scale = (float)imgSize[1] / iImg.rows;
                 processedWidth = int(iImg.cols * scale);
                 processedHeight = imgSize[1];
@@ -238,7 +239,6 @@ void Utils::overlay(std::vector<Ort::Value>& output_tensors, const cv::Mat& iImg
                 }
             }*/
 
-
             // Find contours of the mask
             std::vector<std::vector<cv::Point>> contours;
             cv::findContours(finalMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE);
@@ -251,16 +251,17 @@ void Utils::overlay(std::vector<Ort::Value>& output_tensors, const cv::Mat& iImg
             // Draw contours with a thick, high-contrast outline
             cv::drawContours(iImg, contours, -1, cv::Scalar(0, 255, 255), 2); // Yellow outline
 
-
             // Save or display the result
             cv::imwrite("segmentation_result_" + std::to_string(bestMaskIndex) + ".jpg", iImg);
             cv::imwrite("mask_" + std::to_string(bestMaskIndex) + ".jpg", finalMask);
-            }else
-            {
-                std::cerr << "[SAM]: Unexpected mask tensor shape." << std::endl;
-            }
-        }else
-            {
-                std::cerr << "[SAM]: No masks found in the output tensor." << std::endl;
-            }
-    }
\ No newline at end of file
+        }
+        else
+        {
+            std::cerr << "[SAM]: Unexpected mask tensor shape." << std::endl;
+        }
+    }
+    else
+    {
+        std::cerr << "[SAM]: No masks found in the output tensor." << std::endl;
+    }
+}
\ No newline at end of file

From e10d45d557e34227e34d8422e3a8eeeed0afbb7e Mon Sep 17 00:00:00 2001
From: IasonTheodorou <iasonth95@gmail.com>
Date: Fri, 29 Aug 2025 13:35:05 +0200
Subject: [PATCH 08/28] Small refactoring of the module

---
 CMakeLists.txt          |   6 +-
 include/dl_types.h      |  17 +-
 include/sam_inference.h |  14 +-
 include/segmentation.h  |  13 +-
 include/utils.h         |   6 +-
 src/main.cpp            |   6 +-
 src/sam_inference.cpp   | 760 +++++++++++++++++++---------------------
 src/segmentation.cpp    |  83 ++---
 8 files changed, 434 insertions(+), 471 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1270d93..8cb430a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,7 +1,7 @@
 cmake_minimum_required(VERSION 3.5)
 
-set(PROJECT_NAME SAMOnnxRuntimeCPPInference)
-project(sam_onnx_ros)
+set(PROJECT_NAME sam_onnx_ros)
+
 project(${PROJECT_NAME} VERSION 0.0.1 LANGUAGES CXX)
 
 # -------------- CMake Policies ------------------#
@@ -13,7 +13,7 @@ set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_CXX_EXTENSIONS ON)
 #set(CMAKE_INCLUDE_CURRENT_DIR ON)
-
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 # -------------- OpenCV  ------------------#
 find_package(OpenCV REQUIRED)
 include_directories(${OpenCV_INCLUDE_DIRS})
diff --git a/include/dl_types.h b/include/dl_types.h
index 632c7c6..5141284 100644
--- a/include/dl_types.h
+++ b/include/dl_types.h
@@ -1,4 +1,11 @@
-#pragma once
+#ifndef DL_TYPES_H
+#define DL_TYPES_H
+
+#include <opencv4/opencv2/opencv.hpp>
+#include <opencv4/opencv2/core/types.hpp>
+#include <vector>
+#include <string>
+
 namespace SEG
 {
     enum MODEL_TYPE
@@ -29,7 +36,7 @@ namespace SEG
         // std::vector<cv::Rect> boxes; // For SAM encoder model, this will be filled with detected boxes
 
         // Overloaded output operator for _DL_INIT_PARAM to print its contents
-        friend std::ostream &operator<<(std::ostream &os, _DL_INIT_PARAM &param)
+        friend std::ostream &operator<<(std::ostream &os, const _DL_INIT_PARAM &param)
         {
             os << "modelPath: " << param.modelPath << "\n";
             os << "modelType: " << param.modelType << "\n";
@@ -51,9 +58,6 @@ namespace SEG
     typedef struct _DL_RESULT
     {
 
-        // Yolo Part
-        int classId;
-        float confidence;
         std::vector<cv::Rect> boxes; // For SAM encoder model, this will be filled with detected boxes
         std::vector<cv::Point2f> keyPoints;
 
@@ -63,4 +67,5 @@ namespace SEG
         std::vector<cv::Mat> masks; // Each cv::Mat represents a mask
 
     } DL_RESULT;
-} // namespace SEG
\ No newline at end of file
+} // namespace SEG
+#endif // DL_TYPES_H
\ No newline at end of file
diff --git a/include/sam_inference.h b/include/sam_inference.h
index 8910bda..d63701c 100644
--- a/include/sam_inference.h
+++ b/include/sam_inference.h
@@ -1,12 +1,12 @@
-#pragma once
+#ifndef SAMINFERENCE_H
+#define SAMINFERENCE_H
 
-#define RET_OK nullptr
 
+#define RET_OK nullptr
+#include <memory>
 #include <string>
 #include <vector>
 #include <cstdio>
-#include <opencv2/opencv.hpp>
-#include "onnxruntime_cxx_api.h"
 #include "utils.h"
 #ifdef USE_CUDA
 #include <cuda_fp16.h>
@@ -27,7 +27,7 @@ class SAM
     char *WarmUpSession(SEG::MODEL_TYPE modelType);
 
     template <typename N>
-    char *TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, N &blob, std::vector<int64_t> &inputNodeDims,
+    const char *TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, N &blob, std::vector<int64_t> &inputNodeDims,
                         SEG::MODEL_TYPE modelType, std::vector<SEG::DL_RESULT> &oResult, Utils &utilities, SEG::DL_RESULT &result);
 
     std::vector<std::string> classes{};
@@ -44,4 +44,6 @@ class SAM
     std::vector<int> imgSize;
     float rectConfidenceThreshold;
     float iouThreshold;
-};
\ No newline at end of file
+};
+
+#endif // SAMINFERENCE_H
\ No newline at end of file
diff --git a/include/segmentation.h b/include/segmentation.h
index 46e954e..b341f8d 100644
--- a/include/segmentation.h
+++ b/include/segmentation.h
@@ -1,11 +1,10 @@
-#include <iostream>
-#include <iomanip>
-#include <filesystem>
-#include <fstream>
-#include <random>
+#ifndef SEGMENTATION_H
+#define SEGMENTATION_H
+
 #include <tuple>
 
 #include "sam_inference.h"
-
 std::tuple<std::vector<std::unique_ptr<SAM>>, SEG::_DL_INIT_PARAM, SEG::_DL_INIT_PARAM> Initializer();
-std::vector<cv::Mat> SegmentAnything(std::vector<std::unique_ptr<SAM>>& samSegmentors, SEG::_DL_INIT_PARAM& params_encoder, SEG::_DL_INIT_PARAM& params_decoder, cv::Mat& img);
\ No newline at end of file
+std::vector<cv::Mat> SegmentAnything(std::vector<std::unique_ptr<SAM>>& samSegmentors, const SEG::_DL_INIT_PARAM& params_encoder, const SEG::_DL_INIT_PARAM& params_decoder, cv::Mat& img);
+
+#endif // SEGMENTATION_H
\ No newline at end of file
diff --git a/include/utils.h b/include/utils.h
index 7ff7f9c..333c9e3 100644
--- a/include/utils.h
+++ b/include/utils.h
@@ -1,11 +1,11 @@
-#pragma once
+#ifndef UTILS_H
+#define UTILS_H
 
 #define RET_OK nullptr
 
 #include <string>
 #include <vector>
 #include <cstdio>
-#include <opencv2/opencv.hpp>
 #include "onnxruntime_cxx_api.h"
 #include "dl_types.h"
 #ifdef USE_CUDA
@@ -53,3 +53,5 @@ class Utils
     float resizeScales;
     float resizeScalesBbox; // letterbox scale
 };
+
+#endif // UTILS_H
\ No newline at end of file
diff --git a/src/main.cpp b/src/main.cpp
index 3c8091d..2b2d602 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -1,5 +1,8 @@
 #include "segmentation.h"
-
+#include <iostream>
+#include <vector>
+#include <filesystem>
+#include <opencv2/opencv.hpp>
 int main()
 {
     // Running inference
@@ -24,6 +27,7 @@ int main()
                 cv::waitKey(0);
                 cv::destroyAllWindows();
             }
+            std::cout << "OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOo" << std::endl;
         }
     }
     return 0;
diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp
index 8a07b6b..9c0463b 100644
--- a/src/sam_inference.cpp
+++ b/src/sam_inference.cpp
@@ -1,468 +1,426 @@
 #include "sam_inference.h"
 #include "utils.h"
 #include <regex>
-#include <typeinfo>
 
 #define benchmark
 #define ROI
 
-SAM::SAM()
-{
-}
+SAM::SAM() {}
 
-SAM::~SAM()
-{
-    // Clean up input/output node names
-    for (auto &name : inputNodeNames)
-    {
-        delete[] name;
-    }
-    for (auto &name : outputNodeNames)
-    {
-        delete[] name;
-    }
+SAM::~SAM() {
+  // Clean up input/output node names
+  for (auto &name : inputNodeNames) {
+    delete[] name;
+  }
+  for (auto &name : outputNodeNames) {
+    delete[] name;
+  }
 }
 
 #ifdef USE_CUDA
-namespace Ort
-{
-    template <>
-    struct TypeToTensorType<half>
-    {
-        static constexpr ONNXTensorElementDataType type = ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16;
-    };
-}
+namespace Ort {
+template <> struct TypeToTensorType<half> {
+  static constexpr ONNXTensorElementDataType type =
+      ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16;
+};
+} // namespace Ort
 #endif
 
-const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams)
-{
-    const char *Ret = RET_OK;
-    if (session)
-    {
-        session.reset(); // Release previous session
-
-        // Clear node names
-        for (auto &name : inputNodeNames)
-        {
-            delete[] name;
-        }
-        inputNodeNames.clear();
-
-        for (auto &name : outputNodeNames)
-        {
-            delete[] name;
-        }
-        outputNodeNames.clear();
+const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) {
+  const char *Ret = RET_OK;
+  if (session) {
+    session.reset(); // Release previous session
+
+    // Clear node names
+    for (auto &name : inputNodeNames) {
+      delete[] name;
     }
-    std::regex pattern("[\u4e00-\u9fa5]");
-    bool result = std::regex_search(iParams.modelPath, pattern);
-    if (result)
-    {
-        Ret = "[SAM]:Your model path is error.Change your model path without chinese characters.";
-        std::cout << Ret << std::endl;
-        return Ret;
+    inputNodeNames.clear();
+
+    for (auto &name : outputNodeNames) {
+      delete[] name;
     }
-    try
-    {
-        rectConfidenceThreshold = iParams.rectConfidenceThreshold;
-        iouThreshold = iParams.iouThreshold;
-        imgSize = iParams.imgSize;
-        modelType = iParams.modelType;
-        cudaEnable = iParams.cudaEnable;
-        env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, "Sam");
-        Ort::SessionOptions sessionOption;
-        if (iParams.cudaEnable)
-        {
-            OrtCUDAProviderOptions cudaOption;
-            cudaOption.device_id = 0;
-            sessionOption.AppendExecutionProvider_CUDA(cudaOption);
-        }
-
-        sessionOption.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
-        sessionOption.SetIntraOpNumThreads(iParams.intraOpNumThreads);
-        sessionOption.SetLogSeverityLevel(iParams.logSeverityLevel);
-
-        const char *modelPath = iParams.modelPath.c_str();
-
-        session = std::make_unique<Ort::Session>(env, modelPath, sessionOption);
-        Ort::AllocatorWithDefaultOptions allocator;
-        size_t inputNodesNum = session->GetInputCount();
-        for (size_t i = 0; i < inputNodesNum; i++)
-        {
-            Ort::AllocatedStringPtr input_node_name = session->GetInputNameAllocated(i, allocator);
-            char *temp_buf = new char[50];
-            strcpy(temp_buf, input_node_name.get());
-            inputNodeNames.push_back(temp_buf);
-        }
-        size_t OutputNodesNum = session->GetOutputCount();
-        for (size_t i = 0; i < OutputNodesNum; i++)
-        {
-            Ort::AllocatedStringPtr output_node_name = session->GetOutputNameAllocated(i, allocator);
-            char *temp_buf = new char[10];
-            strcpy(temp_buf, output_node_name.get());
-            outputNodeNames.push_back(temp_buf);
-        }
-        options = Ort::RunOptions{nullptr};
-
-        auto input_shape = session->GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
-        auto output_shape = session->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
-        auto output_type = session->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetElementType();
-
-        WarmUpSession(modelType);
-        return RET_OK;
+    outputNodeNames.clear();
+  }
+  std::regex pattern("[\u4e00-\u9fa5]");
+  bool result = std::regex_search(iParams.modelPath, pattern);
+  if (result) {
+    Ret = "[SAM]:Your model path is error.Change your model path without "
+          "chinese characters.";
+    std::cout << Ret << std::endl;
+    return Ret;
+  }
+  try {
+    rectConfidenceThreshold = iParams.rectConfidenceThreshold;
+    iouThreshold = iParams.iouThreshold;
+    imgSize = iParams.imgSize;
+    modelType = iParams.modelType;
+    cudaEnable = iParams.cudaEnable;
+    env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, "Sam");
+    Ort::SessionOptions sessionOption;
+    if (iParams.cudaEnable) {
+      OrtCUDAProviderOptions cudaOption;
+      cudaOption.device_id = 0;
+      sessionOption.AppendExecutionProvider_CUDA(cudaOption);
     }
-    catch (const std::exception &e)
-    {
-        const char *str1 = "[SAM]:";
-        const char *str2 = e.what();
-        std::string result = std::string(str1) + std::string(str2);
-        char *merged = new char[result.length() + 1];
-        std::strcpy(merged, result.c_str());
-        std::cout << merged << std::endl;
-        delete[] merged;
-        return "[SAM]:Create session failed.";
+
+    sessionOption.SetGraphOptimizationLevel(
+        GraphOptimizationLevel::ORT_ENABLE_ALL);
+    sessionOption.SetIntraOpNumThreads(iParams.intraOpNumThreads);
+    sessionOption.SetLogSeverityLevel(iParams.logSeverityLevel);
+
+    const char *modelPath = iParams.modelPath.c_str();
+
+    session = std::make_unique<Ort::Session>(env, modelPath, sessionOption);
+    Ort::AllocatorWithDefaultOptions allocator;
+    size_t inputNodesNum = session->GetInputCount();
+    for (size_t i = 0; i < inputNodesNum; i++) {
+      Ort::AllocatedStringPtr input_node_name =
+          session->GetInputNameAllocated(i, allocator);
+      char *temp_buf = new char[50];
+      strcpy(temp_buf, input_node_name.get());
+      inputNodeNames.push_back(temp_buf);
+    }
+    size_t OutputNodesNum = session->GetOutputCount();
+    for (size_t i = 0; i < OutputNodesNum; i++) {
+      Ort::AllocatedStringPtr output_node_name =
+          session->GetOutputNameAllocated(i, allocator);
+      char *temp_buf = new char[10];
+      strcpy(temp_buf, output_node_name.get());
+      outputNodeNames.push_back(temp_buf);
     }
+    options = Ort::RunOptions{nullptr};
+
+    auto input_shape =
+        session->GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
+    auto output_shape =
+        session->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
+    auto output_type = session->GetOutputTypeInfo(0)
+                           .GetTensorTypeAndShapeInfo()
+                           .GetElementType();
+
+    WarmUpSession(modelType);
+    return RET_OK;
+  } catch (const std::exception &e) {
+    const char *str1 = "[SAM]:";
+    const char *str2 = e.what();
+    std::string str_result = std::string(str1) + std::string(str2);
+    char *merged = new char[str_result.length() + 1];
+    std::strcpy(merged, str_result.c_str());
+    std::cout << merged << std::endl;
+    delete[] merged;
+    return "[SAM]:Create session failed.";
+  }
 }
 
-const char *SAM::RunSession(const cv::Mat &iImg, std::vector<SEG::DL_RESULT> &oResult, SEG::MODEL_TYPE modelType, SEG::DL_RESULT &result)
-{
+const char *SAM::RunSession(const cv::Mat &iImg,
+                            std::vector<SEG::DL_RESULT> &oResult,
+                            SEG::MODEL_TYPE modelType, SEG::DL_RESULT &result) {
 #ifdef benchmark
-    clock_t starttime_1 = clock();
+  clock_t starttime_1 = clock();
 #endif // benchmark
-    Utils utilities;
-    const char *Ret = RET_OK;
-    cv::Mat processedImg;
-    utilities.PreProcess(iImg, imgSize, processedImg);
-    if (modelType < 4)
-    {
-        float *blob = new float[processedImg.total() * 3];
-        utilities.BlobFromImage(processedImg, blob);
-        std::vector<int64_t> inputNodeDims;
-        if (modelType == SEG::SAM_SEGMENT_ENCODER)
-        {
-            inputNodeDims = {1, 3, imgSize.at(0), imgSize.at(1)};
-        }
-        else if (modelType == SEG::SAM_SEGMENT_DECODER)
-        {
-            // Input size or SAM decoder model is 256x64x64 for the decoder
-            inputNodeDims = {1, 256, 64, 64};
-        }
-        TensorProcess(starttime_1, iImg, blob, inputNodeDims, modelType, oResult, utilities, result);
-    }
-    else
-    {
-#ifdef USE_CUDA
-        half *blob = new half[processedImg.total() * 3];
-        utilities.BlobFromImage(processedImg, blob);
-        std::vector<int64_t> inputNodeDims = {1, 3, imgSize.at(0), imgSize.at(1)};
-        TensorProcess(starttime_1, iImg, blob, inputNodeDims, modelType, oResult, utilities, result);
-#endif
-    }
-
-    return Ret;
+  Utils utilities;
+  const char *Ret = RET_OK;
+  cv::Mat processedImg;
+  utilities.PreProcess(iImg, imgSize, processedImg);
+  float *blob = new float[processedImg.total() * 3];
+  utilities.BlobFromImage(processedImg, blob);
+  std::vector<int64_t> inputNodeDims;
+  if (modelType == SEG::SAM_SEGMENT_ENCODER) {
+    inputNodeDims = {1, 3, imgSize.at(0), imgSize.at(1)};
+  } else if (modelType == SEG::SAM_SEGMENT_DECODER) {
+    // Input size of the SAM decoder model is 256x64x64
+    inputNodeDims = {1, 256, 64, 64};
+  }
+  TensorProcess(starttime_1, iImg, blob, inputNodeDims, modelType, oResult,
+                utilities, result);
+
+  return Ret;
 }
 
 template <typename N>
-char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, N &blob, std::vector<int64_t> &inputNodeDims,
-                         SEG::MODEL_TYPE modelType, std::vector<SEG::DL_RESULT> &oResult, Utils &utilities, SEG::DL_RESULT &result)
-{
-
-    switch (modelType)
+const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg,
+                               N &blob, std::vector<int64_t> &inputNodeDims,
+                               SEG::MODEL_TYPE modelType,
+                               std::vector<SEG::DL_RESULT> &oResult,
+                               Utils &utilities, SEG::DL_RESULT &result) {
+
+  switch (modelType) {
+  case SEG::SAM_SEGMENT_ENCODER:
+    // case OTHER_SAM_MODEL:
     {
-    case SEG::SAM_SEGMENT_ENCODER:
-        // case OTHER_SAM_MODEL:
-        {
 
-            Ort::Value inputTensor = Ort::Value::CreateTensor<typename std::remove_pointer<N>::type>(
-                Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1),
-                inputNodeDims.data(), inputNodeDims.size());
+      Ort::Value inputTensor =
+          Ort::Value::CreateTensor<typename std::remove_pointer<N>::type>(
+              Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU),
+              blob, 3 * imgSize.at(0) * imgSize.at(1), inputNodeDims.data(),
+              inputNodeDims.size());
 #ifdef benchmark
-            clock_t starttime_2 = clock();
+      clock_t starttime_2 = clock();
 #endif // benchmark
-            auto outputTensor = session->Run(options, inputNodeNames.data(), &inputTensor, 1, outputNodeNames.data(),
-                                             outputNodeNames.size());
+      auto outputTensor =
+          session->Run(options, inputNodeNames.data(), &inputTensor, 1,
+                       outputNodeNames.data(), outputNodeNames.size());
 #ifdef benchmark
-            clock_t starttime_3 = clock();
+      clock_t starttime_3 = clock();
 #endif // benchmark
 
-            Ort::TypeInfo typeInfo = outputTensor.front().GetTypeInfo();
-            auto tensor_info = typeInfo.GetTensorTypeAndShapeInfo();
-            std::vector<int64_t> outputNodeDims = tensor_info.GetShape();
-            auto output = outputTensor.front().GetTensorMutableData<typename std::remove_pointer<N>::type>();
-            delete[] blob;
+      Ort::TypeInfo typeInfo = outputTensor.front().GetTypeInfo();
+      auto tensor_info = typeInfo.GetTensorTypeAndShapeInfo();
+      std::vector<int64_t> outputNodeDims = tensor_info.GetShape();
+      auto output =
+          outputTensor.front()
+              .GetTensorMutableData<typename std::remove_pointer<N>::type>();
+      delete[] blob;
 
-            int embeddingSize = outputNodeDims[1] * outputNodeDims[2] * outputNodeDims[3]; // Flattened size
-            result.embeddings.assign(output, output + embeddingSize);                      // Save embeddings
+      int embeddingSize = outputNodeDims[1] * outputNodeDims[2] *
+                          outputNodeDims[3]; // Flattened size
+      result.embeddings.assign(output,
+                               output + embeddingSize); // Save embeddings
 
 #ifdef benchmark
-            clock_t starttime_4 = clock();
-            double pre_process_time = (double)(starttime_2 - starttime_1) / CLOCKS_PER_SEC * 1000;
-            double process_time = (double)(starttime_3 - starttime_2) / CLOCKS_PER_SEC * 1000;
-            double post_process_time = (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000;
-            if (cudaEnable)
-            {
-                std::cout << "[SAM(CUDA)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl;
-            }
-            else
-            {
-                std::cout << "[SAM(CPU)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl;
-            }
+      clock_t starttime_4 = clock();
+      double pre_process_time =
+          (double)(starttime_2 - starttime_1) / CLOCKS_PER_SEC * 1000;
+      double process_time =
+          (double)(starttime_3 - starttime_2) / CLOCKS_PER_SEC * 1000;
+      double post_process_time =
+          (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000;
+      if (cudaEnable) {
+        std::cout << "[SAM(CUDA)]: " << pre_process_time << "ms pre-process, "
+                  << process_time << "ms inference, " << post_process_time
+                  << "ms post-process." << std::endl;
+      } else {
+        std::cout << "[SAM(CPU)]: " << pre_process_time << "ms pre-process, "
+                  << process_time << "ms inference, " << post_process_time
+                  << "ms post-process." << std::endl;
+      }
 #endif // benchmark
 
-            break;
-        }
-    case SEG::SAM_SEGMENT_DECODER:
-    {
-        // Use embeddings from the last result
-        std::vector<float> embeddings = result.embeddings;
-        // Create tensor for decoder
-        std::vector<int64_t> decoderInputDims = {1, 256, 64, 64}; // Adjust based on your decoder's requirements
-
-        // Create  point coordinates and labels
+      break;
+    }
+  case SEG::SAM_SEGMENT_DECODER: {
+    // Use embeddings from the last result
+    std::vector<float> embeddings = result.embeddings;
+    // Create tensor for decoder
+    std::vector<int64_t> decoderInputDims = {
+        1, 256, 64, 64}; // Adjust based on your decoder's requirements
+
+    // Create  point coordinates and labels
 #ifdef ROI
 
-        // Create a window for user interaction
-        namedWindow("Select and View Result", cv::WINDOW_AUTOSIZE);
+    // Create a window for user interaction
+    namedWindow("Select and View Result", cv::WINDOW_AUTOSIZE);
 
-        // Let the user select the bounding box
-        cv::Rect bbox = selectROI("Select and View Result", iImg, false, false);
+    // Let the user select the bounding box
+    cv::Rect bbox = selectROI("Select and View Result", iImg, false, false);
 
-        // Check if a valid bounding box was selected
-        if (bbox.width == 0 || bbox.height == 0)
-        {
-            std::cerr << "No valid bounding box selected." << std::endl;
-            return "[SAM]: NO valid Box.";
-        }
+    // Check if a valid bounding box was selected
+    if (bbox.width == 0 || bbox.height == 0) {
+      std::cerr << "No valid bounding box selected." << std::endl;
+      return "[SAM]: NO valid Box.";
+    }
 
-        std::vector<cv::Rect> boundingBoxes;
-        boundingBoxes.push_back(bbox);
+    std::vector<cv::Rect> boundingBoxes;
+    boundingBoxes.push_back(bbox);
 #endif // ROI
        // boundingBoxes.push_back(bbox1);
        // Declare timing variables BEFORE the loop
 #ifdef benchmark
-        clock_t starttime_2 = 0;
-        clock_t starttime_3 = 0;
+    clock_t starttime_2 = 0;
+    clock_t starttime_3 = 0;
 #endif // benchmark
 
 #ifdef ROI
-        for (const auto &bbox : boundingBoxes)
+    for (const auto &box : boundingBoxes)
 #else
-        for (const auto &bbox : result.boxes)
+    for (const auto &box : result.boxes)
 #endif // ROI
-        {
-            Ort::Value decoderInputTensor = Ort::Value::CreateTensor<float>(
-                Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU),
-                embeddings.data(), // Use the embeddings from the encoder
-                embeddings.size(), // Total number of elements
-                decoderInputDims.data(),
-                decoderInputDims.size());
-            // Use center of bounding box as foreground point
-            float centerX = bbox.x + bbox.width / 2;
-            float centerY = bbox.y + bbox.height / 2;
-
-            // Convert bounding box to points
-            std::vector<float> pointCoords = {
-                (float)bbox.x, (float)bbox.y,                               // Top-left
-                (float)(bbox.x + bbox.width), (float)(bbox.y + bbox.height) // Bottom-right
-            };
-
-            std::vector<float> pointCoordsScaled;
-
-            std::vector<int64_t> pointCoordsDims = {1, 2, 2}; // 2 points, each with (x, y)
-
-            // Labels for the points
-            std::vector<float> pointLabels = {2.0f, 3.0f}; // Box prompt labels
-            std::vector<int64_t> pointLabelsDims = {1, 2};
-
-            // Create dummy mask_input and has_mask_input
-            std::vector<float> maskInput(256 * 256, 0.0f); // Fill with zeros
-            std::vector<int64_t> maskInputDims = {1, 1, 256, 256};
-
-            std::vector<float> hasMaskInput = {0.0f}; // No mask provided
-            std::vector<int64_t> hasMaskInputDims = {1};
-
-            utilities.ScaleBboxPoints(iImg, imgSize, pointCoords, pointCoordsScaled);
-
-            std::vector<Ort::Value> inputTensors = utilities.PrepareInputTensor(
-                decoderInputTensor,
-                pointCoordsScaled,
-                pointCoordsDims,
-                pointLabels,
-                pointLabelsDims,
-                maskInput,
-                maskInputDims,
-                hasMaskInput,
-                hasMaskInputDims);
+    {
+      Ort::Value decoderInputTensor = Ort::Value::CreateTensor<float>(
+          Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU),
+          embeddings.data(), // Use the embeddings from the encoder
+          embeddings.size(), // Total number of elements
+          decoderInputDims.data(), decoderInputDims.size());
+      // Use center of bounding box as foreground point
+      float centerX = box.x + box.width / 2.0;
+      float centerY = box.y + box.height / 2.0;
+
+      // Convert bounding box to points
+      std::vector<float> pointCoords = {
+          (float)box.x, (float)box.y, // Top-left
+          (float)(box.x + box.width),
+          (float)(box.y + box.height) // Bottom-right
+      };
+
+      std::vector<float> pointCoordsScaled;
+
+      std::vector<int64_t> pointCoordsDims = {1, 2,
+                                              2}; // 2 points, each with (x, y)
+
+      // Labels for the points
+      std::vector<float> pointLabels = {2.0f, 3.0f}; // Box prompt labels
+      std::vector<int64_t> pointLabelsDims = {1, 2};
+
+      // Create dummy mask_input and has_mask_input
+      std::vector<float> maskInput(256 * 256, 0.0f); // Fill with zeros
+      std::vector<int64_t> maskInputDims = {1, 1, 256, 256};
+
+      std::vector<float> hasMaskInput = {0.0f}; // No mask provided
+      std::vector<int64_t> hasMaskInputDims = {1};
+
+      utilities.ScaleBboxPoints(iImg, imgSize, pointCoords, pointCoordsScaled);
+
+      std::vector<Ort::Value> inputTensors = utilities.PrepareInputTensor(
+          decoderInputTensor, pointCoordsScaled, pointCoordsDims, pointLabels,
+          pointLabelsDims, maskInput, maskInputDims, hasMaskInput,
+          hasMaskInputDims);
 
 #ifdef benchmark
-            starttime_2 = clock();
+      starttime_2 = clock();
 #endif // benchmark
-            auto output_tensors = session->Run(
-                options,
-                inputNodeNames.data(),
-                inputTensors.data(),
-                inputTensors.size(),
-                outputNodeNames.data(),
-                outputNodeNames.size());
+      auto output_tensors = session->Run(
+          options, inputNodeNames.data(), inputTensors.data(),
+          inputTensors.size(), outputNodeNames.data(), outputNodeNames.size());
 
 #ifdef benchmark
-            starttime_3 = clock();
+      starttime_3 = clock();
 #endif // benchmark
 
-            utilities.overlay(output_tensors, iImg, imgSize, result);
-        }
-        // Add the result to oResult
-        oResult.push_back(result);
+      utilities.overlay(output_tensors, iImg, imgSize, result);
+    }
+    // Add the result to oResult
+    oResult.push_back(result);
 
-        delete[] blob;
+    delete[] blob;
 
 #ifdef benchmark
-        clock_t starttime_4 = clock();
-        double pre_process_time = (double)(starttime_2 - starttime_1) / CLOCKS_PER_SEC * 1000;
-        double process_time = (double)(starttime_3 - starttime_2) / CLOCKS_PER_SEC * 1000;
-        double post_process_time = (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000;
-        if (cudaEnable)
-        {
-            std::cout << "[SAM(CUDA)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl;
-        }
-        else
-        {
-            std::cout << "[SAM(CPU)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl;
-        }
-#endif // benchmark
-        break;
+    clock_t starttime_4 = clock();
+    double pre_process_time =
+        (double)(starttime_2 - starttime_1) / CLOCKS_PER_SEC * 1000;
+    double process_time =
+        (double)(starttime_3 - starttime_2) / CLOCKS_PER_SEC * 1000;
+    double post_process_time =
+        (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000;
+    if (cudaEnable) {
+      std::cout << "[SAM(CUDA)]: " << pre_process_time << "ms pre-process, "
+                << process_time << "ms inference, " << post_process_time
+                << "ms post-process." << std::endl;
+    } else {
+      std::cout << "[SAM(CPU)]: " << pre_process_time << "ms pre-process, "
+                << process_time << "ms inference, " << post_process_time
+                << "ms post-process." << std::endl;
     }
+#endif // benchmark
+    break;
+  }
 
-    default:
-        std::cout << "[SAM]: " << "Not support model type." << std::endl;
-    }
-    return RET_OK;
+  default:
+    std::cout << "[SAM]: " << "Not support model type." << std::endl;
+  }
+  return RET_OK;
 }
 
-char *SAM::WarmUpSession(SEG::MODEL_TYPE modelType)
-{
-    clock_t starttime_1 = clock();
-    Utils utilities;
-    cv::Mat iImg = cv::Mat(cv::Size(imgSize.at(0), imgSize.at(1)), CV_8UC3);
-    cv::Mat processedImg;
-    utilities.PreProcess(iImg, imgSize, processedImg);
-    if (modelType < 4)
-    {
-        float *blob = new float[iImg.total() * 3];
-        utilities.BlobFromImage(processedImg, blob);
-        std::vector<int64_t> SAM_input_node_dims = {1, 3, imgSize.at(0), imgSize.at(1)};
-        switch (modelType)
-        {
-        case SEG::SAM_SEGMENT_ENCODER:
-        {
-            Ort::Value input_tensor = Ort::Value::CreateTensor<float>(
-                Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1),
-                SAM_input_node_dims.data(), SAM_input_node_dims.size());
-            auto output_tensors = session->Run(options, inputNodeNames.data(), &input_tensor, 1, outputNodeNames.data(),
-                                               outputNodeNames.size());
-            delete[] blob;
-            clock_t starttime_4 = clock();
-            double post_process_time = (double)(starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000;
-            if (cudaEnable)
-            {
-                std::cout << "[SAM(CUDA)]: " << "Cuda warm-up cost " << post_process_time << " ms. " << std::endl;
-            }
-            break;
-        }
-
-        case SEG::SAM_SEGMENT_DECODER:
-        {
-            std::vector<int64_t> inputNodeDims = {1, 256, 64, 64}; // BUG: That was 236 instead of 256
-            // Use embeddings from the last result
-            std::vector<float> dummyEmbeddings(256 * 64 * 64, 1.0f);  // Fill with zeros or any dummy values
-            std::vector<int64_t> decoderInputDims = {1, 256, 64, 64}; // Adjust based on your decoder's requirements
-
-            // Create dummy point coordinates and labels
-            std::vector<cv::Rect> boundingBoxes = {
-                cv::Rect(0, 0, 100, 100), // Example bounding box with (x, y, width, height)
-                // cv::Rect(0, 0, 473, 359) // Another example bounding box
-            };
-            for (const auto &bbox : boundingBoxes)
-            {
-                Ort::Value decoderInputTensor = Ort::Value::CreateTensor<float>(
-                    Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU),
-                    dummyEmbeddings.data(), // Use the embeddings from the encoder
-                    dummyEmbeddings.size(), // Total number of elements
-                    decoderInputDims.data(),
-                    decoderInputDims.size());
-                // Convert bounding box to points
-                // Use center of bounding box as foreground point
-                float centerX = bbox.x + bbox.width / 2;
-                float centerY = bbox.y + bbox.height / 2;
-
-                std::vector<float> pointCoords = {
-                    centerX, centerY // Center point (foreground)
-                };
-
-                std::vector<int64_t> pointCoordsDims = {1, 1, 2}; // 2 points, each with (x, y)
-
-                std::vector<float> pointCoordsScaled;
-
-                utilities.ScaleBboxPoints(iImg, imgSize, pointCoords, pointCoordsScaled);
-
-                // Labels for the points
-                std::vector<float> pointLabels = {1.0f}; // All points are foreground
-                std::vector<int64_t> pointLabelsDims = {1, 1};
-                // Create dummy mask_input and has_mask_input
-                std::vector<float> maskInput(256 * 256, 0.0f); // Fill with zeros
-                std::vector<int64_t> maskInputDims = {1, 1, 256, 256};
-                std::vector<float> hasMaskInput = {0.0f}; // No mask provided
-                std::vector<int64_t> hasMaskInputDims = {1};
-
-                std::vector<Ort::Value> inputTensors = utilities.PrepareInputTensor(
-                    decoderInputTensor,
-                    pointCoordsScaled,
-                    pointCoordsDims,
-                    pointLabels,
-                    pointLabelsDims,
-                    maskInput,
-                    maskInputDims,
-                    hasMaskInput,
-                    hasMaskInputDims);
-
-                auto output_tensors = session->Run(
-                    options,
-                    inputNodeNames.data(),
-                    inputTensors.data(),
-                    inputTensors.size(),
-                    outputNodeNames.data(),
-                    outputNodeNames.size());
-            }
-
-            outputNodeNames.size();
-            delete[] blob;
-            clock_t starttime_4 = clock();
-            double post_process_time = (double)(starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000;
-            if (cudaEnable)
-            {
-                std::cout << "[SAM(CUDA)]: " << "Cuda warm-up cost " << post_process_time << " ms. " << std::endl;
-            }
-
-            break;
-        }
-        }
+char *SAM::WarmUpSession(SEG::MODEL_TYPE modelType) {
+  clock_t starttime_1 = clock();
+  Utils utilities;
+  cv::Mat iImg = cv::Mat(cv::Size(imgSize.at(0), imgSize.at(1)), CV_8UC3);
+  cv::Mat processedImg;
+  utilities.PreProcess(iImg, imgSize, processedImg);
+
+  float *blob = new float[iImg.total() * 3];
+  utilities.BlobFromImage(processedImg, blob);
+  std::vector<int64_t> SAM_input_node_dims = {1, 3, imgSize.at(0),
+                                              imgSize.at(1)};
+  switch (modelType) {
+  case SEG::SAM_SEGMENT_ENCODER: {
+    Ort::Value input_tensor = Ort::Value::CreateTensor<float>(
+        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob,
+        3 * imgSize.at(0) * imgSize.at(1), SAM_input_node_dims.data(),
+        SAM_input_node_dims.size());
+    auto output_tensors =
+        session->Run(options, inputNodeNames.data(), &input_tensor, 1,
+                     outputNodeNames.data(), outputNodeNames.size());
+    delete[] blob;
+    clock_t starttime_4 = clock();
+    double post_process_time =
+        (double)(starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000;
+    if (cudaEnable) {
+      std::cout << "[SAM(CUDA)]: " << "Cuda warm-up cost " << post_process_time
+                << " ms. " << std::endl;
     }
-    else
-    {
-#ifdef USE_CUDA
-        half *blob = new half[iImg.total() * 3];
-        utilities.BlobFromImage(processedImg, blob);
-        std::vector<int64_t> SAM_input_node_dims = {1, 3, imgSize.at(0), imgSize.at(1)};
-        Ort::Value input_tensor = Ort::Value::CreateTensor<half>(Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1), SAM_input_node_dims.data(), SAM_input_node_dims.size());
-        auto output_tensors = session->Run(options, inputNodeNames.data(), &input_tensor, 1, outputNodeNames.data(), outputNodeNames.size());
-        delete[] blob;
-        clock_t starttime_4 = clock();
-        double post_process_time = (double)(starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000;
-        if (cudaEnable)
-        {
-            std::cout << "[SAM(CUDA)]: " << "Cuda warm-up cost " << post_process_time << " ms. " << std::endl;
-        }
-#endif
+    break;
+  }
+
+  case SEG::SAM_SEGMENT_DECODER: {
+    std::vector<int64_t> inputNodeDims = {
+        1, 256, 64, 64}; // BUG: That was 236 instead of 256
+    // Use embeddings from the last result
+    std::vector<float> dummyEmbeddings(
+        256 * 64 * 64, 1.0f); // Fill with zeros or any dummy values
+    std::vector<int64_t> decoderInputDims = {
+        1, 256, 64, 64}; // Adjust based on your decoder's requirements
+
+    // Create dummy point coordinates and labels
+    std::vector<cv::Rect> boundingBoxes = {
+        cv::Rect(0, 0, 100,
+                 100), // Example bounding box with (x, y, width, height)
+        // cv::Rect(0, 0, 473, 359) // Another example bounding box
+    };
+    for (const auto &bbox : boundingBoxes) {
+      Ort::Value decoderInputTensor = Ort::Value::CreateTensor<float>(
+          Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU),
+          dummyEmbeddings.data(), // Use the embeddings from the encoder
+          dummyEmbeddings.size(), // Total number of elements
+          decoderInputDims.data(), decoderInputDims.size());
+      // Convert bounding box to points
+      // Use center of bounding box as foreground point
+      float centerX = bbox.x + bbox.width / 2.0;
+      float centerY = bbox.y + bbox.height / 2.0;
+
+      std::vector<float> pointCoords = {
+          centerX, centerY // Center point (foreground)
+      };
+
+      std::vector<int64_t> pointCoordsDims = {1, 1,
+                                              2}; // 2 points, each with (x, y)
+
+      std::vector<float> pointCoordsScaled;
+
+      utilities.ScaleBboxPoints(iImg, imgSize, pointCoords, pointCoordsScaled);
+
+      // Labels for the points
+      std::vector<float> pointLabels = {1.0f}; // All points are foreground
+      std::vector<int64_t> pointLabelsDims = {1, 1};
+      // Create dummy mask_input and has_mask_input
+      std::vector<float> maskInput(256 * 256, 0.0f); // Fill with zeros
+      std::vector<int64_t> maskInputDims = {1, 1, 256, 256};
+      std::vector<float> hasMaskInput = {0.0f}; // No mask provided
+      std::vector<int64_t> hasMaskInputDims = {1};
+
+      std::vector<Ort::Value> inputTensors = utilities.PrepareInputTensor(
+          decoderInputTensor, pointCoordsScaled, pointCoordsDims, pointLabels,
+          pointLabelsDims, maskInput, maskInputDims, hasMaskInput,
+          hasMaskInputDims);
+
+      auto output_tensors = session->Run(
+          options, inputNodeNames.data(), inputTensors.data(),
+          inputTensors.size(), outputNodeNames.data(), outputNodeNames.size());
     }
-    return RET_OK;
+
+    outputNodeNames.size();
+    delete[] blob;
+    clock_t starttime_4 = clock();
+    double post_process_time =
+        (double)(starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000;
+    if (cudaEnable) {
+      std::cout << "[SAM(CUDA)]: " << "Cuda warm-up cost " << post_process_time
+                << " ms. " << std::endl;
+    }
+
+    break;
+  }
+  }
+
+  return RET_OK;
 }
diff --git a/src/segmentation.cpp b/src/segmentation.cpp
index 8b5338c..585dd13 100644
--- a/src/segmentation.cpp
+++ b/src/segmentation.cpp
@@ -1,59 +1,52 @@
 #include "segmentation.h"
 
-std::tuple<std::vector<std::unique_ptr<SAM>>, SEG::DL_INIT_PARAM, SEG::DL_INIT_PARAM> Initializer()
-{
-    std::vector<std::unique_ptr<SAM>> samSegmentors;
-    samSegmentors.push_back(std::make_unique<SAM>());
-    samSegmentors.push_back(std::make_unique<SAM>());
-
-    std::unique_ptr<SAM> samSegmentorEncoder = std::make_unique<SAM>();
-    std::unique_ptr<SAM> samSegmentorDecoder = std::make_unique<SAM>();
-    SEG::DL_INIT_PARAM params_encoder;
-    SEG::DL_INIT_PARAM params_decoder;
-
-    params_encoder.rectConfidenceThreshold = 0.1;
-    params_encoder.iouThreshold = 0.5;
-    params_encoder.modelPath = "SAM_encoder.onnx";
-    params_encoder.imgSize = {1024, 1024};
-
-    params_decoder = params_encoder;
-    params_decoder.modelType = SEG::SAM_SEGMENT_DECODER;
-    params_decoder.modelPath = "SAM_mask_decoder.onnx";
+std::tuple<std::vector<std::unique_ptr<SAM>>, SEG::DL_INIT_PARAM,
+           SEG::DL_INIT_PARAM>
+Initializer() {
+  std::vector<std::unique_ptr<SAM>> samSegmentors;
+  samSegmentors.push_back(std::make_unique<SAM>());
+  samSegmentors.push_back(std::make_unique<SAM>());
+
+  std::unique_ptr<SAM> samSegmentorEncoder = std::make_unique<SAM>();
+  std::unique_ptr<SAM> samSegmentorDecoder = std::make_unique<SAM>();
+  SEG::DL_INIT_PARAM params_encoder;
+  SEG::DL_INIT_PARAM params_decoder;
+
+  params_encoder.rectConfidenceThreshold = 0.1;
+  params_encoder.iouThreshold = 0.5;
+  params_encoder.modelPath = "SAM_encoder.onnx";
+  params_encoder.imgSize = {1024, 1024};
+
+  params_decoder = params_encoder;
+  params_decoder.modelType = SEG::SAM_SEGMENT_DECODER;
+  params_decoder.modelPath = "SAM_mask_decoder.onnx";
 
 #ifdef USE_CUDA
-    params_encoder.cudaEnable = true;
+  params_encoder.cudaEnable = true;
 #else
-    params_encoder.cudaEnable = false;
+  params_encoder.cudaEnable = false;
 #endif
 
-    samSegmentorEncoder->CreateSession(params_encoder);
-    samSegmentorDecoder->CreateSession(params_decoder);
-    samSegmentors[0] = std::move(samSegmentorEncoder);
-    samSegmentors[1] = std::move(samSegmentorDecoder);
-    return {std::move(samSegmentors), params_encoder, params_decoder};
+  samSegmentorEncoder->CreateSession(params_encoder);
+  samSegmentorDecoder->CreateSession(params_decoder);
+  samSegmentors[0] = std::move(samSegmentorEncoder);
+  samSegmentors[1] = std::move(samSegmentorDecoder);
+  return {std::move(samSegmentors), params_encoder, params_decoder};
 }
 
-std::vector<cv::Mat> SegmentAnything(std::vector<std::unique_ptr<SAM>> &samSegmentors, SEG::DL_INIT_PARAM &params_encoder, SEG::DL_INIT_PARAM &params_decoder, cv::Mat &img)
-{
+std::vector<cv::Mat>
+SegmentAnything(std::vector<std::unique_ptr<SAM>> &samSegmentors,
+                const SEG::DL_INIT_PARAM &params_encoder,
+                const SEG::DL_INIT_PARAM &params_decoder, cv::Mat &img) {
 
-    std::vector<SEG::DL_RESULT> resSam;
-    SEG::DL_RESULT res;
+  std::vector<SEG::DL_RESULT> resSam;
+  SEG::DL_RESULT res;
 
-    SEG::MODEL_TYPE modelTypeRef = params_encoder.modelType;
-    samSegmentors[0]->RunSession(img, resSam, modelTypeRef, res);
+  SEG::MODEL_TYPE modelTypeRef = params_encoder.modelType;
+  samSegmentors[0]->RunSession(img, resSam, modelTypeRef, res);
 
-    modelTypeRef = params_decoder.modelType;
-    samSegmentors[1]->RunSession(img, resSam, modelTypeRef, res);
+  modelTypeRef = params_decoder.modelType;
+  samSegmentors[1]->RunSession(img, resSam, modelTypeRef, res);
 
-    // cv::destroyAllWindows();
-    cv::Mat finalMask = res.masks[0];
-    std::cout << "Final mask size: " << finalMask.size() << std::endl;
-
-    for (const auto &mask : res.masks)
-    {
-        cv::imshow("Mask", mask);
-        cv::waitKey(0);
-    }
-    cv::destroyAllWindows();
-    return std::move(res.masks);
+  return std::move(res.masks);
 }

From 5cb4ab4bb2e20bd1bd01edef921854ab1c3de46c Mon Sep 17 00:00:00 2001
From: IasonTheodorou <iasonth95@gmail.com>
Date: Tue, 2 Sep 2025 18:10:33 +0200
Subject: [PATCH 09/28] Refactor post processing for better accuracy and
 performance. Also correct the preprocessing scaling issue on long images

---
 include/utils.h       |   3 +-
 src/main.cpp          |   9 +-
 src/sam_inference.cpp |  10 +-
 src/utils.cpp         | 221 ++++++++++++++++--------------------------
 4 files changed, 93 insertions(+), 150 deletions(-)

diff --git a/include/utils.h b/include/utils.h
index 333c9e3..e81b236 100644
--- a/include/utils.h
+++ b/include/utils.h
@@ -18,7 +18,6 @@ class Utils
     Utils();
     ~Utils();
 
-    void overlay(std::vector<Ort::Value> &output_tensors, const cv::Mat &iImg, std::vector<int> iImgSize, SEG::DL_RESULT &result);
     char *PreProcess(const cv::Mat &iImg, std::vector<int> iImgSize, cv::Mat &oImg);
     void ScaleBboxPoints(const cv::Mat &iImg, std::vector<int> iImgSize, std::vector<float> &pointCoords, std::vector<float> &PointsCoordsScaled);
 
@@ -26,6 +25,8 @@ class Utils
                                                std::vector<float> &pointLabels, std::vector<int64_t> pointLabelsDims, std::vector<float> &maskInput,
                                                std::vector<int64_t> maskInputDims, std::vector<float> &hasMaskInput, std::vector<int64_t> hasMaskInputDims);
 
+    void PostProcess(std::vector<Ort::Value> &output_tensors, const cv::Mat &iImg, std::vector<int> iImgSize, SEG::DL_RESULT &result);
+
     // Definition: Flattened image to blob (and normalizaed) for deep learning inference. Also reorganize from HWC to CHW.
     // Note: Code in header file since it is used outside of this utils src code.
     template <typename T>
diff --git a/src/main.cpp b/src/main.cpp
index 2b2d602..9399779 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -20,14 +20,7 @@ int main()
             cv::Mat img = cv::imread(img_path);
             std::vector<cv::Mat> masks;
             masks = SegmentAnything(samSegmentors, params_encoder, params_decoder, img);
-            for (int j = 0; j < masks.size(); j++)
-            {
-                std::cout << "Press any key to exit" << std::endl;
-                cv::imshow("Result of MASKS", masks[j]);
-                cv::waitKey(0);
-                cv::destroyAllWindows();
-            }
-            std::cout << "OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOo" << std::endl;
+
         }
     }
     return 0;
diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp
index 9c0463b..3f6a09e 100644
--- a/src/sam_inference.cpp
+++ b/src/sam_inference.cpp
@@ -206,7 +206,7 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg,
     std::vector<int64_t> decoderInputDims = {
         1, 256, 64, 64}; // Adjust based on your decoder's requirements
 
-    // Create  point coordinates and labels
+    // Create point coordinates for testing purposes
 #ifdef ROI
 
     // Create a window for user interaction
@@ -224,8 +224,7 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg,
     std::vector<cv::Rect> boundingBoxes;
     boundingBoxes.push_back(bbox);
 #endif // ROI
-       // boundingBoxes.push_back(bbox1);
-       // Declare timing variables BEFORE the loop
+
 #ifdef benchmark
     clock_t starttime_2 = 0;
     clock_t starttime_3 = 0;
@@ -255,8 +254,7 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg,
 
       std::vector<float> pointCoordsScaled;
 
-      std::vector<int64_t> pointCoordsDims = {1, 2,
-                                              2}; // 2 points, each with (x, y)
+      std::vector<int64_t> pointCoordsDims = {1, 2, 2}; // 2 points, each with (x, y)
 
       // Labels for the points
       std::vector<float> pointLabels = {2.0f, 3.0f}; // Box prompt labels
@@ -287,7 +285,7 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg,
       starttime_3 = clock();
 #endif // benchmark
 
-      utilities.overlay(output_tensors, iImg, imgSize, result);
+      utilities.PostProcess(output_tensors, iImg, imgSize, result);
     }
     // Add the result to oResult
     oResult.push_back(result);
diff --git a/src/utils.cpp b/src/utils.cpp
index 8d76ac1..f7721db 100644
--- a/src/utils.cpp
+++ b/src/utils.cpp
@@ -30,7 +30,7 @@ char *Utils::PreProcess(const cv::Mat &iImg, std::vector<int> iImgSize, cv::Mat
     }
     else
     {
-        resizeScales = iImg.rows / (float)iImgSize.at(0);
+        resizeScales = iImg.rows / (float)iImgSize.at(1);
         cv::resize(oImg, oImg, cv::Size(int(iImg.cols / resizeScales), iImgSize.at(1)));
     }
     cv::Mat tempImg = cv::Mat::zeros(iImgSize.at(0), iImgSize.at(1), CV_8UC3);
@@ -118,150 +118,101 @@ std::vector<Ort::Value> Utils::PrepareInputTensor(Ort::Value &decoderInputTensor
 
     return inputTensors;
 }
-void Utils::overlay(std::vector<Ort::Value> &output_tensors, const cv::Mat &iImg, std::vector<int> imgSize, SEG::DL_RESULT &result)
+void Utils::PostProcess(std::vector<Ort::Value> &output_tensors, const cv::Mat &iImg, std::vector<int> imgSize, SEG::DL_RESULT &result)
 {
-    // Process decoder output (masks)
-    if (output_tensors.size() > 0)
+    if (output_tensors.size() < 2)
     {
-        // Get the masks from the output tensor
-        auto scoresTensor = std::move(output_tensors[0]); // IoU scores
-        auto masksTensor = std::move(output_tensors[1]);  // First output should be the masks PROBABLY WRONG
-        auto masksInfo = masksTensor.GetTensorTypeAndShapeInfo();
-        auto masksShape = masksInfo.GetShape();
+        std::cerr << "[SAM]: Decoder returned insufficient outputs." << std::endl;
+        return;
+    }
+
+    // Assume [scores, masks]; consider shape-based detection later
+    auto scoresTensor = std::move(output_tensors[0]);
+    auto masksTensor  = std::move(output_tensors[1]);
+
+    auto masksInfo  = masksTensor.GetTensorTypeAndShapeInfo();
+    auto masksShape = masksInfo.GetShape();
+
+    if (masksShape.size() == 4)
+    {
+        auto masksData  = masksTensor.GetTensorMutableData<float>();
+        auto scoresData = scoresTensor.GetTensorMutableData<float>();
 
-        if (masksShape.size() == 4)
+        const size_t numMasks = static_cast<size_t>(masksShape[1]);
+        const size_t height   = static_cast<size_t>(masksShape[2]);
+        const size_t width    = static_cast<size_t>(masksShape[3]);
+
+        // Pick best mask by score
+        float bestScore = -1.0f;
+        size_t bestMaskIndex = 0;
+        for (size_t i = 0; i < numMasks; ++i)
         {
-            auto masksData = masksTensor.GetTensorMutableData<float>();
-            auto scoresData = scoresTensor.GetTensorMutableData<float>();
-
-            size_t batchSize = masksShape[0]; // Usually 1
-            size_t numMasks = masksShape[1];  // Number of masks (typically 1)
-            size_t height = masksShape[2];    // Height of mask
-            size_t width = masksShape[3];     // Width of mask
-
-            // Find the best mask (highest IoU score)
-            float bestScore = -1;
-            size_t bestMaskIndex = 0;
-
-            for (size_t i = 0; i < numMasks; ++i)
-            {
-
-                float score = scoresData[i];
-
-                if (score > bestScore)
-                {
-                    bestScore = score;
-                    bestMaskIndex = i;
-                }
-            }
-            // std::cout << "Best mask index: " << bestMaskIndex << ", Score: " << bestScore << std::endl;
-
-            // Create OpenCV Mat for the mask
-            cv::Mat mask = cv::Mat::zeros(height, width, CV_8UC1);
-
-            // Convert float mask to binary mask
-            for (size_t h = 0; h < height; ++h)
-            {
-                for (size_t w = 0; w < width; ++w)
-                {
-                    size_t idx = (bestMaskIndex * height * width) + (h * width) + w;
-                    float value = masksData[idx];
-                    mask.at<uchar>(h, w) = (value > 0.5f) ? 255 : 0; // Threshold at 0.5
-                }
-            }
-
-            // 1. Calculate the dimensions the image had during preprocessing
-            float scale;
-            int processedWidth, processedHeight;
-            if (iImg.cols >= iImg.rows)
-            {
-                scale = (float)imgSize[0] / iImg.cols;
-                processedWidth = imgSize[0];
-                processedHeight = int(iImg.rows * scale);
-            }
-            else
-            {
-                scale = (float)imgSize[1] / iImg.rows;
-                processedWidth = int(iImg.cols * scale);
-                processedHeight = imgSize[1];
-            }
-
-            // 3. Extract the portion that corresponds to the actual image (no padding)
-            int cropWidth = std::min(256, int(256 * processedWidth / (float)imgSize[0]));
-            int cropHeight = std::min(256, int(256 * processedHeight / (float)imgSize[1]));
-            cv::Mat croppedMask = mask(cv::Rect(0, 0, cropWidth, cropHeight));
-
-            // 4. Resize directly to original image dimensions in one step
-            cv::Mat finalMask;
-
-            // Use INTER_NEAREST for binary masks - preserves hard edges
-            cv::resize(croppedMask, finalMask, cv::Size(iImg.cols, iImg.rows), 0, 0, cv::INTER_NEAREST);
-
-            ////////////////////// GUIDED BILATERAL FILTER /////////////////////////
-            // Convert the upscaled mask to CV_8UC1 if necessary
-            if (finalMask.type() != CV_8UC1)
-            {
-                finalMask.convertTo(finalMask, CV_8UC1);
-            }
-
-            // Apply the Guided Filter
-            cv::Mat filteredMask;
-            int radius = 2;
-            double eps = 0.01;
-            cv::ximgproc::guidedFilter(iImg, finalMask, finalMask, radius, eps);
-            ////////////////////// END: GUIDED BILATERAL FILTER /////////////////////////
-
-            ////////////////////// MORPHOLOGICAN OPERATIONS /////////////////////////
-            // Morphological operations to clean up the mask
-            int kernelSize = std::max(5, std::min(iImg.cols, iImg.rows) / 100); // Adaptive size
-            cv::Mat kernel = cv::getStructuringElement(cv::MORPH_ELLIPSE, cv::Size(kernelSize, kernelSize));
-
-            // CLOSE operation: fills small holes in the mask
-            cv::morphologyEx(finalMask, finalMask, cv::MORPH_CLOSE, kernel);
-
-            // OPEN operation: removes small noise
-            cv::morphologyEx(finalMask, finalMask, cv::MORPH_OPEN, kernel);
-
-            ////////////////////// END: MORPHOLOGICAN OPERATIONS /////////////////////////
-
-            // Re-threshold after resizing to ensure binary mask (critical step)
-
-            cv::threshold(finalMask, finalMask, 127, 255, cv::THRESH_BINARY);
-            result.masks.push_back(finalMask);
-
-            /*// Add IoU scores if available (typically second tensor)
-            if (output_tensors.size() > 1) {
-                auto scoresTensor = std::move(output_tensors[1]);
-                auto scoresData = scoresTensor.GetTensorMutableData<float>();
-                if (i < scoresTensor.GetTensorTypeAndShapeInfo().GetShape()[1]) {
-                    result.confidence = scoresData[i];
-                    std::cout << "Mask confidence: " << result.confidence << std::endl;
-                }
-            }*/
-
-            // Find contours of the mask
-            std::vector<std::vector<cv::Point>> contours;
-            cv::findContours(finalMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE);
-
-            // Create a semi-transparent overlay
-            cv::Mat colorMask = cv::Mat::zeros(iImg.size(), CV_8UC3);
-            colorMask.setTo(cv::Scalar(0, 200, 0), finalMask); // Green fill
-            cv::addWeighted(iImg, 0.7, colorMask, 0.3, 0, iImg);
-
-            // Draw contours with a thick, high-contrast outline
-            cv::drawContours(iImg, contours, -1, cv::Scalar(0, 255, 255), 2); // Yellow outline
-
-            // Save or display the result
-            cv::imwrite("segmentation_result_" + std::to_string(bestMaskIndex) + ".jpg", iImg);
-            cv::imwrite("mask_" + std::to_string(bestMaskIndex) + ".jpg", finalMask);
+            const float s = scoresData ? scoresData[i] : 0.0f;
+            if (s > bestScore) { bestScore = s; bestMaskIndex = i; }
+        }
+
+        // Compute preprocessed region (top-left anchored)
+        float scale;
+        int processedWidth, processedHeight;
+        if (iImg.cols >= iImg.rows)
+        {
+            scale = static_cast<float>(imgSize[0]) / static_cast<float>(iImg.cols);
+            processedWidth  = imgSize[0];
+            processedHeight = static_cast<int>(iImg.rows * scale);
         }
         else
         {
-            std::cerr << "[SAM]: Unexpected mask tensor shape." << std::endl;
+            scale = static_cast<float>(imgSize[1]) / static_cast<float>(iImg.rows);
+            processedWidth  = static_cast<int>(iImg.cols * scale);
+            processedHeight = imgSize[1];
         }
+
+        auto clampDim = [](int v, int lo, int hi) { return std::max(lo, std::min(v, hi)); };
+
+        // Wrap selected mask plane as float prob map
+        const size_t planeOffset = bestMaskIndex * height * width;
+        cv::Mat prob32f(static_cast<int>(height), static_cast<int>(width), CV_32F,
+                        const_cast<float*>(masksData + planeOffset));
+
+        // Crop in mask space using proportional dimensions (no hardcoded 256)
+        const int cropW = clampDim(static_cast<int>(std::round(static_cast<float>(width)  * processedWidth  / static_cast<float>(imgSize[0]))), 1, static_cast<int>(width));
+        const int cropH = clampDim(static_cast<int>(std::round(static_cast<float>(height) * processedHeight / static_cast<float>(imgSize[1]))), 1, static_cast<int>(height));
+        cv::Mat probCropped = prob32f(cv::Rect(0, 0, cropW, cropH));
+
+        // Resize probabilities to original image (linear)
+        cv::Mat probResized;
+        cv::resize(probCropped, probResized, cv::Size(iImg.cols, iImg.rows), 0, 0, cv::INTER_LINEAR);
+
+        // Threshold once to binary mask
+        cv::Mat finalMask;
+        cv::compare(probResized, 0.5f, finalMask, cv::CMP_GT); // CV_8U 0/255
+
+        // Morphological cleanup (light, then ensure binary)
+        int kernelSize = std::max(5, std::min(iImg.cols, iImg.rows) / 100);
+        cv::Mat kernel = cv::getStructuringElement(cv::MORPH_ELLIPSE, cv::Size(kernelSize, kernelSize));
+        cv::morphologyEx(finalMask, finalMask, cv::MORPH_CLOSE, kernel);
+        cv::morphologyEx(finalMask, finalMask, cv::MORPH_OPEN, kernel);
+        cv::threshold(finalMask, finalMask, 127, 255, cv::THRESH_BINARY);
+
+        // Save mask
+        result.masks.push_back(finalMask);
+
+        // Overlay for display on a copy (iImg is const)
+        cv::Mat overlay = iImg.clone();
+        std::vector<std::vector<cv::Point>> contours;
+        cv::findContours(finalMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE);
+
+        cv::Mat colorMask = cv::Mat::zeros(overlay.size(), CV_8UC3);
+        colorMask.setTo(cv::Scalar(0, 200, 0), finalMask);
+        cv::addWeighted(overlay, 0.7, colorMask, 0.3, 0, overlay);
+        cv::drawContours(overlay, contours, -1, cv::Scalar(0, 255, 255), 2);
+
+        cv::imshow("SAM Segmentation", overlay);
+        cv::waitKey(0);
+        cv::destroyAllWindows();
     }
     else
     {
-        std::cerr << "[SAM]: No masks found in the output tensor." << std::endl;
+        std::cerr << "[SAM]: Unexpected mask tensor shape." << std::endl;
     }
 }
\ No newline at end of file

From 0fe00dae324f419817a7e72d13624fa30f85296c Mon Sep 17 00:00:00 2001
From: IasonTheodorou <iasonth95@gmail.com>
Date: Tue, 2 Sep 2025 21:06:50 +0200
Subject: [PATCH 10/28] Added tests (still not working with catkin)

---
 CMakeLists.txt    |  44 ++++++++++++++----
 package.xml       |   7 ++-
 test/sam_test.cpp | 114 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 154 insertions(+), 11 deletions(-)
 create mode 100644 test/sam_test.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8cb430a..f4087dc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,8 +1,6 @@
-cmake_minimum_required(VERSION 3.5)
+cmake_minimum_required(VERSION 3.0.2)
 
-set(PROJECT_NAME sam_onnx_ros)
-
-project(${PROJECT_NAME} VERSION 0.0.1 LANGUAGES CXX)
+project(sam_onnx_ros)
 
 # -------------- CMake Policies ------------------#
 #add_compile_options(-Wall -Werror=all)
@@ -30,6 +28,11 @@ include_directories(/usr/local/cuda/include)
 
 find_package(catkin REQUIRED
   COMPONENTS
+  roscpp
+  tue_config
+  tue_filesystem
+  code_profiler
+
   #onnxruntime_ros
 )
 
@@ -62,12 +65,22 @@ set(PROJECT_SOURCES
         src/utils.cpp
 )
 
-add_executable(${PROJECT_NAME} ${PROJECT_SOURCES})
-include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include)
-
-# Link OpenCV libraries along with ONNX Runtime
-target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS} ${catkin_LIBRARIES} ${ONNXRUNTIME_ROOT}/lib/libonnxruntime.so)
+# Build core library (no main.cpp here)
+add_library(sam_onnx_ros_core
+        src/sam_inference.cpp
+        src/segmentation.cpp
+        src/utils.cpp
+)
+target_link_libraries(sam_onnx_ros_core
+    ${OpenCV_LIBS}
+    ${catkin_LIBRARIES}
+    ${ONNXRUNTIME_ROOT}/lib/libonnxruntime.so
+)
+target_include_directories(sam_onnx_ros_core PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
 
+# Main executable links the core lib
+add_executable(${PROJECT_NAME} src/main.cpp)
+target_link_libraries(${PROJECT_NAME} sam_onnx_ros_core)
 
 # Copy sam_<model>.onnx file to the same folder of the executable file
 configure_file(../hero_sam/sam_inference/model/SAM_mask_decoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_mask_decoder.onnx  COPYONLY)
@@ -78,6 +91,19 @@ add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD
     COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/images
 )
 
+# Enable testing and add a gtest
+if (CATKIN_ENABLE_TESTING)
+  find_package(catkin_lint_cmake REQUIRED)
+  catkin_add_catkin_lint_test("-W2 --ignore HEADER_OUTSIDE_PACKAGE_INCLUDE_PATH")
+
+  catkin_add_gtest(sam_onnx_ros_tests test/sam_test.cpp)
+  if(TARGET sam_onnx_ros_tests)
+    target_link_libraries(sam_onnx_ros_tests sam_onnx_ros_core ${catkin_LIBRARIES})
+    target_include_directories(sam_onnx_ros_tests PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
+  endif()
+endif()
+
+
 #If you want to debug
 set(CMAKE_BUILD_TYPE Debug)
 set(CMAKE_CXX_FLAGS_DEBUG "-g")
diff --git a/package.xml b/package.xml
index cde009a..250abed 100644
--- a/package.xml
+++ b/package.xml
@@ -12,14 +12,17 @@
   <license>ToDo</license>
 
   <buildtool_depend>catkin</buildtool_depend>
+  <depend>roscpp</depend>
+  <depend>libpcl-common</depend>
+
 
   <build_depend>libopencv-dev</build_depend>
   <exec_depend>libopencv-dev</exec_depend>
   <build_depend>onnxruntime_ros</build_depend>
   <exec_depend>onnxruntime_ros</exec_depend>
-
   <test_depend>catkin_lint_cmake</test_depend>
-
+<test_depend>gtest</test_depend>
+<test_depend>rostest</test_depend>
   <doc_depend>doxygen</doc_depend>
 
   <export>
diff --git a/test/sam_test.cpp b/test/sam_test.cpp
new file mode 100644
index 0000000..5f1024f
--- /dev/null
+++ b/test/sam_test.cpp
@@ -0,0 +1,114 @@
+#include "segmentation.h"
+#include "sam_inference.h"
+#include <gtest/gtest.h>
+#include <opencv2/opencv.hpp>
+#include "dl_types.h"
+#include "utils.h"
+#include <filesystem>
+
+class SamInferenceTest : public ::testing::Test
+{
+protected:
+    void SetUp() override
+    {
+        // Create test images with different characteristics
+        testImage_640x640 = cv::Mat::ones(640, 640, CV_8UC3) * 255;
+        testImage_800x600 = cv::Mat::ones(600, 800, CV_8UC3) * 128;
+
+        // Create a more realistic test image with some patterns
+        testImage_realistic = cv::Mat(640, 640, CV_8UC3);
+        cv::randu(testImage_realistic, cv::Scalar(0,0,0), cv::Scalar(255,255,255));
+
+        // Setup common parameters
+        NonSquareImgSize = { testImage_800x600.cols, testImage_800x600.rows };
+
+        sam = std::make_unique<SAM>();
+        params.rectConfidenceThreshold = 0.1f;
+        params.iouThreshold = 0.5f;
+        params.imgSize = {1024, 1024};
+        params.modelType = SEG::SAM_SEGMENT_ENCODER;
+        params.modelPath = "SAM_encoder.onnx"; // copied to build/ by CMake
+#ifdef USE_CUDA
+        params.cudaEnable = true;
+#else
+        params.cudaEnable = false;
+#endif
+    }
+
+    void TearDown() override { sam.reset(); }
+
+    // Test data
+    Utils utilities;
+    cv::Mat testImage_640x640, testImage_800x600, testImage_realistic;
+    SEG::DL_INIT_PARAM params;
+    std::unique_ptr<SAM> sam;
+    std::vector<int> NonSquareImgSize;
+};
+
+
+
+TEST_F(SamInferenceTest, ObjectCreation)
+{
+    EXPECT_NO_THROW({
+        SAM localSam;
+    });
+}
+
+TEST_F(SamInferenceTest, PreProcessSquareImage)
+{
+    cv::Mat processedImg;
+    const char* result = utilities.PreProcess(testImage_640x640, params.imgSize, processedImg);
+
+    EXPECT_EQ(result, nullptr) << "PreProcess should succeed";
+    EXPECT_EQ(processedImg.size(), cv::Size(1024, 1024)) << "Output should be letterboxed to 1024x1024";
+    EXPECT_FALSE(processedImg.empty()) << "Processed image should not be empty";
+}
+
+TEST_F(SamInferenceTest, PreProcessRectangularImage)
+{
+    cv::Mat processedImg;
+    const char* result = utilities.PreProcess(testImage_800x600, NonSquareImgSize, processedImg);
+
+    EXPECT_EQ(result, nullptr) << "PreProcess should succeed";
+    EXPECT_EQ(processedImg.size(), cv::Size(800, 600)) << "Output should be letterboxed to 800x600";
+    EXPECT_FALSE(processedImg.empty()) << "Processed image should not be empty";
+}
+
+TEST_F(SamInferenceTest, CreateSessionWithValidModel)
+{
+    if (!std::filesystem::exists("SAM_encoder.onnx")) {
+        GTEST_SKIP() << "Model not found in build dir";
+    }
+    const char* result = sam->CreateSession(params);
+    EXPECT_EQ(result, nullptr) << "CreateSession should succeed with valid parameters";
+}
+
+TEST_F(SamInferenceTest, CreateSessionWithInvalidModel)
+{
+    params.modelPath = "nonexistent_model.onnx";
+    const char* result = sam->CreateSession(params);
+    EXPECT_NE(result, nullptr) << "CreateSession should fail with invalid model path";
+}
+
+TEST_F(SamInferenceTest, FullInferencePipeline)
+{
+    if (!std::filesystem::exists("SAM_encoder.onnx") ||
+        !std::filesystem::exists("SAM_mask_decoder.onnx")) {
+        GTEST_SKIP() << "Models not found in build dir";
+    }
+
+    // Use the package Initializer/SegmentAnything for the full pipeline
+    std::vector<std::unique_ptr<SAM>> samSegmentors;
+    SEG::DL_INIT_PARAM params_encoder, params_decoder;
+    std::tie(samSegmentors, params_encoder, params_decoder) = Initializer();
+
+    auto masks = SegmentAnything(samSegmentors, params_encoder, params_decoder, testImage_realistic);
+    EXPECT_TRUE(masks.size() >= 0) << "Masks should be a valid output vector";
+}
+
+// Run all tests
+int main(int argc, char **argv)
+{
+    testing::InitGoogleTest(&argc, argv);
+    return RUN_ALL_TESTS();
+}
\ No newline at end of file

From 9f210122ba54b26a19a1be4c31f14ff455d07857 Mon Sep 17 00:00:00 2001
From: IasonTheodorou <iasonth95@gmail.com>
Date: Thu, 4 Sep 2025 12:55:03 +0200
Subject: [PATCH 11/28] Fixed catkin workspace for both code and tests

---
 CMakeLists.txt        | 26 +++++++++++++-------------
 package.xml           |  8 ++------
 src/main.cpp          |  2 +-
 src/sam_inference.cpp |  2 +-
 src/segmentation.cpp  |  4 ++--
 test/sam_test.cpp     | 35 +++++++++++++++--------------------
 6 files changed, 34 insertions(+), 43 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index f4087dc..cece29a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -28,10 +28,10 @@ include_directories(/usr/local/cuda/include)
 
 find_package(catkin REQUIRED
   COMPONENTS
-  roscpp
-  tue_config
-  tue_filesystem
-  code_profiler
+  # roscpp
+  # tue_config
+  # tue_filesystem
+  # code_profiler
 
   #onnxruntime_ros
 )
@@ -42,7 +42,8 @@ find_package(catkin REQUIRED
 
 catkin_package(
   INCLUDE_DIRS include
-  LIBRARIES ${PROJECT_NAME}
+  #LIBRARIES ${PROJECT_NAME}
+  LIBRARIES sam_onnx_ros_core
   CATKIN_DEPENDS
   DEPENDS OpenCV
 )
@@ -83,18 +84,18 @@ add_executable(${PROJECT_NAME} src/main.cpp)
 target_link_libraries(${PROJECT_NAME} sam_onnx_ros_core)
 
 # Copy sam_<model>.onnx file to the same folder of the executable file
-configure_file(../hero_sam/sam_inference/model/SAM_mask_decoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_mask_decoder.onnx  COPYONLY)
-configure_file(../hero_sam/sam_inference/model/SAM_encoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_encoder.onnx COPYONLY)
+configure_file(~/Documents/repos/hero_sam/sam_inference/model/SAM_mask_decoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_mask_decoder.onnx  COPYONLY)
+configure_file(~/Documents/repos/hero_sam/sam_inference/model/SAM_encoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_encoder.onnx COPYONLY)
 
 # Create folder name images in the same folder of the executable file
 add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD
     COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/images
 )
 
-# Enable testing and add a gtest
+# # Enable testing
 if (CATKIN_ENABLE_TESTING)
-  find_package(catkin_lint_cmake REQUIRED)
-  catkin_add_catkin_lint_test("-W2 --ignore HEADER_OUTSIDE_PACKAGE_INCLUDE_PATH")
+#   find_package(catkin_lint_cmake REQUIRED)
+#   catkin_add_catkin_lint_test("-W2 --ignore HEADER_OUTSIDE_PACKAGE_INCLUDE_PATH")
 
   catkin_add_gtest(sam_onnx_ros_tests test/sam_test.cpp)
   if(TARGET sam_onnx_ros_tests)
@@ -103,7 +104,6 @@ if (CATKIN_ENABLE_TESTING)
   endif()
 endif()
 
-
 #If you want to debug
-set(CMAKE_BUILD_TYPE Debug)
-set(CMAKE_CXX_FLAGS_DEBUG "-g")
+# set(CMAKE_BUILD_TYPE Debug)
+# set(CMAKE_CXX_FLAGS_DEBUG "-g")
diff --git a/package.xml b/package.xml
index 250abed..b00e6d6 100644
--- a/package.xml
+++ b/package.xml
@@ -9,20 +9,16 @@
 
   <maintainer email="iasonth95@gmail.com">Iason Theodorou</maintainer>
 
-  <license>ToDo</license>
+  <license>BSD</license>
 
   <buildtool_depend>catkin</buildtool_depend>
-  <depend>roscpp</depend>
-  <depend>libpcl-common</depend>
-
 
   <build_depend>libopencv-dev</build_depend>
   <exec_depend>libopencv-dev</exec_depend>
   <build_depend>onnxruntime_ros</build_depend>
   <exec_depend>onnxruntime_ros</exec_depend>
+
   <test_depend>catkin_lint_cmake</test_depend>
-<test_depend>gtest</test_depend>
-<test_depend>rostest</test_depend>
   <doc_depend>doxygen</doc_depend>
 
   <export>
diff --git a/src/main.cpp b/src/main.cpp
index 9399779..454e086 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -11,7 +11,7 @@ int main()
     SEG::DL_INIT_PARAM params_decoder;
     std::tie(samSegmentors, params_encoder, params_decoder) = Initializer();
     std::filesystem::path current_path = std::filesystem::current_path();
-    std::filesystem::path imgs_path = current_path / "../../hero_sam/pipeline/build/images";
+    std::filesystem::path imgs_path =  "/home/amigo/Documents/repos/hero_sam/pipeline/build/images"; // current_path / <- you could use
     for (auto &i : std::filesystem::directory_iterator(imgs_path))
     {
         if (i.path().extension() == ".jpg" || i.path().extension() == ".png" || i.path().extension() == ".jpeg")
diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp
index 3f6a09e..c9bacbe 100644
--- a/src/sam_inference.cpp
+++ b/src/sam_inference.cpp
@@ -85,7 +85,7 @@ const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) {
     for (size_t i = 0; i < OutputNodesNum; i++) {
       Ort::AllocatedStringPtr output_node_name =
           session->GetOutputNameAllocated(i, allocator);
-      char *temp_buf = new char[10];
+      char *temp_buf = new char[50];
       strcpy(temp_buf, output_node_name.get());
       outputNodeNames.push_back(temp_buf);
     }
diff --git a/src/segmentation.cpp b/src/segmentation.cpp
index 585dd13..25b8fae 100644
--- a/src/segmentation.cpp
+++ b/src/segmentation.cpp
@@ -14,12 +14,12 @@ Initializer() {
 
   params_encoder.rectConfidenceThreshold = 0.1;
   params_encoder.iouThreshold = 0.5;
-  params_encoder.modelPath = "SAM_encoder.onnx";
+  params_encoder.modelPath = "/home/amigo//Documents/repos/sam_onnx_ros/build/SAM_encoder.onnx";
   params_encoder.imgSize = {1024, 1024};
 
   params_decoder = params_encoder;
   params_decoder.modelType = SEG::SAM_SEGMENT_DECODER;
-  params_decoder.modelPath = "SAM_mask_decoder.onnx";
+  params_decoder.modelPath = "/home/amigo/Documents/repos/sam_onnx_ros/build/SAM_mask_decoder.onnx";
 
 #ifdef USE_CUDA
   params_encoder.cudaEnable = true;
diff --git a/test/sam_test.cpp b/test/sam_test.cpp
index 5f1024f..75a9c98 100644
--- a/test/sam_test.cpp
+++ b/test/sam_test.cpp
@@ -22,27 +22,25 @@ class SamInferenceTest : public ::testing::Test
         // Setup common parameters
         NonSquareImgSize = { testImage_800x600.cols, testImage_800x600.rows };
 
-        sam = std::make_unique<SAM>();
-        params.rectConfidenceThreshold = 0.1f;
-        params.iouThreshold = 0.5f;
-        params.imgSize = {1024, 1024};
-        params.modelType = SEG::SAM_SEGMENT_ENCODER;
-        params.modelPath = "SAM_encoder.onnx"; // copied to build/ by CMake
+        // Use the package Initializer/SegmentAnything for the full pipeline
+
+        std::tie(samSegmentors, params_encoder, params_decoder) = Initializer();
+
 #ifdef USE_CUDA
-        params.cudaEnable = true;
+        params_encoder.cudaEnable = true;
 #else
-        params.cudaEnable = false;
+        params_encoder.cudaEnable = false;
 #endif
     }
 
-    void TearDown() override { sam.reset(); }
+    void TearDown() override { samSegmentors[0].reset(); samSegmentors[1].reset(); }
 
     // Test data
     Utils utilities;
     cv::Mat testImage_640x640, testImage_800x600, testImage_realistic;
-    SEG::DL_INIT_PARAM params;
-    std::unique_ptr<SAM> sam;
     std::vector<int> NonSquareImgSize;
+    std::vector<std::unique_ptr<SAM>> samSegmentors;
+    SEG::DL_INIT_PARAM params_encoder, params_decoder;
 };
 
 
@@ -57,7 +55,7 @@ TEST_F(SamInferenceTest, ObjectCreation)
 TEST_F(SamInferenceTest, PreProcessSquareImage)
 {
     cv::Mat processedImg;
-    const char* result = utilities.PreProcess(testImage_640x640, params.imgSize, processedImg);
+    const char* result = utilities.PreProcess(testImage_640x640, params_encoder.imgSize, processedImg);
 
     EXPECT_EQ(result, nullptr) << "PreProcess should succeed";
     EXPECT_EQ(processedImg.size(), cv::Size(1024, 1024)) << "Output should be letterboxed to 1024x1024";
@@ -79,14 +77,14 @@ TEST_F(SamInferenceTest, CreateSessionWithValidModel)
     if (!std::filesystem::exists("SAM_encoder.onnx")) {
         GTEST_SKIP() << "Model not found in build dir";
     }
-    const char* result = sam->CreateSession(params);
-    EXPECT_EQ(result, nullptr) << "CreateSession should succeed with valid parameters";
+
+    EXPECT_NE(samSegmentors[0], nullptr) << "CreateSession should succeed with valid parameters";
 }
 
 TEST_F(SamInferenceTest, CreateSessionWithInvalidModel)
 {
-    params.modelPath = "nonexistent_model.onnx";
-    const char* result = sam->CreateSession(params);
+    params_encoder.modelPath = "nonexistent_model.onnx";
+    const char* result = samSegmentors[0]->CreateSession(params_encoder);
     EXPECT_NE(result, nullptr) << "CreateSession should fail with invalid model path";
 }
 
@@ -97,10 +95,7 @@ TEST_F(SamInferenceTest, FullInferencePipeline)
         GTEST_SKIP() << "Models not found in build dir";
     }
 
-    // Use the package Initializer/SegmentAnything for the full pipeline
-    std::vector<std::unique_ptr<SAM>> samSegmentors;
-    SEG::DL_INIT_PARAM params_encoder, params_decoder;
-    std::tie(samSegmentors, params_encoder, params_decoder) = Initializer();
+
 
     auto masks = SegmentAnything(samSegmentors, params_encoder, params_decoder, testImage_realistic);
     EXPECT_TRUE(masks.size() >= 0) << "Masks should be a valid output vector";

From a9fce070c810f6c8799326652785e3d5fe768e58 Mon Sep 17 00:00:00 2001
From: IasonTheodorou <iasonth95@gmail.com>
Date: Fri, 5 Sep 2025 12:38:53 +0200
Subject: [PATCH 12/28] fixed functionality for the tests to pass and added
 logging definition

---
 src/main.cpp          | 3 ++-
 src/sam_inference.cpp | 4 +++-
 src/utils.cpp         | 6 +++++-
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/main.cpp b/src/main.cpp
index 454e086..c9624c5 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -3,6 +3,7 @@
 #include <vector>
 #include <filesystem>
 #include <opencv2/opencv.hpp>
+
 int main()
 {
     // Running inference
@@ -11,7 +12,7 @@ int main()
     SEG::DL_INIT_PARAM params_decoder;
     std::tie(samSegmentors, params_encoder, params_decoder) = Initializer();
     std::filesystem::path current_path = std::filesystem::current_path();
-    std::filesystem::path imgs_path =  "/home/amigo/Documents/repos/hero_sam/pipeline/build/images"; // current_path / <- you could use
+    std::filesystem::path imgs_path =  "/home/amigo/Documents/repos/hero_sam/sam_inference/build/images"; // current_path / <- you could use
     for (auto &i : std::filesystem::directory_iterator(imgs_path))
     {
         if (i.path().extension() == ".jpg" || i.path().extension() == ".png" || i.path().extension() == ".jpeg")
diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp
index c9bacbe..24af832 100644
--- a/src/sam_inference.cpp
+++ b/src/sam_inference.cpp
@@ -3,7 +3,7 @@
 #include <regex>
 
 #define benchmark
-#define ROI
+//#define ROI
 
 SAM::SAM() {}
 
@@ -223,6 +223,8 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg,
 
     std::vector<cv::Rect> boundingBoxes;
     boundingBoxes.push_back(bbox);
+#else
+    result.boxes.push_back(cv::Rect(0, 0, iImg.cols, iImg.rows));
 #endif // ROI
 
 #ifdef benchmark
diff --git a/src/utils.cpp b/src/utils.cpp
index f7721db..ca66b6c 100644
--- a/src/utils.cpp
+++ b/src/utils.cpp
@@ -1,5 +1,6 @@
 #include "utils.h"
 #include <opencv2/ximgproc/edge_filter.hpp> // for guided filter
+#define LOGGING
 
 // Constructor
 Utils::Utils()
@@ -33,7 +34,8 @@ char *Utils::PreProcess(const cv::Mat &iImg, std::vector<int> iImgSize, cv::Mat
         resizeScales = iImg.rows / (float)iImgSize.at(1);
         cv::resize(oImg, oImg, cv::Size(int(iImg.cols / resizeScales), iImgSize.at(1)));
     }
-    cv::Mat tempImg = cv::Mat::zeros(iImgSize.at(0), iImgSize.at(1), CV_8UC3);
+    //cv::Mat tempImg = cv::Mat::zeros(iImgSize.at(0), iImgSize.at(1), CV_8UC3);
+    cv::Mat tempImg = cv::Mat::zeros(iImgSize.at(1), iImgSize.at(0), CV_8UC3);
     oImg.copyTo(tempImg(cv::Rect(0, 0, oImg.cols, oImg.rows)));
     oImg = tempImg;
 
@@ -198,6 +200,7 @@ void Utils::PostProcess(std::vector<Ort::Value> &output_tensors, const cv::Mat &
         result.masks.push_back(finalMask);
 
         // Overlay for display on a copy (iImg is const)
+        #ifdef LOGGING
         cv::Mat overlay = iImg.clone();
         std::vector<std::vector<cv::Point>> contours;
         cv::findContours(finalMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE);
@@ -210,6 +213,7 @@ void Utils::PostProcess(std::vector<Ort::Value> &output_tensors, const cv::Mat &
         cv::imshow("SAM Segmentation", overlay);
         cv::waitKey(0);
         cv::destroyAllWindows();
+        #endif // LOGGING
     }
     else
     {

From dffbcd3544fd097eb007614c1cbe1de7b9b1c7fa Mon Sep 17 00:00:00 2001
From: IasonTheodorou <iasonth95@gmail.com>
Date: Fri, 5 Sep 2025 12:56:59 +0200
Subject: [PATCH 13/28] renamed private members of utils and sam_inference

---
 include/sam_inference.h |  22 +++----
 include/utils.h         |   4 +-
 src/sam_inference.cpp   | 132 ++++++++++++++++++++--------------------
 src/utils.cpp           |  12 ++--
 4 files changed, 85 insertions(+), 85 deletions(-)

diff --git a/include/sam_inference.h b/include/sam_inference.h
index d63701c..7bff0b1 100644
--- a/include/sam_inference.h
+++ b/include/sam_inference.h
@@ -33,17 +33,17 @@ class SAM
     std::vector<std::string> classes{};
 
 private:
-    Ort::Env env;
-    std::unique_ptr<Ort::Session> session;
-    bool cudaEnable;
-    Ort::RunOptions options;
-    std::vector<const char *> inputNodeNames;
-    std::vector<const char *> outputNodeNames;
-
-    SEG::MODEL_TYPE modelType;
-    std::vector<int> imgSize;
-    float rectConfidenceThreshold;
-    float iouThreshold;
+    Ort::Env _env;
+    std::unique_ptr<Ort::Session> _session;
+    bool _cudaEnable;
+    Ort::RunOptions _options;
+    std::vector<const char *> _inputNodeNames;
+    std::vector<const char *> _outputNodeNames;
+
+    SEG::MODEL_TYPE _modelType;
+    std::vector<int> _imgSize;
+    float _rectConfidenceThreshold;
+    float _iouThreshold;
 };
 
 #endif // SAMINFERENCE_H
\ No newline at end of file
diff --git a/include/utils.h b/include/utils.h
index e81b236..6cb8819 100644
--- a/include/utils.h
+++ b/include/utils.h
@@ -51,8 +51,8 @@ class Utils
     }
 
 private:
-    float resizeScales;
-    float resizeScalesBbox; // letterbox scale
+    float _resizeScales;
+    float _resizeScalesBbox; // letterbox scale
 };
 
 #endif // UTILS_H
\ No newline at end of file
diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp
index 24af832..f12c56b 100644
--- a/src/sam_inference.cpp
+++ b/src/sam_inference.cpp
@@ -9,10 +9,10 @@ SAM::SAM() {}
 
 SAM::~SAM() {
   // Clean up input/output node names
-  for (auto &name : inputNodeNames) {
+  for (auto &name : _inputNodeNames) {
     delete[] name;
   }
-  for (auto &name : outputNodeNames) {
+  for (auto &name : _outputNodeNames) {
     delete[] name;
   }
 }
@@ -28,19 +28,19 @@ template <> struct TypeToTensorType<half> {
 
 const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) {
   const char *Ret = RET_OK;
-  if (session) {
-    session.reset(); // Release previous session
+  if (_session) {
+    _session.reset(); // Release previous _session
 
     // Clear node names
-    for (auto &name : inputNodeNames) {
+    for (auto &name : _inputNodeNames) {
       delete[] name;
     }
-    inputNodeNames.clear();
+    _inputNodeNames.clear();
 
-    for (auto &name : outputNodeNames) {
+    for (auto &name : _outputNodeNames) {
       delete[] name;
     }
-    outputNodeNames.clear();
+    _outputNodeNames.clear();
   }
   std::regex pattern("[\u4e00-\u9fa5]");
   bool result = std::regex_search(iParams.modelPath, pattern);
@@ -51,55 +51,55 @@ const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) {
     return Ret;
   }
   try {
-    rectConfidenceThreshold = iParams.rectConfidenceThreshold;
-    iouThreshold = iParams.iouThreshold;
-    imgSize = iParams.imgSize;
-    modelType = iParams.modelType;
-    cudaEnable = iParams.cudaEnable;
-    env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, "Sam");
-    Ort::SessionOptions sessionOption;
+    _rectConfidenceThreshold = iParams.rectConfidenceThreshold;
+    _iouThreshold = iParams.iouThreshold;
+    _imgSize = iParams.imgSize;
+    _modelType = iParams.modelType;
+    _cudaEnable = iParams.cudaEnable;
+    _env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, "Sam");
+    Ort::SessionOptions _sessionOption;
     if (iParams.cudaEnable) {
       OrtCUDAProviderOptions cudaOption;
       cudaOption.device_id = 0;
-      sessionOption.AppendExecutionProvider_CUDA(cudaOption);
+      _sessionOption.AppendExecutionProvider_CUDA(cudaOption);
     }
 
-    sessionOption.SetGraphOptimizationLevel(
+    _sessionOption.SetGraphOptimizationLevel(
         GraphOptimizationLevel::ORT_ENABLE_ALL);
-    sessionOption.SetIntraOpNumThreads(iParams.intraOpNumThreads);
-    sessionOption.SetLogSeverityLevel(iParams.logSeverityLevel);
+    _sessionOption.SetIntraOpNumThreads(iParams.intraOpNumThreads);
+    _sessionOption.SetLogSeverityLevel(iParams.logSeverityLevel);
 
     const char *modelPath = iParams.modelPath.c_str();
 
-    session = std::make_unique<Ort::Session>(env, modelPath, sessionOption);
+    _session = std::make_unique<Ort::Session>(_env, modelPath, _sessionOption);
     Ort::AllocatorWithDefaultOptions allocator;
-    size_t inputNodesNum = session->GetInputCount();
+    size_t inputNodesNum = _session->GetInputCount();
     for (size_t i = 0; i < inputNodesNum; i++) {
       Ort::AllocatedStringPtr input_node_name =
-          session->GetInputNameAllocated(i, allocator);
+          _session->GetInputNameAllocated(i, allocator);
       char *temp_buf = new char[50];
       strcpy(temp_buf, input_node_name.get());
-      inputNodeNames.push_back(temp_buf);
+      _inputNodeNames.push_back(temp_buf);
     }
-    size_t OutputNodesNum = session->GetOutputCount();
+    size_t OutputNodesNum = _session->GetOutputCount();
     for (size_t i = 0; i < OutputNodesNum; i++) {
       Ort::AllocatedStringPtr output_node_name =
-          session->GetOutputNameAllocated(i, allocator);
+          _session->GetOutputNameAllocated(i, allocator);
       char *temp_buf = new char[50];
       strcpy(temp_buf, output_node_name.get());
-      outputNodeNames.push_back(temp_buf);
+      _outputNodeNames.push_back(temp_buf);
     }
-    options = Ort::RunOptions{nullptr};
+    _options = Ort::RunOptions{nullptr};
 
     auto input_shape =
-        session->GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
+        _session->GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
     auto output_shape =
-        session->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
-    auto output_type = session->GetOutputTypeInfo(0)
+        _session->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
+    auto output_type = _session->GetOutputTypeInfo(0)
                            .GetTensorTypeAndShapeInfo()
                            .GetElementType();
 
-    WarmUpSession(modelType);
+    WarmUpSession(_modelType);
     return RET_OK;
   } catch (const std::exception &e) {
     const char *str1 = "[SAM]:";
@@ -109,30 +109,30 @@ const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) {
     std::strcpy(merged, str_result.c_str());
     std::cout << merged << std::endl;
     delete[] merged;
-    return "[SAM]:Create session failed.";
+    return "[SAM]:Create session failed.";
   }
 }
 
 const char *SAM::RunSession(const cv::Mat &iImg,
                             std::vector<SEG::DL_RESULT> &oResult,
-                            SEG::MODEL_TYPE modelType, SEG::DL_RESULT &result) {
+                            SEG::MODEL_TYPE _modelType, SEG::DL_RESULT &result) {
 #ifdef benchmark
   clock_t starttime_1 = clock();
 #endif // benchmark
   Utils utilities;
   const char *Ret = RET_OK;
   cv::Mat processedImg;
-  utilities.PreProcess(iImg, imgSize, processedImg);
+  utilities.PreProcess(iImg, _imgSize, processedImg);
   float *blob = new float[processedImg.total() * 3];
   utilities.BlobFromImage(processedImg, blob);
   std::vector<int64_t> inputNodeDims;
-  if (modelType == SEG::SAM_SEGMENT_ENCODER) {
-    inputNodeDims = {1, 3, imgSize.at(0), imgSize.at(1)};
-  } else if (modelType == SEG::SAM_SEGMENT_DECODER) {
+  if (_modelType == SEG::SAM_SEGMENT_ENCODER) {
+    inputNodeDims = {1, 3, _imgSize.at(0), _imgSize.at(1)};
+  } else if (_modelType == SEG::SAM_SEGMENT_DECODER) {
     // Input size or SAM decoder model is 256x64x64 for the decoder
     inputNodeDims = {1, 256, 64, 64};
   }
-  TensorProcess(starttime_1, iImg, blob, inputNodeDims, modelType, oResult,
+  TensorProcess(starttime_1, iImg, blob, inputNodeDims, _modelType, oResult,
                 utilities, result);
 
   return Ret;
@@ -141,11 +141,11 @@ const char *SAM::RunSession(const cv::Mat &iImg,
 template <typename N>
 const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg,
                                N &blob, std::vector<int64_t> &inputNodeDims,
-                               SEG::MODEL_TYPE modelType,
+                               SEG::MODEL_TYPE _modelType,
                                std::vector<SEG::DL_RESULT> &oResult,
                                Utils &utilities, SEG::DL_RESULT &result) {
 
-  switch (modelType) {
+  switch (_modelType) {
   case SEG::SAM_SEGMENT_ENCODER:
     // case OTHER_SAM_MODEL:
     {
@@ -153,14 +153,14 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg,
       Ort::Value inputTensor =
           Ort::Value::CreateTensor<typename std::remove_pointer<N>::type>(
               Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU),
-              blob, 3 * imgSize.at(0) * imgSize.at(1), inputNodeDims.data(),
+              blob, 3 * _imgSize.at(0) * _imgSize.at(1), inputNodeDims.data(),
               inputNodeDims.size());
 #ifdef benchmark
       clock_t starttime_2 = clock();
 #endif // benchmark
       auto outputTensor =
-          session->Run(options, inputNodeNames.data(), &inputTensor, 1,
-                       outputNodeNames.data(), outputNodeNames.size());
+          _session->Run(_options, _inputNodeNames.data(), &inputTensor, 1,
+                       _outputNodeNames.data(), _outputNodeNames.size());
 #ifdef benchmark
       clock_t starttime_3 = clock();
 #endif // benchmark
@@ -186,7 +186,7 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg,
           (double)(starttime_3 - starttime_2) / CLOCKS_PER_SEC * 1000;
       double post_process_time =
           (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000;
-      if (cudaEnable) {
+      if (_cudaEnable) {
         std::cout << "[SAM(CUDA)]: " << pre_process_time << "ms pre-process, "
                   << process_time << "ms inference, " << post_process_time
                   << "ms post-process." << std::endl;
@@ -269,7 +269,7 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg,
       std::vector<float> hasMaskInput = {0.0f}; // No mask provided
       std::vector<int64_t> hasMaskInputDims = {1};
 
-      utilities.ScaleBboxPoints(iImg, imgSize, pointCoords, pointCoordsScaled);
+      utilities.ScaleBboxPoints(iImg, _imgSize, pointCoords, pointCoordsScaled);
 
       std::vector<Ort::Value> inputTensors = utilities.PrepareInputTensor(
           decoderInputTensor, pointCoordsScaled, pointCoordsDims, pointLabels,
@@ -279,15 +279,15 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg,
 #ifdef benchmark
       starttime_2 = clock();
 #endif // benchmark
-      auto output_tensors = session->Run(
-          options, inputNodeNames.data(), inputTensors.data(),
-          inputTensors.size(), outputNodeNames.data(), outputNodeNames.size());
+      auto output_tensors = _session->Run(
+          _options, _inputNodeNames.data(), inputTensors.data(),
+          inputTensors.size(), _outputNodeNames.data(), _outputNodeNames.size());
 
 #ifdef benchmark
       starttime_3 = clock();
 #endif // benchmark
 
-      utilities.PostProcess(output_tensors, iImg, imgSize, result);
+      utilities.PostProcess(output_tensors, iImg, _imgSize, result);
     }
     // Add the result to oResult
     oResult.push_back(result);
@@ -302,7 +302,7 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg,
         (double)(starttime_3 - starttime_2) / CLOCKS_PER_SEC * 1000;
     double post_process_time =
         (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000;
-    if (cudaEnable) {
+    if (_cudaEnable) {
       std::cout << "[SAM(CUDA)]: " << pre_process_time << "ms pre-process, "
                 << process_time << "ms inference, " << post_process_time
                 << "ms post-process." << std::endl;
@@ -321,31 +321,31 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg,
   return RET_OK;
 }
 
-char *SAM::WarmUpSession(SEG::MODEL_TYPE modelType) {
+char *SAM::WarmUpSession(SEG::MODEL_TYPE _modelType) {
   clock_t starttime_1 = clock();
   Utils utilities;
-  cv::Mat iImg = cv::Mat(cv::Size(imgSize.at(0), imgSize.at(1)), CV_8UC3);
+  cv::Mat iImg = cv::Mat(cv::Size(_imgSize.at(0), _imgSize.at(1)), CV_8UC3);
   cv::Mat processedImg;
-  utilities.PreProcess(iImg, imgSize, processedImg);
+  utilities.PreProcess(iImg, _imgSize, processedImg);
 
   float *blob = new float[iImg.total() * 3];
   utilities.BlobFromImage(processedImg, blob);
-  std::vector<int64_t> SAM_input_node_dims = {1, 3, imgSize.at(0),
-                                              imgSize.at(1)};
-  switch (modelType) {
+  std::vector<int64_t> SAM_input_node_dims = {1, 3, _imgSize.at(0),
+                                              _imgSize.at(1)};
+  switch (_modelType) {
   case SEG::SAM_SEGMENT_ENCODER: {
     Ort::Value input_tensor = Ort::Value::CreateTensor<float>(
         Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob,
-        3 * imgSize.at(0) * imgSize.at(1), SAM_input_node_dims.data(),
+        3 * _imgSize.at(0) * _imgSize.at(1), SAM_input_node_dims.data(),
         SAM_input_node_dims.size());
     auto output_tensors =
-        session->Run(options, inputNodeNames.data(), &input_tensor, 1,
-                     outputNodeNames.data(), outputNodeNames.size());
+        _session->Run(_options, _inputNodeNames.data(), &input_tensor, 1,
+                     _outputNodeNames.data(), _outputNodeNames.size());
     delete[] blob;
     clock_t starttime_4 = clock();
     double post_process_time =
         (double)(starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000;
-    if (cudaEnable) {
+    if (_cudaEnable) {
       std::cout << "[SAM(CUDA)]: " << "Cuda warm-up cost " << post_process_time
                 << " ms. " << std::endl;
     }
@@ -387,7 +387,7 @@ char *SAM::WarmUpSession(SEG::MODEL_TYPE modelType) {
 
       std::vector<float> pointCoordsScaled;
 
-      utilities.ScaleBboxPoints(iImg, imgSize, pointCoords, pointCoordsScaled);
+      utilities.ScaleBboxPoints(iImg, _imgSize, pointCoords, pointCoordsScaled);
 
       // Labels for the points
       std::vector<float> pointLabels = {1.0f}; // All points are foreground
@@ -403,17 +403,17 @@ char *SAM::WarmUpSession(SEG::MODEL_TYPE modelType) {
           pointLabelsDims, maskInput, maskInputDims, hasMaskInput,
           hasMaskInputDims);
 
-      auto output_tensors = session->Run(
-          options, inputNodeNames.data(), inputTensors.data(),
-          inputTensors.size(), outputNodeNames.data(), outputNodeNames.size());
+      auto output_tensors = _session->Run(
+          _options, _inputNodeNames.data(), inputTensors.data(),
+          inputTensors.size(), _outputNodeNames.data(), _outputNodeNames.size());
     }
 
-    outputNodeNames.size();
+    _outputNodeNames.size();
     delete[] blob;
     clock_t starttime_4 = clock();
     double post_process_time =
         (double)(starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000;
-    if (cudaEnable) {
+    if (_cudaEnable) {
       std::cout << "[SAM(CUDA)]: " << "Cuda warm-up cost " << post_process_time
                 << " ms. " << std::endl;
     }
diff --git a/src/utils.cpp b/src/utils.cpp
index ca66b6c..2c826ab 100644
--- a/src/utils.cpp
+++ b/src/utils.cpp
@@ -26,13 +26,13 @@ char *Utils::PreProcess(const cv::Mat &iImg, std::vector<int> iImgSize, cv::Mat
 
     if (iImg.cols >= iImg.rows)
     {
-        resizeScales = iImg.cols / (float)iImgSize.at(0);
-        cv::resize(oImg, oImg, cv::Size(iImgSize.at(0), int(iImg.rows / resizeScales)));
+        _resizeScales = iImg.cols / (float)iImgSize.at(0);
+        cv::resize(oImg, oImg, cv::Size(iImgSize.at(0), int(iImg.rows / _resizeScales)));
     }
     else
     {
-        resizeScales = iImg.rows / (float)iImgSize.at(1);
-        cv::resize(oImg, oImg, cv::Size(int(iImg.cols / resizeScales), iImgSize.at(1)));
+        _resizeScales = iImg.rows / (float)iImgSize.at(1);
+        cv::resize(oImg, oImg, cv::Size(int(iImg.cols / _resizeScales), iImgSize.at(1)));
     }
     //cv::Mat tempImg = cv::Mat::zeros(iImgSize.at(0), iImgSize.at(1), CV_8UC3);
     cv::Mat tempImg = cv::Mat::zeros(iImgSize.at(1), iImgSize.at(0), CV_8UC3);
@@ -52,12 +52,12 @@ void Utils::ScaleBboxPoints(const cv::Mat &iImg, std::vector<int> imgSize, std::
     if (iImg.cols >= iImg.rows)
     {
         scale = imgSize[0] / (float)iImg.cols;
-        resizeScalesBbox = iImg.cols / (float)imgSize[0];
+        _resizeScalesBbox = iImg.cols / (float)imgSize[0];
     }
     else
     {
         scale = imgSize[1] / (float)iImg.rows;
-        resizeScalesBbox = iImg.rows / (float)imgSize[1];
+        _resizeScalesBbox = iImg.rows / (float)imgSize[1];
     }
 
     // Top-Left placement (matching PreProcess)

From 959a3ffc6d2d597c0a6c80d6a5a34f15280ffab7 Mon Sep 17 00:00:00 2001
From: IasonTheodorou <iasonth95@gmail.com>
Date: Fri, 5 Sep 2025 15:10:28 +0200
Subject: [PATCH 14/28] Separated test files per category (utils or sam
 related for now)

---
 CMakeLists.txt      |  18 ++++-
 test/sam_test.cpp   |  68 +++++++----------
 test/test_utils.cpp | 175 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 214 insertions(+), 47 deletions(-)
 create mode 100644 test/test_utils.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index cece29a..ffc7611 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -97,13 +97,23 @@ if (CATKIN_ENABLE_TESTING)
 #   find_package(catkin_lint_cmake REQUIRED)
 #   catkin_add_catkin_lint_test("-W2 --ignore HEADER_OUTSIDE_PACKAGE_INCLUDE_PATH")
 
-  catkin_add_gtest(sam_onnx_ros_tests test/sam_test.cpp)
-  if(TARGET sam_onnx_ros_tests)
-    target_link_libraries(sam_onnx_ros_tests sam_onnx_ros_core ${catkin_LIBRARIES})
-    target_include_directories(sam_onnx_ros_tests PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
+ # Utils unit tests (no models needed)
+  catkin_add_gtest(utils_tests test/test_utils.cpp)
+  if(TARGET utils_tests)
+    target_link_libraries(utils_tests sam_onnx_ros_core GTest::gtest_main ${catkin_LIBRARIES})
+    target_include_directories(utils_tests PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
+  endif()
+
+  # SAM integration-ish tests (may need models)
+  catkin_add_gtest(sam_tests test/sam_test.cpp)
+  if(TARGET sam_tests)
+    target_link_libraries(sam_tests sam_onnx_ros_core GTest::gtest_main ${catkin_LIBRARIES})
+    target_include_directories(sam_tests PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
   endif()
 endif()
 
+
+
 #If you want to debug
 # set(CMAKE_BUILD_TYPE Debug)
 # set(CMAKE_CXX_FLAGS_DEBUG "-g")
diff --git a/test/sam_test.cpp b/test/sam_test.cpp
index 75a9c98..521b0d8 100644
--- a/test/sam_test.cpp
+++ b/test/sam_test.cpp
@@ -1,41 +1,46 @@
-#include "segmentation.h"
-#include "sam_inference.h"
 #include <gtest/gtest.h>
 #include <opencv2/opencv.hpp>
-#include "dl_types.h"
-#include "utils.h"
 #include <filesystem>
+#include "segmentation.h"
+#include "sam_inference.h"
+#include "dl_types.h"
+
+// This file contains higher-level (integration-ish) tests.
+// They cover object/session creation and a full pipeline run using synthetic images.
+// These tests may require the .onnx model files to be present next to the binary or in a known dir.
 
 class SamInferenceTest : public ::testing::Test
 {
 protected:
     void SetUp() override
     {
-        // Create test images with different characteristics
+        // Create simple synthetic images:
+        // - a white 640x640 (square)
+        // - a gray 800x600 (non-square)
         testImage_640x640 = cv::Mat::ones(640, 640, CV_8UC3) * 255;
         testImage_800x600 = cv::Mat::ones(600, 800, CV_8UC3) * 128;
 
-        // Create a more realistic test image with some patterns
+        // A "random noise" image to simulate realistic content for end-to-end checks.
         testImage_realistic = cv::Mat(640, 640, CV_8UC3);
         cv::randu(testImage_realistic, cv::Scalar(0,0,0), cv::Scalar(255,255,255));
 
-        // Setup common parameters
+        // Cache non-square size for preprocessing helpers.
         NonSquareImgSize = { testImage_800x600.cols, testImage_800x600.rows };
 
-        // Use the package Initializer/SegmentAnything for the full pipeline
-
+        // Use package helpers to build default params and SAM objects.
         std::tie(samSegmentors, params_encoder, params_decoder) = Initializer();
 
 #ifdef USE_CUDA
-        params_encoder.cudaEnable = true;
+        params_encoder.cudaEnable = true;  // Enable CUDA if compiled with it
 #else
-        params_encoder.cudaEnable = false;
+        params_encoder.cudaEnable = false; // Otherwise run on CPU
 #endif
     }
 
+    // Clean up the SAM objects after each test.
     void TearDown() override { samSegmentors[0].reset(); samSegmentors[1].reset(); }
 
-    // Test data
+    // Test data and objects shared across tests.
     Utils utilities;
     cv::Mat testImage_640x640, testImage_800x600, testImage_realistic;
     std::vector<int> NonSquareImgSize;
@@ -43,8 +48,7 @@ class SamInferenceTest : public ::testing::Test
     SEG::DL_INIT_PARAM params_encoder, params_decoder;
 };
 
-
-
+// Simple smoke test: we can construct a SAM object without throwing.
 TEST_F(SamInferenceTest, ObjectCreation)
 {
     EXPECT_NO_THROW({
@@ -52,26 +56,8 @@ TEST_F(SamInferenceTest, ObjectCreation)
     });
 }
 
-TEST_F(SamInferenceTest, PreProcessSquareImage)
-{
-    cv::Mat processedImg;
-    const char* result = utilities.PreProcess(testImage_640x640, params_encoder.imgSize, processedImg);
-
-    EXPECT_EQ(result, nullptr) << "PreProcess should succeed";
-    EXPECT_EQ(processedImg.size(), cv::Size(1024, 1024)) << "Output should be letterboxed to 1024x1024";
-    EXPECT_FALSE(processedImg.empty()) << "Processed image should not be empty";
-}
-
-TEST_F(SamInferenceTest, PreProcessRectangularImage)
-{
-    cv::Mat processedImg;
-    const char* result = utilities.PreProcess(testImage_800x600, NonSquareImgSize, processedImg);
-
-    EXPECT_EQ(result, nullptr) << "PreProcess should succeed";
-    EXPECT_EQ(processedImg.size(), cv::Size(800, 600)) << "Output should be letterboxed to 800x600";
-    EXPECT_FALSE(processedImg.empty()) << "Processed image should not be empty";
-}
-
+// Confirms that with a present encoder model we can initialize a session.
+// Skips if the model file is not available.
 TEST_F(SamInferenceTest, CreateSessionWithValidModel)
 {
     if (!std::filesystem::exists("SAM_encoder.onnx")) {
@@ -81,6 +67,7 @@ TEST_F(SamInferenceTest, CreateSessionWithValidModel)
     EXPECT_NE(samSegmentors[0], nullptr) << "CreateSession should succeed with valid parameters";
 }
 
+// Confirms that giving an invalid model path returns an error (no crash).
 TEST_F(SamInferenceTest, CreateSessionWithInvalidModel)
 {
     params_encoder.modelPath = "nonexistent_model.onnx";
@@ -88,6 +75,8 @@ TEST_F(SamInferenceTest, CreateSessionWithInvalidModel)
     EXPECT_NE(result, nullptr) << "CreateSession should fail with invalid model path";
 }
 
+// End-to-end check: with both encoder/decoder models present, the pipeline runs
+// and returns a mask vector. Skips if models are not available.
 TEST_F(SamInferenceTest, FullInferencePipeline)
 {
     if (!std::filesystem::exists("SAM_encoder.onnx") ||
@@ -95,15 +84,8 @@ TEST_F(SamInferenceTest, FullInferencePipeline)
         GTEST_SKIP() << "Models not found in build dir";
     }
 
-
-
     auto masks = SegmentAnything(samSegmentors, params_encoder, params_decoder, testImage_realistic);
-    EXPECT_TRUE(masks.size() >= 0) << "Masks should be a valid output vector";
-}
 
-// Run all tests
-int main(int argc, char **argv)
-{
-    testing::InitGoogleTest(&argc, argv);
-    return RUN_ALL_TESTS();
+    // We only check that a vector is returned. (You can strengthen this to EXPECT_FALSE(masks.empty()).)
+    EXPECT_TRUE(masks.size() >= 0) << "Masks should be a valid output vector";
 }
\ No newline at end of file
diff --git a/test/test_utils.cpp b/test/test_utils.cpp
new file mode 100644
index 0000000..27eaaa0
--- /dev/null
+++ b/test/test_utils.cpp
@@ -0,0 +1,175 @@
+#include <gtest/gtest.h>
+#include <opencv2/opencv.hpp>
+#include "utils.h"
+
+// This file contains small, focused unit tests for Utils.
+// We verify image preprocessing (channel conversion, aspect-preserving resize, padding)
+// and coordinate scaling to match preprocessing.
+
+// Minimal fixture: every TEST_F that uses it gets its own default-constructed Utils (`u`).
+class UtilsTest : public ::testing::Test {
+protected:
+    Utils u;
+};
+
+// Feeds a 1-channel (grayscale) image through PreProcess and checks that the result
+// has 3 channels and exactly the requested target size (the letterbox canvas).
+TEST_F(UtilsTest, GrayscaleToRGBKeepsSize) {
+    cv::Mat gray = cv::Mat::zeros(300, 500, CV_8UC1);
+    cv::Mat out;
+    std::vector<int> target{1024, 1024};
+
+    // The test treats a nullptr return as success and stops here otherwise.
+    const char* err = u.PreProcess(gray, target, out);
+    ASSERT_EQ(err, nullptr);
+
+    // The single input channel must have been expanded to 3 channels.
+    EXPECT_EQ(out.channels(), 3);
+
+    // cv::Size is (width, height), i.e. (target[0], target[1]).
+    EXPECT_EQ(out.size(), cv::Size(target[0], target[1]));
+}
+
+// Verifies three things for a 1280x720 input letterboxed onto a 1024x1024 canvas:
+// 1) The content occupies the aspect-preserving size (1024 wide, height scaled alike).
+// 2) The resized image is placed at the top-left (0,0).
+// 3) The padding area is zero (black) in every channel.
+TEST_F(UtilsTest, PreprocessTopLeftPaddingAndAspect) {
+    const cv::Scalar fill(10, 20, 30); // Input color in BGR
+    const cv::Mat img(720, 1280, CV_8UC3, fill);
+    cv::Mat out;
+    const std::vector<int> target{1024, 1024};
+
+    ASSERT_EQ(u.PreProcess(img, target, out), nullptr);
+    ASSERT_EQ(out.size(), cv::Size(target[0], target[1]));
+    ASSERT_EQ(out.channels(), 3);
+
+    // Width drives resizing here (landscape). Width becomes 1024, height scales accordingly.
+    const int resized_w = target[0];
+    const int resized_h = static_cast<int>(img.rows / (img.cols / static_cast<float>(target[0])));
+
+    // PreProcess converts BGR -> RGB, so expected color is swapped.
+    const cv::Scalar expected_rgb(fill[2], fill[1], fill[0]);
+
+    // The top-left region (resized content) should keep the image color.
+    const cv::Mat roi_top = out(cv::Rect(0, 0, resized_w, resized_h));
+    const cv::Scalar mean_top = cv::mean(roi_top);
+    EXPECT_NEAR(mean_top[0], expected_rgb[0], 1.0);
+    EXPECT_NEAR(mean_top[1], expected_rgb[1], 1.0);
+    EXPECT_NEAR(mean_top[2], expected_rgb[2], 1.0);
+
+    // The area below the resized content (padding) must be zeros.
+    if (resized_h < target[1]) {
+        cv::Mat roi_pad = out(cv::Rect(0, resized_h, target[0], target[1] - resized_h));
+        // Count over all channels: a grayscale conversion would round faint values down to 0.
+        EXPECT_EQ(cv::countNonZero(roi_pad.reshape(1)), 0);
+    }
+}
+
+// Parameterized fixture for TEST_P: each parameter is an (input size, target size)
+// pair of cv::Size values, and the fixture holds a Utils instance (`u`) for the test body.
+class UtilsPreprocessParamTest
+    : public ::testing::TestWithParam<std::tuple<cv::Size, cv::Size>> {
+protected:
+    Utils u;
+};
+
+// TEST_P defines a parameterized test. It runs once per parameter set.
+// We assert that:
+// - Output size equals the target canvas (hard requirement; the ROIs below rely on it).
+// - Output has 3 channels.
+// - The padding area (bottom or right) is zero in every channel, depending on which side letterboxes.
+TEST_P(UtilsPreprocessParamTest, LetterboxWithinBoundsAndChannels3) {
+    const auto [inSize, target] = GetParam();
+    cv::Mat img(inSize, CV_8UC3, cv::Scalar(1, 2, 3));
+    cv::Mat out;
+
+    ASSERT_EQ(u.PreProcess(img, {target.width, target.height}, out), nullptr);
+    ASSERT_EQ(out.size(), target);
+    EXPECT_EQ(out.channels(), 3);
+
+    // Detect which side letterboxes and check that the padded region is zeros.
+    if (inSize.width >= inSize.height) {
+        int resized_h = static_cast<int>(inSize.height / (inSize.width / static_cast<float>(target.width)));
+        if (resized_h < target.height) {
+            cv::Mat roi_pad = out(cv::Rect(0, resized_h, target.width, target.height - resized_h));
+            // Count over all channels: a grayscale conversion would round faint values down to 0.
+            EXPECT_EQ(cv::countNonZero(roi_pad.reshape(1)), 0);
+        }
+    } else {
+        int resized_w = static_cast<int>(inSize.width / (inSize.height / static_cast<float>(target.height)));
+        if (resized_w < target.width) {
+            cv::Mat roi_pad = out(cv::Rect(resized_w, 0, target.width - resized_w, target.height));
+            // reshape(1) only changes the channel count, so it also works on this non-continuous ROI.
+            EXPECT_EQ(cv::countNonZero(roi_pad.reshape(1)), 0);
+        }
+    }
+}
+
+// Supplies the parameter sets for UtilsPreprocessParamTest under the prefix "ManySizes".
+// Each tuple is (input size, target size); cv::Size arguments are (width, height).
+INSTANTIATE_TEST_SUITE_P(
+    ManySizes,
+    UtilsPreprocessParamTest,
+    ::testing::Values(
+        std::make_tuple(cv::Size(640, 640),  cv::Size(1024, 1024)), // square -> larger square
+        std::make_tuple(cv::Size(800, 600),  cv::Size(800, 600)),    // input already matches the target
+        std::make_tuple(cv::Size(600, 800),  cv::Size(800, 600)),    // portrait -> landscape
+        std::make_tuple(cv::Size(1280, 720), cv::Size(1024, 1024))   // wide -> square
+    )
+);
+
+// Fixture for the ScaleBboxPoints tests; each test gets its own Utils (`u`).
+class UtilsScaleBboxPointsTest : public ::testing::Test {
+protected:
+    Utils u;
+};
+
+// An 800x600 image with an 800x600 target: the points are expected back unchanged (within 1e-3).
+TEST_F(UtilsScaleBboxPointsTest, IdentityWhenSameSize) {
+    cv::Mat img(600, 800, CV_8UC3);
+    std::vector<int> target{800, 600};
+    std::vector<float> pts{100.f, 100.f, 700.f, 500.f};
+    std::vector<float> scaled;
+
+    u.ScaleBboxPoints(img, target, pts, scaled);
+    ASSERT_EQ(scaled.size(), pts.size());
+    EXPECT_NEAR(scaled[0], pts[0], 1e-3);
+    EXPECT_NEAR(scaled[1], pts[1], 1e-3);
+    EXPECT_NEAR(scaled[2], pts[2], 1e-3);
+    EXPECT_NEAR(scaled[3], pts[3], 1e-3);
+}
+
+// Landscape input (cols >= rows): the width ratio is the single scale factor, so the test
+// expects every coordinate, x and y alike, to be multiplied by target_width / input_width.
+TEST_F(UtilsScaleBboxPointsTest, ScalesWidthDominant) {
+    cv::Mat img(300, 600, CV_8UC3);                  // rows=300, cols=600 (cols >= rows)
+    std::vector<int> target{1200, 600};              // target width is twice the input width
+    std::vector<float> pts{100.f, 50.f, 500.f, 250.f};
+    std::vector<float> scaled;
+
+    u.ScaleBboxPoints(img, target, pts, scaled);
+    ASSERT_EQ(scaled.size(), pts.size());
+    const float scale = target[0] / static_cast<float>(img.cols); // 1200/600 = 2
+    EXPECT_NEAR(scaled[0], pts[0] * scale, 1e-3);
+    EXPECT_NEAR(scaled[1], pts[1] * scale, 1e-3);
+    EXPECT_NEAR(scaled[2], pts[2] * scale, 1e-3);
+    EXPECT_NEAR(scaled[3], pts[3] * scale, 1e-3);
+}
+
+// Portrait input (rows > cols): the height ratio is the single scale factor, so the test
+// expects every coordinate, x and y alike, to be multiplied by target_height / input_height.
+TEST_F(UtilsScaleBboxPointsTest, ScalesHeightDominant) {
+    cv::Mat img(600, 300, CV_8UC3);                  // rows=600, cols=300 (rows > cols)
+    std::vector<int> target{600, 1200};              // target height is twice the input height
+    std::vector<float> pts{100.f, 50.f, 200.f, 500.f};
+    std::vector<float> scaled;
+
+    u.ScaleBboxPoints(img, target, pts, scaled);
+    ASSERT_EQ(scaled.size(), pts.size());
+    const float scale = target[1] / static_cast<float>(img.rows); // 1200/600 = 2
+    EXPECT_NEAR(scaled[0], pts[0] * scale, 1e-3);
+    EXPECT_NEAR(scaled[1], pts[1] * scale, 1e-3);
+    EXPECT_NEAR(scaled[2], pts[2] * scale, 1e-3);
+    EXPECT_NEAR(scaled[3], pts[3] * scale, 1e-3);
+}
\ No newline at end of file

From fa97e64961952205c6de52b5573aac2b88a64d23 Mon Sep 17 00:00:00 2001
From: IasonTheodorou <iasonth95@gmail.com>
Date: Wed, 10 Sep 2025 14:35:19 +0200
Subject: [PATCH 15/28] Updated initializer and SegmentAnything modules to
 store the data to the custom result structs properly

---
 CMakeLists.txt         |  6 +++---
 include/segmentation.h |  6 ++++--
 src/main.cpp           |  8 +++++---
 src/segmentation.cpp   | 18 +++++++++---------
 src/utils.cpp          |  2 +-
 5 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index ffc7611..1489bcf 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -19,7 +19,7 @@ include_directories(${OpenCV_INCLUDE_DIRS})
 
 # -------------- ONNXRuntime  ------------------#
 set(ONNXRUNTIME_VERSION 1.21.0)
-set(ONNXRUNTIME_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/../hero_sam/onnxruntime-linux-x64-gpu-1.21.1")
+set(ONNXRUNTIME_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/../hero_sam.bak/onnxruntime-linux-x64-gpu-1.21.1")
 include_directories(${ONNXRUNTIME_ROOT}/include)
 
 # -------------- Cuda ------------------#
@@ -84,8 +84,8 @@ add_executable(${PROJECT_NAME} src/main.cpp)
 target_link_libraries(${PROJECT_NAME} sam_onnx_ros_core)
 
 # Copy sam_<model>.onnx file to the same folder of the executable file
-configure_file(~/Documents/repos/hero_sam/sam_inference/model/SAM_mask_decoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_mask_decoder.onnx  COPYONLY)
-configure_file(~/Documents/repos/hero_sam/sam_inference/model/SAM_encoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_encoder.onnx COPYONLY)
+configure_file(~/Documents/repos/hero_sam.bak/sam_inference/model/SAM_mask_decoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_mask_decoder.onnx  COPYONLY)
+configure_file(~/Documents/repos/hero_sam.bak/sam_inference/model/SAM_encoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_encoder.onnx COPYONLY)
 
 # Create folder name images in the same folder of the executable file
 add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD
diff --git a/include/segmentation.h b/include/segmentation.h
index b341f8d..e6a6d67 100644
--- a/include/segmentation.h
+++ b/include/segmentation.h
@@ -4,7 +4,9 @@
 #include <tuple>
 
 #include "sam_inference.h"
-std::tuple<std::vector<std::unique_ptr<SAM>>, SEG::_DL_INIT_PARAM, SEG::_DL_INIT_PARAM> Initializer();
-std::vector<cv::Mat> SegmentAnything(std::vector<std::unique_ptr<SAM>>& samSegmentors, const SEG::_DL_INIT_PARAM& params_encoder, const SEG::_DL_INIT_PARAM& params_decoder, cv::Mat& img);
+std::tuple<std::vector<std::unique_ptr<SAM>>, SEG::_DL_INIT_PARAM, SEG::_DL_INIT_PARAM, SEG::DL_RESULT, std::vector<SEG::DL_RESULT>> Initializer();
+void SegmentAnything(std::vector<std::unique_ptr<SAM>>& samSegmentors, const SEG::_DL_INIT_PARAM& params_encoder, const SEG::_DL_INIT_PARAM& params_decoder, const cv::Mat& img,
+std::vector<SEG::DL_RESULT> &resSam,
+  SEG::DL_RESULT &res);
 
 #endif // SEGMENTATION_H
\ No newline at end of file
diff --git a/src/main.cpp b/src/main.cpp
index c9624c5..e52e9ce 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -10,7 +10,9 @@ int main()
     std::vector<std::unique_ptr<SAM>> samSegmentors;
     SEG::DL_INIT_PARAM params_encoder;
     SEG::DL_INIT_PARAM params_decoder;
-    std::tie(samSegmentors, params_encoder, params_decoder) = Initializer();
+    std::vector<SEG::DL_RESULT> resSam;
+    SEG::DL_RESULT res;
+    std::tie(samSegmentors, params_encoder, params_decoder, res, resSam) = Initializer();
     std::filesystem::path current_path = std::filesystem::current_path();
     std::filesystem::path imgs_path =  "/home/amigo/Documents/repos/hero_sam/sam_inference/build/images"; // current_path / <- you could use
     for (auto &i : std::filesystem::directory_iterator(imgs_path))
@@ -19,8 +21,8 @@ int main()
         {
             std::string img_path = i.path().string();
             cv::Mat img = cv::imread(img_path);
-            std::vector<cv::Mat> masks;
-            masks = SegmentAnything(samSegmentors, params_encoder, params_decoder, img);
+
+            SegmentAnything(samSegmentors, params_encoder, params_decoder, img, resSam, res);
 
         }
     }
diff --git a/src/segmentation.cpp b/src/segmentation.cpp
index 25b8fae..5bb9744 100644
--- a/src/segmentation.cpp
+++ b/src/segmentation.cpp
@@ -1,7 +1,7 @@
 #include "segmentation.h"
 
 std::tuple<std::vector<std::unique_ptr<SAM>>, SEG::DL_INIT_PARAM,
-           SEG::DL_INIT_PARAM>
+           SEG::DL_INIT_PARAM, SEG::DL_RESULT, std::vector<SEG::DL_RESULT>>
 Initializer() {
   std::vector<std::unique_ptr<SAM>> samSegmentors;
   samSegmentors.push_back(std::make_unique<SAM>());
@@ -11,7 +11,8 @@ Initializer() {
   std::unique_ptr<SAM> samSegmentorDecoder = std::make_unique<SAM>();
   SEG::DL_INIT_PARAM params_encoder;
   SEG::DL_INIT_PARAM params_decoder;
-
+  SEG::DL_RESULT res;
+  std::vector<SEG::DL_RESULT> resSam;
   params_encoder.rectConfidenceThreshold = 0.1;
   params_encoder.iouThreshold = 0.5;
   params_encoder.modelPath = "/home/amigo//Documents/repos/sam_onnx_ros/build/SAM_encoder.onnx";
@@ -31,16 +32,15 @@ Initializer() {
   samSegmentorDecoder->CreateSession(params_decoder);
   samSegmentors[0] = std::move(samSegmentorEncoder);
   samSegmentors[1] = std::move(samSegmentorDecoder);
-  return {std::move(samSegmentors), params_encoder, params_decoder};
+  return {std::move(samSegmentors), params_encoder, params_decoder, res, resSam};
 }
 
-std::vector<cv::Mat>
-SegmentAnything(std::vector<std::unique_ptr<SAM>> &samSegmentors,
+void SegmentAnything(std::vector<std::unique_ptr<SAM>> &samSegmentors,
                 const SEG::DL_INIT_PARAM &params_encoder,
-                const SEG::DL_INIT_PARAM &params_decoder, cv::Mat &img) {
+                const SEG::DL_INIT_PARAM &params_decoder, const cv::Mat &img, std::vector<SEG::DL_RESULT> &resSam,
+  SEG::DL_RESULT &res) {
+
 
-  std::vector<SEG::DL_RESULT> resSam;
-  SEG::DL_RESULT res;
 
   SEG::MODEL_TYPE modelTypeRef = params_encoder.modelType;
   samSegmentors[0]->RunSession(img, resSam, modelTypeRef, res);
@@ -48,5 +48,5 @@ SegmentAnything(std::vector<std::unique_ptr<SAM>> &samSegmentors,
   modelTypeRef = params_decoder.modelType;
   samSegmentors[1]->RunSession(img, resSam, modelTypeRef, res);
 
-  return std::move(res.masks);
+  // return std::move(res.masks);
 }
diff --git a/src/utils.cpp b/src/utils.cpp
index 2c826ab..28a7ded 100644
--- a/src/utils.cpp
+++ b/src/utils.cpp
@@ -1,6 +1,6 @@
 #include "utils.h"
 #include <opencv2/ximgproc/edge_filter.hpp> // for guided filter
-#define LOGGING
+//#define LOGGING
 
 // Constructor
 Utils::Utils()

From 53989b69f48aa2d4d172a55a6d44b3565963921e Mon Sep 17 00:00:00 2001
From: IasonTheodorou <iasonth95@gmail.com>
Date: Wed, 10 Sep 2025 15:51:18 +0200
Subject: [PATCH 16/28] Enabled cuda on the decoder as well

---
 src/segmentation.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/segmentation.cpp b/src/segmentation.cpp
index 5bb9744..23efd59 100644
--- a/src/segmentation.cpp
+++ b/src/segmentation.cpp
@@ -24,6 +24,8 @@ Initializer() {
 
 #ifdef USE_CUDA
   params_encoder.cudaEnable = true;
+  params_decoder.cudaEnable = true;
+
 #else
   params_encoder.cudaEnable = false;
 #endif

From 7490f03db3faf315f715260477c6e3549eaf63a3 Mon Sep 17 00:00:00 2001
From: IasonTheodorou <iasonth95@gmail.com>
Date: Wed, 10 Sep 2025 16:14:56 +0200
Subject: [PATCH 17/28] Fixed small bug of adding an extra (full img) bounding
 box

---
 src/sam_inference.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp
index f12c56b..de1b408 100644
--- a/src/sam_inference.cpp
+++ b/src/sam_inference.cpp
@@ -223,8 +223,7 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg,
 
     std::vector<cv::Rect> boundingBoxes;
     boundingBoxes.push_back(bbox);
-#else
-    result.boxes.push_back(cv::Rect(0, 0, iImg.cols, iImg.rows));
+
 #endif // ROI
 
 #ifdef benchmark

From 01da1fc659d9d7fa6e635b1430e19569d40b5549 Mon Sep 17 00:00:00 2001
From: IasonTheodorou <iasonth95@gmail.com>
Date: Fri, 12 Sep 2025 12:13:22 +0200
Subject: [PATCH 18/28] Aligned dimensions [height, width] between onnx and opencv

---
 src/main.cpp          |  2 +-
 src/sam_inference.cpp | 19 +++++++++++++++----
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/src/main.cpp b/src/main.cpp
index e52e9ce..9744737 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -14,7 +14,7 @@ int main()
     SEG::DL_RESULT res;
     std::tie(samSegmentors, params_encoder, params_decoder, res, resSam) = Initializer();
     std::filesystem::path current_path = std::filesystem::current_path();
-    std::filesystem::path imgs_path =  "/home/amigo/Documents/repos/hero_sam/sam_inference/build/images"; // current_path / <- you could use
+    std::filesystem::path imgs_path =  "/home/amigo/Documents/repos/hero_sam.bak/sam_inference/build/images"; // current_path / <- you could use
     for (auto &i : std::filesystem::directory_iterator(imgs_path))
     {
         if (i.path().extension() == ".jpg" || i.path().extension() == ".png" || i.path().extension() == ".jpeg")
diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp
index de1b408..0ccec9a 100644
--- a/src/sam_inference.cpp
+++ b/src/sam_inference.cpp
@@ -93,6 +93,16 @@ const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) {
 
     auto input_shape =
         _session->GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
+    // Optional shape check when model has fixed dims (not -1)
+    if (input_shape.size() >= 4 && input_shape[2] > 0 && input_shape[3] > 0) {
+      const int64_t expectH = _imgSize.at(1);
+      const int64_t expectW = _imgSize.at(0);
+      if (input_shape[2] != expectH || input_shape[3] != expectW) {
+        std::cerr << "[SAM]: Model input (H,W)=(" << input_shape[2] << "," << input_shape[3]
+                  << ") mismatches configured imgSize (W,H)=(" << _imgSize[0] << "," << _imgSize[1] << ")."
+                  << std::endl;
+      }
+    }
     auto output_shape =
         _session->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
     auto output_type = _session->GetOutputTypeInfo(0)
@@ -127,9 +137,9 @@ const char *SAM::RunSession(const cv::Mat &iImg,
   utilities.BlobFromImage(processedImg, blob);
   std::vector<int64_t> inputNodeDims;
   if (_modelType == SEG::SAM_SEGMENT_ENCODER) {
-    inputNodeDims = {1, 3, _imgSize.at(0), _imgSize.at(1)};
+    // NCHW: H = imgSize[1], W = imgSize[0]
+    inputNodeDims = {1, 3, _imgSize.at(1), _imgSize.at(0)};
   } else if (_modelType == SEG::SAM_SEGMENT_DECODER) {
-    // Input size or SAM decoder model is 256x64x64 for the decoder
     inputNodeDims = {1, 256, 64, 64};
   }
   TensorProcess(starttime_1, iImg, blob, inputNodeDims, _modelType, oResult,
@@ -329,8 +339,9 @@ char *SAM::WarmUpSession(SEG::MODEL_TYPE _modelType) {
 
   float *blob = new float[iImg.total() * 3];
   utilities.BlobFromImage(processedImg, blob);
-  std::vector<int64_t> SAM_input_node_dims = {1, 3, _imgSize.at(0),
-                                              _imgSize.at(1)};
+
+  // NCHW: H = imgSize[1], W = imgSize[0]
+  std::vector<int64_t> SAM_input_node_dims = {1, 3, _imgSize.at(1), _imgSize.at(0)};
   switch (_modelType) {
   case SEG::SAM_SEGMENT_ENCODER: {
     Ort::Value input_tensor = Ort::Value::CreateTensor<float>(

From 7cdf39aa9e29124011e911ce9123495eb88df049 Mon Sep 17 00:00:00 2001
From: IasonTheodorou <iasonth95@gmail.com>
Date: Fri, 12 Sep 2025 12:14:29 +0200
Subject: [PATCH 19/28] corrected tests for the new segmentation way of
 inference (initialize and segment anything interfaces changed) and added one
 more test to check the image dimensions W,H

---
 src/sam_inference.cpp | 11 +----------
 test/sam_test.cpp     |  9 ++++++---
 test/test_utils.cpp   | 16 ++++++++++++++++
 3 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp
index 0ccec9a..3ae5677 100644
--- a/src/sam_inference.cpp
+++ b/src/sam_inference.cpp
@@ -93,16 +93,7 @@ const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) {
 
     auto input_shape =
         _session->GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
-    // Optional shape check when model has fixed dims (not -1)
-    if (input_shape.size() >= 4 && input_shape[2] > 0 && input_shape[3] > 0) {
-      const int64_t expectH = _imgSize.at(1);
-      const int64_t expectW = _imgSize.at(0);
-      if (input_shape[2] != expectH || input_shape[3] != expectW) {
-        std::cerr << "[SAM]: Model input (H,W)=(" << input_shape[2] << "," << input_shape[3]
-                  << ") mismatches configured imgSize (W,H)=(" << _imgSize[0] << "," << _imgSize[1] << ")."
-                  << std::endl;
-      }
-    }
+
     auto output_shape =
         _session->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
     auto output_type = _session->GetOutputTypeInfo(0)
diff --git a/test/sam_test.cpp b/test/sam_test.cpp
index 521b0d8..2ae4c73 100644
--- a/test/sam_test.cpp
+++ b/test/sam_test.cpp
@@ -28,13 +28,14 @@ class SamInferenceTest : public ::testing::Test
         NonSquareImgSize = { testImage_800x600.cols, testImage_800x600.rows };
 
         // Use package helpers to build default params and SAM objects.
-        std::tie(samSegmentors, params_encoder, params_decoder) = Initializer();
+        std::tie(samSegmentors, params_encoder, params_decoder, res, resSam) = Initializer();
 
 #ifdef USE_CUDA
         params_encoder.cudaEnable = true;  // Enable CUDA if compiled with it
 #else
         params_encoder.cudaEnable = false; // Otherwise run on CPU
 #endif
+
     }
 
     // Clean up the SAM objects after each test.
@@ -46,6 +47,8 @@ class SamInferenceTest : public ::testing::Test
     std::vector<int> NonSquareImgSize;
     std::vector<std::unique_ptr<SAM>> samSegmentors;
     SEG::DL_INIT_PARAM params_encoder, params_decoder;
+    SEG::DL_RESULT res;
+    std::vector<SEG::DL_RESULT> resSam;
 };
 
 // Simple smoke test: we can construct a SAM object without throwing.
@@ -84,8 +87,8 @@ TEST_F(SamInferenceTest, FullInferencePipeline)
         GTEST_SKIP() << "Models not found in build dir";
     }
 
-    auto masks = SegmentAnything(samSegmentors, params_encoder, params_decoder, testImage_realistic);
+    SegmentAnything(samSegmentors, params_encoder, params_decoder, testImage_realistic, resSam, res);
 
     // We only check that a vector is returned. (You can strengthen this to EXPECT_FALSE(masks.empty()).)
-    EXPECT_TRUE(masks.size() >= 0) << "Masks should be a valid output vector";
+    EXPECT_TRUE(res.masks.size() >= 0) << "Masks should be a valid output vector";
 }
\ No newline at end of file
diff --git a/test/test_utils.cpp b/test/test_utils.cpp
index 27eaaa0..a03b31b 100644
--- a/test/test_utils.cpp
+++ b/test/test_utils.cpp
@@ -66,6 +66,22 @@ TEST_F(UtilsTest, PreprocessTopLeftPaddingAndAspect) {
     }
 }
 
+// Verifies that PreProcess reads a non-square imgSize as [W, H]: the output must be target[0] columns wide and target[1] rows tall.
+TEST_F(UtilsTest, PreprocessNonSquareWidthHeightOrder) {
+    // Input image: cv::Mat takes (rows, cols), so H=300, W=500
+    cv::Mat img(300, 500, CV_8UC3, cv::Scalar(5, 6, 7));
+
+    // Target canvas given as (W,H) = (640,480); deliberately non-square so a swapped order would be detected
+    std::vector<int> target{640, 480};
+    cv::Mat out;
+
+    ASSERT_EQ(u.PreProcess(img, target, out), nullptr); // this test treats a nullptr return as success
+    // cols = width, rows = height
+    EXPECT_EQ(out.cols, target[0]);
+    EXPECT_EQ(out.rows, target[1]);
+    EXPECT_EQ(out.size(), cv::Size(target[0], target[1])); // same check expressed as cv::Size(width, height)
+}
+
 // Parameterized fixture: used with TEST_P to run the same test body
 // for many (input size, target size) pairs.
 class UtilsPreprocessParamTest

From b587a1cf2e01aabbe7bb97ab8c476239c3526b7d Mon Sep 17 00:00:00 2001
From: IasonTheodorou <iasonth95@gmail.com>
Date: Fri, 12 Sep 2025 14:10:23 +0200
Subject: [PATCH 20/28] Removed typo / from model path

---
 src/segmentation.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/segmentation.cpp b/src/segmentation.cpp
index 23efd59..41176de 100644
--- a/src/segmentation.cpp
+++ b/src/segmentation.cpp
@@ -15,7 +15,7 @@ Initializer() {
   std::vector<SEG::DL_RESULT> resSam;
   params_encoder.rectConfidenceThreshold = 0.1;
   params_encoder.iouThreshold = 0.5;
-  params_encoder.modelPath = "/home/amigo//Documents/repos/sam_onnx_ros/build/SAM_encoder.onnx";
+  params_encoder.modelPath = "/home/amigo/Documents/repos/sam_onnx_ros/build/SAM_encoder.onnx";
   params_encoder.imgSize = {1024, 1024};
 
   params_decoder = params_encoder;

From b83bf61153c56c0b1d759ad7e939369704cbb65d Mon Sep 17 00:00:00 2001
From: Matthijs van der Burgh <MatthijsBurgh@outlook.com>
Date: Tue, 16 Sep 2025 10:23:52 +0200
Subject: [PATCH 21/28] Bump min required cmake version to 3.14

---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1489bcf..12ca294 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.0.2)
+cmake_minimum_required(VERSION 3.14)
 
 project(sam_onnx_ros)
 

From 733167f3919e540972fba0733e7823072f32b27c Mon Sep 17 00:00:00 2001
From: IasonTheodorou <iasonth95@gmail.com>
Date: Tue, 16 Sep 2025 20:11:04 +0200
Subject: [PATCH 22/28] EOF line added and package.xml structure update

---
 LICENSE                 | 37 ++++++++++++++++++++-----------------
 README.md               | 11 +++--------
 include/dl_types.h      |  2 +-
 include/sam_inference.h |  2 +-
 include/segmentation.h  |  2 +-
 include/utils.h         |  2 +-
 package.xml             |  7 +++++--
 src/main.cpp            |  2 +-
 src/utils.cpp           |  2 +-
 test/sam_test.cpp       |  2 +-
 test/test_utils.cpp     |  2 +-
 11 files changed, 36 insertions(+), 35 deletions(-)

diff --git a/LICENSE b/LICENSE
index 7121e4a..c640c63 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,22 +1,25 @@
-Custom License Agreement
+BSD 2-Clause License
 
-1. License Grant You are hereby granted a non-exclusive, non-transferable license to use, reproduce, and distribute the code (hereinafter referred to as "the Software") under the following conditions:
+Copyright (c) 2021, Eindhoven University of Technology - CST Robotics Group
+All rights reserved.
 
-2. Conditions of Use
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
 
-Non-Commercial Use: You may use the Software for personal, educational, or non-commercial purposes without any additional permissions.
-Commercial Use: Any commercial use of the Software, including but not limited to selling, licensing, or using it in a commercial product, requires prior written permission from the original developer.
-3. Contact Requirement
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
 
-If you wish to use the Software for commercial purposes, you must contact the original developer at [https://www.linkedin.com/in/hamdi-boukamcha/] to obtain a commercial license.
-The terms of any commercial license will be mutually agreed upon and may involve a licensing fee.
-4. Attribution
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
 
-Regardless of whether you are using the Software for commercial or non-commercial purposes, you must provide appropriate credit to the original developer in any distributions or products that use the Software.
-5. Disclaimer of Warranty
-
-The Software is provided "as is," without warranty of any kind, express or implied, including but not limited to the warranties of merchantability, fitness for a particular purpose, and non-infringement. In no event shall the original developer be liable for any claim, damages, or other liability, whether in an action of contract, tort, or otherwise, arising from, out of, or in connection with the Software or the use or other dealings in the Software.
-6. Governing Law
-
-This License Agreement shall be governed by and construed in accordance with the laws of France.
-By using the Software, you agree to abide by the terms outlined in this License Agreement.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/README.md b/README.md
index 0efca14..4692d73 100644
--- a/README.md
+++ b/README.md
@@ -19,8 +19,8 @@ A high-performance C++ implementation for SAM (segment anything model) using Ten
     Dynamic Shape Support: Efficient handling of variable input sizes using optimization profiles.
     CUDA Optimization: Leverage CUDA for preprocessing and efficient memory handling.
 
-## 📢 Performance 
- ### Infernce Time 
+## 📢 Performance
+ ### Infernce Time
 
 | Component                  | SpeedSAM |
 |----------------------------|-----------|
@@ -51,7 +51,7 @@ A high-performance C++ implementation for SAM (segment anything model) using Ten
     │   ├── main.cpp          # Main entry point
     │   └── speedSam.cpp      # Implementation of the SpeedSam class
     └── CMakeLists.txt        # CMake configuration
-    
+
 # 🚀 Installation
 ## Prerequisites
     git clone https://github.com/hamdiboukamcha/SPEED-SAM-C-TENSORRT.git
@@ -94,8 +94,3 @@ If you use this code in your research, please cite the repository as follows:
             publisher = {GitHub},
             howpublished = {\url{https://github.com/hamdiboukamcha/SPEED-SAM-C-TENSORRT//}},
         }
-
-    
-
-   
-
diff --git a/include/dl_types.h b/include/dl_types.h
index 5141284..033df56 100644
--- a/include/dl_types.h
+++ b/include/dl_types.h
@@ -68,4 +68,4 @@ namespace SEG
 
     } DL_RESULT;
 } // namespace SEG
-#endif // DL_TYPES_H
\ No newline at end of file
+#endif // DL_TYPES_H
diff --git a/include/sam_inference.h b/include/sam_inference.h
index 7bff0b1..6b4713e 100644
--- a/include/sam_inference.h
+++ b/include/sam_inference.h
@@ -46,4 +46,4 @@ class SAM
     float _iouThreshold;
 };
 
-#endif // SAMINFERENCE_H
\ No newline at end of file
+#endif // SAMINFERENCE_H
diff --git a/include/segmentation.h b/include/segmentation.h
index e6a6d67..83102e0 100644
--- a/include/segmentation.h
+++ b/include/segmentation.h
@@ -9,4 +9,4 @@ void SegmentAnything(std::vector<std::unique_ptr<SAM>>& samSegmentors, const SEG
 std::vector<SEG::DL_RESULT> &resSam,
   SEG::DL_RESULT &res);
 
-#endif // SEGMENTATION_H
\ No newline at end of file
+#endif // SEGMENTATION_H
diff --git a/include/utils.h b/include/utils.h
index 6cb8819..a471512 100644
--- a/include/utils.h
+++ b/include/utils.h
@@ -55,4 +55,4 @@ class Utils
     float _resizeScalesBbox; // letterbox scale
 };
 
-#endif // UTILS_H
\ No newline at end of file
+#endif // UTILS_H
diff --git a/package.xml b/package.xml
index b00e6d6..7d17b8d 100644
--- a/package.xml
+++ b/package.xml
@@ -7,6 +7,7 @@
   <version>0.0.0</version>
   <description>Segment Anything Model (SAM) segmentation</description>
 
+  <author email="iasonth95@gmail.com">Iason Theodorou</author>
   <maintainer email="iasonth95@gmail.com">Iason Theodorou</maintainer>
 
   <license>BSD</license>
@@ -14,15 +15,17 @@
   <buildtool_depend>catkin</buildtool_depend>
 
   <build_depend>libopencv-dev</build_depend>
-  <exec_depend>libopencv-dev</exec_depend>
   <build_depend>onnxruntime_ros</build_depend>
+
+  <exec_depend>libopencv-dev</exec_depend>
   <exec_depend>onnxruntime_ros</exec_depend>
 
   <test_depend>catkin_lint_cmake</test_depend>
+
   <doc_depend>doxygen</doc_depend>
 
   <export>
     <rosdoc config="rosdoc.yaml" />
   </export>
 
-</package>
\ No newline at end of file
+</package>
diff --git a/src/main.cpp b/src/main.cpp
index 9744737..cd0f9dd 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -27,4 +27,4 @@ int main()
         }
     }
     return 0;
-}
\ No newline at end of file
+}
diff --git a/src/utils.cpp b/src/utils.cpp
index 28a7ded..643dba4 100644
--- a/src/utils.cpp
+++ b/src/utils.cpp
@@ -219,4 +219,4 @@ void Utils::PostProcess(std::vector<Ort::Value> &output_tensors, const cv::Mat &
     {
         std::cerr << "[SAM]: Unexpected mask tensor shape." << std::endl;
     }
-}
\ No newline at end of file
+}
diff --git a/test/sam_test.cpp b/test/sam_test.cpp
index 2ae4c73..cd54d7e 100644
--- a/test/sam_test.cpp
+++ b/test/sam_test.cpp
@@ -91,4 +91,4 @@ TEST_F(SamInferenceTest, FullInferencePipeline)
 
     // We only check that a vector is returned. (You can strengthen this to EXPECT_FALSE(masks.empty()).)
     EXPECT_TRUE(res.masks.size() >= 0) << "Masks should be a valid output vector";
-}
\ No newline at end of file
+}
diff --git a/test/test_utils.cpp b/test/test_utils.cpp
index a03b31b..92d20ab 100644
--- a/test/test_utils.cpp
+++ b/test/test_utils.cpp
@@ -188,4 +188,4 @@ TEST_F(UtilsScaleBboxPointsTest, ScalesHeightDominant) {
     EXPECT_NEAR(scaled[1], pts[1] * scale, 1e-3);
     EXPECT_NEAR(scaled[2], pts[2] * scale, 1e-3);
     EXPECT_NEAR(scaled[3], pts[3] * scale, 1e-3);
-}
\ No newline at end of file
+}

From 386f54628e801c499f4473d387eadc340f12aa25 Mon Sep 17 00:00:00 2001
From: IasonTheodorou <iasonth95@gmail.com>
Date: Tue, 23 Sep 2025 18:51:58 +0200
Subject: [PATCH 23/28] Update README and include better comments

---
 README.md             | 93 +++++++++++++++----------------------------
 src/sam_inference.cpp |  9 +++--
 2 files changed, 37 insertions(+), 65 deletions(-)

diff --git a/README.md b/README.md
index 4692d73..4b7a36b 100644
--- a/README.md
+++ b/README.md
@@ -1,40 +1,32 @@
-# SPEED SAM C++ TENSORRT
-![SAM C++ TENSORRT](assets/speed_sam_cpp_tenosrrt.PNG)
+# SAM C++ ONNX implementation
 
-<a href="https://github.com/hamdiboukamcha/SPEED-SAM-C-TENSORRT" style="margin: 0 2px;">
-    <img src='https://img.shields.io/badge/GitHub-Repo-blue?style=flat&logo=GitHub' alt='GitHub'>
-  </a>
-
-  <a href="https://github.com/hamdiboukamcha/SPEED-SAM-C-TENSORRT?tab=GPL-3.0-1-ov-file" style="margin: 0 2px;">
-    <img src='https://img.shields.io/badge/License-CC BY--NC--4.0-lightgreen?style=flat&logo=Lisence' alt='License'>
-  </a>
+Inspired by the SAM neural network from Meta and the TensorRT implementation from: https://github.com/hamdiboukamcha/SPEED-SAM-C-TENSORRT.git
 
 ## 🌐 Overview
-A high-performance C++ implementation for SAM (segment anything model) using TensorRT and CUDA, optimized for real-time image segmentation tasks.
+A high-performance C++ implementation for SAM (segment anything model) using ONNX and CUDA, optimized for real-time image segmentation tasks.
 
-## 📢 Updates
-    Model Conversion: Build TensorRT engines from ONNX models for accelerated inference.
-    Segmentation with Points and BBoxes: Easily segment images using selected points or bounding boxes.
-    FP16 Precision: Choose between FP16 and FP32 for speed and precision balance.
-    Dynamic Shape Support: Efficient handling of variable input sizes using optimization profiles.
-    CUDA Optimization: Leverage CUDA for preprocessing and efficient memory handling.
 
 ## 📢 Performance
+
+### Warm-Up cost :fire:
+    NVIDIA GeForce RTX 3050
+    Encoder Cuda warm-up cost 66.875 ms.
+    Decoder Cuda warm-up cost 53.87 ms.
+
  ### Infernce Time
 
-| Component                  | SpeedSAM |
-|----------------------------|-----------|
-| **Image Encoder**          |           |
-| Parameters                  | 5M        |
-| Speed                       | 8ms       |
-| **Mask Decoder**           |           |
-| Parameters                  | 3.876M    |
-| Speed                       | 4ms       |
-| **Whole Pipeline (Enc+Dec)** |         |
-| Parameters                  | 9.66M     |
-| Speed                       | 12ms      |
-### Results
-![SPEED-SAM-C-TENSORRT RESULT](assets/Speed_SAM_Results.JPG)
+| Component                  | Pre processing | Inference | Post processing |
+|----------------------------|----------------| --------- | ----------------|
+| **Image Encoder**          |           | ||
+| Parameters                  | 5M        |- | -|
+| Speed                       | 8ms       | 33.322ms | 0.437ms |
+| **Mask Decoder**           |           | ||
+| Parameters                  | 3.876M    |- |- |
+| Speed                       | 34ms       | 11.176ms | 5.984ms |
+| **Whole Pipeline (Enc+Dec)** |         | | |
+| Parameters                  | 9.66M     | -| -|
+| Sum of Speed                       | 92.92ms      | - |-  |
+
 
 ## 📂 Project Structure
     SPEED-SAM-CPP-TENSORRT/
@@ -53,44 +45,23 @@ A high-performance C++ implementation for SAM (segment anything model) using Ten
     └── CMakeLists.txt        # CMake configuration
 
 # 🚀 Installation
-## Prerequisites
-    git clone https://github.com/hamdiboukamcha/SPEED-SAM-C-TENSORRT.git
-    cd SPEED-SAM-CPP-TENSORRT
-
+## Compile
+    git clone <repo>
+    cd sam_onnx_ros
     # Create a build directory and compile
     mkdir build && cd build
     cmake ..
     make -j$(nproc)
-Note: Update the CMakeLists.txt with the correct paths for TensorRT and OpenCV.
+
+Note: Update the CMakeLists.txt with the correct paths for ONNX Runtime, OpenCV, and the ONNX models (for TechUnited we keep them in separate repositories).
+
+You can use main.cpp to run the application
+
+## ROS option
+    You can also run the code as a catkin package.
 
 ## 📦 Dependencies
     CUDA: NVIDIA's parallel computing platform
-    TensorRT: High-performance deep learning inference
+    ONNX Runtime: High-performance deep learning inference
     OpenCV: Image processing library
     C++17: Required standard for compilation
-
-# 🔍 Code Overview
-## Main Components
-    SpeedSam Class (speedSam.h): Manages image encoding and mask decoding.
-    EngineTRT Class (engineTRT.h): TensorRT engine creation and inference.
-    CUDA Utilities (cuda_utils.h): Macros for CUDA error handling.
-    Config (config.h): Defines model parameters and precision settings.
-## Key Functions
-    EngineTRT::build: Builds the TensorRT engine from an ONNX model.
-    EngineTRT::infer: Runs inference on the provided input data.
-    SpeedSam::predict: Segments an image using input points or bounding boxes.
-## 📞 Contact
-
-For advanced inquiries, feel free to contact me on LinkedIn: <a href="https://www.linkedin.com/in/hamdi-boukamcha/" target="_blank"> <img src="assets/blue-linkedin-logo.png" alt="LinkedIn" width="32" height="32"></a>
-
-## 📜 Citation
-
-If you use this code in your research, please cite the repository as follows:
-
-        @misc{boukamcha2024SpeedSam,
-            author = {Hamdi Boukamcha},
-            title = {SPEED-SAM-C-TENSORRT},
-            year = {2024},
-            publisher = {GitHub},
-            howpublished = {\url{https://github.com/hamdiboukamcha/SPEED-SAM-C-TENSORRT//}},
-        }
diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp
index 3ae5677..8e61ec4 100644
--- a/src/sam_inference.cpp
+++ b/src/sam_inference.cpp
@@ -188,11 +188,11 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg,
       double post_process_time =
           (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000;
       if (_cudaEnable) {
-        std::cout << "[SAM(CUDA)]: " << pre_process_time << "ms pre-process, "
+        std::cout << "[SAM_encoder(CUDA)]: " << pre_process_time << "ms pre-process, "
                   << process_time << "ms inference, " << post_process_time
                   << "ms post-process." << std::endl;
       } else {
-        std::cout << "[SAM(CPU)]: " << pre_process_time << "ms pre-process, "
+        std::cout << "[SAM_encoder(CPU)]: " << pre_process_time << "ms pre-process, "
                   << process_time << "ms inference, " << post_process_time
                   << "ms post-process." << std::endl;
       }
@@ -235,6 +235,7 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg,
 #ifdef ROI
     for (const auto &box : boundingBoxes)
 #else
+
     for (const auto &box : result.boxes)
 #endif // ROI
     {
@@ -303,11 +304,11 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg,
     double post_process_time =
         (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000;
     if (_cudaEnable) {
-      std::cout << "[SAM(CUDA)]: " << pre_process_time << "ms pre-process, "
+      std::cout << "[SAM_decoder(CUDA)]: " << pre_process_time << "ms pre-process, "
                 << process_time << "ms inference, " << post_process_time
                 << "ms post-process." << std::endl;
     } else {
-      std::cout << "[SAM(CPU)]: " << pre_process_time << "ms pre-process, "
+      std::cout << "[SAM_decoder(CPU)]: " << pre_process_time << "ms pre-process, "
                 << process_time << "ms inference, " << post_process_time
                 << "ms post-process." << std::endl;
     }

From b4d5bcb7c6b4a62084e661ae4472477f91423f4a Mon Sep 17 00:00:00 2001
From: IasonTheodorou <iasonth95@gmail.com>
Date: Tue, 23 Sep 2025 20:13:06 +0200
Subject: [PATCH 24/28] Deleted redundant code and fixed some brackets

---
 include/dl_types.h      | 24 +++---------------------
 include/sam_inference.h |  1 -
 src/sam_inference.cpp   | 15 ++++++---------
 src/segmentation.cpp    |  4 ----
 4 files changed, 9 insertions(+), 35 deletions(-)

diff --git a/include/dl_types.h b/include/dl_types.h
index 033df56..0f5874d 100644
--- a/include/dl_types.h
+++ b/include/dl_types.h
@@ -10,15 +10,8 @@ namespace SEG
 {
     enum MODEL_TYPE
     {
-        // FLOAT32 MODEL
         SAM_SEGMENT_ENCODER = 1,
         SAM_SEGMENT_DECODER = 2,
-        // YOLO_CLS = 3,
-
-        // FLOAT16 MODEL
-        // YOLO_DETECT_V8_HALF = 4,
-        // YOLO_POSE_V8_HALF = 5,
-        // YOLO_CLS_HALF = 6
     };
 
     typedef struct _DL_INIT_PARAM
@@ -27,13 +20,9 @@ namespace SEG
         std::string modelPath;
         MODEL_TYPE modelType = SAM_SEGMENT_ENCODER;
         std::vector<int> imgSize = {640, 640};
-        float rectConfidenceThreshold = 0.6;
-        float iouThreshold = 0.5;
-        int keyPointsNum = 2; // Note:kpt number for pose
         bool cudaEnable = false;
         int logSeverityLevel = 3;
         int intraOpNumThreads = 1;
-        // std::vector<cv::Rect> boxes; // For SAM encoder model, this will be filled with detected boxes
 
         // Overloaded output operator for _DL_INIT_PARAM to print its contents
         friend std::ostream &operator<<(std::ostream &os, const _DL_INIT_PARAM &param)
@@ -44,9 +33,6 @@ namespace SEG
             for (const auto &size : param.imgSize)
                 os << size << " ";
             os << "\n";
-            os << "rectConfidenceThreshold: " << param.rectConfidenceThreshold << "\n";
-            os << "iouThreshold: " << param.iouThreshold << "\n";
-            os << "keyPointsNum: " << param.keyPointsNum << "\n";
             os << "cudaEnable: " << (param.cudaEnable ? "true" : "false") << "\n";
             os << "logSeverityLevel: " << param.logSeverityLevel << "\n";
             os << "intraOpNumThreads: " << param.intraOpNumThreads;
@@ -57,14 +43,10 @@ namespace SEG
 
     typedef struct _DL_RESULT
     {
-
-        std::vector<cv::Rect> boxes; // For SAM encoder model, this will be filled with detected boxes
-        std::vector<cv::Point2f> keyPoints;
-
-        // Sam Part
+        // For SAM encoder model, this will be filled with detected boxes from object detection model.
+        std::vector<cv::Rect> boxes;
         std::vector<float> embeddings;
-        // Masks for SAM decoder model output
-        std::vector<cv::Mat> masks; // Each cv::Mat represents a mask
+        std::vector<cv::Mat> masks;
 
     } DL_RESULT;
 } // namespace SEG
diff --git a/include/sam_inference.h b/include/sam_inference.h
index 6b4713e..250e254 100644
--- a/include/sam_inference.h
+++ b/include/sam_inference.h
@@ -43,7 +43,6 @@ class SAM
     SEG::MODEL_TYPE _modelType;
     std::vector<int> _imgSize;
     float _rectConfidenceThreshold;
-    float _iouThreshold;
 };
 
 #endif // SAMINFERENCE_H
diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp
index 8e61ec4..444163f 100644
--- a/src/sam_inference.cpp
+++ b/src/sam_inference.cpp
@@ -51,8 +51,6 @@ const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) {
     return Ret;
   }
   try {
-    _rectConfidenceThreshold = iParams.rectConfidenceThreshold;
-    _iouThreshold = iParams.iouThreshold;
     _imgSize = iParams.imgSize;
     _modelType = iParams.modelType;
     _cudaEnable = iParams.cudaEnable;
@@ -116,7 +114,8 @@ const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) {
 
 const char *SAM::RunSession(const cv::Mat &iImg,
                             std::vector<SEG::DL_RESULT> &oResult,
-                            SEG::MODEL_TYPE _modelType, SEG::DL_RESULT &result) {
+                            SEG::MODEL_TYPE _modelType, SEG::DL_RESULT &result)
+{
 #ifdef benchmark
   clock_t starttime_1 = clock();
 #endif // benchmark
@@ -133,19 +132,19 @@ const char *SAM::RunSession(const cv::Mat &iImg,
   } else if (_modelType == SEG::SAM_SEGMENT_DECODER) {
     inputNodeDims = {1, 256, 64, 64};
   }
-  TensorProcess(starttime_1, iImg, blob, inputNodeDims, _modelType, oResult,
+  TensorProcess_(starttime_1, iImg, blob, inputNodeDims, _modelType, oResult,
                 utilities, result);
 
   return Ret;
 }
 
 template <typename N>
-const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg,
+const char *SAM::TensorProcess_(clock_t &starttime_1, const cv::Mat &iImg,
                                N &blob, std::vector<int64_t> &inputNodeDims,
                                SEG::MODEL_TYPE _modelType,
                                std::vector<SEG::DL_RESULT> &oResult,
-                               Utils &utilities, SEG::DL_RESULT &result) {
-
+                               Utils &utilities, SEG::DL_RESULT &result)
+{
   switch (_modelType) {
   case SEG::SAM_SEGMENT_ENCODER:
     // case OTHER_SAM_MODEL:
@@ -235,7 +234,6 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg,
 #ifdef ROI
     for (const auto &box : boundingBoxes)
 #else
-
     for (const auto &box : result.boxes)
 #endif // ROI
     {
@@ -410,7 +408,6 @@ char *SAM::WarmUpSession(SEG::MODEL_TYPE _modelType) {
           inputTensors.size(), _outputNodeNames.data(), _outputNodeNames.size());
     }
 
-    _outputNodeNames.size();
     delete[] blob;
     clock_t starttime_4 = clock();
     double post_process_time =
diff --git a/src/segmentation.cpp b/src/segmentation.cpp
index 41176de..0394db3 100644
--- a/src/segmentation.cpp
+++ b/src/segmentation.cpp
@@ -13,8 +13,6 @@ Initializer() {
   SEG::DL_INIT_PARAM params_decoder;
   SEG::DL_RESULT res;
   std::vector<SEG::DL_RESULT> resSam;
-  params_encoder.rectConfidenceThreshold = 0.1;
-  params_encoder.iouThreshold = 0.5;
   params_encoder.modelPath = "/home/amigo/Documents/repos/sam_onnx_ros/build/SAM_encoder.onnx";
   params_encoder.imgSize = {1024, 1024};
 
@@ -42,8 +40,6 @@ void SegmentAnything(std::vector<std::unique_ptr<SAM>> &samSegmentors,
                 const SEG::DL_INIT_PARAM &params_decoder, const cv::Mat &img, std::vector<SEG::DL_RESULT> &resSam,
   SEG::DL_RESULT &res) {
 
-
-
   SEG::MODEL_TYPE modelTypeRef = params_encoder.modelType;
   samSegmentors[0]->RunSession(img, resSam, modelTypeRef, res);
 

From 3a55f754a5ed9d72a46e3d50df50cd46b28e4385 Mon Sep 17 00:00:00 2001
From: IasonTheodorou <iasonth95@gmail.com>
Date: Tue, 23 Sep 2025 20:13:49 +0200
Subject: [PATCH 25/28] Made private the member methods that needed to be

---
 include/sam_inference.h |  9 ++++-----
 src/sam_inference.cpp   | 12 ++++++++----
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/include/sam_inference.h b/include/sam_inference.h
index 250e254..c3694f0 100644
--- a/include/sam_inference.h
+++ b/include/sam_inference.h
@@ -24,15 +24,14 @@ class SAM
 
     const char *RunSession(const cv::Mat &iImg, std::vector<SEG::DL_RESULT> &oResult, SEG::MODEL_TYPE modelType, SEG::DL_RESULT &result);
 
-    char *WarmUpSession(SEG::MODEL_TYPE modelType);
+private:
+
+    char *WarmUpSession_(SEG::MODEL_TYPE modelType);
 
     template <typename N>
-    const char *TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, N &blob, std::vector<int64_t> &inputNodeDims,
+    const char *TensorProcess_(clock_t &starttime_1, const cv::Mat &iImg, N &blob, std::vector<int64_t> &inputNodeDims,
                         SEG::MODEL_TYPE modelType, std::vector<SEG::DL_RESULT> &oResult, Utils &utilities, SEG::DL_RESULT &result);
 
-    std::vector<std::string> classes{};
-
-private:
     Ort::Env _env;
     std::unique_ptr<Ort::Session> _session;
     bool _cudaEnable;
diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp
index 444163f..e8622ef 100644
--- a/src/sam_inference.cpp
+++ b/src/sam_inference.cpp
@@ -5,9 +5,12 @@
 #define benchmark
 //#define ROI
 
-SAM::SAM() {}
+SAM::SAM()
+{
+}
 
-SAM::~SAM() {
+SAM::~SAM()
+{
   // Clean up input/output node names
   for (auto &name : _inputNodeNames) {
     delete[] name;
@@ -98,7 +101,7 @@ const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) {
                            .GetTensorTypeAndShapeInfo()
                            .GetElementType();
 
-    WarmUpSession(_modelType);
+    WarmUpSession_(_modelType);
     return RET_OK;
   } catch (const std::exception &e) {
     const char *str1 = "[SAM]:";
@@ -320,7 +323,8 @@ const char *SAM::TensorProcess_(clock_t &starttime_1, const cv::Mat &iImg,
   return RET_OK;
 }
 
-char *SAM::WarmUpSession(SEG::MODEL_TYPE _modelType) {
+char *SAM::WarmUpSession_(SEG::MODEL_TYPE _modelType)
+{
   clock_t starttime_1 = clock();
   Utils utilities;
   cv::Mat iImg = cv::Mat(cv::Size(_imgSize.at(0), _imgSize.at(1)), CV_8UC3);

From 8aeb6577dc625b449073819281819dd93f725ec2 Mon Sep 17 00:00:00 2001
From: IasonTheodorou <iasonth95@gmail.com>
Date: Wed, 24 Sep 2025 15:39:03 +0200
Subject: [PATCH 26/28] Fixed structure of CMakeLists and package.xml and
 logged with console bridge

---
 CMakeLists.txt        | 132 +++++++++++++++++++++++++-----------------
 package.xml           |   5 ++
 src/sam_inference.cpp |  46 +++++++++------
 3 files changed, 112 insertions(+), 71 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 12ca294..eaec0e5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,19 +3,17 @@ cmake_minimum_required(VERSION 3.14)
 project(sam_onnx_ros)
 
 # -------------- CMake Policies ------------------#
-#add_compile_options(-Wall -Werror=all)
-#add_compile_options(-Wextra -Werror=extra)
+# add_compile_options(-Wall -Werror=all)
+# add_compile_options(-Wextra -Werror=extra)
 
 # -------------- Support C++17 for using filesystem  ------------------#
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
-set(CMAKE_CXX_EXTENSIONS ON)
-#set(CMAKE_INCLUDE_CURRENT_DIR ON)
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+
 # -------------- OpenCV  ------------------#
 find_package(OpenCV REQUIRED)
-include_directories(${OpenCV_INCLUDE_DIRS})
-
+find_package(console_bridge REQUIRED)
 
 # -------------- ONNXRuntime  ------------------#
 set(ONNXRUNTIME_VERSION 1.21.0)
@@ -26,94 +24,124 @@ include_directories(${ONNXRUNTIME_ROOT}/include)
 add_definitions(-DUSE_CUDA=1)
 include_directories(/usr/local/cuda/include)
 
+# -------------- Models ------------------#
+# TODO: Find proper folder Copy sam_<model>.onnx file to the same folder of the executable file
+configure_file(~/Documents/repos/hero_sam.bak/sam_inference/model/SAM_mask_decoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_mask_decoder.onnx  COPYONLY)
+configure_file(~/Documents/repos/hero_sam.bak/sam_inference/model/SAM_encoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_encoder.onnx COPYONLY)
+
+
 find_package(catkin REQUIRED
   COMPONENTS
-  # roscpp
-  # tue_config
-  # tue_filesystem
-  # code_profiler
-
+  rosconsole
+  console_bridge
   #onnxruntime_ros
 )
 
+find_package(console_bridge REQUIRED)
+
 # ------------------------------------------------------------------------------------------------
 #                                        CATKIN EXPORT
 # ------------------------------------------------------------------------------------------------
 
 catkin_package(
   INCLUDE_DIRS include
-  #LIBRARIES ${PROJECT_NAME}
-  LIBRARIES sam_onnx_ros_core
+  LIBRARIES ${PROJECT_NAME}_lib
   CATKIN_DEPENDS
-  DEPENDS OpenCV
+  DEPENDS OpenCV console_bridge
 )
 
 # ------------------------------------------------------------------------------------------------
 #                                           BUILD
 # ------------------------------------------------------------------------------------------------
 
-include_directories(
-  include
-  SYSTEM
-  ${OpenCV_INCLUDE_DIRS}
-  ${catkin_INCLUDE_DIRS}
-)
-
-set(PROJECT_SOURCES
-        src/main.cpp
+# Build core library
+add_library(${PROJECT_NAME}_lib
         src/sam_inference.cpp
         src/segmentation.cpp
         src/utils.cpp
 )
 
-# Build core library (no main.cpp here)
-add_library(sam_onnx_ros_core
-        src/sam_inference.cpp
-        src/segmentation.cpp
-        src/utils.cpp
+target_include_directories(${PROJECT_NAME}_lib
+  PUBLIC
+    include
+    SYSTEM
+    ${OpenCV_INCLUDE_DIRS}
+    ${catkin_INCLUDE_DIRS}
+    ${console_bridge_INCLUDE_DIRS}
+    ${ONNXRUNTIME_ROOT}/include
 )
-target_link_libraries(sam_onnx_ros_core
+
+
+target_link_libraries(${PROJECT_NAME}_lib
     ${OpenCV_LIBS}
     ${catkin_LIBRARIES}
     ${ONNXRUNTIME_ROOT}/lib/libonnxruntime.so
 )
-target_include_directories(sam_onnx_ros_core PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
 
 # Main executable links the core lib
-add_executable(${PROJECT_NAME} src/main.cpp)
-target_link_libraries(${PROJECT_NAME} sam_onnx_ros_core)
+add_executable(${PROJECT_NAME}
+  src/main.cpp
+)
 
-# Copy sam_<model>.onnx file to the same folder of the executable file
-configure_file(~/Documents/repos/hero_sam.bak/sam_inference/model/SAM_mask_decoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_mask_decoder.onnx  COPYONLY)
-configure_file(~/Documents/repos/hero_sam.bak/sam_inference/model/SAM_encoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_encoder.onnx COPYONLY)
+target_link_libraries(${PROJECT_NAME}
+  ${PROJECT_NAME}_lib
+  ${catkin_LIBRARIES}
+  ${ONNXRUNTIME_ROOT}/lib/libonnxruntime.so
+)
+
+# ------------------------------------------------------------------------------------------------
+#                                           Install Targets
+# ------------------------------------------------------------------------------------------------
+
+install(
+  DIRECTORY include/
+  DESTINATION ${CATKIN_GLOBAL_INCLUDE_DESTINATION}
+)
 
-# Create folder name images in the same folder of the executable file
-add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD
-    COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/images
+install(
+  TARGETS
+    ${PROJECT_NAME}_lib
+  ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
+  LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
+  RUNTIME DESTINATION ${CATKIN_GLOBAL_BIN_DESTINATION}
 )
 
-# # Enable testing
+install(
+  TARGETS
+    ${PROJECT_NAME}
+  DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
+)
+
+# ------------------------------------------------------------------------------------------------
+#                                           Testing
+# ------------------------------------------------------------------------------------------------
 if (CATKIN_ENABLE_TESTING)
-#   find_package(catkin_lint_cmake REQUIRED)
-#   catkin_add_catkin_lint_test("-W2 --ignore HEADER_OUTSIDE_PACKAGE_INCLUDE_PATH")
+  find_package(catkin_lint_cmake REQUIRED)
+  catkin_add_catkin_lint_test("-W2 --ignore HEADER_OUTSIDE_PACKAGE_INCLUDE_PATH")
 
- # Utils unit tests (no models needed)
+  # Utils unit tests (no models needed)
   catkin_add_gtest(utils_tests test/test_utils.cpp)
   if(TARGET utils_tests)
-    target_link_libraries(utils_tests sam_onnx_ros_core GTest::gtest_main ${catkin_LIBRARIES})
-    target_include_directories(utils_tests PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
+    target_link_libraries(
+      utils_tests
+      ${PROJECT_NAME}_lib
+      ${catkin_LIBRARIES}
+      GTest::gtest
+      GTest::gtest_main
+      )
+    #target_include_directories(utils_tests PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
   endif()
 
   # SAM integration-ish tests (may need models)
   catkin_add_gtest(sam_tests test/sam_test.cpp)
   if(TARGET sam_tests)
-    target_link_libraries(sam_tests sam_onnx_ros_core GTest::gtest_main ${catkin_LIBRARIES})
-    target_include_directories(sam_tests PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
+    target_link_libraries(
+      sam_tests
+      ${PROJECT_NAME}_lib
+      ${catkin_LIBRARIES}
+      GTest::gtest
+      GTest::gtest_main
+    )
+    #target_include_directories(sam_tests PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
   endif()
 endif()
-
-
-
-#If you want to debug
-# set(CMAKE_BUILD_TYPE Debug)
-# set(CMAKE_CXX_FLAGS_DEBUG "-g")
diff --git a/package.xml b/package.xml
index 7d17b8d..2228902 100644
--- a/package.xml
+++ b/package.xml
@@ -12,13 +12,18 @@
 
   <license>BSD</license>
 
+
   <buildtool_depend>catkin</buildtool_depend>
 
+  <depend>libconsole-bridge-dev</depend>
+
   <build_depend>libopencv-dev</build_depend>
   <build_depend>onnxruntime_ros</build_depend>
+  <build_depend>rosconsole</build_depend>
 
   <exec_depend>libopencv-dev</exec_depend>
   <exec_depend>onnxruntime_ros</exec_depend>
+  <exec_depend>rosconsole</exec_depend>
 
   <test_depend>catkin_lint_cmake</test_depend>
 
diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp
index e8622ef..457dcb5 100644
--- a/src/sam_inference.cpp
+++ b/src/sam_inference.cpp
@@ -1,6 +1,8 @@
 #include "sam_inference.h"
 #include "utils.h"
+
 #include <regex>
+#include <console_bridge/console.h>
 
 #define benchmark
 //#define ROI
@@ -50,7 +52,7 @@ const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) {
   if (result) {
     Ret = "[SAM]:Your model path is error.Change your model path without "
           "chinese characters.";
-    std::cout << Ret << std::endl;
+    CONSOLE_BRIDGE_logWarn("%s", Ret);
     return Ret;
   }
   try {
@@ -109,7 +111,7 @@ const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) {
     std::string str_result = std::string(str1) + std::string(str2);
     char *merged = new char[str_result.length() + 1];
     std::strcpy(merged, str_result.c_str());
-    std::cout << merged << std::endl;
+    CONSOLE_BRIDGE_logWarn("%s", merged);
     delete[] merged;
     return "[SAM]:Create _session failed.";
   }
@@ -190,13 +192,17 @@ const char *SAM::TensorProcess_(clock_t &starttime_1, const cv::Mat &iImg,
       double post_process_time =
           (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000;
       if (_cudaEnable) {
-        std::cout << "[SAM_encoder(CUDA)]: " << pre_process_time << "ms pre-process, "
-                  << process_time << "ms inference, " << post_process_time
-                  << "ms post-process." << std::endl;
+        CONSOLE_BRIDGE_logInform("[SAM_encoder(CUDA)]: %.2fms pre-process, %.2fms inference, "
+                               "%.2fms post-process.",
+                               pre_process_time, process_time,
+                               post_process_time
+                                );
       } else {
-        std::cout << "[SAM_encoder(CPU)]: " << pre_process_time << "ms pre-process, "
-                  << process_time << "ms inference, " << post_process_time
-                  << "ms post-process." << std::endl;
+        CONSOLE_BRIDGE_logInform("[SAM_encoder(CPU)]: %.2fms pre-process, %.2fms inference, "
+                               "%.2fms post-process.",
+                               pre_process_time, process_time,
+                               post_process_time
+                                );
       }
 #endif // benchmark
 
@@ -305,20 +311,24 @@ const char *SAM::TensorProcess_(clock_t &starttime_1, const cv::Mat &iImg,
     double post_process_time =
         (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000;
     if (_cudaEnable) {
-      std::cout << "[SAM_decoder(CUDA)]: " << pre_process_time << "ms pre-process, "
-                << process_time << "ms inference, " << post_process_time
-                << "ms post-process." << std::endl;
+      CONSOLE_BRIDGE_logInform("[SAM_decoder(CUDA)]: %.2fms pre-process, %.2fms inference, "
+                             "%.2fms post-process.",
+                             pre_process_time, process_time,
+                             post_process_time
+                              );
     } else {
-      std::cout << "[SAM_decoder(CPU)]: " << pre_process_time << "ms pre-process, "
-                << process_time << "ms inference, " << post_process_time
-                << "ms post-process." << std::endl;
+      CONSOLE_BRIDGE_logInform("[SAM_decoder(CPU)]: %.2fms pre-process, %.2fms inference, "
+                             "%.2fms post-process.",
+                             pre_process_time, process_time,
+                             post_process_time
+                              );
     }
 #endif // benchmark
     break;
   }
 
   default:
-    std::cout << "[SAM]: " << "Not support model type." << std::endl;
+    CONSOLE_BRIDGE_logWarn("[SAM]: " "Not support model type.");
   }
   return RET_OK;
 }
@@ -350,8 +360,7 @@ char *SAM::WarmUpSession_(SEG::MODEL_TYPE _modelType)
     double post_process_time =
         (double)(starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000;
     if (_cudaEnable) {
-      std::cout << "[SAM(CUDA)]: " << "Cuda warm-up cost " << post_process_time
-                << " ms. " << std::endl;
+      CONSOLE_BRIDGE_logInform("[SAM(CUDA)]: Cuda warm-up cost %.2f ms.", post_process_time);
     }
     break;
   }
@@ -417,8 +426,7 @@ char *SAM::WarmUpSession_(SEG::MODEL_TYPE _modelType)
     double post_process_time =
         (double)(starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000;
     if (_cudaEnable) {
-      std::cout << "[SAM(CUDA)]: " << "Cuda warm-up cost " << post_process_time
-                << " ms. " << std::endl;
+      CONSOLE_BRIDGE_logInform("[SAM(CUDA)]: Cuda warm-up cost %.2f ms.", post_process_time);
     }
 
     break;

From c7ac06acf3321c595387d59867be724b64a2cac0 Mon Sep 17 00:00:00 2001
From: IasonTheodorou <iasonth95@gmail.com>
Date: Mon, 29 Sep 2025 13:27:58 +0200
Subject: [PATCH 27/28] updated CMakeLists, included .hpp suffix and
 sam_onnx_ros include dir and configuration .hpp.in file

---
 CMakeLists.txt                                | 48 +++++++++++--------
 include/sam_onnx_ros/config.hpp.in            |  7 +++
 .../{dl_types.h => sam_onnx_ros/dl_types.hpp} |  0
 .../sam_inference.hpp}                        |  4 +-
 .../segmentation.hpp}                         |  3 +-
 include/{utils.h => sam_onnx_ros/utils.hpp}   |  4 +-
 src/main.cpp                                  |  5 +-
 src/sam_inference.cpp                         | 21 ++++----
 src/segmentation.cpp                          |  2 +-
 src/utils.cpp                                 |  4 +-
 test/sam_test.cpp                             |  7 +--
 test/test_utils.cpp                           |  3 +-
 12 files changed, 61 insertions(+), 47 deletions(-)
 create mode 100644 include/sam_onnx_ros/config.hpp.in
 rename include/{dl_types.h => sam_onnx_ros/dl_types.hpp} (100%)
 rename include/{sam_inference.h => sam_onnx_ros/sam_inference.hpp} (96%)
 rename include/{segmentation.h => sam_onnx_ros/segmentation.hpp} (91%)
 rename include/{utils.h => sam_onnx_ros/utils.hpp} (98%)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index eaec0e5..fcda779 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,26 +3,19 @@ cmake_minimum_required(VERSION 3.14)
 project(sam_onnx_ros)
 
 # -------------- CMake Policies ------------------#
-# add_compile_options(-Wall -Werror=all)
-# add_compile_options(-Wextra -Werror=extra)
+add_compile_options(-Wall -Werror=all)
+add_compile_options(-Wextra -Werror=extra)
 
 # -------------- Support C++17 for using filesystem  ------------------#
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
-set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 
-# -------------- OpenCV  ------------------#
-find_package(OpenCV REQUIRED)
-find_package(console_bridge REQUIRED)
 
 # -------------- ONNXRuntime  ------------------#
 set(ONNXRUNTIME_VERSION 1.21.0)
 set(ONNXRUNTIME_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/../hero_sam.bak/onnxruntime-linux-x64-gpu-1.21.1")
 include_directories(${ONNXRUNTIME_ROOT}/include)
 
-# -------------- Cuda ------------------#
-add_definitions(-DUSE_CUDA=1)
-include_directories(/usr/local/cuda/include)
 
 # -------------- Models ------------------#
 # TODO: Find proper folder Copy sam_<model>.onnx file to the same folder of the executable file
@@ -37,7 +30,20 @@ find_package(catkin REQUIRED
   #onnxruntime_ros
 )
 
-find_package(console_bridge REQUIRED)
+find_package(OpenCV REQUIRED)
+
+# -------------- Cuda ------------------#
+add_definitions(-DUSE_CUDA=1)
+include_directories(/usr/local/cuda/include)
+
+set(${PROJECT_NAME}_CUDA_ENABLED ${onnxruntime_ros_CUDA_ENABLED})
+if(onnxruntime_ros_CUDA_ENABLED)
+  find_package(CUDAToolkit REQUIRED)
+endif()
+
+configure_file(include/${PROJECT_NAME}/config.hpp.in ${CATKIN_DEVEL_PREFIX}/${CATKIN_GLOBAL_INCLUDE_DESTINATION}/${PROJECT_NAME}/config.hpp)
+# add_custom_target(generate_config_hpp
+#   DEPENDS ${CATKIN_DEVEL_PREFIX}/${CATKIN_GLOBAL_INCLUDE_DESTINATION}/${PROJECT_NAME}/config.hpp
 
 # ------------------------------------------------------------------------------------------------
 #                                        CATKIN EXPORT
@@ -45,7 +51,7 @@ find_package(console_bridge REQUIRED)
 
 catkin_package(
   INCLUDE_DIRS include
-  LIBRARIES ${PROJECT_NAME}_lib
+  LIBRARIES ${PROJECT_NAME}
   CATKIN_DEPENDS
   DEPENDS OpenCV console_bridge
 )
@@ -54,14 +60,14 @@ catkin_package(
 #                                           BUILD
 # ------------------------------------------------------------------------------------------------
 
-# Build core library
-add_library(${PROJECT_NAME}_lib
+# Build library
+add_library(${PROJECT_NAME}
         src/sam_inference.cpp
         src/segmentation.cpp
         src/utils.cpp
 )
 
-target_include_directories(${PROJECT_NAME}_lib
+target_include_directories(${PROJECT_NAME}
   PUBLIC
     include
     SYSTEM
@@ -72,19 +78,19 @@ target_include_directories(${PROJECT_NAME}_lib
 )
 
 
-target_link_libraries(${PROJECT_NAME}_lib
+target_link_libraries(${PROJECT_NAME}
     ${OpenCV_LIBS}
     ${catkin_LIBRARIES}
     ${ONNXRUNTIME_ROOT}/lib/libonnxruntime.so
 )
 
 # Main executable links the core lib
-add_executable(${PROJECT_NAME}
+add_executable(test_${PROJECT_NAME}
   src/main.cpp
 )
 
-target_link_libraries(${PROJECT_NAME}
-  ${PROJECT_NAME}_lib
+target_link_libraries(test_${PROJECT_NAME}
+  ${PROJECT_NAME}
   ${catkin_LIBRARIES}
   ${ONNXRUNTIME_ROOT}/lib/libonnxruntime.so
 )
@@ -100,7 +106,7 @@ install(
 
 install(
   TARGETS
-    ${PROJECT_NAME}_lib
+    ${PROJECT_NAME}
   ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
   LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
   RUNTIME DESTINATION ${CATKIN_GLOBAL_BIN_DESTINATION}
@@ -116,8 +122,8 @@ install(
 #                                           Testing
 # ------------------------------------------------------------------------------------------------
 if (CATKIN_ENABLE_TESTING)
-  find_package(catkin_lint_cmake REQUIRED)
-  catkin_add_catkin_lint_test("-W2 --ignore HEADER_OUTSIDE_PACKAGE_INCLUDE_PATH")
+  #find_package(catkin_lint_cmake REQUIRED)
+  #catkin_add_catkin_lint_test("-W2 --ignore HEADER_OUTSIDE_PACKAGE_INCLUDE_PATH")
 
   # Utils unit tests (no models needed)
   catkin_add_gtest(utils_tests test/test_utils.cpp)
diff --git a/include/sam_onnx_ros/config.hpp.in b/include/sam_onnx_ros/config.hpp.in
new file mode 100644
index 0000000..2794619
--- /dev/null
+++ b/include/sam_onnx_ros/config.hpp.in
@@ -0,0 +1,7 @@
+#ifndef YOLO_ONNX_SAM_CONFIG_HPP_
+#define YOLO_ONNX_SAM_CONFIG_HPP_
+
+// CUDA-enabled flag; CMake's configure_file() substitutes sam_onnx_ros_CUDA_ENABLED here.
+#define YOLO_ONNX_SAM_CUDA_ENABLED @sam_onnx_ros_CUDA_ENABLED@
+
+#endif // YOLO_ONNX_SAM_CONFIG_HPP_
diff --git a/include/dl_types.h b/include/sam_onnx_ros/dl_types.hpp
similarity index 100%
rename from include/dl_types.h
rename to include/sam_onnx_ros/dl_types.hpp
diff --git a/include/sam_inference.h b/include/sam_onnx_ros/sam_inference.hpp
similarity index 96%
rename from include/sam_inference.h
rename to include/sam_onnx_ros/sam_inference.hpp
index c3694f0..d098d4a 100644
--- a/include/sam_inference.h
+++ b/include/sam_onnx_ros/sam_inference.hpp
@@ -4,14 +4,14 @@
 
 #define RET_OK nullptr
 #include <memory>
-#include <string>
 #include <vector>
 #include <cstdio>
-#include "utils.h"
 #ifdef USE_CUDA
 #include <cuda_fp16.h>
 #endif
 
+#include "sam_onnx_ros/utils.hpp"
+
 class SAM
 {
 public:
diff --git a/include/segmentation.h b/include/sam_onnx_ros/segmentation.hpp
similarity index 91%
rename from include/segmentation.h
rename to include/sam_onnx_ros/segmentation.hpp
index 83102e0..c7ebfd0 100644
--- a/include/segmentation.h
+++ b/include/sam_onnx_ros/segmentation.hpp
@@ -1,9 +1,8 @@
 #ifndef SEGMENTATION_H
 #define SEGMENTATION_H
 
-#include <tuple>
+#include "sam_onnx_ros/sam_inference.hpp"
 
-#include "sam_inference.h"
 std::tuple<std::vector<std::unique_ptr<SAM>>, SEG::_DL_INIT_PARAM, SEG::_DL_INIT_PARAM, SEG::DL_RESULT, std::vector<SEG::DL_RESULT>> Initializer();
 void SegmentAnything(std::vector<std::unique_ptr<SAM>>& samSegmentors, const SEG::_DL_INIT_PARAM& params_encoder, const SEG::_DL_INIT_PARAM& params_decoder, const cv::Mat& img,
 std::vector<SEG::DL_RESULT> &resSam,
diff --git a/include/utils.h b/include/sam_onnx_ros/utils.hpp
similarity index 98%
rename from include/utils.h
rename to include/sam_onnx_ros/utils.hpp
index a471512..e8084a1 100644
--- a/include/utils.h
+++ b/include/sam_onnx_ros/utils.hpp
@@ -3,15 +3,15 @@
 
 #define RET_OK nullptr
 
-#include <string>
 #include <vector>
 #include <cstdio>
 #include "onnxruntime_cxx_api.h"
-#include "dl_types.h"
 #ifdef USE_CUDA
 #include <cuda_fp16.h>
 #endif
 
+#include "sam_onnx_ros/dl_types.hpp"
+
 class Utils
 {
 public:
diff --git a/src/main.cpp b/src/main.cpp
index cd0f9dd..ed09788 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -1,9 +1,8 @@
-#include "segmentation.h"
-#include <iostream>
-#include <vector>
 #include <filesystem>
 #include <opencv2/opencv.hpp>
 
+#include "sam_onnx_ros/segmentation.hpp"
+
 int main()
 {
     // Running inference
diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp
index 457dcb5..ccfb2b3 100644
--- a/src/sam_inference.cpp
+++ b/src/sam_inference.cpp
@@ -1,9 +1,9 @@
-#include "sam_inference.h"
-#include "utils.h"
-
 #include <regex>
 #include <console_bridge/console.h>
 
+#include "sam_onnx_ros/sam_inference.hpp"
+#include "sam_onnx_ros/utils.hpp"
+
 #define benchmark
 //#define ROI
 
@@ -97,11 +97,11 @@ const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) {
     auto input_shape =
         _session->GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
 
-    auto output_shape =
-        _session->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
-    auto output_type = _session->GetOutputTypeInfo(0)
-                           .GetTensorTypeAndShapeInfo()
-                           .GetElementType();
+    // auto output_shape =
+    //     _session->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
+    // auto output_type = _session->GetOutputTypeInfo(0)
+    //                        .GetTensorTypeAndShapeInfo()
+    //                        .GetElementType();
 
     WarmUpSession_(_modelType);
     return RET_OK;
@@ -251,9 +251,10 @@ const char *SAM::TensorProcess_(clock_t &starttime_1, const cv::Mat &iImg,
           embeddings.data(), // Use the embeddings from the encoder
           embeddings.size(), // Total number of elements
           decoderInputDims.data(), decoderInputDims.size());
+
       // Use center of bounding box as foreground point
-      float centerX = box.x + box.width / 2.0;
-      float centerY = box.y + box.height / 2.0;
+      // float centerX = box.x + box.width / 2.0;
+      // float centerY = box.y + box.height / 2.0;
 
       // Convert bounding box to points
       std::vector<float> pointCoords = {
diff --git a/src/segmentation.cpp b/src/segmentation.cpp
index 0394db3..ad66eaa 100644
--- a/src/segmentation.cpp
+++ b/src/segmentation.cpp
@@ -1,4 +1,4 @@
-#include "segmentation.h"
+#include "sam_onnx_ros/segmentation.hpp"
 
 std::tuple<std::vector<std::unique_ptr<SAM>>, SEG::DL_INIT_PARAM,
            SEG::DL_INIT_PARAM, SEG::DL_RESULT, std::vector<SEG::DL_RESULT>>
diff --git a/src/utils.cpp b/src/utils.cpp
index 643dba4..8af08a3 100644
--- a/src/utils.cpp
+++ b/src/utils.cpp
@@ -1,5 +1,5 @@
-#include "utils.h"
-#include <opencv2/ximgproc/edge_filter.hpp> // for guided filter
+#include "sam_onnx_ros/utils.hpp"
+
 //#define LOGGING
 
 // Constructor
diff --git a/test/sam_test.cpp b/test/sam_test.cpp
index cd54d7e..5a64126 100644
--- a/test/sam_test.cpp
+++ b/test/sam_test.cpp
@@ -1,9 +1,10 @@
 #include <gtest/gtest.h>
 #include <opencv2/opencv.hpp>
 #include <filesystem>
-#include "segmentation.h"
-#include "sam_inference.h"
-#include "dl_types.h"
+
+#include "sam_onnx_ros/sam_inference.hpp"
+#include "sam_onnx_ros/dl_types.hpp"
+#include "sam_onnx_ros/segmentation.hpp"
 
 // This file contains higher-level (integration-ish) tests.
 // They cover object/session creation and a full pipeline run using synthetic images.
diff --git a/test/test_utils.cpp b/test/test_utils.cpp
index 92d20ab..bde7145 100644
--- a/test/test_utils.cpp
+++ b/test/test_utils.cpp
@@ -1,6 +1,7 @@
 #include <gtest/gtest.h>
 #include <opencv2/opencv.hpp>
-#include "utils.h"
+
+#include "sam_onnx_ros/utils.hpp"
 
 // This file contains small, focused unit tests for Utils.
 // We verify image preprocessing (channel conversion, aspect-preserving resize, padding)

From 013ba3bbb4001bd879a8a5ddec5644f1e306ecd6 Mon Sep 17 00:00:00 2001
From: IasonTheodorou <iasonth95@gmail.com>
Date: Tue, 30 Sep 2025 20:16:52 +0200
Subject: [PATCH 28/28] CMakeLists: find console_bridge as a standalone package instead of a catkin component

---
 CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index fcda779..ca92d96 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -26,10 +26,10 @@ configure_file(~/Documents/repos/hero_sam.bak/sam_inference/model/SAM_encoder.on
 find_package(catkin REQUIRED
   COMPONENTS
   rosconsole
-  console_bridge
+
   #onnxruntime_ros
 )
-
+find_package(console_bridge REQUIRED)
 find_package(OpenCV REQUIRED)
 
 # -------------- Cuda ------------------#