From 269a64c301ef185ed451731593c469794fd6019d Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Thu, 3 Jul 2025 15:14:16 +0200 Subject: [PATCH 01/28] Initial refactoring --- .gitignore | 14 +++++++++ CMakeLists.txt | 7 +++-- inc/segmentation.h | 11 ++++++++ src/main.cpp | 66 ++++++++----------------------------------- src/sam_inference.cpp | 2 +- src/segmentation.cpp | 53 ++++++++++++++++++++++++++++++++++ 6 files changed, 94 insertions(+), 59 deletions(-) create mode 100644 .gitignore create mode 100644 inc/segmentation.h create mode 100644 src/segmentation.cpp diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ddd4b43 --- /dev/null +++ b/.gitignore @@ -0,0 +1,14 @@ +build/ +images/* +onnxruntime*/ +onnxruntime/* +docker/* +CMakefile +CMakeCache.txt +CMakeFiles/* +cmake_install.cmake +Makefile +SPEED-SAM-C-TENSORRT/ +sam_inference/model/FastSAM-x.onnx +mask* +segmentation_results* diff --git a/CMakeLists.txt b/CMakeLists.txt index 8036a89..d1e926c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,7 +15,7 @@ include_directories(${OpenCV_INCLUDE_DIRS}) # -------------- ONNXRuntime ------------------# set(ONNXRUNTIME_VERSION 1.21.0) -set(ONNXRUNTIME_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/../onnxruntime-linux-x64-gpu-1.21.1") +set(ONNXRUNTIME_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/../hero_sam/onnxruntime-linux-x64-gpu-1.21.1") include_directories(${ONNXRUNTIME_ROOT}/include) # -------------- Cuda ------------------# @@ -25,6 +25,7 @@ include_directories(/usr/local/cuda/include) set(PROJECT_SOURCES src/main.cpp src/sam_inference.cpp + src/segmentation.cpp src/utils.cpp ) @@ -44,10 +45,10 @@ endif () # Download https://raw.githubusercontent.com/ultralytics/ultralytics/main/ultralytics/cfg/datasets/coco.yaml # and put it in the same folder of the executable file -configure_file(./model/SAM_mask_decoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_mask_decoder.onnx COPYONLY) +configure_file(../hero_sam/sam_inference/model/SAM_mask_decoder.onnx 
${CMAKE_CURRENT_BINARY_DIR}/SAM_mask_decoder.onnx COPYONLY) # Copy yolov8n.onnx file to the same folder of the executable file -configure_file(./model/SAM_encoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_encoder.onnx COPYONLY) +configure_file(../hero_sam/sam_inference/model/SAM_encoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_encoder.onnx COPYONLY) # Create folder name images in the same folder of the executable file add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD diff --git a/inc/segmentation.h b/inc/segmentation.h new file mode 100644 index 0000000..9617001 --- /dev/null +++ b/inc/segmentation.h @@ -0,0 +1,11 @@ +#include +#include +#include +#include +#include +#include + +#include "sam_inference.h" + +std::tuple>, SEG::_DL_INIT_PARAM, SEG::_DL_INIT_PARAM> Initializer(); +void SegmentAnything(std::vector>& samSegmentors, SEG::_DL_INIT_PARAM& params_encoder, SEG::_DL_INIT_PARAM& params_decoder, cv::Mat& img); \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp index 7481567..3905ead 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,67 +1,23 @@ -#include -#include -#include "sam_inference.h" -#include -#include -#include +#include "segmentation.h" - - -void SegmentAnything() { - - SAM* samSegmentor = new SAM; - SEG::DL_INIT_PARAM params; - SEG::DL_INIT_PARAM params1; - - params.rectConfidenceThreshold = 0.1; - params.iouThreshold = 0.5; - params.modelPath = "SAM_encoder.onnx"; - params.imgSize = { 1024, 1024 }; - - params1 = params; - params1.modelType = SEG::SAM_SEGMENT_DECODER; - params1.modelPath = "SAM_mask_decoder.onnx"; - - - #ifdef USE_CUDA - params.cudaEnable = true; - #else - params.cudaEnable = false; - #endif - - - - //Running inference +int main() +{ + //Running inference + std::vector> samSegmentors; + SEG::DL_INIT_PARAM params_encoder; + SEG::DL_INIT_PARAM params_decoder; + std::tie(samSegmentors, params_encoder, params_decoder) = Initializer(); std::filesystem::path current_path = std::filesystem::current_path(); - 
std::filesystem::path imgs_path = current_path / "../../pipeline/build/images"; - std::vector resSam; + std::filesystem::path imgs_path = current_path / "../../hero_sam/pipeline/build/images"; for (auto& i : std::filesystem::directory_iterator(imgs_path)) { if (i.path().extension() == ".jpg" || i.path().extension() == ".png" || i.path().extension() == ".jpeg") { std::string img_path = i.path().string(); cv::Mat img = cv::imread(img_path); + SegmentAnything(samSegmentors, params_encoder, params_decoder, img); - SEG::DL_RESULT res; - samSegmentor->CreateSession(params); - SEG::MODEL_TYPE modelTypeRef = params.modelType; - samSegmentor->RunSession(img, resSam, modelTypeRef, res); - - - - - samSegmentor->CreateSession(params1); - modelTypeRef = params1.modelType; - samSegmentor->RunSession(img, resSam, modelTypeRef, res); - std::cout << "Press any key to exit" << std::endl; - cv::imshow("Result of Detection", img); - cv::waitKey(0); - cv::destroyAllWindows(); } } -} - -int main() -{ - SegmentAnything(); + return 0; } \ No newline at end of file diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp index 3820338..69671ec 100644 --- a/src/sam_inference.cpp +++ b/src/sam_inference.cpp @@ -4,7 +4,7 @@ #include #define benchmark -//#define ROI +#define ROI // #define min(a,b) (((a) < (b)) ? 
(a) : (b)) SAM::SAM() { diff --git a/src/segmentation.cpp b/src/segmentation.cpp new file mode 100644 index 0000000..4b2c022 --- /dev/null +++ b/src/segmentation.cpp @@ -0,0 +1,53 @@ +#include "segmentation.h" + +std::tuple>, SEG::DL_INIT_PARAM, SEG::DL_INIT_PARAM> Initializer() +{ + std::vector> samSegmentors; + samSegmentors.push_back(std::make_unique()); + samSegmentors.push_back(std::make_unique()); + + std::unique_ptr samSegmentorEncoder = std::make_unique(); + std::unique_ptr samSegmentorDecoder = std::make_unique(); + SEG::DL_INIT_PARAM params_encoder; + SEG::DL_INIT_PARAM params_decoder; + + params_encoder.rectConfidenceThreshold = 0.1; + params_encoder.iouThreshold = 0.5; + params_encoder.modelPath = "SAM_encoder.onnx"; + params_encoder.imgSize = { 1024, 1024 }; + + params_decoder = params_encoder; + params_decoder.modelType = SEG::SAM_SEGMENT_DECODER; + params_decoder.modelPath = "SAM_mask_decoder.onnx"; + + + + #ifdef USE_CUDA + params_encoder.cudaEnable = true; + #else + params_encoder.cudaEnable = false; + #endif + + samSegmentorEncoder->CreateSession(params_encoder); + samSegmentorDecoder->CreateSession(params_decoder); + samSegmentors[0] = std::move(samSegmentorEncoder); + samSegmentors[1] = std::move(samSegmentorDecoder); + return {std::move(samSegmentors), params_encoder, params_decoder}; +} + +void SegmentAnything(std::vector>& samSegmentors, SEG::DL_INIT_PARAM& params_encoder, SEG::DL_INIT_PARAM& params_decoder, cv::Mat& img) { + + std::vector resSam; + SEG::DL_RESULT res; + + SEG::MODEL_TYPE modelTypeRef = params_encoder.modelType; + samSegmentors[0]->RunSession(img, resSam, modelTypeRef, res); + + + modelTypeRef = params_decoder.modelType; + samSegmentors[1]->RunSession(img, resSam, modelTypeRef, res); + std::cout << "Press any key to exit" << std::endl; + cv::imshow("Result of Detection", img); + cv::waitKey(0); + cv::destroyAllWindows(); +} From 3f0aa166208a0ecb4d18a356e1d0582c5d09bbcc Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Fri, 
4 Jul 2025 10:02:37 +0200 Subject: [PATCH 02/28] create catkin package --- package.xml | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 package.xml diff --git a/package.xml b/package.xml new file mode 100644 index 0000000..cde009a --- /dev/null +++ b/package.xml @@ -0,0 +1,29 @@ + + + + sam_onnx_ros + 0.0.0 + Segment Anything Model (SAM) segmentation + + Iason Theodorou + + ToDo + + catkin + + libopencv-dev + libopencv-dev + onnxruntime_ros + onnxruntime_ros + + catkin_lint_cmake + + doxygen + + + + + + \ No newline at end of file From fe7719659e32123483c5163f9122ccfb7ae58ac5 Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Fri, 4 Jul 2025 10:03:43 +0200 Subject: [PATCH 03/28] included CI tests --- .github/workflows/main.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 .github/workflows/main.yml diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..21edb06 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,13 @@ +name: CI + +on: [push, pull_request] + +jobs: + tue-ci: + name: TUe CI - ${{ github.event_name }} + runs-on: ubuntu-latest + steps: + - name: TUe CI + uses: tue-robotics/tue-env/ci/main@master + with: + package: ${{ github.event.repository.name }} \ No newline at end of file From 17bd100227eb9e88c1d1d1901744152befdcda04 Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Fri, 4 Jul 2025 10:04:18 +0200 Subject: [PATCH 04/28] update of CMakeLists to include some initial needed components --- CMakeLists.txt | 53 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d1e926c..9e7ed33 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,13 +1,17 @@ cmake_minimum_required(VERSION 3.5) set(PROJECT_NAME SAMOnnxRuntimeCPPInference) -project(${PROJECT_NAME} VERSION 0.0.1 LANGUAGES CXX) +project(sam_onnx_ros) + +# -------------- CMake Policies 
------------------# +#add_compile_options(-Wall -Werror=all) +#add_compile_options(-Wextra -Werror=extra) # -------------- Support C++17 for using filesystem ------------------# set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS ON) -set(CMAKE_INCLUDE_CURRENT_DIR ON) +#set(CMAKE_INCLUDE_CURRENT_DIR ON) # -------------- OpenCV ------------------# find_package(OpenCV REQUIRED) @@ -22,6 +26,33 @@ include_directories(${ONNXRUNTIME_ROOT}/include) add_definitions(-DUSE_CUDA=1) include_directories(/usr/local/cuda/include) +# find_package(catkin REQUIRED +# COMPONENTS +# onnxruntime_ros +# ) + +# ------------------------------------------------------------------------------------------------ +# CATKIN EXPORT +# ------------------------------------------------------------------------------------------------ + +# catkin_package( +# INCLUDE_DIRS include +# LIBRARIES ${PROJECT_NAME} +# CATKIN_DEPENDS +# DEPENDS OpenCV +# ) + +# ------------------------------------------------------------------------------------------------ +# BUILD +# ------------------------------------------------------------------------------------------------ + +include_directories( + include + SYSTEM + ${OpenCV_INCLUDE_DIRS} + ${catkin_INCLUDE_DIRS} +) + set(PROJECT_SOURCES src/main.cpp src/sam_inference.cpp @@ -33,21 +64,11 @@ add_executable(${PROJECT_NAME} ${PROJECT_SOURCES}) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/inc) # Link OpenCV libraries along with ONNX Runtime -target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS} ${ONNXRUNTIME_ROOT}/lib/libonnxruntime.so) - -# For Windows system, copy onnxruntime.dll to the same folder of the executable file -if (WIN32) - add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy_if_different - "${ONNXRUNTIME_ROOT}/lib/onnxruntime.dll" - $) -endif () - -# Download https://raw.githubusercontent.com/ultralytics/ultralytics/main/ultralytics/cfg/datasets/coco.yaml -# and put it in the same 
folder of the executable file -configure_file(../hero_sam/sam_inference/model/SAM_mask_decoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_mask_decoder.onnx COPYONLY) +target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS} ${catkin_LIBRARIES} ${ONNXRUNTIME_ROOT}/lib/libonnxruntime.so) -# Copy yolov8n.onnx file to the same folder of the executable file + +# Copy sam_.onnx file to the same folder of the executable file +configure_file(../hero_sam/sam_inference/model/SAM_mask_decoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_mask_decoder.onnx COPYONLY) configure_file(../hero_sam/sam_inference/model/SAM_encoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_encoder.onnx COPYONLY) # Create folder name images in the same folder of the executable file From 2ef2fc4e9b7d11bfcd774d7c7b40d96945f55eb5 Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Fri, 4 Jul 2025 13:36:46 +0200 Subject: [PATCH 05/28] Return the mask on SegmentAnything function (not working properly) --- inc/dl_types.h | 1 + inc/segmentation.h | 2 +- src/main.cpp | 11 +++++++++-- src/segmentation.cpp | 15 +++++++++++---- 4 files changed, 22 insertions(+), 7 deletions(-) diff --git a/inc/dl_types.h b/inc/dl_types.h index 54bd60f..72bd1fe 100644 --- a/inc/dl_types.h +++ b/inc/dl_types.h @@ -29,6 +29,7 @@ typedef struct _DL_INIT_PARAM int intraOpNumThreads = 1; //std::vector boxes; // For SAM encoder model, this will be filled with detected boxes + // Overloaded output operator for _DL_INIT_PARAM to print its contents friend std::ostream& operator<<(std::ostream& os, _DL_INIT_PARAM& param) { os << "modelPath: " << param.modelPath << "\n"; diff --git a/inc/segmentation.h b/inc/segmentation.h index 9617001..46e954e 100644 --- a/inc/segmentation.h +++ b/inc/segmentation.h @@ -8,4 +8,4 @@ #include "sam_inference.h" std::tuple>, SEG::_DL_INIT_PARAM, SEG::_DL_INIT_PARAM> Initializer(); -void SegmentAnything(std::vector>& samSegmentors, SEG::_DL_INIT_PARAM& params_encoder, SEG::_DL_INIT_PARAM& params_decoder, cv::Mat& img); \ No newline at 
end of file +std::vector SegmentAnything(std::vector>& samSegmentors, SEG::_DL_INIT_PARAM& params_encoder, SEG::_DL_INIT_PARAM& params_decoder, cv::Mat& img); \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp index 3905ead..5c22108 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -15,8 +15,15 @@ int main() { std::string img_path = i.path().string(); cv::Mat img = cv::imread(img_path); - SegmentAnything(samSegmentors, params_encoder, params_decoder, img); - + std::vector masks; + masks = SegmentAnything(samSegmentors, params_encoder, params_decoder, img); + for (int j = 0; j < masks.size(); j++) + { + std::cout << "Press any key to exit" << std::endl; + cv::imshow("Result of MASKS", masks[j]); + cv::waitKey(0); + cv::destroyAllWindows(); + } } } return 0; diff --git a/src/segmentation.cpp b/src/segmentation.cpp index 4b2c022..2962563 100644 --- a/src/segmentation.cpp +++ b/src/segmentation.cpp @@ -35,7 +35,7 @@ std::tuple>, SEG::DL_INIT_PARAM, SEG::DL_INIT_P return {std::move(samSegmentors), params_encoder, params_decoder}; } -void SegmentAnything(std::vector>& samSegmentors, SEG::DL_INIT_PARAM& params_encoder, SEG::DL_INIT_PARAM& params_decoder, cv::Mat& img) { +std::vector SegmentAnything(std::vector>& samSegmentors, SEG::DL_INIT_PARAM& params_encoder, SEG::DL_INIT_PARAM& params_decoder, cv::Mat& img) { std::vector resSam; SEG::DL_RESULT res; @@ -46,8 +46,15 @@ void SegmentAnything(std::vector>& samSegmentors, SEG::DL_I modelTypeRef = params_decoder.modelType; samSegmentors[1]->RunSession(img, resSam, modelTypeRef, res); - std::cout << "Press any key to exit" << std::endl; - cv::imshow("Result of Detection", img); - cv::waitKey(0); + + //cv::destroyAllWindows(); + cv::Mat finalMask = res.masks[0]; + std::cout << "Final mask size: " << finalMask.size() << std::endl; + + for (const auto& mask : res.masks) { + cv::imshow("Mask", mask); + cv::waitKey(0); + } cv::destroyAllWindows(); + return std::move(res.masks); } From 
70fd58af017182389c1b0385fe2c1a3695513e97 Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Tue, 19 Aug 2025 21:19:57 +0200 Subject: [PATCH 06/28] Updated CMake and removed not needed parts of the code --- CMakeLists.txt | 24 +++++++++++--------- {inc => include}/dl_types.h | 0 {inc => include}/sam_inference.h | 0 {inc => include}/segmentation.h | 0 {inc => include}/utils.h | 6 ----- src/sam_inference.cpp | 39 +------------------------------- src/utils.cpp | 7 ++---- 7 files changed, 16 insertions(+), 60 deletions(-) rename {inc => include}/dl_types.h (100%) rename {inc => include}/sam_inference.h (100%) rename {inc => include}/segmentation.h (100%) rename {inc => include}/utils.h (96%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9e7ed33..1270d93 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,6 +2,7 @@ cmake_minimum_required(VERSION 3.5) set(PROJECT_NAME SAMOnnxRuntimeCPPInference) project(sam_onnx_ros) +project(${PROJECT_NAME} VERSION 0.0.1 LANGUAGES CXX) # -------------- CMake Policies ------------------# #add_compile_options(-Wall -Werror=all) @@ -17,6 +18,7 @@ set(CMAKE_CXX_EXTENSIONS ON) find_package(OpenCV REQUIRED) include_directories(${OpenCV_INCLUDE_DIRS}) + # -------------- ONNXRuntime ------------------# set(ONNXRUNTIME_VERSION 1.21.0) set(ONNXRUNTIME_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/../hero_sam/onnxruntime-linux-x64-gpu-1.21.1") @@ -26,21 +28,21 @@ include_directories(${ONNXRUNTIME_ROOT}/include) add_definitions(-DUSE_CUDA=1) include_directories(/usr/local/cuda/include) -# find_package(catkin REQUIRED -# COMPONENTS -# onnxruntime_ros -# ) +find_package(catkin REQUIRED + COMPONENTS + #onnxruntime_ros +) # ------------------------------------------------------------------------------------------------ # CATKIN EXPORT # ------------------------------------------------------------------------------------------------ -# catkin_package( -# INCLUDE_DIRS include -# LIBRARIES ${PROJECT_NAME} -# CATKIN_DEPENDS -# DEPENDS OpenCV -# ) 
+catkin_package( + INCLUDE_DIRS include + LIBRARIES ${PROJECT_NAME} + CATKIN_DEPENDS + DEPENDS OpenCV +) # ------------------------------------------------------------------------------------------------ # BUILD @@ -61,7 +63,7 @@ set(PROJECT_SOURCES ) add_executable(${PROJECT_NAME} ${PROJECT_SOURCES}) -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/inc) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) # Link OpenCV libraries along with ONNX Runtime target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS} ${catkin_LIBRARIES} ${ONNXRUNTIME_ROOT}/lib/libonnxruntime.so) diff --git a/inc/dl_types.h b/include/dl_types.h similarity index 100% rename from inc/dl_types.h rename to include/dl_types.h diff --git a/inc/sam_inference.h b/include/sam_inference.h similarity index 100% rename from inc/sam_inference.h rename to include/sam_inference.h diff --git a/inc/segmentation.h b/include/segmentation.h similarity index 100% rename from inc/segmentation.h rename to include/segmentation.h diff --git a/inc/utils.h b/include/utils.h similarity index 96% rename from inc/utils.h rename to include/utils.h index 0e7a8d7..1bded56 100644 --- a/inc/utils.h +++ b/include/utils.h @@ -2,12 +2,6 @@ #define RET_OK nullptr -#ifdef _WIN32 -#include -#include -#include -#endif - #include #include #include diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp index 69671ec..77cef8e 100644 --- a/src/sam_inference.cpp +++ b/src/sam_inference.cpp @@ -71,26 +71,12 @@ const char* SAM::CreateSession(SEG::DL_INIT_PARAM& iParams) { sessionOption.AppendExecutionProvider_CUDA(cudaOption); } - //OrtTensorRTProviderOptions trtOptions{}; - //trtOptions.device_id = 0; - //trtOptions.trt_fp16_enable = true; - //sessionOption.AppendExecutionProvider_TensorRT(trtOptions); - sessionOption.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL); sessionOption.SetIntraOpNumThreads(iParams.intraOpNumThreads); sessionOption.SetLogSeverityLevel(iParams.logSeverityLevel); -#ifdef _WIN32 - int 
ModelPathSize = MultiByteToWideChar(CP_UTF8, 0, iParams.modelPath.c_str(), static_cast(iParams.modelPath.length()), nullptr, 0); - wchar_t* wide_cstr = new wchar_t[ModelPathSize + 1]; - MultiByteToWideChar(CP_UTF8, 0, iParams.modelPath.c_str(), static_cast(iParams.modelPath.length()), wide_cstr, ModelPathSize); - wide_cstr[ModelPathSize] = L'\0'; - const wchar_t* modelPath = wide_cstr; -#else const char* modelPath = iParams.modelPath.c_str(); -#endif // _WIN32 - //session = new Ort::Session(env, modelPath, sessionOption); session = std::make_unique(env, modelPath, sessionOption); Ort::AllocatorWithDefaultOptions allocator; size_t inputNodesNum = session->GetInputCount(); @@ -111,14 +97,6 @@ const char* SAM::CreateSession(SEG::DL_INIT_PARAM& iParams) { } options = Ort::RunOptions{ nullptr }; - //std::vector input_shape; - //std::vector output_shape; - //size_t input_tensor_size = 0; - //size_t output_tensor_size = 0; - //Get input and output tensor size - - //auto input_tensor_size = session->GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetElementCount(); - //auto output_tensor_size = session->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetElementCount(); auto input_shape = session->GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); auto output_shape = session->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); auto output_type = session->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetElementType(); @@ -159,13 +137,7 @@ const char* SAM::RunSession(const cv::Mat& iImg, std::vector& oR } else if (modelType == SEG::SAM_SEGMENT_DECODER) { - // For SAM decoder model, the input size is different - // Assuming the input size is 236x64x64 for the decoder - // You can adjust this based on your actual model requirements - // For example, if the input size is 1x3x236x64, you can set it as follows: - // inputNodeDims = { 1, 3, 236, 64 }; - // But here we are using 1x236x64x64 as per your original code - + // Input size or SAM decoder model is 
256x64x64 for the decoder inputNodeDims = { 1, 256, 64, 64 }; } TensorProcess(starttime_1, iImg, blob, inputNodeDims, modelType, oResult, utilities, result); @@ -209,7 +181,6 @@ const char* SAM::RunSession(const cv::Mat& iImg, std::vector& oR auto tensor_info = typeInfo.GetTensorTypeAndShapeInfo(); std::vector outputNodeDims = tensor_info.GetShape(); auto output = outputTensor.front().GetTensorMutableData::type>(); - //std::vector outputNodeDims = outputTensor.front().GetTensorTypeAndShapeInfo().GetShape(); delete[] blob; int embeddingSize = outputNodeDims[1] * outputNodeDims[2] * outputNodeDims[3]; // Flattened size @@ -234,14 +205,12 @@ const char* SAM::RunSession(const cv::Mat& iImg, std::vector& oR break; } case SEG::SAM_SEGMENT_DECODER: - //case : { // Use embeddings from the last result std::vector embeddings = result.embeddings; // Create tensor for decoder std::vector decoderInputDims = { 1, 256, 64, 64 }; // Adjust based on your decoder's requirements - // Create point coordinates and labels #ifdef ROI @@ -258,8 +227,6 @@ const char* SAM::RunSession(const cv::Mat& iImg, std::vector& oR return "[SAM]: NO valid Box."; } - //cv::Rect bbox1(138, 29, 170, 301); - std::vector boundingBoxes; boundingBoxes.push_back(bbox); #endif // ROI @@ -344,10 +311,6 @@ const char* SAM::RunSession(const cv::Mat& iImg, std::vector& oR utilities.overlay(output_tensors, iImg, imgSize, result); - //std::cout << "Press any key to exit" << std::endl; - //cv::imshow("Result of INTERMEDIATE Detection", iImg); - //cv::waitKey(0); - //cv::destroyAllWindows(); } // Add the result to oResult oResult.push_back(result); diff --git a/src/utils.cpp b/src/utils.cpp index ce75a0b..153c0ac 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -172,7 +172,7 @@ void Utils::overlay(std::vector& output_tensors, const cv::Mat& iImg } } - // 1. Calculate the dimensions the image had during preprocessing + // 1. 
Calculate the dimensions the image had during preprocessing float scale; int processedWidth, processedHeight; if (iImg.cols >= iImg.rows) { @@ -184,9 +184,6 @@ void Utils::overlay(std::vector& output_tensors, const cv::Mat& iImg processedWidth = int(iImg.cols * scale); processedHeight = imgSize[1]; } - // 2. Resize mask to match the SAM input dimensions - //cv::Mat resizedMask; - //cv::resize(mask, resizedMask, cv::Size(256, 256)); // 3. Extract the portion that corresponds to the actual image (no padding) int cropWidth = std::min(256, int(256 * processedWidth / (float)imgSize[0])); @@ -207,7 +204,7 @@ void Utils::overlay(std::vector& output_tensors, const cv::Mat& iImg } // Apply the Guided Filter - // cv::Mat filteredMask; + cv::Mat filteredMask; int radius = 2; double eps = 0.01; cv::ximgproc::guidedFilter(iImg, finalMask, finalMask, radius, eps); From 6c5c097d9f2752c2ae675bdc159095cd55ac97a1 Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Tue, 26 Aug 2025 20:34:24 +0200 Subject: [PATCH 07/28] Updated code format --- include/dl_types.h | 109 +++++----- include/sam_inference.h | 32 +-- include/utils.h | 68 +++--- src/main.cpp | 4 +- src/sam_inference.cpp | 447 ++++++++++++++++++++-------------------- src/segmentation.cpp | 19 +- src/utils.cpp | 207 ++++++++++--------- 7 files changed, 433 insertions(+), 453 deletions(-) diff --git a/include/dl_types.h b/include/dl_types.h index 72bd1fe..632c7c6 100644 --- a/include/dl_types.h +++ b/include/dl_types.h @@ -1,69 +1,66 @@ #pragma once namespace SEG { -enum MODEL_TYPE -{ - //FLOAT32 MODEL - SAM_SEGMENT_ENCODER = 1, - SAM_SEGMENT_DECODER = 2, - //YOLO_CLS = 3, - - //FLOAT16 MODEL - //YOLO_DETECT_V8_HALF = 4, - //YOLO_POSE_V8_HALF = 5, - //YOLO_CLS_HALF = 6 -}; - + enum MODEL_TYPE + { + // FLOAT32 MODEL + SAM_SEGMENT_ENCODER = 1, + SAM_SEGMENT_DECODER = 2, + // YOLO_CLS = 3, -typedef struct _DL_INIT_PARAM -{ - // Yolo & Common Part - std::string modelPath; - MODEL_TYPE modelType = SAM_SEGMENT_ENCODER; - std::vector 
imgSize = { 640, 640 }; - float rectConfidenceThreshold = 0.6; - float iouThreshold = 0.5; - int keyPointsNum = 2; //Note:kpt number for pose - bool cudaEnable = false; - int logSeverityLevel = 3; - int intraOpNumThreads = 1; - //std::vector boxes; // For SAM encoder model, this will be filled with detected boxes + // FLOAT16 MODEL + // YOLO_DETECT_V8_HALF = 4, + // YOLO_POSE_V8_HALF = 5, + // YOLO_CLS_HALF = 6 + }; - // Overloaded output operator for _DL_INIT_PARAM to print its contents - friend std::ostream& operator<<(std::ostream& os, _DL_INIT_PARAM& param) + typedef struct _DL_INIT_PARAM { - os << "modelPath: " << param.modelPath << "\n"; - os << "modelType: " << param.modelType << "\n"; - os << "imgSize: "; - for (const auto& size : param.imgSize) - os << size << " "; - os << "\n"; - os << "rectConfidenceThreshold: " << param.rectConfidenceThreshold << "\n"; - os << "iouThreshold: " << param.iouThreshold << "\n"; - os << "keyPointsNum: " << param.keyPointsNum << "\n"; - os << "cudaEnable: " << (param.cudaEnable ? 
"true" : "false") << "\n"; - os << "logSeverityLevel: " << param.logSeverityLevel << "\n"; - os << "intraOpNumThreads: " << param.intraOpNumThreads; - return os; - } - -} DL_INIT_PARAM; + // Yolo & Common Part + std::string modelPath; + MODEL_TYPE modelType = SAM_SEGMENT_ENCODER; + std::vector imgSize = {640, 640}; + float rectConfidenceThreshold = 0.6; + float iouThreshold = 0.5; + int keyPointsNum = 2; // Note:kpt number for pose + bool cudaEnable = false; + int logSeverityLevel = 3; + int intraOpNumThreads = 1; + // std::vector boxes; // For SAM encoder model, this will be filled with detected boxes + // Overloaded output operator for _DL_INIT_PARAM to print its contents + friend std::ostream &operator<<(std::ostream &os, _DL_INIT_PARAM ¶m) + { + os << "modelPath: " << param.modelPath << "\n"; + os << "modelType: " << param.modelType << "\n"; + os << "imgSize: "; + for (const auto &size : param.imgSize) + os << size << " "; + os << "\n"; + os << "rectConfidenceThreshold: " << param.rectConfidenceThreshold << "\n"; + os << "iouThreshold: " << param.iouThreshold << "\n"; + os << "keyPointsNum: " << param.keyPointsNum << "\n"; + os << "cudaEnable: " << (param.cudaEnable ? 
"true" : "false") << "\n"; + os << "logSeverityLevel: " << param.logSeverityLevel << "\n"; + os << "intraOpNumThreads: " << param.intraOpNumThreads; + return os; + } -typedef struct _DL_RESULT -{ + } DL_INIT_PARAM; - //Yolo Part - int classId; - float confidence; - std::vector boxes; // For SAM encoder model, this will be filled with detected boxes - std::vector keyPoints; + typedef struct _DL_RESULT + { - // Sam Part - std::vector embeddings; - // Masks for SAM decoder model output - std::vector masks; // Each cv::Mat represents a mask + // Yolo Part + int classId; + float confidence; + std::vector boxes; // For SAM encoder model, this will be filled with detected boxes + std::vector keyPoints; + // Sam Part + std::vector embeddings; + // Masks for SAM decoder model output + std::vector masks; // Each cv::Mat represents a mask -} DL_RESULT; + } DL_RESULT; } // namespace SEG \ No newline at end of file diff --git a/include/sam_inference.h b/include/sam_inference.h index 8a15c38..8910bda 100644 --- a/include/sam_inference.h +++ b/include/sam_inference.h @@ -1,12 +1,6 @@ #pragma once -#define RET_OK nullptr - -#ifdef _WIN32 -#include -#include -#include -#endif +#define RET_OK nullptr #include #include @@ -18,10 +12,6 @@ #include #endif - - - - class SAM { public: @@ -30,18 +20,15 @@ class SAM ~SAM(); public: + const char *CreateSession(SEG::DL_INIT_PARAM &iParams); - const char* CreateSession(SEG::DL_INIT_PARAM& iParams); - - const char* RunSession(const cv::Mat& iImg, std::vector& oResult, SEG::MODEL_TYPE modelType, SEG::DL_RESULT& result); - - char* WarmUpSession(SEG::MODEL_TYPE modelType); - - template - char* TensorProcess(clock_t& starttime_1, const cv::Mat& iImg, N& blob, std::vector& inputNodeDims, - SEG::MODEL_TYPE modelType, std::vector& oResult, Utils& utilities, SEG::DL_RESULT& result); + const char *RunSession(const cv::Mat &iImg, std::vector &oResult, SEG::MODEL_TYPE modelType, SEG::DL_RESULT &result); + char *WarmUpSession(SEG::MODEL_TYPE modelType); + 
template + char *TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, N &blob, std::vector &inputNodeDims, + SEG::MODEL_TYPE modelType, std::vector &oResult, Utils &utilities, SEG::DL_RESULT &result); std::vector classes{}; @@ -50,12 +37,11 @@ class SAM std::unique_ptr session; bool cudaEnable; Ort::RunOptions options; - std::vector inputNodeNames; - std::vector outputNodeNames; + std::vector inputNodeNames; + std::vector outputNodeNames; SEG::MODEL_TYPE modelType; std::vector imgSize; float rectConfidenceThreshold; float iouThreshold; - }; \ No newline at end of file diff --git a/include/utils.h b/include/utils.h index 1bded56..7ff7f9c 100644 --- a/include/utils.h +++ b/include/utils.h @@ -1,6 +1,6 @@ #pragma once -#define RET_OK nullptr +#define RET_OK nullptr #include #include @@ -14,42 +14,42 @@ class Utils { - public: - Utils(); - ~Utils(); - - void overlay(std::vector& output_tensors, const cv::Mat& iImg, std::vector iImgSize, SEG::DL_RESULT& result); - char* PreProcess(const cv::Mat& iImg, std::vector iImgSize, cv::Mat& oImg); - void ScaleBboxPoints(const cv::Mat& iImg, std::vector iImgSize, std::vector& pointCoords, std::vector& PointsCoordsScaled); - - std::vector PrepareInputTensor(Ort::Value& decoderInputTensor, std::vector& pointCoordsScaled, std::vector pointCoordsDims, - std::vector& pointLabels, std::vector pointLabelsDims, std::vector& maskInput, - std::vector maskInputDims, std::vector& hasMaskInput, std::vector hasMaskInputDims); - - // Definition: Flattened image to blob (and normalizaed) for deep learning inference. Also reorganize from HWC to CHW. - // Note: Code in header file since it is used outside of this utils src code. 
- template - char* BlobFromImage(const cv::Mat& iImg, T& iBlob) { - int channels = iImg.channels(); - int imgHeight = iImg.rows; - int imgWidth = iImg.cols; - - for (int c = 0; c < channels; c++) +public: + Utils(); + ~Utils(); + + void overlay(std::vector &output_tensors, const cv::Mat &iImg, std::vector iImgSize, SEG::DL_RESULT &result); + char *PreProcess(const cv::Mat &iImg, std::vector iImgSize, cv::Mat &oImg); + void ScaleBboxPoints(const cv::Mat &iImg, std::vector iImgSize, std::vector &pointCoords, std::vector &PointsCoordsScaled); + + std::vector PrepareInputTensor(Ort::Value &decoderInputTensor, std::vector &pointCoordsScaled, std::vector pointCoordsDims, + std::vector &pointLabels, std::vector pointLabelsDims, std::vector &maskInput, + std::vector maskInputDims, std::vector &hasMaskInput, std::vector hasMaskInputDims); + + // Definition: Flattened image to blob (and normalizaed) for deep learning inference. Also reorganize from HWC to CHW. + // Note: Code in header file since it is used outside of this utils src code. 
+ template + char *BlobFromImage(const cv::Mat &iImg, T &iBlob) + { + int channels = iImg.channels(); + int imgHeight = iImg.rows; + int imgWidth = iImg.cols; + + for (int c = 0; c < channels; c++) + { + for (int h = 0; h < imgHeight; h++) { - for (int h = 0; h < imgHeight; h++) + for (int w = 0; w < imgWidth; w++) { - for (int w = 0; w < imgWidth; w++) - { - iBlob[c * imgWidth * imgHeight + h * imgWidth + w] = typename std::remove_pointer::type( - (iImg.at(h, w)[c]) / 255.0f); - } + iBlob[c * imgWidth * imgHeight + h * imgWidth + w] = typename std::remove_pointer::type( + (iImg.at(h, w)[c]) / 255.0f); } } - return RET_OK; } - private: - float resizeScales; - float resizeScalesBbox; //letterbox scale + return RET_OK; + } - - }; +private: + float resizeScales; + float resizeScalesBbox; // letterbox scale +}; diff --git a/src/main.cpp b/src/main.cpp index 5c22108..3c8091d 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -2,14 +2,14 @@ int main() { - //Running inference + // Running inference std::vector> samSegmentors; SEG::DL_INIT_PARAM params_encoder; SEG::DL_INIT_PARAM params_decoder; std::tie(samSegmentors, params_encoder, params_decoder) = Initializer(); std::filesystem::path current_path = std::filesystem::current_path(); std::filesystem::path imgs_path = current_path / "../../hero_sam/pipeline/build/images"; - for (auto& i : std::filesystem::directory_iterator(imgs_path)) + for (auto &i : std::filesystem::directory_iterator(imgs_path)) { if (i.path().extension() == ".jpg" || i.path().extension() == ".png" || i.path().extension() == ".jpeg") { diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp index 77cef8e..8a07b6b 100644 --- a/src/sam_inference.cpp +++ b/src/sam_inference.cpp @@ -5,19 +5,20 @@ #define benchmark #define ROI -// #define min(a,b) (((a) < (b)) ? 
(a) : (b)) - -SAM::SAM() { +SAM::SAM() +{ } - -SAM::~SAM() { +SAM::~SAM() +{ // Clean up input/output node names - for (auto& name : inputNodeNames) { + for (auto &name : inputNodeNames) + { delete[] name; } - for (auto& name : outputNodeNames) { + for (auto &name : outputNodeNames) + { delete[] name; } } @@ -25,24 +26,30 @@ SAM::~SAM() { #ifdef USE_CUDA namespace Ort { - template<> - struct TypeToTensorType { static constexpr ONNXTensorElementDataType type = ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16; }; + template <> + struct TypeToTensorType + { + static constexpr ONNXTensorElementDataType type = ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16; + }; } #endif - -const char* SAM::CreateSession(SEG::DL_INIT_PARAM& iParams) { - const char* Ret = RET_OK; - if (session) { +const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) +{ + const char *Ret = RET_OK; + if (session) + { session.reset(); // Release previous session // Clear node names - for (auto& name : inputNodeNames) { + for (auto &name : inputNodeNames) + { delete[] name; } inputNodeNames.clear(); - for (auto& name : outputNodeNames) { + for (auto &name : outputNodeNames) + { delete[] name; } outputNodeNames.clear(); @@ -75,7 +82,7 @@ const char* SAM::CreateSession(SEG::DL_INIT_PARAM& iParams) { sessionOption.SetIntraOpNumThreads(iParams.intraOpNumThreads); sessionOption.SetLogSeverityLevel(iParams.logSeverityLevel); - const char* modelPath = iParams.modelPath.c_str(); + const char *modelPath = iParams.modelPath.c_str(); session = std::make_unique(env, modelPath, sessionOption); Ort::AllocatorWithDefaultOptions allocator; @@ -83,7 +90,7 @@ const char* SAM::CreateSession(SEG::DL_INIT_PARAM& iParams) { for (size_t i = 0; i < inputNodesNum; i++) { Ort::AllocatedStringPtr input_node_name = session->GetInputNameAllocated(i, allocator); - char* temp_buf = new char[50]; + char *temp_buf = new char[50]; strcpy(temp_buf, input_node_name.get()); inputNodeNames.push_back(temp_buf); } @@ -91,11 +98,11 @@ const char* 
SAM::CreateSession(SEG::DL_INIT_PARAM& iParams) { for (size_t i = 0; i < OutputNodesNum; i++) { Ort::AllocatedStringPtr output_node_name = session->GetOutputNameAllocated(i, allocator); - char* temp_buf = new char[10]; + char *temp_buf = new char[10]; strcpy(temp_buf, output_node_name.get()); outputNodeNames.push_back(temp_buf); } - options = Ort::RunOptions{ nullptr }; + options = Ort::RunOptions{nullptr}; auto input_shape = session->GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); auto output_shape = session->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); @@ -104,78 +111,79 @@ const char* SAM::CreateSession(SEG::DL_INIT_PARAM& iParams) { WarmUpSession(modelType); return RET_OK; } - catch (const std::exception& e) + catch (const std::exception &e) { - const char* str1 = "[SAM]:"; - const char* str2 = e.what(); + const char *str1 = "[SAM]:"; + const char *str2 = e.what(); std::string result = std::string(str1) + std::string(str2); - char* merged = new char[result.length() + 1]; + char *merged = new char[result.length() + 1]; std::strcpy(merged, result.c_str()); std::cout << merged << std::endl; delete[] merged; return "[SAM]:Create session failed."; } - } -const char* SAM::RunSession(const cv::Mat& iImg, std::vector& oResult, SEG::MODEL_TYPE modelType, SEG::DL_RESULT& result) { - #ifdef benchmark - clock_t starttime_1 = clock(); - #endif // benchmark - Utils utilities; - const char* Ret = RET_OK; - cv::Mat processedImg; - utilities.PreProcess(iImg, imgSize, processedImg); - if (modelType < 4) +const char *SAM::RunSession(const cv::Mat &iImg, std::vector &oResult, SEG::MODEL_TYPE modelType, SEG::DL_RESULT &result) +{ +#ifdef benchmark + clock_t starttime_1 = clock(); +#endif // benchmark + Utils utilities; + const char *Ret = RET_OK; + cv::Mat processedImg; + utilities.PreProcess(iImg, imgSize, processedImg); + if (modelType < 4) + { + float *blob = new float[processedImg.total() * 3]; + utilities.BlobFromImage(processedImg, blob); + 
std::vector inputNodeDims; + if (modelType == SEG::SAM_SEGMENT_ENCODER) { - float* blob = new float[processedImg.total() * 3]; - utilities.BlobFromImage(processedImg, blob); - std::vector inputNodeDims; - if (modelType == SEG::SAM_SEGMENT_ENCODER) - { - inputNodeDims = { 1, 3, imgSize.at(0), imgSize.at(1) }; - } - else if (modelType == SEG::SAM_SEGMENT_DECODER) - { - // Input size or SAM decoder model is 256x64x64 for the decoder - inputNodeDims = { 1, 256, 64, 64 }; - } - TensorProcess(starttime_1, iImg, blob, inputNodeDims, modelType, oResult, utilities, result); + inputNodeDims = {1, 3, imgSize.at(0), imgSize.at(1)}; } - else + else if (modelType == SEG::SAM_SEGMENT_DECODER) { - #ifdef USE_CUDA - half* blob = new half[processedImg.total() * 3]; - utilities.BlobFromImage(processedImg, blob); - std::vector inputNodeDims = { 1,3,imgSize.at(0),imgSize.at(1) }; - TensorProcess(starttime_1, iImg, blob, inputNodeDims, modelType, oResult, utilities, result); - #endif + // Input size or SAM decoder model is 256x64x64 for the decoder + inputNodeDims = {1, 256, 64, 64}; } - - return Ret; + TensorProcess(starttime_1, iImg, blob, inputNodeDims, modelType, oResult, utilities, result); + } + else + { +#ifdef USE_CUDA + half *blob = new half[processedImg.total() * 3]; + utilities.BlobFromImage(processedImg, blob); + std::vector inputNodeDims = {1, 3, imgSize.at(0), imgSize.at(1)}; + TensorProcess(starttime_1, iImg, blob, inputNodeDims, modelType, oResult, utilities, result); +#endif } - template - char* SAM::TensorProcess(clock_t& starttime_1, const cv::Mat& iImg, N& blob, std::vector& inputNodeDims, - SEG::MODEL_TYPE modelType, std::vector& oResult, Utils& utilities, SEG::DL_RESULT& result) { + return Ret; +} - switch (modelType) - { - case SEG::SAM_SEGMENT_ENCODER: +template +char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, N &blob, std::vector &inputNodeDims, + SEG::MODEL_TYPE modelType, std::vector &oResult, Utils &utilities, SEG::DL_RESULT &result) +{ + 
+ switch (modelType) + { + case SEG::SAM_SEGMENT_ENCODER: // case OTHER_SAM_MODEL: { Ort::Value inputTensor = Ort::Value::CreateTensor::type>( Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1), inputNodeDims.data(), inputNodeDims.size()); - #ifdef benchmark +#ifdef benchmark clock_t starttime_2 = clock(); - #endif // benchmark +#endif // benchmark auto outputTensor = session->Run(options, inputNodeNames.data(), &inputTensor, 1, outputNodeNames.data(), - outputNodeNames.size()); - #ifdef benchmark + outputNodeNames.size()); +#ifdef benchmark clock_t starttime_3 = clock(); - #endif // benchmark +#endif // benchmark Ort::TypeInfo typeInfo = outputTensor.front().GetTypeInfo(); auto tensor_info = typeInfo.GetTensorTypeAndShapeInfo(); @@ -184,10 +192,9 @@ const char* SAM::RunSession(const cv::Mat& iImg, std::vector& oR delete[] blob; int embeddingSize = outputNodeDims[1] * outputNodeDims[2] * outputNodeDims[3]; // Flattened size - result.embeddings.assign(output, output + embeddingSize); // Save embeddings + result.embeddings.assign(output, output + embeddingSize); // Save embeddings - - #ifdef benchmark +#ifdef benchmark clock_t starttime_4 = clock(); double pre_process_time = (double)(starttime_2 - starttime_1) / CLOCKS_PER_SEC * 1000; double process_time = (double)(starttime_3 - starttime_2) / CLOCKS_PER_SEC * 1000; @@ -200,149 +207,140 @@ const char* SAM::RunSession(const cv::Mat& iImg, std::vector& oR { std::cout << "[SAM(CPU)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." 
<< std::endl; } - #endif // benchmark +#endif // benchmark break; } - case SEG::SAM_SEGMENT_DECODER: - { - // Use embeddings from the last result - std::vector embeddings = result.embeddings; - // Create tensor for decoder - std::vector decoderInputDims = { 1, 256, 64, 64 }; // Adjust based on your decoder's requirements - - // Create point coordinates and labels - #ifdef ROI - - // Create a window for user interaction - namedWindow("Select and View Result", cv::WINDOW_AUTOSIZE); - - // Let the user select the bounding box - cv::Rect bbox = selectROI("Select and View Result", iImg, false, false); - - // Check if a valid bounding box was selected - if (bbox.width == 0 || bbox.height == 0) - { - std::cerr << "No valid bounding box selected." << std::endl; - return "[SAM]: NO valid Box."; - } - - std::vector boundingBoxes; - boundingBoxes.push_back(bbox); - #endif // ROI - //boundingBoxes.push_back(bbox1); - // Declare timing variables BEFORE the loop - #ifdef benchmark - clock_t starttime_2 = 0; - clock_t starttime_3 = 0; - #endif // benchmark - - #ifdef ROI - for (const auto &bbox : boundingBoxes) - #else - for (const auto &bbox : result.boxes) - #endif // ROI - { - Ort::Value decoderInputTensor = Ort::Value::CreateTensor( - Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), - embeddings.data(), // Use the embeddings from the encoder - embeddings.size(), // Total number of elements - decoderInputDims.data(), - decoderInputDims.size() - ); - // Use center of bounding box as foreground point - float centerX = bbox.x + bbox.width/2; - float centerY = bbox.y + bbox.height/2; - - // Convert bounding box to points - std::vector pointCoords = { - (float)bbox.x, (float)bbox.y, // Top-left - (float)(bbox.x + bbox.width), (float)(bbox.y + bbox.height) // Bottom-right - }; - - - std::vector pointCoordsScaled; - - std::vector pointCoordsDims = {1, 2, 2}; // 2 points, each with (x, y) - - // Labels for the points - std::vector pointLabels = {2.0f, 3.0f}; // Box 
prompt labels - std::vector pointLabelsDims = {1, 2}; - - // Create dummy mask_input and has_mask_input - std::vector maskInput(256 * 256, 0.0f); // Fill with zeros - std::vector maskInputDims = {1, 1, 256, 256}; - - - std::vector hasMaskInput = {0.0f}; // No mask provided - std::vector hasMaskInputDims = {1}; - - utilities.ScaleBboxPoints(iImg, imgSize, pointCoords, pointCoordsScaled); + case SEG::SAM_SEGMENT_DECODER: + { + // Use embeddings from the last result + std::vector embeddings = result.embeddings; + // Create tensor for decoder + std::vector decoderInputDims = {1, 256, 64, 64}; // Adjust based on your decoder's requirements + // Create point coordinates and labels +#ifdef ROI + // Create a window for user interaction + namedWindow("Select and View Result", cv::WINDOW_AUTOSIZE); + // Let the user select the bounding box + cv::Rect bbox = selectROI("Select and View Result", iImg, false, false); - std::vector inputTensors = utilities.PrepareInputTensor( - decoderInputTensor, - pointCoordsScaled, - pointCoordsDims, - pointLabels, - pointLabelsDims, - maskInput, - maskInputDims, - hasMaskInput, - hasMaskInputDims - ); - - #ifdef benchmark - starttime_2 = clock(); - #endif // benchmark - auto output_tensors = session->Run( - options, - inputNodeNames.data(), - inputTensors.data(), - inputTensors.size(), - outputNodeNames.data(), - outputNodeNames.size()); + // Check if a valid bounding box was selected + if (bbox.width == 0 || bbox.height == 0) + { + std::cerr << "No valid bounding box selected." 
<< std::endl; + return "[SAM]: NO valid Box."; + } - #ifdef benchmark - starttime_3 = clock(); - #endif // benchmark + std::vector boundingBoxes; + boundingBoxes.push_back(bbox); +#endif // ROI + // boundingBoxes.push_back(bbox1); + // Declare timing variables BEFORE the loop +#ifdef benchmark + clock_t starttime_2 = 0; + clock_t starttime_3 = 0; +#endif // benchmark + +#ifdef ROI + for (const auto &bbox : boundingBoxes) +#else + for (const auto &bbox : result.boxes) +#endif // ROI + { + Ort::Value decoderInputTensor = Ort::Value::CreateTensor( + Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), + embeddings.data(), // Use the embeddings from the encoder + embeddings.size(), // Total number of elements + decoderInputDims.data(), + decoderInputDims.size()); + // Use center of bounding box as foreground point + float centerX = bbox.x + bbox.width / 2; + float centerY = bbox.y + bbox.height / 2; + + // Convert bounding box to points + std::vector pointCoords = { + (float)bbox.x, (float)bbox.y, // Top-left + (float)(bbox.x + bbox.width), (float)(bbox.y + bbox.height) // Bottom-right + }; + std::vector pointCoordsScaled; + + std::vector pointCoordsDims = {1, 2, 2}; // 2 points, each with (x, y) + + // Labels for the points + std::vector pointLabels = {2.0f, 3.0f}; // Box prompt labels + std::vector pointLabelsDims = {1, 2}; + + // Create dummy mask_input and has_mask_input + std::vector maskInput(256 * 256, 0.0f); // Fill with zeros + std::vector maskInputDims = {1, 1, 256, 256}; + + std::vector hasMaskInput = {0.0f}; // No mask provided + std::vector hasMaskInputDims = {1}; + + utilities.ScaleBboxPoints(iImg, imgSize, pointCoords, pointCoordsScaled); + + std::vector inputTensors = utilities.PrepareInputTensor( + decoderInputTensor, + pointCoordsScaled, + pointCoordsDims, + pointLabels, + pointLabelsDims, + maskInput, + maskInputDims, + hasMaskInput, + hasMaskInputDims); + +#ifdef benchmark + starttime_2 = clock(); +#endif // benchmark + auto output_tensors 
= session->Run( + options, + inputNodeNames.data(), + inputTensors.data(), + inputTensors.size(), + outputNodeNames.data(), + outputNodeNames.size()); - utilities.overlay(output_tensors, iImg, imgSize, result); - } - // Add the result to oResult - oResult.push_back(result); +#ifdef benchmark + starttime_3 = clock(); +#endif // benchmark - delete[] blob; + utilities.overlay(output_tensors, iImg, imgSize, result); + } + // Add the result to oResult + oResult.push_back(result); - #ifdef benchmark - clock_t starttime_4 = clock(); - double pre_process_time = (double)(starttime_2 - starttime_1) / CLOCKS_PER_SEC * 1000; - double process_time = (double)(starttime_3 - starttime_2) / CLOCKS_PER_SEC * 1000; - double post_process_time = (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000; - if (cudaEnable) - { - std::cout << "[SAM(CUDA)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl; - } - else - { - std::cout << "[SAM(CPU)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl; - } - #endif // benchmark - break; - } + delete[] blob; - default: - std::cout << "[SAM]: " << "Not support model type." << std::endl; +#ifdef benchmark + clock_t starttime_4 = clock(); + double pre_process_time = (double)(starttime_2 - starttime_1) / CLOCKS_PER_SEC * 1000; + double process_time = (double)(starttime_3 - starttime_2) / CLOCKS_PER_SEC * 1000; + double post_process_time = (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000; + if (cudaEnable) + { + std::cout << "[SAM(CUDA)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl; } - return RET_OK; - + else + { + std::cout << "[SAM(CPU)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." 
<< std::endl; + } +#endif // benchmark + break; } + default: + std::cout << "[SAM]: " << "Not support model type." << std::endl; + } + return RET_OK; +} -char* SAM::WarmUpSession(SEG::MODEL_TYPE modelType) { +char *SAM::WarmUpSession(SEG::MODEL_TYPE modelType) +{ clock_t starttime_1 = clock(); Utils utilities; cv::Mat iImg = cv::Mat(cv::Size(imgSize.at(0), imgSize.at(1)), CV_8UC3); @@ -350,17 +348,18 @@ char* SAM::WarmUpSession(SEG::MODEL_TYPE modelType) { utilities.PreProcess(iImg, imgSize, processedImg); if (modelType < 4) { - float* blob = new float[iImg.total() * 3]; + float *blob = new float[iImg.total() * 3]; utilities.BlobFromImage(processedImg, blob); - std::vector SAM_input_node_dims = { 1, 3, imgSize.at(0), imgSize.at(1) }; + std::vector SAM_input_node_dims = {1, 3, imgSize.at(0), imgSize.at(1)}; switch (modelType) { - case SEG::SAM_SEGMENT_ENCODER: { + case SEG::SAM_SEGMENT_ENCODER: + { Ort::Value input_tensor = Ort::Value::CreateTensor( Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1), SAM_input_node_dims.data(), SAM_input_node_dims.size()); auto output_tensors = session->Run(options, inputNodeNames.data(), &input_tensor, 1, outputNodeNames.data(), - outputNodeNames.size()); + outputNodeNames.size()); delete[] blob; clock_t starttime_4 = clock(); double post_process_time = (double)(starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000; @@ -371,36 +370,36 @@ char* SAM::WarmUpSession(SEG::MODEL_TYPE modelType) { break; } - case SEG::SAM_SEGMENT_DECODER: { - std::vector inputNodeDims = { 1, 256, 64, 64 }; // BUG: That was 236 instead of 256 + case SEG::SAM_SEGMENT_DECODER: + { + std::vector inputNodeDims = {1, 256, 64, 64}; // BUG: That was 236 instead of 256 // Use embeddings from the last result - std::vector dummyEmbeddings(256 * 64 * 64, 1.0f); // Fill with zeros or any dummy values - std::vector decoderInputDims = { 1, 256, 64, 64 }; // Adjust based on your decoder's requirements - + std::vector 
dummyEmbeddings(256 * 64 * 64, 1.0f); // Fill with zeros or any dummy values + std::vector decoderInputDims = {1, 256, 64, 64}; // Adjust based on your decoder's requirements // Create dummy point coordinates and labels std::vector boundingBoxes = { cv::Rect(0, 0, 100, 100), // Example bounding box with (x, y, width, height) - //cv::Rect(0, 0, 473, 359) // Another example bounding box + // cv::Rect(0, 0, 473, 359) // Another example bounding box }; - for (const auto& bbox : boundingBoxes) { + for (const auto &bbox : boundingBoxes) + { Ort::Value decoderInputTensor = Ort::Value::CreateTensor( Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), dummyEmbeddings.data(), // Use the embeddings from the encoder dummyEmbeddings.size(), // Total number of elements decoderInputDims.data(), - decoderInputDims.size() - ); + decoderInputDims.size()); // Convert bounding box to points // Use center of bounding box as foreground point - float centerX = bbox.x + bbox.width/2; - float centerY = bbox.y + bbox.height/2; + float centerX = bbox.x + bbox.width / 2; + float centerY = bbox.y + bbox.height / 2; std::vector pointCoords = { - centerX, centerY // Center point (foreground) + centerX, centerY // Center point (foreground) }; - std::vector pointCoordsDims = { 1, 1, 2 }; // 2 points, each with (x, y) + std::vector pointCoordsDims = {1, 1, 2}; // 2 points, each with (x, y) std::vector pointCoordsScaled; @@ -408,14 +407,14 @@ char* SAM::WarmUpSession(SEG::MODEL_TYPE modelType) { // Labels for the points std::vector pointLabels = {1.0f}; // All points are foreground - std::vector pointLabelsDims = { 1, 1}; + std::vector pointLabelsDims = {1, 1}; // Create dummy mask_input and has_mask_input std::vector maskInput(256 * 256, 0.0f); // Fill with zeros - std::vector maskInputDims = { 1, 1, 256, 256 }; - std::vector hasMaskInput = { 0.0f }; // No mask provided - std::vector hasMaskInputDims = { 1 }; + std::vector maskInputDims = {1, 1, 256, 256}; + std::vector hasMaskInput = 
{0.0f}; // No mask provided + std::vector hasMaskInputDims = {1}; - std::vector inputTensors = utilities.PrepareInputTensor( + std::vector inputTensors = utilities.PrepareInputTensor( decoderInputTensor, pointCoordsScaled, pointCoordsDims, @@ -424,8 +423,7 @@ char* SAM::WarmUpSession(SEG::MODEL_TYPE modelType) { maskInput, maskInputDims, hasMaskInput, - hasMaskInputDims - ); + hasMaskInputDims); auto output_tensors = session->Run( options, @@ -433,8 +431,8 @@ char* SAM::WarmUpSession(SEG::MODEL_TYPE modelType) { inputTensors.data(), inputTensors.size(), outputNodeNames.data(), - outputNodeNames.size() - ); } + outputNodeNames.size()); + } outputNodeNames.size(); delete[] blob; @@ -447,15 +445,14 @@ char* SAM::WarmUpSession(SEG::MODEL_TYPE modelType) { break; } - } - + } } else { #ifdef USE_CUDA - half* blob = new half[iImg.total() * 3]; + half *blob = new half[iImg.total() * 3]; utilities.BlobFromImage(processedImg, blob); - std::vector SAM_input_node_dims = { 1,3,imgSize.at(0),imgSize.at(1) }; + std::vector SAM_input_node_dims = {1, 3, imgSize.at(0), imgSize.at(1)}; Ort::Value input_tensor = Ort::Value::CreateTensor(Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1), SAM_input_node_dims.data(), SAM_input_node_dims.size()); auto output_tensors = session->Run(options, inputNodeNames.data(), &input_tensor, 1, outputNodeNames.data(), outputNodeNames.size()); delete[] blob; diff --git a/src/segmentation.cpp b/src/segmentation.cpp index 2962563..8b5338c 100644 --- a/src/segmentation.cpp +++ b/src/segmentation.cpp @@ -14,19 +14,17 @@ std::tuple>, SEG::DL_INIT_PARAM, SEG::DL_INIT_P params_encoder.rectConfidenceThreshold = 0.1; params_encoder.iouThreshold = 0.5; params_encoder.modelPath = "SAM_encoder.onnx"; - params_encoder.imgSize = { 1024, 1024 }; + params_encoder.imgSize = {1024, 1024}; params_decoder = params_encoder; params_decoder.modelType = SEG::SAM_SEGMENT_DECODER; params_decoder.modelPath = 
"SAM_mask_decoder.onnx"; - - - #ifdef USE_CUDA +#ifdef USE_CUDA params_encoder.cudaEnable = true; - #else +#else params_encoder.cudaEnable = false; - #endif +#endif samSegmentorEncoder->CreateSession(params_encoder); samSegmentorDecoder->CreateSession(params_decoder); @@ -35,7 +33,8 @@ std::tuple>, SEG::DL_INIT_PARAM, SEG::DL_INIT_P return {std::move(samSegmentors), params_encoder, params_decoder}; } -std::vector SegmentAnything(std::vector>& samSegmentors, SEG::DL_INIT_PARAM& params_encoder, SEG::DL_INIT_PARAM& params_decoder, cv::Mat& img) { +std::vector SegmentAnything(std::vector> &samSegmentors, SEG::DL_INIT_PARAM ¶ms_encoder, SEG::DL_INIT_PARAM ¶ms_decoder, cv::Mat &img) +{ std::vector resSam; SEG::DL_RESULT res; @@ -43,15 +42,15 @@ std::vector SegmentAnything(std::vector>& samSegme SEG::MODEL_TYPE modelTypeRef = params_encoder.modelType; samSegmentors[0]->RunSession(img, resSam, modelTypeRef, res); - modelTypeRef = params_decoder.modelType; samSegmentors[1]->RunSession(img, resSam, modelTypeRef, res); - //cv::destroyAllWindows(); + // cv::destroyAllWindows(); cv::Mat finalMask = res.masks[0]; std::cout << "Final mask size: " << finalMask.size() << std::endl; - for (const auto& mask : res.masks) { + for (const auto &mask : res.masks) + { cv::imshow("Mask", mask); cv::waitKey(0); } diff --git a/src/utils.cpp b/src/utils.cpp index 153c0ac..8d76ac1 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -2,15 +2,16 @@ #include // for guided filter // Constructor -Utils::Utils(){ - +Utils::Utils() +{ } // Destructor -Utils::~Utils(){ +Utils::~Utils() +{ } -char* Utils::PreProcess(const cv::Mat& iImg, std::vector iImgSize, cv::Mat& oImg) +char *Utils::PreProcess(const cv::Mat &iImg, std::vector iImgSize, cv::Mat &oImg) { if (iImg.channels() == 3) { @@ -22,41 +23,46 @@ char* Utils::PreProcess(const cv::Mat& iImg, std::vector iImgSize, cv::Mat& cv::cvtColor(iImg, oImg, cv::COLOR_GRAY2RGB); } - - if (iImg.cols >= iImg.rows) - { - resizeScales = iImg.cols / 
(float)iImgSize.at(0); - cv::resize(oImg, oImg, cv::Size(iImgSize.at(0), int(iImg.rows / resizeScales))); - } - else - { - resizeScales = iImg.rows / (float)iImgSize.at(0); - cv::resize(oImg, oImg, cv::Size(int(iImg.cols / resizeScales), iImgSize.at(1))); - } - cv::Mat tempImg = cv::Mat::zeros(iImgSize.at(0), iImgSize.at(1), CV_8UC3); - oImg.copyTo(tempImg(cv::Rect(0, 0, oImg.cols, oImg.rows))); - oImg = tempImg; + if (iImg.cols >= iImg.rows) + { + resizeScales = iImg.cols / (float)iImgSize.at(0); + cv::resize(oImg, oImg, cv::Size(iImgSize.at(0), int(iImg.rows / resizeScales))); + } + else + { + resizeScales = iImg.rows / (float)iImgSize.at(0); + cv::resize(oImg, oImg, cv::Size(int(iImg.cols / resizeScales), iImgSize.at(1))); + } + cv::Mat tempImg = cv::Mat::zeros(iImgSize.at(0), iImgSize.at(1), CV_8UC3); + oImg.copyTo(tempImg(cv::Rect(0, 0, oImg.cols, oImg.rows))); + oImg = tempImg; return RET_OK; } -void Utils::ScaleBboxPoints(const cv::Mat& iImg, std::vector imgSize, std::vector& pointCoords, std::vector& pointCoordsScaled){ +void Utils::ScaleBboxPoints(const cv::Mat &iImg, std::vector imgSize, std::vector &pointCoords, std::vector &pointCoordsScaled) +{ pointCoordsScaled.clear(); // Calculate same scale as preprocessing float scale; - if (iImg.cols >= iImg.rows) { + if (iImg.cols >= iImg.rows) + { scale = imgSize[0] / (float)iImg.cols; resizeScalesBbox = iImg.cols / (float)imgSize[0]; - } else { + } + else + { scale = imgSize[1] / (float)iImg.rows; resizeScalesBbox = iImg.rows / (float)imgSize[1]; } - // TOP-LEFT placement (matching PreProcess) - for (size_t i = 0; i < pointCoords.size(); i += 2) { - if (i + 1 < pointCoords.size()) { + // Top-Left placement (matching PreProcess) + for (size_t i = 0; i < pointCoords.size(); i += 2) + { + if (i + 1 < pointCoords.size()) + { float x = pointCoords[i]; float y = pointCoords[i + 1]; @@ -70,68 +76,59 @@ void Utils::ScaleBboxPoints(const cv::Mat& iImg, std::vector imgSize, std:: } } -std::vector 
Utils::PrepareInputTensor(Ort::Value& decoderInputTensor, std::vector& pointCoordsScaled, std::vector pointCoordsDims, std::vector& pointLabels, - std::vector pointLabelsDims, std::vector& maskInput, std::vector maskInputDims, std::vector& hasMaskInput, std::vector hasMaskInputDims){ - -Ort::Value pointCoordsTensor = Ort::Value::CreateTensor( - Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), - pointCoordsScaled.data(), - pointCoordsScaled.size(), - pointCoordsDims.data(), - pointCoordsDims.size() -); - - - -Ort::Value pointLabelsTensor = Ort::Value::CreateTensor( - Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), - pointLabels.data(), - pointLabels.size(), - pointLabelsDims.data(), - pointLabelsDims.size() -); - - - -Ort::Value maskInputTensor = Ort::Value::CreateTensor( - Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), - maskInput.data(), - maskInput.size(), - maskInputDims.data(), - maskInputDims.size() -); - - - -Ort::Value hasMaskInputTensor = Ort::Value::CreateTensor( - Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), - hasMaskInput.data(), - hasMaskInput.size(), - hasMaskInputDims.data(), - hasMaskInputDims.size() -); - -// Pass all inputs to the decoder -std::vector inputTensors; -inputTensors.push_back(std::move(decoderInputTensor)); -inputTensors.push_back(std::move(pointCoordsTensor)); -inputTensors.push_back(std::move(pointLabelsTensor)); -inputTensors.push_back(std::move(maskInputTensor)); -inputTensors.push_back(std::move(hasMaskInputTensor)); +std::vector Utils::PrepareInputTensor(Ort::Value &decoderInputTensor, std::vector &pointCoordsScaled, std::vector pointCoordsDims, std::vector &pointLabels, + std::vector pointLabelsDims, std::vector &maskInput, std::vector maskInputDims, std::vector &hasMaskInput, std::vector hasMaskInputDims) +{ -return inputTensors; + Ort::Value pointCoordsTensor = Ort::Value::CreateTensor( + Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), + 
pointCoordsScaled.data(), + pointCoordsScaled.size(), + pointCoordsDims.data(), + pointCoordsDims.size()); + + Ort::Value pointLabelsTensor = Ort::Value::CreateTensor( + Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), + pointLabels.data(), + pointLabels.size(), + pointLabelsDims.data(), + pointLabelsDims.size()); + + Ort::Value maskInputTensor = Ort::Value::CreateTensor( + Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), + maskInput.data(), + maskInput.size(), + maskInputDims.data(), + maskInputDims.size()); + + Ort::Value hasMaskInputTensor = Ort::Value::CreateTensor( + Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), + hasMaskInput.data(), + hasMaskInput.size(), + hasMaskInputDims.data(), + hasMaskInputDims.size()); + + // Pass all inputs to the decoder + std::vector inputTensors; + inputTensors.push_back(std::move(decoderInputTensor)); + inputTensors.push_back(std::move(pointCoordsTensor)); + inputTensors.push_back(std::move(pointLabelsTensor)); + inputTensors.push_back(std::move(maskInputTensor)); + inputTensors.push_back(std::move(hasMaskInputTensor)); + + return inputTensors; } -void Utils::overlay(std::vector& output_tensors, const cv::Mat& iImg, std::vector imgSize, SEG::DL_RESULT& result){ +void Utils::overlay(std::vector &output_tensors, const cv::Mat &iImg, std::vector imgSize, SEG::DL_RESULT &result) +{ // Process decoder output (masks) if (output_tensors.size() > 0) { // Get the masks from the output tensor - auto scoresTensor = std::move(output_tensors[0]); // IoU scores - auto masksTensor = std::move(output_tensors[1]); // First output should be the masks PROBABLY WRONG + auto scoresTensor = std::move(output_tensors[0]); // IoU scores + auto masksTensor = std::move(output_tensors[1]); // First output should be the masks PROBABLY WRONG auto masksInfo = masksTensor.GetTensorTypeAndShapeInfo(); auto masksShape = masksInfo.GetShape(); - if (masksShape.size() == 4) { auto masksData = 
masksTensor.GetTensorMutableData(); @@ -142,7 +139,6 @@ void Utils::overlay(std::vector& output_tensors, const cv::Mat& iImg size_t height = masksShape[2]; // Height of mask size_t width = masksShape[3]; // Width of mask - // Find the best mask (highest IoU score) float bestScore = -1; size_t bestMaskIndex = 0; @@ -152,34 +148,39 @@ void Utils::overlay(std::vector& output_tensors, const cv::Mat& iImg float score = scoresData[i]; - if (score > bestScore) { + if (score > bestScore) + { bestScore = score; bestMaskIndex = i; } } - std::cout << "Best mask index: " << bestMaskIndex << ", Score: " << bestScore << std::endl; - // Create OpenCV Mat for the mask - cv::Mat mask = cv::Mat::zeros(height, width, CV_8UC1); + // std::cout << "Best mask index: " << bestMaskIndex << ", Score: " << bestScore << std::endl; + + // Create OpenCV Mat for the mask + cv::Mat mask = cv::Mat::zeros(height, width, CV_8UC1); - // Convert float mask to binary mask - for (size_t h = 0; h < height; ++h) + // Convert float mask to binary mask + for (size_t h = 0; h < height; ++h) + { + for (size_t w = 0; w < width; ++w) { - for (size_t w = 0; w < width; ++w) - { - size_t idx = (bestMaskIndex * height * width) + (h * width) + w; - float value = masksData[idx]; - mask.at(h, w) = (value > 0.5f) ? 255 : 0; // Threshold at 0.5 - } + size_t idx = (bestMaskIndex * height * width) + (h * width) + w; + float value = masksData[idx]; + mask.at(h, w) = (value > 0.5f) ? 255 : 0; // Threshold at 0.5 } + } // 1. 
Calculate the dimensions the image had during preprocessing float scale; int processedWidth, processedHeight; - if (iImg.cols >= iImg.rows) { + if (iImg.cols >= iImg.rows) + { scale = (float)imgSize[0] / iImg.cols; processedWidth = imgSize[0]; processedHeight = int(iImg.rows * scale); - } else { + } + else + { scale = (float)imgSize[1] / iImg.rows; processedWidth = int(iImg.cols * scale); processedHeight = imgSize[1]; @@ -238,7 +239,6 @@ void Utils::overlay(std::vector& output_tensors, const cv::Mat& iImg } }*/ - // Find contours of the mask std::vector> contours; cv::findContours(finalMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE); @@ -251,16 +251,17 @@ void Utils::overlay(std::vector& output_tensors, const cv::Mat& iImg // Draw contours with a thick, high-contrast outline cv::drawContours(iImg, contours, -1, cv::Scalar(0, 255, 255), 2); // Yellow outline - // Save or display the result cv::imwrite("segmentation_result_" + std::to_string(bestMaskIndex) + ".jpg", iImg); cv::imwrite("mask_" + std::to_string(bestMaskIndex) + ".jpg", finalMask); - }else - { - std::cerr << "[SAM]: Unexpected mask tensor shape." << std::endl; - } - }else - { - std::cerr << "[SAM]: No masks found in the output tensor." << std::endl; - } - } \ No newline at end of file + } + else + { + std::cerr << "[SAM]: Unexpected mask tensor shape." << std::endl; + } + } + else + { + std::cerr << "[SAM]: No masks found in the output tensor." 
<< std::endl; + } +} \ No newline at end of file From e10d45d557e34227e34d8422e3a8eeeed0afbb7e Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Fri, 29 Aug 2025 13:35:05 +0200 Subject: [PATCH 08/28] Small refactoring of the module --- CMakeLists.txt | 6 +- include/dl_types.h | 17 +- include/sam_inference.h | 14 +- include/segmentation.h | 13 +- include/utils.h | 6 +- src/main.cpp | 6 +- src/sam_inference.cpp | 760 +++++++++++++++++++--------------------- src/segmentation.cpp | 83 ++--- 8 files changed, 434 insertions(+), 471 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1270d93..8cb430a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.5) -set(PROJECT_NAME SAMOnnxRuntimeCPPInference) -project(sam_onnx_ros) +set(PROJECT_NAME sam_onnx_ros) + project(${PROJECT_NAME} VERSION 0.0.1 LANGUAGES CXX) # -------------- CMake Policies ------------------# @@ -13,7 +13,7 @@ set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS ON) #set(CMAKE_INCLUDE_CURRENT_DIR ON) - +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) # -------------- OpenCV ------------------# find_package(OpenCV REQUIRED) include_directories(${OpenCV_INCLUDE_DIRS}) diff --git a/include/dl_types.h b/include/dl_types.h index 632c7c6..5141284 100644 --- a/include/dl_types.h +++ b/include/dl_types.h @@ -1,4 +1,11 @@ -#pragma once +#ifndef DL_TYPES_H +#define DL_TYPES_H + +#include +#include +#include +#include + namespace SEG { enum MODEL_TYPE @@ -29,7 +36,7 @@ namespace SEG // std::vector boxes; // For SAM encoder model, this will be filled with detected boxes // Overloaded output operator for _DL_INIT_PARAM to print its contents - friend std::ostream &operator<<(std::ostream &os, _DL_INIT_PARAM ¶m) + friend std::ostream &operator<<(std::ostream &os, const _DL_INIT_PARAM ¶m) { os << "modelPath: " << param.modelPath << "\n"; os << "modelType: " << param.modelType << "\n"; @@ -51,9 +58,6 @@ namespace SEG typedef struct 
_DL_RESULT { - // Yolo Part - int classId; - float confidence; std::vector boxes; // For SAM encoder model, this will be filled with detected boxes std::vector keyPoints; @@ -63,4 +67,5 @@ namespace SEG std::vector masks; // Each cv::Mat represents a mask } DL_RESULT; -} // namespace SEG \ No newline at end of file +} // namespace SEG +#endif // DL_TYPES_H \ No newline at end of file diff --git a/include/sam_inference.h b/include/sam_inference.h index 8910bda..d63701c 100644 --- a/include/sam_inference.h +++ b/include/sam_inference.h @@ -1,12 +1,12 @@ -#pragma once +#ifndef SAMINFERENCE_H +#define SAMINFERENCE_H -#define RET_OK nullptr +#define RET_OK nullptr +#include #include #include #include -#include -#include "onnxruntime_cxx_api.h" #include "utils.h" #ifdef USE_CUDA #include @@ -27,7 +27,7 @@ class SAM char *WarmUpSession(SEG::MODEL_TYPE modelType); template - char *TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, N &blob, std::vector &inputNodeDims, + const char *TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, N &blob, std::vector &inputNodeDims, SEG::MODEL_TYPE modelType, std::vector &oResult, Utils &utilities, SEG::DL_RESULT &result); std::vector classes{}; @@ -44,4 +44,6 @@ class SAM std::vector imgSize; float rectConfidenceThreshold; float iouThreshold; -}; \ No newline at end of file +}; + +#endif // SAMINFERENCE_H \ No newline at end of file diff --git a/include/segmentation.h b/include/segmentation.h index 46e954e..b341f8d 100644 --- a/include/segmentation.h +++ b/include/segmentation.h @@ -1,11 +1,10 @@ -#include -#include -#include -#include -#include +#ifndef SEGMENTATION_H +#define SEGMENTATION_H + #include #include "sam_inference.h" - std::tuple>, SEG::_DL_INIT_PARAM, SEG::_DL_INIT_PARAM> Initializer(); -std::vector SegmentAnything(std::vector>& samSegmentors, SEG::_DL_INIT_PARAM& params_encoder, SEG::_DL_INIT_PARAM& params_decoder, cv::Mat& img); \ No newline at end of file +std::vector SegmentAnything(std::vector>& 
samSegmentors, const SEG::_DL_INIT_PARAM& params_encoder, const SEG::_DL_INIT_PARAM& params_decoder, cv::Mat& img); + +#endif // SEGMENTATION_H \ No newline at end of file diff --git a/include/utils.h b/include/utils.h index 7ff7f9c..333c9e3 100644 --- a/include/utils.h +++ b/include/utils.h @@ -1,11 +1,11 @@ -#pragma once +#ifndef UTILS_H +#define UTILS_H #define RET_OK nullptr #include #include #include -#include #include "onnxruntime_cxx_api.h" #include "dl_types.h" #ifdef USE_CUDA @@ -53,3 +53,5 @@ class Utils float resizeScales; float resizeScalesBbox; // letterbox scale }; + +#endif // UTILS_H \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp index 3c8091d..2b2d602 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,5 +1,8 @@ #include "segmentation.h" - +#include +#include +#include +#include int main() { // Running inference @@ -24,6 +27,7 @@ int main() cv::waitKey(0); cv::destroyAllWindows(); } + std::cout << "OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOo" << std::endl; } } return 0; diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp index 8a07b6b..9c0463b 100644 --- a/src/sam_inference.cpp +++ b/src/sam_inference.cpp @@ -1,468 +1,426 @@ #include "sam_inference.h" #include "utils.h" #include -#include #define benchmark #define ROI -SAM::SAM() -{ -} +SAM::SAM() {} -SAM::~SAM() -{ - // Clean up input/output node names - for (auto &name : inputNodeNames) - { - delete[] name; - } - for (auto &name : outputNodeNames) - { - delete[] name; - } +SAM::~SAM() { + // Clean up input/output node names + for (auto &name : inputNodeNames) { + delete[] name; + } + for (auto &name : outputNodeNames) { + delete[] name; + } } #ifdef USE_CUDA -namespace Ort -{ - template <> - struct TypeToTensorType - { - static constexpr ONNXTensorElementDataType type = ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16; - }; -} +namespace Ort { +template <> struct TypeToTensorType { + static constexpr ONNXTensorElementDataType type = + ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16; +}; +} 
// namespace Ort #endif -const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) -{ - const char *Ret = RET_OK; - if (session) - { - session.reset(); // Release previous session - - // Clear node names - for (auto &name : inputNodeNames) - { - delete[] name; - } - inputNodeNames.clear(); - - for (auto &name : outputNodeNames) - { - delete[] name; - } - outputNodeNames.clear(); +const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) { + const char *Ret = RET_OK; + if (session) { + session.reset(); // Release previous session + + // Clear node names + for (auto &name : inputNodeNames) { + delete[] name; } - std::regex pattern("[\u4e00-\u9fa5]"); - bool result = std::regex_search(iParams.modelPath, pattern); - if (result) - { - Ret = "[SAM]:Your model path is error.Change your model path without chinese characters."; - std::cout << Ret << std::endl; - return Ret; + inputNodeNames.clear(); + + for (auto &name : outputNodeNames) { + delete[] name; } - try - { - rectConfidenceThreshold = iParams.rectConfidenceThreshold; - iouThreshold = iParams.iouThreshold; - imgSize = iParams.imgSize; - modelType = iParams.modelType; - cudaEnable = iParams.cudaEnable; - env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, "Sam"); - Ort::SessionOptions sessionOption; - if (iParams.cudaEnable) - { - OrtCUDAProviderOptions cudaOption; - cudaOption.device_id = 0; - sessionOption.AppendExecutionProvider_CUDA(cudaOption); - } - - sessionOption.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL); - sessionOption.SetIntraOpNumThreads(iParams.intraOpNumThreads); - sessionOption.SetLogSeverityLevel(iParams.logSeverityLevel); - - const char *modelPath = iParams.modelPath.c_str(); - - session = std::make_unique(env, modelPath, sessionOption); - Ort::AllocatorWithDefaultOptions allocator; - size_t inputNodesNum = session->GetInputCount(); - for (size_t i = 0; i < inputNodesNum; i++) - { - Ort::AllocatedStringPtr input_node_name = session->GetInputNameAllocated(i, allocator); - char 
*temp_buf = new char[50]; - strcpy(temp_buf, input_node_name.get()); - inputNodeNames.push_back(temp_buf); - } - size_t OutputNodesNum = session->GetOutputCount(); - for (size_t i = 0; i < OutputNodesNum; i++) - { - Ort::AllocatedStringPtr output_node_name = session->GetOutputNameAllocated(i, allocator); - char *temp_buf = new char[10]; - strcpy(temp_buf, output_node_name.get()); - outputNodeNames.push_back(temp_buf); - } - options = Ort::RunOptions{nullptr}; - - auto input_shape = session->GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); - auto output_shape = session->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); - auto output_type = session->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetElementType(); - - WarmUpSession(modelType); - return RET_OK; + outputNodeNames.clear(); + } + std::regex pattern("[\u4e00-\u9fa5]"); + bool result = std::regex_search(iParams.modelPath, pattern); + if (result) { + Ret = "[SAM]:Your model path is error.Change your model path without " + "chinese characters."; + std::cout << Ret << std::endl; + return Ret; + } + try { + rectConfidenceThreshold = iParams.rectConfidenceThreshold; + iouThreshold = iParams.iouThreshold; + imgSize = iParams.imgSize; + modelType = iParams.modelType; + cudaEnable = iParams.cudaEnable; + env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, "Sam"); + Ort::SessionOptions sessionOption; + if (iParams.cudaEnable) { + OrtCUDAProviderOptions cudaOption; + cudaOption.device_id = 0; + sessionOption.AppendExecutionProvider_CUDA(cudaOption); } - catch (const std::exception &e) - { - const char *str1 = "[SAM]:"; - const char *str2 = e.what(); - std::string result = std::string(str1) + std::string(str2); - char *merged = new char[result.length() + 1]; - std::strcpy(merged, result.c_str()); - std::cout << merged << std::endl; - delete[] merged; - return "[SAM]:Create session failed."; + + sessionOption.SetGraphOptimizationLevel( + GraphOptimizationLevel::ORT_ENABLE_ALL); + 
sessionOption.SetIntraOpNumThreads(iParams.intraOpNumThreads); + sessionOption.SetLogSeverityLevel(iParams.logSeverityLevel); + + const char *modelPath = iParams.modelPath.c_str(); + + session = std::make_unique(env, modelPath, sessionOption); + Ort::AllocatorWithDefaultOptions allocator; + size_t inputNodesNum = session->GetInputCount(); + for (size_t i = 0; i < inputNodesNum; i++) { + Ort::AllocatedStringPtr input_node_name = + session->GetInputNameAllocated(i, allocator); + char *temp_buf = new char[50]; + strcpy(temp_buf, input_node_name.get()); + inputNodeNames.push_back(temp_buf); + } + size_t OutputNodesNum = session->GetOutputCount(); + for (size_t i = 0; i < OutputNodesNum; i++) { + Ort::AllocatedStringPtr output_node_name = + session->GetOutputNameAllocated(i, allocator); + char *temp_buf = new char[10]; + strcpy(temp_buf, output_node_name.get()); + outputNodeNames.push_back(temp_buf); } + options = Ort::RunOptions{nullptr}; + + auto input_shape = + session->GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); + auto output_shape = + session->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); + auto output_type = session->GetOutputTypeInfo(0) + .GetTensorTypeAndShapeInfo() + .GetElementType(); + + WarmUpSession(modelType); + return RET_OK; + } catch (const std::exception &e) { + const char *str1 = "[SAM]:"; + const char *str2 = e.what(); + std::string str_result = std::string(str1) + std::string(str2); + char *merged = new char[str_result.length() + 1]; + std::strcpy(merged, str_result.c_str()); + std::cout << merged << std::endl; + delete[] merged; + return "[SAM]:Create session failed."; + } } -const char *SAM::RunSession(const cv::Mat &iImg, std::vector &oResult, SEG::MODEL_TYPE modelType, SEG::DL_RESULT &result) -{ +const char *SAM::RunSession(const cv::Mat &iImg, + std::vector &oResult, + SEG::MODEL_TYPE modelType, SEG::DL_RESULT &result) { #ifdef benchmark - clock_t starttime_1 = clock(); + clock_t starttime_1 = clock(); #endif // 
benchmark - Utils utilities; - const char *Ret = RET_OK; - cv::Mat processedImg; - utilities.PreProcess(iImg, imgSize, processedImg); - if (modelType < 4) - { - float *blob = new float[processedImg.total() * 3]; - utilities.BlobFromImage(processedImg, blob); - std::vector inputNodeDims; - if (modelType == SEG::SAM_SEGMENT_ENCODER) - { - inputNodeDims = {1, 3, imgSize.at(0), imgSize.at(1)}; - } - else if (modelType == SEG::SAM_SEGMENT_DECODER) - { - // Input size or SAM decoder model is 256x64x64 for the decoder - inputNodeDims = {1, 256, 64, 64}; - } - TensorProcess(starttime_1, iImg, blob, inputNodeDims, modelType, oResult, utilities, result); - } - else - { -#ifdef USE_CUDA - half *blob = new half[processedImg.total() * 3]; - utilities.BlobFromImage(processedImg, blob); - std::vector inputNodeDims = {1, 3, imgSize.at(0), imgSize.at(1)}; - TensorProcess(starttime_1, iImg, blob, inputNodeDims, modelType, oResult, utilities, result); -#endif - } - - return Ret; + Utils utilities; + const char *Ret = RET_OK; + cv::Mat processedImg; + utilities.PreProcess(iImg, imgSize, processedImg); + float *blob = new float[processedImg.total() * 3]; + utilities.BlobFromImage(processedImg, blob); + std::vector inputNodeDims; + if (modelType == SEG::SAM_SEGMENT_ENCODER) { + inputNodeDims = {1, 3, imgSize.at(0), imgSize.at(1)}; + } else if (modelType == SEG::SAM_SEGMENT_DECODER) { + // Input size or SAM decoder model is 256x64x64 for the decoder + inputNodeDims = {1, 256, 64, 64}; + } + TensorProcess(starttime_1, iImg, blob, inputNodeDims, modelType, oResult, + utilities, result); + + return Ret; } template -char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, N &blob, std::vector &inputNodeDims, - SEG::MODEL_TYPE modelType, std::vector &oResult, Utils &utilities, SEG::DL_RESULT &result) -{ - - switch (modelType) +const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, + N &blob, std::vector &inputNodeDims, + SEG::MODEL_TYPE modelType, + std::vector 
&oResult, + Utils &utilities, SEG::DL_RESULT &result) { + + switch (modelType) { + case SEG::SAM_SEGMENT_ENCODER: + // case OTHER_SAM_MODEL: { - case SEG::SAM_SEGMENT_ENCODER: - // case OTHER_SAM_MODEL: - { - Ort::Value inputTensor = Ort::Value::CreateTensor::type>( - Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1), - inputNodeDims.data(), inputNodeDims.size()); + Ort::Value inputTensor = + Ort::Value::CreateTensor::type>( + Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), + blob, 3 * imgSize.at(0) * imgSize.at(1), inputNodeDims.data(), + inputNodeDims.size()); #ifdef benchmark - clock_t starttime_2 = clock(); + clock_t starttime_2 = clock(); #endif // benchmark - auto outputTensor = session->Run(options, inputNodeNames.data(), &inputTensor, 1, outputNodeNames.data(), - outputNodeNames.size()); + auto outputTensor = + session->Run(options, inputNodeNames.data(), &inputTensor, 1, + outputNodeNames.data(), outputNodeNames.size()); #ifdef benchmark - clock_t starttime_3 = clock(); + clock_t starttime_3 = clock(); #endif // benchmark - Ort::TypeInfo typeInfo = outputTensor.front().GetTypeInfo(); - auto tensor_info = typeInfo.GetTensorTypeAndShapeInfo(); - std::vector outputNodeDims = tensor_info.GetShape(); - auto output = outputTensor.front().GetTensorMutableData::type>(); - delete[] blob; + Ort::TypeInfo typeInfo = outputTensor.front().GetTypeInfo(); + auto tensor_info = typeInfo.GetTensorTypeAndShapeInfo(); + std::vector outputNodeDims = tensor_info.GetShape(); + auto output = + outputTensor.front() + .GetTensorMutableData::type>(); + delete[] blob; - int embeddingSize = outputNodeDims[1] * outputNodeDims[2] * outputNodeDims[3]; // Flattened size - result.embeddings.assign(output, output + embeddingSize); // Save embeddings + int embeddingSize = outputNodeDims[1] * outputNodeDims[2] * + outputNodeDims[3]; // Flattened size + result.embeddings.assign(output, + output + embeddingSize); // Save 
embeddings #ifdef benchmark - clock_t starttime_4 = clock(); - double pre_process_time = (double)(starttime_2 - starttime_1) / CLOCKS_PER_SEC * 1000; - double process_time = (double)(starttime_3 - starttime_2) / CLOCKS_PER_SEC * 1000; - double post_process_time = (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000; - if (cudaEnable) - { - std::cout << "[SAM(CUDA)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl; - } - else - { - std::cout << "[SAM(CPU)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl; - } + clock_t starttime_4 = clock(); + double pre_process_time = + (double)(starttime_2 - starttime_1) / CLOCKS_PER_SEC * 1000; + double process_time = + (double)(starttime_3 - starttime_2) / CLOCKS_PER_SEC * 1000; + double post_process_time = + (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000; + if (cudaEnable) { + std::cout << "[SAM(CUDA)]: " << pre_process_time << "ms pre-process, " + << process_time << "ms inference, " << post_process_time + << "ms post-process." << std::endl; + } else { + std::cout << "[SAM(CPU)]: " << pre_process_time << "ms pre-process, " + << process_time << "ms inference, " << post_process_time + << "ms post-process." 
<< std::endl; + } #endif // benchmark - break; - } - case SEG::SAM_SEGMENT_DECODER: - { - // Use embeddings from the last result - std::vector embeddings = result.embeddings; - // Create tensor for decoder - std::vector decoderInputDims = {1, 256, 64, 64}; // Adjust based on your decoder's requirements - - // Create point coordinates and labels + break; + } + case SEG::SAM_SEGMENT_DECODER: { + // Use embeddings from the last result + std::vector embeddings = result.embeddings; + // Create tensor for decoder + std::vector decoderInputDims = { + 1, 256, 64, 64}; // Adjust based on your decoder's requirements + + // Create point coordinates and labels #ifdef ROI - // Create a window for user interaction - namedWindow("Select and View Result", cv::WINDOW_AUTOSIZE); + // Create a window for user interaction + namedWindow("Select and View Result", cv::WINDOW_AUTOSIZE); - // Let the user select the bounding box - cv::Rect bbox = selectROI("Select and View Result", iImg, false, false); + // Let the user select the bounding box + cv::Rect bbox = selectROI("Select and View Result", iImg, false, false); - // Check if a valid bounding box was selected - if (bbox.width == 0 || bbox.height == 0) - { - std::cerr << "No valid bounding box selected." << std::endl; - return "[SAM]: NO valid Box."; - } + // Check if a valid bounding box was selected + if (bbox.width == 0 || bbox.height == 0) { + std::cerr << "No valid bounding box selected." 
<< std::endl; + return "[SAM]: NO valid Box."; + } - std::vector boundingBoxes; - boundingBoxes.push_back(bbox); + std::vector boundingBoxes; + boundingBoxes.push_back(bbox); #endif // ROI // boundingBoxes.push_back(bbox1); // Declare timing variables BEFORE the loop #ifdef benchmark - clock_t starttime_2 = 0; - clock_t starttime_3 = 0; + clock_t starttime_2 = 0; + clock_t starttime_3 = 0; #endif // benchmark #ifdef ROI - for (const auto &bbox : boundingBoxes) + for (const auto &box : boundingBoxes) #else - for (const auto &bbox : result.boxes) + for (const auto &box : result.boxes) #endif // ROI - { - Ort::Value decoderInputTensor = Ort::Value::CreateTensor( - Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), - embeddings.data(), // Use the embeddings from the encoder - embeddings.size(), // Total number of elements - decoderInputDims.data(), - decoderInputDims.size()); - // Use center of bounding box as foreground point - float centerX = bbox.x + bbox.width / 2; - float centerY = bbox.y + bbox.height / 2; - - // Convert bounding box to points - std::vector pointCoords = { - (float)bbox.x, (float)bbox.y, // Top-left - (float)(bbox.x + bbox.width), (float)(bbox.y + bbox.height) // Bottom-right - }; - - std::vector pointCoordsScaled; - - std::vector pointCoordsDims = {1, 2, 2}; // 2 points, each with (x, y) - - // Labels for the points - std::vector pointLabels = {2.0f, 3.0f}; // Box prompt labels - std::vector pointLabelsDims = {1, 2}; - - // Create dummy mask_input and has_mask_input - std::vector maskInput(256 * 256, 0.0f); // Fill with zeros - std::vector maskInputDims = {1, 1, 256, 256}; - - std::vector hasMaskInput = {0.0f}; // No mask provided - std::vector hasMaskInputDims = {1}; - - utilities.ScaleBboxPoints(iImg, imgSize, pointCoords, pointCoordsScaled); - - std::vector inputTensors = utilities.PrepareInputTensor( - decoderInputTensor, - pointCoordsScaled, - pointCoordsDims, - pointLabels, - pointLabelsDims, - maskInput, - maskInputDims, - 
hasMaskInput, - hasMaskInputDims); + { + Ort::Value decoderInputTensor = Ort::Value::CreateTensor( + Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), + embeddings.data(), // Use the embeddings from the encoder + embeddings.size(), // Total number of elements + decoderInputDims.data(), decoderInputDims.size()); + // Use center of bounding box as foreground point + float centerX = box.x + box.width / 2.0; + float centerY = box.y + box.height / 2.0; + + // Convert bounding box to points + std::vector pointCoords = { + (float)box.x, (float)box.y, // Top-left + (float)(box.x + box.width), + (float)(box.y + box.height) // Bottom-right + }; + + std::vector pointCoordsScaled; + + std::vector pointCoordsDims = {1, 2, + 2}; // 2 points, each with (x, y) + + // Labels for the points + std::vector pointLabels = {2.0f, 3.0f}; // Box prompt labels + std::vector pointLabelsDims = {1, 2}; + + // Create dummy mask_input and has_mask_input + std::vector maskInput(256 * 256, 0.0f); // Fill with zeros + std::vector maskInputDims = {1, 1, 256, 256}; + + std::vector hasMaskInput = {0.0f}; // No mask provided + std::vector hasMaskInputDims = {1}; + + utilities.ScaleBboxPoints(iImg, imgSize, pointCoords, pointCoordsScaled); + + std::vector inputTensors = utilities.PrepareInputTensor( + decoderInputTensor, pointCoordsScaled, pointCoordsDims, pointLabels, + pointLabelsDims, maskInput, maskInputDims, hasMaskInput, + hasMaskInputDims); #ifdef benchmark - starttime_2 = clock(); + starttime_2 = clock(); #endif // benchmark - auto output_tensors = session->Run( - options, - inputNodeNames.data(), - inputTensors.data(), - inputTensors.size(), - outputNodeNames.data(), - outputNodeNames.size()); + auto output_tensors = session->Run( + options, inputNodeNames.data(), inputTensors.data(), + inputTensors.size(), outputNodeNames.data(), outputNodeNames.size()); #ifdef benchmark - starttime_3 = clock(); + starttime_3 = clock(); #endif // benchmark - utilities.overlay(output_tensors, iImg, 
imgSize, result); - } - // Add the result to oResult - oResult.push_back(result); + utilities.overlay(output_tensors, iImg, imgSize, result); + } + // Add the result to oResult + oResult.push_back(result); - delete[] blob; + delete[] blob; #ifdef benchmark - clock_t starttime_4 = clock(); - double pre_process_time = (double)(starttime_2 - starttime_1) / CLOCKS_PER_SEC * 1000; - double process_time = (double)(starttime_3 - starttime_2) / CLOCKS_PER_SEC * 1000; - double post_process_time = (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000; - if (cudaEnable) - { - std::cout << "[SAM(CUDA)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl; - } - else - { - std::cout << "[SAM(CPU)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl; - } -#endif // benchmark - break; + clock_t starttime_4 = clock(); + double pre_process_time = + (double)(starttime_2 - starttime_1) / CLOCKS_PER_SEC * 1000; + double process_time = + (double)(starttime_3 - starttime_2) / CLOCKS_PER_SEC * 1000; + double post_process_time = + (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000; + if (cudaEnable) { + std::cout << "[SAM(CUDA)]: " << pre_process_time << "ms pre-process, " + << process_time << "ms inference, " << post_process_time + << "ms post-process." << std::endl; + } else { + std::cout << "[SAM(CPU)]: " << pre_process_time << "ms pre-process, " + << process_time << "ms inference, " << post_process_time + << "ms post-process." << std::endl; } +#endif // benchmark + break; + } - default: - std::cout << "[SAM]: " << "Not support model type." << std::endl; - } - return RET_OK; + default: + std::cout << "[SAM]: " << "Not support model type." 
<< std::endl; + } + return RET_OK; } -char *SAM::WarmUpSession(SEG::MODEL_TYPE modelType) -{ - clock_t starttime_1 = clock(); - Utils utilities; - cv::Mat iImg = cv::Mat(cv::Size(imgSize.at(0), imgSize.at(1)), CV_8UC3); - cv::Mat processedImg; - utilities.PreProcess(iImg, imgSize, processedImg); - if (modelType < 4) - { - float *blob = new float[iImg.total() * 3]; - utilities.BlobFromImage(processedImg, blob); - std::vector SAM_input_node_dims = {1, 3, imgSize.at(0), imgSize.at(1)}; - switch (modelType) - { - case SEG::SAM_SEGMENT_ENCODER: - { - Ort::Value input_tensor = Ort::Value::CreateTensor( - Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1), - SAM_input_node_dims.data(), SAM_input_node_dims.size()); - auto output_tensors = session->Run(options, inputNodeNames.data(), &input_tensor, 1, outputNodeNames.data(), - outputNodeNames.size()); - delete[] blob; - clock_t starttime_4 = clock(); - double post_process_time = (double)(starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000; - if (cudaEnable) - { - std::cout << "[SAM(CUDA)]: " << "Cuda warm-up cost " << post_process_time << " ms. 
" << std::endl; - } - break; - } - - case SEG::SAM_SEGMENT_DECODER: - { - std::vector inputNodeDims = {1, 256, 64, 64}; // BUG: That was 236 instead of 256 - // Use embeddings from the last result - std::vector dummyEmbeddings(256 * 64 * 64, 1.0f); // Fill with zeros or any dummy values - std::vector decoderInputDims = {1, 256, 64, 64}; // Adjust based on your decoder's requirements - - // Create dummy point coordinates and labels - std::vector boundingBoxes = { - cv::Rect(0, 0, 100, 100), // Example bounding box with (x, y, width, height) - // cv::Rect(0, 0, 473, 359) // Another example bounding box - }; - for (const auto &bbox : boundingBoxes) - { - Ort::Value decoderInputTensor = Ort::Value::CreateTensor( - Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), - dummyEmbeddings.data(), // Use the embeddings from the encoder - dummyEmbeddings.size(), // Total number of elements - decoderInputDims.data(), - decoderInputDims.size()); - // Convert bounding box to points - // Use center of bounding box as foreground point - float centerX = bbox.x + bbox.width / 2; - float centerY = bbox.y + bbox.height / 2; - - std::vector pointCoords = { - centerX, centerY // Center point (foreground) - }; - - std::vector pointCoordsDims = {1, 1, 2}; // 2 points, each with (x, y) - - std::vector pointCoordsScaled; - - utilities.ScaleBboxPoints(iImg, imgSize, pointCoords, pointCoordsScaled); - - // Labels for the points - std::vector pointLabels = {1.0f}; // All points are foreground - std::vector pointLabelsDims = {1, 1}; - // Create dummy mask_input and has_mask_input - std::vector maskInput(256 * 256, 0.0f); // Fill with zeros - std::vector maskInputDims = {1, 1, 256, 256}; - std::vector hasMaskInput = {0.0f}; // No mask provided - std::vector hasMaskInputDims = {1}; - - std::vector inputTensors = utilities.PrepareInputTensor( - decoderInputTensor, - pointCoordsScaled, - pointCoordsDims, - pointLabels, - pointLabelsDims, - maskInput, - maskInputDims, - hasMaskInput, - 
hasMaskInputDims); - - auto output_tensors = session->Run( - options, - inputNodeNames.data(), - inputTensors.data(), - inputTensors.size(), - outputNodeNames.data(), - outputNodeNames.size()); - } - - outputNodeNames.size(); - delete[] blob; - clock_t starttime_4 = clock(); - double post_process_time = (double)(starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000; - if (cudaEnable) - { - std::cout << "[SAM(CUDA)]: " << "Cuda warm-up cost " << post_process_time << " ms. " << std::endl; - } - - break; - } - } +char *SAM::WarmUpSession(SEG::MODEL_TYPE modelType) { + clock_t starttime_1 = clock(); + Utils utilities; + cv::Mat iImg = cv::Mat(cv::Size(imgSize.at(0), imgSize.at(1)), CV_8UC3); + cv::Mat processedImg; + utilities.PreProcess(iImg, imgSize, processedImg); + + float *blob = new float[iImg.total() * 3]; + utilities.BlobFromImage(processedImg, blob); + std::vector SAM_input_node_dims = {1, 3, imgSize.at(0), + imgSize.at(1)}; + switch (modelType) { + case SEG::SAM_SEGMENT_ENCODER: { + Ort::Value input_tensor = Ort::Value::CreateTensor( + Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, + 3 * imgSize.at(0) * imgSize.at(1), SAM_input_node_dims.data(), + SAM_input_node_dims.size()); + auto output_tensors = + session->Run(options, inputNodeNames.data(), &input_tensor, 1, + outputNodeNames.data(), outputNodeNames.size()); + delete[] blob; + clock_t starttime_4 = clock(); + double post_process_time = + (double)(starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000; + if (cudaEnable) { + std::cout << "[SAM(CUDA)]: " << "Cuda warm-up cost " << post_process_time + << " ms. 
" << std::endl; } - else - { -#ifdef USE_CUDA - half *blob = new half[iImg.total() * 3]; - utilities.BlobFromImage(processedImg, blob); - std::vector SAM_input_node_dims = {1, 3, imgSize.at(0), imgSize.at(1)}; - Ort::Value input_tensor = Ort::Value::CreateTensor(Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1), SAM_input_node_dims.data(), SAM_input_node_dims.size()); - auto output_tensors = session->Run(options, inputNodeNames.data(), &input_tensor, 1, outputNodeNames.data(), outputNodeNames.size()); - delete[] blob; - clock_t starttime_4 = clock(); - double post_process_time = (double)(starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000; - if (cudaEnable) - { - std::cout << "[SAM(CUDA)]: " << "Cuda warm-up cost " << post_process_time << " ms. " << std::endl; - } -#endif + break; + } + + case SEG::SAM_SEGMENT_DECODER: { + std::vector inputNodeDims = { + 1, 256, 64, 64}; // BUG: That was 236 instead of 256 + // Use embeddings from the last result + std::vector dummyEmbeddings( + 256 * 64 * 64, 1.0f); // Fill with zeros or any dummy values + std::vector decoderInputDims = { + 1, 256, 64, 64}; // Adjust based on your decoder's requirements + + // Create dummy point coordinates and labels + std::vector boundingBoxes = { + cv::Rect(0, 0, 100, + 100), // Example bounding box with (x, y, width, height) + // cv::Rect(0, 0, 473, 359) // Another example bounding box + }; + for (const auto &bbox : boundingBoxes) { + Ort::Value decoderInputTensor = Ort::Value::CreateTensor( + Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), + dummyEmbeddings.data(), // Use the embeddings from the encoder + dummyEmbeddings.size(), // Total number of elements + decoderInputDims.data(), decoderInputDims.size()); + // Convert bounding box to points + // Use center of bounding box as foreground point + float centerX = bbox.x + bbox.width / 2.0; + float centerY = bbox.y + bbox.height / 2.0; + + std::vector pointCoords = { + centerX, 
centerY // Center point (foreground) + }; + + std::vector pointCoordsDims = {1, 1, + 2}; // 2 points, each with (x, y) + + std::vector pointCoordsScaled; + + utilities.ScaleBboxPoints(iImg, imgSize, pointCoords, pointCoordsScaled); + + // Labels for the points + std::vector pointLabels = {1.0f}; // All points are foreground + std::vector pointLabelsDims = {1, 1}; + // Create dummy mask_input and has_mask_input + std::vector maskInput(256 * 256, 0.0f); // Fill with zeros + std::vector maskInputDims = {1, 1, 256, 256}; + std::vector hasMaskInput = {0.0f}; // No mask provided + std::vector hasMaskInputDims = {1}; + + std::vector inputTensors = utilities.PrepareInputTensor( + decoderInputTensor, pointCoordsScaled, pointCoordsDims, pointLabels, + pointLabelsDims, maskInput, maskInputDims, hasMaskInput, + hasMaskInputDims); + + auto output_tensors = session->Run( + options, inputNodeNames.data(), inputTensors.data(), + inputTensors.size(), outputNodeNames.data(), outputNodeNames.size()); } - return RET_OK; + + outputNodeNames.size(); + delete[] blob; + clock_t starttime_4 = clock(); + double post_process_time = + (double)(starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000; + if (cudaEnable) { + std::cout << "[SAM(CUDA)]: " << "Cuda warm-up cost " << post_process_time + << " ms. 
" << std::endl; + } + + break; + } + } + + return RET_OK; } diff --git a/src/segmentation.cpp b/src/segmentation.cpp index 8b5338c..585dd13 100644 --- a/src/segmentation.cpp +++ b/src/segmentation.cpp @@ -1,59 +1,52 @@ #include "segmentation.h" -std::tuple>, SEG::DL_INIT_PARAM, SEG::DL_INIT_PARAM> Initializer() -{ - std::vector> samSegmentors; - samSegmentors.push_back(std::make_unique()); - samSegmentors.push_back(std::make_unique()); - - std::unique_ptr samSegmentorEncoder = std::make_unique(); - std::unique_ptr samSegmentorDecoder = std::make_unique(); - SEG::DL_INIT_PARAM params_encoder; - SEG::DL_INIT_PARAM params_decoder; - - params_encoder.rectConfidenceThreshold = 0.1; - params_encoder.iouThreshold = 0.5; - params_encoder.modelPath = "SAM_encoder.onnx"; - params_encoder.imgSize = {1024, 1024}; - - params_decoder = params_encoder; - params_decoder.modelType = SEG::SAM_SEGMENT_DECODER; - params_decoder.modelPath = "SAM_mask_decoder.onnx"; +std::tuple>, SEG::DL_INIT_PARAM, + SEG::DL_INIT_PARAM> +Initializer() { + std::vector> samSegmentors; + samSegmentors.push_back(std::make_unique()); + samSegmentors.push_back(std::make_unique()); + + std::unique_ptr samSegmentorEncoder = std::make_unique(); + std::unique_ptr samSegmentorDecoder = std::make_unique(); + SEG::DL_INIT_PARAM params_encoder; + SEG::DL_INIT_PARAM params_decoder; + + params_encoder.rectConfidenceThreshold = 0.1; + params_encoder.iouThreshold = 0.5; + params_encoder.modelPath = "SAM_encoder.onnx"; + params_encoder.imgSize = {1024, 1024}; + + params_decoder = params_encoder; + params_decoder.modelType = SEG::SAM_SEGMENT_DECODER; + params_decoder.modelPath = "SAM_mask_decoder.onnx"; #ifdef USE_CUDA - params_encoder.cudaEnable = true; + params_encoder.cudaEnable = true; #else - params_encoder.cudaEnable = false; + params_encoder.cudaEnable = false; #endif - samSegmentorEncoder->CreateSession(params_encoder); - samSegmentorDecoder->CreateSession(params_decoder); - samSegmentors[0] = 
std::move(samSegmentorEncoder); - samSegmentors[1] = std::move(samSegmentorDecoder); - return {std::move(samSegmentors), params_encoder, params_decoder}; + samSegmentorEncoder->CreateSession(params_encoder); + samSegmentorDecoder->CreateSession(params_decoder); + samSegmentors[0] = std::move(samSegmentorEncoder); + samSegmentors[1] = std::move(samSegmentorDecoder); + return {std::move(samSegmentors), params_encoder, params_decoder}; } -std::vector SegmentAnything(std::vector> &samSegmentors, SEG::DL_INIT_PARAM &params_encoder, SEG::DL_INIT_PARAM &params_decoder, cv::Mat &img) -{ +std::vector +SegmentAnything(std::vector> &samSegmentors, + const SEG::DL_INIT_PARAM &params_encoder, + const SEG::DL_INIT_PARAM &params_decoder, cv::Mat &img) { - std::vector resSam; - SEG::DL_RESULT res; + std::vector resSam; + SEG::DL_RESULT res; - SEG::MODEL_TYPE modelTypeRef = params_encoder.modelType; - samSegmentors[0]->RunSession(img, resSam, modelTypeRef, res); + SEG::MODEL_TYPE modelTypeRef = params_encoder.modelType; + samSegmentors[0]->RunSession(img, resSam, modelTypeRef, res); - modelTypeRef = params_decoder.modelType; - samSegmentors[1]->RunSession(img, resSam, modelTypeRef, res); + modelTypeRef = params_decoder.modelType; + samSegmentors[1]->RunSession(img, resSam, modelTypeRef, res); - // cv::destroyAllWindows(); - cv::Mat finalMask = res.masks[0]; - std::cout << "Final mask size: " << finalMask.size() << std::endl; - - for (const auto &mask : res.masks) - { - cv::imshow("Mask", mask); - cv::waitKey(0); - } - cv::destroyAllWindows(); - return std::move(res.masks); + return std::move(res.masks); } From 5cb4ab4bb2e20bd1bd01edef921854ab1c3de46c Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Tue, 2 Sep 2025 18:10:33 +0200 Subject: [PATCH 09/28] Refactor post processing for better accuracy and performance. 
Also correcting preprocessing scaling issue on long images --- include/utils.h | 3 +- src/main.cpp | 9 +- src/sam_inference.cpp | 10 +- src/utils.cpp | 221 ++++++++++++++++-------------------------- 4 files changed, 93 insertions(+), 150 deletions(-) diff --git a/include/utils.h b/include/utils.h index 333c9e3..e81b236 100644 --- a/include/utils.h +++ b/include/utils.h @@ -18,7 +18,6 @@ class Utils Utils(); ~Utils(); - void overlay(std::vector &output_tensors, const cv::Mat &iImg, std::vector iImgSize, SEG::DL_RESULT &result); char *PreProcess(const cv::Mat &iImg, std::vector iImgSize, cv::Mat &oImg); void ScaleBboxPoints(const cv::Mat &iImg, std::vector iImgSize, std::vector &pointCoords, std::vector &PointsCoordsScaled); @@ -26,6 +25,8 @@ class Utils std::vector &pointLabels, std::vector pointLabelsDims, std::vector &maskInput, std::vector maskInputDims, std::vector &hasMaskInput, std::vector hasMaskInputDims); + void PostProcess(std::vector &output_tensors, const cv::Mat &iImg, std::vector iImgSize, SEG::DL_RESULT &result); + // Definition: Flattened image to blob (and normalizaed) for deep learning inference. Also reorganize from HWC to CHW. // Note: Code in header file since it is used outside of this utils src code. 
template diff --git a/src/main.cpp b/src/main.cpp index 2b2d602..9399779 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -20,14 +20,7 @@ int main() cv::Mat img = cv::imread(img_path); std::vector masks; masks = SegmentAnything(samSegmentors, params_encoder, params_decoder, img); - for (int j = 0; j < masks.size(); j++) - { - std::cout << "Press any key to exit" << std::endl; - cv::imshow("Result of MASKS", masks[j]); - cv::waitKey(0); - cv::destroyAllWindows(); - } - std::cout << "OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOo" << std::endl; + } } return 0; diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp index 9c0463b..3f6a09e 100644 --- a/src/sam_inference.cpp +++ b/src/sam_inference.cpp @@ -206,7 +206,7 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, std::vector decoderInputDims = { 1, 256, 64, 64}; // Adjust based on your decoder's requirements - // Create point coordinates and labels + // Create point coordinates for testing purposes #ifdef ROI // Create a window for user interaction @@ -224,8 +224,7 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, std::vector boundingBoxes; boundingBoxes.push_back(bbox); #endif // ROI - // boundingBoxes.push_back(bbox1); - // Declare timing variables BEFORE the loop + #ifdef benchmark clock_t starttime_2 = 0; clock_t starttime_3 = 0; @@ -255,8 +254,7 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, std::vector pointCoordsScaled; - std::vector pointCoordsDims = {1, 2, - 2}; // 2 points, each with (x, y) + std::vector pointCoordsDims = {1, 2, 2}; // 2 points, each with (x, y) // Labels for the points std::vector pointLabels = {2.0f, 3.0f}; // Box prompt labels @@ -287,7 +285,7 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, starttime_3 = clock(); #endif // benchmark - utilities.overlay(output_tensors, iImg, imgSize, result); + utilities.PostProcess(output_tensors, iImg, imgSize, result); } // Add the result 
to oResult oResult.push_back(result); diff --git a/src/utils.cpp b/src/utils.cpp index 8d76ac1..f7721db 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -30,7 +30,7 @@ char *Utils::PreProcess(const cv::Mat &iImg, std::vector iImgSize, cv::Mat } else { - resizeScales = iImg.rows / (float)iImgSize.at(0); + resizeScales = iImg.rows / (float)iImgSize.at(1); cv::resize(oImg, oImg, cv::Size(int(iImg.cols / resizeScales), iImgSize.at(1))); } cv::Mat tempImg = cv::Mat::zeros(iImgSize.at(0), iImgSize.at(1), CV_8UC3); @@ -118,150 +118,101 @@ std::vector Utils::PrepareInputTensor(Ort::Value &decoderInputTensor return inputTensors; } -void Utils::overlay(std::vector &output_tensors, const cv::Mat &iImg, std::vector imgSize, SEG::DL_RESULT &result) +void Utils::PostProcess(std::vector &output_tensors, const cv::Mat &iImg, std::vector imgSize, SEG::DL_RESULT &result) { - // Process decoder output (masks) - if (output_tensors.size() > 0) + if (output_tensors.size() < 2) { - // Get the masks from the output tensor - auto scoresTensor = std::move(output_tensors[0]); // IoU scores - auto masksTensor = std::move(output_tensors[1]); // First output should be the masks PROBABLY WRONG - auto masksInfo = masksTensor.GetTensorTypeAndShapeInfo(); - auto masksShape = masksInfo.GetShape(); + std::cerr << "[SAM]: Decoder returned insufficient outputs." 
<< std::endl; + return; + } + + // Assume [scores, masks]; consider shape-based detection later + auto scoresTensor = std::move(output_tensors[0]); + auto masksTensor = std::move(output_tensors[1]); + + auto masksInfo = masksTensor.GetTensorTypeAndShapeInfo(); + auto masksShape = masksInfo.GetShape(); + + if (masksShape.size() == 4) + { + auto masksData = masksTensor.GetTensorMutableData(); + auto scoresData = scoresTensor.GetTensorMutableData(); - if (masksShape.size() == 4) + const size_t numMasks = static_cast(masksShape[1]); + const size_t height = static_cast(masksShape[2]); + const size_t width = static_cast(masksShape[3]); + + // Pick best mask by score + float bestScore = -1.0f; + size_t bestMaskIndex = 0; + for (size_t i = 0; i < numMasks; ++i) { - auto masksData = masksTensor.GetTensorMutableData(); - auto scoresData = scoresTensor.GetTensorMutableData(); - - size_t batchSize = masksShape[0]; // Usually 1 - size_t numMasks = masksShape[1]; // Number of masks (typically 1) - size_t height = masksShape[2]; // Height of mask - size_t width = masksShape[3]; // Width of mask - - // Find the best mask (highest IoU score) - float bestScore = -1; - size_t bestMaskIndex = 0; - - for (size_t i = 0; i < numMasks; ++i) - { - - float score = scoresData[i]; - - if (score > bestScore) - { - bestScore = score; - bestMaskIndex = i; - } - } - // std::cout << "Best mask index: " << bestMaskIndex << ", Score: " << bestScore << std::endl; - - // Create OpenCV Mat for the mask - cv::Mat mask = cv::Mat::zeros(height, width, CV_8UC1); - - // Convert float mask to binary mask - for (size_t h = 0; h < height; ++h) - { - for (size_t w = 0; w < width; ++w) - { - size_t idx = (bestMaskIndex * height * width) + (h * width) + w; - float value = masksData[idx]; - mask.at(h, w) = (value > 0.5f) ? 255 : 0; // Threshold at 0.5 - } - } - - // 1. 
Calculate the dimensions the image had during preprocessing - float scale; - int processedWidth, processedHeight; - if (iImg.cols >= iImg.rows) - { - scale = (float)imgSize[0] / iImg.cols; - processedWidth = imgSize[0]; - processedHeight = int(iImg.rows * scale); - } - else - { - scale = (float)imgSize[1] / iImg.rows; - processedWidth = int(iImg.cols * scale); - processedHeight = imgSize[1]; - } - - // 3. Extract the portion that corresponds to the actual image (no padding) - int cropWidth = std::min(256, int(256 * processedWidth / (float)imgSize[0])); - int cropHeight = std::min(256, int(256 * processedHeight / (float)imgSize[1])); - cv::Mat croppedMask = mask(cv::Rect(0, 0, cropWidth, cropHeight)); - - // 4. Resize directly to original image dimensions in one step - cv::Mat finalMask; - - // Use INTER_NEAREST for binary masks - preserves hard edges - cv::resize(croppedMask, finalMask, cv::Size(iImg.cols, iImg.rows), 0, 0, cv::INTER_NEAREST); - - ////////////////////// GUIDED BILATERAL FILTER ///////////////////////// - // Convert the upscaled mask to CV_8UC1 if necessary - if (finalMask.type() != CV_8UC1) - { - finalMask.convertTo(finalMask, CV_8UC1); - } - - // Apply the Guided Filter - cv::Mat filteredMask; - int radius = 2; - double eps = 0.01; - cv::ximgproc::guidedFilter(iImg, finalMask, finalMask, radius, eps); - ////////////////////// END: GUIDED BILATERAL FILTER ///////////////////////// - - ////////////////////// MORPHOLOGICAN OPERATIONS ///////////////////////// - // Morphological operations to clean up the mask - int kernelSize = std::max(5, std::min(iImg.cols, iImg.rows) / 100); // Adaptive size - cv::Mat kernel = cv::getStructuringElement(cv::MORPH_ELLIPSE, cv::Size(kernelSize, kernelSize)); - - // CLOSE operation: fills small holes in the mask - cv::morphologyEx(finalMask, finalMask, cv::MORPH_CLOSE, kernel); - - // OPEN operation: removes small noise - cv::morphologyEx(finalMask, finalMask, cv::MORPH_OPEN, kernel); - - ////////////////////// END: 
MORPHOLOGICAN OPERATIONS ///////////////////////// - - // Re-threshold after resizing to ensure binary mask (critical step) - - cv::threshold(finalMask, finalMask, 127, 255, cv::THRESH_BINARY); - result.masks.push_back(finalMask); - - /*// Add IoU scores if available (typically second tensor) - if (output_tensors.size() > 1) { - auto scoresTensor = std::move(output_tensors[1]); - auto scoresData = scoresTensor.GetTensorMutableData(); - if (i < scoresTensor.GetTensorTypeAndShapeInfo().GetShape()[1]) { - result.confidence = scoresData[i]; - std::cout << "Mask confidence: " << result.confidence << std::endl; - } - }*/ - - // Find contours of the mask - std::vector> contours; - cv::findContours(finalMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE); - - // Create a semi-transparent overlay - cv::Mat colorMask = cv::Mat::zeros(iImg.size(), CV_8UC3); - colorMask.setTo(cv::Scalar(0, 200, 0), finalMask); // Green fill - cv::addWeighted(iImg, 0.7, colorMask, 0.3, 0, iImg); - - // Draw contours with a thick, high-contrast outline - cv::drawContours(iImg, contours, -1, cv::Scalar(0, 255, 255), 2); // Yellow outline - - // Save or display the result - cv::imwrite("segmentation_result_" + std::to_string(bestMaskIndex) + ".jpg", iImg); - cv::imwrite("mask_" + std::to_string(bestMaskIndex) + ".jpg", finalMask); + const float s = scoresData ? scoresData[i] : 0.0f; + if (s > bestScore) { bestScore = s; bestMaskIndex = i; } + } + + // Compute preprocessed region (top-left anchored) + float scale; + int processedWidth, processedHeight; + if (iImg.cols >= iImg.rows) + { + scale = static_cast(imgSize[0]) / static_cast(iImg.cols); + processedWidth = imgSize[0]; + processedHeight = static_cast(iImg.rows * scale); } else { - std::cerr << "[SAM]: Unexpected mask tensor shape." 
<< std::endl; + scale = static_cast(imgSize[1]) / static_cast(iImg.rows); + processedWidth = static_cast(iImg.cols * scale); + processedHeight = imgSize[1]; } + + auto clampDim = [](int v, int lo, int hi) { return std::max(lo, std::min(v, hi)); }; + + // Wrap selected mask plane as float prob map + const size_t planeOffset = bestMaskIndex * height * width; + cv::Mat prob32f(static_cast(height), static_cast(width), CV_32F, + const_cast(masksData + planeOffset)); + + // Crop in mask space using proportional dimensions (no hardcoded 256) + const int cropW = clampDim(static_cast(std::round(static_cast(width) * processedWidth / static_cast(imgSize[0]))), 1, static_cast(width)); + const int cropH = clampDim(static_cast(std::round(static_cast(height) * processedHeight / static_cast(imgSize[1]))), 1, static_cast(height)); + cv::Mat probCropped = prob32f(cv::Rect(0, 0, cropW, cropH)); + + // Resize probabilities to original image (linear) + cv::Mat probResized; + cv::resize(probCropped, probResized, cv::Size(iImg.cols, iImg.rows), 0, 0, cv::INTER_LINEAR); + + // Threshold once to binary mask + cv::Mat finalMask; + cv::compare(probResized, 0.5f, finalMask, cv::CMP_GT); // CV_8U 0/255 + + // Morphological cleanup (light, then ensure binary) + int kernelSize = std::max(5, std::min(iImg.cols, iImg.rows) / 100); + cv::Mat kernel = cv::getStructuringElement(cv::MORPH_ELLIPSE, cv::Size(kernelSize, kernelSize)); + cv::morphologyEx(finalMask, finalMask, cv::MORPH_CLOSE, kernel); + cv::morphologyEx(finalMask, finalMask, cv::MORPH_OPEN, kernel); + cv::threshold(finalMask, finalMask, 127, 255, cv::THRESH_BINARY); + + // Save mask + result.masks.push_back(finalMask); + + // Overlay for display on a copy (iImg is const) + cv::Mat overlay = iImg.clone(); + std::vector> contours; + cv::findContours(finalMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE); + + cv::Mat colorMask = cv::Mat::zeros(overlay.size(), CV_8UC3); + colorMask.setTo(cv::Scalar(0, 200, 0), finalMask); + 
cv::addWeighted(overlay, 0.7, colorMask, 0.3, 0, overlay); + cv::drawContours(overlay, contours, -1, cv::Scalar(0, 255, 255), 2); + + cv::imshow("SAM Segmentation", overlay); + cv::waitKey(0); + cv::destroyAllWindows(); } else { - std::cerr << "[SAM]: No masks found in the output tensor." << std::endl; + std::cerr << "[SAM]: Unexpected mask tensor shape." << std::endl; } } \ No newline at end of file From 0fe00dae324f419817a7e72d13624fa30f85296c Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Tue, 2 Sep 2025 21:06:50 +0200 Subject: [PATCH 10/28] Added tests (still not working with catkin) --- CMakeLists.txt | 44 ++++++++++++++---- package.xml | 7 ++- test/sam_test.cpp | 114 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 154 insertions(+), 11 deletions(-) create mode 100644 test/sam_test.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 8cb430a..f4087dc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,8 +1,6 @@ -cmake_minimum_required(VERSION 3.5) +cmake_minimum_required(VERSION 3.0.2) -set(PROJECT_NAME sam_onnx_ros) - -project(${PROJECT_NAME} VERSION 0.0.1 LANGUAGES CXX) +project(sam_onnx_ros) # -------------- CMake Policies ------------------# #add_compile_options(-Wall -Werror=all) @@ -30,6 +28,11 @@ include_directories(/usr/local/cuda/include) find_package(catkin REQUIRED COMPONENTS + roscpp + tue_config + tue_filesystem + code_profiler + #onnxruntime_ros ) @@ -62,12 +65,22 @@ set(PROJECT_SOURCES src/utils.cpp ) -add_executable(${PROJECT_NAME} ${PROJECT_SOURCES}) -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) - -# Link OpenCV libraries along with ONNX Runtime -target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS} ${catkin_LIBRARIES} ${ONNXRUNTIME_ROOT}/lib/libonnxruntime.so) +# Build core library (no main.cpp here) +add_library(sam_onnx_ros_core + src/sam_inference.cpp + src/segmentation.cpp + src/utils.cpp +) +target_link_libraries(sam_onnx_ros_core + ${OpenCV_LIBS} + ${catkin_LIBRARIES} + 
${ONNXRUNTIME_ROOT}/lib/libonnxruntime.so +) +target_include_directories(sam_onnx_ros_core PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) +# Main executable links the core lib +add_executable(${PROJECT_NAME} src/main.cpp) +target_link_libraries(${PROJECT_NAME} sam_onnx_ros_core) # Copy sam_.onnx file to the same folder of the executable file configure_file(../hero_sam/sam_inference/model/SAM_mask_decoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_mask_decoder.onnx COPYONLY) @@ -78,6 +91,19 @@ add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/images ) +# Enable testing and add a gtest +if (CATKIN_ENABLE_TESTING) + find_package(catkin_lint_cmake REQUIRED) + catkin_add_catkin_lint_test("-W2 --ignore HEADER_OUTSIDE_PACKAGE_INCLUDE_PATH") + + catkin_add_gtest(sam_onnx_ros_tests test/sam_test.cpp) + if(TARGET sam_onnx_ros_tests) + target_link_libraries(sam_onnx_ros_tests sam_onnx_ros_core ${catkin_LIBRARIES}) + target_include_directories(sam_onnx_ros_tests PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) + endif() +endif() + + #If you want to debug set(CMAKE_BUILD_TYPE Debug) set(CMAKE_CXX_FLAGS_DEBUG "-g") diff --git a/package.xml b/package.xml index cde009a..250abed 100644 --- a/package.xml +++ b/package.xml @@ -12,14 +12,17 @@ ToDo catkin + roscpp + libpcl-common + libopencv-dev libopencv-dev onnxruntime_ros onnxruntime_ros - catkin_lint_cmake - +gtest +rostest doxygen diff --git a/test/sam_test.cpp b/test/sam_test.cpp new file mode 100644 index 0000000..5f1024f --- /dev/null +++ b/test/sam_test.cpp @@ -0,0 +1,114 @@ +#include "segmentation.h" +#include "sam_inference.h" +#include +#include +#include "dl_types.h" +#include "utils.h" +#include + +class SamInferenceTest : public ::testing::Test +{ +protected: + void SetUp() override + { + // Create test images with different characteristics + testImage_640x640 = cv::Mat::ones(640, 640, CV_8UC3) * 255; + testImage_800x600 = cv::Mat::ones(600, 800, CV_8UC3) * 
128; + + // Create a more realistic test image with some patterns + testImage_realistic = cv::Mat(640, 640, CV_8UC3); + cv::randu(testImage_realistic, cv::Scalar(0,0,0), cv::Scalar(255,255,255)); + + // Setup common parameters + NonSquareImgSize = { testImage_800x600.cols, testImage_800x600.rows }; + + sam = std::make_unique(); + params.rectConfidenceThreshold = 0.1f; + params.iouThreshold = 0.5f; + params.imgSize = {1024, 1024}; + params.modelType = SEG::SAM_SEGMENT_ENCODER; + params.modelPath = "SAM_encoder.onnx"; // copied to build/ by CMake +#ifdef USE_CUDA + params.cudaEnable = true; +#else + params.cudaEnable = false; +#endif + } + + void TearDown() override { sam.reset(); } + + // Test data + Utils utilities; + cv::Mat testImage_640x640, testImage_800x600, testImage_realistic; + SEG::DL_INIT_PARAM params; + std::unique_ptr sam; + std::vector NonSquareImgSize; +}; + + + +TEST_F(SamInferenceTest, ObjectCreation) +{ + EXPECT_NO_THROW({ + SAM localSam; + }); +} + +TEST_F(SamInferenceTest, PreProcessSquareImage) +{ + cv::Mat processedImg; + const char* result = utilities.PreProcess(testImage_640x640, params.imgSize, processedImg); + + EXPECT_EQ(result, nullptr) << "PreProcess should succeed"; + EXPECT_EQ(processedImg.size(), cv::Size(1024, 1024)) << "Output should be letterboxed to 1024x1024"; + EXPECT_FALSE(processedImg.empty()) << "Processed image should not be empty"; +} + +TEST_F(SamInferenceTest, PreProcessRectangularImage) +{ + cv::Mat processedImg; + const char* result = utilities.PreProcess(testImage_800x600, NonSquareImgSize, processedImg); + + EXPECT_EQ(result, nullptr) << "PreProcess should succeed"; + EXPECT_EQ(processedImg.size(), cv::Size(800, 600)) << "Output should be letterboxed to 800x600"; + EXPECT_FALSE(processedImg.empty()) << "Processed image should not be empty"; +} + +TEST_F(SamInferenceTest, CreateSessionWithValidModel) +{ + if (!std::filesystem::exists("SAM_encoder.onnx")) { + GTEST_SKIP() << "Model not found in build dir"; + } + const 
char* result = sam->CreateSession(params); + EXPECT_EQ(result, nullptr) << "CreateSession should succeed with valid parameters"; +} + +TEST_F(SamInferenceTest, CreateSessionWithInvalidModel) +{ + params.modelPath = "nonexistent_model.onnx"; + const char* result = sam->CreateSession(params); + EXPECT_NE(result, nullptr) << "CreateSession should fail with invalid model path"; +} + +TEST_F(SamInferenceTest, FullInferencePipeline) +{ + if (!std::filesystem::exists("SAM_encoder.onnx") || + !std::filesystem::exists("SAM_mask_decoder.onnx")) { + GTEST_SKIP() << "Models not found in build dir"; + } + + // Use the package Initializer/SegmentAnything for the full pipeline + std::vector> samSegmentors; + SEG::DL_INIT_PARAM params_encoder, params_decoder; + std::tie(samSegmentors, params_encoder, params_decoder) = Initializer(); + + auto masks = SegmentAnything(samSegmentors, params_encoder, params_decoder, testImage_realistic); + EXPECT_TRUE(masks.size() >= 0) << "Masks should be a valid output vector"; +} + +// Run all tests +int main(int argc, char **argv) +{ + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file From 9f210122ba54b26a19a1be4c31f14ff455d07857 Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Thu, 4 Sep 2025 12:55:03 +0200 Subject: [PATCH 11/28] Fixed catkin workspace for both code and tests --- CMakeLists.txt | 26 +++++++++++++------------- package.xml | 8 ++------ src/main.cpp | 2 +- src/sam_inference.cpp | 2 +- src/segmentation.cpp | 4 ++-- test/sam_test.cpp | 35 +++++++++++++++-------------------- 6 files changed, 34 insertions(+), 43 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f4087dc..cece29a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,10 +28,10 @@ include_directories(/usr/local/cuda/include) find_package(catkin REQUIRED COMPONENTS - roscpp - tue_config - tue_filesystem - code_profiler + # roscpp + # tue_config + # tue_filesystem + # code_profiler #onnxruntime_ros ) @@ -42,7 
+42,8 @@ find_package(catkin REQUIRED catkin_package( INCLUDE_DIRS include - LIBRARIES ${PROJECT_NAME} + #LIBRARIES ${PROJECT_NAME} + LIBRARIES sam_onnx_ros_core CATKIN_DEPENDS DEPENDS OpenCV ) @@ -83,18 +84,18 @@ add_executable(${PROJECT_NAME} src/main.cpp) target_link_libraries(${PROJECT_NAME} sam_onnx_ros_core) # Copy sam_.onnx file to the same folder of the executable file -configure_file(../hero_sam/sam_inference/model/SAM_mask_decoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_mask_decoder.onnx COPYONLY) -configure_file(../hero_sam/sam_inference/model/SAM_encoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_encoder.onnx COPYONLY) +configure_file(~/Documents/repos/hero_sam/sam_inference/model/SAM_mask_decoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_mask_decoder.onnx COPYONLY) +configure_file(~/Documents/repos/hero_sam/sam_inference/model/SAM_encoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_encoder.onnx COPYONLY) # Create folder name images in the same folder of the executable file add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/images ) -# Enable testing and add a gtest +# # Enable testing if (CATKIN_ENABLE_TESTING) - find_package(catkin_lint_cmake REQUIRED) - catkin_add_catkin_lint_test("-W2 --ignore HEADER_OUTSIDE_PACKAGE_INCLUDE_PATH") +# find_package(catkin_lint_cmake REQUIRED) +# catkin_add_catkin_lint_test("-W2 --ignore HEADER_OUTSIDE_PACKAGE_INCLUDE_PATH") catkin_add_gtest(sam_onnx_ros_tests test/sam_test.cpp) if(TARGET sam_onnx_ros_tests) @@ -103,7 +104,6 @@ if (CATKIN_ENABLE_TESTING) endif() endif() - #If you want to debug -set(CMAKE_BUILD_TYPE Debug) -set(CMAKE_CXX_FLAGS_DEBUG "-g") +# set(CMAKE_BUILD_TYPE Debug) +# set(CMAKE_CXX_FLAGS_DEBUG "-g") diff --git a/package.xml b/package.xml index 250abed..b00e6d6 100644 --- a/package.xml +++ b/package.xml @@ -9,20 +9,16 @@ Iason Theodorou - ToDo + BSD catkin - roscpp - libpcl-common - libopencv-dev libopencv-dev onnxruntime_ros onnxruntime_ros + 
catkin_lint_cmake -gtest -rostest doxygen diff --git a/src/main.cpp b/src/main.cpp index 9399779..454e086 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -11,7 +11,7 @@ int main() SEG::DL_INIT_PARAM params_decoder; std::tie(samSegmentors, params_encoder, params_decoder) = Initializer(); std::filesystem::path current_path = std::filesystem::current_path(); - std::filesystem::path imgs_path = current_path / "../../hero_sam/pipeline/build/images"; + std::filesystem::path imgs_path = "/home/amigo/Documents/repos/hero_sam/pipeline/build/images"; // current_path / <- you could use for (auto &i : std::filesystem::directory_iterator(imgs_path)) { if (i.path().extension() == ".jpg" || i.path().extension() == ".png" || i.path().extension() == ".jpeg") diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp index 3f6a09e..c9bacbe 100644 --- a/src/sam_inference.cpp +++ b/src/sam_inference.cpp @@ -85,7 +85,7 @@ const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) { for (size_t i = 0; i < OutputNodesNum; i++) { Ort::AllocatedStringPtr output_node_name = session->GetOutputNameAllocated(i, allocator); - char *temp_buf = new char[10]; + char *temp_buf = new char[50]; strcpy(temp_buf, output_node_name.get()); outputNodeNames.push_back(temp_buf); } diff --git a/src/segmentation.cpp b/src/segmentation.cpp index 585dd13..25b8fae 100644 --- a/src/segmentation.cpp +++ b/src/segmentation.cpp @@ -14,12 +14,12 @@ Initializer() { params_encoder.rectConfidenceThreshold = 0.1; params_encoder.iouThreshold = 0.5; - params_encoder.modelPath = "SAM_encoder.onnx"; + params_encoder.modelPath = "/home/amigo//Documents/repos/sam_onnx_ros/build/SAM_encoder.onnx"; params_encoder.imgSize = {1024, 1024}; params_decoder = params_encoder; params_decoder.modelType = SEG::SAM_SEGMENT_DECODER; - params_decoder.modelPath = "SAM_mask_decoder.onnx"; + params_decoder.modelPath = "/home/amigo/Documents/repos/sam_onnx_ros/build/SAM_mask_decoder.onnx"; #ifdef USE_CUDA params_encoder.cudaEnable = true; diff 
--git a/test/sam_test.cpp b/test/sam_test.cpp index 5f1024f..75a9c98 100644 --- a/test/sam_test.cpp +++ b/test/sam_test.cpp @@ -22,27 +22,25 @@ class SamInferenceTest : public ::testing::Test // Setup common parameters NonSquareImgSize = { testImage_800x600.cols, testImage_800x600.rows }; - sam = std::make_unique(); - params.rectConfidenceThreshold = 0.1f; - params.iouThreshold = 0.5f; - params.imgSize = {1024, 1024}; - params.modelType = SEG::SAM_SEGMENT_ENCODER; - params.modelPath = "SAM_encoder.onnx"; // copied to build/ by CMake + // Use the package Initializer/SegmentAnything for the full pipeline + + std::tie(samSegmentors, params_encoder, params_decoder) = Initializer(); + #ifdef USE_CUDA - params.cudaEnable = true; + params_encoder.cudaEnable = true; #else - params.cudaEnable = false; + params_encoder.cudaEnable = false; #endif } - void TearDown() override { sam.reset(); } + void TearDown() override { samSegmentors[0].reset(); samSegmentors[1].reset(); } // Test data Utils utilities; cv::Mat testImage_640x640, testImage_800x600, testImage_realistic; - SEG::DL_INIT_PARAM params; - std::unique_ptr sam; std::vector NonSquareImgSize; + std::vector> samSegmentors; + SEG::DL_INIT_PARAM params_encoder, params_decoder; }; @@ -57,7 +55,7 @@ TEST_F(SamInferenceTest, ObjectCreation) TEST_F(SamInferenceTest, PreProcessSquareImage) { cv::Mat processedImg; - const char* result = utilities.PreProcess(testImage_640x640, params.imgSize, processedImg); + const char* result = utilities.PreProcess(testImage_640x640, params_encoder.imgSize, processedImg); EXPECT_EQ(result, nullptr) << "PreProcess should succeed"; EXPECT_EQ(processedImg.size(), cv::Size(1024, 1024)) << "Output should be letterboxed to 1024x1024"; @@ -79,14 +77,14 @@ TEST_F(SamInferenceTest, CreateSessionWithValidModel) if (!std::filesystem::exists("SAM_encoder.onnx")) { GTEST_SKIP() << "Model not found in build dir"; } - const char* result = sam->CreateSession(params); - EXPECT_EQ(result, nullptr) << 
"CreateSession should succeed with valid parameters"; + + EXPECT_NE(samSegmentors[0], nullptr) << "CreateSession should succeed with valid parameters"; } TEST_F(SamInferenceTest, CreateSessionWithInvalidModel) { - params.modelPath = "nonexistent_model.onnx"; - const char* result = sam->CreateSession(params); + params_encoder.modelPath = "nonexistent_model.onnx"; + const char* result = samSegmentors[0]->CreateSession(params_encoder); EXPECT_NE(result, nullptr) << "CreateSession should fail with invalid model path"; } @@ -97,10 +95,7 @@ TEST_F(SamInferenceTest, FullInferencePipeline) GTEST_SKIP() << "Models not found in build dir"; } - // Use the package Initializer/SegmentAnything for the full pipeline - std::vector> samSegmentors; - SEG::DL_INIT_PARAM params_encoder, params_decoder; - std::tie(samSegmentors, params_encoder, params_decoder) = Initializer(); + auto masks = SegmentAnything(samSegmentors, params_encoder, params_decoder, testImage_realistic); EXPECT_TRUE(masks.size() >= 0) << "Masks should be a valid output vector"; From a9fce070c810f6c8799326652785e3d5fe768e58 Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Fri, 5 Sep 2025 12:38:53 +0200 Subject: [PATCH 12/28] fixed functionallity for the tests to pass and added logging definition --- src/main.cpp | 3 ++- src/sam_inference.cpp | 4 +++- src/utils.cpp | 6 +++++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 454e086..c9624c5 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -3,6 +3,7 @@ #include #include #include + int main() { // Running inference @@ -11,7 +12,7 @@ int main() SEG::DL_INIT_PARAM params_decoder; std::tie(samSegmentors, params_encoder, params_decoder) = Initializer(); std::filesystem::path current_path = std::filesystem::current_path(); - std::filesystem::path imgs_path = "/home/amigo/Documents/repos/hero_sam/pipeline/build/images"; // current_path / <- you could use + std::filesystem::path imgs_path = 
"/home/amigo/Documents/repos/hero_sam/sam_inference/build/images"; // current_path / <- you could use for (auto &i : std::filesystem::directory_iterator(imgs_path)) { if (i.path().extension() == ".jpg" || i.path().extension() == ".png" || i.path().extension() == ".jpeg") diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp index c9bacbe..24af832 100644 --- a/src/sam_inference.cpp +++ b/src/sam_inference.cpp @@ -3,7 +3,7 @@ #include #define benchmark -#define ROI +//#define ROI SAM::SAM() {} @@ -223,6 +223,8 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, std::vector boundingBoxes; boundingBoxes.push_back(bbox); +#else + result.boxes.push_back(cv::Rect(0, 0, iImg.cols, iImg.rows)); #endif // ROI #ifdef benchmark diff --git a/src/utils.cpp b/src/utils.cpp index f7721db..ca66b6c 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -1,5 +1,6 @@ #include "utils.h" #include // for guided filter +#define LOGGING // Constructor Utils::Utils() @@ -33,7 +34,8 @@ char *Utils::PreProcess(const cv::Mat &iImg, std::vector iImgSize, cv::Mat resizeScales = iImg.rows / (float)iImgSize.at(1); cv::resize(oImg, oImg, cv::Size(int(iImg.cols / resizeScales), iImgSize.at(1))); } - cv::Mat tempImg = cv::Mat::zeros(iImgSize.at(0), iImgSize.at(1), CV_8UC3); + //cv::Mat tempImg = cv::Mat::zeros(iImgSize.at(0), iImgSize.at(1), CV_8UC3); + cv::Mat tempImg = cv::Mat::zeros(iImgSize.at(1), iImgSize.at(0), CV_8UC3); oImg.copyTo(tempImg(cv::Rect(0, 0, oImg.cols, oImg.rows))); oImg = tempImg; @@ -198,6 +200,7 @@ void Utils::PostProcess(std::vector &output_tensors, const cv::Mat & result.masks.push_back(finalMask); // Overlay for display on a copy (iImg is const) + #ifdef LOGGING cv::Mat overlay = iImg.clone(); std::vector> contours; cv::findContours(finalMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE); @@ -210,6 +213,7 @@ void Utils::PostProcess(std::vector &output_tensors, const cv::Mat & cv::imshow("SAM Segmentation", overlay); cv::waitKey(0); 
cv::destroyAllWindows(); + #endif // LOGGING } else { From dffbcd3544fd097eb007614c1cbe1de7b9b1c7fa Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Fri, 5 Sep 2025 12:56:59 +0200 Subject: [PATCH 13/28] renamed private members of utils and sam_inference --- include/sam_inference.h | 22 +++---- include/utils.h | 4 +- src/sam_inference.cpp | 132 ++++++++++++++++++++-------------------- src/utils.cpp | 12 ++-- 4 files changed, 85 insertions(+), 85 deletions(-) diff --git a/include/sam_inference.h b/include/sam_inference.h index d63701c..7bff0b1 100644 --- a/include/sam_inference.h +++ b/include/sam_inference.h @@ -33,17 +33,17 @@ class SAM std::vector classes{}; private: - Ort::Env env; - std::unique_ptr session; - bool cudaEnable; - Ort::RunOptions options; - std::vector inputNodeNames; - std::vector outputNodeNames; - - SEG::MODEL_TYPE modelType; - std::vector imgSize; - float rectConfidenceThreshold; - float iouThreshold; + Ort::Env _env; + std::unique_ptr _session; + bool _cudaEnable; + Ort::RunOptions _options; + std::vector _inputNodeNames; + std::vector _outputNodeNames; + + SEG::MODEL_TYPE _modelType; + std::vector _imgSize; + float _rectConfidenceThreshold; + float _iouThreshold; }; #endif // SAMINFERENCE_H \ No newline at end of file diff --git a/include/utils.h b/include/utils.h index e81b236..6cb8819 100644 --- a/include/utils.h +++ b/include/utils.h @@ -51,8 +51,8 @@ class Utils } private: - float resizeScales; - float resizeScalesBbox; // letterbox scale + float _resizeScales; + float _resizeScalesBbox; // letterbox scale }; #endif // UTILS_H \ No newline at end of file diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp index 24af832..f12c56b 100644 --- a/src/sam_inference.cpp +++ b/src/sam_inference.cpp @@ -9,10 +9,10 @@ SAM::SAM() {} SAM::~SAM() { // Clean up input/output node names - for (auto &name : inputNodeNames) { + for (auto &name : _inputNodeNames) { delete[] name; } - for (auto &name : outputNodeNames) { + for (auto &name : 
_outputNodeNames) { delete[] name; } } @@ -28,19 +28,19 @@ template <> struct TypeToTensorType { const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) { const char *Ret = RET_OK; - if (session) { - session.reset(); // Release previous session + if (_session) { + _session.reset(); // Release previous _session // Clear node names - for (auto &name : inputNodeNames) { + for (auto &name : _inputNodeNames) { delete[] name; } - inputNodeNames.clear(); + _inputNodeNames.clear(); - for (auto &name : outputNodeNames) { + for (auto &name : _outputNodeNames) { delete[] name; } - outputNodeNames.clear(); + _outputNodeNames.clear(); } std::regex pattern("[\u4e00-\u9fa5]"); bool result = std::regex_search(iParams.modelPath, pattern); @@ -51,55 +51,55 @@ const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) { return Ret; } try { - rectConfidenceThreshold = iParams.rectConfidenceThreshold; - iouThreshold = iParams.iouThreshold; - imgSize = iParams.imgSize; - modelType = iParams.modelType; - cudaEnable = iParams.cudaEnable; - env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, "Sam"); - Ort::SessionOptions sessionOption; + _rectConfidenceThreshold = iParams.rectConfidenceThreshold; + _iouThreshold = iParams.iouThreshold; + _imgSize = iParams.imgSize; + _modelType = iParams.modelType; + _cudaEnable = iParams.cudaEnable; + _env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, "Sam"); + Ort::SessionOptions _sessionOption; if (iParams.cudaEnable) { OrtCUDAProviderOptions cudaOption; cudaOption.device_id = 0; - sessionOption.AppendExecutionProvider_CUDA(cudaOption); + _sessionOption.AppendExecutionProvider_CUDA(cudaOption); } - sessionOption.SetGraphOptimizationLevel( + _sessionOption.SetGraphOptimizationLevel( GraphOptimizationLevel::ORT_ENABLE_ALL); - sessionOption.SetIntraOpNumThreads(iParams.intraOpNumThreads); - sessionOption.SetLogSeverityLevel(iParams.logSeverityLevel); + _sessionOption.SetIntraOpNumThreads(iParams.intraOpNumThreads); + 
_sessionOption.SetLogSeverityLevel(iParams.logSeverityLevel); const char *modelPath = iParams.modelPath.c_str(); - session = std::make_unique(env, modelPath, sessionOption); + _session = std::make_unique(_env, modelPath, _sessionOption); Ort::AllocatorWithDefaultOptions allocator; - size_t inputNodesNum = session->GetInputCount(); + size_t inputNodesNum = _session->GetInputCount(); for (size_t i = 0; i < inputNodesNum; i++) { Ort::AllocatedStringPtr input_node_name = - session->GetInputNameAllocated(i, allocator); + _session->GetInputNameAllocated(i, allocator); char *temp_buf = new char[50]; strcpy(temp_buf, input_node_name.get()); - inputNodeNames.push_back(temp_buf); + _inputNodeNames.push_back(temp_buf); } - size_t OutputNodesNum = session->GetOutputCount(); + size_t OutputNodesNum = _session->GetOutputCount(); for (size_t i = 0; i < OutputNodesNum; i++) { Ort::AllocatedStringPtr output_node_name = - session->GetOutputNameAllocated(i, allocator); + _session->GetOutputNameAllocated(i, allocator); char *temp_buf = new char[50]; strcpy(temp_buf, output_node_name.get()); - outputNodeNames.push_back(temp_buf); + _outputNodeNames.push_back(temp_buf); } - options = Ort::RunOptions{nullptr}; + _options = Ort::RunOptions{nullptr}; auto input_shape = - session->GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); + _session->GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); auto output_shape = - session->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); - auto output_type = session->GetOutputTypeInfo(0) + _session->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); + auto output_type = _session->GetOutputTypeInfo(0) .GetTensorTypeAndShapeInfo() .GetElementType(); - WarmUpSession(modelType); + WarmUpSession(_modelType); return RET_OK; } catch (const std::exception &e) { const char *str1 = "[SAM]:"; @@ -109,30 +109,30 @@ const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) { std::strcpy(merged, str_result.c_str()); std::cout << 
merged << std::endl; delete[] merged; - return "[SAM]:Create session failed."; + return "[SAM]:Create _session failed."; } } const char *SAM::RunSession(const cv::Mat &iImg, std::vector &oResult, - SEG::MODEL_TYPE modelType, SEG::DL_RESULT &result) { + SEG::MODEL_TYPE _modelType, SEG::DL_RESULT &result) { #ifdef benchmark clock_t starttime_1 = clock(); #endif // benchmark Utils utilities; const char *Ret = RET_OK; cv::Mat processedImg; - utilities.PreProcess(iImg, imgSize, processedImg); + utilities.PreProcess(iImg, _imgSize, processedImg); float *blob = new float[processedImg.total() * 3]; utilities.BlobFromImage(processedImg, blob); std::vector inputNodeDims; - if (modelType == SEG::SAM_SEGMENT_ENCODER) { - inputNodeDims = {1, 3, imgSize.at(0), imgSize.at(1)}; - } else if (modelType == SEG::SAM_SEGMENT_DECODER) { + if (_modelType == SEG::SAM_SEGMENT_ENCODER) { + inputNodeDims = {1, 3, _imgSize.at(0), _imgSize.at(1)}; + } else if (_modelType == SEG::SAM_SEGMENT_DECODER) { // Input size or SAM decoder model is 256x64x64 for the decoder inputNodeDims = {1, 256, 64, 64}; } - TensorProcess(starttime_1, iImg, blob, inputNodeDims, modelType, oResult, + TensorProcess(starttime_1, iImg, blob, inputNodeDims, _modelType, oResult, utilities, result); return Ret; @@ -141,11 +141,11 @@ const char *SAM::RunSession(const cv::Mat &iImg, template const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, N &blob, std::vector &inputNodeDims, - SEG::MODEL_TYPE modelType, + SEG::MODEL_TYPE _modelType, std::vector &oResult, Utils &utilities, SEG::DL_RESULT &result) { - switch (modelType) { + switch (_modelType) { case SEG::SAM_SEGMENT_ENCODER: // case OTHER_SAM_MODEL: { @@ -153,14 +153,14 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, Ort::Value inputTensor = Ort::Value::CreateTensor::type>( Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), - blob, 3 * imgSize.at(0) * imgSize.at(1), inputNodeDims.data(), + blob, 3 * 
_imgSize.at(0) * _imgSize.at(1), inputNodeDims.data(), inputNodeDims.size()); #ifdef benchmark clock_t starttime_2 = clock(); #endif // benchmark auto outputTensor = - session->Run(options, inputNodeNames.data(), &inputTensor, 1, - outputNodeNames.data(), outputNodeNames.size()); + _session->Run(_options, _inputNodeNames.data(), &inputTensor, 1, + _outputNodeNames.data(), _outputNodeNames.size()); #ifdef benchmark clock_t starttime_3 = clock(); #endif // benchmark @@ -186,7 +186,7 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, (double)(starttime_3 - starttime_2) / CLOCKS_PER_SEC * 1000; double post_process_time = (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000; - if (cudaEnable) { + if (_cudaEnable) { std::cout << "[SAM(CUDA)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl; @@ -269,7 +269,7 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, std::vector hasMaskInput = {0.0f}; // No mask provided std::vector hasMaskInputDims = {1}; - utilities.ScaleBboxPoints(iImg, imgSize, pointCoords, pointCoordsScaled); + utilities.ScaleBboxPoints(iImg, _imgSize, pointCoords, pointCoordsScaled); std::vector inputTensors = utilities.PrepareInputTensor( decoderInputTensor, pointCoordsScaled, pointCoordsDims, pointLabels, @@ -279,15 +279,15 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, #ifdef benchmark starttime_2 = clock(); #endif // benchmark - auto output_tensors = session->Run( - options, inputNodeNames.data(), inputTensors.data(), - inputTensors.size(), outputNodeNames.data(), outputNodeNames.size()); + auto output_tensors = _session->Run( + _options, _inputNodeNames.data(), inputTensors.data(), + inputTensors.size(), _outputNodeNames.data(), _outputNodeNames.size()); #ifdef benchmark starttime_3 = clock(); #endif // benchmark - utilities.PostProcess(output_tensors, iImg, imgSize, result); + 
utilities.PostProcess(output_tensors, iImg, _imgSize, result); } // Add the result to oResult oResult.push_back(result); @@ -302,7 +302,7 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, (double)(starttime_3 - starttime_2) / CLOCKS_PER_SEC * 1000; double post_process_time = (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000; - if (cudaEnable) { + if (_cudaEnable) { std::cout << "[SAM(CUDA)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl; @@ -321,31 +321,31 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, return RET_OK; } -char *SAM::WarmUpSession(SEG::MODEL_TYPE modelType) { +char *SAM::WarmUpSession(SEG::MODEL_TYPE _modelType) { clock_t starttime_1 = clock(); Utils utilities; - cv::Mat iImg = cv::Mat(cv::Size(imgSize.at(0), imgSize.at(1)), CV_8UC3); + cv::Mat iImg = cv::Mat(cv::Size(_imgSize.at(0), _imgSize.at(1)), CV_8UC3); cv::Mat processedImg; - utilities.PreProcess(iImg, imgSize, processedImg); + utilities.PreProcess(iImg, _imgSize, processedImg); float *blob = new float[iImg.total() * 3]; utilities.BlobFromImage(processedImg, blob); - std::vector SAM_input_node_dims = {1, 3, imgSize.at(0), - imgSize.at(1)}; - switch (modelType) { + std::vector SAM_input_node_dims = {1, 3, _imgSize.at(0), + _imgSize.at(1)}; + switch (_modelType) { case SEG::SAM_SEGMENT_ENCODER: { Ort::Value input_tensor = Ort::Value::CreateTensor( Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, - 3 * imgSize.at(0) * imgSize.at(1), SAM_input_node_dims.data(), + 3 * _imgSize.at(0) * _imgSize.at(1), SAM_input_node_dims.data(), SAM_input_node_dims.size()); auto output_tensors = - session->Run(options, inputNodeNames.data(), &input_tensor, 1, - outputNodeNames.data(), outputNodeNames.size()); + _session->Run(_options, _inputNodeNames.data(), &input_tensor, 1, + _outputNodeNames.data(), _outputNodeNames.size()); delete[] blob; 
clock_t starttime_4 = clock(); double post_process_time = (double)(starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000; - if (cudaEnable) { + if (_cudaEnable) { std::cout << "[SAM(CUDA)]: " << "Cuda warm-up cost " << post_process_time << " ms. " << std::endl; } @@ -387,7 +387,7 @@ char *SAM::WarmUpSession(SEG::MODEL_TYPE modelType) { std::vector pointCoordsScaled; - utilities.ScaleBboxPoints(iImg, imgSize, pointCoords, pointCoordsScaled); + utilities.ScaleBboxPoints(iImg, _imgSize, pointCoords, pointCoordsScaled); // Labels for the points std::vector pointLabels = {1.0f}; // All points are foreground @@ -403,17 +403,17 @@ char *SAM::WarmUpSession(SEG::MODEL_TYPE modelType) { pointLabelsDims, maskInput, maskInputDims, hasMaskInput, hasMaskInputDims); - auto output_tensors = session->Run( - options, inputNodeNames.data(), inputTensors.data(), - inputTensors.size(), outputNodeNames.data(), outputNodeNames.size()); + auto output_tensors = _session->Run( + _options, _inputNodeNames.data(), inputTensors.data(), + inputTensors.size(), _outputNodeNames.data(), _outputNodeNames.size()); } - outputNodeNames.size(); + _outputNodeNames.size(); delete[] blob; clock_t starttime_4 = clock(); double post_process_time = (double)(starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000; - if (cudaEnable) { + if (_cudaEnable) { std::cout << "[SAM(CUDA)]: " << "Cuda warm-up cost " << post_process_time << " ms. 
" << std::endl; } diff --git a/src/utils.cpp b/src/utils.cpp index ca66b6c..2c826ab 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -26,13 +26,13 @@ char *Utils::PreProcess(const cv::Mat &iImg, std::vector iImgSize, cv::Mat if (iImg.cols >= iImg.rows) { - resizeScales = iImg.cols / (float)iImgSize.at(0); - cv::resize(oImg, oImg, cv::Size(iImgSize.at(0), int(iImg.rows / resizeScales))); + _resizeScales = iImg.cols / (float)iImgSize.at(0); + cv::resize(oImg, oImg, cv::Size(iImgSize.at(0), int(iImg.rows / _resizeScales))); } else { - resizeScales = iImg.rows / (float)iImgSize.at(1); - cv::resize(oImg, oImg, cv::Size(int(iImg.cols / resizeScales), iImgSize.at(1))); + _resizeScales = iImg.rows / (float)iImgSize.at(1); + cv::resize(oImg, oImg, cv::Size(int(iImg.cols / _resizeScales), iImgSize.at(1))); } //cv::Mat tempImg = cv::Mat::zeros(iImgSize.at(0), iImgSize.at(1), CV_8UC3); cv::Mat tempImg = cv::Mat::zeros(iImgSize.at(1), iImgSize.at(0), CV_8UC3); @@ -52,12 +52,12 @@ void Utils::ScaleBboxPoints(const cv::Mat &iImg, std::vector imgSize, std:: if (iImg.cols >= iImg.rows) { scale = imgSize[0] / (float)iImg.cols; - resizeScalesBbox = iImg.cols / (float)imgSize[0]; + _resizeScalesBbox = iImg.cols / (float)imgSize[0]; } else { scale = imgSize[1] / (float)iImg.rows; - resizeScalesBbox = iImg.rows / (float)imgSize[1]; + _resizeScalesBbox = iImg.rows / (float)imgSize[1]; } // Top-Left placement (matching PreProcess) From 959a3ffc6d2d597c0a6c80d6a5a34f15280ffab7 Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Fri, 5 Sep 2025 15:10:28 +0200 Subject: [PATCH 14/28] Separrated test files per category (utils or sam related for now) --- CMakeLists.txt | 18 ++++- test/sam_test.cpp | 68 +++++++---------- test/test_utils.cpp | 175 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 214 insertions(+), 47 deletions(-) create mode 100644 test/test_utils.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index cece29a..ffc7611 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt 
@@ -97,13 +97,23 @@ if (CATKIN_ENABLE_TESTING) # find_package(catkin_lint_cmake REQUIRED) # catkin_add_catkin_lint_test("-W2 --ignore HEADER_OUTSIDE_PACKAGE_INCLUDE_PATH") - catkin_add_gtest(sam_onnx_ros_tests test/sam_test.cpp) - if(TARGET sam_onnx_ros_tests) - target_link_libraries(sam_onnx_ros_tests sam_onnx_ros_core ${catkin_LIBRARIES}) - target_include_directories(sam_onnx_ros_tests PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) + # Utils unit tests (no models needed) + catkin_add_gtest(utils_tests test/test_utils.cpp) + if(TARGET utils_tests) + target_link_libraries(utils_tests sam_onnx_ros_core GTest::gtest_main ${catkin_LIBRARIES}) + target_include_directories(utils_tests PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) + endif() + + # SAM integration-ish tests (may need models) + catkin_add_gtest(sam_tests test/sam_test.cpp) + if(TARGET sam_tests) + target_link_libraries(sam_tests sam_onnx_ros_core GTest::gtest_main ${catkin_LIBRARIES}) + target_include_directories(sam_tests PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) endif() endif() + + #If you want to debug # set(CMAKE_BUILD_TYPE Debug) # set(CMAKE_CXX_FLAGS_DEBUG "-g") diff --git a/test/sam_test.cpp b/test/sam_test.cpp index 75a9c98..521b0d8 100644 --- a/test/sam_test.cpp +++ b/test/sam_test.cpp @@ -1,41 +1,46 @@ -#include "segmentation.h" -#include "sam_inference.h" #include #include -#include "dl_types.h" -#include "utils.h" #include +#include "segmentation.h" +#include "sam_inference.h" +#include "dl_types.h" + +// This file contains higher-level (integration-ish) tests. +// They cover object/session creation and a full pipeline run using synthetic images. +// These tests may require the .onnx model files to be present next to the binary or in a known dir. 
class SamInferenceTest : public ::testing::Test { protected: void SetUp() override { - // Create test images with different characteristics + // Create simple synthetic images: + // - a white 640x640 (square) + // - a gray 800x600 (non-square) testImage_640x640 = cv::Mat::ones(640, 640, CV_8UC3) * 255; testImage_800x600 = cv::Mat::ones(600, 800, CV_8UC3) * 128; - // Create a more realistic test image with some patterns + // A "random noise" image to simulate realistic content for end-to-end checks. testImage_realistic = cv::Mat(640, 640, CV_8UC3); cv::randu(testImage_realistic, cv::Scalar(0,0,0), cv::Scalar(255,255,255)); - // Setup common parameters + // Cache non-square size for preprocessing helpers. NonSquareImgSize = { testImage_800x600.cols, testImage_800x600.rows }; - // Use the package Initializer/SegmentAnything for the full pipeline - + // Use package helpers to build default params and SAM objects. std::tie(samSegmentors, params_encoder, params_decoder) = Initializer(); #ifdef USE_CUDA - params_encoder.cudaEnable = true; + params_encoder.cudaEnable = true; // Enable CUDA if compiled with it #else - params_encoder.cudaEnable = false; + params_encoder.cudaEnable = false; // Otherwise run on CPU #endif } + // Clean up the SAM objects after each test. void TearDown() override { samSegmentors[0].reset(); samSegmentors[1].reset(); } - // Test data + // Test data and objects shared across tests. Utils utilities; cv::Mat testImage_640x640, testImage_800x600, testImage_realistic; std::vector NonSquareImgSize; @@ -43,8 +48,7 @@ class SamInferenceTest : public ::testing::Test SEG::DL_INIT_PARAM params_encoder, params_decoder; }; - - +// Simple smoke test: we can construct a SAM object without throwing. 
TEST_F(SamInferenceTest, ObjectCreation) { EXPECT_NO_THROW({ @@ -52,26 +56,8 @@ TEST_F(SamInferenceTest, ObjectCreation) }); } -TEST_F(SamInferenceTest, PreProcessSquareImage) -{ - cv::Mat processedImg; - const char* result = utilities.PreProcess(testImage_640x640, params_encoder.imgSize, processedImg); - - EXPECT_EQ(result, nullptr) << "PreProcess should succeed"; - EXPECT_EQ(processedImg.size(), cv::Size(1024, 1024)) << "Output should be letterboxed to 1024x1024"; - EXPECT_FALSE(processedImg.empty()) << "Processed image should not be empty"; -} - -TEST_F(SamInferenceTest, PreProcessRectangularImage) -{ - cv::Mat processedImg; - const char* result = utilities.PreProcess(testImage_800x600, NonSquareImgSize, processedImg); - - EXPECT_EQ(result, nullptr) << "PreProcess should succeed"; - EXPECT_EQ(processedImg.size(), cv::Size(800, 600)) << "Output should be letterboxed to 800x600"; - EXPECT_FALSE(processedImg.empty()) << "Processed image should not be empty"; -} - +// Confirms that with a present encoder model we can initialize a session. +// Skips if the model file is not available. TEST_F(SamInferenceTest, CreateSessionWithValidModel) { if (!std::filesystem::exists("SAM_encoder.onnx")) { @@ -81,6 +67,7 @@ TEST_F(SamInferenceTest, CreateSessionWithValidModel) EXPECT_NE(samSegmentors[0], nullptr) << "CreateSession should succeed with valid parameters"; } +// Confirms that giving an invalid model path returns an error (no crash). TEST_F(SamInferenceTest, CreateSessionWithInvalidModel) { params_encoder.modelPath = "nonexistent_model.onnx"; @@ -88,6 +75,8 @@ TEST_F(SamInferenceTest, CreateSessionWithInvalidModel) EXPECT_NE(result, nullptr) << "CreateSession should fail with invalid model path"; } +// End-to-end check: with both encoder/decoder models present, the pipeline runs +// and returns a mask vector. Skips if models are not available. 
TEST_F(SamInferenceTest, FullInferencePipeline) { if (!std::filesystem::exists("SAM_encoder.onnx") || @@ -95,15 +84,8 @@ TEST_F(SamInferenceTest, FullInferencePipeline) GTEST_SKIP() << "Models not found in build dir"; } - - auto masks = SegmentAnything(samSegmentors, params_encoder, params_decoder, testImage_realistic); - EXPECT_TRUE(masks.size() >= 0) << "Masks should be a valid output vector"; -} -// Run all tests -int main(int argc, char **argv) -{ - testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); + // We only check that a vector is returned. (You can strengthen this to EXPECT_FALSE(masks.empty()).) + EXPECT_TRUE(masks.size() >= 0) << "Masks should be a valid output vector"; } \ No newline at end of file diff --git a/test/test_utils.cpp b/test/test_utils.cpp new file mode 100644 index 0000000..27eaaa0 --- /dev/null +++ b/test/test_utils.cpp @@ -0,0 +1,175 @@ +#include +#include +#include "utils.h" + +// This file contains small, focused unit tests for Utils. +// We verify image preprocessing (channel conversion, aspect-preserving resize, padding) +// and coordinate scaling to match preprocessing. + +// Lightweight fixture: gives each test a fresh Utils instance. +class UtilsTest : public ::testing::Test { +protected: + Utils u; +}; + +// Checks that a grayscale (1-channel) image is converted to RGB (3-channel) +// and the output image is exactly the requested target size (letterboxed). +TEST_F(UtilsTest, GrayscaleToRGBKeepsSize) { + cv::Mat gray = cv::Mat::zeros(300, 500, CV_8UC1); + cv::Mat out; + std::vector target{1024, 1024}; + + // Call PreProcess and expect no error. + const char* err = u.PreProcess(gray, target, out); + ASSERT_EQ(err, nullptr); + + // After preprocessing, we must have 3 channels (RGB). + EXPECT_EQ(out.channels(), 3); + + // The letterboxed output must match the target canvas size. 
+ EXPECT_EQ(out.size(), cv::Size(target[0], target[1])); +} + +// Verifies three things: +// 1) Aspect ratio is preserved when resizing to the target. +// 2) The resized image is placed at the top-left (0,0). +// 3) The padding area is zero (black). +TEST_F(UtilsTest, PreprocessTopLeftPaddingAndAspect) { + const cv::Scalar fill(10, 20, 30); // Input color in BGR + cv::Mat img(720, 1280, CV_8UC3, fill); + cv::Mat out; + std::vector target{1024, 1024}; + + ASSERT_EQ(u.PreProcess(img, target, out), nullptr); + ASSERT_EQ(out.size(), cv::Size(target[0], target[1])); + ASSERT_EQ(out.channels(), 3); + + // Width drives resizing here (landscape). Width becomes 1024, height scales accordingly. + int resized_w = target[0]; + int resized_h = static_cast(img.rows / (img.cols / static_cast(target[0]))); + + // PreProcess converts BGR -> RGB, so expected color is swapped. + cv::Scalar expected_rgb(fill[2], fill[1], fill[0]); + + // The top-left region (resized content) should keep the image color. + cv::Mat roi_top = out(cv::Rect(0, 0, resized_w, resized_h)); + cv::Scalar mean_top = cv::mean(roi_top); + EXPECT_NEAR(mean_top[0], expected_rgb[0], 1.0); + EXPECT_NEAR(mean_top[1], expected_rgb[1], 1.0); + EXPECT_NEAR(mean_top[2], expected_rgb[2], 1.0); + + // The area below the resized content (padding) must be zeros. + if (resized_h < target[1]) { + cv::Mat roi_pad = out(cv::Rect(0, resized_h, target[0], target[1] - resized_h)); + cv::Mat gray; cv::cvtColor(roi_pad, gray, cv::COLOR_BGR2GRAY); + EXPECT_EQ(cv::countNonZero(gray), 0); + } +} + +// Parameterized fixture: used with TEST_P to run the same test body +// for many (input size, target size) pairs. +class UtilsPreprocessParamTest + : public ::testing::TestWithParam> { +protected: + Utils u; +}; + +// TEST_P defines a parameterized test. It runs once per parameter set. +// We assert that: +// - Output size equals the target canvas. +// - Output has 3 channels (RGB). 
+// - The padding area (bottom or right) is zero depending on which side letterboxes. +TEST_P(UtilsPreprocessParamTest, LetterboxWithinBoundsAndChannels3) { + const auto [inSize, target] = GetParam(); + cv::Mat img(inSize, CV_8UC3, cv::Scalar(1, 2, 3)); + cv::Mat out; + + ASSERT_EQ(u.PreProcess(img, {target.width, target.height}, out), nullptr); + EXPECT_EQ(out.size(), target); + EXPECT_EQ(out.channels(), 3); + + // Detect which side letterboxes and check that the padded region is zeros. + if (inSize.width >= inSize.height) { + int resized_h = static_cast(inSize.height / (inSize.width / static_cast(target.width))); + if (resized_h < target.height) { + cv::Mat roi_pad = out(cv::Rect(0, resized_h, target.width, target.height - resized_h)); + cv::Mat gray; cv::cvtColor(roi_pad, gray, cv::COLOR_BGR2GRAY); + EXPECT_EQ(cv::countNonZero(gray), 0); + } + } else { + int resized_w = static_cast(inSize.width / (inSize.height / static_cast(target.height))); + if (resized_w < target.width) { + cv::Mat roi_pad = out(cv::Rect(resized_w, 0, target.width - resized_w, target.height)); + cv::Mat gray; cv::cvtColor(roi_pad, gray, cv::COLOR_BGR2GRAY); + EXPECT_EQ(cv::countNonZero(gray), 0); + } + } +} + +// INSTANTIATE_TEST_SUITE_P provides the concrete parameter values. +// Each pair (input size, target size) creates a separate test instance. +INSTANTIATE_TEST_SUITE_P( + ManySizes, + UtilsPreprocessParamTest, + ::testing::Values( + std::make_tuple(cv::Size(640, 640), cv::Size(1024, 1024)), // square -> square + std::make_tuple(cv::Size(800, 600), cv::Size(800, 600)), // same size (no resize) + std::make_tuple(cv::Size(600, 800), cv::Size(800, 600)), // portrait -> landscape + std::make_tuple(cv::Size(1280, 720), cv::Size(1024, 1024)) // wide -> square + ) +); + +// Separate fixture for point scaling tests. +class UtilsScaleBboxPointsTest : public ::testing::Test { +protected: + Utils u; +}; + +// If the input size and target size are the same, scaling should do nothing. 
+TEST_F(UtilsScaleBboxPointsTest, IdentityWhenSameSize) { + cv::Mat img(600, 800, CV_8UC3); + std::vector target{800, 600}; + std::vector pts{100.f, 100.f, 700.f, 500.f}; + std::vector scaled; + + u.ScaleBboxPoints(img, target, pts, scaled); + ASSERT_EQ(scaled.size(), pts.size()); + EXPECT_NEAR(scaled[0], pts[0], 1e-3); + EXPECT_NEAR(scaled[1], pts[1], 1e-3); + EXPECT_NEAR(scaled[2], pts[2], 1e-3); + EXPECT_NEAR(scaled[3], pts[3], 1e-3); +} + +// When width drives the resize (landscape), both x and y are scaled by the same factor. +// We expect coordinates to be multiplied by target_width / input_width. +TEST_F(UtilsScaleBboxPointsTest, ScalesWidthDominant) { + cv::Mat img(300, 600, CV_8UC3); // h=300, w=600 (w >= h) + std::vector target{1200, 600}; // width doubles + std::vector pts{100.f, 50.f, 500.f, 250.f}; + std::vector scaled; + + u.ScaleBboxPoints(img, target, pts, scaled); + ASSERT_EQ(scaled.size(), pts.size()); + const float scale = target[0] / static_cast(img.cols); // 1200/600 = 2 + EXPECT_NEAR(scaled[0], pts[0] * scale, 1e-3); + EXPECT_NEAR(scaled[1], pts[1] * scale, 1e-3); + EXPECT_NEAR(scaled[2], pts[2] * scale, 1e-3); + EXPECT_NEAR(scaled[3], pts[3] * scale, 1e-3); +} + +// When height drives the resize (portrait), both x and y are scaled by the same factor. +// We expect coordinates to be multiplied by target_height / input_height. 
+TEST_F(UtilsScaleBboxPointsTest, ScalesHeightDominant) { + cv::Mat img(600, 300, CV_8UC3); // h=600, w=300 (h > w) + std::vector target{600, 1200}; // height doubles + std::vector pts{100.f, 50.f, 200.f, 500.f}; + std::vector scaled; + + u.ScaleBboxPoints(img, target, pts, scaled); + ASSERT_EQ(scaled.size(), pts.size()); + const float scale = target[1] / static_cast(img.rows); // 1200/600 = 2 + EXPECT_NEAR(scaled[0], pts[0] * scale, 1e-3); + EXPECT_NEAR(scaled[1], pts[1] * scale, 1e-3); + EXPECT_NEAR(scaled[2], pts[2] * scale, 1e-3); + EXPECT_NEAR(scaled[3], pts[3] * scale, 1e-3); +} \ No newline at end of file From fa97e64961952205c6de52b5573aac2b88a64d23 Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Wed, 10 Sep 2025 14:35:19 +0200 Subject: [PATCH 15/28] Updated initializer and SegmentAnything modules to store the data to the custom result structs properly --- CMakeLists.txt | 6 +++--- include/segmentation.h | 6 ++++-- src/main.cpp | 8 +++++--- src/segmentation.cpp | 18 +++++++++--------- src/utils.cpp | 2 +- 5 files changed, 22 insertions(+), 18 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ffc7611..1489bcf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,7 @@ include_directories(${OpenCV_INCLUDE_DIRS}) # -------------- ONNXRuntime ------------------# set(ONNXRUNTIME_VERSION 1.21.0) -set(ONNXRUNTIME_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/../hero_sam/onnxruntime-linux-x64-gpu-1.21.1") +set(ONNXRUNTIME_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/../hero_sam.bak/onnxruntime-linux-x64-gpu-1.21.1") include_directories(${ONNXRUNTIME_ROOT}/include) # -------------- Cuda ------------------# @@ -84,8 +84,8 @@ add_executable(${PROJECT_NAME} src/main.cpp) target_link_libraries(${PROJECT_NAME} sam_onnx_ros_core) # Copy sam_.onnx file to the same folder of the executable file -configure_file(~/Documents/repos/hero_sam/sam_inference/model/SAM_mask_decoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_mask_decoder.onnx COPYONLY) 
-configure_file(~/Documents/repos/hero_sam/sam_inference/model/SAM_encoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_encoder.onnx COPYONLY) +configure_file(~/Documents/repos/hero_sam.bak/sam_inference/model/SAM_mask_decoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_mask_decoder.onnx COPYONLY) +configure_file(~/Documents/repos/hero_sam.bak/sam_inference/model/SAM_encoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_encoder.onnx COPYONLY) # Create folder name images in the same folder of the executable file add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD diff --git a/include/segmentation.h b/include/segmentation.h index b341f8d..e6a6d67 100644 --- a/include/segmentation.h +++ b/include/segmentation.h @@ -4,7 +4,9 @@ #include #include "sam_inference.h" -std::tuple>, SEG::_DL_INIT_PARAM, SEG::_DL_INIT_PARAM> Initializer(); -std::vector SegmentAnything(std::vector>& samSegmentors, const SEG::_DL_INIT_PARAM& params_encoder, const SEG::_DL_INIT_PARAM& params_decoder, cv::Mat& img); +std::tuple>, SEG::_DL_INIT_PARAM, SEG::_DL_INIT_PARAM, SEG::DL_RESULT, std::vector> Initializer(); +void SegmentAnything(std::vector>& samSegmentors, const SEG::_DL_INIT_PARAM& params_encoder, const SEG::_DL_INIT_PARAM& params_decoder, const cv::Mat& img, +std::vector &resSam, + SEG::DL_RESULT &res); #endif // SEGMENTATION_H \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp index c9624c5..e52e9ce 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -10,7 +10,9 @@ int main() std::vector> samSegmentors; SEG::DL_INIT_PARAM params_encoder; SEG::DL_INIT_PARAM params_decoder; - std::tie(samSegmentors, params_encoder, params_decoder) = Initializer(); + std::vector resSam; + SEG::DL_RESULT res; + std::tie(samSegmentors, params_encoder, params_decoder, res, resSam) = Initializer(); std::filesystem::path current_path = std::filesystem::current_path(); std::filesystem::path imgs_path = "/home/amigo/Documents/repos/hero_sam/sam_inference/build/images"; // current_path / <- you could use for (auto &i : 
std::filesystem::directory_iterator(imgs_path)) @@ -19,8 +21,8 @@ int main() { std::string img_path = i.path().string(); cv::Mat img = cv::imread(img_path); - std::vector masks; - masks = SegmentAnything(samSegmentors, params_encoder, params_decoder, img); + + SegmentAnything(samSegmentors, params_encoder, params_decoder, img, resSam, res); } } diff --git a/src/segmentation.cpp b/src/segmentation.cpp index 25b8fae..5bb9744 100644 --- a/src/segmentation.cpp +++ b/src/segmentation.cpp @@ -1,7 +1,7 @@ #include "segmentation.h" std::tuple>, SEG::DL_INIT_PARAM, - SEG::DL_INIT_PARAM> + SEG::DL_INIT_PARAM, SEG::DL_RESULT, std::vector> Initializer() { std::vector> samSegmentors; samSegmentors.push_back(std::make_unique()); @@ -11,7 +11,8 @@ Initializer() { std::unique_ptr samSegmentorDecoder = std::make_unique(); SEG::DL_INIT_PARAM params_encoder; SEG::DL_INIT_PARAM params_decoder; - + SEG::DL_RESULT res; + std::vector resSam; params_encoder.rectConfidenceThreshold = 0.1; params_encoder.iouThreshold = 0.5; params_encoder.modelPath = "/home/amigo//Documents/repos/sam_onnx_ros/build/SAM_encoder.onnx"; @@ -31,16 +32,15 @@ Initializer() { samSegmentorDecoder->CreateSession(params_decoder); samSegmentors[0] = std::move(samSegmentorEncoder); samSegmentors[1] = std::move(samSegmentorDecoder); - return {std::move(samSegmentors), params_encoder, params_decoder}; + return {std::move(samSegmentors), params_encoder, params_decoder, res, resSam}; } -std::vector -SegmentAnything(std::vector> &samSegmentors, +void SegmentAnything(std::vector> &samSegmentors, const SEG::DL_INIT_PARAM ¶ms_encoder, - const SEG::DL_INIT_PARAM ¶ms_decoder, cv::Mat &img) { + const SEG::DL_INIT_PARAM ¶ms_decoder, const cv::Mat &img, std::vector &resSam, + SEG::DL_RESULT &res) { + - std::vector resSam; - SEG::DL_RESULT res; SEG::MODEL_TYPE modelTypeRef = params_encoder.modelType; samSegmentors[0]->RunSession(img, resSam, modelTypeRef, res); @@ -48,5 +48,5 @@ SegmentAnything(std::vector> &samSegmentors, 
modelTypeRef = params_decoder.modelType; samSegmentors[1]->RunSession(img, resSam, modelTypeRef, res); - return std::move(res.masks); + // return std::move(res.masks); } diff --git a/src/utils.cpp b/src/utils.cpp index 2c826ab..28a7ded 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -1,6 +1,6 @@ #include "utils.h" #include // for guided filter -#define LOGGING +//#define LOGGING // Constructor Utils::Utils() From 53989b69f48aa2d4d172a55a6d44b3565963921e Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Wed, 10 Sep 2025 15:51:18 +0200 Subject: [PATCH 16/28] Enabled cuda on the decoder as well --- src/segmentation.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/segmentation.cpp b/src/segmentation.cpp index 5bb9744..23efd59 100644 --- a/src/segmentation.cpp +++ b/src/segmentation.cpp @@ -24,6 +24,8 @@ Initializer() { #ifdef USE_CUDA params_encoder.cudaEnable = true; + params_decoder.cudaEnable = true; + #else params_encoder.cudaEnable = false; #endif From 7490f03db3faf315f715260477c6e3549eaf63a3 Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Wed, 10 Sep 2025 16:14:56 +0200 Subject: [PATCH 17/28] Fixed small bug of adding an extra (full img) bounding box --- src/sam_inference.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp index f12c56b..de1b408 100644 --- a/src/sam_inference.cpp +++ b/src/sam_inference.cpp @@ -223,8 +223,7 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, std::vector boundingBoxes; boundingBoxes.push_back(bbox); -#else - result.boxes.push_back(cv::Rect(0, 0, iImg.cols, iImg.rows)); + #endif // ROI #ifdef benchmark From 01da1fc659d9d7fa6e635b1430e19569d40b5549 Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Fri, 12 Sep 2025 12:13:22 +0200 Subject: [PATCH 18/28] Aligned dimensions [high width] between onnx and opencv --- src/main.cpp | 2 +- src/sam_inference.cpp | 19 +++++++++++++++---- 2 files changed, 16 insertions(+), 5 
deletions(-) diff --git a/src/main.cpp b/src/main.cpp index e52e9ce..9744737 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -14,7 +14,7 @@ int main() SEG::DL_RESULT res; std::tie(samSegmentors, params_encoder, params_decoder, res, resSam) = Initializer(); std::filesystem::path current_path = std::filesystem::current_path(); - std::filesystem::path imgs_path = "/home/amigo/Documents/repos/hero_sam/sam_inference/build/images"; // current_path / <- you could use + std::filesystem::path imgs_path = "/home/amigo/Documents/repos/hero_sam.bak/sam_inference/build/images"; // current_path / <- you could use for (auto &i : std::filesystem::directory_iterator(imgs_path)) { if (i.path().extension() == ".jpg" || i.path().extension() == ".png" || i.path().extension() == ".jpeg") diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp index de1b408..0ccec9a 100644 --- a/src/sam_inference.cpp +++ b/src/sam_inference.cpp @@ -93,6 +93,16 @@ const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) { auto input_shape = _session->GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); + // Optional shape check when model has fixed dims (not -1) + if (input_shape.size() >= 4 && input_shape[2] > 0 && input_shape[3] > 0) { + const int64_t expectH = _imgSize.at(1); + const int64_t expectW = _imgSize.at(0); + if (input_shape[2] != expectH || input_shape[3] != expectW) { + std::cerr << "[SAM]: Model input (H,W)=(" << input_shape[2] << "," << input_shape[3] + << ") mismatches configured imgSize (W,H)=(" << _imgSize[0] << "," << _imgSize[1] << ")." 
+ << std::endl; + } + } auto output_shape = _session->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); auto output_type = _session->GetOutputTypeInfo(0) @@ -127,9 +137,9 @@ const char *SAM::RunSession(const cv::Mat &iImg, utilities.BlobFromImage(processedImg, blob); std::vector inputNodeDims; if (_modelType == SEG::SAM_SEGMENT_ENCODER) { - inputNodeDims = {1, 3, _imgSize.at(0), _imgSize.at(1)}; + // NCHW: H = imgSize[1], W = imgSize[0] + inputNodeDims = {1, 3, _imgSize.at(1), _imgSize.at(0)}; } else if (_modelType == SEG::SAM_SEGMENT_DECODER) { - // Input size or SAM decoder model is 256x64x64 for the decoder inputNodeDims = {1, 256, 64, 64}; } TensorProcess(starttime_1, iImg, blob, inputNodeDims, _modelType, oResult, @@ -329,8 +339,9 @@ char *SAM::WarmUpSession(SEG::MODEL_TYPE _modelType) { float *blob = new float[iImg.total() * 3]; utilities.BlobFromImage(processedImg, blob); - std::vector SAM_input_node_dims = {1, 3, _imgSize.at(0), - _imgSize.at(1)}; + + // NCHW: H = imgSize[1], W = imgSize[0] + std::vector SAM_input_node_dims = {1, 3, _imgSize.at(1), _imgSize.at(0)}; switch (_modelType) { case SEG::SAM_SEGMENT_ENCODER: { Ort::Value input_tensor = Ort::Value::CreateTensor( From 7cdf39aa9e29124011e911ce9123495eb88df049 Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Fri, 12 Sep 2025 12:14:29 +0200 Subject: [PATCH 19/28] corrected tests for the new segmentation way of inference (initialize and segment anything interfaces changed) and added one more test to check the image dimensions W,H --- src/sam_inference.cpp | 11 +---------- test/sam_test.cpp | 9 ++++++--- test/test_utils.cpp | 16 ++++++++++++++++ 3 files changed, 23 insertions(+), 13 deletions(-) diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp index 0ccec9a..3ae5677 100644 --- a/src/sam_inference.cpp +++ b/src/sam_inference.cpp @@ -93,16 +93,7 @@ const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) { auto input_shape = 
_session->GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); - // Optional shape check when model has fixed dims (not -1) - if (input_shape.size() >= 4 && input_shape[2] > 0 && input_shape[3] > 0) { - const int64_t expectH = _imgSize.at(1); - const int64_t expectW = _imgSize.at(0); - if (input_shape[2] != expectH || input_shape[3] != expectW) { - std::cerr << "[SAM]: Model input (H,W)=(" << input_shape[2] << "," << input_shape[3] - << ") mismatches configured imgSize (W,H)=(" << _imgSize[0] << "," << _imgSize[1] << ")." - << std::endl; - } - } + auto output_shape = _session->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); auto output_type = _session->GetOutputTypeInfo(0) diff --git a/test/sam_test.cpp b/test/sam_test.cpp index 521b0d8..2ae4c73 100644 --- a/test/sam_test.cpp +++ b/test/sam_test.cpp @@ -28,13 +28,14 @@ class SamInferenceTest : public ::testing::Test NonSquareImgSize = { testImage_800x600.cols, testImage_800x600.rows }; // Use package helpers to build default params and SAM objects. - std::tie(samSegmentors, params_encoder, params_decoder) = Initializer(); + std::tie(samSegmentors, params_encoder, params_decoder, res, resSam) = Initializer(); #ifdef USE_CUDA params_encoder.cudaEnable = true; // Enable CUDA if compiled with it #else params_encoder.cudaEnable = false; // Otherwise run on CPU #endif + } // Clean up the SAM objects after each test. @@ -46,6 +47,8 @@ class SamInferenceTest : public ::testing::Test std::vector NonSquareImgSize; std::vector> samSegmentors; SEG::DL_INIT_PARAM params_encoder, params_decoder; + SEG::DL_RESULT res; + std::vector resSam; }; // Simple smoke test: we can construct a SAM object without throwing. 
@@ -84,8 +87,8 @@ TEST_F(SamInferenceTest, FullInferencePipeline) GTEST_SKIP() << "Models not found in build dir"; } - auto masks = SegmentAnything(samSegmentors, params_encoder, params_decoder, testImage_realistic); + SegmentAnything(samSegmentors, params_encoder, params_decoder, testImage_realistic, resSam, res); // We only check that a vector is returned. (You can strengthen this to EXPECT_FALSE(masks.empty()).) - EXPECT_TRUE(masks.size() >= 0) << "Masks should be a valid output vector"; + EXPECT_TRUE(res.masks.size() >= 0) << "Masks should be a valid output vector"; } \ No newline at end of file diff --git a/test/test_utils.cpp b/test/test_utils.cpp index 27eaaa0..a03b31b 100644 --- a/test/test_utils.cpp +++ b/test/test_utils.cpp @@ -66,6 +66,22 @@ TEST_F(UtilsTest, PreprocessTopLeftPaddingAndAspect) { } } +// Explicitly ensure imgSize is interpreted as [W, H] in PreProcess for non-square targets. +TEST_F(UtilsTest, PreprocessNonSquareWidthHeightOrder) { + // Input image: H=300, W=500 + cv::Mat img(300, 500, CV_8UC3, cv::Scalar(5, 6, 7)); + + // Target canvas (W,H) with non-square dims + std::vector target{640, 480}; + cv::Mat out; + + ASSERT_EQ(u.PreProcess(img, target, out), nullptr); + // cols = width, rows = height + EXPECT_EQ(out.cols, target[0]); + EXPECT_EQ(out.rows, target[1]); + EXPECT_EQ(out.size(), cv::Size(target[0], target[1])); +} + // Parameterized fixture: used with TEST_P to run the same test body // for many (input size, target size) pairs. 
class UtilsPreprocessParamTest From b587a1cf2e01aabbe7bb97ab8c476239c3526b7d Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Fri, 12 Sep 2025 14:10:23 +0200 Subject: [PATCH 20/28] Removed typo / from model path --- src/segmentation.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/segmentation.cpp b/src/segmentation.cpp index 23efd59..41176de 100644 --- a/src/segmentation.cpp +++ b/src/segmentation.cpp @@ -15,7 +15,7 @@ Initializer() { std::vector resSam; params_encoder.rectConfidenceThreshold = 0.1; params_encoder.iouThreshold = 0.5; - params_encoder.modelPath = "/home/amigo//Documents/repos/sam_onnx_ros/build/SAM_encoder.onnx"; + params_encoder.modelPath = "/home/amigo/Documents/repos/sam_onnx_ros/build/SAM_encoder.onnx"; params_encoder.imgSize = {1024, 1024}; params_decoder = params_encoder; From b83bf61153c56c0b1d759ad7e939369704cbb65d Mon Sep 17 00:00:00 2001 From: Matthijs van der Burgh Date: Tue, 16 Sep 2025 10:23:52 +0200 Subject: [PATCH 21/28] Bump min required cmake version to 3.14 --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1489bcf..12ca294 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.0.2) +cmake_minimum_required(VERSION 3.14) project(sam_onnx_ros) From 733167f3919e540972fba0733e7823072f32b27c Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Tue, 16 Sep 2025 20:11:04 +0200 Subject: [PATCH 22/28] EOF line added and package.xml structure update --- LICENSE | 37 ++++++++++++++++++++----------------- README.md | 11 +++-------- include/dl_types.h | 2 +- include/sam_inference.h | 2 +- include/segmentation.h | 2 +- include/utils.h | 2 +- package.xml | 7 +++++-- src/main.cpp | 2 +- src/utils.cpp | 2 +- test/sam_test.cpp | 2 +- test/test_utils.cpp | 2 +- 11 files changed, 36 insertions(+), 35 deletions(-) diff --git a/LICENSE b/LICENSE index 7121e4a..c640c63 100644 --- a/LICENSE +++ b/LICENSE @@ -1,22 
+1,25 @@ -Custom License Agreement +BSD 2-Clause License -1. License Grant You are hereby granted a non-exclusive, non-transferable license to use, reproduce, and distribute the code (hereinafter referred to as "the Software") under the following conditions: +Copyright (c) 2021, Eindhoven University of Technology - CST Robotics Group +All rights reserved. -2. Conditions of Use +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: -Non-Commercial Use: You may use the Software for personal, educational, or non-commercial purposes without any additional permissions. -Commercial Use: Any commercial use of the Software, including but not limited to selling, licensing, or using it in a commercial product, requires prior written permission from the original developer. -3. Contact Requirement +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. -If you wish to use the Software for commercial purposes, you must contact the original developer at [https://www.linkedin.com/in/hamdi-boukamcha/] to obtain a commercial license. -The terms of any commercial license will be mutually agreed upon and may involve a licensing fee. -4. Attribution +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. -Regardless of whether you are using the Software for commercial or non-commercial purposes, you must provide appropriate credit to the original developer in any distributions or products that use the Software. -5. Disclaimer of Warranty - -The Software is provided "as is," without warranty of any kind, express or implied, including but not limited to the warranties of merchantability, fitness for a particular purpose, and non-infringement. 
In no event shall the original developer be liable for any claim, damages, or other liability, whether in an action of contract, tort, or otherwise, arising from, out of, or in connection with the Software or the use or other dealings in the Software. -6. Governing Law - -This License Agreement shall be governed by and construed in accordance with the laws of France. -By using the Software, you agree to abide by the terms outlined in this License Agreement. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/README.md b/README.md index 0efca14..4692d73 100644 --- a/README.md +++ b/README.md @@ -19,8 +19,8 @@ A high-performance C++ implementation for SAM (segment anything model) using Ten Dynamic Shape Support: Efficient handling of variable input sizes using optimization profiles. CUDA Optimization: Leverage CUDA for preprocessing and efficient memory handling. 
-## 📢 Performance - ### Infernce Time +## 📢 Performance + ### Infernce Time | Component | SpeedSAM | |----------------------------|-----------| @@ -51,7 +51,7 @@ A high-performance C++ implementation for SAM (segment anything model) using Ten │ ├── main.cpp # Main entry point │ └── speedSam.cpp # Implementation of the SpeedSam class └── CMakeLists.txt # CMake configuration - + # 🚀 Installation ## Prerequisites git clone https://github.com/hamdiboukamcha/SPEED-SAM-C-TENSORRT.git @@ -94,8 +94,3 @@ If you use this code in your research, please cite the repository as follows: publisher = {GitHub}, howpublished = {\url{https://github.com/hamdiboukamcha/SPEED-SAM-C-TENSORRT//}}, } - - - - - diff --git a/include/dl_types.h b/include/dl_types.h index 5141284..033df56 100644 --- a/include/dl_types.h +++ b/include/dl_types.h @@ -68,4 +68,4 @@ namespace SEG } DL_RESULT; } // namespace SEG -#endif // DL_TYPES_H \ No newline at end of file +#endif // DL_TYPES_H diff --git a/include/sam_inference.h b/include/sam_inference.h index 7bff0b1..6b4713e 100644 --- a/include/sam_inference.h +++ b/include/sam_inference.h @@ -46,4 +46,4 @@ class SAM float _iouThreshold; }; -#endif // SAMINFERENCE_H \ No newline at end of file +#endif // SAMINFERENCE_H diff --git a/include/segmentation.h b/include/segmentation.h index e6a6d67..83102e0 100644 --- a/include/segmentation.h +++ b/include/segmentation.h @@ -9,4 +9,4 @@ void SegmentAnything(std::vector>& samSegmentors, const SEG std::vector &resSam, SEG::DL_RESULT &res); -#endif // SEGMENTATION_H \ No newline at end of file +#endif // SEGMENTATION_H diff --git a/include/utils.h b/include/utils.h index 6cb8819..a471512 100644 --- a/include/utils.h +++ b/include/utils.h @@ -55,4 +55,4 @@ class Utils float _resizeScalesBbox; // letterbox scale }; -#endif // UTILS_H \ No newline at end of file +#endif // UTILS_H diff --git a/package.xml b/package.xml index b00e6d6..7d17b8d 100644 --- a/package.xml +++ b/package.xml @@ -7,6 +7,7 @@ 0.0.0 Segment 
Anything Model (SAM) segmentation + Iason Theodorou Iason Theodorou BSD @@ -14,15 +15,17 @@ catkin libopencv-dev - libopencv-dev onnxruntime_ros + + libopencv-dev onnxruntime_ros catkin_lint_cmake + doxygen - \ No newline at end of file + diff --git a/src/main.cpp b/src/main.cpp index 9744737..cd0f9dd 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -27,4 +27,4 @@ int main() } } return 0; -} \ No newline at end of file +} diff --git a/src/utils.cpp b/src/utils.cpp index 28a7ded..643dba4 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -219,4 +219,4 @@ void Utils::PostProcess(std::vector &output_tensors, const cv::Mat & { std::cerr << "[SAM]: Unexpected mask tensor shape." << std::endl; } -} \ No newline at end of file +} diff --git a/test/sam_test.cpp b/test/sam_test.cpp index 2ae4c73..cd54d7e 100644 --- a/test/sam_test.cpp +++ b/test/sam_test.cpp @@ -91,4 +91,4 @@ TEST_F(SamInferenceTest, FullInferencePipeline) // We only check that a vector is returned. (You can strengthen this to EXPECT_FALSE(masks.empty()).) 
EXPECT_TRUE(res.masks.size() >= 0) << "Masks should be a valid output vector"; -} \ No newline at end of file +} diff --git a/test/test_utils.cpp b/test/test_utils.cpp index a03b31b..92d20ab 100644 --- a/test/test_utils.cpp +++ b/test/test_utils.cpp @@ -188,4 +188,4 @@ TEST_F(UtilsScaleBboxPointsTest, ScalesHeightDominant) { EXPECT_NEAR(scaled[1], pts[1] * scale, 1e-3); EXPECT_NEAR(scaled[2], pts[2] * scale, 1e-3); EXPECT_NEAR(scaled[3], pts[3] * scale, 1e-3); -} \ No newline at end of file +} From 386f54628e801c499f4473d387eadc340f12aa25 Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Tue, 23 Sep 2025 18:51:58 +0200 Subject: [PATCH 23/28] Update read me and include better comments --- README.md | 93 +++++++++++++++---------------------------- src/sam_inference.cpp | 9 +++-- 2 files changed, 37 insertions(+), 65 deletions(-) diff --git a/README.md b/README.md index 4692d73..4b7a36b 100644 --- a/README.md +++ b/README.md @@ -1,40 +1,32 @@ -# SPEED SAM C++ TENSORRT -![SAM C++ TENSORRT](assets/speed_sam_cpp_tenosrrt.PNG) +# SAM C++ ONNX implementation - - GitHub - - - - License - +Inspired by SAM NN from meta and Tensor-RT implementation from: https://github.com/hamdiboukamcha/SPEED-SAM-C-TENSORRT.git ## 🌐 Overview -A high-performance C++ implementation for SAM (segment anything model) using TensorRT and CUDA, optimized for real-time image segmentation tasks. +A high-performance C++ implementation for SAM (segment anything model) using ONNX and CUDA, optimized for real-time image segmentation tasks. -## 📢 Updates - Model Conversion: Build TensorRT engines from ONNX models for accelerated inference. - Segmentation with Points and BBoxes: Easily segment images using selected points or bounding boxes. - FP16 Precision: Choose between FP16 and FP32 for speed and precision balance. - Dynamic Shape Support: Efficient handling of variable input sizes using optimization profiles. - CUDA Optimization: Leverage CUDA for preprocessing and efficient memory handling. 
## 📢 Performance + +### Warm-Up cost :fire: + NVIDIA GeForce RTX 3050 + Encoder Cuda warm-up cost 66.875 ms. + Decoder Cuda warm-up cost 53.87 ms. + ### Infernce Time -| Component | SpeedSAM | -|----------------------------|-----------| -| **Image Encoder** | | -| Parameters | 5M | -| Speed | 8ms | -| **Mask Decoder** | | -| Parameters | 3.876M | -| Speed | 4ms | -| **Whole Pipeline (Enc+Dec)** | | -| Parameters | 9.66M | -| Speed | 12ms | -### Results -![SPEED-SAM-C-TENSORRT RESULT](assets/Speed_SAM_Results.JPG) +| Component | Pre processing | Inference | Post processing | +|----------------------------|----------------| --------- | ----------------| +| **Image Encoder** | | || +| Parameters | 5M |- | -| +| Speed | 8ms | 33.322ms | 0.437ms | +| **Mask Decoder** | | || +| Parameters | 3.876M |- |- | +| Speed | 34ms | 11.176ms | 5.984ms | +| **Whole Pipeline (Enc+Dec)** | | | | +| Parameters | 9.66M | -| -| +| Sum of Speed | 92.92ms | - |- | + ## 📂 Project Structure SPEED-SAM-CPP-TENSORRT/ @@ -53,44 +45,23 @@ A high-performance C++ implementation for SAM (segment anything model) using Ten └── CMakeLists.txt # CMake configuration # 🚀 Installation -## Prerequisites - git clone https://github.com/hamdiboukamcha/SPEED-SAM-C-TENSORRT.git - cd SPEED-SAM-CPP-TENSORRT - +## Compile + git clone + cd sam_onnx_ros # Create a build directory and compile mkdir build && cd build cmake .. make -j$(nproc) -Note: Update the CMakeLists.txt with the correct paths for TensorRT and OpenCV. + +Note: Update the CMakeLists.txt with the correct paths for Onnxruntime and OpenCV and Onnx Models (since for TechUnited we keep them on separate repositories). + +You can use main.cpp to run the application + +## ROS option + You can also run the code as a catkin package.
## 📦 Dependencies CUDA: NVIDIA's parallel computing platform - TensorRT: High-performance deep learning inference + Onnx: High-performance deep learning inference OpenCV: Image processing library C++17: Required standard for compilation - -# 🔍 Code Overview -## Main Components - SpeedSam Class (speedSam.h): Manages image encoding and mask decoding. - EngineTRT Class (engineTRT.h): TensorRT engine creation and inference. - CUDA Utilities (cuda_utils.h): Macros for CUDA error handling. - Config (config.h): Defines model parameters and precision settings. -## Key Functions - EngineTRT::build: Builds the TensorRT engine from an ONNX model. - EngineTRT::infer: Runs inference on the provided input data. - SpeedSam::predict: Segments an image using input points or bounding boxes. -## 📞 Contact - -For advanced inquiries, feel free to contact me on LinkedIn: LinkedIn - -## 📜 Citation - -If you use this code in your research, please cite the repository as follows: - - @misc{boukamcha2024SpeedSam, - author = {Hamdi Boukamcha}, - title = {SPEED-SAM-C-TENSORRT}, - year = {2024}, - publisher = {GitHub}, - howpublished = {\url{https://github.com/hamdiboukamcha/SPEED-SAM-C-TENSORRT//}}, - } diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp index 3ae5677..8e61ec4 100644 --- a/src/sam_inference.cpp +++ b/src/sam_inference.cpp @@ -188,11 +188,11 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, double post_process_time = (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000; if (_cudaEnable) { - std::cout << "[SAM(CUDA)]: " << pre_process_time << "ms pre-process, " + std::cout << "[SAM_encoder(CUDA)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." 
<< std::endl; } else { - std::cout << "[SAM(CPU)]: " << pre_process_time << "ms pre-process, " + std::cout << "[SAM_encoder(CPU)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl; } @@ -235,6 +235,7 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, #ifdef ROI for (const auto &box : boundingBoxes) #else + for (const auto &box : result.boxes) #endif // ROI { @@ -303,11 +304,11 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, double post_process_time = (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000; if (_cudaEnable) { - std::cout << "[SAM(CUDA)]: " << pre_process_time << "ms pre-process, " + std::cout << "[SAM_decoder(CUDA)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl; } else { - std::cout << "[SAM(CPU)]: " << pre_process_time << "ms pre-process, " + std::cout << "[SAM_decoder(CPU)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." 
<< std::endl; } From b4d5bcb7c6b4a62084e661ae4472477f91423f4a Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Tue, 23 Sep 2025 20:13:06 +0200 Subject: [PATCH 24/28] Deleted redundant code and fixed some brackets --- include/dl_types.h | 24 +++--------------------- include/sam_inference.h | 1 - src/sam_inference.cpp | 15 ++++++--------- src/segmentation.cpp | 4 ---- 4 files changed, 9 insertions(+), 35 deletions(-) diff --git a/include/dl_types.h b/include/dl_types.h index 033df56..0f5874d 100644 --- a/include/dl_types.h +++ b/include/dl_types.h @@ -10,15 +10,8 @@ namespace SEG { enum MODEL_TYPE { - // FLOAT32 MODEL SAM_SEGMENT_ENCODER = 1, SAM_SEGMENT_DECODER = 2, - // YOLO_CLS = 3, - - // FLOAT16 MODEL - // YOLO_DETECT_V8_HALF = 4, - // YOLO_POSE_V8_HALF = 5, - // YOLO_CLS_HALF = 6 }; typedef struct _DL_INIT_PARAM @@ -27,13 +20,9 @@ namespace SEG std::string modelPath; MODEL_TYPE modelType = SAM_SEGMENT_ENCODER; std::vector imgSize = {640, 640}; - float rectConfidenceThreshold = 0.6; - float iouThreshold = 0.5; - int keyPointsNum = 2; // Note:kpt number for pose bool cudaEnable = false; int logSeverityLevel = 3; int intraOpNumThreads = 1; - // std::vector boxes; // For SAM encoder model, this will be filled with detected boxes // Overloaded output operator for _DL_INIT_PARAM to print its contents friend std::ostream &operator<<(std::ostream &os, const _DL_INIT_PARAM ¶m) @@ -44,9 +33,6 @@ namespace SEG for (const auto &size : param.imgSize) os << size << " "; os << "\n"; - os << "rectConfidenceThreshold: " << param.rectConfidenceThreshold << "\n"; - os << "iouThreshold: " << param.iouThreshold << "\n"; - os << "keyPointsNum: " << param.keyPointsNum << "\n"; os << "cudaEnable: " << (param.cudaEnable ? 
"true" : "false") << "\n"; os << "logSeverityLevel: " << param.logSeverityLevel << "\n"; os << "intraOpNumThreads: " << param.intraOpNumThreads; @@ -57,14 +43,10 @@ namespace SEG typedef struct _DL_RESULT { - - std::vector boxes; // For SAM encoder model, this will be filled with detected boxes - std::vector keyPoints; - - // Sam Part + // For SAM encoder model, this will be filled with detected boxes from object detection model. + std::vector boxes; std::vector embeddings; - // Masks for SAM decoder model output - std::vector masks; // Each cv::Mat represents a mask + std::vector masks; } DL_RESULT; } // namespace SEG diff --git a/include/sam_inference.h b/include/sam_inference.h index 6b4713e..250e254 100644 --- a/include/sam_inference.h +++ b/include/sam_inference.h @@ -43,7 +43,6 @@ class SAM SEG::MODEL_TYPE _modelType; std::vector _imgSize; float _rectConfidenceThreshold; - float _iouThreshold; }; #endif // SAMINFERENCE_H diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp index 8e61ec4..444163f 100644 --- a/src/sam_inference.cpp +++ b/src/sam_inference.cpp @@ -51,8 +51,6 @@ const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) { return Ret; } try { - _rectConfidenceThreshold = iParams.rectConfidenceThreshold; - _iouThreshold = iParams.iouThreshold; _imgSize = iParams.imgSize; _modelType = iParams.modelType; _cudaEnable = iParams.cudaEnable; @@ -116,7 +114,8 @@ const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) { const char *SAM::RunSession(const cv::Mat &iImg, std::vector &oResult, - SEG::MODEL_TYPE _modelType, SEG::DL_RESULT &result) { + SEG::MODEL_TYPE _modelType, SEG::DL_RESULT &result) +{ #ifdef benchmark clock_t starttime_1 = clock(); #endif // benchmark @@ -133,19 +132,19 @@ const char *SAM::RunSession(const cv::Mat &iImg, } else if (_modelType == SEG::SAM_SEGMENT_DECODER) { inputNodeDims = {1, 256, 64, 64}; } - TensorProcess(starttime_1, iImg, blob, inputNodeDims, _modelType, oResult, + TensorProcess_(starttime_1, iImg, blob, 
inputNodeDims, _modelType, oResult, utilities, result); return Ret; } template -const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, +const char *SAM::TensorProcess_(clock_t &starttime_1, const cv::Mat &iImg, N &blob, std::vector &inputNodeDims, SEG::MODEL_TYPE _modelType, std::vector &oResult, - Utils &utilities, SEG::DL_RESULT &result) { - + Utils &utilities, SEG::DL_RESULT &result) +{ switch (_modelType) { case SEG::SAM_SEGMENT_ENCODER: // case OTHER_SAM_MODEL: @@ -235,7 +234,6 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, #ifdef ROI for (const auto &box : boundingBoxes) #else - for (const auto &box : result.boxes) #endif // ROI { @@ -410,7 +408,6 @@ char *SAM::WarmUpSession(SEG::MODEL_TYPE _modelType) { inputTensors.size(), _outputNodeNames.data(), _outputNodeNames.size()); } - _outputNodeNames.size(); delete[] blob; clock_t starttime_4 = clock(); double post_process_time = diff --git a/src/segmentation.cpp b/src/segmentation.cpp index 41176de..0394db3 100644 --- a/src/segmentation.cpp +++ b/src/segmentation.cpp @@ -13,8 +13,6 @@ Initializer() { SEG::DL_INIT_PARAM params_decoder; SEG::DL_RESULT res; std::vector resSam; - params_encoder.rectConfidenceThreshold = 0.1; - params_encoder.iouThreshold = 0.5; params_encoder.modelPath = "/home/amigo/Documents/repos/sam_onnx_ros/build/SAM_encoder.onnx"; params_encoder.imgSize = {1024, 1024}; @@ -42,8 +40,6 @@ void SegmentAnything(std::vector> &samSegmentors, const SEG::DL_INIT_PARAM ¶ms_decoder, const cv::Mat &img, std::vector &resSam, SEG::DL_RESULT &res) { - - SEG::MODEL_TYPE modelTypeRef = params_encoder.modelType; samSegmentors[0]->RunSession(img, resSam, modelTypeRef, res); From 3a55f754a5ed9d72a46e3d50df50cd46b28e4385 Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Tue, 23 Sep 2025 20:13:49 +0200 Subject: [PATCH 25/28] made private member methods that were needed to be --- include/sam_inference.h | 9 ++++----- src/sam_inference.cpp | 12 ++++++++---- 2 files 
changed, 12 insertions(+), 9 deletions(-) diff --git a/include/sam_inference.h b/include/sam_inference.h index 250e254..c3694f0 100644 --- a/include/sam_inference.h +++ b/include/sam_inference.h @@ -24,15 +24,14 @@ class SAM const char *RunSession(const cv::Mat &iImg, std::vector &oResult, SEG::MODEL_TYPE modelType, SEG::DL_RESULT &result); - char *WarmUpSession(SEG::MODEL_TYPE modelType); +private: + + char *WarmUpSession_(SEG::MODEL_TYPE modelType); template - const char *TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, N &blob, std::vector &inputNodeDims, + const char *TensorProcess_(clock_t &starttime_1, const cv::Mat &iImg, N &blob, std::vector &inputNodeDims, SEG::MODEL_TYPE modelType, std::vector &oResult, Utils &utilities, SEG::DL_RESULT &result); - std::vector classes{}; - -private: Ort::Env _env; std::unique_ptr _session; bool _cudaEnable; diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp index 444163f..e8622ef 100644 --- a/src/sam_inference.cpp +++ b/src/sam_inference.cpp @@ -5,9 +5,12 @@ #define benchmark //#define ROI -SAM::SAM() {} +SAM::SAM() +{ +} -SAM::~SAM() { +SAM::~SAM() +{ // Clean up input/output node names for (auto &name : _inputNodeNames) { delete[] name; @@ -98,7 +101,7 @@ const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) { .GetTensorTypeAndShapeInfo() .GetElementType(); - WarmUpSession(_modelType); + WarmUpSession_(_modelType); return RET_OK; } catch (const std::exception &e) { const char *str1 = "[SAM]:"; @@ -320,7 +323,8 @@ const char *SAM::TensorProcess_(clock_t &starttime_1, const cv::Mat &iImg, return RET_OK; } -char *SAM::WarmUpSession(SEG::MODEL_TYPE _modelType) { +char *SAM::WarmUpSession_(SEG::MODEL_TYPE _modelType) +{ clock_t starttime_1 = clock(); Utils utilities; cv::Mat iImg = cv::Mat(cv::Size(_imgSize.at(0), _imgSize.at(1)), CV_8UC3); From 8aeb6577dc625b449073819281819dd93f725ec2 Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Wed, 24 Sep 2025 15:39:03 +0200 Subject: [PATCH 26/28] Fixed 
structurre of CMakeLists and package.xml and logged with console bridge --- CMakeLists.txt | 132 +++++++++++++++++++++++++----------------- package.xml | 5 ++ src/sam_inference.cpp | 46 +++++++++------ 3 files changed, 112 insertions(+), 71 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 12ca294..eaec0e5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,19 +3,17 @@ cmake_minimum_required(VERSION 3.14) project(sam_onnx_ros) # -------------- CMake Policies ------------------# -#add_compile_options(-Wall -Werror=all) -#add_compile_options(-Wextra -Werror=extra) +# add_compile_options(-Wall -Werror=all) +# add_compile_options(-Wextra -Werror=extra) # -------------- Support C++17 for using filesystem ------------------# set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_CXX_EXTENSIONS ON) -#set(CMAKE_INCLUDE_CURRENT_DIR ON) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + # -------------- OpenCV ------------------# find_package(OpenCV REQUIRED) -include_directories(${OpenCV_INCLUDE_DIRS}) - +find_package(console_bridge REQUIRED) # -------------- ONNXRuntime ------------------# set(ONNXRUNTIME_VERSION 1.21.0) @@ -26,94 +24,124 @@ include_directories(${ONNXRUNTIME_ROOT}/include) add_definitions(-DUSE_CUDA=1) include_directories(/usr/local/cuda/include) +# -------------- Models ------------------# +# TODO: Find proper folder Copy sam_.onnx file to the same folder of the executable file +configure_file(~/Documents/repos/hero_sam.bak/sam_inference/model/SAM_mask_decoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_mask_decoder.onnx COPYONLY) +configure_file(~/Documents/repos/hero_sam.bak/sam_inference/model/SAM_encoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_encoder.onnx COPYONLY) + + find_package(catkin REQUIRED COMPONENTS - # roscpp - # tue_config - # tue_filesystem - # code_profiler - + rosconsole + console_bridge #onnxruntime_ros ) +find_package(console_bridge REQUIRED) + # 
------------------------------------------------------------------------------------------------ # CATKIN EXPORT # ------------------------------------------------------------------------------------------------ catkin_package( INCLUDE_DIRS include - #LIBRARIES ${PROJECT_NAME} - LIBRARIES sam_onnx_ros_core + LIBRARIES ${PROJECT_NAME}_lib CATKIN_DEPENDS - DEPENDS OpenCV + DEPENDS OpenCV console_bridge ) # ------------------------------------------------------------------------------------------------ # BUILD # ------------------------------------------------------------------------------------------------ -include_directories( - include - SYSTEM - ${OpenCV_INCLUDE_DIRS} - ${catkin_INCLUDE_DIRS} -) - -set(PROJECT_SOURCES - src/main.cpp +# Build core library +add_library(${PROJECT_NAME}_lib src/sam_inference.cpp src/segmentation.cpp src/utils.cpp ) -# Build core library (no main.cpp here) -add_library(sam_onnx_ros_core - src/sam_inference.cpp - src/segmentation.cpp - src/utils.cpp +target_include_directories(${PROJECT_NAME}_lib + PUBLIC + include + SYSTEM + ${OpenCV_INCLUDE_DIRS} + ${catkin_INCLUDE_DIRS} + ${console_bridge_INCLUDE_DIRS} + ${ONNXRUNTIME_ROOT}/include ) -target_link_libraries(sam_onnx_ros_core + + +target_link_libraries(${PROJECT_NAME}_lib ${OpenCV_LIBS} ${catkin_LIBRARIES} ${ONNXRUNTIME_ROOT}/lib/libonnxruntime.so ) -target_include_directories(sam_onnx_ros_core PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) # Main executable links the core lib -add_executable(${PROJECT_NAME} src/main.cpp) -target_link_libraries(${PROJECT_NAME} sam_onnx_ros_core) +add_executable(${PROJECT_NAME} + src/main.cpp +) -# Copy sam_.onnx file to the same folder of the executable file -configure_file(~/Documents/repos/hero_sam.bak/sam_inference/model/SAM_mask_decoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_mask_decoder.onnx COPYONLY) -configure_file(~/Documents/repos/hero_sam.bak/sam_inference/model/SAM_encoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_encoder.onnx COPYONLY) 
+target_link_libraries(${PROJECT_NAME} + ${PROJECT_NAME}_lib + ${catkin_LIBRARIES} + ${ONNXRUNTIME_ROOT}/lib/libonnxruntime.so +) + +# ------------------------------------------------------------------------------------------------ +# Install Targets +# ------------------------------------------------------------------------------------------------ + +install( + DIRECTORY include/ + DESTINATION ${CATKIN_GLOBAL_INCLUDE_DESTINATION} +) -# Create folder name images in the same folder of the executable file -add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/images +install( + TARGETS + ${PROJECT_NAME}_lib + ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} + LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} + RUNTIME DESTINATION ${CATKIN_GLOBAL_BIN_DESTINATION} ) -# # Enable testing +install( + TARGETS + ${PROJECT_NAME} + DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} +) + +# ------------------------------------------------------------------------------------------------ +# Testing +# ------------------------------------------------------------------------------------------------ if (CATKIN_ENABLE_TESTING) -# find_package(catkin_lint_cmake REQUIRED) -# catkin_add_catkin_lint_test("-W2 --ignore HEADER_OUTSIDE_PACKAGE_INCLUDE_PATH") + find_package(catkin_lint_cmake REQUIRED) + catkin_add_catkin_lint_test("-W2 --ignore HEADER_OUTSIDE_PACKAGE_INCLUDE_PATH") - # Utils unit tests (no models needed) + # Utils unit tests (no models needed) catkin_add_gtest(utils_tests test/test_utils.cpp) if(TARGET utils_tests) - target_link_libraries(utils_tests sam_onnx_ros_core GTest::gtest_main ${catkin_LIBRARIES}) - target_include_directories(utils_tests PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) + target_link_libraries( + utils_tests + ${PROJECT_NAME}_lib + ${catkin_LIBRARIES} + GTest::gtest + GTest::gtest_main + ) + #target_include_directories(utils_tests PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) 
endif() # SAM integration-ish tests (may need models) catkin_add_gtest(sam_tests test/sam_test.cpp) if(TARGET sam_tests) - target_link_libraries(sam_tests sam_onnx_ros_core GTest::gtest_main ${catkin_LIBRARIES}) - target_include_directories(sam_tests PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) + target_link_libraries( + sam_tests + ${PROJECT_NAME}_lib + ${catkin_LIBRARIES} + GTest::gtest + GTest::gtest_main + ) + #target_include_directories(sam_tests PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) endif() endif() - - - -#If you want to debug -# set(CMAKE_BUILD_TYPE Debug) -# set(CMAKE_CXX_FLAGS_DEBUG "-g") diff --git a/package.xml b/package.xml index 7d17b8d..2228902 100644 --- a/package.xml +++ b/package.xml @@ -12,13 +12,18 @@ BSD + catkin + libconsole-bridge-dev + libopencv-dev onnxruntime_ros + rosconsole libopencv-dev onnxruntime_ros + rosconsole catkin_lint_cmake diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp index e8622ef..457dcb5 100644 --- a/src/sam_inference.cpp +++ b/src/sam_inference.cpp @@ -1,6 +1,8 @@ #include "sam_inference.h" #include "utils.h" + #include +#include #define benchmark //#define ROI @@ -50,7 +52,7 @@ const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) { if (result) { Ret = "[SAM]:Your model path is error.Change your model path without " "chinese characters."; - std::cout << Ret << std::endl; + CONSOLE_BRIDGE_logWarn("%s", Ret); return Ret; } try { @@ -109,7 +111,7 @@ const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) { std::string str_result = std::string(str1) + std::string(str2); char *merged = new char[str_result.length() + 1]; std::strcpy(merged, str_result.c_str()); - std::cout << merged << std::endl; + CONSOLE_BRIDGE_logWarn("%s", merged); delete[] merged; return "[SAM]:Create _session failed."; } @@ -190,13 +192,17 @@ const char *SAM::TensorProcess_(clock_t &starttime_1, const cv::Mat &iImg, double post_process_time = (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000; if (_cudaEnable) { - 
std::cout << "[SAM_encoder(CUDA)]: " << pre_process_time << "ms pre-process, " - << process_time << "ms inference, " << post_process_time - << "ms post-process." << std::endl; + CONSOLE_BRIDGE_logInform("[SAM_encoder(CUDA)]: %.2fms pre-process, %.2fms inference, " + "%.2fms post-process.", + pre_process_time, process_time, + post_process_time + ); } else { - std::cout << "[SAM_encoder(CPU)]: " << pre_process_time << "ms pre-process, " - << process_time << "ms inference, " << post_process_time - << "ms post-process." << std::endl; + CONSOLE_BRIDGE_logInform("[SAM_encoder(CPU)]: %.2fms pre-process, %.2fms inference, " + "%.2fms post-process.", + pre_process_time, process_time, + post_process_time + ); } #endif // benchmark @@ -305,20 +311,24 @@ const char *SAM::TensorProcess_(clock_t &starttime_1, const cv::Mat &iImg, double post_process_time = (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000; if (_cudaEnable) { - std::cout << "[SAM_decoder(CUDA)]: " << pre_process_time << "ms pre-process, " - << process_time << "ms inference, " << post_process_time - << "ms post-process." << std::endl; + CONSOLE_BRIDGE_logInform("[SAM_decoder(CUDA)]: %.2fms pre-process, %.2fms inference, " + "%.2fms post-process.", + pre_process_time, process_time, + post_process_time + ); } else { - std::cout << "[SAM_decoder(CPU)]: " << pre_process_time << "ms pre-process, " - << process_time << "ms inference, " << post_process_time - << "ms post-process." << std::endl; + CONSOLE_BRIDGE_logInform("[SAM_decoder(CPU)]: %.2fms pre-process, %.2fms inference, " + "%.2fms post-process.", + pre_process_time, process_time, + post_process_time + ); } #endif // benchmark break; } default: - std::cout << "[SAM]: " << "Not support model type." 
<< std::endl; + CONSOLE_BRIDGE_logWarn("[SAM]: " "Not support model type."); } return RET_OK; } @@ -350,8 +360,7 @@ char *SAM::WarmUpSession_(SEG::MODEL_TYPE _modelType) double post_process_time = (double)(starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000; if (_cudaEnable) { - std::cout << "[SAM(CUDA)]: " << "Cuda warm-up cost " << post_process_time - << " ms. " << std::endl; + CONSOLE_BRIDGE_logInform("[SAM(CUDA)]: Cuda warm-up cost %.2f ms.", post_process_time); } break; } @@ -417,8 +426,7 @@ char *SAM::WarmUpSession_(SEG::MODEL_TYPE _modelType) double post_process_time = (double)(starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000; if (_cudaEnable) { - std::cout << "[SAM(CUDA)]: " << "Cuda warm-up cost " << post_process_time - << " ms. " << std::endl; + CONSOLE_BRIDGE_logInform("[SAM(CUDA)]: Cuda warm-up cost %.2f ms.", post_process_time); } break; From c7ac06acf3321c595387d59867be724b64a2cac0 Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Mon, 29 Sep 2025 13:27:58 +0200 Subject: [PATCH 27/28] updated CMakeLists, included .hpp suffix and sam_onnx_ros include dir and configuration .hpp.in file --- CMakeLists.txt | 48 +++++++++++-------- include/sam_onnx_ros/config.hpp.in | 7 +++ .../{dl_types.h => sam_onnx_ros/dl_types.hpp} | 0 .../sam_inference.hpp} | 4 +- .../segmentation.hpp} | 3 +- include/{utils.h => sam_onnx_ros/utils.hpp} | 4 +- src/main.cpp | 5 +- src/sam_inference.cpp | 21 ++++---- src/segmentation.cpp | 2 +- src/utils.cpp | 4 +- test/sam_test.cpp | 7 +-- test/test_utils.cpp | 3 +- 12 files changed, 61 insertions(+), 47 deletions(-) create mode 100644 include/sam_onnx_ros/config.hpp.in rename include/{dl_types.h => sam_onnx_ros/dl_types.hpp} (100%) rename include/{sam_inference.h => sam_onnx_ros/sam_inference.hpp} (96%) rename include/{segmentation.h => sam_onnx_ros/segmentation.hpp} (91%) rename include/{utils.h => sam_onnx_ros/utils.hpp} (98%) diff --git a/CMakeLists.txt b/CMakeLists.txt index eaec0e5..fcda779 100644 --- a/CMakeLists.txt +++ 
b/CMakeLists.txt @@ -3,26 +3,19 @@ cmake_minimum_required(VERSION 3.14) project(sam_onnx_ros) # -------------- CMake Policies ------------------# -# add_compile_options(-Wall -Werror=all) -# add_compile_options(-Wextra -Werror=extra) +add_compile_options(-Wall -Werror=all) +add_compile_options(-Wextra -Werror=extra) # -------------- Support C++17 for using filesystem ------------------# set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_EXPORT_COMPILE_COMMANDS ON) -# -------------- OpenCV ------------------# -find_package(OpenCV REQUIRED) -find_package(console_bridge REQUIRED) # -------------- ONNXRuntime ------------------# set(ONNXRUNTIME_VERSION 1.21.0) set(ONNXRUNTIME_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/../hero_sam.bak/onnxruntime-linux-x64-gpu-1.21.1") include_directories(${ONNXRUNTIME_ROOT}/include) -# -------------- Cuda ------------------# -add_definitions(-DUSE_CUDA=1) -include_directories(/usr/local/cuda/include) # -------------- Models ------------------# # TODO: Find proper folder Copy sam_.onnx file to the same folder of the executable file @@ -37,7 +30,20 @@ find_package(catkin REQUIRED #onnxruntime_ros ) -find_package(console_bridge REQUIRED) +find_package(OpenCV REQUIRED) + +# -------------- Cuda ------------------# +add_definitions(-DUSE_CUDA=1) +include_directories(/usr/local/cuda/include) + +set(${PROJECT_NAME}_CUDA_ENABLED ${onnxruntime_ros_CUDA_ENABLED}) +if(onnxruntime_ros_CUDA_ENABLED) + find_package(CUDAToolkit REQUIRED) +endif() + +configure_file(include/${PROJECT_NAME}/config.hpp.in ${CATKIN_DEVEL_PREFIX}/${CATKIN_GLOBAL_INCLUDE_DESTINATION}/${PROJECT_NAME}/config.hpp) +# add_custom_target(generate_config_hpp +# DEPENDS ${CATKIN_DEVEL_PREFIX}/${CATKIN_GLOBAL_INCLUDE_DESTINATION}/${PROJECT_NAME}/config.hpp # ------------------------------------------------------------------------------------------------ # CATKIN EXPORT @@ -45,7 +51,7 @@ find_package(console_bridge REQUIRED) catkin_package( INCLUDE_DIRS include - 
LIBRARIES ${PROJECT_NAME}_lib + LIBRARIES ${PROJECT_NAME} CATKIN_DEPENDS DEPENDS OpenCV console_bridge ) @@ -54,14 +60,14 @@ catkin_package( # BUILD # ------------------------------------------------------------------------------------------------ -# Build core library -add_library(${PROJECT_NAME}_lib +# Build library +add_library(${PROJECT_NAME} src/sam_inference.cpp src/segmentation.cpp src/utils.cpp ) -target_include_directories(${PROJECT_NAME}_lib +target_include_directories(${PROJECT_NAME} PUBLIC include SYSTEM @@ -72,19 +78,19 @@ target_include_directories(${PROJECT_NAME}_lib ) -target_link_libraries(${PROJECT_NAME}_lib +target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS} ${catkin_LIBRARIES} ${ONNXRUNTIME_ROOT}/lib/libonnxruntime.so ) # Main executable links the core lib -add_executable(${PROJECT_NAME} +add_executable(test_${PROJECT_NAME} src/main.cpp ) -target_link_libraries(${PROJECT_NAME} - ${PROJECT_NAME}_lib +target_link_libraries(test_${PROJECT_NAME} + ${PROJECT_NAME} ${catkin_LIBRARIES} ${ONNXRUNTIME_ROOT}/lib/libonnxruntime.so ) @@ -100,7 +106,7 @@ install( install( TARGETS - ${PROJECT_NAME}_lib + ${PROJECT_NAME} ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} RUNTIME DESTINATION ${CATKIN_GLOBAL_BIN_DESTINATION} @@ -116,8 +122,8 @@ install( # Testing # ------------------------------------------------------------------------------------------------ if (CATKIN_ENABLE_TESTING) - find_package(catkin_lint_cmake REQUIRED) - catkin_add_catkin_lint_test("-W2 --ignore HEADER_OUTSIDE_PACKAGE_INCLUDE_PATH") + #find_package(catkin_lint_cmake REQUIRED) + #catkin_add_catkin_lint_test("-W2 --ignore HEADER_OUTSIDE_PACKAGE_INCLUDE_PATH") # Utils unit tests (no models needed) catkin_add_gtest(utils_tests test/test_utils.cpp) diff --git a/include/sam_onnx_ros/config.hpp.in b/include/sam_onnx_ros/config.hpp.in new file mode 100644 index 0000000..2794619 --- /dev/null +++ b/include/sam_onnx_ros/config.hpp.in @@ 
-0,0 +1,7 @@ +#ifndef YOLO_ONNX_SAM_CONFIG_HPP_ +#define YOLO_ONNX_SAM_CONFIG_HPP_ + +//Set which version of the Tree Interface to use +#define YOLO_ONNX_SAM_CUDA_ENABLED @onnx_sam_ros_CUDA_ENABLED@ + +#endif //#define YOLO_ONNX_SAM_CONFIG_HPP_ diff --git a/include/dl_types.h b/include/sam_onnx_ros/dl_types.hpp similarity index 100% rename from include/dl_types.h rename to include/sam_onnx_ros/dl_types.hpp diff --git a/include/sam_inference.h b/include/sam_onnx_ros/sam_inference.hpp similarity index 96% rename from include/sam_inference.h rename to include/sam_onnx_ros/sam_inference.hpp index c3694f0..d098d4a 100644 --- a/include/sam_inference.h +++ b/include/sam_onnx_ros/sam_inference.hpp @@ -4,14 +4,14 @@ #define RET_OK nullptr #include -#include #include #include -#include "utils.h" #ifdef USE_CUDA #include #endif +#include "sam_onnx_ros/utils.hpp" + class SAM { public: diff --git a/include/segmentation.h b/include/sam_onnx_ros/segmentation.hpp similarity index 91% rename from include/segmentation.h rename to include/sam_onnx_ros/segmentation.hpp index 83102e0..c7ebfd0 100644 --- a/include/segmentation.h +++ b/include/sam_onnx_ros/segmentation.hpp @@ -1,9 +1,8 @@ #ifndef SEGMENTATION_H #define SEGMENTATION_H -#include +#include "sam_onnx_ros/sam_inference.hpp" -#include "sam_inference.h" std::tuple>, SEG::_DL_INIT_PARAM, SEG::_DL_INIT_PARAM, SEG::DL_RESULT, std::vector> Initializer(); void SegmentAnything(std::vector>& samSegmentors, const SEG::_DL_INIT_PARAM& params_encoder, const SEG::_DL_INIT_PARAM& params_decoder, const cv::Mat& img, std::vector &resSam, diff --git a/include/utils.h b/include/sam_onnx_ros/utils.hpp similarity index 98% rename from include/utils.h rename to include/sam_onnx_ros/utils.hpp index a471512..e8084a1 100644 --- a/include/utils.h +++ b/include/sam_onnx_ros/utils.hpp @@ -3,15 +3,15 @@ #define RET_OK nullptr -#include #include #include #include "onnxruntime_cxx_api.h" -#include "dl_types.h" #ifdef USE_CUDA #include #endif +#include 
"sam_onnx_ros/dl_types.hpp" + class Utils { public: diff --git a/src/main.cpp b/src/main.cpp index cd0f9dd..ed09788 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,9 +1,8 @@ -#include "segmentation.h" -#include -#include #include #include +#include "sam_onnx_ros/segmentation.hpp" + int main() { // Running inference diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp index 457dcb5..ccfb2b3 100644 --- a/src/sam_inference.cpp +++ b/src/sam_inference.cpp @@ -1,9 +1,9 @@ -#include "sam_inference.h" -#include "utils.h" - #include #include +#include "sam_onnx_ros/sam_inference.hpp" +#include "sam_onnx_ros/utils.hpp" + #define benchmark //#define ROI @@ -97,11 +97,11 @@ const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) { auto input_shape = _session->GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); - auto output_shape = - _session->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); - auto output_type = _session->GetOutputTypeInfo(0) - .GetTensorTypeAndShapeInfo() - .GetElementType(); + // auto output_shape = + // _session->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); + // auto output_type = _session->GetOutputTypeInfo(0) + // .GetTensorTypeAndShapeInfo() + // .GetElementType(); WarmUpSession_(_modelType); return RET_OK; @@ -251,9 +251,10 @@ const char *SAM::TensorProcess_(clock_t &starttime_1, const cv::Mat &iImg, embeddings.data(), // Use the embeddings from the encoder embeddings.size(), // Total number of elements decoderInputDims.data(), decoderInputDims.size()); + // Use center of bounding box as foreground point - float centerX = box.x + box.width / 2.0; - float centerY = box.y + box.height / 2.0; + // float centerX = box.x + box.width / 2.0; + // float centerY = box.y + box.height / 2.0; // Convert bounding box to points std::vector pointCoords = { diff --git a/src/segmentation.cpp b/src/segmentation.cpp index 0394db3..ad66eaa 100644 --- a/src/segmentation.cpp +++ b/src/segmentation.cpp @@ -1,4 +1,4 @@ 
-#include "segmentation.h" +#include "sam_onnx_ros/segmentation.hpp" std::tuple>, SEG::DL_INIT_PARAM, SEG::DL_INIT_PARAM, SEG::DL_RESULT, std::vector> diff --git a/src/utils.cpp b/src/utils.cpp index 643dba4..8af08a3 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -1,5 +1,5 @@ -#include "utils.h" -#include // for guided filter +#include "sam_onnx_ros/utils.hpp" + //#define LOGGING // Constructor diff --git a/test/sam_test.cpp b/test/sam_test.cpp index cd54d7e..5a64126 100644 --- a/test/sam_test.cpp +++ b/test/sam_test.cpp @@ -1,9 +1,10 @@ #include #include #include -#include "segmentation.h" -#include "sam_inference.h" -#include "dl_types.h" + +#include "sam_onnx_ros/sam_inference.hpp" +#include "sam_onnx_ros/dl_types.hpp" +#include "sam_onnx_ros/segmentation.hpp" // This file contains higher-level (integration-ish) tests. // They cover object/session creation and a full pipeline run using synthetic images. diff --git a/test/test_utils.cpp b/test/test_utils.cpp index 92d20ab..bde7145 100644 --- a/test/test_utils.cpp +++ b/test/test_utils.cpp @@ -1,6 +1,7 @@ #include #include -#include "utils.h" + +#include "sam_onnx_ros/utils.hpp" // This file contains small, focused unit tests for Utils. // We verify image preprocessing (channel conversion, aspect-preserving resize, padding) From 013ba3bbb4001bd879a8a5ddec5644f1e306ecd6 Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Tue, 30 Sep 2025 20:16:52 +0200 Subject: [PATCH 28/28] updated CMakeLists rosconsole bridge --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fcda779..ca92d96 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,10 +26,10 @@ configure_file(~/Documents/repos/hero_sam.bak/sam_inference/model/SAM_encoder.on find_package(catkin REQUIRED COMPONENTS rosconsole - console_bridge + #onnxruntime_ros ) - +find_package(console_bridge REQUIRED) find_package(OpenCV REQUIRED) # -------------- Cuda ------------------#