diff --git a/ed_sensor_integration/CMakeLists.txt b/ed_sensor_integration/CMakeLists.txt
index 67934c20..95f1df03 100644
--- a/ed_sensor_integration/CMakeLists.txt
+++ b/ed_sensor_integration/CMakeLists.txt
@@ -1,8 +1,29 @@
-cmake_minimum_required(VERSION 3.0.2)
+cmake_minimum_required(VERSION 3.5)
 project(ed_sensor_integration)
 
-add_compile_options(-Wall -Werror=all)
-add_compile_options(-Wextra -Werror=extra)
+
+#add_compile_options(-Wall -Werror=all)
+#add_compile_options(-Wextra -Werror=extra)
+
+# -------------- ONNXRuntime Setup (define this early) ------------------#
+set(ONNXRUNTIME_VERSION 1.21.1)
+set(ONNXRUNTIME_ROOT "/home/amigo/Documents/repos/hero_sam.bak/onnxruntime-linux-x64-gpu-${ONNXRUNTIME_VERSION}")
+
+# -------------- Neural Network Models ------------------#
+
+# Copy model files to the same folder as the executables
+#configure_file(/home/amigo/Documents/repos/hero_sam.bak/yolo_inference/data/coco.yaml ${CATKIN_DEVEL_PREFIX}/lib/ed_sensor_integration/coco.yaml COPYONLY)
+#configure_file(/home/amigo/Documents/repos/hero_sam.bak/yolo_inference/model/yolo11m.onnx ${CATKIN_DEVEL_PREFIX}/lib/ed_sensor_integration/yolo11m.onnx COPYONLY)
+#configure_file(/home/amigo/Documents/repos/hero_sam.bak/sam_inference/model/SAM_mask_decoder.onnx ${CATKIN_DEVEL_PREFIX}/lib/ed_sensor_integration/SAM_mask_decoder.onnx COPYONLY)
+#configure_file(/home/amigo/Documents/repos/hero_sam.bak/sam_inference/model/SAM_encoder.onnx ${CATKIN_DEVEL_PREFIX}/lib/ed_sensor_integration/SAM_encoder.onnx COPYONLY)
+# OR
+# Define model paths
+set(YOLO_MODELS_PATH "/home/amigo/Documents/repos/hero_sam.bak/yolo_inference/model")
+set(SAM_MODELS_PATH "/home/amigo/Documents/repos/hero_sam.bak/sam_inference/model")
+
+# Make these paths available to the C++ code
+add_definitions(-DYOLO_MODELS_PATH="${YOLO_MODELS_PATH}")
+add_definitions(-DSAM_MODELS_PATH="${SAM_MODELS_PATH}")
 
 find_package(OpenCV REQUIRED)
 find_package(PCL REQUIRED COMPONENTS common kdtree)
@@ -21,6 +42,13 @@ find_package(catkin REQUIRED COMPONENTS
   tue_config
   tue_filesystem
   visualization_msgs
+  # 2D -> 3D point cloud estimation
+  yolo_onnx_ros
+  sam_onnx_ros
+  bmm_ros
+  # For displaying the SAM mask
+  cv_bridge
+  image_transport
 )
 
 # ------------------------------------------------------------------------------------------------
@@ -29,8 +57,14 @@ find_package(catkin REQUIRED COMPONENTS
 
 catkin_package(
   INCLUDE_DIRS include
-  LIBRARIES ed_kinect
-  CATKIN_DEPENDS code_profiler ed ${PROJECT_NAME}_msgs geolib2 image_geometry rgbd rgbd_image_buffer roscpp tue_config visualization_msgs
+  LIBRARIES
+    ed_kinect
+    ${PROJECT_NAME}_console_bridge
+    ed_association
+    ed_laser
+  CATKIN_DEPENDS
+    code_profiler ed ${PROJECT_NAME}_msgs geolib2 image_geometry rgbd rgbd_image_buffer roscpp tue_config visualization_msgs
+    sam_onnx_ros bmm_ros cv_bridge image_transport
   DEPENDS OpenCV PCL
 )
@@ -43,14 +77,17 @@
 include_directories(
   SYSTEM
   ${OpenCV_INCLUDE_DIRS}
   ${catkin_INCLUDE_DIRS}
+  ${ONNXRUNTIME_ROOT}/include
+
 )
 
 # ------------------------------------------------------------------------------------------------
 # LIBRARIES
 # ------------------------------------------------------------------------------------------------
 
+
+
 add_library(${PROJECT_NAME}_console_bridge
-  # Foward geolib2 and tue_filesystem logging to rosconsole
   src/rosconsole_bridge.cpp
 )
@@ -60,24 +97,34 @@ add_library(ed_association
 )
 
 add_library(ed_kinect
-  include/ed/kinect/association.h
-  include/ed/kinect/beam_model.h
-  include/ed/kinect/entity_update.h
-  include/ed/kinect/fitter.h
-  include/ed/kinect/mesh_tools.h
-  include/ed/kinect/renderer.h
-  include/ed/kinect/segmenter.h
-  include/ed/kinect/updater.h
-
-  src/kinect/association.cpp
-  src/kinect/beam_model.cpp
-  src/kinect/fitter.cpp
-  src/kinect/mesh_tools.cpp
-  src/kinect/renderer.cpp
-  src/kinect/segmenter.cpp
-  src/kinect/updater.cpp
-)
-target_link_libraries(ed_kinect ${PROJECT_NAME}_console_bridge ed_association ${OpenCV_LIBRARIES} ${catkin_LIBRARIES})
+    include/ed/kinect/association.h
+    include/ed/kinect/beam_model.h
+    include/ed/kinect/entity_update.h
+    include/ed/kinect/fitter.h
+    include/ed/kinect/mesh_tools.h
+    include/ed/kinect/renderer.h
+    include/ed/kinect/segmenter.h
+    include/ed/kinect/updater.h
+    src/kinect/association.cpp
+    src/kinect/beam_model.cpp
+    src/kinect/fitter.cpp
+    src/kinect/mesh_tools.cpp
+    src/kinect/renderer.cpp
+    src/kinect/segmenter.cpp
+    src/kinect/updater.cpp
+)
+
+add_library(ed_sam_segment_module
+    include/ed_sensor_integration/kinect/segmodules/sam_seg_module.h
+    src/kinect/sam_seg_module.cpp
+)
+target_link_libraries(ed_kinect
+    ${PROJECT_NAME}_console_bridge
+    ed_association
+    ed_sam_segment_module
+    ${OpenCV_LIBRARIES}
+    ${catkin_LIBRARIES}
+)
 add_dependencies(ed_kinect ${catkin_EXPORTED_TARGETS})
 
 add_library(ed_laser
@@ -93,28 +140,36 @@ add_dependencies(ed_laser ${catkin_EXPORTED_TARGETS})
 
 # ------------------------------------------------------------------------------------------------
 # ------------------------------------------------------------------------------------------------
 
 add_library(ed_kinect_plugin
-  src/kinect/kinect_plugin.cpp
-  src/kinect/kinect_plugin.h
-  src/kinect/ray_tracer.cpp
-  src/kinect/ray_tracer.h
+    src/kinect/kinect_plugin.cpp
+    src/kinect/kinect_plugin.h
+    src/kinect/ray_tracer.cpp
+    src/kinect/ray_tracer.h
 )
-target_link_libraries(ed_kinect_plugin ${PROJECT_NAME}_console_bridge ed_kinect ${catkin_LIBRARIES})
+
+target_link_libraries(ed_kinect_plugin
+    ${PROJECT_NAME}_console_bridge
+    ed_kinect
+    ${catkin_LIBRARIES}
+)
+
 add_dependencies(ed_kinect_plugin ${catkin_EXPORTED_TARGETS})
 
 # ------------------------------------------------------------------------------------------------
 
 add_library(ed_laser_plugin
-  src/laser/laser_plugin.cpp
-  src/laser/laser_plugin.h
+    src/laser/laser_plugin.cpp
+    src/laser/laser_plugin.h
 )
+
 target_link_libraries(ed_laser_plugin ${PROJECT_NAME}_console_bridge ed_laser ed_association ${catkin_LIBRARIES})
 
 # ------------------------------------------------------------------------------------------------
 
 add_library(ed_clearer_plugin
-  src/clearer/clearer_plugin.cpp
-  src/clearer/clearer_plugin.h
+    src/clearer/clearer_plugin.cpp
+    src/clearer/clearer_plugin.h
 )
+
 target_link_libraries(ed_clearer_plugin ${catkin_LIBRARIES})
 
 # ------------------------------------------------------------------------------------------------
@@ -126,13 +181,26 @@ target_link_libraries(ed_image_saver ${catkin_LIBRARIES})
 add_dependencies(ed_image_saver ${catkin_EXPORTED_TARGETS})
 
 add_executable(ed_segmenter tools/segmenter.cpp)
-target_link_libraries(ed_segmenter ed_kinect ${catkin_LIBRARIES})
+target_link_libraries(ed_segmenter
+    ed_kinect
+    ${catkin_LIBRARIES}
+)
 
 add_executable(ed_fitter_data tools/fitter_viz_data.cpp)
-target_link_libraries(ed_fitter_data ${PROJECT_NAME}_console_bridge ed_kinect ${OpenCV_LIBRARIES} ${catkin_LIBRARIES})
+target_link_libraries(ed_fitter_data
+    ${PROJECT_NAME}_console_bridge
+    ed_kinect
+    ${OpenCV_LIBRARIES}
+    ${catkin_LIBRARIES}
+)
 
 add_executable(ed_fitter_live tools/fitter_viz_live.cpp)
-target_link_libraries(ed_fitter_live ${PROJECT_NAME}_console_bridge ed_kinect ${OpenCV_LIBRARIES} ${catkin_LIBRARIES})
+target_link_libraries(ed_fitter_live
+    ${PROJECT_NAME}_console_bridge
+    ed_kinect
+    ${OpenCV_LIBRARIES}
+    ${catkin_LIBRARIES}
+)
 
 # ------------------------------------------------------------------------------------------------
 # Install
@@ -179,11 +247,18 @@ if (CATKIN_ENABLE_TESTING)
   catkin_add_catkin_lint_test("-W2 --ignore HEADER_OUTSIDE_PACKAGE_INCLUDE_PATH")
 
   catkin_add_gtest(test_furniture_fitting test/test_furniture_fit.cpp)
-  target_link_libraries(test_furniture_fitting ${PROJECT_NAME}_console_bridge ed_kinect ${OpenCV_LIBRARIES} ${catkin_LIBRARIES})
+  target_link_libraries(test_furniture_fitting
+    ${PROJECT_NAME}_console_bridge
+    ed_kinect
+    ${OpenCV_LIBRARIES}
+    ${catkin_LIBRARIES}
+  )
 
   catkin_add_gtest(test_laser_fitting test/test_laser_segmenter.cpp)
-  target_link_libraries(test_laser_fitting ${PROJECT_NAME}_console_bridge ed_laser ${OpenCV_LIBRARIES} ${catkin_LIBRARIES})
+  target_link_libraries(test_laser_fitting
+    ${PROJECT_NAME}_console_bridge
+    ed_laser
+    ${OpenCV_LIBRARIES}
+    ${catkin_LIBRARIES}
+  )
 endif ()
-
-
-
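The add_definitions calls in the CMakeLists hunk above bake the two model directories into every translation unit as quoted string macros. A minimal sketch of how the C++ side can pick them up; the fallback value and the variable name are illustrative and not part of this patch, while yolo11m.onnx is the model named in the commented configure_file lines:

#include <string>

#ifndef YOLO_MODELS_PATH
#define YOLO_MODELS_PATH "."  // fallback when CMake does not pass the definition
#endif

// -DYOLO_MODELS_PATH="/some/dir" arrives as a quoted string literal,
// so it concatenates like any other C string.
static const std::string kYoloModelFile = std::string(YOLO_MODELS_PATH) + "/yolo11m.onnx";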
diff --git a/ed_sensor_integration/include/ed/kinect/segmenter.h b/ed_sensor_integration/include/ed/kinect/segmenter.h
index 077b6747..bbccc047 100644
--- a/ed_sensor_integration/include/ed/kinect/segmenter.h
+++ b/ed_sensor_integration/include/ed/kinect/segmenter.h
@@ -6,6 +6,7 @@
 #include
 #include
 #include
+#include <tue/config/configuration.h>
 #include
 #include
 
@@ -29,7 +30,7 @@ class Segmenter
 
 public:
 
-    Segmenter();
+    Segmenter(tue::Configuration config);
 
     ~Segmenter();
 
@@ -38,12 +39,31 @@ class Segmenter
 
     void calculatePointsWithin(const rgbd::Image& image, const geo::Shape& shape,
                                const geo::Pose3D& shape_pose, cv::Mat& filtered_depth_image) const;
 
-
-    void cluster(const cv::Mat& depth_image, const geo::DepthCamera& cam_model,
-                 const geo::Pose3D& sensor_pose, std::vector<EntityUpdate>& clusters) const;
+    /**
+     * @brief Preprocess the RGB image for segmentation by masking it with the filtered depth image:
+     * only pixels with a non-zero depth value are kept.
+     *
+     * @param rgb_image input RGB image
+     * @param filtered_depth_image filtered depth image used as the mask
+     * @return cv::Mat the masked RGB image
+     */
+    cv::Mat preprocessRGBForSegmentation(const cv::Mat& rgb_image, const cv::Mat& filtered_depth_image) const;
+
+    /**
+     * @brief Cluster the depth image into segments, using the new YOLO -> SAM -> BMM depth segmentation pipeline.
+     *
+     * @param depth_image filtered depth image
+     * @param cam_model camera model used to project pixels to 3D
+     * @param sensor_pose pose of the sensor in the map frame
+     * @param clusters output: one EntityUpdate per segmented object
+     * @param rgb_image masked RGB image that is fed to the pipeline
+     * @return std::vector<cv::Mat> the 2D segmentation masks of all segmented objects
+     */
+    std::vector<cv::Mat> cluster(const cv::Mat& depth_image, const geo::DepthCamera& cam_model,
+                                 const geo::Pose3D& sensor_pose, std::vector<EntityUpdate>& clusters,
+                                 const cv::Mat& rgb_image);
 
 private:
 
+    tue::Configuration config_;
 };
 
 #endif
diff --git a/ed_sensor_integration/include/ed/kinect/updater.h b/ed_sensor_integration/include/ed/kinect/updater.h
index 03f03798..de51a304 100644
--- a/ed_sensor_integration/include/ed/kinect/updater.h
+++ b/ed_sensor_integration/include/ed/kinect/updater.h
@@ -5,9 +5,10 @@
 #include "ed/kinect/segmenter.h"
 #include "ed/kinect/entity_update.h"
 
+#include
+
 #include
 #include
-
 
 // ----------------------------------------------------------------------------------------------------
 
 struct UpdateRequest
@@ -48,7 +49,7 @@ class Updater
 
 public:
 
-    Updater();
+    Updater(tue::Configuration config);
 
     ~Updater();
 
@@ -59,11 +60,15 @@ class Updater
 
     Fitter fitter_;
 
-    Segmenter segmenter_;
+    std::unique_ptr<Segmenter> segmenter_;
 
     // Stores for each segmented entity with which area description it was found
     std::map<ed::UUID, std::string> id_to_area_description_;
 
+    // For displaying the SAM mask
+    ros::Publisher mask_pub_;
+    ros::Publisher cloud_pub_;
+
 };
 
 #endif
diff --git a/ed_sensor_integration/include/ed_sensor_integration/kinect/segmodules/sam_seg_module.h b/ed_sensor_integration/include/ed_sensor_integration/kinect/segmodules/sam_seg_module.h
new file mode 100644
index 00000000..c12fc5ea
--- /dev/null
+++ b/ed_sensor_integration/include/ed_sensor_integration/kinect/segmodules/sam_seg_module.h
@@ -0,0 +1,53 @@
+#pragma once
+
+#include
+#include "yolo_inference.h"
+#include "sam_onnx_ros/sam_inference.hpp"
+#include
+#include
+#include
+//#include "ed/kinect/entity_update.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+// EntityUpdate and UpdateResult come from the kinect headers (avoid including the laser variant to prevent redefinition)
+#include
+#include  // defines UpdateResult (and possibly other needed types)
+
+/**
+ * @brief Segmentation pipeline that processes the input image and generates segmentation masks.
+ *
+ * @param img The input RGB image to segment.
+ * @return std::vector<cv::Mat> The generated segmentation masks.
+ */
+std::vector<cv::Mat> SegmentationPipeline(const cv::Mat& img);
+
+/**
+ * @brief Overlay segmentation masks on the RGB image for visualization purposes.
+ *
+ * @param rgb The RGB image to overlay the masks on.
+ * @param masks The segmentation masks to overlay.
+ */
+void overlayMasksOnImage_(cv::Mat& rgb, const std::vector<cv::Mat>& masks);
+
+/**
+ * @brief Publish the segmentation results and the point cloud estimation as ROS messages.
+ *
+ * @param filtered_depth_image The filtered depth image to publish.
+ * @param rgb The RGB image to publish.
+ * @param sensor_pose The pose of the sensor.
+ * @param clustered_images The clustered segmentation masks.
+ * @param mask_pub_ The ROS publisher for the mask images.
+ * @param cloud_pub_ The ROS publisher for the point cloud data.
+ * @param res_updates The entity updates to publish.
+ */
+void publishSegmentationResults(const cv::Mat& filtered_depth_image, const cv::Mat& rgb,
+                                const geo::Pose3D& sensor_pose, std::vector<cv::Mat>& clustered_images,
+                                ros::Publisher& mask_pub_, ros::Publisher& cloud_pub_, std::vector<EntityUpdate>& res_updates);
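The two free functions declared above already form a small offline test harness for the YOLO + SAM stage. A minimal sketch of how they could be exercised outside ED, assuming the ONNX models resolve through the compiled-in paths; input.png and overlay.png are placeholder file names:

#include <vector>
#include <opencv2/opencv.hpp>
#include "ed_sensor_integration/kinect/segmodules/sam_seg_module.h"

int main()
{
    // Hypothetical test image; any BGR image will do
    cv::Mat img = cv::imread("input.png");
    if (img.empty())
        return 1;

    // YOLO detection followed by SAM mask generation, as implemented in sam_seg_module.cpp
    std::vector<cv::Mat> masks = SegmentationPipeline(img);

    // Blend the masks and their contours into the image for visual inspection
    overlayMasksOnImage_(img, masks);
    cv::imwrite("overlay.png", img);
    return 0;
}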
diff --git a/ed_sensor_integration/package.xml b/ed_sensor_integration/package.xml
index 7cddbf8c..a4e5bd22 100644
--- a/ed_sensor_integration/package.xml
+++ b/ed_sensor_integration/package.xml
@@ -26,6 +26,9 @@
   <depend>roscpp</depend>
   <depend>tue_config</depend>
   <depend>visualization_msgs</depend>
+  <depend>sam_onnx_ros</depend>
+  <depend>yolo_onnx_ros</depend>
+  <depend>bmm_ros</depend>
 
   <build_depend>geometry_msgs</build_depend>
   <exec_depend>geometry_msgs</exec_depend>
@@ -36,6 +39,12 @@
   <build_depend>tue_filesystem</build_depend>
   <exec_depend>tue_filesystem</exec_depend>
 
+  <build_depend>cv_bridge</build_depend>
+  <build_depend>image_transport</build_depend>
+  <exec_depend>cv_bridge</exec_depend>
+  <exec_depend>image_transport</exec_depend>
+
   <test_depend>catkin_lint_cmake</test_depend>
   <doc_depend>doxygen</doc_depend>
diff --git a/ed_sensor_integration/src/kinect/kinect_plugin.cpp b/ed_sensor_integration/src/kinect/kinect_plugin.cpp
index 5188ac26..c7770116 100644
--- a/ed_sensor_integration/src/kinect/kinect_plugin.cpp
+++ b/ed_sensor_integration/src/kinect/kinect_plugin.cpp
@@ -53,9 +53,17 @@ void KinectPlugin::initialize(ed::InitData& init)
     if (config.value("topic", topic))
     {
         ROS_INFO_STREAM("[ED KINECT PLUGIN] Initializing kinect client with topic '" << topic << "'.");
+        //topic = "/usb_cam/image_raw";
         image_buffer_.initialize(topic, "map");
     }
 
+    if (config.readGroup("updater", tue::config::REQUIRED))
+    {
+        updater_ = std::make_unique<Updater>(config.limitScope());
+        config.endGroup();
+    }
+    // config.endGroup();
+
     // - - - - - - - - - - - - - - - - - -
     // Services
@@ -179,7 +187,7 @@ bool KinectPlugin::srvUpdate(ed_sensor_integration_msgs::Update::Request& req, e
     kinect_update_req.max_yaw_change = 0.25 * M_PI;
 
     UpdateResult kinect_update_res(*update_req_);
-    if (!updater_.update(*world_, image, sensor_pose, kinect_update_req, kinect_update_res))
+    if (!updater_->update(*world_, image, sensor_pose, kinect_update_req, kinect_update_res))
     {
         res.error_msg = kinect_update_res.error.str();
         return true;
diff --git a/ed_sensor_integration/src/kinect/kinect_plugin.h b/ed_sensor_integration/src/kinect/kinect_plugin.h
index 5f35fc12..9f8697f6 100644
--- a/ed_sensor_integration/src/kinect/kinect_plugin.h
+++ b/ed_sensor_integration/src/kinect/kinect_plugin.h
@@ -8,6 +8,8 @@
 
 #include
 
+#include <memory>
+
 // Services
 #include
 #include
@@ -44,7 +46,7 @@ class KinectPlugin : public ed::Plugin
 
     // geo::Pose3D last_sensor_pose_;
 
-    Updater updater_;
+    std::unique_ptr<Updater> updater_;
 
     // Communication
diff --git a/ed_sensor_integration/src/kinect/sam_seg_module.cpp b/ed_sensor_integration/src/kinect/sam_seg_module.cpp
new file mode 100644
index 00000000..69659d34
--- /dev/null
+++ b/ed_sensor_integration/src/kinect/sam_seg_module.cpp
@@ -0,0 +1,174 @@
+#include "ed_sensor_integration/kinect/segmodules/sam_seg_module.h"
+#include "sam_onnx_ros/segmentation.hpp"
+#include "detection.h"
+
+// Required includes for the types used in publishSegmentationResults
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+std::vector<cv::Mat> SegmentationPipeline(const cv::Mat& img)
+{
+    ////////////////////////// YOLO //////////////////////////////////////
+    std::unique_ptr yoloDetector;
+    DL_INIT_PARAM params;
+    std::tie(yoloDetector, params) = Initialize();
+
+    ////////////////////////// SAM ///////////////////////////////////////
+    std::vector> samSegmentors;
+    SEG::DL_INIT_PARAM params_encoder;
+    SEG::DL_INIT_PARAM params_decoder;
+    std::vector<SEG::DL_RESULT> resSam;
+    SEG::DL_RESULT res;
+    std::tie(samSegmentors, params_encoder, params_decoder, res, resSam) = Initializer();
+
+    ////////////////////////// YOLO //////////////////////////////////////
+    std::vector<DL_RESULT> resYolo;
+    resYolo = Detector(yoloDetector, img);
+
+    ////////////////////////// SAM ///////////////////////////////////////
+    for (const auto& result : resYolo) {
+        // This should be deleted once we upload everything
+        // (here we skip the table object, but that should only happen in the rosservice scenario: on_top_of dinner_table)
+        int table_classification = 60;
+        if (result.classId == table_classification) {
+            std::cout << "Class is: " << yoloDetector->classes[result.classId] << ", so we don't append" << std::endl;
+            continue;
+        }
+        res.boxes.push_back(result.box);
+        std::cout << "Confidence is: " << result.confidence << std::endl;
+        std::cout << "Class is: " << yoloDetector->classes[result.classId] << std::endl;
+        std::cout << "Class ID is: " << result.classId << std::endl;
+    }
+
+    SegmentAnything(samSegmentors, params_encoder, params_decoder, img, resSam, res);
+
+    return std::move(res.masks);
+}
+
+
+void overlayMasksOnImage_(cv::Mat& rgb, const std::vector<cv::Mat>& masks)
+{
+    // Define colors in BGR format for OpenCV (high contrast)
+    std::vector<cv::Scalar> colors = {
+        cv::Scalar(0, 0, 255),   // Red
+        cv::Scalar(0, 255, 0),   // Green
+        cv::Scalar(255, 0, 0),   // Blue
+        cv::Scalar(0, 255, 255), // Yellow
+        cv::Scalar(255, 0, 255), // Magenta
+        cv::Scalar(255, 255, 0), // Cyan
+        cv::Scalar(128, 0, 128), // Purple
+        cv::Scalar(0, 128, 128)  // Brown
+    };
+
+    // Create a copy for the overlay (preserves the original for the contours)
+    cv::Mat overlay = rgb.clone();
+
+    for (size_t i = 0; i < masks.size(); i++) {
+        // Get a working copy of the mask
+        cv::Mat working_mask = masks[i].clone();
+
+        // Resize the mask if it does not match the image size
+        if (working_mask.rows != rgb.rows || working_mask.cols != rgb.cols) {
+            cv::resize(working_mask, working_mask, rgb.size(), 0, 0, cv::INTER_NEAREST);
+        }
+
+        // Ensure the mask is binary (values 0 or 255)
+        if (cv::countNonZero((working_mask > 0) & (working_mask < 255)) > 0) {
+            cv::threshold(working_mask, working_mask, 127, 255, cv::THRESH_BINARY);
+        }
+
+        // Use a different color for each mask
+        cv::Scalar color = colors[i % colors.size()];
+
+        // Create the colored overlay with this mask's specific color
+        cv::Mat colorMask = cv::Mat::zeros(rgb.size(), CV_8UC3);
+        colorMask.setTo(color, working_mask);
+
+        // Add this mask's overlay to the combined overlay
+        cv::addWeighted(overlay, 1.0, colorMask, 0.2, 0, overlay);
+
+        // Find the contours of the mask - CHAIN_APPROX_NONE gives the most accurate contours
+        std::vector<std::vector<cv::Point>> contours;
+        cv::findContours(working_mask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_NONE);
+
+        // Draw double contours for better visibility (outer black, inner colored)
+        cv::drawContours(rgb, contours, -1, cv::Scalar(0, 0, 0), 2); // Outer black border
+        cv::drawContours(rgb, contours, -1, color, 1);               // Inner colored line
+    }
+
+    // Apply the semi-transparent overlay with all masks
+    cv::addWeighted(rgb, 0.7, overlay, 0.3, 0, rgb);
+}
+
+void publishSegmentationResults(const cv::Mat& filtered_depth_image, const cv::Mat& rgb,
+                                const geo::Pose3D& sensor_pose, std::vector<cv::Mat>& clustered_images,
+                                ros::Publisher& mask_pub_, ros::Publisher& cloud_pub_, std::vector<EntityUpdate>& res_updates)
+{
+    // Work on a copy of the RGB image for the visualization
+    cv::Mat visualization = rgb.clone();
+    // Path where the debug images are saved
+    std::string path = "/tmp";
+    cv::imwrite(path + "/visualization.png", visualization);
+
+    // Create a properly normalized depth visualization
+    cv::Mat depth_vis;
+    double min_val, max_val;
+    cv::minMaxLoc(filtered_depth_image, &min_val, &max_val);
+
+    // Handle the empty depth image case
+    if (max_val == 0) {
+        depth_vis = cv::Mat::zeros(filtered_depth_image.size(), CV_8UC1);
+    } else {
+        // Scale to the full 8-bit range and convert to 8-bit
+        filtered_depth_image.convertTo(depth_vis, CV_8UC1, 255.0 / max_val);
+
+        // Apply a colormap for better visibility
+        cv::Mat depth_color;
+        cv::applyColorMap(depth_vis, depth_color, cv::COLORMAP_JET);
+        cv::imwrite(path + "/visualization_depth_color.png", depth_color);
+    }
+
+    // Save the grayscale version (the color version is saved above)
+    cv::imwrite(path + "/visualization_depth.png", depth_vis);
+    overlayMasksOnImage_(visualization, clustered_images);
+    // Save again after overlaying the masks
+    cv::imwrite(path + "/visualization_with_masks.png", visualization);
+    // Convert to a ROS message
+    sensor_msgs::ImagePtr msg = cv_bridge::CvImage(std_msgs::Header(), "bgr8", visualization).toImageMsg();
+    msg->header.stamp = ros::Time::now();
+
+    typedef pcl::PointCloud<pcl::PointXYZ> PointCloud;
+    PointCloud::Ptr combined_cloud (new PointCloud);
+
+    combined_cloud->header.frame_id = "map"; // Use the appropriate frame ID
+
+    // Add the points from all entity updates
+    for (const EntityUpdate& update : res_updates) {
+        for (const geo::Vec3& point : update.points) {
+            // Transform from the camera frame to the map frame
+            geo::Vec3 p_map = sensor_pose * point;
+            pcl::PointXYZ pcl_point;
+            pcl_point.x = p_map.x;
+            pcl_point.y = p_map.y;
+            pcl_point.z = p_map.z;
+            combined_cloud->push_back(pcl_point);
+        }
+    }
+
+    sensor_msgs::PointCloud2 cloud_msg;
+    pcl::toROSMsg(*combined_cloud, cloud_msg);
+    cloud_msg.header.stamp = ros::Time::now();
+    cloud_msg.header.frame_id = "map"; // Use the appropriate frame ID
+
+    // Publish
+    mask_pub_.publish(msg);
+    cloud_pub_.publish(cloud_msg);
+}
diff --git a/ed_sensor_integration/src/kinect/segmenter.cpp b/ed_sensor_integration/src/kinect/segmenter.cpp
index 5f21a776..27ea15c9 100644
--- a/ed_sensor_integration/src/kinect/segmenter.cpp
+++ b/ed_sensor_integration/src/kinect/segmenter.cpp
@@ -14,13 +14,22 @@
 // Clustering
 #include
 #include
+#include
 
-// Visualization
-//#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+#include
 
 // ----------------------------------------------------------------------------------------------------
 
-Segmenter::Segmenter()
+Segmenter::Segmenter(tue::Configuration config)
+    : config_(config)
 {
 }
 
@@ -34,6 +43,9 @@ Segmenter::~Segmenter()
 namespace
 {
 
+// Internal constants (tuning thresholds)
+constexpr std::size_t MIN_FILTERED_POINTS = 10;
+constexpr double MIN_RETENTION_RATIO = 0.10; // 10%
 
 class DepthRenderer : public geo::RenderResult
 {
@@ -164,97 +176,66 @@ void Segmenter::calculatePointsWithin(const rgbd::Image& image, const geo::Shape
         if (d_min > 0 && d_max > 0 && d >= d_min && d <= d_max)
             filtered_depth_image.at<float>(i) = d;
     }
-
-//    cv::imshow("min", res.min_buffer / 10);
-//    cv::imshow("max", res.max_buffer / 10);
-//    cv::imshow("diff", (res.max_buffer - res.min_buffer) * 10);
-//    cv::imshow("filtered", filtered_depth_image / 10);
-//    cv::waitKey();
 }
 
 // ----------------------------------------------------------------------------------------------------
 
-void Segmenter::cluster(const cv::Mat& depth_image, const geo::DepthCamera& cam_model,
-                        const geo::Pose3D& sensor_pose, std::vector<EntityUpdate>& clusters) const
+cv::Mat Segmenter::preprocessRGBForSegmentation(const cv::Mat& rgb_image,
+                                                const cv::Mat& filtered_depth_image) const
+{
+    cv::Mat masked_rgb = cv::Mat::zeros(rgb_image.size(), rgb_image.type());
+    for (int y = 0; y < rgb_image.rows; ++y) {
+        const float* depth_row = filtered_depth_image.ptr<float>(y); // fast row access
+        cv::Vec3b* out_row = masked_rgb.ptr<cv::Vec3b>(y);
+        const cv::Vec3b* rgb_row = rgb_image.ptr<cv::Vec3b>(y);
+        for (int x = 0; x < rgb_image.cols; ++x) {
+            if (depth_row[x] > 0.f) {
+                out_row[x] = rgb_row[x];
+            }
+        }
+    }
+    return masked_rgb;
+}
+// ----------------------------------------------------------------------------------------------------
+
+std::vector<cv::Mat> Segmenter::cluster(const cv::Mat& depth_image, const geo::DepthCamera& cam_model,
+                                        const geo::Pose3D& sensor_pose, std::vector<EntityUpdate>& clusters,
+                                        const cv::Mat& rgb_image)
 {
     int width = depth_image.cols;
     int height = depth_image.rows;
     ROS_DEBUG("Cluster with depth image of size %i, %i", width, height);
 
-    cv::Mat visited(height, width, CV_8UC1, cv::Scalar(0));
-
-    // Mark borders as visited (2-pixel border)
-    for(int x = 0; x < width; ++x)
-    {
-        visited.at<unsigned char>(0, x) = 1;
-        visited.at<unsigned char>(1, x) = 1;
-        visited.at<unsigned char>(height - 1, x) = 1;
-        visited.at<unsigned char>(height - 2, x) = 1;
-    }
-
-    for(int y = 0; y < height; ++y)
-    {
-        visited.at<unsigned char>(y, 0) = 1;
-        visited.at<unsigned char>(y, 1) = 1;
-        visited.at<unsigned char>(y, width - 1) = 1;
-        visited.at<unsigned char>(y, width - 2) = 1;
-    }
-
-    int dirs[] = { -1, 1, -width, width,
-                   -2, 2, -width * 2, width * 2};  // Also try one pixel skipped (filtering may cause some 1-pixel gaps)
-
+    std::vector<cv::Mat> masks = SegmentationPipeline(rgb_image.clone());
 
     ROS_DEBUG("Creating clusters");
 
     unsigned int size = width * height;
-    for(unsigned int i_pixel = 0; i_pixel < size; ++i_pixel)
-    {
-        float d = depth_image.at<float>(i_pixel);
-        if (d == 0 || d != d)
-            continue;
-
-        // Create cluster
+    for (size_t i = 0; i < masks.size(); ++i)
+    {
+        cv::Mat mask = masks[i];
         clusters.push_back(EntityUpdate());
         EntityUpdate& cluster = clusters.back();
 
-        // Mark visited
-        visited.at<unsigned char>(i_pixel) = 1;
-
-        std::queue<unsigned int> Q;
-        Q.push(i_pixel);
-
-        while(!Q.empty())
-        {
-            unsigned int p1 = Q.front();
-            Q.pop();
-
-            float p1_d = depth_image.at<float>(p1);
+        unsigned int num_points = 0;
 
-            // Add to cluster
-            cluster.pixel_indices.push_back(p1);
-            cluster.points.push_back(cam_model.project2Dto3D(p1 % width, p1 / width) * p1_d);
+        // Add to cluster
+        for(int y = 0; y < mask.rows; ++y) {
+            for(int x = 0; x < mask.cols; ++x) {
+                if (mask.at<unsigned char>(y, x) > 0) {
+                    unsigned int pixel_idx = y * width + x;
+                    float d = depth_image.at<float>(pixel_idx);
 
-            for(int dir = 0; dir < 8; ++dir)
-            {
-                unsigned int p2 = p1 + dirs[dir];
+                    if (d > 0 && std::isfinite(d)) {
 
-                // Check if out of bounds (N.B., if dirs[dir] < 0, p2 might be negative but since it's an unsigned int it will
-                // in practice be a large number.)
-                if (p2 >= size)
-                    continue;
-                float p2_d = depth_image.at<float>(p2);
-
-                // If not yet visited, and depth is within bounds
-                if (visited.at<unsigned char>(p2) == 0 && std::abs(p2_d - p1_d) < 0.05)
-                {
-                    // Mark visited
-                    visited.at<unsigned char>(p2) = 1;
-
-                    // Add point to queue
-                    Q.push(p2);
+                        // Add this point to the cluster
+                        cluster.pixel_indices.push_back(pixel_idx);
+                        cluster.points.push_back(cam_model.project2Dto3D(x, y) * d);
+                        num_points++;
+                    }
                 }
             }
         }
 
+        // Check if the cluster has enough points. If not, remove it from the list
         if (cluster.pixel_indices.size() < 100) // TODO: magic number
         {
@@ -262,6 +243,36 @@ void Segmenter::cluster(const cv::Mat& depth_image, const geo::DepthCamera& cam_
             continue;
         }
 
+        // BMM point cloud denoising
+        GMMParams params;
+        config_.value("psi0", params.psi0);
+        config_.value("nu0", params.nu0);
+        config_.value("alpha", params.alpha);
+        config_.value("kappa0", params.kappa0);
+
+        MAPGMM gmm(2, cluster.points, params); // 2 components: object + outliers
+        gmm.fit(cluster.points, sensor_pose);
+
+        // Get the component assignments and the inlier component
+        std::vector<int> labels = gmm.get_labels();
+        int inlier_component = gmm.get_inlier_component();
+
+        // Keep only the points assigned to the inlier component
+        std::vector<geo::Vec3> filtered_points;
+        for (size_t j = 0; j < labels.size(); j++) {
+            if (labels[j] == inlier_component) {
+                filtered_points.push_back(cluster.points[j]);
+            }
+        }
+
+        // Safety check: only accept the filtering if enough points survive
+        if (filtered_points.size() > MIN_FILTERED_POINTS && filtered_points.size() > MIN_RETENTION_RATIO * cluster.points.size()) {
+            ROS_INFO("MAP-GMM filtering: kept %zu of %zu points (%.1f%%)",
+                     filtered_points.size(), cluster.points.size(),
+                     100.0 * filtered_points.size() / cluster.points.size());
+            cluster.points = filtered_points;
+        } else {
+            ROS_WARN("MAP-GMM filtering: too few points kept, using the original points");
+        }
+
         // Calculate cluster convex hull
         float z_min = 1e9;
         float z_max = -1e9;
@@ -284,5 +295,31 @@ void Segmenter::cluster(const cv::Mat& depth_image, const geo::DepthCamera& cam_
         ed::convex_hull::create(points_2d, z_min, z_max, cluster.chull, cluster.pose_map);
         cluster.chull.complete = false;
+
+        // Simple statistical outlier removal (geometry-based, optional, may give better results if needed)
+        // After collecting all points, filter the outliers:
+        // if (!cluster.points.empty()) {
+        //     // Calculate the mean and standard deviation of the distances to the centroid
+        //     geo::Vec3 centroid(0, 0, 0);
+        //     for (const auto& p : cluster.points)
+        //         centroid += p;
+        //     centroid = centroid / cluster.points.size();
+
+        //     // Calculate the standard deviation
+        //     float sum_sq_dist = 0;
+        //     for (const auto& p : cluster.points)
+        //         sum_sq_dist += (p - centroid).length2();
+        //     float stddev = std::sqrt(sum_sq_dist / cluster.points.size());
+
+        //     // Filter out the points that are too far from the centroid
+        //     std::vector<geo::Vec3> filtered_points;
+        //     for (const auto& p : cluster.points) {
+        //         if ((p - centroid).length() < 2.5 * stddev)
+        //             filtered_points.push_back(p);
+        //     }
+
+        //     cluster.points = filtered_points;
+        // }
     }
+
+    return masks;
 }
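A sketch of how the reworked Segmenter is meant to be driven, mirroring what updater.cpp does below. The parameter names psi0/nu0/alpha/kappa0 come from the diff; the numeric values are placeholders, and writing them with setValue assumes the tue_config write API mirrors the config_.value(...) reads above:

#include <vector>
#include <opencv2/core.hpp>
#include <tue/config/configuration.h>
#include "ed/kinect/segmenter.h"
#include "ed/kinect/entity_update.h"

// Hypothetical helper showing the call order of the new API
std::vector<EntityUpdate> segmentObjects(const cv::Mat& rgb, const cv::Mat& filtered_depth,
                                         const geo::DepthCamera& cam_model, const geo::Pose3D& sensor_pose)
{
    tue::Configuration config;        // MAP-GMM priors; the values here are made up
    config.setValue("psi0", 0.01);
    config.setValue("nu0", 5.0);
    config.setValue("alpha", 1.0);
    config.setValue("kappa0", 0.1);

    Segmenter segmenter(config);

    // 1) keep only the RGB pixels that survived the depth filtering
    cv::Mat masked_rgb = segmenter.preprocessRGBForSegmentation(rgb, filtered_depth);

    // 2) YOLO -> SAM -> BMM clustering; clusters are filled in, the 2D masks are returned
    std::vector<EntityUpdate> clusters;
    segmenter.cluster(filtered_depth, cam_model, sensor_pose, clusters, masked_rgb);
    return clusters;
}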
diff --git a/ed_sensor_integration/src/kinect/updater.cpp b/ed_sensor_integration/src/kinect/updater.cpp
index d68f5f1e..880b5282 100644
--- a/ed_sensor_integration/src/kinect/updater.cpp
+++ b/ed_sensor_integration/src/kinect/updater.cpp
@@ -11,19 +11,23 @@
 #include "ed/kinect/association.h"
 #include "ed/kinect/renderer.h"
-
 #include "ed/convex_hull_calc.h"
+
+
+
 #include
 #include
+#include
+#include "ed_sensor_integration/kinect/segmodules/sam_seg_module.h"
 
 // ----------------------------------------------------------------------------------------------------
 
 // Calculates which depth points are in the given convex hull (in the EntityUpdate), updates the mask,
 // and updates the convex hull height based on the points found
 void refitConvexHull(const rgbd::Image& image, const geo::Pose3D& sensor_pose, const geo::DepthCamera& cam_model,
-                     const Segmenter& segmenter_, EntityUpdate& up)
+                     const std::unique_ptr<Segmenter>& segmenter_, EntityUpdate& up)
 {
     up.pose_map.t.z += (up.chull.z_max + up.chull.z_min) / 2;
 
@@ -34,7 +38,7 @@ void refitConvexHull(const rgbd::Image& image, const geo::Pose3D& sensor_pose, c
     geo::createConvexPolygon(chull_shape, points, up.chull.height());
 
     cv::Mat filtered_depth_image;
-    segmenter_.calculatePointsWithin(image, chull_shape, sensor_pose.inverse() * up.pose_map, filtered_depth_image);
+    segmenter_->calculatePointsWithin(image, chull_shape, sensor_pose.inverse() * up.pose_map, filtered_depth_image);
 
     up.points.clear();
     up.pixel_indices.clear();
@@ -43,6 +47,7 @@ void refitConvexHull(const rgbd::Image& image, const geo::Pose3D& sensor_pose, c
     float z_max = -1e9;
 
     int i_pixel = 0;
+    std::vector<double> z_values;
     for(int y = 0; y < filtered_depth_image.rows; ++y)
     {
         for(int x = 0; x < filtered_depth_image.cols; ++x)
@@ -56,6 +61,7 @@ void refitConvexHull(const rgbd::Image& image, const geo::Pose3D& sensor_pose, c
 
             geo::Vec3 p = cam_model.project2Dto3D(x, y) * d;
             geo::Vec3 p_map = sensor_pose * p;
+            z_values.push_back(p_map.z);
 
             z_min = std::min(z_min, p_map.z);
             z_max = std::max(z_max, p_map.z);
@@ -66,6 +72,21 @@ void refitConvexHull(const rgbd::Image& image, const geo::Pose3D& sensor_pose, c
         }
     }
 
+    // Statistically filter out z outliers
+    if (z_values.empty()) return;
+
+    // Sort for the percentile calculation
+    std::sort(z_values.begin(), z_values.end());
+
+    // Use the 5th and 95th percentiles instead of min/max to reject outliers
+    int lower_idx = std::max(0, static_cast<int>(z_values.size() * 0.05));
+    int upper_idx = std::min(static_cast<int>(z_values.size() - 1),
+                             static_cast<int>(z_values.size() * 0.95));
+
+    z_min = z_values[lower_idx];
+    z_max = z_values[upper_idx];
+
     double h = z_max - z_min;
     up.pose_map.t.z = (z_max + z_min) / 2;
     up.chull.z_min = -h / 2;
@@ -80,7 +101,7 @@ void refitConvexHull(const rgbd::Image& image, const geo::Pose3D& sensor_pose, c
  * @return new EntityUpdate including new convexHull and measurement points of both inputs.
  */
 EntityUpdate mergeConvexHulls(const rgbd::Image& image, const geo::Pose3D& sensor_pose, const geo::DepthCamera& cam_model,
-                              const Segmenter& segmenter_, const EntityUpdate& u1, const EntityUpdate& u2)
+                              const std::unique_ptr<Segmenter>& segmenter_, const EntityUpdate& u1, const EntityUpdate& u2)
 {
     EntityUpdate new_u = u1;
     double z_max = std::max(u1.pose_map.t.getZ()+u1.chull.z_max,u2.pose_map.t.getZ()+u2.chull.z_max);
@@ -110,7 +131,7 @@ EntityUpdate mergeConvexHulls(const rgbd::Image& image, const geo::Pose3D& senso
 // Calculates which depth points are in the given convex hull (in the EntityUpdate), updates the mask,
 // and updates the convex hull height based on the points found
 std::vector<EntityUpdate> mergeOverlappingConvexHulls(const rgbd::Image& image, const geo::Pose3D& sensor_pose, const geo::DepthCamera& cam_model,
-                                                      const Segmenter& segmenter_, const std::vector<EntityUpdate>& updates)
+                                                      const std::unique_ptr<Segmenter>& segmenter_, const std::vector<EntityUpdate>& updates)
 {
     ROS_INFO("mergoverlapping chulls: nr of updates: %lu", updates.size());
 
@@ -185,9 +206,22 @@ std::vector<EntityUpdate> mergeOverlappingConvexHulls(const rgbd::Image& image,
 }
 
 // ----------------------------------------------------------------------------------------------------
-
-Updater::Updater()
+Updater::Updater(tue::Configuration config)
 {
+    if (config.readGroup("segmenter", tue::config::REQUIRED))
+    {
+        segmenter_ = std::make_unique<Segmenter>(config);
+    }
+    else
+    {
+        ROS_ERROR("Failed to read segmenter configuration group from config file, cannot initialize Updater");
+        throw std::runtime_error("Failed to read segmenter configuration group from config file");
+    }
+
+    // For displaying the SAM mask: initialize the debug publishers
+    ros::NodeHandle nh("~");
+    mask_pub_ = nh.advertise<sensor_msgs::Image>("segmentation_masks", 1);
+    cloud_pub_ = nh.advertise<sensor_msgs::PointCloud2>("point_cloud_ooo", 1);
 }
 
 // ----------------------------------------------------------------------------------------------------
@@ -206,12 +240,14 @@ bool Updater::update(const ed::WorldModel& world, const rgbd::ImageConstPtr& ima
 
     // will contain depth image filtered with given update shape and world model (background) subtraction
     cv::Mat filtered_depth_image;
-
+    cv::Mat filtered_rgb_image;
     // sensor pose might be update, so copy (making non-const)
     geo::Pose3D sensor_pose = sensor_pose_const;
 
     // depth image
     const cv::Mat& depth = image->getDepthImage();
+    const cv::Mat& rgb = image->getRGBImage();
+    //cv::Mat img = rgb.clone();
 
     // Determine depth image camera model
     rgbd::View view(*image, depth.cols);
@@ -290,8 +326,8 @@ bool Updater::update(const ed::WorldModel& world, const rgbd::ImageConstPtr& ima
         }
         else
         {
-//            res.error << "Could not determine pose of '" << entity_id.str() << "'.";
-//            return false;
+            // res.error << "Could not determine pose of '" << entity_id.str() << "'.";
+            // return false;
 
             // Could not fit entity, so keep the old pose
             new_pose = e->pose();
@@ -338,7 +374,7 @@ bool Updater::update(const ed::WorldModel& world, const rgbd::ImageConstPtr& ima
 
             // Segment
             geo::Pose3D shape_pose = sensor_pose.inverse() * new_pose;
-            segmenter_.calculatePointsWithin(*image, shape, shape_pose, filtered_depth_image);
+            segmenter_->calculatePointsWithin(*image, shape, shape_pose, filtered_depth_image);
         }
     }
     else
@@ -356,7 +392,7 @@ bool Updater::update(const ed::WorldModel& world, const rgbd::ImageConstPtr& ima
         ed::WorldModel world_updated = world;
         world_updated.update(res.update_req);
 
-        segmenter_.removeBackground(filtered_depth_image, world_updated, cam_model, sensor_pose, req.background_padding);
+        segmenter_->removeBackground(filtered_depth_image, world_updated, cam_model, sensor_pose, req.background_padding);
 
     // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     // Clear convex hulls that are no longer there
@@ -385,13 +421,12 @@ bool Updater::update(const ed::WorldModel& world, const rgbd::ImageConstPtr& ima
         }*/
     }
 
-//    cv::imshow("depth image", depth / 10);
-//    cv::imshow("segments", filtered_depth_image / 10);
-//    cv::waitKey();
-
     // - - - - - - - - - - - - - - - - - - - - - - - -
     // Cluster
 
-    segmenter_.cluster(filtered_depth_image, cam_model, sensor_pose, res.entity_updates);
+    filtered_rgb_image = segmenter_->preprocessRGBForSegmentation(rgb, filtered_depth_image);
+    std::vector<cv::Mat> clustered_images = segmenter_->cluster(filtered_depth_image, cam_model, sensor_pose, res.entity_updates, filtered_rgb_image);
+
+    publishSegmentationResults(filtered_depth_image, filtered_rgb_image, sensor_pose, clustered_images, mask_pub_, cloud_pub_, res.entity_updates);
 
     // - - - - - - - - - - - - - - - - - - - - - - - -
     // Merge the detected clusters if they overlap in XY or Z
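Updater now advertises two debug topics in its private namespace: segmentation_masks (the BGR mask overlay) and point_cloud_ooo (the merged object cloud in the map frame). A minimal viewer sketch; the node name and the fully qualified topic prefix are assumptions that depend on how the ED plugin is launched:

#include <ros/ros.h>
#include <sensor_msgs/Image.h>
#include <cv_bridge/cv_bridge.h>
#include <opencv2/highgui.hpp>

// Shows the BGR overlay that publishSegmentationResults() puts on the mask topic
void onMask(const sensor_msgs::ImageConstPtr& msg)
{
    cv::imshow("segmentation_masks", cv_bridge::toCvShare(msg, "bgr8")->image);
    cv::waitKey(1);
}

int main(int argc, char** argv)
{
    ros::init(argc, argv, "sam_mask_viewer");
    ros::NodeHandle nh;

    // The publisher lives in the plugin's private namespace, so the prefix
    // below is only an example; adjust it to your launch setup.
    ros::Subscriber sub = nh.subscribe("/ed/segmentation_masks", 1, onMask);
    ros::spin();
    return 0;
}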
diff --git a/ed_sensor_integration/tools/segmenter.cpp b/ed_sensor_integration/tools/segmenter.cpp
index e140288d..500628f3 100644
--- a/ed_sensor_integration/tools/segmenter.cpp
+++ b/ed_sensor_integration/tools/segmenter.cpp
@@ -135,7 +135,8 @@ int main(int argc, char **argv)
 
     ed::SnapshotCrawler crawler(path);
 
-    Updater updater;
+    tue::Configuration config;
+    Updater updater(config);
 
     std::vector snapshots;
diff --git a/ed_sensor_integration_py/CMakeLists.txt b/ed_sensor_integration_py/CMakeLists.txt
new file mode 100644
index 00000000..f7dc285b
--- /dev/null
+++ b/ed_sensor_integration_py/CMakeLists.txt
@@ -0,0 +1,20 @@
+cmake_minimum_required(VERSION 3.0.2)
+project(ed_sensor_integration_py)
+
+find_package(catkin REQUIRED COMPONENTS)
+
+catkin_python_setup()
+
+catkin_package()
+
+install(PROGRAMS
+  #scripts/face_recognition_node
+  DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
+)
+
+if(CATKIN_ENABLE_TESTING)
+  find_package(catkin_lint_cmake REQUIRED)
+  catkin_add_catkin_lint_test("-W2")
+
+  catkin_add_nosetests(test)
+endif()
diff --git a/ed_sensor_integration_py/README.md b/ed_sensor_integration_py/README.md
new file mode 100644
index 00000000..40e7278d
--- /dev/null
+++ b/ed_sensor_integration_py/README.md
@@ -0,0 +1 @@
+# ed_sensor_integration_py
diff --git a/ed_sensor_integration_py/package.xml b/ed_sensor_integration_py/package.xml
new file mode 100644
index 00000000..54d0934b
--- /dev/null
+++ b/ed_sensor_integration_py/package.xml
@@ -0,0 +1,33 @@
+<?xml version="1.0"?>
+<package format="2">
+  <name>ed_sensor_integration_py</name>
+  <version>0.0.1</version>
+  <description>The ed_sensor_integration_py package</description>
+
+  <maintainer email="todo@todo.todo">ToDo</maintainer>
+
+  <license>MIT</license>
+
+  <buildtool_depend>catkin</buildtool_depend>
+
+  <buildtool_depend>python3-setuptools</buildtool_depend>
+
+  <exec_depend>cv_bridge</exec_depend>
+  <exec_depend>python3-numpy</exec_depend>
+  <exec_depend>python3-opencv</exec_depend>
+  <exec_depend>rospy</exec_depend>
+
+  <test_depend>catkin_lint_cmake</test_depend>
+
+  <doc_depend>python3-sphinx</doc_depend>
+  <doc_depend>python-sphinx-autoapi-pip</doc_depend>
+  <doc_depend>python-sphinx-rtd-theme-pip</doc_depend>
+  <doc_depend>python3-yaml</doc_depend>
+
+  <export>
+  </export>
+</package>
diff --git a/ed_sensor_integration_py/rosdoc.yaml b/ed_sensor_integration_py/rosdoc.yaml
new file mode 100644
index 00000000..d6daeddf
--- /dev/null
+++ b/ed_sensor_integration_py/rosdoc.yaml
@@ -0,0 +1,3 @@
+- builder: sphinx
+  sphinx_root_dir: docs
+  name: Python API
diff --git a/ed_sensor_integration_py/setup.py b/ed_sensor_integration_py/setup.py
new file mode 100644
index 00000000..75a7212c
--- /dev/null
+++ b/ed_sensor_integration_py/setup.py
@@ -0,0 +1,11 @@
+#!/usr/bin/env python
+
+from setuptools import setup
+from catkin_pkg.python_setup import generate_distutils_setup
+
+d = generate_distutils_setup(
+    packages=['ed_sensor_integration_py'],
+    package_dir={'': 'src'},
+)
+
+setup(**d)
diff --git a/ed_sensor_integration_py/src/ed_sensor_integration_py/__init__.py b/ed_sensor_integration_py/src/ed_sensor_integration_py/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/hero_sam b/hero_sam
new file mode 120000
index 00000000..bc863849
--- /dev/null
+++ b/hero_sam
@@ -0,0 +1 @@
+/home/amigo/Documents/repos/hero_sam/
\ No newline at end of file