diff --git a/Dockerfile b/Dockerfile index 27fefbe..97981e8 100755 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,7 @@ # Platform arm64 means nvidia jetson arm64v8. # Image may be not compatible with other arm machines. -FROM --platform=linux/arm64v8 nvcr.io/nvidia/l4t-base:r35.1.0 AS handy-base-arm64 +FROM --platform=linux/arm64v8 nvcr.io/nvidia/l4t-base:r36.2.0 AS handy-base-arm64 ### INSTALL NVIDIA PACKAGES @@ -105,7 +105,7 @@ RUN apt-get update && \ zlib1g-dev \ && rm -rf /var/lib/apt/lists/* && apt-get clean -ENV TORCHVISION_VERSION=0.14.0 +ENV TORCHVISION_VERSION=0.17.0 FROM handy-common AS handy-cuda-arm64 @@ -162,19 +162,26 @@ RUN pip3 install --no-cache-dir \ numpy \ pillow -ENV PYTORCH_WHL="torch-1.13.0a0+340c4120.nv22.06-cp38-cp38-linux_aarch64.whl" -ENV PYTORCH_URL="https://developer.download.nvidia.com/compute/redist/jp/v50/pytorch/${PYTORCH_WHL}" +ENV PYTORCH_WHL="torch-2.2.0a0+81ea7a4.nv24.01-cp310-cp310-linux_aarch64.whl" +ENV PYTORCH_URL="https://developer.download.nvidia.com/compute/redist/jp/v60dp/pytorch/${PYTORCH_WHL}" RUN wget --no-check-certificate -qO ${PYTORCH_WHL} ${PYTORCH_URL} \ && pip3 install --no-cache-dir ${PYTORCH_WHL} \ && rm -rf /tmp/* -RUN wget -qO - https://github.com/pytorch/vision/archive/refs/tags/v${TORCHVISION_VERSION}.tar.gz | tar -xz \ - && cd vision-${TORCHVISION_VERSION} \ - && python3 setup.py install \ - && rm -rf /tmp/* +RUN pip3 install torchvision==${TORCHVISION_VERSION} + +# RUN apt-get update && \ +# apt-get install -y cuda-command-line-tools-12-2 libcupti-dev && \ +# echo "/usr/local/cuda-12.2/extras/CUPTI/lib64" \ +# > /etc/ld.so.conf.d/cupti.conf && ldconfig -ENV PYTORCH_PATH="/usr/local/lib/python3.8/dist-packages/torch" +# RUN wget -qO - https://github.com/pytorch/vision/archive/refs/tags/v${TORCHVISION_VERSION}.tar.gz | tar -xz \ +# && cd vision-${TORCHVISION_VERSION} \ +# && python3 setup.py install \ +# && rm -rf /tmp/* + +ENV PYTORCH_PATH="/usr/local/lib/python3.10/dist-packages/torch" ENV LD_LIBRARY_PATH="${PYTORCH_PATH}/lib:${LD_LIBRARY_PATH}" FROM handy-common AS handy-cuda-amd64 @@ -215,13 +222,13 @@ RUN wget -qO - https://github.com/opencv/opencv/archive/refs/tags/${OPENCV_VERSI -DBUILD_TESTS=OFF \ && make -j$(nproc) install && rm -rf /tmp/* -ENV TORCH_VERSION=1.13.0 +ENV TORCH_VERSION=2.2.0 RUN pip3 install --no-cache-dir \ torch==${TORCH_VERSION} \ torchvision==${TORCHVISION_VERSION} -ENV PYTORCH_PATH="/usr/local/lib/python3.8/dist-packages/torch" +ENV PYTORCH_PATH="/usr/local/lib/python3.10/dist-packages/torch" ENV LD_LIBRARY_PATH="${PYTORCH_PATH}/lib:${LD_LIBRARY_PATH}" FROM handy-cuda-${TARGETARCH} AS handy-ros @@ -295,6 +302,11 @@ RUN mkdir -p ${ROS_ROOT} \ > ${ROS_ROOT}/ros2.rosinstall \ && vcs import ${ROS_TMP} < ${ROS_ROOT}/ros2.rosinstall +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + ca-certificates curl gnupg2 lsb-release \ + && rm -rf /var/lib/apt/lists/* + RUN apt-get update -q \ && rosdep init \ && rosdep update \ @@ -311,11 +323,13 @@ RUN apt-get update -q \ --skip-keys python3-opencv \ && rm -rf /var/lib/apt/lists/* && apt-get clean +ENV CMAKE_TLS_VERIFY=0 + RUN cd ${ROS_TMP} \ && colcon build \ --merge-install \ --install-base ${ROS_ROOT} \ - --cmake-args -DBUILD_TESTING=OFF \ + --cmake-args -DBUILD_TESTING=OFF -DCMAKE_TLS_VERIFY=OFF \ && rm -rf /tmp/* RUN printf "export ROS_ROOT=${ROS_ROOT}\n" >> ${HOME}/.bashrc \ diff --git a/docker-compose.yaml b/docker-compose.yaml index 43754b4..f55943d 100755 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,14 +1,16 @@ services: handy: - image: "registry.robotics-lab.ru/handy:0.7.3" + image: "registry.robotics-lab.ru/handy:0.7.4" container_name: "${CONTAINER_NAME:-handy-${USER}}" networks: [default] privileged: true + runtime: nvidia ports: - "${FOXGLOVE_PORT:-8765}:8765" volumes: - "/home/bakind/handy:/handy" - "/dev:/dev" + # - "/tmp/.X11-unix/:/tmp/.X11-unix" deploy: resources: limits: @@ -22,10 +24,10 @@ services: x-bake: platforms: - linux/arm64 - - linux/amd64 + # - linux/amd64 cache-from: - type=registry,ref=registry.robotics-lab.ru/handy:cache-arm64 - - type=registry,ref=registry.robotics-lab.ru/handy:cache-amd64 + # - type=registry,ref=registry.robotics-lab.ru/handy:cache-amd64 cache-to: # [!!!] Warning: https://github.com/docker/buildx/discussions/1382 # - type=registry,mode=max,ref=registry.robotics-lab.ru/handy:cache-arm64 diff --git a/packages/camera/CMakeLists.txt b/packages/camera/CMakeLists.txt index 722645a..1a9ab81 100644 --- a/packages/camera/CMakeLists.txt +++ b/packages/camera/CMakeLists.txt @@ -4,6 +4,21 @@ project(camera) add_compile_options(-Wall -Wextra -Werror=unused-variable -Wpedantic) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -O3") +set(Torch_DIR "/usr/local/lib/python3.10/dist-packages/torch/share/cmake/Torch") +find_package(Torch REQUIRED) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") + +add_compile_options(-fsanitize=address) +add_link_options(-fsanitize=address) + +find_path(TENSORRT_INCLUDE_DIR NvInfer.h + PATHS /usr/include /usr/local/include) +find_library(TENSORRT_LIB nvinfer + PATHS /usr/lib /usr/lib/aarch64-linux-gnu) +find_library(TENSORRT_ONNX_LIB nvonnxparser + PATHS /usr/lib /usr/lib/aarch64-linux-gnu) + +find_package(CUDAToolkit REQUIRED) find_package(OpenCV REQUIRED) find_package(yaml_cpp_vendor REQUIRED) find_package(ament_cmake REQUIRED) @@ -18,28 +33,46 @@ add_executable( calibration src/calibration_main.cpp src/calibration.cpp src/params.cpp ) add_executable(triangulation src/triangulation.cpp src/params.cpp) +add_executable(inference_node src/inference_node.cpp src/inference_node_main.cpp) target_include_directories( camera PUBLIC "$" - "$" "/usr/include" + "$" + "/usr/include" + "/opt/ros/iron/include/mcap_vendor/mcap" ) target_include_directories( calibration PUBLIC "$" "/usr/include" ) + target_include_directories( triangulation PUBLIC "$" "/usr/include" ) +target_include_directories( + inference_node PUBLIC "$" + ${TENSORRT_INCLUDE_DIR} + "/usr/local/cuda/include" + "/usr/include" + ${mcap_vendor_INCLUDE_DIRS} +) + ament_target_dependencies(camera yaml_cpp_vendor mcap_vendor) ament_target_dependencies(calibration yaml_cpp_vendor Boost OpenCV) ament_target_dependencies(triangulation OpenCV yaml_cpp_vendor) +ament_target_dependencies(inference_node OpenCV Boost yaml_cpp_vendor mcap_vendor Torch) target_link_libraries(camera mvsdk) +target_link_libraries(inference_node + ${TENSORRT_LIB} + ${TENSORRT_ONNX_LIB} + mvsdk + ${TORCH_LIBRARIES}) -install(TARGETS camera calibration triangulation +install(TARGETS camera calibration triangulation inference_node DESTINATION lib/${PROJECT_NAME} ) diff --git a/packages/camera/include/camera.h b/packages/camera/include/camera.h index 65f161b..6591697 100644 --- a/packages/camera/include/camera.h +++ b/packages/camera/include/camera.h @@ -11,7 +11,7 @@ #include #include -#include "mcap_vendor/mcap/writer.hpp" +#include "mcap/writer.hpp" using namespace std::chrono_literals; @@ -147,5 +147,7 @@ class CameraRecorder { mcap::McapWriter mcap_writer_; MappedFileManager file_manager_; + + std::function deleter_; }; } // namespace handy::camera diff --git a/packages/camera/include/inference_node.h b/packages/camera/include/inference_node.h new file mode 100644 index 0000000..3453076 --- /dev/null +++ b/packages/camera/include/inference_node.h @@ -0,0 +1,113 @@ +#pragma once + +#include "camera.h" +#include +#include +#include "c10/cuda/CUDAStream.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace handy::camera { + +class BallInferenceNode { + public: + static const int kMaxCameraNum = 2; + + /// Per‐frame detection result + struct DetectionOutput { + std::array centers; // one ball center per camera image + std::array flags; // true = needs segmentation for that camera + uint32_t timestamp = 0; + SynchronizedFrameBuffers* images_to_segment = nullptr; + }; + + /// Per‐frame segmentation result + struct SegmentationOutput { + // one centroid per camera image that requested segmentation + std::vector centroids; + std::vector camera_indices; + uint32_t timestamp = 0; + }; + + using DetectionCallback = std::function; + using SegmentationCallback = std::function; + + /// ctor loads both models and sets up queues/threads + BallInferenceNode( + const std::string& det_model_path, const std::string& seg_model_path, Size input_size, + std::function deleter, int window_size = 5); + ~BallInferenceNode(); + + /// Subscribe to detection outputs + /// Called from arbitrary thread; callbacks must be lock‐free/low‐latency. + void registerDetectionSubscriber(const DetectionCallback& cb); + + /// Subscribe to segmentation outputs + void registerSegmentationSubscriber(const SegmentationCallback& cb); + + /// Start the internal threads + void start(); + + /// Signal all threads to stop, join them, and release resources + void stop(); + + void handleSyncBuffer(std::shared_ptr buffer); + + private: + void detectSingleCamera(const std::deque& frames, DetectionOutput& detect_result, + int camera_idx, c10::cuda::CUDAStream& infer_stream); + void detectionLoop(); + void segmentationLoop(); + + + // === model objects === + torch::jit::script::Module det_model_; + torch::jit::script::Module seg_model_; + + std::function buffer_deleter_; + + struct Param { + // === input queue & rolling window === + int window_size = 5; + Size input_size{320, 192}; + const at::Tensor means = + torch::tensor({0.077, 0.092, 0.142}).view({1, 3, 1, 1}).to(at::kFloat).to(at::kCUDA); + const at::Tensor stds = + torch::tensor({0.068, 0.079, 0.108}).view({1, 3, 1, 1}).to(at::kFloat).to(at::kCUDA); + } param_; + + struct State { + boost::lockfree::queue det_queue{64}; + + // protected by ring_mutex_ + std::array, kMaxCameraNum> ring_buffer; + std::vector pinned_pool; + std::atomic pool_idx = 0; + std::mutex ring_mutex; + + // === intermediate queues === + boost::lockfree::queue seg_queue{128}; + + // === subscriber callbacks === + std::vector det_cbs; + std::vector seg_cbs; + std::mutex cb_mutex; + + // === threading & synchroization === + std::atomic running{false}; + std::thread det_thread; + std::thread seg_thread; + std::condition_variable seg_cv; + std::mutex seg_mutex; + } state_; +}; + +} // namespace handy::camera diff --git a/packages/camera/include/triangulation.h b/packages/camera/include/triangulation.h index 18ca8b9..15e0f66 100644 --- a/packages/camera/include/triangulation.h +++ b/packages/camera/include/triangulation.h @@ -1,5 +1,4 @@ #include "params.h" -#include "params.h" #include #include diff --git a/packages/camera/src/camera.cpp b/packages/camera/src/camera.cpp index 87973fe..1a4a147 100644 --- a/packages/camera/src/camera.cpp +++ b/packages/camera/src/camera.cpp @@ -282,6 +282,18 @@ CameraRecorder::CameraRecorder( } } + deleter_ = [this](SynchronizedFrameBuffers* sync_buffers_ptr) { + for (size_t i = 0; i < sync_buffers_ptr->images.size(); ++i) { + if (!sync_buffers_ptr->images[i]) { + continue; + } + while (!this->state_.free_buffers[i]->push(sync_buffers_ptr->images[i])) { + } + state_.free_buffer_cnts[i]++; + } + delete sync_buffers_ptr; + }; + for (int i = 0; i < state_.camera_num; ++i) { abortIfNot("reset timestamp", i, CameraRstTimeStamp(state_.camera_handles[i])); } @@ -395,20 +407,9 @@ void CameraRecorder::synchronizeQueues() { }; // make single structure as a shared ptr - // deleter is expected to push buffers back to the queue and then free the structure - auto custom_deleter = [this](SynchronizedFrameBuffers* sync_buffers_ptr) { - for (size_t i = 0; i < sync_buffers_ptr->images.size(); ++i) { - if (!sync_buffers_ptr->images[i]) { - continue; - } - while (!this->state_.free_buffers[i]->push(sync_buffers_ptr->images[i])) { - } - state_.free_buffer_cnts[i]++; - } - delete sync_buffers_ptr; - }; + // deleter_ is expected to push buffers back to the queue and then free the structure std::shared_ptr sync_buffers( - new SynchronizedFrameBuffers, custom_deleter); + new SynchronizedFrameBuffers, deleter_); sync_buffers->images.resize(state_.camera_num, nullptr); sync_buffers->image_sizes.resize(state_.camera_num); @@ -440,7 +441,7 @@ void CameraRecorder::synchronizeQueues() { // creating new structure sync_buffers = std::shared_ptr( new SynchronizedFrameBuffers, - custom_deleter); // custom deleter adds buffer pointer back to the queue + deleter_); // custom deleter adds buffer pointer back to the queue sync_buffers->images.resize(state_.camera_num, nullptr); sync_buffers->image_sizes.resize(state_.camera_num); } while (next()); diff --git a/packages/camera/src/inference_node.cpp b/packages/camera/src/inference_node.cpp new file mode 100644 index 0000000..1262644 --- /dev/null +++ b/packages/camera/src/inference_node.cpp @@ -0,0 +1,283 @@ +#include "inference_node.h" +#include +#include +#include +#include "c10/cuda/CUDAStream.h" +#include "c10/cuda/CUDAGuard.h" +#include + +namespace handy::camera { + +BallInferenceNode::BallInferenceNode( + const std::string& det_model_path, const std::string& seg_model_path, Size input_size, + std::function deleter, int window_size) { + det_model_ = torch::jit::load(det_model_path); + det_model_.eval(); + det_model_.to(at::kCUDA); + + seg_model_ = torch::jit::load(seg_model_path); + seg_model_.eval(); + seg_model_.to(at::kCUDA); + + // Store params + param_.input_size = input_size; + param_.window_size = window_size; + buffer_deleter_ = deleter; + + for (int i = 0; i < param_.window_size * kMaxCameraNum; ++i) { + // Allocate an empty pinned tensor of shape [1, C, H, W] + at::Tensor t = torch::empty( + {1, 3, param_.input_size.height, param_.input_size.width}, + at::TensorOptions().dtype(torch::kUInt8)) + .pin_memory(); + state_.pinned_pool.push_back(std::move(t)); + } +} + +BallInferenceNode::~BallInferenceNode() { stop(); } + +void BallInferenceNode::registerDetectionSubscriber(const DetectionCallback& cb) { + std::lock_guard lk(state_.cb_mutex); + state_.det_cbs.push_back(cb); +} + +void BallInferenceNode::registerSegmentationSubscriber(const SegmentationCallback& cb) { + std::lock_guard lk(state_.cb_mutex); + state_.seg_cbs.push_back(cb); +} + +void BallInferenceNode::handleSyncBuffer(std::shared_ptr buffer) { + while (!state_.det_queue.push(buffer.get())) { + } +} + +void BallInferenceNode::start() { + state_.running = true; + // Launch threads + state_.det_thread = std::thread(&BallInferenceNode::detectionLoop, this); + state_.seg_thread = std::thread(&BallInferenceNode::segmentationLoop, this); +} + +void BallInferenceNode::stop() { + bool expected = true; + if (state_.running.compare_exchange_strong(expected, false)) { + state_.seg_cv.notify_one(); + if (state_.det_thread.joinable()) { + state_.det_thread.join(); + } + if (state_.seg_thread.joinable()) { + state_.seg_thread.join(); + } + } +} + +void BallInferenceNode::detectSingleCamera( + const std::deque& frames, DetectionOutput& detect_result, int camera_idx, + c10::cuda::CUDAStream& infer_stream) { + std::vector frames_vec(frames.begin(), frames.end()); + at::Tensor batch = at::cat(frames_vec, 1); // dim=1 + + torch::NoGradGuard no_grad; + at::cuda::CUDAStreamGuard guard(infer_stream); + auto out_tuple = det_model_.forward({batch}).toTuple(); + + // Unpack model outputs + // out_tuple[0]: prob distribution (1, width + height) + // out_tuple[1]: presence logits (1, 2) + at::Tensor center_distr = out_tuple->elements()[0].toTensor(); + at::Tensor success_detection_distr = out_tuple->elements()[1].toTensor(); + + // Threshold & argmax to get (x,y) center + // replicate: pred[pred < thresh]=0, then argmax on slices + const float thresh = 0.0001; + int W = param_.input_size.width; + center_distr = center_distr.clone(); + center_distr.masked_fill_(center_distr < thresh, 0.0f); + + at::Tensor prob_x = center_distr.slice(1, 0, W); // {1, W} + at::Tensor prob_y = center_distr.slice(1, W, center_distr.size(1)); // {1, H} + + int64_t pred_x = prob_x.argmax(1).item(); + int64_t pred_y = prob_y.argmax(1).item(); + detect_result.centers[camera_idx] = {pred_x, pred_y}; + + int64_t pres_idx = success_detection_distr.argmax(1).item(); + detect_result.flags[camera_idx] = (pres_idx == 1); // class 1 means object present +} + +void BallInferenceNode::detectionLoop() { + auto infer_stream = at::cuda::getStreamFromPool(); + std::vector> rollBuf(kMaxCameraNum); + + SynchronizedFrameBuffers* sync; + while (state_.running) { + if (!state_.det_queue.pop(sync)) { + std::this_thread::sleep_for(1ms); + continue; + } + std::cout << "recieved buffer for detection\n"; + + DetectionOutput detection_out; + bool any_successful_detection = false; + auto start = std::chrono::steady_clock::now(); + + // Preprocess each camera image + for (size_t cam = 0; cam < sync->images.size(); ++cam) { + uint8_t* raw = sync->images[cam]; + Size sz = sync->image_sizes[cam]; + cv::Mat bayer(sz.height, sz.width, CV_8UC1, raw); + + + + static thread_local cv::Mat rgb; + cv::cvtColor(bayer, rgb, cv::COLOR_BayerBG2RGB); + + static thread_local cv::Mat resized; + cv::resize(rgb, resized, cv::Size(param_.input_size.width, param_.input_size.height)); + // To tensor of shape (1, height, width, channel) and perform all the preprocessing + at::Tensor t_uint8 = state_.pinned_pool[state_.pool_idx++ % state_.pinned_pool.size()]; + std::memcpy( + t_uint8.data_ptr(), // pinned host memory + resized.data, // OpenCV RGB data + 3 * param_.input_size.area()); + at::Tensor t = t_uint8.to(at::kCUDA, true) + .toType(at::kFloat) + .div_(255.0) + .sub_(param_.means) + .div_(param_.stds); + + rollBuf[cam].push_back(t); + if (rollBuf[cam].size() > (size_t)param_.window_size) { + rollBuf[cam].pop_front(); + } + if (rollBuf[cam].size() < (size_t)param_.window_size) { + continue; + } + detectSingleCamera(rollBuf[cam], detection_out, cam, infer_stream); + any_successful_detection |= detection_out.flags[cam]; + } + auto end = std::chrono::steady_clock::now(); + + std::cout << "elapsed: " + << std::chrono::duration_cast(end - start).count() + << '\n'; + detection_out.images_to_segment = nullptr; + detection_out.timestamp = sync->timestamp; + + { + std::lock_guard lk(state_.cb_mutex); + for (auto& cb : state_.det_cbs) { + cb(detection_out); + } + } + + if (any_successful_detection) { + // segmentation requires initial images, so we need to send pointer to buffer too + detection_out.images_to_segment = sync; + state_.seg_queue.push(detection_out); + state_.seg_cv.notify_one(); + } else { + // in case no segmentation is required we can release buffer + buffer_deleter_(sync); + } + } +} + +void BallInferenceNode::segmentationLoop() { + while (state_.running) { + std::unique_lock lk(state_.seg_mutex); + state_.seg_cv.wait_for( + lk, 5ms, [this]() { return !state_.running || !state_.seg_queue.empty(); }); + if (!state_.running && state_.seg_queue.empty()) break; + + DetectionOutput det; + if (!state_.seg_queue.pop(det)) { + continue; + } + + auto sync = det.images_to_segment; + if (!sync) { + continue; + } + + std::vector masks; + std::vector cams; + std::vector centers; + std::vector centroids; + + SegmentationOutput segmentation_output; + segmentation_output.timestamp = det.timestamp; + + // Process each flagged camera + for (size_t cam = 0; cam < det.flags.size(); ++cam) { + if (!det.flags[cam]) continue; + + uint8_t* raw = sync->images[cam]; + Size sz = sync->image_sizes[cam]; + cv::Mat bayer(sz.height, sz.width, CV_8UC1, raw); + cv::Mat rgb; + cv::cvtColor(bayer, rgb, cv::COLOR_BayerBG2RGB); + + const int cropSize = 128; + int half = cropSize / 2; + cv::Point pt = det.centers[cam]; + int x = std::clamp(pt.x, half, sz.width - half); + int y = std::clamp(pt.y, half, sz.height - half); + int x0 = x - half; + int y0 = y - half; + cv::Rect cropRect(x0, y0, cropSize, cropSize); + cv::Mat crop = rgb(cropRect); + + at::Tensor t = torch::from_blob(crop.data, {1, crop.rows, crop.cols, 3}, at::kByte) + .permute({0, 3, 1, 2}) + .toType(at::kFloat) + .div_(255.0) + .sub_(param_.means) + .div_(param_.stds) + .pin_memory(); + + at::Tensor gpu_in = t.to(at::kCUDA, /*non_blocking=*/true); + torch::NoGradGuard ndg; + at::Tensor out = seg_model_.forward({gpu_in}).toTensor().cpu(); + // out shape {1, 1, cropSize, cropSize} + at::Tensor prob = out.sigmoid().squeeze(0).squeeze(0); // {H, W} on CUDA + + constexpr float thresh = 0.5f; + at::Tensor bin = prob > thresh; // ByteTensor {H, W} on CUDA + at::Tensor coords = bin.nonzero(); + + // compute centroid as the mean of these coords + at::Tensor centroid; + if (coords.numel() == 0) { + // {row_center, col_center} + centroid = + torch::tensor({cropSize / 2.0, cropSize / 2.0}, at::kFloat).to(at::kCUDA); + } else { + // convert coords to float and take mean over dim=0 -> {mean_row, mean_col} + centroid = coords.to(at::kFloat).mean(0); + } + + centroid = centroid.to(at::kCPU); + + float centroid_y = centroid[0].item() + y0; + float centroid_x = centroid[1].item() + x0; + + segmentation_output.centroids.emplace_back(centroid_x, centroid_y); + segmentation_output.camera_indices.push_back(cam); + } + + if (segmentation_output.camera_indices.empty()) { + continue; + } + + std::lock_guard lk_callback(state_.seg_mutex); + for (auto& cb : state_.seg_cbs) { + cb(segmentation_output); + } + + // now we do not need buffer + buffer_deleter_(sync); + } +} + +} // namespace handy::camera