Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 26 additions & 12 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Platform arm64 means nvidia jetson arm64v8.
# Image may be not compatible with other arm machines.

FROM --platform=linux/arm64v8 nvcr.io/nvidia/l4t-base:r35.1.0 AS handy-base-arm64
FROM --platform=linux/arm64v8 nvcr.io/nvidia/l4t-base:r36.2.0 AS handy-base-arm64

### INSTALL NVIDIA PACKAGES

Expand Down Expand Up @@ -105,7 +105,7 @@ RUN apt-get update && \
zlib1g-dev \
&& rm -rf /var/lib/apt/lists/* && apt-get clean

ENV TORCHVISION_VERSION=0.14.0
ENV TORCHVISION_VERSION=0.17.0

FROM handy-common AS handy-cuda-arm64

Expand Down Expand Up @@ -162,19 +162,26 @@ RUN pip3 install --no-cache-dir \
numpy \
pillow

ENV PYTORCH_WHL="torch-1.13.0a0+340c4120.nv22.06-cp38-cp38-linux_aarch64.whl"
ENV PYTORCH_URL="https://developer.download.nvidia.com/compute/redist/jp/v50/pytorch/${PYTORCH_WHL}"
ENV PYTORCH_WHL="torch-2.2.0a0+81ea7a4.nv24.01-cp310-cp310-linux_aarch64.whl"
ENV PYTORCH_URL="https://developer.download.nvidia.com/compute/redist/jp/v60dp/pytorch/${PYTORCH_WHL}"

RUN wget --no-check-certificate -qO ${PYTORCH_WHL} ${PYTORCH_URL} \
&& pip3 install --no-cache-dir ${PYTORCH_WHL} \
&& rm -rf /tmp/*

RUN wget -qO - https://github.com/pytorch/vision/archive/refs/tags/v${TORCHVISION_VERSION}.tar.gz | tar -xz \
&& cd vision-${TORCHVISION_VERSION} \
&& python3 setup.py install \
&& rm -rf /tmp/*
RUN pip3 install torchvision==${TORCHVISION_VERSION}

# RUN apt-get update && \
# apt-get install -y cuda-command-line-tools-12-2 libcupti-dev && \
# echo "/usr/local/cuda-12.2/extras/CUPTI/lib64" \
# > /etc/ld.so.conf.d/cupti.conf && ldconfig

ENV PYTORCH_PATH="/usr/local/lib/python3.8/dist-packages/torch"
# RUN wget -qO - https://github.com/pytorch/vision/archive/refs/tags/v${TORCHVISION_VERSION}.tar.gz | tar -xz \
# && cd vision-${TORCHVISION_VERSION} \
# && python3 setup.py install \
# && rm -rf /tmp/*

ENV PYTORCH_PATH="/usr/local/lib/python3.10/dist-packages/torch"
ENV LD_LIBRARY_PATH="${PYTORCH_PATH}/lib:${LD_LIBRARY_PATH}"

FROM handy-common AS handy-cuda-amd64
Expand Down Expand Up @@ -215,13 +222,13 @@ RUN wget -qO - https://github.com/opencv/opencv/archive/refs/tags/${OPENCV_VERSI
-DBUILD_TESTS=OFF \
&& make -j$(nproc) install && rm -rf /tmp/*

ENV TORCH_VERSION=1.13.0
ENV TORCH_VERSION=2.2.0

RUN pip3 install --no-cache-dir \
torch==${TORCH_VERSION} \
torchvision==${TORCHVISION_VERSION}

ENV PYTORCH_PATH="/usr/local/lib/python3.8/dist-packages/torch"
ENV PYTORCH_PATH="/usr/local/lib/python3.10/dist-packages/torch"
ENV LD_LIBRARY_PATH="${PYTORCH_PATH}/lib:${LD_LIBRARY_PATH}"

FROM handy-cuda-${TARGETARCH} AS handy-ros
Expand Down Expand Up @@ -295,6 +302,11 @@ RUN mkdir -p ${ROS_ROOT} \
> ${ROS_ROOT}/ros2.rosinstall \
&& vcs import ${ROS_TMP} < ${ROS_ROOT}/ros2.rosinstall

RUN apt-get update && \
apt-get install -y --no-install-recommends \
ca-certificates curl gnupg2 lsb-release \
&& rm -rf /var/lib/apt/lists/*

RUN apt-get update -q \
&& rosdep init \
&& rosdep update \
Expand All @@ -311,11 +323,13 @@ RUN apt-get update -q \
--skip-keys python3-opencv \
&& rm -rf /var/lib/apt/lists/* && apt-get clean

ENV CMAKE_TLS_VERIFY=0

RUN cd ${ROS_TMP} \
&& colcon build \
--merge-install \
--install-base ${ROS_ROOT} \
--cmake-args -DBUILD_TESTING=OFF \
--cmake-args -DBUILD_TESTING=OFF -DCMAKE_TLS_VERIFY=OFF \
&& rm -rf /tmp/*

RUN printf "export ROS_ROOT=${ROS_ROOT}\n" >> ${HOME}/.bashrc \
Expand Down
8 changes: 5 additions & 3 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
services:
handy:
image: "registry.robotics-lab.ru/handy:0.7.3"
image: "registry.robotics-lab.ru/handy:0.7.4"
container_name: "${CONTAINER_NAME:-handy-${USER}}"
networks: [default]
privileged: true
runtime: nvidia
ports:
- "${FOXGLOVE_PORT:-8765}:8765"
volumes:
- "/home/bakind/handy:/handy"
- "/dev:/dev"
# - "/tmp/.X11-unix/:/tmp/.X11-unix"
deploy:
resources:
limits:
Expand All @@ -22,10 +24,10 @@ services:
x-bake:
platforms:
- linux/arm64
- linux/amd64
# - linux/amd64
cache-from:
- type=registry,ref=registry.robotics-lab.ru/handy:cache-arm64
- type=registry,ref=registry.robotics-lab.ru/handy:cache-amd64
# - type=registry,ref=registry.robotics-lab.ru/handy:cache-amd64
cache-to:
# [!!!] Warning: https://github.com/docker/buildx/discussions/1382
# - type=registry,mode=max,ref=registry.robotics-lab.ru/handy:cache-arm64
Expand Down
37 changes: 35 additions & 2 deletions packages/camera/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,21 @@ project(camera)
add_compile_options(-Wall -Wextra -Werror=unused-variable -Wpedantic)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -O3")

set(Torch_DIR "/usr/local/lib/python3.10/dist-packages/torch/share/cmake/Torch")
find_package(Torch REQUIRED)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")

add_compile_options(-fsanitize=address)
add_link_options(-fsanitize=address)

find_path(TENSORRT_INCLUDE_DIR NvInfer.h
PATHS /usr/include /usr/local/include)
find_library(TENSORRT_LIB nvinfer
PATHS /usr/lib /usr/lib/aarch64-linux-gnu)
find_library(TENSORRT_ONNX_LIB nvonnxparser
PATHS /usr/lib /usr/lib/aarch64-linux-gnu)

find_package(CUDAToolkit REQUIRED)
find_package(OpenCV REQUIRED)
find_package(yaml_cpp_vendor REQUIRED)
find_package(ament_cmake REQUIRED)
Expand All @@ -18,28 +33,46 @@ add_executable(
calibration src/calibration_main.cpp src/calibration.cpp src/params.cpp
)
add_executable(triangulation src/triangulation.cpp src/params.cpp)
add_executable(inference_node src/inference_node.cpp src/inference_node_main.cpp)

target_include_directories(
camera PUBLIC "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
"$<INSTALL_INTERFACE:include/${PROJECT_NAME}>" "/usr/include"
"$<INSTALL_INTERFACE:include/${PROJECT_NAME}>"
"/usr/include"
"/opt/ros/iron/include/mcap_vendor/mcap"
)

target_include_directories(
calibration PUBLIC "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
"/usr/include"
)

target_include_directories(
triangulation PUBLIC "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
"/usr/include"
)

target_include_directories(
inference_node PUBLIC "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
${TENSORRT_INCLUDE_DIR}
"/usr/local/cuda/include"
"/usr/include"
${mcap_vendor_INCLUDE_DIRS}
)

ament_target_dependencies(camera yaml_cpp_vendor mcap_vendor)
ament_target_dependencies(calibration yaml_cpp_vendor Boost OpenCV)
ament_target_dependencies(triangulation OpenCV yaml_cpp_vendor)
ament_target_dependencies(inference_node OpenCV Boost yaml_cpp_vendor mcap_vendor Torch)

target_link_libraries(camera mvsdk)
target_link_libraries(inference_node
${TENSORRT_LIB}
${TENSORRT_ONNX_LIB}
mvsdk
${TORCH_LIBRARIES})

install(TARGETS camera calibration triangulation
install(TARGETS camera calibration triangulation inference_node
DESTINATION lib/${PROJECT_NAME}
)

Expand Down
4 changes: 3 additions & 1 deletion packages/camera/include/camera.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#include <cstdint>

#include <boost/lockfree/queue.hpp>
#include "mcap_vendor/mcap/writer.hpp"
#include "mcap/writer.hpp"

using namespace std::chrono_literals;

Expand Down Expand Up @@ -147,5 +147,7 @@ class CameraRecorder {

mcap::McapWriter mcap_writer_;
MappedFileManager file_manager_;

std::function<void(SynchronizedFrameBuffers*)> deleter_;
};
} // namespace handy::camera
113 changes: 113 additions & 0 deletions packages/camera/include/inference_node.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
#pragma once

#include "camera.h"
#include <torch/script.h>
#include <opencv2/core.hpp>
#include "c10/cuda/CUDAStream.h"
#include <boost/lockfree/queue.hpp>
#include <mutex>
#include <atomic>
#include <thread>
#include <condition_variable>
#include <deque>
#include <vector>
#include <functional>
#include <memory>
#include <array>

namespace handy::camera {

class BallInferenceNode {
public:
static const int kMaxCameraNum = 2;

/// Per‐frame detection result
struct DetectionOutput {
std::array<cv::Point, kMaxCameraNum> centers; // one ball center per camera image
std::array<bool, kMaxCameraNum> flags; // true = needs segmentation for that camera
uint32_t timestamp = 0;
SynchronizedFrameBuffers* images_to_segment = nullptr;
};

/// Per‐frame segmentation result
struct SegmentationOutput {
// one centroid per camera image that requested segmentation
std::vector<cv::Point> centroids;
std::vector<int> camera_indices;
uint32_t timestamp = 0;
};

using DetectionCallback = std::function<void(DetectionOutput)>;
using SegmentationCallback = std::function<void(SegmentationOutput)>;

/// ctor loads both models and sets up queues/threads
BallInferenceNode(
const std::string& det_model_path, const std::string& seg_model_path, Size input_size,
std::function<void(SynchronizedFrameBuffers*)> deleter, int window_size = 5);
~BallInferenceNode();

/// Subscribe to detection outputs
/// Called from arbitrary thread; callbacks must be lock‐free/low‐latency.
void registerDetectionSubscriber(const DetectionCallback& cb);

/// Subscribe to segmentation outputs
void registerSegmentationSubscriber(const SegmentationCallback& cb);

/// Start the internal threads
void start();

/// Signal all threads to stop, join them, and release resources
void stop();

void handleSyncBuffer(std::shared_ptr<SynchronizedFrameBuffers> buffer);

private:
void detectSingleCamera(const std::deque<at::Tensor>& frames, DetectionOutput& detect_result,
int camera_idx, c10::cuda::CUDAStream& infer_stream);
void detectionLoop();
void segmentationLoop();


// === model objects ===
torch::jit::script::Module det_model_;
torch::jit::script::Module seg_model_;

std::function<void(SynchronizedFrameBuffers*)> buffer_deleter_;

struct Param {
// === input queue & rolling window ===
int window_size = 5;
Size input_size{320, 192};
const at::Tensor means =
torch::tensor({0.077, 0.092, 0.142}).view({1, 3, 1, 1}).to(at::kFloat).to(at::kCUDA);
const at::Tensor stds =
torch::tensor({0.068, 0.079, 0.108}).view({1, 3, 1, 1}).to(at::kFloat).to(at::kCUDA);
} param_;

struct State {
boost::lockfree::queue<SynchronizedFrameBuffers*> det_queue{64};

// protected by ring_mutex_
std::array<std::deque<at::Tensor>, kMaxCameraNum> ring_buffer;
std::vector<at::Tensor> pinned_pool;
std::atomic<size_t> pool_idx = 0;
std::mutex ring_mutex;

// === intermediate queues ===
boost::lockfree::queue<DetectionOutput> seg_queue{128};

// === subscriber callbacks ===
std::vector<DetectionCallback> det_cbs;
std::vector<SegmentationCallback> seg_cbs;
std::mutex cb_mutex;

// === threading & synchroization ===
std::atomic<bool> running{false};
std::thread det_thread;
std::thread seg_thread;
std::condition_variable seg_cv;
std::mutex seg_mutex;
} state_;
};

} // namespace handy::camera
1 change: 0 additions & 1 deletion packages/camera/include/triangulation.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#include "params.h"
#include "params.h"

#include <opencv2/core/core.hpp>
#include <string>
Expand Down
29 changes: 15 additions & 14 deletions packages/camera/src/camera.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,18 @@ CameraRecorder::CameraRecorder(
}
}

deleter_ = [this](SynchronizedFrameBuffers* sync_buffers_ptr) {
for (size_t i = 0; i < sync_buffers_ptr->images.size(); ++i) {
if (!sync_buffers_ptr->images[i]) {
continue;
}
while (!this->state_.free_buffers[i]->push(sync_buffers_ptr->images[i])) {
}
state_.free_buffer_cnts[i]++;
}
delete sync_buffers_ptr;
};

for (int i = 0; i < state_.camera_num; ++i) {
abortIfNot("reset timestamp", i, CameraRstTimeStamp(state_.camera_handles[i]));
}
Expand Down Expand Up @@ -395,20 +407,9 @@ void CameraRecorder::synchronizeQueues() {
};

// make single structure as a shared ptr
// deleter is expected to push buffers back to the queue and then free the structure
auto custom_deleter = [this](SynchronizedFrameBuffers* sync_buffers_ptr) {
for (size_t i = 0; i < sync_buffers_ptr->images.size(); ++i) {
if (!sync_buffers_ptr->images[i]) {
continue;
}
while (!this->state_.free_buffers[i]->push(sync_buffers_ptr->images[i])) {
}
state_.free_buffer_cnts[i]++;
}
delete sync_buffers_ptr;
};
// deleter_ is expected to push buffers back to the queue and then free the structure
std::shared_ptr<SynchronizedFrameBuffers> sync_buffers(
new SynchronizedFrameBuffers, custom_deleter);
new SynchronizedFrameBuffers, deleter_);
sync_buffers->images.resize(state_.camera_num, nullptr);
sync_buffers->image_sizes.resize(state_.camera_num);

Expand Down Expand Up @@ -440,7 +441,7 @@ void CameraRecorder::synchronizeQueues() {
// creating new structure
sync_buffers = std::shared_ptr<SynchronizedFrameBuffers>(
new SynchronizedFrameBuffers,
custom_deleter); // custom deleter adds buffer pointer back to the queue
deleter_); // custom deleter adds buffer pointer back to the queue
sync_buffers->images.resize(state_.camera_num, nullptr);
sync_buffers->image_sizes.resize(state_.camera_num);
} while (next());
Expand Down
Loading
Loading