Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 30 additions & 6 deletions .gitlab/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ EXTRA_BUILD_ARGS=${3:-""}
UCX_VERSION=${UCX_VERSION:-v1.19.0}
# EFA_INSTALLER_VERSION is the version of EFA installer to use, defaults to "latest"
EFA_INSTALLER_VERSION=${EFA_INSTALLER_VERSION:-latest}
# LIBFABRIC_VERSION is the version of libfabric to build; override the default by setting the environment variable.
LIBFABRIC_VERSION=${LIBFABRIC_VERSION:-v2.3.0}
# LIBFABRIC_INSTALL_DIR can be set via environment variable, defaults to INSTALL_DIR
LIBFABRIC_INSTALL_DIR=${LIBFABRIC_INSTALL_DIR:-$INSTALL_DIR}

if [ -z "$INSTALL_DIR" ]; then
echo "Usage: $0 <install_dir> <ucx_install_dir>"
Expand Down Expand Up @@ -123,10 +127,30 @@ curl -fSsL "https://github.com/openucx/ucx/tarball/${UCX_VERSION}" | tar xz
$SUDO ldconfig \
)

curl -fsSL "https://efa-installer.amazonaws.com/aws-efa-installer-${EFA_INSTALLER_VERSION}.tar.gz" | tar xz
wget --tries=3 --waitretry=5 -O "aws-efa-installer-${EFA_INSTALLER_VERSION}.tar.gz" "https://efa-installer.amazonaws.com/aws-efa-installer-${EFA_INSTALLER_VERSION}.tar.gz"
tar xzf "aws-efa-installer-${EFA_INSTALLER_VERSION}.tar.gz"
rm "aws-efa-installer-${EFA_INSTALLER_VERSION}.tar.gz"
( \
cd aws-efa-installer && \
$SUDO ./efa_installer.sh -y -g --skip-kmod --skip-limit-conf --no-verify && \
$SUDO ./efa_installer.sh -y --minimal --skip-kmod --skip-limit-conf --no-verify && \
$SUDO ldconfig \
)

# Build libfabric from the upstream release tarball and install it under
# LIBFABRIC_INSTALL_DIR, explicitly enabling the EFA provider and disabling
# the verbs, psm3, opx, usnic and rstream providers.
# ${LIBFABRIC_VERSION#v} drops the leading "v" of the tag (v2.3.0 -> 2.3.0),
# which is how the release asset is named.
wget --tries=3 --waitretry=5 -O "libfabric-${LIBFABRIC_VERSION#v}.tar.bz2" "https://github.com/ofiwg/libfabric/releases/download/${LIBFABRIC_VERSION}/libfabric-${LIBFABRIC_VERSION#v}.tar.bz2"
tar xjf "libfabric-${LIBFABRIC_VERSION#v}.tar.bz2"
rm "libfabric-${LIBFABRIC_VERSION#v}.tar.bz2"
# Enter the source tree by its exact name rather than a libfabric-* glob, so a
# leftover directory from another version cannot make `cd` ambiguous.
# Cap make at the CPU count: a bare `make -j` places no limit on the number of
# parallel jobs and can exhaust memory on the build host.
( \
  cd "libfabric-${LIBFABRIC_VERSION#v}" && \
  ./autogen.sh && \
  ./configure --prefix="${LIBFABRIC_INSTALL_DIR}" \
    --disable-verbs \
    --disable-psm3 \
    --disable-opx \
    --disable-usnic \
    --disable-rstream \
    --enable-efa && \
  make -j"$(nproc)" && \
  make install && \
  $SUDO ldconfig \
)

Expand All @@ -152,10 +176,10 @@ curl -fsSL "https://efa-installer.amazonaws.com/aws-efa-installer-${EFA_INSTALLE
)

export LIBRARY_PATH="$LIBRARY_PATH:/usr/local/cuda/lib64"
export LD_LIBRARY_PATH="${INSTALL_DIR}/lib:${INSTALL_DIR}/lib/$ARCH-linux-gnu:${INSTALL_DIR}/lib64:$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:${INSTALL_DIR}/lib:/opt/amazon/efa/lib"
export CPATH="${INSTALL_DIR}/include:/opt/amazon/efa/include:$CPATH"
export LD_LIBRARY_PATH="${INSTALL_DIR}/lib:${INSTALL_DIR}/lib/$ARCH-linux-gnu:${INSTALL_DIR}/lib64:$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:${INSTALL_DIR}/lib:${LIBFABRIC_INSTALL_DIR}/lib"
export CPATH="${INSTALL_DIR}/include:${LIBFABRIC_INSTALL_DIR}/include:$CPATH"
export PATH="${INSTALL_DIR}/bin:$PATH"
export PKG_CONFIG_PATH="${INSTALL_DIR}/lib/pkgconfig:${INSTALL_DIR}/lib64/pkgconfig:${INSTALL_DIR}:/opt/amazon/efa/lib/pkgconfig:$PKG_CONFIG_PATH"
export PKG_CONFIG_PATH="${INSTALL_DIR}/lib/pkgconfig:${INSTALL_DIR}/lib64/pkgconfig:${INSTALL_DIR}:${LIBFABRIC_INSTALL_DIR}/lib/pkgconfig:$PKG_CONFIG_PATH"
export NIXL_PLUGIN_DIR="${INSTALL_DIR}/lib/$ARCH-linux-gnu/plugins"
export CMAKE_PREFIX_PATH="${INSTALL_DIR}:${CMAKE_PREFIX_PATH}"

Expand All @@ -164,7 +188,7 @@ export CMAKE_PREFIX_PATH="${INSTALL_DIR}:${CMAKE_PREFIX_PATH}"
export UCX_TLS=^cuda_ipc

# shellcheck disable=SC2086
meson setup nixl_build --prefix=${INSTALL_DIR} -Ducx_path=${UCX_INSTALL_DIR} -Dbuild_docs=true -Drust=false ${EXTRA_BUILD_ARGS} -Dlibfabric_path="/opt/amazon/efa"
meson setup nixl_build --prefix=${INSTALL_DIR} -Ducx_path=${UCX_INSTALL_DIR} -Dbuild_docs=true -Drust=false ${EXTRA_BUILD_ARGS} -Dlibfabric_path="${LIBFABRIC_INSTALL_DIR}"
ninja -C nixl_build && ninja -C nixl_build install

# TODO(kapila): Copy the nixl.pc file to the install directory if needed.
Expand Down
42 changes: 33 additions & 9 deletions benchmark/nixlbench/contrib/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ RUN apt-get update -y && \
libz-dev \
flex \
libgtest-dev \
hwloc \
libhwloc-dev \
build-essential

# Add DOCA repository and install packages
Expand Down Expand Up @@ -104,11 +106,38 @@ ARG DEFAULT_PYTHON_VERSION
ARG WHL_PYTHON_VERSIONS="3.12"
ARG WHL_PLATFORM="manylinux_2_39_$ARCH"
ARG BUILD_TYPE="release"
ARG EFA_INSTALLER_VERSION="latest"
ARG EFA_INSTALL_PATH="/opt/amazon/efa"
ARG LIBFABRIC_VERSION="v2.3.0"
ARG NPROC

WORKDIR /workspace

# Prerequisites for building libfabric from source:
# install the RDMA/EFA packages required for the libfabric EFA provider
RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get -y install \
--reinstall libibverbs-dev rdma-core ibverbs-utils libibumad-dev \
libnuma-dev librdmacm-dev ibverbs-providers

# Build libfabric from source and install it under /usr/local.
# - Downloads the release tarball for LIBFABRIC_VERSION from GitHub; the asset
#   name uses the version without its leading "v" (${LIBFABRIC_VERSION#v}).
# - Configures with the EFA provider enabled and the verbs, psm3, opx, usnic
#   and rstream providers disabled, with CUDA (from /usr/local/cuda) and
#   gdrcopy support turned on.
#   NOTE(review): the *-dlopen flags presumably make libfabric load CUDA and
#   gdrcopy at runtime instead of linking them - confirm against
#   `./configure --help`.
# - Builds with NPROC parallel jobs when that build arg is set, otherwise with
#   the value reported by nproc, then runs ldconfig after installing.
RUN wget --tries=3 --waitretry=5 --timeout=30 --read-timeout=60 \
"https://github.com/ofiwg/libfabric/releases/download/${LIBFABRIC_VERSION}/libfabric-${LIBFABRIC_VERSION#v}.tar.bz2" -O libfabric.tar.bz2 && \
tar xjf libfabric.tar.bz2 && rm libfabric.tar.bz2 && \
cd libfabric-* && \
./autogen.sh && \
./configure --prefix=/usr/local \
--disable-verbs \
--disable-psm3 \
--disable-opx \
--disable-usnic \
--disable-rstream \
--enable-efa \
--with-cuda=/usr/local/cuda \
--enable-cuda-dlopen \
--with-gdrcopy \
--enable-gdrcopy-dlopen && \
make -j${NPROC:-$(nproc)} && \
make install && \
ldconfig

RUN git clone --depth 1 https://github.com/etcd-cpp-apiv3/etcd-cpp-apiv3.git && \
cd etcd-cpp-apiv3 && \
sed -i '/^find_dependency(cpprestsdk)$/d' etcd-cpp-api-config.in.cmake && \
Expand All @@ -122,11 +151,6 @@ COPY --from=nixlbench . /workspace/nixlbench
# Install AWS SDK C++ dependencies and build
RUN apt-get update && apt-get install -y libcurl4-openssl-dev libssl-dev uuid-dev zlib1g-dev hwloc libhwloc-dev

# Install EFA (Elastic Fabric Adapter)
RUN curl -fsSL "https://efa-installer.amazonaws.com/aws-efa-installer-${EFA_INSTALLER_VERSION}.tar.gz" | tar xz && \
cd aws-efa-installer && \
./efa_installer.sh -y -g --skip-kmod --skip-limit-conf --no-verify && \
ldconfig

RUN git clone --recurse-submodules --depth 1 --shallow-submodules https://github.com/aws/aws-sdk-cpp.git --branch 1.11.581 && \
mkdir sdk_build && \
Expand All @@ -137,7 +161,7 @@ RUN git clone --recurse-submodules --depth 1 --shallow-submodules https://github

WORKDIR /workspace/nixl

ENV LD_LIBRARY_PATH=/usr/local/lib:$EFA_INSTALL_PATH/lib:$LD_LIBRARY_PATH
ENV LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH

ENV VIRTUAL_ENV=/workspace/nixl/.venv
RUN uv venv $VIRTUAL_ENV --python $DEFAULT_PYTHON_VERSION && \
Expand All @@ -149,7 +173,7 @@ RUN CUDA_SHORT_VERSION=cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d .) && \

RUN rm -rf build && \
mkdir build && \
uv run meson setup build -Dlibfabric_path=$EFA_INSTALL_PATH --prefix=/usr/local/nixl --buildtype=$BUILD_TYPE && \
uv run meson setup build --prefix=/usr/local/nixl --buildtype=$BUILD_TYPE && \
cd build && \
ninja && \
ninja install
Expand Down
34 changes: 25 additions & 9 deletions contrib/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ ARG NIXL_PREFIX="/usr/local/nixl"
ARG NIXL_PLUGIN_DIR="$NIXL_PREFIX/lib/$ARCH-linux-gnu/plugins"
ARG NPROC
ARG WHL_DEFAULT_PYTHON_VERSIONS="3.12"
ARG EFA_INSTALLER_VERSION="latest"
ARG EFA_INSTALL_PATH="/opt/amazon/efa"
ARG LIBFABRIC_VERSION="v2.3.0"
ARG LIBFABRIC_INSTALL_PATH="/usr/local"

RUN apt-get update -y && \
apt-get install -y ubuntu-keyring && \
Expand Down Expand Up @@ -74,11 +74,6 @@ RUN git clone --depth 1 https://github.com/etcd-cpp-apiv3/etcd-cpp-apiv3.git &&
mkdir build && cd build && \
cmake .. -DBUILD_ETCD_CORE_ONLY=ON -DCMAKE_BUILD_TYPE=Release && make -j${NPROC:-$(nproc)} && make install

# Install EFA (Elastic Fabric Adapter)
RUN curl -fsSL "https://efa-installer.amazonaws.com/aws-efa-installer-${EFA_INSTALLER_VERSION}.tar.gz" | tar xz && \
cd aws-efa-installer && \
./efa_installer.sh -y -g --skip-kmod --skip-limit-conf --no-verify && \
ldconfig

RUN git clone --recurse-submodules --depth 1 --shallow-submodules https://github.com/aws/aws-sdk-cpp.git --branch 1.11.581 && \
mkdir aws_sdk_build && cd aws_sdk_build && \
Expand Down Expand Up @@ -140,10 +135,31 @@ RUN cd /usr/local/src && \
make -j${NPROC:-$(nproc)} install-strip && \
ldconfig

# Build libfabric from source and install it under LIBFABRIC_INSTALL_PATH.
# - Downloads the release tarball for LIBFABRIC_VERSION from GitHub; the asset
#   name uses the version without its leading "v" (${LIBFABRIC_VERSION#v}).
# - Configures with the EFA provider enabled and the verbs, psm3, opx, usnic
#   and rstream providers disabled, with CUDA (from /usr/local/cuda) and
#   gdrcopy support turned on.
#   NOTE(review): the *-dlopen flags presumably make libfabric load CUDA and
#   gdrcopy at runtime instead of linking them - confirm against
#   `./configure --help`.
# - Builds with NPROC parallel jobs when that build arg is set, otherwise with
#   the value reported by nproc, then runs ldconfig after installing.
RUN wget --tries=3 --waitretry=5 --timeout=30 --read-timeout=60 \
"https://github.com/ofiwg/libfabric/releases/download/${LIBFABRIC_VERSION}/libfabric-${LIBFABRIC_VERSION#v}.tar.bz2" -O libfabric.tar.bz2 && \
tar xjf libfabric.tar.bz2 && rm libfabric.tar.bz2 && \
cd libfabric-* && \
./autogen.sh && \
./configure --prefix="${LIBFABRIC_INSTALL_PATH}" \
--disable-verbs \
--disable-psm3 \
--disable-opx \
--disable-usnic \
--disable-rstream \
--enable-efa \
--with-cuda=/usr/local/cuda \
--enable-cuda-dlopen \
--with-gdrcopy \
--enable-gdrcopy-dlopen && \
make -j${NPROC:-$(nproc)} && \
make install && \
ldconfig

WORKDIR /workspace/nixl
COPY . /workspace/nixl

ENV LD_LIBRARY_PATH=/usr/local/lib:$EFA_INSTALL_PATH/lib:$LD_LIBRARY_PATH
ENV LD_LIBRARY_PATH=/usr/local/lib:$LIBFABRIC_INSTALL_PATH/lib:$LD_LIBRARY_PATH

ENV VIRTUAL_ENV=/workspace/nixl/.venv
RUN rm -rf $VIRTUAL_ENV && uv venv $VIRTUAL_ENV --python $DEFAULT_PYTHON_VERSION && \
Expand All @@ -156,7 +172,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends pybind11-dev
ENV NIXL_PREFIX=$NIXL_PREFIX
RUN rm -rf build && \
mkdir build && \
uv run meson setup -Dlibfabric_path=$EFA_INSTALL_PATH build/ --prefix=$NIXL_PREFIX && \
uv run meson setup -Dlibfabric_path=$LIBFABRIC_INSTALL_PATH build/ --prefix=$NIXL_PREFIX && \
cd build && \
ninja && \
ninja install
Expand Down
24 changes: 24 additions & 0 deletions contrib/Dockerfile.manylinux
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG}
ARG DEFAULT_PYTHON_VERSION="3.12"
ARG ARCH="x86_64"
ARG UCX_REF="v1.19.0"
ARG LIBFABRIC_VERSION="v2.3.0"

RUN yum groupinstall -y 'Development Tools' && \
dnf install -y almalinux-release-synergy && \
Expand Down Expand Up @@ -53,6 +54,8 @@ RUN yum groupinstall -y 'Development Tools' && \
libibumad-devel \
numactl-devel \
librdmacm-devel \
hwloc \
hwloc-devel \
wget \
zlib

Expand Down Expand Up @@ -198,6 +201,27 @@ RUN cd /usr/local/src && \
make -j install-strip && \
ldconfig

# Build libfabric from source and install it under /usr/local.
# - Downloads the release tarball for LIBFABRIC_VERSION from GitHub; the asset
#   name uses the version without its leading "v" (${LIBFABRIC_VERSION#v}).
# - Configures with the EFA provider enabled and the verbs, psm3, opx, usnic
#   and rstream providers disabled, with CUDA (from /usr/local/cuda) and
#   gdrcopy support turned on.
#   NOTE(review): the *-dlopen flags presumably make libfabric load CUDA and
#   gdrcopy at runtime instead of linking them - confirm against
#   `./configure --help`.
# - Builds with one make job per CPU reported by nproc, then runs ldconfig
#   after installing.
RUN wget --tries=3 --waitretry=5 --timeout=30 --read-timeout=60 \
"https://github.com/ofiwg/libfabric/releases/download/${LIBFABRIC_VERSION}/libfabric-${LIBFABRIC_VERSION#v}.tar.bz2" -O libfabric.tar.bz2 && \
tar xjf libfabric.tar.bz2 && rm libfabric.tar.bz2 && \
cd libfabric-* && \
./autogen.sh && \
./configure --prefix=/usr/local \
--disable-verbs \
--disable-psm3 \
--disable-opx \
--disable-usnic \
--disable-rstream \
--enable-efa \
--with-cuda=/usr/local/cuda \
--enable-cuda-dlopen \
--with-gdrcopy \
--enable-gdrcopy-dlopen && \
make -j$(nproc) && \
make install && \
ldconfig

COPY . /workspace/nixl

RUN rm -rf build && \
Expand Down
13 changes: 10 additions & 3 deletions meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -155,9 +155,16 @@ libfabric_path = get_option('libfabric_path')
if libfabric_path != ''
libfabric_lib_path = libfabric_path + '/lib'
libfabric_inc_path = libfabric_path + '/include'
libfabric_dep = declare_dependency(
link_args : ['-L' + libfabric_lib_path, '-lfabric'],
include_directories : include_directories(libfabric_inc_path))
# Check if path is absolute
if libfabric_inc_path.startswith('/')
libfabric_dep = declare_dependency(
link_args : ['-L' + libfabric_lib_path, '-lfabric'],
compile_args : ['-I' + libfabric_inc_path])
else
libfabric_dep = declare_dependency(
link_args : ['-L' + libfabric_lib_path, '-lfabric'],
include_directories : include_directories(libfabric_inc_path))
endif
else
libfabric_dep = dependency('libfabric', required: false)
endif
Expand Down