Skip to content

Commit c47556d

Browse files
committed
Refactor build script to use environment variables and improve download reliability
- Change libfabric install dir from required parameter to environment variable
- Replace curl with wget for more reliable downloads with retry logic
- Add libfabric installation to Dockerfile.manylinux
1 parent 360ccd7 commit c47556d

File tree

4 files changed

+56
-61
lines changed

4 files changed

+56
-61
lines changed

.gitlab/build.sh

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -22,27 +22,25 @@ set -o pipefail
2222
# and second argument being the UCX installation directory.
2323
INSTALL_DIR=$1
2424
UCX_INSTALL_DIR=$2
25-
LIBFABRIC_INSTALL_DIR=$3
26-
EXTRA_BUILD_ARGS=${4:-""}
25+
EXTRA_BUILD_ARGS=${3:-""}
2726
# UCX_VERSION is the version of UCX to build; override the default with an environment variable.
2827
UCX_VERSION=${UCX_VERSION:-v1.19.0}
2928
# EFA_INSTALLER_VERSION is the version of EFA installer to use, defaults to "latest"
3029
EFA_INSTALLER_VERSION=${EFA_INSTALLER_VERSION:-latest}
3130
# LIBFABRIC_VERSION is the version of libfabric to build; override the default with an environment variable.
3231
LIBFABRIC_VERSION=${LIBFABRIC_VERSION:-v2.3.0}
32+
# LIBFABRIC_INSTALL_DIR can be set via environment variable, defaults to INSTALL_DIR
33+
LIBFABRIC_INSTALL_DIR=${LIBFABRIC_INSTALL_DIR:-$INSTALL_DIR}
34+
3335
if [ -z "$INSTALL_DIR" ]; then
34-
echo "Usage: $0 <install_dir> <ucx_install_dir> <libfabric_install_dir>"
36+
echo "Usage: $0 <install_dir> <ucx_install_dir>"
3537
exit 1
3638
fi
3739

3840
if [ -z "$UCX_INSTALL_DIR" ]; then
3941
UCX_INSTALL_DIR=$INSTALL_DIR
4042
fi
4143

42-
if [ -z "$LIBFABRIC_INSTALL_DIR" ]; then
43-
LIBFABRIC_INSTALL_DIR=$INSTALL_DIR
44-
fi
45-
4644

4745
# For running as user - check if running as root, if not set sudo variable
4846
if [ "$(id -u)" -ne 0 ]; then
@@ -129,14 +127,18 @@ curl -fSsL "https://github.com/openucx/ucx/tarball/${UCX_VERSION}" | tar xz
129127
$SUDO ldconfig \
130128
)
131129

132-
curl -fsSL "https://efa-installer.amazonaws.com/aws-efa-installer-${EFA_INSTALLER_VERSION}.tar.gz" | tar xz
130+
wget --tries=3 --waitretry=5 -O "aws-efa-installer-${EFA_INSTALLER_VERSION}.tar.gz" "https://efa-installer.amazonaws.com/aws-efa-installer-${EFA_INSTALLER_VERSION}.tar.gz"
131+
tar xzf "aws-efa-installer-${EFA_INSTALLER_VERSION}.tar.gz"
132+
rm "aws-efa-installer-${EFA_INSTALLER_VERSION}.tar.gz"
133133
( \
134134
cd aws-efa-installer && \
135135
$SUDO ./efa_installer.sh -y --minimal --skip-kmod --skip-limit-conf --no-verify && \
136136
$SUDO ldconfig \
137137
)
138138

139-
curl -fSsL "https://github.com/ofiwg/libfabric/releases/download/${LIBFABRIC_VERSION}/libfabric-${LIBFABRIC_VERSION#v}.tar.bz2" | tar xj
139+
wget --tries=3 --waitretry=5 -O "libfabric-${LIBFABRIC_VERSION#v}.tar.bz2" "https://github.com/ofiwg/libfabric/releases/download/${LIBFABRIC_VERSION}/libfabric-${LIBFABRIC_VERSION#v}.tar.bz2"
140+
tar xjf "libfabric-${LIBFABRIC_VERSION#v}.tar.bz2"
141+
rm "libfabric-${LIBFABRIC_VERSION#v}.tar.bz2"
140142
( \
141143
cd libfabric-* && \
142144
./autogen.sh && \

benchmark/nixlbench/contrib/Dockerfile

Lines changed: 2 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -105,8 +105,6 @@ ARG WHL_PYTHON_VERSIONS="3.12"
105105
ARG WHL_PLATFORM="manylinux_2_39_$ARCH"
106106
ARG BUILD_TYPE="release"
107107
ARG EFA_INSTALLER_VERSION="latest"
108-
ARG LIBFABRIC_VERSION="v2.3.0"
109-
ARG LIBFABRIC_INSTALL_PATH="/usr/local"
110108
ARG NPROC
111109

112110
WORKDIR /workspace
@@ -123,30 +121,6 @@ COPY --from=nixlbench . /workspace/nixlbench
123121
# Install AWS SDK C++ dependencies and build
124122
RUN apt-get update && apt-get install -y libcurl4-openssl-dev libssl-dev uuid-dev zlib1g-dev hwloc libhwloc-dev
125123

126-
# Install EFA (Elastic Fabric Adapter) - minimal installation
127-
RUN curl -fsSL "https://efa-installer.amazonaws.com/aws-efa-installer-${EFA_INSTALLER_VERSION}.tar.gz" | tar xz && \
128-
cd aws-efa-installer && \
129-
./efa_installer.sh -y --minimal --skip-kmod --skip-limit-conf --no-verify && \
130-
ldconfig
131-
132-
# Build libfabric from source
133-
RUN curl -fSsL "https://github.com/ofiwg/libfabric/releases/download/${LIBFABRIC_VERSION}/libfabric-${LIBFABRIC_VERSION#v}.tar.bz2" | tar xj && \
134-
cd libfabric-* && \
135-
./autogen.sh && \
136-
./configure --prefix="${LIBFABRIC_INSTALL_PATH}" \
137-
--disable-verbs \
138-
--disable-psm3 \
139-
--disable-opx \
140-
--disable-usnic \
141-
--disable-rstream \
142-
--enable-efa \
143-
--with-cuda=/usr/local/cuda \
144-
--enable-cuda-dlopen \
145-
--with-gdrcopy \
146-
--enable-gdrcopy-dlopen && \
147-
make -j${NPROC:-$(nproc)} && \
148-
make install && \
149-
ldconfig
150124

151125
RUN git clone --recurse-submodules --depth 1 --shallow-submodules https://github.com/aws/aws-sdk-cpp.git --branch 1.11.581 && \
152126
mkdir sdk_build && \
@@ -157,7 +131,7 @@ RUN git clone --recurse-submodules --depth 1 --shallow-submodules https://github
157131

158132
WORKDIR /workspace/nixl
159133

160-
ENV LD_LIBRARY_PATH=/usr/local/lib:$LIBFABRIC_INSTALL_PATH/lib:$LD_LIBRARY_PATH
134+
ENV LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH
161135

162136
ENV VIRTUAL_ENV=/workspace/nixl/.venv
163137
RUN uv venv $VIRTUAL_ENV --python $DEFAULT_PYTHON_VERSION && \
@@ -169,7 +143,7 @@ RUN CUDA_SHORT_VERSION=cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d .) && \
169143

170144
RUN rm -rf build && \
171145
mkdir build && \
172-
uv run meson setup build -Dlibfabric_path=$LIBFABRIC_INSTALL_PATH --prefix=/usr/local/nixl --buildtype=$BUILD_TYPE && \
146+
uv run meson setup build --prefix=/usr/local/nixl --buildtype=$BUILD_TYPE && \
173147
cd build && \
174148
ninja && \
175149
ninja install

contrib/Dockerfile

Lines changed: 21 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -75,30 +75,6 @@ RUN git clone --depth 1 https://github.com/etcd-cpp-apiv3/etcd-cpp-apiv3.git &&
7575
mkdir build && cd build && \
7676
cmake .. -DBUILD_ETCD_CORE_ONLY=ON -DCMAKE_BUILD_TYPE=Release && make -j${NPROC:-$(nproc)} && make install
7777

78-
# Install EFA (Elastic Fabric Adapter) - minimal installation
79-
RUN curl -fsSL "https://efa-installer.amazonaws.com/aws-efa-installer-${EFA_INSTALLER_VERSION}.tar.gz" | tar xz && \
80-
cd aws-efa-installer && \
81-
./efa_installer.sh -y --minimal --skip-kmod --skip-limit-conf --no-verify && \
82-
ldconfig
83-
84-
# Build libfabric from source
85-
RUN curl -fSsL "https://github.com/ofiwg/libfabric/releases/download/${LIBFABRIC_VERSION}/libfabric-${LIBFABRIC_VERSION#v}.tar.bz2" | tar xj && \
86-
cd libfabric-* && \
87-
./autogen.sh && \
88-
./configure --prefix="${LIBFABRIC_INSTALL_PATH}" \
89-
--disable-verbs \
90-
--disable-psm3 \
91-
--disable-opx \
92-
--disable-usnic \
93-
--disable-rstream \
94-
--enable-efa \
95-
--with-cuda=/usr/local/cuda \
96-
--enable-cuda-dlopen \
97-
--with-gdrcopy \
98-
--enable-gdrcopy-dlopen && \
99-
make -j${NPROC:-$(nproc)} && \
100-
make install && \
101-
ldconfig
10278

10379
RUN git clone --recurse-submodules --depth 1 --shallow-submodules https://github.com/aws/aws-sdk-cpp.git --branch 1.11.581 && \
10480
mkdir aws_sdk_build && cd aws_sdk_build && \
@@ -160,6 +136,27 @@ RUN cd /usr/local/src && \
160136
make -j${NPROC:-$(nproc)} install-strip && \
161137
ldconfig
162138

139+
# Build libfabric from source
140+
RUN wget --tries=3 --waitretry=5 --timeout=30 --read-timeout=60 \
141+
"https://github.com/ofiwg/libfabric/releases/download/${LIBFABRIC_VERSION}/libfabric-${LIBFABRIC_VERSION#v}.tar.bz2" -O libfabric.tar.bz2 && \
142+
tar xjf libfabric.tar.bz2 && rm libfabric.tar.bz2 && \
143+
cd libfabric-* && \
144+
./autogen.sh && \
145+
./configure --prefix="${LIBFABRIC_INSTALL_PATH}" \
146+
--disable-verbs \
147+
--disable-psm3 \
148+
--disable-opx \
149+
--disable-usnic \
150+
--disable-rstream \
151+
--enable-efa \
152+
--with-cuda=/usr/local/cuda \
153+
--enable-cuda-dlopen \
154+
--with-gdrcopy \
155+
--enable-gdrcopy-dlopen && \
156+
make -j${NPROC:-$(nproc)} && \
157+
make install && \
158+
ldconfig
159+
163160
WORKDIR /workspace/nixl
164161
COPY . /workspace/nixl
165162

contrib/Dockerfile.manylinux

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG}
2121
ARG DEFAULT_PYTHON_VERSION="3.12"
2222
ARG ARCH="x86_64"
2323
ARG UCX_REF="v1.19.0"
24+
ARG LIBFABRIC_VERSION="v2.3.0"
2425

2526
RUN yum groupinstall -y 'Development Tools' && \
2627
dnf install -y almalinux-release-synergy && \
@@ -198,6 +199,27 @@ RUN cd /usr/local/src && \
198199
make -j install-strip && \
199200
ldconfig
200201

202+
# Build libfabric from source
203+
RUN wget --tries=3 --waitretry=5 --timeout=30 --read-timeout=60 \
204+
"https://github.com/ofiwg/libfabric/releases/download/${LIBFABRIC_VERSION}/libfabric-${LIBFABRIC_VERSION#v}.tar.bz2" -O libfabric.tar.bz2 && \
205+
tar xjf libfabric.tar.bz2 && rm libfabric.tar.bz2 && \
206+
cd libfabric-* && \
207+
./autogen.sh && \
208+
./configure --prefix=/usr/local \
209+
--disable-verbs \
210+
--disable-psm3 \
211+
--disable-opx \
212+
--disable-usnic \
213+
--disable-rstream \
214+
--enable-efa \
215+
--with-cuda=/usr/local/cuda \
216+
--enable-cuda-dlopen \
217+
--with-gdrcopy \
218+
--enable-gdrcopy-dlopen && \
219+
make -j$(nproc) && \
220+
make install && \
221+
ldconfig
222+
201223
COPY . /workspace/nixl
202224

203225
RUN rm -rf build && \

0 commit comments

Comments
 (0)