Skip to content

Commit 1edaec6

Browse files
fix(gpu): coprocessor bench
1 parent 1dcc3c8 commit 1edaec6

File tree

7 files changed

+78
-33
lines changed

7 files changed

+78
-33
lines changed

.github/workflows/coprocessor-benchmark-gpu.yml

Lines changed: 58 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,22 @@ name: coprocessor-benchmark-gpu
33

44
on:
55
workflow_dispatch:
6+
inputs:
7+
profile:
8+
description: "Instance type"
9+
required: true
10+
type: choice
11+
options:
12+
- "l40 (n3-L40x1)"
13+
- "4-l40 (n3-L40x4)"
14+
- "single-h100 (n3-H100x1)"
15+
- "2-h100 (n3-H100x2)"
16+
- "4-h100 (n3-H100x4)"
17+
- "multi-h100 (n3-H100x8)"
18+
- "multi-h100-nvlink (n3-H100x8-NVLink)"
19+
- "multi-h100-sxm5 (n3-H100x8-SXM5)"
20+
- "multi-h100-sxm5_fallback (n3-H100x8-SXM5)"
21+
622
schedule:
723
# Weekly tests @ 1AM
824
- cron: "0 1 * * 6"
@@ -17,7 +33,9 @@ env:
1733
RUST_BACKTRACE: "full"
1834
RUST_MIN_STACK: "8388608"
1935
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
20-
PROFILE: "multi-h100-sxm5 (n3-H100x8-SXM5)"
36+
PROFILE_SCHEDULED_RUN: "multi-h100-sxm5 (n3-H100x8-SXM5)"
37+
PROFILE_MANUAL_RUN: ${{ inputs.profile }}
38+
IS_MANUAL_RUN: ${{ github.event_name == 'workflow_dispatch' }}
2139
BENCHMARK_TYPE: "ALL"
2240
OPTIMIZATION_TARGET: "throughput"
2341
BATCH_SIZE: "5000"
@@ -40,15 +58,25 @@ jobs:
4058
- name: Parse profile
4159
id: parse_profile
4260
run: |
61+
if [[ ${IS_MANUAL_RUN} == true ]]; then
62+
PROFILE_RAW="${PROFILE_MANUAL_RUN}"
63+
else
64+
PROFILE_RAW="${PROFILE_SCHEDULED_RUN}"
65+
fi
4366
# shellcheck disable=SC2001
44-
PROFILE_VAL=$(echo "${PROFILE}" | sed 's|\(.*\)[[:space:]](.*)|\1|')
67+
PROFILE_VAL=$(echo "${PROFILE_RAW}" | sed 's|\(.*\)[[:space:]](.*)|\1|')
4568
echo "profile=$PROFILE_VAL" >> "${GITHUB_OUTPUT}"
4669
4770
- name: Parse hardware name
4871
id: parse_hardware_name
4972
run: |
73+
if [[ ${IS_MANUAL_RUN} == true ]]; then
74+
PROFILE_RAW="${PROFILE_MANUAL_RUN}"
75+
else
76+
PROFILE_RAW="${PROFILE_SCHEDULED_RUN}"  # env PROFILE was renamed to PROFILE_SCHEDULED_RUN in this change; keep in sync with the "Parse profile" step
77+
fi
5078
# shellcheck disable=SC2001
51-
PROFILE_VAL=$(echo "${PROFILE}" | sed 's|.*[[:space:]](\(.*\))|\1|')
79+
PROFILE_VAL=$(echo "${PROFILE_RAW}" | sed 's|.*[[:space:]](\(.*\))|\1|')
5280
echo "name=$PROFILE_VAL" >> "${GITHUB_OUTPUT}"
5381
5482
setup-instance:
@@ -130,6 +158,13 @@ jobs:
130158
} >> "${GITHUB_ENV}"
131159
working-directory: tfhe-rs/
132160

161+
- name: Setup Hyperstack dependencies
162+
uses: ./tfhe-rs/.github/actions/gpu_setup
163+
with:
164+
cuda-version: ${{ matrix.cuda }}
165+
gcc-version: ${{ matrix.gcc }}
166+
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
167+
133168
- name: Check fhEVM and TFHE-rs repos
134169
run: |
135170
pwd
@@ -140,21 +175,14 @@ jobs:
140175
run: git lfs checkout
141176
working-directory: fhevm/
142177

143-
- name: Setup Hyperstack dependencies
144-
uses: ./fhevm/.github/actions/gpu_setup
145-
with:
146-
cuda-version: ${{ matrix.cuda }}
147-
gcc-version: ${{ matrix.gcc }}
148-
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
149-
150178
- name: Install rust
151179
uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
152180
with:
153181
toolchain: nightly
154182

155183
- name: Install cargo dependencies
156184
run: |
157-
sudo apt-get install -y protobuf-compiler cmake pkg-config libssl-dev \
185+
sudo apt-get install -y protobuf-compiler pkg-config libssl-dev \
158186
libclang-dev docker-compose-v2 docker.io acl
159187
sudo usermod -aG docker "$USER"
160188
newgrp docker
@@ -181,9 +209,16 @@ jobs:
181209
username: ${{ github.actor }}
182210
password: ${{ secrets.GITHUB_TOKEN }}
183211

212+
- name: Login to Chainguard Registry
213+
uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0
214+
with:
215+
registry: cgr.dev
216+
username: ${{ secrets.CGR_USERNAME }}
217+
password: ${{ secrets.CGR_PASSWORD }}
218+
184219
- name: Init database
185220
run: make init_db
186-
working-directory: fhevm/coprocessor/fhevm-engine/coprocessor
221+
working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker
187222

188223
- name: Use Node.js
189224
uses: actions/setup-node@a0853c24544627f65ddf259abe73b1d18a591444 # v5.0.0
@@ -203,8 +238,12 @@ jobs:
203238

204239
- name: Profile erc20 no-cmux benchmark on GPU
205240
run: |
206-
BENCHMARK_BATCH_SIZE="${BATCH_SIZE}" FHEVM_DF_SCHEDULE="${SCHEDULING_POLICY}" BENCHMARK_TYPE="LATENCY" OPTIMIZATION_TARGET="${OPTIMIZATION_TARGET}" make -e "profile_erc20_gpu"
207-
working-directory: fhevm/coprocessor/fhevm-engine/coprocessor
241+
BENCHMARK_BATCH_SIZE="${BATCH_SIZE}" \
242+
FHEVM_DF_SCHEDULE="${SCHEDULING_POLICY}" \
243+
BENCHMARK_TYPE="THROUGHPUT_200" \
244+
OPTIMIZATION_TARGET="${OPTIMIZATION_TARGET}" \
245+
make -e "profile_erc20_gpu"
246+
working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker
208247

209248
- name: Get nsys profile name
210249
id: nsys_profile_name
@@ -215,25 +254,25 @@ jobs:
215254
REPORT_NAME: ${{ steps.nsys_profile_name.outputs.profile }}
216255
run: |
217256
mv report1.nsys-rep ${{ env.REPORT_NAME }}
218-
working-directory: fhevm/coprocessor/fhevm-engine/coprocessor
257+
working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker
219258

220259
- name: Upload profile artifact
221260
env:
222261
REPORT_NAME: ${{ steps.nsys_profile_name.outputs.profile }}
223262
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
224263
with:
225264
name: ${{ env.REPORT_NAME }}
226-
path: fhevm/coprocessor/fhevm-engine/coprocessor/${{ env.REPORT_NAME }}
265+
path: fhevm/coprocessor/fhevm-engine/tfhe-worker/${{ env.REPORT_NAME }}
227266

228267
- name: Run latency benchmark on GPU
229268
run: |
230269
BENCHMARK_BATCH_SIZE="${BATCH_SIZE}" FHEVM_DF_SCHEDULE="${SCHEDULING_POLICY}" BENCHMARK_TYPE="LATENCY" OPTIMIZATION_TARGET="${OPTIMIZATION_TARGET}" make -e "benchmark_${BENCHMARKS}_gpu"
231-
working-directory: fhevm/coprocessor/fhevm-engine/coprocessor
270+
working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker
232271

233272
- name: Run throughput benchmarks on GPU
234273
run: |
235274
BENCHMARK_BATCH_SIZE="${BATCH_SIZE}" FHEVM_DF_SCHEDULE="${SCHEDULING_POLICY}" BENCHMARK_TYPE="THROUGHPUT_200" OPTIMIZATION_TARGET="${OPTIMIZATION_TARGET}" make -e "benchmark_${BENCHMARKS}_gpu"
236-
working-directory: fhevm/coprocessor/fhevm-engine/coprocessor
275+
working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker
237276

238277
- name: Parse results
239278
run: |
@@ -246,7 +285,7 @@ jobs:
246285
--commit-date "${COMMIT_DATE}" \
247286
--bench-date "${BENCH_DATE}" \
248287
--walk-subdirs \
249-
--crate "coprocessor/fhevm-engine/coprocessor" \
288+
--crate "coprocessor/fhevm-engine/tfhe-worker" \
250289
--name-suffix "operation_batch_size_${BATCH_SIZE}-schedule_${SCHEDULING_POLICY}-optimization_target_${OPTIMIZATION_TARGET}"
251290
working-directory: fhevm/
252291

.github/workflows/gpu_full_multi_gpu_tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ jobs:
8686
slab-url: ${{ secrets.SLAB_BASE_URL }}
8787
job-secret: ${{ secrets.JOB_SECRET }}
8888
backend: hyperstack
89-
profile: multi-gpu-test
89+
profile: 4-l40
9090

9191
# This instance will be spawned especially for pull-request from forked repository
9292
- name: Start GitHub instance

.github/workflows/gpu_integer_long_run_tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ jobs:
4343
slab-url: ${{ secrets.SLAB_BASE_URL }}
4444
job-secret: ${{ secrets.JOB_SECRET }}
4545
backend: hyperstack
46-
profile: multi-gpu-test
46+
profile: 4-l40
4747

4848
cuda-tests:
4949
name: gpu_integer_long_run_tests/cuda-tests

Makefile

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1004,6 +1004,11 @@ test_list_gpu: install_rs_build_toolchain install_cargo_nextest
10041004
--features=integer,internal-keycache,gpu,zk-pok -p tfhe \
10051005
-E "test(/.*gpu.*/)"
10061006

1007+
.PHONY: test_high_level_build_one_gpu
1008+
test_high_level_build_one_gpu: install_rs_build_toolchain
1009+
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --no-run \
1010+
--features=integer,gpu-debug -vv -p tfhe -- "$${TEST}" --test-threads=1 --nocapture
1011+
10071012
test_high_level_api_hpu: install_rs_build_toolchain install_cargo_nextest
10081013
ifeq ($(HPU_CONFIG), v80)
10091014
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) nextest run --cargo-profile $(CARGO_PROFILE) \

backends/tfhe-cuda-backend/cuda/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ if(CMAKE_BUILD_TYPE_LOWERCASE STREQUAL "debug")
8686
message("Compiling in Debug mode")
8787
add_definitions(-DDEBUG)
8888
set(OPTIMIZATION_FLAGS "${OPTIMIZATION_FLAGS} -O0 -G -g")
89+
set(USE_NVTOOLS 1)
8990
else()
9091
# Release mode
9192
message("Compiling in Release mode")

backends/tfhe-cuda-backend/cuda/src/device.cu

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22
#include <cstdint>
33
#include <cuda_runtime.h>
44
#include <mutex>
5+
#ifdef USE_NVTOOLS
6+
#include <cuda_profiler_api.h>
7+
#endif
58

69
uint32_t cuda_get_device() {
710
int device;
@@ -83,6 +86,9 @@ void cuda_set_device(uint32_t gpu_index) {
8386
check_cuda_error(cudaSetDevice(gpu_index));
8487
// Mempools are initialized only once in all the GPUS available
8588
cuda_setup_mempool(gpu_index);
89+
#ifdef USE_NVTOOLS
90+
check_cuda_error(cudaProfilerStart());
91+
#endif
8692
}
8793

8894
cudaEvent_t cuda_create_event(uint32_t gpu_index) {

ci/slab.toml

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -83,18 +83,6 @@ image_name = "Ubuntu Server 22.04 LTS R570 CUDA 12.8"
8383
flavor_name = "n3-A100x8-NVLink"
8484
user = "ubuntu"
8585

86-
[backend.hyperstack.multi-gpu-test]
87-
environment_name = "canada"
88-
image_name = "Ubuntu Server 22.04 LTS R570 CUDA 12.8"
89-
flavor_name = "n3-L40x4"
90-
user = "ubuntu"
91-
92-
[backend.hyperstack.multi-gpu-test_fallback]
93-
environment_name = "canada"
94-
image_name = "Ubuntu Server 22.04 LTS R570 CUDA 12.8"
95-
flavor_name = "n3-RTX-A6000x2"
96-
user = "ubuntu"
97-
9886
[backend.hyperstack.l40]
9987
environment_name = "canada"
10088
image_name = "Ubuntu Server 22.04 LTS R570 CUDA 12.8"
@@ -106,3 +94,9 @@ environment_name = "canada"
10694
image_name = "Ubuntu Server 22.04 LTS R570 CUDA 12.8"
10795
flavor_name = "n3-RTX-A6000x1"
10896
user = "ubuntu"
97+
98+
[backend.hyperstack.4-l40]
99+
environment_name = "canada"
100+
image_name = "Ubuntu Server 22.04 LTS R570 CUDA 12.8"
101+
flavor_name = "n3-L40x4"
102+
user = "ubuntu"

0 commit comments

Comments
 (0)