Skip to content

Commit b16ab46

Browse files
fix(gpu): coprocessor bench
1 parent 378c5cc commit b16ab46

File tree

7 files changed

+74
-31
lines changed

7 files changed

+74
-31
lines changed

.github/workflows/coprocessor-benchmark-gpu.yml

Lines changed: 54 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,22 @@ name: coprocessor-benchmark-gpu
33

44
on:
55
workflow_dispatch:
6+
inputs:
7+
profile:
8+
description: "Instance type"
9+
required: true
10+
type: choice
11+
options:
12+
- "l40 (n3-L40x1)"
13+
- "4-l40 (n3-L40x4)"
14+
- "single-h100 (n3-H100x1)"
15+
- "2-h100 (n3-H100x2)"
16+
- "4-h100 (n3-H100x4)"
17+
- "multi-h100 (n3-H100x8)"
18+
- "multi-h100-nvlink (n3-H100x8-NVLink)"
19+
- "multi-h100-sxm5 (n3-H100x8-SXM5)"
20+
- "multi-h100-sxm5_fallback (n3-H100x8-SXM5)"
21+
622
schedule:
723
# Weekly tests @ 1AM
824
- cron: "0 1 * * 6"
@@ -17,7 +33,7 @@ env:
1733
RUST_BACKTRACE: "full"
1834
RUST_MIN_STACK: "8388608"
1935
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
20-
PROFILE: "multi-h100-sxm5 (n3-H100x8-SXM5)"
36+
PROFILE_SCHEDULED_RUN: "multi-h100-sxm5 (n3-H100x8-SXM5)"
2137
BENCHMARK_TYPE: "ALL"
2238
OPTIMIZATION_TARGET: "throughput"
2339
BATCH_SIZE: "5000"
@@ -40,15 +56,25 @@ jobs:
4056
- name: Parse profile
4157
id: parse_profile
4258
run: |
59+
if [[ ${{ github.event_name == 'workflow_dispatch' }} == true ]]; then
60+
PROFILE_RAW="${{ inputs.profile }}"
61+
else
62+
PROFILE_RAW="${PROFILE_SCHEDULED_RUN}"
63+
fi
4364
# shellcheck disable=SC2001
44-
PROFILE_VAL=$(echo "${PROFILE}" | sed 's|\(.*\)[[:space:]](.*)|\1|')
65+
PROFILE_VAL=$(echo "${PROFILE_RAW}" | sed 's|\(.*\)[[:space:]](.*)|\1|')
4566
echo "profile=$PROFILE_VAL" >> "${GITHUB_OUTPUT}"
4667
4768
- name: Parse hardware name
4869
id: parse_hardware_name
4970
run: |
71+
if [[ ${{ github.event_name == 'workflow_dispatch' }} == true ]]; then
72+
PROFILE_RAW="${{ inputs.profile }}"
73+
else
74+
PROFILE_RAW="${PROFILE_SCHEDULED_RUN}"  # non-dispatch (scheduled) runs use the workflow-level default profile
75+
fi
5076
# shellcheck disable=SC2001
51-
PROFILE_VAL=$(echo "${PROFILE}" | sed 's|.*[[:space:]](\(.*\))|\1|')
77+
PROFILE_VAL=$(echo "${PROFILE_RAW}" | sed 's|.*[[:space:]](\(.*\))|\1|')
5278
echo "name=$PROFILE_VAL" >> "${GITHUB_OUTPUT}"
5379
5480
setup-instance:
@@ -130,6 +156,13 @@ jobs:
130156
} >> "${GITHUB_ENV}"
131157
working-directory: tfhe-rs/
132158

159+
- name: Setup Hyperstack dependencies
160+
uses: ./tfhe-rs/.github/actions/gpu_setup
161+
with:
162+
cuda-version: ${{ matrix.cuda }}
163+
gcc-version: ${{ matrix.gcc }}
164+
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
165+
133166
- name: Check fhEVM and TFHE-rs repos
134167
run: |
135168
pwd
@@ -140,21 +173,14 @@ jobs:
140173
run: git lfs checkout
141174
working-directory: fhevm/
142175

143-
- name: Setup Hyperstack dependencies
144-
uses: ./fhevm/.github/actions/gpu_setup
145-
with:
146-
cuda-version: ${{ matrix.cuda }}
147-
gcc-version: ${{ matrix.gcc }}
148-
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
149-
150176
- name: Install rust
151177
uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
152178
with:
153179
toolchain: nightly
154180

155181
- name: Install cargo dependencies
156182
run: |
157-
sudo apt-get install -y protobuf-compiler cmake pkg-config libssl-dev \
183+
sudo apt-get install -y protobuf-compiler pkg-config libssl-dev \
158184
libclang-dev docker-compose-v2 docker.io acl
159185
sudo usermod -aG docker "$USER"
160186
newgrp docker
@@ -181,9 +207,16 @@ jobs:
181207
username: ${{ github.actor }}
182208
password: ${{ secrets.GITHUB_TOKEN }}
183209

210+
- name: Login to Chainguard Registry
211+
uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0
212+
with:
213+
registry: cgr.dev
214+
username: ${{ secrets.CGR_USERNAME }}
215+
password: ${{ secrets.CGR_PASSWORD }}
216+
184217
- name: Init database
185218
run: make init_db
186-
working-directory: fhevm/coprocessor/fhevm-engine/coprocessor
219+
working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker
187220

188221
- name: Use Node.js
189222
uses: actions/setup-node@a0853c24544627f65ddf259abe73b1d18a591444 # v5.0.0
@@ -203,8 +236,12 @@ jobs:
203236

204237
- name: Profile erc20 no-cmux benchmark on GPU
205238
run: |
206-
BENCHMARK_BATCH_SIZE="${BATCH_SIZE}" FHEVM_DF_SCHEDULE="${SCHEDULING_POLICY}" BENCHMARK_TYPE="LATENCY" OPTIMIZATION_TARGET="${OPTIMIZATION_TARGET}" make -e "profile_erc20_gpu"
207-
working-directory: fhevm/coprocessor/fhevm-engine/coprocessor
239+
BENCHMARK_BATCH_SIZE="${BATCH_SIZE}" \
240+
FHEVM_DF_SCHEDULE="${SCHEDULING_POLICY}" \
241+
BENCHMARK_TYPE="THROUGHPUT_200" \
242+
OPTIMIZATION_TARGET="${OPTIMIZATION_TARGET}" \
243+
make -e "profile_erc20_gpu"
244+
working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker
208245

209246
- name: Get nsys profile name
210247
id: nsys_profile_name
@@ -215,7 +252,7 @@ jobs:
215252
REPORT_NAME: ${{ steps.nsys_profile_name.outputs.profile }}
216253
run: |
217254
mv report1.nsys-rep ${{ env.REPORT_NAME }}
218-
working-directory: fhevm/coprocessor/fhevm-engine/coprocessor
255+
working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker
219256

220257
- name: Upload profile artifact
221258
env:
@@ -228,12 +265,12 @@ jobs:
228265
- name: Run latency benchmark on GPU
229266
run: |
230267
BENCHMARK_BATCH_SIZE="${BATCH_SIZE}" FHEVM_DF_SCHEDULE="${SCHEDULING_POLICY}" BENCHMARK_TYPE="LATENCY" OPTIMIZATION_TARGET="${OPTIMIZATION_TARGET}" make -e "benchmark_${BENCHMARKS}_gpu"
231-
working-directory: fhevm/coprocessor/fhevm-engine/coprocessor
268+
working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker
232269

233270
- name: Run throughput benchmarks on GPU
234271
run: |
235272
BENCHMARK_BATCH_SIZE="${BATCH_SIZE}" FHEVM_DF_SCHEDULE="${SCHEDULING_POLICY}" BENCHMARK_TYPE="THROUGHPUT_200" OPTIMIZATION_TARGET="${OPTIMIZATION_TARGET}" make -e "benchmark_${BENCHMARKS}_gpu"
236-
working-directory: fhevm/coprocessor/fhevm-engine/coprocessor
273+
working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker
237274

238275
- name: Parse results
239276
run: |

.github/workflows/gpu_full_multi_gpu_tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ jobs:
8686
slab-url: ${{ secrets.SLAB_BASE_URL }}
8787
job-secret: ${{ secrets.JOB_SECRET }}
8888
backend: hyperstack
89-
profile: multi-gpu-test
89+
profile: 4-l40
9090

9191
# This instance will be spawned especially for pull-request from forked repository
9292
- name: Start GitHub instance

.github/workflows/gpu_integer_long_run_tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ jobs:
4343
slab-url: ${{ secrets.SLAB_BASE_URL }}
4444
job-secret: ${{ secrets.JOB_SECRET }}
4545
backend: hyperstack
46-
profile: multi-gpu-test
46+
profile: 4-l40
4747

4848
cuda-tests:
4949
name: gpu_integer_long_run_tests/cuda-tests

Makefile

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1004,6 +1004,11 @@ test_high_level_api_gpu: install_rs_build_toolchain install_cargo_nextest
10041004
--test-threads=4 --features=integer,internal-keycache,gpu,zk-pok -p tfhe \
10051005
-E "test(/high_level_api::.*gpu.*/)"
10061006

1007+
.PHONY: test_high_level_build
1008+
test_high_level_build: install_rs_build_toolchain
1009+
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --no-run \
1010+
--features=integer,gpu-debug -vv -p tfhe -- "$${TEST}" --test-threads=1 --nocapture
1011+
10071012
test_high_level_api_hpu: install_rs_build_toolchain install_cargo_nextest
10081013
ifeq ($(HPU_CONFIG), v80)
10091014
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) nextest run --cargo-profile $(CARGO_PROFILE) \

backends/tfhe-cuda-backend/cuda/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ if(CMAKE_BUILD_TYPE_LOWERCASE STREQUAL "debug")
8686
message("Compiling in Debug mode")
8787
add_definitions(-DDEBUG)
8888
set(OPTIMIZATION_FLAGS "${OPTIMIZATION_FLAGS} -O0 -G -g")
89+
set(USE_NVTOOLS 1)
8990
else()
9091
# Release mode
9192
message("Compiling in Release mode")

backends/tfhe-cuda-backend/cuda/src/device.cu

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22
#include <cstdint>
33
#include <cuda_runtime.h>
44
#include <mutex>
5+
#ifdef USE_NVTOOLS
6+
#include <cuda_profiler_api.h>
7+
#endif
58

69
uint32_t cuda_get_device() {
710
int device;
@@ -83,6 +86,9 @@ void cuda_set_device(uint32_t gpu_index) {
8386
check_cuda_error(cudaSetDevice(gpu_index));
8487
// Mempools are initialized only once in all the GPUS available
8588
cuda_setup_mempool(gpu_index);
89+
#ifdef USE_NVTOOLS
90+
check_cuda_error(cudaProfilerStart());
91+
#endif
8692
}
8793

8894
cudaEvent_t cuda_create_event(uint32_t gpu_index) {

ci/slab.toml

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -83,18 +83,6 @@ image_name = "Ubuntu Server 22.04 LTS R570 CUDA 12.8"
8383
flavor_name = "n3-A100x8-NVLink"
8484
user = "ubuntu"
8585

86-
[backend.hyperstack.multi-gpu-test]
87-
environment_name = "canada"
88-
image_name = "Ubuntu Server 22.04 LTS R570 CUDA 12.8"
89-
flavor_name = "n3-L40x4"
90-
user = "ubuntu"
91-
92-
[backend.hyperstack.multi-gpu-test_fallback]
93-
environment_name = "canada"
94-
image_name = "Ubuntu Server 22.04 LTS R570 CUDA 12.8"
95-
flavor_name = "n3-RTX-A6000x2"
96-
user = "ubuntu"
97-
9886
[backend.hyperstack.l40]
9987
environment_name = "canada"
10088
image_name = "Ubuntu Server 22.04 LTS R570 CUDA 12.8"
@@ -106,3 +94,9 @@ environment_name = "canada"
10694
image_name = "Ubuntu Server 22.04 LTS R570 CUDA 12.8"
10795
flavor_name = "n3-RTX-A6000x1"
10896
user = "ubuntu"
97+
98+
[backend.hyperstack.4-l40]
99+
environment_name = "canada"
100+
image_name = "Ubuntu Server 22.04 LTS R570 CUDA 12.8"
101+
flavor_name = "n3-L40x4"
102+
user = "ubuntu"

0 commit comments

Comments
 (0)