Commit 71f427d

chore(gpu): add assert macro
1 parent 451458d commit 71f427d

12 files changed, +109 -67 lines changed

backends/tfhe-cuda-backend/cuda/include/device.h

Lines changed: 29 additions & 0 deletions
@@ -19,13 +19,42 @@ inline void cuda_error(cudaError_t code, const char *file, int line) {
     std::abort();
   }
 }
+
+// The PANIC macro should be used to validate user inputs to GPU functions;
+// it executes in all targets, including production settings. E.g., a
+// cudaMemcpy to the device should check that the destination pointer is
+// a device pointer.
 #define PANIC(format, ...)                                                    \
   {                                                                           \
     std::fprintf(stderr, "%s::%d::%s: panic.\n" format "\n", __FILE__,        \
                  __LINE__, __func__, ##__VA_ARGS__);                          \
     std::abort();                                                             \
   }
 
+// This is a generic assertion-checking macro with a user-defined
+// printf-style message.
+#define PANIC_IF_FALSE(cond, format, ...)                                     \
+  do {                                                                        \
+    if (!(cond)) {                                                            \
+      PANIC(format "\n\n %s\n", ##__VA_ARGS__, #cond);                        \
+    }                                                                         \
+  } while (0)
+
+#ifndef GPU_ASSERTS_DISABLE
+// The GPU assert should be used to validate assumptions in algorithms:
+// for example, checking that two user-provided quantities have a certain
+// relationship, or that the size of the buffer provided to a function is
+// sufficient when it is filled by an algorithm that depends on
+// user-provided inputs. E.g., the OPRF corrections buffer should not be
+// larger than the number of blocks in the datatype being generated.
+#define GPU_ASSERT(cond, format, ...)                                         \
+  PANIC_IF_FALSE(cond, format, ##__VA_ARGS__)
+#else
+// Disabled variant: it must accept the same arguments as the active one so
+// that call sites still compile when GPU_ASSERTS_DISABLE is defined.
+#define GPU_ASSERT(cond, format, ...)                                         \
+  do {                                                                        \
+  } while (0)
+#endif
+
 uint32_t cuda_get_device();
 void cuda_set_device(uint32_t gpu_index);
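
For illustration, here is a minimal standalone sketch (plain C++, no CUDA required) of how the two new macros compose. The check_buffer function and its arguments are hypothetical, not part of the backend: the do/while (0) wrapper makes PANIC_IF_FALSE behave as a single statement, and #cond stringifies the failed expression so it is printed after the user-supplied message.

#include <cstdint>
#include <cstdio>
#include <cstdlib>

// Copies of the definitions above, so the sketch is self-contained.
#define PANIC(format, ...)                                                    \
  {                                                                           \
    std::fprintf(stderr, "%s::%d::%s: panic.\n" format "\n", __FILE__,        \
                 __LINE__, __func__, ##__VA_ARGS__);                          \
    std::abort();                                                             \
  }

#define PANIC_IF_FALSE(cond, format, ...)                                     \
  do {                                                                        \
    if (!(cond)) {                                                            \
      PANIC(format "\n\n %s\n", ##__VA_ARGS__, #cond);                        \
    }                                                                         \
  } while (0)

// Hypothetical validation helper, illustrative only.
void check_buffer(const void *ptr, uint32_t num_blocks) {
  // do/while (0) makes each check a single statement, safe in unbraced ifs.
  PANIC_IF_FALSE(ptr != nullptr, "buffer pointer must not be null");
  PANIC_IF_FALSE(num_blocks > 0, "num_blocks (%u) must be positive",
                 num_blocks);
}

int main() {
  uint32_t blocks = 4;
  check_buffer(&blocks, blocks); // passes silently
  check_buffer(nullptr, blocks); // prints the panic message and aborts
}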

backends/tfhe-cuda-backend/cuda/src/crypto/ggsw.cuh

Lines changed: 5 additions & 3 deletions
@@ -54,9 +54,11 @@ void batch_fft_ggsw_vector(cudaStream_t *streams, uint32_t *gpu_indexes,
                            int8_t *d_mem, uint32_t r, uint32_t glwe_dim,
                            uint32_t polynomial_size, uint32_t level_count,
                            uint32_t max_shared_memory) {
-  if (gpu_count != 1)
-    PANIC("GPU error (batch_fft_ggsw_vector): multi-GPU execution is not "
-          "supported yet.")
+  PANIC_IF_FALSE(gpu_count == 1,
+                 "GPU error (batch_fft_ggsw_vector): multi-GPU execution on "
+                 "%d gpus is not supported yet.",
+                 gpu_count);
+
   cuda_set_device(gpu_indexes[0]);
 
   int shared_memory_size = sizeof(double) * polynomial_size;
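
Given the PANIC definition above, a call reaching this check with gpu_count == 2 would print something like the following to stderr before aborting (file and line are illustrative):

ggsw.cuh::57::batch_fft_ggsw_vector: panic.
GPU error (batch_fft_ggsw_vector): multi-GPU execution on 2 gpus is not supported yet.

 gpu_count == 1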

backends/tfhe-cuda-backend/cuda/src/crypto/keyswitch.cuh

Lines changed: 4 additions & 2 deletions
@@ -124,8 +124,10 @@ __host__ void host_keyswitch_lwe_ciphertext_vector(
       num_blocks_per_sample, num_threads_x);
 
   int shared_mem = sizeof(Torus) * num_threads_y * num_threads_x;
-  if (num_blocks_per_sample > 65536)
-    PANIC("Cuda error (Keyswitch): number of blocks per sample is too large");
+  PANIC_IF_FALSE(
+      num_blocks_per_sample <= 65536,
+      "Cuda error (Keyswitch): number of blocks per sample (%d) is too large",
+      num_blocks_per_sample);
 
   // In multiplication of large integers (512, 1024, 2048), the number of
   // samples can be larger than 65536, so we need to set it in the first

backends/tfhe-cuda-backend/cuda/src/crypto/packing_keyswitch.cuh

Lines changed: 3 additions & 2 deletions
@@ -204,8 +204,9 @@ __host__ void host_packing_keyswitch_lwe_list_to_glwe(
 
   // Shared memory requirement is 8192 bytes for 64-bit Torus elements
   uint32_t shared_mem_size = get_shared_mem_size_tgemm<Torus>();
-  if (shared_mem_size > 8192)
-    PANIC("GEMM kernel error: shared memory required might be too large");
+  // Sanity check: the shared memory size is a constant defined by the algorithm
+  GPU_ASSERT(shared_mem_size <= 8192,
+             "GEMM kernel error: shared memory required might be too large");
 
   tgemm<Torus><<<grid_gemm, threads_gemm, shared_mem_size, stream>>>(
       num_lwes, glwe_accumulator_size, lwe_dimension, d_mem_0, fp_ksk_array,
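
Since this invariant does not depend on user input, the check uses GPU_ASSERT, which compiles away when GPU_ASSERTS_DISABLE is defined. A hypothetical translation unit, with illustrative build flags (the real build system may differ):

// checks.cu -- hypothetical file, illustrative only.
//
//   nvcc -c checks.cu                        -> GPU_ASSERT is active
//   nvcc -DGPU_ASSERTS_DISABLE -c checks.cu  -> GPU_ASSERT is a no-op
#include "device.h"
#include <cstdint>

void sanity_check(uint32_t shared_mem_size) {
  // In active builds this aborts with the message below if the invariant is
  // broken; with GPU_ASSERTS_DISABLE it expands to an empty do { } while (0).
  GPU_ASSERT(shared_mem_size <= 8192,
             "GEMM kernel error: shared memory required might be too large");
}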

backends/tfhe-cuda-backend/cuda/src/crypto/torus.cuh

Lines changed: 7 additions & 8 deletions
@@ -298,15 +298,14 @@ __host__ void host_improve_noise_modulus_switch(
     const double input_variance, const double r_sigma, const double bound,
     uint32_t log_modulus) {
 
-  if (lwe_size < 512) {
-    PANIC("The lwe_size is less than 512, this is not supported\n");
-    return;
-  }
+  PANIC_IF_FALSE(lwe_size >= 512,
+                 "The lwe_size (%d) is less than 512, this is not supported\n",
+                 lwe_size);
+  PANIC_IF_FALSE(
+      lwe_size <= 1024,
+      "The lwe_size (%d) is greater than 1024, this is not supported\n",
+      lwe_size);
 
-  if (lwe_size > 1024) {
-    PANIC("The lwe_size is greater than 1024, this is not supported\n");
-    return;
-  }
   cuda_set_device(gpu_index);
 
   // This reduction requires a power of two num of threads
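
The deleted return; statements were dead code: PANIC expands to a block ending in std::abort(), which never returns, so execution cannot reach the return. A minimal sketch with a hypothetical function, assuming device.h is on the include path:

#include "device.h" // for PANIC
#include <cstdint>

// Hypothetical function, illustrative only.
void reject_small_lwe(uint32_t lwe_size) {
  if (lwe_size < 512) {
    PANIC("The lwe_size is less than 512, this is not supported\n")
    return; // unreachable: std::abort() has already terminated the process
  }
}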

backends/tfhe-cuda-backend/cuda/src/device.cu

Lines changed: 12 additions & 12 deletions
@@ -196,14 +196,14 @@ void cuda_memcpy_with_size_tracking_async_gpu_to_gpu(
     return;
   cudaPointerAttributes attr_dest;
   check_cuda_error(cudaPointerGetAttributes(&attr_dest, dest));
-  if (attr_dest.type != cudaMemoryTypeDevice) {
-    PANIC("Cuda error: invalid dest device pointer in copy from GPU to GPU.")
-  }
+  PANIC_IF_FALSE(
+      attr_dest.type == cudaMemoryTypeDevice,
+      "Cuda error: invalid dest device pointer in copy from GPU to GPU.");
   cudaPointerAttributes attr_src;
   check_cuda_error(cudaPointerGetAttributes(&attr_src, src));
-  if (attr_src.type != cudaMemoryTypeDevice) {
-    PANIC("Cuda error: invalid src device pointer in copy from GPU to GPU.")
-  }
+  PANIC_IF_FALSE(
+      attr_src.type == cudaMemoryTypeDevice,
+      "Cuda error: invalid src device pointer in copy from GPU to GPU.");
   cuda_set_device(gpu_index);
   if (attr_src.device == attr_dest.device) {
     check_cuda_error(
@@ -227,14 +227,14 @@ void cuda_memcpy_gpu_to_gpu(void *dest, void const *src, uint64_t size,
     return;
   cudaPointerAttributes attr_dest;
   check_cuda_error(cudaPointerGetAttributes(&attr_dest, dest));
-  if (attr_dest.type != cudaMemoryTypeDevice) {
-    PANIC("Cuda error: invalid dest device pointer in copy from GPU to GPU.")
-  }
+  PANIC_IF_FALSE(
+      attr_dest.type == cudaMemoryTypeDevice,
+      "Cuda error: invalid dest device pointer in copy from GPU to GPU.");
   cudaPointerAttributes attr_src;
   check_cuda_error(cudaPointerGetAttributes(&attr_src, src));
-  if (attr_src.type != cudaMemoryTypeDevice) {
-    PANIC("Cuda error: invalid src device pointer in copy from GPU to GPU.")
-  }
+  PANIC_IF_FALSE(
+      attr_src.type == cudaMemoryTypeDevice,
+      "Cuda error: invalid src device pointer in copy from GPU to GPU.");
   cuda_set_device(gpu_index);
   if (attr_src.device == attr_dest.device) {
     check_cuda_error(cudaMemcpy(dest, src, size, cudaMemcpyDeviceToDevice));
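
The same dest/src validation now appears four times across these two copy wrappers; a hypothetical helper (not part of this commit) could factor out the pattern using the standard cudaPointerGetAttributes API:

#include <cuda_runtime.h>
#include "device.h" // for check_cuda_error and PANIC_IF_FALSE

// Hypothetical helper, illustrative only; `role` is "dest" or "src".
static void assert_device_pointer(const void *ptr, const char *role) {
  cudaPointerAttributes attr;
  check_cuda_error(cudaPointerGetAttributes(&attr, ptr));
  // cudaMemoryTypeDevice excludes host, managed, and unregistered memory.
  PANIC_IF_FALSE(attr.type == cudaMemoryTypeDevice,
                 "Cuda error: invalid %s device pointer in copy from GPU to "
                 "GPU.",
                 role);
}

// Usage inside the wrappers:
//   assert_device_pointer(dest, "dest");
//   assert_device_pointer(src, "src");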

backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cuh

Lines changed: 9 additions & 6 deletions
@@ -20,12 +20,15 @@ __host__ void host_integer_radix_bitop_kb(
     void *const *bsks, Torus *const *ksks,
     CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
 
-  if (lwe_array_out->num_radix_blocks != lwe_array_1->num_radix_blocks ||
-      lwe_array_out->num_radix_blocks != lwe_array_2->num_radix_blocks)
-    PANIC("Cuda error: input and output num radix blocks must be equal")
-  if (lwe_array_out->lwe_dimension != lwe_array_1->lwe_dimension ||
-      lwe_array_out->lwe_dimension != lwe_array_2->lwe_dimension)
-    PANIC("Cuda error: input and output lwe dimension must be equal")
+  PANIC_IF_FALSE(
+      lwe_array_out->num_radix_blocks == lwe_array_1->num_radix_blocks &&
+          lwe_array_out->num_radix_blocks == lwe_array_2->num_radix_blocks,
+      "Cuda error: input and output num radix blocks must be equal");
+
+  PANIC_IF_FALSE(lwe_array_out->lwe_dimension == lwe_array_1->lwe_dimension &&
+                     lwe_array_out->lwe_dimension == lwe_array_2->lwe_dimension,
+                 "Cuda error: input and output lwe dimension must be equal");
+
   auto lut = mem_ptr->lut;
   uint64_t degrees[lwe_array_1->num_radix_blocks];
   if (mem_ptr->op == BITOP_TYPE::BITAND) {

backends/tfhe-cuda-backend/cuda/src/integer/cast.cuh

Lines changed: 5 additions & 7 deletions
@@ -25,10 +25,10 @@ __host__ void host_trim_radix_blocks_lsb(CudaRadixCiphertextFFI *output,
   const uint32_t input_start_lwe_index =
       input->num_radix_blocks - output->num_radix_blocks;
 
-  if (input->num_radix_blocks <= output->num_radix_blocks) {
-    PANIC("Cuda error: input num blocks should be greater than output num "
-          "blocks");
-  }
+  PANIC_IF_FALSE(input->num_radix_blocks > output->num_radix_blocks,
+                 "Cuda error: input num blocks (%d) should be greater than "
+                 "output num blocks (%d)",
+                 input->num_radix_blocks, output->num_radix_blocks);
 
   copy_radix_ciphertext_slice_async<Torus>(
       streams[0], gpu_indexes[0], output, 0, output->num_radix_blocks, input,
@@ -70,9 +70,7 @@ __host__ void host_extend_radix_with_sign_msb(
   PUSH_RANGE("cast/extend")
   const uint32_t input_blocks = input->num_radix_blocks;
 
-  if (input_blocks == 0) {
-    PANIC("Cuda error: input blocks cannot be zero");
-  }
+  PANIC_IF_FALSE(input_blocks > 0, "Cuda error: input blocks cannot be zero");
 
   copy_radix_ciphertext_slice_async<Torus>(streams[0], gpu_indexes[0], output,
                                            0, input_blocks, input, 0,

backends/tfhe-cuda-backend/cuda/src/integer/cmux.cuh

Lines changed: 12 additions & 8 deletions
@@ -15,14 +15,18 @@ zero_out_if(cudaStream_t const *streams, uint32_t const *gpu_indexes,
             Torus *const *ksks,
             CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
             uint32_t num_radix_blocks) {
-  if (lwe_array_out->num_radix_blocks < num_radix_blocks ||
-      lwe_array_input->num_radix_blocks < num_radix_blocks)
-    PANIC("Cuda error: input or output radix ciphertexts does not have enough "
-          "blocks")
-  if (lwe_array_out->lwe_dimension != lwe_array_input->lwe_dimension ||
-      lwe_array_input->lwe_dimension != lwe_condition->lwe_dimension)
-    PANIC("Cuda error: input and output radix ciphertexts must have the same "
-          "lwe dimension")
+  PANIC_IF_FALSE(
+      lwe_array_out->num_radix_blocks >= num_radix_blocks &&
+          lwe_array_input->num_radix_blocks >= num_radix_blocks,
+      "Cuda error: input or output radix ciphertexts do not have enough "
+      "blocks");
+
+  PANIC_IF_FALSE(
+      lwe_array_out->lwe_dimension == lwe_array_input->lwe_dimension &&
+          lwe_array_input->lwe_dimension == lwe_condition->lwe_dimension,
+      "Cuda error: input and output radix ciphertexts must have the same "
+      "lwe dimension");
+
   cuda_set_device(gpu_indexes[0]);
   auto params = mem_ptr->params;

backends/tfhe-cuda-backend/cuda/src/integer/integer.cuh

Lines changed: 14 additions & 11 deletions
@@ -101,17 +101,20 @@ __host__ void host_radix_blocks_rotate_right(
     cudaStream_t const *streams, uint32_t const *gpu_indexes,
     uint32_t gpu_count, CudaRadixCiphertextFFI *dst,
     CudaRadixCiphertextFFI *src, uint32_t rotations, uint32_t num_blocks) {
-  if (src == dst) {
-    PANIC("Cuda error (blocks_rotate_right): the source and destination "
-          "pointers should be different");
-  }
-  if (dst->lwe_dimension != src->lwe_dimension)
-    PANIC("Cuda error: input and output should have the same "
-          "lwe dimension")
-  if (dst->num_radix_blocks < num_blocks || src->num_radix_blocks < num_blocks)
-    PANIC("Cuda error: input and output should have more blocks than asked for "
-          "in the "
-          "function call")
+  PANIC_IF_FALSE(src != dst,
+                 "Cuda error (blocks_rotate_right): the source and destination "
+                 "pointers should be different");
+
+  PANIC_IF_FALSE(dst->lwe_dimension == src->lwe_dimension,
+                 "Cuda error: input and output should have the same "
+                 "lwe dimension");
+
+  PANIC_IF_FALSE(dst->num_radix_blocks >= num_blocks &&
+                     src->num_radix_blocks >= num_blocks,
+                 "Cuda error: input and output should have at least as many "
+                 "blocks as requested in the function call");
+
   auto lwe_size = src->lwe_dimension + 1;
