@@ -244,8 +244,6 @@ struct int_radix_params {
244244 uint32_t carry_modulus;
245245 PBS_MS_REDUCTION_T noise_reduction_type;
246246
247- int_radix_params (){};
248-
249247 int_radix_params (PBS_TYPE pbs_type, uint32_t glwe_dimension,
250248 uint32_t polynomial_size, uint32_t big_lwe_dimension,
251249 uint32_t small_lwe_dimension, uint32_t ks_level,
@@ -262,6 +260,8 @@ struct int_radix_params {
262260 message_modulus(message_modulus), carry_modulus(carry_modulus),
263261 noise_reduction_type(noise_reduction_type){};
264262
263+ int_radix_params () = default;
264+
265265 void print () {
266266 printf (" pbs_type: %u, glwe_dimension: %u, "
267267 " polynomial_size: %u, "
@@ -2450,8 +2450,7 @@ template <typename Torus> struct int_sc_prop_memory {
24502450 int_sc_prop_memory (cudaStream_t const *streams, uint32_t const *gpu_indexes,
24512451 uint32_t gpu_count, int_radix_params params,
24522452 uint32_t num_radix_blocks, uint32_t requested_flag_in,
2453- uint32_t uses_carry, bool allocate_gpu_memory,
2454- uint64_t &size_tracker) {
2453+ bool allocate_gpu_memory, uint64_t &size_tracker) {
24552454 gpu_memory_allocated = allocate_gpu_memory;
24562455 this ->params = params;
24572456 auto glwe_dimension = params.glwe_dimension ;
@@ -3203,11 +3202,10 @@ template <typename Torus> struct int_mul_memory {
32033202 streams, gpu_indexes, gpu_count, params, num_radix_blocks,
32043203 2 * num_radix_blocks, vector_result_sb, small_lwe_vector, luts_array,
32053204 true , allocate_gpu_memory, size_tracker);
3206- uint32_t uses_carry = 0 ;
32073205 uint32_t requested_flag = outputFlag::FLAG_NONE;
32083206 sc_prop_mem = new int_sc_prop_memory<Torus>(
32093207 streams, gpu_indexes, gpu_count, params, num_radix_blocks,
3210- requested_flag, uses_carry, allocate_gpu_memory, size_tracker);
3208+ requested_flag, allocate_gpu_memory, size_tracker);
32113209 }
32123210
32133211 void release (cudaStream_t const *streams, uint32_t const *gpu_indexes,
@@ -3823,36 +3821,56 @@ template <typename Torus> struct int_comparison_eq_buffer {
38233821 gpu_memory_allocated = allocate_gpu_memory;
38243822 this ->params = params;
38253823 this ->op = op;
3824+ Torus total_modulus = params.message_modulus * params.carry_modulus ;
3825+ auto active_gpu_count = get_active_gpu_count (num_radix_blocks, gpu_count);
38263826
38273827 are_all_block_true_buffer = new int_are_all_block_true_buffer<Torus>(
38283828 streams, gpu_indexes, gpu_count, op, params, num_radix_blocks,
38293829 allocate_gpu_memory, size_tracker);
38303830
3831- // Operator LUT
3832- auto operator_f = [op](Torus lhs, Torus rhs) -> Torus {
3833- if (op == COMPARISON_TYPE::EQ) {
3834- // EQ
3835- return (lhs == rhs);
3836- } else {
3837- // NE
3838- return (lhs != rhs);
3839- }
3840- };
3841- operator_lut = new int_radix_lut<Torus>(streams, gpu_indexes, gpu_count,
3842- params, 1 , num_radix_blocks,
3843- allocate_gpu_memory, size_tracker);
3831+ if (op == COMPARISON_TYPE::EQ || COMPARISON_TYPE::NE) {
3832+ // Operator LUT
3833+ auto operator_f = [op](Torus lhs, Torus rhs) -> Torus {
3834+ if (op == COMPARISON_TYPE::EQ) {
3835+ return (lhs == rhs);
3836+ } else if (op == COMPARISON_TYPE::NE) {
3837+ return (lhs != rhs);
3838+ PANIC ( " Cuda error (eq/ne): invalid comparison type " )
3839+ }
3840+ };
3841+ operator_lut = new int_radix_lut<Torus>(
3842+ streams, gpu_indexes, gpu_count, params, 1 , num_radix_blocks,
3843+ allocate_gpu_memory, size_tracker);
38443844
3845- generate_device_accumulator_bivariate<Torus>(
3846- streams[0 ], gpu_indexes[0 ], operator_lut->get_lut (0 , 0 ),
3847- operator_lut->get_degree (0 ), operator_lut->get_max_degree (0 ),
3848- params.glwe_dimension , params.polynomial_size , params.message_modulus ,
3849- params.carry_modulus , operator_f, gpu_memory_allocated);
3845+ generate_device_accumulator_bivariate<Torus>(
3846+ streams[0 ], gpu_indexes[0 ], operator_lut->get_lut (0 , 0 ),
3847+ operator_lut->get_degree (0 ), operator_lut->get_max_degree (0 ),
3848+ params.glwe_dimension , params.polynomial_size , params.message_modulus ,
3849+ params.carry_modulus , operator_f, gpu_memory_allocated);
38503850
3851- auto active_gpu_count = get_active_gpu_count (num_radix_blocks, gpu_count);
3852- operator_lut->broadcast_lut (streams, gpu_indexes, active_gpu_count);
3851+ operator_lut->broadcast_lut (streams, gpu_indexes, active_gpu_count);
3852+ // Scalar may have up to num_radix_blocks blocks
3853+ scalar_comparison_luts = new int_radix_lut<Torus>(
3854+ streams, gpu_indexes, gpu_count, params, total_modulus,
3855+ num_radix_blocks, allocate_gpu_memory, size_tracker);
3856+
3857+ for (int i = 0 ; i < total_modulus; i++) {
3858+ auto lut_f = [i, operator_f](Torus x) -> Torus {
3859+ return operator_f (i, x);
3860+ };
3861+
3862+ generate_device_accumulator<Torus>(
3863+ streams[0 ], gpu_indexes[0 ], scalar_comparison_luts->get_lut (0 , i),
3864+ scalar_comparison_luts->get_degree (i),
3865+ scalar_comparison_luts->get_max_degree (i), params.glwe_dimension ,
3866+ params.polynomial_size , params.message_modulus ,
3867+ params.carry_modulus , lut_f, gpu_memory_allocated);
3868+ }
3869+ scalar_comparison_luts->broadcast_lut (streams, gpu_indexes,
3870+ active_gpu_count);
3871+ }
38533872
38543873 // f(x) -> x == 0
3855- Torus total_modulus = params.message_modulus * params.carry_modulus ;
38563874 auto is_non_zero_lut_f = [total_modulus](Torus x) -> Torus {
38573875 return (x % total_modulus) != 0 ;
38583876 };
@@ -3868,38 +3886,20 @@ template <typename Torus> struct int_comparison_eq_buffer {
38683886 params.carry_modulus , is_non_zero_lut_f, gpu_memory_allocated);
38693887
38703888 is_non_zero_lut->broadcast_lut (streams, gpu_indexes, active_gpu_count);
3871-
3872- // Scalar may have up to num_radix_blocks blocks
3873- scalar_comparison_luts = new int_radix_lut<Torus>(
3874- streams, gpu_indexes, gpu_count, params, total_modulus,
3875- num_radix_blocks, allocate_gpu_memory, size_tracker);
3876-
3877- for (int i = 0 ; i < total_modulus; i++) {
3878- auto lut_f = [i, operator_f](Torus x) -> Torus {
3879- return operator_f (i, x);
3880- };
3881-
3882- generate_device_accumulator<Torus>(
3883- streams[0 ], gpu_indexes[0 ], scalar_comparison_luts->get_lut (0 , i),
3884- scalar_comparison_luts->get_degree (i),
3885- scalar_comparison_luts->get_max_degree (i), params.glwe_dimension ,
3886- params.polynomial_size , params.message_modulus , params.carry_modulus ,
3887- lut_f, gpu_memory_allocated);
3888- }
3889- scalar_comparison_luts->broadcast_lut (streams, gpu_indexes,
3890- active_gpu_count);
38913889 }
38923890
38933891 void release (cudaStream_t const *streams, uint32_t const *gpu_indexes,
38943892 uint32_t gpu_count) {
3895- operator_lut-> release (streams, gpu_indexes, gpu_count);
3896- delete operator_lut;
3897- is_non_zero_lut-> release (streams, gpu_indexes, gpu_count) ;
3898- delete is_non_zero_lut ;
3899- scalar_comparison_luts-> release (streams, gpu_indexes, gpu_count) ;
3900- delete scalar_comparison_luts;
3893+ if (op == COMPARISON_TYPE::EQ || COMPARISON_TYPE::NE) {
3894+ operator_lut-> release (streams, gpu_indexes, gpu_count) ;
3895+ delete operator_lut ;
3896+ scalar_comparison_luts-> release (streams, gpu_indexes, gpu_count) ;
3897+ delete scalar_comparison_luts ;
3898+ }
39013899 are_all_block_true_buffer->release (streams, gpu_indexes, gpu_count);
39023900 delete are_all_block_true_buffer;
3901+ is_non_zero_lut->release (streams, gpu_indexes, gpu_count);
3902+ delete is_non_zero_lut;
39033903 }
39043904};
39053905
@@ -4020,8 +4020,7 @@ template <typename Torus> struct int_comparison_diff_buffer {
40204020 case LE:
40214021 return (x == IS_INFERIOR) || (x == IS_EQUAL);
40224022 default :
4023- // We don't need a default case but we need to return something
4024- return 42 ;
4023+ PANIC (" Cuda error (comparisons): unknown comparison type" )
40254024 }
40264025 };
40274026
@@ -5069,11 +5068,10 @@ template <typename Torus> struct int_scalar_mul_buffer {
50695068 streams, gpu_indexes, gpu_count, params, num_radix_blocks,
50705069 num_ciphertext_bits, true , allocate_gpu_memory, last_step_mem);
50715070 }
5072- uint32_t uses_carry = 0 ;
50735071 uint32_t requested_flag = outputFlag::FLAG_NONE;
50745072 sc_prop_mem = new int_sc_prop_memory<Torus>(
50755073 streams, gpu_indexes, gpu_count, params, num_radix_blocks,
5076- requested_flag, uses_carry, allocate_gpu_memory, last_step_mem);
5074+ requested_flag, allocate_gpu_memory, last_step_mem);
50775075 if (anticipated_buffer_drop) {
50785076 size_tracker += std::max (anticipated_drop_mem, last_step_mem);
50795077 } else {
@@ -5132,10 +5130,9 @@ template <typename Torus> struct int_abs_buffer {
51325130 streams, gpu_indexes, gpu_count, SHIFT_OR_ROTATE_TYPE::RIGHT_SHIFT,
51335131 params, num_radix_blocks, allocate_gpu_memory, size_tracker);
51345132 uint32_t requested_flag = outputFlag::FLAG_NONE;
5135- uint32_t uses_carry = 0 ;
51365133 scp_mem = new int_sc_prop_memory<Torus>(
51375134 streams, gpu_indexes, gpu_count, params, num_radix_blocks,
5138- requested_flag, uses_carry, allocate_gpu_memory, size_tracker);
5135+ requested_flag, allocate_gpu_memory, size_tracker);
51395136 bitxor_mem = new int_bitop_buffer<Torus>(
51405137 streams, gpu_indexes, gpu_count, BITOP_TYPE::BITXOR, params,
51415138 num_radix_blocks, allocate_gpu_memory, size_tracker);
@@ -5216,13 +5213,12 @@ template <typename Torus> struct int_div_rem_memory {
52165213 params, num_blocks,
52175214 allocate_gpu_memory, size_tracker);
52185215 uint32_t requested_flag = outputFlag::FLAG_NONE;
5219- uint32_t uses_carry = 0 ;
52205216 scp_mem_1 = new int_sc_prop_memory<Torus>(
52215217 streams, gpu_indexes, gpu_count, params, num_blocks, requested_flag,
5222- uses_carry, allocate_gpu_memory, size_tracker);
5218+ allocate_gpu_memory, size_tracker);
52235219 scp_mem_2 = new int_sc_prop_memory<Torus>(
52245220 streams, gpu_indexes, gpu_count, params, num_blocks, requested_flag,
5225- uses_carry, allocate_gpu_memory, size_tracker);
5221+ allocate_gpu_memory, size_tracker);
52265222
52275223 std::function<uint64_t (uint64_t )> quotient_predicate_lut_f =
52285224 [](uint64_t x) -> uint64_t { return x == 1 ; };
@@ -5420,7 +5416,7 @@ template <typename Torus> struct int_sub_and_propagate {
54205416
54215417 this ->sc_prop_mem = new int_sc_prop_memory<Torus>(
54225418 streams, gpu_indexes, gpu_count, params, num_radix_blocks,
5423- requested_flag_in, ( uint32_t ) 0 , allocate_gpu_memory, size_tracker);
5419+ requested_flag_in, allocate_gpu_memory, size_tracker);
54245420
54255421 this ->neg_rhs_array = new CudaRadixCiphertextFFI;
54265422 create_zero_radix_ciphertext_async<Torus>(
@@ -5559,7 +5555,7 @@ template <typename Torus> struct int_unsigned_scalar_div_mem {
55595555 scalar_divisor_ffi->active_bits , allocate_gpu_memory, size_tracker);
55605556 scp_mem = new int_sc_prop_memory<Torus>(
55615557 streams, gpu_indexes, gpu_count, params, num_radix_blocks,
5562- FLAG_NONE, ( uint32_t ) 0 , allocate_gpu_memory, size_tracker);
5558+ FLAG_NONE, allocate_gpu_memory, size_tracker);
55635559 sub_and_propagate_mem = new int_sub_and_propagate<Torus>(
55645560 streams, gpu_indexes, gpu_count, params, num_radix_blocks,
55655561 FLAG_NONE, allocate_gpu_memory, size_tracker);
@@ -5716,7 +5712,7 @@ template <typename Torus> struct int_signed_scalar_div_mem {
57165712 num_radix_blocks, allocate_gpu_memory, size_tracker);
57175713 scp_mem = new int_sc_prop_memory<Torus>(
57185714 streams, gpu_indexes, gpu_count, params, num_radix_blocks,
5719- FLAG_NONE, ( uint32_t ) 0 , allocate_gpu_memory, size_tracker);
5715+ FLAG_NONE, allocate_gpu_memory, size_tracker);
57205716
57215717 } else {
57225718
@@ -5737,7 +5733,7 @@ template <typename Torus> struct int_signed_scalar_div_mem {
57375733 if (scalar_divisor_ffi->is_chosen_multiplier_geq_two_pow_numerator ) {
57385734 scp_mem = new int_sc_prop_memory<Torus>(
57395735 streams, gpu_indexes, gpu_count, params, num_radix_blocks,
5740- FLAG_NONE, ( uint32_t ) 0 , allocate_gpu_memory, size_tracker);
5736+ FLAG_NONE, allocate_gpu_memory, size_tracker);
57415737 }
57425738 }
57435739 }
@@ -5885,7 +5881,7 @@ template <typename Torus> struct int_signed_scalar_div_rem_buffer {
58855881
58865882 this ->scp_mem = new int_sc_prop_memory<Torus>(
58875883 streams, gpu_indexes, gpu_count, params, num_radix_blocks, FLAG_NONE,
5888- ( uint32_t ) 0 , allocate_gpu_memory, size_tracker);
5884+ allocate_gpu_memory, size_tracker);
58895885
58905886 bool is_divisor_one = scalar_divisor_ffi->is_abs_divisor_one &&
58915887 !scalar_divisor_ffi->is_divisor_negative ;
@@ -6085,7 +6081,7 @@ template <typename Torus> struct int_count_of_consecutive_bits_buffer {
60856081 num_radix_blocks, true , allocate_gpu_memory, size_tracker);
60866082
60876083 this ->propagate_mem = new int_sc_prop_memory<Torus>(
6088- streams, gpu_indexes, gpu_count, params, counter_num_blocks, 0 , 0 ,
6084+ streams, gpu_indexes, gpu_count, params, counter_num_blocks, 0 ,
60896085 allocate_gpu_memory, size_tracker);
60906086 }
60916087
0 commit comments