Merge branch 'main' into montgmomery-felts

FrancoGiachetta · web-flow · commit 89ef937668a8 · 2025-11-07T17:41:59.000-03:00
diff --git a/src/executor.rs b/src/executor.rs
@@ -110,13 +110,17 @@ fn invoke_dynamic(
         .peekable();
 
     let num_return_args = ret_types_iter.clone().count();
+    // If there is more than one return value, or the return value is _complex_,
+    // as defined by the architecture ABI, then we pass a return pointer as
+    // the first argument to the program entrypoint.
     let mut return_ptr = if num_return_args > 1
         || ret_types_iter
             .peek()
             .map(|id| registry.get_type(id)?.is_complex(registry))
             .transpose()?
             == Some(true)
     {
+        // The return pointer should be able to hold all the return values.
         let layout = ret_types_iter.try_fold(Layout::new::<()>(), |layout, id| {
             let type_info = registry.get_type(id)?;
             Result::<_, Error>::Ok(layout.extend(type_info.layout(registry)?)?.0)
diff --git a/src/libfuncs/circuit.rs b/src/libfuncs/circuit.rs
@@ -595,28 +595,23 @@ fn build_gate_evaluation<'ctx, 'this>(
                 }
                 // INV: lhs = 1 / rhs
                 (None, Some(rhs_value), Some(_)) => {
-                    // Extend to avoid overflow
-                    let u768_type = IntegerType::new(context, 768).into();
-                    let rhs_value = block.extui(rhs_value, u768_type, location)?;
-                    let circuit_modulus_u768 = block.extui(circuit_modulus, u768_type, location)?;
-
                     // Apply egcd to find gcd and inverse
                     let euclidean_result = runtime_bindings_meta.extended_euclidean_algorithm(
                         context,
                         helper.module,
                         block,
                         location,
                         rhs_value,
-                        circuit_modulus_u768,
+                        circuit_modulus,
                     )?;
                     // Extract the values from the result struct
                     let gcd =
-                        block.extract_value(context, location, euclidean_result, u768_type, 0)?;
+                        block.extract_value(context, location, euclidean_result, u384_type, 0)?;
                     let inverse =
-                        block.extract_value(context, location, euclidean_result, u768_type, 1)?;
+                        block.extract_value(context, location, euclidean_result, u384_type, 1)?;
 
                     // if the gcd is not 1, then fail (a and b are not coprimes)
-                    let one = block.const_int_from_type(context, location, 1, u768_type)?;
+                    let one = block.const_int_from_type(context, location, 1, u384_type)?;
                     let gate_offset_idx_value = block.const_int_from_type(
                         context,
                         location,
@@ -637,7 +632,7 @@ fn build_gate_evaluation<'ctx, 'this>(
                     block = has_inverse_block;
 
                     // if the inverse is negative, then add modulus
-                    let zero = block.const_int_from_type(context, location, 0, u768_type)?;
+                    let zero = block.const_int_from_type(context, location, 0, u384_type)?;
                     let is_negative = block
                         .append_operation(arith::cmpi(
                             context,
@@ -648,17 +643,14 @@ fn build_gate_evaluation<'ctx, 'this>(
                         ))
                         .result(0)?
                         .into();
-                    let wrapped_inverse = block.addi(inverse, circuit_modulus_u768, location)?;
+                    let wrapped_inverse = block.addi(inverse, circuit_modulus, location)?;
                     let inverse = block.append_op_result(arith::select(
                         is_negative,
                         wrapped_inverse,
                         inverse,
                         location,
                     ))?;
 
-                    // Truncate back
-                    let inverse = block.trunci(inverse, u384_type, location)?;
-
                     gates[gate_offset.lhs] = Some(inverse);
                 }
                 // The imposibility to solve this mul gate offset would render the circuit unsolvable
diff --git a/src/metadata/runtime_bindings.rs b/src/metadata/runtime_bindings.rs
@@ -221,7 +221,7 @@ impl RuntimeBindingsMeta {
         {
             build_egcd_function(module, context, location, func_symbol)?;
         }
-        let integer_type: Type = IntegerType::new(context, 384 * 2).into();
+        let integer_type: Type = IntegerType::new(context, 384).into();
         // The struct returned by the function that contains both of the results
         let return_type = llvm::r#type::r#struct(context, &[integer_type, integer_type], false);
         Ok(block
@@ -813,105 +813,164 @@ pub fn setup_runtime(find_symbol_ptr: impl Fn(&str) -> Option<*mut c_void>) {
     }
 }
 
-/// The extended euclidean algorithm calculates the greatest common divisor (gcd) of two integers a and b,
-/// as well as the bezout coefficients x and y such that ax+by=gcd(a,b)
-/// if gcd(a,b) = 1, then x is the modular multiplicative inverse of a modulo b.
-/// See https://en.wikipedia.org/wiki/Extended_Euclidean_algorithm
+/// Build the extended euclidean algorithm MLIR function.
 ///
-/// This function declares a MLIR function that given two numbers a and b, returns a MLIR struct with gcd(a, b)
-/// and the bezout coefficient x. The declaration is done in the body of the module.
+/// The extended euclidean algorithm calculates the greatest common divisor
+/// (gcd) of two integers `a` and `b`, as well as the Bézout coefficients `x`
+/// and `y` such that `ax + by = gcd(a,b)`. If `gcd(a,b) = 1`, then `x` is the
+/// modular multiplicative inverse of `a` modulo `b`.
+///
+/// This function declares an MLIR function that, given two 384-bit integers
+/// `a` and `b`, returns an MLIR struct with `gcd(a,b)` and the Bézout
+/// coefficient `x`. The declaration is done in the body of the module.
 fn build_egcd_function<'ctx>(
     module: &Module,
     context: &'ctx Context,
     location: Location<'ctx>,
     func_symbol: &str,
 ) -> Result<()> {
-    let integer_type: Type = IntegerType::new(context, 384 * 2).into();
+    let integer_width = 384;
+    let integer_type = IntegerType::new(context, integer_width).into();
+
+    // Pseudocode for calculating the EGCD of two integers `a` and `b`.
+    // https://en.wikipedia.org/wiki/Extended_Euclidean_algorithm#Pseudocode.
+    //
+    // ```
+    // (old_r, new_r) := (a, b)
+    // (old_s, new_s) := (1, 0)
+    //
+    // while new_r != 0 do
+    //     quotient := old_r / new_r
+    //     (old_r, new_r) := (new_r, old_r − quotient * new_r)
+    //     (old_s, new_s) := (new_s, old_s − quotient * new_s)
+    //
+    // old_s is equal to Bézout coefficient X
+    // old_r is equal to GCD
+    // ```
+    //
+    // Note that when `b > a`, the first iteration swaps the values. Our
+    // implementation performs that swap up front, as we already know that `b > a`.
+    //
+    // The core idea of the method is that `gcd(a,b) = gcd(a,b-a)`, and that
+    // `gcd(a,b) = gcd(b,a)`. As an optimization, we can actually subtract `a`
+    // from `b` as many times as possible, so `gcd(a,b) = gcd(b%a,a)`.
+    //
+    // Take, for example, `a=21` and `b=54`:
+    //
+    //   gcd(21, 54)
+    // = gcd(12, 21)
+    // = gcd(9, 12)
+    // = gcd(3, 9)
+    // = gcd(0, 3)
+    // = 3
+    //
+    // Thus, the algorithm works by calculating a series of remainders `r` that
+    // starts with `b`, `a`, ..., where `r[i]` is the remainder of dividing
+    // `r[i-2]` by `r[i-1]`. At each step, `r[i]` can be calculated as:
+    //
+    // r[i] = r[i-2] - r[i-1] * quotient
+    //
+    // The GCD will be the last non-zero remainder.
+    //
+    // [54; 21; 12; 9; 3; 0]
+    //                 ^
+    //
+    // See Dr. Katherine Stange's Youtube video for a better explanation on how
+    // this works: https://www.youtube.com/watch?v=Jwf6ncRmhPg.
+    //
+    // The extended algorithm also obtains the Bézout coefficients
+    // by calculating a series of coefficients `s`. See Dr. Katherine
+    // Stange's Youtube video for a better explanation on how this works:
+    // https://www.youtube.com/watch?v=IwRtISxAHY4.
+
+    // Define entry block for function. Receives arguments `a` and `b`.
     let region = Region::new();
-
     let entry_block = region.append_block(Block::new(&[
-        (integer_type, location),
-        (integer_type, location),
+        (integer_type, location), // a
+        (integer_type, location), // b
     ]));
 
-    let a = entry_block.arg(0)?;
-    let b = entry_block.arg(1)?;
-    // The egcd algorithm works by calculating a series of remainders `rem`, being each `rem_i` the remainder of dividing `rem_{i-1}` with `rem_{i-2}`
-    // For the initial setup, rem_0 = b, rem_1 = a.
-    // This order is chosen because if we reverse them, then the first iteration will just swap them
-    let remainder = a;
-    let prev_remainder = b;
-
-    // Similarly we'll calculate another series which starts 0,1,... and from which we
-    // will retrieve the modular inverse of a
-    let prev_inverse = entry_block.const_int_from_type(context, location, 0, integer_type)?;
-    let inverse = entry_block.const_int_from_type(context, location, 1, integer_type)?;
-
+    // Define loop block for function. It receives the last two values of each series.
     let loop_block = region.append_block(Block::new(&[
-        (integer_type, location),
-        (integer_type, location),
-        (integer_type, location),
-        (integer_type, location),
+        (integer_type, location), // old_r
+        (integer_type, location), // new_r
+        (integer_type, location), // old_s
+        (integer_type, location), // new_s
     ]));
+
+    // Define end block for function.
     let end_block = region.append_block(Block::new(&[
-        (integer_type, location),
-        (integer_type, location),
+        (integer_type, location), // old_r
+        (integer_type, location), // old_s
     ]));
 
+    // Jump to loop block from entry block, with initial values.
+    // - old_r = b
+    // - new_r = a
+    // - old_s = 0
+    // - new_s = 1
     entry_block.append_operation(cf::br(
         &loop_block,
-        &[prev_remainder, remainder, prev_inverse, inverse],
+        &[
+            entry_block.arg(1)?,
+            entry_block.arg(0)?,
+            entry_block.const_int_from_type(context, location, 0, integer_type)?,
+            entry_block.const_int_from_type(context, location, 1, integer_type)?,
+        ],
         location,
     ));
 
-    // -- Loop body --
-    // Arguments are rem_(i-1), rem, inv_(i-1), inv
-    let prev_remainder = loop_block.arg(0)?;
-    let remainder = loop_block.arg(1)?;
-    let prev_inverse = loop_block.arg(2)?;
-    let inverse = loop_block.arg(3)?;
-
-    // First calculate q = rem_(i-1)/rem_i, rounded down
-    let quotient =
-        loop_block.append_op_result(arith::divui(prev_remainder, remainder, location))?;
-
-    // Then rem_(i+1) = rem_(i-1) - q * rem_i, and inv_(i+1) = inv_(i-1) - q * inv_i
-    let rem_times_quo = loop_block.muli(remainder, quotient, location)?;
-    let inv_times_quo = loop_block.muli(inverse, quotient, location)?;
-    let next_remainder =
-        loop_block.append_op_result(arith::subi(prev_remainder, rem_times_quo, location))?;
-    let next_inverse =
-        loop_block.append_op_result(arith::subi(prev_inverse, inv_times_quo, location))?;
-
-    // Check if rem_(i+1) is 0
-    // If true, then:
-    // - rem_i is the gcd of a and b
-    // - inv_i is the bezout coefficient x
-    let zero = loop_block.const_int_from_type(context, location, 0, integer_type)?;
-    let next_remainder_eq_zero =
-        loop_block.cmpi(context, CmpiPredicate::Eq, next_remainder, zero, location)?;
-    loop_block.append_operation(cf::cond_br(
-        context,
-        next_remainder_eq_zero,
-        &end_block,
-        &loop_block,
-        &[remainder, inverse],
-        &[remainder, next_remainder, inverse, next_inverse],
-        location,
-    ));
+    // LOOP BLOCK
+    {
+        let old_r = loop_block.arg(0)?;
+        let new_r = loop_block.arg(1)?;
+        let old_s = loop_block.arg(2)?;
+        let new_s = loop_block.arg(3)?;
+
+        // First calculate quotient of old_r/new_r.
+        let quotient = loop_block.append_op_result(arith::divui(old_r, new_r, location))?;
+
+        // Multiply quotient by new_r and new_s.
+        let quotient_by_new_r = loop_block.muli(quotient, new_r, location)?;
+        let quotient_by_new_s = loop_block.muli(quotient, new_s, location)?;
+
+        // Calculate new values for next iteration.
+        // - next_new_r := old_r − quotient * new_r
+        // - next_new_s := old_s − quotient * new_s
+        let next_new_r =
+            loop_block.append_op_result(arith::subi(old_r, quotient_by_new_r, location))?;
+        let next_new_s =
+            loop_block.append_op_result(arith::subi(old_s, quotient_by_new_s, location))?;
+
+        // Jump to end block if next_new_r is zero.
+        let zero = loop_block.const_int_from_type(context, location, 0, integer_type)?;
+        let next_new_r_is_zero =
+            loop_block.cmpi(context, CmpiPredicate::Eq, next_new_r, zero, location)?;
+        loop_block.append_operation(cf::cond_br(
+            context,
+            next_new_r_is_zero,
+            &end_block,
+            &loop_block,
+            &[new_r, new_s],
+            &[new_r, next_new_r, new_s, next_new_s],
+            location,
+        ));
+    }
 
-    // Create the struct that will contain the results
-    let results = end_block.append_op_result(llvm::undef(
-        llvm::r#type::r#struct(context, &[integer_type, integer_type], false),
-        location,
-    ))?;
-    let results = end_block.insert_values(
-        context,
-        location,
-        results,
-        &[end_block.arg(0)?, end_block.arg(1)?],
-    )?;
-    end_block.append_operation(llvm::r#return(Some(results), location));
+    // END BLOCK
+    {
+        let results = end_block.append_op_result(llvm::undef(
+            llvm::r#type::r#struct(context, &[integer_type, integer_type], false),
+            location,
+        ))?;
+        let results = end_block.insert_values(
+            context,
+            location,
+            results,
+            &[end_block.arg(0)?, end_block.arg(1)?],
+        )?;
+        end_block.append_operation(llvm::r#return(Some(results), location));
+    }
 
     let func_name = StringAttribute::new(context, func_symbol);
     module.body().append_operation(llvm::func(
diff --git a/src/types.rs b/src/types.rs
@@ -85,11 +85,26 @@ pub trait TypeBuilder {
 
     /// Return whether the type is a builtin.
     fn is_builtin(&self) -> bool;
-    /// Return whether the type requires a return pointer when returning.
+    /// Return whether the type requires a return pointer when returning,
+    /// instead of using the CPU registers.
+    ///
+    /// This attribute does not modify the compilation, and it only reflects
+    /// what the ABI of the target architecture already specifies.
+    /// - For x86-64: https://gitlab.com/x86-psABIs/x86-64-ABI.
+    /// - For AArch64: https://github.com/ARM-software/abi-aa.
+    ///
+    /// We can validate this empirically, by building a Cairo program that
+    /// returns a particular type, and seeing how it is lowered to machine code.
+    ///
+    /// ```bash
+    /// llc a.llvmir -o - --mtriple "aarch64"
+    /// llc a.llvmir -o - --mtriple "x86_64"
+    /// ```
     fn is_complex(
         &self,
         registry: &ProgramRegistry<CoreType, CoreLibfunc>,
     ) -> Result<bool, Self::Error>;
+
     /// Return whether the Sierra type resolves to a zero-sized type.
     fn is_zst(
         &self,
@@ -104,8 +119,21 @@ pub trait TypeBuilder {
         registry: &ProgramRegistry<CoreType, CoreLibfunc>,
     ) -> Result<Layout, Self::Error>;
 
-    /// Whether the layout should be allocated in memory (either the stack or the heap) when used as
-    /// a function invocation argument or return value.
+    /// Whether the layout should be allocated in memory (either the stack or
+    /// the heap) when used as a function invocation argument or return value.
+    ///
+    /// Unlike `is_complex`, this attribute alters the compilation:
+    ///
+    /// - When passing a memory allocated value to a function, we allocate that
+    ///   value on the stack, and pass a pointer to it.
+    ///
+    /// - If a function returns a memory allocated value, we receive a return
+    ///   pointer as its first argument, and write the return value there
+    ///   instead.
+    ///
+    /// The rationale behind allocating a value in memory, rather than
+    /// registers, is to avoid putting too much pressure on the register
+    /// allocation pass for really complex types, like enums.
     fn is_memory_allocated(
         &self,
         registry: &ProgramRegistry<CoreType, CoreLibfunc>,