Skip to content

Commit 0b28a8a

Browse files
committed
Test the performance penalty of not using lazy reduction in matrix-vector multiplication
1 parent d526633 commit 0b28a8a

File tree

3 files changed

+28
-25
lines changed

3 files changed

+28
-25
lines changed

mldsa/poly.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,19 @@ void poly_pointwise_montgomery(poly *c, const poly *a, const poly *b)
143143
}
144144
}
145145

146+
void poly_pointwise_acc_montgomery(poly *c, const poly *a, const poly *b)
147+
{
148+
unsigned int i;
149+
150+
for (i = 0; i < MLDSA_N; ++i)
151+
__loop__(
152+
invariant(i <= MLDSA_N))
153+
{
154+
c->coeffs[i] += montgomery_reduce((int64_t)a->coeffs[i] * b->coeffs[i]);
155+
}
156+
}
157+
158+
146159
void poly_power2round(poly *a1, poly *a0, const poly *a)
147160
{
148161
unsigned int i;

mldsa/poly.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,17 @@ __contract__(
174174
assigns(memory_slice(c, sizeof(poly)))
175175
);
176176

177+
178+
#define poly_pointwise_acc_montgomery \
179+
MLD_NAMESPACE(poly_pointwise_acc_montgomery)
180+
void poly_pointwise_acc_montgomery(poly *c, const poly *a, const poly *b)
181+
__contract__(
182+
requires(memory_no_alias(a, sizeof(poly)))
183+
requires(memory_no_alias(b, sizeof(poly)))
184+
requires(memory_no_alias(c, sizeof(poly)))
185+
assigns(memory_slice(c, sizeof(poly)))
186+
);
187+
177188
#define poly_power2round MLD_NAMESPACE(poly_power2round)
178189
/*************************************************
179190
* Name: poly_power2round

mldsa/polyvec.c

Lines changed: 4 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -259,32 +259,11 @@ void polyvecl_pointwise_poly_montgomery(polyvecl *r, const poly *a,
259259
void polyvecl_pointwise_acc_montgomery(poly *w, const polyvecl *u,
260260
const polyvecl *v)
261261
{
262-
unsigned int i, j;
263-
/* The second input is bounded by 9q. Hence, we can safely accumulate
264-
* in 64-bits without intermediate reductions as
265-
* MLDSA_L * MLD_NTT_BOUND * INT32_MAX < INT64_MAX
266-
* worst case is ML-DSA-87: 7 * 9 * q * 2**31 < 2**63
267-
* (likewise for negative values)
268-
*/
269-
270-
for (i = 0; i < MLDSA_N; i++)
271-
__loop__(
272-
assigns(i, j, object_whole(w))
273-
invariant(i <= MLDSA_N)
274-
)
262+
unsigned int i;
263+
poly_pointwise_montgomery(w, &u->vec[0], &v->vec[0]);
264+
for (i = 1; i < MLDSA_L; i++)
275265
{
276-
int64_t t = 0;
277-
for (j = 0; j < MLDSA_L; j++)
278-
__loop__(
279-
assigns(j, t)
280-
invariant(j <= MLDSA_L)
281-
invariant(t <= -(int64_t)j*INT32_MIN*MLD_NTT_BOUND)
282-
invariant(t >= (int64_t)j*INT32_MIN*MLD_NTT_BOUND)
283-
)
284-
{
285-
t += (int64_t)u->vec[j].coeffs[i] * v->vec[j].coeffs[i];
286-
}
287-
w->coeffs[i] = montgomery_reduce(t);
266+
poly_pointwise_acc_montgomery(w, &u->vec[i], &v->vec[i]);
288267
}
289268
}
290269

0 commit comments

Comments
 (0)