From d38be789db881e427d379f5a0cc6c8f79030cde4 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Thu, 11 Dec 2025 15:36:19 -0500 Subject: [PATCH] arm64: fix clang ICE on Windows in thunderx2t99 kernels Guard the .align directive in the dznrm2, scnrm2, zdot and zsum thunderx2t99 kernels to avoid an internal compiler error on AArch64 Windows with clang. See: https://github.com/llvm/llvm-project/issues/149547 See: #5076 Co-Authored-By: Claude Opus 4.5 --- kernel/arm64/dznrm2_thunderx2t99_fast.c | 3 +++ kernel/arm64/scnrm2_thunderx2t99.c | 3 +++ kernel/arm64/zdot_thunderx2t99.c | 3 +++ kernel/arm64/zsum_thunderx2t99.c | 3 +++ 4 files changed, 12 insertions(+) diff --git a/kernel/arm64/dznrm2_thunderx2t99_fast.c b/kernel/arm64/dznrm2_thunderx2t99_fast.c index 8405b388bc..90558c2a02 100644 --- a/kernel/arm64/dznrm2_thunderx2t99_fast.c +++ b/kernel/arm64/dznrm2_thunderx2t99_fast.c @@ -155,7 +155,10 @@ static double nrm2_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x) " cmp "J", xzr \n" " beq .Lnrm2_kernel_F1 \n" +/* https://github.com/llvm/llvm-project/issues/149547 */ +#if !(defined(__clang__) && defined(OS_WINDOWS)) " .align 5 \n" +#endif ".Lnrm2_kernel_F: \n" " "KERNEL_F" \n" " subs "J", "J", #1 \n" diff --git a/kernel/arm64/scnrm2_thunderx2t99.c b/kernel/arm64/scnrm2_thunderx2t99.c index 50790f4b70..4585b1ca33 100644 --- a/kernel/arm64/scnrm2_thunderx2t99.c +++ b/kernel/arm64/scnrm2_thunderx2t99.c @@ -238,7 +238,10 @@ static double nrm2_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x) " cmp "J", xzr \n" " beq 5f //nrm2_kernel_S_BEGIN \n" +/* https://github.com/llvm/llvm-project/issues/149547 */ +#if !(defined(__clang__) && defined(OS_WINDOWS)) " .align 5 \n" +#endif "2: //nrm2_kernel_F: \n" " "KERNEL_F" \n" " subs "J", "J", #1 \n" diff --git a/kernel/arm64/zdot_thunderx2t99.c b/kernel/arm64/zdot_thunderx2t99.c index d48392412b..c8abd38fc5 100644 --- a/kernel/arm64/zdot_thunderx2t99.c +++ b/kernel/arm64/zdot_thunderx2t99.c @@ -236,7 +236,10 @@ static void zdot_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLON " cmp "J", xzr 
\n" " beq 3f //dot_kernel_F1 \n" +/* https://github.com/llvm/llvm-project/issues/149547 */ +#if !(defined(__clang__) && defined(OS_WINDOWS)) " .align 5 \n" +#endif "2: //dot_kernel_F: \n" " "KERNEL_F" \n" " subs "J", "J", #1 \n" diff --git a/kernel/arm64/zsum_thunderx2t99.c b/kernel/arm64/zsum_thunderx2t99.c index 087dae2fea..b8ebdbdec1 100644 --- a/kernel/arm64/zsum_thunderx2t99.c +++ b/kernel/arm64/zsum_thunderx2t99.c @@ -136,7 +136,10 @@ static FLOAT zasum_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x) " cmp "J", xzr \n" " beq 3f //asum_kernel_F1 \n" +/* https://github.com/llvm/llvm-project/issues/149547 */ +#if !(defined(__clang__) && defined(OS_WINDOWS)) ".align 5 \n" +#endif "2: //asum_kernel_F16: \n" " "KERNEL_F16" \n" " subs "J", "J", #1 \n"