From 9b61b32e1540a109429e00513b6f8a26221dcef6 Mon Sep 17 00:00:00 2001 From: Aapo Alasuutari Date: Wed, 17 Sep 2025 13:11:47 +0300 Subject: [PATCH 01/25] feat(ecmascript_atomics): Initial commit --- Cargo.toml | 2 +- ecmascript_atomics/Cargo.toml | 12 + ecmascript_atomics/lib.rs | 1403 +++++++++++++++++++++++++++++++++ 3 files changed, 1416 insertions(+), 1 deletion(-) create mode 100644 ecmascript_atomics/Cargo.toml create mode 100644 ecmascript_atomics/lib.rs diff --git a/Cargo.toml b/Cargo.toml index 2ae20f36f..91f915175 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [workspace] resolver = "2" -members = ["nova_cli", "nova_vm", "small_string", "tests"] +members = ["ecmascript_atomics", "nova_cli", "nova_vm", "small_string", "tests"] exclude = ["nova_lint"] [workspace.package] diff --git a/ecmascript_atomics/Cargo.toml b/ecmascript_atomics/Cargo.toml new file mode 100644 index 000000000..d8c8f3f38 --- /dev/null +++ b/ecmascript_atomics/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "ecmascript_atomics" +version = "0.1.0" +repository = "https://github.com/trynova/nova/tree/main/ecmascript_atomics" +description = "ECMAScript specification compliant atomic operations for use with SharedArrayBuffers." +authors.workspace = true +edition.workspace = true +license.workspace = true +homepage.workspace = true + +[lib] +path = "lib.rs" diff --git a/ecmascript_atomics/lib.rs b/ecmascript_atomics/lib.rs new file mode 100644 index 000000000..6ab87c3e2 --- /dev/null +++ b/ecmascript_atomics/lib.rs @@ -0,0 +1,1403 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//! See the big comment in jit/AtomicOperations.h for an explanation. + +use core::ptr::NonNull; + +// is_64bit = "JS_64BIT" in buildconfig.defines +// cpu_arch = buildconfig.substs["TARGET_CPU"] +// is_gcc = buildconfig.substs["CC_TYPE"] == "gcc" + +macro_rules! fence { + (true, x86) => { + "mfence" + }; + (true, aarch64) => { + "dmb ish" + }; + (true, arm) => { + "dmb sy" + }; + (false, $_: tt) => { + "" + }; +} + +macro_rules! gen_load { + (u8, $ptr: ident, $barrier: tt) => { + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + let z: u8; + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. + let ptr = unsafe { &mut *$ptr.as_ptr() }; + unsafe { core::arch::asm!( + "mov {val}, [{ptr}]", + fence!(false, x86), + ptr = in(reg) ptr, + val = lateout(reg_byte) z, + options(preserves_flags, nostack, pure, readonly) + ) }; + return z; + } + + #[cfg(target_arch = "aarch64")] + { + let z: u32; + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. + let ptr = unsafe { &mut *$ptr.as_ptr() }; + unsafe { core::arch::asm!( + "ldrb {val:w}, [{ptr}]", + fence!($barrier, aarch64), + ptr = in(reg) ptr, + val = lateout(reg) z, + options(preserves_flags, nostack, pure, readonly) + ) }; + return z as u8; + } + + #[cfg(target_arch = "arm")] + { + let z: u32; + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. 
+ let ptr = unsafe { &mut *$ptr.as_ptr() }; + unsafe { core::arch::asm!( + "ldrb {val:w}, [{ptr}]", + fence!($barrier, arm), + ptr = in(reg) ptr, + val = lateout(reg) z, + options(preserves_flags, nostack, pure, readonly) + ) }; + return z as u8; + } + + #[expect(unreachable_code)] + const { panic!("Unexpected arch") } + }; + (u16, $ptr: ident, $barrier: tt) => { + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + let z: u16; + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. + let ptr = unsafe { &mut *$ptr.as_ptr() }; + unsafe { core::arch::asm!( + "mov {val:x}, [{ptr}]", + fence!(false, x86), + ptr = in(reg) ptr, + val = lateout(reg) z, + options(preserves_flags, nostack, pure, readonly) + ) }; + return z; + } + + #[cfg(target_arch = "aarch64")] + { + let z: u32; + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. + let ptr = unsafe { &mut *$ptr.as_ptr() }; + unsafe { core::arch::asm!( + "ldrh {val:w}, [{ptr}]", + fence!($barrier, aarch64), + ptr = in(reg) ptr, + val = lateout(reg) z, + options(preserves_flags, nostack, pure, readonly) + ) }; + return z as u16; + } + + #[cfg(target_arch = "arm")] + { + let z: u32; + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. + let ptr = unsafe { &mut *$ptr.as_ptr() }; + unsafe { core::arch::asm!( + "ldrh {val:w}, [{ptr}]", + fence!($barrier, arm), + ptr = in(reg) ptr, + val = lateout(reg) z, + options(preserves_flags, nostack, pure, readonly) + ) }; + return z as u16; + } + + #[expect(unreachable_code)] + const { panic!("Unexpected arch") } + }; + (u32, $ptr: ident, $barrier: tt) => { + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + let z: u32; + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. + let ptr = unsafe { &mut *$ptr.as_ptr() }; + unsafe { core::arch::asm!( + "mov {val:e}, [{ptr}]", + fence!(false, x86), + ptr = in(reg) ptr, + val = lateout(reg) z, + options(preserves_flags, nostack, pure, readonly) + ) }; + return z; + } + + #[cfg(target_arch = "aarch64")] + { + let z: u32; + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. + let ptr = unsafe { &mut *$ptr.as_ptr() }; + unsafe { core::arch::asm!( + "ldr {val:w}, [{ptr}]", + fence!($barrier, aarch64), + ptr = in(reg) ptr, + val = lateout(reg) z, + options(preserves_flags, nostack, pure, readonly) + ) }; + return z; + } + + #[cfg(target_arch = "arm")] + { + let z: u32; + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. + let ptr = unsafe { &mut *$ptr.as_ptr() }; + unsafe { core::arch::asm!( + "ldr {val:w}, [{ptr}]", + fence!($barrier, arm), + ptr = in(reg) ptr, + val = lateout(reg) z, + options(preserves_flags, nostack, pure, readonly) + ) }; + return z; + } + + #[expect(unreachable_code)] + const { panic!("Unexpected arch") } + }; + (u64, $ptr: ident, $barrier: tt) => { + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + let z: u64; + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. + let ptr = unsafe { &mut *$ptr.as_ptr() }; + unsafe { core::arch::asm!( + "mov {val:r}, [{ptr}]", + fence!(false, x86), + ptr = in(reg) ptr, + val = lateout(reg) z, + options(preserves_flags, nostack, pure, readonly) + ) }; + return z; + } + + #[cfg(target_arch = "aarch64")] + { + let z: u64; + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. 
+ let ptr = unsafe { &mut *$ptr.as_ptr() }; + unsafe { core::arch::asm!( + "ldr {val:x}, [{ptr}]", + fence!($barrier, aarch64), + ptr = in(reg) ptr, + val = lateout(reg) z, + options(preserves_flags, nostack, pure, readonly) + ) }; + return z; + } + + #[cfg(target_arch = "arm")] + { + const { panic!("Unexpected size") } + } + + #[expect(unreachable_code)] + const { panic!("Unexpected arch") } + }; + ($type: ty, $ptr: ident, $barrier: tt) => { + panic!("Unsupported type"); + }; +} + +macro_rules! gen_store { + (u8, $ptr: ident, $val: ident, $barrier: tt) => { + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. + let ptr = unsafe { &mut *$ptr.as_ptr() }; + unsafe { core::arch::asm!( + "mov [{ptr}], {val}", + fence!($barrier, x86), + ptr = in(reg) ptr, + val = in(reg_byte) $val, + options(preserves_flags, nostack) + ) }; + return; + // elif size == 16: + // insns += fmt_insn("movw %[val], (%[addr])") + // elif size == 32: + // insns += fmt_insn("movl %[val], (%[addr])") + // else: + // assert size == 64 + // insns += fmt_insn("movq %[val], (%[addr])") + // if barrier: + // insns += fmt_insn("mfence") + // return """ + // INLINE_ATTR void %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { + // asm volatile (%(insns)s + // : + // : [addr] "r" (addr), [val] "r"(val) + // : "memory"); + // }""" % { + // "cpp_type": cpp_type, + // "fun_name": fun_name, + // "insns": insns, + // } + } + + #[cfg(target_arch = "aarch64")] + { + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. + let ptr = unsafe { &mut *$ptr.as_ptr() }; + unsafe { core::arch::asm!( + fence!($barrier, aarch64), + "strb [{ptr}], {val:w}", + fence!($barrier, aarch64), + ptr = in(reg) ptr, + val = in(reg) val, + options(preserves_flags, nostack) + ) }; + return; + // elif size == 16: + // insns += fmt_insn("strh %w[val], [%x[addr]]") + // elif size == 32: + // insns += fmt_insn("str %w[val], [%x[addr]]") + // else: + // assert size == 64 + // insns += fmt_insn("str %x[val], [%x[addr]]") + } + + #[cfg(target_arch = "arm")] + { + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. + let ptr = unsafe { &mut *$ptr.as_ptr() }; + unsafe { core::arch::asm!( + fence!($barrier, arm), + "strb [{ptr}], {val:w}", + fence!($barrier, arm), + ptr = in(reg) ptr, + val = in(reg) val, + options(preserves_flags, nostack) + ) }; + return; + } + + #[expect(unreachable_code)] + const { + panic!("Unexpected arch") + } + }; + (u16, $ptr: ident, $val: ident, $barrier: tt) => { + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. + let ptr = unsafe { &mut *$ptr.as_ptr() }; + unsafe { core::arch::asm!( + "mov [{ptr}], {val:x}", + fence!($barrier, x86), + ptr = in(reg) ptr, + val = in(reg) $val, + options(preserves_flags, nostack) + ) }; + return; + } + + #[cfg(target_arch = "aarch64")] + { + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. + let ptr = unsafe { &mut *$ptr.as_ptr() }; + unsafe { core::arch::asm!( + fence!($barrier, aarch64), + "strh [{ptr}], {val:w}", + fence!($barrier, aarch64), + ptr = in(reg) ptr, + val = in(reg) val, + options(preserves_flags, nostack) + ) }; + return; + } + + #[cfg(target_arch = "arm")] + { + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. 
+ let ptr = unsafe { &mut *$ptr.as_ptr() }; + unsafe { core::arch::asm!( + fence!($barrier, arm), + "strh [{ptr}], {val:w}", + fence!($barrier, arm), + ptr = in(reg) ptr, + val = in(reg) val, + options(preserves_flags, nostack) + ) }; + return; + } + + #[expect(unreachable_code)] + const { + panic!("Unexpected arch") + } + }; + (u32, $ptr: ident, $val: ident, $barrier: tt) => { + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. + let ptr = unsafe { &mut *$ptr.as_ptr() }; + unsafe { core::arch::asm!( + "mov [{ptr}], {val:e}", + fence!($barrier, x86), + ptr = in(reg) ptr, + val = in(reg) $val, + options(preserves_flags, nostack) + ) }; + return; + } + + #[cfg(target_arch = "aarch64")] + { + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. + let ptr = unsafe { &mut *$ptr.as_ptr() }; + unsafe { core::arch::asm!( + fence!($barrier, aarch64), + "str [{ptr}], {val:w}", + fence!($barrier, aarch64), + ptr = in(reg) ptr, + val = in(reg) val, + options(preserves_flags, nostack) + ) }; + return; + } + + #[cfg(target_arch = "arm")] + { + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. + let ptr = unsafe { &mut *$ptr.as_ptr() }; + unsafe { core::arch::asm!( + fence!($barrier, arm), + "str [{ptr}], {val:w}", + fence!($barrier, arm), + ptr = in(reg) ptr, + val = in(reg) val, + options(preserves_flags, nostack) + ) }; + return; + } + + #[expect(unreachable_code)] + const { + panic!("Unexpected arch") + } + }; + (u64, $ptr: ident, $val: ident, $barrier: tt) => { + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. + let ptr = unsafe { &mut *$ptr.as_ptr() }; + unsafe { core::arch::asm!( + "mov [{ptr}], {val:x}", + fence!($barrier, x86), + ptr = in(reg) ptr, + val = in(reg) $val, + options(preserves_flags, nostack) + ) }; + return; + } + + #[cfg(target_arch = "aarch64")] + { + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. + let ptr = unsafe { &mut *$ptr.as_ptr() }; + unsafe { core::arch::asm!( + fence!($barrier, aarch64), + "str [{ptr}], {val:x}", + fence!($barrier, aarch64), + ptr = in(reg) ptr, + val = in(reg) val, + options(preserves_flags, nostack) + ) }; + return; + } + + #[cfg(target_arch = "arm")] + { + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. + let ptr = unsafe { &mut *$ptr.as_ptr() }; + unsafe { core::arch::asm!( + fence!($barrier, arm), + "str [{ptr}], {val:x}", + fence!($barrier, arm), + ptr = in(reg) ptr, + val = in(reg) val, + options(preserves_flags, nostack) + ) }; + return; + } + + #[expect(unreachable_code)] + const { + panic!("Unexpected arch") + } + }; +} + +macro_rules! gen_exchange { + ($type: ty, $size: literal) => { + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + todo!(); + } + + #[cfg(target_arch = "aarch64")] + { + todo!(); + } + + #[cfg(target_arch = "arm")] + { + todo!(); + } + + #[expect(unreachable_code)] + const { + panic!("Unexpected arch") + } + // NOTE: the assembly code must match the generated code in: + // - MacroAssembler::atomicExchange + // - MacroAssembler::atomicExchange64 (on 64-bit platforms) + // if cpu_arch in ("x86", "x86_64"): + // // Request an input/output register for `val` so that we can simply XCHG it + // // with *addr. 
+ // insns = "" + // if size == 8: + // insns += fmt_insn("xchgb %[val], (%[addr])") + // elif size == 16: + // insns += fmt_insn("xchgw %[val], (%[addr])") + // elif size == 32: + // insns += fmt_insn("xchgl %[val], (%[addr])") + // else: + // assert size == 64 + // insns += fmt_insn("xchgq %[val], (%[addr])") + // return """ + // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { + // asm volatile (%(insns)s + // : [val] "+r" (val) + // : [addr] "r" (addr) + // : "memory"); + // return val; + // }""" % { + // "cpp_type": cpp_type, + // "fun_name": fun_name, + // "insns": insns, + // } + // if cpu_arch == "aarch64": + // insns = "" + // insns += fmt_insn("dmb ish") + // insns += fmt_insn("0:") + // if size == 8: + // insns += fmt_insn("ldxrb %w[res], [%x[addr]]") + // insns += fmt_insn("stxrb %w[scratch], %w[val], [%x[addr]]") + // elif size == 16: + // insns += fmt_insn("ldxrh %w[res], [%x[addr]]") + // insns += fmt_insn("stxrh %w[scratch], %w[val], [%x[addr]]") + // elif size == 32: + // insns += fmt_insn("ldxr %w[res], [%x[addr]]") + // insns += fmt_insn("stxr %w[scratch], %w[val], [%x[addr]]") + // else: + // assert size == 64 + // insns += fmt_insn("ldxr %x[res], [%x[addr]]") + // insns += fmt_insn("stxr %w[scratch], %x[val], [%x[addr]]") + // insns += fmt_insn("cbnz %w[scratch], 0b") + // insns += fmt_insn("dmb ish") + // return """ + // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { + // %(cpp_type)s res; + // uint32_t scratch; + // asm volatile (%(insns)s + // : [res] "=&r"(res), [scratch] "=&r"(scratch) + // : [addr] "r" (addr), [val] "r"(val) + // : "memory", "cc"); + // return res; + // }""" % { + // "cpp_type": cpp_type, + // "fun_name": fun_name, + // "insns": insns, + // } + // if cpu_arch == "arm": + // insns = "" + // insns += fmt_insn("dmb sy") + // insns += fmt_insn("0:") + // if size == 8: + // insns += fmt_insn("ldrexb %[res], [%[addr]]") + // insns += fmt_insn("strexb %[scratch], %[val], [%[addr]]") + // elif size == 16: + // insns += fmt_insn("ldrexh %[res], [%[addr]]") + // insns += fmt_insn("strexh %[scratch], %[val], [%[addr]]") + // else: + // assert size == 32 + // insns += fmt_insn("ldrex %[res], [%[addr]]") + // insns += fmt_insn("strex %[scratch], %[val], [%[addr]]") + // insns += fmt_insn("cmp %[scratch], #1") + // insns += fmt_insn("beq 0b") + // insns += fmt_insn("dmb sy") + // return """ + // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { + // %(cpp_type)s res; + // uint32_t scratch; + // asm volatile (%(insns)s + // : [res] "=&r"(res), [scratch] "=&r"(scratch) + // : [addr] "r" (addr), [val] "r"(val) + // : "memory", "cc"); + // return res; + // }""" % { + // "cpp_type": cpp_type, + // "fun_name": fun_name, + // "insns": insns, + // } + // raise Exception("Unexpected arch") + }; +} + +macro_rules! gen_cmpxchg { + ($type: ty, $size: literal) => { + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + todo!(); + } + + #[cfg(target_arch = "aarch64")] + { + todo!(); + } + + #[cfg(target_arch = "arm")] + { + todo!(); + } + + #[expect(unreachable_code)] + const { + panic!("Unexpected arch") + } + // NOTE: the assembly code must match the generated code in: + // - MacroAssembler::compareExchange + // - MacroAssembler::compareExchange64 + // if cpu_arch == "x86" and size == 64: + // // Use a +A constraint to load `oldval` into EDX:EAX as input/output. + // // `newval` is loaded into ECX:EBX. 
+ // return r""" + // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, + // %(cpp_type)s oldval, + // %(cpp_type)s newval) { + // asm volatile ("lock; cmpxchg8b (%%[addr])\n\t" + // : "+A" (oldval) + // : [addr] "r" (addr), + // "b" (uint32_t(newval & 0xffff'ffff)), + // "c" (uint32_t(newval >> 32)) + // : "memory", "cc"); + // return oldval; + // }""" % { + // "cpp_type": cpp_type, + // "fun_name": fun_name, + // } + // if cpu_arch == "arm" and size == 64: + // return r""" + // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, + // %(cpp_type)s oldval, + // %(cpp_type)s newval) { + // uint32_t oldval0 = oldval & 0xffff'ffff; + // uint32_t oldval1 = oldval >> 32; + // uint32_t newval0 = newval & 0xffff'ffff; + // uint32_t newval1 = newval >> 32; + // asm volatile ( + // "dmb sy\n\t" + // "0: ldrexd r0, r1, [%%[addr]]\n\t" + // "cmp r0, %%[oldval0]\n\t" + // "bne 1f\n\t" + // "cmp r1, %%[oldval1]\n\t" + // "bne 1f\n\t" + // "mov r2, %%[newval0]\n\t" + // "mov r3, %%[newval1]\n\t" + // "strexd r4, r2, r3, [%%[addr]]\n\t" + // "cmp r4, #1\n\t" + // "beq 0b\n\t" + // "1: dmb sy\n\t" + // "mov %%[oldval0], r0\n\t" + // "mov %%[oldval1], r1\n\t" + // : [oldval0] "+&r" (oldval0), [oldval1] "+&r"(oldval1) + // : [addr] "r" (addr), [newval0] "r" (newval0), [newval1] "r" (newval1) + // : "memory", "cc", "r0", "r1", "r2", "r3", "r4"); + // return uint64_t(oldval0) | (uint64_t(oldval1) << 32); + // }""" % { + // "cpp_type": cpp_type, + // "fun_name": fun_name, + // } + // if cpu_arch in ("x86", "x86_64"): + // // Use a +a constraint to load `oldval` into RAX as input/output register. + // insns = "" + // if size == 8: + // insns += fmt_insn("lock; cmpxchgb %[newval], (%[addr])") + // elif size == 16: + // insns += fmt_insn("lock; cmpxchgw %[newval], (%[addr])") + // elif size == 32: + // insns += fmt_insn("lock; cmpxchgl %[newval], (%[addr])") + // else: + // assert size == 64 + // insns += fmt_insn("lock; cmpxchgq %[newval], (%[addr])") + // return """ + // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, + // %(cpp_type)s oldval, + // %(cpp_type)s newval) { + // asm volatile (%(insns)s + // : [oldval] "+a" (oldval) + // : [addr] "r" (addr), [newval] "r" (newval) + // : "memory", "cc"); + // return oldval; + // }""" % { + // "cpp_type": cpp_type, + // "fun_name": fun_name, + // "insns": insns, + // } + // if cpu_arch == "aarch64": + // insns = "" + // insns += fmt_insn("dmb ish") + // insns += fmt_insn("0:") + // if size == 8: + // insns += fmt_insn("uxtb %w[scratch], %w[oldval]") + // insns += fmt_insn("ldxrb %w[res], [%x[addr]]") + // insns += fmt_insn("cmp %w[res], %w[scratch]") + // insns += fmt_insn("b.ne 1f") + // insns += fmt_insn("stxrb %w[scratch], %w[newval], [%x[addr]]") + // elif size == 16: + // insns += fmt_insn("uxth %w[scratch], %w[oldval]") + // insns += fmt_insn("ldxrh %w[res], [%x[addr]]") + // insns += fmt_insn("cmp %w[res], %w[scratch]") + // insns += fmt_insn("b.ne 1f") + // insns += fmt_insn("stxrh %w[scratch], %w[newval], [%x[addr]]") + // elif size == 32: + // insns += fmt_insn("mov %w[scratch], %w[oldval]") + // insns += fmt_insn("ldxr %w[res], [%x[addr]]") + // insns += fmt_insn("cmp %w[res], %w[scratch]") + // insns += fmt_insn("b.ne 1f") + // insns += fmt_insn("stxr %w[scratch], %w[newval], [%x[addr]]") + // else: + // assert size == 64 + // insns += fmt_insn("mov %x[scratch], %x[oldval]") + // insns += fmt_insn("ldxr %x[res], [%x[addr]]") + // insns += fmt_insn("cmp %x[res], %x[scratch]") + // insns += fmt_insn("b.ne 1f") + // insns += 
fmt_insn("stxr %w[scratch], %x[newval], [%x[addr]]") + // insns += fmt_insn("cbnz %w[scratch], 0b") + // insns += fmt_insn("1: dmb ish") + // return """ + // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, + // %(cpp_type)s oldval, + // %(cpp_type)s newval) { + // %(cpp_type)s res, scratch; + // asm volatile (%(insns)s + // : [res] "=&r" (res), [scratch] "=&r" (scratch) + // : [addr] "r" (addr), [oldval] "r"(oldval), [newval] "r" (newval) + // : "memory", "cc"); + // return res; + // }""" % { + // "cpp_type": cpp_type, + // "fun_name": fun_name, + // "insns": insns, + // } + // if cpu_arch == "arm": + // insns = "" + // insns += fmt_insn("dmb sy") + // insns += fmt_insn("0:") + // if size == 8: + // insns += fmt_insn("uxtb %[scratch], %[oldval]") + // insns += fmt_insn("ldrexb %[res], [%[addr]]") + // insns += fmt_insn("cmp %[res], %[scratch]") + // insns += fmt_insn("bne 1f") + // insns += fmt_insn("strexb %[scratch], %[newval], [%[addr]]") + // elif size == 16: + // insns += fmt_insn("uxth %[scratch], %[oldval]") + // insns += fmt_insn("ldrexh %[res], [%[addr]]") + // insns += fmt_insn("cmp %[res], %[scratch]") + // insns += fmt_insn("bne 1f") + // insns += fmt_insn("strexh %[scratch], %[newval], [%[addr]]") + // else: + // assert size == 32 + // insns += fmt_insn("mov %[scratch], %[oldval]") + // insns += fmt_insn("ldrex %[res], [%[addr]]") + // insns += fmt_insn("cmp %[res], %[scratch]") + // insns += fmt_insn("bne 1f") + // insns += fmt_insn("strex %[scratch], %[newval], [%[addr]]") + // insns += fmt_insn("cmp %[scratch], #1") + // insns += fmt_insn("beq 0b") + // insns += fmt_insn("1: dmb sy") + // return """ + // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, + // %(cpp_type)s oldval, + // %(cpp_type)s newval) { + // %(cpp_type)s res, scratch; + // asm volatile (%(insns)s + // : [res] "=&r" (res), [scratch] "=&r" (scratch) + // : [addr] "r" (addr), [oldval] "r"(oldval), [newval] "r" (newval) + // : "memory", "cc"); + // return res; + // }""" % { + // "cpp_type": cpp_type, + // "fun_name": fun_name, + // "insns": insns, + // } + // raise Exception("Unexpected arch") + }; +} + +macro_rules! gen_fetchop { + ($type: ty, $size: literal, $op: tt) => { + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + todo!(); + } + + #[cfg(target_arch = "aarch64")] + { + todo!(); + } + + #[cfg(target_arch = "arm")] + { + todo!(); + } + + #[expect(unreachable_code)] + const { panic!("Unexpected arch") } + // NOTE: the assembly code must match the generated code in: + // - MacroAssembler::atomicFetchOp + // - MacroAssembler::atomicFetchOp64 (on 64-bit platforms) + // if cpu_arch in ("x86", "x86_64"): + // // The `add` operation can be optimized with XADD. + // if op == "add": + // insns = "" + // if size == 8: + // insns += fmt_insn("lock; xaddb %[val], (%[addr])") + // elif size == 16: + // insns += fmt_insn("lock; xaddw %[val], (%[addr])") + // elif size == 32: + // insns += fmt_insn("lock; xaddl %[val], (%[addr])") + // else: + // assert size == 64 + // insns += fmt_insn("lock; xaddq %[val], (%[addr])") + // return """ + // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { + // asm volatile (%(insns)s + // : [val] "+&r" (val) + // : [addr] "r" (addr) + // : "memory", "cc"); + // return val; + // }""" % { + // "cpp_type": cpp_type, + // "fun_name": fun_name, + // "insns": insns, + // } + // // Use a +a constraint to ensure `res` is stored in RAX. This is required + // // for the CMPXCHG instruction. 
+ // insns = "" + // if size == 8: + // insns += fmt_insn("movb (%[addr]), %[res]") + // insns += fmt_insn("0: movb %[res], %[scratch]") + // insns += fmt_insn("OPb %[val], %[scratch]") + // insns += fmt_insn("lock; cmpxchgb %[scratch], (%[addr])") + // elif size == 16: + // insns += fmt_insn("movw (%[addr]), %[res]") + // insns += fmt_insn("0: movw %[res], %[scratch]") + // insns += fmt_insn("OPw %[val], %[scratch]") + // insns += fmt_insn("lock; cmpxchgw %[scratch], (%[addr])") + // elif size == 32: + // insns += fmt_insn("movl (%[addr]), %[res]") + // insns += fmt_insn("0: movl %[res], %[scratch]") + // insns += fmt_insn("OPl %[val], %[scratch]") + // insns += fmt_insn("lock; cmpxchgl %[scratch], (%[addr])") + // else: + // assert size == 64 + // insns += fmt_insn("movq (%[addr]), %[res]") + // insns += fmt_insn("0: movq %[res], %[scratch]") + // insns += fmt_insn("OPq %[val], %[scratch]") + // insns += fmt_insn("lock; cmpxchgq %[scratch], (%[addr])") + // insns = insns.replace("OP", op) + // insns += fmt_insn("jnz 0b") + // return """ + // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { + // %(cpp_type)s res, scratch; + // asm volatile (%(insns)s + // : [res] "=&a" (res), [scratch] "=&r" (scratch) + // : [addr] "r" (addr), [val] "r"(val) + // : "memory", "cc"); + // return res; + // }""" % { + // "cpp_type": cpp_type, + // "fun_name": fun_name, + // "insns": insns, + // } + // if cpu_arch == "aarch64": + // insns = "" + // insns += fmt_insn("dmb ish") + // insns += fmt_insn("0:") + // if size == 8: + // insns += fmt_insn("ldxrb %w[res], [%x[addr]]") + // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") + // insns += fmt_insn("stxrb %w[scratch2], %w[scratch1], [%x[addr]]") + // elif size == 16: + // insns += fmt_insn("ldxrh %w[res], [%x[addr]]") + // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") + // insns += fmt_insn("stxrh %w[scratch2], %w[scratch1], [%x[addr]]") + // elif size == 32: + // insns += fmt_insn("ldxr %w[res], [%x[addr]]") + // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") + // insns += fmt_insn("stxr %w[scratch2], %w[scratch1], [%x[addr]]") + // else: + // assert size == 64 + // insns += fmt_insn("ldxr %x[res], [%x[addr]]") + // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") + // insns += fmt_insn("stxr %w[scratch2], %x[scratch1], [%x[addr]]") + // cpu_op = op + // if cpu_op == "or": + // cpu_op = "orr" + // if cpu_op == "xor": + // cpu_op = "eor" + // insns = insns.replace("OP", cpu_op) + // insns += fmt_insn("cbnz %w[scratch2], 0b") + // insns += fmt_insn("dmb ish") + // return """ + // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { + // %(cpp_type)s res; + // uintptr_t scratch1, scratch2; + // asm volatile (%(insns)s + // : [res] "=&r" (res), [scratch1] "=&r" (scratch1), [scratch2] "=&r"(scratch2) + // : [addr] "r" (addr), [val] "r"(val) + // : "memory", "cc"); + // return res; + // }""" % { + // "cpp_type": cpp_type, + // "fun_name": fun_name, + // "insns": insns, + // } + // if cpu_arch == "arm": + // insns = "" + // insns += fmt_insn("dmb sy") + // insns += fmt_insn("0:") + // if size == 8: + // insns += fmt_insn("ldrexb %[res], [%[addr]]") + // insns += fmt_insn("OP %[scratch1], %[res], %[val]") + // insns += fmt_insn("strexb %[scratch2], %[scratch1], [%[addr]]") + // elif size == 16: + // insns += fmt_insn("ldrexh %[res], [%[addr]]") + // insns += fmt_insn("OP %[scratch1], %[res], %[val]") + // insns += fmt_insn("strexh %[scratch2], %[scratch1], [%[addr]]") + // else: + // 
assert size == 32 + // insns += fmt_insn("ldrex %[res], [%[addr]]") + // insns += fmt_insn("OP %[scratch1], %[res], %[val]") + // insns += fmt_insn("strex %[scratch2], %[scratch1], [%[addr]]") + // cpu_op = op + // if cpu_op == "or": + // cpu_op = "orr" + // if cpu_op == "xor": + // cpu_op = "eor" + // insns = insns.replace("OP", cpu_op) + // insns += fmt_insn("cmp %[scratch2], #1") + // insns += fmt_insn("beq 0b") + // insns += fmt_insn("dmb sy") + // return """ + // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { + // %(cpp_type)s res; + // uintptr_t scratch1, scratch2; + // asm volatile (%(insns)s + // : [res] "=&r" (res), [scratch1] "=&r" (scratch1), [scratch2] "=&r"(scratch2) + // : [addr] "r" (addr), [val] "r"(val) + // : "memory", "cc"); + // return res; + // }""" % { + // "cpp_type": cpp_type, + // "fun_name": fun_name, + // "insns": insns, + // } + // raise Exception("Unexpected arch") + }; +} + +macro_rules! gen_copy { + ($type: ty, $size: tt, $unroll: tt, $direction: tt) => { + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + todo!(); + } + + #[cfg(target_arch = "aarch64")] + { + todo!(); + } + + #[cfg(target_arch = "arm")] + { + todo!(); + } + + #[expect(unreachable_code)] + const { + panic!("Unexpected arch") + } + // assert direction in ("down", "up") + // offset = 0 + // if direction == "up": + // offset = unroll - 1 + // insns = "" + // for i in range(unroll): + // if cpu_arch in ("x86", "x86_64"): + // if size == 1: + // insns += fmt_insn("movb OFFSET(%[src]), %[scratch]") + // insns += fmt_insn("movb %[scratch], OFFSET(%[dst])") + // elif size == 2: + // insns += fmt_insn("movw OFFSET(%[src]), %[scratch]") + // insns += fmt_insn("movw %[scratch], OFFSET(%[dst])") + // elif size == 4: + // insns += fmt_insn("movl OFFSET(%[src]), %[scratch]") + // insns += fmt_insn("movl %[scratch], OFFSET(%[dst])") + // else: + // assert size == 8 + // insns += fmt_insn("movq OFFSET(%[src]), %[scratch]") + // insns += fmt_insn("movq %[scratch], OFFSET(%[dst])") + // elif cpu_arch == "aarch64": + // if size == 1: + // insns += fmt_insn("ldrb %w[scratch], [%x[src], OFFSET]") + // insns += fmt_insn("strb %w[scratch], [%x[dst], OFFSET]") + // elif size == 2: + // insns += fmt_insn("ldrh %w[scratch], [%x[src], OFFSET]") + // insns += fmt_insn("strh %w[scratch], [%x[dst], OFFSET]") + // elif size == 4: + // insns += fmt_insn("ldr %w[scratch], [%x[src], OFFSET]") + // insns += fmt_insn("str %w[scratch], [%x[dst], OFFSET]") + // else: + // assert size == 8 + // insns += fmt_insn("ldr %x[scratch], [%x[src], OFFSET]") + // insns += fmt_insn("str %x[scratch], [%x[dst], OFFSET]") + // elif cpu_arch == "arm": + // if size == 1: + // insns += fmt_insn("ldrb %[scratch], [%[src], #OFFSET]") + // insns += fmt_insn("strb %[scratch], [%[dst], #OFFSET]") + // elif size == 2: + // insns += fmt_insn("ldrh %[scratch], [%[src], #OFFSET]") + // insns += fmt_insn("strh %[scratch], [%[dst], #OFFSET]") + // else: + // assert size == 4 + // insns += fmt_insn("ldr %[scratch], [%[src], #OFFSET]") + // insns += fmt_insn("str %[scratch], [%[dst], #OFFSET]") + // else: + // raise Exception("Unexpected arch") + // insns = insns.replace("OFFSET", str(offset * size)) + + // if direction == "down": + // offset += 1 + // else: + // offset -= 1 + + // return """ + // INLINE_ATTR void %(fun_name)s(uint8_t* dst, const uint8_t* src) { + // %(cpp_type)s* dst_ = reinterpret_cast<%(cpp_type)s*>(dst); + // const %(cpp_type)s* src_ = reinterpret_cast(src); + // %(cpp_type)s scratch; + // asm 
volatile (%(insns)s + // : [scratch] "=&r" (scratch) + // : [dst] "r" (dst_), [src] "r"(src_) + // : "memory"); + // }""" % { + // "cpp_type": cpp_type, + // "fun_name": fun_name, + // "insns": insns, + }; +} + +/// ECMAScript atomic memory orderings +/// +/// Memory orderings specify the way atomic operations synchronise memory. +/// With [`Ordering::Unordered`], no synchronisation is performed. With +/// [`Ordering::SeqCst`], a store-load pair of operations synchronize other +/// memory while additionally preserving a total order of such operations +/// across all threads. +/// +/// The ECMAScript memory model is explained in the [ECMAScript Language +/// specification](https://tc39.es/ecma262/#sec-memory-model). Note that the +/// "INIT" ordering is not offered here as it is the purview of the memory +/// allocator. +#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] +#[non_exhaustive] +pub enum Ordering { + Unordered, + SeqCst, +} + +/// A sequentially consistent atomic fence. +/// +/// See [std::sync::atomic::fence] for details. +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn fence() { + core::sync::atomic::fence(core::sync::atomic::Ordering::SeqCst); +} + +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_load_8_seq_cst(ptr: NonNull<()>) -> u8 { + gen_load!(u8, ptr, true); +} +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_load_16_seq_cst(ptr: NonNull<()>) -> u16 { + gen_load!(u16, ptr, true); +} +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_load_32_seq_cst(ptr: NonNull<()>) -> u32 { + gen_load!(u32, ptr, true); +} + +// if is_64bit: +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_load_64_seq_cst(ptr: NonNull<()>) -> u64 { + gen_load!(u64, ptr, true); +} + +// These are access-atomic up to sizeof(uintptr_t). 
+#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_load_8_unsynchronized(ptr: NonNull<()>) -> u8 { + gen_load!(u8, ptr, false); +} +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_load_16_unsynchronized(ptr: NonNull<()>) -> u16 { + gen_load!(u16, ptr, false); +} +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_load_32_unsynchronized(ptr: NonNull<()>) -> u32 { + gen_load!(u32, ptr, false); +} + +// if is_64bit: +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_load_64_unsynchronized(ptr: NonNull<()>) -> u64 { + gen_load!(u64, ptr, false); +} + +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_store_8_seq_cst(ptr: NonNull<()>, val: u8) { + gen_store!(u8, ptr, val, true); +} +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_store_16_seq_cst(ptr: NonNull<()>, val: u16) { + gen_store!(u16, ptr, val, true); +} +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_store_32_seq_cst(ptr: NonNull<()>, val: u32) { + gen_store!(u32, ptr, val, true); +} + +// if is_64bit: +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_store_64_seq_cst(ptr: NonNull<()>, val: u64) { + gen_store!(u64, ptr, val, true); +} + +// These are access-atomic up to sizeof(uintptr_t). +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_store_8_unsynchronized(ptr: NonNull<()>, val: u8) { + gen_store!(u8, ptr, val, false); +} +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_store_16_unsynchronized(ptr: NonNull<()>, val: u16) { + gen_store!(u16, ptr, val, false); +} +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_store_32_unsynchronized(ptr: NonNull<()>, val: u32) { + gen_store!(u32, ptr, val, false); +} + +// if is_64bit: +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_store_64_unsynchronized(ptr: NonNull<()>, val: u64) { + gen_store!(u64, ptr, val, false); +} + +// `exchange` takes a cell address and a value. It stores it in the cell and +// returns the value previously in the cell. +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_exchange_8_seq_cst() { + gen_exchange!(u8, 8); +} +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_exchange_16_seq_cst() { + gen_exchange!(u16, 16); +} +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_exchange_32_seq_cst() { + gen_exchange!(u32, 32); +} + +// if is_64bit: +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_exchange_64_seq_cst() { + gen_exchange!(u64, 64); +} + +// `cmpxchg` takes a cell address, an expected value and a replacement value. 
+// If the value in the cell equals the expected value then the replacement value +// is stored in the cell. It always returns the value previously in the cell. +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_cmp_xchg_8_seq_cst() { + gen_cmpxchg!(u8, 8); +} +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_cmp_xchg_16_seq_cst() { + gen_cmpxchg!(u16, 16); +} +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_cmp_xchg_32_seq_cst() { + gen_cmpxchg!(u32, 32); +} +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_cmp_xchg_64_seq_cst() { + gen_cmpxchg!(u64, 64); +} + +// `add` adds a value atomically to the cell and returns the old value in the +// cell. (There is no `sub`; just add the negated value.) +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_add_8_seq_cst() { + gen_fetchop!(u8, 8, "add"); +} +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_add_16_seq_cst() { + gen_fetchop!(u16, 16, "add"); +} +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_add_32_seq_cst() { + gen_fetchop!(u32, 32, "add"); +} + +// if is_64bit: +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_add_64_seq_cst() { + gen_fetchop!(u64, 64, "add"); +} + +// `and` bitwise-ands a value atomically into the cell and returns the old value +// in the cell. +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_and_8_seq_cst() { + gen_fetchop!(u8, 8, "and"); +} +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_and_16_seq_cst() { + gen_fetchop!(u16, 16, "and"); +} +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_and_32_seq_cst() { + gen_fetchop!(u32, 32, "and"); +} + +// if is_64bit: +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_and_64_seq_cst() { + gen_fetchop!(u64, 64, "and"); +} + +// `or` bitwise-ors a value atomically into the cell and returns the old value +// in the cell. +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_or_8_seq_cst() { + gen_fetchop!(u8, 8, "or"); +} +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_or_16_seq_cst() { + gen_fetchop!(u16, 16, "or"); +} +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_or_32_seq_cst() { + gen_fetchop!(u32, 32, "or"); +} + +// if is_64bit: +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_or_64_seq_cst() { + gen_fetchop!(u64, 64, "or"); +} + +// `xor` bitwise-xors a value atomically into the cell and returns the old value +// in the cell. 
+#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_xor_8_seq_cst() { + gen_fetchop!(u8, 8, "xor"); +} +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_xor_16_seq_cst() { + gen_fetchop!(u16, 16, "xor"); +} +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_xor_32_seq_cst() { + gen_fetchop!(u32, 32, "xor"); +} + +// if is_64bit: +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_xor_64_seq_cst() { + gen_fetchop!(u64, 64, "xor"); +} + +/// Emits a machine instruction to signal the processor that it is running in a +/// busy-wait spin-loop (“spin lock”). +/// +/// See [std::hint::spin_loop] for details. +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_pause() { + core::hint::spin_loop(); +} + +// See comment in jit/AtomicOperations-shared-jit.cpp for an explanation. +// wordsize = 8 if is_64bit else 4 +// words_in_block = 8 +// blocksize = words_in_block * wordsize + +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_copy_unaligned_block_down_unsynchronized() { + gen_copy!(u8, 1, blocksize, "down"); +} + +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_copy_unaligned_block_up_unsynchronized() { + gen_copy!(u8, 1, blocksize, "up"); +} + +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_copy_unaligned_word_down_unsynchronized() { + gen_copy!(u8, 1, wordsize, "down"); +} + +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_copy_unaligned_word_up_unsynchronized() { + gen_copy!(u8, 1, wordsize, "up"); +} + +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_copy_block_down_unsynchronized() { + gen_copy!(uptr, wordsize, words_in_block, "down"); +} + +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_copy_block_up_unsynchronized() { + gen_copy!(uptr, wordsize, words_in_block, "up"); +} + +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_copy_word_unsynchronized() { + gen_copy!(uptr, wordsize, 1, "down"); +} + +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_copy32_unsynchronized() { + gen_copy!(u32, 4, 1, "down"); +} + +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_copy16_unsynchronized() { + gen_copy!(u16, 2, 1, "down"); +} + +#[inline(always)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +pub fn atomic_copy8_unsynchronized() { + gen_copy!(u8, 1, 1, "down"); +} + +pub const JS_GENERATED_ATOMICS_BLOCKSIZE: usize = 0; +pub const JS_GENERATED_ATOMICS_WORSIZE: usize = 0; + +#[test] +fn test_load() { + let foo = NonNull::from(Box::leak(Box::new([0u64; 1]))).cast::<()>(); + assert_eq!(atomic_load_8_unsynchronized(foo), 0); + assert_eq!(atomic_load_16_unsynchronized(foo), 0); + assert_eq!(atomic_load_32_unsynchronized(foo), 0); + 
assert_eq!(atomic_load_64_unsynchronized(foo), 0); + assert_eq!(atomic_load_8_seq_cst(foo), 0); + assert_eq!(atomic_load_16_seq_cst(foo), 0); + assert_eq!(atomic_load_32_seq_cst(foo), 0); + assert_eq!(atomic_load_64_seq_cst(foo), 0); +} + +#[test] +fn test_store() { + let foo = NonNull::from(Box::leak(Box::new([0u64; 1]))).cast::<()>(); + atomic_store_8_unsynchronized(foo, 1); + assert_eq!(atomic_load_8_unsynchronized(foo), 1); + atomic_store_16_unsynchronized(foo, 2); + assert_eq!(atomic_load_16_unsynchronized(foo), 2); + atomic_store_32_unsynchronized(foo, 3); + assert_eq!(atomic_load_32_unsynchronized(foo), 3); + atomic_store_64_unsynchronized(foo, 4); + assert_eq!(atomic_load_64_unsynchronized(foo), 4); + atomic_store_8_seq_cst(foo, 5); + assert_eq!(atomic_load_8_seq_cst(foo), 5); + atomic_store_16_seq_cst(foo, 6); + assert_eq!(atomic_load_16_seq_cst(foo), 6); + atomic_store_32_seq_cst(foo, 7); + assert_eq!(atomic_load_32_seq_cst(foo), 7); + atomic_store_64_seq_cst(foo, 8); + assert_eq!(atomic_load_64_seq_cst(foo), 8); +} From 7a53b14c53d913287b30e61294643b93c01312c3 Mon Sep 17 00:00:00 2001 From: Aapo Alasuutari Date: Wed, 17 Sep 2025 13:13:04 +0300 Subject: [PATCH 02/25] typo --- ecmascript_atomics/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ecmascript_atomics/lib.rs b/ecmascript_atomics/lib.rs index 6ab87c3e2..7f9076fd1 100644 --- a/ecmascript_atomics/lib.rs +++ b/ecmascript_atomics/lib.rs @@ -1215,7 +1215,7 @@ pub fn atomic_add_64_seq_cst() { gen_fetchop!(u64, 64, "add"); } -// `and` bitwise-ands a value atomically into the cell and returns the old value +// `and` bitwise-and a value atomically into the cell and returns the old value // in the cell. #[inline(always)] #[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] From c1e9d8169e8e4ca86da06b62e0dbe5d48cb55bb7 Mon Sep 17 00:00:00 2001 From: Aapo Alasuutari Date: Wed, 17 Sep 2025 13:16:39 +0300 Subject: [PATCH 03/25] fix: input variables --- ecmascript_atomics/lib.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/ecmascript_atomics/lib.rs b/ecmascript_atomics/lib.rs index 7f9076fd1..b2248666c 100644 --- a/ecmascript_atomics/lib.rs +++ b/ecmascript_atomics/lib.rs @@ -262,7 +262,7 @@ macro_rules! gen_store { "strb [{ptr}], {val:w}", fence!($barrier, aarch64), ptr = in(reg) ptr, - val = in(reg) val, + val = in(reg) $val, options(preserves_flags, nostack) ) }; return; @@ -284,7 +284,7 @@ macro_rules! gen_store { "strb [{ptr}], {val:w}", fence!($barrier, arm), ptr = in(reg) ptr, - val = in(reg) val, + val = in(reg) $val, options(preserves_flags, nostack) ) }; return; @@ -319,7 +319,7 @@ macro_rules! gen_store { "strh [{ptr}], {val:w}", fence!($barrier, aarch64), ptr = in(reg) ptr, - val = in(reg) val, + val = in(reg) $val, options(preserves_flags, nostack) ) }; return; @@ -334,7 +334,7 @@ macro_rules! gen_store { "strh [{ptr}], {val:w}", fence!($barrier, arm), ptr = in(reg) ptr, - val = in(reg) val, + val = in(reg) $val, options(preserves_flags, nostack) ) }; return; @@ -369,7 +369,7 @@ macro_rules! gen_store { "str [{ptr}], {val:w}", fence!($barrier, aarch64), ptr = in(reg) ptr, - val = in(reg) val, + val = in(reg) $val, options(preserves_flags, nostack) ) }; return; @@ -384,7 +384,7 @@ macro_rules! gen_store { "str [{ptr}], {val:w}", fence!($barrier, arm), ptr = in(reg) ptr, - val = in(reg) val, + val = in(reg) $val, options(preserves_flags, nostack) ) }; return; @@ -419,7 +419,7 @@ macro_rules! 
gen_store { "str [{ptr}], {val:x}", fence!($barrier, aarch64), ptr = in(reg) ptr, - val = in(reg) val, + val = in(reg) $val, options(preserves_flags, nostack) ) }; return; @@ -434,7 +434,7 @@ macro_rules! gen_store { "str [{ptr}], {val:x}", fence!($barrier, arm), ptr = in(reg) ptr, - val = in(reg) val, + val = in(reg) $val, options(preserves_flags, nostack) ) }; return; From 14c25f170f2485d087cc0863fc7d17cb7746f9b2 Mon Sep 17 00:00:00 2001 From: Aapo Alasuutari Date: Wed, 17 Sep 2025 13:25:16 +0300 Subject: [PATCH 04/25] cleanup --- ecmascript_atomics/lib.rs | 308 ++++++++++++-------------------------- 1 file changed, 99 insertions(+), 209 deletions(-) diff --git a/ecmascript_atomics/lib.rs b/ecmascript_atomics/lib.rs index b2248666c..0d2742bfa 100644 --- a/ecmascript_atomics/lib.rs +++ b/ecmascript_atomics/lib.rs @@ -27,181 +27,150 @@ macro_rules! fence { macro_rules! gen_load { (u8, $ptr: ident, $barrier: tt) => { + let z: u8; + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. + let ptr = unsafe { &mut *$ptr.as_ptr() }; + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - { - let z: u8; - // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. - let ptr = unsafe { &mut *$ptr.as_ptr() }; - unsafe { core::arch::asm!( + unsafe { + core::arch::asm!( "mov {val}, [{ptr}]", fence!(false, x86), ptr = in(reg) ptr, val = lateout(reg_byte) z, options(preserves_flags, nostack, pure, readonly) - ) }; - return z; + ); } #[cfg(target_arch = "aarch64")] - { - let z: u32; - // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. - let ptr = unsafe { &mut *$ptr.as_ptr() }; - unsafe { core::arch::asm!( + unsafe { + core::arch::asm!( "ldrb {val:w}, [{ptr}]", fence!($barrier, aarch64), ptr = in(reg) ptr, val = lateout(reg) z, options(preserves_flags, nostack, pure, readonly) - ) }; - return z as u8; + ); } #[cfg(target_arch = "arm")] - { - let z: u32; - // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. - let ptr = unsafe { &mut *$ptr.as_ptr() }; - unsafe { core::arch::asm!( + unsafe { + core::arch::asm!( "ldrb {val:w}, [{ptr}]", fence!($barrier, arm), ptr = in(reg) ptr, val = lateout(reg) z, options(preserves_flags, nostack, pure, readonly) - ) }; - return z as u8; + ); } - #[expect(unreachable_code)] - const { panic!("Unexpected arch") } + return z; }; (u16, $ptr: ident, $barrier: tt) => { + let z: u16; + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. + let ptr = unsafe { &mut *$ptr.as_ptr() }; + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - { - let z: u16; - // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. - let ptr = unsafe { &mut *$ptr.as_ptr() }; - unsafe { core::arch::asm!( + unsafe { + core::arch::asm!( "mov {val:x}, [{ptr}]", fence!(false, x86), ptr = in(reg) ptr, val = lateout(reg) z, options(preserves_flags, nostack, pure, readonly) - ) }; - return z; + ); } #[cfg(target_arch = "aarch64")] - { - let z: u32; - // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. - let ptr = unsafe { &mut *$ptr.as_ptr() }; - unsafe { core::arch::asm!( + unsafe { + core::arch::asm!( "ldrh {val:w}, [{ptr}]", fence!($barrier, aarch64), ptr = in(reg) ptr, val = lateout(reg) z, options(preserves_flags, nostack, pure, readonly) - ) }; - return z as u16; + ); } #[cfg(target_arch = "arm")] - { - let z: u32; - // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. 
- let ptr = unsafe { &mut *$ptr.as_ptr() }; - unsafe { core::arch::asm!( + unsafe { + core::arch::asm!( "ldrh {val:w}, [{ptr}]", fence!($barrier, arm), ptr = in(reg) ptr, val = lateout(reg) z, options(preserves_flags, nostack, pure, readonly) - ) }; - return z as u16; + ); } - #[expect(unreachable_code)] - const { panic!("Unexpected arch") } + return z; }; (u32, $ptr: ident, $barrier: tt) => { + let z: u32; + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. + let ptr = unsafe { &mut *$ptr.as_ptr() }; + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - { - let z: u32; - // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. - let ptr = unsafe { &mut *$ptr.as_ptr() }; - unsafe { core::arch::asm!( + unsafe { + core::arch::asm!( "mov {val:e}, [{ptr}]", fence!(false, x86), ptr = in(reg) ptr, val = lateout(reg) z, options(preserves_flags, nostack, pure, readonly) - ) }; - return z; + ); } #[cfg(target_arch = "aarch64")] - { - let z: u32; - // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. - let ptr = unsafe { &mut *$ptr.as_ptr() }; - unsafe { core::arch::asm!( + unsafe { + core::arch::asm!( "ldr {val:w}, [{ptr}]", fence!($barrier, aarch64), ptr = in(reg) ptr, val = lateout(reg) z, options(preserves_flags, nostack, pure, readonly) - ) }; - return z; + ); } #[cfg(target_arch = "arm")] - { - let z: u32; - // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. - let ptr = unsafe { &mut *$ptr.as_ptr() }; - unsafe { core::arch::asm!( + unsafe { + core::arch::asm!( "ldr {val:w}, [{ptr}]", fence!($barrier, arm), ptr = in(reg) ptr, val = lateout(reg) z, options(preserves_flags, nostack, pure, readonly) - ) }; - return z; + ); } - #[expect(unreachable_code)] - const { panic!("Unexpected arch") } + return z; }; (u64, $ptr: ident, $barrier: tt) => { + let z: u64; + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. + let ptr = unsafe { &mut *$ptr.as_ptr() }; + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - { - let z: u64; - // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. - let ptr = unsafe { &mut *$ptr.as_ptr() }; - unsafe { core::arch::asm!( + unsafe { + core::arch::asm!( "mov {val:r}, [{ptr}]", fence!(false, x86), ptr = in(reg) ptr, val = lateout(reg) z, options(preserves_flags, nostack, pure, readonly) - ) }; - return z; + ); } #[cfg(target_arch = "aarch64")] - { - let z: u64; - // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. - let ptr = unsafe { &mut *$ptr.as_ptr() }; - unsafe { core::arch::asm!( + unsafe { + core::arch::asm!( "ldr {val:x}, [{ptr}]", fence!($barrier, aarch64), ptr = in(reg) ptr, val = lateout(reg) z, options(preserves_flags, nostack, pure, readonly) - ) }; - return z; + ); } #[cfg(target_arch = "arm")] @@ -209,8 +178,7 @@ macro_rules! gen_load { const { panic!("Unexpected size") } } - #[expect(unreachable_code)] - const { panic!("Unexpected arch") } + return z; }; ($type: ty, $ptr: ident, $barrier: tt) => { panic!("Unsupported type"); @@ -219,230 +187,152 @@ macro_rules! gen_load { macro_rules! gen_store { (u8, $ptr: ident, $val: ident, $barrier: tt) => { + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. + let ptr = unsafe { &mut *$ptr.as_ptr() }; + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - { - // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. 
- let ptr = unsafe { &mut *$ptr.as_ptr() }; - unsafe { core::arch::asm!( + unsafe { + core::arch::asm!( "mov [{ptr}], {val}", fence!($barrier, x86), ptr = in(reg) ptr, val = in(reg_byte) $val, options(preserves_flags, nostack) - ) }; - return; - // elif size == 16: - // insns += fmt_insn("movw %[val], (%[addr])") - // elif size == 32: - // insns += fmt_insn("movl %[val], (%[addr])") - // else: - // assert size == 64 - // insns += fmt_insn("movq %[val], (%[addr])") - // if barrier: - // insns += fmt_insn("mfence") - // return """ - // INLINE_ATTR void %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { - // asm volatile (%(insns)s - // : - // : [addr] "r" (addr), [val] "r"(val) - // : "memory"); - // }""" % { - // "cpp_type": cpp_type, - // "fun_name": fun_name, - // "insns": insns, - // } + ); } #[cfg(target_arch = "aarch64")] - { - // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. - let ptr = unsafe { &mut *$ptr.as_ptr() }; - unsafe { core::arch::asm!( + unsafe { + core::arch::asm!( fence!($barrier, aarch64), "strb [{ptr}], {val:w}", fence!($barrier, aarch64), ptr = in(reg) ptr, val = in(reg) $val, options(preserves_flags, nostack) - ) }; - return; - // elif size == 16: - // insns += fmt_insn("strh %w[val], [%x[addr]]") - // elif size == 32: - // insns += fmt_insn("str %w[val], [%x[addr]]") - // else: - // assert size == 64 - // insns += fmt_insn("str %x[val], [%x[addr]]") + ); } #[cfg(target_arch = "arm")] - { - // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. - let ptr = unsafe { &mut *$ptr.as_ptr() }; - unsafe { core::arch::asm!( + unsafe { + core::arch::asm!( fence!($barrier, arm), "strb [{ptr}], {val:w}", fence!($barrier, arm), ptr = in(reg) ptr, val = in(reg) $val, options(preserves_flags, nostack) - ) }; - return; - } - - #[expect(unreachable_code)] - const { - panic!("Unexpected arch") + ); } }; (u16, $ptr: ident, $val: ident, $barrier: tt) => { + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. + let ptr = unsafe { &mut *$ptr.as_ptr() }; + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - { - // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. - let ptr = unsafe { &mut *$ptr.as_ptr() }; - unsafe { core::arch::asm!( + unsafe { + core::arch::asm!( "mov [{ptr}], {val:x}", fence!($barrier, x86), ptr = in(reg) ptr, val = in(reg) $val, options(preserves_flags, nostack) - ) }; - return; + ); } #[cfg(target_arch = "aarch64")] - { - // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. - let ptr = unsafe { &mut *$ptr.as_ptr() }; - unsafe { core::arch::asm!( + unsafe { + core::arch::asm!( fence!($barrier, aarch64), "strh [{ptr}], {val:w}", fence!($barrier, aarch64), ptr = in(reg) ptr, val = in(reg) $val, options(preserves_flags, nostack) - ) }; - return; + ); } #[cfg(target_arch = "arm")] - { - // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. - let ptr = unsafe { &mut *$ptr.as_ptr() }; - unsafe { core::arch::asm!( + unsafe { + core::arch::asm!( fence!($barrier, arm), "strh [{ptr}], {val:w}", fence!($barrier, arm), ptr = in(reg) ptr, val = in(reg) $val, options(preserves_flags, nostack) - ) }; - return; - } - - #[expect(unreachable_code)] - const { - panic!("Unexpected arch") + ); } }; (u32, $ptr: ident, $val: ident, $barrier: tt) => { + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. 
+ let ptr = unsafe { &mut *$ptr.as_ptr() }; + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - { - // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. - let ptr = unsafe { &mut *$ptr.as_ptr() }; - unsafe { core::arch::asm!( + unsafe { + core::arch::asm!( "mov [{ptr}], {val:e}", fence!($barrier, x86), ptr = in(reg) ptr, val = in(reg) $val, options(preserves_flags, nostack) - ) }; - return; + ); } #[cfg(target_arch = "aarch64")] - { - // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. - let ptr = unsafe { &mut *$ptr.as_ptr() }; - unsafe { core::arch::asm!( + unsafe { + core::arch::asm!( fence!($barrier, aarch64), "str [{ptr}], {val:w}", fence!($barrier, aarch64), ptr = in(reg) ptr, val = in(reg) $val, options(preserves_flags, nostack) - ) }; - return; + ); } #[cfg(target_arch = "arm")] - { - // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. - let ptr = unsafe { &mut *$ptr.as_ptr() }; - unsafe { core::arch::asm!( + unsafe { + core::arch::asm!( fence!($barrier, arm), "str [{ptr}], {val:w}", fence!($barrier, arm), ptr = in(reg) ptr, val = in(reg) $val, options(preserves_flags, nostack) - ) }; - return; - } - - #[expect(unreachable_code)] - const { - panic!("Unexpected arch") + ); } }; (u64, $ptr: ident, $val: ident, $barrier: tt) => { + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. + let ptr = unsafe { &mut *$ptr.as_ptr() }; + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - { - // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. - let ptr = unsafe { &mut *$ptr.as_ptr() }; - unsafe { core::arch::asm!( + unsafe { + core::arch::asm!( "mov [{ptr}], {val:x}", fence!($barrier, x86), ptr = in(reg) ptr, val = in(reg) $val, options(preserves_flags, nostack) - ) }; - return; + ); } #[cfg(target_arch = "aarch64")] - { - // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. - let ptr = unsafe { &mut *$ptr.as_ptr() }; - unsafe { core::arch::asm!( + unsafe { + core::arch::asm!( fence!($barrier, aarch64), "str [{ptr}], {val:x}", fence!($barrier, aarch64), ptr = in(reg) ptr, val = in(reg) $val, options(preserves_flags, nostack) - ) }; - return; + ); } #[cfg(target_arch = "arm")] { - // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. - let ptr = unsafe { &mut *$ptr.as_ptr() }; - unsafe { core::arch::asm!( - fence!($barrier, arm), - "str [{ptr}], {val:x}", - fence!($barrier, arm), - ptr = in(reg) ptr, - val = in(reg) $val, - options(preserves_flags, nostack) - ) }; - return; - } - - #[expect(unreachable_code)] - const { - panic!("Unexpected arch") + const { panic!("Unexpected size") } } }; } From 453178718cb69d0b10d885740f64e04a17482d74 Mon Sep 17 00:00:00 2001 From: Aapo Alasuutari Date: Wed, 17 Sep 2025 13:26:42 +0300 Subject: [PATCH 05/25] fix: strh strb --- ecmascript_atomics/lib.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ecmascript_atomics/lib.rs b/ecmascript_atomics/lib.rs index 0d2742bfa..359256a4e 100644 --- a/ecmascript_atomics/lib.rs +++ b/ecmascript_atomics/lib.rs @@ -205,7 +205,7 @@ macro_rules! gen_store { unsafe { core::arch::asm!( fence!($barrier, aarch64), - "strb [{ptr}], {val:w}", + "str [{ptr}], {val:w}", fence!($barrier, aarch64), ptr = in(reg) ptr, val = in(reg) $val, @@ -217,7 +217,7 @@ macro_rules! 
gen_store { unsafe { core::arch::asm!( fence!($barrier, arm), - "strb [{ptr}], {val:w}", + "str [{ptr}], {val:w}", fence!($barrier, arm), ptr = in(reg) ptr, val = in(reg) $val, @@ -244,7 +244,7 @@ macro_rules! gen_store { unsafe { core::arch::asm!( fence!($barrier, aarch64), - "strh [{ptr}], {val:w}", + "str [{ptr}], {val:w}", fence!($barrier, aarch64), ptr = in(reg) ptr, val = in(reg) $val, @@ -256,7 +256,7 @@ macro_rules! gen_store { unsafe { core::arch::asm!( fence!($barrier, arm), - "strh [{ptr}], {val:w}", + "str [{ptr}], {val:w}", fence!($barrier, arm), ptr = in(reg) ptr, val = in(reg) $val, From e74c09d08dbf6161fecd628fde78d68c2b6f414a Mon Sep 17 00:00:00 2001 From: Aapo Alasuutari Date: Wed, 17 Sep 2025 13:37:53 +0300 Subject: [PATCH 06/25] try fix arm again --- ecmascript_atomics/lib.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/ecmascript_atomics/lib.rs b/ecmascript_atomics/lib.rs index 359256a4e..3eeedc415 100644 --- a/ecmascript_atomics/lib.rs +++ b/ecmascript_atomics/lib.rs @@ -205,7 +205,7 @@ macro_rules! gen_store { unsafe { core::arch::asm!( fence!($barrier, aarch64), - "str [{ptr}], {val:w}", + "str {val:w}, [{ptr}]", fence!($barrier, aarch64), ptr = in(reg) ptr, val = in(reg) $val, @@ -217,7 +217,7 @@ macro_rules! gen_store { unsafe { core::arch::asm!( fence!($barrier, arm), - "str [{ptr}], {val:w}", + "str {val:w}, [{ptr}]", fence!($barrier, arm), ptr = in(reg) ptr, val = in(reg) $val, @@ -244,7 +244,7 @@ macro_rules! gen_store { unsafe { core::arch::asm!( fence!($barrier, aarch64), - "str [{ptr}], {val:w}", + "str {val:w}, [{ptr}]", fence!($barrier, aarch64), ptr = in(reg) ptr, val = in(reg) $val, @@ -256,7 +256,7 @@ macro_rules! gen_store { unsafe { core::arch::asm!( fence!($barrier, arm), - "str [{ptr}], {val:w}", + "str {val:w}, [{ptr}]", fence!($barrier, arm), ptr = in(reg) ptr, val = in(reg) $val, @@ -283,7 +283,7 @@ macro_rules! gen_store { unsafe { core::arch::asm!( fence!($barrier, aarch64), - "str [{ptr}], {val:w}", + "str {val:w}, [{ptr}]", fence!($barrier, aarch64), ptr = in(reg) ptr, val = in(reg) $val, @@ -295,7 +295,7 @@ macro_rules! gen_store { unsafe { core::arch::asm!( fence!($barrier, arm), - "str [{ptr}], {val:w}", + "str {val:w}, [{ptr}]", fence!($barrier, arm), ptr = in(reg) ptr, val = in(reg) $val, @@ -322,7 +322,7 @@ macro_rules! gen_store { unsafe { core::arch::asm!( fence!($barrier, aarch64), - "str [{ptr}], {val:x}", + "str {val:x}, [{ptr}]", fence!($barrier, aarch64), ptr = in(reg) ptr, val = in(reg) $val, From 84cc7a9149d1a1119914fa8daa4c456ac10f9d50 Mon Sep 17 00:00:00 2001 From: Aapo Alasuutari Date: Wed, 17 Sep 2025 15:21:38 +0300 Subject: [PATCH 07/25] feat: exchange --- ecmascript_atomics/lib.rs | 624 ++++++++++++++++++++++++++++---------- 1 file changed, 471 insertions(+), 153 deletions(-) diff --git a/ecmascript_atomics/lib.rs b/ecmascript_atomics/lib.rs index 3eeedc415..eb4b07092 100644 --- a/ecmascript_atomics/lib.rs +++ b/ecmascript_atomics/lib.rs @@ -310,7 +310,7 @@ macro_rules! gen_store { #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] unsafe { core::arch::asm!( - "mov [{ptr}], {val:x}", + "mov [{ptr}], {val:r}", fence!($barrier, x86), ptr = in(reg) ptr, val = in(reg) $val, @@ -338,124 +338,362 @@ macro_rules! gen_store { } macro_rules! gen_exchange { - ($type: ty, $size: literal) => { + (u8, $ptr: ident, $val: ident) => { + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. 
+ let ptr = unsafe { &mut *$ptr.as_ptr() }; + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + unsafe { + core::arch::asm!( + "xchg [{ptr}], {val}", + ptr = in(reg) ptr, + val = inout(reg_byte) $val, + options(preserves_flags, nostack) + ); + } + + #[cfg(target_arch = "aarch64")] + { + // insns = "" + // insns += fmt_insn("dmb ish") + // insns += fmt_insn("0:") + // if size == 8: + // insns += fmt_insn("ldxrb %w[res], [%x[addr]]") + // insns += fmt_insn("stxrb %w[scratch], %w[val], [%x[addr]]") + // elif size == 16: + // insns += fmt_insn("ldxrh %w[res], [%x[addr]]") + // insns += fmt_insn("stxrh %w[scratch], %w[val], [%x[addr]]") + // elif size == 32: + // insns += fmt_insn("ldxr %w[res], [%x[addr]]") + // insns += fmt_insn("stxr %w[scratch], %w[val], [%x[addr]]") + // else: + // assert size == 64 + // insns += fmt_insn("ldxr %x[res], [%x[addr]]") + // insns += fmt_insn("stxr %w[scratch], %x[val], [%x[addr]]") + // insns += fmt_insn("cbnz %w[scratch], 0b") + // insns += fmt_insn("dmb ish") + // return """ + // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { + // %(cpp_type)s res; + // uint32_t scratch; + // asm volatile (%(insns)s + // : [res] "=&r"(res), [scratch] "=&r"(scratch) + // : [addr] "r" (addr), [val] "r"(val) + // : "memory", "cc"); + // return res; + // }""" % { + // "cpp_type": cpp_type, + // "fun_name": fun_name, + // "insns": insns, + // } + todo!(); + } + + #[cfg(target_arch = "arm")] { + // insns = "" + // insns += fmt_insn("dmb sy") + // insns += fmt_insn("0:") + // if size == 8: + // insns += fmt_insn("ldrexb %[res], [%[addr]]") + // insns += fmt_insn("strexb %[scratch], %[val], [%[addr]]") + // elif size == 16: + // insns += fmt_insn("ldrexh %[res], [%[addr]]") + // insns += fmt_insn("strexh %[scratch], %[val], [%[addr]]") + // else: + // assert size == 32 + // insns += fmt_insn("ldrex %[res], [%[addr]]") + // insns += fmt_insn("strex %[scratch], %[val], [%[addr]]") + // insns += fmt_insn("cmp %[scratch], #1") + // insns += fmt_insn("beq 0b") + // insns += fmt_insn("dmb sy") + // return """ + // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { + // %(cpp_type)s res; + // uint32_t scratch; + // asm volatile (%(insns)s + // : [res] "=&r"(res), [scratch] "=&r"(scratch) + // : [addr] "r" (addr), [val] "r"(val) + // : "memory", "cc"); + // return res; + // }""" % { + // "cpp_type": cpp_type, + // "fun_name": fun_name, + // "insns": insns, + // } todo!(); } + return $val; + }; + (u16, $ptr: ident, $val: ident) => { + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. 
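// Aside: the x86 arm above leans on XCHG with a memory operand being implicitly
// LOCKed, so no explicit fence or lock prefix is needed and the swapped-out
// value comes back through the same inout register. Behaviourally the whole
// macro arm is a sequentially consistent swap; a hypothetical reference helper
// (not from the patch), assuming a valid, aligned `ptr`:
use core::ptr::NonNull;
use core::sync::atomic::{AtomicU8, Ordering};

unsafe fn exchange_8_reference(ptr: NonNull<()>, val: u8) -> u8 {
    unsafe { (*ptr.cast::<AtomicU8>().as_ptr()).swap(val, Ordering::SeqCst) }
}
// End of aside.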
+ let ptr = unsafe { &mut *$ptr.as_ptr() }; + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + unsafe { + core::arch::asm!( + "xchg [{ptr}], {val:x}", + ptr = in(reg) ptr, + val = inout(reg) $val, + options(preserves_flags, nostack) + ); + } + #[cfg(target_arch = "aarch64")] { + // insns = "" + // insns += fmt_insn("dmb ish") + // insns += fmt_insn("0:") + // if size == 8: + // insns += fmt_insn("ldxrb %w[res], [%x[addr]]") + // insns += fmt_insn("stxrb %w[scratch], %w[val], [%x[addr]]") + // elif size == 16: + // insns += fmt_insn("ldxrh %w[res], [%x[addr]]") + // insns += fmt_insn("stxrh %w[scratch], %w[val], [%x[addr]]") + // elif size == 32: + // insns += fmt_insn("ldxr %w[res], [%x[addr]]") + // insns += fmt_insn("stxr %w[scratch], %w[val], [%x[addr]]") + // else: + // assert size == 64 + // insns += fmt_insn("ldxr %x[res], [%x[addr]]") + // insns += fmt_insn("stxr %w[scratch], %x[val], [%x[addr]]") + // insns += fmt_insn("cbnz %w[scratch], 0b") + // insns += fmt_insn("dmb ish") + // return """ + // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { + // %(cpp_type)s res; + // uint32_t scratch; + // asm volatile (%(insns)s + // : [res] "=&r"(res), [scratch] "=&r"(scratch) + // : [addr] "r" (addr), [val] "r"(val) + // : "memory", "cc"); + // return res; + // }""" % { + // "cpp_type": cpp_type, + // "fun_name": fun_name, + // "insns": insns, + // } todo!(); } #[cfg(target_arch = "arm")] { + // insns = "" + // insns += fmt_insn("dmb sy") + // insns += fmt_insn("0:") + // if size == 8: + // insns += fmt_insn("ldrexb %[res], [%[addr]]") + // insns += fmt_insn("strexb %[scratch], %[val], [%[addr]]") + // elif size == 16: + // insns += fmt_insn("ldrexh %[res], [%[addr]]") + // insns += fmt_insn("strexh %[scratch], %[val], [%[addr]]") + // else: + // assert size == 32 + // insns += fmt_insn("ldrex %[res], [%[addr]]") + // insns += fmt_insn("strex %[scratch], %[val], [%[addr]]") + // insns += fmt_insn("cmp %[scratch], #1") + // insns += fmt_insn("beq 0b") + // insns += fmt_insn("dmb sy") + // return """ + // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { + // %(cpp_type)s res; + // uint32_t scratch; + // asm volatile (%(insns)s + // : [res] "=&r"(res), [scratch] "=&r"(scratch) + // : [addr] "r" (addr), [val] "r"(val) + // : "memory", "cc"); + // return res; + // }""" % { + // "cpp_type": cpp_type, + // "fun_name": fun_name, + // "insns": insns, + // } todo!(); } - #[expect(unreachable_code)] - const { - panic!("Unexpected arch") + return $val; + }; + (u32, $ptr: ident, $val: ident) => { + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. + let ptr = unsafe { &mut *$ptr.as_ptr() }; + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + unsafe { + core::arch::asm!( + "xchg [{ptr}], {val:e}", + ptr = in(reg) ptr, + val = inout(reg) $val, + options(preserves_flags, nostack) + ); } - // NOTE: the assembly code must match the generated code in: - // - MacroAssembler::atomicExchange - // - MacroAssembler::atomicExchange64 (on 64-bit platforms) - // if cpu_arch in ("x86", "x86_64"): - // // Request an input/output register for `val` so that we can simply XCHG it - // // with *addr. 
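// Aside: the generator comments above describe an LL/SC retry loop
// (ldxr/stxr on AArch64, ldrex/strex on ARM): load-exclusive the current
// value, try to store-exclusive the replacement, and branch back until the
// store succeeds. The same shape written with a weak compare-exchange, as a
// hypothetical reference only (not from the patch), assuming a valid,
// aligned `ptr`:
use core::ptr::NonNull;
use core::sync::atomic::{AtomicU32, Ordering};

unsafe fn exchange_32_retry_loop_reference(ptr: NonNull<()>, val: u32) -> u32 {
    let cell = unsafe { &*ptr.cast::<AtomicU32>().as_ptr() };
    let mut current = cell.load(Ordering::Relaxed);
    loop {
        match cell.compare_exchange_weak(current, val, Ordering::SeqCst, Ordering::Relaxed) {
            Ok(previous) => return previous,
            // The exclusive store failed; reload and retry, like cbnz/beq 0b.
            Err(previous) => current = previous,
        }
    }
}
// End of aside.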
- // insns = "" - // if size == 8: - // insns += fmt_insn("xchgb %[val], (%[addr])") - // elif size == 16: - // insns += fmt_insn("xchgw %[val], (%[addr])") - // elif size == 32: - // insns += fmt_insn("xchgl %[val], (%[addr])") - // else: - // assert size == 64 - // insns += fmt_insn("xchgq %[val], (%[addr])") - // return """ - // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { - // asm volatile (%(insns)s - // : [val] "+r" (val) - // : [addr] "r" (addr) - // : "memory"); - // return val; - // }""" % { - // "cpp_type": cpp_type, - // "fun_name": fun_name, - // "insns": insns, - // } - // if cpu_arch == "aarch64": - // insns = "" - // insns += fmt_insn("dmb ish") - // insns += fmt_insn("0:") - // if size == 8: - // insns += fmt_insn("ldxrb %w[res], [%x[addr]]") - // insns += fmt_insn("stxrb %w[scratch], %w[val], [%x[addr]]") - // elif size == 16: - // insns += fmt_insn("ldxrh %w[res], [%x[addr]]") - // insns += fmt_insn("stxrh %w[scratch], %w[val], [%x[addr]]") - // elif size == 32: - // insns += fmt_insn("ldxr %w[res], [%x[addr]]") - // insns += fmt_insn("stxr %w[scratch], %w[val], [%x[addr]]") - // else: - // assert size == 64 - // insns += fmt_insn("ldxr %x[res], [%x[addr]]") - // insns += fmt_insn("stxr %w[scratch], %x[val], [%x[addr]]") - // insns += fmt_insn("cbnz %w[scratch], 0b") - // insns += fmt_insn("dmb ish") - // return """ - // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { - // %(cpp_type)s res; - // uint32_t scratch; - // asm volatile (%(insns)s - // : [res] "=&r"(res), [scratch] "=&r"(scratch) - // : [addr] "r" (addr), [val] "r"(val) - // : "memory", "cc"); - // return res; - // }""" % { - // "cpp_type": cpp_type, - // "fun_name": fun_name, - // "insns": insns, - // } - // if cpu_arch == "arm": - // insns = "" - // insns += fmt_insn("dmb sy") - // insns += fmt_insn("0:") - // if size == 8: - // insns += fmt_insn("ldrexb %[res], [%[addr]]") - // insns += fmt_insn("strexb %[scratch], %[val], [%[addr]]") - // elif size == 16: - // insns += fmt_insn("ldrexh %[res], [%[addr]]") - // insns += fmt_insn("strexh %[scratch], %[val], [%[addr]]") - // else: - // assert size == 32 - // insns += fmt_insn("ldrex %[res], [%[addr]]") - // insns += fmt_insn("strex %[scratch], %[val], [%[addr]]") - // insns += fmt_insn("cmp %[scratch], #1") - // insns += fmt_insn("beq 0b") - // insns += fmt_insn("dmb sy") - // return """ - // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { - // %(cpp_type)s res; - // uint32_t scratch; - // asm volatile (%(insns)s - // : [res] "=&r"(res), [scratch] "=&r"(scratch) - // : [addr] "r" (addr), [val] "r"(val) - // : "memory", "cc"); - // return res; - // }""" % { - // "cpp_type": cpp_type, - // "fun_name": fun_name, - // "insns": insns, - // } - // raise Exception("Unexpected arch") + + #[cfg(target_arch = "aarch64")] + { + // insns = "" + // insns += fmt_insn("dmb ish") + // insns += fmt_insn("0:") + // if size == 8: + // insns += fmt_insn("ldxrb %w[res], [%x[addr]]") + // insns += fmt_insn("stxrb %w[scratch], %w[val], [%x[addr]]") + // elif size == 16: + // insns += fmt_insn("ldxrh %w[res], [%x[addr]]") + // insns += fmt_insn("stxrh %w[scratch], %w[val], [%x[addr]]") + // elif size == 32: + // insns += fmt_insn("ldxr %w[res], [%x[addr]]") + // insns += fmt_insn("stxr %w[scratch], %w[val], [%x[addr]]") + // else: + // assert size == 64 + // insns += fmt_insn("ldxr %x[res], [%x[addr]]") + // insns += fmt_insn("stxr %w[scratch], %x[val], [%x[addr]]") + // insns += 
fmt_insn("cbnz %w[scratch], 0b") + // insns += fmt_insn("dmb ish") + // return """ + // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { + // %(cpp_type)s res; + // uint32_t scratch; + // asm volatile (%(insns)s + // : [res] "=&r"(res), [scratch] "=&r"(scratch) + // : [addr] "r" (addr), [val] "r"(val) + // : "memory", "cc"); + // return res; + // }""" % { + // "cpp_type": cpp_type, + // "fun_name": fun_name, + // "insns": insns, + // } + todo!(); + } + + #[cfg(target_arch = "arm")] + { + // insns = "" + // insns += fmt_insn("dmb sy") + // insns += fmt_insn("0:") + // if size == 8: + // insns += fmt_insn("ldrexb %[res], [%[addr]]") + // insns += fmt_insn("strexb %[scratch], %[val], [%[addr]]") + // elif size == 16: + // insns += fmt_insn("ldrexh %[res], [%[addr]]") + // insns += fmt_insn("strexh %[scratch], %[val], [%[addr]]") + // else: + // assert size == 32 + // insns += fmt_insn("ldrex %[res], [%[addr]]") + // insns += fmt_insn("strex %[scratch], %[val], [%[addr]]") + // insns += fmt_insn("cmp %[scratch], #1") + // insns += fmt_insn("beq 0b") + // insns += fmt_insn("dmb sy") + // return """ + // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { + // %(cpp_type)s res; + // uint32_t scratch; + // asm volatile (%(insns)s + // : [res] "=&r"(res), [scratch] "=&r"(scratch) + // : [addr] "r" (addr), [val] "r"(val) + // : "memory", "cc"); + // return res; + // }""" % { + // "cpp_type": cpp_type, + // "fun_name": fun_name, + // "insns": insns, + // } + todo!(); + } + + return $val; + }; + (u64, $ptr: ident, $val: ident) => { + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. + let ptr = unsafe { &mut *$ptr.as_ptr() }; + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + unsafe { + core::arch::asm!( + "xchg [{ptr}], {val:r}", + ptr = in(reg) ptr, + val = inout(reg) $val, + options(preserves_flags, nostack) + ); + } + + #[cfg(target_arch = "aarch64")] + { + // insns = "" + // insns += fmt_insn("dmb ish") + // insns += fmt_insn("0:") + // if size == 8: + // insns += fmt_insn("ldxrb %w[res], [%x[addr]]") + // insns += fmt_insn("stxrb %w[scratch], %w[val], [%x[addr]]") + // elif size == 16: + // insns += fmt_insn("ldxrh %w[res], [%x[addr]]") + // insns += fmt_insn("stxrh %w[scratch], %w[val], [%x[addr]]") + // elif size == 32: + // insns += fmt_insn("ldxr %w[res], [%x[addr]]") + // insns += fmt_insn("stxr %w[scratch], %w[val], [%x[addr]]") + // else: + // assert size == 64 + // insns += fmt_insn("ldxr %x[res], [%x[addr]]") + // insns += fmt_insn("stxr %w[scratch], %x[val], [%x[addr]]") + // insns += fmt_insn("cbnz %w[scratch], 0b") + // insns += fmt_insn("dmb ish") + // return """ + // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { + // %(cpp_type)s res; + // uint32_t scratch; + // asm volatile (%(insns)s + // : [res] "=&r"(res), [scratch] "=&r"(scratch) + // : [addr] "r" (addr), [val] "r"(val) + // : "memory", "cc"); + // return res; + // }""" % { + // "cpp_type": cpp_type, + // "fun_name": fun_name, + // "insns": insns, + // } + todo!(); + } + + #[cfg(target_arch = "arm")] + { + // insns = "" + // insns += fmt_insn("dmb sy") + // insns += fmt_insn("0:") + // if size == 8: + // insns += fmt_insn("ldrexb %[res], [%[addr]]") + // insns += fmt_insn("strexb %[scratch], %[val], [%[addr]]") + // elif size == 16: + // insns += fmt_insn("ldrexh %[res], [%[addr]]") + // insns += fmt_insn("strexh %[scratch], %[val], [%[addr]]") + // else: + // assert size == 32 + // insns += 
fmt_insn("ldrex %[res], [%[addr]]") + // insns += fmt_insn("strex %[scratch], %[val], [%[addr]]") + // insns += fmt_insn("cmp %[scratch], #1") + // insns += fmt_insn("beq 0b") + // insns += fmt_insn("dmb sy") + // return """ + // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { + // %(cpp_type)s res; + // uint32_t scratch; + // asm volatile (%(insns)s + // : [res] "=&r"(res), [scratch] "=&r"(scratch) + // : [addr] "r" (addr), [val] "r"(val) + // : "memory", "cc"); + // return res; + // }""" % { + // "cpp_type": cpp_type, + // "fun_name": fun_name, + // "insns": insns, + // } + todo!(); + } + + return $val; }; } macro_rules! gen_cmpxchg { - ($type: ty, $size: literal) => { + ($type: ty) => { #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { todo!(); @@ -646,7 +884,7 @@ macro_rules! gen_cmpxchg { } macro_rules! gen_fetchop { - ($type: ty, $size: literal, $op: tt) => { + ($type: ty, $op: tt) => { #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { todo!(); @@ -1035,25 +1273,25 @@ pub fn atomic_store_64_unsynchronized(ptr: NonNull<()>, val: u64) { // returns the value previously in the cell. #[inline(always)] #[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] -pub fn atomic_exchange_8_seq_cst() { - gen_exchange!(u8, 8); +pub fn atomic_exchange_8_seq_cst(ptr: NonNull<()>, mut val: u8) -> u8 { + gen_exchange!(u8, ptr, val); } #[inline(always)] #[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] -pub fn atomic_exchange_16_seq_cst() { - gen_exchange!(u16, 16); +pub fn atomic_exchange_16_seq_cst(ptr: NonNull<()>, mut val: u16) -> u16 { + gen_exchange!(u16, ptr, val); } #[inline(always)] #[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] -pub fn atomic_exchange_32_seq_cst() { - gen_exchange!(u32, 32); +pub fn atomic_exchange_32_seq_cst(ptr: NonNull<()>, mut val: u32) -> u32 { + gen_exchange!(u32, ptr, val); } // if is_64bit: #[inline(always)] #[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] -pub fn atomic_exchange_64_seq_cst() { - gen_exchange!(u64, 64); +pub fn atomic_exchange_64_seq_cst(ptr: NonNull<()>, mut val: u64) -> u64 { + gen_exchange!(u64, ptr, val); } // `cmpxchg` takes a cell address, an expected value and a replacement value. 
@@ -1062,22 +1300,22 @@ pub fn atomic_exchange_64_seq_cst() { #[inline(always)] #[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] pub fn atomic_cmp_xchg_8_seq_cst() { - gen_cmpxchg!(u8, 8); + gen_cmpxchg!(u8); } #[inline(always)] #[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] pub fn atomic_cmp_xchg_16_seq_cst() { - gen_cmpxchg!(u16, 16); + gen_cmpxchg!(u16); } #[inline(always)] #[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] pub fn atomic_cmp_xchg_32_seq_cst() { - gen_cmpxchg!(u32, 32); + gen_cmpxchg!(u32); } #[inline(always)] #[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] pub fn atomic_cmp_xchg_64_seq_cst() { - gen_cmpxchg!(u64, 64); + gen_cmpxchg!(u64); } // `add` adds a value atomically to the cell and returns the old value in the @@ -1085,24 +1323,24 @@ pub fn atomic_cmp_xchg_64_seq_cst() { #[inline(always)] #[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] pub fn atomic_add_8_seq_cst() { - gen_fetchop!(u8, 8, "add"); + gen_fetchop!(u8, "add"); } #[inline(always)] #[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] pub fn atomic_add_16_seq_cst() { - gen_fetchop!(u16, 16, "add"); + gen_fetchop!(u16, "add"); } #[inline(always)] #[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] pub fn atomic_add_32_seq_cst() { - gen_fetchop!(u32, 32, "add"); + gen_fetchop!(u32, "add"); } // if is_64bit: #[inline(always)] #[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] pub fn atomic_add_64_seq_cst() { - gen_fetchop!(u64, 64, "add"); + gen_fetchop!(u64, "add"); } // `and` bitwise-and a value atomically into the cell and returns the old value @@ -1110,24 +1348,24 @@ pub fn atomic_add_64_seq_cst() { #[inline(always)] #[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] pub fn atomic_and_8_seq_cst() { - gen_fetchop!(u8, 8, "and"); + gen_fetchop!(u8, "and"); } #[inline(always)] #[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] pub fn atomic_and_16_seq_cst() { - gen_fetchop!(u16, 16, "and"); + gen_fetchop!(u16, "and"); } #[inline(always)] #[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] pub fn atomic_and_32_seq_cst() { - gen_fetchop!(u32, 32, "and"); + gen_fetchop!(u32, "and"); } // if is_64bit: #[inline(always)] #[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] pub fn atomic_and_64_seq_cst() { - gen_fetchop!(u64, 64, "and"); + gen_fetchop!(u64, "and"); } // `or` bitwise-ors a value atomically into the cell and returns the old value @@ -1135,24 +1373,24 @@ pub fn atomic_and_64_seq_cst() { #[inline(always)] #[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] pub fn atomic_or_8_seq_cst() { - gen_fetchop!(u8, 8, "or"); + gen_fetchop!(u8, "or"); } #[inline(always)] #[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] pub fn atomic_or_16_seq_cst() { - gen_fetchop!(u16, 16, "or"); + gen_fetchop!(u16, "or"); } #[inline(always)] #[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] pub fn atomic_or_32_seq_cst() { - gen_fetchop!(u32, 32, "or"); + gen_fetchop!(u32, "or"); } // if is_64bit: #[inline(always)] #[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] pub fn atomic_or_64_seq_cst() { - gen_fetchop!(u64, 64, "or"); + 
gen_fetchop!(u64, "or"); } // `xor` bitwise-xors a value atomically into the cell and returns the old value @@ -1160,24 +1398,24 @@ pub fn atomic_or_64_seq_cst() { #[inline(always)] #[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] pub fn atomic_xor_8_seq_cst() { - gen_fetchop!(u8, 8, "xor"); + gen_fetchop!(u8, "xor"); } #[inline(always)] #[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] pub fn atomic_xor_16_seq_cst() { - gen_fetchop!(u16, 16, "xor"); + gen_fetchop!(u16, "xor"); } #[inline(always)] #[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] pub fn atomic_xor_32_seq_cst() { - gen_fetchop!(u32, 32, "xor"); + gen_fetchop!(u32, "xor"); } // if is_64bit: #[inline(always)] #[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] pub fn atomic_xor_64_seq_cst() { - gen_fetchop!(u64, 64, "xor"); + gen_fetchop!(u64, "xor"); } /// Emits a machine instruction to signal the processor that it is running in a @@ -1260,34 +1498,114 @@ pub const JS_GENERATED_ATOMICS_WORSIZE: usize = 0; #[test] fn test_load() { - let foo = NonNull::from(Box::leak(Box::new([0u64; 1]))).cast::<()>(); - assert_eq!(atomic_load_8_unsynchronized(foo), 0); - assert_eq!(atomic_load_16_unsynchronized(foo), 0); - assert_eq!(atomic_load_32_unsynchronized(foo), 0); - assert_eq!(atomic_load_64_unsynchronized(foo), 0); - assert_eq!(atomic_load_8_seq_cst(foo), 0); - assert_eq!(atomic_load_16_seq_cst(foo), 0); - assert_eq!(atomic_load_32_seq_cst(foo), 0); - assert_eq!(atomic_load_64_seq_cst(foo), 0); + let foo = NonNull::from(Box::leak(Box::new([0xFFFF_FFFF_FFFF_FFFFu64; 1]))).cast::<()>(); + + assert_eq!(atomic_load_8_unsynchronized(foo), 0xFF); + assert_eq!(unsafe { foo.cast::().read() }, 0xFFFF_FFFF_FFFF_FFFF); + + assert_eq!(atomic_load_16_unsynchronized(foo), 0xFFFF); + assert_eq!(unsafe { foo.cast::().read() }, 0xFFFF_FFFF_FFFF_FFFF); + + assert_eq!(atomic_load_32_unsynchronized(foo), 0xFFFF_FFFF); + assert_eq!(unsafe { foo.cast::().read() }, 0xFFFF_FFFF_FFFF_FFFF); + + assert_eq!(atomic_load_64_unsynchronized(foo), 0xFFFF_FFFF_FFFF_FFFF); + assert_eq!(unsafe { foo.cast::().read() }, 0xFFFF_FFFF_FFFF_FFFF); + + assert_eq!(atomic_load_8_seq_cst(foo), 0xFF); + assert_eq!(unsafe { foo.cast::().read() }, 0xFFFF_FFFF_FFFF_FFFF); + + assert_eq!(atomic_load_16_seq_cst(foo), 0xFFFF); + assert_eq!(unsafe { foo.cast::().read() }, 0xFFFF_FFFF_FFFF_FFFF); + + assert_eq!(atomic_load_32_seq_cst(foo), 0xFFFF_FFFF); + assert_eq!(unsafe { foo.cast::().read() }, 0xFFFF_FFFF_FFFF_FFFF); + + assert_eq!(atomic_load_64_seq_cst(foo), 0xFFFF_FFFF_FFFF_FFFF); + assert_eq!(unsafe { foo.cast::().read() }, 0xFFFF_FFFF_FFFF_FFFF); + + let _ = unsafe { Box::from_raw(foo.cast::().as_ptr()) }; } #[test] fn test_store() { let foo = NonNull::from(Box::leak(Box::new([0u64; 1]))).cast::<()>(); - atomic_store_8_unsynchronized(foo, 1); - assert_eq!(atomic_load_8_unsynchronized(foo), 1); - atomic_store_16_unsynchronized(foo, 2); - assert_eq!(atomic_load_16_unsynchronized(foo), 2); - atomic_store_32_unsynchronized(foo, 3); - assert_eq!(atomic_load_32_unsynchronized(foo), 3); - atomic_store_64_unsynchronized(foo, 4); - assert_eq!(atomic_load_64_unsynchronized(foo), 4); - atomic_store_8_seq_cst(foo, 5); - assert_eq!(atomic_load_8_seq_cst(foo), 5); - atomic_store_16_seq_cst(foo, 6); - assert_eq!(atomic_load_16_seq_cst(foo), 6); - atomic_store_32_seq_cst(foo, 7); - assert_eq!(atomic_load_32_seq_cst(foo), 7); - atomic_store_64_seq_cst(foo, 8); - 
assert_eq!(atomic_load_64_seq_cst(foo), 8); + + atomic_store_8_unsynchronized(foo, 0xFF); + assert_eq!(atomic_load_8_unsynchronized(foo), 0xFF); + assert_eq!(unsafe { foo.cast::().read() }, 0xFF); + + atomic_store_16_unsynchronized(foo, 0xFFFF); + assert_eq!(atomic_load_16_unsynchronized(foo), 0xFFFF); + assert_eq!(unsafe { foo.cast::().read() }, 0xFFFF); + + atomic_store_32_unsynchronized(foo, 0xFFFF_FFFF); + assert_eq!(atomic_load_32_unsynchronized(foo), 0xFFFF_FFFF); + assert_eq!(unsafe { foo.cast::().read() }, 0xFFFF_FFFF); + + atomic_store_64_unsynchronized(foo, 0xFFFF_FFFF_FFFF_FFFF); + assert_eq!(atomic_load_64_unsynchronized(foo), 0xFFFF_FFFF_FFFF_FFFF); + assert_eq!(unsafe { foo.cast::().read() }, 0xFFFF_FFFF_FFFF_FFFF); + + atomic_store_64_unsynchronized(foo, 0x0); + assert_eq!(atomic_load_64_unsynchronized(foo), 0x0); + assert_eq!(unsafe { foo.cast::().read() }, 0x0); + + atomic_store_8_seq_cst(foo, 0xFF); + assert_eq!(atomic_load_8_seq_cst(foo), 0xFF); + assert_eq!(unsafe { foo.cast::().read() }, 0xFF); + + atomic_store_16_seq_cst(foo, 0xFFFF); + assert_eq!(atomic_load_16_seq_cst(foo), 0xFFFF); + assert_eq!(unsafe { foo.cast::().read() }, 0xFFFF); + + atomic_store_32_seq_cst(foo, 0xFFFF_FFFF); + assert_eq!(atomic_load_32_seq_cst(foo), 0xFFFF_FFFF); + assert_eq!(unsafe { foo.cast::().read() }, 0xFFFF_FFFF); + + atomic_store_64_seq_cst(foo, 0xFFFF_FFFF_FFFF_FFFF); + assert_eq!(atomic_load_64_seq_cst(foo), 0xFFFF_FFFF_FFFF_FFFF); + assert_eq!(unsafe { foo.cast::().read() }, 0xFFFF_FFFF_FFFF_FFFF); + + let _ = unsafe { Box::from_raw(foo.cast::().as_ptr()) }; +} + +#[test] +fn test_exchange() { + let foo = NonNull::from(Box::leak(Box::new([0u64; 1]))).cast::<()>(); + + assert_eq!(atomic_exchange_8_seq_cst(foo, 0xFF), 0, "u8 initial"); + assert_eq!(atomic_exchange_8_seq_cst(foo, 0), 0xFF, "u8 subsequent"); + assert_eq!(unsafe { foo.cast::().read() }, 0); + + assert_eq!(atomic_exchange_16_seq_cst(foo, 0xFFFF), 0, "u16 initial"); + assert_eq!(unsafe { foo.cast::().read() }, 0xFFFF); + assert_eq!(atomic_exchange_16_seq_cst(foo, 0), 0xFFFF, "u16 subsequent"); + assert_eq!(unsafe { foo.cast::().read() }, 0); + + assert_eq!( + atomic_exchange_32_seq_cst(foo, 0xFFFF_FFFF), + 0, + "u32 initial" + ); + assert_eq!( + atomic_exchange_32_seq_cst(foo, 0), + 0xFFFF_FFFF, + "u32 subsequent" + ); + assert_eq!(unsafe { foo.cast::().read() }, 0); + + assert_eq!( + atomic_exchange_64_seq_cst(foo, 0xFFFF_FFFF_FFFF_FFFF), + 0, + "u64 initial" + ); + assert_eq!( + atomic_exchange_64_seq_cst(foo, 0), + 0xFFFF_FFFF_FFFF_FFFF, + "u64 subsequent" + ); + assert_eq!(unsafe { foo.cast::().read() }, 0); + + let _ = unsafe { Box::from_raw(foo.cast::().as_ptr()) }; } From fbc580b02e95b3a5f09f94418cf1776aba057e75 Mon Sep 17 00:00:00 2001 From: Aapo Alasuutari Date: Wed, 17 Sep 2025 15:38:56 +0300 Subject: [PATCH 08/25] feat: aarch exchange --- ecmascript_atomics/lib.rs | 188 +++++++++++++------------------------- 1 file changed, 64 insertions(+), 124 deletions(-) diff --git a/ecmascript_atomics/lib.rs b/ecmascript_atomics/lib.rs index eb4b07092..0fb0a06d7 100644 --- a/ecmascript_atomics/lib.rs +++ b/ecmascript_atomics/lib.rs @@ -353,40 +353,22 @@ macro_rules! 
gen_exchange { } #[cfg(target_arch = "aarch64")] - { - // insns = "" - // insns += fmt_insn("dmb ish") - // insns += fmt_insn("0:") - // if size == 8: - // insns += fmt_insn("ldxrb %w[res], [%x[addr]]") - // insns += fmt_insn("stxrb %w[scratch], %w[val], [%x[addr]]") - // elif size == 16: - // insns += fmt_insn("ldxrh %w[res], [%x[addr]]") - // insns += fmt_insn("stxrh %w[scratch], %w[val], [%x[addr]]") - // elif size == 32: - // insns += fmt_insn("ldxr %w[res], [%x[addr]]") - // insns += fmt_insn("stxr %w[scratch], %w[val], [%x[addr]]") - // else: - // assert size == 64 - // insns += fmt_insn("ldxr %x[res], [%x[addr]]") - // insns += fmt_insn("stxr %w[scratch], %x[val], [%x[addr]]") - // insns += fmt_insn("cbnz %w[scratch], 0b") - // insns += fmt_insn("dmb ish") - // return """ - // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { - // %(cpp_type)s res; - // uint32_t scratch; - // asm volatile (%(insns)s - // : [res] "=&r"(res), [scratch] "=&r"(scratch) - // : [addr] "r" (addr), [val] "r"(val) - // : "memory", "cc"); - // return res; - // }""" % { - // "cpp_type": cpp_type, - // "fun_name": fun_name, - // "insns": insns, - // } - todo!(); + unsafe { + let res: u8; + core::arch::asm!( + "dmb ish", + "0:", + "ldxr {res:w} [{ptr}]", + "stxr {scratch:w}, {val:w}, [{ptr}]", + "cnz {scratch:w}, 0b", + "dmb ish", + ptr = in(reg) ptr, + val = in(reg) $val, + res = lateout(reg) res, + scratch = lateout(reg) _, + options(nostack) + ); + $val = res; } #[cfg(target_arch = "arm")] @@ -441,40 +423,26 @@ macro_rules! gen_exchange { } #[cfg(target_arch = "aarch64")] - { - // insns = "" - // insns += fmt_insn("dmb ish") - // insns += fmt_insn("0:") - // if size == 8: - // insns += fmt_insn("ldxrb %w[res], [%x[addr]]") - // insns += fmt_insn("stxrb %w[scratch], %w[val], [%x[addr]]") - // elif size == 16: + unsafe { + let res: u16; + core::arch::asm!( + "dmb ish", + "0:", + "ldxr {res:w} [{ptr}]", + "stxr {scratch:w}, {val:w}, [{ptr}]", + "cnz {scratch:w}, 0b", + "dmb ish", + ptr = in(reg) ptr, + val = in(reg) $val, + res = lateout(reg) res, + scratch = lateout(reg) _, + options(nostack) + ); + $val = res; // insns += fmt_insn("ldxrh %w[res], [%x[addr]]") // insns += fmt_insn("stxrh %w[scratch], %w[val], [%x[addr]]") - // elif size == 32: - // insns += fmt_insn("ldxr %w[res], [%x[addr]]") - // insns += fmt_insn("stxr %w[scratch], %w[val], [%x[addr]]") - // else: - // assert size == 64 - // insns += fmt_insn("ldxr %x[res], [%x[addr]]") - // insns += fmt_insn("stxr %w[scratch], %x[val], [%x[addr]]") // insns += fmt_insn("cbnz %w[scratch], 0b") // insns += fmt_insn("dmb ish") - // return """ - // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { - // %(cpp_type)s res; - // uint32_t scratch; - // asm volatile (%(insns)s - // : [res] "=&r"(res), [scratch] "=&r"(scratch) - // : [addr] "r" (addr), [val] "r"(val) - // : "memory", "cc"); - // return res; - // }""" % { - // "cpp_type": cpp_type, - // "fun_name": fun_name, - // "insns": insns, - // } - todo!(); } #[cfg(target_arch = "arm")] @@ -529,40 +497,25 @@ macro_rules! 
gen_exchange { } #[cfg(target_arch = "aarch64")] - { - // insns = "" - // insns += fmt_insn("dmb ish") - // insns += fmt_insn("0:") - // if size == 8: - // insns += fmt_insn("ldxrb %w[res], [%x[addr]]") - // insns += fmt_insn("stxrb %w[scratch], %w[val], [%x[addr]]") - // elif size == 16: - // insns += fmt_insn("ldxrh %w[res], [%x[addr]]") - // insns += fmt_insn("stxrh %w[scratch], %w[val], [%x[addr]]") + unsafe { + let res: u32; + core::arch::asm!( + "dmb ish", + "0:", + "ldxr {res:w} [{ptr}]", + "stxr {scratch:w}, {val:w}, [{ptr}]", + "cnz {scratch:w}, 0b", + "dmb ish", + ptr = in(reg) ptr, + val = in(reg) $val, + res = lateout(reg) res, + scratch = lateout(reg) _, + options(nostack) + ); + $val = res; // elif size == 32: // insns += fmt_insn("ldxr %w[res], [%x[addr]]") // insns += fmt_insn("stxr %w[scratch], %w[val], [%x[addr]]") - // else: - // assert size == 64 - // insns += fmt_insn("ldxr %x[res], [%x[addr]]") - // insns += fmt_insn("stxr %w[scratch], %x[val], [%x[addr]]") - // insns += fmt_insn("cbnz %w[scratch], 0b") - // insns += fmt_insn("dmb ish") - // return """ - // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { - // %(cpp_type)s res; - // uint32_t scratch; - // asm volatile (%(insns)s - // : [res] "=&r"(res), [scratch] "=&r"(scratch) - // : [addr] "r" (addr), [val] "r"(val) - // : "memory", "cc"); - // return res; - // }""" % { - // "cpp_type": cpp_type, - // "fun_name": fun_name, - // "insns": insns, - // } - todo!(); } #[cfg(target_arch = "arm")] @@ -617,40 +570,27 @@ macro_rules! gen_exchange { } #[cfg(target_arch = "aarch64")] - { - // insns = "" - // insns += fmt_insn("dmb ish") - // insns += fmt_insn("0:") - // if size == 8: - // insns += fmt_insn("ldxrb %w[res], [%x[addr]]") - // insns += fmt_insn("stxrb %w[scratch], %w[val], [%x[addr]]") - // elif size == 16: - // insns += fmt_insn("ldxrh %w[res], [%x[addr]]") - // insns += fmt_insn("stxrh %w[scratch], %w[val], [%x[addr]]") - // elif size == 32: - // insns += fmt_insn("ldxr %w[res], [%x[addr]]") - // insns += fmt_insn("stxr %w[scratch], %w[val], [%x[addr]]") - // else: + unsafe { + let res: u64; + core::arch::asm!( + "dmb ish", + "0:", + "ldxr {res:x} [{ptr}]", + "stxr {scratch:x}, {val:x}, [{ptr}]", + "cnz {scratch:x}, 0b", + "dmb ish", + ptr = in(reg) ptr, + val = in(reg) $val, + res = lateout(reg) res, + scratch = lateout(reg) _, + options(nostack) + ); + $val = res; // assert size == 64 // insns += fmt_insn("ldxr %x[res], [%x[addr]]") // insns += fmt_insn("stxr %w[scratch], %x[val], [%x[addr]]") // insns += fmt_insn("cbnz %w[scratch], 0b") // insns += fmt_insn("dmb ish") - // return """ - // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { - // %(cpp_type)s res; - // uint32_t scratch; - // asm volatile (%(insns)s - // : [res] "=&r"(res), [scratch] "=&r"(scratch) - // : [addr] "r" (addr), [val] "r"(val) - // : "memory", "cc"); - // return res; - // }""" % { - // "cpp_type": cpp_type, - // "fun_name": fun_name, - // "insns": insns, - // } - todo!(); } #[cfg(target_arch = "arm")] From bc26095d3e63a3466829599dd34bc473c0bf8de9 Mon Sep 17 00:00:00 2001 From: Aapo Alasuutari Date: Wed, 17 Sep 2025 15:48:10 +0300 Subject: [PATCH 09/25] fix --- ecmascript_atomics/lib.rs | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/ecmascript_atomics/lib.rs b/ecmascript_atomics/lib.rs index 0fb0a06d7..a517e7ef1 100644 --- a/ecmascript_atomics/lib.rs +++ b/ecmascript_atomics/lib.rs @@ -358,14 +358,14 @@ macro_rules! 
gen_exchange { core::arch::asm!( "dmb ish", "0:", - "ldxr {res:w} [{ptr}]", + "ldxr {res:w}, [{ptr}]", "stxr {scratch:w}, {val:w}, [{ptr}]", - "cnz {scratch:w}, 0b", + "cbnz {scratch:w}, 0b", "dmb ish", - ptr = in(reg) ptr, - val = in(reg) $val, res = lateout(reg) res, scratch = lateout(reg) _, + ptr = in(reg) ptr, + val = in(reg) $val, options(nostack) ); $val = res; @@ -428,14 +428,14 @@ macro_rules! gen_exchange { core::arch::asm!( "dmb ish", "0:", - "ldxr {res:w} [{ptr}]", + "ldxr {res:w}, [{ptr}]", "stxr {scratch:w}, {val:w}, [{ptr}]", - "cnz {scratch:w}, 0b", + "cbnz {scratch:w}, 0b", "dmb ish", - ptr = in(reg) ptr, - val = in(reg) $val, res = lateout(reg) res, scratch = lateout(reg) _, + ptr = in(reg) ptr, + val = in(reg) $val, options(nostack) ); $val = res; @@ -502,14 +502,14 @@ macro_rules! gen_exchange { core::arch::asm!( "dmb ish", "0:", - "ldxr {res:w} [{ptr}]", + "ldxr {res:w}, [{ptr}]", "stxr {scratch:w}, {val:w}, [{ptr}]", - "cnz {scratch:w}, 0b", + "cbnz {scratch:w}, 0b", "dmb ish", - ptr = in(reg) ptr, - val = in(reg) $val, res = lateout(reg) res, scratch = lateout(reg) _, + ptr = in(reg) ptr, + val = in(reg) $val, options(nostack) ); $val = res; @@ -575,14 +575,14 @@ macro_rules! gen_exchange { core::arch::asm!( "dmb ish", "0:", - "ldxr {res:x} [{ptr}]", + "ldxr {res:x}, [{ptr}]", "stxr {scratch:x}, {val:x}, [{ptr}]", - "cnz {scratch:x}, 0b", + "cbnz {scratch:x}, 0b", "dmb ish", - ptr = in(reg) ptr, - val = in(reg) $val, res = lateout(reg) res, scratch = lateout(reg) _, + ptr = in(reg) ptr, + val = in(reg) $val, options(nostack) ); $val = res; From 3332a1e3809a0824e8ed2ad0a6a70dd56aee3f3f Mon Sep 17 00:00:00 2001 From: Aapo Alasuutari Date: Wed, 17 Sep 2025 15:52:21 +0300 Subject: [PATCH 10/25] asd --- ecmascript_atomics/lib.rs | 44 +++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/ecmascript_atomics/lib.rs b/ecmascript_atomics/lib.rs index a517e7ef1..3bd2750e8 100644 --- a/ecmascript_atomics/lib.rs +++ b/ecmascript_atomics/lib.rs @@ -355,6 +355,7 @@ macro_rules! gen_exchange { #[cfg(target_arch = "aarch64")] unsafe { let res: u8; + let scratch: u8; core::arch::asm!( "dmb ish", "0:", @@ -363,7 +364,7 @@ macro_rules! gen_exchange { "cbnz {scratch:w}, 0b", "dmb ish", res = lateout(reg) res, - scratch = lateout(reg) _, + scratch = lateout(reg) scratch, ptr = in(reg) ptr, val = in(reg) $val, options(nostack) @@ -372,20 +373,24 @@ macro_rules! gen_exchange { } #[cfg(target_arch = "arm")] - { - // insns = "" - // insns += fmt_insn("dmb sy") - // insns += fmt_insn("0:") - // if size == 8: - // insns += fmt_insn("ldrexb %[res], [%[addr]]") - // insns += fmt_insn("strexb %[scratch], %[val], [%[addr]]") - // elif size == 16: - // insns += fmt_insn("ldrexh %[res], [%[addr]]") - // insns += fmt_insn("strexh %[scratch], %[val], [%[addr]]") - // else: - // assert size == 32 - // insns += fmt_insn("ldrex %[res], [%[addr]]") - // insns += fmt_insn("strex %[scratch], %[val], [%[addr]]") + unsafe { + let res: u8; + let scratch: u8; + core::arch::asm!( + "dmb sy", + "0:", + "ldrex {res:w}, [{ptr}]", + "strex {scratch:w}, {val:w}, [{ptr}]", + "cmp {scratch:w}, #1", + "beq 0b", + "dmb sy", + res = lateout(reg) res, + scratch = lateout(reg) scratch, + ptr = in(reg) ptr, + val = in(reg) $val, + options(nostack) + ); + $val = res; // insns += fmt_insn("cmp %[scratch], #1") // insns += fmt_insn("beq 0b") // insns += fmt_insn("dmb sy") @@ -425,6 +430,7 @@ macro_rules! 
gen_exchange { #[cfg(target_arch = "aarch64")] unsafe { let res: u16; + let scratch: u16; core::arch::asm!( "dmb ish", "0:", @@ -433,7 +439,7 @@ macro_rules! gen_exchange { "cbnz {scratch:w}, 0b", "dmb ish", res = lateout(reg) res, - scratch = lateout(reg) _, + scratch = lateout(reg) scratch, ptr = in(reg) ptr, val = in(reg) $val, options(nostack) @@ -499,6 +505,7 @@ macro_rules! gen_exchange { #[cfg(target_arch = "aarch64")] unsafe { let res: u32; + let scratch: u32; core::arch::asm!( "dmb ish", "0:", @@ -507,7 +514,7 @@ macro_rules! gen_exchange { "cbnz {scratch:w}, 0b", "dmb ish", res = lateout(reg) res, - scratch = lateout(reg) _, + scratch = lateout(reg) scratch, ptr = in(reg) ptr, val = in(reg) $val, options(nostack) @@ -572,6 +579,7 @@ macro_rules! gen_exchange { #[cfg(target_arch = "aarch64")] unsafe { let res: u64; + let scratch: u64; core::arch::asm!( "dmb ish", "0:", @@ -580,7 +588,7 @@ macro_rules! gen_exchange { "cbnz {scratch:x}, 0b", "dmb ish", res = lateout(reg) res, - scratch = lateout(reg) _, + scratch = lateout(reg) scratch, ptr = in(reg) ptr, val = in(reg) $val, options(nostack) From 8d29311d68ced2136dc3f290ce437f24ec7c8e9c Mon Sep 17 00:00:00 2001 From: Aapo Alasuutari Date: Wed, 17 Sep 2025 15:56:43 +0300 Subject: [PATCH 11/25] clobber --- ecmascript_atomics/lib.rs | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/ecmascript_atomics/lib.rs b/ecmascript_atomics/lib.rs index 3bd2750e8..a5a5b354a 100644 --- a/ecmascript_atomics/lib.rs +++ b/ecmascript_atomics/lib.rs @@ -355,7 +355,6 @@ macro_rules! gen_exchange { #[cfg(target_arch = "aarch64")] unsafe { let res: u8; - let scratch: u8; core::arch::asm!( "dmb ish", "0:", @@ -364,7 +363,7 @@ macro_rules! gen_exchange { "cbnz {scratch:w}, 0b", "dmb ish", res = lateout(reg) res, - scratch = lateout(reg) scratch, + scratch = out(reg) _, ptr = in(reg) ptr, val = in(reg) $val, options(nostack) @@ -375,7 +374,6 @@ macro_rules! gen_exchange { #[cfg(target_arch = "arm")] unsafe { let res: u8; - let scratch: u8; core::arch::asm!( "dmb sy", "0:", @@ -385,7 +383,7 @@ macro_rules! gen_exchange { "beq 0b", "dmb sy", res = lateout(reg) res, - scratch = lateout(reg) scratch, + scratch = out(reg) _, ptr = in(reg) ptr, val = in(reg) $val, options(nostack) @@ -430,7 +428,6 @@ macro_rules! gen_exchange { #[cfg(target_arch = "aarch64")] unsafe { let res: u16; - let scratch: u16; core::arch::asm!( "dmb ish", "0:", @@ -439,7 +436,7 @@ macro_rules! gen_exchange { "cbnz {scratch:w}, 0b", "dmb ish", res = lateout(reg) res, - scratch = lateout(reg) scratch, + scratch = out(reg) _, ptr = in(reg) ptr, val = in(reg) $val, options(nostack) @@ -505,7 +502,6 @@ macro_rules! gen_exchange { #[cfg(target_arch = "aarch64")] unsafe { let res: u32; - let scratch: u32; core::arch::asm!( "dmb ish", "0:", @@ -514,7 +510,7 @@ macro_rules! gen_exchange { "cbnz {scratch:w}, 0b", "dmb ish", res = lateout(reg) res, - scratch = lateout(reg) scratch, + scratch = out(reg) _, ptr = in(reg) ptr, val = in(reg) $val, options(nostack) @@ -579,7 +575,6 @@ macro_rules! gen_exchange { #[cfg(target_arch = "aarch64")] unsafe { let res: u64; - let scratch: u64; core::arch::asm!( "dmb ish", "0:", @@ -588,7 +583,7 @@ macro_rules! 
gen_exchange { "cbnz {scratch:x}, 0b", "dmb ish", res = lateout(reg) res, - scratch = lateout(reg) scratch, + scratch = out(reg) _, ptr = in(reg) ptr, val = in(reg) $val, options(nostack) From 184f9a97744ec06d0be793f83ff692785569ae88 Mon Sep 17 00:00:00 2001 From: Aapo Alasuutari Date: Wed, 17 Sep 2025 16:08:38 +0300 Subject: [PATCH 12/25] fix --- ecmascript_atomics/lib.rs | 174 +++++++++----------------------------- 1 file changed, 42 insertions(+), 132 deletions(-) diff --git a/ecmascript_atomics/lib.rs b/ecmascript_atomics/lib.rs index a5a5b354a..feb9ebfec 100644 --- a/ecmascript_atomics/lib.rs +++ b/ecmascript_atomics/lib.rs @@ -362,7 +362,7 @@ macro_rules! gen_exchange { "stxr {scratch:w}, {val:w}, [{ptr}]", "cbnz {scratch:w}, 0b", "dmb ish", - res = lateout(reg) res, + res = out(reg) res, scratch = out(reg) _, ptr = in(reg) ptr, val = in(reg) $val, @@ -382,31 +382,13 @@ macro_rules! gen_exchange { "cmp {scratch:w}, #1", "beq 0b", "dmb sy", - res = lateout(reg) res, + res = out(reg) res, scratch = out(reg) _, ptr = in(reg) ptr, val = in(reg) $val, options(nostack) ); $val = res; - // insns += fmt_insn("cmp %[scratch], #1") - // insns += fmt_insn("beq 0b") - // insns += fmt_insn("dmb sy") - // return """ - // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { - // %(cpp_type)s res; - // uint32_t scratch; - // asm volatile (%(insns)s - // : [res] "=&r"(res), [scratch] "=&r"(scratch) - // : [addr] "r" (addr), [val] "r"(val) - // : "memory", "cc"); - // return res; - // }""" % { - // "cpp_type": cpp_type, - // "fun_name": fun_name, - // "insns": insns, - // } - todo!(); } return $val; @@ -435,52 +417,33 @@ macro_rules! gen_exchange { "stxr {scratch:w}, {val:w}, [{ptr}]", "cbnz {scratch:w}, 0b", "dmb ish", - res = lateout(reg) res, + res = out(reg) res, scratch = out(reg) _, ptr = in(reg) ptr, val = in(reg) $val, options(nostack) ); $val = res; - // insns += fmt_insn("ldxrh %w[res], [%x[addr]]") - // insns += fmt_insn("stxrh %w[scratch], %w[val], [%x[addr]]") - // insns += fmt_insn("cbnz %w[scratch], 0b") - // insns += fmt_insn("dmb ish") } #[cfg(target_arch = "arm")] - { - // insns = "" - // insns += fmt_insn("dmb sy") - // insns += fmt_insn("0:") - // if size == 8: - // insns += fmt_insn("ldrexb %[res], [%[addr]]") - // insns += fmt_insn("strexb %[scratch], %[val], [%[addr]]") - // elif size == 16: - // insns += fmt_insn("ldrexh %[res], [%[addr]]") - // insns += fmt_insn("strexh %[scratch], %[val], [%[addr]]") - // else: - // assert size == 32 - // insns += fmt_insn("ldrex %[res], [%[addr]]") - // insns += fmt_insn("strex %[scratch], %[val], [%[addr]]") - // insns += fmt_insn("cmp %[scratch], #1") - // insns += fmt_insn("beq 0b") - // insns += fmt_insn("dmb sy") - // return """ - // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { - // %(cpp_type)s res; - // uint32_t scratch; - // asm volatile (%(insns)s - // : [res] "=&r"(res), [scratch] "=&r"(scratch) - // : [addr] "r" (addr), [val] "r"(val) - // : "memory", "cc"); - // return res; - // }""" % { - // "cpp_type": cpp_type, - // "fun_name": fun_name, - // "insns": insns, - // } - todo!(); + unsafe { + let res: u8; + core::arch::asm!( + "dmb sy", + "0:", + "ldrex {res:w}, [{ptr}]", + "strex {scratch:w}, {val:w}, [{ptr}]", + "cmp {scratch:w}, #1", + "beq 0b", + "dmb sy", + res = out(reg) res, + scratch = out(reg) _, + ptr = in(reg) ptr, + val = in(reg) $val, + options(nostack) + ); + $val = res; } return $val; @@ -509,51 +472,33 @@ macro_rules! 
gen_exchange { "stxr {scratch:w}, {val:w}, [{ptr}]", "cbnz {scratch:w}, 0b", "dmb ish", - res = lateout(reg) res, + res = out(reg) res, scratch = out(reg) _, ptr = in(reg) ptr, val = in(reg) $val, options(nostack) ); $val = res; - // elif size == 32: - // insns += fmt_insn("ldxr %w[res], [%x[addr]]") - // insns += fmt_insn("stxr %w[scratch], %w[val], [%x[addr]]") } #[cfg(target_arch = "arm")] - { - // insns = "" - // insns += fmt_insn("dmb sy") - // insns += fmt_insn("0:") - // if size == 8: - // insns += fmt_insn("ldrexb %[res], [%[addr]]") - // insns += fmt_insn("strexb %[scratch], %[val], [%[addr]]") - // elif size == 16: - // insns += fmt_insn("ldrexh %[res], [%[addr]]") - // insns += fmt_insn("strexh %[scratch], %[val], [%[addr]]") - // else: - // assert size == 32 - // insns += fmt_insn("ldrex %[res], [%[addr]]") - // insns += fmt_insn("strex %[scratch], %[val], [%[addr]]") - // insns += fmt_insn("cmp %[scratch], #1") - // insns += fmt_insn("beq 0b") - // insns += fmt_insn("dmb sy") - // return """ - // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { - // %(cpp_type)s res; - // uint32_t scratch; - // asm volatile (%(insns)s - // : [res] "=&r"(res), [scratch] "=&r"(scratch) - // : [addr] "r" (addr), [val] "r"(val) - // : "memory", "cc"); - // return res; - // }""" % { - // "cpp_type": cpp_type, - // "fun_name": fun_name, - // "insns": insns, - // } - todo!(); + unsafe { + let res: u8; + core::arch::asm!( + "dmb sy", + "0:", + "ldrex {res:w}, [{ptr}]", + "strex {scratch:w}, {val:w}, [{ptr}]", + "cmp {scratch:w}, #1", + "beq 0b", + "dmb sy", + res = out(reg) res, + scratch = out(reg) _, + ptr = in(reg) ptr, + val = in(reg) $val, + options(nostack) + ); + $val = res; } return $val; @@ -579,56 +524,21 @@ macro_rules! gen_exchange { "dmb ish", "0:", "ldxr {res:x}, [{ptr}]", - "stxr {scratch:x}, {val:x}, [{ptr}]", - "cbnz {scratch:x}, 0b", + "stxr {scratch:w}, {val:x}, [{ptr}]", + "cbnz {scratch:w}, 0b", "dmb ish", - res = lateout(reg) res, + res = out(reg) res, scratch = out(reg) _, ptr = in(reg) ptr, val = in(reg) $val, options(nostack) ); $val = res; - // assert size == 64 - // insns += fmt_insn("ldxr %x[res], [%x[addr]]") - // insns += fmt_insn("stxr %w[scratch], %x[val], [%x[addr]]") - // insns += fmt_insn("cbnz %w[scratch], 0b") - // insns += fmt_insn("dmb ish") } #[cfg(target_arch = "arm")] { - // insns = "" - // insns += fmt_insn("dmb sy") - // insns += fmt_insn("0:") - // if size == 8: - // insns += fmt_insn("ldrexb %[res], [%[addr]]") - // insns += fmt_insn("strexb %[scratch], %[val], [%[addr]]") - // elif size == 16: - // insns += fmt_insn("ldrexh %[res], [%[addr]]") - // insns += fmt_insn("strexh %[scratch], %[val], [%[addr]]") - // else: - // assert size == 32 - // insns += fmt_insn("ldrex %[res], [%[addr]]") - // insns += fmt_insn("strex %[scratch], %[val], [%[addr]]") - // insns += fmt_insn("cmp %[scratch], #1") - // insns += fmt_insn("beq 0b") - // insns += fmt_insn("dmb sy") - // return """ - // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { - // %(cpp_type)s res; - // uint32_t scratch; - // asm volatile (%(insns)s - // : [res] "=&r"(res), [scratch] "=&r"(scratch) - // : [addr] "r" (addr), [val] "r"(val) - // : "memory", "cc"); - // return res; - // }""" % { - // "cpp_type": cpp_type, - // "fun_name": fun_name, - // "insns": insns, - // } - todo!(); + const { panic!("Unexpected size") } } return $val; From a9f2df44ff3acfbb3d08b86f05af6946b366e138 Mon Sep 17 00:00:00 2001 From: Aapo Alasuutari Date: Thu, 18 Sep 
2025 08:08:04 +0300 Subject: [PATCH 13/25] gen_cmpxchg --- ecmascript_atomics/lib.rs | 567 ++++++++++++++++++++++++++------------ 1 file changed, 385 insertions(+), 182 deletions(-) diff --git a/ecmascript_atomics/lib.rs b/ecmascript_atomics/lib.rs index feb9ebfec..fdbc442de 100644 --- a/ecmascript_atomics/lib.rs +++ b/ecmascript_atomics/lib.rs @@ -546,193 +546,305 @@ macro_rules! gen_exchange { } macro_rules! gen_cmpxchg { - ($type: ty) => { + (u8, $ptr: ident, $old_val: ident, $new_val: ident) => { + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. + let ptr = unsafe { &mut *$ptr.as_ptr() }; + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + unsafe { + core::arch::asm!( + "lock; cmpxchg [{ptr}], {new_val}", + // Load old_val into RAX as input/output register + inout("al") $old_val, + ptr = in(reg) ptr, + new_val = in(reg_byte) $new_val, + options(nostack) + ); + } + + #[cfg(target_arch = "aarch64")] + unsafe { + let res: u8; + core::arch::asm!( + "dmb ish", + "0:", + "uxt {scratch:w}, {old_val:w}", + "ldxr {res:w} [{ptr}]", + "cmp {res:w}, {scratch:w}", + "b.ne 1f", + "stxr {scratch:w}, {new_val:w}, [{ptr}]", + "cbnz {scratch:w}, 0b", + "1: dmb ish", + res = out(reg) res, + scratch = out(reg) _, + ptr = in(reg) ptr, + old_val = in(reg) $old_val, + new_val = in(reg) $new_val, + options(nostack) + ); + $old_val = res; + } + + #[cfg(target_arch = "arm")] { - todo!(); + let res: u8; + core::arch::asm!( + "dmb sy", + "0:", + "uxt {scratch:w}, {old_val:w}", + "ldrex {res:w} [{ptr}]", + "cmp {res:w}, {scratch:w}", + "bne 1f", + "strex {scratch:w}, {new_val:w}, [{ptr}]", + "cmp {scratch:w}, #1", + "beq 0b", + "1: dmb sy", + res = out(reg) res, + scratch = out(reg) _, + ptr = in(reg) ptr, + old_val = in(reg) $old_val, + new_val = in(reg) $new_val, + options(nostack) + ); + $old_val = res; + } + + return $old_val; + }; + (u16, $ptr: ident, $old_val: ident, $new_val: ident) => { + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. + let ptr = unsafe { &mut *$ptr.as_ptr() }; + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + unsafe { + core::arch::asm!( + "lock; cmpxchg [{ptr}], {new_val:x}", + // Load old_val into RAX as input/output register + inout("ax") $old_val, + ptr = in(reg) ptr, + new_val = in(reg) $new_val, + options(nostack) + ); } #[cfg(target_arch = "aarch64")] + unsafe { + let res: u16; + core::arch::asm!( + "dmb ish", + "0:", + "uxt {scratch:w}, {old_val:w}", + "ldxr {res:w} [{ptr}]", + "cmp {res:w}, {scratch:w}", + "b.ne 1f", + "stxr {scratch:w}, {new_val:w}, [{ptr}]", + "cbnz {scratch:w}, 0b", + "1: dmb ish", + res = out(reg) res, + scratch = out(reg) _, + ptr = in(reg) ptr, + old_val = in(reg) $old_val, + new_val = in(reg) $new_val, + options(nostack) + ); + $old_val = res; + } + + #[cfg(target_arch = "arm")] { - todo!(); + let res: u16; + core::arch::asm!( + "dmb sy", + "0:", + "uxt {scratch:w}, {old_val:w}", + "ldrex {res:w} [{ptr}]", + "cmp {res:w}, {scratch:w}", + "bne 1f", + "strex {scratch:w}, {new_val:w}, [{ptr}]", + "cmp {scratch:w}, #1", + "beq 0b", + "1: dmb sy", + res = out(reg) res, + scratch = out(reg) _, + ptr = in(reg) ptr, + old_val = in(reg) $old_val, + new_val = in(reg) $new_val, + options(nostack) + ); + $old_val = res; + } + + return $old_val; + }; + (u32, $ptr: ident, $old_val: ident, $new_val: ident) => { + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. 
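// Aside: LOCK CMPXCHG compares the accumulator (AL/AX/EAX/RAX) with the memory
// operand and stores the new value only on equality; either way the accumulator
// ends up holding what was in memory, which is why old_val is wired up as an
// inout register above. A hypothetical reference helper (not from the patch),
// assuming a valid, aligned `ptr`:
use core::ptr::NonNull;
use core::sync::atomic::{AtomicU16, Ordering};

unsafe fn cmpxchg_16_reference(ptr: NonNull<()>, old: u16, new: u16) -> u16 {
    let cell = unsafe { &*ptr.cast::<AtomicU16>().as_ptr() };
    match cell.compare_exchange(old, new, Ordering::SeqCst, Ordering::SeqCst) {
        // Both arms carry the previous cell contents.
        Ok(previous) | Err(previous) => previous,
    }
}
// End of aside.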
+ let ptr = unsafe { &mut *$ptr.as_ptr() }; + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + unsafe { + core::arch::asm!( + "lock; cmpxchg [{ptr}], {new_val:e}", + // Load old_val into RAX as input/output register + inout("eax") $old_val, + ptr = in(reg) ptr, + new_val = in(reg) $new_val, + options(nostack) + ); + } + + #[cfg(target_arch = "aarch64")] + unsafe { + let res: u32; + core::arch::asm!( + "dmb ish", + "0:", + "uxt {scratch:w}, {old_val:w}", + "ldxr {res:w} [{ptr}]", + "cmp {res:w}, {scratch:w}", + "b.ne 1f", + "stxr {scratch:w}, {new_val:w}, [{ptr}]", + "cbnz {scratch:w}, 0b", + "1: dmb ish", + res = out(reg) res, + scratch = out(reg) _, + ptr = in(reg) ptr, + old_val = in(reg) $old_val, + new_val = in(reg) $new_val, + options(nostack) + ); + $old_val = res; } #[cfg(target_arch = "arm")] { - todo!(); + let res: u32; + core::arch::asm!( + "dmb sy", + "0:", + "uxt {scratch:w}, {old_val:w}", + "ldrex {res:w} [{ptr}]", + "cmp {res:w}, {scratch:w}", + "bne 1f", + "strex {scratch:w}, {new_val:w}, [{ptr}]", + "cmp {scratch:w}, #1", + "beq 0b", + "1: dmb sy", + res = out(reg) res, + scratch = out(reg) _, + ptr = in(reg) ptr, + old_val = in(reg) $old_val, + new_val = in(reg) $new_val, + options(nostack) + ); + $old_val = res; } - #[expect(unreachable_code)] - const { - panic!("Unexpected arch") + return $old_val; + }; + (u64, $ptr: ident, $old_val: ident, $new_val: ident) => { + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. + let ptr = unsafe { &mut *$ptr.as_ptr() }; + + #[cfg(target_arch = "x86")] + unsafe { + let [b0, b1, b2, b3, b4, b5, b6, b7] = $old_val.to_le_bytes(); + let old_bot = u32::from_le_bytes([b0, b1, b2, b3]); + let old_top = u32::from_le_bytes([b4, b5, b6, b7]); + let [b0, b1, b2, b3, b4, b5, b6, b7] = $new_val.to_le_bytes(); + let new_bot = u32::from_le_bytes([b0, b1, b2, b3]); + let new_top = u32::from_le_bytes([b4, b5, b6, b7]); + core::arch::asm!( + "lock; cmpxchg8b [{ptr}]", + // Load old_val into EDX:EAX (high:low). + inout("edx") old_top, + inout("eax") old_bot, + ptr = in(reg) ptr, + // Load old_val into ECX:EBX (high:low). + in("ecx") new_top, + in("ebx") new_bot, + options(nostack) + ); + let [b0, b1, b2, b3] = old_bot.to_le_bytes(); + let [b4, b5, b6, b7] = old_top.to_le_bytes(); + $old_val = u64::from_le_bytes([b0, b1, b2, b3, b4, b5, b6, b7]); } - // NOTE: the assembly code must match the generated code in: - // - MacroAssembler::compareExchange - // - MacroAssembler::compareExchange64 - // if cpu_arch == "x86" and size == 64: - // // Use a +A constraint to load `oldval` into EDX:EAX as input/output. - // // `newval` is loaded into ECX:EBX. 
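// Aside: on 32-bit x86 the 64-bit compare-exchange goes through CMPXCHG8B,
// which expects the old value in EDX:EAX and the replacement in ECX:EBX. The
// to_le_bytes split/join used above is equivalent to the plain shift form
// below — hypothetical helpers, not code from the patch:
fn split_u64(v: u64) -> (u32, u32) {
    // Returns (low half, high half).
    ((v & 0xFFFF_FFFF) as u32, (v >> 32) as u32)
}

fn join_u64(low: u32, high: u32) -> u64 {
    (low as u64) | ((high as u64) << 32)
}

#[test]
fn split_join_roundtrip() {
    let v = 0x0123_4567_89AB_CDEFu64;
    let (low, high) = split_u64(v);
    assert_eq!(low, 0x89AB_CDEF);
    assert_eq!(high, 0x0123_4567);
    assert_eq!(join_u64(low, high), v);
}
// End of aside.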
- // return r""" - // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, - // %(cpp_type)s oldval, - // %(cpp_type)s newval) { - // asm volatile ("lock; cmpxchg8b (%%[addr])\n\t" - // : "+A" (oldval) - // : [addr] "r" (addr), - // "b" (uint32_t(newval & 0xffff'ffff)), - // "c" (uint32_t(newval >> 32)) - // : "memory", "cc"); - // return oldval; - // }""" % { - // "cpp_type": cpp_type, - // "fun_name": fun_name, - // } - // if cpu_arch == "arm" and size == 64: - // return r""" - // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, - // %(cpp_type)s oldval, - // %(cpp_type)s newval) { - // uint32_t oldval0 = oldval & 0xffff'ffff; - // uint32_t oldval1 = oldval >> 32; - // uint32_t newval0 = newval & 0xffff'ffff; - // uint32_t newval1 = newval >> 32; - // asm volatile ( - // "dmb sy\n\t" - // "0: ldrexd r0, r1, [%%[addr]]\n\t" - // "cmp r0, %%[oldval0]\n\t" - // "bne 1f\n\t" - // "cmp r1, %%[oldval1]\n\t" - // "bne 1f\n\t" - // "mov r2, %%[newval0]\n\t" - // "mov r3, %%[newval1]\n\t" - // "strexd r4, r2, r3, [%%[addr]]\n\t" - // "cmp r4, #1\n\t" - // "beq 0b\n\t" - // "1: dmb sy\n\t" - // "mov %%[oldval0], r0\n\t" - // "mov %%[oldval1], r1\n\t" - // : [oldval0] "+&r" (oldval0), [oldval1] "+&r"(oldval1) - // : [addr] "r" (addr), [newval0] "r" (newval0), [newval1] "r" (newval1) - // : "memory", "cc", "r0", "r1", "r2", "r3", "r4"); - // return uint64_t(oldval0) | (uint64_t(oldval1) << 32); - // }""" % { - // "cpp_type": cpp_type, - // "fun_name": fun_name, - // } - // if cpu_arch in ("x86", "x86_64"): - // // Use a +a constraint to load `oldval` into RAX as input/output register. - // insns = "" - // if size == 8: - // insns += fmt_insn("lock; cmpxchgb %[newval], (%[addr])") - // elif size == 16: - // insns += fmt_insn("lock; cmpxchgw %[newval], (%[addr])") - // elif size == 32: - // insns += fmt_insn("lock; cmpxchgl %[newval], (%[addr])") - // else: - // assert size == 64 - // insns += fmt_insn("lock; cmpxchgq %[newval], (%[addr])") - // return """ - // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, - // %(cpp_type)s oldval, - // %(cpp_type)s newval) { - // asm volatile (%(insns)s - // : [oldval] "+a" (oldval) - // : [addr] "r" (addr), [newval] "r" (newval) - // : "memory", "cc"); - // return oldval; - // }""" % { - // "cpp_type": cpp_type, - // "fun_name": fun_name, - // "insns": insns, - // } - // if cpu_arch == "aarch64": - // insns = "" - // insns += fmt_insn("dmb ish") - // insns += fmt_insn("0:") - // if size == 8: - // insns += fmt_insn("uxtb %w[scratch], %w[oldval]") - // insns += fmt_insn("ldxrb %w[res], [%x[addr]]") - // insns += fmt_insn("cmp %w[res], %w[scratch]") - // insns += fmt_insn("b.ne 1f") - // insns += fmt_insn("stxrb %w[scratch], %w[newval], [%x[addr]]") - // elif size == 16: - // insns += fmt_insn("uxth %w[scratch], %w[oldval]") - // insns += fmt_insn("ldxrh %w[res], [%x[addr]]") - // insns += fmt_insn("cmp %w[res], %w[scratch]") - // insns += fmt_insn("b.ne 1f") - // insns += fmt_insn("stxrh %w[scratch], %w[newval], [%x[addr]]") - // elif size == 32: - // insns += fmt_insn("mov %w[scratch], %w[oldval]") - // insns += fmt_insn("ldxr %w[res], [%x[addr]]") - // insns += fmt_insn("cmp %w[res], %w[scratch]") - // insns += fmt_insn("b.ne 1f") - // insns += fmt_insn("stxr %w[scratch], %w[newval], [%x[addr]]") - // else: - // assert size == 64 - // insns += fmt_insn("mov %x[scratch], %x[oldval]") - // insns += fmt_insn("ldxr %x[res], [%x[addr]]") - // insns += fmt_insn("cmp %x[res], %x[scratch]") - // insns += fmt_insn("b.ne 1f") - // insns += 
fmt_insn("stxr %w[scratch], %x[newval], [%x[addr]]") - // insns += fmt_insn("cbnz %w[scratch], 0b") - // insns += fmt_insn("1: dmb ish") - // return """ - // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, - // %(cpp_type)s oldval, - // %(cpp_type)s newval) { - // %(cpp_type)s res, scratch; - // asm volatile (%(insns)s - // : [res] "=&r" (res), [scratch] "=&r" (scratch) - // : [addr] "r" (addr), [oldval] "r"(oldval), [newval] "r" (newval) - // : "memory", "cc"); - // return res; - // }""" % { - // "cpp_type": cpp_type, - // "fun_name": fun_name, - // "insns": insns, - // } - // if cpu_arch == "arm": - // insns = "" - // insns += fmt_insn("dmb sy") - // insns += fmt_insn("0:") - // if size == 8: - // insns += fmt_insn("uxtb %[scratch], %[oldval]") - // insns += fmt_insn("ldrexb %[res], [%[addr]]") - // insns += fmt_insn("cmp %[res], %[scratch]") - // insns += fmt_insn("bne 1f") - // insns += fmt_insn("strexb %[scratch], %[newval], [%[addr]]") - // elif size == 16: - // insns += fmt_insn("uxth %[scratch], %[oldval]") - // insns += fmt_insn("ldrexh %[res], [%[addr]]") - // insns += fmt_insn("cmp %[res], %[scratch]") - // insns += fmt_insn("bne 1f") - // insns += fmt_insn("strexh %[scratch], %[newval], [%[addr]]") - // else: - // assert size == 32 - // insns += fmt_insn("mov %[scratch], %[oldval]") - // insns += fmt_insn("ldrex %[res], [%[addr]]") - // insns += fmt_insn("cmp %[res], %[scratch]") - // insns += fmt_insn("bne 1f") - // insns += fmt_insn("strex %[scratch], %[newval], [%[addr]]") - // insns += fmt_insn("cmp %[scratch], #1") - // insns += fmt_insn("beq 0b") - // insns += fmt_insn("1: dmb sy") - // return """ - // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, - // %(cpp_type)s oldval, - // %(cpp_type)s newval) { - // %(cpp_type)s res, scratch; - // asm volatile (%(insns)s - // : [res] "=&r" (res), [scratch] "=&r" (scratch) - // : [addr] "r" (addr), [oldval] "r"(oldval), [newval] "r" (newval) - // : "memory", "cc"); - // return res; - // }""" % { - // "cpp_type": cpp_type, - // "fun_name": fun_name, - // "insns": insns, - // } - // raise Exception("Unexpected arch") + + #[cfg(target_arch = "x86_64")] + unsafe { + core::arch::asm!( + "lock; cmpxchg [{ptr}], {new_val:r}", + // Load old_val into RAX as input/output register + inout("rax") $old_val, + ptr = in(reg) ptr, + new_val = in(reg) $new_val, + options(nostack) + ); + } + + #[cfg(target_arch = "aarch64")] + unsafe { + let res: u64; + core::arch::asm!( + "dmb ish", + "0:", + "uxt {scratch:w}, {old_val:w}", + "ldxr {res:w} [{ptr}]", + "cmp {res:w}, {scratch:w}", + "b.ne 1f", + "stxr {scratch:w}, {new_val:w}, [{ptr}]", + "cbnz {scratch:w}, 0b", + "1: dmb ish", + res = out(reg) res, + scratch = out(reg) _, + ptr = in(reg) ptr, + old_val = in(reg) $old_val, + new_val = in(reg) $new_val, + options(nostack) + ); + $old_val = res; + } + + #[cfg(target_arch = "arm")] + { + let [b0, b1, b2, b3, b4, b5, b6, b7] = $old_val.to_le_bytes(); + let old_bot = u32::from_le_bytes([b0, b1, b2, b3]); + let old_top = u32::from_le_bytes([b4, b5, b6, b7]); + let [b0, b1, b2, b3, b4, b5, b6, b7] = $new_val.to_le_bytes(); + let new_bot = u32::from_le_bytes([b0, b1, b2, b3]); + let new_top = u32::from_le_bytes([b4, b5, b6, b7]); + core::arch::asm!( + "dmb sy", + "0: ldrexd r0 r1 [{ptr}]", + "cmp r0 {old_bot}", + "b.ne 1f", + "cmp r1 {old_top}", + "b.ne 1f", + "mov r2, {new_bot}" + "mov r3, {new_top}" + "strexd r4, r2, r3, [{ptr}]" + "cmp r4, #1", + "beq 0b", + "1: dmb sy", + "mov {old_bot} r0", + "mov {old_top} r1", + inout(reg) old_bot, 
+ inout(reg) old_top, + ptr = in(reg) ptr, + new_bot = in(reg) new_bot, + new_top = in(reg) new_top, + out("r0") _, + out("r1") _, + out("r2") _, + out("r3") _, + out("r4") _, + options(nostack) + ); + let [b0, b1, b2, b3] = old_bot.to_le_bytes(); + let [b4, b5, b6, b7] = old_top.to_le_bytes(); + $old_val = u64::from_le_bytes([b0, b1, b2, b3, b4, b5, b6, b7]); + } + + return $old_val; }; } @@ -1152,23 +1264,23 @@ pub fn atomic_exchange_64_seq_cst(ptr: NonNull<()>, mut val: u64) -> u64 { // is stored in the cell. It always returns the value previously in the cell. #[inline(always)] #[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] -pub fn atomic_cmp_xchg_8_seq_cst() { - gen_cmpxchg!(u8); +pub fn atomic_cmp_xchg_8_seq_cst(ptr: NonNull<()>, mut old_val: u8, new_val: u8) -> u8 { + gen_cmpxchg!(u8, ptr, old_val, new_val); } #[inline(always)] #[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] -pub fn atomic_cmp_xchg_16_seq_cst() { - gen_cmpxchg!(u16); +pub fn atomic_cmp_xchg_16_seq_cst(ptr: NonNull<()>, mut old_val: u16, new_val: u16) -> u16 { + gen_cmpxchg!(u16, ptr, old_val, new_val); } #[inline(always)] #[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] -pub fn atomic_cmp_xchg_32_seq_cst() { - gen_cmpxchg!(u32); +pub fn atomic_cmp_xchg_32_seq_cst(ptr: NonNull<()>, mut old_val: u32, new_val: u32) -> u32 { + gen_cmpxchg!(u32, ptr, old_val, new_val); } #[inline(always)] #[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] -pub fn atomic_cmp_xchg_64_seq_cst() { - gen_cmpxchg!(u64); +pub fn atomic_cmp_xchg_64_seq_cst(ptr: NonNull<()>, mut old_val: u64, new_val: u64) -> u64 { + gen_cmpxchg!(u64, ptr, old_val, new_val); } // `add` adds a value atomically to the cell and returns the old value in the @@ -1462,3 +1574,94 @@ fn test_exchange() { let _ = unsafe { Box::from_raw(foo.cast::().as_ptr()) }; } + +#[test] +fn test_compare_exchange() { + let foo = NonNull::from(Box::leak(Box::new([0u64; 1]))).cast::<()>(); + + assert_eq!(atomic_cmp_xchg_8_seq_cst(foo, 0xFF, 0xFF), 0, "u8 initial"); + assert_eq!(unsafe { foo.cast::().read() }, 0); + assert_eq!(atomic_cmp_xchg_8_seq_cst(foo, 0, 0xFF), 0, "u8 initial"); + assert_eq!(unsafe { foo.cast::().read() }, 0xFF); + assert_eq!(atomic_cmp_xchg_8_seq_cst(foo, 0, 0), 0xFF, "u8 subsequent"); + assert_eq!(unsafe { foo.cast::().read() }, 0xFF); + assert_eq!( + atomic_cmp_xchg_8_seq_cst(foo, 0xFF, 0), + 0xFF, + "u8 subsequent" + ); + assert_eq!(unsafe { foo.cast::().read() }, 0); + + assert_eq!( + atomic_cmp_xchg_16_seq_cst(foo, 0xFFFF, 0xFFFF), + 0, + "u16 initial" + ); + assert_eq!(unsafe { foo.cast::().read() }, 0); + assert_eq!(atomic_cmp_xchg_16_seq_cst(foo, 0, 0xFFFF), 0, "u16 initial"); + assert_eq!(unsafe { foo.cast::().read() }, 0xFFFF); + assert_eq!( + atomic_cmp_xchg_16_seq_cst(foo, 0, 0), + 0xFFFF, + "u16 subsequent" + ); + assert_eq!(unsafe { foo.cast::().read() }, 0xFFFF); + assert_eq!( + atomic_cmp_xchg_16_seq_cst(foo, 0xFFFF, 0), + 0xFFFF, + "u16 subsequent" + ); + assert_eq!(unsafe { foo.cast::().read() }, 0); + + assert_eq!( + atomic_cmp_xchg_32_seq_cst(foo, 0xFFFF_FFFF, 0xFFFF_FFFF), + 0, + "u32 initial" + ); + assert_eq!(unsafe { foo.cast::().read() }, 0); + assert_eq!( + atomic_cmp_xchg_32_seq_cst(foo, 0, 0xFFFF_FFFF), + 0, + "u32 initial" + ); + assert_eq!(unsafe { foo.cast::().read() }, 0xFFFF_FFFF); + assert_eq!( + atomic_cmp_xchg_32_seq_cst(foo, 0, 0), + 0xFFFF_FFFF, + "u32 subsequent" + ); + assert_eq!(unsafe { 
foo.cast::().read() }, 0xFFFF_FFFF); + assert_eq!( + atomic_cmp_xchg_32_seq_cst(foo, 0xFFFF_FFFF, 0), + 0xFFFF_FFFF, + "u32 subsequent" + ); + assert_eq!(unsafe { foo.cast::().read() }, 0); + + assert_eq!( + atomic_cmp_xchg_64_seq_cst(foo, 0xFFFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF_FFFF_FFFF), + 0, + "u64 initial" + ); + assert_eq!(unsafe { foo.cast::().read() }, 0); + assert_eq!( + atomic_cmp_xchg_64_seq_cst(foo, 0, 0xFFFF_FFFF_FFFF_FFFF), + 0, + "u64 initial" + ); + assert_eq!(unsafe { foo.cast::().read() }, 0xFFFF_FFFF_FFFF_FFFF); + assert_eq!( + atomic_cmp_xchg_64_seq_cst(foo, 0, 0), + 0xFFFF_FFFF_FFFF_FFFF, + "u64 subsequent" + ); + assert_eq!(unsafe { foo.cast::().read() }, 0xFFFF_FFFF_FFFF_FFFF); + assert_eq!( + atomic_cmp_xchg_64_seq_cst(foo, 0xFFFF_FFFF_FFFF_FFFF, 0), + 0xFFFF_FFFF_FFFF_FFFF, + "u64 subsequent" + ); + assert_eq!(unsafe { foo.cast::().read() }, 0); + + let _ = unsafe { Box::from_raw(foo.cast::().as_ptr()) }; +} From 98948044eb86124383ca69f6d739fb35e1bea60f Mon Sep 17 00:00:00 2001 From: Aapo Alasuutari Date: Thu, 18 Sep 2025 08:28:18 +0300 Subject: [PATCH 14/25] cleanup and fetchop start --- ecmascript_atomics/lib.rs | 1252 +++++++++++++++++++++++++++++-------- 1 file changed, 995 insertions(+), 257 deletions(-) diff --git a/ecmascript_atomics/lib.rs b/ecmascript_atomics/lib.rs index fdbc442de..d47ecda4d 100644 --- a/ecmascript_atomics/lib.rs +++ b/ecmascript_atomics/lib.rs @@ -151,7 +151,7 @@ macro_rules! gen_load { // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. let ptr = unsafe { &mut *$ptr.as_ptr() }; - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + #[cfg(target_arch = "x86_64")] unsafe { core::arch::asm!( "mov {val:r}, [{ptr}]", @@ -173,8 +173,8 @@ macro_rules! gen_load { ); } - #[cfg(target_arch = "arm")] - { + #[cfg(any(target_arch = "x86", target_arch = "arm"))] + unsafe { const { panic!("Unexpected size") } } @@ -307,7 +307,7 @@ macro_rules! gen_store { // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. let ptr = unsafe { &mut *$ptr.as_ptr() }; - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + #[cfg(target_arch = "x86_64")] unsafe { core::arch::asm!( "mov [{ptr}], {val:r}", @@ -330,8 +330,8 @@ macro_rules! gen_store { ); } - #[cfg(target_arch = "arm")] - { + #[cfg(any(target_arch = "x86", target_arch = "arm"))] + unsafe { const { panic!("Unexpected size") } } }; @@ -507,7 +507,7 @@ macro_rules! gen_exchange { // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. let ptr = unsafe { &mut *$ptr.as_ptr() }; - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + #[cfg(target_arch = "x86_64")] unsafe { core::arch::asm!( "xchg [{ptr}], {val:r}", @@ -536,8 +536,8 @@ macro_rules! gen_exchange { $val = res; } - #[cfg(target_arch = "arm")] - { + #[cfg(any(target_arch = "x86", target_arch = "arm"))] + unsafe { const { panic!("Unexpected size") } } @@ -586,7 +586,7 @@ macro_rules! gen_cmpxchg { } #[cfg(target_arch = "arm")] - { + unsafe { let res: u8; core::arch::asm!( "dmb sy", @@ -651,7 +651,7 @@ macro_rules! gen_cmpxchg { } #[cfg(target_arch = "arm")] - { + unsafe { let res: u16; core::arch::asm!( "dmb sy", @@ -716,7 +716,7 @@ macro_rules! gen_cmpxchg { } #[cfg(target_arch = "arm")] - { + unsafe { let res: u32; core::arch::asm!( "dmb sy", @@ -805,7 +805,7 @@ macro_rules! 
gen_cmpxchg { } #[cfg(target_arch = "arm")] - { + unsafe { let [b0, b1, b2, b3, b4, b5, b6, b7] = $old_val.to_le_bytes(); let old_bot = u32::from_le_bytes([b0, b1, b2, b3]); let old_top = u32::from_le_bytes([b4, b5, b6, b7]); @@ -848,176 +848,701 @@ macro_rules! gen_cmpxchg { }; } +macro_rules! fetchop { + // The `add` operation can be optimized with XADD. + (add, x86) => { + "lock; xadd {val}, [{ptr}]" + }; + (or, x86) => { + "or {val}, {scratch}" + }; + (xor, x86) => { + "xor {val}, {scratch}" + }; + (add, aarch64) => { + "add {val}, {scratch}" + }; + (or, aarch64) => { + "orr {val}, {scratch}" + }; + (xor, aarch64) => { + "eor {val}, {scratch}" + }; + (add, arm) => { + "add {val}, {scratch}" + }; + (or, arm) => { + "or {val}, {scratch}" + }; + (xor, arm) => { + "xor {val}, {scratch}" + }; +} + macro_rules! gen_fetchop { - ($type: ty, $op: tt) => { + (u8, $op: tt, $ptr: ident, $val: ident) => { + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. + let ptr = unsafe { &mut *$ptr.as_ptr() }; + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { + // The `add` operation can be optimized with XADD. + // if op == "add": + // insns = "" + // if size == 8: + // insns += fmt_insn("lock; xaddb %[val], (%[addr])") + // elif size == 16: + // insns += fmt_insn("lock; xaddw %[val], (%[addr])") + // elif size == 32: + // insns += fmt_insn("lock; xaddl %[val], (%[addr])") + // else: + // assert size == 64 + // insns += fmt_insn("lock; xaddq %[val], (%[addr])") + // return """ + // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { + // asm volatile (%(insns)s + // : [val] "+&r" (val) + // : [addr] "r" (addr) + // : "memory", "cc"); + // return val; + // }""" % { + // "cpp_type": cpp_type, + // "fun_name": fun_name, + // "insns": insns, + // } + // // Use a +a constraint to ensure `res` is stored in RAX. This is required + // // for the CMPXCHG instruction. 
+ // insns = "" + // if size == 8: + // insns += fmt_insn("movb (%[addr]), %[res]") + // insns += fmt_insn("0: movb %[res], %[scratch]") + // insns += fmt_insn("OPb %[val], %[scratch]") + // insns += fmt_insn("lock; cmpxchgb %[scratch], (%[addr])") + // elif size == 16: + // insns += fmt_insn("movw (%[addr]), %[res]") + // insns += fmt_insn("0: movw %[res], %[scratch]") + // insns += fmt_insn("OPw %[val], %[scratch]") + // insns += fmt_insn("lock; cmpxchgw %[scratch], (%[addr])") + // elif size == 32: + // insns += fmt_insn("movl (%[addr]), %[res]") + // insns += fmt_insn("0: movl %[res], %[scratch]") + // insns += fmt_insn("OPl %[val], %[scratch]") + // insns += fmt_insn("lock; cmpxchgl %[scratch], (%[addr])") + // else: + // assert size == 64 + // insns += fmt_insn("movq (%[addr]), %[res]") + // insns += fmt_insn("0: movq %[res], %[scratch]") + // insns += fmt_insn("OPq %[val], %[scratch]") + // insns += fmt_insn("lock; cmpxchgq %[scratch], (%[addr])") + // insns = insns.replace("OP", op) + // insns += fmt_insn("jnz 0b") + // return """ + // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { + // %(cpp_type)s res, scratch; + // asm volatile (%(insns)s + // : [res] "=&a" (res), [scratch] "=&r" (scratch) + // : [addr] "r" (addr), [val] "r"(val) + // : "memory", "cc"); + // return res; + // }""" % { + // "cpp_type": cpp_type, + // "fun_name": fun_name, + // "insns": insns, + // } todo!(); } #[cfg(target_arch = "aarch64")] { + // insns = "" + // insns += fmt_insn("dmb ish") + // insns += fmt_insn("0:") + // if size == 8: + // insns += fmt_insn("ldxrb %w[res], [%x[addr]]") + // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") + // insns += fmt_insn("stxrb %w[scratch2], %w[scratch1], [%x[addr]]") + // elif size == 16: + // insns += fmt_insn("ldxrh %w[res], [%x[addr]]") + // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") + // insns += fmt_insn("stxrh %w[scratch2], %w[scratch1], [%x[addr]]") + // elif size == 32: + // insns += fmt_insn("ldxr %w[res], [%x[addr]]") + // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") + // insns += fmt_insn("stxr %w[scratch2], %w[scratch1], [%x[addr]]") + // else: + // assert size == 64 + // insns += fmt_insn("ldxr %x[res], [%x[addr]]") + // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") + // insns += fmt_insn("stxr %w[scratch2], %x[scratch1], [%x[addr]]") + // cpu_op = op + // if cpu_op == "or": + // cpu_op = "orr" + // if cpu_op == "xor": + // cpu_op = "eor" + // insns = insns.replace("OP", cpu_op) + // insns += fmt_insn("cbnz %w[scratch2], 0b") + // insns += fmt_insn("dmb ish") + // return """ + // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { + // %(cpp_type)s res; + // uintptr_t scratch1, scratch2; + // asm volatile (%(insns)s + // : [res] "=&r" (res), [scratch1] "=&r" (scratch1), [scratch2] "=&r"(scratch2) + // : [addr] "r" (addr), [val] "r"(val) + // : "memory", "cc"); + // return res; + // }""" % { + // "cpp_type": cpp_type, + // "fun_name": fun_name, + // "insns": insns, + // } todo!(); } #[cfg(target_arch = "arm")] + unsafe { + // insns = "" + // insns += fmt_insn("dmb sy") + // insns += fmt_insn("0:") + // if size == 8: + // insns += fmt_insn("ldrexb %[res], [%[addr]]") + // insns += fmt_insn("OP %[scratch1], %[res], %[val]") + // insns += fmt_insn("strexb %[scratch2], %[scratch1], [%[addr]]") + // elif size == 16: + // insns += fmt_insn("ldrexh %[res], [%[addr]]") + // insns += fmt_insn("OP %[scratch1], %[res], %[val]") + // insns += fmt_insn("strexh %[scratch2], 
%[scratch1], [%[addr]]") + // else: + // assert size == 32 + // insns += fmt_insn("ldrex %[res], [%[addr]]") + // insns += fmt_insn("OP %[scratch1], %[res], %[val]") + // insns += fmt_insn("strex %[scratch2], %[scratch1], [%[addr]]") + // cpu_op = op + // if cpu_op == "or": + // cpu_op = "orr" + // if cpu_op == "xor": + // cpu_op = "eor" + // insns = insns.replace("OP", cpu_op) + // insns += fmt_insn("cmp %[scratch2], #1") + // insns += fmt_insn("beq 0b") + // insns += fmt_insn("dmb sy") + // return """ + // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { + // %(cpp_type)s res; + // uintptr_t scratch1, scratch2; + // asm volatile (%(insns)s + // : [res] "=&r" (res), [scratch1] "=&r" (scratch1), [scratch2] "=&r"(scratch2) + // : [addr] "r" (addr), [val] "r"(val) + // : "memory", "cc"); + // return res; + // }""" % { + // "cpp_type": cpp_type, + // "fun_name": fun_name, + // "insns": insns, + // } + todo!(); + } + + #[expect(unreachable_code)] + const { panic!("Unexpected arch") } + }; + (u16, $op: tt, $ptr: ident, $val: ident) => { + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. + let ptr = unsafe { &mut *$ptr.as_ptr() }; + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + // The `add` operation can be optimized with XADD. + // if op == "add": + // insns = "" + // if size == 8: + // insns += fmt_insn("lock; xaddb %[val], (%[addr])") + // elif size == 16: + // insns += fmt_insn("lock; xaddw %[val], (%[addr])") + // elif size == 32: + // insns += fmt_insn("lock; xaddl %[val], (%[addr])") + // else: + // assert size == 64 + // insns += fmt_insn("lock; xaddq %[val], (%[addr])") + // return """ + // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { + // asm volatile (%(insns)s + // : [val] "+&r" (val) + // : [addr] "r" (addr) + // : "memory", "cc"); + // return val; + // }""" % { + // "cpp_type": cpp_type, + // "fun_name": fun_name, + // "insns": insns, + // } + // // Use a +a constraint to ensure `res` is stored in RAX. This is required + // // for the CMPXCHG instruction. 
+ // insns = "" + // if size == 8: + // insns += fmt_insn("movb (%[addr]), %[res]") + // insns += fmt_insn("0: movb %[res], %[scratch]") + // insns += fmt_insn("OPb %[val], %[scratch]") + // insns += fmt_insn("lock; cmpxchgb %[scratch], (%[addr])") + // elif size == 16: + // insns += fmt_insn("movw (%[addr]), %[res]") + // insns += fmt_insn("0: movw %[res], %[scratch]") + // insns += fmt_insn("OPw %[val], %[scratch]") + // insns += fmt_insn("lock; cmpxchgw %[scratch], (%[addr])") + // elif size == 32: + // insns += fmt_insn("movl (%[addr]), %[res]") + // insns += fmt_insn("0: movl %[res], %[scratch]") + // insns += fmt_insn("OPl %[val], %[scratch]") + // insns += fmt_insn("lock; cmpxchgl %[scratch], (%[addr])") + // else: + // assert size == 64 + // insns += fmt_insn("movq (%[addr]), %[res]") + // insns += fmt_insn("0: movq %[res], %[scratch]") + // insns += fmt_insn("OPq %[val], %[scratch]") + // insns += fmt_insn("lock; cmpxchgq %[scratch], (%[addr])") + // insns = insns.replace("OP", op) + // insns += fmt_insn("jnz 0b") + // return """ + // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { + // %(cpp_type)s res, scratch; + // asm volatile (%(insns)s + // : [res] "=&a" (res), [scratch] "=&r" (scratch) + // : [addr] "r" (addr), [val] "r"(val) + // : "memory", "cc"); + // return res; + // }""" % { + // "cpp_type": cpp_type, + // "fun_name": fun_name, + // "insns": insns, + // } + todo!(); + } + + #[cfg(target_arch = "aarch64")] { + // insns = "" + // insns += fmt_insn("dmb ish") + // insns += fmt_insn("0:") + // if size == 8: + // insns += fmt_insn("ldxrb %w[res], [%x[addr]]") + // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") + // insns += fmt_insn("stxrb %w[scratch2], %w[scratch1], [%x[addr]]") + // elif size == 16: + // insns += fmt_insn("ldxrh %w[res], [%x[addr]]") + // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") + // insns += fmt_insn("stxrh %w[scratch2], %w[scratch1], [%x[addr]]") + // elif size == 32: + // insns += fmt_insn("ldxr %w[res], [%x[addr]]") + // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") + // insns += fmt_insn("stxr %w[scratch2], %w[scratch1], [%x[addr]]") + // else: + // assert size == 64 + // insns += fmt_insn("ldxr %x[res], [%x[addr]]") + // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") + // insns += fmt_insn("stxr %w[scratch2], %x[scratch1], [%x[addr]]") + // cpu_op = op + // if cpu_op == "or": + // cpu_op = "orr" + // if cpu_op == "xor": + // cpu_op = "eor" + // insns = insns.replace("OP", cpu_op) + // insns += fmt_insn("cbnz %w[scratch2], 0b") + // insns += fmt_insn("dmb ish") + // return """ + // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { + // %(cpp_type)s res; + // uintptr_t scratch1, scratch2; + // asm volatile (%(insns)s + // : [res] "=&r" (res), [scratch1] "=&r" (scratch1), [scratch2] "=&r"(scratch2) + // : [addr] "r" (addr), [val] "r"(val) + // : "memory", "cc"); + // return res; + // }""" % { + // "cpp_type": cpp_type, + // "fun_name": fun_name, + // "insns": insns, + // } + todo!(); + } + + #[cfg(target_arch = "arm")] + unsafe { + // insns = "" + // insns += fmt_insn("dmb sy") + // insns += fmt_insn("0:") + // if size == 8: + // insns += fmt_insn("ldrexb %[res], [%[addr]]") + // insns += fmt_insn("OP %[scratch1], %[res], %[val]") + // insns += fmt_insn("strexb %[scratch2], %[scratch1], [%[addr]]") + // elif size == 16: + // insns += fmt_insn("ldrexh %[res], [%[addr]]") + // insns += fmt_insn("OP %[scratch1], %[res], %[val]") + // insns += fmt_insn("strexh 
%[scratch2], %[scratch1], [%[addr]]") + // else: + // assert size == 32 + // insns += fmt_insn("ldrex %[res], [%[addr]]") + // insns += fmt_insn("OP %[scratch1], %[res], %[val]") + // insns += fmt_insn("strex %[scratch2], %[scratch1], [%[addr]]") + // cpu_op = op + // if cpu_op == "or": + // cpu_op = "orr" + // if cpu_op == "xor": + // cpu_op = "eor" + // insns = insns.replace("OP", cpu_op) + // insns += fmt_insn("cmp %[scratch2], #1") + // insns += fmt_insn("beq 0b") + // insns += fmt_insn("dmb sy") + // return """ + // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { + // %(cpp_type)s res; + // uintptr_t scratch1, scratch2; + // asm volatile (%(insns)s + // : [res] "=&r" (res), [scratch1] "=&r" (scratch1), [scratch2] "=&r"(scratch2) + // : [addr] "r" (addr), [val] "r"(val) + // : "memory", "cc"); + // return res; + // }""" % { + // "cpp_type": cpp_type, + // "fun_name": fun_name, + // "insns": insns, + // } + todo!(); + } + + #[expect(unreachable_code)] + const { panic!("Unexpected arch") } + }; + (u32, $op: tt, $ptr: ident, $val: ident) => { + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. + let ptr = unsafe { &mut *$ptr.as_ptr() }; + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + // The `add` operation can be optimized with XADD. + // if op == "add": + // insns = "" + // if size == 8: + // insns += fmt_insn("lock; xaddb %[val], (%[addr])") + // elif size == 16: + // insns += fmt_insn("lock; xaddw %[val], (%[addr])") + // elif size == 32: + // insns += fmt_insn("lock; xaddl %[val], (%[addr])") + // else: + // assert size == 64 + // insns += fmt_insn("lock; xaddq %[val], (%[addr])") + // return """ + // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { + // asm volatile (%(insns)s + // : [val] "+&r" (val) + // : [addr] "r" (addr) + // : "memory", "cc"); + // return val; + // }""" % { + // "cpp_type": cpp_type, + // "fun_name": fun_name, + // "insns": insns, + // } + // // Use a +a constraint to ensure `res` is stored in RAX. This is required + // // for the CMPXCHG instruction. 
+ // insns = "" + // if size == 8: + // insns += fmt_insn("movb (%[addr]), %[res]") + // insns += fmt_insn("0: movb %[res], %[scratch]") + // insns += fmt_insn("OPb %[val], %[scratch]") + // insns += fmt_insn("lock; cmpxchgb %[scratch], (%[addr])") + // elif size == 16: + // insns += fmt_insn("movw (%[addr]), %[res]") + // insns += fmt_insn("0: movw %[res], %[scratch]") + // insns += fmt_insn("OPw %[val], %[scratch]") + // insns += fmt_insn("lock; cmpxchgw %[scratch], (%[addr])") + // elif size == 32: + // insns += fmt_insn("movl (%[addr]), %[res]") + // insns += fmt_insn("0: movl %[res], %[scratch]") + // insns += fmt_insn("OPl %[val], %[scratch]") + // insns += fmt_insn("lock; cmpxchgl %[scratch], (%[addr])") + // else: + // assert size == 64 + // insns += fmt_insn("movq (%[addr]), %[res]") + // insns += fmt_insn("0: movq %[res], %[scratch]") + // insns += fmt_insn("OPq %[val], %[scratch]") + // insns += fmt_insn("lock; cmpxchgq %[scratch], (%[addr])") + // insns = insns.replace("OP", op) + // insns += fmt_insn("jnz 0b") + // return """ + // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { + // %(cpp_type)s res, scratch; + // asm volatile (%(insns)s + // : [res] "=&a" (res), [scratch] "=&r" (scratch) + // : [addr] "r" (addr), [val] "r"(val) + // : "memory", "cc"); + // return res; + // }""" % { + // "cpp_type": cpp_type, + // "fun_name": fun_name, + // "insns": insns, + // } + todo!(); + } + + #[cfg(target_arch = "aarch64")] + { + // insns = "" + // insns += fmt_insn("dmb ish") + // insns += fmt_insn("0:") + // if size == 8: + // insns += fmt_insn("ldxrb %w[res], [%x[addr]]") + // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") + // insns += fmt_insn("stxrb %w[scratch2], %w[scratch1], [%x[addr]]") + // elif size == 16: + // insns += fmt_insn("ldxrh %w[res], [%x[addr]]") + // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") + // insns += fmt_insn("stxrh %w[scratch2], %w[scratch1], [%x[addr]]") + // elif size == 32: + // insns += fmt_insn("ldxr %w[res], [%x[addr]]") + // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") + // insns += fmt_insn("stxr %w[scratch2], %w[scratch1], [%x[addr]]") + // else: + // assert size == 64 + // insns += fmt_insn("ldxr %x[res], [%x[addr]]") + // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") + // insns += fmt_insn("stxr %w[scratch2], %x[scratch1], [%x[addr]]") + // cpu_op = op + // if cpu_op == "or": + // cpu_op = "orr" + // if cpu_op == "xor": + // cpu_op = "eor" + // insns = insns.replace("OP", cpu_op) + // insns += fmt_insn("cbnz %w[scratch2], 0b") + // insns += fmt_insn("dmb ish") + // return """ + // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { + // %(cpp_type)s res; + // uintptr_t scratch1, scratch2; + // asm volatile (%(insns)s + // : [res] "=&r" (res), [scratch1] "=&r" (scratch1), [scratch2] "=&r"(scratch2) + // : [addr] "r" (addr), [val] "r"(val) + // : "memory", "cc"); + // return res; + // }""" % { + // "cpp_type": cpp_type, + // "fun_name": fun_name, + // "insns": insns, + // } + todo!(); + } + + #[cfg(target_arch = "arm")] + unsafe { + // insns = "" + // insns += fmt_insn("dmb sy") + // insns += fmt_insn("0:") + // if size == 8: + // insns += fmt_insn("ldrexb %[res], [%[addr]]") + // insns += fmt_insn("OP %[scratch1], %[res], %[val]") + // insns += fmt_insn("strexb %[scratch2], %[scratch1], [%[addr]]") + // elif size == 16: + // insns += fmt_insn("ldrexh %[res], [%[addr]]") + // insns += fmt_insn("OP %[scratch1], %[res], %[val]") + // insns += 
fmt_insn("strexh %[scratch2], %[scratch1], [%[addr]]") + // else: + // assert size == 32 + // insns += fmt_insn("ldrex %[res], [%[addr]]") + // insns += fmt_insn("OP %[scratch1], %[res], %[val]") + // insns += fmt_insn("strex %[scratch2], %[scratch1], [%[addr]]") + // cpu_op = op + // if cpu_op == "or": + // cpu_op = "orr" + // if cpu_op == "xor": + // cpu_op = "eor" + // insns = insns.replace("OP", cpu_op) + // insns += fmt_insn("cmp %[scratch2], #1") + // insns += fmt_insn("beq 0b") + // insns += fmt_insn("dmb sy") + // return """ + // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { + // %(cpp_type)s res; + // uintptr_t scratch1, scratch2; + // asm volatile (%(insns)s + // : [res] "=&r" (res), [scratch1] "=&r" (scratch1), [scratch2] "=&r"(scratch2) + // : [addr] "r" (addr), [val] "r"(val) + // : "memory", "cc"); + // return res; + // }""" % { + // "cpp_type": cpp_type, + // "fun_name": fun_name, + // "insns": insns, + // } + todo!(); + } + + #[expect(unreachable_code)] + const { panic!("Unexpected arch") } + }; + (u64, $op: tt, $ptr: ident, $val: ident) => { + // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. + let ptr = unsafe { &mut *$ptr.as_ptr() }; + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + // The `add` operation can be optimized with XADD. + // if op == "add": + // insns = "" + // if size == 8: + // insns += fmt_insn("lock; xaddb %[val], (%[addr])") + // elif size == 16: + // insns += fmt_insn("lock; xaddw %[val], (%[addr])") + // elif size == 32: + // insns += fmt_insn("lock; xaddl %[val], (%[addr])") + // else: + // assert size == 64 + // insns += fmt_insn("lock; xaddq %[val], (%[addr])") + // return """ + // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { + // asm volatile (%(insns)s + // : [val] "+&r" (val) + // : [addr] "r" (addr) + // : "memory", "cc"); + // return val; + // }""" % { + // "cpp_type": cpp_type, + // "fun_name": fun_name, + // "insns": insns, + // } + // // Use a +a constraint to ensure `res` is stored in RAX. This is required + // // for the CMPXCHG instruction. 
+ // insns = "" + // if size == 8: + // insns += fmt_insn("movb (%[addr]), %[res]") + // insns += fmt_insn("0: movb %[res], %[scratch]") + // insns += fmt_insn("OPb %[val], %[scratch]") + // insns += fmt_insn("lock; cmpxchgb %[scratch], (%[addr])") + // elif size == 16: + // insns += fmt_insn("movw (%[addr]), %[res]") + // insns += fmt_insn("0: movw %[res], %[scratch]") + // insns += fmt_insn("OPw %[val], %[scratch]") + // insns += fmt_insn("lock; cmpxchgw %[scratch], (%[addr])") + // elif size == 32: + // insns += fmt_insn("movl (%[addr]), %[res]") + // insns += fmt_insn("0: movl %[res], %[scratch]") + // insns += fmt_insn("OPl %[val], %[scratch]") + // insns += fmt_insn("lock; cmpxchgl %[scratch], (%[addr])") + // else: + // assert size == 64 + // insns += fmt_insn("movq (%[addr]), %[res]") + // insns += fmt_insn("0: movq %[res], %[scratch]") + // insns += fmt_insn("OPq %[val], %[scratch]") + // insns += fmt_insn("lock; cmpxchgq %[scratch], (%[addr])") + // insns = insns.replace("OP", op) + // insns += fmt_insn("jnz 0b") + // return """ + // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { + // %(cpp_type)s res, scratch; + // asm volatile (%(insns)s + // : [res] "=&a" (res), [scratch] "=&r" (scratch) + // : [addr] "r" (addr), [val] "r"(val) + // : "memory", "cc"); + // return res; + // }""" % { + // "cpp_type": cpp_type, + // "fun_name": fun_name, + // "insns": insns, + // } + todo!(); + } + + #[cfg(target_arch = "aarch64")] + { + // insns = "" + // insns += fmt_insn("dmb ish") + // insns += fmt_insn("0:") + // if size == 8: + // insns += fmt_insn("ldxrb %w[res], [%x[addr]]") + // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") + // insns += fmt_insn("stxrb %w[scratch2], %w[scratch1], [%x[addr]]") + // elif size == 16: + // insns += fmt_insn("ldxrh %w[res], [%x[addr]]") + // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") + // insns += fmt_insn("stxrh %w[scratch2], %w[scratch1], [%x[addr]]") + // elif size == 32: + // insns += fmt_insn("ldxr %w[res], [%x[addr]]") + // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") + // insns += fmt_insn("stxr %w[scratch2], %w[scratch1], [%x[addr]]") + // else: + // assert size == 64 + // insns += fmt_insn("ldxr %x[res], [%x[addr]]") + // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") + // insns += fmt_insn("stxr %w[scratch2], %x[scratch1], [%x[addr]]") + // cpu_op = op + // if cpu_op == "or": + // cpu_op = "orr" + // if cpu_op == "xor": + // cpu_op = "eor" + // insns = insns.replace("OP", cpu_op) + // insns += fmt_insn("cbnz %w[scratch2], 0b") + // insns += fmt_insn("dmb ish") + // return """ + // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { + // %(cpp_type)s res; + // uintptr_t scratch1, scratch2; + // asm volatile (%(insns)s + // : [res] "=&r" (res), [scratch1] "=&r" (scratch1), [scratch2] "=&r"(scratch2) + // : [addr] "r" (addr), [val] "r"(val) + // : "memory", "cc"); + // return res; + // }""" % { + // "cpp_type": cpp_type, + // "fun_name": fun_name, + // "insns": insns, + // } + todo!(); + } + + #[cfg(target_arch = "arm")] + unsafe { + // insns = "" + // insns += fmt_insn("dmb sy") + // insns += fmt_insn("0:") + // if size == 8: + // insns += fmt_insn("ldrexb %[res], [%[addr]]") + // insns += fmt_insn("OP %[scratch1], %[res], %[val]") + // insns += fmt_insn("strexb %[scratch2], %[scratch1], [%[addr]]") + // elif size == 16: + // insns += fmt_insn("ldrexh %[res], [%[addr]]") + // insns += fmt_insn("OP %[scratch1], %[res], %[val]") + // insns += 
fmt_insn("strexh %[scratch2], %[scratch1], [%[addr]]") + // else: + // assert size == 32 + // insns += fmt_insn("ldrex %[res], [%[addr]]") + // insns += fmt_insn("OP %[scratch1], %[res], %[val]") + // insns += fmt_insn("strex %[scratch2], %[scratch1], [%[addr]]") + // cpu_op = op + // if cpu_op == "or": + // cpu_op = "orr" + // if cpu_op == "xor": + // cpu_op = "eor" + // insns = insns.replace("OP", cpu_op) + // insns += fmt_insn("cmp %[scratch2], #1") + // insns += fmt_insn("beq 0b") + // insns += fmt_insn("dmb sy") + // return """ + // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { + // %(cpp_type)s res; + // uintptr_t scratch1, scratch2; + // asm volatile (%(insns)s + // : [res] "=&r" (res), [scratch1] "=&r" (scratch1), [scratch2] "=&r"(scratch2) + // : [addr] "r" (addr), [val] "r"(val) + // : "memory", "cc"); + // return res; + // }""" % { + // "cpp_type": cpp_type, + // "fun_name": fun_name, + // "insns": insns, + // } todo!(); } #[expect(unreachable_code)] const { panic!("Unexpected arch") } - // NOTE: the assembly code must match the generated code in: - // - MacroAssembler::atomicFetchOp - // - MacroAssembler::atomicFetchOp64 (on 64-bit platforms) - // if cpu_arch in ("x86", "x86_64"): - // // The `add` operation can be optimized with XADD. - // if op == "add": - // insns = "" - // if size == 8: - // insns += fmt_insn("lock; xaddb %[val], (%[addr])") - // elif size == 16: - // insns += fmt_insn("lock; xaddw %[val], (%[addr])") - // elif size == 32: - // insns += fmt_insn("lock; xaddl %[val], (%[addr])") - // else: - // assert size == 64 - // insns += fmt_insn("lock; xaddq %[val], (%[addr])") - // return """ - // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { - // asm volatile (%(insns)s - // : [val] "+&r" (val) - // : [addr] "r" (addr) - // : "memory", "cc"); - // return val; - // }""" % { - // "cpp_type": cpp_type, - // "fun_name": fun_name, - // "insns": insns, - // } - // // Use a +a constraint to ensure `res` is stored in RAX. This is required - // // for the CMPXCHG instruction. 
- // insns = "" - // if size == 8: - // insns += fmt_insn("movb (%[addr]), %[res]") - // insns += fmt_insn("0: movb %[res], %[scratch]") - // insns += fmt_insn("OPb %[val], %[scratch]") - // insns += fmt_insn("lock; cmpxchgb %[scratch], (%[addr])") - // elif size == 16: - // insns += fmt_insn("movw (%[addr]), %[res]") - // insns += fmt_insn("0: movw %[res], %[scratch]") - // insns += fmt_insn("OPw %[val], %[scratch]") - // insns += fmt_insn("lock; cmpxchgw %[scratch], (%[addr])") - // elif size == 32: - // insns += fmt_insn("movl (%[addr]), %[res]") - // insns += fmt_insn("0: movl %[res], %[scratch]") - // insns += fmt_insn("OPl %[val], %[scratch]") - // insns += fmt_insn("lock; cmpxchgl %[scratch], (%[addr])") - // else: - // assert size == 64 - // insns += fmt_insn("movq (%[addr]), %[res]") - // insns += fmt_insn("0: movq %[res], %[scratch]") - // insns += fmt_insn("OPq %[val], %[scratch]") - // insns += fmt_insn("lock; cmpxchgq %[scratch], (%[addr])") - // insns = insns.replace("OP", op) - // insns += fmt_insn("jnz 0b") - // return """ - // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { - // %(cpp_type)s res, scratch; - // asm volatile (%(insns)s - // : [res] "=&a" (res), [scratch] "=&r" (scratch) - // : [addr] "r" (addr), [val] "r"(val) - // : "memory", "cc"); - // return res; - // }""" % { - // "cpp_type": cpp_type, - // "fun_name": fun_name, - // "insns": insns, - // } - // if cpu_arch == "aarch64": - // insns = "" - // insns += fmt_insn("dmb ish") - // insns += fmt_insn("0:") - // if size == 8: - // insns += fmt_insn("ldxrb %w[res], [%x[addr]]") - // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") - // insns += fmt_insn("stxrb %w[scratch2], %w[scratch1], [%x[addr]]") - // elif size == 16: - // insns += fmt_insn("ldxrh %w[res], [%x[addr]]") - // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") - // insns += fmt_insn("stxrh %w[scratch2], %w[scratch1], [%x[addr]]") - // elif size == 32: - // insns += fmt_insn("ldxr %w[res], [%x[addr]]") - // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") - // insns += fmt_insn("stxr %w[scratch2], %w[scratch1], [%x[addr]]") - // else: - // assert size == 64 - // insns += fmt_insn("ldxr %x[res], [%x[addr]]") - // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") - // insns += fmt_insn("stxr %w[scratch2], %x[scratch1], [%x[addr]]") - // cpu_op = op - // if cpu_op == "or": - // cpu_op = "orr" - // if cpu_op == "xor": - // cpu_op = "eor" - // insns = insns.replace("OP", cpu_op) - // insns += fmt_insn("cbnz %w[scratch2], 0b") - // insns += fmt_insn("dmb ish") - // return """ - // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { - // %(cpp_type)s res; - // uintptr_t scratch1, scratch2; - // asm volatile (%(insns)s - // : [res] "=&r" (res), [scratch1] "=&r" (scratch1), [scratch2] "=&r"(scratch2) - // : [addr] "r" (addr), [val] "r"(val) - // : "memory", "cc"); - // return res; - // }""" % { - // "cpp_type": cpp_type, - // "fun_name": fun_name, - // "insns": insns, - // } - // if cpu_arch == "arm": - // insns = "" - // insns += fmt_insn("dmb sy") - // insns += fmt_insn("0:") - // if size == 8: - // insns += fmt_insn("ldrexb %[res], [%[addr]]") - // insns += fmt_insn("OP %[scratch1], %[res], %[val]") - // insns += fmt_insn("strexb %[scratch2], %[scratch1], [%[addr]]") - // elif size == 16: - // insns += fmt_insn("ldrexh %[res], [%[addr]]") - // insns += fmt_insn("OP %[scratch1], %[res], %[val]") - // insns += fmt_insn("strexh %[scratch2], %[scratch1], [%[addr]]") - // else: - // 
assert size == 32 - // insns += fmt_insn("ldrex %[res], [%[addr]]") - // insns += fmt_insn("OP %[scratch1], %[res], %[val]") - // insns += fmt_insn("strex %[scratch2], %[scratch1], [%[addr]]") - // cpu_op = op - // if cpu_op == "or": - // cpu_op = "orr" - // if cpu_op == "xor": - // cpu_op = "eor" - // insns = insns.replace("OP", cpu_op) - // insns += fmt_insn("cmp %[scratch2], #1") - // insns += fmt_insn("beq 0b") - // insns += fmt_insn("dmb sy") - // return """ - // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { - // %(cpp_type)s res; - // uintptr_t scratch1, scratch2; - // asm volatile (%(insns)s - // : [res] "=&r" (res), [scratch1] "=&r" (scratch1), [scratch2] "=&r"(scratch2) - // : [addr] "r" (addr), [val] "r"(val) - // : "memory", "cc"); - // return res; - // }""" % { - // "cpp_type": cpp_type, - // "fun_name": fun_name, - // "insns": insns, - // } - // raise Exception("Unexpected arch") }; } @@ -1034,7 +1559,7 @@ macro_rules! gen_copy { } #[cfg(target_arch = "arm")] - { + unsafe { todo!(); } @@ -1135,101 +1660,168 @@ pub enum Ordering { /// /// See [std::sync::atomic::fence] for details. #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] pub fn fence() { core::sync::atomic::fence(core::sync::atomic::Ordering::SeqCst); } #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] pub fn atomic_load_8_seq_cst(ptr: NonNull<()>) -> u8 { gen_load!(u8, ptr, true); } #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] pub fn atomic_load_16_seq_cst(ptr: NonNull<()>) -> u16 { gen_load!(u16, ptr, true); } #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] pub fn atomic_load_32_seq_cst(ptr: NonNull<()>) -> u32 { gen_load!(u32, ptr, true); } // if is_64bit: #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64",))] pub fn atomic_load_64_seq_cst(ptr: NonNull<()>) -> u64 { gen_load!(u64, ptr, true); } // These are access-atomic up to sizeof(uintptr_t). 
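// ("Access-atomic" is used here in the sense that, up to the machine word
// size, the unsynchronized accessors below are still expected to be
// single-copy atomic: a racing reader may observe a stale value but never a
// torn one. Unlike the SeqCst variants above, they impose no ordering on
// surrounding memory operations.)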
#[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] pub fn atomic_load_8_unsynchronized(ptr: NonNull<()>) -> u8 { gen_load!(u8, ptr, false); } #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] pub fn atomic_load_16_unsynchronized(ptr: NonNull<()>) -> u16 { gen_load!(u16, ptr, false); } #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] pub fn atomic_load_32_unsynchronized(ptr: NonNull<()>) -> u32 { gen_load!(u32, ptr, false); } -// if is_64bit: #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64",))] pub fn atomic_load_64_unsynchronized(ptr: NonNull<()>) -> u64 { gen_load!(u64, ptr, false); } #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] pub fn atomic_store_8_seq_cst(ptr: NonNull<()>, val: u8) { gen_store!(u8, ptr, val, true); } #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] pub fn atomic_store_16_seq_cst(ptr: NonNull<()>, val: u16) { gen_store!(u16, ptr, val, true); } #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] pub fn atomic_store_32_seq_cst(ptr: NonNull<()>, val: u32) { gen_store!(u32, ptr, val, true); } -// if is_64bit: #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] pub fn atomic_store_64_seq_cst(ptr: NonNull<()>, val: u64) { gen_store!(u64, ptr, val, true); } // These are access-atomic up to sizeof(uintptr_t). 
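// Illustrative sketch (not part of the patch): a round trip through the
// SeqCst store and load helpers, reusing the leaked-cell pattern from the
// crate's own tests. The function name is illustrative.
#[test]
fn store_load_roundtrip_sketch() {
    let cell = NonNull::from(Box::leak(Box::new([0u64; 1]))).cast::<()>();
    atomic_store_32_seq_cst(cell, 0xDEAD_BEEF);
    assert_eq!(atomic_load_32_seq_cst(cell), 0xDEAD_BEEF);
    // The unsynchronized load reads the same bytes, just without any ordering
    // guarantees.
    assert_eq!(atomic_load_32_unsynchronized(cell), 0xDEAD_BEEF);
    // Reconstruct the Box leaked above so the allocation is freed.
    let _ = unsafe { Box::from_raw(cell.cast::<[u64; 1]>().as_ptr()) };
}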
#[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] pub fn atomic_store_8_unsynchronized(ptr: NonNull<()>, val: u8) { gen_store!(u8, ptr, val, false); } #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] pub fn atomic_store_16_unsynchronized(ptr: NonNull<()>, val: u16) { gen_store!(u16, ptr, val, false); } #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] pub fn atomic_store_32_unsynchronized(ptr: NonNull<()>, val: u32) { gen_store!(u32, ptr, val, false); } -// if is_64bit: #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64",))] pub fn atomic_store_64_unsynchronized(ptr: NonNull<()>, val: u64) { gen_store!(u64, ptr, val, false); } @@ -1237,24 +1829,38 @@ pub fn atomic_store_64_unsynchronized(ptr: NonNull<()>, val: u64) { // `exchange` takes a cell address and a value. It stores it in the cell and // returns the value previously in the cell. #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] pub fn atomic_exchange_8_seq_cst(ptr: NonNull<()>, mut val: u8) -> u8 { gen_exchange!(u8, ptr, val); } #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] pub fn atomic_exchange_16_seq_cst(ptr: NonNull<()>, mut val: u16) -> u16 { gen_exchange!(u16, ptr, val); } #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] pub fn atomic_exchange_32_seq_cst(ptr: NonNull<()>, mut val: u32) -> u32 { gen_exchange!(u32, ptr, val); } -// if is_64bit: #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64",))] pub fn atomic_exchange_64_seq_cst(ptr: NonNull<()>, mut val: u64) -> u64 { gen_exchange!(u64, ptr, val); } @@ -1263,22 +1869,42 @@ pub fn atomic_exchange_64_seq_cst(ptr: NonNull<()>, mut val: u64) -> u64 { // If the value in the cell equals the expected value then the replacement value // is stored in the cell. It always returns the value previously in the cell. 
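// Illustrative sketch (not part of the patch): a fetch-add built from the
// crate's 32-bit compare-exchange helper. Because the helper returns the
// value previously in the cell, the loop retries until no other thread
// changed the cell between our load and our exchange. Names are illustrative.
fn fetch_add_32_via_cmpxchg(ptr: NonNull<()>, operand: u32) -> u32 {
    let mut current = atomic_load_32_seq_cst(ptr);
    loop {
        let previous =
            atomic_cmp_xchg_32_seq_cst(ptr, current, current.wrapping_add(operand));
        if previous == current {
            // The exchange took effect; report the value the cell held before.
            return previous;
        }
        // Another thread won the race; retry with the value we actually observed.
        current = previous;
    }
}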
#[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] pub fn atomic_cmp_xchg_8_seq_cst(ptr: NonNull<()>, mut old_val: u8, new_val: u8) -> u8 { gen_cmpxchg!(u8, ptr, old_val, new_val); } #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] pub fn atomic_cmp_xchg_16_seq_cst(ptr: NonNull<()>, mut old_val: u16, new_val: u16) -> u16 { gen_cmpxchg!(u16, ptr, old_val, new_val); } #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] pub fn atomic_cmp_xchg_32_seq_cst(ptr: NonNull<()>, mut old_val: u32, new_val: u32) -> u32 { gen_cmpxchg!(u32, ptr, old_val, new_val); } #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] pub fn atomic_cmp_xchg_64_seq_cst(ptr: NonNull<()>, mut old_val: u64, new_val: u64) -> u64 { gen_cmpxchg!(u64, ptr, old_val, new_val); } @@ -1286,101 +1912,158 @@ pub fn atomic_cmp_xchg_64_seq_cst(ptr: NonNull<()>, mut old_val: u64, new_val: u // `add` adds a value atomically to the cell and returns the old value in the // cell. (There is no `sub`; just add the negated value.) #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] -pub fn atomic_add_8_seq_cst() { - gen_fetchop!(u8, "add"); +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] +pub fn atomic_add_8_seq_cst(ptr: NonNull<()>, val: u8) -> u8 { + gen_fetchop!(u8, "add", ptr, val); } #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] -pub fn atomic_add_16_seq_cst() { - gen_fetchop!(u16, "add"); +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] +pub fn atomic_add_16_seq_cst(ptr: NonNull<()>, val: u16) -> u16 { + gen_fetchop!(u16, "add", ptr, val); } #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] -pub fn atomic_add_32_seq_cst() { - gen_fetchop!(u32, "add"); +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] +pub fn atomic_add_32_seq_cst(ptr: NonNull<()>, val: u32) -> u32 { + gen_fetchop!(u32, "add", ptr, val); } -// if is_64bit: #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] -pub fn atomic_add_64_seq_cst() { - gen_fetchop!(u64, "add"); +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64",))] +pub fn atomic_add_64_seq_cst(ptr: NonNull<()>, val: u64) -> u64 { + gen_fetchop!(u64, "add", ptr, val); } // `and` bitwise-and a value atomically into the cell and returns the old value // in the cell. 
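// Illustrative sketch (not part of the patch): intended usage of the add
// helper once the `gen_fetchop` bodies are filled in (at this point in the
// series they still end in `todo!()`). The cell allocation mirrors the
// crate's tests; names are illustrative.
fn add_usage_sketch() {
    let cell = NonNull::from(Box::leak(Box::new([0u64; 1]))).cast::<()>();
    // `add` returns the old value, so the first call sees 0 and the second 5.
    assert_eq!(atomic_add_32_seq_cst(cell, 5), 0);
    assert_eq!(atomic_add_32_seq_cst(cell, 3), 5);
    assert_eq!(atomic_load_32_seq_cst(cell), 8);
    // Reconstruct the Box leaked above so the allocation is freed.
    let _ = unsafe { Box::from_raw(cell.cast::<[u64; 1]>().as_ptr()) };
}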
#[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] -pub fn atomic_and_8_seq_cst() { - gen_fetchop!(u8, "and"); +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] +pub fn atomic_and_8_seq_cst(ptr: NonNull<()>, val: u8) -> u8 { + gen_fetchop!(u8, "and", ptr, val); } #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] -pub fn atomic_and_16_seq_cst() { - gen_fetchop!(u16, "and"); +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] +pub fn atomic_and_16_seq_cst(ptr: NonNull<()>, val: u16) -> u16 { + gen_fetchop!(u16, "and", ptr, val); } #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] -pub fn atomic_and_32_seq_cst() { - gen_fetchop!(u32, "and"); +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] +pub fn atomic_and_32_seq_cst(ptr: NonNull<()>, val: u32) -> u32 { + gen_fetchop!(u32, "and", ptr, val); } -// if is_64bit: #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] -pub fn atomic_and_64_seq_cst() { - gen_fetchop!(u64, "and"); +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64",))] +pub fn atomic_and_64_seq_cst(ptr: NonNull<()>, val: u64) -> u64 { + gen_fetchop!(u64, "and", ptr, val); } // `or` bitwise-ors a value atomically into the cell and returns the old value // in the cell. #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] -pub fn atomic_or_8_seq_cst() { - gen_fetchop!(u8, "or"); +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] +pub fn atomic_or_8_seq_cst(ptr: NonNull<()>, val: u8) -> u8 { + gen_fetchop!(u8, "or", ptr, val); } #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] -pub fn atomic_or_16_seq_cst() { - gen_fetchop!(u16, "or"); +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] +pub fn atomic_or_16_seq_cst(ptr: NonNull<()>, val: u16) -> u16 { + gen_fetchop!(u16, "or", ptr, val); } #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] -pub fn atomic_or_32_seq_cst() { - gen_fetchop!(u32, "or"); +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] +pub fn atomic_or_32_seq_cst(ptr: NonNull<()>, val: u32) -> u32 { + gen_fetchop!(u32, "or", ptr, val); } -// if is_64bit: #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] -pub fn atomic_or_64_seq_cst() { - gen_fetchop!(u64, "or"); +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64",))] +pub fn atomic_or_64_seq_cst(ptr: NonNull<()>, val: u64) -> u64 { + gen_fetchop!(u64, "or", ptr, val); } // `xor` bitwise-xors a value atomically into the cell and returns the old value // in the cell. 
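// Illustrative cross-check (not part of the patch): the fetch-style helpers
// are documented to return the value the cell held before the operation,
// matching core::sync::atomic's fetch_* methods. A quick sketch of that
// contract using only the standard library:
#[test]
fn fetch_or_semantics_sketch() {
    use core::sync::atomic::{AtomicU8, Ordering};
    let cell = AtomicU8::new(0b0000_1111);
    // fetch_or returns the previous value, which is what atomic_or_8_seq_cst
    // is intended to return once its body is implemented.
    assert_eq!(cell.fetch_or(0b1111_0000, Ordering::SeqCst), 0b0000_1111);
    assert_eq!(cell.load(Ordering::SeqCst), 0b1111_1111);
}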
#[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] -pub fn atomic_xor_8_seq_cst() { - gen_fetchop!(u8, "xor"); +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] +pub fn atomic_xor_8_seq_cst(ptr: NonNull<()>, val: u8) -> u8 { + gen_fetchop!(u8, "xor", ptr, val); } #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] -pub fn atomic_xor_16_seq_cst() { - gen_fetchop!(u16, "xor"); +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] +pub fn atomic_xor_16_seq_cst(ptr: NonNull<()>, val: u16) -> u16 { + gen_fetchop!(u16, "xor", ptr, val); } #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] -pub fn atomic_xor_32_seq_cst() { - gen_fetchop!(u32, "xor"); +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] +pub fn atomic_xor_32_seq_cst(ptr: NonNull<()>, val: u32) -> u32 { + gen_fetchop!(u32, "xor", ptr, val); } // if is_64bit: #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] -pub fn atomic_xor_64_seq_cst() { - gen_fetchop!(u64, "xor"); +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64",))] +pub fn atomic_xor_64_seq_cst(ptr: NonNull<()>, val: u64) -> u64 { + gen_fetchop!(u64, "xor", ptr, val); } /// Emits a machine instruction to signal the processor that it is running in a @@ -1388,7 +2071,12 @@ pub fn atomic_xor_64_seq_cst() { /// /// See [std::hint::spin_loop] for details. #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] pub fn atomic_pause() { core::hint::spin_loop(); } @@ -1399,61 +2087,111 @@ pub fn atomic_pause() { // blocksize = words_in_block * wordsize #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] pub fn atomic_copy_unaligned_block_down_unsynchronized() { gen_copy!(u8, 1, blocksize, "down"); } #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] pub fn atomic_copy_unaligned_block_up_unsynchronized() { gen_copy!(u8, 1, blocksize, "up"); } #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] pub fn atomic_copy_unaligned_word_down_unsynchronized() { gen_copy!(u8, 1, wordsize, "down"); } #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] pub fn atomic_copy_unaligned_word_up_unsynchronized() { gen_copy!(u8, 1, wordsize, "up"); } #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] pub fn atomic_copy_block_down_unsynchronized() { 
gen_copy!(uptr, wordsize, words_in_block, "down"); } #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] pub fn atomic_copy_block_up_unsynchronized() { gen_copy!(uptr, wordsize, words_in_block, "up"); } #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] pub fn atomic_copy_word_unsynchronized() { gen_copy!(uptr, wordsize, 1, "down"); } #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] pub fn atomic_copy32_unsynchronized() { gen_copy!(u32, 4, 1, "down"); } #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] pub fn atomic_copy16_unsynchronized() { gen_copy!(u16, 2, 1, "down"); } #[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "arm" +))] pub fn atomic_copy8_unsynchronized() { gen_copy!(u8, 1, 1, "down"); } From 39b14efe3b7653e2b4b55cfdcfd4e0d3f53cd74f Mon Sep 17 00:00:00 2001 From: Aapo Alasuutari Date: Thu, 18 Sep 2025 08:32:19 +0300 Subject: [PATCH 15/25] fix --- ecmascript_atomics/lib.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/ecmascript_atomics/lib.rs b/ecmascript_atomics/lib.rs index d47ecda4d..fee3ae0bf 100644 --- a/ecmascript_atomics/lib.rs +++ b/ecmascript_atomics/lib.rs @@ -568,7 +568,7 @@ macro_rules! gen_cmpxchg { core::arch::asm!( "dmb ish", "0:", - "uxt {scratch:w}, {old_val:w}", + "uxtb {scratch:w}, {old_val:w}", "ldxr {res:w} [{ptr}]", "cmp {res:w}, {scratch:w}", "b.ne 1f", @@ -591,7 +591,7 @@ macro_rules! gen_cmpxchg { core::arch::asm!( "dmb sy", "0:", - "uxt {scratch:w}, {old_val:w}", + "uxtb {scratch:w}, {old_val:w}", "ldrex {res:w} [{ptr}]", "cmp {res:w}, {scratch:w}", "bne 1f", @@ -633,7 +633,7 @@ macro_rules! gen_cmpxchg { core::arch::asm!( "dmb ish", "0:", - "uxt {scratch:w}, {old_val:w}", + "uxth {scratch:w}, {old_val:w}", "ldxr {res:w} [{ptr}]", "cmp {res:w}, {scratch:w}", "b.ne 1f", @@ -656,7 +656,7 @@ macro_rules! gen_cmpxchg { core::arch::asm!( "dmb sy", "0:", - "uxt {scratch:w}, {old_val:w}", + "uxth {scratch:w}, {old_val:w}", "ldrex {res:w} [{ptr}]", "cmp {res:w}, {scratch:w}", "bne 1f", @@ -698,7 +698,7 @@ macro_rules! gen_cmpxchg { core::arch::asm!( "dmb ish", "0:", - "uxt {scratch:w}, {old_val:w}", + "mov {scratch:w}, {old_val:w}", "ldxr {res:w} [{ptr}]", "cmp {res:w}, {scratch:w}", "b.ne 1f", @@ -721,7 +721,7 @@ macro_rules! gen_cmpxchg { core::arch::asm!( "dmb sy", "0:", - "uxt {scratch:w}, {old_val:w}", + "mov {scratch:w}, {old_val:w}", "ldrex {res:w} [{ptr}]", "cmp {res:w}, {scratch:w}", "bne 1f", @@ -787,7 +787,7 @@ macro_rules! 
gen_cmpxchg { core::arch::asm!( "dmb ish", "0:", - "uxt {scratch:w}, {old_val:w}", + "mov {scratch:w}, {old_val:w}", "ldxr {res:w} [{ptr}]", "cmp {res:w}, {scratch:w}", "b.ne 1f", From 9fca77aedebfc9b19427a40c61784e3071ce47ed Mon Sep 17 00:00:00 2001 From: Aapo Alasuutari Date: Thu, 18 Sep 2025 08:33:12 +0300 Subject: [PATCH 16/25] fix --- ecmascript_atomics/lib.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ecmascript_atomics/lib.rs b/ecmascript_atomics/lib.rs index fee3ae0bf..0e56cbce5 100644 --- a/ecmascript_atomics/lib.rs +++ b/ecmascript_atomics/lib.rs @@ -569,7 +569,7 @@ macro_rules! gen_cmpxchg { "dmb ish", "0:", "uxtb {scratch:w}, {old_val:w}", - "ldxr {res:w} [{ptr}]", + "ldxr {res:w}, [{ptr}]", "cmp {res:w}, {scratch:w}", "b.ne 1f", "stxr {scratch:w}, {new_val:w}, [{ptr}]", @@ -634,7 +634,7 @@ macro_rules! gen_cmpxchg { "dmb ish", "0:", "uxth {scratch:w}, {old_val:w}", - "ldxr {res:w} [{ptr}]", + "ldxr {res:w}, [{ptr}]", "cmp {res:w}, {scratch:w}", "b.ne 1f", "stxr {scratch:w}, {new_val:w}, [{ptr}]", @@ -699,7 +699,7 @@ macro_rules! gen_cmpxchg { "dmb ish", "0:", "mov {scratch:w}, {old_val:w}", - "ldxr {res:w} [{ptr}]", + "ldxr {res:w}, [{ptr}]", "cmp {res:w}, {scratch:w}", "b.ne 1f", "stxr {scratch:w}, {new_val:w}, [{ptr}]", @@ -788,7 +788,7 @@ macro_rules! gen_cmpxchg { "dmb ish", "0:", "mov {scratch:w}, {old_val:w}", - "ldxr {res:w} [{ptr}]", + "ldxr {res:w}, [{ptr}]", "cmp {res:w}, {scratch:w}", "b.ne 1f", "stxr {scratch:w}, {new_val:w}, [{ptr}]", From 82be186b2053472ffa9b9cea5217dc28c5262498 Mon Sep 17 00:00:00 2001 From: Aapo Alasuutari Date: Thu, 18 Sep 2025 13:30:21 +0300 Subject: [PATCH 17/25] feat: fetch ops --- ecmascript_atomics/lib.rs | 1150 +++++++++++++++++++++---------------- 1 file changed, 646 insertions(+), 504 deletions(-) diff --git a/ecmascript_atomics/lib.rs b/ecmascript_atomics/lib.rs index 0e56cbce5..d9196190d 100644 --- a/ecmascript_atomics/lib.rs +++ b/ecmascript_atomics/lib.rs @@ -850,31 +850,76 @@ macro_rules! gen_cmpxchg { macro_rules! fetchop { // The `add` operation can be optimized with XADD. 
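+ // XADD writes the sum back to memory and leaves the previous memory value
+ // in the source register, so `add` returns the old value without a retry
+ // loop; `and`, `or` and `xor` below fall back to a `lock cmpxchg` loop.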
- (add, x86) => { - "lock; xadd {val}, [{ptr}]" + ("add", x86, u8) => { + "lock; xadd [{ptr}], {val}" }; - (or, x86) => { - "or {val}, {scratch}" + ("add", x86, u16) => { + "lock; xadd [{ptr}], {val:x}" }; - (xor, x86) => { - "xor {val}, {scratch}" + ("add", x86, u32) => { + "lock; xadd [{ptr}], {val:e}" + }; + ("add", x86, u64) => { + "lock; xadd [{ptr}], {val:r}" + }; + ("and", x86, u8) => { + "and {scratch}, {val}" + }; + ("and", x86, u16) => { + "and {scratch:x}, {val:x}" + }; + ("and", x86, u32) => { + "and {scratch:e}, {val:e}" + }; + ("and", x86, u64) => { + "and {scratch:r}, {val:r}" + }; + ("or", x86, u8) => { + "or {scratch}, {val}" + }; + ("or", x86, u16) => { + "or {scratch:x}, {val:x}" }; - (add, aarch64) => { + ("or", x86, u32) => { + "or {scratch:e}, {val:e}" + }; + ("or", x86, u64) => { + "or {scratch:r}, {val:r}" + }; + ("xor", x86, u8) => { + "xor {scratch}, {val}" + }; + ("xor", x86, u16) => { + "xor {scratch:x}, {val:x}" + }; + ("xor", x86, u32) => { + "xor {scratch:e}, {val:e}" + }; + ("xor", x86, u64) => { + "xor {scratch:r}, {val:r}" + }; + ("add", aarch64) => { "add {val}, {scratch}" }; - (or, aarch64) => { + ("and", aarch64) => { + "and {val}, {scratch}" + }; + ("or", aarch64) => { "orr {val}, {scratch}" }; - (xor, aarch64) => { + ("xor", aarch64) => { "eor {val}, {scratch}" }; - (add, arm) => { + ("add", arm) => { "add {val}, {scratch}" }; - (or, arm) => { + ("and", arm) => { + "and {val}, {scratch}" + }; + ("or", arm) => { "or {val}, {scratch}" }; - (xor, arm) => { + ("xor", arm) => { "xor {val}, {scratch}" }; } @@ -885,75 +930,36 @@ macro_rules! gen_fetchop { let ptr = unsafe { &mut *$ptr.as_ptr() }; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - { - // The `add` operation can be optimized with XADD. - // if op == "add": - // insns = "" - // if size == 8: - // insns += fmt_insn("lock; xaddb %[val], (%[addr])") - // elif size == 16: - // insns += fmt_insn("lock; xaddw %[val], (%[addr])") - // elif size == 32: - // insns += fmt_insn("lock; xaddl %[val], (%[addr])") - // else: - // assert size == 64 - // insns += fmt_insn("lock; xaddq %[val], (%[addr])") - // return """ - // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { - // asm volatile (%(insns)s - // : [val] "+&r" (val) - // : [addr] "r" (addr) - // : "memory", "cc"); - // return val; - // }""" % { - // "cpp_type": cpp_type, - // "fun_name": fun_name, - // "insns": insns, - // } - // // Use a +a constraint to ensure `res` is stored in RAX. This is required - // // for the CMPXCHG instruction. 
- // insns = "" - // if size == 8: - // insns += fmt_insn("movb (%[addr]), %[res]") - // insns += fmt_insn("0: movb %[res], %[scratch]") - // insns += fmt_insn("OPb %[val], %[scratch]") - // insns += fmt_insn("lock; cmpxchgb %[scratch], (%[addr])") - // elif size == 16: - // insns += fmt_insn("movw (%[addr]), %[res]") - // insns += fmt_insn("0: movw %[res], %[scratch]") - // insns += fmt_insn("OPw %[val], %[scratch]") - // insns += fmt_insn("lock; cmpxchgw %[scratch], (%[addr])") - // elif size == 32: - // insns += fmt_insn("movl (%[addr]), %[res]") - // insns += fmt_insn("0: movl %[res], %[scratch]") - // insns += fmt_insn("OPl %[val], %[scratch]") - // insns += fmt_insn("lock; cmpxchgl %[scratch], (%[addr])") - // else: - // assert size == 64 - // insns += fmt_insn("movq (%[addr]), %[res]") - // insns += fmt_insn("0: movq %[res], %[scratch]") - // insns += fmt_insn("OPq %[val], %[scratch]") - // insns += fmt_insn("lock; cmpxchgq %[scratch], (%[addr])") - // insns = insns.replace("OP", op) - // insns += fmt_insn("jnz 0b") - // return """ - // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { - // %(cpp_type)s res, scratch; - // asm volatile (%(insns)s - // : [res] "=&a" (res), [scratch] "=&r" (scratch) - // : [addr] "r" (addr), [val] "r"(val) - // : "memory", "cc"); - // return res; - // }""" % { - // "cpp_type": cpp_type, - // "fun_name": fun_name, - // "insns": insns, - // } - todo!(); + unsafe { + if $op == "add" { + // The `add` operation can be optimized with XADD. + core::arch::asm!( + "lock; xadd [{ptr}], {val}", + val = inout(reg_byte) $val, + ptr = in(reg) ptr, + options(nostack) + ); + } else { + let res: u8; + core::arch::asm!( + "mov al, [{ptr}]", + "2: mov {scratch}, al", + fetchop!($op, x86, u8), + "lock; cmpxchg [{ptr}], {scratch}", + "jnz 2b", + // Use of RAX is required for the CMPXCHG instruction. + out("al") res, + scratch = out(reg_byte) _, + ptr = in(reg) ptr, + val = in(reg_byte) $val, + options(nostack) + ); + $val = res; + } } #[cfg(target_arch = "aarch64")] - { + unsafe { // insns = "" // insns += fmt_insn("dmb ish") // insns += fmt_insn("0:") @@ -1043,79 +1049,39 @@ macro_rules! gen_fetchop { todo!(); } - #[expect(unreachable_code)] - const { panic!("Unexpected arch") } + return $val; }; (u16, $op: tt, $ptr: ident, $val: ident) => { // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. let ptr = unsafe { &mut *$ptr.as_ptr() }; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - { - // The `add` operation can be optimized with XADD. - // if op == "add": - // insns = "" - // if size == 8: - // insns += fmt_insn("lock; xaddb %[val], (%[addr])") - // elif size == 16: - // insns += fmt_insn("lock; xaddw %[val], (%[addr])") - // elif size == 32: - // insns += fmt_insn("lock; xaddl %[val], (%[addr])") - // else: - // assert size == 64 - // insns += fmt_insn("lock; xaddq %[val], (%[addr])") - // return """ - // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { - // asm volatile (%(insns)s - // : [val] "+&r" (val) - // : [addr] "r" (addr) - // : "memory", "cc"); - // return val; - // }""" % { - // "cpp_type": cpp_type, - // "fun_name": fun_name, - // "insns": insns, - // } - // // Use a +a constraint to ensure `res` is stored in RAX. This is required - // // for the CMPXCHG instruction. 
- // insns = "" - // if size == 8: - // insns += fmt_insn("movb (%[addr]), %[res]") - // insns += fmt_insn("0: movb %[res], %[scratch]") - // insns += fmt_insn("OPb %[val], %[scratch]") - // insns += fmt_insn("lock; cmpxchgb %[scratch], (%[addr])") - // elif size == 16: - // insns += fmt_insn("movw (%[addr]), %[res]") - // insns += fmt_insn("0: movw %[res], %[scratch]") - // insns += fmt_insn("OPw %[val], %[scratch]") - // insns += fmt_insn("lock; cmpxchgw %[scratch], (%[addr])") - // elif size == 32: - // insns += fmt_insn("movl (%[addr]), %[res]") - // insns += fmt_insn("0: movl %[res], %[scratch]") - // insns += fmt_insn("OPl %[val], %[scratch]") - // insns += fmt_insn("lock; cmpxchgl %[scratch], (%[addr])") - // else: - // assert size == 64 - // insns += fmt_insn("movq (%[addr]), %[res]") - // insns += fmt_insn("0: movq %[res], %[scratch]") - // insns += fmt_insn("OPq %[val], %[scratch]") - // insns += fmt_insn("lock; cmpxchgq %[scratch], (%[addr])") - // insns = insns.replace("OP", op) - // insns += fmt_insn("jnz 0b") - // return """ - // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { - // %(cpp_type)s res, scratch; - // asm volatile (%(insns)s - // : [res] "=&a" (res), [scratch] "=&r" (scratch) - // : [addr] "r" (addr), [val] "r"(val) - // : "memory", "cc"); - // return res; - // }""" % { - // "cpp_type": cpp_type, - // "fun_name": fun_name, - // "insns": insns, - // } - todo!(); + unsafe { + if $op == "add" { + // The `add` operation can be optimized with XADD. + core::arch::asm!( + "lock; xadd [{ptr}], {val:x}", + val = inout(reg) $val, + ptr = in(reg) ptr, + options(nostack) + ); + } else { + let res: u16; + core::arch::asm!( + "mov ax, [{ptr}]", + "2: mov {scratch:x}, ax", + fetchop!($op, x86, u16), + "lock; cmpxchg [{ptr}], {scratch:x}", + "jnz 2b", + // Use of RAX is required for the CMPXCHG instruction. + out("ax") res, + scratch = out(reg) _, + ptr = in(reg) ptr, + val = in(reg) $val, + options(nostack) + ); + $val = res; + } } #[cfg(target_arch = "aarch64")] @@ -1209,79 +1175,39 @@ macro_rules! gen_fetchop { todo!(); } - #[expect(unreachable_code)] - const { panic!("Unexpected arch") } + return $val; }; (u32, $op: tt, $ptr: ident, $val: ident) => { // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. let ptr = unsafe { &mut *$ptr.as_ptr() }; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - { - // The `add` operation can be optimized with XADD. - // if op == "add": - // insns = "" - // if size == 8: - // insns += fmt_insn("lock; xaddb %[val], (%[addr])") - // elif size == 16: - // insns += fmt_insn("lock; xaddw %[val], (%[addr])") - // elif size == 32: - // insns += fmt_insn("lock; xaddl %[val], (%[addr])") - // else: - // assert size == 64 - // insns += fmt_insn("lock; xaddq %[val], (%[addr])") - // return """ - // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { - // asm volatile (%(insns)s - // : [val] "+&r" (val) - // : [addr] "r" (addr) - // : "memory", "cc"); - // return val; - // }""" % { - // "cpp_type": cpp_type, - // "fun_name": fun_name, - // "insns": insns, - // } - // // Use a +a constraint to ensure `res` is stored in RAX. This is required - // // for the CMPXCHG instruction. 
- // insns = "" - // if size == 8: - // insns += fmt_insn("movb (%[addr]), %[res]") - // insns += fmt_insn("0: movb %[res], %[scratch]") - // insns += fmt_insn("OPb %[val], %[scratch]") - // insns += fmt_insn("lock; cmpxchgb %[scratch], (%[addr])") - // elif size == 16: - // insns += fmt_insn("movw (%[addr]), %[res]") - // insns += fmt_insn("0: movw %[res], %[scratch]") - // insns += fmt_insn("OPw %[val], %[scratch]") - // insns += fmt_insn("lock; cmpxchgw %[scratch], (%[addr])") - // elif size == 32: - // insns += fmt_insn("movl (%[addr]), %[res]") - // insns += fmt_insn("0: movl %[res], %[scratch]") - // insns += fmt_insn("OPl %[val], %[scratch]") - // insns += fmt_insn("lock; cmpxchgl %[scratch], (%[addr])") - // else: - // assert size == 64 - // insns += fmt_insn("movq (%[addr]), %[res]") - // insns += fmt_insn("0: movq %[res], %[scratch]") - // insns += fmt_insn("OPq %[val], %[scratch]") - // insns += fmt_insn("lock; cmpxchgq %[scratch], (%[addr])") - // insns = insns.replace("OP", op) - // insns += fmt_insn("jnz 0b") - // return """ - // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { - // %(cpp_type)s res, scratch; - // asm volatile (%(insns)s - // : [res] "=&a" (res), [scratch] "=&r" (scratch) - // : [addr] "r" (addr), [val] "r"(val) - // : "memory", "cc"); - // return res; - // }""" % { - // "cpp_type": cpp_type, - // "fun_name": fun_name, - // "insns": insns, - // } - todo!(); + unsafe { + if $op == "add" { + // The `add` operation can be optimized with XADD. + core::arch::asm!( + "lock; xadd [{ptr}], {val:e}", + val = inout(reg) $val, + ptr = in(reg) ptr, + options(nostack) + ); + } else { + let res: u32; + core::arch::asm!( + "mov eax, [{ptr}]", + "2: mov {scratch:e}, eax", + fetchop!($op, x86, u32), + "lock; cmpxchg [{ptr}], {scratch:e}", + "jnz 2b", + // Use of RAX is required for the CMPXCHG instruction. + out("eax") res, + scratch = out(reg) _, + ptr = in(reg) ptr, + val = in(reg) $val, + options(nostack) + ); + $val = res; + } } #[cfg(target_arch = "aarch64")] @@ -1375,79 +1301,39 @@ macro_rules! gen_fetchop { todo!(); } - #[expect(unreachable_code)] - const { panic!("Unexpected arch") } + return $val; }; (u64, $op: tt, $ptr: ident, $val: ident) => { // SAFETY: ptr is NonNull<()>; it is never null, dangling, or unaligned. let ptr = unsafe { &mut *$ptr.as_ptr() }; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - { - // The `add` operation can be optimized with XADD. - // if op == "add": - // insns = "" - // if size == 8: - // insns += fmt_insn("lock; xaddb %[val], (%[addr])") - // elif size == 16: - // insns += fmt_insn("lock; xaddw %[val], (%[addr])") - // elif size == 32: - // insns += fmt_insn("lock; xaddl %[val], (%[addr])") - // else: - // assert size == 64 - // insns += fmt_insn("lock; xaddq %[val], (%[addr])") - // return """ - // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { - // asm volatile (%(insns)s - // : [val] "+&r" (val) - // : [addr] "r" (addr) - // : "memory", "cc"); - // return val; - // }""" % { - // "cpp_type": cpp_type, - // "fun_name": fun_name, - // "insns": insns, - // } - // // Use a +a constraint to ensure `res` is stored in RAX. This is required - // // for the CMPXCHG instruction. 
- // insns = "" - // if size == 8: - // insns += fmt_insn("movb (%[addr]), %[res]") - // insns += fmt_insn("0: movb %[res], %[scratch]") - // insns += fmt_insn("OPb %[val], %[scratch]") - // insns += fmt_insn("lock; cmpxchgb %[scratch], (%[addr])") - // elif size == 16: - // insns += fmt_insn("movw (%[addr]), %[res]") - // insns += fmt_insn("0: movw %[res], %[scratch]") - // insns += fmt_insn("OPw %[val], %[scratch]") - // insns += fmt_insn("lock; cmpxchgw %[scratch], (%[addr])") - // elif size == 32: - // insns += fmt_insn("movl (%[addr]), %[res]") - // insns += fmt_insn("0: movl %[res], %[scratch]") - // insns += fmt_insn("OPl %[val], %[scratch]") - // insns += fmt_insn("lock; cmpxchgl %[scratch], (%[addr])") - // else: - // assert size == 64 - // insns += fmt_insn("movq (%[addr]), %[res]") - // insns += fmt_insn("0: movq %[res], %[scratch]") - // insns += fmt_insn("OPq %[val], %[scratch]") - // insns += fmt_insn("lock; cmpxchgq %[scratch], (%[addr])") - // insns = insns.replace("OP", op) - // insns += fmt_insn("jnz 0b") - // return """ - // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { - // %(cpp_type)s res, scratch; - // asm volatile (%(insns)s - // : [res] "=&a" (res), [scratch] "=&r" (scratch) - // : [addr] "r" (addr), [val] "r"(val) - // : "memory", "cc"); - // return res; - // }""" % { - // "cpp_type": cpp_type, - // "fun_name": fun_name, - // "insns": insns, - // } - todo!(); + unsafe { + if $op == "add" { + // The `add` operation can be optimized with XADD. + core::arch::asm!( + "lock; xadd [{ptr}], {val:r}", + val = inout(reg) $val, + ptr = in(reg) ptr, + options(nostack) + ); + } else { + let res: u64; + core::arch::asm!( + "mov rax, [{ptr}]", + "2: mov {scratch:r}, rax", + fetchop!($op, x86, u64), + "lock; cmpxchg [{ptr}], {scratch:r}", + "jnz 2b", + // Use of RAX is required for the CMPXCHG instruction. + out("rax") res, + scratch = out(reg) _, + ptr = in(reg) ptr, + val = in(reg) $val, + options(nostack) + ); + $val = res; + } } #[cfg(target_arch = "aarch64")] @@ -1541,8 +1427,7 @@ macro_rules! 
gen_fetchop { todo!(); } - #[expect(unreachable_code)] - const { panic!("Unexpected arch") } + return $val; }; } @@ -1701,7 +1586,6 @@ pub fn atomic_load_32_seq_cst(ptr: NonNull<()>) -> u32 { gen_load!(u32, ptr, true); } -// if is_64bit: #[inline(always)] #[cfg(any(target_arch = "x86_64", target_arch = "aarch64",))] pub fn atomic_load_64_seq_cst(ptr: NonNull<()>) -> u64 { @@ -1918,7 +1802,7 @@ pub fn atomic_cmp_xchg_64_seq_cst(ptr: NonNull<()>, mut old_val: u64, new_val: u target_arch = "aarch64", target_arch = "arm" ))] -pub fn atomic_add_8_seq_cst(ptr: NonNull<()>, val: u8) -> u8 { +pub fn atomic_add_8_seq_cst(ptr: NonNull<()>, mut val: u8) -> u8 { gen_fetchop!(u8, "add", ptr, val); } #[inline(always)] @@ -1928,7 +1812,7 @@ pub fn atomic_add_8_seq_cst(ptr: NonNull<()>, val: u8) -> u8 { target_arch = "aarch64", target_arch = "arm" ))] -pub fn atomic_add_16_seq_cst(ptr: NonNull<()>, val: u16) -> u16 { +pub fn atomic_add_16_seq_cst(ptr: NonNull<()>, mut val: u16) -> u16 { gen_fetchop!(u16, "add", ptr, val); } #[inline(always)] @@ -1938,13 +1822,13 @@ pub fn atomic_add_16_seq_cst(ptr: NonNull<()>, val: u16) -> u16 { target_arch = "aarch64", target_arch = "arm" ))] -pub fn atomic_add_32_seq_cst(ptr: NonNull<()>, val: u32) -> u32 { +pub fn atomic_add_32_seq_cst(ptr: NonNull<()>, mut val: u32) -> u32 { gen_fetchop!(u32, "add", ptr, val); } #[inline(always)] #[cfg(any(target_arch = "x86_64", target_arch = "aarch64",))] -pub fn atomic_add_64_seq_cst(ptr: NonNull<()>, val: u64) -> u64 { +pub fn atomic_add_64_seq_cst(ptr: NonNull<()>, mut val: u64) -> u64 { gen_fetchop!(u64, "add", ptr, val); } @@ -1957,7 +1841,7 @@ pub fn atomic_add_64_seq_cst(ptr: NonNull<()>, val: u64) -> u64 { target_arch = "aarch64", target_arch = "arm" ))] -pub fn atomic_and_8_seq_cst(ptr: NonNull<()>, val: u8) -> u8 { +pub fn atomic_and_8_seq_cst(ptr: NonNull<()>, mut val: u8) -> u8 { gen_fetchop!(u8, "and", ptr, val); } #[inline(always)] @@ -1967,7 +1851,7 @@ pub fn atomic_and_8_seq_cst(ptr: NonNull<()>, val: u8) -> u8 { target_arch = "aarch64", target_arch = "arm" ))] -pub fn atomic_and_16_seq_cst(ptr: NonNull<()>, val: u16) -> u16 { +pub fn atomic_and_16_seq_cst(ptr: NonNull<()>, mut val: u16) -> u16 { gen_fetchop!(u16, "and", ptr, val); } #[inline(always)] @@ -1977,13 +1861,13 @@ pub fn atomic_and_16_seq_cst(ptr: NonNull<()>, val: u16) -> u16 { target_arch = "aarch64", target_arch = "arm" ))] -pub fn atomic_and_32_seq_cst(ptr: NonNull<()>, val: u32) -> u32 { +pub fn atomic_and_32_seq_cst(ptr: NonNull<()>, mut val: u32) -> u32 { gen_fetchop!(u32, "and", ptr, val); } #[inline(always)] #[cfg(any(target_arch = "x86_64", target_arch = "aarch64",))] -pub fn atomic_and_64_seq_cst(ptr: NonNull<()>, val: u64) -> u64 { +pub fn atomic_and_64_seq_cst(ptr: NonNull<()>, mut val: u64) -> u64 { gen_fetchop!(u64, "and", ptr, val); } @@ -1996,7 +1880,7 @@ pub fn atomic_and_64_seq_cst(ptr: NonNull<()>, val: u64) -> u64 { target_arch = "aarch64", target_arch = "arm" ))] -pub fn atomic_or_8_seq_cst(ptr: NonNull<()>, val: u8) -> u8 { +pub fn atomic_or_8_seq_cst(ptr: NonNull<()>, mut val: u8) -> u8 { gen_fetchop!(u8, "or", ptr, val); } #[inline(always)] @@ -2006,7 +1890,7 @@ pub fn atomic_or_8_seq_cst(ptr: NonNull<()>, val: u8) -> u8 { target_arch = "aarch64", target_arch = "arm" ))] -pub fn atomic_or_16_seq_cst(ptr: NonNull<()>, val: u16) -> u16 { +pub fn atomic_or_16_seq_cst(ptr: NonNull<()>, mut val: u16) -> u16 { gen_fetchop!(u16, "or", ptr, val); } #[inline(always)] @@ -2016,13 +1900,13 @@ pub fn atomic_or_16_seq_cst(ptr: NonNull<()>, 
val: u16) -> u16 { target_arch = "aarch64", target_arch = "arm" ))] -pub fn atomic_or_32_seq_cst(ptr: NonNull<()>, val: u32) -> u32 { +pub fn atomic_or_32_seq_cst(ptr: NonNull<()>, mut val: u32) -> u32 { gen_fetchop!(u32, "or", ptr, val); } #[inline(always)] #[cfg(any(target_arch = "x86_64", target_arch = "aarch64",))] -pub fn atomic_or_64_seq_cst(ptr: NonNull<()>, val: u64) -> u64 { +pub fn atomic_or_64_seq_cst(ptr: NonNull<()>, mut val: u64) -> u64 { gen_fetchop!(u64, "or", ptr, val); } @@ -2035,7 +1919,7 @@ pub fn atomic_or_64_seq_cst(ptr: NonNull<()>, val: u64) -> u64 { target_arch = "aarch64", target_arch = "arm" ))] -pub fn atomic_xor_8_seq_cst(ptr: NonNull<()>, val: u8) -> u8 { +pub fn atomic_xor_8_seq_cst(ptr: NonNull<()>, mut val: u8) -> u8 { gen_fetchop!(u8, "xor", ptr, val); } #[inline(always)] @@ -2045,7 +1929,7 @@ pub fn atomic_xor_8_seq_cst(ptr: NonNull<()>, val: u8) -> u8 { target_arch = "aarch64", target_arch = "arm" ))] -pub fn atomic_xor_16_seq_cst(ptr: NonNull<()>, val: u16) -> u16 { +pub fn atomic_xor_16_seq_cst(ptr: NonNull<()>, mut val: u16) -> u16 { gen_fetchop!(u16, "xor", ptr, val); } #[inline(always)] @@ -2055,14 +1939,13 @@ pub fn atomic_xor_16_seq_cst(ptr: NonNull<()>, val: u16) -> u16 { target_arch = "aarch64", target_arch = "arm" ))] -pub fn atomic_xor_32_seq_cst(ptr: NonNull<()>, val: u32) -> u32 { +pub fn atomic_xor_32_seq_cst(ptr: NonNull<()>, mut val: u32) -> u32 { gen_fetchop!(u32, "xor", ptr, val); } -// if is_64bit: #[inline(always)] #[cfg(any(target_arch = "x86_64", target_arch = "aarch64",))] -pub fn atomic_xor_64_seq_cst(ptr: NonNull<()>, val: u64) -> u64 { +pub fn atomic_xor_64_seq_cst(ptr: NonNull<()>, mut val: u64) -> u64 { gen_fetchop!(u64, "xor", ptr, val); } @@ -2082,9 +1965,9 @@ pub fn atomic_pause() { } // See comment in jit/AtomicOperations-shared-jit.cpp for an explanation. 
-// wordsize = 8 if is_64bit else 4 -// words_in_block = 8 -// blocksize = words_in_block * wordsize +const WORDS_IN_BLOCK: usize = 8; +const WORD_SIZE: usize = core::mem::size_of::<usize>(); +const BLOCK_SIZE: usize = WORDS_IN_BLOCK * WORD_SIZE; #[inline(always)] #[cfg(any( @@ -2094,7 +1977,7 @@ pub fn atomic_pause() { target_arch = "arm" ))] pub fn atomic_copy_unaligned_block_down_unsynchronized() { - gen_copy!(u8, 1, blocksize, "down"); + gen_copy!(u8, 1, BLOCK_SIZE, "down"); } #[inline(always)] @@ -2105,7 +1988,7 @@ pub fn atomic_copy_unaligned_block_down_unsynchronized() { target_arch = "arm" ))] pub fn atomic_copy_unaligned_block_up_unsynchronized() { - gen_copy!(u8, 1, blocksize, "up"); + gen_copy!(u8, 1, BLOCK_SIZE, "up"); } #[inline(always)] @@ -2116,7 +1999,7 @@ pub fn atomic_copy_unaligned_block_up_unsynchronized() { target_arch = "arm" ))] pub fn atomic_copy_unaligned_word_down_unsynchronized() { - gen_copy!(u8, 1, wordsize, "down"); + gen_copy!(u8, 1, WORD_SIZE, "down"); } #[inline(always)] @@ -2127,7 +2010,7 @@ pub fn atomic_copy_unaligned_word_down_unsynchronized() { target_arch = "arm" ))] pub fn atomic_copy_unaligned_word_up_unsynchronized() { - gen_copy!(u8, 1, wordsize, "up"); + gen_copy!(u8, 1, WORD_SIZE, "up"); } #[inline(always)] @@ -2138,7 +2021,7 @@ pub fn atomic_copy_unaligned_word_up_unsynchronized() { target_arch = "arm" ))] pub fn atomic_copy_block_down_unsynchronized() { - gen_copy!(uptr, wordsize, words_in_block, "down"); + gen_copy!(uptr, WORD_SIZE, WORDS_IN_BLOCK, "down"); } #[inline(always)] @@ -2149,7 +2032,7 @@ pub fn atomic_copy_block_down_unsynchronized() { target_arch = "arm" ))] pub fn atomic_copy_block_up_unsynchronized() { - gen_copy!(uptr, wordsize, words_in_block, "up"); + gen_copy!(uptr, WORD_SIZE, WORDS_IN_BLOCK, "up"); } #[inline(always)] @@ -2160,7 +2043,7 @@ pub fn atomic_copy_block_up_unsynchronized() { target_arch = "arm" ))] pub fn atomic_copy_word_unsynchronized() { - gen_copy!(uptr, wordsize, 1, "down"); + gen_copy!(uptr, WORD_SIZE, 1, "down"); } #[inline(always)] @@ -2199,207 +2082,466 @@ pub fn atomic_copy8_unsynchronized() { pub const JS_GENERATED_ATOMICS_BLOCKSIZE: usize = 0; pub const JS_GENERATED_ATOMICS_WORSIZE: usize = 0; -#[test] -fn test_load() { - let foo = NonNull::from(Box::leak(Box::new([0xFFFF_FFFF_FFFF_FFFFu64; 1]))).cast::<()>(); - - assert_eq!(atomic_load_8_unsynchronized(foo), 0xFF); - assert_eq!(unsafe { foo.cast::<u64>().read() }, 0xFFFF_FFFF_FFFF_FFFF); - - assert_eq!(atomic_load_16_unsynchronized(foo), 0xFFFF); - assert_eq!(unsafe { foo.cast::<u64>().read() }, 0xFFFF_FFFF_FFFF_FFFF); - - assert_eq!(atomic_load_32_unsynchronized(foo), 0xFFFF_FFFF); - assert_eq!(unsafe { foo.cast::<u64>().read() }, 0xFFFF_FFFF_FFFF_FFFF); - - assert_eq!(atomic_load_64_unsynchronized(foo), 0xFFFF_FFFF_FFFF_FFFF); - assert_eq!(unsafe { foo.cast::<u64>().read() }, 0xFFFF_FFFF_FFFF_FFFF); - - assert_eq!(atomic_load_8_seq_cst(foo), 0xFF); - assert_eq!(unsafe { foo.cast::<u64>().read() }, 0xFFFF_FFFF_FFFF_FFFF); - - assert_eq!(atomic_load_16_seq_cst(foo), 0xFFFF); - assert_eq!(unsafe { foo.cast::<u64>().read() }, 0xFFFF_FFFF_FFFF_FFFF); - - assert_eq!(atomic_load_32_seq_cst(foo), 0xFFFF_FFFF); - assert_eq!(unsafe { foo.cast::<u64>().read() }, 0xFFFF_FFFF_FFFF_FFFF); +#[cfg(test)] +mod test { + use std::ptr::NonNull; - assert_eq!(atomic_load_64_seq_cst(foo), 0xFFFF_FFFF_FFFF_FFFF); - assert_eq!(unsafe { foo.cast::<u64>().read() }, 0xFFFF_FFFF_FFFF_FFFF); + use crate::*; - let _ = unsafe { Box::from_raw(foo.cast::<u64>().as_ptr()) }; -} - -#[test] -fn test_store() { - let foo = 
NonNull::from(Box::leak(Box::new([0u64; 1]))).cast::<()>(); - - atomic_store_8_unsynchronized(foo, 0xFF); - assert_eq!(atomic_load_8_unsynchronized(foo), 0xFF); - assert_eq!(unsafe { foo.cast::().read() }, 0xFF); - - atomic_store_16_unsynchronized(foo, 0xFFFF); - assert_eq!(atomic_load_16_unsynchronized(foo), 0xFFFF); - assert_eq!(unsafe { foo.cast::().read() }, 0xFFFF); - - atomic_store_32_unsynchronized(foo, 0xFFFF_FFFF); - assert_eq!(atomic_load_32_unsynchronized(foo), 0xFFFF_FFFF); - assert_eq!(unsafe { foo.cast::().read() }, 0xFFFF_FFFF); - - atomic_store_64_unsynchronized(foo, 0xFFFF_FFFF_FFFF_FFFF); - assert_eq!(atomic_load_64_unsynchronized(foo), 0xFFFF_FFFF_FFFF_FFFF); - assert_eq!(unsafe { foo.cast::().read() }, 0xFFFF_FFFF_FFFF_FFFF); - - atomic_store_64_unsynchronized(foo, 0x0); - assert_eq!(atomic_load_64_unsynchronized(foo), 0x0); - assert_eq!(unsafe { foo.cast::().read() }, 0x0); - - atomic_store_8_seq_cst(foo, 0xFF); - assert_eq!(atomic_load_8_seq_cst(foo), 0xFF); - assert_eq!(unsafe { foo.cast::().read() }, 0xFF); + #[test] + fn test_load() { + let foo = NonNull::from(Box::leak(Box::new([u64::MAX; 1]))).cast::<()>(); - atomic_store_16_seq_cst(foo, 0xFFFF); - assert_eq!(atomic_load_16_seq_cst(foo), 0xFFFF); - assert_eq!(unsafe { foo.cast::().read() }, 0xFFFF); + assert_eq!(atomic_load_8_unsynchronized(foo), u8::MAX); + assert_eq!(unsafe { foo.cast::().read() }, u64::MAX); - atomic_store_32_seq_cst(foo, 0xFFFF_FFFF); - assert_eq!(atomic_load_32_seq_cst(foo), 0xFFFF_FFFF); - assert_eq!(unsafe { foo.cast::().read() }, 0xFFFF_FFFF); + assert_eq!(atomic_load_16_unsynchronized(foo), u16::MAX); + assert_eq!(unsafe { foo.cast::().read() }, u64::MAX); - atomic_store_64_seq_cst(foo, 0xFFFF_FFFF_FFFF_FFFF); - assert_eq!(atomic_load_64_seq_cst(foo), 0xFFFF_FFFF_FFFF_FFFF); - assert_eq!(unsafe { foo.cast::().read() }, 0xFFFF_FFFF_FFFF_FFFF); + assert_eq!(atomic_load_32_unsynchronized(foo), u32::MAX); + assert_eq!(unsafe { foo.cast::().read() }, u64::MAX); - let _ = unsafe { Box::from_raw(foo.cast::().as_ptr()) }; -} + assert_eq!(atomic_load_64_unsynchronized(foo), u64::MAX); + assert_eq!(unsafe { foo.cast::().read() }, u64::MAX); -#[test] -fn test_exchange() { - let foo = NonNull::from(Box::leak(Box::new([0u64; 1]))).cast::<()>(); - - assert_eq!(atomic_exchange_8_seq_cst(foo, 0xFF), 0, "u8 initial"); - assert_eq!(atomic_exchange_8_seq_cst(foo, 0), 0xFF, "u8 subsequent"); - assert_eq!(unsafe { foo.cast::().read() }, 0); - - assert_eq!(atomic_exchange_16_seq_cst(foo, 0xFFFF), 0, "u16 initial"); - assert_eq!(unsafe { foo.cast::().read() }, 0xFFFF); - assert_eq!(atomic_exchange_16_seq_cst(foo, 0), 0xFFFF, "u16 subsequent"); - assert_eq!(unsafe { foo.cast::().read() }, 0); - - assert_eq!( - atomic_exchange_32_seq_cst(foo, 0xFFFF_FFFF), - 0, - "u32 initial" - ); - assert_eq!( - atomic_exchange_32_seq_cst(foo, 0), - 0xFFFF_FFFF, - "u32 subsequent" - ); - assert_eq!(unsafe { foo.cast::().read() }, 0); - - assert_eq!( - atomic_exchange_64_seq_cst(foo, 0xFFFF_FFFF_FFFF_FFFF), - 0, - "u64 initial" - ); - assert_eq!( - atomic_exchange_64_seq_cst(foo, 0), - 0xFFFF_FFFF_FFFF_FFFF, - "u64 subsequent" - ); - assert_eq!(unsafe { foo.cast::().read() }, 0); - - let _ = unsafe { Box::from_raw(foo.cast::().as_ptr()) }; -} + assert_eq!(atomic_load_8_seq_cst(foo), u8::MAX); + assert_eq!(unsafe { foo.cast::().read() }, u64::MAX); -#[test] -fn test_compare_exchange() { - let foo = NonNull::from(Box::leak(Box::new([0u64; 1]))).cast::<()>(); - - assert_eq!(atomic_cmp_xchg_8_seq_cst(foo, 0xFF, 0xFF), 0, "u8 
initial"); - assert_eq!(unsafe { foo.cast::().read() }, 0); - assert_eq!(atomic_cmp_xchg_8_seq_cst(foo, 0, 0xFF), 0, "u8 initial"); - assert_eq!(unsafe { foo.cast::().read() }, 0xFF); - assert_eq!(atomic_cmp_xchg_8_seq_cst(foo, 0, 0), 0xFF, "u8 subsequent"); - assert_eq!(unsafe { foo.cast::().read() }, 0xFF); - assert_eq!( - atomic_cmp_xchg_8_seq_cst(foo, 0xFF, 0), - 0xFF, - "u8 subsequent" - ); - assert_eq!(unsafe { foo.cast::().read() }, 0); - - assert_eq!( - atomic_cmp_xchg_16_seq_cst(foo, 0xFFFF, 0xFFFF), - 0, - "u16 initial" - ); - assert_eq!(unsafe { foo.cast::().read() }, 0); - assert_eq!(atomic_cmp_xchg_16_seq_cst(foo, 0, 0xFFFF), 0, "u16 initial"); - assert_eq!(unsafe { foo.cast::().read() }, 0xFFFF); - assert_eq!( - atomic_cmp_xchg_16_seq_cst(foo, 0, 0), - 0xFFFF, - "u16 subsequent" - ); - assert_eq!(unsafe { foo.cast::().read() }, 0xFFFF); - assert_eq!( - atomic_cmp_xchg_16_seq_cst(foo, 0xFFFF, 0), - 0xFFFF, - "u16 subsequent" - ); - assert_eq!(unsafe { foo.cast::().read() }, 0); - - assert_eq!( - atomic_cmp_xchg_32_seq_cst(foo, 0xFFFF_FFFF, 0xFFFF_FFFF), - 0, - "u32 initial" - ); - assert_eq!(unsafe { foo.cast::().read() }, 0); - assert_eq!( - atomic_cmp_xchg_32_seq_cst(foo, 0, 0xFFFF_FFFF), - 0, - "u32 initial" - ); - assert_eq!(unsafe { foo.cast::().read() }, 0xFFFF_FFFF); - assert_eq!( - atomic_cmp_xchg_32_seq_cst(foo, 0, 0), - 0xFFFF_FFFF, - "u32 subsequent" - ); - assert_eq!(unsafe { foo.cast::().read() }, 0xFFFF_FFFF); - assert_eq!( - atomic_cmp_xchg_32_seq_cst(foo, 0xFFFF_FFFF, 0), - 0xFFFF_FFFF, - "u32 subsequent" - ); - assert_eq!(unsafe { foo.cast::().read() }, 0); - - assert_eq!( - atomic_cmp_xchg_64_seq_cst(foo, 0xFFFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF_FFFF_FFFF), - 0, - "u64 initial" - ); - assert_eq!(unsafe { foo.cast::().read() }, 0); - assert_eq!( - atomic_cmp_xchg_64_seq_cst(foo, 0, 0xFFFF_FFFF_FFFF_FFFF), - 0, - "u64 initial" - ); - assert_eq!(unsafe { foo.cast::().read() }, 0xFFFF_FFFF_FFFF_FFFF); - assert_eq!( - atomic_cmp_xchg_64_seq_cst(foo, 0, 0), - 0xFFFF_FFFF_FFFF_FFFF, - "u64 subsequent" - ); - assert_eq!(unsafe { foo.cast::().read() }, 0xFFFF_FFFF_FFFF_FFFF); - assert_eq!( - atomic_cmp_xchg_64_seq_cst(foo, 0xFFFF_FFFF_FFFF_FFFF, 0), - 0xFFFF_FFFF_FFFF_FFFF, - "u64 subsequent" - ); - assert_eq!(unsafe { foo.cast::().read() }, 0); - - let _ = unsafe { Box::from_raw(foo.cast::().as_ptr()) }; + assert_eq!(atomic_load_16_seq_cst(foo), u16::MAX); + assert_eq!(unsafe { foo.cast::().read() }, u64::MAX); + + assert_eq!(atomic_load_32_seq_cst(foo), u32::MAX); + assert_eq!(unsafe { foo.cast::().read() }, u64::MAX); + + assert_eq!(atomic_load_64_seq_cst(foo), u64::MAX); + assert_eq!(unsafe { foo.cast::().read() }, u64::MAX); + + let _ = unsafe { Box::from_raw(foo.cast::().as_ptr()) }; + } + + #[test] + fn test_store() { + let foo = NonNull::from(Box::leak(Box::new([0u64; 1]))).cast::<()>(); + + atomic_store_8_unsynchronized(foo, u8::MAX); + assert_eq!(atomic_load_8_unsynchronized(foo), u8::MAX); + assert_eq!(unsafe { foo.cast::().read() }, u8::MAX as u64); + + atomic_store_16_unsynchronized(foo, u16::MAX); + assert_eq!(atomic_load_16_unsynchronized(foo), u16::MAX); + assert_eq!(unsafe { foo.cast::().read() }, u16::MAX as u64); + + atomic_store_32_unsynchronized(foo, u32::MAX); + assert_eq!(atomic_load_32_unsynchronized(foo), u32::MAX); + assert_eq!(unsafe { foo.cast::().read() }, u32::MAX as u64); + + atomic_store_64_unsynchronized(foo, u64::MAX); + assert_eq!(atomic_load_64_unsynchronized(foo), u64::MAX); + assert_eq!(unsafe { foo.cast::().read() }, u64::MAX); + + 
atomic_store_64_unsynchronized(foo, 0x0); + assert_eq!(atomic_load_64_unsynchronized(foo), 0x0); + assert_eq!(unsafe { foo.cast::().read() }, 0x0); + + atomic_store_8_seq_cst(foo, u8::MAX); + assert_eq!(atomic_load_8_seq_cst(foo), u8::MAX); + assert_eq!(unsafe { foo.cast::().read() }, u8::MAX as u64); + + atomic_store_16_seq_cst(foo, u16::MAX); + assert_eq!(atomic_load_16_seq_cst(foo), u16::MAX); + assert_eq!(unsafe { foo.cast::().read() }, u16::MAX as u64); + + atomic_store_32_seq_cst(foo, u32::MAX); + assert_eq!(atomic_load_32_seq_cst(foo), u32::MAX); + assert_eq!(unsafe { foo.cast::().read() }, u32::MAX as u64); + + atomic_store_64_seq_cst(foo, u64::MAX); + assert_eq!(atomic_load_64_seq_cst(foo), u64::MAX); + assert_eq!(unsafe { foo.cast::().read() }, u64::MAX); + + let _ = unsafe { Box::from_raw(foo.cast::().as_ptr()) }; + } + + #[test] + fn test_exchange() { + let foo = NonNull::from(Box::leak(Box::new([0u64; 1]))).cast::<()>(); + + assert_eq!(atomic_exchange_8_seq_cst(foo, u8::MAX), 0, "u8 initial"); + assert_eq!(atomic_exchange_8_seq_cst(foo, 0), u8::MAX, "u8 subsequent"); + assert_eq!(unsafe { foo.cast::().read() }, 0); + + assert_eq!(atomic_exchange_16_seq_cst(foo, u16::MAX), 0, "u16 initial"); + assert_eq!(unsafe { foo.cast::().read() }, u16::MAX as u64); + assert_eq!( + atomic_exchange_16_seq_cst(foo, 0), + u16::MAX, + "u16 subsequent" + ); + assert_eq!(unsafe { foo.cast::().read() }, 0); + + assert_eq!(atomic_exchange_32_seq_cst(foo, u32::MAX), 0, "u32 initial"); + assert_eq!( + atomic_exchange_32_seq_cst(foo, 0), + u32::MAX, + "u32 subsequent" + ); + assert_eq!(unsafe { foo.cast::().read() }, 0); + + assert_eq!(atomic_exchange_64_seq_cst(foo, u64::MAX), 0, "u64 initial"); + assert_eq!( + atomic_exchange_64_seq_cst(foo, 0), + u64::MAX, + "u64 subsequent" + ); + assert_eq!(unsafe { foo.cast::().read() }, 0); + + let _ = unsafe { Box::from_raw(foo.cast::().as_ptr()) }; + } + + #[test] + fn test_compare_exchange() { + let foo = NonNull::from(Box::leak(Box::new([0u64; 1]))).cast::<()>(); + + assert_eq!( + atomic_cmp_xchg_8_seq_cst(foo, u8::MAX, u8::MAX), + 0, + "u8 initial" + ); + assert_eq!(unsafe { foo.cast::().read() }, 0); + assert_eq!(atomic_cmp_xchg_8_seq_cst(foo, 0, u8::MAX), 0, "u8 initial"); + assert_eq!(unsafe { foo.cast::().read() }, u8::MAX as u64); + assert_eq!( + atomic_cmp_xchg_8_seq_cst(foo, 0, 0), + u8::MAX, + "u8 subsequent" + ); + assert_eq!(unsafe { foo.cast::().read() }, u8::MAX as u64); + assert_eq!( + atomic_cmp_xchg_8_seq_cst(foo, u8::MAX, 0), + u8::MAX, + "u8 subsequent" + ); + assert_eq!(unsafe { foo.cast::().read() }, 0); + + assert_eq!( + atomic_cmp_xchg_16_seq_cst(foo, u16::MAX, u16::MAX), + 0, + "u16 initial" + ); + assert_eq!(unsafe { foo.cast::().read() }, 0); + assert_eq!( + atomic_cmp_xchg_16_seq_cst(foo, 0, u16::MAX), + 0, + "u16 initial" + ); + assert_eq!(unsafe { foo.cast::().read() }, u16::MAX as u64); + assert_eq!( + atomic_cmp_xchg_16_seq_cst(foo, 0, 0), + u16::MAX, + "u16 subsequent" + ); + assert_eq!(unsafe { foo.cast::().read() }, u16::MAX as u64); + assert_eq!( + atomic_cmp_xchg_16_seq_cst(foo, u16::MAX, 0), + u16::MAX, + "u16 subsequent" + ); + assert_eq!(unsafe { foo.cast::().read() }, 0); + + assert_eq!( + atomic_cmp_xchg_32_seq_cst(foo, u32::MAX, u32::MAX), + 0, + "u32 initial" + ); + assert_eq!(unsafe { foo.cast::().read() }, 0); + assert_eq!( + atomic_cmp_xchg_32_seq_cst(foo, 0, u32::MAX), + 0, + "u32 initial" + ); + assert_eq!(unsafe { foo.cast::().read() }, u32::MAX as u64); + assert_eq!( + atomic_cmp_xchg_32_seq_cst(foo, 0, 0), + 
u32::MAX, + "u32 subsequent" + ); + assert_eq!(unsafe { foo.cast::().read() }, u32::MAX as u64); + assert_eq!( + atomic_cmp_xchg_32_seq_cst(foo, u32::MAX, 0), + u32::MAX, + "u32 subsequent" + ); + assert_eq!(unsafe { foo.cast::().read() }, 0); + + assert_eq!( + atomic_cmp_xchg_64_seq_cst(foo, u64::MAX, u64::MAX), + 0, + "u64 initial" + ); + assert_eq!(unsafe { foo.cast::().read() }, 0); + assert_eq!( + atomic_cmp_xchg_64_seq_cst(foo, 0, u64::MAX), + 0, + "u64 initial" + ); + assert_eq!(unsafe { foo.cast::().read() }, u64::MAX); + assert_eq!( + atomic_cmp_xchg_64_seq_cst(foo, 0, 0), + u64::MAX, + "u64 subsequent" + ); + assert_eq!(unsafe { foo.cast::().read() }, u64::MAX); + assert_eq!( + atomic_cmp_xchg_64_seq_cst(foo, u64::MAX, 0), + u64::MAX, + "u64 subsequent" + ); + assert_eq!(unsafe { foo.cast::().read() }, 0); + + let _ = unsafe { Box::from_raw(foo.cast::().as_ptr()) }; + } + + #[test] + fn test_add() { + let foo = NonNull::from(Box::leak(Box::new([0u64; 1]))).cast::<()>(); + + assert_eq!(atomic_add_8_seq_cst(foo, u8::MAX), 0); + assert_eq!(unsafe { foo.cast::().read() }, u8::MAX as u64); + assert_eq!(atomic_add_8_seq_cst(foo, 1), u8::MAX); + assert_eq!(unsafe { foo.cast::().read() }, 0); + + assert_eq!(atomic_add_16_seq_cst(foo, u16::MAX), 0); + assert_eq!(unsafe { foo.cast::().read() }, u16::MAX as u64); + assert_eq!(atomic_add_16_seq_cst(foo, 1), u16::MAX); + assert_eq!(unsafe { foo.cast::().read() }, 0); + + assert_eq!(atomic_add_32_seq_cst(foo, u32::MAX), 0); + assert_eq!(unsafe { foo.cast::().read() }, u32::MAX as u64); + assert_eq!(atomic_add_32_seq_cst(foo, 1), u32::MAX); + assert_eq!(unsafe { foo.cast::().read() }, 0); + + assert_eq!(atomic_add_64_seq_cst(foo, u64::MAX), 0); + assert_eq!(unsafe { foo.cast::().read() }, u64::MAX); + assert_eq!(atomic_add_64_seq_cst(foo, 1), u64::MAX); + assert_eq!(unsafe { foo.cast::().read() }, 0); + + let _ = unsafe { Box::from_raw(foo.cast::().as_ptr()) }; + } + + #[test] + fn test_and() { + let foo = NonNull::from(Box::leak(Box::new([0u64; 1]))).cast::<()>(); + + assert_eq!(atomic_and_8_seq_cst(foo, u8::MAX), 0); + assert_eq!(unsafe { foo.cast::().read() }, 0); + atomic_store_64_unsynchronized(foo, u64::MAX); + assert_eq!(atomic_and_8_seq_cst(foo, u8::MAX), u8::MAX); + assert_eq!(unsafe { foo.cast::().read() }, u64::MAX); + assert_eq!(atomic_and_8_seq_cst(foo, 0xF0), u8::MAX); + assert_eq!(unsafe { foo.cast::().read() }, u64::MAX - 0xF); + assert_eq!(atomic_and_8_seq_cst(foo, 0), 0xF0); + assert_eq!( + unsafe { foo.cast::().read() }, + u64::MAX - u8::MAX as u64 + ); + unsafe { + foo.cast::().write(0); + } + + assert_eq!(atomic_and_16_seq_cst(foo, u16::MAX), 0); + assert_eq!(unsafe { foo.cast::().read() }, 0); + atomic_store_64_unsynchronized(foo, u64::MAX); + assert_eq!(atomic_and_16_seq_cst(foo, u16::MAX), u16::MAX); + assert_eq!(unsafe { foo.cast::().read() }, u64::MAX); + assert_eq!(atomic_and_16_seq_cst(foo, 0xFF00), u16::MAX); + assert_eq!(unsafe { foo.cast::().read() }, u64::MAX - 0xFF); + assert_eq!(atomic_and_16_seq_cst(foo, 0), 0xFF00); + assert_eq!( + unsafe { foo.cast::().read() }, + u64::MAX - u16::MAX as u64 + ); + unsafe { + foo.cast::().write(0); + } + + assert_eq!(atomic_and_32_seq_cst(foo, u32::MAX), 0); + assert_eq!(unsafe { foo.cast::().read() }, 0); + atomic_store_64_unsynchronized(foo, u64::MAX); + assert_eq!(atomic_and_32_seq_cst(foo, u32::MAX), u32::MAX); + assert_eq!(unsafe { foo.cast::().read() }, u64::MAX); + assert_eq!(atomic_and_32_seq_cst(foo, 0xFFFF_0000), u32::MAX); + assert_eq!(unsafe { foo.cast::().read() 
}, u64::MAX - 0xFFFF); + assert_eq!(atomic_and_32_seq_cst(foo, 0), 0xFFFF_0000); + assert_eq!( + unsafe { foo.cast::().read() }, + u64::MAX - u32::MAX as u64 + ); + unsafe { + foo.cast::().write(0); + } + + assert_eq!(atomic_and_64_seq_cst(foo, u64::MAX), 0); + assert_eq!(unsafe { foo.cast::().read() }, 0); + atomic_store_64_unsynchronized(foo, u64::MAX); + assert_eq!(atomic_and_64_seq_cst(foo, u64::MAX), u64::MAX); + assert_eq!(unsafe { foo.cast::().read() }, u64::MAX); + assert_eq!(atomic_and_64_seq_cst(foo, 0xFFFF_0000_FFFF_0000), u64::MAX); + assert_eq!(unsafe { foo.cast::().read() }, 0xFFFF_0000_FFFF_0000); + assert_eq!( + atomic_and_64_seq_cst(foo, 0x0_FFFF_0000_FFFF), + 0xFFFF_0000_FFFF_0000 + ); + assert_eq!(unsafe { foo.cast::().read() }, 0); + + let _ = unsafe { Box::from_raw(foo.cast::().as_ptr()) }; + } + + #[test] + fn test_or() { + let foo = NonNull::from(Box::leak(Box::new([0u64; 1]))).cast::<()>(); + + assert_eq!(atomic_or_8_seq_cst(foo, 0x73), 0); + assert_eq!(unsafe { foo.cast::().read() }, 0x73); + assert_eq!(atomic_or_8_seq_cst(foo, 0x1B), 0x73); + assert_eq!(unsafe { foo.cast::().read() }, 0x7B); + assert_eq!(atomic_or_8_seq_cst(foo, 0xF0), 0x7B); + assert_eq!(unsafe { foo.cast::().read() }, 0xFB); + assert_eq!(atomic_or_8_seq_cst(foo, 0x00), 0xFB); + assert_eq!(atomic_or_8_seq_cst(foo, 0xFF), 0xFB); + assert_eq!(unsafe { foo.cast::().read() }, 0xFF); + unsafe { + foo.cast::().write(0); + } + + assert_eq!(atomic_or_16_seq_cst(foo, 0xB182), 0); + assert_eq!(unsafe { foo.cast::().read() }, 0xB182); + assert_eq!(atomic_or_16_seq_cst(foo, 0x02C3), 0xB182); + assert_eq!(unsafe { foo.cast::().read() }, 0xB3C3); + assert_eq!(atomic_or_16_seq_cst(foo, 0xFF00), 0xB3C3); + assert_eq!(unsafe { foo.cast::().read() }, 0xFFC3); + assert_eq!(atomic_or_16_seq_cst(foo, 0), 0xFFC3); + assert_eq!(unsafe { foo.cast::().read() }, 0xFFC3); + assert_eq!(atomic_or_16_seq_cst(foo, 0x00FF), 0xFFC3); + assert_eq!(unsafe { foo.cast::().read() }, 0xFFFF); + assert_eq!(atomic_or_16_seq_cst(foo, 0), 0xFFFF); + assert_eq!(unsafe { foo.cast::().read() }, 0xFFFF); + unsafe { + foo.cast::().write(0); + } + + assert_eq!(atomic_or_32_seq_cst(foo, 0x01A4_1005), 0); + assert_eq!(unsafe { foo.cast::().read() }, 0x01A4_1005); + assert_eq!(atomic_or_32_seq_cst(foo, 0x5502_D581), 0x01A4_1005); + assert_eq!(unsafe { foo.cast::().read() }, 0x55A6_D585); + assert_eq!(atomic_or_32_seq_cst(foo, 0xFF00_FF00), 0x55A6_D585); + assert_eq!(unsafe { foo.cast::().read() }, 0xFFA6_FF85); + assert_eq!(atomic_or_32_seq_cst(foo, 0), 0xFFA6_FF85); + assert_eq!(unsafe { foo.cast::().read() }, 0xFFA6_FF85); + assert_eq!(atomic_or_32_seq_cst(foo, 0x00FF_00FF), 0xFFA6_FF85); + assert_eq!(unsafe { foo.cast::().read() }, 0xFFFF_FFFF); + unsafe { + foo.cast::().write(0); + } + + assert_eq!(atomic_or_64_seq_cst(foo, 0xABCD_3456_01A4_1005), 0); + assert_eq!(unsafe { foo.cast::().read() }, 0xABCD_3456_01A4_1005); + assert_eq!( + atomic_or_64_seq_cst(foo, 0x0F25_0021_232B_C34A), + 0xABCD_3456_01A4_1005 + ); + assert_eq!(unsafe { foo.cast::().read() }, 0xAFED_3477_23AF_D34F); + assert_eq!( + atomic_or_64_seq_cst(foo, 0xFF00_FF00_FF00_FF00), + 0xAFED_3477_23AF_D34F + ); + assert_eq!(unsafe { foo.cast::().read() }, 0xFFED_FF77_FFAF_FF4F); + assert_eq!(atomic_or_64_seq_cst(foo, 0), 0xFFED_FF77_FFAF_FF4F); + assert_eq!(unsafe { foo.cast::().read() }, 0xFFED_FF77_FFAF_FF4F); + assert_eq!( + atomic_or_64_seq_cst(foo, 0x00FF_00FF_00FF_00FF), + 0xFFED_FF77_FFAF_FF4F + ); + assert_eq!(unsafe { foo.cast::().read() }, 0xFFFF_FFFF_FFFF_FFFF); + + let 
_ = unsafe { Box::from_raw(foo.cast::().as_ptr()) }; + } + + #[test] + fn test_xor() { + let foo = NonNull::from(Box::leak(Box::new([0u64; 1]))).cast::<()>(); + + assert_eq!(atomic_xor_8_seq_cst(foo, 0x73), 0); + assert_eq!(unsafe { foo.cast::().read() }, 0x73); + assert_eq!(atomic_xor_8_seq_cst(foo, 0x1B), 0x73); + assert_eq!(unsafe { foo.cast::().read() }, 0x68); + assert_eq!(atomic_xor_8_seq_cst(foo, 0xF0), 0x68); + assert_eq!(unsafe { foo.cast::().read() }, 0x98); + assert_eq!(atomic_xor_8_seq_cst(foo, 0x00), 0x98); + assert_eq!(atomic_xor_8_seq_cst(foo, 0xFF), 0x98); + assert_eq!(unsafe { foo.cast::().read() }, 0x67); + assert_eq!(atomic_xor_8_seq_cst(foo, 0x67), 0x67); + assert_eq!(unsafe { foo.cast::().read() }, 0); + + assert_eq!(atomic_xor_16_seq_cst(foo, 0xB182), 0); + assert_eq!(unsafe { foo.cast::().read() }, 0xB182); + assert_eq!(atomic_xor_16_seq_cst(foo, 0x02C3), 0xB182); + assert_eq!(unsafe { foo.cast::().read() }, 0xB341); + assert_eq!(atomic_xor_16_seq_cst(foo, 0xFF00), 0xB341); + assert_eq!(unsafe { foo.cast::().read() }, 0x4C41); + assert_eq!(atomic_xor_16_seq_cst(foo, 0), 0x4C41); + assert_eq!(unsafe { foo.cast::().read() }, 0x4C41); + assert_eq!(atomic_xor_16_seq_cst(foo, 0x00FF), 0x4C41); + assert_eq!(unsafe { foo.cast::().read() }, 0x4CBE); + assert_eq!(atomic_xor_16_seq_cst(foo, 0xFFFF), 0x4CBE); + assert_eq!(unsafe { foo.cast::().read() }, 0xB341); + assert_eq!(atomic_xor_16_seq_cst(foo, 0xB341), 0xB341); + assert_eq!(unsafe { foo.cast::().read() }, 0); + + assert_eq!(atomic_xor_32_seq_cst(foo, 0xA34B_B182), 0); + assert_eq!(unsafe { foo.cast::().read() }, 0xA34B_B182); + assert_eq!(atomic_xor_32_seq_cst(foo, 0x86D0_02C3), 0xA34B_B182); + assert_eq!(unsafe { foo.cast::().read() }, 0x259B_B341); + assert_eq!(atomic_xor_32_seq_cst(foo, 0xFF00_FF00), 0x259B_B341); + assert_eq!(unsafe { foo.cast::().read() }, 0xDA9B_4C41); + assert_eq!(atomic_xor_32_seq_cst(foo, 0), 0xDA9B_4C41); + assert_eq!(unsafe { foo.cast::().read() }, 0xDA9B_4C41); + assert_eq!(atomic_xor_32_seq_cst(foo, 0x00FF_00FF), 0xDA9B_4C41); + assert_eq!(unsafe { foo.cast::().read() }, 0xDA64_4CBE); + assert_eq!(atomic_xor_32_seq_cst(foo, 0xFFFF_FFFF), 0xDA64_4CBE); + assert_eq!(unsafe { foo.cast::().read() }, 0x259B_B341); + assert_eq!(atomic_xor_32_seq_cst(foo, 0x259B_B341), 0x259B_B341); + assert_eq!(unsafe { foo.cast::().read() }, 0); + + assert_eq!(atomic_xor_64_seq_cst(foo, 0x0567_98E0_A34B_B182), 0); + assert_eq!(unsafe { foo.cast::().read() }, 0x0567_98E0_A34B_B182); + assert_eq!( + atomic_xor_64_seq_cst(foo, 0x1135_C732_86D0_02C3), + 0x0567_98E0_A34B_B182 + ); + assert_eq!(unsafe { foo.cast::().read() }, 0x1452_5FD2_259B_B341); + assert_eq!( + atomic_xor_64_seq_cst(foo, 0xFF00_FF00_FF00_FF00), + 0x1452_5FD2_259B_B341 + ); + assert_eq!(unsafe { foo.cast::().read() }, 0xEB52_A0D2_DA9B_4C41); + assert_eq!(atomic_xor_64_seq_cst(foo, 0), 0xEB52_A0D2_DA9B_4C41); + assert_eq!(unsafe { foo.cast::().read() }, 0xEB52_A0D2_DA9B_4C41); + assert_eq!( + atomic_xor_64_seq_cst(foo, 0x00FF_00FF_00FF_00FF), + 0xEB52_A0D2_DA9B_4C41 + ); + assert_eq!(unsafe { foo.cast::().read() }, 0xEBAD_A02D_DA64_4CBE); + assert_eq!( + atomic_xor_64_seq_cst(foo, 0xFFFF_FFFF_FFFF_FFFF), + 0xEBAD_A02D_DA64_4CBE + ); + assert_eq!(unsafe { foo.cast::().read() }, 0x1452_5FD2_259B_B341); + assert_eq!( + atomic_xor_64_seq_cst(foo, 0x1452_5FD2_259B_B341), + 0x1452_5FD2_259B_B341 + ); + assert_eq!(unsafe { foo.cast::().read() }, 0); + + let _ = unsafe { Box::from_raw(foo.cast::().as_ptr()) }; + } } From 
d7daad447781889aa493cf0e39e48bcb634f13f7 Mon Sep 17 00:00:00 2001 From: Aapo Alasuutari Date: Thu, 18 Sep 2025 14:05:55 +0300 Subject: [PATCH 18/25] feat: aarch64 fetchop --- ecmascript_atomics/lib.rs | 340 +++++++++++++++----------------------- 1 file changed, 135 insertions(+), 205 deletions(-) diff --git a/ecmascript_atomics/lib.rs b/ecmascript_atomics/lib.rs index d9196190d..fbe479c20 100644 --- a/ecmascript_atomics/lib.rs +++ b/ecmascript_atomics/lib.rs @@ -898,29 +898,33 @@ macro_rules! fetchop { ("xor", x86, u64) => { "xor {scratch:r}, {val:r}" }; - ("add", aarch64) => { - "add {val}, {scratch}" + // Note: we differ here from source material. In Firefox the operation + // always operates on :x registers; there doesn't seem to be a reason for + // this so we try to avoid that. + // "OP %x[scratch1], %x[res], %x[val]" + ("add", arm, u32) => { + "add {scratch1:w}, {res:w}, {val:w}" }; - ("and", aarch64) => { - "and {val}, {scratch}" + ("add", arm, u64) => { + "add {scratch1:w}, {res:x}, {val:x}" }; - ("or", aarch64) => { - "orr {val}, {scratch}" + ("and", arm, u32) => { + "and {scratch1:w}, {res:w}, {val:w}" }; - ("xor", aarch64) => { - "eor {val}, {scratch}" + ("and", arm, u64) => { + "and {scratch1:w}, {res:x}, {val:x}" }; - ("add", arm) => { - "add {val}, {scratch}" + ("or", arm, u32) => { + "orr {scratch1:w}, {res:w}, {val:w}" }; - ("and", arm) => { - "and {val}, {scratch}" + ("or", arm, u64) => { + "orr {scratch1:w}, {res:x}, {val:x}" }; - ("or", arm) => { - "or {val}, {scratch}" + ("xor", arm, u32) => { + "eor {scratch1:w}, {res:w}, {val:w}" }; - ("xor", arm) => { - "xor {val}, {scratch}" + ("xor", arm, u64) => { + "eor {scratch1:w}, {res:x}, {val:x}" }; } @@ -960,49 +964,23 @@ macro_rules! gen_fetchop { #[cfg(target_arch = "aarch64")] unsafe { - // insns = "" - // insns += fmt_insn("dmb ish") - // insns += fmt_insn("0:") - // if size == 8: - // insns += fmt_insn("ldxrb %w[res], [%x[addr]]") - // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") - // insns += fmt_insn("stxrb %w[scratch2], %w[scratch1], [%x[addr]]") - // elif size == 16: - // insns += fmt_insn("ldxrh %w[res], [%x[addr]]") - // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") - // insns += fmt_insn("stxrh %w[scratch2], %w[scratch1], [%x[addr]]") - // elif size == 32: - // insns += fmt_insn("ldxr %w[res], [%x[addr]]") - // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") - // insns += fmt_insn("stxr %w[scratch2], %w[scratch1], [%x[addr]]") - // else: - // assert size == 64 - // insns += fmt_insn("ldxr %x[res], [%x[addr]]") - // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") - // insns += fmt_insn("stxr %w[scratch2], %x[scratch1], [%x[addr]]") - // cpu_op = op - // if cpu_op == "or": - // cpu_op = "orr" - // if cpu_op == "xor": - // cpu_op = "eor" - // insns = insns.replace("OP", cpu_op) - // insns += fmt_insn("cbnz %w[scratch2], 0b") - // insns += fmt_insn("dmb ish") - // return """ - // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { - // %(cpp_type)s res; - // uintptr_t scratch1, scratch2; - // asm volatile (%(insns)s - // : [res] "=&r" (res), [scratch1] "=&r" (scratch1), [scratch2] "=&r"(scratch2) - // : [addr] "r" (addr), [val] "r"(val) - // : "memory", "cc"); - // return res; - // }""" % { - // "cpp_type": cpp_type, - // "fun_name": fun_name, - // "insns": insns, - // } - todo!(); + let res: u8; + core::arch::asm!( + "dmb ish", + "2:", + "ldxr {res:w}, [{ptr}]", + fetchop!($op, arm), + "stxr {scratch2:w}, {scratch1:w}, [{ptr}]", + "cbnz {scratch2:w}, 2b", + 
"3: dmb ish", + res = out(reg) res, + scratch1 = out(reg) _, + scratch2 = out(reg) _, + ptr = in(reg) ptr, + val = in(reg) $val, + options(nostack) + ); + $val = res; } #[cfg(target_arch = "arm")] @@ -1085,50 +1063,24 @@ macro_rules! gen_fetchop { } #[cfg(target_arch = "aarch64")] - { - // insns = "" - // insns += fmt_insn("dmb ish") - // insns += fmt_insn("0:") - // if size == 8: - // insns += fmt_insn("ldxrb %w[res], [%x[addr]]") - // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") - // insns += fmt_insn("stxrb %w[scratch2], %w[scratch1], [%x[addr]]") - // elif size == 16: - // insns += fmt_insn("ldxrh %w[res], [%x[addr]]") - // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") - // insns += fmt_insn("stxrh %w[scratch2], %w[scratch1], [%x[addr]]") - // elif size == 32: - // insns += fmt_insn("ldxr %w[res], [%x[addr]]") - // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") - // insns += fmt_insn("stxr %w[scratch2], %w[scratch1], [%x[addr]]") - // else: - // assert size == 64 - // insns += fmt_insn("ldxr %x[res], [%x[addr]]") - // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") - // insns += fmt_insn("stxr %w[scratch2], %x[scratch1], [%x[addr]]") - // cpu_op = op - // if cpu_op == "or": - // cpu_op = "orr" - // if cpu_op == "xor": - // cpu_op = "eor" - // insns = insns.replace("OP", cpu_op) - // insns += fmt_insn("cbnz %w[scratch2], 0b") - // insns += fmt_insn("dmb ish") - // return """ - // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { - // %(cpp_type)s res; - // uintptr_t scratch1, scratch2; - // asm volatile (%(insns)s - // : [res] "=&r" (res), [scratch1] "=&r" (scratch1), [scratch2] "=&r"(scratch2) - // : [addr] "r" (addr), [val] "r"(val) - // : "memory", "cc"); - // return res; - // }""" % { - // "cpp_type": cpp_type, - // "fun_name": fun_name, - // "insns": insns, - // } - todo!(); + unsafe { + let res: u16; + core::arch::asm!( + "dmb ish", + "2:", + "ldxr {res:w}, [{ptr}]", + fetchop!($op, arm), + "stxr {scratch2:w}, {scratch1:w}, [{ptr}]", + "cbnz {scratch2:w}, 2b", + "3: dmb ish", + res = out(reg) res, + scratch1 = out(reg) _, + scratch2 = out(reg) _, + ptr = in(reg) ptr, + val = in(reg) $val, + options(nostack) + ); + $val = res; } #[cfg(target_arch = "arm")] @@ -1211,50 +1163,24 @@ macro_rules! 
gen_fetchop { } #[cfg(target_arch = "aarch64")] - { - // insns = "" - // insns += fmt_insn("dmb ish") - // insns += fmt_insn("0:") - // if size == 8: - // insns += fmt_insn("ldxrb %w[res], [%x[addr]]") - // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") - // insns += fmt_insn("stxrb %w[scratch2], %w[scratch1], [%x[addr]]") - // elif size == 16: - // insns += fmt_insn("ldxrh %w[res], [%x[addr]]") - // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") - // insns += fmt_insn("stxrh %w[scratch2], %w[scratch1], [%x[addr]]") - // elif size == 32: - // insns += fmt_insn("ldxr %w[res], [%x[addr]]") - // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") - // insns += fmt_insn("stxr %w[scratch2], %w[scratch1], [%x[addr]]") - // else: - // assert size == 64 - // insns += fmt_insn("ldxr %x[res], [%x[addr]]") - // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") - // insns += fmt_insn("stxr %w[scratch2], %x[scratch1], [%x[addr]]") - // cpu_op = op - // if cpu_op == "or": - // cpu_op = "orr" - // if cpu_op == "xor": - // cpu_op = "eor" - // insns = insns.replace("OP", cpu_op) - // insns += fmt_insn("cbnz %w[scratch2], 0b") - // insns += fmt_insn("dmb ish") - // return """ - // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { - // %(cpp_type)s res; - // uintptr_t scratch1, scratch2; - // asm volatile (%(insns)s - // : [res] "=&r" (res), [scratch1] "=&r" (scratch1), [scratch2] "=&r"(scratch2) - // : [addr] "r" (addr), [val] "r"(val) - // : "memory", "cc"); - // return res; - // }""" % { - // "cpp_type": cpp_type, - // "fun_name": fun_name, - // "insns": insns, - // } - todo!(); + unsafe { + let res: u32; + core::arch::asm!( + "dmb ish", + "2:", + "ldxr {res:w}, [{ptr}]", + fetchop!($op, arm), + "stxr {scratch2:w}, {scratch1:w}, [{ptr}]", + "cbnz {scratch2:w}, 2b", + "3: dmb ish", + res = out(reg) res, + scratch1 = out(reg) _, + scratch2 = out(reg) _, + ptr = in(reg) ptr, + val = in(reg) $val, + options(nostack) + ); + $val = res; } #[cfg(target_arch = "arm")] @@ -1337,50 +1263,24 @@ macro_rules! 
gen_fetchop { } #[cfg(target_arch = "aarch64")] - { - // insns = "" - // insns += fmt_insn("dmb ish") - // insns += fmt_insn("0:") - // if size == 8: - // insns += fmt_insn("ldxrb %w[res], [%x[addr]]") - // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") - // insns += fmt_insn("stxrb %w[scratch2], %w[scratch1], [%x[addr]]") - // elif size == 16: - // insns += fmt_insn("ldxrh %w[res], [%x[addr]]") - // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") - // insns += fmt_insn("stxrh %w[scratch2], %w[scratch1], [%x[addr]]") - // elif size == 32: - // insns += fmt_insn("ldxr %w[res], [%x[addr]]") - // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") - // insns += fmt_insn("stxr %w[scratch2], %w[scratch1], [%x[addr]]") - // else: - // assert size == 64 - // insns += fmt_insn("ldxr %x[res], [%x[addr]]") - // insns += fmt_insn("OP %x[scratch1], %x[res], %x[val]") - // insns += fmt_insn("stxr %w[scratch2], %x[scratch1], [%x[addr]]") - // cpu_op = op - // if cpu_op == "or": - // cpu_op = "orr" - // if cpu_op == "xor": - // cpu_op = "eor" - // insns = insns.replace("OP", cpu_op) - // insns += fmt_insn("cbnz %w[scratch2], 0b") - // insns += fmt_insn("dmb ish") - // return """ - // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { - // %(cpp_type)s res; - // uintptr_t scratch1, scratch2; - // asm volatile (%(insns)s - // : [res] "=&r" (res), [scratch1] "=&r" (scratch1), [scratch2] "=&r"(scratch2) - // : [addr] "r" (addr), [val] "r"(val) - // : "memory", "cc"); - // return res; - // }""" % { - // "cpp_type": cpp_type, - // "fun_name": fun_name, - // "insns": insns, - // } - todo!(); + unsafe { + let res: u64; + core::arch::asm!( + "dmb ish", + "2:", + "ldxr {res:x}, [{ptr}]", + fetchop!($op, arm), + "stxr {scratch2:w}, {scratch1:x}, [{ptr}]", + "cbnz {scratch2:w}, 2b", + "3: dmb ish", + res = out(reg) res, + scratch1 = out(reg) _, + scratch2 = out(reg) _, + ptr = in(reg) ptr, + val = in(reg) $val, + options(nostack) + ); + $val = res; } #[cfg(target_arch = "arm")] @@ -1964,11 +1864,6 @@ pub fn atomic_pause() { core::hint::spin_loop(); } -// See comment in jit/AtomicOperations-shared-jit.cpp for an explanation. 
-const WORDS_IN_BLOCK: usize = 8; -const WORD_SIZE: usize = core::mem::size_of::<usize>(); -const BLOCK_SIZE: usize = WORDS_IN_BLOCK * WORD_SIZE; - #[inline(always)] #[cfg(any( target_arch = "x86", @@ -1977,7 +1872,12 @@ const BLOCK_SIZE: usize = WORDS_IN_BLOCK * WORD_SIZE; target_arch = "arm" ))] pub fn atomic_copy_unaligned_block_down_unsynchronized() { - gen_copy!(u8, 1, BLOCK_SIZE, "down"); + #[cfg(target_pointer_width = "16")] + gen_copy!(u8, 1, 16, "down"); + #[cfg(target_pointer_width = "32")] + gen_copy!(u8, 1, 32, "down"); + #[cfg(target_pointer_width = "64")] + gen_copy!(u8, 1, 64, "down"); } #[inline(always)] @@ -1988,7 +1888,12 @@ pub fn atomic_copy_unaligned_block_down_unsynchronized() { target_arch = "arm" ))] pub fn atomic_copy_unaligned_block_up_unsynchronized() { - gen_copy!(u8, 1, BLOCK_SIZE, "up"); + #[cfg(target_pointer_width = "16")] + gen_copy!(u8, 1, 16, "up"); + #[cfg(target_pointer_width = "32")] + gen_copy!(u8, 1, 32, "up"); + #[cfg(target_pointer_width = "64")] + gen_copy!(u8, 1, 64, "up"); } #[inline(always)] @@ -1999,7 +1904,12 @@ pub fn atomic_copy_unaligned_block_up_unsynchronized() { target_arch = "arm" ))] pub fn atomic_copy_unaligned_word_down_unsynchronized() { - gen_copy!(u8, 1, WORD_SIZE, "down"); + #[cfg(target_pointer_width = "16")] + gen_copy!(u8, 1, 2, "down"); + #[cfg(target_pointer_width = "32")] + gen_copy!(u8, 1, 4, "down"); + #[cfg(target_pointer_width = "64")] + gen_copy!(u8, 1, 8, "down"); } #[inline(always)] @@ -2010,7 +1920,12 @@ pub fn atomic_copy_unaligned_word_down_unsynchronized() { target_arch = "arm" ))] pub fn atomic_copy_unaligned_word_up_unsynchronized() { - gen_copy!(u8, 1, WORD_SIZE, "up"); + #[cfg(target_pointer_width = "16")] + gen_copy!(u8, 1, 2, "up"); + #[cfg(target_pointer_width = "32")] + gen_copy!(u8, 1, 4, "up"); + #[cfg(target_pointer_width = "64")] + gen_copy!(u8, 1, 8, "up"); } #[inline(always)] @@ -2021,7 +1936,12 @@ pub fn atomic_copy_unaligned_word_up_unsynchronized() { target_arch = "arm" ))] pub fn atomic_copy_block_down_unsynchronized() { - gen_copy!(uptr, WORD_SIZE, WORDS_IN_BLOCK, "down"); + #[cfg(target_pointer_width = "16")] + gen_copy!(usize, 2, 8, "down"); + #[cfg(target_pointer_width = "32")] + gen_copy!(usize, 4, 8, "down"); + #[cfg(target_pointer_width = "64")] + gen_copy!(usize, 8, 8, "down"); } #[inline(always)] @@ -2032,7 +1952,12 @@ pub fn atomic_copy_block_down_unsynchronized() { target_arch = "arm" ))] pub fn atomic_copy_block_up_unsynchronized() { - gen_copy!(uptr, WORD_SIZE, WORDS_IN_BLOCK, "up"); + #[cfg(target_pointer_width = "16")] + gen_copy!(usize, 2, 8, "up"); + #[cfg(target_pointer_width = "32")] + gen_copy!(usize, 4, 8, "up"); + #[cfg(target_pointer_width = "64")] + gen_copy!(usize, 8, 8, "up"); } #[inline(always)] @@ -2043,7 +1968,12 @@ pub fn atomic_copy_block_up_unsynchronized() { target_arch = "arm" ))] pub fn atomic_copy_word_unsynchronized() { - gen_copy!(uptr, WORD_SIZE, 1, "down"); + #[cfg(target_pointer_width = "16")] + gen_copy!(usize, 2, 1, "down"); + #[cfg(target_pointer_width = "32")] + gen_copy!(usize, 4, 1, "down"); + #[cfg(target_pointer_width = "64")] + gen_copy!(usize, 8, 1, "down"); } #[inline(always)] @@ -2079,8 +2009,8 @@ pub fn atomic_copy8_unsynchronized() { gen_copy!(u8, 1, 1, "down"); } -pub const JS_GENERATED_ATOMICS_BLOCKSIZE: usize = 0; -pub const JS_GENERATED_ATOMICS_WORSIZE: usize = 0; +pub const JS_GENERATED_ATOMICS_BLOCKSIZE: usize = core::mem::size_of::<usize>() * 8; +pub const JS_GENERATED_ATOMICS_WORSIZE: usize = core::mem::size_of::<usize>(); #[cfg(test)] mod test { From 
457f871541176d624f1549c303426564717b6bb5 Mon Sep 17 00:00:00 2001 From: Aapo Alasuutari Date: Thu, 18 Sep 2025 14:07:15 +0300 Subject: [PATCH 19/25] fix --- ecmascript_atomics/lib.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ecmascript_atomics/lib.rs b/ecmascript_atomics/lib.rs index fbe479c20..4282d8f1d 100644 --- a/ecmascript_atomics/lib.rs +++ b/ecmascript_atomics/lib.rs @@ -969,7 +969,7 @@ macro_rules! gen_fetchop { "dmb ish", "2:", "ldxr {res:w}, [{ptr}]", - fetchop!($op, arm), + fetchop!($op, arm, u32), "stxr {scratch2:w}, {scratch1:w}, [{ptr}]", "cbnz {scratch2:w}, 2b", "3: dmb ish", @@ -1069,7 +1069,7 @@ macro_rules! gen_fetchop { "dmb ish", "2:", "ldxr {res:w}, [{ptr}]", - fetchop!($op, arm), + fetchop!($op, arm, u32), "stxr {scratch2:w}, {scratch1:w}, [{ptr}]", "cbnz {scratch2:w}, 2b", "3: dmb ish", @@ -1169,7 +1169,7 @@ macro_rules! gen_fetchop { "dmb ish", "2:", "ldxr {res:w}, [{ptr}]", - fetchop!($op, arm), + fetchop!($op, arm, u32), "stxr {scratch2:w}, {scratch1:w}, [{ptr}]", "cbnz {scratch2:w}, 2b", "3: dmb ish", @@ -1269,7 +1269,7 @@ macro_rules! gen_fetchop { "dmb ish", "2:", "ldxr {res:x}, [{ptr}]", - fetchop!($op, arm), + fetchop!($op, arm, u64), "stxr {scratch2:w}, {scratch1:x}, [{ptr}]", "cbnz {scratch2:w}, 2b", "3: dmb ish", From 24b89dbb7945e121e5b39cf54f149a29c25bebac Mon Sep 17 00:00:00 2001 From: Aapo Alasuutari Date: Thu, 18 Sep 2025 14:09:35 +0300 Subject: [PATCH 20/25] fix --- ecmascript_atomics/lib.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ecmascript_atomics/lib.rs b/ecmascript_atomics/lib.rs index 4282d8f1d..06a7c2467 100644 --- a/ecmascript_atomics/lib.rs +++ b/ecmascript_atomics/lib.rs @@ -906,25 +906,25 @@ macro_rules! fetchop { "add {scratch1:w}, {res:w}, {val:w}" }; ("add", arm, u64) => { - "add {scratch1:w}, {res:x}, {val:x}" + "add {scratch1:x}, {res:x}, {val:x}" }; ("and", arm, u32) => { "and {scratch1:w}, {res:w}, {val:w}" }; ("and", arm, u64) => { - "and {scratch1:w}, {res:x}, {val:x}" + "and {scratch1:x}, {res:x}, {val:x}" }; ("or", arm, u32) => { "orr {scratch1:w}, {res:w}, {val:w}" }; ("or", arm, u64) => { - "orr {scratch1:w}, {res:x}, {val:x}" + "orr {scratch1:x}, {res:x}, {val:x}" }; ("xor", arm, u32) => { "eor {scratch1:w}, {res:w}, {val:w}" }; ("xor", arm, u64) => { - "eor {scratch1:w}, {res:x}, {val:x}" + "eor {scratch1:x}, {res:x}, {val:x}" }; } From c2df53e25815b99787d9cf909b29edeade84ea77 Mon Sep 17 00:00:00 2001 From: Aapo Alasuutari Date: Thu, 18 Sep 2025 14:21:30 +0300 Subject: [PATCH 21/25] fix --- ecmascript_atomics/lib.rs | 118 +++++++++++++++++++------------------- 1 file changed, 59 insertions(+), 59 deletions(-) diff --git a/ecmascript_atomics/lib.rs b/ecmascript_atomics/lib.rs index 06a7c2467..57be72b35 100644 --- a/ecmascript_atomics/lib.rs +++ b/ecmascript_atomics/lib.rs @@ -357,10 +357,10 @@ macro_rules! gen_exchange { let res: u8; core::arch::asm!( "dmb ish", - "0:", + "2:", "ldxr {res:w}, [{ptr}]", "stxr {scratch:w}, {val:w}, [{ptr}]", - "cbnz {scratch:w}, 0b", + "cbnz {scratch:w}, 2b", "dmb ish", res = out(reg) res, scratch = out(reg) _, @@ -376,11 +376,11 @@ macro_rules! gen_exchange { let res: u8; core::arch::asm!( "dmb sy", - "0:", + "2:", "ldrex {res:w}, [{ptr}]", "strex {scratch:w}, {val:w}, [{ptr}]", "cmp {scratch:w}, #1", - "beq 0b", + "beq 2b", "dmb sy", res = out(reg) res, scratch = out(reg) _, @@ -412,10 +412,10 @@ macro_rules! 
gen_exchange { let res: u16; core::arch::asm!( "dmb ish", - "0:", + "2:", "ldxr {res:w}, [{ptr}]", "stxr {scratch:w}, {val:w}, [{ptr}]", - "cbnz {scratch:w}, 0b", + "cbnz {scratch:w}, 2b", "dmb ish", res = out(reg) res, scratch = out(reg) _, @@ -431,11 +431,11 @@ macro_rules! gen_exchange { let res: u8; core::arch::asm!( "dmb sy", - "0:", + "2:", "ldrex {res:w}, [{ptr}]", "strex {scratch:w}, {val:w}, [{ptr}]", "cmp {scratch:w}, #1", - "beq 0b", + "beq 2b", "dmb sy", res = out(reg) res, scratch = out(reg) _, @@ -467,10 +467,10 @@ macro_rules! gen_exchange { let res: u32; core::arch::asm!( "dmb ish", - "0:", + "2:", "ldxr {res:w}, [{ptr}]", "stxr {scratch:w}, {val:w}, [{ptr}]", - "cbnz {scratch:w}, 0b", + "cbnz {scratch:w}, 2b", "dmb ish", res = out(reg) res, scratch = out(reg) _, @@ -486,11 +486,11 @@ macro_rules! gen_exchange { let res: u8; core::arch::asm!( "dmb sy", - "0:", + "2:", "ldrex {res:w}, [{ptr}]", "strex {scratch:w}, {val:w}, [{ptr}]", "cmp {scratch:w}, #1", - "beq 0b", + "beq 2b", "dmb sy", res = out(reg) res, scratch = out(reg) _, @@ -522,10 +522,10 @@ macro_rules! gen_exchange { let res: u64; core::arch::asm!( "dmb ish", - "0:", + "2:", "ldxr {res:x}, [{ptr}]", "stxr {scratch:w}, {val:x}, [{ptr}]", - "cbnz {scratch:w}, 0b", + "cbnz {scratch:w}, 2b", "dmb ish", res = out(reg) res, scratch = out(reg) _, @@ -567,14 +567,14 @@ macro_rules! gen_cmpxchg { let res: u8; core::arch::asm!( "dmb ish", - "0:", + "2:", "uxtb {scratch:w}, {old_val:w}", - "ldxr {res:w}, [{ptr}]", + "ldxrb {res:w}, [{ptr}]", "cmp {res:w}, {scratch:w}", - "b.ne 1f", - "stxr {scratch:w}, {new_val:w}, [{ptr}]", - "cbnz {scratch:w}, 0b", - "1: dmb ish", + "b.ne 3f", + "stxrb {scratch:w}, {new_val:w}, [{ptr}]", + "cbnz {scratch:w}, 2b", + "3: dmb ish", res = out(reg) res, scratch = out(reg) _, ptr = in(reg) ptr, @@ -590,15 +590,15 @@ macro_rules! gen_cmpxchg { let res: u8; core::arch::asm!( "dmb sy", - "0:", + "2:", "uxtb {scratch:w}, {old_val:w}", "ldrex {res:w} [{ptr}]", "cmp {res:w}, {scratch:w}", - "bne 1f", + "bne 3f", "strex {scratch:w}, {new_val:w}, [{ptr}]", "cmp {scratch:w}, #1", - "beq 0b", - "1: dmb sy", + "beq 2b", + "3: dmb sy", res = out(reg) res, scratch = out(reg) _, ptr = in(reg) ptr, @@ -632,14 +632,14 @@ macro_rules! gen_cmpxchg { let res: u16; core::arch::asm!( "dmb ish", - "0:", + "2:", "uxth {scratch:w}, {old_val:w}", - "ldxr {res:w}, [{ptr}]", + "ldxrh {res:w}, [{ptr}]", "cmp {res:w}, {scratch:w}", - "b.ne 1f", - "stxr {scratch:w}, {new_val:w}, [{ptr}]", - "cbnz {scratch:w}, 0b", - "1: dmb ish", + "b.ne 3f", + "stxrh {scratch:w}, {new_val:w}, [{ptr}]", + "cbnz {scratch:w}, 2b", + "3: dmb ish", res = out(reg) res, scratch = out(reg) _, ptr = in(reg) ptr, @@ -655,15 +655,15 @@ macro_rules! gen_cmpxchg { let res: u16; core::arch::asm!( "dmb sy", - "0:", + "2:", "uxth {scratch:w}, {old_val:w}", "ldrex {res:w} [{ptr}]", "cmp {res:w}, {scratch:w}", - "bne 1f", + "bne 3f", "strex {scratch:w}, {new_val:w}, [{ptr}]", "cmp {scratch:w}, #1", - "beq 0b", - "1: dmb sy", + "beq 2b", + "3: dmb sy", res = out(reg) res, scratch = out(reg) _, ptr = in(reg) ptr, @@ -697,14 +697,14 @@ macro_rules! gen_cmpxchg { let res: u32; core::arch::asm!( "dmb ish", - "0:", + "2:", "mov {scratch:w}, {old_val:w}", "ldxr {res:w}, [{ptr}]", "cmp {res:w}, {scratch:w}", - "b.ne 1f", + "b.ne 3f", "stxr {scratch:w}, {new_val:w}, [{ptr}]", - "cbnz {scratch:w}, 0b", - "1: dmb ish", + "cbnz {scratch:w}, 2b", + "3: dmb ish", res = out(reg) res, scratch = out(reg) _, ptr = in(reg) ptr, @@ -720,15 +720,15 @@ macro_rules! 
gen_cmpxchg { let res: u32; core::arch::asm!( "dmb sy", - "0:", + "2:", "mov {scratch:w}, {old_val:w}", "ldrex {res:w} [{ptr}]", "cmp {res:w}, {scratch:w}", - "bne 1f", + "bne 3f", "strex {scratch:w}, {new_val:w}, [{ptr}]", "cmp {scratch:w}, #1", - "beq 0b", - "1: dmb sy", + "beq 2b", + "3: dmb sy", res = out(reg) res, scratch = out(reg) _, ptr = in(reg) ptr, @@ -786,14 +786,14 @@ macro_rules! gen_cmpxchg { let res: u64; core::arch::asm!( "dmb ish", - "0:", + "2:", "mov {scratch:w}, {old_val:w}", "ldxr {res:w}, [{ptr}]", "cmp {res:w}, {scratch:w}", - "b.ne 1f", + "b.ne 3f", "stxr {scratch:w}, {new_val:w}, [{ptr}]", - "cbnz {scratch:w}, 0b", - "1: dmb ish", + "cbnz {scratch:w}, 2b", + "3: dmb ish", res = out(reg) res, scratch = out(reg) _, ptr = in(reg) ptr, @@ -814,17 +814,17 @@ macro_rules! gen_cmpxchg { let new_top = u32::from_le_bytes([b4, b5, b6, b7]); core::arch::asm!( "dmb sy", - "0: ldrexd r0 r1 [{ptr}]", + "2: ldrexd r0 r1 [{ptr}]", "cmp r0 {old_bot}", - "b.ne 1f", + "b.ne 3f", "cmp r1 {old_top}", - "b.ne 1f", + "b.ne 3f", "mov r2, {new_bot}" "mov r3, {new_top}" "strexd r4, r2, r3, [{ptr}]" "cmp r4, #1", - "beq 0b", - "1: dmb sy", + "beq 2b", + "3: dmb sy", "mov {old_bot} r0", "mov {old_top} r1", inout(reg) old_bot, @@ -987,7 +987,7 @@ macro_rules! gen_fetchop { unsafe { // insns = "" // insns += fmt_insn("dmb sy") - // insns += fmt_insn("0:") + // insns += fmt_insn("2:") // if size == 8: // insns += fmt_insn("ldrexb %[res], [%[addr]]") // insns += fmt_insn("OP %[scratch1], %[res], %[val]") @@ -1008,7 +1008,7 @@ macro_rules! gen_fetchop { // cpu_op = "eor" // insns = insns.replace("OP", cpu_op) // insns += fmt_insn("cmp %[scratch2], #1") - // insns += fmt_insn("beq 0b") + // insns += fmt_insn("beq 2b") // insns += fmt_insn("dmb sy") // return """ // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { @@ -1087,7 +1087,7 @@ macro_rules! gen_fetchop { unsafe { // insns = "" // insns += fmt_insn("dmb sy") - // insns += fmt_insn("0:") + // insns += fmt_insn("2:") // if size == 8: // insns += fmt_insn("ldrexb %[res], [%[addr]]") // insns += fmt_insn("OP %[scratch1], %[res], %[val]") @@ -1108,7 +1108,7 @@ macro_rules! gen_fetchop { // cpu_op = "eor" // insns = insns.replace("OP", cpu_op) // insns += fmt_insn("cmp %[scratch2], #1") - // insns += fmt_insn("beq 0b") + // insns += fmt_insn("beq 2b") // insns += fmt_insn("dmb sy") // return """ // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { @@ -1187,7 +1187,7 @@ macro_rules! gen_fetchop { unsafe { // insns = "" // insns += fmt_insn("dmb sy") - // insns += fmt_insn("0:") + // insns += fmt_insn("2:") // if size == 8: // insns += fmt_insn("ldrexb %[res], [%[addr]]") // insns += fmt_insn("OP %[scratch1], %[res], %[val]") @@ -1208,7 +1208,7 @@ macro_rules! gen_fetchop { // cpu_op = "eor" // insns = insns.replace("OP", cpu_op) // insns += fmt_insn("cmp %[scratch2], #1") - // insns += fmt_insn("beq 0b") + // insns += fmt_insn("beq 2b") // insns += fmt_insn("dmb sy") // return """ // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { @@ -1287,7 +1287,7 @@ macro_rules! gen_fetchop { unsafe { // insns = "" // insns += fmt_insn("dmb sy") - // insns += fmt_insn("0:") + // insns += fmt_insn("2:") // if size == 8: // insns += fmt_insn("ldrexb %[res], [%[addr]]") // insns += fmt_insn("OP %[scratch1], %[res], %[val]") @@ -1308,7 +1308,7 @@ macro_rules! 
gen_fetchop { // cpu_op = "eor" // insns = insns.replace("OP", cpu_op) // insns += fmt_insn("cmp %[scratch2], #1") - // insns += fmt_insn("beq 0b") + // insns += fmt_insn("beq 2b") // insns += fmt_insn("dmb sy") // return """ // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { From 68fbfe5837af4bc4bab7e397a21b6b4184c397eb Mon Sep 17 00:00:00 2001 From: Aapo Alasuutari Date: Thu, 18 Sep 2025 19:46:52 +0300 Subject: [PATCH 22/25] fix --- ecmascript_atomics/lib.rs | 75 ++++++++++++++++++++++----------------- 1 file changed, 43 insertions(+), 32 deletions(-) diff --git a/ecmascript_atomics/lib.rs b/ecmascript_atomics/lib.rs index 57be72b35..2756c8fb6 100644 --- a/ecmascript_atomics/lib.rs +++ b/ecmascript_atomics/lib.rs @@ -592,10 +592,10 @@ macro_rules! gen_cmpxchg { "dmb sy", "2:", "uxtb {scratch:w}, {old_val:w}", - "ldrex {res:w} [{ptr}]", + "ldrexb {res:w} [{ptr}]", "cmp {res:w}, {scratch:w}", "bne 3f", - "strex {scratch:w}, {new_val:w}, [{ptr}]", + "strexb {scratch:w}, {new_val:w}, [{ptr}]", "cmp {scratch:w}, #1", "beq 2b", "3: dmb sy", @@ -656,12 +656,12 @@ macro_rules! gen_cmpxchg { core::arch::asm!( "dmb sy", "2:", - "uxth {scratch:w}, {old_val:w}", - "ldrex {res:w} [{ptr}]", - "cmp {res:w}, {scratch:w}", + "uxth {scratch}, {old_val}", + "ldrexh {res} [{ptr}]", + "cmp {res}, {scratch}", "bne 3f", - "strex {scratch:w}, {new_val:w}, [{ptr}]", - "cmp {scratch:w}, #1", + "strexh {scratch}, {new_val}, [{ptr}]", + "cmp {scratch}, #1", "beq 2b", "3: dmb sy", res = out(reg) res, @@ -721,12 +721,12 @@ macro_rules! gen_cmpxchg { core::arch::asm!( "dmb sy", "2:", - "mov {scratch:w}, {old_val:w}", - "ldrex {res:w} [{ptr}]", - "cmp {res:w}, {scratch:w}", + "mov {scratch}, {old_val}", + "ldrex {res} [{ptr}]", + "cmp {res}, {scratch}", "bne 3f", - "strex {scratch:w}, {new_val:w}, [{ptr}]", - "cmp {scratch:w}, #1", + "strex {scratch}, {new_val}, [{ptr}]", + "cmp {scratch}, #1", "beq 2b", "3: dmb sy", res = out(reg) res, @@ -787,11 +787,11 @@ macro_rules! gen_cmpxchg { core::arch::asm!( "dmb ish", "2:", - "mov {scratch:w}, {old_val:w}", - "ldxr {res:w}, [{ptr}]", - "cmp {res:w}, {scratch:w}", + "mov {scratch:x}, {old_val:x}", + "ldxr {res:x}, [{ptr}]", + "cmp {res:x}, {scratch:x}", "b.ne 3f", - "stxr {scratch:w}, {new_val:w}, [{ptr}]", + "stxr {scratch:w}, {new_val:x}, [{ptr}]", "cbnz {scratch:w}, 2b", "3: dmb ish", res = out(reg) res, @@ -901,30 +901,41 @@ macro_rules! fetchop { // Note: we differ here from source material. In Firefox the operation // always operates on :x registers; there doesn't seem to be a reason for // this so we try to avoid that. 
- // "OP %x[scratch1], %x[res], %x[val]" - ("add", arm, u32) => { + ("add", aarch64, u32) => { "add {scratch1:w}, {res:w}, {val:w}" }; - ("add", arm, u64) => { + ("add", aarch64, u64) => { "add {scratch1:x}, {res:x}, {val:x}" }; - ("and", arm, u32) => { + ("and", aarch64, u32) => { "and {scratch1:w}, {res:w}, {val:w}" }; - ("and", arm, u64) => { + ("and", aarch64, u64) => { "and {scratch1:x}, {res:x}, {val:x}" }; - ("or", arm, u32) => { + ("or", aarch64, u32) => { "orr {scratch1:w}, {res:w}, {val:w}" }; - ("or", arm, u64) => { + ("or", aarch64, u64) => { "orr {scratch1:x}, {res:x}, {val:x}" }; - ("xor", arm, u32) => { + ("xor", aarch64, u32) => { "eor {scratch1:w}, {res:w}, {val:w}" }; - ("xor", arm, u64) => { - "eor {scratch1:x}, {res:x}, {val:x}" + ("xor", aarch64, u64) => { + "eor {scratch1}, {res}, {val}" + }; + ("add", arm) => { + "add {scratch1}, {res}, {val}" + }; + ("and", arm) => { + "and {scratch1}, {res}, {val}" + }; + ("or", arm) => { + "orr {scratch1}, {res}, {val}" + }; + ("xor", arm) => { + "eor {scratch1}, {res}, {val}" }; } @@ -968,9 +979,9 @@ macro_rules! gen_fetchop { core::arch::asm!( "dmb ish", "2:", - "ldxr {res:w}, [{ptr}]", + "ldxrb {res:w}, [{ptr}]", fetchop!($op, arm, u32), - "stxr {scratch2:w}, {scratch1:w}, [{ptr}]", + "stxrb {scratch2:w}, {scratch1:w}, [{ptr}]", "cbnz {scratch2:w}, 2b", "3: dmb ish", res = out(reg) res, @@ -1068,9 +1079,9 @@ macro_rules! gen_fetchop { core::arch::asm!( "dmb ish", "2:", - "ldxr {res:w}, [{ptr}]", + "ldxrh {res:w}, [{ptr}]", fetchop!($op, arm, u32), - "stxr {scratch2:w}, {scratch1:w}, [{ptr}]", + "stxrh {scratch2:w}, {scratch1:w}, [{ptr}]", "cbnz {scratch2:w}, 2b", "3: dmb ish", res = out(reg) res, @@ -1268,9 +1279,9 @@ macro_rules! gen_fetchop { core::arch::asm!( "dmb ish", "2:", - "ldxr {res:x}, [{ptr}]", + "ldxr {res:x}, [{ptr:x}]", fetchop!($op, arm, u64), - "stxr {scratch2:w}, {scratch1:x}, [{ptr}]", + "stxr {scratch2:w}, {scratch1:x}, [{ptr:x}]", "cbnz {scratch2:w}, 2b", "3: dmb ish", res = out(reg) res, From 822d7e1ff5693f5476e64f1597cd1b39a8770325 Mon Sep 17 00:00:00 2001 From: Aapo Alasuutari Date: Thu, 18 Sep 2025 19:50:16 +0300 Subject: [PATCH 23/25] fix --- ecmascript_atomics/lib.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ecmascript_atomics/lib.rs b/ecmascript_atomics/lib.rs index 2756c8fb6..1551c4c62 100644 --- a/ecmascript_atomics/lib.rs +++ b/ecmascript_atomics/lib.rs @@ -980,7 +980,7 @@ macro_rules! gen_fetchop { "dmb ish", "2:", "ldxrb {res:w}, [{ptr}]", - fetchop!($op, arm, u32), + fetchop!($op, aarch64, u32), "stxrb {scratch2:w}, {scratch1:w}, [{ptr}]", "cbnz {scratch2:w}, 2b", "3: dmb ish", @@ -1080,7 +1080,7 @@ macro_rules! gen_fetchop { "dmb ish", "2:", "ldxrh {res:w}, [{ptr}]", - fetchop!($op, arm, u32), + fetchop!($op, aarch64, u32), "stxrh {scratch2:w}, {scratch1:w}, [{ptr}]", "cbnz {scratch2:w}, 2b", "3: dmb ish", @@ -1180,7 +1180,7 @@ macro_rules! gen_fetchop { "dmb ish", "2:", "ldxr {res:w}, [{ptr}]", - fetchop!($op, arm, u32), + fetchop!($op, aarch64, u32), "stxr {scratch2:w}, {scratch1:w}, [{ptr}]", "cbnz {scratch2:w}, 2b", "3: dmb ish", @@ -1280,7 +1280,7 @@ macro_rules! 
gen_fetchop { "dmb ish", "2:", "ldxr {res:x}, [{ptr:x}]", - fetchop!($op, arm, u64), + fetchop!($op, aarch64, u64), "stxr {scratch2:w}, {scratch1:x}, [{ptr:x}]", "cbnz {scratch2:w}, 2b", "3: dmb ish", From f046f2c7d62163dd8c0bbc370d68d981892b0c28 Mon Sep 17 00:00:00 2001 From: Aapo Alasuutari Date: Thu, 18 Sep 2025 19:55:09 +0300 Subject: [PATCH 24/25] fix --- ecmascript_atomics/lib.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ecmascript_atomics/lib.rs b/ecmascript_atomics/lib.rs index 1551c4c62..f1afbf06c 100644 --- a/ecmascript_atomics/lib.rs +++ b/ecmascript_atomics/lib.rs @@ -205,7 +205,7 @@ macro_rules! gen_store { unsafe { core::arch::asm!( fence!($barrier, aarch64), - "str {val:w}, [{ptr}]", + "strb {val:w}, [{ptr}]", fence!($barrier, aarch64), ptr = in(reg) ptr, val = in(reg) $val, @@ -217,7 +217,7 @@ macro_rules! gen_store { unsafe { core::arch::asm!( fence!($barrier, arm), - "str {val:w}, [{ptr}]", + "strb {val:w}, [{ptr}]", fence!($barrier, arm), ptr = in(reg) ptr, val = in(reg) $val, @@ -244,7 +244,7 @@ macro_rules! gen_store { unsafe { core::arch::asm!( fence!($barrier, aarch64), - "str {val:w}, [{ptr}]", + "strh {val:w}, [{ptr}]", fence!($barrier, aarch64), ptr = in(reg) ptr, val = in(reg) $val, @@ -256,7 +256,7 @@ macro_rules! gen_store { unsafe { core::arch::asm!( fence!($barrier, arm), - "str {val:w}, [{ptr}]", + "strh {val:w}, [{ptr}]", fence!($barrier, arm), ptr = in(reg) ptr, val = in(reg) $val, @@ -1279,9 +1279,9 @@ macro_rules! gen_fetchop { core::arch::asm!( "dmb ish", "2:", - "ldxr {res:x}, [{ptr:x}]", + "ldxr {res:x}, [{ptr}]", fetchop!($op, aarch64, u64), - "stxr {scratch2:w}, {scratch1:x}, [{ptr:x}]", + "stxr {scratch2:w}, {scratch1:x}, [{ptr}]", "cbnz {scratch2:w}, 2b", "3: dmb ish", res = out(reg) res, From ce2b6c089c7126fa7c0e4dc4c11a75046e6f23ca Mon Sep 17 00:00:00 2001 From: Aapo Alasuutari Date: Thu, 18 Sep 2025 20:06:32 +0300 Subject: [PATCH 25/25] fix --- ecmascript_atomics/lib.rs | 235 +++++++++++--------------------------- 1 file changed, 65 insertions(+), 170 deletions(-) diff --git a/ecmascript_atomics/lib.rs b/ecmascript_atomics/lib.rs index f1afbf06c..aacba67c7 100644 --- a/ecmascript_atomics/lib.rs +++ b/ecmascript_atomics/lib.rs @@ -358,8 +358,8 @@ macro_rules! gen_exchange { core::arch::asm!( "dmb ish", "2:", - "ldxr {res:w}, [{ptr}]", - "stxr {scratch:w}, {val:w}, [{ptr}]", + "ldxrb {res:w}, [{ptr}]", + "stxrb {scratch:w}, {val:w}, [{ptr}]", "cbnz {scratch:w}, 2b", "dmb ish", res = out(reg) res, @@ -377,8 +377,8 @@ macro_rules! gen_exchange { core::arch::asm!( "dmb sy", "2:", - "ldrex {res:w}, [{ptr}]", - "strex {scratch:w}, {val:w}, [{ptr}]", + "ldrexb {res:w}, [{ptr}]", + "strexb {scratch:w}, {val:w}, [{ptr}]", "cmp {scratch:w}, #1", "beq 2b", "dmb sy", @@ -413,8 +413,8 @@ macro_rules! gen_exchange { core::arch::asm!( "dmb ish", "2:", - "ldxr {res:w}, [{ptr}]", - "stxr {scratch:w}, {val:w}, [{ptr}]", + "ldxrh {res:w}, [{ptr}]", + "stxrh {scratch:w}, {val:w}, [{ptr}]", "cbnz {scratch:w}, 2b", "dmb ish", res = out(reg) res, @@ -428,12 +428,12 @@ macro_rules! gen_exchange { #[cfg(target_arch = "arm")] unsafe { - let res: u8; + let res: u16; core::arch::asm!( "dmb sy", "2:", - "ldrex {res:w}, [{ptr}]", - "strex {scratch:w}, {val:w}, [{ptr}]", + "ldrexh {res:w}, [{ptr}]", + "strexh {scratch:w}, {val:w}, [{ptr}]", "cmp {scratch:w}, #1", "beq 2b", "dmb sy", @@ -483,7 +483,7 @@ macro_rules! 
gen_exchange { #[cfg(target_arch = "arm")] unsafe { - let res: u8; + let res: u32; core::arch::asm!( "dmb sy", "2:", @@ -996,46 +996,24 @@ macro_rules! gen_fetchop { #[cfg(target_arch = "arm")] unsafe { - // insns = "" - // insns += fmt_insn("dmb sy") - // insns += fmt_insn("2:") - // if size == 8: - // insns += fmt_insn("ldrexb %[res], [%[addr]]") - // insns += fmt_insn("OP %[scratch1], %[res], %[val]") - // insns += fmt_insn("strexb %[scratch2], %[scratch1], [%[addr]]") - // elif size == 16: - // insns += fmt_insn("ldrexh %[res], [%[addr]]") - // insns += fmt_insn("OP %[scratch1], %[res], %[val]") - // insns += fmt_insn("strexh %[scratch2], %[scratch1], [%[addr]]") - // else: - // assert size == 32 - // insns += fmt_insn("ldrex %[res], [%[addr]]") - // insns += fmt_insn("OP %[scratch1], %[res], %[val]") - // insns += fmt_insn("strex %[scratch2], %[scratch1], [%[addr]]") - // cpu_op = op - // if cpu_op == "or": - // cpu_op = "orr" - // if cpu_op == "xor": - // cpu_op = "eor" - // insns = insns.replace("OP", cpu_op) - // insns += fmt_insn("cmp %[scratch2], #1") - // insns += fmt_insn("beq 2b") - // insns += fmt_insn("dmb sy") - // return """ - // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { - // %(cpp_type)s res; - // uintptr_t scratch1, scratch2; - // asm volatile (%(insns)s - // : [res] "=&r" (res), [scratch1] "=&r" (scratch1), [scratch2] "=&r"(scratch2) - // : [addr] "r" (addr), [val] "r"(val) - // : "memory", "cc"); - // return res; - // }""" % { - // "cpp_type": cpp_type, - // "fun_name": fun_name, - // "insns": insns, - // } - todo!(); + let res: u8; + core::arch::asm!( + "dmb sy", + "2:", + "ldrexb {res}, [{ptr}]", + fetchop!($op, arm), + "strexb {scratch2}, {scratch1}, [{ptr}]", + "cmp {scratch2}, #1", + "beq 2b", + "dmb sy", + res = out(reg) res, + scratch1 = out(reg) _, + scratch2 = out(reg) _, + ptr = in(reg) ptr, + val = in(reg) $val, + options(nostack) + ); + $val = res; } return $val; @@ -1096,46 +1074,24 @@ macro_rules! 
gen_fetchop { #[cfg(target_arch = "arm")] unsafe { - // insns = "" - // insns += fmt_insn("dmb sy") - // insns += fmt_insn("2:") - // if size == 8: - // insns += fmt_insn("ldrexb %[res], [%[addr]]") - // insns += fmt_insn("OP %[scratch1], %[res], %[val]") - // insns += fmt_insn("strexb %[scratch2], %[scratch1], [%[addr]]") - // elif size == 16: - // insns += fmt_insn("ldrexh %[res], [%[addr]]") - // insns += fmt_insn("OP %[scratch1], %[res], %[val]") - // insns += fmt_insn("strexh %[scratch2], %[scratch1], [%[addr]]") - // else: - // assert size == 32 - // insns += fmt_insn("ldrex %[res], [%[addr]]") - // insns += fmt_insn("OP %[scratch1], %[res], %[val]") - // insns += fmt_insn("strex %[scratch2], %[scratch1], [%[addr]]") - // cpu_op = op - // if cpu_op == "or": - // cpu_op = "orr" - // if cpu_op == "xor": - // cpu_op = "eor" - // insns = insns.replace("OP", cpu_op) - // insns += fmt_insn("cmp %[scratch2], #1") - // insns += fmt_insn("beq 2b") - // insns += fmt_insn("dmb sy") - // return """ - // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { - // %(cpp_type)s res; - // uintptr_t scratch1, scratch2; - // asm volatile (%(insns)s - // : [res] "=&r" (res), [scratch1] "=&r" (scratch1), [scratch2] "=&r"(scratch2) - // : [addr] "r" (addr), [val] "r"(val) - // : "memory", "cc"); - // return res; - // }""" % { - // "cpp_type": cpp_type, - // "fun_name": fun_name, - // "insns": insns, - // } - todo!(); + let res: u16; + core::arch::asm!( + "dmb sy", + "2:", + "ldrexh {res}, [{ptr}]", + fetchop!($op, arm), + "strexh {scratch2}, {scratch1}, [{ptr}]", + "cmp {scratch2}, #1", + "beq 2b", + "dmb sy", + res = out(reg) res, + scratch1 = out(reg) _, + scratch2 = out(reg) _, + ptr = in(reg) ptr, + val = in(reg) $val, + options(nostack) + ); + $val = res; } return $val; @@ -1196,46 +1152,24 @@ macro_rules! 
gen_fetchop { #[cfg(target_arch = "arm")] unsafe { - // insns = "" - // insns += fmt_insn("dmb sy") - // insns += fmt_insn("2:") - // if size == 8: - // insns += fmt_insn("ldrexb %[res], [%[addr]]") - // insns += fmt_insn("OP %[scratch1], %[res], %[val]") - // insns += fmt_insn("strexb %[scratch2], %[scratch1], [%[addr]]") - // elif size == 16: - // insns += fmt_insn("ldrexh %[res], [%[addr]]") - // insns += fmt_insn("OP %[scratch1], %[res], %[val]") - // insns += fmt_insn("strexh %[scratch2], %[scratch1], [%[addr]]") - // else: - // assert size == 32 - // insns += fmt_insn("ldrex %[res], [%[addr]]") - // insns += fmt_insn("OP %[scratch1], %[res], %[val]") - // insns += fmt_insn("strex %[scratch2], %[scratch1], [%[addr]]") - // cpu_op = op - // if cpu_op == "or": - // cpu_op = "orr" - // if cpu_op == "xor": - // cpu_op = "eor" - // insns = insns.replace("OP", cpu_op) - // insns += fmt_insn("cmp %[scratch2], #1") - // insns += fmt_insn("beq 2b") - // insns += fmt_insn("dmb sy") - // return """ - // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { - // %(cpp_type)s res; - // uintptr_t scratch1, scratch2; - // asm volatile (%(insns)s - // : [res] "=&r" (res), [scratch1] "=&r" (scratch1), [scratch2] "=&r"(scratch2) - // : [addr] "r" (addr), [val] "r"(val) - // : "memory", "cc"); - // return res; - // }""" % { - // "cpp_type": cpp_type, - // "fun_name": fun_name, - // "insns": insns, - // } - todo!(); + let res: u32; + core::arch::asm!( + "dmb sy", + "2:", + "ldrex {res}, [{ptr}]", + fetchop!($op, arm), + "strex {scratch2}, {scratch1}, [{ptr}]", + "cmp {scratch2}, #1", + "beq 2b", + "dmb sy", + res = out(reg) res, + scratch1 = out(reg) _, + scratch2 = out(reg) _, + ptr = in(reg) ptr, + val = in(reg) $val, + options(nostack) + ); + $val = res; } return $val; @@ -1296,46 +1230,7 @@ macro_rules! gen_fetchop { #[cfg(target_arch = "arm")] unsafe { - // insns = "" - // insns += fmt_insn("dmb sy") - // insns += fmt_insn("2:") - // if size == 8: - // insns += fmt_insn("ldrexb %[res], [%[addr]]") - // insns += fmt_insn("OP %[scratch1], %[res], %[val]") - // insns += fmt_insn("strexb %[scratch2], %[scratch1], [%[addr]]") - // elif size == 16: - // insns += fmt_insn("ldrexh %[res], [%[addr]]") - // insns += fmt_insn("OP %[scratch1], %[res], %[val]") - // insns += fmt_insn("strexh %[scratch2], %[scratch1], [%[addr]]") - // else: - // assert size == 32 - // insns += fmt_insn("ldrex %[res], [%[addr]]") - // insns += fmt_insn("OP %[scratch1], %[res], %[val]") - // insns += fmt_insn("strex %[scratch2], %[scratch1], [%[addr]]") - // cpu_op = op - // if cpu_op == "or": - // cpu_op = "orr" - // if cpu_op == "xor": - // cpu_op = "eor" - // insns = insns.replace("OP", cpu_op) - // insns += fmt_insn("cmp %[scratch2], #1") - // insns += fmt_insn("beq 2b") - // insns += fmt_insn("dmb sy") - // return """ - // INLINE_ATTR %(cpp_type)s %(fun_name)s(%(cpp_type)s* addr, %(cpp_type)s val) { - // %(cpp_type)s res; - // uintptr_t scratch1, scratch2; - // asm volatile (%(insns)s - // : [res] "=&r" (res), [scratch1] "=&r" (scratch1), [scratch2] "=&r"(scratch2) - // : [addr] "r" (addr), [val] "r"(val) - // : "memory", "cc"); - // return res; - // }""" % { - // "cpp_type": cpp_type, - // "fun_name": fun_name, - // "insns": insns, - // } - todo!(); + const { panic!("Unexpected size") } } return $val;