diff --git a/benchmarks/lockhammer/include/atomics.h b/benchmarks/lockhammer/include/atomics.h
index f5e9987..f00dd72 100644
--- a/benchmarks/lockhammer/include/atomics.h
+++ b/benchmarks/lockhammer/include/atomics.h
@@ -129,6 +129,11 @@ static inline unsigned long fetchadd64_acquire_release (unsigned long *ptr, unsi
         : [tmp] "=&r" (tmp), [old] "=&r" (old), [newval] "=&r" (newval), [ptr] "+Q" (*ptr)
         : [val] "r" (addend)
         : "memory");
+#elif defined(__riscv) && !defined(USE_BUILTIN)
+    asm volatile("amoadd.d.aqrl %[old], %[val], %[ptr]"
+        : [old] "=&r" (old), [ptr] "+A" (*(ptr))
+        : [val] "r" (addend)
+        : "memory");
 #else
     old = __atomic_fetch_add(ptr, addend, __ATOMIC_ACQ_REL);
 #endif
@@ -162,6 +167,11 @@ static inline unsigned long fetchadd64_acquire (unsigned long *ptr, unsigned lon
         : [tmp] "=&r" (tmp), [old] "=&r" (old), [newval] "=&r" (newval), [ptr] "+Q" (*ptr)
         : [val] "r" (addend)
         : "memory");
+#elif defined(__riscv) && !defined(USE_BUILTIN)
+    asm volatile("amoadd.d.aq %[old], %[val], %[ptr]"
+        : [old] "=&r" (old), [ptr] "+A" (*(ptr))
+        : [val] "r" (addend)
+        : "memory");
 #else
     old = __atomic_fetch_add(ptr, addend, __ATOMIC_ACQUIRE);
 #endif
@@ -196,6 +206,11 @@ static inline unsigned long fetchadd64_release (unsigned long *ptr, unsigned lon
         : [tmp] "=&r" (tmp), [old] "=&r" (old), [newval] "=&r" (newval), [ptr] "+Q" (*ptr)
         : [val] "r" (addend)
         : "memory");
+#elif defined(__riscv) && !defined(USE_BUILTIN)
+    asm volatile("amoadd.d.rl %[old], %[val], %[ptr]"
+        : [old] "=&r" (old), [ptr] "+A" (*(ptr))
+        : [val] "r" (addend)
+        : "memory");
 #else
     old = __atomic_fetch_add(ptr, addend, __ATOMIC_RELEASE);
 #endif
@@ -229,6 +244,11 @@ static inline unsigned long fetchadd64 (unsigned long *ptr, unsigned long addend
         : [tmp] "=&r" (tmp), [old] "=&r" (old), [newval] "=&r" (newval), [ptr] "+Q" (*ptr)
         : [val] "r" (addend)
         : "memory");
+#elif defined(__riscv) && !defined(USE_BUILTIN)
+    asm volatile("amoadd.d %[old], %[val], %[ptr]"
+        : [old] "=&r" (old), [ptr] "+A" (*(ptr))
+        : [val] "r" (addend)
+        : "memory");
 #else
     old = __atomic_fetch_add(ptr, addend, __ATOMIC_RELAXED);
 #endif
@@ -265,6 +285,12 @@ static inline unsigned long fetchsub64 (unsigned long *ptr, unsigned long addend
         : [tmp] "=&r" (tmp), [old] "=&r" (old), [newval] "=&r" (newval), [ptr] "+Q" (*ptr)
         : [val] "r" (addend)
         : "memory");
+#elif defined(__riscv) && !defined(USE_BUILTIN)
+    addend = (unsigned long) (-(long) addend);
+    asm volatile("amoadd.d %[old], %[val], %[ptr]"
+        : [old] "=&r" (old), [ptr] "+A" (*(ptr))
+        : [val] "r" (addend)
+        : "memory");
 #else
     old = __atomic_fetch_sub(ptr, addend, __ATOMIC_RELAXED);
 #endif
@@ -296,6 +322,11 @@ static inline unsigned long swap64 (unsigned long *ptr, unsigned long val) {
         : [tmp] "=&r" (tmp), [old] "=&r" (old), [ptr] "+Q" (*ptr)
         : [val] "r" (val)
         : "memory");
+#elif defined(__riscv) && !defined(USE_BUILTIN)
+    asm volatile("amoswap.d.aqrl %[old], %[val], %[ptr]"
+        : [old] "=&r" (old), [ptr] "+A" (*(ptr))
+        : [val] "r" (val)
+        : "memory");
 #else
     old = __atomic_exchange_n(ptr, val, __ATOMIC_ACQ_REL);
 #endif
@@ -330,6 +361,22 @@ static inline unsigned long cas64 (unsigned long *ptr, unsigned long newval, uns
         : [tmp] "=&r" (tmp), [old] "=&r" (old), [ptr] "+Q" (*ptr)
         : [exp] "r" (expected), [val] "r" (newval)
         : "memory");
+#elif defined(__riscv) && !defined(USE_BUILTIN) && !defined(__riscv_zacas)
+    unsigned long tmp;
+
+    asm volatile ( "1: lr.d %[old], %[ptr]\n"
+        "   bne %[old], %[exp], 2f\n"
+        "   sc.d %[tmp], %[val], %[ptr]\n"
+        "   bnez %[tmp], 1b\n"
+        "2:"
+        : [old] "=&r" (old), [tmp] "=&r" (tmp), [ptr] "+A" (*(ptr))
+        : [exp] "r" (expected), [val] "r" (newval)
+        : "memory");
+#elif defined(__riscv) && !defined(USE_BUILTIN) && defined(__riscv_zacas)
+    asm volatile("amocas.d %[exp], %[val], %[ptr]"
+        : [exp] "=&r" (old), [ptr] "+A" (*(ptr))
+        : "r[exp]" (expected), [val] "r" (newval)
+        : "memory");
 #else
     old = expected;
     __atomic_compare_exchange_n(ptr, &old, expected, true, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
@@ -365,6 +412,22 @@ static inline unsigned long cas64_acquire (unsigned long *ptr, unsigned long val
         : [tmp] "=&r" (tmp), [old] "=&r" (old), [ptr] "+Q" (*ptr)
         : [exp] "r" (exp), [val] "r" (val)
         : "memory");
+#elif defined(__riscv) && !defined(USE_BUILTIN) && !defined(__riscv_zacas)
+    unsigned long tmp;
+
+    asm volatile ( "1: lr.d.aq %[old], %[ptr]\n"
+        "   bne %[old], %[exp], 2f\n"
+        "   sc.d %[tmp], %[newval], %[ptr]\n"
+        "   bnez %[tmp], 1b\n"
+        "2:"
+        : [old] "=&r" (old), [tmp] "=&r" (tmp), [ptr] "+A" (*(ptr))
+        : [exp] "r" (exp), [newval] "r" (val)
+        : "memory");
+#elif defined(__riscv) && !defined(USE_BUILTIN) && defined(__riscv_zacas)
+    asm volatile("amocas.d.aq %[exp], %[val], %[ptr]"
+        : [exp] "=&r" (old), [ptr] "+A" (*(ptr))
+        : "r[exp]" (exp), [val] "r" (val)
+        : "memory");
 #else
     old = exp;
     __atomic_compare_exchange_n(ptr, &old, val, true, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE);
@@ -400,6 +463,22 @@ static inline unsigned long cas64_release (unsigned long *ptr, unsigned long val
         : [tmp] "=&r" (tmp), [old] "=&r" (old), [ptr] "+Q" (*ptr)
         : [exp] "r" (exp), [val] "r" (val)
         : "memory");
+#elif defined(__riscv) && !defined(USE_BUILTIN) && !defined(__riscv_zacas)
+    unsigned long tmp;
+
+    asm volatile ( "1: lr.d %[old], %[ptr]\n"
+        "   bne %[old], %[exp], 2f\n"
+        "   sc.d.rl %[tmp], %[val], %[ptr]\n"
+        "   bnez %[tmp], 1b\n"
+        "2:"
+        : [old] "=&r" (old), [tmp] "=&r" (tmp), [ptr] "+A" (*(ptr))
+        : [exp] "r" (exp), [val] "r" (val)
+        : "memory");
+#elif defined(__riscv) && !defined(USE_BUILTIN) && defined(__riscv_zacas)
+    asm volatile("amocas.d.rl %[exp], %[val], %[ptr]"
+        : [exp] "=&r" (old), [ptr] "+A" (*(ptr))
+        : "r[exp]" (exp), [val] "r" (val)
+        : "memory");
 #else
     old = exp;
     __atomic_compare_exchange_n(ptr, &old, val, true, __ATOMIC_RELEASE, __ATOMIC_RELAXED); // XXX: is relaxed for failure OK?
@@ -435,6 +514,22 @@ static inline unsigned long cas64_acquire_release (unsigned long *ptr, unsigned
         : [tmp] "=&r" (tmp), [old] "=&r" (old), [ptr] "+Q" (*ptr)
         : [exp] "r" (exp), [val] "r" (val)
         : "memory");
+#elif defined(__riscv) && !defined(USE_BUILTIN) && !defined(__riscv_zacas)
+    unsigned long tmp;
+
+    asm volatile ( "1: lr.d.aq %[old], %[ptr]\n"
+        "   bne %[old], %[exp], 2f\n"
+        "   sc.d.rl %[tmp], %[val], %[ptr]\n"
+        "   bnez %[tmp], 1b\n"
+        "2:"
+        : [old] "=&r" (old), [tmp] "=&r" (tmp), [ptr] "+A" (*(ptr))
+        : [exp] "r" (exp), [val] "r" (val)
+        : "memory");
+#elif defined(__riscv) && !defined(USE_BUILTIN) && defined(__riscv_zacas)
+    asm volatile("amocas.d.aqrl %[exp], %[val], %[ptr]"
+        : [exp] "=&r" (old), [ptr] "+A" (*(ptr))
+        : "r[exp]" (exp), [val] "r" (val)
+        : "memory");
 #else
     old = exp;
     __atomic_compare_exchange_n(ptr, &old, val, true, __ATOMIC_ACQ_REL,
diff --git a/benchmarks/lockhammer/include/cpu_relax.h b/benchmarks/lockhammer/include/cpu_relax.h
index e0710c6..f34338d 100644
--- a/benchmarks/lockhammer/include/cpu_relax.h
+++ b/benchmarks/lockhammer/include/cpu_relax.h
@@ -63,9 +63,18 @@ static inline void __cpu_relax(void) {
 #endif
 #endif // __x86_64__
 
+#ifdef __riscv
+#if defined(RELAX_IS_EMPTY)
+    asm volatile ("" : : : "memory");
+#elif defined(RELAX_IS_NOP)
+    asm volatile ("nop" : : : "memory");
+#elif defined(RELAX_IS_NOTHING)
+
+#endif
+#endif // __riscv
+
 }
 }
-
 #endif // CPU_RELAX_H
 
 /* vim: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/benchmarks/lockhammer/include/perf_timer.h b/benchmarks/lockhammer/include/perf_timer.h
index 90c1f01..986bd8e 100644
--- a/benchmarks/lockhammer/include/perf_timer.h
+++ b/benchmarks/lockhammer/include/perf_timer.h
@@ -200,6 +200,19 @@ get_raw_counter(void) {
 
 #endif
 
+#ifdef __riscv
+static inline uint64_t __attribute__((always_inline))
+get_raw_counter(void) {
+    uint64_t t;
+    asm volatile(
+        "fence.i\n"
+        "fence r, r\n"
+        "rdtime %0"
+        : "=r"(t) : :);
+    return t;
+}
+#endif
+
 static inline void __attribute__((always_inline))
 timer_reset_counter()
 {
@@ -207,6 +220,12 @@ timer_reset_counter()
     __asm__ __volatile__ ("isb; mrs %0, cntvct_el0" : "=r" (prev_tsc));
 #elif __x86_64__
     prev_tsc = rdtscp();
+#elif __riscv
+    asm volatile(
+        "fence.i\n"
+        "fence r, r\n"
+        "rdtime %0"
+        : "=r"(prev_tsc) : :);
 #endif
 }
 
@@ -221,7 +240,14 @@ timer_get_counter()
     __asm__ __volatile__ ("isb; mrs %0, cntvct_el0" : "=r" (counter_value));
 #elif __x86_64__
     uint64_t counter_value = rdtscp();    // assume constant_tsc
-#endif
+#elif __riscv
+    uint64_t counter_value;
+    asm volatile(
+        "fence.i\n"
+        "fence r, r\n"
+        "rdtime %0"
+        : "=r"(counter_value) : :);
+#endif
     return counter_value;
 }
 
@@ -236,6 +262,14 @@ timer_get_counter_start()
     __asm__ __volatile__ ("dsb ish; isb; mrs %0, cntvct_el0" : "=r" (counter_value));
 #elif __x86_64__
     uint64_t counter_value = rdtscp_start();    // assume constant_tsc
+#elif __riscv
+    uint64_t counter_value;
+    asm volatile(
+        "fence rw, rw\n"
+        "fence.i\n"
+        "fence r,r\n"
+        "rdtime %0"
+        : "=r"(counter_value) : :);
 #endif
     return counter_value;
 }
@@ -252,6 +286,15 @@ timer_get_counter_end()
     __asm__ __volatile__ ("isb; mrs %0, cntvct_el0; isb" : "=r" (counter_value));
 #elif __x86_64__
     uint64_t counter_value = rdtscp_end();    // assume constant_tsc
+#elif __riscv
+    uint64_t counter_value;
+    asm volatile(
+        "fence.i\n"
+        "fence r, r\n"
+        "rdtime %0\n"
+        "fence.i\n"
+        "fence r, r"
+        : "=r"(counter_value) : :);
 #endif
     return counter_value;
 }
@@ -286,6 +329,10 @@ timer_get_timer_freq(void)
 
         const struct timeval measurement_duration
             = { .tv_sec = 0, .tv_usec = 100000 };
+        hwtimer_frequency = estimate_hwclock_freq(1, 0, measurement_duration);
+#elif __riscv
+        const struct timeval measurement_duration = { .tv_sec = 0, .tv_usec = 100000 };
+        hwtimer_frequency = estimate_hwclock_freq(1, 0, measurement_duration);
 #else
 #error "ERROR: timer_get_timer_freq() is not implemented for this system!"
 
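
The rdtime reads added above count ticks of the platform timebase, and timer_get_timer_freq() estimates that rate at runtime through estimate_hwclock_freq() over a 100 ms window. A rough independent cross-check against CLOCK_MONOTONIC can be done with the sketch below; it is illustrative only (rv64, assumes rdtime is usable from user space) and is not part of the patch.

    /* rdtime_freq.c -- hypothetical sanity check for the timebase frequency
     * reported by timer_get_timer_freq(); illustrative only. */
    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>
    #include <unistd.h>

    static inline uint64_t read_rdtime(void) {
        uint64_t t;
        asm volatile("rdtime %0" : "=r" (t));
        return t;
    }

    int main(void) {
        struct timespec t0, t1;
        clock_gettime(CLOCK_MONOTONIC, &t0);
        uint64_t c0 = read_rdtime();
        usleep(100000);                            /* ~100 ms, as in the patch */
        uint64_t c1 = read_rdtime();
        clock_gettime(CLOCK_MONOTONIC, &t1);
        double sec = (t1.tv_sec - t0.tv_sec) + (t1.tv_nsec - t0.tv_nsec) / 1e9;
        printf("estimated timebase: %.0f Hz\n", (double)(c1 - c0) / sec);
        return 0;
    }
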
diff --git a/benchmarks/lockhammer/src/args.c b/benchmarks/lockhammer/src/args.c
index e321c4a..210a03f 100644
--- a/benchmarks/lockhammer/src/args.c
+++ b/benchmarks/lockhammer/src/args.c
@@ -129,6 +129,8 @@ static size_t get_ctr_erg_bytes(void) {
     return ERG_words * 4;
 #elif defined(__x86_64__)
     return 64;
+#elif defined(__riscv)
+    return 64;
 #else
 #error neither __aarch64__ nor __x86_64__ are defined in get_ctr_erg_bytes()
 #endif
diff --git a/benchmarks/lockhammer/src/measure.c b/benchmarks/lockhammer/src/measure.c
index d9d8b5d..2ed44fd 100644
--- a/benchmarks/lockhammer/src/measure.c
+++ b/benchmarks/lockhammer/src/measure.c
@@ -203,6 +203,9 @@ void NOINLINE blackhole(unsigned long iters) {
 #endif
 #elif __x86_64__
     asm volatile (".p2align 4; 1: add $-1, %0; jne 1b" : "+r" (iters) );
+#elif __riscv
+    asm volatile (
+        ".p2align 4; 1: addi %0, %0, -1; bnez %0, 1b" :"+r" (iters) : "0" (iters));
 #endif
 }
 
diff --git a/ext/jvm/jvm_objectmonitor.h b/ext/jvm/jvm_objectmonitor.h
index 24ae719..daa95af 100644
--- a/ext/jvm/jvm_objectmonitor.h
+++ b/ext/jvm/jvm_objectmonitor.h
@@ -278,6 +278,12 @@ inline static void OrderAccess_fence(void) {
 }
 #endif
 
+#ifdef __riscv
+inline static void OrderAccess_fence(void) {
+    __asm__ volatile ("fence rw,rw" : : : "memory");
+}
+#endif
+
 inline static void storeload(void) {
     OrderAccess_fence();
 }
@@ -301,6 +307,17 @@ inline static int int_xchg(int exchange_value, volatile int* dest) {
     FULL_MEM_BARRIER;
     return res;
 }
+#elif defined(__riscv)
+inline static int int_xchg(int exchange_value, volatile int* dest) {
+    int result;
+    __asm__ __volatile__ (
+        "amoswap.w.aqrl %0, %1, (%2)"
+        : "=r" (result)
+        : "r" (exchange_value), "r" (dest)
+        : "memory"
+    );
+    return result;
+}
 #endif
 
 /*
@@ -636,6 +653,8 @@ static inline int SpinPause(void) {
     return 0;
 #elif __x86_64__
     return 1;
+#elif __riscv
+    return 2;
 #else
 #error "unsupported instruction set architecture"
 #endif
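
For reference, the two jvm_objectmonitor.h additions have straightforward compiler-builtin counterparts that can be used to diff the generated code; the sketch below is an assumption-based cross-check, not part of the patch.

    /* Hypothetical builtin-based equivalents of the RISC-V paths added above. */
    static inline void OrderAccess_fence_builtin(void) {
        __atomic_thread_fence(__ATOMIC_SEQ_CST);   /* typically lowered to "fence rw,rw" on RISC-V */
    }

    static inline int int_xchg_builtin(int exchange_value, volatile int *dest) {
        /* a seq_cst exchange is normally lowered to amoswap.w.aqrl */
        return __atomic_exchange_n(dest, exchange_value, __ATOMIC_SEQ_CST);
    }
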
diff --git a/ext/linux/include/lk_atomics.h b/ext/linux/include/lk_atomics.h
index 063918c..5045c18 100644
--- a/ext/linux/include/lk_atomics.h
+++ b/ext/linux/include/lk_atomics.h
@@ -50,6 +50,8 @@ static inline void prefetchw(const void *ptr) {
 	asm volatile("prefetchw %P1\n" : : "m" (*(const char *) ptr));
 #elif defined(__aarch64__)
 	asm volatile("prfm pstl1keep, %a0\n" : : "p" (ptr));
+#elif defined(__riscv) && defined(__riscv_zicbop)
+	asm volatile("prefetch.w 0(%0)" : : "r" (ptr) : "memory");
 #else
 #endif
 }
@@ -121,6 +123,22 @@ static inline uint32_t atomic_cmpxchg_acquire32(uint32_t *ptr, uint32_t exp, uin
 		: [exp] "Lr" (exp), [val] "r" (val)
 		: );
 #endif
+#elif defined(__riscv) && !defined(__riscv_zacas)
+	unsigned long tmp;
+	asm volatile (
+		"1: lr.w.aq %[old], %[ptr]\n"
+		"   bne %[old], %[exp], 2f\n"
+		"   sc.w %[tmp], %[val], %[ptr]\n"
+		"   bnez %[tmp], 1b\n"
+		"2:"
+		: [old] "=&r" (old), [tmp] "=&r" (tmp) , [ptr] "+A" (*(uint32_t *)(ptr))
+		: [exp] "r" (exp), [val] "r" (val)
+		: "memory");
+#elif defined(__riscv) && defined(__riscv_zacas)
+	asm volatile("amocas.w.aq %[exp], %[val], %[ptr]"
+		: [exp] "=&r" (old), [ptr] "+A" (*(ptr))
+		: "r[exp]" (exp), [val] "r" (val)
+		: "memory");
 #else
 	/* TODO: builtin atomic call */
 #endif
@@ -165,6 +183,22 @@ static inline uint32_t atomic_cmpxchg_release32(uint32_t *ptr, uint32_t exp, uin
 		: [exp] "Lr" (exp), [val] "r" (val)
 		: );
 #endif
+#elif defined(__riscv) && !defined(__riscv_zacas)
+	unsigned long tmp;
+	asm volatile (
+		"1: lr.w %[old], %[ptr]\n"
+		"   bne %[old], %[exp], 2f\n"
+		"   sc.w.rl %[tmp], %[val], %[ptr]\n"
+		"   bnez %[tmp], 1b\n"
+		"2:"
+		: [old] "=&r" (old), [tmp] "=&r" (tmp) , [ptr] "+A" (*(uint32_t *)(ptr))
+		: [exp] "r" (exp), [val] "r" (val)
+		: "memory");
+#elif defined(__riscv) && defined(__riscv_zacas)
+	asm volatile("amocas.w.rl %[exp], %[val], %[ptr]"
+		: [exp] "=&r" (old), [ptr] "+A" (*(uint32_t *)(ptr))
+		: "r[exp]" (exp), [val] "r" (val)
+		: "memory");
 #else
 	/* TODO: builtin atomic call */
 #endif
@@ -209,6 +243,22 @@ static inline uint32_t atomic_cmpxchg_relaxed32(uint32_t *ptr, uint32_t exp, uin
 		: [exp] "Lr" (exp), [val] "r" (val)
 		: );
 #endif
+#elif defined(__riscv) && !defined(__riscv_zacas)
+	unsigned long tmp;
+	asm volatile (
+		"1: lr.w.aq %[old], %[ptr]\n"
+		"   bne %[old], %[exp], 2f\n"
+		"   sc.w.rl %[tmp], %[val], %[ptr]\n"
+		"   bnez %[tmp], 1b\n"
+		"2:"
+		: [old] "=&r" (old), [tmp] "=&r" (tmp) , [ptr] "+A" (*(uint32_t *)(ptr))
+		: [exp] "r" (exp), [val] "r" (val)
+		: "memory");
+#elif defined(__riscv) && defined(__riscv_zacas)
+	asm volatile("amocas.w.aqrl %[exp], %[val], %[ptr]"
+		: [exp] "=&r" (old), [ptr] "+A" (*(uint32_t *)(ptr))
+		: "r[exp]" (exp), [val] "r" (val)
+		: "memory");
 #else
 	/* TODO: builtin atomic call */
 #endif
@@ -277,6 +327,18 @@ atomic_fetch_or_acquire32(uint32_t i, atomic_t *v)
 		: "memory");
 #endif
 	return old_val;
+#elif defined(__riscv)
+	uint32_t old_val, new_val, tmp;
+	asm volatile(
+		"1: lr.w.aq %[old], %[ptr]\n"
+		"   or %[val], %[old], %[_val]\n"
+		"   sc.w.rl %[tmp], %[val], %[ptr]\n"
+		"   bnez %[tmp], 1b\n"
+		: [old] "=&r" (old_val), [val] "=&r" (new_val), [tmp] "=&r" (tmp)
+		: [ptr] "A" (*((uint32_t *)(&v->counter))), [_val] "r" (i)
+		: "memory"
+	);
+	return old_val;
 #else
 #error "Unable to define atomic_fetch_or_acquire"
 #endif
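
The non-Zacas branches above (and the cas64* family in atomics.h) all rely on the same LR/SC retry loop: load-reserved, compare against the expected value, and redo the store-conditional until it succeeds or the comparison fails. A standalone sketch of the 32-bit acquire form, cross-checked against the equivalent GCC builtin, is shown below; the names are illustrative and it is not part of the patch.

    /* lrsc_cas_check.c -- hypothetical demo of the LR/SC CAS pattern (rv64). */
    #include <stdint.h>
    #include <stdio.h>

    static uint32_t lrsc_cas32_acquire(uint32_t *ptr, uint32_t exp, uint32_t val) {
        uint32_t old;
        unsigned long tmp;
        asm volatile("1: lr.w.aq %[old], %[ptr]\n"
                     "   bne  %[old], %[exp], 2f\n"   /* value changed: give up      */
                     "   sc.w %[tmp], %[val], %[ptr]\n"
                     "   bnez %[tmp], 1b\n"           /* reservation lost: try again */
                     "2:"
                     : [old] "=&r" (old), [tmp] "=&r" (tmp), [ptr] "+A" (*ptr)
                     : [exp] "r" (exp), [val] "r" (val)
                     : "memory");
        return old;   /* old == exp iff the store took effect */
    }

    int main(void) {
        uint32_t a = 5, b = 5, e = 5;
        uint32_t old_asm = lrsc_cas32_acquire(&a, 5, 9);
        __atomic_compare_exchange_n(&b, &e, 9, 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE);
        printf("asm: old=%u cell=%u | builtin: old=%u cell=%u\n", old_asm, a, e, b);
        return 0;
    }
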
diff --git a/ext/linux/include/lk_barrier.h b/ext/linux/include/lk_barrier.h
index 057ac57..4b4aca2 100644
--- a/ext/linux/include/lk_barrier.h
+++ b/ext/linux/include/lk_barrier.h
@@ -45,6 +45,19 @@
 #define smp_rmb()	dmb(ishld)
 #define smp_wmb()	dmb(ishst)
 
+#elif defined(__riscv)
+
+#define RISCV_FENCE_ASM(p, s)	"\tfence " #p "," #s "\n"
+#define RISCV_FENCE(p, s) \
+	({ __asm__ __volatile__ (RISCV_FENCE_ASM(p, s) : : : "memory"); })
+
+#define mb()		RISCV_FENCE(iorw, iorw)
+#define rmb()		RISCV_FENCE(ir, ir)
+#define wmb()		RISCV_FENCE(ow, ow)
+#define smp_mb()	RISCV_FENCE(rw, rw)
+#define smp_rmb()	RISCV_FENCE(r, r)
+#define smp_wmb()	RISCV_FENCE(w, w)
+
 #else /* No Arch */
 /* TODO: No Arch Default */
 #endif /* __x86_64__ */
diff --git a/ext/linux/include/lk_cmpxchg.h b/ext/linux/include/lk_cmpxchg.h
index f5130fb..5e3af68 100644
--- a/ext/linux/include/lk_cmpxchg.h
+++ b/ext/linux/include/lk_cmpxchg.h
@@ -458,6 +458,190 @@ __XCHG_GEN(_mb)
 #define atomic_xchg_release(v, new)	xchg_release(&((v)->counter), (new))
 #define atomic_xchg(v, new)		xchg(&((v)->counter), (new))
 
+
+#elif defined(__riscv)
+
+#define BITS_PER_BYTE 8
+#define GENMASK(h, l) \
+	(((~0UL) << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h))))
+#define BITS_PER_LONG 64
+#define RISCV_FENCE_ASM(p, s)	"\tfence " #p "," #s "\n"
+#define RISCV_FENCE(p, s) \
+	({ __asm__ __volatile__ (RISCV_FENCE_ASM(p, s) : : : "memory"); })
+#define RISCV_ACQUIRE_BARRIER	RISCV_FENCE_ASM(r, rw)
+#define RISCV_RELEASE_BARRIER	RISCV_FENCE_ASM(rw, w)
+#define RISCV_FULL_BARRIER	RISCV_FENCE_ASM(rw, rw)
+
+#define __arch_xchg_masked(sc_sfx, prepend, append, r, p, n) \
+({ \
+	u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \
+	ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
+	ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
+			<< __s; \
+	ulong __newx = (ulong)(n) << __s; \
+	ulong __retx; \
+	ulong __rc; \
+ \
+	__asm__ __volatile__ ( \
+		prepend \
+		"0:	lr.w %0, %2\n" \
+		"	and  %1, %0, %z4\n" \
+		"	or   %1, %1, %z3\n" \
+		"	sc.w" sc_sfx " %1, %1, %2\n" \
+		"	bnez %1, 0b\n" \
+		append \
+		: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
+		: "rJ" (__newx), "rJ" (~__mask) \
+		: "memory"); \
+ \
+	r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
+})
+
+#define __arch_xchg(sfx, prepend, append, r, p, n) \
+({ \
+	__asm__ __volatile__ ( \
+		prepend \
+		"	amoswap" sfx " %0, %2, %1\n" \
+		append \
+		: "=r" (r), "+A" (*(p)) \
+		: "r" (n) \
+		: "memory"); \
+})
+
+#define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend, \
+		   sc_append, swap_append) \
+({ \
+	__typeof__(ptr) __ptr = (ptr); \
+	__typeof__(*(__ptr)) __new = (new); \
+	__typeof__(*(__ptr)) __ret; \
+ \
+	switch (sizeof(*__ptr)) { \
+	case 1: \
+	case 2: \
+		__arch_xchg_masked(sc_sfx, prepend, sc_append, \
+				   __ret, __ptr, __new); \
+		break; \
+	case 4: \
+		__arch_xchg(".w" swap_sfx, prepend, swap_append, \
+			    __ret, __ptr, __new); \
+		break; \
+	case 8: \
+		__arch_xchg(".d" swap_sfx, prepend, swap_append, \
+			    __ret, __ptr, __new); \
+		break; \
+	default: break; \
+	} \
+	(__typeof__(*(__ptr)))__ret; \
+})
+
+#define arch_xchg(ptr, x) \
+	_arch_xchg(ptr, x, ".rl", ".aqrl", "", RISCV_FULL_BARRIER, "")
+
+
+#define __arch_cmpxchg_masked(sc_sfx, prepend, append, r, p, o, n) \
+({ \
+	u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \
+	ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
+	ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
+			<< __s; \
+	ulong __newx = (ulong)(n) << __s; \
+	ulong __oldx = (ulong)(o) << __s; \
+	ulong __retx; \
+	ulong __rc; \
+ \
+	__asm__ __volatile__ ( \
+		prepend \
+		"0:	lr.w %0, %2\n" \
+		"	and  %1, %0, %z5\n" \
+		"	bne  %1, %z3, 1f\n" \
+		"	and  %1, %0, %z6\n" \
+		"	or   %1, %1, %z4\n" \
+		"	sc.w" sc_sfx " %1, %1, %2\n" \
+		"	bnez %1, 0b\n" \
+		append \
+		"1:\n" \
+		: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
+		: "rJ" ((long)__oldx), "rJ" (__newx), \
+		  "rJ" (__mask), "rJ" (~__mask) \
+		: "memory"); \
+ \
+	r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
+})
+
+#define __arch_cmpxchg(lr_sfx, sc_sfx, prepend, append, r, p, co, o, n) \
+({ \
+	register unsigned int __rc; \
+ \
+	__asm__ __volatile__ ( \
+		prepend \
+		"0:	lr" lr_sfx " %0, %2\n" \
+		"	bne  %0, %z3, 1f\n" \
+		"	sc" sc_sfx " %1, %z4, %2\n" \
+		"	bnez %1, 0b\n" \
+		append \
+		"1:\n" \
+		: "=&r" (r), "=&r" (__rc), "+A" (*(p)) \
+		: "rJ" (co o), "rJ" (n) \
+		: "memory"); \
+})
+
+#define _arch_cmpxchg(ptr, old, new, sc_sfx, prepend, append) \
+({ \
+	__typeof__(ptr) __ptr = (ptr); \
+	__typeof__(*(__ptr)) __old = (old); \
+	__typeof__(*(__ptr)) __new = (new); \
+	__typeof__(*(__ptr)) __ret; \
+ \
+	switch (sizeof(*__ptr)) { \
+	case 1: \
+	case 2: \
+		__arch_cmpxchg_masked(sc_sfx, prepend, append, \
+				      __ret, __ptr, __old, __new); \
+		break; \
+	case 4: \
+		__arch_cmpxchg(".w", ".w" sc_sfx, prepend, append, \
+			       __ret, __ptr, (long), __old, __new); \
+		break; \
+	case 8: \
+		__arch_cmpxchg(".d", ".d" sc_sfx, prepend, append, \
+			       __ret, __ptr, /**/, __old, __new); \
+		break; \
+	default: break; \
+	} \
+	(__typeof__(*(__ptr)))__ret; \
+})
+
+#define arch_cmpxchg(ptr, o, n) \
+	_arch_cmpxchg((ptr), (o), (n), ".rl", "", "	fence rw, rw\n")
+
+#define atomic_cmpxchg_acquire(ptr, o, n) \
+	_arch_cmpxchg(&((ptr)->counter), (o), (n), "", "", RISCV_ACQUIRE_BARRIER)
+#define atomic_cmpxchg_relaxed(ptr, o, n) \
+	_arch_cmpxchg(&((ptr)->counter), (o), (n), "", "", "")
+#define atomic_cmpxchg_release(ptr, o, n) \
+	_arch_cmpxchg(&((ptr)->counter), (o), (n), "", RISCV_RELEASE_BARRIER, "")
+#define atomic_cmpxchg(ptr, o, n) \
+	_arch_cmpxchg(&((ptr)->counter), (o), (n), ".rl", "", "	fence rw, rw\n")
+#define atomic_xchg_relaxed(ptr, x) \
+	_arch_xchg(&((ptr)->counter), x, "", "", "", "", "")
+#define atomic_xchg_acquire(ptr, x) \
+	_arch_xchg(&((ptr)->counter), x, "", "", "", \
+		   RISCV_ACQUIRE_BARRIER, RISCV_ACQUIRE_BARRIER)
+#define atomic_xchg_release(ptr, x) \
+	_arch_xchg(&((ptr)->counter), x, "", "", RISCV_RELEASE_BARRIER, "", "")
+#define atomic_xchg(ptr, x) \
+	_arch_xchg(&((ptr)->counter), x, ".rl", ".aqrl", "", RISCV_FULL_BARRIER, "")
+
+#define xchg(ptr, x) \
+({ \
+	arch_xchg((ptr), (x)); \
+})
+
+#define cmpxchg(ptr, o, n) \
+({ \
+	arch_cmpxchg((ptr), (o), (n)); \
+})
+
 #else /* Unknown Arch */
 /* TODO: No Arch Default */
 #endif /* __x86_64__ */
diff --git a/ext/sms/base/build_config.h b/ext/sms/base/build_config.h
index c97e028..4c49e4d 100644
--- a/ext/sms/base/build_config.h
+++ b/ext/sms/base/build_config.h
@@ -17,6 +17,9 @@
 #elif defined(__i386__)
 #define CONFIG_ARCH_X86
 #define CONFIG_ARCH_32BIT
+#elif defined(__riscv)
+#define CONFIG_ARCH_RISCV64
+#define CONFIG_ARCH_64BIT
 #endif
 
 #if !defined(CONFIG_ARCH_64BIT) && !defined(CONFIG_ARCH_32BIT)
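
The xchg()/cmpxchg() wrappers added to lk_cmpxchg.h dispatch on operand size: 1- and 2-byte operands go through the masked LR/SC sequence on the aligned 32-bit word containing them, while 4- and 8-byte operands use the .w/.d forms directly. A minimal usage sketch follows; it assumes lk_cmpxchg.h (and whatever defines u32/ulong in this tree) is already included, and is illustrative rather than part of the patch.

    /* Hypothetical caller of the new macros; relies on lk_cmpxchg.h being in scope. */
    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
        uint16_t flag = 1;    /* 2 bytes: masked LR/SC on the containing 32-bit word */
        uint64_t seq  = 41;   /* 8 bytes: handled with the .d forms directly         */

        uint16_t old_flag = xchg(&flag, (uint16_t)0);
        uint64_t old_seq  = cmpxchg(&seq, (uint64_t)41, (uint64_t)42);

        printf("flag %u -> %u, seq %llu -> %llu\n",
               (unsigned)old_flag, (unsigned)flag,
               (unsigned long long)old_seq, (unsigned long long)seq);
        return 0;
    }
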