Skip to content

8362193: Re-work MacOS/AArch64 SpinPause to handle SB #26387

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 14 commits into from
Closed
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6816,6 +6816,7 @@ void MacroAssembler::spin_wait() {
yield();
break;
case SpinWait::SB:
assert(VM_Version::supports_sb(), "current CPU does not support SB instruction");
sb();
break;
default:
Expand Down
51 changes: 51 additions & 0 deletions src/hotspot/cpu/aarch64/spin_wait_aarch64.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/

#include "spin_wait_aarch64.hpp"

#include <string.h>

// Tells whether `name` spells one of the recognized spin-wait instruction
// options: "nop", "isb", "yield", "sb" or "none". A null pointer is rejected.
// Only the spelling is compared here; nothing about the CPU is queried.
bool SpinWait::supports(const char *name) {
  if (name == nullptr) {
    return false;
  }

  static const char* const known_names[] = { "nop", "isb", "yield", "sb", "none" };
  for (const char* candidate : known_names) {
    if (strcmp(name, candidate) == 0) {
      return true;
    }
  }
  return false;
}

// Translates a spin-wait instruction name into the matching SpinWait::Inst.
// The name is expected to pass supports(); that precondition is only asserted.
// "nop", "isb", "yield" and "sb" map to their enum values; every other name
// (including "none") maps to SpinWait::NONE.
SpinWait::Inst SpinWait::from_name(const char* name) {
  assert(supports(name), "spin wait instruction name must be one of: " SPIN_WAIT_INST_OPTIONS);

  // Lookup table, scanned in order; the first matching spelling wins.
  static const struct {
    const char*    text;
    SpinWait::Inst inst;
  } name_to_inst[] = {
    { "nop",   SpinWait::NOP   },
    { "isb",   SpinWait::ISB   },
    { "yield", SpinWait::YIELD },
    { "sb",    SpinWait::SB    }
  };

  for (const auto& entry : name_to_inst) {
    if (strcmp(name, entry.text) == 0) {
      return entry.inst;
    }
  }

  return SpinWait::NONE;
}
24 changes: 17 additions & 7 deletions src/hotspot/cpu/aarch64/spin_wait_aarch64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,31 +19,41 @@
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/

#ifndef CPU_AARCH64_SPIN_WAIT_AARCH64_HPP
#define CPU_AARCH64_SPIN_WAIT_AARCH64_HPP

#define SPIN_WAIT_INST_OPTIONS "nop, isb, yield, sb, none"

class SpinWait {
public:
// Non-zero values are chosen to have only one bit set.
// This simplifies testing values in assembly code.
// This limits us to 64 possible implementations.
// Value 1 is used for the default implementation.
enum Inst {
NONE = -1,
NOP,
ISB,
YIELD,
SB
NONE = 0,
YIELD = (1 << 0),
ISB = (1 << 1),
SB = (1 << 2),
NOP = (1 << 3)
};

private:
Inst _inst;
int _count;

Inst from_name(const char *name);

public:
SpinWait(Inst inst = NONE, int count = 0) : _inst(inst), _count(count) {}
SpinWait(Inst inst = NONE, int count = 0) : _inst(inst), _count(inst == NONE ? 0 : count) {}
SpinWait(const char *name, int count) : SpinWait(from_name(name), count) {}

Inst inst() const { return _inst; }
int inst_count() const { return _count; }

static bool supports(const char *name);
};

#endif // CPU_AARCH64_SPIN_WAIT_AARCH64_HPP
26 changes: 10 additions & 16 deletions src/hotspot/cpu/aarch64/vm_version_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,26 +51,20 @@ uintptr_t VM_Version::_pac_mask;
SpinWait VM_Version::_spin_wait;

static SpinWait get_spin_wait_desc() {
if (strcmp(OnSpinWaitInst, "nop") == 0) {
return SpinWait(SpinWait::NOP, OnSpinWaitInstCount);
} else if (strcmp(OnSpinWaitInst, "isb") == 0) {
return SpinWait(SpinWait::ISB, OnSpinWaitInstCount);
} else if (strcmp(OnSpinWaitInst, "yield") == 0) {
return SpinWait(SpinWait::YIELD, OnSpinWaitInstCount);
} else if (strcmp(OnSpinWaitInst, "sb") == 0) {
if (!VM_Version::supports_sb()) {
vm_exit_during_initialization("OnSpinWaitInst is SB but current CPU does not support SB instruction");
}
return SpinWait(SpinWait::SB, OnSpinWaitInstCount);
} else if (strcmp(OnSpinWaitInst, "none") != 0) {
vm_exit_during_initialization("The options for OnSpinWaitInst are nop, isb, yield, sb, and none", OnSpinWaitInst);
if (!SpinWait::supports(OnSpinWaitInst)) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So this is not about the actual spin-wait hints support, correct? This only checks the option string is in expected domain? A common way to deal with this is to use constraint functions, see for example:

  product(ccstr, AOTCache, nullptr,                                         \
          "Cache for improving start up and warm up")                       \
          constraint(AOTCacheConstraintFunc, AtParse)                       \
                                                                            \

vm_exit_during_initialization("OnSpinWaitInst is not one of "
SPIN_WAIT_INST_OPTIONS,
OnSpinWaitInst);
}

if (!FLAG_IS_DEFAULT(OnSpinWaitInstCount) && OnSpinWaitInstCount > 0) {
vm_exit_during_initialization("OnSpinWaitInstCount cannot be used for OnSpinWaitInst 'none'");
assert(OnSpinWaitInstCount != 0, "allowed range for OnSpinWaitInstCount must not include 0");

SpinWait spin_wait(OnSpinWaitInst, OnSpinWaitInstCount);
if (spin_wait.inst() == SpinWait::SB && !VM_Version::supports_sb()) {
vm_exit_during_initialization("OnSpinWaitInst is SB but current CPU does not support SB instruction");
}

return SpinWait{};
return spin_wait;
}

void VM_Version::initialize() {
Expand Down
76 changes: 49 additions & 27 deletions src/hotspot/os_cpu/bsd_aarch64/os_bsd_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,10 @@
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/timer.hpp"
#include "runtime/vm_version.hpp"
#include "signals_posix.hpp"
#include "utilities/align.hpp"
#include "utilities/debug.hpp"
#include "utilities/events.hpp"
#include "utilities/vmError.hpp"

Expand Down Expand Up @@ -524,39 +526,59 @@ static inline void atomic_copy64(const volatile void *src, volatile void *dst) {
}

extern "C" {
// needs local assembler label '1:' to avoid trouble when using linktime optimization
// needs local assembler label '4:' to avoid trouble when using linktime optimization
int SpinPause() {
// We don't use StubRoutines::aarch64::spin_wait stub in order to
// avoid a costly call to os::current_thread_enable_wx() on MacOS.
// We should return 1 if SpinPause is implemented, and since there
// will be a sequence of 11 instructions for NONE and YIELD and 12
// instructions for NOP and ISB, SpinPause will always return 1.
uint64_t br_dst;
const int instructions_per_case = 2;
int64_t off = VM_Version::spin_wait_desc().inst() * instructions_per_case * Assembler::instruction_size;

assert(VM_Version::spin_wait_desc().inst() >= SpinWait::NONE &&
VM_Version::spin_wait_desc().inst() <= SpinWait::YIELD, "must be");
assert(-1 == SpinWait::NONE, "must be");
assert( 0 == SpinWait::NOP, "must be");
assert( 1 == SpinWait::ISB, "must be");
assert( 2 == SpinWait::YIELD, "must be");
// will always be a sequence of instructions, SpinPause will always return 1.

assert(SpinWait::NONE == 0, "SpinWait::Inst value 0 reserved to indicate no implementation");
assert(SpinWait::YIELD == 1, "SpinWait::Inst value 1 reserved for 'yield' instruction");
assert(SpinWait::ISB == 2, "SpinWait::Inst value 2 reserved for 'isb' instruction");
assert(SpinWait::SB == 4, "SpinWait::Inst value 4 reserved for 'sb' instruction");
assert(SpinWait::NOP == 8, "SpinWait::Inst value 8 reserved for 'nop' instruction");

const uint64_t inst_id = VM_Version::spin_wait_desc().inst();
assert(inst_id == 0 || is_power_of_2(inst_id), "Values of SpinWait::Inst must be 0 or power of 2");
assert(inst_id != SpinWait::SB || VM_Version::supports_sb(), "current CPU does not support SB instruction");
if (inst_id > SpinWait::NOP) {
warining("Unsupported type of SpinWait::Inst: %d", inst_id);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning

Also, I think we want to minimize the amount of code we actually execute in SpinPause, since it likely sits in the hot loop. So checking this here is probably counter-productive.

ShouldNotReachHere();
}

// The assembly code below is equivalent to the following:
//
// if (inst_id == 1) {
// exec_yield_inst();
// } else if (inst_id == 2) {
// exec_isb_inst();
// } else if (inst_id == 4) {
// exec_sb_inst();
// } else if (inst_id == 8) {
// exec_nop_inst();
// }
asm volatile(
" adr %[d], 20 \n" // 20 == PC here + 5 instructions => address
// to entry for case SpinWait::NOP
" add %[d], %[d], %[o] \n"
" br %[d] \n"
" b 1f \n" // case SpinWait::NONE (-1)
" nop \n" // padding
" nop \n" // case SpinWait::NOP ( 0)
" b 1f \n"
" isb \n" // case SpinWait::ISB ( 1)
" b 1f \n"
" yield \n" // case SpinWait::YIELD ( 2)
"1: \n"
: [d]"=&r"(br_dst)
: [o]"r"(off)
" tbz %[id], 0, 0f \n" // The default instruction for SpinWait is YIELD.
// We check it first before going to switch.
" yield \n"
" b 4f \n"
"0: \n"
" tbnz %[id], 1, 1f \n"
" tbnz %[id], 2, 2f \n"
" tbnz %[id], 3, 3f \n"
" b 4f \n"
"1: \n"
" isb \n"
" b 4f \n"
"2: \n"
" .inst 0xd50330ff \n" // SB instruction, explicitly encoded not to rely on a compiler
" b 4f \n"
"3: \n"
" nop \n"
"4: \n"
:
: [id]"r"(inst_id)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's no good reason to handle all this logic in asm. Please use a switch statement instead, and we can also get rid of most of the assertions by adding a ShouldNotReachHere() in the default clause.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with you. I proposed to use the switch when JDK-8321371 was being reviewed: #16994 (comment)

Frederick (@fbredber) wanted to avoid branches: #16994 (comment)

Copy link
Member Author

@eastig eastig Jul 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The switch-based version is committed: e984fde

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, so this inline assembly was to optimize SpinPause, since it sits in hot loop. Have you looked at disassembly for SpinPause before/after? On my M1, I see this:

% lldb -o "disassemble -n SpinPause" -o quit -- build/macosx-aarch64-server-release/images/jdk/lib/server/libjvm.dylib

# Before
libjvm.dylib`::SpinPause():
libjvm.dylib[0x89f0d4] <+0>:  stp    x29, x30, [sp, #-0x10]!
libjvm.dylib[0x89f0d8] <+4>:  mov    x29, sp
libjvm.dylib[0x89f0dc] <+8>:  adrp   x8, 1409
libjvm.dylib[0x89f0e0] <+12>: add    x8, x8, #0x80 ; VM_Version::_spin_wait
libjvm.dylib[0x89f0e4] <+16>: ldrsw  x8, [x8]
libjvm.dylib[0x89f0e8] <+20>: lsl    x8, x8, #3
libjvm.dylib[0x89f0ec] <+24>: adr    x9, 0x89f100 ; <+44> at os_bsd_aarch64.cpp:545:5
libjvm.dylib[0x89f0f0] <+28>: add    x9, x9, x8
libjvm.dylib[0x89f0f4] <+32>: br     x9
libjvm.dylib[0x89f0f8] <+36>: b      0x89f114       ; <+64> at os_bsd_aarch64.cpp:561:5
libjvm.dylib[0x89f0fc] <+40>: nop    
libjvm.dylib[0x89f100] <+44>: nop    
libjvm.dylib[0x89f104] <+48>: b      0x89f114       ; <+64> at os_bsd_aarch64.cpp:561:5
libjvm.dylib[0x89f108] <+52>: isb    
libjvm.dylib[0x89f10c] <+56>: b      0x89f114       ; <+64> at os_bsd_aarch64.cpp:561:5
libjvm.dylib[0x89f110] <+60>: yield  
libjvm.dylib[0x89f114] <+64>: mov    w0, #0x1 ; =1 
libjvm.dylib[0x89f118] <+68>: ldp    x29, x30, [sp], #0x10
libjvm.dylib[0x89f11c] <+72>: ret  

# After
libjvm.dylib`::SpinPause():
libjvm.dylib[0x89f074] <+0>:   stp    x29, x30, [sp, #-0x10]!
libjvm.dylib[0x89f078] <+4>:   mov    x29, sp
libjvm.dylib[0x89f07c] <+8>:   adrp   x8, 1409
libjvm.dylib[0x89f080] <+12>:  add    x8, x8, #0x80 ; VM_Version::_spin_wait
libjvm.dylib[0x89f084] <+16>:  ldr    w8, [x8]
libjvm.dylib[0x89f088] <+20>:  add    w8, w8, #0x1
libjvm.dylib[0x89f08c] <+24>:  cmp    w8, #0x4
libjvm.dylib[0x89f090] <+28>:  b.hi   0x89f0ec       ; <+120> at os_bsd_aarch64.cpp:551:7
libjvm.dylib[0x89f094] <+32>:  adrp   x9, 0
libjvm.dylib[0x89f098] <+36>:  add    x9, x9, #0xfc ; ___lldb_unnamed_symbol66913
libjvm.dylib[0x89f09c] <+40>:  adr    x10, 0x89f09c ; <+40> at os_bsd_aarch64.cpp
libjvm.dylib[0x89f0a0] <+44>:  ldrsw  x11, [x9, x8, lsl #2]
libjvm.dylib[0x89f0a4] <+48>:  add    x10, x10, x11
libjvm.dylib[0x89f0a8] <+52>:  br     x10
libjvm.dylib[0x89f0ac] <+56>:  nop    
libjvm.dylib[0x89f0b0] <+60>:  mov    w0, #0x1 ; =1 
libjvm.dylib[0x89f0b4] <+64>:  ldp    x29, x30, [sp], #0x10
libjvm.dylib[0x89f0b8] <+68>:  ret    
libjvm.dylib[0x89f0bc] <+72>:  isb    
libjvm.dylib[0x89f0c0] <+76>:  mov    w0, #0x1 ; =1 
libjvm.dylib[0x89f0c4] <+80>:  ldp    x29, x30, [sp], #0x10
libjvm.dylib[0x89f0c8] <+84>:  ret    
libjvm.dylib[0x89f0cc] <+88>:  yield  
libjvm.dylib[0x89f0d0] <+92>:  mov    w0, #0x1 ; =1 
libjvm.dylib[0x89f0d4] <+96>:  ldp    x29, x30, [sp], #0x10
libjvm.dylib[0x89f0d8] <+100>: ret    
libjvm.dylib[0x89f0dc] <+104>: sb     
libjvm.dylib[0x89f0e0] <+108>: mov    w0, #0x1 ; =1 
libjvm.dylib[0x89f0e4] <+112>: ldp    x29, x30, [sp], #0x10
libjvm.dylib[0x89f0e8] <+116>: ret    
libjvm.dylib[0x89f0ec] <+120>: adrp   x0, 1063
libjvm.dylib[0x89f0f0] <+124>: add    x0, x0, #0xe2a ; "src/hotspot/os_cpu/bsd_aarch64/os_bsd_aarch64.cpp"
libjvm.dylib[0x89f0f4] <+128>: mov    w1, #0x227 ; =551 
libjvm.dylib[0x89f0f8] <+132>: bl     0x311f84       ; report_should_not_reach_here at debug.cpp:247

So I think the switch is compiled fairly well. At first glance, it generates more code by duplicating the epilog for every case, but I think that is a bit cleaner than trying to do branch-overs. It generates marginally better code if you place the cases in enum order and emit the ShouldNotReachHere() branch only for debug builds:

#ifdef ASSERT
    default:
      ShouldNotReachHere();
#endif

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Exactly. When writing inline asm, it doesn't much help to try to out-guess the compiler.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the differences might be smaller on real hardware. Maybe everything will be around 200 clocks.
If we need code that is easy to maintain, then the switch is the choice. If we need performance, then it is tbz.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, I did quite a lot of performance measurements before I settled on the assembler solution. Have you made any comparison before and after changing from the assembler code to the new c++ code? If so what tests did you run? Since the code is called in tight locking loops, this code really matters.

Given that this routine is a backoff delay, it's not clear that speeding it up helps.

However, if we really wanted to speed this up we'd use an indirect branch to one of four code blocks: isb; ret would be one of them. But I don't think we do want to speed it up.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@eastig
When I tested JDK-8320317, the DaCapo-h2-large test showed very stable values when run multiple times. The performance went up on Linux x86 and Windows x86 by approximately 12%, but went down by roughly the same amount on macOS AArch64. That performance decrease could, however, be avoided by implementing SpinPause() on macOS. So I fixed that in JDK-8321371.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@eastig
I would prefer your tbz version for reasons stated earlier, but most notably to make it stable across toolchains.
But maybe it's best to just integrate it as is since there are other problems (JDK-8361032 and JDK-8360936) that are waiting for this to be fixed.

Time will tell if this implementation will make performance better or worse. The debate of how to best implement SpinPause() will surely not end today. :)

Copy link
Contributor

@adinn adinn Jul 23, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Andrew Haley is right. This is a backoff routine. Making it back off that little bit faster or slower is a pointless micro-optimization.

Of course, making the implementation a lot slower could well upset the dynamics of the backoff but that's a different order of magnitude to what is at stake in the various hand-written or generated assembly routines being discussed here. A key thing to note in that regard is that any branching that happens is going to always be followed the same way and hence will be very accurately predicted.

And in other news . . . Rome is burning . . .

: "memory");
return 1;
}
Expand Down
33 changes: 33 additions & 0 deletions test/hotspot/gtest/aarch64/test_spin_pause.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
* Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/

// This test is compiled only for AArch64 builds that are not Zero builds.
#if defined(AARCH64) && !defined(ZERO)

#include "utilities/spinYield.hpp"
#include "unittest.hpp"

// Sanity check: calls SpinPause() once and requires that it returns 1.
TEST_VM(SpinPause, sanity) {
ASSERT_EQ(SpinPause(), 1);
}

#endif // AARCH64 && !ZERO
46 changes: 46 additions & 0 deletions test/hotspot/jtreg/gtest/TestSpinPauseAArch64.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
* Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/

/*
* @test TestSpinPauseAArch64
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A common way to tag the tests:

@test id=default

I assume all these are supported in ARMv8.0 (default? baseline?) profile. That's why I put default. Put something else if that is incorrect.

Same for the second @test block.

* @bug 8362193
* @summary Run SpinPause gtest using different instructions for SpinPause
* @library /test/lib
* @requires vm.flagless
* @requires os.arch=="aarch64"
* @run main/native GTestWrapper --gtest_filter=SpinPause*
* @run main/native GTestWrapper --gtest_filter=SpinPause* -XX:+UnlockDiagnosticVMOptions -XX:OnSpinWaitInst=none
* @run main/native GTestWrapper --gtest_filter=SpinPause* -XX:+UnlockDiagnosticVMOptions -XX:OnSpinWaitInst=nop
* @run main/native GTestWrapper --gtest_filter=SpinPause* -XX:+UnlockDiagnosticVMOptions -XX:OnSpinWaitInst=isb
* @run main/native GTestWrapper --gtest_filter=SpinPause* -XX:+UnlockDiagnosticVMOptions -XX:OnSpinWaitInst=yield
*/

/*
* @test TestSpinPauseSBAArch64
* @bug 8362193
* @summary Run SpinPause gtest using SB instruction for SpinPause
* @library /test/lib
* @requires vm.flagless
* @requires (os.arch=="aarch64" & vm.cpu.features ~= ".*sb.*")
* @run main/native GTestWrapper --gtest_filter=SpinPause* -XX:+UnlockDiagnosticVMOptions -XX:OnSpinWaitInst=sb
*/