-
Notifications
You must be signed in to change notification settings - Fork 6.1k
8362193: Re-work MacOS/AArch64 SpinPause to handle SB #26387
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 10 commits
ae46cf8
f4b2554
3b3c969
60e08a3
c67cede
b2680c9
629ab69
1521ee5
c54b76c
109e667
31cf091
d47fa11
e984fde
57d30a3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
/* | ||
* Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. | ||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | ||
* | ||
* This code is free software; you can redistribute it and/or modify it | ||
* under the terms of the GNU General Public License version 2 only, as | ||
* published by the Free Software Foundation. | ||
* | ||
* This code is distributed in the hope that it will be useful, but WITHOUT | ||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | ||
* version 2 for more details (a copy is included in the LICENSE file that | ||
* accompanied this code). | ||
* | ||
* You should have received a copy of the GNU General Public License version | ||
* 2 along with this work; if not, write to the Free Software Foundation, | ||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | ||
* | ||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA | ||
* or visit www.oracle.com if you need additional information or have any | ||
* questions. | ||
*/ | ||
|
||
#include "spin_wait_aarch64.hpp" | ||
|
||
#include <string.h> | ||
|
||
bool SpinWait::supports(const char *name) { | ||
return name != nullptr && | ||
(strcmp(name, "nop") == 0 || | ||
strcmp(name, "isb") == 0 || | ||
strcmp(name, "yield") == 0 || | ||
strcmp(name, "sb") == 0 || | ||
strcmp(name, "none") == 0); | ||
} | ||
|
||
SpinWait::Inst SpinWait::from_name(const char* name) { | ||
assert(supports(name), "spin wait instruction name must be one of: " SPIN_WAIT_INST_OPTIONS); | ||
|
||
if (strcmp(name, "nop") == 0) { | ||
return SpinWait::NOP; | ||
} else if (strcmp(name, "isb") == 0) { | ||
return SpinWait::ISB; | ||
} else if (strcmp(name, "yield") == 0) { | ||
return SpinWait::YIELD; | ||
} else if (strcmp(name, "sb") == 0) { | ||
return SpinWait::SB; | ||
} | ||
|
||
return SpinWait::NONE; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -49,8 +49,10 @@ | |
#include "runtime/sharedRuntime.hpp" | ||
#include "runtime/stubRoutines.hpp" | ||
#include "runtime/timer.hpp" | ||
#include "runtime/vm_version.hpp" | ||
#include "signals_posix.hpp" | ||
#include "utilities/align.hpp" | ||
#include "utilities/debug.hpp" | ||
#include "utilities/events.hpp" | ||
#include "utilities/vmError.hpp" | ||
|
||
|
@@ -524,39 +526,59 @@ static inline void atomic_copy64(const volatile void *src, volatile void *dst) { | |
} | ||
|
||
extern "C" { | ||
// needs local assembler label '1:' to avoid trouble when using linktime optimization | ||
// needs local assembler label '4:' to avoid trouble when using linktime optimization | ||
int SpinPause() { | ||
// We don't use StubRoutines::aarch64::spin_wait stub in order to | ||
// avoid a costly call to os::current_thread_enable_wx() on MacOS. | ||
// We should return 1 if SpinPause is implemented, and since there | ||
// will be a sequence of 11 instructions for NONE and YIELD and 12 | ||
// instructions for NOP and ISB, SpinPause will always return 1. | ||
uint64_t br_dst; | ||
const int instructions_per_case = 2; | ||
int64_t off = VM_Version::spin_wait_desc().inst() * instructions_per_case * Assembler::instruction_size; | ||
|
||
assert(VM_Version::spin_wait_desc().inst() >= SpinWait::NONE && | ||
VM_Version::spin_wait_desc().inst() <= SpinWait::YIELD, "must be"); | ||
assert(-1 == SpinWait::NONE, "must be"); | ||
assert( 0 == SpinWait::NOP, "must be"); | ||
assert( 1 == SpinWait::ISB, "must be"); | ||
assert( 2 == SpinWait::YIELD, "must be"); | ||
// will be always a sequence of instructions, SpinPause will always return 1. | ||
|
||
assert(SpinWait::NONE == 0, "SpinWait::Inst value 0 reserved to indicate no implementation"); | ||
assert(SpinWait::YIELD == 1, "SpinWait::Inst value 1 reserved for 'yield' instruction"); | ||
assert(SpinWait::ISB == 2, "SpinWait::Inst value 2 reserved for 'isb' instruction"); | ||
assert(SpinWait::SB == 4, "SpinWait::Inst value 4 reserved for 'sb' instruction"); | ||
assert(SpinWait::NOP == 8, "SpinWait::Inst value 8 reserved for 'nop' instruction"); | ||
|
||
const uint64_t inst_id = VM_Version::spin_wait_desc().inst(); | ||
assert(inst_id == 0 || is_power_of_2(inst_id), "Values of SpinWait::Inst must be 0 or power of 2"); | ||
assert(inst_id != SpinWait::SB || VM_Version::supports_sb(), "current CPU does not support SB instruction"); | ||
if (inst_id > SpinWait::NOP) { | ||
warining("Unsupported type of SpinWait::Inst: %d", inst_id); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Also, I think we want to minimize the amount of code we actually execute in |
||
ShouldNotReachHere(); | ||
} | ||
|
||
// The assembly code below is equivalent to the following: | ||
// | ||
// if (inst_id == 1) { | ||
// exec_yield_inst(); | ||
// } else if (inst_id == 2) { | ||
// exec_isb_inst(); | ||
// } else if (inst_id == 4) { | ||
// exec_sb_inst(); | ||
// } else if (inst_id == 8) { | ||
// exec_nop_inst(); | ||
// } | ||
asm volatile( | ||
" adr %[d], 20 \n" // 20 == PC here + 5 instructions => address | ||
// to entry for case SpinWait::NOP | ||
" add %[d], %[d], %[o] \n" | ||
" br %[d] \n" | ||
" b 1f \n" // case SpinWait::NONE (-1) | ||
" nop \n" // padding | ||
" nop \n" // case SpinWait::NOP ( 0) | ||
" b 1f \n" | ||
" isb \n" // case SpinWait::ISB ( 1) | ||
" b 1f \n" | ||
" yield \n" // case SpinWait::YIELD ( 2) | ||
"1: \n" | ||
: [d]"=&r"(br_dst) | ||
: [o]"r"(off) | ||
" tbz %[id], 0, 0f \n" // The default instruction for SpinWait is YIELD. | ||
// We check it first before going to switch. | ||
" yield \n" | ||
" b 4f \n" | ||
"0: \n" | ||
" tbnz %[id], 1, 1f \n" | ||
" tbnz %[id], 2, 2f \n" | ||
" tbnz %[id], 3, 3f \n" | ||
" b 4f \n" | ||
"1: \n" | ||
" isb \n" | ||
" b 4f \n" | ||
"2: \n" | ||
" .inst 0xd50330ff \n" // SB instruction, explicitly encoded not to rely on a compiler | ||
" b 4f \n" | ||
"3: \n" | ||
" nop \n" | ||
"4: \n" | ||
: | ||
: [id]"r"(inst_id) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There's no good reason to handle all this logic in asm. Please use a switch statement instead, and we can also get rid of most of the assertions by adding a ShouldNotReachHere() in the default clause. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree with you. I proposed to use the switch when JDK-8321371 was being reviewed: #16994 (comment) Frederick (@fbredber) wanted to avoid branches: #16994 (comment) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The switch-based version is committed: e984fde There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK, so this inline assembly was to optimize
So I think switch is fairly well compiled. On first glance, it generates more code by duplicating the epilog for every case, but I think that is a bit cleaner than trying to do branch-overs. It generates marginally better code if you place
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Exactly. When writing inline asm, it doesn't much help to try to out-guess the compiler. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the differences might be smaller on the real hardware. Maybe everything will be around 200 clocks. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Given that this routine is a backoff delay, it's not clear that speeding it up helps. However, if we really wanted to speed this up we'd use an indirect branch to one of four code blocks: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @eastig There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @eastig Time will tell if this implementation will make performance better or worse. The debate of how to best implement There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Andrew Haley is right. This is a backoff routine. Making it back off that little bit faster or slower is a pointless micro-optimization. Of course, making the implementation a lot slower could well upset the dynamics of the backoff but that's a different order of magnitude to what is at stake in the various hand-written or generated assembly routines being discussed here. A key thing to note in that regard is that any branching that happens is going to always be followed the same way and hence will be very accurately predicted. And in other news . . . Rome is burning . . . |
||
: "memory"); | ||
return 1; | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
/* | ||
* Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. | ||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | ||
* | ||
* This code is free software; you can redistribute it and/or modify it | ||
* under the terms of the GNU General Public License version 2 only, as | ||
* published by the Free Software Foundation. | ||
* | ||
* This code is distributed in the hope that it will be useful, but WITHOUT | ||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | ||
* version 2 for more details (a copy is included in the LICENSE file that | ||
* accompanied this code). | ||
* | ||
* You should have received a copy of the GNU General Public License version | ||
* 2 along with this work; if not, write to the Free Software Foundation, | ||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | ||
* | ||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA | ||
* or visit www.oracle.com if you need additional information or have any | ||
* questions. | ||
*/ | ||
|
||
#if defined(AARCH64) && !defined(ZERO) | ||
|
||
#include "utilities/spinYield.hpp" | ||
#include "unittest.hpp" | ||
|
||
TEST_VM(SpinPause, sanity) { | ||
ASSERT_EQ(SpinPause(), 1); | ||
} | ||
|
||
#endif // AARCH64 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
/* | ||
* Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. | ||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | ||
* | ||
* This code is free software; you can redistribute it and/or modify it | ||
* under the terms of the GNU General Public License version 2 only, as | ||
* published by the Free Software Foundation. | ||
* | ||
* This code is distributed in the hope that it will be useful, but WITHOUT | ||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | ||
* version 2 for more details (a copy is included in the LICENSE file that | ||
* accompanied this code). | ||
* | ||
* You should have received a copy of the GNU General Public License version | ||
* 2 along with this work; if not, write to the Free Software Foundation, | ||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | ||
* | ||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA | ||
* or visit www.oracle.com if you need additional information or have any | ||
* questions. | ||
*/ | ||
|
||
/* | ||
* @test TestSpinPauseAArch64 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A common way to tag the tests:
I assume all these are supported in ARMv8.0 (default? baseline?) profile. That's why I put Same for the second |
||
* @bug 8362193 | ||
* @summary Run SpinPause gtest using different instructions for SpinPause | ||
* @library /test/lib | ||
* @requires vm.flagless | ||
* @requires os.arch=="aarch64" | ||
* @run main/native GTestWrapper --gtest_filter=SpinPause* | ||
* @run main/native GTestWrapper --gtest_filter=SpinPause* -XX:+UnlockDiagnosticVMOptions -XX:OnSpinWaitInst=none | ||
* @run main/native GTestWrapper --gtest_filter=SpinPause* -XX:+UnlockDiagnosticVMOptions -XX:OnSpinWaitInst=nop | ||
* @run main/native GTestWrapper --gtest_filter=SpinPause* -XX:+UnlockDiagnosticVMOptions -XX:OnSpinWaitInst=isb | ||
* @run main/native GTestWrapper --gtest_filter=SpinPause* -XX:+UnlockDiagnosticVMOptions -XX:OnSpinWaitInst=yield | ||
*/ | ||
|
||
/* | ||
* @test TestSpinPauseSBAArch64 | ||
* @bug 8362193 | ||
* @summary Run SpinPause gtest using SB instruction for SpinPause | ||
* @library /test/lib | ||
* @requires vm.flagless | ||
* @requires (os.arch=="aarch64" & vm.cpu.features ~= ".*sb.*") | ||
* @run main/native GTestWrapper --gtest_filter=SpinPause* -XX:+UnlockDiagnosticVMOptions -XX:OnSpinWaitInst=sb | ||
*/ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So this is not about the actual spin-wait hints support, correct? This only checks the option string is in expected domain? A common way to deal with this is to use constraint functions, see for example: