Skip to content

Commit 7a28ad5

Browse files
gbaraldi authored and KristofferC committed
Actually setup jit targets when compiling packageimages instead of targeting only one (#54471)
(cherry picked from commit 859353d)
1 parent 95f30e5 commit 7a28ad5

File tree

5 files changed: +144 additions, -11 deletions

src/codegen.cpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6553,8 +6553,11 @@ static Function* gen_cfun_wrapper(
65536553
ctx.builder.ClearInsertionPoint();
65546554

65556555
if (aliasname) {
6556-
GlobalAlias::create(cw->getValueType(), cw->getType()->getAddressSpace(),
6556+
auto alias = GlobalAlias::create(cw->getValueType(), cw->getType()->getAddressSpace(),
65576557
GlobalValue::ExternalLinkage, aliasname, cw, M);
6558+
if(ctx.emission_context.TargetTriple.isOSBinFormatCOFF()) {
6559+
alias->setDLLStorageClass(GlobalValue::DLLStorageClassTypes::DLLExportStorageClass);
6560+
}
65586561
}
65596562

65606563
if (nest) {

src/llvm-multiversioning.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -674,6 +674,7 @@ void CloneCtx::rewrite_alias(GlobalAlias *alias, Function *F)
674674
trampoline->removeFnAttr("julia.mv.reloc");
675675
trampoline->removeFnAttr("julia.mv.clones");
676676
trampoline->addFnAttr("julia.mv.alias");
677+
trampoline->setDLLStorageClass(alias->getDLLStorageClass());
677678
alias->eraseFromParent();
678679

679680
uint32_t id;

src/processor_arm.cpp

Lines changed: 48 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1871,12 +1871,55 @@ const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void)
18711871
return res;
18721872
}
18731873

1874+
#ifndef __clang_gcanalyzer__
18741875
std::vector<jl_target_spec_t> jl_get_llvm_clone_targets(void)
18751876
{
1876-
if (jit_targets.empty())
1877-
jl_error("JIT targets not initialized");
1877+
auto &cmdline = get_cmdline_targets();
1878+
check_cmdline(cmdline, true);
1879+
llvm::SmallVector<TargetData<feature_sz>, 0> image_targets;
1880+
for (auto &arg: cmdline) {
1881+
auto data = arg_target_data(arg, image_targets.empty());
1882+
image_targets.push_back(std::move(data));
1883+
}
1884+
auto ntargets = image_targets.size();
1885+
if (image_targets.empty())
1886+
jl_error("No targets specified");
18781887
std::vector<jl_target_spec_t> res;
1879-
for (auto &target: jit_targets) {
1888+
// Now decide the clone condition.
1889+
for (size_t i = 1; i < ntargets; i++) {
1890+
auto &t = image_targets[i];
1891+
if (t.en.flags & JL_TARGET_CLONE_ALL)
1892+
continue;
1893+
auto &features0 = image_targets[t.base].en.features;
1894+
// Always clone when code checks CPU features
1895+
t.en.flags |= JL_TARGET_CLONE_CPU;
1896+
static constexpr uint32_t clone_fp16[] = {Feature::fp16fml,Feature::fullfp16};
1897+
for (auto fe: clone_fp16) {
1898+
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
1899+
t.en.flags |= JL_TARGET_CLONE_FLOAT16;
1900+
break;
1901+
}
1902+
}
1903+
// The most useful one in general...
1904+
t.en.flags |= JL_TARGET_CLONE_LOOP;
1905+
#ifdef _CPU_ARM_
1906+
static constexpr uint32_t clone_math[] = {Feature::vfp3, Feature::vfp4, Feature::neon};
1907+
for (auto fe: clone_math) {
1908+
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
1909+
t.en.flags |= JL_TARGET_CLONE_MATH;
1910+
break;
1911+
}
1912+
}
1913+
static constexpr uint32_t clone_simd[] = {Feature::neon};
1914+
for (auto fe: clone_simd) {
1915+
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
1916+
t.en.flags |= JL_TARGET_CLONE_SIMD;
1917+
break;
1918+
}
1919+
}
1920+
#endif
1921+
}
1922+
for (auto &target: image_targets) {
18801923
auto features_en = target.en.features;
18811924
auto features_dis = target.dis.features;
18821925
for (auto &fename: feature_names) {
@@ -1896,6 +1939,8 @@ std::vector<jl_target_spec_t> jl_get_llvm_clone_targets(void)
18961939
}
18971940
return res;
18981941
}
1942+
#endif
1943+
18991944

19001945
extern "C" int jl_test_cpu_feature(jl_cpu_feature_t feature)
19011946
{

src/processor_fallback.cpp

Lines changed: 18 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -145,12 +145,26 @@ const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void)
145145
return res;
146146
}
147147

148+
#ifndef __clang_gcanalyzer__
148149
extern "C" std::vector<jl_target_spec_t> jl_get_llvm_clone_targets(void)
149150
{
150-
if (jit_targets.empty())
151-
jl_error("JIT targets not initialized");
151+
auto &cmdline = get_cmdline_targets();
152+
check_cmdline(cmdline, true);
153+
llvm::SmallVector<TargetData<1>, 0> image_targets;
154+
for (auto &arg: cmdline) {
155+
auto data = arg_target_data(arg, image_targets.empty());
156+
image_targets.push_back(std::move(data));
157+
}
158+
auto ntargets = image_targets.size();
159+
// Now decide the clone condition.
160+
for (size_t i = 1; i < ntargets; i++) {
161+
auto &t = image_targets[i];
162+
t.en.flags |= JL_TARGET_CLONE_ALL;
163+
}
164+
if (image_targets.empty())
165+
jl_error("No image targets found");
152166
std::vector<jl_target_spec_t> res;
153-
for (auto &target: jit_targets) {
167+
for (auto &target: image_targets) {
154168
jl_target_spec_t ele;
155169
std::tie(ele.cpu_name, ele.cpu_features) = get_llvm_target_str(target);
156170
ele.data = serialize_target_data(target.name, target.en.features,
@@ -161,6 +175,7 @@ extern "C" std::vector<jl_target_spec_t> jl_get_llvm_clone_targets(void)
161175
}
162176
return res;
163177
}
178+
#endif
164179

165180
JL_DLLEXPORT jl_value_t *jl_get_cpu_name(void)
166181
{

src/processor_x86.cpp

Lines changed: 73 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1095,13 +1095,81 @@ extern "C" JL_DLLEXPORT const std::pair<std::string,std::string> &jl_get_llvm_di
10951095
{feature_masks, 0}, {{}, 0}, 0});
10961096
return res;
10971097
}
1098-
1098+
#ifndef __clang_gcanalyzer__
10991099
extern "C" JL_DLLEXPORT std::vector<jl_target_spec_t> jl_get_llvm_clone_targets(void)
11001100
{
1101-
if (jit_targets.empty())
1102-
jl_error("JIT targets not initialized");
1101+
auto &cmdline = get_cmdline_targets();
1102+
check_cmdline(cmdline, true);
1103+
llvm::SmallVector<TargetData<feature_sz>, 0> image_targets;
1104+
for (auto &arg: cmdline) {
1105+
auto data = arg_target_data(arg, image_targets.empty());
1106+
image_targets.push_back(std::move(data));
1107+
}
1108+
1109+
auto ntargets = image_targets.size();
1110+
// Now decide the clone condition.
1111+
for (size_t i = 1; i < ntargets; i++) {
1112+
auto &t = image_targets[i];
1113+
if (t.en.flags & JL_TARGET_CLONE_ALL)
1114+
continue;
1115+
// Always clone when code checks CPU features
1116+
t.en.flags |= JL_TARGET_CLONE_CPU;
1117+
// The most useful one in general...
1118+
t.en.flags |= JL_TARGET_CLONE_LOOP;
1119+
auto &features0 = image_targets[t.base].en.features;
1120+
// Special case for KNL/KNM since they're so different
1121+
if (!(t.dis.flags & JL_TARGET_CLONE_ALL)) {
1122+
if ((t.name == "knl" || t.name == "knm") &&
1123+
image_targets[t.base].name != "knl" && image_targets[t.base].name != "knm") {
1124+
t.en.flags |= JL_TARGET_CLONE_ALL;
1125+
break;
1126+
}
1127+
}
1128+
static constexpr uint32_t clone_math[] = {Feature::fma, Feature::fma4};
1129+
static constexpr uint32_t clone_simd[] = {Feature::sse3, Feature::ssse3,
1130+
Feature::sse41, Feature::sse42,
1131+
Feature::avx, Feature::avx2,
1132+
Feature::vaes, Feature::vpclmulqdq,
1133+
Feature::sse4a, Feature::avx512f,
1134+
Feature::avx512dq, Feature::avx512ifma,
1135+
Feature::avx512pf, Feature::avx512er,
1136+
Feature::avx512cd, Feature::avx512bw,
1137+
Feature::avx512vl, Feature::avx512vbmi,
1138+
Feature::avx512vpopcntdq, Feature::avxvnni,
1139+
Feature::avx512vbmi2, Feature::avx512vnni,
1140+
Feature::avx512bitalg, Feature::avx512bf16,
1141+
Feature::avx512vp2intersect, Feature::avx512fp16};
1142+
for (auto fe: clone_math) {
1143+
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
1144+
t.en.flags |= JL_TARGET_CLONE_MATH;
1145+
break;
1146+
}
1147+
}
1148+
for (auto fe: clone_simd) {
1149+
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
1150+
t.en.flags |= JL_TARGET_CLONE_SIMD;
1151+
break;
1152+
}
1153+
}
1154+
static constexpr uint32_t clone_fp16[] = {Feature::avx512fp16};
1155+
for (auto fe: clone_fp16) {
1156+
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
1157+
t.en.flags |= JL_TARGET_CLONE_FLOAT16;
1158+
break;
1159+
}
1160+
}
1161+
static constexpr uint32_t clone_bf16[] = {Feature::avx512bf16};
1162+
for (auto fe: clone_bf16) {
1163+
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
1164+
t.en.flags |= JL_TARGET_CLONE_BFLOAT16;
1165+
break;
1166+
}
1167+
}
1168+
}
1169+
if (image_targets.empty())
1170+
jl_error("No targets specified");
11031171
std::vector<jl_target_spec_t> res;
1104-
for (auto &target: jit_targets) {
1172+
for (auto &target: image_targets) {
11051173
auto features_en = target.en.features;
11061174
auto features_dis = target.dis.features;
11071175
for (auto &fename: feature_names) {
@@ -1121,6 +1189,7 @@ extern "C" JL_DLLEXPORT std::vector<jl_target_spec_t> jl_get_llvm_clone_targets(
11211189
}
11221190
return res;
11231191
}
1192+
#endif
11241193

11251194
extern "C" int jl_test_cpu_feature(jl_cpu_feature_t feature)
11261195
{

0 commit comments

Comments
 (0)