Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
6a42f08
1
aobolensk Aug 19, 2025
fde9406
2
aobolensk Aug 19, 2025
3dd7afc
3
aobolensk Aug 19, 2025
1c92860
4
aobolensk Aug 19, 2025
f1613f2
fmt
aobolensk Aug 19, 2025
945bd8a
override
aobolensk Aug 19, 2025
3854591
tune skip
aobolensk Aug 19, 2025
8aba094
tidy
aobolensk Aug 19, 2025
c850c0f
fix missing includes
aobolensk Aug 19, 2025
3bac861
fix strict aliasing
aobolensk Aug 19, 2025
6ac561e
minor remarks
aobolensk Aug 19, 2025
aa852ef
xuantie ci trigger
aobolensk Aug 19, 2025
150f03f
limit to add
aobolensk Aug 19, 2025
9f56de5
::adj
aobolensk Sep 10, 2025
f4ce6fe
fmt
aobolensk Sep 10, 2025
2e018a3
add includes
aobolensk Sep 10, 2025
17cd929
trace
aobolensk Sep 10, 2025
f604fb5
rm
aobolensk Sep 10, 2025
859da17
tests on CI
aobolensk Sep 10, 2025
9484602
fix
aobolensk Sep 10, 2025
45f8a0d
fix snippets regex
aobolensk Sep 10, 2025
e691b62
fix regex
aobolensk Sep 11, 2025
4bff223
Address part of review comments
aobolensk Sep 15, 2025
c77aea3
getHostIsa function refactoring + minoradjustments
aobolensk Sep 15, 2025
769caa7
exclude mistakenly enabled tests
aobolensk Sep 17, 2025
f4d3953
Merge remote-tracking branch 'origin/master' into snippets-riscv64
aobolensk Sep 17, 2025
449fe8b
Fix compilation on other platforms
aobolensk Sep 17, 2025
d5e2667
Fix missing snippetsManager
aobolensk Sep 18, 2025
c11a9c3
Merge remote-tracking branch 'origin/master' into snippets-riscv64
aobolensk Sep 18, 2025
c22f2bb
restore regex
aobolensk Sep 18, 2025
6f835fd
riscv gpr count
aobolensk Sep 18, 2025
f13b0a1
Merge branch 'master' into snippets-riscv64
aobolensk Sep 22, 2025
8947192
CompiledSnippetCPUCommon
aobolensk Sep 22, 2025
22b6e84
Merge branch 'master' into snippets-riscv64
aobolensk Sep 23, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/linux_riscv_xuantie_dev_cpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ on:
- 'src/plugins/intel_cpu/src/emitters/plugin/aarch64/**'
- 'src/plugins/intel_cpu/src/emitters/plugin/riscv64/**'
- 'src/plugins/intel_cpu/src/emitters/plugin/x64/**'
- 'src/plugins/intel_cpu/src/emitters/snippets/riscv64/**'
- 'src/plugins/intel_cpu/src/nodes/executors/aarch64/**'
- 'src/plugins/intel_cpu/src/nodes/executors/shl/**'
- 'src/plugins/intel_cpu/src/nodes/kernels/riscv64/**'
Expand Down Expand Up @@ -234,7 +235,7 @@ jobs:
env:
INSTALL_DIR: ${{ github.workspace }}/install
INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests
GTEST_FILTER: ${{ inputs.testFilterType == 'SMOKE' && '*smoke*' || '*ActivationLayer*:*EltwiseLayer*:*LogicalLayer*:*ComparisonLayer*:*SelectLayer*:*MatMulLayerCPUTest*:*ExtremumLayerCPUTest*' }}
GTEST_FILTER: ${{ inputs.testFilterType == 'SMOKE' && '*smoke*' || '*ActivationLayer*:*EltwiseLayer*:*LogicalLayer*:*ComparisonLayer*:*SelectLayer*:*MatMulLayerCPUTest*:*ExtremumLayerCPUTest*:smoke_Snippets*' }}

steps:
- name: Download OpenVINO artifacts (package)
Expand Down
9 changes: 7 additions & 2 deletions src/plugins/intel_cpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -212,18 +212,23 @@ if(NOT (AARCH64 OR ARM))
${CMAKE_CURRENT_SOURCE_DIR}/src/transformations/tpp/aarch64/*
${CMAKE_CURRENT_SOURCE_DIR}/src/emitters/plugin/aarch64/*
${CMAKE_CURRENT_SOURCE_DIR}/src/emitters/tpp/aarch64/*
${CMAKE_CURRENT_SOURCE_DIR}/src/nodes/executors/aarch64/*
${CMAKE_CURRENT_SOURCE_DIR}/src/nodes/kernels/aarch64/*)
endif()

if(NOT (AARCH64 OR ARM))
list(APPEND EXCLUDE_PATHS ${CMAKE_CURRENT_SOURCE_DIR}/src/nodes/executors/aarch64/*)
endif()

if(NOT AARCH64)
list(APPEND EXCLUDE_PATHS ${CMAKE_CURRENT_SOURCE_DIR}/src/transformations/snippets/aarch64/*
${CMAKE_CURRENT_SOURCE_DIR}/src/emitters/snippets/aarch64/*)
endif()

if (NOT RISCV64)
list(APPEND EXCLUDE_PATHS ${CMAKE_CURRENT_SOURCE_DIR}/src/emitters/plugin/riscv64/*
${CMAKE_CURRENT_SOURCE_DIR}/src/nodes/kernels/riscv64/*)
${CMAKE_CURRENT_SOURCE_DIR}/src/nodes/kernels/riscv64/*
${CMAKE_CURRENT_SOURCE_DIR}/src/emitters/snippets/riscv64/*
${CMAKE_CURRENT_SOURCE_DIR}/src/nodes/executors/riscv64/*)
endif()

if (NOT ENABLE_MLAS_FOR_CPU)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
#include <cpu/aarch64/cpu_isa_traits.hpp>
#include <cpu/aarch64/jit_generator.hpp>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <set>
#include <utility>
Expand Down Expand Up @@ -213,23 +212,6 @@ class jit_snippet : public dnnl::impl::cpu::aarch64::jit_generator {

namespace intel_cpu::aarch64 {

// Takes ownership of an aarch64 JIT generator whose kernel has already been created.
// The OPENVINO_ASSERT check fails when the generator pointer is null or when
// jit_ker() reports no kernel.
CompiledSnippetCPU::CompiledSnippetCPU(std::unique_ptr<dnnl::impl::cpu::aarch64::jit_generator> h)
    : h_compiled(std::move(h)) {
    OPENVINO_ASSERT(h_compiled && h_compiled->jit_ker(), "Got invalid jit generator or kernel was not compiled");
}

// Returns the kernel pointer reported by the owned generator's jit_ker().
const uint8_t* CompiledSnippetCPU::get_code() const {
    const uint8_t* const kernel_entry = h_compiled->jit_ker();
    return kernel_entry;
}

// Returns the size reported by the owned generator's getSize().
size_t CompiledSnippetCPU::get_code_size() const {
    const size_t generated_size = h_compiled->getSize();
    return generated_size;
}

// A snippet is empty when get_code_size() reports zero.
bool CompiledSnippetCPU::empty() const {
    const bool has_code = get_code_size() != 0;
    return !has_code;
}

CPUTargetMachine::CPUTargetMachine(dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, ov::intel_cpu::MultiCacheWeakPtr cache)
: TargetMachine(std::make_shared<CPURuntimeConfigurator>(cache)),
h(new jit_snippet()),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

#include "cache/multi_cache.h"
#include "cpu/aarch64/jit_generator.hpp"
#include "emitters/snippets/common/compiled_snippet_cpu.hpp"
#include "openvino/core/node.hpp"
#include "openvino/core/node_output.hpp"
#include "snippets/emitter.hpp"
Expand All @@ -20,16 +21,7 @@

namespace ov::intel_cpu::aarch64 {

class CompiledSnippetCPU : public snippets::CompiledSnippet {
public:
explicit CompiledSnippetCPU(std::unique_ptr<dnnl::impl::cpu::aarch64::jit_generator> h);
[[nodiscard]] const uint8_t* get_code() const override;
[[nodiscard]] size_t get_code_size() const override;
[[nodiscard]] bool empty() const override;

private:
const std::unique_ptr<const dnnl::impl::cpu::aarch64::jit_generator> h_compiled;
};
using CompiledSnippetCPU = ov::intel_cpu::CompiledSnippetCPUCommon<dnnl::impl::cpu::aarch64::jit_generator>;

class CPUTargetMachine : public snippets::TargetMachine {
public:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <cstddef>
#include <cstdint>
#include <memory>

#include "openvino/core/except.hpp"
#include "snippets/target_machine.hpp"

namespace ov::intel_cpu {

// A small helper that wraps a platform-specific JIT generator and exposes
// a uniform CompiledSnippet interface. This reduces duplication across
// x64, aarch64 and riscv64 backends.
// Generic owner of a platform-specific JIT generator.
//
// JitGeneratorT must provide const-callable jit_ker() and getSize(); the wrapper
// forwards to them to implement the ov::snippets::CompiledSnippet interface, so each
// backend only needs a type alias instead of its own copy of this class.
template <typename JitGeneratorT>
class CompiledSnippetCPUCommon : public ov::snippets::CompiledSnippet {
    // Pointer-to-const: only const members of the generator are reachable through it.
    const std::unique_ptr<const JitGeneratorT> h_compiled;

public:
    // Takes ownership of `h`. The assertion fails if `h` is null or if jit_ker()
    // reports no kernel.
    explicit CompiledSnippetCPUCommon(std::unique_ptr<JitGeneratorT> h) : h_compiled(std::move(h)) {
        OPENVINO_ASSERT(h_compiled && h_compiled->jit_ker(), "Got invalid jit generator or kernel was not compiled");
    }

    // Kernel pointer reported by the generator.
    [[nodiscard]] const uint8_t* get_code() const override {
        const auto* const kernel_entry = h_compiled->jit_ker();
        return kernel_entry;
    }

    // Size reported by the generator's getSize().
    [[nodiscard]] size_t get_code_size() const override {
        const auto generated_size = h_compiled->getSize();
        return generated_size;
    }

    // True when get_code_size() is zero.
    [[nodiscard]] bool empty() const override {
        const bool has_code = get_code_size() != 0;
        return !has_code;
    }
};

} // namespace ov::intel_cpu
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "cpu_generator.hpp"

#include <cstddef>
#include <memory>
#include <nodes/kernels/riscv64/cpu_isa_traits.hpp>
#include <nodes/kernels/riscv64/jit_generator.hpp>
#include <set>
#include <utility>
#include <vector>

#include "cache/multi_cache.h"
#include "emitters/plugin/riscv64/jit_eltwise_emitters.hpp"
#include "emitters/snippets/cpu_runtime_configurator.hpp"
#include "jit_kernel_emitter.hpp"
#include "jit_loop_emitters.hpp"
#include "jit_memory_emitters.hpp"
#include "jit_snippets_emitters.hpp"
#include "openvino/core/except.hpp"
#include "openvino/core/node.hpp"
#include "openvino/core/node_output.hpp"
#include "openvino/core/type/element_type.hpp"
#include "openvino/op/add.hpp"
#include "openvino/op/parameter.hpp"
#include "openvino/op/result.hpp"
#include "snippets/emitter.hpp"
#include "snippets/generator.hpp"
#include "snippets/lowered/expression.hpp"
#include "snippets/op/broadcastload.hpp"
#include "snippets/op/kernel.hpp"
#include "snippets/op/load.hpp"
#include "snippets/op/loop.hpp"
#include "snippets/op/scalar.hpp"
#include "snippets/op/store.hpp"
#include "snippets/target_machine.hpp"
#include "utils/general_utils.h"
#include "xbyak_riscv/xbyak_riscv.hpp"

namespace ov {

// Expands to a brace-initialized pair of lambdas for a `jitters` entry:
//   1. a factory that builds `e_type` from (h.get(), isa, expr, <extra args>) and
//      returns it as a snippets::Emitter;
//   2. a query that forwards to e_type::get_supported_precisions(n).
// The factory captures `this`, so the macro can only be used inside a member
// function where `h` and `isa` are accessible.
// `##__VA_ARGS__` is the compiler extension that drops the preceding comma when no
// extra constructor arguments are supplied.
// NOTE(review): the stored lambda keeps a raw `this`; presumably the owning object is
// never copied or moved after registration - confirm.
#define CREATE_SNIPPETS_EMITTER(e_type, ...) \
{[this](const snippets::lowered::ExpressionPtr& expr) -> std::shared_ptr<snippets::Emitter> { \
return std::make_shared<e_type>(h.get(), isa, expr, ##__VA_ARGS__); \
}, \
[](const std::shared_ptr<ov::Node>& n) -> std::set<std::vector<element::Type>> { \
return e_type::get_supported_precisions(n); \
}}

// Expands to a brace-initialized pair of lambdas for a `jitters` entry, for emitters
// that are constructed from an ov::Node rather than from a lowered expression:
//   1. a factory that builds `e_type` from (h.get(), isa, expr->get_node());
//   2. a query that forwards to e_type::get_supported_precisions(n).
// The factory captures `this`, so the macro can only be used inside a member
// function where `h` and `isa` are accessible.
#define CREATE_CPU_EMITTER(e_type) \
{[this](const snippets::lowered::ExpressionPtr& expr) -> std::shared_ptr<snippets::Emitter> { \
return std::make_shared<e_type>(h.get(), isa, expr->get_node()); \
}, \
[](const std::shared_ptr<ov::Node>& n) -> std::set<std::vector<element::Type>> { \
return e_type::get_supported_precisions(n); \
}}

// Concrete riscv64 generator used by the snippets target machine.
// generate() is deliberately a no-op: this class emits nothing itself. Emitters are
// handed the generator through `h.get()` and presumably write the code into it.
class jit_snippet : public ov::intel_cpu::riscv64::jit_generator_t {
public:
    DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_snippet)

    jit_snippet() = default;
    ~jit_snippet() override = default;

    // Intentionally empty, see the class comment.
    void generate() override {}
};

namespace intel_cpu::riscv64 {

// Builds the riscv64 snippets target machine.
//
// host_isa - ISA the emitters are created for.
// cache    - weak handle to the kernel cache; it is copied into the runtime
//            configurator and then moved into `compiled_kernel_cache`.
//
// The body fills `jitters`, the table mapping an operation type to the pair
// {emitter factory, supported-precisions query} produced by the CREATE_* macros.
CPUTargetMachine::CPUTargetMachine(ov::intel_cpu::riscv64::cpu_isa_t host_isa, ov::intel_cpu::MultiCacheWeakPtr cache)
    : TargetMachine(std::make_shared<CPURuntimeConfigurator>(cache)),
      // make_unique instead of a raw `new`, matching how get_snippet() recreates the generator.
      h(std::make_unique<jit_snippet>()),
      isa(host_isa),
      compiled_kernel_cache(std::move(cache)) {
    // data movement
    jitters[op::v0::Parameter::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(jit_nop_emitter);
    jitters[op::v0::Result::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(jit_nop_emitter);
    jitters[snippets::op::Scalar::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(jit_scalar_emitter);

    // memory access
    jitters[snippets::op::Load::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(jit_load_memory_emitter);
    jitters[snippets::op::LoadReorder::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(jit_load_memory_emitter);
    jitters[snippets::op::BroadcastLoad::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(jit_load_broadcast_emitter);
    jitters[snippets::op::Store::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(jit_store_memory_emitter);

    // loop control
    jitters[snippets::op::LoopBegin::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(jit_loop_begin_emitter);
    jitters[snippets::op::LoopEnd::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(jit_loop_end_emitter);

    // service kernel entry points
    jitters[snippets::op::KernelStatic::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(jit_kernel_static_emitter);
    jitters[snippets::op::KernelDynamic::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(jit_kernel_dynamic_emitter);

    // binary operations
    jitters[op::v1::Add::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::riscv64::jit_add_emitter);
}

// Creates a new target machine for the same ISA and kernel cache handle.
// The clone is constructed from scratch, so it gets its own generator.
std::shared_ptr<snippets::TargetMachine> CPUTargetMachine::clone() const {
    auto cloned_machine = std::make_shared<CPUTargetMachine>(isa, compiled_kernel_cache);
    return cloned_machine;
}

// Reports whether the host passes the mayiuse() check for the `gv` ISA
// (presumably the configuration with the vector extension - confirm).
bool CPUTargetMachine::is_supported() const {
    const bool host_has_gv = ov::intel_cpu::riscv64::mayiuse(ov::intel_cpu::riscv64::gv);
    return host_has_gv;
}

// Finalizes the code emitted so far and returns it as a compiled snippet.
//
// The assertion fails if create_kernel() reports failure. On success, ownership of
// the current generator moves into the returned CompiledSnippetCPU and `h` is
// replaced by a fresh, empty generator.
snippets::CompiledSnippetPtr CPUTargetMachine::get_snippet() {
    OPENVINO_ASSERT(h->create_kernel(), "Failed to create jit_kernel in get_snippet()");

    // `h` already has the pointer type the wrapper expects, so transfer it directly
    // instead of release() + re-wrapping. A non-const local lets the return move.
    auto result = std::make_shared<CompiledSnippetCPU>(std::move(h));
    // The generated code now belongs to `result`; continue with a clean generator.
    h = std::make_unique<jit_snippet>();
    return result;
}

// Number of 32-bit elements processed per vector register.
// Only the `gv` ISA is handled; any other value is rejected with OPENVINO_THROW.
size_t CPUTargetMachine::get_lanes() const {
    if (isa != ov::intel_cpu::riscv64::gv) {
        OPENVINO_THROW("unknown isa ", isa);
    }
    // The lane count depends on VLEN; a 128-bit VLEN with 32-bit elements is
    // assumed here: 128 / 32 = 4 lanes for float32.
    return 4;
}

std::vector<snippets::Reg> CPUTargetMachine::get_abi_arg_regs() const {
// RISC-V ABI argument registers: a0-a7 (x10-x17)
std::vector<snippets::Reg> res;
for (size_t i = 0; i < 8; ++i) {
res.emplace_back(snippets::RegType::gpr, 10 + i); // a0-a7 are x10-x17
}
return res;
}

std::vector<snippets::Reg> CPUTargetMachine::get_gp_reg_pool() const {
using Xbyak_riscv::Reg;
const auto num_gp_regs = 32;
std::vector<snippets::Reg> reg_pool;
for (size_t i = 1; i < num_gp_regs; i++) {
// Reserve: x0 (zero), x1 (ra), x2 (sp), x3 (gp), x4 (tp), x8 (s0/fp)
if (none_of(static_cast<int>(i),
Xbyak_riscv::ra.getIdx(),
Xbyak_riscv::sp.getIdx(),
Xbyak_riscv::gp.getIdx(),
Xbyak_riscv::tp.getIdx(),
Xbyak_riscv::s0.getIdx())) {
reg_pool.emplace_back(snippets::RegType::gpr, i);
}
}
return reg_pool;
}

std::vector<snippets::Reg> CPUTargetMachine::get_vec_reg_pool() const {
const auto num_vec_regs = 32; // RISC-V has 32 vector registers v0-v31
std::vector<snippets::Reg> reg_pool;
reg_pool.reserve(num_vec_regs);
for (int i = 0; i < num_vec_regs; i++) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's just start i from 1

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment is still actual

// v0 is typically reserved for masks, so exclude it
if (i != 0) {
reg_pool.emplace_back(snippets::RegType::vec, static_cast<size_t>(i));
}
}
return reg_pool;
}

// Returns the ISA value this target machine was constructed with.
ov::intel_cpu::riscv64::cpu_isa_t CPUTargetMachine::get_isa() const {
    const ov::intel_cpu::riscv64::cpu_isa_t configured_isa = isa;
    return configured_isa;
}

// Creates a generator together with a new CPUTargetMachine for the given ISA and
// kernel cache; construction is delegated to the target-taking overload.
CPUGenerator::CPUGenerator(ov::intel_cpu::riscv64::cpu_isa_t isa_, ov::intel_cpu::MultiCacheWeakPtr cache)
    : CPUGenerator(std::make_shared<CPUTargetMachine>(isa_, std::move(cache))) {}

// Creates a generator on top of an existing target machine.
CPUGenerator::CPUGenerator(const std::shared_ptr<CPUTargetMachine>& target) : Generator(target) {}

// Creates another CPUGenerator that uses the same target machine object.
// The assertion fails if `target` is not a riscv64 CPUTargetMachine.
std::shared_ptr<snippets::Generator> CPUGenerator::clone() const {
    const auto cpu_target_machine = std::dynamic_pointer_cast<CPUTargetMachine>(target);
    OPENVINO_ASSERT(cpu_target_machine,
                    "Failed to clone CPUGenerator: the instance contains incompatible TargetMachine type");
    auto cloned_generator = std::make_shared<CPUGenerator>(cpu_target_machine);
    return cloned_generator;
}

// Always answers RegType::undefined; the output passed in is not inspected.
ov::snippets::RegType CPUGenerator::get_specific_op_out_reg_type(const ov::Output<ov::Node>& /*out*/) const {
    constexpr auto no_specific_type = ov::snippets::RegType::undefined;
    return no_specific_type;
}

bool CPUGenerator::uses_precompiled_kernel([[maybe_unused]] const std::shared_ptr<snippets::Emitter>& e) const {
// RISC-V platform doesn't currently use precompiled kernels
return false;
}

} // namespace intel_cpu::riscv64

} // namespace ov
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <cstddef>
#include <cstdint>
#include <memory>
#include <nodes/kernels/riscv64/cpu_isa_traits.hpp>
#include <vector>

#include "cache/multi_cache.h"
#include "emitters/snippets/common/compiled_snippet_cpu.hpp"
#include "nodes/kernels/riscv64/jit_generator.hpp"
#include "openvino/core/node.hpp"
#include "openvino/core/node_output.hpp"
#include "snippets/emitter.hpp"
#include "snippets/generator.hpp"
#include "snippets/target_machine.hpp"

namespace ov::intel_cpu::riscv64 {

using CompiledSnippetCPU = ov::intel_cpu::CompiledSnippetCPUCommon<ov::intel_cpu::riscv64::jit_generator_t>;

class CPUTargetMachine : public snippets::TargetMachine {
public:
explicit CPUTargetMachine(ov::intel_cpu::riscv64::cpu_isa_t host_isa, ov::intel_cpu::MultiCacheWeakPtr cache);
[[nodiscard]] std::shared_ptr<snippets::TargetMachine> clone() const override;
[[nodiscard]] bool is_supported() const override;
snippets::CompiledSnippetPtr get_snippet() override;
[[nodiscard]] size_t get_lanes() const override;

[[nodiscard]] std::vector<snippets::Reg> get_abi_arg_regs() const override;
[[nodiscard]] std::vector<snippets::Reg> get_gp_reg_pool() const override;
[[nodiscard]] std::vector<snippets::Reg> get_vec_reg_pool() const override;

[[nodiscard]] ov::intel_cpu::riscv64::cpu_isa_t get_isa() const;

private:
std::unique_ptr<ov::intel_cpu::riscv64::jit_generator_t> h;
ov::intel_cpu::riscv64::cpu_isa_t isa;
ov::intel_cpu::MultiCacheWeakPtr compiled_kernel_cache;
};

class CPUGenerator : public snippets::Generator {
public:
CPUGenerator(ov::intel_cpu::riscv64::cpu_isa_t isa, ov::intel_cpu::MultiCacheWeakPtr cache);
CPUGenerator(const std::shared_ptr<CPUTargetMachine>& target);
std::shared_ptr<Generator> clone() const override;

protected:
bool uses_precompiled_kernel(const std::shared_ptr<snippets::Emitter>& emitter) const override;
ov::snippets::RegType get_specific_op_out_reg_type(const ov::Output<ov::Node>& out) const override;
};

} // namespace ov::intel_cpu::riscv64
Loading
Loading