|
11 | 11 |
|
12 | 12 | import numba.core.event as ev
|
13 | 13 | from numba.core import errors, sigutils, types
|
14 |
| -from numba.core.compiler import CompileResult |
| 14 | +from numba.core.compiler import CompileResult, Flags |
15 | 15 | from numba.core.compiler_lock import global_compiler_lock
|
16 | 16 | from numba.core.dispatcher import Dispatcher, _FunctionCompiler
|
17 | 17 | from numba.core.target_extension import dispatcher_registry, target_registry
|
| 18 | +from numba.core.types import void |
18 | 19 | from numba.core.typing.typeof import Purpose, typeof
|
19 | 20 |
|
20 | 21 | from numba_dpex import config, spirv_generator
|
21 | 22 | from numba_dpex.core.exceptions import (
|
22 | 23 | ExecutionQueueInferenceError,
|
| 24 | + KernelHasReturnValueError, |
23 | 25 | UnsupportedKernelArgumentError,
|
24 | 26 | )
|
25 | 27 | from numba_dpex.core.pipelines import kernel_compiler
|
| 28 | +from numba_dpex.core.targets.kernel_target import CompilationMode |
26 | 29 | from numba_dpex.core.types import DpnpNdArray
|
27 | 30 |
|
28 | 31 | from .target import DPEX_KERNEL_EXP_TARGET_NAME, dpex_exp_kernel_target
|
@@ -81,10 +84,19 @@ def _compile_to_spirv(
|
81 | 84 | kernel_fn = kernel_targetctx.prepare_spir_kernel(
|
82 | 85 | kernel_func, kernel_fndesc.argtypes
|
83 | 86 | )
|
84 |
| - |
85 |
| - # makes sure that the spir_func is completely inlined into the |
86 |
| - # spir_kernel wrapper |
87 |
| - kernel_library.optimize_final_module() |
| 87 | + # Get the compiler flags that were passed through the target descriptor |
| 88 | + flags = Flags() |
| 89 | + self.targetdescr.options.parse_as_flags(flags, self.targetoptions) |
| 90 | + |
| 91 | + # If the inline_threshold option was set then set the property in the |
| 92 | + # kernel_library to force inlining ``overload`` calls into a kernel. |
| 93 | + inline_threshold = flags.inline_threshold # pylint: disable=E1101 |
| 94 | + kernel_library.inline_threshold = inline_threshold |
| 95 | + |
| 96 | + # Call finalize on the LLVM module. Finalization will result in |
| 97 | + # all linking libraries getting linked together and final optimization |
| 98 | + # including inlining of functions if an inlining level is specified. |
| 99 | + kernel_library.finalize() |
88 | 100 | # Compile the LLVM IR to SPIR-V
|
89 | 101 | kernel_spirv_module = spirv_generator.llvm_to_spirv(
|
90 | 102 | kernel_targetctx,
|
@@ -144,9 +156,15 @@ def _compile_cached(
|
144 | 156 | try:
|
145 | 157 | cres: CompileResult = self._compile_core(args, return_type)
|
146 | 158 |
|
147 |
| - kernel_device_ir_module = self._compile_to_spirv( |
148 |
| - cres.library, cres.fndesc, cres.target_context |
149 |
| - ) |
| 159 | + if ( |
| 160 | + self.targetoptions["_compilation_mode"] |
| 161 | + == CompilationMode.KERNEL |
| 162 | + ): |
| 163 | + kernel_device_ir_module: _KernelModule = self._compile_to_spirv( |
| 164 | + cres.library, cres.fndesc, cres.target_context |
| 165 | + ) |
| 166 | + else: |
| 167 | + kernel_device_ir_module = None |
150 | 168 |
|
151 | 169 | kcres_attrs = []
|
152 | 170 |
|
@@ -185,9 +203,6 @@ class KernelDispatcher(Dispatcher):
|
185 | 203 | an executable binary, the dispatcher compiles it to SPIR-V and then caches
|
186 | 204 | that SPIR-V bitcode.
|
187 | 205 |
|
188 |
| - FIXME: Fix issues identified by pylint with this class. |
189 |
| - https://github.com/IntelPython/numba-dpex/issues/1196 |
190 |
| -
|
191 | 206 | """
|
192 | 207 |
|
193 | 208 | targetdescr = dpex_exp_kernel_target
|
@@ -282,12 +297,28 @@ def cb_llvm(dur):
|
282 | 297 | with self._compiling_counter:
|
283 | 298 | args, return_type = sigutils.normalize_signature(sig)
|
284 | 299 |
|
285 |
| - try: |
286 |
| - self._compiler.check_queue_equivalence_of_args( |
287 |
| - self._kernel_name, args |
288 |
| - ) |
289 |
| - except ExecutionQueueInferenceError as eqie: |
290 |
| - raise eqie |
| 300 | + if ( |
| 301 | + self.targetoptions["_compilation_mode"] |
| 302 | + == CompilationMode.KERNEL |
| 303 | + ): |
| 304 | + # Queue equivalence based on compute-follows-data is only |
| 305 | + # evaluated for kernel functions, whose arguments are |
| 306 | + # expected to be arrays. For device_func-decorated |
| 307 | + # functions, the arguments can be scalars, so we skip the queue |
| 308 | + # equivalence check. |
| 309 | + try: |
| 310 | + self._compiler.check_queue_equivalence_of_args( |
| 311 | + self._kernel_name, args |
| 312 | + ) |
| 313 | + except ExecutionQueueInferenceError as eqie: |
| 314 | + raise eqie |
| 315 | + |
| 316 | + # A function being compiled in the KERNEL compilation mode |
| 317 | + # cannot have a non-void return value |
| 318 | + if return_type and return_type != void: |
| 319 | + raise KernelHasReturnValueError( |
| 320 | + kernel_name=None, return_type=return_type, sig=sig |
| 321 | + ) |
291 | 322 |
|
292 | 323 | # Don't recompile if signature already exists
|
293 | 324 | existing = self.overloads.get(tuple(args))
|
|
0 commit comments