@@ -583,6 +583,133 @@ void run_long_kernel(ze_context_handle_t context, ze_device_handle_t device,
583583 }
584584}
585585
586+ void run_long_kernel_scratch (ze_context_handle_t context,
587+ ze_device_handle_t device,
588+ process_synchro &synchro, debug_options &options) {
589+
590+ auto command_list = lzt::create_command_list (device);
591+ auto command_queue = lzt::create_command_queue (device);
592+ std::string module_name = options.module_name_in ;
593+
594+ std::string kernel_name = " long_kernel_slm" ;
595+ size_t slm_buffer_size = 512 ; // NOTE: Not all SKUs have same SLM so can go too big.
596+
597+ synchro.wait_for_debugger_signal ();
598+ const char *build_flags =" -g -igc_opts 'VISAOptions=-forcespills'" ;
599+ auto module =
600+ lzt::create_module (device, module_name, ZE_MODULE_FORMAT_IL_SPIRV,
601+ build_flags /* include debug symbols*/ , nullptr );
602+
603+ auto kernel = lzt::create_function (module , kernel_name);
604+ auto size = slm_buffer_size;
605+
606+ ze_kernel_properties_t kernel_properties = {
607+ ZE_STRUCTURE_TYPE_KERNEL_PROPERTIES, nullptr };
608+ EXPECT_EQ (ZE_RESULT_SUCCESS,
609+ zeKernelGetProperties (kernel, &kernel_properties));
610+ int threadCount = std::ceil (size / kernel_properties.maxSubgroupSize );
611+
612+ LOG_INFO << " [Application] Problem size: " << size
613+ << " . Kernel maxSubGroupSize: " << kernel_properties.maxSubgroupSize
614+ << " . GPU thread count: ceil (P size/maxSubGroupSize) = "
615+ << threadCount;
616+
617+ auto dest_buffer_d =
618+ lzt::allocate_device_memory (size, size, 0 , 0 , device, context);
619+ auto dest_buffer_s =
620+ lzt::allocate_shared_memory (size, size, 0 , 0 , device, context);
621+ auto src_buffer_d =
622+ lzt::allocate_device_memory (size, size, 0 , 0 , device, context);
623+ auto src_buffer_s =
624+ lzt::allocate_shared_memory (size, size, 0 , 0 , device, context);
625+
626+ void *slm_output_s = nullptr ;
627+ slm_output_s = lzt::allocate_shared_memory (slm_buffer_size, slm_buffer_size,
628+ 0 , 0 , device, context);
629+
630+ unsigned long loop_max = 1000000000 ;
631+
632+ auto loop_counter_d = lzt::allocate_device_memory (
633+ loop_counter_alloc_size, loop_counter_alloc_size, 0 , 0 , device, context);
634+ auto loop_counter_s = lzt::allocate_shared_memory (
635+ loop_counter_alloc_size, loop_counter_alloc_size, 0 , 0 , device, context);
636+
637+ LOG_DEBUG << " [Application] Allocated source device memory at: " << std::hex
638+ << src_buffer_d;
639+ LOG_DEBUG << " [Application] Allocated destination device memory at: "
640+ << std::hex << dest_buffer_d;
641+
642+ std::memset (dest_buffer_s, 1 , size);
643+ std::memset (src_buffer_s, 0 , size);
644+ std::memset (loop_counter_s, 0 , loop_counter_alloc_size);
645+ for (size_t i = 0 ; i < size; i++) {
646+ static_cast <uint8_t *>(src_buffer_s)[i] = (i + 1 & 0xFF );
647+ }
648+
649+ lzt::set_argument_value (kernel, 0 , sizeof (dest_buffer_d), &dest_buffer_d);
650+ lzt::set_argument_value (kernel, 1 , sizeof (src_buffer_d), &src_buffer_d);
651+ lzt::set_argument_value (kernel, 2 , sizeof (loop_counter_d), &loop_counter_d);
652+ lzt::set_argument_value (kernel, 3 , sizeof (loop_max), &loop_max);
653+ lzt::set_argument_value (kernel, 4 , sizeof (slm_output_s), &slm_output_s);
654+
655+ uint32_t group_size_x = 1 ;
656+ uint32_t group_size_y = 1 ;
657+ uint32_t group_size_z = 1 ;
658+ lzt::suggest_group_size (kernel, size, 1 , 1 , group_size_x, group_size_y,
659+ group_size_z);
660+ lzt::set_group_size (kernel, group_size_x, 1 , 1 );
661+ ze_group_count_t group_count = {};
662+ group_count.groupCountX = size / group_size_x;
663+ group_count.groupCountY = 1 ;
664+ group_count.groupCountZ = 1 ;
665+
666+ lzt::append_memory_copy (command_list, src_buffer_d, src_buffer_s, size);
667+ lzt::append_barrier (command_list);
668+ lzt::append_launch_function (command_list, kernel, &group_count, nullptr , 0 ,
669+ nullptr );
670+ lzt::append_barrier (command_list);
671+ lzt::append_memory_copy (command_list, dest_buffer_s, dest_buffer_d, size);
672+ lzt::append_memory_copy (command_list, loop_counter_s, loop_counter_d,
673+ loop_counter_alloc_size);
674+ lzt::close_command_list (command_list);
675+
676+ LOG_DEBUG << " [Application] launching execution of " << kernel_name;
677+
678+ synchro.update_gpu_buffer_address (reinterpret_cast <uint64_t >(src_buffer_d));
679+ synchro.notify_debugger ();
680+
681+ lzt::execute_command_lists (command_queue, 1 , &command_list, nullptr );
682+ lzt::synchronize (command_queue, UINT64_MAX);
683+
684+ for (size_t i = 1 ; i < size; i++) {
685+ EXPECT_EQ (static_cast <uint8_t *>(dest_buffer_s)[i],
686+ static_cast <uint8_t *>(src_buffer_s)[i]);
687+ if (static_cast <uint8_t *>(dest_buffer_s)[i] !=
688+ static_cast <uint8_t *>(src_buffer_s)[i]) {
689+ LOG_ERROR << " [Application] Buffer Sanity check did not pass" ;
690+ break ;
691+ }
692+ }
693+
694+ // cleanup
695+ lzt::free_memory (context, dest_buffer_s);
696+ lzt::free_memory (context, dest_buffer_d);
697+ lzt::free_memory (context, src_buffer_s);
698+ lzt::free_memory (context, src_buffer_d);
699+ lzt::free_memory (context, loop_counter_s);
700+ lzt::free_memory (context, loop_counter_d);
701+ lzt::free_memory (context, slm_output_s);
702+
703+ lzt::destroy_function (kernel);
704+ lzt::destroy_module (module );
705+ lzt::destroy_command_list (command_list);
706+ lzt::destroy_command_queue (command_queue);
707+
708+ if (::testing::Test::HasFailure ()) {
709+ exit (1 );
710+ }
711+ }
712+
586713void run_multiple_threads (ze_context_handle_t context,
587714 ze_device_handle_t device, process_synchro &synchro,
588715 debug_options &options) {
@@ -1227,6 +1354,11 @@ int main(int argc, char **argv) {
12271354 options.kernel_name_in = " long_kernel_slm" ;
12281355 run_long_kernel (context, device, synchro, options);
12291356 break ;
1357+ case LONG_RUNNING_KERNEL_INTERRUPTED_SCRATCH:
1358+ options.use_custom_module = true ;
1359+ options.module_name_in = " debug_loop_slm.spv" ;
1360+ run_long_kernel_scratch (context, device, synchro, options);
1361+ break ;
12301362 case MULTIPLE_THREADS:
12311363 run_multiple_threads (context, device, synchro, options);
12321364 break ;
0 commit comments