[GPU][BUG] Incomplete conditions for sdpa prefill and head_size (#31727)

luo-cheng2021 · peterchen-intel · web-flow · commit 32e7a5c03bc4 · 2025-08-20T01:58:18.000Z
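### Details:
- Evaluate `unaligned_head_size` against `new_params` instead of `params` when deciding whether to execute the multi-token SDPA kernel (`sdpa_opt.cpp`).

### Tickets:
- *CVS-172034, CVS-169994*

---------

Co-authored-by: Chen Peter <peter.chen@intel.com>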
diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl_v2/sdpa/sdpa_opt.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl_v2/sdpa/sdpa_opt.cpp
@@ -123,7 +123,7 @@ class SDPAOptImpl : public SDPAImplBase {
         // So far this case was observed only from the non-lm models such as vision embedding model.
         // If we need to optimize unaligned head size SDPA for 2nd+ token phase of LM model,
         // we'll need to fix single_token kernel to support unaligned head size.
-        if (is_prefill || unaligned_head_size(params)) {
+        if (is_prefill || unaligned_head_size(new_params)) {
             GPU_DEBUG_TRACE_DETAIL << "execute multi_tokens for prefill with indirect = " << is_indirect << "\n";
             return execute_stage(events, instance, is_indirect ? indirect_multi_tokens : regular_multi_tokens);
         }

Original file line number	Diff line number	Diff line change
`@@ -123,7 +123,7 @@ class SDPAOptImpl : public SDPAImplBase {`
`123`	`123`	`// So far this case was observed only from the non-lm models such as vision embedding model.`
`124`	`124`	`// If we need to optimize unaligned head size SDPA for 2nd+ token phase of LM model,`
`125`	`125`	`// we'll need to fix single_token kernel to support unaligned head size.`
`126`		`- if (is_prefill \|\| unaligned_head_size(params)) {`
	`126`	`+ if (is_prefill \|\| unaligned_head_size(new_params)) {`
`127`	`127`	`GPU_DEBUG_TRACE_DETAIL << "execute multi_tokens for prefill with indirect = " << is_indirect << "\n";`
`128`	`128`	`return execute_stage(events, instance, is_indirect ? indirect_multi_tokens : regular_multi_tokens);`
`129`	`129`	`}`