From 8df24f71e32e982e525fbafb1eb18da061905664 Mon Sep 17 00:00:00 2001
From: Artur Fierka
Date: Thu, 23 Oct 2025 15:36:38 +0200
Subject: [PATCH] [Security] Fix/remove logically dead code (#448)

Signed-off-by: Artur Fierka
---
 vllm_gaudi/ops/hpu_gptq.py               | 19 +++++--------------
 vllm_gaudi/v1/worker/hpu_model_runner.py | 16 ++++++----------
 2 files changed, 11 insertions(+), 24 deletions(-)

diff --git a/vllm_gaudi/ops/hpu_gptq.py b/vllm_gaudi/ops/hpu_gptq.py
index 5894b1ef8..57bcadf19 100644
--- a/vllm_gaudi/ops/hpu_gptq.py
+++ b/vllm_gaudi/ops/hpu_gptq.py
@@ -168,7 +168,6 @@ def create_weights(
 
         group_size = self.quant_config.group_size if self.quant_config.group_size != -1 else input_size
         scale_and_zero_size = input_size // group_size
-        scale_and_zero_input_dim = None
 
         qweight = PackedvLLMParameter(data=torch.empty(
             input_size_per_partition // self.quant_config.pack_factor,
@@ -205,20 +204,12 @@ def create_weights(
             ),
             "weight_loader": weight_loader
         }
-        if scale_and_zero_input_dim is None:
-            scales = ChannelQuantScaleParameter(output_dim=1, **weight_scale_args)
-            qzeros = PackedColumnParameter(output_dim=1,
-                                           packed_dim=1,
-                                           packed_factor=self.quant_config.pack_factor,
-                                           **qzeros_args)
-        else:
-            scales = GroupQuantScaleParameter(output_dim=1, input_dim=0, **weight_scale_args)
-            qzeros = PackedvLLMParameter(input_dim=0,
-                                         output_dim=1,
-                                         packed_dim=1,
-                                         packed_factor=self.quant_config.pack_factor,
-                                         **qzeros_args)
+        scales = ChannelQuantScaleParameter(output_dim=1, **weight_scale_args)
+        qzeros = PackedColumnParameter(output_dim=1,
+                                       packed_dim=1,
+                                       packed_factor=self.quant_config.pack_factor,
+                                       **qzeros_args)
 
         qzeros.pack_factor = self.quant_config.pack_factor
 
diff --git a/vllm_gaudi/v1/worker/hpu_model_runner.py b/vllm_gaudi/v1/worker/hpu_model_runner.py
index 908e7fd1b..fea0ddc17 100644
--- a/vllm_gaudi/v1/worker/hpu_model_runner.py
+++ b/vllm_gaudi/v1/worker/hpu_model_runner.py
@@ -4326,20 +4326,16 @@ def copy_kv_blocks(
 
     i = 0
     global hpu_buffer
-    use_hpu_buffer = False
     for layer_name in src_kv_caches:
         key_cache = src_kv_caches[layer_name][0]
         value_cache = src_kv_caches[layer_name][1]
-        if direction == "d2h" and use_hpu_buffer:
-            hpu_buffer[i][0] = key_cache.index_select(0, src_slot_mapping)
-            hpu_buffer[i][1] = value_cache.index_select(0, src_slot_mapping)
-        else:
-            #import remote_pdb;remote_pdb.set_trace()
-            dst_kv_caches[layer_name][0].index_put_((dst_slot_mapping, ),
-                                                    key_cache.index_select(0, src_slot_mapping).to(target_device))
-            dst_kv_caches[layer_name][1].index_put_((dst_slot_mapping, ),
-                                                    value_cache.index_select(0, src_slot_mapping).to(target_device))
+        dst_kv_caches[layer_name][0].index_put_((dst_slot_mapping, ),
+                                                key_cache.index_select(0, src_slot_mapping).to(target_device))
+        dst_kv_caches[layer_name][1].index_put_((dst_slot_mapping, ),
+                                                value_cache.index_select(0, src_slot_mapping).to(target_device))
+        if direction == "d2h":
+            dst_kv_caches[layer_name] = dst_kv_caches[layer_name].unflatten(1, (-1, block_size))
         i = i + 1