@@ -4,27 +4,25 @@
 import torch
 from torch import nn
 from transformers import Gemma3TextConfig
-
 from vllm.compilation.decorators import support_torch_compile
 from vllm.config import CacheConfig, VllmConfig
+from vllm.model_executor.layers.logits_processor import LogitsProcessor
 from vllm.model_executor.layers.quantization import QuantizationConfig
 from vllm.model_executor.layers.vocab_parallel_embedding import \
     VocabParallelEmbedding
 from vllm.model_executor.model_loader.weight_utils import (
     default_weight_loader, maybe_remap_kv_scale_name)
-from vllm.model_executor.layers.logits_processor import LogitsProcessor
 from vllm.model_executor.models.gemma3 import (Gemma3DecoderLayer,
                                                Gemma3Model,
                                                Gemma3Attention,
                                                Gemma3MLP,
                                                Gemma3ForCausalLM)
-from vllm_ascend.quantization.w8a8 import AscendW8A8LinearMethod
-from vllm_ascend.ops.layernorm import AddRMSNormW8A8Quant, AscendRMSNorm
-
 from vllm.model_executor.models.interfaces import SupportsLoRA, SupportsPP
 from vllm.model_executor.models.utils import (is_pp_missing_parameter,
     make_empty_intermediate_tensors_factory, make_layers,
     maybe_prefix)
+from vllm_ascend.ops.layernorm import AddRMSNormW8A8Quant, AscendRMSNorm
+from vllm_ascend.quantization.w8a8 import AscendW8A8LinearMethod
 
 class AscendGemma3DecoderLayer(Gemma3DecoderLayer):
     def __init__(
@@ -57,12 +55,12 @@ def __init__(
             prefix=f"{prefix}.mlp",
         )
 
-        self.input_layernorm = AscendRMSNorm(
-            config.hidden_size, eps=config.rms_norm_eps)
-        self.post_attention_layernorm = AscendRMSNorm(
-            config.hidden_size, eps=config.rms_norm_eps)
-        self.pre_feedforward_layernorm = AscendRMSNorm(
-            config.hidden_size, eps=config.rms_norm_eps)
+        self.input_layernorm = AscendRMSNorm(config.hidden_size,
+                                             eps=config.rms_norm_eps)
+        self.post_attention_layernorm = AscendRMSNorm(config.hidden_size,
+                                                      eps=config.rms_norm_eps)
+        self.pre_feedforward_layernorm = AscendRMSNorm(config.hidden_size,
+                                                       eps=config.rms_norm_eps)
         self.post_feedforward_layernorm = AscendRMSNorm(
             config.hidden_size, eps=config.rms_norm_eps)
 
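For context, the second hunk shows a hardware-specific decoder layer (AscendGemma3DecoderLayer) replacing the upstream layer's norm modules with AscendRMSNorm; this commit only reorders imports and re-wraps those calls. Below is a minimal, self-contained sketch of that subclass-and-swap pattern using plain torch.nn stand-ins. The Base*/Custom* names are illustrative only (not vllm or vllm-ascend APIs), the real class's constructor wiring is not shown in this hunk, and the snippet assumes PyTorch 2.4+ for torch.nn.RMSNorm.

import torch
from torch import nn


class BaseDecoderLayer(nn.Module):
    """Stand-in for an upstream layer such as Gemma3DecoderLayer."""

    def __init__(self, hidden_size: int, eps: float) -> None:
        super().__init__()
        self.input_layernorm = nn.RMSNorm(hidden_size, eps=eps)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.input_layernorm(x)


class CustomRMSNorm(nn.RMSNorm):
    """Stand-in for a platform norm kernel such as AscendRMSNorm."""


class CustomDecoderLayer(BaseDecoderLayer):
    """Subclass that rebinds the norm module to the platform implementation,
    analogous to the AscendRMSNorm assignments in the diff above."""

    def __init__(self, hidden_size: int, eps: float) -> None:
        super().__init__(hidden_size, eps)
        # Swap the upstream norm for the platform-specific one.
        self.input_layernorm = CustomRMSNorm(hidden_size, eps=eps)


layer = CustomDecoderLayer(hidden_size=16, eps=1e-6)
print(type(layer.input_layernorm).__name__)  # CustomRMSNorm
print(layer(torch.randn(2, 16)).shape)       # torch.Size([2, 16])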