1 parent ad74481 commit e4c3aca
lmdeploy/pytorch/kernels/ascend/paged_attention_fwd.py
@@ -33,7 +33,7 @@ def flash_context_attention(
        q_seq_len[i:i + 1],
        num_q_heads,
        num_kv_heads,
-       context.attention_mask[i:i + 1],
+       attn_mask=context.attention_mask[i:i + 1],
        attn_output=attn_output,
    )
else:
@@ -51,7 +51,7 @@ def flash_context_attention(
        kv_seq_len[i:i + 1],
        …
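Both hunks replace a positionally passed mask with an explicit attn_mask= keyword at the call site. A minimal sketch of why this matters, using a hypothetical context_attention_fwd-style signature (the real callee and its full parameter list are not shown in these hunks):

# Hypothetical standalone sketch, not the actual lmdeploy kernel: it shows how a
# positional mask can silently bind to the wrong parameter once the callee's
# signature has extra parameters ahead of attn_mask, while a keyword call stays
# unambiguous. window_size below is an invented placeholder parameter.
from typing import Optional

def context_attention_fwd(q_seq_len,
                          num_q_heads: int,
                          num_kv_heads: int,
                          window_size: Optional[int] = None,  # hypothetical extra parameter
                          attn_mask=None,
                          attn_output=None):
    # Report where the mask ended up so the two call styles can be compared.
    print("attn_mask is", "set" if attn_mask is not None else "None",
          "| window_size =", window_size)

mask = [[1, 1, 0]]  # stand-in for context.attention_mask[i:i + 1]

# Positional call (pre-commit pattern): the mask is misread as window_size.
context_attention_fwd([3], 8, 8, mask, attn_output=None)

# Keyword call (post-commit pattern): the mask reaches attn_mask regardless of
# any parameters inserted before it.
context_attention_fwd([3], 8, 8, attn_mask=mask, attn_output=None)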