-
Notifications
You must be signed in to change notification settings - Fork 13.2k
llama : support qwen3 rerank and embeddings #14029
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 1 commit
3f3b9a2
f8fd440
e0eb4b8
030dc3b
f8facb3
0777cd3
8edd2cf
c02f53d
c2f4dc7
cbb6f20
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
@@ -1577,10 +1577,15 @@ void llm_graph_context::build_pooling( | |
cur = ggml_add(ctx0, ggml_mul_mat(ctx0, cls_out, cur), cls_out_b); | ||
} | ||
} else if (cls_out) { | ||
// Single layer classification head (direct projection) | ||
// https://github.com/huggingface/transformers/blob/f4fc42216cd56ab6b68270bf80d811614d8d59e4/src/transformers/models/bert/modeling_bert.py#L1476 | ||
GGML_ASSERT(cls_out_b != nullptr); | ||
cur = ggml_add(ctx0, ggml_mul_mat(ctx0, cls_out, inp), cls_out_b); | ||
if (arch == LLM_ARCH_QWEN3) { | ||
cur = ggml_mul_mat(ctx0, cls_out, inp); | ||
cur = ggml_soft_max(ctx0, cur); // qwen3 uses softmax on the output | ||
|
||
} else { | ||
// Single layer classification head (direct projection) | ||
// https://github.com/huggingface/transformers/blob/f4fc42216cd56ab6b68270bf80d811614d8d59e4/src/transformers/models/bert/modeling_bert.py#L1476 | ||
GGML_ASSERT(cls_out_b != nullptr); | ||
cur = ggml_add(ctx0, ggml_mul_mat(ctx0, cls_out, inp), cls_out_b); | ||
} | ||
} else { | ||
GGML_ABORT("RANK pooling requires either cls+cls_b or cls_out+cls_out_b"); | ||
} | ||
|
Uh oh!
There was an error while loading. Please reload this page.