Changes from 2 commits
Commits
56 commits
f0d3696
Added falcon model converter
mehtamansi29 Jan 9, 2025
21df61e
Added falcon model converter -1
mehtamansi29 Jan 10, 2025
bc4b4f7
Falcon converter changes
mehtamansi29 Apr 28, 2025
060e95c
Falcon converter changes_1
mehtamansi29 Apr 28, 2025
496f3e7
transformer config changes of falcon converter
mehtamansi29 Apr 29, 2025
9dd0e61
transformer config changes of falcon converter
mehtamansi29 Apr 29, 2025
b990401
transformer config changes of falcon converter_3
mehtamansi29 Jul 21, 2025
8f2284c
transformer config changes of falcon converter_4
mehtamansi29 Jul 21, 2025
3642f1e
transformer config changes of falcon converter_6
mehtamansi29 Jul 21, 2025
6da4ced
transformer config changes of falcon converter_7
mehtamansi29 Jul 21, 2025
a8ea36f
transformer config changes of falcon converter_8
mehtamansi29 Jul 21, 2025
cea948d
transformer config changes of falcon converter_9
mehtamansi29 Jul 21, 2025
60078c5
transformer config changes of falcon converter_11
mehtamansi29 Jul 24, 2025
d7a5c31
Merge remote-tracking branch 'upstream/master' into patch-1
mehtamansi29 Jul 25, 2025
c7d4a9c
intermediate_dim change
mehtamansi29 Jul 31, 2025
152c19e
intermediate_dim change_1
mehtamansi29 Jul 31, 2025
3bc83bd
backbone_config change
mehtamansi29 Jul 31, 2025
d3cbdec
transformer config intermediate_dim
mehtamansi29 Jul 31, 2025
50e6d06
attention layer weights changes
mehtamansi29 Jul 31, 2025
89bac89
attention layer indention change
mehtamansi29 Jul 31, 2025
164e6cc
transformers_config changes
mehtamansi29 Jul 31, 2025
7873b3c
transformer config changes
mehtamansi29 Jul 31, 2025
5f174d4
transformer config
mehtamansi29 Jul 31, 2025
559ee01
num_key_value_heads change
mehtamansi29 Jul 31, 2025
5047254
remove keyvalue head from transformer config
mehtamansi29 Jul 31, 2025
af2c647
intermediate_dim in transformer c
mehtamansi29 Jul 31, 2025
3aaa529
change head dim
mehtamansi29 Jul 31, 2025
13c04d7
hidden dim changes
mehtamansi29 Jul 31, 2025
8cc06a6
convert_falcon_changes
mehtamansi29 Aug 4, 2025
21e4473
attention layer change
mehtamansi29 Aug 4, 2025
6aa4244
attention layer changes_1
mehtamansi29 Aug 4, 2025
1ce3837
falcon converter changesa
mehtamansi29 Aug 4, 2025
496eeeb
preset_loader precommit run
mehtamansi29 Aug 4, 2025
9ccc46a
Merge branch 'keras-team:master' into patch-1
mehtamansi29 Aug 6, 2025
b64cd4c
backbone and casual_lm test
mehtamansi29 Aug 6, 2025
fba4aba
loading issue for falcon1b
mehtamansi29 Aug 19, 2025
f3c5041
loading issue for falcon1b_1
mehtamansi29 Aug 19, 2025
9b860c1
loading issue for falcon1b_1
mehtamansi29 Aug 19, 2025
41289d8
resolving conflict
mehtamansi29 Aug 19, 2025
2284520
convert_falcon file changes
mehtamansi29 Aug 19, 2025
2933774
convert_falcon chanes_1
mehtamansi29 Aug 19, 2025
b3ba59a
update for 7b parameters
mehtamansi29 Sep 1, 2025
f0fb361
7b parameters mismatch update
mehtamansi29 Sep 2, 2025
88a91a1
Merge remote-tracking branch 'upstream/master' into patch-1
mehtamansi29 Sep 2, 2025
6b26899
Resolve 7b paramter disperancies
mehtamansi29 Sep 2, 2025
e7b39bb
resolve 7b parameter disperancies_1
mehtamansi29 Sep 3, 2025
8e7520b
resolve 7b parameter disperancies_2
mehtamansi29 Sep 3, 2025
6d5ae8c
Revert "Resolve 7b paramter disperancies"
mehtamansi29 Sep 3, 2025
5948b6e
Revert "Resolve 7b paramter disperancies"
mehtamansi29 Sep 3, 2025
79951dd
falcon_transformer_decoder changes
mehtamansi29 Sep 3, 2025
a9eed7c
falcon_transformer_decoder changes_1
mehtamansi29 Sep 3, 2025
0243caf
changes for 7b mismatch parameters
mehtamansi29 Sep 9, 2025
06bd348
Chnages based on comments
mehtamansi29 Sep 11, 2025
a588b76
convert_falcon file changes
mehtamansi29 Sep 23, 2025
92aa32b
layernorm bias term changes
mehtamansi29 Sep 23, 2025
9345630
converter file change
mehtamansi29 Sep 23, 2025
60 changes: 60 additions & 0 deletions keras_hub/src/utils/transformers/convert_falcon.py
@@ -0,0 +1,60 @@
import numpy as np

from keras_hub.src.models.falcon.falcon_backbone import FalconBackbone
from keras_hub.src.utils.preset_utils import load_json

backbone_cls = FalconBackbone


def convert_backbone_config(transformers_config):
return {
"vocabulary_size": transformers_config["vocab_size"],
"num_layers": transformers_config["num_hidden_layers"],
"num_attention_heads": transformers_config["num_attention_heads"],
"hidden_dim": transformers_config["hidden_size"],
"intermediate_dim": 32 * 4,
}


def transpose_and_reshape(x, shape):
return np.reshape(np.transpose(x), shape)


def convert_weights(backbone, loader, transformers_config):
# Embeddings
loader.port_weight(
keras_variable=backbone.get_layer("token_embedding").embeddings,
hf_weight_key="word_embeddings.weight",
)

for i in range(backbone.num_layers):
decoder_layer = backbone.get_layer(f"transformer_layer_{i}")

# Norm layer
loader.port_weight(
keras_variable=decoder_layer.input_layernorm.gamma,
hf_weight_key=f"h.{i}.input_layernorm.weight",
)

        # Attention layers
        loader.port_weight(
            keras_variable=decoder_layer.attention_layer.output_dense.kernel,
            hf_weight_key=f"h.{i}.self_attention.dense.weight",
            # HF stores dense kernels as (out, in); transpose into the Keras layout.
            hook_fn=transpose_and_reshape,
        )

        # Post-attention norm layer
        loader.port_weight(
            keras_variable=decoder_layer.post_attention_layernorm.gamma,
            hf_weight_key=f"h.{i}.post_attention_layernorm.weight",
        )

        # NOTE: the fused query_key_value projection and the MLP kernels are
        # not ported yet in this revision of the converter.


def convert_tokenizer(cls, preset, **kwargs):
tokenizer_data = load_json(preset, "tokenizer.json")
vocab = tokenizer_data["model"]["vocab"]
merges = tokenizer_data["model"].get("merges", None)

tokenizer_kwargs = {"vocabulary": vocab, "merges": merges}
return cls(**tokenizer_kwargs)
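For reference, a minimal sketch of how `convert_backbone_config` maps a Hugging Face config dict onto `FalconBackbone` keyword arguments; the config values below are made up purely for illustration and do not correspond to any real checkpoint:

```python
from keras_hub.src.models.falcon.falcon_backbone import FalconBackbone
from keras_hub.src.utils.transformers.convert_falcon import (
    convert_backbone_config,
)

# Hypothetical HF-style Falcon config (illustrative values only).
hf_config = {
    "vocab_size": 1000,
    "num_hidden_layers": 2,
    "num_attention_heads": 4,
    "hidden_size": 64,
}

backbone_kwargs = convert_backbone_config(hf_config)
# -> {"vocabulary_size": 1000, "num_layers": 2, "num_attention_heads": 4,
#     "hidden_dim": 64, "intermediate_dim": 256}
backbone = FalconBackbone(**backbone_kwargs)
```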
23 changes: 23 additions & 0 deletions keras_hub/src/utils/transformers/convert_falcon_test.py
@@ -0,0 +1,23 @@
import pytest

from keras_hub.src.models.falcon.falcon_backbone import FalconBackbone
from keras_hub.src.models.falcon.falcon_causal_lm import FalconCausalLM
from keras_hub.src.tests.test_case import TestCase


class TestTask(TestCase):
@pytest.mark.large
def test_convert_tiny_preset(self):
model = FalconCausalLM.from_preset("hf://tiiuae/falcon-7b")
Member
I don't think we can afford to download this ~15gb file in our testing setup. You could try the 1b model? Or create a small test model on hf, as was done for llama and others.

Collaborator Author
@mattdangerw - I'll create a small test with the 1b falcon model and commit again.
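For context, a sketch of what such a lighter test method might look like inside `TestTask`, assuming the `tiiuae/falcon-rw-1b` checkpoint (a ~1B Falcon variant on Hugging Face) is the one used:

```python
@pytest.mark.large
def test_convert_small_preset(self):
    # Assumes tiiuae/falcon-rw-1b as a lighter alternative to falcon-7b.
    model = FalconCausalLM.from_preset("hf://tiiuae/falcon-rw-1b")
    prompt = "What is your favorite condiment?"
    model.generate([prompt], max_length=15)
```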

prompt = "What is your favorite condiment?"
model.generate([prompt], max_length=15)

@pytest.mark.large
def test_class_detection(self):
model = FalconCausalLM.from_preset("hf://tiiuae/falcon-7b")
Member
Does this work? I think we only have Falcon-1b support! The 7b model has a different attention mechanism which hasn't been added!

Member
We should probably also attach a colab verifying that output from the huggingface and KerasHub versions align. And it sounds like that might actually run into differences here due to what @SamanehSaadat is saying.

@SamanehSaadat how much work is needed on the architecture code to support the 7b and other variants? Is it something that could be added here, or is it a ton to do?

Member
@mattdangerw I think adding support for the 7b is non-trivial. There are some major architectural differences like alibi, GQA vs. MHA, and rotary embedding (to me, it's almost like adding a new architecture!).

Member
Thanks! Sounds like we will need to either throw in the converter if we encounter the falcon huggingface options we don't currently support, or add them in (on a separate pr?).

@mehtamansi29 we'd probably need a colab verifying that the output matches for some subset of falcon checkpoints on huggingface, and ideally that we throw for falcon checkpoints that need arch options we don't yet support.

Collaborator Author
mehtamansi29 Jan 23, 2025
Okay. @mattdangerw - I'll create a colab for verifying that the output matches for some subset of falcon checkpoints on huggingface and share it with you.
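One possible shape for the "throw in the converter" idea discussed above is a guard at the top of `convert_backbone_config`. This is only a sketch, and the Hugging Face config keys it checks (`alibi`, `multi_query`, `new_decoder_architecture`) are assumptions that should be verified against the actual Falcon configs:

```python
def _check_supported_config(transformers_config):
    # Hypothetical guard, not part of this PR: the converter currently
    # targets the Falcon-1b-style decoder (multi-head attention + alibi).
    uses_alibi = transformers_config.get("alibi", False)
    multi_query = transformers_config.get("multi_query", False)
    new_decoder = transformers_config.get("new_decoder_architecture", False)
    if not uses_alibi or multi_query or new_decoder:
        raise ValueError(
            "This Falcon checkpoint uses architecture options (rotary "
            "embeddings, multi-query/grouped-query attention, or the new "
            "decoder architecture) that the KerasHub Falcon converter does "
            "not support yet."
        )
```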

self.assertIsInstance(model, FalconCausalLM)
model = FalconBackbone.from_preset(
"hf://tiiuae/falcon-7b",
load_weights=False,
)
self.assertIsInstance(model, FalconBackbone)
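A minimal sketch of the kind of colab check discussed above, comparing logits from the Hugging Face and KerasHub versions of a checkpoint. The checkpoint name is a placeholder, and it assumes the KerasHub Falcon LM head is tied to the token embedding (as in other KerasHub causal LMs); both assumptions should be verified:

```python
import keras
import numpy as np
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

import keras_hub

checkpoint = "tiiuae/falcon-rw-1b"  # placeholder Falcon checkpoint
text = "The capital of France is"

hf_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
hf_model = AutoModelForCausalLM.from_pretrained(checkpoint)
hf_model.eval()
hf_inputs = hf_tokenizer(text, return_tensors="pt")
with torch.no_grad():
    hf_logits = hf_model(**hf_inputs).logits[0].numpy()

keras_lm = keras_hub.models.FalconCausalLM.from_preset(f"hf://{checkpoint}")
# Reuse the HF token ids so tokenizer differences don't affect the comparison.
token_ids = hf_inputs["input_ids"].numpy()
padding_mask = np.ones_like(token_ids, dtype=bool)
hidden = keras_lm.backbone({"token_ids": token_ids, "padding_mask": padding_mask})
keras_logits = keras_lm.backbone.token_embedding(hidden, reverse=True)
keras_logits = keras.ops.convert_to_numpy(keras_logits)[0]

print("max abs logit diff:", np.max(np.abs(hf_logits - keras_logits)))
```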
3 changes: 3 additions & 0 deletions keras_hub/src/utils/transformers/preset_loader.py
@@ -7,6 +7,7 @@
from keras_hub.src.utils.transformers import convert_bart
from keras_hub.src.utils.transformers import convert_bert
from keras_hub.src.utils.transformers import convert_distilbert
from keras_hub.src.utils.transformers import convert_falcon
from keras_hub.src.utils.transformers import convert_gemma
from keras_hub.src.utils.transformers import convert_gpt2
from keras_hub.src.utils.transformers import convert_llama3
@@ -41,6 +42,8 @@ def __init__(self, preset, config):
self.converter = convert_pali_gemma
elif model_type == "vit":
self.converter = convert_vit
elif model_type == "falcon":
self.converter = convert_falcon
else:
raise ValueError(
"KerasHub has no converter for huggingface/transformers models "