
Commit 462d141

Add files via upload
1 parent 1566af4 commit 462d141


46 files changed: +85 −47 lines

bert4keras3/Layers_add/Attentions.py

Lines changed: 5 additions & 4 deletions
@@ -173,7 +173,7 @@ def call(self, inputs, mask=None, **kwargs):
         o = self.o_dense(ops.reshape(o, [b,s,-1]))
         # return the result

-
+
         if use_cache:
             return o,cache
         if self.return_attention_scores:
@@ -282,17 +282,18 @@ def pay_attention_to(self, inputs, mask=None, **kwargs):
         a = a * ops.cast(1/np.sqrt(self.key_size), dtype=qw.dtype)
         if a_bias is not None and ops.ndim(a_bias) == 3:
             a_bias = align(a_bias, [0, -2, -1], ops.ndim(a))
-
-        A,mask = attention_normalize(a, v_mask, -1, self.normalization, a_bias)
+        A = attention_normalize(a, v_mask, -1, self.normalization, a_bias)

         if self.attention_dropout:
-            A,mask = self.dropout(A)
+            A = self.dropout(A)
+
         # finish the output
         if self.query_head!=self.heads:
             o = ops.einsum("bkgts,bskh->btkgh", A, vw)
             o = ops.reshape(o, (b, s, self.query_head, -1))
         else:
             o = ops.einsum('bhjk,bkhd->bjhd', A, vw)
+
         if p_bias == 'typical_relative':
             o = o + ops.einsum('bhjk,jkd->bjhd', A, position_bias)
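The functional change here is that attention_normalize and self.dropout now return a single tensor rather than an (A, mask) tuple. A minimal sketch of the new single-return contract, assuming softmax normalization and a value mask that broadcasts to the score shape; the argument order mirrors the call site above, but the body and the 1e12 masking constant are illustrative, not taken from the bert4keras3 source:

from keras import ops

def attention_normalize(a, v_mask=None, axis=-1, method='softmax', a_bias=None):
    # Sketch: only a softmax branch; the real helper also dispatches on `method`.
    if a_bias is not None:
        a = a + a_bias                      # additive attention bias
    if v_mask is not None:
        v_mask = ops.cast(v_mask, a.dtype)  # assumed broadcastable to a's shape
        a = a - (1.0 - v_mask) * 1e12       # drive masked logits toward -inf
    return ops.softmax(a, axis=axis)        # single return value, no mask

scores = ops.ones((2, 4, 8, 8))             # (batch, heads, query, key)
A = attention_normalize(scores)             # callers now unpack one tensor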

bert4keras3/Layers_add/FFN.py

Lines changed: 11 additions & 9 deletions
@@ -13,6 +13,7 @@ def __init__(
         activation='relu',
         use_bias=True,
         kernel_initializer='glorot_uniform',
+
         **kwargs
     ):
         super(FeedForward, self).__init__(**kwargs)
@@ -98,34 +99,35 @@ class LLamaFeedForward(FeedForward):
     def build(self, input_shape):
         super(FeedForward, self).build(input_shape)
         output_dim = input_shape[-1]
-        self._feedforward_intermediate_dense = keras.layers.Dense(
+        self._feedforward_gate_dense = keras.layers.Dense(
             self.units,
             kernel_initializer=self.kernel_initializer,
             use_bias=self.use_bias,
-            name="feedforward_intermediate_dense",
+            name="feedforward_gate_dense",
         )
-        self._feedforward_gate_dense = keras.layers.Dense(
+        self._feedforward_intermediate_dense = keras.layers.Dense(
             self.units,
             kernel_initializer=self.kernel_initializer,
             use_bias=self.use_bias,
-            name="feedforward_gate_dense",
+            name="feedforward_intermediate_dense",
         )
+

         self._feedforward_output_dense = keras.layers.Dense(
             output_dim,
             kernel_initializer=self.kernel_initializer,
-            use_bias=False,
-            dtype=self.use_bias,
+            use_bias=self.use_bias,
             name="feedforward_output_dense",
         )
     @recompute_grad
     def call(self, x):
+
         activation = activations.get(self.activation[0])
         gate_output = self._feedforward_gate_dense(x)
-        gate_output = ops.cast(gate_output, "float32")
+        #gate_output = ops.cast(gate_output, "float32")
         gate_output = activation(gate_output)
-        gate_output = ops.cast(gate_output, x.dtype)
+        #gate_output = ops.cast(gate_output, x.dtype)
         x = self._feedforward_intermediate_dense(x)
         x = self._feedforward_output_dense(ops.multiply(x, gate_output))
-        return x
+        return x#
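Besides swapping the construction order of the gate and intermediate projections so each layer name matches the weights it holds, the commit fixes the output projection (use_bias=self.use_bias replaces the stray dtype=self.use_bias) and comments out the float32 casts around the gate activation. The data flow is a gated feed-forward (SwiGLU-style) block; a minimal functional sketch under Keras 3, where gated_ffn and the layer names are illustrative, not the library's API:

import keras
from keras import ops

def gated_ffn(x, units, activation='silu'):
    # gate branch -> activation -> elementwise product with the linear branch
    output_dim = x.shape[-1]
    gate = keras.layers.Dense(units, name='gate_dense')(x)
    gate = keras.activations.get(activation)(gate)             # e.g. SiLU/Swish
    up = keras.layers.Dense(units, name='intermediate_dense')(x)
    return keras.layers.Dense(output_dim, name='output_dense')(
        ops.multiply(up, gate))                                # project back down

inputs = keras.Input(shape=(16, 64))
outputs = gated_ffn(inputs, units=256)                         # (None, 16, 64)
model = keras.Model(inputs, outputs)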

bert4keras3/Layers_add/LayerNorms.py

Lines changed: 1 addition & 0 deletions
@@ -204,6 +204,7 @@ def call(self, x):
         x = ops.cast(x, "float32")
         var = ops.mean(ops.power(x, 2), axis=-1, keepdims=True)
         x = x * ops.rsqrt(var + self.epsilon)
+
         return ops.cast(x, self.compute_dtype) * self.scale

     def get_config(self):
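The change above only inserts a blank line, but the surrounding code is an RMSNorm-style normalization: compute in float32, divide by the root mean square, then cast back and apply a learned scale. A minimal numpy sketch of the same formula, with the function name and epsilon default being illustrative:

import numpy as np

def rms_norm(x, scale, epsilon=1e-6):
    # y = x / sqrt(mean(x**2) + eps) * scale, accumulated in float32
    x32 = x.astype("float32")
    var = np.mean(np.power(x32, 2), axis=-1, keepdims=True)    # mean of squares
    y = x32 * (1.0 / np.sqrt(var + epsilon))                   # rsqrt normalize
    return y.astype(x.dtype) * scale                           # restore dtype

x = np.random.randn(2, 8).astype("float16")
y = rms_norm(x, scale=np.ones(8, dtype="float16"))             # shape (2, 8)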
7 binary files not shown.
