@@ -159,6 +159,14 @@ def call(self, inputs, training=None):
           data_format="channels_last")
     else:
       bias = 0
+    # If loaded from a ckpt, bias_quantizer is the ckpt value.
+    # Else, if the layer is called for the first time, bias_quantizer is
+    # None and we need to derive the bias quantizer type from the
+    # accumulator type.
+    if self.bias_quantizer_internal is not None:
+      q_bias = self.bias_quantizer_internal(bias)
+    else:
+      q_bias = bias

     # begin batchnorm
     _ = self.batchnorm(qdense_outputs, training=bn_training)
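The added block quantizes the dense bias before it is folded with the batchnorm statistics in the hunks below. For orientation, the folding arithmetic that later consumes q_bias is the usual batchnorm fold; a minimal NumPy sketch, with a made-up uniform quantizer (fake_bias_quantizer) standing in for bias_quantizer_internal:

import numpy as np

def fake_bias_quantizer(x, bits=8):
  # Hypothetical stand-in for bias_quantizer_internal: symmetric uniform
  # quantization to `bits` bits over roughly [-1, 1).
  scale = 2.0 ** (bits - 1)
  return np.clip(np.round(x * scale), -scale, scale - 1) / scale

bias = np.array([0.013, -0.420])            # dense-layer bias
mean = np.array([0.10, -0.20])              # bn batch/moving mean
var = np.array([0.50, 0.25])                # bn batch/moving variance
beta = np.array([0.00, 0.10])               # bn beta
gamma = np.array([1.20, 0.90])              # bn gamma
eps = 1e-3

q_bias = fake_bias_quantizer(bias)          # quantize first, as in this change
inv = gamma / np.sqrt(var + eps)            # plays the role of rsqrt(var + eps) * gamma
folded_bias = inv * (q_bias - mean) + beta  # same fold as in the hunks below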
@@ -195,8 +203,9 @@ def call(self, inputs, training=None):
       inv = math_ops.rsqrt(new_variance + self.batchnorm.epsilon)
       if gamma is not None:
         inv *= gamma
+
       # fold bias with bn stats
-      folded_bias = inv * (bias - new_mean) + beta
+      folded_bias = inv * (q_bias - new_mean) + beta

     elif self.folding_mode == "ema_stats_folding":
       # We always scale the weights with a correction factor to the long term
@@ -218,35 +227,25 @@ def call(self, inputs, training=None):
         batch_inv *= gamma
       folded_bias = tf_utils.smart_cond(
           bn_training,
-          lambda: batch_inv * (bias - mean) + beta,
-          lambda: mv_inv * (bias - moving_mean) + beta)
+          lambda: batch_inv * (q_bias - mean) + beta,
+          lambda: mv_inv * (q_bias - moving_mean) + beta)
       # moving stats is always used to fold kernel in tflite; before bn freeze
       # an additional correction factor will be applied to the conv2d output
       # end batchnorm
       inv = mv_inv
     else:
       assert ValueError

-    # wrap qdense kernel with bn parameters
-    folded_kernel = inv * kernel
-    # quantize the folded kernel
+    # quantize the kernel before folding in the bn parameters
     if self.kernel_quantizer is not None:
-      q_folded_kernel = self.kernel_quantizer_internal(folded_kernel)
+      q_kernel = self.kernel_quantizer_internal(kernel)
     else:
-      q_folded_kernel = folded_kernel
-
-    # If loaded from a ckpt, bias_quantizer is the ckpt value
-    # Else if the layer is called for the first time, in this case bias
-    # quantizer is None and we need to calculate bias quantizer
-    # type according to accumulator type
-
-    if self.bias_quantizer_internal is not None:
-      q_folded_bias = self.bias_quantizer_internal(folded_bias)
-    else:
-      q_folded_bias = folded_bias
+      q_kernel = kernel
+    # wrap the quantized kernel with bn parameters
+    folded_kernel = inv * q_kernel

-    applied_kernel = q_folded_kernel
-    applied_bias = q_folded_bias
+    applied_kernel = folded_kernel
+    applied_bias = folded_bias

     #calculate qdense output using the quantized folded kernel
     folded_outputs = tf.keras.backend.dot(inputs, applied_kernel)
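Taken together, the last hunk reorders the kernel path: the removed code folded the raw kernel with the bn scale and then quantized the result, while the new code quantizes the kernel first and folds afterwards (the bias is handled the same way via q_bias). A minimal NumPy sketch of that order-of-operations change, again using a made-up uniform quantizer in place of kernel_quantizer_internal:

import numpy as np

def fake_kernel_quantizer(w, bits=8):
  # Hypothetical stand-in for kernel_quantizer_internal: symmetric uniform
  # quantization to `bits` bits over roughly [-1, 1).
  scale = 2.0 ** (bits - 1)
  return np.clip(np.round(w * scale), -scale, scale - 1) / scale

kernel = np.array([[0.30, -0.70],
                   [0.05, 0.90]])    # (input_dim, units)
inv = np.array([1.70, 0.60])         # per-unit bn scale: gamma * rsqrt(var + eps)

# Removed path: fold with the bn scale, then quantize the folded kernel.
old_applied_kernel = fake_kernel_quantizer(inv * kernel)

# New path: quantize the kernel, then fold with the bn scale.
new_applied_kernel = inv * fake_kernel_quantizer(kernel)

# The two generally differ: in the new path the bn scale multiplies weights
# that already sit on the quantizer grid instead of being absorbed into the
# quantization itself.
print(old_applied_kernel)
print(new_applied_kernel)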