Merge pull request #1 from julesmuhizi/qdbn_post_fold_quant

julesmuhizi · web-flow · commit 6a6599fd5866 · 2022-01-25T12:00:17.000-05:00
included weight transpose
diff --git a/qkeras/qdense_batchnorm.py b/qkeras/qdense_batchnorm.py
@@ -159,14 +159,6 @@ def call(self, inputs, training=None):
         data_format="channels_last")
     else:
       bias = 0
-    # If loaded from a ckpt, bias_quantizer is the ckpt value
-    # Else if the layer is called for the first time, in this case bias
-    #   quantizer is None and we need to calculate bias quantizer
-    #   type according to accumulator type
-    if self.bias_quantizer_internal is not None:
-        q_bias = self.bias_quantizer_internal(bias)
-    else:
-        q_bias = bias
 
     # begin batchnorm
     _ = self.batchnorm(qdense_outputs, training=bn_training)
@@ -205,7 +197,7 @@ def call(self, inputs, training=None):
         inv *= gamma
 
       # fold bias with bn stats
-      folded_bias = inv * (q_bias - new_mean) + beta
+      folded_bias = inv * (bias - new_mean) + beta
 
     elif self.folding_mode == "ema_stats_folding":
         # We always scale the weights with a correction factor to the long term
@@ -227,25 +219,31 @@ def call(self, inputs, training=None):
             batch_inv *= gamma
         folded_bias = tf_utils.smart_cond(
           bn_training,
-          lambda: batch_inv * (q_bias - mean) + beta,
-          lambda: mv_inv * (q_bias - moving_mean) + beta)
+          lambda: batch_inv * (bias - mean) + beta,
+          lambda: mv_inv * (bias - moving_mean) + beta)
         # moving stats is always used to fold kernel in tflite; before bn freeze
         # an additional correction factor will be applied to the conv2d output
         # end batchnorm 
         inv = mv_inv
     else:
         assert ValueError
 
+    # wrap dense kernel with bn parameters
+    folded_kernel = inv*kernel
     # quantize the folded kernel
     if self.kernel_quantizer is not None:
-        q_kernel = self.kernel_quantizer_internal(kernel)
+        q_folded_kernel = self.kernel_quantizer_internal(folded_kernel)
+    else:
+        q_folded_kernel = folded_kernel
+    
+    #quantize the folded bias
+    if self.bias_quantizer_internal is not None:
+        q_folded_bias = self.bias_quantizer_internal(folded_bias)
     else:
-        q_kernel = kernel
-    # wrap qdense kernel with bn parameters
-    folded_kernel = inv * q_kernel
+        q_folded_bias = folded_bias
 
-    applied_kernel = folded_kernel
-    applied_bias = folded_bias
+    applied_kernel = q_folded_kernel
+    applied_bias = q_folded_bias
     
     #calculate qdense output using the quantized folded kernel
     folded_outputs = tf.keras.backend.dot(inputs, applied_kernel)
@@ -290,8 +288,9 @@ def get_quantization_config(self):
         "kernel_quantizer": str(self.kernel_quantizer_internal),
         "bias_quantizer": str(self.bias_quantizer_internal),
     }
-    def get_quantizers(self):
-      return self.quantizers
+    
+  def get_quantizers(self):
+    return self.quantizers
 
   # def get_prunable_weights(self):
   #   return [self.kernel]