diff --git a/dlrm_s_pytorch.py b/dlrm_s_pytorch.py index 9192b607..f6c51ae0 100644 --- a/dlrm_s_pytorch.py +++ b/dlrm_s_pytorch.py @@ -209,8 +209,7 @@ def create_mlp(self, ln, sigmoid_layer): mean = 0.0 # std_dev = np.sqrt(variance) std_dev = np.sqrt(2 / (m + n)) # np.sqrt(1 / m) # np.sqrt(1 / n) W = np.random.normal(mean, std_dev, size=(m, n)).astype(np.float32) - std_dev = np.sqrt(1 / m) # np.sqrt(2 / (m + 1)) - bt = np.random.normal(mean, std_dev, size=m).astype(np.float32) + bt = np.zeros(m).astype(np.float32) # approach 1 LL.weight.data = torch.tensor(W, requires_grad=True) LL.bias.data = torch.tensor(bt, requires_grad=True)