Add group norm GB backward (diopiGroupNormGBBackward)

Yin Hongyun · Yin Hongyun · commit 20d83c4b7f98 · 2024-11-28T23:37:35.000+08:00
diff --git a/diopi_test/python/configs/diopi_configs.py b/diopi_test/python/configs/diopi_configs.py
@@ -7209,23 +7209,23 @@
         atol=1e-4,
         rtol=1e-5,
         para=dict(
-            num_groups=[32, 4, 5, 1],
-            eps=[1e-05, 1e-05, 1e-05, 1e-05],
-            reduced_axes = [[2, 3], [1, 3], [0, 3], [2, 3]],
-            channel_axis = [1, 2, 1, 0]
+            num_groups=[32],
+            eps=[1e-05],
+            reduced_axes = [[2, 3]],
+            channel_axis = [1]
         ),
         tensor_para=dict(
             args=[
                 {
                     "ins": ["input"],
-                    "shape": ((2, 256, 7, 10), (2, 256, 12, 12),
-                            (12, 15, 8, 9),(3, 6, 9, 0)),
+                    "requires_grad": [True],
+                    "shape": ((2, 256, 12, 10),),
                     "dtype": [np.float32, np.float64, np.float16],
                 },
                 {
                     "ins": ["weight", "bias"],
-                    "shape": ((256,), (12,),
-                               (15,), (3,)),
+                    "requires_grad": [True],
+                    "shape": ((256,),),
                     "dtype": [np.float32, np.float64, np.float16],
                 },
             ]
diff --git a/diopi_test/python/conformance/diopi_functions.py b/diopi_test/python/conformance/diopi_functions.py
@@ -5275,6 +5275,48 @@ def group_norm_GB(input, num_groups, weight=None, bias=None, eps=1e-05, reduced_
     GLOBAL_STATE["group_norm_GB_save_invstd"] = save_invstd
     return out
 
+
+def group_norm_GB_backward(
+    input,
+    grad_outputs,
+    num_groups,
+    weight=None,
+    bias=None,
+    eps=1e-05,
+    reduced_axes=[2, 3], 
+    channel_axis=1,
+    **kwargs,
+) -> Tensor:
+    assert len(grad_outputs) == 1, "only accept 1 gradient to do backward"
+    save_mean = GLOBAL_STATE.pop("group_norm_GB_save_mean")
+    save_invstd = GLOBAL_STATE.pop("group_norm_GB_save_invstd")
+    grad_input = raw_like(input)
+    grad_weight = raw_like(weight)
+    grad_bias = raw_like(bias)
+    weight = None if weight is None else weight
+    bias = None if bias is None else bias
+
+    out = {"input": grad_input, "weight": grad_weight, "bias": grad_bias}
+    func = check_function("diopiGroupNormGBBackward")
+    reduced_axes = Sizes(reduced_axes)
+    ret = func(
+        input.context(),
+        grad_input,
+        grad_weight,
+        grad_bias,
+        grad_outputs[0],
+        input,
+        weight,
+        save_mean,
+        save_invstd,
+        num_groups,
+        reduced_axes, 
+        channel_axis,
+    )
+    check_returncode(ret)
+    return {k: v for k, v in out.items() if v.requires_grad}
+
+
 def group_norm(input, num_groups, weight=None, bias=None, eps=1e-05):
     dim = list(input.size().data)
     save_mean = Tensor((dim[0], num_groups), input.get_dtype())
diff --git a/impl/torch/functions/functions.cpp b/impl/torch/functions/functions.cpp
@@ -4240,6 +4240,74 @@ diopiError_t diopiGroupNormGB(diopiContextHandle_t ctx, diopiTensorHandle_t out,
     return diopiSuccess;
 }
 
+// Backward of diopiGroupNormGB: permute to [kept axes, channel axis, reduced axes], flatten to (N, C, HxW, 1), reuse ATen's kernel.
+diopiError_t diopiGroupNormGBBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiTensorHandle_t grad_weight, diopiTensorHandle_t grad_bias,
+                                    diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiConstTensorHandle_t weight,
+                                    diopiConstTensorHandle_t mean, diopiConstTensorHandle_t rstd, int64_t num_groups, diopiSize_t reduced_axes, const int64_t channel_axis) {
+    impl::aten::setCurStream(ctx);
+    auto atGradOutput = impl::aten::buildATen(grad_output);
+    auto atInput = impl::aten::buildATen(input);
+    auto atWeight = impl::aten::buildATen(weight);
+    auto atSaveMean = impl::aten::buildATen(mean);
+    auto atSaveInvstd = impl::aten::buildATen(rstd);
+    auto atGradWeight = impl::aten::buildATen(grad_weight);
+    auto atGradBias = impl::aten::buildATen(grad_bias);
+    // True when `axis` is listed in reduced_axes. NOTE(review): negative axes are not normalized here - confirm callers pass non-negative values.
+    auto isReducedAxis = [&reduced_axes](int64_t axis) {
+        for (int64_t m = 0; m < reduced_axes.len; ++m) {
+            if (reduced_axes.data[m] == axis) {
+                return true;
+            }
+        }
+        return false;
+    };
+    // Permutation order: the remaining axes (folded into N), then the channel axis, then the reduced axes (folded into HxW).
+    std::vector<int64_t> dims;
+    int64_t N = 1;
+    for (int64_t i = 0; i < atInput.dim(); ++i) {
+        if (i == channel_axis || isReducedAxis(i)) {
+            continue;
+        }
+        dims.push_back(i);
+        N *= atInput.size(i);
+    }
+    dims.push_back(channel_axis);
+    int64_t HxW = 1;
+    for (int64_t i = 0; i < reduced_axes.len; ++i) {
+        dims.push_back(reduced_axes.data[i]);
+        HxW *= atInput.size(reduced_axes.data[i]);
+    }
+    const int64_t C = atInput.size(channel_axis);
+    auto permutedInput = atInput.permute(dims);
+    auto permutedShape = permutedInput.sizes();
+    auto reshapedInput = permutedInput.reshape({N, C, HxW, 1}).contiguous();
+    auto reshapedGradOutput = atGradOutput.permute(dims).reshape({N, C, HxW, 1});
+
+    // Inverse permutation, used to bring grad_input back to the caller's axis order.
+    std::vector<int64_t> reverse_order(dims.size());
+    for (size_t i = 0; i < dims.size(); ++i) {
+        reverse_order[dims[i]] = static_cast<int64_t>(i);
+    }
+    if (grad_weight && grad_bias) {
+        auto atGradInputOut = impl::aten::buildATen(grad_input);
+        auto atGradInput = atGradInputOut.permute(dims).reshape({N, C, HxW, 1});
+        at::native_group_norm_backward_out(
+            atGradInput, atGradWeight, atGradBias, reshapedGradOutput, reshapedInput, atSaveMean, atSaveInvstd, atWeight, N, C, HxW, num_groups, {true, true, true});
+        // reshape() is a view only for compatible layouts; otherwise the kernel filled a temporary copy that must be stored back into grad_input.
+        if (!atGradInput.is_alias_of(atGradInputOut)) {
+            atGradInputOut.copy_(atGradInput.reshape(permutedShape).permute(reverse_order));
+        }
+    } else {
+        auto atOuts = at::native_group_norm_backward(
+            reshapedGradOutput, reshapedInput, atSaveMean, atSaveInvstd, atWeight, N, C, HxW, num_groups, {true, grad_weight != nullptr, grad_bias != nullptr});
+        impl::aten::updateATen2Tensor(ctx, std::get<0>(atOuts).reshape(permutedShape).permute(reverse_order), grad_input);
+        impl::aten::updateATen2Tensor(ctx, std::get<1>(atOuts), grad_weight);
+        impl::aten::updateATen2Tensor(ctx, std::get<2>(atOuts), grad_bias);
+    }
+
+    return diopiSuccess;
+}
+
 diopiError_t diopiGroupNorm(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t save_mean, diopiTensorHandle_t save_invstd,
                             diopiConstTensorHandle_t input, diopiConstTensorHandle_t weight, diopiConstTensorHandle_t bias, int64_t num_groups,
                             double eps) {
diff --git a/proto/include/diopi/functions.h b/proto/include/diopi/functions.h
@@ -3607,6 +3607,13 @@ DIOPI_API diopiError_t diopiGroupNormGB(diopiContextHandle_t ctx, diopiTensorHan
                                       diopiConstTensorHandle_t input, diopiConstTensorHandle_t weight, diopiConstTensorHandle_t bias, int64_t num_groups,
                                       double eps, diopiSize_t reduced_axes, const int64_t channel_axis);
 
+/**
+ * @brief Compute the backward pass of diopiGroupNormGB().
+ */
+DIOPI_API diopiError_t diopiGroupNormGBBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiTensorHandle_t grad_weight,
+                                              diopiTensorHandle_t grad_bias, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input,
+                                              diopiConstTensorHandle_t weight, diopiConstTensorHandle_t mean, diopiConstTensorHandle_t rstd,
+                                              int64_t num_groups, diopiSize_t reduced_axes, const int64_t channel_axis);
 /**
  * @brief Compute the backward pass of diopiGroupNorm().
  * @param[in] ctx Context environment.