1- import torch .nn as nn
2- from msd_pytorch .conv import Conv2dInPlaceModule
3- from msd_pytorch .conv_relu import ConvRelu2dInPlaceModule
4- from msd_pytorch .stitch import stitchLazy , StitchCopyModule , StitchBuffer
1+ import torch
52from math import sqrt
63import numpy as np
4+ from msd_pytorch .msd_block import MSDBlock2d
75
86
97def units_in_front (c_in , width , layer_depth ):
@@ -59,57 +57,7 @@ def init_convolution_weights(conv_weight, c_in, c_out, width, depth):
5957 conv_weight .normal_ (0 , std_dev )
6058
6159
class MSDLayerModule(nn.Module):
    """A single hidden layer of the MSD module.

    Its primary responsibility is to define the `forward()` method:
    apply this layer's dilated convolution + ReLU in place into the
    shared stitch buffer. It is used by `MSDModule` and is *not*
    responsible for buffer management or weight initialization.
    """

    def __init__(self, buffer, c_in, layer_depth, width, dilation):
        """Initialize the hidden layer.

        :param buffer: a StitchBuffer object storing the L and G buffers.
        :param c_in: the number of input channels of the MSD module.
        :param layer_depth:
            the depth of this layer in the MSD module (zero-based: the
            first hidden layer has index zero).
        :param width: the width of the MSD module.
        :param dilation:
            integer dilation factor for the convolutions in this layer.
        :returns: a module for the MSD hidden layer.
        :rtype: MSDLayerModule
        """
        super(MSDLayerModule, self).__init__()
        self.buffer = buffer
        self.in_front = units_in_front(c_in, width, layer_depth)
        self.width = width
        # The convolution's output tensor is not known yet; it is
        # re-pointed at the right buffer slice on every forward pass,
        # so pass None for now.
        self.convolution = ConvRelu2dInPlaceModule(
            None, self.in_front, width, kernel_size=3, dilation=dilation
        )

    def forward(self, input):
        # Aim the in-place convolution at this layer's channel slice of L.
        target = self.buffer.L.narrow(1, self.in_front, self.width)
        self.convolution.output = target
        result = self.convolution(input)
        # Lazily stitch the result into the shared L/G buffers.
        return stitchLazy(result, self.buffer.L, self.buffer.G, self.in_front)
111-
112- class MSDFinalLayer (nn .Module ):
60+ class MSDFinalLayer (torch .nn .Module ):
11361 """Documentation for MSDFinalLayer
11462
11563 Implements the final 1x1 multiplication and bias addition for all
@@ -122,7 +70,7 @@ def __init__(self, c_in, c_out):
12270 super (MSDFinalLayer , self ).__init__ ()
12371 self .c_in = c_in
12472 self .c_out = c_out
125- self .linear = nn .Conv1d (c_in , c_out , 1 )
73+ self .linear = torch . nn .Conv1d (c_in , c_out , 1 )
12674 self .reset_parameters ()
12775
12876 def forward (self , input ):
@@ -140,7 +88,7 @@ def reset_parameters(self):
14088 self .linear .bias .data .zero_ ()
14189
14290
class MSDModule(torch.nn.Module):
    """A mixed-scale dense (MSD) network module.

    Stacks ``depth`` dilated convolutions (``MSDBlock2d``), whose
    outputs are all concatenated with the input, followed by a 1x1
    linear mapping (``MSDFinalLayer``) down to ``c_out`` channels.
    """

    def __init__(
        self, c_in, c_out, depth, width, dilations=None
    ):
        """Create a new MSD module.

        :param c_in: number of input channels.
        :param c_out: number of output channels.
        :param depth: number of hidden layers; must be at least 1.
        :param width: channels per hidden layer; must be at least 1.
        :param dilations:
            A list of dilations to use. Default is ``[1, 2, ..., 10]``. A
            good alternative is ``[1, 2, 4, 8]``. The dilations are
            repeated when the depth of the module exceeds the length of
            the list.

        :returns: an MSD module
        :rtype: MSDModule

        """
        super(MSDModule, self).__init__()
        # Use a None sentinel instead of a mutable default argument,
        # which would be shared across every call of __init__.
        if dilations is None:
            dilations = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

        # Validate parameters *before* they are used below (the original
        # code built self.dilations from `depth` prior to checking it,
        # and an empty `dilations` raised an opaque ZeroDivisionError).
        if depth < 1:
            raise ValueError(f"Depth must be at least 1. Got: {depth}.")
        if width < 1:
            raise ValueError(f"Width must be at least 1. Got: {width}.")
        if not dilations:
            raise ValueError("Dilations must be a non-empty sequence.")

        self.c_in = c_in
        self.c_out = c_out
        self.depth = depth
        self.width = width
        # Repeat the dilation pattern so every hidden layer gets one.
        self.dilations = [dilations[i % len(dilations)] for i in range(depth)]

        self.msd_block = MSDBlock2d(self.c_in, self.dilations, self.width)
        # Each hidden layer adds `width` channels on top of the input,
        # so the final 1x1 layer sees c_in + width * depth channels.
        self.final_layer = MSDFinalLayer(c_in=c_in + width * depth, c_out=c_out)

        self.reset_parameters()

    def reset_parameters(self):
        """Reinitialize hidden-layer weights and zero all biases."""
        # Initialize weights for hidden layers:
        for w in self.msd_block.weights:
            init_convolution_weights(
                w.data, self.c_in, self.c_out, self.width, self.depth
            )

        self.msd_block.bias.data.zero_()
        self.final_layer.reset_parameters()

    def forward(self, input):
        output = self.msd_block(input)
        output = self.final_layer(output)
        return output
0 commit comments