Batch fill rectangle (keras-team#65)

(Ian Stenbit) · LukeWood · (Ian Stenbit) · commit d9ed09c3d22a · 2022-01-29T19:53:36.000-08:00
* temporary push benchmark file

* temporary push benchmark file

* refactor fill_utils.py

* refactor

* refactor

* batch fill_rectangle

* small refactor

* docstring fix

* Fix issue after rebase

Co-authored-by: Luke Wood <lukewood@google.com>
diff --git a/keras_cv/layers/preprocessing/cut_mix.py b/keras_cv/layers/preprocessing/cut_mix.py
@@ -120,17 +120,13 @@ def _cutmix(self, images, labels):
         lambda_sample = 1.0 - bbox_area / (image_height * image_width)
         lambda_sample = tf.cast(lambda_sample, dtype=tf.float32)
 
-        images = tf.map_fn(
-            lambda x: fill_utils.fill_rectangle(*x),
-            (
-                images,
-                random_center_width,
-                random_center_height,
-                cut_width // 2,
-                cut_height // 2,
-                tf.gather(images, permutation_order),
-            ),
-            fn_output_signature=tf.TensorSpec.from_tensor(images[0]),
+        images = fill_utils.fill_rectangle(
+            images,
+            random_center_width,
+            random_center_height,
+            cut_width,
+            cut_height,
+            tf.gather(images, permutation_order),
         )
 
         return images, labels, lambda_sample, permutation_order
diff --git a/keras_cv/layers/preprocessing/random_cutout.py b/keras_cv/layers/preprocessing/random_cutout.py
@@ -31,7 +31,7 @@ class RandomCutout(layers.Layer):
             in the range `[20% of image height, 30% of image height]`.
             `height_factor=(32, 64)` results in a height picked in the range
             [32, 64]. `height_factor=0.2` results in a height of [0%, 20%] of image
-            height, and `height_factor=32` results in a height of 32.
+            height, and `height_factor=32` results in a height in the range [0, 32].
         width_factor: One of:
             - a positive float representing a fraction of image width
             - an integer representing an absolute width
@@ -40,7 +40,7 @@ class RandomCutout(layers.Layer):
             in the range `[20% of image width, 30% of image width]`.
             `width_factor=(32, 64)` results in a width picked in the range
             [32, 64]. `width_factor=0.2` results in a width of [0%, 20%] of image
-            width, and `width_factor=32` results in a width of 32.
+            width, and `width_factor=32` results in a width in the range [0, 32].
         fill_mode: Pixels inside the patches are filled according to the given
             mode (one of `{"constant", "gaussian_noise"}`).
             - *constant*: Pixels are filled with the same constant value.
@@ -151,19 +151,13 @@ def _random_cutout(self, inputs):
         center_x, center_y = self._compute_rectangle_position(inputs)
         rectangle_height, rectangle_width = self._compute_rectangle_size(inputs)
         rectangle_fill = self._compute_rectangle_fill(inputs)
-        half_height = tf.cast(tf.math.ceil(rectangle_height / 2), tf.int32)
-        half_width = tf.cast(tf.math.ceil(rectangle_width / 2), tf.int32)
-        inputs = tf.map_fn(
-            lambda x: fill_utils.fill_rectangle(*x),
-            (
-                inputs,
-                center_y,
-                center_x,
-                half_width,
-                half_height,
-                rectangle_fill,
-            ),
-            fn_output_signature=tf.TensorSpec.from_tensor(inputs[0]),
+        inputs = fill_utils.fill_rectangle(
+            inputs,
+            center_x,
+            center_y,
+            rectangle_width,
+            rectangle_height,
+            rectangle_fill,
         )
         return inputs
 
@@ -177,14 +171,14 @@ def _compute_rectangle_position(self, inputs):
         center_x = tf.random.uniform(
             shape=[batch_size],
             minval=0,
-            maxval=image_height,
+            maxval=image_width,
             dtype=tf.int32,
             seed=self.seed,
         )
         center_y = tf.random.uniform(
             shape=[batch_size],
             minval=0,
-            maxval=image_width,
+            maxval=image_height,
             dtype=tf.int32,
             seed=self.seed,
         )
diff --git a/keras_cv/utils/fill_utils.py b/keras_cv/utils/fill_utils.py
@@ -10,42 +10,80 @@
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
-# limitations under the License.grep -q Copyright $i
+# limitations under the License.
 import tensorflow as tf
 
+from keras_cv.utils import bbox
 
-def fill_rectangle(
-    image, center_width, center_height, half_width, half_height, fill=None
-):
-    """Fill a rectangle in a given image using the value provided in replace.
+
+def rectangle_masks(mask_shape, corners):
+    """Computes positional masks of rectangles in images
 
     Args:
-        image: the starting image to fill the rectangle on.
-        center_width: the X center of the rectangle to fill
-        center_height: the Y center of the rectangle to fill
-        half_width: 1/2 the width of the resulting rectangle
-        half_height: 1/2 the height of the resulting rectangle
-        fill: A tensor with same shape as image. Values at rectangle
-         position are used as fill.
+        mask_shape: shape of the masks as [batch_size, height, width].
+        corners: rectangle coordinates in corners format.
+
     Returns:
-        image: the modified image with the chosen rectangle filled.
+        boolean masks with True at rectangle positions.
     """
-    image_shape = tf.shape(image)
-    image_height = image_shape[0]
-    image_width = image_shape[1]
-
-    lower_pad = tf.maximum(0, center_height - half_height)
-    upper_pad = tf.maximum(0, image_height - center_height - half_height)
-    left_pad = tf.maximum(0, center_width - half_width)
-    right_pad = tf.maximum(0, image_width - center_width - half_width)
-
-    shape = [
-        image_height - (lower_pad + upper_pad),
-        image_width - (left_pad + right_pad),
-    ]
-    padding_dims = [[lower_pad, upper_pad], [left_pad, right_pad]]
-    mask = tf.pad(tf.zeros(shape, dtype=image.dtype), padding_dims, constant_values=1)
-    mask = tf.expand_dims(mask, -1)
-
-    image = tf.where(tf.equal(mask, 0), fill, image)
-    return image
+    # add broadcasting axes
+    corners = corners[..., tf.newaxis, tf.newaxis]
+
+    # split coordinates
+    x0 = corners[:, 0]
+    y0 = corners[:, 1]
+    x1 = corners[:, 2]
+    y1 = corners[:, 3]
+
+    # repeat height and width
+    batch_size, height, width = mask_shape
+    x0_rep = tf.repeat(x0, height, axis=1)
+    y0_rep = tf.repeat(y0, width, axis=2)
+    x1_rep = tf.repeat(x1, height, axis=1)
+    y1_rep = tf.repeat(y1, width, axis=2)
+
+    # range grid
+    range_row = tf.range(0, height, dtype=corners.dtype)
+    range_col = tf.range(0, width, dtype=corners.dtype)
+    range_row = tf.repeat(range_row[tf.newaxis, :, tf.newaxis], batch_size, 0)
+    range_col = tf.repeat(range_col[tf.newaxis, tf.newaxis, :], batch_size, 0)
+
+    # boolean masks
+    mask_x0 = tf.less_equal(x0_rep, range_col)
+    mask_y0 = tf.less_equal(y0_rep, range_row)
+    mask_x1 = tf.less(range_col, x1_rep)
+    mask_y1 = tf.less(range_row, y1_rep)
+
+    masks = mask_x0 & mask_y0 & mask_x1 & mask_y1
+
+    return masks
+
+
+def fill_rectangle(images, center_x, center_y, width, height, fill):
+    """Fill rectangles with fill value into images.
+
+    Args:
+        images: Tensor of images to fill rectangles into.
+        center_x: Tensor of positions of the rectangle centers on the x-axis.
+        center_y: Tensor of positions of the rectangle centers on the y-axis.
+        width: Tensor of widths of the rectangles
+        height: Tensor of heights of the rectangles
+        fill: Tensor with same shape as images to get rectangle fill from.
+    Returns:
+        images with filled rectangles.
+    """
+    images_shape = tf.shape(images)
+    batch_size = images_shape[0]
+    images_height = images_shape[1]
+    images_width = images_shape[2]
+
+    xywh = tf.stack([center_x, center_y, width, height], axis=1)
+    xywh = tf.cast(xywh, tf.float32)
+    corners = bbox.xywh_to_corners(xywh)
+
+    masks_shape = (batch_size, images_height, images_width)
+    is_patch_mask = rectangle_masks(masks_shape, corners)
+    is_patch_mask = tf.expand_dims(is_patch_mask, -1)
+
+    images = tf.where(tf.equal(is_patch_mask, True), fill, images)
+    return images