-
Notifications
You must be signed in to change notification settings - Fork 14.5k
Open
Labels
Description
The following MLIR file has two functions with identical loop bodies, but only one of them (`@f1`) gets successfully fused by the affine loop fusion pass:
// Reproducer, variant 1: all three memrefs are passed in as function arguments.
func.func @f1(%input : memref<10xf32>, %output : memref<10xf32>, %reduc : memref<10xf32>) {
// NOTE: %zero is defined but never used in this function.
%zero = arith.constant 0. : f32
%one = arith.constant 1. : f32
// Loop 1: output[i] = input[i] + 1.0 for i in [0, 10).
affine.for %i = 0 to 10 {
%0 = affine.load %input[%i] : memref<10xf32>
%2 = arith.addf %0, %one : f32
affine.store %2, %output[%i] : memref<10xf32>
}
// Loop 2: reduc[0] = input[i] + reduc[0] for i in [0, 10).
// The only memref it has in common with loop 1 is %input, which both loops only read.
affine.for %i = 0 to 10 {
%0 = affine.load %input[%i] : memref<10xf32>
%1 = affine.load %reduc[0] : memref<10xf32>
%2 = arith.addf %0, %1 : f32
affine.store %2, %reduc[0] : memref<10xf32>
}
return
}
// Reproducer, variant 2: same two loops as @f1, but the three memrefs are
// allocated inside the function with memref.alloc instead of being arguments.
func.func @f2() {
%input = memref.alloc() : memref<10xf32>
%output = memref.alloc() : memref<10xf32>
%reduc = memref.alloc() : memref<10xf32>
// NOTE: %zero is defined but never used in this function.
%zero = arith.constant 0. : f32
%one = arith.constant 1. : f32
// Loop 1: output[i] = input[i] + 1.0 for i in [0, 10).
affine.for %i = 0 to 10 {
%0 = affine.load %input[%i] : memref<10xf32>
%2 = arith.addf %0, %one : f32
affine.store %2, %output[%i] : memref<10xf32>
}
// Loop 2: reduc[0] = input[i] + reduc[0] for i in [0, 10).
// The only memref it has in common with loop 1 is %input, which both loops only read.
affine.for %i = 0 to 10 {
%0 = affine.load %input[%i] : memref<10xf32>
%1 = affine.load %reduc[0] : memref<10xf32>
%2 = arith.addf %0, %1 : f32
affine.store %2, %reduc[0] : memref<10xf32>
}
return
}
Running with
./bin/mlir-opt ../testing-2.mlir -pass-pipeline='builtin.module(func.func(affine-loop-fusion{mode=sibling}))'
yields:
module {
func.func @f1(%arg0: memref<10xf32>, %arg1: memref<10xf32>, %arg2: memref<10xf32>) {
%cst = arith.constant 0.000000e+00 : f32
%cst_0 = arith.constant 1.000000e+00 : f32
affine.for %arg3 = 0 to 10 {
%0 = affine.load %arg0[%arg3] : memref<10xf32>
%1 = arith.addf %0, %cst_0 : f32
affine.store %1, %arg1[%arg3] : memref<10xf32>
%2 = affine.load %arg0[%arg3] : memref<10xf32>
%3 = affine.load %arg2[0] : memref<10xf32>
%4 = arith.addf %2, %3 : f32
affine.store %4, %arg2[0] : memref<10xf32>
}
return
}
func.func @f2() {
%alloc = memref.alloc() : memref<10xf32>
%alloc_0 = memref.alloc() : memref<10xf32>
%alloc_1 = memref.alloc() : memref<10xf32>
%cst = arith.constant 0.000000e+00 : f32
%cst_2 = arith.constant 1.000000e+00 : f32
affine.for %arg0 = 0 to 10 {
%0 = affine.load %alloc[%arg0] : memref<10xf32>
%1 = arith.addf %0, %cst_2 : f32
affine.store %1, %alloc_0[%arg0] : memref<10xf32>
}
affine.for %arg0 = 0 to 10 {
%0 = affine.load %alloc[%arg0] : memref<10xf32>
%1 = affine.load %alloc_1[0] : memref<10xf32>
%2 = arith.addf %0, %1 : f32
affine.store %2, %alloc_1[0] : memref<10xf32>
}
return
}
}
When the `memref`s involved are allocated inside the function instead of being passed in as arguments, the pass fails to fuse the two loops. Adding `-debug-only=affine-loop-fusion,loop-fusion-utils` produces no output that I can see that would help explain why the sibling fusion pass fails in this case.