Skip to content

Commit

Permalink
Fully unroll for-loops with tripCount < unrollFactor (#1568)
Browse files Browse the repository at this point in the history
Co-authored-by: AndraBisca <[email protected]>
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
  • Loading branch information
3 people committed Aug 7, 2024
1 parent f27c385 commit 80fb5c0
Show file tree
Hide file tree
Showing 3 changed files with 124 additions and 32 deletions.
12 changes: 11 additions & 1 deletion lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -806,7 +806,17 @@ struct AIEObjectFifoStatefulTransformPass

int unrollFactor =
computeLCM(objFifoSizes); // also counts original loop body

// If the loop's constant trip count is smaller than unrollFactor, clamp the
// factor to the trip count so that the loop is unrolled fully.
if (forLoop.getSingleLowerBound() && forLoop.getSingleUpperBound() &&
    forLoop.getSingleStep()) {
  // constantTripCount returns an empty optional when it cannot produce a
  // constant trip count. Collapsing that case (or a zero-trip loop) to 0 would
  // set unrollFactor to 0 and hand a non-positive factor to
  // loopUnrollByFactor, so only clamp for a known, positive trip count and
  // otherwise keep the LCM-based factor computed above.
  auto tripCount = constantTripCount(*(forLoop.getSingleLowerBound()),
                                     *(forLoop.getSingleUpperBound()),
                                     *(forLoop.getSingleStep()));
  if (tripCount && *tripCount > 0 && *tripCount < unrollFactor)
    unrollFactor = static_cast<int>(*tripCount); // fits: < unrollFactor
}
if (found) {
if (failed(mlir::loopUnrollByFactor(forLoop, unrollFactor))) {
forLoop.emitOpError()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//===- loop_test.aie.mlir --------------------------------------*- MLIR -*-===//
//===- loop_test_common.mlir ------------------------------------*- MLIR -*-===//
//
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
Expand Down Expand Up @@ -64,36 +64,23 @@
// CHECK: func.call @some_work(%[[BUFF_1]], %[[ARG0]]) : (memref<16xi32>, index) -> ()
// CHECK-NEXT: aie.use_lock(%[[LOCK_1]], Release, 1)
// CHECK-NEXT: }
// CHECK: %[[C1_0:.+]] = arith.constant 1 : index
// CHECK: %[[C4_1:.+]] = arith.constant 4 : index
// CHECK: scf.for %[[ARG0:.+]] = %[[C1]] to %[[C1_0]] step %[[C4_1]] {
// CHECK-NEXT: aie.use_lock(%[[LOCK_2]], Acquire, 0)
// CHECK-NEXT: func.call @some_work(%[[BUFF_2]], %[[ARG0]]) : (memref<16xi32>, index) -> ()
// CHECK-NEXT: aie.use_lock(%[[LOCK_2]], Release, 1)
// CHECK-DAG: %[[C1_2:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[MUL_0:.*]] = arith.muli %[[C1]], %[[C1_2]] : index
// CHECK-DAG: %[[ADD_0:.*]] = arith.addi %[[ARG0]], %[[MUL_0]] : index
// CHECK-DAG: aie.use_lock(%[[LOCK_3]], Acquire, 0)
// CHECK: func.call @some_work(%[[BUFF_3]], %[[ADD_0]]) : (memref<16xi32>, index) -> ()
// CHECK-NEXT: aie.use_lock(%[[LOCK_3]], Release, 1)
// CHECK-DAG: %[[C2_3:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[MUL_1:.*]] = arith.muli %[[C1]], %[[C2_3]] : index
// CHECK-DAG: %[[ADD_1:.*]] = arith.addi %[[ARG0]], %[[MUL_1]] : index
// CHECK-DAG: aie.use_lock(%[[LOCK_0]], Acquire, 0)
// CHECK: func.call @some_work(%[[BUFF_0]], %[[ADD_1]]) : (memref<16xi32>, index) -> ()
// CHECK-NEXT: aie.use_lock(%[[LOCK_0]], Release, 1)
// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
// CHECK-DAG: %[[MUL_2:.*]] = arith.muli %[[C1]], %[[C3]] : index
// CHECK-DAG: %[[ADD_2:.*]] = arith.addi %[[ARG0]], %[[MUL_2]] : index
// CHECK-DAG: aie.use_lock(%[[LOCK_1]], Acquire, 0)
// CHECK: func.call @some_work(%[[BUFF_1]], %[[ADD_2]]) : (memref<16xi32>, index) -> ()
// CHECK-NEXT: aie.use_lock(%[[LOCK_1]], Release, 1)
// CHECK-NEXT: }
// CHECK: scf.for %[[ARG0:.+]] = %[[C1_0]] to %[[C4]] step %[[C1]] {
// CHECK-DAG: aie.use_lock(%[[LOCK_2]], Acquire, 0)
// CHECK: func.call @some_work(%[[BUFF_2]], %[[ARG0]]) : (memref<16xi32>, index) -> ()
// CHECK-NEXT: aie.use_lock(%[[LOCK_2]], Release, 1)
// CHECK-NEXT: }
// CHECK: %[[C3:.*]] = arith.constant 3 : index
// CHECK: aie.use_lock(%[[LOCK_2]], Acquire, 0)
// CHECK: func.call @some_work(%[[BUFF_2]], %[[C1]]) : (memref<16xi32>, index) -> ()
// CHECK: aie.use_lock(%[[LOCK_2]], Release, 1)
// CHECK: %[[C1_0:.*]] = arith.constant 1 : index
// CHECK: %[[MUL_0:.*]] = arith.muli %[[C1]], %[[C1_0]] : index
// CHECK: %[[ADD_0:.*]] = arith.addi %[[C1]], %[[MUL_0]] : index
// CHECK: aie.use_lock(%[[LOCK_3]], Acquire, 0)
// CHECK: func.call @some_work(%[[BUFF_3]], %[[ADD_0]]) : (memref<16xi32>, index) -> ()
// CHECK: aie.use_lock(%[[LOCK_3]], Release, 1)
// CHECK: %[[C2_1:.*]] = arith.constant 2 : index
// CHECK: %[[MUL_1:.*]] = arith.muli %[[C1]], %[[C2_1]] : index
// CHECK: %[[ADD_1:.*]] = arith.addi %[[C1]], %[[MUL_1]] : index
// CHECK: aie.use_lock(%[[LOCK_0]], Acquire, 0)
// CHECK: func.call @some_work(%[[BUFF_0]], %[[ADD_1]]) : (memref<16xi32>, index) -> ()
// CHECK: aie.use_lock(%[[LOCK_0]], Release, 1)

module {
aie.device(xcvc1902) {
%tile12 = aie.tile(1, 2)
Expand Down
95 changes: 95 additions & 0 deletions test/objectFifo-stateful-transform/loop_test_small.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
//===- loop_test_small.mlir ------------------------------------*- MLIR -*-===//
//
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// Copyright (C) 2024, Advanced Micro Devices, Inc.
//
//===----------------------------------------------------------------------===//

// RUN: aie-opt --aie-objectFifo-stateful-transform %s | FileCheck %s

// CHECK-LABEL: aie.device(xcvc1902) {
// CHECK: memref.global "public" @of_2 : memref<16xi32>
// CHECK: memref.global "public" @of_1 : memref<16xi32>
// CHECK: %tile_1_2 = aie.tile(1, 2)
// CHECK: %tile_1_3 = aie.tile(1, 3)
// CHECK: %of_2_buff_0 = aie.buffer(%tile_1_2) {sym_name = "of_2_buff_0"} : memref<16xi32>
// CHECK: %of_2_buff_1 = aie.buffer(%tile_1_2) {sym_name = "of_2_buff_1"} : memref<16xi32>
// CHECK: %of_2_buff_2 = aie.buffer(%tile_1_2) {sym_name = "of_2_buff_2"} : memref<16xi32>
// CHECK: %of_2_lock_0 = aie.lock(%tile_1_2, 0) {init = 0 : i32, sym_name = "of_2_lock_0"}
// CHECK: %of_2_lock_1 = aie.lock(%tile_1_2, 1) {init = 0 : i32, sym_name = "of_2_lock_1"}
// CHECK: %of_2_lock_2 = aie.lock(%tile_1_2, 2) {init = 0 : i32, sym_name = "of_2_lock_2"}
// CHECK: %of_1_buff_0 = aie.buffer(%tile_1_3) {sym_name = "of_1_buff_0"} : memref<16xi32>
// CHECK: %of_1_buff_1 = aie.buffer(%tile_1_3) {sym_name = "of_1_buff_1"} : memref<16xi32>
// CHECK: %of_1_lock_0 = aie.lock(%tile_1_3, 0) {init = 0 : i32, sym_name = "of_1_lock_0"}
// CHECK: %of_1_lock_1 = aie.lock(%tile_1_3, 1) {init = 0 : i32, sym_name = "of_1_lock_1"}
// CHECK: func.func @some_work(%arg0: memref<16xi32>, %arg1: memref<16xi32>, %arg2: index) {
// CHECK: return
// CHECK: }
// CHECK: %core_1_2 = aie.core(%tile_1_2) {
// CHECK: %c0 = arith.constant 0 : index
// CHECK: %c1 = arith.constant 1 : index
// CHECK: %c4 = arith.constant 4 : index
// CHECK: aie.use_lock(%of_1_lock_0, Acquire, 1)
// CHECK: aie.use_lock(%of_2_lock_0, Acquire, 0)
// CHECK: func.call @some_work(%of_1_buff_0, %of_2_buff_0, %c0) : (memref<16xi32>, memref<16xi32>, index) -> ()
// CHECK: aie.use_lock(%of_1_lock_0, Release, 0)
// CHECK: aie.use_lock(%of_2_lock_0, Release, 1)
// CHECK: %c1_1 = arith.constant 1 : index
// CHECK: %0 = arith.muli %c1, %c1_1 : index
// CHECK: %1 = arith.addi %c0, %0 : index
// CHECK: aie.use_lock(%of_1_lock_1, Acquire, 1)
// CHECK: aie.use_lock(%of_2_lock_1, Acquire, 0)
// CHECK: func.call @some_work(%of_1_buff_1, %of_2_buff_1, %1) : (memref<16xi32>, memref<16xi32>, index) -> ()
// CHECK: aie.use_lock(%of_1_lock_1, Release, 0)
// CHECK: aie.use_lock(%of_2_lock_1, Release, 1)
// CHECK: %c2 = arith.constant 2 : index
// CHECK: %2 = arith.muli %c1, %c2 : index
// CHECK: %3 = arith.addi %c0, %2 : index
// CHECK: aie.use_lock(%of_1_lock_0, Acquire, 1)
// CHECK: aie.use_lock(%of_2_lock_2, Acquire, 0)
// CHECK: func.call @some_work(%of_1_buff_0, %of_2_buff_2, %3) : (memref<16xi32>, memref<16xi32>, index) -> ()
// CHECK: aie.use_lock(%of_1_lock_0, Release, 0)
// CHECK: aie.use_lock(%of_2_lock_2, Release, 1)
// CHECK: %c3 = arith.constant 3 : index
// CHECK: %4 = arith.muli %c1, %c3 : index
// CHECK: %5 = arith.addi %c0, %4 : index
// CHECK: aie.use_lock(%of_1_lock_1, Acquire, 1)
// CHECK: aie.use_lock(%of_2_lock_0, Acquire, 0)
// CHECK: func.call @some_work(%of_1_buff_1, %of_2_buff_0, %5) : (memref<16xi32>, memref<16xi32>, index) -> ()
// CHECK: aie.use_lock(%of_1_lock_1, Release, 0)
// CHECK: aie.use_lock(%of_2_lock_0, Release, 1)
// CHECK: aie.end
// CHECK: }
// CHECK: }

// Test input: one core runs a 4-iteration loop that touches two object FIFOs
// of depth 2 and 3. The expected output above contains four copies of the loop
// body and no scf.for, i.e. the loop is unrolled fully.
module {
aie.device(xcvc1902) {
%tile12 = aie.tile(1, 2)
%tile13 = aie.tile(1, 3)
// @of_1: declared from tile (1, 3) to tile (1, 2) with depth 2.
aie.objectfifo @of_1 (%tile13, {%tile12}, 2 : i32) : !aie.objectfifo<memref<16xi32>>
// @of_2: declared from tile (1, 2) to tile (1, 3) with depth 3.
aie.objectfifo @of_2 (%tile12, {%tile13}, 3 : i32) : !aie.objectfifo<memref<16xi32>>
// Empty placeholder kernel; it only gives the loop body a call that uses both
// buffers and the induction variable.
func.func @some_work(%line_inA:memref<16xi32>, %line_inB:memref<16xi32>, %index:index) -> () {
return
}
%core12 = aie.core(%tile12) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c4 = arith.constant 4 : index

// Iterates from 0 to 4 in steps of 1: a trip count of 4.
scf.for %indexInHeight = %c0 to %c4 step %c1 {
// Acquire one element of each FIFO: @of_1 as consumer, @of_2 as producer.
%subviewIn = aie.objectfifo.acquire @of_1 (Consume, 1) : !aie.objectfifosubview<memref<16xi32>>
%subviewOut = aie.objectfifo.acquire @of_2 (Produce, 1) : !aie.objectfifosubview<memref<16xi32>>
// Access element 0 of each acquired subview as a plain memref.
%elemIn = aie.objectfifo.subview.access %subviewIn[0] : !aie.objectfifosubview<memref<16xi32>> -> memref<16xi32>
%elemOut = aie.objectfifo.subview.access %subviewOut[0] : !aie.objectfifosubview<memref<16xi32>> -> memref<16xi32>
func.call @some_work(%elemIn, %elemOut, %indexInHeight) : (memref<16xi32>, memref<16xi32>, index) -> ()
// Release the elements acquired at the top of the iteration.
aie.objectfifo.release @of_1 (Consume, 1)
aie.objectfifo.release @of_2 (Produce, 1)
}

aie.end
}
}
}

0 comments on commit 80fb5c0

Please sign in to comment.