From 9530ac4da2d1158563f03219573111d745c3f6c7 Mon Sep 17 00:00:00 2001 From: erwei-xilinx Date: Mon, 15 Jul 2024 16:42:08 -0700 Subject: [PATCH] `aiex.npu.dma_memcpy_nd` op verifier update (#1621) --- lib/Dialect/AIEX/IR/AIEXDialect.cpp | 4 +++- lib/Dialect/AIEX/Transforms/AIEDmaToNpu.cpp | 3 ++- test/dialect/AIEX/bad_npu_nd.mlir | 22 +++++++++++++++++++++ 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/lib/Dialect/AIEX/IR/AIEXDialect.cpp b/lib/Dialect/AIEX/IR/AIEXDialect.cpp index 75625e1e15..b3a88ba9a4 100644 --- a/lib/Dialect/AIEX/IR/AIEXDialect.cpp +++ b/lib/Dialect/AIEX/IR/AIEXDialect.cpp @@ -193,7 +193,9 @@ LogicalResult AIEX::NpuDmaMemcpyNdOp::verify() { if (strides[1] && sizes[0] > (1 << wrap_bits) - 1) return emitOpError("Size 0 exceeds the [0:" + std::to_string((1 << wrap_bits) - 1) + "] range."); - if (strides[3] > (1 << step_bits)) + // strides[3] exceeding the range is ok iff the sizes[3] is one, which is + // checked below + if (strides[3] > (1 << step_bits) && sizes[3] != 1) return emitOpError("Stride 3 exceeds the [1:" + std::to_string(1 << step_bits) + "] range."); if (strides[2] > (1 << step_bits)) diff --git a/lib/Dialect/AIEX/Transforms/AIEDmaToNpu.cpp b/lib/Dialect/AIEX/Transforms/AIEDmaToNpu.cpp index 5a27bf446b..7a3294838a 100644 --- a/lib/Dialect/AIEX/Transforms/AIEDmaToNpu.cpp +++ b/lib/Dialect/AIEX/Transforms/AIEDmaToNpu.cpp @@ -282,7 +282,8 @@ struct DmaToNpuPattern : OpConversionPattern { // iteration_current // iteration_size - if (strides[3]) + // strides[3] doesn't need to lower to hardware if sizes[3] is one + if (strides[3] && sizes[3] != 1) iteration_size = IntegerAttr::get(i32ty, sizes[3] - 1); // iteration_stride diff --git a/test/dialect/AIEX/bad_npu_nd.mlir b/test/dialect/AIEX/bad_npu_nd.mlir index ed5e5daf28..64efe18059 100644 --- a/test/dialect/AIEX/bad_npu_nd.mlir +++ b/test/dialect/AIEX/bad_npu_nd.mlir @@ -220,3 +220,25 @@ module { aie.shim_dma_allocation @objectfifo (MM2S, 0, 0) } } + +// ----- + +// first (highest-dimension) stride can go beyond the limit, as long as the corresponding wrap is 1 + +module { + aie.device(npu1_4col) { + func.func @bad_npu_nd(%a : memref<8xi32>) { + %c0 = arith.constant 0 : i64 + %c1 = arith.constant 1 : i64 + %c2 = arith.constant 2 : i64 + %c3 = arith.constant 3 : i64 + %c8 = arith.constant 8 : i64 + %c1572864 = arith.constant 1572864 : i64 + aiex.npu.dma_memcpy_nd (0, 0, %a[%c1,%c0,%c0,%c0][%c1,%c1,%c1,%c2][%c1572864,%c0,%c0,%c1]) { metadata = @objectfifo, id = 0 : i64 } : memref<8xi32> + // expected-error@+1 {{Stride 3 exceeds the [1:1048576] range.}} + aiex.npu.dma_memcpy_nd (0, 0, %a[%c1,%c0,%c0,%c0][%c2,%c1,%c1,%c2][%c1572864,%c0,%c0,%c1]) { metadata = @objectfifo, id = 1 : i64 } : memref<8xi32> + return + } + aie.shim_dma_allocation @objectfifo (MM2S, 0, 0) + } +}