Skip to content
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
b766215
[mlir][amdgpu] Add make_dma_descriptor op
amd-eochoalo Nov 26, 2025
3c31d68
[mlir][amdgpu] Add tensor load store operation
amd-eochoalo Nov 25, 2025
cb116ea
[mlir][amdgpu] Lower amdgpu.make_dma_base.
amd-eochoalo Nov 25, 2025
3ee5464
Update documentation
amd-eochoalo Nov 27, 2025
7aa7699
[amdgpu][mlir] make_dma_base add type information.
amd-eochoalo Nov 27, 2025
9f37e60
[mlir][amdgpu] Add AllElementTypesMatch attribute to make_dma_base
amd-eochoalo Nov 27, 2025
3a42775
[mlir][amdgpu] verify element type sizes for make_dma_base
amd-eochoalo Nov 27, 2025
c0cd803
[mlir][amdgpu] Lower make_dma_descriptor
amd-eochoalo Nov 26, 2025
2973181
Folding
amd-eochoalo Nov 28, 2025
bf0600b
const SmallVector<T>& to ArrayRef<T>
amd-eochoalo Dec 2, 2025
0aa6fe1
change expression for 48-bits
amd-eochoalo Dec 2, 2025
1c987e8
Merge branch 'main' into eochoa/2025-12-02/merging-last
amd-eochoalo Dec 2, 2025
f482bbb
int to int64_t
amd-eochoalo Dec 2, 2025
ffd5685
format
amd-eochoalo Dec 2, 2025
5d45a72
revert exposing utility functions
amd-eochoalo Dec 2, 2025
f187e76
Avoid or if possible
amd-eochoalo Dec 2, 2025
661931c
Use a single constant
amd-eochoalo Dec 2, 2025
a1a82f8
explicit type to auto
amd-eochoalo Dec 2, 2025
ac543c2
Remove unnecessary braces
amd-eochoalo Dec 2, 2025
5672371
Use log2_32
amd-eochoalo Dec 3, 2025
1549f5b
!isDynamic -> isStatic
amd-eochoalo Dec 3, 2025
dc76238
Dot at end of comments
amd-eochoalo Dec 3, 2025
8ee42b1
C-array and for-loop
amd-eochoalo Dec 3, 2025
535f8ce
Delete superfluous empty line
amd-eochoalo Dec 3, 2025
be6560d
Assert type conversion succeeded.
amd-eochoalo Dec 3, 2025
2092acb
Use getIntOrFloatBitWidth
amd-eochoalo Dec 3, 2025
ea45349
use getIntOrFloatBitWidth
amd-eochoalo Dec 3, 2025
0634350
Add documentation.
amd-eochoalo Dec 3, 2025
2fafa28
documentation
amd-eochoalo Dec 3, 2025
dab96a5
General nit against ints
amd-eochoalo Dec 3, 2025
9b7e059
one-liner getElementTypeWidth
amd-eochoalo Dec 3, 2025
c04e41a
Use getMixedValues
amd-eochoalo Dec 3, 2025
eeb008a
Better messages in assertion
amd-eochoalo Dec 3, 2025
7955fe0
correction about rank2
amd-eochoalo Dec 3, 2025
e154dd4
Use pattern matcher
amd-eochoalo Dec 3, 2025
7a9d429
format
amd-eochoalo Dec 3, 2025
46d3e64
Propoagte type from getTypeWidth and int64_t
amd-eochoalo Dec 3, 2025
203ef02
Remove maybeUpdateDynamicIndexList
amd-eochoalo Dec 4, 2025
3fdf187
Use getIntOrFloatBitWidth
amd-eochoalo Dec 4, 2025
2039294
Undef -> Poison
amd-eochoalo Dec 5, 2025
b5313a3
Allow tensors less than rank 2
amd-eochoalo Dec 5, 2025
5f4ad77
Allow tensors with rank less than 2
amd-eochoalo Dec 5, 2025
91b2e96
Add TODOs
amd-eochoalo Dec 5, 2025
097a0e8
Fix atomicBarrierAddress calculation.
amd-eochoalo Dec 5, 2025
6b62d0e
Merge branch 'main' into eochoa/2025-12-02/merging-last
amd-eochoalo Dec 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 51 additions & 5 deletions mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -1228,7 +1228,7 @@ def AMDGPU_ScaledMFMAOp :
}

def AMDGPU_MakeDmaBaseOp :
AMDGPU_Op<"make_dma_base", [Pure, AttrSizedOperandSegments]>,
AMDGPU_Op<"make_dma_base", [Pure, AttrSizedOperandSegments, AllElementTypesMatch<["global", "lds"]>]>,
Arguments<(ins Arg<AnyMemRef>:$global,
Variadic<Index>:$global_indices,
Arg<AnyMemRef>:$lds,
Expand Down Expand Up @@ -1294,8 +1294,8 @@ def AMDGPU_MakeDmaDescriptorOp :
DenseI64ArrayAttr: $global_static_strides,
Variadic<Index>: $shared_dynamic_sizes,
DenseI64ArrayAttr: $shared_static_sizes,
Optional<Index>: $pad,
Optional<Index>: $pad_every,
Optional<Index>: $pad_amount,
Optional<Index>: $pad_interval,
Optional<AnyMemRef>: $atomic_barrier_address,
Variadic<Index>: $atomic_barrier_indices,
Optional<Index>: $global_increment,
Expand Down Expand Up @@ -1331,7 +1331,7 @@ def AMDGPU_MakeDmaDescriptorOp :

// Example of moving a two-dimensional tensor to LDS where padding is applied after every integer.
%base = amdgpu.make_dma_base %global[0, 0], %lds[0, 0] : memref<32x32xi32>, memref<64x64xi32, #gpu.address_space<workgroup>> -> !amdgpu.tdm_base<i32>
%descriptor = amdgpu.make_dma_descriptor %base globalSize [32, 32] globalStride [32, 1] sharedSize [64, 64] padding(%pad pad_every %pad_every) : !amdgpu.tdm_base<i32> -> !amdgpu.tdm_descriptor
%descriptor = amdgpu.make_dma_descriptor %base globalSize [32, 32] globalStride [32, 1] sharedSize [64, 64] padShared(%pad_amount every %pad_interval) : !amdgpu.tdm_base<i32> -> !amdgpu.tdm_descriptor
amdgpu.tensor_load_to_lds %descriptor : !amdgpu.tdm_descriptor
```
}];
Expand All @@ -1341,14 +1341,60 @@ def AMDGPU_MakeDmaDescriptorOp :
`globalSize` custom<DynamicIndexList>($global_dynamic_sizes, $global_static_sizes)
`globalStride` custom<DynamicIndexList>($global_dynamic_strides, $global_static_strides)
`sharedSize` custom<DynamicIndexList>($shared_dynamic_sizes, $shared_static_sizes)
( `padShared` `(` $pad^ `every` $pad_every `)` )?
( `padShared` `(` $pad_amount^ `every` $pad_interval `)` )?
( `atomicBarrier` `(` $atomic_barrier_address^ `[` $atomic_barrier_indices `]`
`:` type($atomic_barrier_address) `)`)?
( `iterate` $global_increment^ `,` $lds_increment `,` $iteration_count )?
attr-dict `:` qualified(type($base)) `->` type(results)
}];

let extraClassDeclaration = [{
// Returns the number of entries in the global static sizes array, which this
// op uses as the rank of the described tensor.
int getRank() {
  // The size is an unsigned count; the conversion to int is made explicit.
  return static_cast<int>(getGlobalStaticSizes().size());
}

// Returns the bit width of the element type carried by the type of `base`.
//
// The element type must be an integer or floating-point type;
// Type::getIntOrFloatBitWidth() asserts on any other type, which preserves
// the previous "unreachable" contract without a hand-written cast chain.
int getElementTypeWidth() {
  Type elementType = getBase().getType().getElementType();
  return elementType.getIntOrFloatBitWidth();
}

// Merges a static index list with its dynamic operands into one list of
// OpFoldResult.
//
// `statics` is walked in order. Every entry that ShapedType::isDynamic()
// reports as dynamic is replaced by the next unused value from `dynamics`;
// every other entry is wrapped in an index attribute.
//
// NOTE(review): assumes `dynamics` holds at least one value per dynamic entry
// in `statics`; this is not checked here -- confirm the verifier enforces it.
SmallVector<OpFoldResult> getMixedList(SmallVector<Value> dynamics, ArrayRef<int64_t> statics) {
  OpBuilder builder(getContext());
  SmallVector<OpFoldResult> mixed;
  unsigned nextDynamic = 0;
  for (int64_t entry : statics) {
    if (!ShapedType::isDynamic(entry)) {
      // Statically known entry: materialize it as an index attribute.
      mixed.push_back(builder.getIndexAttr(entry));
      continue;
    }
    // Dynamic placeholder: consume the next SSA value.
    mixed.push_back(dynamics[nextDynamic++]);
  }
  return mixed;
}

// Returns the global sizes as a single list, combining the static sizes
// attribute with the dynamic size operands via getMixedList().
SmallVector<OpFoldResult> getMixedGlobalSizes() {
  ArrayRef<int64_t> staticSizes = getGlobalStaticSizes();
  return getMixedList(getGlobalDynamicSizes(), staticSizes);
}

// Returns the global strides as a single list, combining the static strides
// attribute with the dynamic stride operands via getMixedList().
SmallVector<OpFoldResult> getMixedGlobalStrides() {
  ArrayRef<int64_t> staticStrides = getGlobalStaticStrides();
  return getMixedList(getGlobalDynamicStrides(), staticStrides);
}

// Returns the shared sizes as a single list, combining the static sizes
// attribute with the dynamic size operands via getMixedList().
SmallVector<OpFoldResult> getMixedSharedSizes() {
  ArrayRef<int64_t> staticSizes = getSharedStaticSizes();
  return getMixedList(getSharedDynamicSizes(), staticSizes);
}
}];

let hasVerifier = 1;
let hasFolder = 1;
}

#endif // AMDGPU
Loading