Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
b766215
[mlir][amdgpu] Add make_dma_descriptor op
amd-eochoalo Nov 26, 2025
3c31d68
[mlir][amdgpu] Add tensor load store operation
amd-eochoalo Nov 25, 2025
cb116ea
[mlir][amdgpu] Lower amdgpu.make_dma_base.
amd-eochoalo Nov 25, 2025
3ee5464
Update documentation
amd-eochoalo Nov 27, 2025
7aa7699
[amdgpu][mlir] make_dma_base add type information.
amd-eochoalo Nov 27, 2025
9f37e60
[mlir][amdgpu] Add AllElementTypesMatch attribute to make_dma_base
amd-eochoalo Nov 27, 2025
3a42775
[mlir][amdgpu] verify element type sizes for make_dma_base
amd-eochoalo Nov 27, 2025
c0cd803
[mlir][amdgpu] Lower make_dma_descriptor
amd-eochoalo Nov 26, 2025
2973181
Folding
amd-eochoalo Nov 28, 2025
bf0600b
const SmallVector<T>& to ArrayRef<T>
amd-eochoalo Dec 2, 2025
0aa6fe1
change expression for 48-bits
amd-eochoalo Dec 2, 2025
1c987e8
Merge branch 'main' into eochoa/2025-12-02/merging-last
amd-eochoalo Dec 2, 2025
f482bbb
int to int64_t
amd-eochoalo Dec 2, 2025
ffd5685
format
amd-eochoalo Dec 2, 2025
5d45a72
revert exposing utility functions
amd-eochoalo Dec 2, 2025
f187e76
Avoid or if possible
amd-eochoalo Dec 2, 2025
661931c
Use a single constant
amd-eochoalo Dec 2, 2025
a1a82f8
explicit type to auto
amd-eochoalo Dec 2, 2025
ac543c2
Remove unnecessary braces
amd-eochoalo Dec 2, 2025
5672371
Use log2_32
amd-eochoalo Dec 3, 2025
1549f5b
!isDynamic -> isStatic
amd-eochoalo Dec 3, 2025
dc76238
Dot at end of comments
amd-eochoalo Dec 3, 2025
8ee42b1
C-array and for-loop
amd-eochoalo Dec 3, 2025
535f8ce
Delete superfluous empty line
amd-eochoalo Dec 3, 2025
be6560d
Assert type conversion succeeded.
amd-eochoalo Dec 3, 2025
2092acb
Use getIntOrFloatBitWidth
amd-eochoalo Dec 3, 2025
ea45349
use getIntOrFloatBitWidth
amd-eochoalo Dec 3, 2025
0634350
Add documentation.
amd-eochoalo Dec 3, 2025
2fafa28
documentation
amd-eochoalo Dec 3, 2025
dab96a5
General nit against ints
amd-eochoalo Dec 3, 2025
9b7e059
one-liner getElementTypeWidth
amd-eochoalo Dec 3, 2025
c04e41a
Use getMixedValues
amd-eochoalo Dec 3, 2025
eeb008a
Better messages in assertion
amd-eochoalo Dec 3, 2025
7955fe0
correction about rank2
amd-eochoalo Dec 3, 2025
e154dd4
Use pattern matcher
amd-eochoalo Dec 3, 2025
7a9d429
format
amd-eochoalo Dec 3, 2025
46d3e64
Propoagte type from getTypeWidth and int64_t
amd-eochoalo Dec 3, 2025
203ef02
Remove maybeUpdateDynamicIndexList
amd-eochoalo Dec 4, 2025
3fdf187
Use getIntOrFloatBitWidth
amd-eochoalo Dec 4, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 32 additions & 5 deletions mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -1228,7 +1228,7 @@ def AMDGPU_ScaledMFMAOp :
}

def AMDGPU_MakeDmaBaseOp :
AMDGPU_Op<"make_dma_base", [Pure, AttrSizedOperandSegments]>,
AMDGPU_Op<"make_dma_base", [Pure, AttrSizedOperandSegments, AllElementTypesMatch<["global", "lds"]>]>,
Arguments<(ins Arg<AnyMemRef>:$global,
Variadic<Index>:$global_indices,
Arg<AnyMemRef>:$lds,
Expand Down Expand Up @@ -1294,8 +1294,8 @@ def AMDGPU_MakeDmaDescriptorOp :
DenseI64ArrayAttr: $global_static_strides,
Variadic<Index>: $shared_dynamic_sizes,
DenseI64ArrayAttr: $shared_static_sizes,
Optional<Index>: $pad,
Optional<Index>: $pad_every,
Optional<Index>: $pad_amount,
Optional<Index>: $pad_interval,
Optional<AnyMemRef>: $atomic_barrier_address,
Variadic<Index>: $atomic_barrier_indices,
Optional<Index>: $global_increment,
Expand All @@ -1317,6 +1317,10 @@ def AMDGPU_MakeDmaDescriptorOp :
Padding can be applied to the LDS address when copying from memory to LDS,
but not when copying from LDS to memory.
The values in the padded target addresses remain the same as before the operation was applied.
$pad_interval must be a power of two contained in [2, 256].
$pad_amount must be a value contained in [1, 128].

$atomic_barrier_address must be aligned to 8 bytes.

2D and 3D tensors may be iterated over by setting $global_increment, $lds_increment, and $iteration_count.
$global_increment determines how much to increment the starting global memory address per iteration in units of the $base's element type.
Expand All @@ -1331,7 +1335,7 @@ def AMDGPU_MakeDmaDescriptorOp :

// Example of moving a two-dimensional tensor to LDS where padding is applied after every integer.
%base = amdgpu.make_dma_base %global[0, 0], %lds[0, 0] : memref<32x32xi32>, memref<64x64xi32, #gpu.address_space<workgroup>> -> !amdgpu.tdm_base<i32>
%descriptor = amdgpu.make_dma_descriptor %base globalSize [32, 32] globalStride [32, 1] sharedSize [64, 64] padding(%pad pad_every %pad_every) : !amdgpu.tdm_base<i32> -> !amdgpu.tdm_descriptor
%descriptor = amdgpu.make_dma_descriptor %base globalSize [32, 32] globalStride [32, 1] sharedSize [64, 64] padShared(%pad_amount every %pad_interval) : !amdgpu.tdm_base<i32> -> !amdgpu.tdm_descriptor
amdgpu.tensor_load_to_lds %descriptor : !amdgpu.tdm_descriptor
```
}];
Expand All @@ -1341,14 +1345,37 @@ def AMDGPU_MakeDmaDescriptorOp :
`globalSize` custom<DynamicIndexList>($global_dynamic_sizes, $global_static_sizes)
`globalStride` custom<DynamicIndexList>($global_dynamic_strides, $global_static_strides)
`sharedSize` custom<DynamicIndexList>($shared_dynamic_sizes, $shared_static_sizes)
( `padShared` `(` $pad^ `every` $pad_every `)` )?
( `padShared` `(` $pad_amount^ `every` $pad_interval `)` )?
( `atomicBarrier` `(` $atomic_barrier_address^ `[` $atomic_barrier_indices `]`
`:` type($atomic_barrier_address) `)`)?
( `iterate` $global_increment^ `,` $lds_increment `,` $iteration_count )?
attr-dict `:` qualified(type($base)) `->` type(results)
}];

let extraClassDeclaration = [{
  /// Returns the rank of the descriptor, defined as the number of entries
  /// in the static global sizes list.
  int64_t getRank() {
    auto staticGlobalSizes = getGlobalStaticSizes();
    return staticGlobalSizes.size();
  }

  /// Returns the bit width of the element type carried by the type of $base.
  unsigned getElementTypeWidth() {
    auto elementType = getBase().getType().getElementType();
    return elementType.getIntOrFloatBitWidth();
  }

  /// Returns the global sizes as a single list of OpFoldResult built from
  /// the static and dynamic global sizes.
  SmallVector<OpFoldResult> getMixedGlobalSizes() {
    auto staticSizes = getGlobalStaticSizes();
    auto dynamicSizes = getGlobalDynamicSizes();
    return getMixedValues(staticSizes, dynamicSizes, getContext());
  }

  /// Returns the global strides as a single list of OpFoldResult built from
  /// the static and dynamic global strides.
  SmallVector<OpFoldResult> getMixedGlobalStrides() {
    auto staticStrides = getGlobalStaticStrides();
    auto dynamicStrides = getGlobalDynamicStrides();
    return getMixedValues(staticStrides, dynamicStrides, getContext());
  }

  /// Returns the shared sizes as a single list of OpFoldResult built from
  /// the static and dynamic shared sizes.
  SmallVector<OpFoldResult> getMixedSharedSizes() {
    auto staticSizes = getSharedStaticSizes();
    auto dynamicSizes = getSharedDynamicSizes();
    return getMixedValues(staticSizes, dynamicSizes, getContext());
  }
}];

let hasVerifier = 1;
let hasFolder = 1;
}

#endif // AMDGPU
Loading
Loading