llvm
diff --git a/‎projects/eudsl-python-extras/mlir/extras/dialects/cf.py‎
Lines changed: 63 additions & 0 deletions b/‎projects/eudsl-python-extras/mlir/extras/dialects/cf.py‎
Lines changed: 63 additions & 0 deletions
diff --git a/‎projects/eudsl-python-extras/mlir/extras/dialects/nvgpu.py‎
Lines changed: 163 additions & 0 deletions b/‎projects/eudsl-python-extras/mlir/extras/dialects/nvgpu.py‎
Lines changed: 163 additions & 0 deletions
diff --git a/‎projects/eudsl-python-extras/mlir/extras/runtime/refbackend.py‎
Lines changed: 2 additions & 1 deletion b/‎projects/eudsl-python-extras/mlir/extras/runtime/refbackend.py‎
Lines changed: 2 additions & 1 deletion
@@ -0,0 +1,63 @@
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+from typing import List, Union
+
+from ..util import Successor
+from ...dialects._cf_ops_gen import _Dialect
+from ...dialects._ods_common import (
+    _cext,
+)
+from ...dialects.cf import *
+from ...ir import Block, InsertionPoint, Value
+
+
+@_cext.register_operation(_Dialect, replace=True)
+class CondBranchOp(CondBranchOp):
+    """Extension of the imported ``CondBranchOp`` adding per-branch accessors.
+
+    Registered with ``replace=True`` for ``_Dialect``; it subclasses the
+    same-named class brought in by the dialect star import.
+    """
+
+    @property
+    def true(self):
+        """Return a ``Successor`` for successor index 0 with ``trueDestOperands``."""
+        branch_operands = self.trueDestOperands
+        target_block = self.successors[0]
+        return Successor(self, branch_operands, target_block, 0)
+
+    @property
+    def false(self):
+        """Return a ``Successor`` for successor index 1 with ``falseDestOperands``."""
+        branch_operands = self.falseDestOperands
+        target_block = self.successors[1]
+        return Successor(self, branch_operands, target_block, 1)
+
+
+def br(
+    dest: Union[Value, Block, None] = None, *dest_operands: Value, loc=None, ip=None
+):
+    """Build a ``BranchOp`` to ``dest``, passing ``dest_operands`` along.
+
+    Args:
+        dest: Target block. If a ``Value`` is given here instead, it is treated
+            as the first branch operand and no explicit target is assumed.
+            When no target is given, the block of the current insertion point
+            (``InsertionPoint.current.block``) is used.
+        *dest_operands: Values forwarded to the op as its destination operands.
+        loc: Forwarded to ``BranchOp`` unchanged.
+        ip: Forwarded to ``BranchOp`` unchanged.
+
+    Returns:
+        Whatever ``BranchOp(...)`` returns.
+    """
+    if isinstance(dest, Value):
+        # The first positional argument is an operand, not a block: fold it
+        # into the operand list and fall through to the default target.
+        dest_operands = [dest, *dest_operands]
+        dest = None
+    if dest is None:
+        dest = InsertionPoint.current.block
+    return BranchOp(dest_operands, dest, loc=loc, ip=ip)
+
+
+def cond_br(
+    condition: Value,
+    true_dest: Union[Value, Block] = None,
+    false_dest: Union[Value, Block] = None,
+    true_dest_operands: List[Value] = None,
+    false_dest_operands: List[Value] = None,
+    *,
+    loc=None,
+    ip=None,
+):
+    """Build a ``CondBranchOp`` on ``condition``.
+
+    A destination left as ``None`` is replaced by the block of the current
+    insertion point; an operand list left as ``None`` is replaced by a new
+    empty list. ``loc`` and ``ip`` are forwarded to the op unchanged.
+    """
+    # The current block is only looked up for destinations that are missing.
+    true_dest = InsertionPoint.current.block if true_dest is None else true_dest
+    false_dest = InsertionPoint.current.block if false_dest is None else false_dest
+    true_dest_operands = [] if true_dest_operands is None else true_dest_operands
+    false_dest_operands = [] if false_dest_operands is None else false_dest_operands
+    return CondBranchOp(
+        condition,
+        true_dest_operands,
+        false_dest_operands,
+        true_dest,
+        false_dest,
+        loc=loc,
+        ip=ip,
+    )
@@ -0,0 +1,163 @@
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+from .gpu import smem_space
+from . import arith
+from ...dialects.nvgpu import *
+from ...ir import Attribute, Type
+from .. import types as T
+
+
+def nvgpu_type(mnemonic, attr_value):
+    """Parse the textual type ``!nvgpu.<mnemonic><<attr_value>>`` via ``Type.parse``."""
+    spelling = f"!nvgpu.{mnemonic}<{attr_value}>"
+    return Type.parse(spelling)
+
+
+def barrier_group_t(num_barriers=1, address_space=None):
+    """Return the ``mbarrier.group`` nvgpu type for ``num_barriers`` barriers.
+
+    ``address_space`` defaults to the result of ``smem_space()`` when ``None``.
+    """
+    address_space = smem_space() if address_space is None else address_space
+    params = f"memorySpace={address_space}, num_barriers = {num_barriers}"
+    return nvgpu_type("mbarrier.group", params)
+
+
+def warpgroup_accumulator_t(M, N, dtype):
+    """Return the ``warpgroup.accumulator`` nvgpu type over ``vector<MxNxdtype>``."""
+    params = f"fragmented=vector<{M}x{N}x{dtype}>"
+    return nvgpu_type("warpgroup.accumulator", params)
+
+
+def warpgroup_descriptor(M, N, dtype):
+    """Return the ``warpgroup.descriptor`` nvgpu type.
+
+    The type wraps ``memref<MxNxdtype, S>`` where ``S`` is the value returned
+    by ``smem_space()``.
+    """
+    params = f"tensor=memref<{M}x{N}x{dtype}, {smem_space()}>"
+    return nvgpu_type("warpgroup.descriptor", params)
+
+
+# Keep references to the star-imported ``mbarrier_init`` / ``mbarrier_create``
+# callables: the same-named wrappers defined below rebind those names and call
+# through to these aliases.
+_mbarrier_init = mbarrier_init
+
+
+_mbarrier_create = mbarrier_create
+
+
+def mbarrier_create(num_barriers=1, address_space=None, *, loc=None, ip=None):
+    """Call the wrapped ``mbarrier_create`` with a type from ``barrier_group_t``.
+
+    ``num_barriers`` and ``address_space`` are passed to ``barrier_group_t``;
+    ``loc`` and ``ip`` are forwarded unchanged.
+    """
+    group_type = barrier_group_t(num_barriers, address_space)
+    return _mbarrier_create(barriers=group_type, loc=loc, ip=ip)
+
+
+def mbarrier_init(barriers, count, mbar_id, *, predicate=None, loc=None, ip=None):
+    """Call the wrapped ``mbarrier_init``, accepting Python ints for indices.
+
+    ``count`` and ``mbar_id`` are turned into ``arith.constant(..., index=True)``
+    when given as Python ``int``; anything else is passed through. The remaining
+    arguments are forwarded unchanged.
+    """
+
+    def as_index(value):
+        # Only Python ints are materialized as index constants.
+        return arith.constant(value, index=True) if isinstance(value, int) else value
+
+    # Keyword arguments are evaluated left to right, so the ``count`` constant
+    # is created before the ``mbar_id`` one.
+    return _mbarrier_init(
+        barriers=barriers,
+        count=as_index(count),
+        mbar_id=as_index(mbar_id),
+        predicate=predicate,
+        loc=loc,
+        ip=ip,
+    )
+
+
+# Alias for the star-imported callable; the wrapper below rebinds the name.
+_mbarrier_arrive_expect_tx = mbarrier_arrive_expect_tx
+
+
+def mbarrier_arrive_expect_tx(
+    barriers, txcount, mbar_id, *, predicate=None, loc=None, ip=None
+):
+    """Call the wrapped ``mbarrier_arrive_expect_tx``, accepting Python ints.
+
+    ``txcount`` and ``mbar_id`` are turned into ``arith.constant(..., index=True)``
+    when given as Python ``int``; anything else is passed through. The remaining
+    arguments are forwarded unchanged.
+    """
+
+    def as_index(value):
+        # Only Python ints are materialized as index constants.
+        return arith.constant(value, index=True) if isinstance(value, int) else value
+
+    # Keyword arguments are evaluated left to right, so the ``txcount`` constant
+    # is created before the ``mbar_id`` one.
+    return _mbarrier_arrive_expect_tx(
+        barriers=barriers,
+        txcount=as_index(txcount),
+        mbar_id=as_index(mbar_id),
+        predicate=predicate,
+        loc=loc,
+        ip=ip,
+    )
+
+
+# Alias for the star-imported callable; the wrapper below rebinds the name.
+_tma_async_load = tma_async_load
+
+
+def tma_async_load(
+    dst,
+    barriers,
+    tensor_map_descriptor,
+    coordinates,
+    mbar_id,
+    *,
+    multicast_mask=None,
+    predicate=None,
+    loc=None,
+    ip=None,
+):
+    """Call the wrapped ``tma_async_load``, accepting Python ints for indices.
+
+    Args:
+        dst: Forwarded unchanged.
+        barriers: Forwarded unchanged.
+        tensor_map_descriptor: Forwarded unchanged.
+        coordinates: Iterable of coordinates. Python ``int`` entries are turned
+            into ``arith.constant(..., index=True)``; other entries are kept.
+            The argument itself is not modified.
+        mbar_id: Turned into an index constant when given as a Python ``int``.
+        multicast_mask: Forwarded unchanged.
+        predicate: Forwarded unchanged.
+        loc: Forwarded unchanged.
+        ip: Forwarded unchanged.
+
+    Returns:
+        Whatever the wrapped ``tma_async_load`` returns.
+    """
+    # Build a fresh list rather than assigning into ``coordinates``: in-place
+    # assignment mutates the caller's list and fails for tuples.
+    coordinates = [
+        arith.constant(c, index=True) if isinstance(c, int) else c
+        for c in coordinates
+    ]
+
+    if isinstance(mbar_id, int):
+        mbar_id = arith.constant(mbar_id, index=True)
+
+    return _tma_async_load(
+        dst=dst,
+        barriers=barriers,
+        tensor_map_descriptor=tensor_map_descriptor,
+        coordinates=coordinates,
+        mbar_id=mbar_id,
+        multicast_mask=multicast_mask,
+        predicate=predicate,
+        loc=loc,
+        ip=ip,
+    )
+
+
+# Alias for the star-imported callable; the wrapper below rebinds the name.
+_mbarrier_try_wait_parity = mbarrier_try_wait_parity
+
+
+def mbarrier_try_wait_parity(
+    barriers, mbar_id, phase_parity=False, ticks=10000000, *, loc=None, ip=None
+):
+    """Call the wrapped ``mbarrier_try_wait_parity``, accepting Python scalars.
+
+    Python ``int`` values for ``ticks`` and ``mbar_id`` become
+    ``arith.constant(..., index=True)``; a Python ``bool`` ``phase_parity``
+    becomes ``arith.constant(..., type=T.bool())``. Other values pass through,
+    and ``loc`` / ``ip`` are forwarded unchanged.
+    """
+
+    def as_index(value):
+        # Only Python ints are materialized as index constants.
+        return arith.constant(value, index=True) if isinstance(value, int) else value
+
+    # Constants are created in this order: ticks, mbar_id, phase_parity.
+    ticks = as_index(ticks)
+    mbar_id = as_index(mbar_id)
+    phase_parity = (
+        arith.constant(phase_parity, type=T.bool())
+        if isinstance(phase_parity, bool)
+        else phase_parity
+    )
+    return _mbarrier_try_wait_parity(
+        barriers=barriers,
+        phase_parity=phase_parity,
+        ticks=ticks,
+        mbar_id=mbar_id,
+        loc=loc,
+        ip=ip,
+    )
+
+
+# Alias for the star-imported callable; the wrapper below rebinds the name.
+_warpgroup_mma = warpgroup_mma
+
+
+def warpgroup_mma(
+    matrix_c,
+    descriptor_a,
+    descriptor_b,
+    *,
+    wait_group=None,
+    transpose_a=None,
+    transpose_b=None,
+    loc=None,
+    ip=None,
+):
+    """Call the wrapped ``warpgroup_mma`` with ``matrix_d`` set to ``matrix_c.type``.
+
+    All other arguments are forwarded unchanged.
+    """
+    return _warpgroup_mma(
+        # The ``matrix_d`` argument is taken from the accumulator operand's type.
+        matrix_d=matrix_c.type,
+        descriptor_a=descriptor_a,
+        descriptor_b=descriptor_b,
+        matrix_c=matrix_c,
+        wait_group=wait_group,
+        transpose_a=transpose_a,
+        transpose_b=transpose_b,
+        loc=loc,
+        ip=ip,
+    )
@@ -223,10 +223,11 @@ def wrapper(*args, **_kwargs):
 class LLVMJITBackend:
     def __init__(
         self,
-        shared_lib_paths=None,
+        shared_lib_paths: set[str | Path] | None = None,
     ):
         if shared_lib_paths is None:
             shared_lib_paths = set()
+        shared_lib_paths = set(shared_lib_paths)
         if platform.system() != "Windows":
             shared_lib_paths |= set(_exec_engine_shared_libs)
         self.shared_lib_paths = list(shared_lib_paths)