Skip to content

Commit a8b3f9c

Browse files
Update at 2025-10-03
1 parent 7ab3bfe commit a8b3f9c

File tree

368 files changed

+19837
-6873
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

368 files changed

+19837
-6873
lines changed

opt/R/x86_64/bin/cmake

210 KB
Binary file not shown.

opt/R/x86_64/include/omp-tools.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,8 @@
7878
/* implicit barrier at the end of worksharing */ \
7979
macro (ompt_state_wait_barrier_implicit, 0x013) /* implicit barrier */ \
8080
macro (ompt_state_wait_barrier_explicit, 0x014) /* explicit barrier */ \
81+
macro (ompt_state_wait_barrier_implementation, 0x015) /* implementation barrier */ \
82+
macro (ompt_state_wait_barrier_teams, 0x016) /* teams barrier */ \
8183
\
8284
/* task wait states (32..63) */ \
8385
macro (ompt_state_wait_taskwait, 0x020) /* waiting at a taskwait */ \
@@ -211,6 +213,10 @@ typedef enum kmp_mutex_impl_t {
211213
* definitions generated from spec
212214
*****************************************************************************/
213215

216+
#if defined(__cplusplus)
217+
extern "C" {
218+
#endif
219+
214220
typedef enum ompt_callbacks_t {
215221
ompt_callback_thread_begin = 1,
216222
ompt_callback_thread_end = 2,
@@ -1404,6 +1410,14 @@ typedef ompt_record_ompt_t *(*ompt_get_record_ompt_t) (
14041410
ompt_buffer_cursor_t current
14051411
);
14061412

1413+
#ifdef _WIN32
1414+
__declspec(dllexport)
1415+
#else
1416+
__attribute__((visibility("default")))
1417+
#endif
1418+
ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version,
1419+
const char *runtime_version);
1420+
14071421
#define ompt_id_none 0
14081422
#define ompt_data_none {0}
14091423
#define ompt_time_none 0
@@ -1414,4 +1428,8 @@ typedef ompt_record_ompt_t *(*ompt_get_record_ompt_t) (
14141428

14151429
#define ompd_segment_none 0
14161430

1431+
#if defined(__cplusplus)
1432+
} // extern "C"
1433+
#endif
1434+
14171435
#endif /* __OMPT__ */

opt/R/x86_64/include/omp.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#ifndef __OMP_H
1616
# define __OMP_H
1717

18+
# include <stddef.h>
1819
# include <stdlib.h>
1920
# include <stdint.h>
2021

@@ -155,6 +156,8 @@
155156
/* OpenMP 5.1 interop */
156157
typedef intptr_t omp_intptr_t;
157158

159+
extern void __KAI_KMPC_CONVENTION ompx_dump_mapping_tables(void);
160+
158161
/* 0..omp_get_num_interop_properties()-1 are reserved for implementation-defined properties */
159162
typedef enum omp_interop_property {
160163
omp_ipr_fr_id = -1,
@@ -236,6 +239,11 @@
236239
extern int __KAI_KMPC_CONVENTION omp_target_memcpy_rect_async(void *, const void *, size_t, int, const size_t *,
237240
const size_t *, const size_t *, const size_t *, const size_t *, int, int,
238241
int, omp_depend_t *);
242+
243+
/* OpenMP 6.0 device memory routines */
244+
extern void * __KAI_KMPC_CONVENTION omp_target_memset(void *, int, size_t, int);
245+
extern void * __KAI_KMPC_CONVENTION omp_target_memset_async(void *, int, size_t, int, int, omp_depend_t *);
246+
239247
/*!
240248
* The `omp_get_mapped_ptr` routine returns the device pointer that is associated with a host pointer for a given device.
241249
*/
@@ -497,7 +505,7 @@
497505
extern int __KAI_KMPC_CONVENTION omp_in_explicit_task(void);
498506

499507
/* LLVM Extensions */
500-
extern void *llvm_omp_target_dynamic_shared_alloc();
508+
extern void *llvm_omp_target_dynamic_shared_alloc(void);
501509

502510
# undef __KAI_KMPC_CONVENTION
503511
# undef __KMP_IMP

opt/R/x86_64/include/ompt.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,8 @@
7878
/* implicit barrier at the end of worksharing */ \
7979
macro (ompt_state_wait_barrier_implicit, 0x013) /* implicit barrier */ \
8080
macro (ompt_state_wait_barrier_explicit, 0x014) /* explicit barrier */ \
81+
macro (ompt_state_wait_barrier_implementation, 0x015) /* implementation barrier */ \
82+
macro (ompt_state_wait_barrier_teams, 0x016) /* teams barrier */ \
8183
\
8284
/* task wait states (32..63) */ \
8385
macro (ompt_state_wait_taskwait, 0x020) /* waiting at a taskwait */ \
@@ -211,6 +213,10 @@ typedef enum kmp_mutex_impl_t {
211213
* definitions generated from spec
212214
*****************************************************************************/
213215

216+
#if defined(__cplusplus)
217+
extern "C" {
218+
#endif
219+
214220
typedef enum ompt_callbacks_t {
215221
ompt_callback_thread_begin = 1,
216222
ompt_callback_thread_end = 2,
@@ -1404,6 +1410,14 @@ typedef ompt_record_ompt_t *(*ompt_get_record_ompt_t) (
14041410
ompt_buffer_cursor_t current
14051411
);
14061412

1413+
#ifdef _WIN32
1414+
__declspec(dllexport)
1415+
#else
1416+
__attribute__((visibility("default")))
1417+
#endif
1418+
ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version,
1419+
const char *runtime_version);
1420+
14071421
#define ompt_id_none 0
14081422
#define ompt_data_none {0}
14091423
#define ompt_time_none 0
@@ -1414,4 +1428,8 @@ typedef ompt_record_ompt_t *(*ompt_get_record_ompt_t) (
14141428

14151429
#define ompd_segment_none 0
14161430

1431+
#if defined(__cplusplus)
1432+
} // extern "C"
1433+
#endif
1434+
14171435
#endif /* __OMPT__ */

opt/R/x86_64/include/ompx.h

Lines changed: 232 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,232 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef __OMPX_H
10+
#define __OMPX_H
11+
12+
#ifdef __AMDGCN_WAVEFRONT_SIZE
13+
#define __WARP_SIZE __AMDGCN_WAVEFRONT_SIZE
14+
#else
15+
#define __WARP_SIZE 32
16+
#endif
17+
18+
typedef unsigned long uint64_t;
19+
20+
#ifdef __cplusplus
21+
extern "C" {
22+
#endif
23+
24+
int omp_get_ancestor_thread_num(int);
25+
int omp_get_team_size(int);
26+
27+
#ifdef __cplusplus
28+
}
29+
#endif
30+
31+
/// Target kernel language extensions
32+
///
33+
/// These extensions exist for the host to allow fallback implementations,
34+
/// however, they cannot be arbitrarily composed with OpenMP. If the rules of
35+
/// the kernel language are followed, the host fallbacks should behave as
36+
/// expected since the kernel is represented as 3 sequential outer loops, one
37+
/// for each grid dimension, and three (nested) parallel loops, one for each
38+
/// block dimension. This fallback is not supposed to be optimal and should be
39+
/// configurable by the user.
40+
///
41+
///{
42+
43+
#ifdef __cplusplus
44+
extern "C" {
45+
#endif
46+
47+
enum {
48+
ompx_relaxed = __ATOMIC_RELAXED,
49+
ompx_aquire = __ATOMIC_ACQUIRE,
50+
ompx_release = __ATOMIC_RELEASE,
51+
ompx_acq_rel = __ATOMIC_ACQ_REL,
52+
ompx_seq_cst = __ATOMIC_SEQ_CST,
53+
};
54+
55+
enum {
56+
ompx_dim_x = 0,
57+
ompx_dim_y = 1,
58+
ompx_dim_z = 2,
59+
};
60+
61+
// TODO: The following implementation is for host fallback. We need to disable
62+
// generation of host fallback in kernel language mode.
63+
#pragma omp begin declare variant match(device = {kind(cpu)})
64+
65+
/// ompx_{thread_id,block_dim,block_id,grid_dim}
66+
///{
67+
#define _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(NAME, VALUE) \
68+
static inline int ompx_##NAME(int Dim) { return VALUE; }
69+
70+
_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(thread_id,
71+
omp_get_ancestor_thread_num(Dim + 1))
72+
_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(block_dim, omp_get_team_size(Dim + 1))
73+
_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(block_id, 0)
74+
_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(grid_dim, 1)
75+
#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C
76+
///}
77+
78+
/// ompx_sync_block{,_acq_rel,_divergent}
79+
///{
80+
#define _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(RETTY, NAME, ARGS, BODY) \
81+
static inline RETTY ompx_##NAME(ARGS) { BODY; }
82+
83+
_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block, int Ordering,
84+
_Pragma("omp barrier"))
85+
_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block_acq_rel, void,
86+
ompx_sync_block(ompx_acq_rel))
87+
_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block_divergent, int Ordering,
88+
ompx_sync_block(Ordering))
89+
#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C
90+
///}
91+
92+
static inline uint64_t ompx_ballot_sync(uint64_t mask, int pred) {
93+
__builtin_trap();
94+
}
95+
96+
/// ompx_shfl_down_sync_{i,f,l,d}
97+
///{
98+
#define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(TYPE, TY) \
99+
static inline TYPE ompx_shfl_down_sync_##TY(uint64_t mask, TYPE var, \
100+
unsigned delta, int width) { \
101+
__builtin_trap(); \
102+
}
103+
104+
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(int, i)
105+
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(float, f)
106+
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(long, l)
107+
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(double, d)
108+
109+
#undef _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL
110+
///}
111+
112+
#pragma omp end declare variant
113+
114+
/// ompx_sync_block{,_acq_rel,_divergent}
115+
///{
116+
#define _TGT_KERNEL_LANGUAGE_DECL_SYNC_C(RETTY, NAME, ARGS) \
117+
RETTY ompx_##NAME(ARGS);
118+
119+
_TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block, int Ordering)
120+
_TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block_acq_rel, void)
121+
_TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block_divergent, int Ordering)
122+
#undef _TGT_KERNEL_LANGUAGE_DECL_SYNC_C
123+
///}
124+
125+
/// ompx_{thread_id,block_dim,block_id,grid_dim}{,_x,_y,_z}
126+
///{
127+
#define _TGT_KERNEL_LANGUAGE_DECL_GRID_C(NAME) \
128+
int ompx_##NAME(int Dim); \
129+
static inline int ompx_##NAME##_x() { return ompx_##NAME(ompx_dim_x); } \
130+
static inline int ompx_##NAME##_y() { return ompx_##NAME(ompx_dim_y); } \
131+
static inline int ompx_##NAME##_z() { return ompx_##NAME(ompx_dim_z); }
132+
133+
_TGT_KERNEL_LANGUAGE_DECL_GRID_C(thread_id)
134+
_TGT_KERNEL_LANGUAGE_DECL_GRID_C(block_dim)
135+
_TGT_KERNEL_LANGUAGE_DECL_GRID_C(block_id)
136+
_TGT_KERNEL_LANGUAGE_DECL_GRID_C(grid_dim)
137+
#undef _TGT_KERNEL_LANGUAGE_DECL_GRID_C
138+
///}
139+
140+
uint64_t ompx_ballot_sync(uint64_t mask, int pred);
141+
142+
/// ompx_shfl_down_sync_{i,f,l,d}
143+
///{
144+
#define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(TYPE, TY) \
145+
TYPE ompx_shfl_down_sync_##TY(uint64_t mask, TYPE var, unsigned delta, \
146+
int width);
147+
148+
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(int, i)
149+
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(float, f)
150+
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(long, l)
151+
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(double, d)
152+
153+
#undef _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC
154+
///}
155+
156+
#ifdef __cplusplus
157+
}
158+
#endif
159+
160+
#ifdef __cplusplus
161+
162+
namespace ompx {
163+
164+
enum {
165+
dim_x = ompx_dim_x,
166+
dim_y = ompx_dim_y,
167+
dim_z = ompx_dim_z,
168+
};
169+
170+
enum {
171+
relaxed = ompx_relaxed ,
172+
aquire = ompx_aquire,
173+
release = ompx_release,
174+
acc_rel = ompx_acq_rel,
175+
seq_cst = ompx_seq_cst,
176+
};
177+
178+
/// ompx::{thread_id,block_dim,block_id,grid_dim}{,_x,_y,_z}
179+
///{
180+
#define _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(NAME) \
181+
static inline int NAME(int Dim) noexcept { return ompx_##NAME(Dim); } \
182+
static inline int NAME##_x() noexcept { return NAME(ompx_dim_x); } \
183+
static inline int NAME##_y() noexcept { return NAME(ompx_dim_y); } \
184+
static inline int NAME##_z() noexcept { return NAME(ompx_dim_z); }
185+
186+
_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(thread_id)
187+
_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(block_dim)
188+
_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(block_id)
189+
_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(grid_dim)
190+
#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX
191+
///}
192+
193+
/// ompx::sync_block{,_divergent}
194+
///{
195+
#define _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(RETTY, NAME, ARGS, CALL_ARGS) \
196+
static inline RETTY NAME(ARGS) { \
197+
return ompx_##NAME(CALL_ARGS); \
198+
}
199+
200+
_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(void, sync_block, int Ordering = acc_rel,
201+
Ordering)
202+
_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(void, sync_block_divergent,
203+
int Ordering = acc_rel, Ordering)
204+
#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX
205+
///}
206+
207+
static inline uint64_t ballot_sync(uint64_t mask, int pred) {
208+
return ompx_ballot_sync(mask, pred);
209+
}
210+
211+
/// shfl_down_sync
212+
///{
213+
#define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(TYPE, TY) \
214+
static inline TYPE shfl_down_sync(uint64_t mask, TYPE var, unsigned delta, \
215+
int width = __WARP_SIZE) { \
216+
return ompx_shfl_down_sync_##TY(mask, var, delta, width); \
217+
}
218+
219+
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(int, i)
220+
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(float, f)
221+
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(long, l)
222+
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(double, d)
223+
224+
#undef _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC
225+
///}
226+
227+
} // namespace ompx
228+
#endif
229+
230+
///}
231+
232+
#endif /* __OMPX_H */

0 commit comments

Comments
 (0)