Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
tilelang
Commits
17bbc0ca
Unverified
Commit
17bbc0ca
authored
Nov 21, 2025
by
Lei Wang
Committed by
GitHub
Nov 21, 2025
Browse files
[Bugfix] Fallback to the old AtomicAdd implementation for legacy architectures (#1306)
parent
2426090f
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
53 additions
and
6 deletions
+53
-6
src/tl_templates/cuda/atomic.h
src/tl_templates/cuda/atomic.h
+53
-6
No files found.
src/tl_templates/cuda/atomic.h
View file @
17bbc0ca
...
@@ -169,6 +169,7 @@ TL_DEVICE T1 AtomicMinRet(T1 &ref, T2 val,
...
@@ -169,6 +169,7 @@ TL_DEVICE T1 AtomicMinRet(T1 &ref, T2 val,
}
}
}
}
#if (defined(__CUDA_ARCH_LIST__) && (__CUDA_ARCH_LIST__ > 890))
template
<
typename
T1
,
typename
T2
>
template
<
typename
T1
,
typename
T2
>
TL_DEVICE
void
AtomicAdd
(
T1
&
ref
,
T2
val
,
TL_DEVICE
void
AtomicAdd
(
T1
&
ref
,
T2
val
,
int
memory_order
=
int
(
cuda
::
memory_order_relaxed
))
{
int
memory_order
=
int
(
cuda
::
memory_order_relaxed
))
{
...
@@ -236,14 +237,18 @@ TL_DEVICE void AtomicAdd(T1 &ref, T2 val,
...
@@ -236,14 +237,18 @@ TL_DEVICE void AtomicAdd(T1 &ref, T2 val,
}
}
}
}
}
else
{
}
else
{
#if CUDART_VERSION >= 11080
atomicAdd
(
reinterpret_cast
<
NT1
*>
(
address
),
cuda_cast
<
NT1
>
(
val
));
cuda
::
atomic_ref
<
NT1
,
cuda
::
thread_scope_device
>
aref
(
*
address
);
aref
.
fetch_add
(
cuda_cast
<
NT1
>
(
val
),
cuda
::
memory_order
(
memory_order
));
#else
TL_NOT_IMPLEMENTED
();
#endif
}
}
}
}
#else
template
<
typename
T1
,
typename
T2
>
TL_DEVICE
void
AtomicAdd
(
T1
&
ref
,
T2
val
,
int
memory_order
=
int
(
cuda
::
memory_order_relaxed
))
{
using
NT1
=
typename
normalize_atomic_type
<
T1
>::
type
;
(
void
)
memory_order
;
atomicAdd
(
reinterpret_cast
<
NT1
*>
(
&
ref
),
cuda_cast
<
NT1
>
(
val
));
}
#endif
template
<
typename
T1
,
typename
T2
>
template
<
typename
T1
,
typename
T2
>
TL_DEVICE
T1
AtomicAddRet
(
T1
&
ref
,
T2
val
,
TL_DEVICE
T1
AtomicAddRet
(
T1
&
ref
,
T2
val
,
...
@@ -643,6 +648,48 @@ AtomicAddx4Ret(float *ref, float *val,
...
@@ -643,6 +648,48 @@ AtomicAddx4Ret(float *ref, float *val,
return
ret_val
;
return
ret_val
;
}
}
}
}
#else
TL_DEVICE
void
AtomicAddx2
(
float
*
ref
,
float
*
val
,
int
memory_order
=
int
(
cuda
::
memory_order_relaxed
))
{
(
void
)
memory_order
;
float2
add_val
=
*
reinterpret_cast
<
float2
*>
(
val
);
atomicAdd
(
ref
+
0
,
add_val
.
x
);
atomicAdd
(
ref
+
1
,
add_val
.
y
);
}
TL_DEVICE
float2
AtomicAddx2Ret
(
float
*
ref
,
float
*
val
,
int
memory_order
=
int
(
cuda
::
memory_order_relaxed
))
{
(
void
)
memory_order
;
float2
add_val
=
*
reinterpret_cast
<
float2
*>
(
val
);
float2
ret
;
ret
.
x
=
atomicAdd
(
ref
+
0
,
add_val
.
x
);
ret
.
y
=
atomicAdd
(
ref
+
1
,
add_val
.
y
);
return
ret
;
}
TL_DEVICE
void
AtomicAddx4
(
float
*
ref
,
float
*
val
,
int
memory_order
=
int
(
cuda
::
memory_order_relaxed
))
{
(
void
)
memory_order
;
float4
add_val
=
*
reinterpret_cast
<
float4
*>
(
val
);
atomicAdd
(
ref
+
0
,
add_val
.
x
);
atomicAdd
(
ref
+
1
,
add_val
.
y
);
atomicAdd
(
ref
+
2
,
add_val
.
z
);
atomicAdd
(
ref
+
3
,
add_val
.
w
);
}
TL_DEVICE
float4
AtomicAddx4Ret
(
float
*
ref
,
float
*
val
,
int
memory_order
=
int
(
cuda
::
memory_order_relaxed
))
{
(
void
)
memory_order
;
float4
add_val
=
*
reinterpret_cast
<
float4
*>
(
val
);
float4
ret
;
ret
.
x
=
atomicAdd
(
ref
+
0
,
add_val
.
x
);
ret
.
y
=
atomicAdd
(
ref
+
1
,
add_val
.
y
);
ret
.
z
=
atomicAdd
(
ref
+
2
,
add_val
.
z
);
ret
.
w
=
atomicAdd
(
ref
+
3
,
add_val
.
w
);
return
ret
;
}
#endif
#endif
template
<
typename
T
>
TL_DEVICE
T
AtomicLoad
(
T
&
ref
,
int
memory_order
)
{
template
<
typename
T
>
TL_DEVICE
T
AtomicLoad
(
T
&
ref
,
int
memory_order
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment