Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
apex
Commits
2eafdb3d
Unverified
Commit
2eafdb3d
authored
Jan 29, 2022
by
Burc Eryilmaz
Committed by
GitHub
Jan 28, 2022
Browse files
add inline asm 128-bit counter (#1265)
parent
b1c75f6f
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
33 additions
and
12 deletions
+33
-12
apex/contrib/csrc/multihead_attn/philox.cuh
apex/contrib/csrc/multihead_attn/philox.cuh
+33
-12
No files found.
apex/contrib/csrc/multihead_attn/philox.cuh
View file @
2eafdb3d
...
...
@@ -59,26 +59,47 @@ private:
return
;
++
counter
.
w
;
}
__device__
uint4
incr128
(
uint4
ctr
)
{
uint4
res
;
asm
(
"add.cc.u32 %0, %4, %8;
\n\t
"
"addc.cc.u32 %1, %5, %9;
\n\t
"
"addc.cc.u32 %2, %6, %10;
\n\t
"
"addc.u32 %3, %7, %11;
\n\t
"
:
"=r"
(
res
.
x
),
"=r"
(
res
.
y
),
"=r"
(
res
.
z
),
"=r"
(
res
.
w
)
:
"r"
(
ctr
.
x
),
"r"
(
ctr
.
y
),
"r"
(
ctr
.
z
),
"r"
(
ctr
.
w
),
"n"
(
1
),
"n"
(
0
),
"n"
(
0
),
"n"
(
0
));
return
res
;
}
__device__
inline
void
incr
()
{
if
(
++
counter
.
x
)
return
;
if
(
++
counter
.
y
)
return
;
if
(
++
counter
.
z
)
return
;
++
counter
.
w
;
counter
=
incr128
(
counter
);
}
__device__
unsigned
int
mulhilo32
(
unsigned
int
a
,
unsigned
int
b
,
unsigned
int
*
result_high
)
{
*
result_high
=
__umulhi
(
a
,
b
);
return
a
*
b
;
}
__device__
uint2
mulhilo32_v2
(
unsigned
int
a
,
unsigned
int
b
)
{
uint2
*
res
;
unsigned
long
long
tmp
;
asm
(
"mul.wide.u32 %0, %1, %2;
\n\t
"
:
"=l"
(
tmp
)
:
"r"
(
a
),
"r"
(
b
));
res
=
(
uint2
*
)(
&
tmp
);
return
*
res
;
}
__device__
inline
uint4
single_round
(
uint4
ctr
,
uint2
key
)
{
unsigned
int
hi0
;
unsigned
int
hi1
;
unsigned
int
lo0
=
mulhilo32
(
kPhiloxSA
,
ctr
.
x
,
&
hi0
);
unsigned
int
lo1
=
mulhilo32
(
kPhiloxSB
,
ctr
.
z
,
&
hi1
);
uint4
ret
=
{
hi1
^
ctr
.
y
^
key
.
x
,
lo1
,
hi0
^
ctr
.
w
^
key
.
y
,
lo0
};
//unsigned int hi0;
//unsigned int hi1;
//unsigned int lo0 = mulhilo32(kPhiloxSA, ctr.x, &hi0);
//unsigned int lo1 = mulhilo32(kPhiloxSB, ctr.z, &hi1);
//uint4 ret = {hi1 ^ ctr.y ^ key.x, lo1, hi0 ^ ctr.w ^ key.y, lo0};
uint2
res0
=
mulhilo32_v2
(
kPhiloxSA
,
ctr
.
x
);
uint2
res1
=
mulhilo32_v2
(
kPhiloxSB
,
ctr
.
z
);
uint4
ret
=
{
res1
.
y
^
ctr
.
y
^
key
.
x
,
res1
.
x
,
res0
.
y
^
ctr
.
w
^
key
.
y
,
res0
.
x
};
return
ret
;
}
static
const
unsigned
long
kPhilox10A
=
0x9E3779B9
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment