Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
apex
Commits
2eafdb3d
"docs/git@developer.sourcefind.cn:OpenDAS/torchaudio.git" did not exist on "352d63c57858a8717eeece82c393072a1b7d701f"
Unverified
Commit
2eafdb3d
authored
Jan 29, 2022
by
Burc Eryilmaz
Committed by
GitHub
Jan 28, 2022
Browse files
add inline asm 128-bit counter (#1265)
parent
b1c75f6f
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
33 additions
and
12 deletions
+33
-12
apex/contrib/csrc/multihead_attn/philox.cuh
apex/contrib/csrc/multihead_attn/philox.cuh
+33
-12
No files found.
apex/contrib/csrc/multihead_attn/philox.cuh
View file @
2eafdb3d
...
@@ -59,26 +59,47 @@ private:
...
@@ -59,26 +59,47 @@ private:
return
;
return
;
++
counter
.
w
;
++
counter
.
w
;
}
}
__device__
uint4
incr128
(
uint4
ctr
)
{
uint4
res
;
asm
(
"add.cc.u32 %0, %4, %8;
\n\t
"
"addc.cc.u32 %1, %5, %9;
\n\t
"
"addc.cc.u32 %2, %6, %10;
\n\t
"
"addc.u32 %3, %7, %11;
\n\t
"
:
"=r"
(
res
.
x
),
"=r"
(
res
.
y
),
"=r"
(
res
.
z
),
"=r"
(
res
.
w
)
:
"r"
(
ctr
.
x
),
"r"
(
ctr
.
y
),
"r"
(
ctr
.
z
),
"r"
(
ctr
.
w
),
"n"
(
1
),
"n"
(
0
),
"n"
(
0
),
"n"
(
0
));
return
res
;
}
__device__
inline
void
incr
()
{
__device__
inline
void
incr
()
{
if
(
++
counter
.
x
)
counter
=
incr128
(
counter
);
return
;
if
(
++
counter
.
y
)
return
;
if
(
++
counter
.
z
)
return
;
++
counter
.
w
;
}
}
__device__
unsigned
int
mulhilo32
(
unsigned
int
a
,
unsigned
int
b
,
__device__
unsigned
int
mulhilo32
(
unsigned
int
a
,
unsigned
int
b
,
unsigned
int
*
result_high
)
{
unsigned
int
*
result_high
)
{
*
result_high
=
__umulhi
(
a
,
b
);
*
result_high
=
__umulhi
(
a
,
b
);
return
a
*
b
;
return
a
*
b
;
}
}
__device__
uint2
mulhilo32_v2
(
unsigned
int
a
,
unsigned
int
b
)
{
uint2
*
res
;
unsigned
long
long
tmp
;
asm
(
"mul.wide.u32 %0, %1, %2;
\n\t
"
:
"=l"
(
tmp
)
:
"r"
(
a
),
"r"
(
b
));
res
=
(
uint2
*
)(
&
tmp
);
return
*
res
;
}
__device__
inline
uint4
single_round
(
uint4
ctr
,
uint2
key
)
{
__device__
inline
uint4
single_round
(
uint4
ctr
,
uint2
key
)
{
unsigned
int
hi0
;
//unsigned int hi0;
unsigned
int
hi1
;
//unsigned int hi1;
unsigned
int
lo0
=
mulhilo32
(
kPhiloxSA
,
ctr
.
x
,
&
hi0
);
//unsigned int lo0 = mulhilo32(kPhiloxSA, ctr.x, &hi0);
unsigned
int
lo1
=
mulhilo32
(
kPhiloxSB
,
ctr
.
z
,
&
hi1
);
//unsigned int lo1 = mulhilo32(kPhiloxSB, ctr.z, &hi1);
uint4
ret
=
{
hi1
^
ctr
.
y
^
key
.
x
,
lo1
,
hi0
^
ctr
.
w
^
key
.
y
,
lo0
};
//uint4 ret = {hi1 ^ ctr.y ^ key.x, lo1, hi0 ^ ctr.w ^ key.y, lo0};
uint2
res0
=
mulhilo32_v2
(
kPhiloxSA
,
ctr
.
x
);
uint2
res1
=
mulhilo32_v2
(
kPhiloxSB
,
ctr
.
z
);
uint4
ret
=
{
res1
.
y
^
ctr
.
y
^
key
.
x
,
res1
.
x
,
res0
.
y
^
ctr
.
w
^
key
.
y
,
res0
.
x
};
return
ret
;
return
ret
;
}
}
static
const
unsigned
long
kPhilox10A
=
0x9E3779B9
;
static
const
unsigned
long
kPhilox10A
=
0x9E3779B9
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment