Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
3a9dabcf
Commit
3a9dabcf
authored
May 23, 2023
by
guangzlu
Browse files
updated philox and pass pt3
parent
d37c1d0b
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
39 additions
and
4 deletions
+39
-4
include/ck/tensor_operation/gpu/block/blockwise_dropout.hpp
include/ck/tensor_operation/gpu/block/blockwise_dropout.hpp
+28
-4
include/ck/utility/philox_rand.hpp
include/ck/utility/philox_rand.hpp
+11
-0
No files found.
include/ck/tensor_operation/gpu/block/blockwise_dropout.hpp
View file @
3a9dabcf
...
...
@@ -137,14 +137,23 @@ struct BlockwiseDropout
constexpr
int
tmp_size
=
MRepeat
*
KRepeat
;
int
philox_calls
=
tmp_size
/
8
;
int
philox_calls
=
tmp_size
/
4
;
ushort
tmp
[
tmp_size
];
// ushort tmp_id[tmp_size];
for
(
int
i
=
0
;
i
<
philox_calls
;
i
++
)
{
ph
.
get_random_
8
x16
((
tmp
+
i
*
8
),
element_global_1d_id
+
i
*
8
);
ph
.
get_random_
4
x16
((
tmp
+
i
*
4
),
element_global_1d_id
+
i
*
8
);
}
// int philox_calls_2 = tmp_size / 4;
// ushort tmp_id[tmp_size];
// for(int j = 0; j < philox_calls_2; j++){
// for(int i = 0; i < 4; i++){
// tmp_id[j * 4 + i] = element_global_1d_id + j * 8;
// }
//}
block_sync_lds
();
int
tmp_index
=
0
;
...
...
@@ -180,20 +189,35 @@ struct BlockwiseDropout
constexpr
int
tmp_size
=
MRepeat
*
KRepeat
/
N0
{}.
value
;
int
philox_calls
=
tmp_size
/
8
;
int
philox_calls_2
=
tmp_size
/
4
;
ushort
tmp
[
tmp_size
];
ushort
tmp_id
[
tmp_size
];
for
(
int
i
=
0
;
i
<
philox_calls
;
i
++
)
{
ph
.
get_random_8x16
((
tmp
+
i
*
8
),
element_global_1d_id
+
i
*
8
);
}
for
(
int
j
=
0
;
j
<
philox_calls_2
;
j
++
)
{
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
tmp_id
[
j
*
4
+
i
]
=
element_global_1d_id
+
j
*
8
;
}
}
// if(get_thread_global_1d_id() == 0){
// printf("tmp_size is %d \n", tmp_size);
// //printf("n0.value is %d \n", n0.value);
//}
block_sync_lds
();
constexpr
auto
iOffset
=
Number
<
tmp_size
>
{}
*
Offset
{};
static_for
<
0
,
tmp_size
,
1
>
{}([
&
](
auto
i
)
{
in_thread_buf
(
i
+
iOffset
)
=
execute_dropout
(
tmp
[
i
.
value
]
<=
p_dropout_16bits
,
in_thread_buf
(
i
+
iOffset
));
z_thread_buf
(
i
)
=
tmp
[
i
.
value
];
z_thread_buf
(
i
)
=
tmp
_id
[
i
.
value
];
});
}
...
...
include/ck/utility/philox_rand.hpp
View file @
3a9dabcf
...
...
@@ -84,6 +84,17 @@ class philox
out_tmp
[
3
]
=
tmp_ph
.
w
;
}
// Writes four 16-bit random values into out[0..3].
//
// A single call to get_philox_4x32(subsequence) produces a uint4; each of its
// x/y/z/w components is narrowed to ushort with static_cast (any bits that do
// not fit in a ushort are discarded) and stored in component order.
//
// out         - destination; must have room for at least 4 ushort elements.
// subsequence - value forwarded unchanged to get_philox_4x32.
__device__ void get_random_4x16(ushort* out, const unsigned long long subsequence)
{
    const uint4 rnd = get_philox_4x32(subsequence);

    out[0] = static_cast<ushort>(rnd.x);
    out[1] = static_cast<ushort>(rnd.y);
    out[2] = static_cast<ushort>(rnd.z);
    out[3] = static_cast<ushort>(rnd.w);
}
private:
struct
ull2
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment