Commit 018b1b5f authored by Woosuk Kwon

test


Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
parent e9820408
Pipeline #2018 failed with stages in 0 seconds
@@ -406,16 +406,16 @@ mha_fwd(at::Tensor &q, // batch_size x seqlen_q x num_heads x head_size
         params, batch_size, num_heads, head_size, seqlen_k, seqlen_q,
         head_size_rounded, p_dropout, /*num_splits*/ 0, dprops, opts);
-    // NOTE(woosuk): Commented out because they are not used in inference.
-    // // number of times random will be generated per thread, to offset philox counter in thc random
-    // // state
-    // // We use a custom RNG that increases the offset by batch_size * nheads * 32.
-    // int64_t counter_offset = params.b * params.h * 32;
-    // auto options = torch::TensorOptions().dtype(torch::kFloat32).device(torch::kCUDA);
-    // auto rng_state = torch::empty({2}, options.dtype(torch::kInt64));
-    // // Forward kernel will populate memory with the seed and offset.
-    // params.rng_state = reinterpret_cast<uint64_t*>(rng_state.data_ptr());
+    // number of times random will be generated per thread, to offset philox counter in thc random
+    // state
+    // We use a custom RNG that increases the offset by batch_size * nheads * 32.
+    int64_t counter_offset = params.b * params.h * 32;
+    auto options = torch::TensorOptions().dtype(torch::kFloat32).device(torch::kCUDA);
+    auto rng_state = torch::empty({2}, options.dtype(torch::kInt64));
+    // Forward kernel will populate memory with the seed and offset.
+    params.rng_state = reinterpret_cast<uint64_t*>(rng_state.data_ptr());
+    // NOTE(woosuk): Commented out because they are not used in inference.
     // if (p_dropout > 0.0) {
     // auto gen = at::get_generator_or_default<at::CUDAGeneratorImpl>(
     // gen_, at::cuda::detail::getDefaultCUDAGenerator());
@@ -661,7 +661,6 @@ mha_varlen_fwd(at::Tensor &q, // total_q x num_heads x head_size, total_q := \s...
         p_dropout, /*num_splits*/ 0, dprops, opts);
     }
-    // NOTE(woosuk): Commented out because they are not used in inference.
     // number of times random will be generated per thread, to offset philox counter in thc random
     // state
     // We use a custom RNG that increases the offset by batch_size * nheads * 32.
@@ -671,6 +670,7 @@ mha_varlen_fwd(at::Tensor &q, // total_q x num_heads x head_size, total_q := \s...
     // Forward kernel will populate memory with the seed and offset.
    params.rng_state = reinterpret_cast<uint64_t*>(rng_state.data_ptr());
+    // NOTE(woosuk): Commented out because they are not used in inference.
     // if (p_dropout > 0.0) {
     // auto gen = at::get_generator_or_default<at::CUDAGeneratorImpl>(
     // gen_, at::cuda::detail::getDefaultCUDAGenerator());
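
For context, the restored lines allocate a two-element int64 CUDA tensor whose storage the forward kernel fills with the Philox seed and offset, point params.rng_state at that storage, and compute the per-call Philox counter offset of batch_size * nheads * 32. Below is a minimal standalone sketch of that same pattern, not the actual flash-attn code: it assumes a libtorch build, DemoParams is a hypothetical stand-in for the real kernel parameter struct, and the batch/head sizes are made up.

// Sketch only (hypothetical names), illustrating the rng_state setup from the diff above.
#include <torch/torch.h>
#include <cstdint>
#include <iostream>

struct DemoParams {
    int64_t b = 0;                  // batch size
    int64_t h = 0;                  // number of heads
    uint64_t* rng_state = nullptr;  // kernel writes {philox_seed, philox_offset} here
};

int main() {
    if (!torch::cuda::is_available()) {
        std::cout << "CUDA not available; skipping demo.\n";
        return 0;
    }

    DemoParams params;
    params.b = 2;   // made-up batch size
    params.h = 16;  // made-up head count

    // Per the comments in the diff, each thread generates random numbers a bounded
    // number of times, so the Philox counter is advanced by batch_size * nheads * 32.
    int64_t counter_offset = params.b * params.h * 32;

    // Two int64 slots on the GPU; the forward kernel would populate them with the
    // RNG seed and offset so the same dropout mask can be replayed later.
    auto options = torch::TensorOptions().dtype(torch::kInt64).device(torch::kCUDA);
    auto rng_state = torch::empty({2}, options);
    params.rng_state = reinterpret_cast<uint64_t*>(rng_state.data_ptr());

    std::cout << "counter_offset = " << counter_offset
              << ", rng_state elements = " << rng_state.numel() << "\n";
    return 0;
}

Presumably the rng_state allocation is restored here so the parameter struct is always fully initialized, while the dropout/generator branch below the NOTE stays commented out because it is not needed for inference.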