"magic_pdf/vscode:/vscode.git/clone" did not exist on "74b389525f498b4cb00d4a5bdc26e68fb7e07454"
Unverified commit 0b8f117f, authored by ClementLinCF, committed by GitHub
Browse files

[CK_TILE] Adjust kBlockSize of reduce example for better perf (#1779)

* Observed a 2x perf improvement with kBlockSize = 256
* Using 512 threads may lead to redundant computations
parent 3d50f57f
......@@ -52,7 +52,7 @@ bool run(const ck_tile::ArgParser& arg_parser)
// using WarpTile = ck_tile::sequence<1, 512>;
// using Vector = ck_tile::sequence<1, 8>;
-constexpr ck_tile::index_t kBlockSize = 512;
+constexpr ck_tile::index_t kBlockSize = 256;
constexpr ck_tile::index_t kBlockPerCu = 1;
ck_tile::index_t kGridSize = (m / BlockTile::at(ck_tile::number<0>{}));
std::cout << "grid size " << kGridSize << std::endl;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment