Commit 454248c7 authored by Lei Wang, committed by GitHub
Browse files

[Bugfix] Fix `T.copy` for scalar datatypes (#190)

* Optimize CMake build process with dynamic job count calculation

- Modify build_csrc function to use 90% of available CPU cores
- Ensure at least one job is used during compilation
- Improve build performance by dynamically adjusting parallel job count

* Optimize build_csrc function with multiprocessing module

- Replace os.cpu_count() with multiprocessing.cpu_count()
- Maintain existing 90% CPU utilization logic
- Improve CPU core count calculation for build process

* Add dynamic shape support with out_idx in Cython JIT kernel compilation

- Implement `run_cython_dynamic_shape_with_out_idx` function in test_tilelang_jit_gemm_cython.py
- Update Cython wrapper to handle dynamic symbolic shapes during tensor allocation
- Add support for resolving dynamic shape dimensions using input tensor references
- Enhance flexibility of JIT kernel compilation with symbolic shape handling

* Enhance error reporting for dynamic symbolic shape resolution in Cython JIT kernel

- Add detailed error message when a dynamic symbolic dimension is not found in dynamic_symbolic_map
- Improve debugging by providing context about missing symbolic dimensions
- Maintain existing dynamic shape resolution logic

* Fix Copy operation handling for scalar and multi-dimensional tensors

- Add special handling for scalar tensor copy operations
- Enhance error reporting in MakeIndices method with more detailed diagnostic information
- Improve SIMT loop generation to support zero-dimensional tensors
- Add explicit check and handling for scalar tensor scenarios

* Refactor Copy operation code formatting and improve readability

- Improve code formatting in MakeIndices and MakeSIMTLoop methods
- Add line breaks to enhance readability of complex ICHECK statements
- Simplify code structure in scalar tensor handling
- Remove unnecessary whitespace and improve code alignment
parent 5fafcb32
Subproject commit d310bd5aadce96145546fb7a87a6d325ea392b2b Subproject commit 2654ce86a8cda7d28eab73db7e9104c90511c072
...@@ -71,7 +71,9 @@ Array<PrimExpr> Copy::MakeIndices(const Array<IterVar> &ivs, ...@@ -71,7 +71,9 @@ Array<PrimExpr> Copy::MakeIndices(const Array<IterVar> &ivs,
idx++; idx++;
} }
} }
ICHECK(idx == ivs.size()); ICHECK(idx == ivs.size())
<< "idx = " << idx << ", ivs.size() = " << ivs.size()
<< "src name = " << src->name << ", dst name = " << dst->name;
return indices; return indices;
} }
...@@ -107,6 +109,12 @@ PrimExpr Copy::MakePredicate(arith::Analyzer *analyzer, ...@@ -107,6 +109,12 @@ PrimExpr Copy::MakePredicate(arith::Analyzer *analyzer,
For Copy::MakeSIMTLoop(arith::Analyzer *analyzer) const { For Copy::MakeSIMTLoop(arith::Analyzer *analyzer) const {
Array<IterVar> loop_vars = MakeIterVars(); Array<IterVar> loop_vars = MakeIterVars();
bool is_scalar = loop_vars.size() == 0;
if (is_scalar) {
return For(Var("i"), 0, 1, ForKind::kSerial,
BufferStore(dst, BufferLoad(src, {0}), {0}));
}
for (const auto &iv : loop_vars) for (const auto &iv : loop_vars)
analyzer->Bind(iv->var, iv->dom); analyzer->Bind(iv->var, iv->dom);
...@@ -125,7 +133,6 @@ For Copy::MakeSIMTLoop(arith::Analyzer *analyzer) const { ...@@ -125,7 +133,6 @@ For Copy::MakeSIMTLoop(arith::Analyzer *analyzer) const {
Stmt body = BufferStore(dst, value, dst_indices); Stmt body = BufferStore(dst, value, dst_indices);
if (dst_predicate.defined()) if (dst_predicate.defined())
body = IfThenElse(dst_predicate, body); body = IfThenElse(dst_predicate, body);
for (int i = loop_vars.size() - 1; i >= 0; i--) { for (int i = loop_vars.size() - 1; i >= 0; i--) {
Map<String, ObjectRef> annotations = {}; Map<String, ObjectRef> annotations = {};
if (coalesced_width.defined()) { if (coalesced_width.defined()) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment