Commit f49235e3 authored by Zimin Li's avatar Zimin Li
Browse files

issue/288: improve the compatibility of the torch implementations of gemm and random sample

parent a0abcb2c
......@@ -57,11 +57,14 @@ infiniopGemmDescriptor_t = POINTER(GemmDescriptor)
# PyTorch implementation for matrix multiplication
def gemm(d, _c, beta, _a, _b, alpha):
    """PyTorch reference GEMM: write ``alpha * (_a @ _b) + beta * _c`` into ``d``.

    Tries the fused kernels first (``addmm`` for 2-D, ``baddbmm`` for batched
    3-D); if ``_c`` has another rank, or the fused call itself fails (e.g. an
    older torch build without support for the given dtype/device), falls back
    to a generic ``matmul`` followed by in-place scale-and-add.

    Args:
        d: output tensor, pre-allocated with the result shape (written in place).
        _c: additive term, scaled by ``beta``.
        beta: scalar multiplier for ``_c``.
        _a, _b: matrix-multiply operands.
        alpha: scalar multiplier for the ``_a @ _b`` product.
    """
    try:
        # Fast path: fused kernels for the two ranks they support.
        if _c.ndim == 2:
            torch.addmm(_c, _a, _b, beta=beta, alpha=alpha, out=d)
            return
        if _c.ndim == 3:
            torch.baddbmm(_c, _a, _b, beta=beta, alpha=alpha, out=d)
            return
    except Exception:
        # Fused kernel unavailable/unsupported — fall through to generic path.
        pass
    # Generic path: matmul then in-place d = alpha * d + beta * _c, matching
    # addmm/baddbmm semantics.
    torch.matmul(_a, _b, out=d)
    d.mul_(alpha).add_(_c, alpha=beta)
......
......@@ -68,7 +68,12 @@ def random_sample(data, random_val, topp, topk, voc, temperature):
k_index = min(topk, voc) - 1
threshold = min(cum_probs[k_index], topp) * random_val
try:
idx = torch.searchsorted(cum_probs, threshold)
except Exception:
# Fallback for manual search if torch.searchsorted is not supported
indices = (cum_probs >= threshold).nonzero(as_tuple=True)[0]
idx = indices[0] if indices.numel() > 0 else torch.tensor(len(cum_probs)-1, device=cum_probs.device)
return sorted_indices[idx]
return torch.argmax(data)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment