import time

import numpy as np
import migraphx


def pct(a, p):
    """Return the p-th percentile of the values in *a* as a Python float.

    The input is converted to a float64 NumPy array before the percentile
    is computed.
    """
    return float(np.percentile(np.asarray(a, dtype=np.float64), p))


def build_mlp(batch, hidden, layers, seed=0):
    """Build an uncompiled MIGraphX program for a square MLP.

    The program takes one parameter named "x" with float shape
    [batch, hidden] and applies `layers` repetitions of a `dot` with a
    hidden x hidden float32 literal followed by `relu`. The weights are
    drawn from ``np.random.default_rng(seed).random``, so the same seed
    produces the same weights.

    Args:
        batch: Number of rows of the input parameter.
        hidden: Width of the input and of every layer.
        layers: Number of dot + relu pairs to stack.
        seed: Seed for the weight generator.

    Returns:
        The migraphx.program whose main module returns the last activation.
    """
    rng = np.random.default_rng(seed)
    p = migraphx.program()
    mm = p.get_main_module()
    x = mm.add_parameter(
        "x", migraphx.shape(type="float", lens=[batch, hidden])
    )
    y = x
    for _ in range(layers):
        w = mm.add_literal(rng.random((hidden, hidden), dtype=np.float32))
        y = mm.add_instruction(migraphx.op("dot"), [y, w])
        y = mm.add_instruction(migraphx.op("relu"), [y])
    mm.add_return([y])
    return p


def run_case(target, batch, hidden, layers, warmup=20, iters=200):
    """Compile the MLP for *target* and time repeated runs on one input.

    Args:
        target: Target name passed to ``migraphx.get_target``.
        batch: Batch size of the input.
        hidden: Hidden width of the MLP.
        layers: Number of dot + relu layers.
        warmup: Number of untimed runs executed before measuring.
        iters: Number of timed runs; must be at least 1.

    Returns:
        A tuple ``(avg, p50, p95, thr)``: the average milliseconds per
        iteration over the whole timed loop, the 50th and 95th percentile
        of the per-iteration milliseconds, and the throughput in samples
        per second over the whole timed loop.

    Raises:
        ValueError: If *iters* is smaller than 1.
    """
    # Fail before the expensive build/compile; otherwise iters == 0 only
    # surfaces later as a ZeroDivisionError when averaging.
    if iters < 1:
        raise ValueError(f"iters must be >= 1, got {iters}")

    p = build_mlp(batch, hidden, layers)
    p.compile(migraphx.get_target(target))
    x = np.random.rand(batch, hidden).astype(np.float32)

    for _ in range(warmup):
        p.run({"x": x})

    # NOTE(review): the timings assume p.run() returns only once the result
    # is ready - confirm against the MIGraphX documentation.
    times = []
    t0 = time.perf_counter()
    for _ in range(iters):
        s = time.perf_counter()
        p.run({"x": x})
        times.append((time.perf_counter() - s) * 1000)
    t1 = time.perf_counter()

    # avg and thr use the wall time of the whole loop, so they also include
    # the small per-iteration bookkeeping overhead.
    avg = (t1 - t0) * 1000 / iters
    thr = (batch * iters) / (t1 - t0)
    return avg, pct(times, 50), pct(times, 95), thr


def main():
    """Run every configured benchmark case and print one summary line each."""
    target = "gpu"  # the target provided by the image in use
    cases = [
        # Heavier case: delete this line if host/GPU memory is insufficient.
        (128, 8192 * 4, 8),
    ]
    print(f"target={target}\n")
    for b, h, n_layers in cases:
        avg, p50, p95, thr = run_case(
            target, b, h, n_layers, warmup=20, iters=200
        )
        print(f"[batch={b} hidden={h} layers={n_layers}] "
              f"avg={avg:.3f} ms p50={p50:.3f} ms p95={p95:.3f} ms "
              f"throughput={thr:.1f} samples/s")


if __name__ == "__main__":
    main()