f"Capture draft extend cuda graph end. Time elapsed: {time.perf_counter()-tic:.2f} s. avail mem={after_mem:.2f} GB. mem usage={(before_mem-after_mem):.2f} GB."
)
@property
def draft_model_runner(self):
    # Read-only property accessor.
    # NOTE(review): the implementation is elided (`...`) in this excerpt, so
    # the placeholder is kept as-is; presumably this returns the worker's
    # draft model runner -- confirm against the full file.
    ...
...
@@ -656,6 +674,7 @@ class EAGLEWorker(TpModelWorker):