"Run inference on all ranks with arbitrary arguments")
// Python method `forward(input)`.
// Bound through a lambda that takes the engine by reference and the input by
// const reference, forwards to InferEngine::forward, and returns the
// InferEngine::Output by value (explicit trailing return type).
.def(
    "forward",
    [](InferEngine &self, const InferEngine::Input &input) -> InferEngine::Output {
        return self.forward(input);
    },
    "Run inference on all ranks with arbitrary arguments")
// Python method `reset_cache(pos=0)`.
// py::overload_cast<size_t> picks the InferEngine::reset_cache overload whose
// parameter list is exactly (size_t); a second overload is registered under
// the same Python name right after this one.
.def(
    "reset_cache",
    py::overload_cast<size_t>(&InferEngine::reset_cache),
    py::arg("pos") = 0, // keyword argument `pos`, defaults to 0 when omitted
    "Reset the internal cache in all workers to a specific position")
// Python method `reset_cache(cache_config, pos=0)`.
// py::overload_cast selects the InferEngine::reset_cache overload whose
// parameter list is (const cache::CacheConfig &, size_t). `cache_config` has no
// default and must be supplied; `pos` defaults to 0 when omitted.
.def(
    "reset_cache",
    py::overload_cast<const cache::CacheConfig &, size_t>(&InferEngine::reset_cache),
    py::arg("cache_config"),
    py::arg("pos") = 0,
    "Reset cache with new KV configuration")
// Python method `get_cache_config()`.
// Bound directly from the member-function pointer; no argument names or
// defaults are declared here, and the return type is whatever
// InferEngine::get_cache_config declares (not visible in this chunk).
.def(
    "get_cache_config",
    &InferEngine::get_cache_config,
    "Get current KV configuration")