// Python-facing `generate` binding. Both inputs arrive as generic Python objects
// and are converted to infinicore::Tensor before being forwarded to
// InferEngine::generate; the resulting tensor is returned to Python.
// pybind11's object::cast<T>() raises py::cast_error when an argument cannot be
// converted, so a bad argument surfaces as a Python exception at the call site.
// Registered exactly once and without a trailing ';' so that the .def(...) chain
// continues into the bindings that follow.
.def(
    "generate",
    [](InferEngine &self, py::object input_ids, py::object position_ids) -> infinicore::Tensor {
        return self.generate(input_ids.cast<infinicore::Tensor>(),
                             position_ids.cast<infinicore::Tensor>());
    },
    "Run inference on all ranks with arbitrary arguments")
// Python-facing `reset_cache` binding: forwards directly to
// InferEngine::reset_cache with two keyword arguments, `pos` (default 0) and
// `async` (default false). This call closes the .def(...) chain.
// NOTE(review): `async` is a reserved word in Python 3.7+, so callers cannot
// spell `reset_cache(async=True)` literally; the flag has to be passed
// positionally or via `**{"async": True}`. Consider exposing a non-reserved
// alias in a follow-up — confirm with existing callers first.
.def(
    "reset_cache",
    &InferEngine::reset_cache,
    py::arg("pos") = 0,
    py::arg("async") = false,
    "Reset the internal cache in all workers to a specific position (clears state between generations). "
    "By default, this is synchronous. If async=True, this becomes asynchronous (unstable - use with caution).");