Unverified Commit 919234fe authored by Nick Hill's avatar Nick Hill Committed by GitHub
Browse files

[BugFix] Fix initial DP request load imbalance (#22910)


Signed-off-by: default avatarNick Hill <nhill@redhat.com>
parent ebcce2cd
...@@ -965,7 +965,7 @@ class DPAsyncMPClient(AsyncMPClient): ...@@ -965,7 +965,7 @@ class DPAsyncMPClient(AsyncMPClient):
# List of [waiting, running] pair per engine. # List of [waiting, running] pair per engine.
# Used only by DPLBAsyncMPClient subclass. # Used only by DPLBAsyncMPClient subclass.
self.lb_engines: list[list[int]] = [] self.lb_engines: list[list[int]] = [[0, 0] for _ in self.core_engines]
self.first_req_sock_addr = get_open_zmq_inproc_path() self.first_req_sock_addr = get_open_zmq_inproc_path()
self.first_req_send_socket = self.resources.first_req_send_socket = ( self.first_req_send_socket = self.resources.first_req_send_socket = (
...@@ -1121,10 +1121,8 @@ class DPLBAsyncMPClient(DPAsyncMPClient): ...@@ -1121,10 +1121,8 @@ class DPLBAsyncMPClient(DPAsyncMPClient):
def get_core_engine_for_request( def get_core_engine_for_request(
self, request: EngineCoreRequest) -> EngineIdentity: self, request: EngineCoreRequest) -> EngineIdentity:
# Engines are in rank order. # Engines are in rank order.
current_counts = self.lb_engines
if (eng_index := request.data_parallel_rank) is None: if (eng_index := request.data_parallel_rank) is None:
if not current_counts: current_counts = self.lb_engines
return self.core_engine
# TODO use P2C alg for larger DP sizes # TODO use P2C alg for larger DP sizes
num_engines = len(current_counts) num_engines = len(current_counts)
min_score = sys.maxsize min_score = sys.maxsize
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment