"vscode:/vscode.git/clone" did not exist on "9bc6d98d58f10f9671b71c72178fa46affa58c8e"
Unverified commit a441aaf8, authored by atchernych, committed by GitHub
Browse files

feat: Enable EPP worker discovery mode (#6592)


Signed-off-by: Anna Tchernych <atchernych@nvidia.com>
parent b77f9765
...@@ -403,12 +403,18 @@ class MockerProcess: ...@@ -403,12 +403,18 @@ class MockerProcess:
f"(known_ids={known_ids})" f"(known_ids={known_ids})"
) )
# Register each dp_rank endpoint with the standalone indexer # Register each dp_rank endpoint with the standalone indexer.
# The mocker binds on base_port + dp_rank (contiguous), so we must
# use the same formula here rather than indexing into the allocated
# port list, which may contain gaps when intervening ports are busy.
zmq_addresses = {} zmq_addresses = {}
register_url = f"{self.standalone_indexer_url}/register" register_url = f"{self.standalone_indexer_url}/register"
replay_base = (
self._zmq_replay_ports[i * dp_size] if self._zmq_replay_ports else None
)
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession() as session:
for dp_rank in range(dp_size): for dp_rank in range(dp_size):
port = self._zmq_kv_events_ports[i * dp_size + dp_rank] port = base_port + dp_rank
endpoint = f"tcp://127.0.0.1:{port}" endpoint = f"tcp://127.0.0.1:{port}"
zmq_addresses[dp_rank] = endpoint zmq_addresses[dp_rank] = endpoint
...@@ -421,9 +427,10 @@ class MockerProcess: ...@@ -421,9 +427,10 @@ class MockerProcess:
"block_size", BLOCK_SIZE "block_size", BLOCK_SIZE
), ),
} }
if self._zmq_replay_ports: if replay_base is not None:
replay_port = self._zmq_replay_ports[i * dp_size + dp_rank] payload[
payload["replay_endpoint"] = f"tcp://127.0.0.1:{replay_port}" "replay_endpoint"
] = f"tcp://127.0.0.1:{replay_base + dp_rank}"
async with session.post(register_url, json=payload) as resp: async with session.post(register_url, json=payload) as resp:
if resp.status != 201: if resp.status != 201:
body = await resp.text() body = await resp.text()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment