Unverified commit 16a6b1d8, authored by Zilin Zhu, committed by GitHub
Browse files

[RL] Add --nccl-port to prevent port conflict (#7418)

parent 14229ccf
......@@ -68,6 +68,7 @@ class ServerArgs:
# Port for the HTTP server
host: str = "127.0.0.1"
port: int = 30000
nccl_port: Optional[int] = None
# Memory and scheduling
mem_fraction_static: Optional[float] = None
......@@ -599,6 +600,12 @@ class ServerArgs:
default=ServerArgs.port,
help="The port of the HTTP server.",
)
parser.add_argument(
"--nccl-port",
type=int,
default=ServerArgs.nccl_port,
help="The port for NCCL distributed environment setup. Defaults to a random port.",
)
parser.add_argument(
"--tokenizer-mode",
type=str,
......@@ -1747,14 +1754,17 @@ class PortArgs:
@staticmethod
def init_new(server_args, dp_rank: Optional[int] = None) -> "PortArgs":
port = server_args.port + random.randint(100, 1000)
while True:
if is_port_available(port):
break
if port < 60000:
port += 42
else:
port -= 43
if server_args.nccl_port is None:
port = server_args.port + random.randint(100, 1000)
while True:
if is_port_available(port):
break
if port < 60000:
port += 42
else:
port -= 43
else:
port = server_args.nccl_port
if not server_args.enable_dp_attention:
# Normal case, use IPC within a single node
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment