Unverified commit 16a6b1d8, authored by Zilin Zhu, committed by GitHub
Browse files

[RL] Add --nccl-port to prevent port conflict (#7418)

parent 14229ccf
...@@ -68,6 +68,7 @@ class ServerArgs: ...@@ -68,6 +68,7 @@ class ServerArgs:
# Port for the HTTP server # Port for the HTTP server
host: str = "127.0.0.1" host: str = "127.0.0.1"
port: int = 30000 port: int = 30000
nccl_port: Optional[int] = None
# Memory and scheduling # Memory and scheduling
mem_fraction_static: Optional[float] = None mem_fraction_static: Optional[float] = None
...@@ -599,6 +600,12 @@ class ServerArgs: ...@@ -599,6 +600,12 @@ class ServerArgs:
default=ServerArgs.port, default=ServerArgs.port,
help="The port of the HTTP server.", help="The port of the HTTP server.",
) )
parser.add_argument(
"--nccl-port",
type=int,
default=ServerArgs.nccl_port,
help="The port for NCCL distributed environment setup. Defaults to a random port.",
)
parser.add_argument( parser.add_argument(
"--tokenizer-mode", "--tokenizer-mode",
type=str, type=str,
...@@ -1747,14 +1754,17 @@ class PortArgs: ...@@ -1747,14 +1754,17 @@ class PortArgs:
@staticmethod @staticmethod
def init_new(server_args, dp_rank: Optional[int] = None) -> "PortArgs": def init_new(server_args, dp_rank: Optional[int] = None) -> "PortArgs":
port = server_args.port + random.randint(100, 1000) if server_args.nccl_port is None:
while True: port = server_args.port + random.randint(100, 1000)
if is_port_available(port): while True:
break if is_port_available(port):
if port < 60000: break
port += 42 if port < 60000:
else: port += 42
port -= 43 else:
port -= 43
else:
port = server_args.nccl_port
if not server_args.enable_dp_attention: if not server_args.enable_dp_attention:
# Normal case, use IPC within a single node # Normal case, use IPC within a single node
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment