Unverified commit 2adb4409, authored by Yan Ma, committed by GitHub
Browse files

[Bugfix] Fix ray instance detect issue (#9439)

parent feb92fbe
@@ -232,9 +232,16 @@ def initialize_ray_cluster(
     # Connect to a ray cluster.
     if current_platform.is_rocm() or current_platform.is_xpu():
-        ray.init(address=ray_address,
-                 ignore_reinit_error=True,
-                 num_gpus=parallel_config.world_size)
+        # Try to connect existing ray instance and create a new one if not found
+        try:
+            ray.init("auto")
+        except ConnectionError:
+            logger.warning(
+                "No existing RAY instance detected. "
+                "A new instance will be launched with current node resources.")
+            ray.init(address=ray_address,
+                     ignore_reinit_error=True,
+                     num_gpus=parallel_config.world_size)
     else:
         ray.init(address=ray_address, ignore_reinit_error=True)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment