"openmmapi/src/PythonForce.cpp" did not exist on "15811b7c56b65a5e94e4c7b212100b37b4de331f"
Unverified commit a2d2354e, authored by Shangyan Zhou, committed by GitHub
Browse files

Merge pull request #222 from deepseek-ai/set_dev_id

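Set `device_id` when calling `dist.init_process_group` (if the installed PyTorch version supports that parameter) to suppress a PyTorch warning.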
parents 77f97f79 cd371d31
...@@ -4,6 +4,7 @@ import numpy as np ...@@ -4,6 +4,7 @@ import numpy as np
import torch import torch
import torch.distributed as dist import torch.distributed as dist
from typing import Optional from typing import Optional
import inspect
def init_dist(local_rank: int, num_local_ranks: int): def init_dist(local_rank: int, num_local_ranks: int):
...@@ -14,12 +15,16 @@ def init_dist(local_rank: int, num_local_ranks: int): ...@@ -14,12 +15,16 @@ def init_dist(local_rank: int, num_local_ranks: int):
node_rank = int(os.getenv('RANK', 0)) node_rank = int(os.getenv('RANK', 0))
assert (num_local_ranks < 8 and num_nodes == 1) or num_local_ranks == 8 assert (num_local_ranks < 8 and num_nodes == 1) or num_local_ranks == 8
dist.init_process_group( sig = inspect.signature(dist.init_process_group)
backend='nccl', params = {
init_method=f'tcp://{ip}:{port}', 'backend': 'nccl',
world_size=num_nodes * num_local_ranks, 'init_method': f'tcp://{ip}:{port}',
rank=node_rank * num_local_ranks + local_rank 'world_size': num_nodes * num_local_ranks,
) 'rank': node_rank * num_local_ranks + local_rank,
}
if 'device_id' in sig.parameters:
params['device_id'] = torch.device(f"cuda:{local_rank}")
dist.init_process_group(**params)
torch.set_default_dtype(torch.bfloat16) torch.set_default_dtype(torch.bfloat16)
torch.set_default_device('cuda') torch.set_default_device('cuda')
torch.cuda.set_device(local_rank) torch.cuda.set_device(local_rank)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment