Commit 827b4040 authored by chenych
Browse files

Fix ray.init bug and update README

parent 496acb03
......@@ -169,3 +169,7 @@ python3 scripts/model_merger.py --local_dir path_to_your_actor_checkpoint
> RuntimeError: No HIP GPUs are available
在 `~/.bashrc` 中新增 `export HIP_VISIBLE_DEVICES=0,1,2,3`,然后执行 `source ~/.bashrc` 使环境变量生效
> ImportError: cannot import name 'index_first_axis' from 'transformers.modeling_flash_attention_utils'
降低 transformers 版本;Qwen2.5 可参考版本:4.51.3
\ No newline at end of file
......@@ -111,13 +111,13 @@ def main():
"PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:False",
}
}
ray.init(runtime_env=runtime_env) # this is for local ray cluster
if torch.version.hip is not None:
ray.init(num_gpus=torch.cuda.device_count(), ## for dcu devices
ignore_reinit_error=True,
runtime_env=runtime_env)
else:
ray.init(runtime_env=runtime_env)
ray.init(runtime_env=runtime_env) # this is for local ray cluster
runner = Runner.remote()
ray.get(runner.run.remote(ppo_config))
......
......@@ -89,7 +89,7 @@ class vLLMRollout(BaseRollout):
)
# Offload vllm model to reduce peak memory usage
self.inference_engine.sleep(level=1)
# self.inference_engine.sleep(level=1)
sampling_kwargs = {
"max_tokens": config.response_length,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment