OpenDAS / EasyR1 · Commits

Commit 827b4040
Authored Jul 29, 2025 by chenych
Fix ray.init bug and update README
Parent: 496acb03

Showing 3 changed files, with 7 additions and 3 deletions:

- README.md (+4, -0)
- verl/trainer/main.py (+2, -2)
- verl/workers/rollout/vllm_rollout_spmd.py (+1, -1)
README.md (+4, -0)

@@ -169,3 +169,7 @@ python3 scripts/model_merger.py --local_dir path_to_your_actor_checkpoint
 > RuntimeError: No HIP GPUs are available
 
 Add `export HIP_VISIBLE_DEVICES=0,1,2,3` to `~/.bashrc`, then re-source the environment.
+
+> ImportError: cannot import name 'index_first_axis' from 'transformers.modeling_flash_attention_utils'
+
+Downgrade the transformers version; a reference version for Qwen2.5 is 4.51.3.
\ No newline at end of file
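As a quick way to confirm both FAQ items on a DCU machine, the sketch below (illustrative only, not part of the commit; it uses only `torch` and `transformers` calls already referenced in this repository) prints whether the HIP build of PyTorch can see any devices and whether the `index_first_axis` import resolves:

```python
# Sanity check for the two FAQ entries above; illustrative only, not part of the commit.
import torch
import transformers

# "No HIP GPUs are available": on ROCm/HIP builds the devices are exposed through the CUDA API,
# so a count of 0 usually means HIP_VISIBLE_DEVICES is missing from ~/.bashrc.
print("HIP runtime:", torch.version.hip)            # None on CUDA-only builds of PyTorch
print("visible devices:", torch.cuda.device_count())

# "cannot import name 'index_first_axis'": fails on transformers versions that no longer
# provide this helper, which is why the README suggests downgrading (4.51.3 for Qwen2.5).
print("transformers version:", transformers.__version__)
try:
    from transformers.modeling_flash_attention_utils import index_first_axis  # noqa: F401
    print("index_first_axis import: OK")
except ImportError as exc:
    print("index_first_axis import failed:", exc)
```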
verl/trainer/main.py (+2, -2)

@@ -111,13 +111,13 @@ def main():
                 "PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:False",
             }
         }
-        ray.init(runtime_env=runtime_env)  # this is for local ray cluster
         if torch.version.hip is not None:
             ray.init(num_gpus=torch.cuda.device_count(),  ## for dcu devices
                      ignore_reinit_error=True,
                      runtime_env=runtime_env)
         else:
-            ray.init(runtime_env=runtime_env)
+            ray.init(runtime_env=runtime_env)  # this is for local ray cluster
 
     runner = Runner.remote()
     ray.get(runner.run.remote(ppo_config))
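Stripped of diff markers and of the surrounding `main()` plumbing (config parsing, the `Runner` actor), the initialization path after this commit boils down to the following condensed sketch; `runtime_env` is shortened to the one entry visible in the hunk:

```python
# Condensed sketch of the post-fix initialization in verl/trainer/main.py.
import ray
import torch

runtime_env = {"env_vars": {"PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:False"}}

if torch.version.hip is not None:
    # ROCm/HIP (DCU) build: Ray may not detect the accelerators on its own,
    # so pass the device count reported by torch explicitly.
    ray.init(num_gpus=torch.cuda.device_count(),  ## for dcu devices
             ignore_reinit_error=True,
             runtime_env=runtime_env)
else:
    ray.init(runtime_env=runtime_env)  # this is for local ray cluster
```

The `## for dcu devices` branch is taken on ROCm/HIP builds, where `torch.version.hip` is a version string rather than `None`; on CUDA builds it is `None` and the plain local-cluster call is used, so `ray.init` now runs exactly once either way.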
verl/workers/rollout/vllm_rollout_spmd.py (+1, -1)

@@ -89,7 +89,7 @@ class vLLMRollout(BaseRollout):
         )
 
         # Offload vllm model to reduce peak memory usage
-        self.inference_engine.sleep(level=1)
+        # self.inference_engine.sleep(level=1)
 
         sampling_kwargs = {
             "max_tokens": config.response_length,
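The change above disables the sleep-mode offload unconditionally. If the offload is only problematic on DCU/HIP builds, one hypothetical alternative (not what this commit does; `maybe_offload_engine` is an invented helper name) would be to reuse the same `torch.version.hip` check as in `main.py`:

```python
# Hypothetical alternative to commenting the call out: keep the offload on CUDA devices
# and skip it only on ROCm/HIP (DCU) builds. Not part of this commit.
import torch


def maybe_offload_engine(inference_engine) -> None:
    """Offload the vLLM model to reduce peak memory usage, where sleep mode is usable."""
    if torch.version.hip is None:
        inference_engine.sleep(level=1)
```

Called in `vLLMRollout.__init__` in place of the commented-out line, this would keep the memory saving on CUDA machines while matching the commit's behaviour on DCU.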