config.py 1.56 KB
Newer Older
chenych's avatar
chenych committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# Copyright 2024 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Reward config
"""

chenych's avatar
update  
chenych committed
18
19
20
import os
from dataclasses import dataclass, field
from typing import Optional
chenych's avatar
chenych committed
21
22
23
24


@dataclass
class RewardConfig:
    """Reward config.

    Attributes:
        reward_type: Reward mode selector, ``"batch"`` by default. The set of
            accepted values is defined by the consumer of this config, not
            here.
        reward_function: Optional location of a custom reward function,
            written as ``<path>`` or ``<path>:<function name>``
            (e.g. ``./math.py:main``). ``post_init`` rewrites it to an
            absolute path, or to ``None`` when the path does not exist.
        reward_function_kwargs: Extra keyword arguments associated with the
            reward function; a fresh empty dict per instance by default.
        skip_special_tokens: Defaults to ``True``. Presumably forwarded to the
            tokenizer when decoding responses -- confirm against the caller.
        num_cpus: Defaults to ``1``. Presumably the CPU count reserved for the
            reward worker -- confirm against the caller.
        reward_function_name: Derived by ``post_init``; it is not a
            constructor argument (``init=False``).
    """

    reward_type: str = "batch"
    reward_function: Optional[str] = None
    reward_function_kwargs: dict = field(default_factory=dict)
    skip_special_tokens: bool = True
    num_cpus: int = 1
    # auto keys: filled in by post_init(), never passed to the constructor
    reward_function_name: Optional[str] = field(default=None, init=False)

    def post_init(self):
        """Resolve ``reward_function`` into a file path and a function name.

        This is a plain method, not the dataclass ``__post_init__`` hook, so
        it only runs when it is called explicitly.

        When ``reward_function`` is ``None`` nothing changes. Otherwise the
        text after the last ``:`` becomes ``reward_function_name`` (``"main"``
        when there is no ``:``), and the remaining path is made absolute if it
        exists on disk or reset to ``None`` if it does not. In the latter case
        ``reward_function_name`` keeps the parsed value.
        """
        if self.reward_function is None:
            return

        # support custom reward function, e.g., ./math.py:main
        if ":" in self.reward_function:
            # Split on the last colon only, so earlier colons stay in the path.
            self.reward_function, self.reward_function_name = self.reward_function.rsplit(":", maxsplit=1)
        else:
            self.reward_function_name = "main"

        if os.path.exists(self.reward_function):  # ray job uses absolute path
            self.reward_function = os.path.abspath(self.reward_function)
        else:
            # Best effort: a missing file disables the custom reward function.
            self.reward_function = None