# Tokenizer class or path. If null, it will be inferred from the model.
tokenizer: null

# Whether to use shared memory for data loading.
use_shm: False

# Training set parquet. Can be a list or a single file.
# The program will read all files into memory, so it can't be too large (< 100GB).
# The path can be either a local path or an HDFS path.
# For an HDFS path, we provide utils to download it to DRAM and convert it to a local path.
train_files: ~/data/rlhf/gsm8k/train.parquet

# Validation parquet. Can be a list or a single file.
val_files: ~/data/rlhf/gsm8k/test.parquet

# The field in the dataset where the prompt is located. Default is 'prompt'.
prompt_key: prompt

# The field used to select the reward function (if using different ones per example).
reward_fn_key: data_source

# Maximum prompt length. All prompts will be left-padded to this length.
# An error will be reported if a prompt exceeds this length (see `truncation`).
# oc.select: default val for rollout.prompt_length
max_prompt_length: 512

# Maximum response length. Rollout in RL algorithms (e.g. PPO) generates up to this length.
# oc.select: default val for rollout.response_length
max_response_length: 512

# Batch size sampled for one training iteration of the RL algorithm.
train_batch_size: 1024

# Batch size used during validation. Can be null.
val_batch_size: null

# Whether to return the original input_ids without applying the chat template.
# This is used when the reward model's chat template differs from the policy's.
# If using a model-based RM with different templates, this should be True.
return_raw_input_ids: False

# Whether to return the original chat (prompt) without applying the chat template.
return_raw_chat: False

# Whether to return the full prompt with the chat template applied.
return_full_prompt: False

# Whether to shuffle the data in the dataloader.
shuffle: True

# Number of dataloader workers.
dataloader_num_workers: 8

# Whether to shuffle the validation set.
validation_shuffle: False

# Whether to filter overlong prompts.
filter_overlong_prompts: False

# Number of workers for filtering overlong prompts.
# For large-scale datasets, filtering can be time-consuming.
# Use multiprocessing to speed it up. Default is 1.
filter_overlong_prompts_workers: 1

# How to truncate the input_ids or prompt if they exceed max_prompt_length.
# Options: 'error', 'left', 'right', 'middle'. Default is 'error'.
truncation: error

# The field in the multi-modal dataset where the image is located. Default is 'images'.
image_key: images

# The field in the multi-modal dataset where the video is located.
video_key: videos

# Whether to allow using custom Python code shipped with a remote tokenizer.
trust_remote_code: False

# Optional: specify a custom dataset class path and name to override the default loading behavior.
custom_cls:

  # The path to the file containing your customized dataset class. If not specified, the pre-implemented dataset will be used.
  path: null

  # The name of the dataset class within the specified file.
  name: null

# Whether to return multi-modal inputs in the dataset. Set to False if rollout generates new multi-modal inputs.
return_multi_modal_inputs: True

# Settings related to the data sampler.
sampler:

  # The path to the module containing a curriculum class that implements the
  # AbstractSampler interface.
  class_path: null

  # The name of the curriculum class, e.g. `MySampler`.
  class_name: null
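
# Usage sketch (commented out, not part of the defaults): the list form of
# train_files and hypothetical custom_cls / sampler overrides. The file and
# class names below (my_rl_dataset.py, MyRLDataset, my_sampler.py, MySampler)
# are placeholders, not modules shipped with the library.
#
#   train_files:
#     - ~/data/rlhf/gsm8k/train.parquet
#     - ~/data/rlhf/math/train.parquet
#
#   custom_cls:
#     path: ./my_datasets/my_rl_dataset.py
#     name: MyRLDataset
#
#   sampler:
#     class_path: ./my_samplers/my_sampler.py
#     class_name: MySampler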

# Data generation configuration for augmenting the dataset.
datagen:

  # The path to the file containing your customized data generation class.
  # E.g. 'pkg://verl.experimental.dynamic_dataset.dynamicgen_dataset'
  path: null

  # The class name of the data generation class within the specified file.
  # E.g. 'MockDataGenerator'
  name: null
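
# Commented-out datagen sketch that simply combines the example values mentioned
# above; whether these exact module/class names are available depends on the
# installed version, so treat them as illustrative.
#
#   datagen:
#     path: 'pkg://verl.experimental.dynamic_dataset.dynamicgen_dataset'
#     name: 'MockDataGenerator'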