parser.add_argument('--epoch_completion_hook_program',type=str,help='Path to the program/script to be executed after the epoch ends and the checkpoint is saved')
parser.add_argument('--regular_checkpoint_hook_program',type=str,help='Path to the program/script to be executed after the regular checkpoint is saved')
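# Hedged sketch (not this trainer's actual dispatch logic) of how the two hook
# arguments above might be consumed once the args have been parsed. The helper
# name and the `checkpoint_path` variable are illustrative assumptions.
def _run_checkpoint_hook(hook_program, checkpoint_path):
    import subprocess
    # Hooks are optional; do nothing when none was supplied on the command line.
    if hook_program:
        # Hand the freshly saved checkpoint path to the hook as its first
        # argument and raise if the hook exits non-zero.
        subprocess.run([hook_program, checkpoint_path], check=True)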
parser.add_argument('--training_type',type=str,choices=['pre','sft','dpo'],help='Specifies the type of training: pre (pre-training), sft (supervised fine-tuning), or dpo (direct preference optimization)')
parser.add_argument('--freeze_layers',action='store_true',help='Freeze all layers (use --keep_layers_trainable to exempt specific layers)')
parser.add_argument('--keep_layers_trainable',type=int,nargs='*',default=[],help='List of layer indices to keep trainable (e.g., --keep_layers_trainable 0 31)')
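# Hedged sketch, assuming a PyTorch-style model whose transformer blocks are
# exposed as `model.layers` (the attribute name is an assumption; it varies by
# architecture). Shows one way --freeze_layers and --keep_layers_trainable
# could interact; not necessarily how this trainer applies them.
def _apply_layer_freezing(model, freeze_layers, keep_layers_trainable):
    if not freeze_layers:
        return
    for idx, layer in enumerate(model.layers):
        # A layer listed in --keep_layers_trainable stays trainable;
        # every other layer has its parameters frozen.
        trainable = idx in keep_layers_trainable
        for param in layer.parameters():
            param.requires_grad = trainable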
parser.add_argument('--dpo_chosen_beta',type=float,help='Temperature parameter for the chosen term of the DPO loss, typically in the range of 0.1 to 0.5')
parser.add_argument('--dpo_rejected_beta',type=float,help='Temperature parameter for the rejected term of the DPO loss, typically in the range of 0.1 to 0.5')
parser.add_argument('--dpo_penalty_lambda',type=float,help='Scaling parameter for the positive-penalty term in the DPO loss, typically in the range of 1 to 100')
parser.add_argument('--reference_checkpoint_name',type=str,help='Checkpoint name for the reference model')
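# Hedged sketch of how the three DPO hyperparameters above might enter the
# loss. Standard DPO uses a single beta; the separate chosen/rejected betas
# plus a penalty lambda suggest a DPOP-style variant that penalizes the chosen
# log-ratio falling below the reference model (loaded from
# --reference_checkpoint_name). The exact formulation is defined elsewhere in
# the codebase; this is one plausible reading, not the definitive loss.
def _dpo_loss_sketch(policy_chosen_logps, policy_rejected_logps,
                     ref_chosen_logps, ref_rejected_logps,
                     chosen_beta, rejected_beta, penalty_lambda):
    import torch
    import torch.nn.functional as F
    # Log-ratios of the policy against the frozen reference model.
    chosen_ratio = policy_chosen_logps - ref_chosen_logps
    rejected_ratio = policy_rejected_logps - ref_rejected_logps
    # Preference margin with per-side temperatures, minus a lambda-scaled
    # positive penalty whenever the chosen log-ratio drops below zero.
    margin = (chosen_beta * chosen_ratio
              - rejected_beta * rejected_ratio
              - penalty_lambda * torch.clamp(-chosen_ratio, min=0.0))
    return -F.logsigmoid(margin).mean()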