START TIME: Fri Mar 15 10:58:49 CST 2024
[2024-03-15 10:59:30,546] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[2024-03-15 11:00:04,794] [INFO] [runner.py:463:main] Using IP address of 10.3.6.47 for node c06r3n06
[2024-03-15 11:00:04,815] [INFO] [multinode_runner.py:72:get_cmd] Running on the following workers: c06r3n06,c06r3n07,c06r3n08,c06r3n09
[2024-03-15 11:00:04,815] [INFO] [runner.py:570:main] cmd = pdsh -S -f 1024 -w c06r3n06,c06r3n07,c06r3n08,c06r3n09 export UCX_MAX_EAGER_LANES=4; export UCX_MAX_RNDV_LANES=4; export UCX_ZCOPY_THRESH=auto; export UCX_WARN_UNUSED_ENV_VARS=n; export UCX_RNDV_THRESH=auto; export NCCL_IB_TIMEOUT=22; export UCX_IB_PCI_BW=mlx5_0:50Gbs,mlx5_1:50Gbs,mlx5_2:50Gbs,mlx5_3:50Gbs; export UCX_NET_DEVICES=mlx5_0:1,mlx5_1:1,mlx5_2:1,mlx5_3:1; export PYTHONPATH=/work/home/liangjing/LLM/LLaMA-Factory-main; cd /work/home/liangjing/LLM/LLaMA-Factory-main; /work/home/liangjing/anaconda3/envs/torch2.1/bin/python -u -m deepspeed.launcher.launch --world_info=eyJjMDZyM24wNiI6IFswLCAxLCAyLCAzXSwgImMwNnIzbjA3IjogWzAsIDEsIDIsIDNdLCAiYzA2cjNuMDgiOiBbMCwgMSwgMiwgM10sICJjMDZyM24wOSI6IFswLCAxLCAyLCAzXX0= --node_rank=%n --master_addr=10.3.6.47 --master_port=29500 src/train_bash.py --stage 'sft' --do_train --template 'llama2' --dataset 'alpaca_gpt4_en,alpaca_gpt4_zh' --finetuning_type 'full' --model_name_or_path '/work/home/liangjing/.cache/modelscope/hub/skyline2006/llama-7b' --output_dir '/work/share/huchen1/liangjj/llama_factory' --per_device_train_batch_size '1' --per_device_eval_batch_size '1' --gradient_accumulation_steps '1' --preprocessing_num_workers '2' --lr_scheduler_type 'cosine' --logging_steps '10' --save_steps '100' --eval_steps '100' --learning_rate '5e-5' --max_grad_norm '0.5' --num_train_epochs '4.0' --val_size '0.01' --evaluation_strategy 'steps' --load_best_model_at_end --weight_decay '0.' --warmup_ratio '0.03' --plot_loss --fp16 --save_on_each_node --deepspeed 'deepspeed.json'
c06r3n06: [2024-03-15 11:00:32,273] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)
c06r3n09: [2024-03-15 11:00:33,456] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)
c06r3n07: [2024-03-15 11:00:33,458] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)
c06r3n08: [2024-03-15 11:00:33,542] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)
c06r3n06: [2024-03-15 11:00:54,615] [INFO] [launch.py:138:main] 0 NCCL_IB_TIMEOUT=22
c06r3n06: [2024-03-15 11:00:54,615] [INFO] [launch.py:145:main] WORLD INFO DICT: {'c06r3n06': [0, 1, 2, 3], 'c06r3n07': [0, 1, 2, 3], 'c06r3n08': [0, 1, 2, 3], 'c06r3n09': [0, 1, 2, 3]}
c06r3n06: [2024-03-15 11:00:54,615] [INFO] [launch.py:151:main] nnodes=4, num_local_procs=4, node_rank=0
c06r3n06: [2024-03-15 11:00:54,615] [INFO] [launch.py:162:main] global_rank_mapping=defaultdict(<class 'list'>, {'c06r3n06': [0, 1, 2, 3], 'c06r3n07': [4, 5, 6, 7], 'c06r3n08': [8, 9, 10, 11], 'c06r3n09': [12, 13, 14, 15]})
c06r3n06: [2024-03-15 11:00:54,615] [INFO] [launch.py:163:main] dist_world_size=16
c06r3n06: [2024-03-15 11:00:54,615] [INFO] [launch.py:165:main] Setting CUDA_VISIBLE_DEVICES=0,1,2,3
[... c06r3n07, c06r3n08, and c06r3n09 log the same launch.py block at 11:00:54 with node_rank=1, 2, and 3 respectively; the WORLD INFO DICT, global_rank_mapping, dist_world_size=16, and CUDA_VISIBLE_DEVICES=0,1,2,3 lines are identical on every node ...]
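For readers checking the topology: the --world_info value in the launch command above is just a base64-encoded JSON map of host name to local GPU ids. A minimal sketch (plain Python, not part of the original run; the string is copied verbatim from the command) to decode it and confirm the 16-way world size reported by the launcher:

import base64
import json

# --world_info payload copied from the deepspeed.launcher.launch command above.
world_info_b64 = "eyJjMDZyM24wNiI6IFswLCAxLCAyLCAzXSwgImMwNnIzbjA3IjogWzAsIDEsIDIsIDNdLCAiYzA2cjNuMDgiOiBbMCwgMSwgMiwgM10sICJjMDZyM24wOSI6IFswLCAxLCAyLCAzXX0="

world_info = json.loads(base64.b64decode(world_info_b64))
print(world_info)                                      # {'c06r3n06': [0, 1, 2, 3], ...}
print(sum(len(gpus) for gpus in world_info.values()))  # 16, matching dist_world_size=16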
c06r3n09: [2024-03-15 11:01:30,694] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)
c06r3n06: [2024-03-15 11:01:30,696] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)
c06r3n08: [2024-03-15 11:01:30,725] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)
c06r3n07: [2024-03-15 11:01:30,767] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[... each of the four worker processes per node prints this line once, 16 entries in total ...]
c06r3n06: /work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/gradio_client/documentation.py:103: UserWarning: Could not get documentation group for : No known documentation group for module 'gradio.mix'
c06r3n06: warnings.warn(f"Could not get documentation group for {cls}: {exc}")
[... the same benign gradio_client UserWarning is repeated several times by every worker process on all four nodes ...]
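The gradio_client UserWarning above is harmless import-time noise and does not affect training. If it clutters the logs, a standard-library filter can be installed before importing LLaMA-Factory; a sketch (the message regex is inferred from the warning text above):

import warnings

# Hypothetical filter: drop the benign gradio_client documentation warnings shown above.
warnings.filterwarnings(
    "ignore",
    message=r"Could not get documentation group for .*",
    category=UserWarning,
)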
c06r3n07: [2024-03-15 11:02:10,385] [INFO] [comm.py:637:init_distributed] cdb=None
[... every one of the 16 ranks on c06r3n06-c06r3n09 logs the same init_distributed "cdb=None" line at 11:02:10 ...]
c06r3n06: [2024-03-15 11:02:10,385] [INFO] [comm.py:668:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl
c06r3n06: WARNING: Logging before InitGoogleLogging() is written to STDERR
c06r3n06: I0315 11:02:10.465369 9280 ProcessGroupNCCL.cpp:686] [Rank 0] ProcessGroupNCCL initialization options:NCCL_ASYNC_ERROR_HANDLING: 1, NCCL_DESYNC_DEBUG: 0, NCCL_ENABLE_TIMING: 0, NCCL_BLOCKING_WAIT: 0, TIMEOUT(ms): 1800000, USE_HIGH_PRIORITY_STREAM: 0, TORCH_DISTRIBUTED_DEBUG: OFF, NCCL_DEBUG: OFF, ID=269933744
c06r3n06: I0315 11:02:10.465345 9281 ProcessGroupNCCL.cpp:686] [Rank 1] ProcessGroupNCCL initialization options:NCCL_ASYNC_ERROR_HANDLING: 1, NCCL_DESYNC_DEBUG: 0, NCCL_ENABLE_TIMING: 0, NCCL_BLOCKING_WAIT: 0, TIMEOUT(ms): 1800000, USE_HIGH_PRIORITY_STREAM: 0, TORCH_DISTRIBUTED_DEBUG: OFF, NCCL_DEBUG: OFF, ID=272689168
[... an equivalent "WARNING: Logging before InitGoogleLogging() is written to STDERR" line and ProcessGroupNCCL initialization line follow for ranks 2-15 (PIDs 9282-9283 on c06r3n06, 24634-24637 on c06r3n07, 5117-5120 on c06r3n08, 6714-6717 on c06r3n09); only the rank, PID, and ID fields differ ...]
c06r3n06: 03/15/2024 11:02:10 - INFO - llmtuner.hparams.parser - Process rank: 0, device: cuda:0, n_gpu: 1
c06r3n06: distributed training: True, compute dtype: torch.float16
[... each of the 16 processes logs the matching "Process rank: N, device: cuda:N, n_gpu: 1" / "distributed training: True, compute dtype: torch.float16" pair for its local rank N (0-3 on every node) ...]
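For context, the ProcessGroupNCCL options above come from the per-process NCCL process-group setup that DeepSpeed performs; roughly the following happens in each of the 16 workers. A minimal sketch, assuming the DeepSpeed launcher has exported RANK, LOCAL_RANK, WORLD_SIZE, MASTER_ADDR=10.3.6.47, and MASTER_PORT=29500 as shown above; the 1800 s timeout matches ddp_timeout=1800 and TIMEOUT(ms): 1800000:

import datetime
import os

import torch
import torch.distributed as dist

local_rank = int(os.environ["LOCAL_RANK"])
torch.cuda.set_device(local_rank)  # one process per GPU, cuda:0-3 on every node
dist.init_process_group(
    backend="nccl",  # "Initializing TorchBackend in DeepSpeed with backend nccl"
    timeout=datetime.timedelta(seconds=1800),  # ddp_timeout=1800 -> TIMEOUT(ms): 1800000
)
print(f"rank {dist.get_rank()} of {dist.get_world_size()} ready on cuda:{local_rank}")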
c06r3n06: 03/15/2024 11:02:10 - INFO - llmtuner.hparams.parser - Training/evaluation parameters Seq2SeqTrainingArguments(
c06r3n06: _n_gpu=1,
c06r3n06: adafactor=False,
c06r3n06: adam_beta1=0.9,
c06r3n06: adam_beta2=0.999,
c06r3n06: adam_epsilon=1e-08,
c06r3n06: auto_find_batch_size=False,
c06r3n06: bf16=False,
c06r3n06: bf16_full_eval=False,
c06r3n06: data_seed=None,
c06r3n06: dataloader_drop_last=False,
c06r3n06: dataloader_num_workers=0,
c06r3n06: dataloader_persistent_workers=False,
c06r3n06: dataloader_pin_memory=True,
c06r3n06: ddp_backend=None,
c06r3n06: ddp_broadcast_buffers=None,
c06r3n06: ddp_bucket_cap_mb=None,
c06r3n06: ddp_find_unused_parameters=None,
c06r3n06: ddp_timeout=1800,
c06r3n06: debug=[],
c06r3n06: deepspeed=deepspeed.json,
c06r3n06: disable_tqdm=False,
c06r3n06: dispatch_batches=None,
c06r3n06: do_eval=True,
c06r3n06: do_predict=False,
c06r3n06: do_train=True,
c06r3n06: eval_accumulation_steps=None,
c06r3n06: eval_delay=0,
c06r3n06: eval_steps=100,
c06r3n06: evaluation_strategy=steps,
c06r3n06: fp16=True,
c06r3n06: fp16_backend=auto,
c06r3n06: fp16_full_eval=False,
c06r3n06: fp16_opt_level=O1,
c06r3n06: fsdp=[],
c06r3n06: fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False},
c06r3n06: fsdp_min_num_params=0,
c06r3n06: fsdp_transformer_layer_cls_to_wrap=None,
c06r3n06: full_determinism=False,
c06r3n06: generation_config=None,
c06r3n06: generation_max_length=None,
c06r3n06: generation_num_beams=None,
c06r3n06: gradient_accumulation_steps=1,
c06r3n06: gradient_checkpointing=False,
c06r3n06: gradient_checkpointing_kwargs=None,
c06r3n06: greater_is_better=False,
c06r3n06: group_by_length=False,
c06r3n06: half_precision_backend=auto,
c06r3n06: hub_always_push=False,
c06r3n06: hub_model_id=None,
c06r3n06: hub_private_repo=False,
c06r3n06: hub_strategy=every_save,
c06r3n06: hub_token=<HUB_TOKEN>,
c06r3n06: ignore_data_skip=False,
c06r3n06: include_inputs_for_metrics=False,
c06r3n06: include_num_input_tokens_seen=False,
c06r3n06: include_tokens_per_second=False,
c06r3n06: jit_mode_eval=False,
c06r3n06: label_names=None,
c06r3n06: label_smoothing_factor=0.0,
c06r3n06: learning_rate=5e-05,
c06r3n06: length_column_name=length,
c06r3n06: load_best_model_at_end=True,
c06r3n06: local_rank=0,
c06r3n06: log_level=passive,
c06r3n06: log_level_replica=warning,
c06r3n06: log_on_each_node=True,
c06r3n06: logging_dir=/work/share/huchen1/liangjj/llama_factory/runs/Mar15_11-02-10_c06r3n06,
c06r3n06: logging_first_step=False,
c06r3n06: logging_nan_inf_filter=True,
c06r3n06: logging_steps=10,
c06r3n06: logging_strategy=steps,
c06r3n06: lr_scheduler_kwargs={},
c06r3n06: lr_scheduler_type=cosine,
c06r3n06: max_grad_norm=0.5,
c06r3n06: max_steps=-1,
c06r3n06: metric_for_best_model=loss,
c06r3n06: mp_parameters=,
c06r3n06: neftune_noise_alpha=None,
c06r3n06: no_cuda=False,
c06r3n06: num_train_epochs=4.0,
c06r3n06: optim=adamw_torch,
c06r3n06: optim_args=None,
c06r3n06: output_dir=/work/share/huchen1/liangjj/llama_factory,
c06r3n06: overwrite_output_dir=False,
c06r3n06: past_index=-1,
c06r3n06: per_device_eval_batch_size=1,
c06r3n06: per_device_train_batch_size=1,
c06r3n06: predict_with_generate=False,
c06r3n06: prediction_loss_only=False,
c06r3n06: push_to_hub=False,
c06r3n06: push_to_hub_model_id=None,
c06r3n06: push_to_hub_organization=None,
c06r3n06: push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
c06r3n06: ray_scope=last,
c06r3n06: remove_unused_columns=True,
c06r3n06: report_to=['tensorboard'],
c06r3n06: resume_from_checkpoint=None,
c06r3n06: run_name=/work/share/huchen1/liangjj/llama_factory,
c06r3n06: save_on_each_node=True,
c06r3n06: save_only_model=False,
c06r3n06: save_safetensors=True,
c06r3n06: save_steps=100,
c06r3n06: save_strategy=steps,
c06r3n06: save_total_limit=None,
c06r3n06: seed=42,
c06r3n06: skip_memory_metrics=True,
c06r3n06: sortish_sampler=False,
c06r3n06: split_batches=False,
c06r3n06: tf32=None,
c06r3n06: torch_compile=False,
c06r3n06: torch_compile_backend=None,
c06r3n06: torch_compile_mode=None,
c06r3n06: torchdynamo=None,
c06r3n06: tpu_metrics_debug=False,
c06r3n06: tpu_num_cores=None,
c06r3n06: use_cpu=False,
c06r3n06: use_ipex=False,
c06r3n06: use_legacy_prediction_loop=False,
c06r3n06: use_mps_device=False,
c06r3n06: warmup_ratio=0.03,
c06r3n06: warmup_steps=0,
c06r3n06: weight_decay=0.0,
c06r3n06: )
[... every other rank prints an identical Seq2SeqTrainingArguments block; the blocks differ only in local_rank (0-3) and in the node suffix of logging_dir (Mar15_11-02-10_c06r3n06/07/08/09). The captured log ends partway through these per-rank dumps ...]
mp_parameters=, c06r3n08: neftune_noise_alpha=None, c06r3n08: no_cuda=False, c06r3n08: num_train_epochs=4.0, c06r3n08: optim=adamw_torch, c06r3n08: optim_args=None, c06r3n09: logging_dir=/work/share/huchen1/liangjj/llama_factory/runs/Mar15_11-02-10_c06r3n09, c06r3n06: include_num_input_tokens_seen=False, c06r3n09: logging_first_step=False, c06r3n09: logging_nan_inf_filter=True, c06r3n09: logging_steps=10, c06r3n09: logging_strategy=steps, c06r3n09: lr_scheduler_kwargs={}, c06r3n09: lr_scheduler_type=cosine, c06r3n09: max_grad_norm=0.5, c06r3n09: max_steps=-1, c06r3n09: metric_for_best_model=loss, c06r3n09: mp_parameters=, c06r3n09: neftune_noise_alpha=None, c06r3n09: no_cuda=False, c06r3n09: num_train_epochs=4.0, c06r3n09: optim=adamw_torch, c06r3n09: optim_args=None, c06r3n09: output_dir=/work/share/huchen1/liangjj/llama_factory, c06r3n09: overwrite_output_dir=False, c06r3n09: past_index=-1, c06r3n09: per_device_eval_batch_size=1, c06r3n09: per_device_train_batch_size=1, c06r3n09: predict_with_generate=False, c06r3n09: prediction_loss_only=False, c06r3n09: push_to_hub=False, c06r3n09: push_to_hub_model_id=None, c06r3n09: push_to_hub_organization=None, c06r3n09: push_to_hub_token=, c06r3n09: ray_scope=last, c06r3n09: remove_unused_columns=True, c06r3n09: report_to=['tensorboard'], c06r3n09: resume_from_checkpoint=None, c06r3n09: run_name=/work/share/huchen1/liangjj/llama_factory, c06r3n09: save_on_each_node=True, c06r3n09: save_only_model=False, c06r3n09: save_safetensors=True, c06r3n09: save_steps=100, c06r3n09: save_strategy=steps, c06r3n08: output_dir=/work/share/huchen1/liangjj/llama_factory, c06r3n09: save_total_limit=None, c06r3n08: overwrite_output_dir=False, c06r3n08: past_index=-1, c06r3n08: per_device_eval_batch_size=1, c06r3n08: per_device_train_batch_size=1, c06r3n08: predict_with_generate=False, c06r3n08: prediction_loss_only=False, c06r3n08: push_to_hub=False, c06r3n08: push_to_hub_model_id=None, c06r3n08: push_to_hub_organization=None, c06r3n08: push_to_hub_token=, c06r3n08: ray_scope=last, c06r3n08: remove_unused_columns=True, c06r3n08: report_to=['tensorboard'], c06r3n08: resume_from_checkpoint=None, c06r3n08: run_name=/work/share/huchen1/liangjj/llama_factory, c06r3n08: save_on_each_node=True, c06r3n08: save_only_model=False, c06r3n08: save_safetensors=True, c06r3n08: save_steps=100, c06r3n08: save_strategy=steps, c06r3n08: save_total_limit=None, c06r3n08: seed=42, c06r3n08: skip_memory_metrics=True, c06r3n08: sortish_sampler=False, c06r3n08: split_batches=False, c06r3n08: tf32=None, c06r3n08: torch_compile=False, c06r3n08: torch_compile_backend=None, c06r3n08: torch_compile_mode=None, c06r3n08: torchdynamo=None, c06r3n08: tpu_metrics_debug=False, c06r3n06: include_tokens_per_second=False, c06r3n06: jit_mode_eval=False, c06r3n06: label_names=None, c06r3n06: label_smoothing_factor=0.0, c06r3n06: learning_rate=5e-05, c06r3n06: length_column_name=length, c06r3n06: load_best_model_at_end=True, c06r3n06: local_rank=3, c06r3n06: log_level=passive, c06r3n06: log_level_replica=warning, c06r3n06: log_on_each_node=True, c06r3n06: logging_dir=/work/share/huchen1/liangjj/llama_factory/runs/Mar15_11-02-10_c06r3n06, c06r3n09: seed=42, c06r3n09: skip_memory_metrics=True, c06r3n09: sortish_sampler=False, c06r3n09: split_batches=False, c06r3n09: tf32=None, c06r3n09: torch_compile=False, c06r3n09: torch_compile_backend=None, c06r3n09: torch_compile_mode=None, c06r3n09: torchdynamo=None, c06r3n09: tpu_metrics_debug=False, c06r3n09: tpu_num_cores=None, c06r3n09: use_cpu=False, c06r3n09: 
use_ipex=False, c06r3n09: use_legacy_prediction_loop=False, c06r3n09: use_mps_device=False, c06r3n09: warmup_ratio=0.03, c06r3n09: warmup_steps=0, c06r3n09: weight_decay=0.0, c06r3n09: ) c06r3n09: 03/15/2024 11:02:10 - INFO - llmtuner.hparams.parser - Training/evaluation parameters Seq2SeqTrainingArguments( c06r3n09: _n_gpu=1, c06r3n09: adafactor=False, c06r3n09: adam_beta1=0.9, c06r3n09: adam_beta2=0.999, c06r3n09: adam_epsilon=1e-08, c06r3n09: auto_find_batch_size=False, c06r3n09: bf16=False, c06r3n09: bf16_full_eval=False, c06r3n09: data_seed=None, c06r3n09: dataloader_drop_last=False, c06r3n09: dataloader_num_workers=0, c06r3n09: dataloader_persistent_workers=False, c06r3n09: dataloader_pin_memory=True, c06r3n09: ddp_backend=None, c06r3n09: ddp_broadcast_buffers=None, c06r3n09: ddp_bucket_cap_mb=None, c06r3n09: ddp_find_unused_parameters=None, c06r3n09: ddp_timeout=1800, c06r3n09: debug=[], c06r3n09: deepspeed=deepspeed.json, c06r3n09: disable_tqdm=False, c06r3n09: dispatch_batches=None, c06r3n09: do_eval=True, c06r3n08: tpu_num_cores=None, c06r3n09: do_predict=False, c06r3n06: logging_first_step=False, c06r3n09: do_train=True, c06r3n06: logging_nan_inf_filter=True, c06r3n09: eval_accumulation_steps=None, c06r3n06: logging_steps=10, c06r3n09: eval_delay=0, c06r3n09: eval_steps=100, c06r3n09: evaluation_strategy=steps, c06r3n09: fp16=True, c06r3n09: fp16_backend=auto, c06r3n09: fp16_full_eval=False, c06r3n09: fp16_opt_level=O1, c06r3n09: fsdp=[], c06r3n09: fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, c06r3n09: fsdp_min_num_params=0, c06r3n09: fsdp_transformer_layer_cls_to_wrap=None, c06r3n09: full_determinism=False, c06r3n09: generation_config=None, c06r3n09: generation_max_length=None, c06r3n09: generation_num_beams=None, c06r3n09: gradient_accumulation_steps=1, c06r3n09: gradient_checkpointing=False, c06r3n09: gradient_checkpointing_kwargs=None, c06r3n09: greater_is_better=False, c06r3n09: group_by_length=False, c06r3n09: half_precision_backend=auto, c06r3n09: hub_always_push=False, c06r3n09: hub_model_id=None, c06r3n09: hub_private_repo=False, c06r3n09: hub_strategy=every_save, c06r3n09: hub_token=, c06r3n09: ignore_data_skip=False, c06r3n09: include_inputs_for_metrics=False, c06r3n09: include_num_input_tokens_seen=False, c06r3n09: include_tokens_per_second=False, c06r3n09: jit_mode_eval=False, c06r3n09: label_names=None, c06r3n09: label_smoothing_factor=0.0, c06r3n09: learning_rate=5e-05, c06r3n09: length_column_name=length, c06r3n09: load_best_model_at_end=True, c06r3n07: ddp_find_unused_parameters=None, c06r3n09: local_rank=1, c06r3n07: ddp_timeout=1800, c06r3n07: debug=[], c06r3n07: deepspeed=deepspeed.json, c06r3n07: disable_tqdm=False, c06r3n07: dispatch_batches=None, c06r3n07: do_eval=True, c06r3n07: do_predict=False, c06r3n07: do_train=True, c06r3n07: eval_accumulation_steps=None, c06r3n07: eval_delay=0, c06r3n07: eval_steps=100, c06r3n07: evaluation_strategy=steps, c06r3n07: fp16=True, c06r3n07: fp16_backend=auto, c06r3n07: fp16_full_eval=False, c06r3n07: fp16_opt_level=O1, c06r3n07: fsdp=[], c06r3n07: fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, c06r3n07: fsdp_min_num_params=0, c06r3n07: fsdp_transformer_layer_cls_to_wrap=None, c06r3n07: full_determinism=False, c06r3n07: generation_config=None, c06r3n07: generation_max_length=None, c06r3n07: generation_num_beams=None, c06r3n07: gradient_accumulation_steps=1, c06r3n07: gradient_checkpointing=False, c06r3n07: gradient_checkpointing_kwargs=None, c06r3n07: 
greater_is_better=False, c06r3n07: group_by_length=False, c06r3n07: half_precision_backend=auto, c06r3n07: hub_always_push=False, c06r3n07: hub_model_id=None, c06r3n07: hub_private_repo=False, c06r3n07: hub_strategy=every_save, c06r3n07: hub_token=, c06r3n07: ignore_data_skip=False, c06r3n07: include_inputs_for_metrics=False, c06r3n07: include_num_input_tokens_seen=False, c06r3n07: include_tokens_per_second=False, c06r3n06: logging_strategy=steps, c06r3n07: jit_mode_eval=False, c06r3n07: label_names=None, c06r3n07: label_smoothing_factor=0.0, c06r3n07: learning_rate=5e-05, c06r3n07: length_column_name=length, c06r3n07: load_best_model_at_end=True, c06r3n07: local_rank=3, c06r3n07: log_level=passive, c06r3n07: log_level_replica=warning, c06r3n07: log_on_each_node=True, c06r3n07: logging_dir=/work/share/huchen1/liangjj/llama_factory/runs/Mar15_11-02-10_c06r3n07, c06r3n07: logging_first_step=False, c06r3n07: logging_nan_inf_filter=True, c06r3n07: logging_steps=10, c06r3n07: logging_strategy=steps, c06r3n07: lr_scheduler_kwargs={}, c06r3n07: lr_scheduler_type=cosine, c06r3n07: max_grad_norm=0.5, c06r3n07: max_steps=-1, c06r3n07: metric_for_best_model=loss, c06r3n07: mp_parameters=, c06r3n07: neftune_noise_alpha=None, c06r3n07: no_cuda=False, c06r3n07: num_train_epochs=4.0, c06r3n07: optim=adamw_torch, c06r3n07: optim_args=None, c06r3n07: output_dir=/work/share/huchen1/liangjj/llama_factory, c06r3n07: overwrite_output_dir=False, c06r3n07: past_index=-1, c06r3n07: per_device_eval_batch_size=1, c06r3n07: per_device_train_batch_size=1, c06r3n07: predict_with_generate=False, c06r3n07: prediction_loss_only=False, c06r3n07: push_to_hub=False, c06r3n07: push_to_hub_model_id=None, c06r3n07: push_to_hub_organization=None, c06r3n07: push_to_hub_token=, c06r3n06: lr_scheduler_kwargs={}, c06r3n06: lr_scheduler_type=cosine, c06r3n07: ray_scope=last, c06r3n06: max_grad_norm=0.5, c06r3n07: remove_unused_columns=True, c06r3n06: max_steps=-1, c06r3n07: report_to=['tensorboard'], c06r3n06: metric_for_best_model=loss, c06r3n06: mp_parameters=, c06r3n06: neftune_noise_alpha=None, c06r3n06: no_cuda=False, c06r3n06: num_train_epochs=4.0, c06r3n06: optim=adamw_torch, c06r3n06: optim_args=None, c06r3n06: output_dir=/work/share/huchen1/liangjj/llama_factory, c06r3n06: overwrite_output_dir=False, c06r3n06: past_index=-1, c06r3n06: per_device_eval_batch_size=1, c06r3n06: per_device_train_batch_size=1, c06r3n06: predict_with_generate=False, c06r3n07: resume_from_checkpoint=None, c06r3n06: prediction_loss_only=False, c06r3n07: run_name=/work/share/huchen1/liangjj/llama_factory, c06r3n06: push_to_hub=False, c06r3n06: push_to_hub_model_id=None, c06r3n06: push_to_hub_organization=None, c06r3n06: push_to_hub_token=, c06r3n06: ray_scope=last, c06r3n06: remove_unused_columns=True, c06r3n06: report_to=['tensorboard'], c06r3n06: resume_from_checkpoint=None, c06r3n06: run_name=/work/share/huchen1/liangjj/llama_factory, c06r3n06: save_on_each_node=True, c06r3n06: save_only_model=False, c06r3n06: save_safetensors=True, c06r3n06: save_steps=100, c06r3n06: save_strategy=steps, c06r3n06: save_total_limit=None, c06r3n06: seed=42, c06r3n06: skip_memory_metrics=True, c06r3n06: sortish_sampler=False, c06r3n06: split_batches=False, c06r3n06: tf32=None, c06r3n06: torch_compile=False, c06r3n06: torch_compile_backend=None, c06r3n06: torch_compile_mode=None, c06r3n06: torchdynamo=None, c06r3n06: tpu_metrics_debug=False, c06r3n06: tpu_num_cores=None, c06r3n06: use_cpu=False, c06r3n06: use_ipex=False, c06r3n06: use_legacy_prediction_loop=False, 
c06r3n06: use_mps_device=False, c06r3n06: warmup_ratio=0.03, c06r3n06: warmup_steps=0, c06r3n06: weight_decay=0.0, c06r3n06: ) c06r3n08: use_cpu=False, c06r3n08: use_ipex=False, c06r3n08: use_legacy_prediction_loop=False, c06r3n08: use_mps_device=False, c06r3n08: warmup_ratio=0.03, c06r3n08: warmup_steps=0, c06r3n08: weight_decay=0.0, c06r3n08: ) c06r3n08: 03/15/2024 11:02:10 - INFO - llmtuner.hparams.parser - Training/evaluation parameters Seq2SeqTrainingArguments( c06r3n08: _n_gpu=1, c06r3n08: adafactor=False, c06r3n08: adam_beta1=0.9, c06r3n08: adam_beta2=0.999, c06r3n08: adam_epsilon=1e-08, c06r3n08: auto_find_batch_size=False, c06r3n08: bf16=False, c06r3n08: bf16_full_eval=False, c06r3n08: data_seed=None, c06r3n08: dataloader_drop_last=False, c06r3n08: dataloader_num_workers=0, c06r3n08: dataloader_persistent_workers=False, c06r3n08: dataloader_pin_memory=True, c06r3n08: ddp_backend=None, c06r3n08: ddp_broadcast_buffers=None, c06r3n08: ddp_bucket_cap_mb=None, c06r3n08: ddp_find_unused_parameters=None, c06r3n08: ddp_timeout=1800, c06r3n08: debug=[], c06r3n08: deepspeed=deepspeed.json, c06r3n08: disable_tqdm=False, c06r3n08: dispatch_batches=None, c06r3n08: do_eval=True, c06r3n08: do_predict=False, c06r3n08: do_train=True, c06r3n08: eval_accumulation_steps=None, c06r3n08: eval_delay=0, c06r3n08: eval_steps=100, c06r3n08: evaluation_strategy=steps, c06r3n08: fp16=True, c06r3n08: fp16_backend=auto, c06r3n08: fp16_full_eval=False, c06r3n08: fp16_opt_level=O1, c06r3n08: fsdp=[], c06r3n08: fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, c06r3n07: save_on_each_node=True, c06r3n07: save_only_model=False, c06r3n09: log_level=passive, c06r3n09: log_level_replica=warning, c06r3n09: log_on_each_node=True, c06r3n09: logging_dir=/work/share/huchen1/liangjj/llama_factory/runs/Mar15_11-02-10_c06r3n09, c06r3n09: logging_first_step=False, c06r3n09: logging_nan_inf_filter=True, c06r3n09: logging_steps=10, c06r3n09: logging_strategy=steps, c06r3n09: lr_scheduler_kwargs={}, c06r3n09: lr_scheduler_type=cosine, c06r3n09: max_grad_norm=0.5, c06r3n09: max_steps=-1, c06r3n09: metric_for_best_model=loss, c06r3n09: mp_parameters=, c06r3n09: neftune_noise_alpha=None, c06r3n09: no_cuda=False, c06r3n09: num_train_epochs=4.0, c06r3n09: optim=adamw_torch, c06r3n09: optim_args=None, c06r3n09: output_dir=/work/share/huchen1/liangjj/llama_factory, c06r3n09: overwrite_output_dir=False, c06r3n09: past_index=-1, c06r3n09: per_device_eval_batch_size=1, c06r3n09: per_device_train_batch_size=1, c06r3n09: predict_with_generate=False, c06r3n09: prediction_loss_only=False, c06r3n09: push_to_hub=False, c06r3n09: push_to_hub_model_id=None, c06r3n09: push_to_hub_organization=None, c06r3n09: push_to_hub_token=, c06r3n09: ray_scope=last, c06r3n09: remove_unused_columns=True, c06r3n08: fsdp_min_num_params=0, c06r3n08: fsdp_transformer_layer_cls_to_wrap=None, c06r3n08: full_determinism=False, c06r3n08: generation_config=None, c06r3n08: generation_max_length=None, c06r3n08: generation_num_beams=None, c06r3n08: gradient_accumulation_steps=1, c06r3n08: gradient_checkpointing=False, c06r3n08: gradient_checkpointing_kwargs=None, c06r3n08: greater_is_better=False, c06r3n08: group_by_length=False, c06r3n08: half_precision_backend=auto, c06r3n08: hub_always_push=False, c06r3n08: hub_model_id=None, c06r3n08: hub_private_repo=False, c06r3n08: hub_strategy=every_save, c06r3n08: hub_token=, c06r3n08: ignore_data_skip=False, c06r3n08: include_inputs_for_metrics=False, c06r3n08: include_num_input_tokens_seen=False, 
c06r3n08: include_tokens_per_second=False, c06r3n08: jit_mode_eval=False, c06r3n08: label_names=None, c06r3n08: label_smoothing_factor=0.0, c06r3n08: learning_rate=5e-05, c06r3n08: length_column_name=length, c06r3n08: load_best_model_at_end=True, c06r3n08: local_rank=1, c06r3n08: log_level=passive, c06r3n08: log_level_replica=warning, c06r3n08: log_on_each_node=True, c06r3n08: logging_dir=/work/share/huchen1/liangjj/llama_factory/runs/Mar15_11-02-10_c06r3n08, c06r3n08: logging_first_step=False, c06r3n08: logging_nan_inf_filter=True, c06r3n08: logging_steps=10, c06r3n08: logging_strategy=steps, c06r3n09: report_to=['tensorboard'], c06r3n09: resume_from_checkpoint=None, c06r3n08: lr_scheduler_kwargs={}, c06r3n07: save_safetensors=True, c06r3n09: run_name=/work/share/huchen1/liangjj/llama_factory, c06r3n07: save_steps=100, c06r3n09: save_on_each_node=True, c06r3n07: save_strategy=steps, c06r3n09: save_only_model=False, c06r3n07: save_total_limit=None, c06r3n09: save_safetensors=True, c06r3n07: seed=42, c06r3n09: save_steps=100, c06r3n07: skip_memory_metrics=True, c06r3n09: save_strategy=steps, c06r3n07: sortish_sampler=False, c06r3n09: save_total_limit=None, c06r3n07: split_batches=False, c06r3n09: seed=42, c06r3n07: tf32=None, c06r3n09: skip_memory_metrics=True, c06r3n07: torch_compile=False, c06r3n09: sortish_sampler=False, c06r3n07: torch_compile_backend=None, c06r3n09: split_batches=False, c06r3n07: torch_compile_mode=None, c06r3n09: tf32=None, c06r3n07: torchdynamo=None, c06r3n09: torch_compile=False, c06r3n07: tpu_metrics_debug=False, c06r3n09: torch_compile_backend=None, c06r3n07: tpu_num_cores=None, c06r3n09: torch_compile_mode=None, c06r3n07: use_cpu=False, c06r3n09: torchdynamo=None, c06r3n07: use_ipex=False, c06r3n09: tpu_metrics_debug=False, c06r3n07: use_legacy_prediction_loop=False, c06r3n09: tpu_num_cores=None, c06r3n07: use_mps_device=False, c06r3n09: use_cpu=False, c06r3n07: warmup_ratio=0.03, c06r3n09: use_ipex=False, c06r3n07: warmup_steps=0, c06r3n09: use_legacy_prediction_loop=False, c06r3n07: weight_decay=0.0, c06r3n09: use_mps_device=False, c06r3n07: ) c06r3n09: warmup_ratio=0.03, c06r3n07: 03/15/2024 11:02:10 - INFO - llmtuner.hparams.parser - Training/evaluation parameters Seq2SeqTrainingArguments( c06r3n09: warmup_steps=0, c06r3n07: _n_gpu=1, c06r3n09: weight_decay=0.0, c06r3n07: adafactor=False, c06r3n09: ) c06r3n07: adam_beta1=0.9, c06r3n07: adam_beta2=0.999, c06r3n07: adam_epsilon=1e-08, c06r3n07: auto_find_batch_size=False, c06r3n07: bf16=False, c06r3n07: bf16_full_eval=False, c06r3n07: data_seed=None, c06r3n07: dataloader_drop_last=False, c06r3n07: dataloader_num_workers=0, c06r3n07: dataloader_persistent_workers=False, c06r3n07: dataloader_pin_memory=True, c06r3n07: ddp_backend=None, c06r3n07: ddp_broadcast_buffers=None, c06r3n07: ddp_bucket_cap_mb=None, c06r3n07: ddp_find_unused_parameters=None, c06r3n07: ddp_timeout=1800, c06r3n07: debug=[], c06r3n07: deepspeed=deepspeed.json, c06r3n07: disable_tqdm=False, c06r3n07: dispatch_batches=None, c06r3n07: do_eval=True, c06r3n07: do_predict=False, c06r3n07: do_train=True, c06r3n07: eval_accumulation_steps=None, c06r3n07: eval_delay=0, c06r3n07: eval_steps=100, c06r3n07: evaluation_strategy=steps, c06r3n07: fp16=True, c06r3n07: fp16_backend=auto, c06r3n07: fp16_full_eval=False, c06r3n07: fp16_opt_level=O1, c06r3n07: fsdp=[], c06r3n07: fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, c06r3n07: fsdp_min_num_params=0, c06r3n07: fsdp_transformer_layer_cls_to_wrap=None, c06r3n07: 
full_determinism=False, c06r3n07: generation_config=None, c06r3n07: generation_max_length=None, c06r3n07: generation_num_beams=None, c06r3n07: gradient_accumulation_steps=1, c06r3n07: gradient_checkpointing=False, c06r3n07: gradient_checkpointing_kwargs=None, c06r3n07: greater_is_better=False, c06r3n07: group_by_length=False, c06r3n07: half_precision_backend=auto, c06r3n07: hub_always_push=False, c06r3n07: hub_model_id=None, c06r3n07: hub_private_repo=False, c06r3n07: hub_strategy=every_save, c06r3n07: hub_token=, c06r3n07: ignore_data_skip=False, c06r3n07: include_inputs_for_metrics=False, c06r3n07: include_num_input_tokens_seen=False, c06r3n07: include_tokens_per_second=False, c06r3n07: jit_mode_eval=False, c06r3n07: label_names=None, c06r3n07: label_smoothing_factor=0.0, c06r3n07: learning_rate=5e-05, c06r3n07: length_column_name=length, c06r3n07: load_best_model_at_end=True, c06r3n07: local_rank=1, c06r3n07: log_level=passive, c06r3n07: log_level_replica=warning, c06r3n07: log_on_each_node=True, c06r3n07: logging_dir=/work/share/huchen1/liangjj/llama_factory/runs/Mar15_11-02-10_c06r3n07, c06r3n07: logging_first_step=False, c06r3n07: logging_nan_inf_filter=True, c06r3n07: logging_steps=10, c06r3n07: logging_strategy=steps, c06r3n07: lr_scheduler_kwargs={}, c06r3n07: lr_scheduler_type=cosine, c06r3n07: max_grad_norm=0.5, c06r3n07: max_steps=-1, c06r3n07: metric_for_best_model=loss, c06r3n07: mp_parameters=, c06r3n07: neftune_noise_alpha=None, c06r3n07: no_cuda=False, c06r3n07: num_train_epochs=4.0, c06r3n07: optim=adamw_torch, c06r3n07: optim_args=None, c06r3n07: output_dir=/work/share/huchen1/liangjj/llama_factory, c06r3n07: overwrite_output_dir=False, c06r3n07: past_index=-1, c06r3n07: per_device_eval_batch_size=1, c06r3n07: per_device_train_batch_size=1, c06r3n07: predict_with_generate=False, c06r3n08: lr_scheduler_type=cosine, c06r3n07: prediction_loss_only=False, c06r3n07: push_to_hub=False, c06r3n07: push_to_hub_model_id=None, c06r3n07: push_to_hub_organization=None, c06r3n07: push_to_hub_token=, c06r3n07: ray_scope=last, c06r3n07: remove_unused_columns=True, c06r3n07: report_to=['tensorboard'], c06r3n07: resume_from_checkpoint=None, c06r3n07: run_name=/work/share/huchen1/liangjj/llama_factory, c06r3n07: save_on_each_node=True, c06r3n07: save_only_model=False, c06r3n07: save_safetensors=True, c06r3n07: save_steps=100, c06r3n07: save_strategy=steps, c06r3n07: save_total_limit=None, c06r3n07: seed=42, c06r3n07: skip_memory_metrics=True, c06r3n07: sortish_sampler=False, c06r3n07: split_batches=False, c06r3n07: tf32=None, c06r3n07: torch_compile=False, c06r3n07: torch_compile_backend=None, c06r3n07: torch_compile_mode=None, c06r3n07: torchdynamo=None, c06r3n07: tpu_metrics_debug=False, c06r3n07: tpu_num_cores=None, c06r3n07: use_cpu=False, c06r3n07: use_ipex=False, c06r3n07: use_legacy_prediction_loop=False, c06r3n07: use_mps_device=False, c06r3n07: warmup_ratio=0.03, c06r3n07: warmup_steps=0, c06r3n07: weight_decay=0.0, c06r3n07: ) c06r3n08: max_grad_norm=0.5, c06r3n08: max_steps=-1, c06r3n08: metric_for_best_model=loss, c06r3n08: mp_parameters=, c06r3n08: neftune_noise_alpha=None, c06r3n08: no_cuda=False, c06r3n08: num_train_epochs=4.0, c06r3n08: optim=adamw_torch, c06r3n08: optim_args=None, c06r3n08: output_dir=/work/share/huchen1/liangjj/llama_factory, c06r3n08: overwrite_output_dir=False, c06r3n08: past_index=-1, c06r3n08: per_device_eval_batch_size=1, c06r3n08: per_device_train_batch_size=1, c06r3n08: predict_with_generate=False, c06r3n08: prediction_loss_only=False, c06r3n08: 
push_to_hub=False, c06r3n08: push_to_hub_model_id=None, c06r3n08: push_to_hub_organization=None, c06r3n08: push_to_hub_token=, c06r3n08: ray_scope=last, c06r3n08: remove_unused_columns=True, c06r3n08: report_to=['tensorboard'], c06r3n08: resume_from_checkpoint=None, c06r3n08: run_name=/work/share/huchen1/liangjj/llama_factory, c06r3n08: save_on_each_node=True, c06r3n08: save_only_model=False, c06r3n08: save_safetensors=True, c06r3n08: save_steps=100, c06r3n08: save_strategy=steps, c06r3n08: save_total_limit=None, c06r3n08: seed=42, c06r3n08: skip_memory_metrics=True, c06r3n08: sortish_sampler=False, c06r3n08: split_batches=False, c06r3n08: tf32=None, c06r3n08: torch_compile=False, c06r3n08: torch_compile_backend=None, c06r3n08: torch_compile_mode=None, c06r3n08: torchdynamo=None, c06r3n08: tpu_metrics_debug=False, c06r3n08: tpu_num_cores=None, c06r3n08: use_cpu=False, c06r3n08: use_ipex=False, c06r3n08: use_legacy_prediction_loop=False, c06r3n08: use_mps_device=False, c06r3n08: warmup_ratio=0.03, c06r3n08: warmup_steps=0, c06r3n08: weight_decay=0.0, c06r3n08: ) c06r3n08: 03/15/2024 11:02:10 - INFO - llmtuner.hparams.parser - Training/evaluation parameters Seq2SeqTrainingArguments( c06r3n08: _n_gpu=1, c06r3n08: adafactor=False, c06r3n08: adam_beta1=0.9, c06r3n08: adam_beta2=0.999, c06r3n08: adam_epsilon=1e-08, c06r3n08: auto_find_batch_size=False, c06r3n08: bf16=False, c06r3n08: bf16_full_eval=False, c06r3n08: data_seed=None, c06r3n08: dataloader_drop_last=False, c06r3n08: dataloader_num_workers=0, c06r3n08: dataloader_persistent_workers=False, c06r3n08: dataloader_pin_memory=True, c06r3n08: ddp_backend=None, c06r3n08: ddp_broadcast_buffers=None, c06r3n08: ddp_bucket_cap_mb=None, c06r3n08: ddp_find_unused_parameters=None, c06r3n08: ddp_timeout=1800, c06r3n08: debug=[], c06r3n08: deepspeed=deepspeed.json, c06r3n08: disable_tqdm=False, c06r3n08: dispatch_batches=None, c06r3n08: do_eval=True, c06r3n08: do_predict=False, c06r3n08: do_train=True, c06r3n08: eval_accumulation_steps=None, c06r3n08: eval_delay=0, c06r3n08: eval_steps=100, c06r3n08: evaluation_strategy=steps, c06r3n08: fp16=True, c06r3n08: fp16_backend=auto, c06r3n08: fp16_full_eval=False, c06r3n08: fp16_opt_level=O1, c06r3n08: fsdp=[], c06r3n08: fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, c06r3n08: fsdp_min_num_params=0, c06r3n08: fsdp_transformer_layer_cls_to_wrap=None, c06r3n08: full_determinism=False, c06r3n08: generation_config=None, c06r3n08: generation_max_length=None, c06r3n08: generation_num_beams=None, c06r3n08: gradient_accumulation_steps=1, c06r3n08: gradient_checkpointing=False, c06r3n08: gradient_checkpointing_kwargs=None, c06r3n08: greater_is_better=False, c06r3n08: group_by_length=False, c06r3n08: half_precision_backend=auto, c06r3n08: hub_always_push=False, c06r3n08: hub_model_id=None, c06r3n08: hub_private_repo=False, c06r3n08: hub_strategy=every_save, c06r3n08: hub_token=, c06r3n08: ignore_data_skip=False, c06r3n08: include_inputs_for_metrics=False, c06r3n08: include_num_input_tokens_seen=False, c06r3n08: include_tokens_per_second=False, c06r3n08: jit_mode_eval=False, c06r3n08: label_names=None, c06r3n08: label_smoothing_factor=0.0, c06r3n08: learning_rate=5e-05, c06r3n08: length_column_name=length, c06r3n08: load_best_model_at_end=True, c06r3n08: local_rank=0, c06r3n08: log_level=passive, c06r3n08: log_level_replica=warning, c06r3n08: log_on_each_node=True, c06r3n08: logging_dir=/work/share/huchen1/liangjj/llama_factory/runs/Mar15_11-02-10_c06r3n08, c06r3n08: 
logging_first_step=False, c06r3n08: logging_nan_inf_filter=True, c06r3n08: logging_steps=10, c06r3n08: logging_strategy=steps, c06r3n08: lr_scheduler_kwargs={}, c06r3n08: lr_scheduler_type=cosine, c06r3n08: max_grad_norm=0.5, c06r3n08: max_steps=-1, c06r3n08: metric_for_best_model=loss, c06r3n08: mp_parameters=, c06r3n08: neftune_noise_alpha=None, c06r3n08: no_cuda=False, c06r3n08: num_train_epochs=4.0, c06r3n08: optim=adamw_torch, c06r3n08: optim_args=None, c06r3n08: output_dir=/work/share/huchen1/liangjj/llama_factory, c06r3n08: overwrite_output_dir=False, c06r3n08: past_index=-1, c06r3n08: per_device_eval_batch_size=1, c06r3n08: per_device_train_batch_size=1, c06r3n08: predict_with_generate=False, c06r3n08: prediction_loss_only=False, c06r3n08: push_to_hub=False, c06r3n08: push_to_hub_model_id=None, c06r3n08: push_to_hub_organization=None, c06r3n08: push_to_hub_token=, c06r3n08: ray_scope=last, c06r3n08: remove_unused_columns=True, c06r3n08: report_to=['tensorboard'], c06r3n08: resume_from_checkpoint=None, c06r3n08: run_name=/work/share/huchen1/liangjj/llama_factory, c06r3n08: save_on_each_node=True, c06r3n08: save_only_model=False, c06r3n08: save_safetensors=True, c06r3n08: save_steps=100, c06r3n08: save_strategy=steps, c06r3n08: save_total_limit=None, c06r3n08: seed=42, c06r3n08: skip_memory_metrics=True, c06r3n08: sortish_sampler=False, c06r3n08: split_batches=False, c06r3n08: tf32=None, c06r3n08: torch_compile=False, c06r3n08: torch_compile_backend=None, c06r3n08: torch_compile_mode=None, c06r3n08: torchdynamo=None, c06r3n08: tpu_metrics_debug=False, c06r3n08: tpu_num_cores=None, c06r3n08: use_cpu=False, c06r3n08: use_ipex=False, c06r3n08: use_legacy_prediction_loop=False, c06r3n08: use_mps_device=False, c06r3n08: warmup_ratio=0.03, c06r3n08: warmup_steps=0, c06r3n08: weight_decay=0.0, c06r3n08: ) c06r3n08: 03/15/2024 11:02:10 - INFO - llmtuner.hparams.parser - Training/evaluation parameters Seq2SeqTrainingArguments( c06r3n08: _n_gpu=1, c06r3n08: adafactor=False, c06r3n08: adam_beta1=0.9, c06r3n08: adam_beta2=0.999, c06r3n08: adam_epsilon=1e-08, c06r3n08: auto_find_batch_size=False, c06r3n08: bf16=False, c06r3n08: bf16_full_eval=False, c06r3n08: data_seed=None, c06r3n08: dataloader_drop_last=False, c06r3n08: dataloader_num_workers=0, c06r3n08: dataloader_persistent_workers=False, c06r3n08: dataloader_pin_memory=True, c06r3n08: ddp_backend=None, c06r3n08: ddp_broadcast_buffers=None, c06r3n08: ddp_bucket_cap_mb=None, c06r3n08: ddp_find_unused_parameters=None, c06r3n08: ddp_timeout=1800, c06r3n08: debug=[], c06r3n08: deepspeed=deepspeed.json, c06r3n08: disable_tqdm=False, c06r3n08: dispatch_batches=None, c06r3n08: do_eval=True, c06r3n08: do_predict=False, c06r3n08: do_train=True, c06r3n08: eval_accumulation_steps=None, c06r3n08: eval_delay=0, c06r3n08: eval_steps=100, c06r3n08: evaluation_strategy=steps, c06r3n08: fp16=True, c06r3n08: fp16_backend=auto, c06r3n08: fp16_full_eval=False, c06r3n08: fp16_opt_level=O1, c06r3n08: fsdp=[], c06r3n08: fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, c06r3n08: fsdp_min_num_params=0, c06r3n08: fsdp_transformer_layer_cls_to_wrap=None, c06r3n08: full_determinism=False, c06r3n08: generation_config=None, c06r3n08: generation_max_length=None, c06r3n08: generation_num_beams=None, c06r3n08: gradient_accumulation_steps=1, c06r3n08: gradient_checkpointing=False, c06r3n08: gradient_checkpointing_kwargs=None, c06r3n08: greater_is_better=False, c06r3n08: group_by_length=False, c06r3n08: half_precision_backend=auto, c06r3n08: 
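The arguments above, together with the 16 ranks visible in the log (four hosts, local_rank 0-3 on each), fix the effective optimizer batch size. A quick back-of-the-envelope check in Python, using only numbers read off the dump (illustrative, not taken from the training code):

    # Effective global batch size implied by the logged arguments.
    per_device_train_batch_size = 1   # from the Seq2SeqTrainingArguments dump
    gradient_accumulation_steps = 1   # from the dump
    world_size = 4 * 4                # four hosts, four ranks (local_rank 0-3) each

    global_batch_size = per_device_train_batch_size * gradient_accumulation_steps * world_size
    print(global_batch_size)          # -> 16 sequences per optimizer step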
c06r3n07: [INFO|tokenization_utils_base.py:2025] 2024-03-15 11:02:10,615 >> loading file tokenizer.model
c06r3n07: [INFO|tokenization_utils_base.py:2025] 2024-03-15 11:02:10,615 >> loading file added_tokens.json
c06r3n07: [INFO|tokenization_utils_base.py:2025] 2024-03-15 11:02:10,615 >> loading file special_tokens_map.json
c06r3n07: [INFO|tokenization_utils_base.py:2025] 2024-03-15 11:02:10,615 >> loading file tokenizer_config.json
c06r3n07: [INFO|tokenization_utils_base.py:2025] 2024-03-15 11:02:10,615 >> loading file tokenizer.json
c06r3n06/c06r3n08/c06r3n09: the same five tokenizer-file loading lines were logged at 11:02:10,615 on the other three nodes.
c06r3n09: [WARNING|logging.py:329] 2024-03-15 11:02:10,621 >> You are using the default legacy behaviour of the tokenizer. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
c06r3n06/c06r3n07/c06r3n08/c06r3n09: the same legacy-tokenizer warning was emitted by every rank on all four nodes between 11:02:10,621 and 11:02:10,623.
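The warning is informational; if the new (non-legacy) behaviour were wanted, the tokenizer would be loaded with `legacy=False`, exactly as the message suggests. A minimal sketch using the standard transformers API with the model path from this run (the flag is simply forwarded to the LLaMA tokenizer; this is not taken from the LLaMA-Factory source):

    from transformers import AutoTokenizer

    # legacy=False opts into the new tokenization behaviour described in
    # https://github.com/huggingface/transformers/pull/24565 and silences the warning.
    tokenizer = AutoTokenizer.from_pretrained(
        "/work/home/liangjing/.cache/modelscope/hub/skyline2006/llama-7b",
        legacy=False,
    )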
c06r3n06: [INFO|configuration_utils.py:727] 2024-03-15 11:02:10,772 >> loading configuration file /work/home/liangjing/.cache/modelscope/hub/skyline2006/llama-7b/config.json
c06r3n07/c06r3n08/c06r3n09: the same configuration file was loaded on the other three nodes at 11:02:10,771-773.
c06r3n06: [INFO|configuration_utils.py:792] 2024-03-15 11:02:10,773 >> Model config LlamaConfig {
c06r3n06: "_name_or_path": "/work/home/liangjing/.cache/modelscope/hub/skyline2006/llama-7b",
c06r3n06: "architectures": [
c06r3n06: "LlamaForCausalLM"
c06r3n06: ],
c06r3n06: "attention_bias": false,
c06r3n06: "attention_dropout": 0.0,
c06r3n06: "bos_token_id": 0,
c06r3n06: "eos_token_id": 1,
c06r3n06: "hidden_act": "silu",
c06r3n06: "hidden_size": 4096,
c06r3n06: "initializer_range": 0.02,
c06r3n06: "intermediate_size": 11008,
c06r3n06: "max_position_embeddings": 2048,
c06r3n06: "max_sequence_length": 2048,
c06r3n06: "model_type": "llama",
c06r3n06: "num_attention_heads": 32,
c06r3n06: "num_hidden_layers": 32,
c06r3n06: "num_key_value_heads": 32,
c06r3n06: "pad_token_id": -1,
c06r3n06: "pretraining_tp": 1,
c06r3n06: "rms_norm_eps": 1e-06,
c06r3n06: "rope_scaling": null,
c06r3n06: "rope_theta": 10000.0,
c06r3n06: "tie_word_embeddings": false,
c06r3n06: "torch_dtype": "float16",
c06r3n06: "transformers_version": "4.37.2",
c06r3n06: "use_cache": true,
c06r3n06: "vocab_size": 32000
c06r3n06: }
c06r3n07/c06r3n08/c06r3n09: the identical LlamaConfig dump was logged on the other three nodes at 11:02:10,773-774.
c06r3n06: [INFO|modeling_utils.py:3473] 2024-03-15 11:02:12,194 >> loading weights file /work/home/liangjing/.cache/modelscope/hub/skyline2006/llama-7b/pytorch_model.bin.index.json
c06r3n07/c06r3n08/c06r3n09: the same weights index file was loaded on the other three nodes at 11:02:12,193-195.
c06r3n06: [INFO|modeling_utils.py:1426] 2024-03-15 11:02:12,208 >> Instantiating LlamaForCausalLM model under default dtype torch.float16.
c06r3n07/c06r3n08/c06r3n09: the same "Instantiating LlamaForCausalLM model under default dtype torch.float16." message was logged on the other three nodes at 11:02:12,207-208.
c06r3n06: [INFO|modeling_utils.py:3582] 2024-03-15 11:02:12,208 >> Detected DeepSpeed ZeRO-3: activating zero.init() for this model
c06r3n07/c06r3n08/c06r3n09: the same ZeRO-3 zero.init() message was logged on the other three nodes at 11:02:12,207-208.
c06r3n06: [INFO|configuration_utils.py:826] 2024-03-15 11:02:12,216 >> Generate config GenerationConfig {
c06r3n06: "bos_token_id": 0,
c06r3n06: "eos_token_id": 1,
c06r3n06: "pad_token_id": -1
c06r3n06: }
c06r3n07/c06r3n08/c06r3n09: the identical GenerationConfig was logged on the other three nodes at 11:02:12,215-216.
c06r3n06: pthread_mutex_timedlock() returned 110
c06r3n06: Failed to initialize RSMI device mutex after 5 seconds. Previous execution may not have shutdown cleanly. To fix problem, stop all rocm_smi programs, and then delete the rocm_smi* shared memory files in /dev/shm.: Success
c06r3n06: (this pthread_mutex_timedlock()/RSMI warning pair was printed four times in a row on c06r3n06)
c06r3n06: I0315 11:02:18.874918 9280 ProcessGroupNCCL.cpp:1340] NCCL_DEBUG: N/A
c06r3n06: [2024-03-15 11:02:23,334] [INFO] [partition_parameters.py:348:__exit__] finished initializing model - num_params = 291, num_elems = 6.74B
c06r3n07: Loading checkpoint shards:   0%|          | 0/33 [00:00<?, ?it/s]
c06r3n09: [INFO|modeling_utils.py:4350] 2024-03-15 11:02:50,467 >> All model checkpoint weights were used when initializing LlamaForCausalLM.
c06r3n09: [INFO|modeling_utils.py:4358] 2024-03-15 11:02:50,467 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at /work/home/liangjing/.cache/modelscope/hub/skyline2006/llama-7b.
c06r3n09: If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training.
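The RSMI device-mutex warning above on c06r3n06 states its own fix: stop any running rocm_smi programs and delete the stale rocm_smi* shared-memory files. A small cleanup sketch that follows that instruction literally (the /dev/shm pattern comes from the warning text; run it only when no ROCm monitoring tools are active):

    import glob, os

    # Remove stale ROCm SMI shared-memory files left behind by an unclean shutdown,
    # as recommended by the "Failed to initialize RSMI device mutex" warning.
    for path in glob.glob("/dev/shm/rocm_smi*"):
        os.remove(path)
        print(f"removed {path}")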
c06r3n06/c06r3n07/c06r3n08/c06r3n09: Loading checkpoint shards: 100%|██████████| 33/33 [00:27<00:00, 1.1-1.2it/s] (every rank on all four nodes loaded the 33 checkpoint shards in roughly 27 seconds)
c06r3n07: [INFO|modeling_utils.py:4350] 2024-03-15 11:02:50,470 >> All model checkpoint weights were used when initializing LlamaForCausalLM.
c06r3n07: [INFO|modeling_utils.py:4358] 2024-03-15 11:02:50,470 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at /work/home/liangjing/.cache/modelscope/hub/skyline2006/llama-7b.
c06r3n07: If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training.
c06r3n08: the same modeling_utils.py:4350/4358 messages and the "If your task is similar..." note were logged at 11:02:50,481.
91%|█████████ | 30/33 [00:24<00:02, 1.41it/s] Loading checkpoint shards: 94%|█████████▍| 31/33 [00:24<00:01, 1.40it/s] Loading checkpoint shards: 94%|█████████▍| 31/33 [00:24<00:01, 1.40it/s] Loading checkpoint shards: 94%|█████████▍| 31/33 [00:24<00:01, 1.40it/s] Loading checkpoint shards: 94%|█████████▍| 31/33 [00:24<00:01, 1.40it/s] Loading checkpoint shards: 97%|█████████▋| 32/33 [00:25<00:00, 1.24it/s] Loading checkpoint shards: 97%|█████████▋| 32/33 [00:25<00:00, 1.24it/s] Loading checkpoint shards: 97%|█████████▋| 32/33 [00:25<00:00, 1.24it/s] Loading checkpoint shards: 97%|█████████▋| 32/33 [00:25<00:00, 1.24it/s] Loading checkpoint shards: 100%|██████████| 33/33 [00:27<00:00, 1.12it/s] Loading checkpoint shards: 100%|██████████| 33/33 [00:27<00:00, 1.12it/s] Loading checkpoint shards: 100%|██████████| 33/33 [00:27<00:00, 1.22it/s] c06r3n08: Loading checkpoint shards: 100%|██████████| 33/33 [00:27<00:00, 1.22it/s] c06r3n08: Loading checkpoint shards: 100%|██████████| 33/33 [00:27<00:00, 1.12it/s] Loading checkpoint shards: 100%|██████████| 33/33 [00:27<00:00, 1.22it/s] c06r3n08: Loading checkpoint shards: 100%|██████████| 33/33 [00:27<00:00, 1.12it/s] Loading checkpoint shards: 100%|██████████| 33/33 [00:27<00:00, 1.22it/s] c06r3n08: [INFO|modeling_utils.py:4350] 2024-03-15 11:02:50,481 >> All model checkpoint weights were used when initializing LlamaForCausalLM. c06r3n08: c06r3n08: [INFO|modeling_utils.py:4358] 2024-03-15 11:02:50,481 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at /work/home/liangjing/.cache/modelscope/hub/skyline2006/llama-7b. c06r3n08: If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training. c06r3n07: [INFO|configuration_utils.py:779] 2024-03-15 11:02:50,484 >> loading configuration file /work/home/liangjing/.cache/modelscope/hub/skyline2006/llama-7b/generation_config.json c06r3n09: [INFO|configuration_utils.py:779] 2024-03-15 11:02:50,483 >> loading configuration file /work/home/liangjing/.cache/modelscope/hub/skyline2006/llama-7b/generation_config.json c06r3n07: [INFO|configuration_utils.py:826] 2024-03-15 11:02:50,485 >> Generate config GenerationConfig { c06r3n07: "bos_token_id": 0, c06r3n07: "eos_token_id": 1, c06r3n07: "pad_token_id": 0 c06r3n07: } c06r3n07: c06r3n09: [INFO|configuration_utils.py:826] 2024-03-15 11:02:50,484 >> Generate config GenerationConfig { c06r3n09: "bos_token_id": 0, c06r3n09: "eos_token_id": 1, c06r3n09: "pad_token_id": 0 c06r3n09: } c06r3n09: c06r3n09: 03/15/2024 11:02:50 - INFO - llmtuner.model.patcher - Gradient checkpointing enabled. c06r3n09: 03/15/2024 11:02:50 - INFO - llmtuner.model.patcher - Gradient checkpointing enabled. c06r3n09: 03/15/2024 11:02:50 - INFO - llmtuner.model.adapter - Fine-tuning method: Full c06r3n09: 03/15/2024 11:02:50 - INFO - llmtuner.model.adapter - Fine-tuning method: Full c06r3n07: 03/15/2024 11:02:50 - INFO - llmtuner.model.patcher - Gradient checkpointing enabled. c06r3n07: 03/15/2024 11:02:50 - INFO - llmtuner.model.adapter - Fine-tuning method: Full c06r3n07: 03/15/2024 11:02:50 - INFO - llmtuner.model.patcher - Gradient checkpointing enabled. c06r3n09: 03/15/2024 11:02:50 - INFO - llmtuner.model.patcher - Gradient checkpointing enabled. c06r3n07: 03/15/2024 11:02:50 - INFO - llmtuner.model.adapter - Fine-tuning method: Full c06r3n09: 03/15/2024 11:02:50 - INFO - llmtuner.model.patcher - Gradient checkpointing enabled. 
c06r3n07: 03/15/2024 11:02:50 - INFO - llmtuner.model.patcher - Gradient checkpointing enabled. c06r3n06: 03/15/2024 11:02:50 - INFO - llmtuner.model.patcher - Gradient checkpointing enabled. c06r3n09: 03/15/2024 11:02:50 - INFO - llmtuner.model.adapter - Fine-tuning method: Full c06r3n06: 03/15/2024 11:02:50 - INFO - llmtuner.model.patcher - Gradient checkpointing enabled. c06r3n07: 03/15/2024 11:02:50 - INFO - llmtuner.model.adapter - Fine-tuning method: Full c06r3n06: 03/15/2024 11:02:50 - INFO - llmtuner.model.adapter - Fine-tuning method: Full c06r3n09: 03/15/2024 11:02:50 - INFO - llmtuner.model.adapter - Fine-tuning method: Full c06r3n06: 03/15/2024 11:02:50 - INFO - llmtuner.model.adapter - Fine-tuning method: Full c06r3n07: 03/15/2024 11:02:50 - INFO - llmtuner.model.patcher - Gradient checkpointing enabled. c06r3n06: 03/15/2024 11:02:50 - INFO - llmtuner.model.patcher - Gradient checkpointing enabled. c06r3n07: 03/15/2024 11:02:50 - INFO - llmtuner.model.adapter - Fine-tuning method: Full c06r3n06: 03/15/2024 11:02:50 - INFO - llmtuner.model.adapter - Fine-tuning method: Full c06r3n08: [INFO|configuration_utils.py:779] 2024-03-15 11:02:50,489 >> loading configuration file /work/home/liangjing/.cache/modelscope/hub/skyline2006/llama-7b/generation_config.json c06r3n08: [INFO|configuration_utils.py:826] 2024-03-15 11:02:50,489 >> Generate config GenerationConfig { c06r3n08: "bos_token_id": 0, c06r3n08: "eos_token_id": 1, c06r3n08: "pad_token_id": 0 c06r3n08: } c06r3n08: c06r3n08: 03/15/2024 11:02:50 - INFO - llmtuner.model.patcher - Gradient checkpointing enabled. c06r3n08: 03/15/2024 11:02:50 - INFO - llmtuner.model.patcher - Gradient checkpointing enabled. c06r3n08: 03/15/2024 11:02:50 - INFO - llmtuner.model.adapter - Fine-tuning method: Full c06r3n08: 03/15/2024 11:02:50 - INFO - llmtuner.model.adapter - Fine-tuning method: Full c06r3n08: 03/15/2024 11:02:50 - INFO - llmtuner.model.patcher - Gradient checkpointing enabled. c06r3n08: 03/15/2024 11:02:50 - INFO - llmtuner.model.adapter - Fine-tuning method: Full c06r3n08: 03/15/2024 11:02:50 - INFO - llmtuner.model.patcher - Gradient checkpointing enabled. 
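The Generate config block above is just the model's generation_config.json echoed back by transformers when the checkpoint is loaded. For reference it can be re-read outside the run with the snippet below; this is only a sketch, and the bos/eos/pad values it would print simply restate what the log shows for this particular checkpoint.

    # Sketch: re-reading the generation config that produced the block above.
    from transformers import GenerationConfig

    gen_cfg = GenerationConfig.from_pretrained(
        "/work/home/liangjing/.cache/modelscope/hub/skyline2006/llama-7b"
    )
    # According to the log, this checkpoint ships bos_token_id=0, eos_token_id=1, pad_token_id=0.
    print(gen_cfg.bos_token_id, gen_cfg.eos_token_id, gen_cfg.pad_token_id)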
c06r3n09: 03/15/2024 11:02:50 - INFO - llmtuner.model.loader - trainable params: 6738415616 || all params: 6738415616 || trainable%: 100.0000
c06r3n07: 03/15/2024 11:02:50 - INFO - llmtuner.model.loader - trainable params: 6738415616 || all params: 6738415616 || trainable%: 100.0000
c06r3n08: 03/15/2024 11:02:50 - INFO - llmtuner.model.loader - trainable params: 6738415616 || all params: 6738415616 || trainable%: 100.0000
c06r3n07: 03/15/2024 11:02:50 - INFO - llmtuner.data.template - Add pad token:
c06r3n09: 03/15/2024 11:02:50 - INFO - llmtuner.data.template - Add pad token:
c06r3n08: 03/15/2024 11:02:50 - INFO - llmtuner.data.template - Add pad token:
c06r3n08: 03/15/2024 11:02:50 - INFO - llmtuner.data.loader - Loading dataset alpaca_gpt4_data_en.json...
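With the fine-tuning method reported as Full, every parameter stays trainable, which is why the loader prints identical trainable and total counts (6738415616, i.e. the 7B model) and a trainable% of 100.0000. A minimal sketch of how such a line can be computed; count_parameters is an illustrative helper, not LLaMA-Factory's own code, and `model` stands for the LlamaForCausalLM loaded above.

    # Sketch: deriving "trainable params || all params || trainable%" for a loaded model.
    def count_parameters(model):
        trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
        total = sum(p.numel() for p in model.parameters())
        return trainable, total

    trainable, total = count_parameters(model)
    print(f"trainable params: {trainable} || all params: {total} || "
          f"trainable%: {100 * trainable / total:.4f}")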
c06r3n07: 03/15/2024 11:02:50 - INFO - llmtuner.data.loader - Loading dataset alpaca_gpt4_data_en.json... c06r3n09: 03/15/2024 11:02:50 - INFO - llmtuner.data.loader - Loading dataset alpaca_gpt4_data_en.json... c06r3n06: Loading checkpoint shards: 100%|██████████| 33/33 [00:27<00:00, 1.22it/s] Loading checkpoint shards: 100%|██████████| 33/33 [00:27<00:00, 1.21it/s] c06r3n06: [INFO|modeling_utils.py:4350] 2024-03-15 11:02:50,627 >> All model checkpoint weights were used when initializing LlamaForCausalLM. c06r3n06: c06r3n06: [INFO|modeling_utils.py:4358] 2024-03-15 11:02:50,627 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at /work/home/liangjing/.cache/modelscope/hub/skyline2006/llama-7b. c06r3n06: If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training. c06r3n06: [INFO|configuration_utils.py:779] 2024-03-15 11:02:50,632 >> loading configuration file /work/home/liangjing/.cache/modelscope/hub/skyline2006/llama-7b/generation_config.json c06r3n06: [INFO|configuration_utils.py:826] 2024-03-15 11:02:50,632 >> Generate config GenerationConfig { c06r3n06: "bos_token_id": 0, c06r3n06: "eos_token_id": 1, c06r3n06: "pad_token_id": 0 c06r3n06: } c06r3n06: c06r3n06: 03/15/2024 11:02:50 - INFO - llmtuner.model.patcher - Gradient checkpointing enabled. c06r3n06: 03/15/2024 11:02:50 - INFO - llmtuner.model.adapter - Fine-tuning method: Full c06r3n06: 03/15/2024 11:02:50 - INFO - llmtuner.model.loader - trainable params: 6738415616 || all params: 6738415616 || trainable%: 100.0000 c06r3n06: 03/15/2024 11:02:50 - INFO - llmtuner.data.template - Add pad token: c06r3n06: 03/15/2024 11:02:50 - INFO - llmtuner.data.loader - Loading dataset alpaca_gpt4_data_en.json... c06r3n09: Using custom data configuration default-c71a5e5c5041e81e c06r3n09: Loading Dataset Infos from /work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/packaged_modules/json c06r3n07: Using custom data configuration default-c71a5e5c5041e81e c06r3n07: Loading Dataset Infos from /work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/packaged_modules/json c06r3n08: Using custom data configuration default-c71a5e5c5041e81e c06r3n08: Loading Dataset Infos from /work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/packaged_modules/json c06r3n06: Using custom data configuration default-c71a5e5c5041e81e c06r3n06: Loading Dataset Infos from /work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/packaged_modules/json c06r3n09: Overwrite dataset info from restored data version if exists. c06r3n09: Loading Dataset info from /work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96 c06r3n07: Overwrite dataset info from restored data version if exists. c06r3n07: Loading Dataset info from /work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96 c06r3n06: Overwrite dataset info from restored data version if exists. c06r3n06: Loading Dataset info from /work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96 c06r3n08: Overwrite dataset info from restored data version if exists. 
c06r3n08: Loading Dataset info from /work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96 c06r3n09: Found cached dataset json (/work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96) c06r3n09: Loading Dataset info from /work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96 c06r3n06: Found cached dataset json (/work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96) c06r3n06: Loading Dataset info from /work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96 c06r3n08: Found cached dataset json (/work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96) c06r3n08: Loading Dataset info from /work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96 c06r3n07: Found cached dataset json (/work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96) c06r3n07: Loading Dataset info from /work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96 c06r3n06: Process #0 will write at /work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-f55b5a094672e9db_00000_of_00002.arrow c06r3n06: Process #1 will write at /work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-f55b5a094672e9db_00001_of_00002.arrow c06r3n08: Process #0 will write at /work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-f55b5a094672e9db_00000_of_00002.arrow c06r3n08: Process #1 will write at /work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-f55b5a094672e9db_00001_of_00002.arrow c06r3n09: Process #0 will write at /work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-f55b5a094672e9db_00000_of_00002.arrow c06r3n09: Process #1 will write at /work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-f55b5a094672e9db_00001_of_00002.arrow c06r3n07: Process #0 will write at /work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-f55b5a094672e9db_00000_of_00002.arrow c06r3n07: Process #1 will write at /work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-f55b5a094672e9db_00001_of_00002.arrow c06r3n09: Loading cached processed 
dataset at /work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-f55b5a094672e9db_*_of_00002.arrow c06r3n09: Concatenating 2 shards c06r3n08: Loading cached processed dataset at /work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-f55b5a094672e9db_*_of_00002.arrow c06r3n08: Concatenating 2 shards c06r3n07: Loading cached processed dataset at /work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-f55b5a094672e9db_*_of_00002.arrow c06r3n07: Concatenating 2 shards c06r3n06: Loading cached processed dataset at /work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-f55b5a094672e9db_*_of_00002.arrow c06r3n06: Concatenating 2 shards c06r3n09: 03/15/2024 11:02:52 - INFO - llmtuner.data.loader - Loading dataset alpaca_gpt4_data_zh.json... c06r3n07: 03/15/2024 11:02:52 - INFO - llmtuner.data.loader - Loading dataset alpaca_gpt4_data_zh.json... c06r3n08: 03/15/2024 11:02:52 - INFO - llmtuner.data.loader - Loading dataset alpaca_gpt4_data_zh.json... c06r3n06: 03/15/2024 11:02:52 - INFO - llmtuner.data.loader - Loading dataset alpaca_gpt4_data_zh.json... c06r3n08: Using custom data configuration default-ea5892bdcb099afd c06r3n07: Using custom data configuration default-ea5892bdcb099afd c06r3n07: Loading Dataset Infos from /work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/packaged_modules/json c06r3n08: Loading Dataset Infos from /work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/packaged_modules/json c06r3n09: Using custom data configuration default-ea5892bdcb099afd c06r3n09: Loading Dataset Infos from /work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/packaged_modules/json c06r3n06: Using custom data configuration default-ea5892bdcb099afd c06r3n06: Loading Dataset Infos from /work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/packaged_modules/json c06r3n08: Overwrite dataset info from restored data version if exists. c06r3n08: Loading Dataset info from /work/home/liangjing/.cache/huggingface/datasets/json/default-ea5892bdcb099afd/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96 c06r3n08: Found cached dataset json (/work/home/liangjing/.cache/huggingface/datasets/json/default-ea5892bdcb099afd/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96) c06r3n08: Loading Dataset info from /work/home/liangjing/.cache/huggingface/datasets/json/default-ea5892bdcb099afd/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96 c06r3n09: Overwrite dataset info from restored data version if exists. 
c06r3n09: Loading Dataset info from /work/home/liangjing/.cache/huggingface/datasets/json/default-ea5892bdcb099afd/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96 c06r3n09: Found cached dataset json (/work/home/liangjing/.cache/huggingface/datasets/json/default-ea5892bdcb099afd/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96) c06r3n09: Loading Dataset info from /work/home/liangjing/.cache/huggingface/datasets/json/default-ea5892bdcb099afd/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96 c06r3n09: Process #0 will write at /work/home/liangjing/.cache/huggingface/datasets/json/default-ea5892bdcb099afd/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-4686cdfd54872145_00000_of_00002.arrow c06r3n09: Process #1 will write at /work/home/liangjing/.cache/huggingface/datasets/json/default-ea5892bdcb099afd/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-4686cdfd54872145_00001_of_00002.arrow c06r3n08: Process #0 will write at /work/home/liangjing/.cache/huggingface/datasets/json/default-ea5892bdcb099afd/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-4686cdfd54872145_00000_of_00002.arrow c06r3n08: Process #1 will write at /work/home/liangjing/.cache/huggingface/datasets/json/default-ea5892bdcb099afd/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-4686cdfd54872145_00001_of_00002.arrow c06r3n06: Overwrite dataset info from restored data version if exists. c06r3n06: Loading Dataset info from /work/home/liangjing/.cache/huggingface/datasets/json/default-ea5892bdcb099afd/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96 c06r3n06: Found cached dataset json (/work/home/liangjing/.cache/huggingface/datasets/json/default-ea5892bdcb099afd/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96) c06r3n06: Loading Dataset info from /work/home/liangjing/.cache/huggingface/datasets/json/default-ea5892bdcb099afd/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96 c06r3n06: Process #0 will write at /work/home/liangjing/.cache/huggingface/datasets/json/default-ea5892bdcb099afd/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-4686cdfd54872145_00000_of_00002.arrow c06r3n06: Process #1 will write at /work/home/liangjing/.cache/huggingface/datasets/json/default-ea5892bdcb099afd/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-4686cdfd54872145_00001_of_00002.arrow c06r3n09: Loading cached processed dataset at /work/home/liangjing/.cache/huggingface/datasets/json/default-ea5892bdcb099afd/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-4686cdfd54872145_*_of_00002.arrow c06r3n09: Concatenating 2 shards c06r3n08: Loading cached processed dataset at /work/home/liangjing/.cache/huggingface/datasets/json/default-ea5892bdcb099afd/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-4686cdfd54872145_*_of_00002.arrow c06r3n08: Concatenating 2 shards c06r3n07: Overwrite dataset info from restored data version if exists. 
c06r3n07: Loading Dataset info from /work/home/liangjing/.cache/huggingface/datasets/json/default-ea5892bdcb099afd/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96
c06r3n06: Loading cached processed dataset at /work/home/liangjing/.cache/huggingface/datasets/json/default-ea5892bdcb099afd/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-4686cdfd54872145_*_of_00002.arrow
c06r3n06: Concatenating 2 shards
c06r3n07: Found cached dataset json (/work/home/liangjing/.cache/huggingface/datasets/json/default-ea5892bdcb099afd/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96)
c06r3n07: Loading Dataset info from /work/home/liangjing/.cache/huggingface/datasets/json/default-ea5892bdcb099afd/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96
c06r3n07: Process #0 will write at /work/home/liangjing/.cache/huggingface/datasets/json/default-ea5892bdcb099afd/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-4686cdfd54872145_00000_of_00002.arrow
c06r3n07: Process #1 will write at /work/home/liangjing/.cache/huggingface/datasets/json/default-ea5892bdcb099afd/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-4686cdfd54872145_00001_of_00002.arrow
c06r3n07: Loading cached processed dataset at /work/home/liangjing/.cache/huggingface/datasets/json/default-ea5892bdcb099afd/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-4686cdfd54872145_*_of_00002.arrow
c06r3n07: Concatenating 2 shards
c06r3n06: 03/15/2024 11:02:52 - INFO - llmtuner.data.loader - Loading dataset alpaca_gpt4_data_en.json...
c06r3n08: 03/15/2024 11:02:52 - INFO - llmtuner.data.loader - Loading dataset alpaca_gpt4_data_en.json...
c06r3n09: 03/15/2024 11:02:52 - INFO - llmtuner.data.loader - Loading dataset alpaca_gpt4_data_en.json...
c06r3n07: 03/15/2024 11:02:52 - INFO - llmtuner.data.loader - Loading dataset alpaca_gpt4_data_en.json...
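The 🤗 Datasets chatter above ("Using custom data configuration default-…", "Found cached dataset json (…)") is what loading a local JSON file looks like once its Arrow cache already exists: each node resolves the same default-&lt;hash&gt; configuration and reuses the cached files instead of re-parsing the JSON. Roughly, and with an illustrative file name rather than the repo's exact data path:

    # Sketch: the cached local-JSON load behind the "Found cached dataset json" lines.
    from datasets import load_dataset

    raw_dataset = load_dataset(
        "json",
        data_files="alpaca_gpt4_data_en.json",  # illustrative path
        split="train",
    )
    # A repeated call with identical arguments maps to the same default-<hash>
    # configuration directory and reuses the cached Arrow files.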
c06r3n06: Process #0 will write at /work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-56e79f9679a95e8c_00000_of_00002.arrow
c06r3n06: Process #1 will write at /work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-56e79f9679a95e8c_00001_of_00002.arrow
c06r3n09: Process #0 will write at /work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-56e79f9679a95e8c_00000_of_00002.arrow
c06r3n09: Process #1 will write at /work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-56e79f9679a95e8c_00001_of_00002.arrow
c06r3n08: Process #0 will write at /work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-56e79f9679a95e8c_00000_of_00002.arrow
c06r3n08: Process #1 will write at /work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-56e79f9679a95e8c_00001_of_00002.arrow
c06r3n07: Process #0 will write at /work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-56e79f9679a95e8c_00000_of_00002.arrow
c06r3n07: Process #1 will write at /work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-56e79f9679a95e8c_00001_of_00002.arrow
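The paired "Process #0 / Process #1 will write at …_00000_of_00002.arrow / …_00001_of_00002.arrow" lines mean the preprocessing map runs with two worker processes, each writing its own cache shard, and the shards are merged right after ("Concatenating 2 shards"). A rough sketch of that step, with an illustrative preprocess function standing in for the real template/tokenisation code:

    # Sketch: a two-process map that yields the *_of_00002.arrow cache shards above.
    def preprocess(example):
        # Placeholder for the step that builds input_ids and labels like the
        # sample printed further below.
        return example

    tokenized = raw_dataset.map(preprocess, num_proc=2)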
c06r3n08: Loading cached processed dataset at /work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-56e79f9679a95e8c_*_of_00002.arrow c06r3n08: Concatenating 2 shards c06r3n07: Loading cached processed dataset at /work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-56e79f9679a95e8c_*_of_00002.arrow c06r3n07: Concatenating 2 shards c06r3n09: Loading cached processed dataset at /work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-56e79f9679a95e8c_*_of_00002.arrow c06r3n09: Concatenating 2 shards c06r3n08: input_ids: c06r3n08: [0, 518, 25580, 29962, 3532, 14816, 29903, 6778, 13, 3492, 526, 263, 8444, 29892, 3390, 1319, 322, 15993, 20255, 29889, 29849, 1234, 408, 1371, 3730, 408, 1950, 29892, 1550, 1641, 9109, 29889, 3575, 6089, 881, 451, 3160, 738, 10311, 1319, 29892, 443, 621, 936, 29892, 11021, 391, 29892, 7916, 391, 29892, 304, 27375, 29892, 18215, 29892, 470, 27302, 2793, 29889, 3529, 9801, 393, 596, 20890, 526, 5374, 635, 443, 5365, 1463, 322, 6374, 297, 5469, 29889, 13, 13, 3644, 263, 1139, 947, 451, 1207, 738, 4060, 29892, 470, 338, 451, 2114, 1474, 16165, 261, 296, 29892, 5649, 2020, 2012, 310, 22862, 1554, 451, 1959, 29889, 960, 366, 1016, 29915, 29873, 1073, 278, 1234, 304, 263, 1139, 29892, 3113, 1016, 29915, 29873, 6232, 2089, 2472, 29889, 13, 29966, 829, 14816, 29903, 6778, 13, 13, 29954, 573, 2211, 25562, 363, 7952, 292, 9045, 29891, 29889, 518, 29914, 25580, 29962, 29871, 29896, 29889, 382, 271, 263, 6411, 8362, 322, 18254, 768, 2738, 652, 300, 29901, 8561, 1854, 596, 592, 1338, 526, 20978, 573, 310, 263, 12875, 310, 285, 21211, 322, 18655, 1849, 29892, 20793, 26823, 29892, 3353, 2646, 1144, 29892, 322, 9045, 29891, 285, 1446, 29889, 910, 6911, 304, 3867, 596, 3573, 411, 278, 18853, 18254, 374, 1237, 304, 740, 472, 967, 1900, 322, 508, 1371, 5557, 17168, 293, 10267, 2129, 29889, 13, 13, 29906, 29889, 2201, 482, 297, 4943, 9128, 6354, 29901, 1222, 6269, 895, 338, 7618, 1455, 363, 7344, 292, 4549, 289, 2873, 29892, 2301, 7799, 29892, 322, 5881, 29875, 586, 6151, 1070, 9045, 29889, 319, 326, 363, 472, 3203, 29871, 29896, 29945, 29900, 6233, 310, 17768, 403, 14911, 711, 293, 15058, 470, 29871, 29955, 29945, 6233, 310, 14877, 20657, 15058, 1269, 4723, 29889, 13, 13, 29941, 29889, 3617, 3307, 8709, 29901, 24162, 3307, 11029, 8709, 338, 7618, 1455, 363, 9128, 322, 19119, 1532, 29899, 915, 292, 29889, 739, 6911, 304, 1072, 5987, 286, 2092, 29892, 11157, 25323, 3321, 740, 29892, 322, 11286, 9045, 29891, 14321, 322, 5198, 1540, 740, 29889, 319, 326, 363, 29871, 29955, 29899, 29929, 6199, 310, 8709, 1269, 4646, 29889, 0] c06r3n07: input_ids: c06r3n07: [0, 518, 25580, 29962, 3532, 14816, 29903, 6778, 13, 3492, 526, 263, 8444, 29892, 3390, 1319, 322, 15993, 20255, 29889, 29849, 1234, 408, 1371, 3730, 408, 1950, 29892, 1550, 1641, 9109, 29889, 3575, 6089, 881, 451, 3160, 738, 10311, 1319, 29892, 443, 621, 936, 29892, 11021, 391, 29892, 7916, 391, 29892, 304, 27375, 29892, 18215, 29892, 470, 27302, 2793, 29889, 3529, 9801, 393, 596, 20890, 526, 5374, 635, 443, 5365, 1463, 322, 6374, 297, 5469, 29889, 13, 13, 3644, 263, 1139, 947, 451, 1207, 738, 4060, 29892, 470, 338, 451, 2114, 1474, 16165, 261, 296, 29892, 5649, 2020, 2012, 310, 22862, 1554, 451, 1959, 29889, 960, 366, 1016, 29915, 29873, 
1073, 278, 1234, 304, 263, 1139, 29892, 3113, 1016, 29915, 29873, 6232, 2089, 2472, 29889, 13, 29966, 829, 14816, 29903, 6778, 13, 13, 29954, 573, 2211, 25562, 363, 7952, 292, 9045, 29891, 29889, 518, 29914, 25580, 29962, 29871, 29896, 29889, 382, 271, 263, 6411, 8362, 322, 18254, 768, 2738, 652, 300, 29901, 8561, 1854, 596, 592, 1338, 526, 20978, 573, 310, 263, 12875, 310, 285, 21211, 322, 18655, 1849, 29892, 20793, 26823, 29892, 3353, 2646, 1144, 29892, 322, 9045, 29891, 285, 1446, 29889, 910, 6911, 304, 3867, 596, 3573, 411, 278, 18853, 18254, 374, 1237, 304, 740, 472, 967, 1900, 322, 508, 1371, 5557, 17168, 293, 10267, 2129, 29889, 13, 13, 29906, 29889, 2201, 482, 297, 4943, 9128, 6354, 29901, 1222, 6269, 895, 338, 7618, 1455, 363, 7344, 292, 4549, 289, 2873, 29892, 2301, 7799, 29892, 322, 5881, 29875, 586, 6151, 1070, 9045, 29889, 319, 326, 363, 472, 3203, 29871, 29896, 29945, 29900, 6233, 310, 17768, 403, 14911, 711, 293, 15058, 470, 29871, 29955, 29945, 6233, 310, 14877, 20657, 15058, 1269, 4723, 29889, 13, 13, 29941, 29889, 3617, 3307, 8709, 29901, 24162, 3307, 11029, 8709, 338, 7618, 1455, 363, 9128, 322, 19119, 1532, 29899, 915, 292, 29889, 739, 6911, 304, 1072, 5987, 286, 2092, 29892, 11157, 25323, 3321, 740, 29892, 322, 11286, 9045, 29891, 14321, 322, 5198, 1540, 740, 29889, 319, 326, 363, 29871, 29955, 29899, 29929, 6199, 310, 8709, 1269, 4646, 29889, 0] c06r3n06: Loading cached processed dataset at /work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-56e79f9679a95e8c_*_of_00002.arrow c06r3n06: Concatenating 2 shards c06r3n09: input_ids: c06r3n09: [0, 518, 25580, 29962, 3532, 14816, 29903, 6778, 13, 3492, 526, 263, 8444, 29892, 3390, 1319, 322, 15993, 20255, 29889, 29849, 1234, 408, 1371, 3730, 408, 1950, 29892, 1550, 1641, 9109, 29889, 3575, 6089, 881, 451, 3160, 738, 10311, 1319, 29892, 443, 621, 936, 29892, 11021, 391, 29892, 7916, 391, 29892, 304, 27375, 29892, 18215, 29892, 470, 27302, 2793, 29889, 3529, 9801, 393, 596, 20890, 526, 5374, 635, 443, 5365, 1463, 322, 6374, 297, 5469, 29889, 13, 13, 3644, 263, 1139, 947, 451, 1207, 738, 4060, 29892, 470, 338, 451, 2114, 1474, 16165, 261, 296, 29892, 5649, 2020, 2012, 310, 22862, 1554, 451, 1959, 29889, 960, 366, 1016, 29915, 29873, 1073, 278, 1234, 304, 263, 1139, 29892, 3113, 1016, 29915, 29873, 6232, 2089, 2472, 29889, 13, 29966, 829, 14816, 29903, 6778, 13, 13, 29954, 573, 2211, 25562, 363, 7952, 292, 9045, 29891, 29889, 518, 29914, 25580, 29962, 29871, 29896, 29889, 382, 271, 263, 6411, 8362, 322, 18254, 768, 2738, 652, 300, 29901, 8561, 1854, 596, 592, 1338, 526, 20978, 573, 310, 263, 12875, 310, 285, 21211, 322, 18655, 1849, 29892, 20793, 26823, 29892, 3353, 2646, 1144, 29892, 322, 9045, 29891, 285, 1446, 29889, 910, 6911, 304, 3867, 596, 3573, 411, 278, 18853, 18254, 374, 1237, 304, 740, 472, 967, 1900, 322, 508, 1371, 5557, 17168, 293, 10267, 2129, 29889, 13, 13, 29906, 29889, 2201, 482, 297, 4943, 9128, 6354, 29901, 1222, 6269, 895, 338, 7618, 1455, 363, 7344, 292, 4549, 289, 2873, 29892, 2301, 7799, 29892, 322, 5881, 29875, 586, 6151, 1070, 9045, 29889, 319, 326, 363, 472, 3203, 29871, 29896, 29945, 29900, 6233, 310, 17768, 403, 14911, 711, 293, 15058, 470, 29871, 29955, 29945, 6233, 310, 14877, 20657, 15058, 1269, 4723, 29889, 13, 13, 29941, 29889, 3617, 3307, 8709, 29901, 24162, 3307, 11029, 8709, 338, 7618, 1455, 363, 9128, 322, 19119, 1532, 29899, 915, 292, 29889, 739, 6911, 304, 1072, 5987, 286, 2092, 29892, 
11157, 25323, 3321, 740, 29892, 322, 11286, 9045, 29891, 14321, 322, 5198, 1540, 740, 29889, 319, 326, 363, 29871, 29955, 29899, 29929, 6199, 310, 8709, 1269, 4646, 29889, 0] c06r3n08: inputs: c06r3n08: ⁇ [INST] <> c06r3n08: You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. c06r3n08: c06r3n08: If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information. c06r3n08: <> c06r3n08: c06r3n08: Give three tips for staying healthy. [/INST] 1. Eat a balanced and nutritious diet: Make sure your meals are inclusive of a variety of fruits and vegetables, lean protein, whole grains, and healthy fats. This helps to provide your body with the essential nutrients to function at its best and can help prevent chronic diseases. c06r3n08: c06r3n08: 2. Engage in regular physical activity: Exercise is crucial for maintaining strong bones, muscles, and cardiovascular health. Aim for at least 150 minutes of moderate aerobic exercise or 75 minutes of vigorous exercise each week. c06r3n08: c06r3n08: 3. Get enough sleep: Getting enough quality sleep is crucial for physical and mental well-being. It helps to regulate mood, improve cognitive function, and supports healthy growth and immune function. Aim for 7-9 hours of sleep each night. ⁇ c06r3n08: label_ids: c06r3n08: [-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 29871, 29896, 29889, 382, 271, 263, 6411, 8362, 322, 18254, 768, 2738, 652, 300, 29901, 8561, 1854, 596, 592, 1338, 526, 20978, 573, 310, 263, 12875, 310, 285, 21211, 322, 18655, 1849, 29892, 20793, 26823, 29892, 3353, 2646, 1144, 29892, 322, 9045, 29891, 285, 1446, 29889, 910, 6911, 304, 3867, 596, 3573, 411, 278, 18853, 18254, 374, 1237, 304, 740, 472, 967, 1900, 322, 508, 1371, 5557, 17168, 293, 10267, 2129, 29889, 13, 13, 29906, 29889, 2201, 482, 297, 4943, 9128, 6354, 29901, 1222, 6269, 895, 338, 7618, 1455, 363, 7344, 292, 4549, 289, 2873, 29892, 2301, 7799, 29892, 322, 5881, 29875, 586, 6151, 1070, 9045, 29889, 319, 326, 363, 472, 3203, 29871, 29896, 29945, 29900, 6233, 310, 17768, 403, 14911, 711, 293, 15058, 470, 29871, 29955, 29945, 6233, 310, 14877, 20657, 15058, 1269, 4723, 29889, 13, 13, 29941, 29889, 3617, 3307, 8709, 29901, 24162, 3307, 11029, 8709, 338, 7618, 1455, 363, 9128, 322, 19119, 1532, 29899, 915, 292, 29889, 739, 6911, 304, 1072, 5987, 286, 2092, 29892, 11157, 25323, 3321, 
740, 29892, 322, 11286, 9045, 29891, 14321, 322, 5198, 1540, 740, 29889, 319, 326, 363, 29871, 29955, 29899, 29929, 6199, 310, 8709, 1269, 4646, 29889, 0] c06r3n07: inputs: c06r3n07: ⁇ [INST] <> c06r3n07: You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. c06r3n07: c06r3n07: If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information. c06r3n07: <> c06r3n07: c06r3n07: Give three tips for staying healthy. [/INST] 1. Eat a balanced and nutritious diet: Make sure your meals are inclusive of a variety of fruits and vegetables, lean protein, whole grains, and healthy fats. This helps to provide your body with the essential nutrients to function at its best and can help prevent chronic diseases. c06r3n07: c06r3n07: 2. Engage in regular physical activity: Exercise is crucial for maintaining strong bones, muscles, and cardiovascular health. Aim for at least 150 minutes of moderate aerobic exercise or 75 minutes of vigorous exercise each week. c06r3n07: c06r3n07: 3. Get enough sleep: Getting enough quality sleep is crucial for physical and mental well-being. It helps to regulate mood, improve cognitive function, and supports healthy growth and immune function. Aim for 7-9 hours of sleep each night. ⁇ c06r3n07: label_ids: c06r3n07: [-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 29871, 29896, 29889, 382, 271, 263, 6411, 8362, 322, 18254, 768, 2738, 652, 300, 29901, 8561, 1854, 596, 592, 1338, 526, 20978, 573, 310, 263, 12875, 310, 285, 21211, 322, 18655, 1849, 29892, 20793, 26823, 29892, 3353, 2646, 1144, 29892, 322, 9045, 29891, 285, 1446, 29889, 910, 6911, 304, 3867, 596, 3573, 411, 278, 18853, 18254, 374, 1237, 304, 740, 472, 967, 1900, 322, 508, 1371, 5557, 17168, 293, 10267, 2129, 29889, 13, 13, 29906, 29889, 2201, 482, 297, 4943, 9128, 6354, 29901, 1222, 6269, 895, 338, 7618, 1455, 363, 7344, 292, 4549, 289, 2873, 29892, 2301, 7799, 29892, 322, 5881, 29875, 586, 6151, 1070, 9045, 29889, 319, 326, 363, 472, 3203, 29871, 29896, 29945, 29900, 6233, 310, 17768, 403, 14911, 711, 293, 15058, 470, 29871, 29955, 29945, 6233, 310, 14877, 20657, 15058, 1269, 4723, 29889, 13, 13, 29941, 29889, 3617, 3307, 8709, 29901, 24162, 3307, 11029, 8709, 338, 7618, 1455, 363, 9128, 322, 19119, 1532, 29899, 915, 292, 29889, 739, 6911, 304, 1072, 5987, 286, 2092, 29892, 11157, 25323, 3321, 740, 29892, 322, 
11286, 9045, 29891, 14321, 322, 5198, 1540, 740, 29889, 319, 326, 363, 29871, 29955, 29899, 29929, 6199, 310, 8709, 1269, 4646, 29889, 0] c06r3n06: input_ids: c06r3n06: [0, 518, 25580, 29962, 3532, 14816, 29903, 6778, 13, 3492, 526, 263, 8444, 29892, 3390, 1319, 322, 15993, 20255, 29889, 29849, 1234, 408, 1371, 3730, 408, 1950, 29892, 1550, 1641, 9109, 29889, 3575, 6089, 881, 451, 3160, 738, 10311, 1319, 29892, 443, 621, 936, 29892, 11021, 391, 29892, 7916, 391, 29892, 304, 27375, 29892, 18215, 29892, 470, 27302, 2793, 29889, 3529, 9801, 393, 596, 20890, 526, 5374, 635, 443, 5365, 1463, 322, 6374, 297, 5469, 29889, 13, 13, 3644, 263, 1139, 947, 451, 1207, 738, 4060, 29892, 470, 338, 451, 2114, 1474, 16165, 261, 296, 29892, 5649, 2020, 2012, 310, 22862, 1554, 451, 1959, 29889, 960, 366, 1016, 29915, 29873, 1073, 278, 1234, 304, 263, 1139, 29892, 3113, 1016, 29915, 29873, 6232, 2089, 2472, 29889, 13, 29966, 829, 14816, 29903, 6778, 13, 13, 29954, 573, 2211, 25562, 363, 7952, 292, 9045, 29891, 29889, 518, 29914, 25580, 29962, 29871, 29896, 29889, 382, 271, 263, 6411, 8362, 322, 18254, 768, 2738, 652, 300, 29901, 8561, 1854, 596, 592, 1338, 526, 20978, 573, 310, 263, 12875, 310, 285, 21211, 322, 18655, 1849, 29892, 20793, 26823, 29892, 3353, 2646, 1144, 29892, 322, 9045, 29891, 285, 1446, 29889, 910, 6911, 304, 3867, 596, 3573, 411, 278, 18853, 18254, 374, 1237, 304, 740, 472, 967, 1900, 322, 508, 1371, 5557, 17168, 293, 10267, 2129, 29889, 13, 13, 29906, 29889, 2201, 482, 297, 4943, 9128, 6354, 29901, 1222, 6269, 895, 338, 7618, 1455, 363, 7344, 292, 4549, 289, 2873, 29892, 2301, 7799, 29892, 322, 5881, 29875, 586, 6151, 1070, 9045, 29889, 319, 326, 363, 472, 3203, 29871, 29896, 29945, 29900, 6233, 310, 17768, 403, 14911, 711, 293, 15058, 470, 29871, 29955, 29945, 6233, 310, 14877, 20657, 15058, 1269, 4723, 29889, 13, 13, 29941, 29889, 3617, 3307, 8709, 29901, 24162, 3307, 11029, 8709, 338, 7618, 1455, 363, 9128, 322, 19119, 1532, 29899, 915, 292, 29889, 739, 6911, 304, 1072, 5987, 286, 2092, 29892, 11157, 25323, 3321, 740, 29892, 322, 11286, 9045, 29891, 14321, 322, 5198, 1540, 740, 29889, 319, 326, 363, 29871, 29955, 29899, 29929, 6199, 310, 8709, 1269, 4646, 29889, 0] c06r3n08: labels: c06r3n08: 1. Eat a balanced and nutritious diet: Make sure your meals are inclusive of a variety of fruits and vegetables, lean protein, whole grains, and healthy fats. This helps to provide your body with the essential nutrients to function at its best and can help prevent chronic diseases. c06r3n08: c06r3n08: 2. Engage in regular physical activity: Exercise is crucial for maintaining strong bones, muscles, and cardiovascular health. Aim for at least 150 minutes of moderate aerobic exercise or 75 minutes of vigorous exercise each week. c06r3n08: c06r3n08: 3. Get enough sleep: Getting enough quality sleep is crucial for physical and mental well-being. It helps to regulate mood, improve cognitive function, and supports healthy growth and immune function. Aim for 7-9 hours of sleep each night. ⁇ c06r3n09: inputs: c06r3n09: ⁇ [INST] <> c06r3n09: You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. c06r3n09: c06r3n09: If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. 
If you don't know the answer to a question, please don't share false information. c06r3n09: <> c06r3n09: c06r3n09: Give three tips for staying healthy. [/INST] 1. Eat a balanced and nutritious diet: Make sure your meals are inclusive of a variety of fruits and vegetables, lean protein, whole grains, and healthy fats. This helps to provide your body with the essential nutrients to function at its best and can help prevent chronic diseases. c06r3n09: c06r3n09: 2. Engage in regular physical activity: Exercise is crucial for maintaining strong bones, muscles, and cardiovascular health. Aim for at least 150 minutes of moderate aerobic exercise or 75 minutes of vigorous exercise each week. c06r3n09: c06r3n09: 3. Get enough sleep: Getting enough quality sleep is crucial for physical and mental well-being. It helps to regulate mood, improve cognitive function, and supports healthy growth and immune function. Aim for 7-9 hours of sleep each night. ⁇ c06r3n09: label_ids: c06r3n09: [-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 29871, 29896, 29889, 382, 271, 263, 6411, 8362, 322, 18254, 768, 2738, 652, 300, 29901, 8561, 1854, 596, 592, 1338, 526, 20978, 573, 310, 263, 12875, 310, 285, 21211, 322, 18655, 1849, 29892, 20793, 26823, 29892, 3353, 2646, 1144, 29892, 322, 9045, 29891, 285, 1446, 29889, 910, 6911, 304, 3867, 596, 3573, 411, 278, 18853, 18254, 374, 1237, 304, 740, 472, 967, 1900, 322, 508, 1371, 5557, 17168, 293, 10267, 2129, 29889, 13, 13, 29906, 29889, 2201, 482, 297, 4943, 9128, 6354, 29901, 1222, 6269, 895, 338, 7618, 1455, 363, 7344, 292, 4549, 289, 2873, 29892, 2301, 7799, 29892, 322, 5881, 29875, 586, 6151, 1070, 9045, 29889, 319, 326, 363, 472, 3203, 29871, 29896, 29945, 29900, 6233, 310, 17768, 403, 14911, 711, 293, 15058, 470, 29871, 29955, 29945, 6233, 310, 14877, 20657, 15058, 1269, 4723, 29889, 13, 13, 29941, 29889, 3617, 3307, 8709, 29901, 24162, 3307, 11029, 8709, 338, 7618, 1455, 363, 9128, 322, 19119, 1532, 29899, 915, 292, 29889, 739, 6911, 304, 1072, 5987, 286, 2092, 29892, 11157, 25323, 3321, 740, 29892, 322, 11286, 9045, 29891, 14321, 322, 5198, 1540, 740, 29889, 319, 326, 363, 29871, 29955, 29899, 29929, 6199, 310, 8709, 1269, 4646, 29889, 0] c06r3n07: labels: c06r3n07: 1. Eat a balanced and nutritious diet: Make sure your meals are inclusive of a variety of fruits and vegetables, lean protein, whole grains, and healthy fats. This helps to provide your body with the essential nutrients to function at its best and can help prevent chronic diseases. c06r3n07: c06r3n07: 2. Engage in regular physical activity: Exercise is crucial for maintaining strong bones, muscles, and cardiovascular health. 
Aim for at least 150 minutes of moderate aerobic exercise or 75 minutes of vigorous exercise each week. c06r3n07: c06r3n07: 3. Get enough sleep: Getting enough quality sleep is crucial for physical and mental well-being. It helps to regulate mood, improve cognitive function, and supports healthy growth and immune function. Aim for 7-9 hours of sleep each night. ⁇ c06r3n09: labels: c06r3n09: 1. Eat a balanced and nutritious diet: Make sure your meals are inclusive of a variety of fruits and vegetables, lean protein, whole grains, and healthy fats. This helps to provide your body with the essential nutrients to function at its best and can help prevent chronic diseases. c06r3n09: c06r3n09: 2. Engage in regular physical activity: Exercise is crucial for maintaining strong bones, muscles, and cardiovascular health. Aim for at least 150 minutes of moderate aerobic exercise or 75 minutes of vigorous exercise each week. c06r3n09: c06r3n09: 3. Get enough sleep: Getting enough quality sleep is crucial for physical and mental well-being. It helps to regulate mood, improve cognitive function, and supports healthy growth and immune function. Aim for 7-9 hours of sleep each night. ⁇ c06r3n06: inputs: c06r3n06: ⁇ [INST] <> c06r3n06: You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. c06r3n06: c06r3n06: If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information. c06r3n06: <> c06r3n06: c06r3n06: Give three tips for staying healthy. [/INST] 1. Eat a balanced and nutritious diet: Make sure your meals are inclusive of a variety of fruits and vegetables, lean protein, whole grains, and healthy fats. This helps to provide your body with the essential nutrients to function at its best and can help prevent chronic diseases. c06r3n06: c06r3n06: 2. Engage in regular physical activity: Exercise is crucial for maintaining strong bones, muscles, and cardiovascular health. Aim for at least 150 minutes of moderate aerobic exercise or 75 minutes of vigorous exercise each week. c06r3n06: c06r3n06: 3. Get enough sleep: Getting enough quality sleep is crucial for physical and mental well-being. It helps to regulate mood, improve cognitive function, and supports healthy growth and immune function. Aim for 7-9 hours of sleep each night. 
⁇ c06r3n06: label_ids: c06r3n06: [-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 29871, 29896, 29889, 382, 271, 263, 6411, 8362, 322, 18254, 768, 2738, 652, 300, 29901, 8561, 1854, 596, 592, 1338, 526, 20978, 573, 310, 263, 12875, 310, 285, 21211, 322, 18655, 1849, 29892, 20793, 26823, 29892, 3353, 2646, 1144, 29892, 322, 9045, 29891, 285, 1446, 29889, 910, 6911, 304, 3867, 596, 3573, 411, 278, 18853, 18254, 374, 1237, 304, 740, 472, 967, 1900, 322, 508, 1371, 5557, 17168, 293, 10267, 2129, 29889, 13, 13, 29906, 29889, 2201, 482, 297, 4943, 9128, 6354, 29901, 1222, 6269, 895, 338, 7618, 1455, 363, 7344, 292, 4549, 289, 2873, 29892, 2301, 7799, 29892, 322, 5881, 29875, 586, 6151, 1070, 9045, 29889, 319, 326, 363, 472, 3203, 29871, 29896, 29945, 29900, 6233, 310, 17768, 403, 14911, 711, 293, 15058, 470, 29871, 29955, 29945, 6233, 310, 14877, 20657, 15058, 1269, 4723, 29889, 13, 13, 29941, 29889, 3617, 3307, 8709, 29901, 24162, 3307, 11029, 8709, 338, 7618, 1455, 363, 9128, 322, 19119, 1532, 29899, 915, 292, 29889, 739, 6911, 304, 1072, 5987, 286, 2092, 29892, 11157, 25323, 3321, 740, 29892, 322, 11286, 9045, 29891, 14321, 322, 5198, 1540, 740, 29889, 319, 326, 363, 29871, 29955, 29899, 29929, 6199, 310, 8709, 1269, 4646, 29889, 0] c06r3n06: labels: c06r3n06: 1. Eat a balanced and nutritious diet: Make sure your meals are inclusive of a variety of fruits and vegetables, lean protein, whole grains, and healthy fats. This helps to provide your body with the essential nutrients to function at its best and can help prevent chronic diseases. c06r3n06: c06r3n06: 2. Engage in regular physical activity: Exercise is crucial for maintaining strong bones, muscles, and cardiovascular health. Aim for at least 150 minutes of moderate aerobic exercise or 75 minutes of vigorous exercise each week. c06r3n06: c06r3n06: 3. Get enough sleep: Getting enough quality sleep is crucial for physical and mental well-being. It helps to regulate mood, improve cognitive function, and supports healthy growth and immune function. Aim for 7-9 hours of sleep each night. ⁇ c06r3n08: [INFO|training_args.py:1828] 2024-03-15 11:02:54,121 >> PyTorch: setting up devices c06r3n07: [INFO|training_args.py:1828] 2024-03-15 11:02:54,122 >> PyTorch: setting up devices c06r3n09: [INFO|training_args.py:1828] 2024-03-15 11:02:54,121 >> PyTorch: setting up devices c06r3n06: [INFO|training_args.py:1828] 2024-03-15 11:02:54,122 >> PyTorch: setting up devices c06r3n07: /work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/transformers/training_args.py:1741: FutureWarning: `--push_to_hub_token` is deprecated and will be removed in version 5 of 🤗 Transformers. Use `--hub_token` instead. 
c06r3n07: warnings.warn( c06r3n09: /work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/transformers/training_args.py:1741: FutureWarning: `--push_to_hub_token` is deprecated and will be removed in version 5 of 🤗 Transformers. Use `--hub_token` instead. c06r3n09: warnings.warn( c06r3n09: /work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/transformers/training_args.py:1741: FutureWarning: `--push_to_hub_token` is deprecated and will be removed in version 5 of 🤗 Transformers. Use `--hub_token` instead. c06r3n09: warnings.warn( c06r3n07: /work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/transformers/training_args.py:1741: FutureWarning: `--push_to_hub_token` is deprecated and will be removed in version 5 of 🤗 Transformers. Use `--hub_token` instead. c06r3n07: warnings.warn( c06r3n07: /work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/transformers/training_args.py:1741: FutureWarning: `--push_to_hub_token` is deprecated and will be removed in version 5 of 🤗 Transformers. Use `--hub_token` instead. c06r3n07: warnings.warn( c06r3n06: /work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/transformers/training_args.py:1741: FutureWarning: `--push_to_hub_token` is deprecated and will be removed in version 5 of 🤗 Transformers. Use `--hub_token` instead. c06r3n06: warnings.warn( c06r3n08: /work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/transformers/training_args.py:1741: FutureWarning: `--push_to_hub_token` is deprecated and will be removed in version 5 of 🤗 Transformers. Use `--hub_token` instead. c06r3n08: warnings.warn( c06r3n08: /work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/transformers/training_args.py:1741: FutureWarning: `--push_to_hub_token` is deprecated and will be removed in version 5 of 🤗 Transformers. Use `--hub_token` instead. c06r3n08: warnings.warn( c06r3n09: /work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/transformers/training_args.py:1741: FutureWarning: `--push_to_hub_token` is deprecated and will be removed in version 5 of 🤗 Transformers. Use `--hub_token` instead. c06r3n07: /work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/transformers/training_args.py:1741: FutureWarning: `--push_to_hub_token` is deprecated and will be removed in version 5 of 🤗 Transformers. Use `--hub_token` instead. c06r3n07: warnings.warn( c06r3n08: /work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/transformers/training_args.py:1741: FutureWarning: `--push_to_hub_token` is deprecated and will be removed in version 5 of 🤗 Transformers. Use `--hub_token` instead. c06r3n08: warnings.warn( c06r3n06: /work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/transformers/training_args.py:1741: FutureWarning: `--push_to_hub_token` is deprecated and will be removed in version 5 of 🤗 Transformers. Use `--hub_token` instead. c06r3n06: warnings.warn( c06r3n09: warnings.warn( c06r3n08: /work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/transformers/training_args.py:1741: FutureWarning: `--push_to_hub_token` is deprecated and will be removed in version 5 of 🤗 Transformers. Use `--hub_token` instead. 
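Note: the "Caching indices mapping at /work/home/liangjing/.cache/huggingface/datasets/..." lines that follow mean each rank is about to write the train/validation split indices of train_test_split to an .arrow cache file under the 🤗 Datasets cache. /work/home appears to be shared by all four nodes, so all 16 ranks resolve to the same cache file name, which is what the tracebacks below collide on (a workaround sketch appears after the final pdsh lines). A quick way to confirm where that cache lives, assuming the same conda environment is available on a login node:

    import datasets.config

    # resolves to the shared /work/home/.../.cache/huggingface/datasets path
    # unless HF_DATASETS_CACHE points somewhere node-local
    print(datasets.config.HF_DATASETS_CACHE)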
c06r3n08: warnings.warn( c06r3n08: Caching indices mapping at /work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-9854d2224d063093.arrow c06r3n09: /work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/transformers/training_args.py:1741: FutureWarning: `--push_to_hub_token` is deprecated and will be removed in version 5 of 🤗 Transformers. Use `--hub_token` instead. c06r3n09: warnings.warn( c06r3n06: /work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/transformers/training_args.py:1741: FutureWarning: `--push_to_hub_token` is deprecated and will be removed in version 5 of 🤗 Transformers. Use `--hub_token` instead. c06r3n06: warnings.warn( c06r3n06: /work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/transformers/training_args.py:1741: FutureWarning: `--push_to_hub_token` is deprecated and will be removed in version 5 of 🤗 Transformers. Use `--hub_token` instead. c06r3n06: warnings.warn( c06r3n06: Caching indices mapping at /work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-9854d2224d063093.arrow c06r3n07: Caching indices mapping at /work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-9854d2224d063093.arrow c06r3n09: Caching indices mapping at /work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-9854d2224d063093.arrow c06r3n08: Traceback (most recent call last): c06r3n08: File "src/train_bash.py", line 14, in c06r3n08: main() c06r3n08: File "src/train_bash.py", line 5, in main c06r3n08: run_exp() c06r3n08: File "/work/home/liangjing/LLM/LLaMA-Factory-main/src/llmtuner/train/tuner.py", line 31, in run_exp c06r3n08: run_sft(model_args, data_args, training_args, finetuning_args, generating_args, callbacks) c06r3n08: File "/work/home/liangjing/LLM/LLaMA-Factory-main/src/llmtuner/train/sft/workflow.py", line 64, in run_sft c06r3n08: **split_dataset(dataset, data_args, training_args), c06r3n08: File "/work/home/liangjing/LLM/LLaMA-Factory-main/src/llmtuner/data/utils.py", line 61, in split_dataset c06r3n08: dataset = dataset.train_test_split(test_size=val_size, seed=training_args.seed) c06r3n08: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 558, in wrapper c06r3n08: out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) c06r3n08: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/fingerprint.py", line 482, in wrapper c06r3n08: out = func(dataset, *args, **kwargs) c06r3n08: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 4544, in train_test_split c06r3n08: train_split = self.select( c06r3n08: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 558, in wrapper c06r3n08: out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) c06r3n08: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/fingerprint.py", line 482, in wrapper c06r3n08: out = func(dataset, *args, **kwargs) c06r3n08: File 
"/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 3832, in select c06r3n08: return self._select_with_indices_mapping( c06r3n08: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 558, in wrapper c06r3n08: out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) c06r3n09: Traceback (most recent call last): c06r3n08: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/fingerprint.py", line 482, in wrapper c06r3n08: out = func(dataset, *args, **kwargs) c06r3n09: File "src/train_bash.py", line 14, in c06r3n09: main() c06r3n09: File "src/train_bash.py", line 5, in main c06r3n09: run_exp() c06r3n08: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 3991, in _select_with_indices_mapping c06r3n08: os.chmod(indices_cache_file_name, 0o666 & ~umask) c06r3n09: File "/work/home/liangjing/LLM/LLaMA-Factory-main/src/llmtuner/train/tuner.py", line 31, in run_exp c06r3n08: FileNotFoundError: [Errno 2] No such file or directory: '/work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-9854d2224d063093.arrow' c06r3n09: run_sft(model_args, data_args, training_args, finetuning_args, generating_args, callbacks) c06r3n08: Traceback (most recent call last): c06r3n08: File "src/train_bash.py", line 14, in c06r3n08: main() c06r3n08: File "src/train_bash.py", line 5, in main c06r3n08: run_exp() c06r3n08: File "/work/home/liangjing/LLM/LLaMA-Factory-main/src/llmtuner/train/tuner.py", line 31, in run_exp c06r3n08: run_sft(model_args, data_args, training_args, finetuning_args, generating_args, callbacks) c06r3n08: File "/work/home/liangjing/LLM/LLaMA-Factory-main/src/llmtuner/train/sft/workflow.py", line 64, in run_sft c06r3n08: **split_dataset(dataset, data_args, training_args), c06r3n08: File "/work/home/liangjing/LLM/LLaMA-Factory-main/src/llmtuner/data/utils.py", line 61, in split_dataset c06r3n08: dataset = dataset.train_test_split(test_size=val_size, seed=training_args.seed) c06r3n08: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 558, in wrapper c06r3n08: out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) c06r3n09: File "/work/home/liangjing/LLM/LLaMA-Factory-main/src/llmtuner/train/sft/workflow.py", line 64, in run_sft c06r3n09: **split_dataset(dataset, data_args, training_args), c06r3n08: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/fingerprint.py", line 482, in wrapper c06r3n09: File "/work/home/liangjing/LLM/LLaMA-Factory-main/src/llmtuner/data/utils.py", line 61, in split_dataset c06r3n09: dataset = dataset.train_test_split(test_size=val_size, seed=training_args.seed) c06r3n08: out = func(dataset, *args, **kwargs) c06r3n08: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 4544, in train_test_split c06r3n08: train_split = self.select( c06r3n08: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 558, in wrapper c06r3n08: out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) c06r3n08: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/fingerprint.py", line 482, in wrapper c06r3n08: out = 
func(dataset, *args, **kwargs) c06r3n08: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 3832, in select c06r3n08: return self._select_with_indices_mapping( c06r3n08: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 558, in wrapper c06r3n08: out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) c06r3n08: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/fingerprint.py", line 482, in wrapper c06r3n08: out = func(dataset, *args, **kwargs) c06r3n08: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 3991, in _select_with_indices_mapping c06r3n08: os.chmod(indices_cache_file_name, 0o666 & ~umask) c06r3n09: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 558, in wrapper c06r3n09: out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) c06r3n08: FileNotFoundError: [Errno 2] No such file or directory: '/work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-9854d2224d063093.arrow' c06r3n09: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/fingerprint.py", line 482, in wrapper c06r3n09: out = func(dataset, *args, **kwargs) c06r3n09: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 4544, in train_test_split c06r3n09: train_split = self.select( c06r3n09: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 558, in wrapper c06r3n09: out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) c06r3n09: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/fingerprint.py", line 482, in wrapper c06r3n09: out = func(dataset, *args, **kwargs) c06r3n09: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 3832, in select c06r3n09: return self._select_with_indices_mapping( c06r3n09: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 558, in wrapper c06r3n09: out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) c06r3n09: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/fingerprint.py", line 482, in wrapper c06r3n09: out = func(dataset, *args, **kwargs) c06r3n09: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 3991, in _select_with_indices_mapping c06r3n09: os.chmod(indices_cache_file_name, 0o666 & ~umask) c06r3n09: FileNotFoundError: [Errno 2] No such file or directory: '/work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-9854d2224d063093.arrow' c06r3n09: Traceback (most recent call last): c06r3n09: File "src/train_bash.py", line 14, in c06r3n09: main() c06r3n09: File "src/train_bash.py", line 5, in main c06r3n09: run_exp() c06r3n09: File "/work/home/liangjing/LLM/LLaMA-Factory-main/src/llmtuner/train/tuner.py", line 31, in run_exp c06r3n09: run_sft(model_args, data_args, training_args, finetuning_args, generating_args, callbacks) c06r3n09: File 
"/work/home/liangjing/LLM/LLaMA-Factory-main/src/llmtuner/train/sft/workflow.py", line 64, in run_sft c06r3n09: **split_dataset(dataset, data_args, training_args), c06r3n09: File "/work/home/liangjing/LLM/LLaMA-Factory-main/src/llmtuner/data/utils.py", line 61, in split_dataset c06r3n09: dataset = dataset.train_test_split(test_size=val_size, seed=training_args.seed) c06r3n09: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 558, in wrapper c06r3n09: out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) c06r3n09: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/fingerprint.py", line 482, in wrapper c06r3n09: out = func(dataset, *args, **kwargs) c06r3n09: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 4544, in train_test_split c06r3n09: train_split = self.select( c06r3n09: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 558, in wrapper c06r3n09: out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) c06r3n09: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/fingerprint.py", line 482, in wrapper c06r3n09: out = func(dataset, *args, **kwargs) c06r3n09: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 3832, in select c06r3n09: return self._select_with_indices_mapping( c06r3n09: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 558, in wrapper c06r3n09: out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) c06r3n09: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/fingerprint.py", line 482, in wrapper c06r3n09: out = func(dataset, *args, **kwargs) c06r3n09: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 3991, in _select_with_indices_mapping c06r3n09: os.chmod(indices_cache_file_name, 0o666 & ~umask) c06r3n09: FileNotFoundError: [Errno 2] No such file or directory: '/work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-9854d2224d063093.arrow' c06r3n06: Traceback (most recent call last): c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/shutil.py", line 791, in move c06r3n06: os.rename(src, real_dst) c06r3n06: FileExistsError: [Errno 17] File exists: '/work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/tmp1ek64s75' -> '/work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-9854d2224d063093.arrow' c06r3n06: c06r3n06: During handling of the above exception, another exception occurred: c06r3n06: c06r3n06: Traceback (most recent call last): c06r3n06: File "src/train_bash.py", line 14, in c06r3n06: main() c06r3n06: File "src/train_bash.py", line 5, in main c06r3n06: run_exp() c06r3n06: File "/work/home/liangjing/LLM/LLaMA-Factory-main/src/llmtuner/train/tuner.py", line 31, in run_exp c06r3n06: run_sft(model_args, data_args, training_args, finetuning_args, generating_args, callbacks) c06r3n06: File 
"/work/home/liangjing/LLM/LLaMA-Factory-main/src/llmtuner/train/sft/workflow.py", line 64, in run_sft c06r3n06: **split_dataset(dataset, data_args, training_args), c06r3n06: File "/work/home/liangjing/LLM/LLaMA-Factory-main/src/llmtuner/data/utils.py", line 61, in split_dataset c06r3n06: dataset = dataset.train_test_split(test_size=val_size, seed=training_args.seed) c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 558, in wrapper c06r3n06: out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/fingerprint.py", line 482, in wrapper c06r3n06: out = func(dataset, *args, **kwargs) c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 4544, in train_test_split c06r3n06: train_split = self.select( c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 558, in wrapper c06r3n06: out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/fingerprint.py", line 482, in wrapper c06r3n06: out = func(dataset, *args, **kwargs) c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 3832, in select c06r3n06: return self._select_with_indices_mapping( c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 558, in wrapper c06r3n06: out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/fingerprint.py", line 482, in wrapper c06r3n06: out = func(dataset, *args, **kwargs) c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 3988, in _select_with_indices_mapping c06r3n06: shutil.move(tmp_file.name, indices_cache_file_name) c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/shutil.py", line 811, in move c06r3n06: copy_function(src, real_dst) c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/shutil.py", line 435, in copy2 c06r3n06: copyfile(src, dst, follow_symlinks=follow_symlinks) c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/shutil.py", line 264, in copyfile c06r3n06: with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst: c06r3n06: FileNotFoundError: [Errno 2] No such file or directory: '/work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-9854d2224d063093.arrow' c06r3n06: Traceback (most recent call last): c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/shutil.py", line 791, in move c06r3n06: os.rename(src, real_dst) c06r3n06: FileNotFoundError: [Errno 2] No such file or directory: '/work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/tmpicz0l2yt' -> '/work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-9854d2224d063093.arrow' c06r3n06: c06r3n06: During handling of the above exception, 
another exception occurred: c06r3n06: c06r3n06: Traceback (most recent call last): c06r3n06: File "src/train_bash.py", line 14, in c06r3n06: main() c06r3n06: File "src/train_bash.py", line 5, in main c06r3n06: run_exp() c06r3n06: File "/work/home/liangjing/LLM/LLaMA-Factory-main/src/llmtuner/train/tuner.py", line 31, in run_exp c06r3n06: run_sft(model_args, data_args, training_args, finetuning_args, generating_args, callbacks) c06r3n06: File "/work/home/liangjing/LLM/LLaMA-Factory-main/src/llmtuner/train/sft/workflow.py", line 64, in run_sft c06r3n06: **split_dataset(dataset, data_args, training_args), c06r3n06: File "/work/home/liangjing/LLM/LLaMA-Factory-main/src/llmtuner/data/utils.py", line 61, in split_dataset c06r3n06: dataset = dataset.train_test_split(test_size=val_size, seed=training_args.seed) c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 558, in wrapper c06r3n06: out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/fingerprint.py", line 482, in wrapper c06r3n06: out = func(dataset, *args, **kwargs) c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 4544, in train_test_split c06r3n06: train_split = self.select( c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 558, in wrapper c06r3n06: out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/fingerprint.py", line 482, in wrapper c06r3n06: out = func(dataset, *args, **kwargs) c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 3832, in select c06r3n06: return self._select_with_indices_mapping( c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 558, in wrapper c06r3n06: out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/fingerprint.py", line 482, in wrapper c06r3n06: out = func(dataset, *args, **kwargs) c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 3988, in _select_with_indices_mapping c06r3n06: shutil.move(tmp_file.name, indices_cache_file_name) c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/shutil.py", line 811, in move c06r3n06: copy_function(src, real_dst) c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/shutil.py", line 435, in copy2 c06r3n06: copyfile(src, dst, follow_symlinks=follow_symlinks) c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/shutil.py", line 264, in copyfile c06r3n06: with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst: c06r3n06: FileNotFoundError: [Errno 2] No such file or directory: '/work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-9854d2224d063093.arrow' c06r3n06: Traceback (most recent call last): c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/shutil.py", line 791, in move c06r3n06: os.rename(src, real_dst) c06r3n06: FileNotFoundError: 
[Errno 2] No such file or directory: '/work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/tmpf42whgbp' -> '/work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-9854d2224d063093.arrow' c06r3n06: c06r3n06: During handling of the above exception, another exception occurred: c06r3n06: c06r3n06: Traceback (most recent call last): c06r3n06: File "src/train_bash.py", line 14, in c06r3n06: main() c06r3n06: File "src/train_bash.py", line 5, in main c06r3n06: run_exp() c06r3n06: File "/work/home/liangjing/LLM/LLaMA-Factory-main/src/llmtuner/train/tuner.py", line 31, in run_exp c06r3n06: run_sft(model_args, data_args, training_args, finetuning_args, generating_args, callbacks) c06r3n06: File "/work/home/liangjing/LLM/LLaMA-Factory-main/src/llmtuner/train/sft/workflow.py", line 64, in run_sft c06r3n06: **split_dataset(dataset, data_args, training_args), c06r3n06: File "/work/home/liangjing/LLM/LLaMA-Factory-main/src/llmtuner/data/utils.py", line 61, in split_dataset c06r3n06: dataset = dataset.train_test_split(test_size=val_size, seed=training_args.seed) c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 558, in wrapper c06r3n06: out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/fingerprint.py", line 482, in wrapper c06r3n06: out = func(dataset, *args, **kwargs) c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 4544, in train_test_split c06r3n06: train_split = self.select( c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 558, in wrapper c06r3n06: out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/fingerprint.py", line 482, in wrapper c06r3n06: out = func(dataset, *args, **kwargs) c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 3832, in select c06r3n06: return self._select_with_indices_mapping( c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 558, in wrapper c06r3n06: out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/fingerprint.py", line 482, in wrapper c06r3n06: out = func(dataset, *args, **kwargs) c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 3988, in _select_with_indices_mapping c06r3n06: shutil.move(tmp_file.name, indices_cache_file_name) c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/shutil.py", line 811, in move c06r3n06: copy_function(src, real_dst) c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/shutil.py", line 435, in copy2 c06r3n06: copyfile(src, dst, follow_symlinks=follow_symlinks) c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/shutil.py", line 264, in copyfile c06r3n06: with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst: c06r3n06: 
FileNotFoundError: [Errno 2] No such file or directory: '/work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-9854d2224d063093.arrow' c06r3n06: Traceback (most recent call last): c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/shutil.py", line 791, in move c06r3n06: os.rename(src, real_dst) c06r3n06: FileExistsError: [Errno 17] File exists: '/work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/tmp07exwzzo' -> '/work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-9854d2224d063093.arrow' c06r3n06: c06r3n06: During handling of the above exception, another exception occurred: c06r3n06: c06r3n06: Traceback (most recent call last): c06r3n06: File "src/train_bash.py", line 14, in c06r3n06: main() c06r3n06: File "src/train_bash.py", line 5, in main c06r3n06: run_exp() c06r3n06: File "/work/home/liangjing/LLM/LLaMA-Factory-main/src/llmtuner/train/tuner.py", line 31, in run_exp c06r3n06: run_sft(model_args, data_args, training_args, finetuning_args, generating_args, callbacks) c06r3n06: File "/work/home/liangjing/LLM/LLaMA-Factory-main/src/llmtuner/train/sft/workflow.py", line 64, in run_sft c06r3n06: **split_dataset(dataset, data_args, training_args), c06r3n06: File "/work/home/liangjing/LLM/LLaMA-Factory-main/src/llmtuner/data/utils.py", line 61, in split_dataset c06r3n06: dataset = dataset.train_test_split(test_size=val_size, seed=training_args.seed) c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 558, in wrapper c06r3n06: out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/fingerprint.py", line 482, in wrapper c06r3n06: out = func(dataset, *args, **kwargs) c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 4544, in train_test_split c06r3n06: train_split = self.select( c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 558, in wrapper c06r3n06: out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/fingerprint.py", line 482, in wrapper c06r3n06: out = func(dataset, *args, **kwargs) c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 3832, in select c06r3n06: return self._select_with_indices_mapping( c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 558, in wrapper c06r3n06: out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/fingerprint.py", line 482, in wrapper c06r3n06: out = func(dataset, *args, **kwargs) c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 3988, in _select_with_indices_mapping c06r3n06: shutil.move(tmp_file.name, indices_cache_file_name) c06r3n06: File 
"/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/shutil.py", line 811, in move c06r3n06: copy_function(src, real_dst) c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/shutil.py", line 435, in copy2 c06r3n06: copyfile(src, dst, follow_symlinks=follow_symlinks) c06r3n06: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/shutil.py", line 264, in copyfile c06r3n06: with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst: c06r3n06: FileNotFoundError: [Errno 2] No such file or directory: '/work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-9854d2224d063093.arrow' c06r3n07: Traceback (most recent call last): c06r3n07: File "src/train_bash.py", line 14, in c06r3n07: main() c06r3n07: File "src/train_bash.py", line 5, in main c06r3n07: run_exp() c06r3n07: File "/work/home/liangjing/LLM/LLaMA-Factory-main/src/llmtuner/train/tuner.py", line 31, in run_exp c06r3n07: run_sft(model_args, data_args, training_args, finetuning_args, generating_args, callbacks) c06r3n07: File "/work/home/liangjing/LLM/LLaMA-Factory-main/src/llmtuner/train/sft/workflow.py", line 64, in run_sft c06r3n07: **split_dataset(dataset, data_args, training_args), c06r3n07: File "/work/home/liangjing/LLM/LLaMA-Factory-main/src/llmtuner/data/utils.py", line 61, in split_dataset c06r3n07: dataset = dataset.train_test_split(test_size=val_size, seed=training_args.seed) c06r3n07: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 558, in wrapper c06r3n07: out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) c06r3n07: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/fingerprint.py", line 482, in wrapper c06r3n07: out = func(dataset, *args, **kwargs) c06r3n07: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 4544, in train_test_split c06r3n07: train_split = self.select( c06r3n07: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 558, in wrapper c06r3n07: out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) c06r3n07: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/fingerprint.py", line 482, in wrapper c06r3n07: out = func(dataset, *args, **kwargs) c06r3n07: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 3832, in select c06r3n07: return self._select_with_indices_mapping( c06r3n07: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 558, in wrapper c06r3n07: out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) c06r3n07: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/fingerprint.py", line 482, in wrapper c06r3n07: out = func(dataset, *args, **kwargs) c06r3n07: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 3995, in _select_with_indices_mapping c06r3n07: return self._new_dataset_with_indices( c06r3n07: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 3737, in _new_dataset_with_indices c06r3n07: indices_table = MemoryMappedTable.from_file(indices_cache_file_name) c06r3n07: File 
"/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/table.py", line 1018, in from_file c06r3n07: table = _memory_mapped_arrow_table_from_file(filename) c06r3n07: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/table.py", line 64, in _memory_mapped_arrow_table_from_file c06r3n07: opened_stream = _memory_mapped_record_batch_reader_from_file(filename) c06r3n07: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/table.py", line 50, in _memory_mapped_record_batch_reader_from_file c06r3n07: return pa.ipc.open_stream(memory_mapped_stream) c06r3n07: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/pyarrow/ipc.py", line 190, in open_stream c06r3n07: return RecordBatchStreamReader(source, options=options, c06r3n07: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/pyarrow/ipc.py", line 52, in __init__ c06r3n07: self._open(source, options=options, memory_pool=memory_pool) c06r3n07: File "pyarrow/ipc.pxi", line 974, in pyarrow.lib._RecordBatchStreamReader._open c06r3n07: File "pyarrow/error.pxi", line 154, in pyarrow.lib.pyarrow_internal_check_status c06r3n07: File "pyarrow/error.pxi", line 91, in pyarrow.lib.check_status c06r3n07: pyarrow.lib.ArrowInvalid: Tried reading schema message, was null or length 0 c06r3n09: Caching indices mapping at /work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-e052a5760cca9436.arrow c06r3n09: Error in atexit._run_exitfuncs: c06r3n09: Traceback (most recent call last): c06r3n09: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/deepspeed/ops/transformer/inference/triton/matmul_ext.py", line 74, in load c06r3n09: loaded_dict = pickle.load(handle) c06r3n09: OSError: [Errno 5] Input/output error c06r3n09: /work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/accelerate/accelerator.py:432: FutureWarning: Passing the following arguments to `Accelerator` is deprecated and will be removed in version 1.0 of Accelerate: dict_keys(['dispatch_batches', 'split_batches']). Please pass an `accelerate.DataLoaderConfiguration` instead: c06r3n09: dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False) c06r3n09: warnings.warn( c06r3n09: /work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/accelerate/accelerator.py:432: FutureWarning: Passing the following arguments to `Accelerator` is deprecated and will be removed in version 1.0 of Accelerate: dict_keys(['dispatch_batches', 'split_batches']). Please pass an `accelerate.DataLoaderConfiguration` instead: c06r3n09: dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False) c06r3n09: warnings.warn( c06r3n08: /work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/accelerate/accelerator.py:432: FutureWarning: Passing the following arguments to `Accelerator` is deprecated and will be removed in version 1.0 of Accelerate: dict_keys(['dispatch_batches', 'split_batches']). 
Please pass an `accelerate.DataLoaderConfiguration` instead: c06r3n08: dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False) c06r3n08: warnings.warn( c06r3n08: /work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/accelerate/accelerator.py:432: FutureWarning: Passing the following arguments to `Accelerator` is deprecated and will be removed in version 1.0 of Accelerate: dict_keys(['dispatch_batches', 'split_batches']). Please pass an `accelerate.DataLoaderConfiguration` instead: c06r3n08: dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False) c06r3n08: warnings.warn( c06r3n07: Traceback (most recent call last): c06r3n07: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/shutil.py", line 791, in move c06r3n07: os.rename(src, real_dst) c06r3n07: FileExistsError: [Errno 17] File exists: '/work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/tmpptbva0zx' -> '/work/home/liangjing/.cache/huggingface/datasets/json/default-c71a5e5c5041e81e/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-e052a5760cca9436.arrow' c06r3n07: c06r3n07: During handling of the above exception, another exception occurred: c06r3n07: c06r3n07: Traceback (most recent call last): c06r3n07: File "src/train_bash.py", line 14, in c06r3n07: main() c06r3n07: File "src/train_bash.py", line 5, in main c06r3n07: run_exp() c06r3n07: File "/work/home/liangjing/LLM/LLaMA-Factory-main/src/llmtuner/train/tuner.py", line 31, in run_exp c06r3n07: run_sft(model_args, data_args, training_args, finetuning_args, generating_args, callbacks) c06r3n07: File "/work/home/liangjing/LLM/LLaMA-Factory-main/src/llmtuner/train/sft/workflow.py", line 64, in run_sft c06r3n07: **split_dataset(dataset, data_args, training_args), c06r3n07: File "/work/home/liangjing/LLM/LLaMA-Factory-main/src/llmtuner/data/utils.py", line 61, in split_dataset c06r3n07: dataset = dataset.train_test_split(test_size=val_size, seed=training_args.seed) c06r3n07: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 558, in wrapper c06r3n07: out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) c06r3n07: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/fingerprint.py", line 482, in wrapper c06r3n07: out = func(dataset, *args, **kwargs) c06r3n07: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 4551, in train_test_split c06r3n07: test_split = self.select( c06r3n07: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 558, in wrapper c06r3n07: out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) c06r3n07: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/fingerprint.py", line 482, in wrapper c06r3n07: out = func(dataset, *args, **kwargs) c06r3n07: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 3832, in select c06r3n07: return self._select_with_indices_mapping( c06r3n07: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 558, in wrapper c06r3n07: out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) c06r3n07: File 
"/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/fingerprint.py", line 482, in wrapper c06r3n07: out = func(dataset, *args, **kwargs) c06r3n07: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 3988, in _select_with_indices_mapping c06r3n07: shutil.move(tmp_file.name, indices_cache_file_name) c06r3n07: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/shutil.py", line 811, in move c06r3n07: copy_function(src, real_dst) c06r3n07: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/shutil.py", line 435, in copy2 c06r3n07: copyfile(src, dst, follow_symlinks=follow_symlinks) c06r3n07: File "/work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/shutil.py", line 285, in copyfile c06r3n07: copyfileobj(fsrc, fdst) c06r3n07: FileNotFoundError: [Errno 2] No such file or directory c06r3n07: /work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/accelerate/accelerator.py:432: FutureWarning: Passing the following arguments to `Accelerator` is deprecated and will be removed in version 1.0 of Accelerate: dict_keys(['dispatch_batches', 'split_batches']). Please pass an `accelerate.DataLoaderConfiguration` instead: c06r3n07: dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False) c06r3n07: warnings.warn( c06r3n07: /work/home/liangjing/anaconda3/envs/torch2.1/lib/python3.8/site-packages/accelerate/accelerator.py:432: FutureWarning: Passing the following arguments to `Accelerator` is deprecated and will be removed in version 1.0 of Accelerate: dict_keys(['dispatch_batches', 'split_batches']). Please pass an `accelerate.DataLoaderConfiguration` instead: c06r3n07: dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False) c06r3n07: warnings.warn( c06r3n09: [INFO|trainer.py:571] 2024-03-15 11:02:58,522 >> Using auto half precision backend c06r3n08: [2024-03-15 11:03:00,939] [INFO] [launch.py:315:sigkill_handler] Killing subprocess 5117 c06r3n08: [2024-03-15 11:03:00,966] [INFO] [launch.py:315:sigkill_handler] Killing subprocess 5118 c06r3n08: [2024-03-15 11:03:00,967] [INFO] [launch.py:315:sigkill_handler] Killing subprocess 5119 c06r3n07: [2024-03-15 11:03:01,034] [INFO] [launch.py:315:sigkill_handler] Killing subprocess 24634 c06r3n07: [2024-03-15 11:03:01,035] [INFO] [launch.py:315:sigkill_handler] Killing subprocess 24635 c06r3n08: [2024-03-15 11:03:01,083] [INFO] [launch.py:315:sigkill_handler] Killing subprocess 5120 c06r3n07: [2024-03-15 11:03:01,141] [INFO] [launch.py:315:sigkill_handler] Killing subprocess 24636 c06r3n08: [2024-03-15 11:03:01,160] [ERROR] [launch.py:321:sigkill_handler] ['/work/home/liangjing/anaconda3/envs/torch2.1/bin/python', '-u', 'src/train_bash.py', '--local_rank=3', '--stage', 'sft', '--do_train', '--template', 'llama2', '--dataset', 'alpaca_gpt4_en,alpaca_gpt4_zh', '--finetuning_type', 'full', '--model_name_or_path', '/work/home/liangjing/.cache/modelscope/hub/skyline2006/llama-7b', '--output_dir', '/work/share/huchen1/liangjj/llama_factory', '--per_device_train_batch_size', '1', '--per_device_eval_batch_size', '1', '--gradient_accumulation_steps', '1', '--preprocessing_num_workers', '2', '--lr_scheduler_type', 'cosine', '--logging_steps', '10', '--save_steps', '100', '--eval_steps', '100', '--learning_rate', '5e-5', '--max_grad_norm', '0.5', '--num_train_epochs', '4.0', '--val_size', '0.01', '--evaluation_strategy', 'steps', '--load_best_model_at_end', '--weight_decay', 
'0.', '--warmup_ratio', '0.03', '--plot_loss', '--fp16', '--save_on_each_node', '--deepspeed', 'deepspeed.json'] exits with return code = 1 c06r3n07: [2024-03-15 11:03:01,205] [INFO] [launch.py:315:sigkill_handler] Killing subprocess 24637 c06r3n07: [2024-03-15 11:03:01,218] [ERROR] [launch.py:321:sigkill_handler] ['/work/home/liangjing/anaconda3/envs/torch2.1/bin/python', '-u', 'src/train_bash.py', '--local_rank=3', '--stage', 'sft', '--do_train', '--template', 'llama2', '--dataset', 'alpaca_gpt4_en,alpaca_gpt4_zh', '--finetuning_type', 'full', '--model_name_or_path', '/work/home/liangjing/.cache/modelscope/hub/skyline2006/llama-7b', '--output_dir', '/work/share/huchen1/liangjj/llama_factory', '--per_device_train_batch_size', '1', '--per_device_eval_batch_size', '1', '--gradient_accumulation_steps', '1', '--preprocessing_num_workers', '2', '--lr_scheduler_type', 'cosine', '--logging_steps', '10', '--save_steps', '100', '--eval_steps', '100', '--learning_rate', '5e-5', '--max_grad_norm', '0.5', '--num_train_epochs', '4.0', '--val_size', '0.01', '--evaluation_strategy', 'steps', '--load_best_model_at_end', '--weight_decay', '0.', '--warmup_ratio', '0.03', '--plot_loss', '--fp16', '--save_on_each_node', '--deepspeed', 'deepspeed.json'] exits with return code = 1 c06r3n06: [2024-03-15 11:03:01,849] [INFO] [launch.py:315:sigkill_handler] Killing subprocess 9280 c06r3n09: [2024-03-15 11:03:01,852] [INFO] [launch.py:315:sigkill_handler] Killing subprocess 6714 c06r3n06: [2024-03-15 11:03:01,863] [INFO] [launch.py:315:sigkill_handler] Killing subprocess 9281 c06r3n06: [2024-03-15 11:03:01,876] [INFO] [launch.py:315:sigkill_handler] Killing subprocess 9282 c06r3n06: [2024-03-15 11:03:01,888] [INFO] [launch.py:315:sigkill_handler] Killing subprocess 9283 c06r3n06: [2024-03-15 11:03:01,888] [ERROR] [launch.py:321:sigkill_handler] ['/work/home/liangjing/anaconda3/envs/torch2.1/bin/python', '-u', 'src/train_bash.py', '--local_rank=3', '--stage', 'sft', '--do_train', '--template', 'llama2', '--dataset', 'alpaca_gpt4_en,alpaca_gpt4_zh', '--finetuning_type', 'full', '--model_name_or_path', '/work/home/liangjing/.cache/modelscope/hub/skyline2006/llama-7b', '--output_dir', '/work/share/huchen1/liangjj/llama_factory', '--per_device_train_batch_size', '1', '--per_device_eval_batch_size', '1', '--gradient_accumulation_steps', '1', '--preprocessing_num_workers', '2', '--lr_scheduler_type', 'cosine', '--logging_steps', '10', '--save_steps', '100', '--eval_steps', '100', '--learning_rate', '5e-5', '--max_grad_norm', '0.5', '--num_train_epochs', '4.0', '--val_size', '0.01', '--evaluation_strategy', 'steps', '--load_best_model_at_end', '--weight_decay', '0.', '--warmup_ratio', '0.03', '--plot_loss', '--fp16', '--save_on_each_node', '--deepspeed', 'deepspeed.json'] exits with return code = 1 c06r3n09: [2024-03-15 11:03:01,930] [INFO] [launch.py:315:sigkill_handler] Killing subprocess 6715 c06r3n09: [2024-03-15 11:03:02,008] [INFO] [launch.py:315:sigkill_handler] Killing subprocess 6716 c06r3n09: [2024-03-15 11:03:02,032] [INFO] [launch.py:315:sigkill_handler] Killing subprocess 6717 c06r3n09: [2024-03-15 11:03:02,032] [ERROR] [launch.py:321:sigkill_handler] ['/work/home/liangjing/anaconda3/envs/torch2.1/bin/python', '-u', 'src/train_bash.py', '--local_rank=3', '--stage', 'sft', '--do_train', '--template', 'llama2', '--dataset', 'alpaca_gpt4_en,alpaca_gpt4_zh', '--finetuning_type', 'full', '--model_name_or_path', '/work/home/liangjing/.cache/modelscope/hub/skyline2006/llama-7b', '--output_dir', 
'/work/share/huchen1/liangjj/llama_factory', '--per_device_train_batch_size', '1', '--per_device_eval_batch_size', '1', '--gradient_accumulation_steps', '1', '--preprocessing_num_workers', '2', '--lr_scheduler_type', 'cosine', '--logging_steps', '10', '--save_steps', '100', '--eval_steps', '100', '--learning_rate', '5e-5', '--max_grad_norm', '0.5', '--num_train_epochs', '4.0', '--val_size', '0.01', '--evaluation_strategy', 'steps', '--load_best_model_at_end', '--weight_decay', '0.', '--warmup_ratio', '0.03', '--plot_loss', '--fp16', '--save_on_each_node', '--deepspeed', 'deepspeed.json'] exits with return code = 1 pdsh@c06r3n06: c06r3n08: ssh exited with exit code 1 pdsh@c06r3n06: c06r3n07: ssh exited with exit code 1 pdsh@c06r3n06: c06r3n06: ssh exited with exit code 1 pdsh@c06r3n06: c06r3n09: ssh exited with exit code 1
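Note: the run dies before training starts because all 16 ranks call dataset.train_test_split(test_size=val_size, seed=training_args.seed) (src/llmtuner/data/utils.py, line 61) at roughly the same time, and 🤗 Datasets caches the resulting indices under the shared ~/.cache/huggingface/datasets directory. The FileExistsError/FileNotFoundError pairs (several ranks renaming their tmp* files onto the same cache-....arrow target), the ArrowInvalid "Tried reading schema message, was null or length 0" on c06r3n07 (a rank reading a half-written indices file), and the OSError [Errno 5] raised while DeepSpeed's atexit handler reloads its Triton autotune cache all point at ranks contending on the same shared-filesystem files rather than at independent bugs; the --push_to_hub_token and Accelerate dispatch_batches/split_batches FutureWarnings are deprecation notices only and are unrelated to the failure. One possible workaround, assuming split_dataset in LLaMA-Factory can be patched locally, is to keep the split indices in memory so that no rank writes an indices cache file at all; the standalone sketch below (toy data, not the project's code) shows that train_test_split accepts keep_in_memory for exactly this:

    from datasets import Dataset

    # toy dataset standing in for the tokenized alpaca_gpt4 data
    ds = Dataset.from_dict({"text": [f"example {i}" for i in range(100)]})

    # keep_in_memory=True performs the split without writing any
    # cache-....arrow indices file, so concurrent ranks cannot collide
    split = ds.train_test_split(test_size=0.01, seed=42, keep_in_memory=True)
    print(len(split["train"]), len(split["test"]))

Alternative directions, untested here: point HF_DATASETS_CACHE at node-local storage (e.g. /tmp) in the environment exported to each worker, or have only one process per node run the preprocessing and split (e.g. inside the trainer's main_process_first context) while the other ranks wait and then load the cached result.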