init commit

032b90a1 · luopl · 032b90a1 · 032b90a1 · 032b90a1 · 032b90a1
Commit 032b90a1 authored Sep 12, 2024 by luopl
20 changed files
--- a/LLaMA-Factory/evaluation/cmmlu/cmmlu.zip
+++ b/LLaMA-Factory/evaluation/cmmlu/cmmlu.zip
--- a/LLaMA-Factory/evaluation/cmmlu/mapping.json
+++ b/LLaMA-Factory/evaluation/cmmlu/mapping.json
--- a/LLaMA-Factory/evaluation/mmlu/mapping.json
+++ b/LLaMA-Factory/evaluation/mmlu/mapping.json
--- a/LLaMA-Factory/evaluation/mmlu/mmlu.py
+++ b/LLaMA-Factory/evaluation/mmlu/mmlu.py
--- a/LLaMA-Factory/evaluation/mmlu/mmlu.zip
+++ b/LLaMA-Factory/evaluation/mmlu/mmlu.zip
--- a/LLaMA-Factory/examples/README.md
+++ b/LLaMA-Factory/examples/README.md
--- a/LLaMA-Factory/examples/README_zh.md
+++ b/LLaMA-Factory/examples/README_zh.md
--- a/LLaMA-Factory/examples/accelerate/fsdp_config.yaml
+++ b/LLaMA-Factory/examples/accelerate/fsdp_config.yaml
+compute_environment: LOCAL_MACHINE # Accelerate launch config for FSDP training on the local machine
+debug: false
+distributed_type: FSDP # PyTorch Fully Sharded Data Parallel
+downcast_bf16: 'no' # quoted so YAML keeps the string 'no' rather than a boolean
+fsdp_config:
+  fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP # wrap each transformer block as its own FSDP unit
+  fsdp_backward_prefetch: BACKWARD_PRE # prefetch the next parameters before the current gradient computation
+  fsdp_forward_prefetch: false
+  fsdp_cpu_ram_efficient_loading: true # only the first process loads the checkpoint; needs fsdp_sync_module_states
+  fsdp_offload_params: true # offload may affect training speed
+  fsdp_sharding_strategy: FULL_SHARD # shard parameters, gradients and optimizer states
+  fsdp_state_dict_type: FULL_STATE_DICT # save a full (unsharded) state dict
+  fsdp_sync_module_states: true # broadcast module states from rank 0 at initialization
+  fsdp_use_orig_params: true
+machine_rank: 0 # rank of this node; 0 is the main node
+main_training_function: main
+mixed_precision: fp16 # or bf16
+num_machines: 1 # the number of nodes
+num_processes: 2 # the number of GPUs in all nodes
+rdzv_backend: static
+same_network: true
+tpu_env: []
+tpu_use_cluster: false
+tpu_use_sudo: false
+use_cpu: false
--- a/LLaMA-Factory/examples/deepspeed/ds_z0_config.json
+++ b/LLaMA-Factory/examples/deepspeed/ds_z0_config.json
+{
+  "train_batch_size": "auto",
+  "train_micro_batch_size_per_gpu": "auto",
+  "gradient_accumulation_steps": "auto",
+  "gradient_clipping": "auto",
+  "zero_allow_untested_optimizer": true,
+  "fp16": {
+    "enabled": "auto",
+    "loss_scale": 0,
+    "loss_scale_window": 1000,
+    "initial_scale_power": 16,
+    "hysteresis": 2,
+    "min_loss_scale": 1
+  },
+  "bf16": {
+    "enabled": "auto"
+  },
+  "zero_optimization": {
+    "stage": 0,
+    "allgather_partitions": true,
+    "allgather_bucket_size": 5e8,
+    "overlap_comm": true,
+    "reduce_scatter": true,
+    "reduce_bucket_size": 5e8,
+    "contiguous_gradients": true,
+    "round_robin_gradients": true
+  }
+}
\ No newline at end of file
--- a/LLaMA-Factory/examples/deepspeed/ds_z2_config.json
+++ b/LLaMA-Factory/examples/deepspeed/ds_z2_config.json
+{
+  "train_batch_size": "auto",
+  "train_micro_batch_size_per_gpu": "auto",
+  "gradient_accumulation_steps": "auto",
+  "gradient_clipping": "auto",
+  "zero_allow_untested_optimizer": true,
+  "fp16": {
+    "enabled": "auto",
+    "loss_scale": 0,
+    "loss_scale_window": 1000,
+    "initial_scale_power": 16,
+    "hysteresis": 2,
+    "min_loss_scale": 1
+  },
+  "bf16": {
+    "enabled": "auto"
+  },
+  "zero_optimization": {
+    "stage": 2,
+    "allgather_partitions": true,
+    "allgather_bucket_size": 5e8,
+    "overlap_comm": true,
+    "reduce_scatter": true,
+    "reduce_bucket_size": 5e8,
+    "contiguous_gradients": true,
+    "round_robin_gradients": true
+  }
+}
\ No newline at end of file
--- a/LLaMA-Factory/examples/deepspeed/ds_z2_offload_config.json
+++ b/LLaMA-Factory/examples/deepspeed/ds_z2_offload_config.json
--- a/LLaMA-Factory/examples/deepspeed/ds_z3_config.json
+++ b/LLaMA-Factory/examples/deepspeed/ds_z3_config.json
--- a/LLaMA-Factory/examples/deepspeed/ds_z3_offload_config.json
+++ b/LLaMA-Factory/examples/deepspeed/ds_z3_offload_config.json
--- a/LLaMA-Factory/examples/extras/badam/llama3_full_sft.yaml
+++ b/LLaMA-Factory/examples/extras/badam/llama3_full_sft.yaml
--- a/LLaMA-Factory/examples/extras/fsdp_qlora/llama3_lora_sft.yaml
+++ b/LLaMA-Factory/examples/extras/fsdp_qlora/llama3_lora_sft.yaml
--- a/LLaMA-Factory/examples/extras/fsdp_qlora/train.sh
+++ b/LLaMA-Factory/examples/extras/fsdp_qlora/train.sh
+#!/bin/bash
+# DO NOT use GPTQ/AWQ models with FSDP+QLoRA
+CUDA_VISIBLE_DEVICES=0,1 accelerate launch \
+    --config_file examples/accelerate/fsdp_config.yaml \
+    src/train.py examples/extras/fsdp_qlora/llama3_lora_sft.yaml
--- a/LLaMA-Factory/examples/extras/galore/llama3_full_sft.yaml
+++ b/LLaMA-Factory/examples/extras/galore/llama3_full_sft.yaml
--- a/LLaMA-Factory/examples/extras/llama_pro/expand.sh
+++ b/LLaMA-Factory/examples/extras/llama_pro/expand.sh
--- a/LLaMA-Factory/examples/extras/llama_pro/llama3_freeze_sft.yaml
+++ b/LLaMA-Factory/examples/extras/llama_pro/llama3_freeze_sft.yaml
--- a/LLaMA-Factory/examples/extras/loraplus/llama3_lora_sft.yaml
+++ b/LLaMA-Factory/examples/extras/loraplus/llama3_lora_sft.yaml