Delete git submodule

154afe21 · chenych · ead0d966 · 8bcc3dc7
Commit 154afe21 authored Nov 05, 2024 by chenych
4 changed files
--- a/Llama-Factory @ 8bcc3dc7
+++ b/Llama-Factory @ 8bcc3dc7
-Subproject commit 8bcc3dc7ab44595f0cf07607f0937f3d2ee428d7
--- a/README.md
+++ b/README.md
@@ -58,7 +58,7 @@ DTK驱动: dtk24.04.2
 python: 3.10
 torch: 2.1.0
 llama-factory: 0.8.3
-transformers: 4.42.4
+transformers: >=4.41.2
 vllm: 0.5.4
 ```
 `Tips：以上dtk驱动、python、torch等DCU相关工具版本需要严格一一对应`

--- a/llama-factory-v0.8.3/examples/train_full/gemma2_full_sft_ds3.yaml
+++ b/llama-factory-v0.8.3/examples/train_full/gemma2_full_sft_ds3.yaml
+### model
+model_name_or_path: google/gemma-2-2b
+### method
+stage: sft
+do_train: true
+finetuning_type: full
+deepspeed: examples/deepspeed/ds_z3_config.json
+### dataset
+dataset: identity,alpaca_en_demo
+template: gemma
+cutoff_len: 1024
+max_samples: 1000
+overwrite_cache: true
+preprocessing_num_workers: 16
+### output
+output_dir: saves/gemma-2-2b/full/sft
+logging_steps: 10
+save_steps: 500
+plot_loss: true
+overwrite_output_dir: true
+### train
+per_device_train_batch_size: 1
+gradient_accumulation_steps: 2
+learning_rate: 1.0e-5
+num_train_epochs: 3.0
+lr_scheduler_type: cosine
+warmup_ratio: 0.1
+bf16: true
+ddp_timeout: 180000000
+### eval
+val_size: 0.1
+per_device_eval_batch_size: 1
+eval_strategy: steps
+eval_steps: 500
--- a/llama-factory-v0.8.3/examples/train_lora/gemma2_lora_sft_ds3.yaml
+++ b/llama-factory-v0.8.3/examples/train_lora/gemma2_lora_sft_ds3.yaml
+### model
+model_name_or_path: google/gemma-2-2b
+### method
+stage: sft
+do_train: true
+finetuning_type: lora
+lora_target: all
+deepspeed: examples/deepspeed/ds_z3_config.json
+### dataset
+dataset: identity,alpaca_en_demo
+template: gemma
+cutoff_len: 1024
+max_samples: 1000
+overwrite_cache: true
+preprocessing_num_workers: 16
+### output
+output_dir: saves/gemma-2-2b/lora/sft
+logging_steps: 10
+save_steps: 500
+plot_loss: true
+overwrite_output_dir: true
+### train
+per_device_train_batch_size: 1
+gradient_accumulation_steps: 2
+learning_rate: 1.0e-4
+num_train_epochs: 3.0
+lr_scheduler_type: cosine
+warmup_ratio: 0.1
+bf16: true
+ddp_timeout: 180000000
+### eval
+val_size: 0.1
+per_device_eval_batch_size: 1
+eval_strategy: steps
+eval_steps: 500