Commit 0371621a authored by chenzk's avatar chenzk
Browse files

v1.0

parents
Pipeline #1989 canceled with stages
import numpy as np
import tiktoken
def encode_file(input_file_path, output_file_path, tokenizer_name):
    """Tokenize a text file and write the token ids to a raw binary file.

    Args:
        input_file_path: Path of the text file to read.
        output_file_path: Path of the binary file to write (flat uint32 ids,
            no header — readable back with ``np.fromfile(..., dtype=np.uint32)``).
        tokenizer_name: Name of a tiktoken encoding, e.g. 'cl100k_base'.
    """
    tokenizer = tiktoken.get_encoding(tokenizer_name)
    print(tokenizer)
    with open(input_file_path, 'r') as f:
        data = f.read()
    enc_data = tokenizer.encode(data)
    # uint32 is sufficient: cl100k_base has ~100k token ids, far below 2**32.
    enc_data = np.array(enc_data, dtype=np.uint32)
    enc_data.tofile(output_file_path)


if __name__ == "__main__":
    # Guard the entry point so importing this module does not immediately
    # tokenize 'input.txt' as a side effect.
    encode_file('input.txt', 'train.bin', 'cl100k_base')
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
{
"model_args": {
"block_size": 131072,
"vocab_size": 128256,
"rope_freq_base": 10000,
"rope_freq_scale": 1.0,
"n_layer": 32,
"num_kv_heads": 8,
"head_size": 128,
"n_head": 32,
"n_embd": 4096,
"intermediate_size": 14336,
"dropout": 0.0,
"bias": false,
"multiple_of": 256,
"norm_eps": 1e-05,
"sliding_window": null,
"gradient_checkpointing": false
}
}
\ No newline at end of file
{
"model_args": {
"block_size": 131072,
"vocab_size": 128256,
"rope_freq_base": 10000,
"rope_freq_scale": 1.0,
"n_layer": 28,
"num_kv_heads": 8,
"head_size": 128,
"n_head": 24,
"n_embd": 3072,
"intermediate_size": 8192,
"dropout": 0.0,
"bias": false,
"multiple_of": 256,
"norm_eps": 1e-05,
"sliding_window": null,
"gradient_checkpointing": false
}
}
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
# Export an ALLaMo checkpoint (in /home/data/llama3/) to Hugging Face format,
# writing the converted model to /home/data/llama-hf/.
python export_to_hf.py --input_dir="/home/data/llama3/" --output_dir="/home/data/llama-hf/"
This diff is collapsed.
# Download git-lfs v3.5.1 through the ghproxy GitHub mirror (useful where
# direct access to github.com is slow or blocked).
wget https://mirror.ghproxy.com/https://github.com/git-lfs/git-lfs/releases/download/v3.5.1/git-lfs-linux-amd64-v3.5.1.tar.gz
# Unpack the release tarball into ./git-lfs-3.5.1/.
tar -xzvf git-lfs-linux-amd64-v3.5.1.tar.gz
# Run the bundled installer script shipped with the release.
./git-lfs-3.5.1/install.sh
# Remove the extracted directory and the downloaded archive to clean up.
rm -rf git-lfs-3.5.1 git-lfs-linux-amd64-v3.5.1.tar.gz
This diff is collapsed.
# Import Hugging Face Llama weights (Meta-Llama-3.1-8B) into an ALLaMo
# checkpoint directory.
python import_hf_llama_weights.py --huggingface_model Meta-Llama-3.1-8B --output_dir allamo-llama-3.1-8b
# Same conversion for the 3B model — presumably kept as a convenience; verify
# the target directory name before enabling.
# python import_hf_llama_weights.py --huggingface_model Llama-3.2-3B --output_dir allamo-llama-3.2-3B
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment