Commit 0371621a authored by chenzk's avatar chenzk
Browse files

v1.0

parents
Pipeline #1989 canceled with stages
import numpy as np
import tiktoken
def encode_file(input_file_path, output_file_path, tokenizer_name):
    """Tokenize a text file and write the token ids to a raw binary file.

    Args:
        input_file_path: Path of the text file to read.
        output_file_path: Path of the binary file to write (flat uint32 ids,
            no header — readable back with ``np.fromfile(..., dtype=np.uint32)``).
        tokenizer_name: Name of a tiktoken encoding, e.g. 'cl100k_base'.
    """
    tokenizer = tiktoken.get_encoding(tokenizer_name)
    print(tokenizer)
    with open(input_file_path, 'r') as f:
        data = f.read()
    enc_data = tokenizer.encode(data)
    # uint32 is sufficient: cl100k_base has ~100k token ids, far below 2**32.
    enc_data = np.array(enc_data, dtype=np.uint32)
    enc_data.tofile(output_file_path)


if __name__ == "__main__":
    # Guard the entry point so importing this module does not immediately
    # tokenize 'input.txt' as a side effect.
    encode_file('input.txt', 'train.bin', 'cl100k_base')
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
{
"model_args": {
"block_size": 131072,
"vocab_size": 128256,
"rope_freq_base": 10000,
"rope_freq_scale": 1.0,
"n_layer": 32,
"num_kv_heads": 8,
"head_size": 128,
"n_head": 32,
"n_embd": 4096,
"intermediate_size": 14336,
"dropout": 0.0,
"bias": false,
"multiple_of": 256,
"norm_eps": 1e-05,
"sliding_window": null,
"gradient_checkpointing": false
}
}
\ No newline at end of file
{
"model_args": {
"block_size": 131072,
"vocab_size": 128256,
"rope_freq_base": 10000,
"rope_freq_scale": 1.0,
"n_layer": 28,
"num_kv_heads": 8,
"head_size": 128,
"n_head": 24,
"n_embd": 3072,
"intermediate_size": 8192,
"dropout": 0.0,
"bias": false,
"multiple_of": 256,
"norm_eps": 1e-05,
"sliding_window": null,
"gradient_checkpointing": false
}
}
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
# Export an ALLaMo checkpoint (in /home/data/llama3/) to Hugging Face format,
# writing the converted model to /home/data/llama-hf/.
python export_to_hf.py --input_dir="/home/data/llama3/" --output_dir="/home/data/llama-hf/"
This diff is collapsed.
# Download git-lfs v3.5.1 through the ghproxy GitHub mirror (useful where
# direct access to github.com is slow or blocked).
wget https://mirror.ghproxy.com/https://github.com/git-lfs/git-lfs/releases/download/v3.5.1/git-lfs-linux-amd64-v3.5.1.tar.gz
# Unpack the release tarball into ./git-lfs-3.5.1/.
tar -xzvf git-lfs-linux-amd64-v3.5.1.tar.gz
# Run the bundled installer script shipped with the release.
./git-lfs-3.5.1/install.sh
# Remove the extracted directory and the downloaded archive to clean up.
rm -rf git-lfs-3.5.1 git-lfs-linux-amd64-v3.5.1.tar.gz
This diff is collapsed.
# Import Hugging Face Llama weights (Meta-Llama-3.1-8B) into an ALLaMo
# checkpoint directory.
python import_hf_llama_weights.py --huggingface_model Meta-Llama-3.1-8B --output_dir allamo-llama-3.1-8b
# Same conversion for the 3B model — presumably kept as a convenience; verify
# the target directory name before enabling.
# python import_hf_llama_weights.py --huggingface_model Llama-3.2-3B --output_dir allamo-llama-3.2-3B
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment