Unverified Commit df2eebf1 authored by corey hu's avatar corey hu Committed by GitHub
Browse files

Handle unsharded Llama2 model types in conversion script (#27069)

Handle all unsharded model types
parent a2f55a65
...@@ -124,7 +124,7 @@ def write_model(model_path, input_base_path, model_size, tokenizer_path=None, sa ...@@ -124,7 +124,7 @@ def write_model(model_path, input_base_path, model_size, tokenizer_path=None, sa
print(f"Fetching all parameters from the checkpoint at {input_base_path}.") print(f"Fetching all parameters from the checkpoint at {input_base_path}.")
# Load weights # Load weights
if model_size == "7B": if num_shards == 1:
# Not sharded # Not sharded
# (The sharded implementation would also work, but this is simpler.) # (The sharded implementation would also work, but this is simpler.)
loaded = torch.load(os.path.join(input_base_path, "consolidated.00.pth"), map_location="cpu") loaded = torch.load(os.path.join(input_base_path, "consolidated.00.pth"), map_location="cpu")
...@@ -138,7 +138,7 @@ def write_model(model_path, input_base_path, model_size, tokenizer_path=None, sa ...@@ -138,7 +138,7 @@ def write_model(model_path, input_base_path, model_size, tokenizer_path=None, sa
index_dict = {"weight_map": {}} index_dict = {"weight_map": {}}
for layer_i in range(n_layers): for layer_i in range(n_layers):
filename = f"pytorch_model-{layer_i + 1}-of-{n_layers + 1}.bin" filename = f"pytorch_model-{layer_i + 1}-of-{n_layers + 1}.bin"
if model_size == "7B": if num_shards == 1:
# Unsharded # Unsharded
state_dict = { state_dict = {
f"model.layers.{layer_i}.self_attn.q_proj.weight": permute( f"model.layers.{layer_i}.self_attn.q_proj.weight": permute(
...@@ -222,7 +222,7 @@ def write_model(model_path, input_base_path, model_size, tokenizer_path=None, sa ...@@ -222,7 +222,7 @@ def write_model(model_path, input_base_path, model_size, tokenizer_path=None, sa
torch.save(state_dict, os.path.join(tmp_model_path, filename)) torch.save(state_dict, os.path.join(tmp_model_path, filename))
filename = f"pytorch_model-{n_layers + 1}-of-{n_layers + 1}.bin" filename = f"pytorch_model-{n_layers + 1}-of-{n_layers + 1}.bin"
if model_size == "7B": if num_shards == 1:
# Unsharded # Unsharded
state_dict = { state_dict = {
"model.embed_tokens.weight": loaded["tok_embeddings.weight"], "model.embed_tokens.weight": loaded["tok_embeddings.weight"],
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment