Unverified commit 2e341cd4, authored by zhyncs, committed by GitHub

misc: add pre-commit config (#637)

parent a8552cb1
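
This commit adds a pre-commit configuration and applies the resulting auto-formatting across the repository: black-style line wrapping, merged imports, and an end-of-file newline fix, all visible in the hunks below. The .pre-commit-config.yaml itself is not shown in this excerpt. As a rough sketch only, a minimal config consistent with these changes might look like the following; the actual hooks and pinned revisions in #637 may differ:

repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.6.0  # hypothetical pin
    hooks:
      - id: end-of-file-fixer     # would explain the trailing-newline fix below
      - id: trailing-whitespace
  - repo: https://github.com/psf/black
    rev: 24.4.2  # hypothetical pin
    hooks:
      - id: black                 # would explain the line-wrapping changes below

With such a config in place, contributors run pre-commit install once and pre-commit run --all-files to reformat the whole tree, which is the kind of sweep that produces diffs like those below.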
@@ -401,9 +401,11 @@ class Qwen2MoeForCausalLM(nn.Module):
             # These are the weights for the experts
             # (param_name, weight_name, expert_id, shard_id)
             (
-                "experts.w13_weight"
-                if weight_name in ["gate_proj", "up_proj"]
-                else "experts.w2_weight",
+                (
+                    "experts.w13_weight"
+                    if weight_name in ["gate_proj", "up_proj"]
+                    else "experts.w2_weight"
+                ),
                 f"experts.{expert_id}.{weight_name}.weight",
                 expert_id,
                 shard_id,
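
The only change in this hunk is black wrapping the conditional expression in its own parentheses; the tuple's value is unchanged, and the extra parentheses just make it explicit that the trailing comma belongs to the enclosing tuple rather than the ternary. A standalone sketch, with hypothetical sample values standing in for the loop variables of the real code:

# Hypothetical sample values for illustration only.
weight_name, expert_id, shard_id = "gate_proj", 0, 0

entry = (
    (
        "experts.w13_weight"
        if weight_name in ["gate_proj", "up_proj"]
        else "experts.w2_weight"
    ),
    f"experts.{expert_id}.{weight_name}.weight",
    expert_id,
    shard_id,
)
# The grouping parentheses change nothing semantically: same 4-tuple.
assert entry == ("experts.w13_weight", "experts.0.gate_proj.weight", 0, 0)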
@@ -418,7 +420,7 @@ class Qwen2MoeForCausalLM(nn.Module):
         for name, loaded_weight in weights:
             if "rotary_emb.inv_freq" in name:
                 continue
-            for (param_name, weight_name, shard_id) in stacked_params_mapping:
+            for param_name, weight_name, shard_id in stacked_params_mapping:
                 # Skip non-stacked layers and experts (experts handled below).
                 if weight_name not in name:
                     continue
......
@@ -32,8 +32,8 @@ from sglang.srt.hf_transformers_utils import get_tokenizer
 from sglang.srt.managers.controller.manager_multi import (
     start_controller_process as start_controller_process_multi,
 )
-from sglang.srt.managers.controller.manager_single import launch_tp_servers
-from sglang.srt.managers.controller.manager_single import (
+from sglang.srt.managers.controller.manager_single import (
+    launch_tp_servers,
     start_controller_process as start_controller_process_single,
 )
 from sglang.srt.managers.detokenizer_manager import start_detokenizer_process
@@ -198,11 +198,22 @@ def launch_server(server_args: ServerArgs, pipe_finish_writer, model_overide_args
     if server_args.node_rank != 0:
         tp_size_local = server_args.tp_size // server_args.nnodes
-        gpu_ids = [i for _ in range(server_args.nnodes) for i in range(tp_size_local)]
-        tp_rank_range = list(range(server_args.node_rank * tp_size_local,
-                                   (server_args.node_rank + 1) * tp_size_local))
-        procs = launch_tp_servers(gpu_ids, tp_rank_range, server_args,
-                                  port_args.model_port_args[0], model_overide_args)
+        gpu_ids = [
+            i for _ in range(server_args.nnodes) for i in range(tp_size_local)
+        ]
+        tp_rank_range = list(
+            range(
+                server_args.node_rank * tp_size_local,
+                (server_args.node_rank + 1) * tp_size_local,
+            )
+        )
+        procs = launch_tp_servers(
+            gpu_ids,
+            tp_rank_range,
+            server_args,
+            port_args.model_port_args[0],
+            model_overide_args,
+        )
         while True:
             pass
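
The reflow above is behavior-preserving. To make the rank arithmetic concrete, here is a self-contained sketch with hypothetical values (tp_size=8 split over nnodes=2, running as node_rank=1):

# Hypothetical example: 2 nodes, tensor-parallel size 8, this process on node 1.
tp_size, nnodes, node_rank = 8, 2, 1

tp_size_local = tp_size // nnodes  # ranks hosted per node
gpu_ids = [i for _ in range(nnodes) for i in range(tp_size_local)]
tp_rank_range = list(
    range(node_rank * tp_size_local, (node_rank + 1) * tp_size_local)
)

assert tp_size_local == 4
assert gpu_ids == [0, 1, 2, 3, 0, 1, 2, 3]  # local GPU index per global rank
assert tp_rank_range == [4, 5, 6, 7]        # global TP ranks owned by node 1

So gpu_ids maps every global TP rank to a local device index (presumably indexed by global rank inside launch_tp_servers), and node 1's ranks 4-7 land on its local GPUs 0-3.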
......
@@ -10,16 +10,15 @@ import os
 from transformers import AutoConfig, AutoTokenizer
 
+
 def add_image_token(model_path: str):
     tokenizer = AutoTokenizer.from_pretrained(model_path)
-    tokenizer.add_tokens(
-        ["<image_placeholder>"],
-        special_tokens=True
-    )
+    tokenizer.add_tokens(["<image_placeholder>"], special_tokens=True)
     print(tokenizer)
     tokenizer.save_pretrained(model_path)
 
+
 def edit_model_config(model_path):
     config = AutoConfig.from_pretrained(model_path)
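
add_tokens grows the tokenizer vocabulary and returns the number of tokens actually added; this script only rewrites the tokenizer and config on disk. Any model checkpoint later loaded against the enlarged tokenizer typically needs its embedding matrix resized to match. A minimal sketch of that follow-up step (not part of this commit; the path is a placeholder):

from transformers import AutoModelForCausalLM, AutoTokenizer

model_path = "/path/to/model"  # placeholder

tokenizer = AutoTokenizer.from_pretrained(model_path)
num_added = tokenizer.add_tokens(["<image_placeholder>"], special_tokens=True)

model = AutoModelForCausalLM.from_pretrained(model_path)
if num_added > 0:
    # Grow the input (and any tied output) embeddings to cover the new token id.
    model.resize_token_embeddings(len(tokenizer))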
@@ -29,10 +28,11 @@ def edit_model_config(model_path):
     print(config)
     config.save_pretrained(model_path)
 
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--model-path", type=str)
     args = parser.parse_args()
 
     add_image_token(args.model_path)
-    edit_model_config(args.model_path)
\ No newline at end of file
+    edit_model_config(args.model_path)
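
The script is driven by argparse; its filename is not shown in this excerpt, so add_image_token.py below is a placeholder:

python add_image_token.py --model-path /path/to/model

Note that --model-path has no default, so omitting it passes None to from_pretrained and the script fails immediately.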