Unverified Commit 2e341cd4 authored by zhyncs's avatar zhyncs Committed by GitHub
Browse files

misc: add pre-commit config (#637)

parent a8552cb1
# pre-commit hook configuration: sorts imports with isort, then formats with black.
# Every `rev` is pinned to an immutable release tag so all contributors and CI
# run the same tool versions; bump them together with `pre-commit autoupdate`.
repos:
  - repo: https://github.com/PyCQA/isort
    rev: 5.13.2
    hooks:
      - id: isort
  - repo: https://github.com/psf/black
    # Pinned release instead of the moving `stable` ref: pre-commit warns about
    # mutable refs and never refreshes them after the first install.
    rev: 24.4.2
    hooks:
      - id: black
...@@ -312,8 +312,8 @@ def main(args: argparse.Namespace): ...@@ -312,8 +312,8 @@ def main(args: argparse.Namespace):
np.sum([output_len for _, output_len, _ in REQUEST_LATENCY]) / benchmark_time np.sum([output_len for _, output_len, _ in REQUEST_LATENCY]) / benchmark_time
) )
#latencies = [round(latency, 2) for _, _, latency in REQUEST_LATENCY] # latencies = [round(latency, 2) for _, _, latency in REQUEST_LATENCY]
#print(latencies) # print(latencies)
print(f"Total time: {benchmark_time:.2f} s") print(f"Total time: {benchmark_time:.2f} s")
print(f"Request throughput: {args.num_prompts / benchmark_time:.2f} requests/s") print(f"Request throughput: {args.num_prompts / benchmark_time:.2f} requests/s")
......
...@@ -48,9 +48,9 @@ def generate_lines(random_words, num_lines, redirect_ratio): ...@@ -48,9 +48,9 @@ def generate_lines(random_words, num_lines, redirect_ratio):
) )
for i in redirect_indices: for i in redirect_indices:
target_idx = np.random.choice(min(i * 2 + 100, num_lines)) target_idx = np.random.choice(min(i * 2 + 100, num_lines))
lines[ lines[i] = (
i f"Line {indices[i]}: The REGISTER_CONTENT is the same as Line {indices[target_idx]}."
] = f"Line {indices[i]}: The REGISTER_CONTENT is the same as Line {indices[target_idx]}." )
redirects[i] = target_idx redirects[i] = target_idx
# Build links and find sources # Build links and find sources
......
...@@ -3,6 +3,7 @@ Usage: ...@@ -3,6 +3,7 @@ Usage:
export ANTHROPIC_API_KEY=sk-****** export ANTHROPIC_API_KEY=sk-******
python3 anthropic_example_chat.py python3 anthropic_example_chat.py
""" """
import sglang as sgl import sglang as sgl
...@@ -30,7 +31,7 @@ def stream(): ...@@ -30,7 +31,7 @@ def stream():
state = multi_turn_question.run( state = multi_turn_question.run(
question_1="What is the capital of the United States?", question_1="What is the capital of the United States?",
question_2="List two local attractions.", question_2="List two local attractions.",
stream=True stream=True,
) )
for out in state.text_iter(): for out in state.text_iter():
...@@ -39,13 +40,18 @@ def stream(): ...@@ -39,13 +40,18 @@ def stream():
def batch(): def batch():
states = multi_turn_question.run_batch([ states = multi_turn_question.run_batch(
{"question_1": "What is the capital of the United States?", [
"question_2": "List two local attractions."}, {
"question_1": "What is the capital of the United States?",
{"question_1": "What is the capital of France?", "question_2": "List two local attractions.",
"question_2": "What is the population of this city?"}, },
]) {
"question_1": "What is the capital of France?",
"question_2": "What is the population of this city?",
},
]
)
for s in states: for s in states:
print(s.messages()) print(s.messages())
......
...@@ -9,15 +9,14 @@ import sglang as sgl ...@@ -9,15 +9,14 @@ import sglang as sgl
@sgl.function @sgl.function
def few_shot_qa(s, question): def few_shot_qa(s, question):
s += ( s += """
"""
\n\nHuman: What is the capital of France? \n\nHuman: What is the capital of France?
\n\nAssistant: Paris \n\nAssistant: Paris
\n\nHuman: What is the capital of Germany? \n\nHuman: What is the capital of Germany?
\n\nAssistant: Berlin \n\nAssistant: Berlin
\n\nHuman: What is the capital of Italy? \n\nHuman: What is the capital of Italy?
\n\nAssistant: Rome \n\nAssistant: Rome
""") """
s += "\n\nHuman: " + question + "\n" s += "\n\nHuman: " + question + "\n"
s += "\n\nAssistant:" + sgl.gen("answer", temperature=0) s += "\n\nAssistant:" + sgl.gen("answer", temperature=0)
...@@ -33,8 +32,8 @@ def single(): ...@@ -33,8 +32,8 @@ def single():
def stream(): def stream():
state = few_shot_qa.run( state = few_shot_qa.run(
question="What is the capital of the United States?", question="What is the capital of the United States?", stream=True
stream=True) )
for out in state.text_iter("answer"): for out in state.text_iter("answer"):
print(out, end="", flush=True) print(out, end="", flush=True)
...@@ -42,10 +41,12 @@ def stream(): ...@@ -42,10 +41,12 @@ def stream():
def batch(): def batch():
states = few_shot_qa.run_batch([ states = few_shot_qa.run_batch(
{"question": "What is the capital of the United States?"}, [
{"question": "What is the capital of China?"}, {"question": "What is the capital of the United States?"},
]) {"question": "What is the capital of China?"},
]
)
for s in states: for s in states:
print(s["answer"]) print(s["answer"])
......
...@@ -3,9 +3,11 @@ Usage: ...@@ -3,9 +3,11 @@ Usage:
export AZURE_OPENAI_API_KEY=sk-****** export AZURE_OPENAI_API_KEY=sk-******
python3 openai_example_chat.py python3 openai_example_chat.py
""" """
import sglang as sgl
import os import os
import sglang as sgl
@sgl.function @sgl.function
def multi_turn_question(s, question_1, question_2): def multi_turn_question(s, question_1, question_2):
...@@ -32,7 +34,7 @@ def stream(): ...@@ -32,7 +34,7 @@ def stream():
state = multi_turn_question.run( state = multi_turn_question.run(
question_1="What is the capital of the United States?", question_1="What is the capital of the United States?",
question_2="List two local attractions.", question_2="List two local attractions.",
stream=True stream=True,
) )
for out in state.text_iter(): for out in state.text_iter():
...@@ -41,13 +43,18 @@ def stream(): ...@@ -41,13 +43,18 @@ def stream():
def batch(): def batch():
states = multi_turn_question.run_batch([ states = multi_turn_question.run_batch(
{"question_1": "What is the capital of the United States?", [
"question_2": "List two local attractions."}, {
"question_1": "What is the capital of the United States?",
{"question_1": "What is the capital of France?", "question_2": "List two local attractions.",
"question_2": "What is the population of this city?"}, },
]) {
"question_1": "What is the capital of France?",
"question_2": "What is the population of this city?",
},
]
)
for s in states: for s in states:
print(s.messages()) print(s.messages())
......
...@@ -3,6 +3,7 @@ Usage: ...@@ -3,6 +3,7 @@ Usage:
export GCP_PROJECT_ID=****** export GCP_PROJECT_ID=******
python3 gemini_example_chat.py python3 gemini_example_chat.py
""" """
import sglang as sgl import sglang as sgl
...@@ -30,7 +31,7 @@ def stream(): ...@@ -30,7 +31,7 @@ def stream():
state = multi_turn_question.run( state = multi_turn_question.run(
question_1="What is the capital of the United States?", question_1="What is the capital of the United States?",
question_2="List two local attractions.", question_2="List two local attractions.",
stream=True stream=True,
) )
for out in state.text_iter(): for out in state.text_iter():
...@@ -39,13 +40,18 @@ def stream(): ...@@ -39,13 +40,18 @@ def stream():
def batch(): def batch():
states = multi_turn_question.run_batch([ states = multi_turn_question.run_batch(
{"question_1": "What is the capital of the United States?", [
"question_2": "List two local attractions."}, {
"question_1": "What is the capital of the United States?",
{"question_1": "What is the capital of France?", "question_2": "List two local attractions.",
"question_2": "What is the population of this city?"}, },
]) {
"question_1": "What is the capital of France?",
"question_2": "What is the population of this city?",
},
]
)
for s in states: for s in states:
print(s.messages()) print(s.messages())
......
...@@ -9,15 +9,14 @@ import sglang as sgl ...@@ -9,15 +9,14 @@ import sglang as sgl
@sgl.function @sgl.function
def few_shot_qa(s, question): def few_shot_qa(s, question):
s += ( s += """The following are questions with answers.
"""The following are questions with answers.
Q: What is the capital of France? Q: What is the capital of France?
A: Paris A: Paris
Q: What is the capital of Germany? Q: What is the capital of Germany?
A: Berlin A: Berlin
Q: What is the capital of Italy? Q: What is the capital of Italy?
A: Rome A: Rome
""") """
s += "Q: " + question + "\n" s += "Q: " + question + "\n"
s += "A:" + sgl.gen("answer", stop="\n", temperature=0) s += "A:" + sgl.gen("answer", stop="\n", temperature=0)
...@@ -33,8 +32,8 @@ def single(): ...@@ -33,8 +32,8 @@ def single():
def stream(): def stream():
state = few_shot_qa.run( state = few_shot_qa.run(
question="What is the capital of the United States?", question="What is the capital of the United States?", stream=True
stream=True) )
for out in state.text_iter("answer"): for out in state.text_iter("answer"):
print(out, end="", flush=True) print(out, end="", flush=True)
...@@ -42,10 +41,12 @@ def stream(): ...@@ -42,10 +41,12 @@ def stream():
def batch(): def batch():
states = few_shot_qa.run_batch([ states = few_shot_qa.run_batch(
{"question": "What is the capital of the United States?"}, [
{"question": "What is the capital of China?"}, {"question": "What is the capital of the United States?"},
]) {"question": "What is the capital of China?"},
]
)
for s in states: for s in states:
print(s["answer"]) print(s["answer"])
......
...@@ -3,6 +3,7 @@ Usage: ...@@ -3,6 +3,7 @@ Usage:
export GCP_PROJECT_ID=****** export GCP_PROJECT_ID=******
python3 gemini_example_multimodal_chat.py python3 gemini_example_multimodal_chat.py
""" """
import sglang as sgl import sglang as sgl
...@@ -19,7 +20,7 @@ if __name__ == "__main__": ...@@ -19,7 +20,7 @@ if __name__ == "__main__":
image_file1="./images/cat.jpeg", image_file1="./images/cat.jpeg",
image_file2="./images/dog.jpeg", image_file2="./images/dog.jpeg",
question="Describe difference of the two images in one sentence.", question="Describe difference of the two images in one sentence.",
stream=True stream=True,
) )
for out in state.text_iter("answer"): for out in state.text_iter("answer"):
......
...@@ -3,6 +3,7 @@ Usage: ...@@ -3,6 +3,7 @@ Usage:
export OPENAI_API_KEY=sk-****** export OPENAI_API_KEY=sk-******
python3 openai_example_chat.py python3 openai_example_chat.py
""" """
import sglang as sgl import sglang as sgl
...@@ -31,7 +32,7 @@ def stream(): ...@@ -31,7 +32,7 @@ def stream():
state = multi_turn_question.run( state = multi_turn_question.run(
question_1="What is the capital of the United States?", question_1="What is the capital of the United States?",
question_2="List two local attractions.", question_2="List two local attractions.",
stream=True stream=True,
) )
for out in state.text_iter(): for out in state.text_iter():
...@@ -40,13 +41,18 @@ def stream(): ...@@ -40,13 +41,18 @@ def stream():
def batch(): def batch():
states = multi_turn_question.run_batch([ states = multi_turn_question.run_batch(
{"question_1": "What is the capital of the United States?", [
"question_2": "List two local attractions."}, {
"question_1": "What is the capital of the United States?",
{"question_1": "What is the capital of France?", "question_2": "List two local attractions.",
"question_2": "What is the population of this city?"}, },
]) {
"question_1": "What is the capital of France?",
"question_2": "What is the population of this city?",
},
]
)
for s in states: for s in states:
print(s.messages()) print(s.messages())
......
...@@ -9,15 +9,14 @@ import sglang as sgl ...@@ -9,15 +9,14 @@ import sglang as sgl
@sgl.function @sgl.function
def few_shot_qa(s, question): def few_shot_qa(s, question):
s += ( s += """The following are questions with answers.
"""The following are questions with answers.
Q: What is the capital of France? Q: What is the capital of France?
A: Paris A: Paris
Q: What is the capital of Germany? Q: What is the capital of Germany?
A: Berlin A: Berlin
Q: What is the capital of Italy? Q: What is the capital of Italy?
A: Rome A: Rome
""") """
s += "Q: " + question + "\n" s += "Q: " + question + "\n"
s += "A:" + sgl.gen("answer", stop="\n", temperature=0) s += "A:" + sgl.gen("answer", stop="\n", temperature=0)
...@@ -33,8 +32,8 @@ def single(): ...@@ -33,8 +32,8 @@ def single():
def stream(): def stream():
state = few_shot_qa.run( state = few_shot_qa.run(
question="What is the capital of the United States?", question="What is the capital of the United States?", stream=True
stream=True) )
for out in state.text_iter("answer"): for out in state.text_iter("answer"):
print(out, end="", flush=True) print(out, end="", flush=True)
...@@ -42,10 +41,12 @@ def stream(): ...@@ -42,10 +41,12 @@ def stream():
def batch(): def batch():
states = few_shot_qa.run_batch([ states = few_shot_qa.run_batch(
{"question": "What is the capital of the United States?"}, [
{"question": "What is the capital of China?"}, {"question": "What is the capital of the United States?"},
]) {"question": "What is the capital of China?"},
]
)
for s in states: for s in states:
print(s["answer"]) print(s["answer"])
......
...@@ -3,9 +3,11 @@ Usage: ...@@ -3,9 +3,11 @@ Usage:
export OPENROUTER_API_KEY=sk-****** export OPENROUTER_API_KEY=sk-******
python3 together_example_chat.py python3 together_example_chat.py
""" """
import sglang as sgl
import os import os
import sglang as sgl
@sgl.function @sgl.function
def multi_turn_question(s, question_1, question_2): def multi_turn_question(s, question_1, question_2):
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
Usage: Usage:
python3 srt_example_chat.py python3 srt_example_chat.py
""" """
import sglang as sgl import sglang as sgl
...@@ -29,7 +30,7 @@ def stream(): ...@@ -29,7 +30,7 @@ def stream():
state = multi_turn_question.run( state = multi_turn_question.run(
question_1="What is the capital of the United States?", question_1="What is the capital of the United States?",
question_2="List two local attractions.", question_2="List two local attractions.",
stream=True stream=True,
) )
for out in state.text_iter(): for out in state.text_iter():
...@@ -38,13 +39,18 @@ def stream(): ...@@ -38,13 +39,18 @@ def stream():
def batch(): def batch():
states = multi_turn_question.run_batch([ states = multi_turn_question.run_batch(
{"question_1": "What is the capital of the United States?", [
"question_2": "List two local attractions."}, {
"question_1": "What is the capital of the United States?",
{"question_1": "What is the capital of France?", "question_2": "List two local attractions.",
"question_2": "What is the population of this city?"}, },
]) {
"question_1": "What is the capital of France?",
"question_2": "What is the population of this city?",
},
]
)
for s in states: for s in states:
print(s.messages()) print(s.messages())
......
...@@ -2,20 +2,20 @@ ...@@ -2,20 +2,20 @@
Usage: Usage:
python3 srt_example_complete.py python3 srt_example_complete.py
""" """
import sglang as sgl import sglang as sgl
@sgl.function @sgl.function
def few_shot_qa(s, question): def few_shot_qa(s, question):
s += ( s += """The following are questions with answers.
"""The following are questions with answers.
Q: What is the capital of France? Q: What is the capital of France?
A: Paris A: Paris
Q: What is the capital of Germany? Q: What is the capital of Germany?
A: Berlin A: Berlin
Q: What is the capital of Italy? Q: What is the capital of Italy?
A: Rome A: Rome
""") """
s += "Q: " + question + "\n" s += "Q: " + question + "\n"
s += "A:" + sgl.gen("answer", stop="\n", temperature=0) s += "A:" + sgl.gen("answer", stop="\n", temperature=0)
...@@ -31,8 +31,8 @@ def single(): ...@@ -31,8 +31,8 @@ def single():
def stream(): def stream():
state = few_shot_qa.run( state = few_shot_qa.run(
question="What is the capital of the United States?", question="What is the capital of the United States?", stream=True
stream=True) )
for out in state.text_iter("answer"): for out in state.text_iter("answer"):
print(out, end="", flush=True) print(out, end="", flush=True)
...@@ -40,10 +40,12 @@ def stream(): ...@@ -40,10 +40,12 @@ def stream():
def batch(): def batch():
states = few_shot_qa.run_batch([ states = few_shot_qa.run_batch(
{"question": "What is the capital of the United States?"}, [
{"question": "What is the capital of China?"}, {"question": "What is the capital of the United States?"},
]) {"question": "What is the capital of China?"},
]
)
for s in states: for s in states:
print(s["answer"]) print(s["answer"])
......
""" """
Usage: python3 srt_example_llava.py Usage: python3 srt_example_llava.py
""" """
import sglang as sgl import sglang as sgl
...@@ -12,9 +13,8 @@ def image_qa(s, image_path, question): ...@@ -12,9 +13,8 @@ def image_qa(s, image_path, question):
def single(): def single():
state = image_qa.run( state = image_qa.run(
image_path="images/cat.jpeg", image_path="images/cat.jpeg", question="What is this?", max_new_tokens=128
question="What is this?", )
max_new_tokens=128)
print(state["answer"], "\n") print(state["answer"], "\n")
...@@ -23,7 +23,8 @@ def stream(): ...@@ -23,7 +23,8 @@ def stream():
image_path="images/cat.jpeg", image_path="images/cat.jpeg",
question="What is this?", question="What is this?",
max_new_tokens=64, max_new_tokens=64,
stream=True) stream=True,
)
for out in state.text_iter("answer"): for out in state.text_iter("answer"):
print(out, end="", flush=True) print(out, end="", flush=True)
...@@ -33,8 +34,8 @@ def stream(): ...@@ -33,8 +34,8 @@ def stream():
def batch(): def batch():
states = image_qa.run_batch( states = image_qa.run_batch(
[ [
{"image_path": "images/cat.jpeg", "question":"What is this?"}, {"image_path": "images/cat.jpeg", "question": "What is this?"},
{"image_path": "images/dog.jpeg", "question":"What is this?"}, {"image_path": "images/dog.jpeg", "question": "What is this?"},
], ],
max_new_tokens=128, max_new_tokens=128,
) )
...@@ -43,8 +44,10 @@ def batch(): ...@@ -43,8 +44,10 @@ def batch():
if __name__ == "__main__": if __name__ == "__main__":
runtime = sgl.Runtime(model_path="liuhaotian/llava-v1.6-vicuna-7b", runtime = sgl.Runtime(
tokenizer_path="llava-hf/llava-1.5-7b-hf") model_path="liuhaotian/llava-v1.6-vicuna-7b",
tokenizer_path="llava-hf/llava-1.5-7b-hf",
)
sgl.set_default_backend(runtime) sgl.set_default_backend(runtime)
print(f"chat template: {runtime.endpoint.chat_template.name}") print(f"chat template: {runtime.endpoint.chat_template.name}")
......
...@@ -3,6 +3,7 @@ Usage: python3 srt_example_yi_vl.py ...@@ -3,6 +3,7 @@ Usage: python3 srt_example_yi_vl.py
Requirements: transformers==4.38 Requirements: transformers==4.38
""" """
import sglang as sgl import sglang as sgl
...@@ -17,7 +18,8 @@ def single(): ...@@ -17,7 +18,8 @@ def single():
image_path="images/cat.jpeg", image_path="images/cat.jpeg",
question="What is this?", question="What is this?",
max_new_tokens=64, max_new_tokens=64,
stop="###") stop="###",
)
print(state["answer"], "\n") print(state["answer"], "\n")
...@@ -27,7 +29,8 @@ def stream(): ...@@ -27,7 +29,8 @@ def stream():
question="What is this?", question="What is this?",
max_new_tokens=64, max_new_tokens=64,
stream=True, stream=True,
stop="###") stop="###",
)
for out in state.text_iter("answer"): for out in state.text_iter("answer"):
print(out, end="", flush=True) print(out, end="", flush=True)
...@@ -37,11 +40,11 @@ def stream(): ...@@ -37,11 +40,11 @@ def stream():
def batch(): def batch():
states = image_qa.run_batch( states = image_qa.run_batch(
[ [
{"image_path": "images/cat.jpeg", "question":"What is this?"}, {"image_path": "images/cat.jpeg", "question": "What is this?"},
{"image_path": "images/dog.jpeg", "question":"What is this?"}, {"image_path": "images/dog.jpeg", "question": "What is this?"},
], ],
max_new_tokens=64, max_new_tokens=64,
stop="###" stop="###",
) )
for s in states: for s in states:
print(s["answer"], "\n") print(s["answer"], "\n")
......
...@@ -3,9 +3,11 @@ Usage: ...@@ -3,9 +3,11 @@ Usage:
export TOGETHER_API_KEY=sk-****** export TOGETHER_API_KEY=sk-******
python3 together_example_chat.py python3 together_example_chat.py
""" """
import sglang as sgl
import os import os
import sglang as sgl
@sgl.function @sgl.function
def multi_turn_question(s, question_1, question_2): def multi_turn_question(s, question_1, question_2):
...@@ -32,7 +34,7 @@ def stream(): ...@@ -32,7 +34,7 @@ def stream():
state = multi_turn_question.run( state = multi_turn_question.run(
question_1="What is the capital of the United States?", question_1="What is the capital of the United States?",
question_2="List two local attractions.", question_2="List two local attractions.",
stream=True stream=True,
) )
for out in state.text_iter(): for out in state.text_iter():
...@@ -41,13 +43,18 @@ def stream(): ...@@ -41,13 +43,18 @@ def stream():
def batch(): def batch():
states = multi_turn_question.run_batch([ states = multi_turn_question.run_batch(
{"question_1": "What is the capital of the United States?", [
"question_2": "List two local attractions."}, {
"question_1": "What is the capital of the United States?",
{"question_1": "What is the capital of France?", "question_2": "List two local attractions.",
"question_2": "What is the population of this city?"}, },
]) {
"question_1": "What is the capital of France?",
"question_2": "What is the population of this city?",
},
]
)
for s in states: for s in states:
print(s.messages()) print(s.messages())
......
...@@ -4,21 +4,21 @@ export TOGETHER_API_KEY=sk-****** ...@@ -4,21 +4,21 @@ export TOGETHER_API_KEY=sk-******
python3 together_example_complete.py python3 together_example_complete.py
""" """
import sglang as sgl
import os import os
import sglang as sgl
@sgl.function @sgl.function
def few_shot_qa(s, question): def few_shot_qa(s, question):
s += ( s += """The following are questions with answers.
"""The following are questions with answers.
Q: What is the capital of France? Q: What is the capital of France?
A: Paris A: Paris
Q: What is the capital of Germany? Q: What is the capital of Germany?
A: Berlin A: Berlin
Q: What is the capital of Italy? Q: What is the capital of Italy?
A: Rome A: Rome
""") """
s += "Q: " + question + "\n" s += "Q: " + question + "\n"
s += "A:" + sgl.gen("answer", stop="\n", temperature=0) s += "A:" + sgl.gen("answer", stop="\n", temperature=0)
...@@ -34,8 +34,8 @@ def single(): ...@@ -34,8 +34,8 @@ def single():
def stream(): def stream():
state = few_shot_qa.run( state = few_shot_qa.run(
question="What is the capital of the United States?", question="What is the capital of the United States?", stream=True
stream=True) )
for out in state.text_iter("answer"): for out in state.text_iter("answer"):
print(out, end="", flush=True) print(out, end="", flush=True)
...@@ -43,10 +43,12 @@ def stream(): ...@@ -43,10 +43,12 @@ def stream():
def batch(): def batch():
states = few_shot_qa.run_batch([ states = few_shot_qa.run_batch(
{"question": "What is the capital of the United States?"}, [
{"question": "What is the capital of China?"}, {"question": "What is the capital of the United States?"},
]) {"question": "What is the capital of China?"},
]
)
for s in states: for s in states:
print(s["answer"]) print(s["answer"])
......
...@@ -2,7 +2,9 @@ ...@@ -2,7 +2,9 @@
Usage: Usage:
python3 async_io.py python3 async_io.py
""" """
import asyncio import asyncio
from sglang import Runtime from sglang import Runtime
...@@ -14,7 +16,10 @@ async def generate( ...@@ -14,7 +16,10 @@ async def generate(
tokenizer = engine.get_tokenizer() tokenizer = engine.get_tokenizer()
messages = [ messages = [
{"role": "system", "content": "You will be given question answer tasks.",}, {
"role": "system",
"content": "You will be given question answer tasks.",
},
{"role": "user", "content": prompt}, {"role": "user", "content": prompt},
] ]
...@@ -36,5 +41,5 @@ if __name__ == "__main__": ...@@ -36,5 +41,5 @@ if __name__ == "__main__":
prompt = "Who is Alan Turing?" prompt = "Who is Alan Turing?"
sampling_params = {"max_new_tokens": 128} sampling_params = {"max_new_tokens": 128}
asyncio.run(generate(runtime, prompt, sampling_params)) asyncio.run(generate(runtime, prompt, sampling_params))
runtime.shutdown() runtime.shutdown()
...@@ -33,8 +33,7 @@ def cot_decoding(s, question, get_top_k, is_chat_model, verbose): ...@@ -33,8 +33,7 @@ def cot_decoding(s, question, get_top_k, is_chat_model, verbose):
) )
logprobs = step_0.get_meta_info("get_top_k")["decode_top_logprobs"][0] logprobs = step_0.get_meta_info("get_top_k")["decode_top_logprobs"][0]
print("Decoding step 0:", print("Decoding step 0:", ", ".join(pformat(token[2]) for token in logprobs))
", ".join(pformat(token[2]) for token in logprobs))
for idx, (f, token) in enumerate(zip(forks, logprobs)): for idx, (f, token) in enumerate(zip(forks, logprobs)):
logprob, token_id, text = token logprob, token_id, text = token
f += text f += text
...@@ -56,17 +55,9 @@ def cot_decoding(s, question, get_top_k, is_chat_model, verbose): ...@@ -56,17 +55,9 @@ def cot_decoding(s, question, get_top_k, is_chat_model, verbose):
) )
# calculate probability disparity between the top and secondary tokens # calculate probability disparity between the top and secondary tokens
x1s = [ x1s = [exp(xt[0][0]) for xt in f.get_meta_info("answer")["decode_top_logprobs"]]
exp(xt[0][0]) x2s = [exp(xt[1][0]) for xt in f.get_meta_info("answer")["decode_top_logprobs"]]
for xt in f.get_meta_info("answer")["decode_top_logprobs"] tokens = [xt[0][2] for xt in f.get_meta_info("answer")["decode_top_logprobs"]]
]
x2s = [
exp(xt[1][0])
for xt in f.get_meta_info("answer")["decode_top_logprobs"]
]
tokens = [
xt[0][2] for xt in f.get_meta_info("answer")["decode_top_logprobs"]
]
delta = (sum(x1s) - sum(x2s)) / len(x1s) delta = (sum(x1s) - sum(x2s)) / len(x1s)
# extract the answer span (without the '<|end_of_text|>' token) # extract the answer span (without the '<|end_of_text|>' token)
...@@ -79,42 +70,45 @@ def cot_decoding(s, question, get_top_k, is_chat_model, verbose): ...@@ -79,42 +70,45 @@ def cot_decoding(s, question, get_top_k, is_chat_model, verbose):
top_logprobs_num=2, top_logprobs_num=2,
return_text_in_logprobs=True, return_text_in_logprobs=True,
) )
answer = answer_forks[idx]['answer_span'].replace('\n', ' ').strip(':') answer = answer_forks[idx]["answer_span"].replace("\n", " ").strip(":")
print( print(
f"{YELLOW}Path #{idx} {pformat(text)}[{exp(logprob):.3f}] (score={delta}, answer={answer}){CLEAR}" f"{YELLOW}Path #{idx} {pformat(text)}[{exp(logprob):.3f}] (score={delta}, answer={answer}){CLEAR}"
) )
generated_text = str(answer_forks[idx])[len("ProgramState("):-1] generated_text = str(answer_forks[idx])[len("ProgramState(") : -1]
print(f"{BLUE}{pformat(generated_text)}{CLEAR}") print(f"{BLUE}{pformat(generated_text)}{CLEAR}")
if verbose: if verbose:
answer_tokens = [ answer_tokens = [
xt[0][2] for xt in answer_forks[idx].get_meta_info( xt[0][2]
"answer_span")["decode_top_logprobs"] for xt in answer_forks[idx].get_meta_info("answer_span")[
"decode_top_logprobs"
]
] ]
answer_x1s = [ answer_x1s = [
exp(xt[0][0]) for xt in answer_forks[idx].get_meta_info( exp(xt[0][0])
"answer_span")["decode_top_logprobs"] for xt in answer_forks[idx].get_meta_info("answer_span")[
"decode_top_logprobs"
]
] ]
answer_x2s = [ answer_x2s = [
exp(xt[1][0]) for xt in answer_forks[idx].get_meta_info( exp(xt[1][0])
"answer_span")["decode_top_logprobs"] for xt in answer_forks[idx].get_meta_info("answer_span")[
"decode_top_logprobs"
]
] ]
for token, x1, x2 in zip(tokens, x1s, x2s): for token, x1, x2 in zip(tokens, x1s, x2s):
print(f" {GREEN}{pformat(token)}{CLEAR}({x1:.3f}-{x2:.3f})", print(f" {GREEN}{pformat(token)}{CLEAR}({x1:.3f}-{x2:.3f})", end="")
end="")
print("\n===========") print("\n===========")
for token, x1, x2 in zip(answer_tokens, answer_x1s, answer_x2s): for token, x1, x2 in zip(answer_tokens, answer_x1s, answer_x2s):
print(f" {GREEN}{pformat(token)}{CLEAR}({x1:.3f}-{x2:.3f})", print(f" {GREEN}{pformat(token)}{CLEAR}({x1:.3f}-{x2:.3f})", end="")
end="")
print() print()
sgl.set_default_backend(sgl.RuntimeEndpoint("http://localhost:30000")) sgl.set_default_backend(sgl.RuntimeEndpoint("http://localhost:30000"))
state = cot_decoding.run( state = cot_decoding.run(
question= question=r"Claire makes a 3 egg omelet every morning for breakfast. How many dozens of eggs will she eat in 4 weeks?",
r"Claire makes a 3 egg omelet every morning for breakfast. How many dozens of eggs will she eat in 4 weeks?",
get_top_k=10, get_top_k=10,
is_chat_model=True, is_chat_model=True,
verbose=False, verbose=False,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment