Commit b240f751 authored by Lianmin Zheng's avatar Lianmin Zheng
Browse files

Add a parallel sampling case (#34)

parent 501f9444
...@@ -19,7 +19,7 @@ python3 -m vllm.entrypoints.api_server --model meta-llama/Llama-2-7b-chat-hf --d ...@@ -19,7 +19,7 @@ python3 -m vllm.entrypoints.api_server --model meta-llama/Llama-2-7b-chat-hf --d
``` ```
``` ```
python3 bench_throughput.py --backend vllm --tokenizer meta-llama/Llama-2-7b-chat-hf --dataset ShareGPT_V3_unfiltered_cleaned_split.json --num-prompts 10 --request-rate 10 --port 21000
``` ```
......
import sglang as sgl
@sgl.function
def parallel_sample(s, question, n):
    """Answer *question* with n independent samples of a
    Reasoning / Tool / Answer completion, run in parallel forks.

    Generated variables ("reasoning", "tool", "answer") become lists
    of length n on the returned program state.
    """
    # Two-shot prompt fixing the Question/Reasoning/Tool/Answer format.
    few_shot = (
        "Question: Compute 1 + 2 + 3\n"
        "Reasoning: I need to use a calculator.\n"
        "Tool: calculator\n"
        "Answer: 6\n"
        "Question: Compute 3 + 2 + 2\n"
        "Reasoning: I will try a calculator.\n"
        "Tool: calculator\n"
        "Answer: 7\n"
    )
    s += few_shot
    s += "Question: " + question + "\n"

    # Fork the program state n ways; each branch samples independently.
    branches = s.fork(n)
    reasoning = sgl.gen("reasoning", stop="\n")
    tool = sgl.gen("tool", choices=["calculator", "browser"])
    answer = sgl.gen("answer", stop="\n")
    branches += "Reasoning:" + reasoning + "\n"
    branches += "Tool:" + tool + "\n"
    branches += "Answer:" + answer + "\n"
    branches.join()
# Execute against the OpenAI completion API by default; uncomment the
# second line to target a local sglang runtime endpoint instead.
sgl.set_default_backend(sgl.OpenAI("gpt-3.5-turbo-instruct"))
# sgl.set_default_backend(sgl.RuntimeEndpoint("http://localhost:30000"))

# Number of parallel samples. Named once so the run() call and the result
# loop below can never drift apart (previously hard-coded as 5 in both).
num_samples = 5

state = parallel_sample.run(
    question="Compute 5 + 2 + 4.",
    n=num_samples,
    temperature=1.0,
)

# Each generated variable on the state is a list with one entry per fork.
for i in range(num_samples):
    obj = {
        "reasoning": state["reasoning"][i],
        "tool": state["tool"][i],
        "answer": state["answer"][i],
    }
    print(f"[{i}], {obj}")
...@@ -50,7 +50,7 @@ def gen( ...@@ -50,7 +50,7 @@ def gen(
regex: Optional[str] = None, regex: Optional[str] = None,
): ):
if choices: if choices:
return SglSelect(name, choices, temperature) return SglSelect(name, choices, 0.0 if temperature is None else temperature)
# check regex is valid # check regex is valid
if regex is not None: if regex is not None:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment