sglang commit b240f751
Authored Jan 17, 2024 by Lianmin Zheng

Add a parallel sampling case (#34)

Parent: 501f9444
Showing 3 changed files with 42 additions and 2 deletions (+42, -2):

  benchmark/latency_throughput/README.md   +1, -1
  examples/usage/parallel_sample.py        +40, -0
  python/sglang/api.py                     +1, -1
benchmark/latency_throughput/README.md

```
@@ -19,7 +19,7 @@ python3 -m vllm.entrypoints.api_server --model meta-llama/Llama-2-7b-chat-hf --d
-python3 bench_throughput.py --backend vllm --tokenizer meta-llama/Llama-2-7b-chat-hf --dataset ShareGPT_V3_unfiltered_cleaned_split.json --num-prompts 10 --request-rate 10
+python3 bench_throughput.py --backend vllm --tokenizer meta-llama/Llama-2-7b-chat-hf --dataset ShareGPT_V3_unfiltered_cleaned_split.json --num-prompts 10 --request-rate 10 --port 21000
```
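The only change here is appending `--port 21000` to the client command, presumably so that `bench_throughput.py` targets the same port as the vLLM API server started earlier in this README rather than whatever port it would use by default. A rough sketch of how such a client typically resolves its target URL from a `--port` flag (the argument names and defaults below are illustrative, not taken from bench_throughput.py):

```
# Illustrative only: a benchmark client building its target URL from --host/--port
# flags so that it matches the serving process started earlier.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--host", type=str, default="localhost")
parser.add_argument("--port", type=int, default=30000)  # default port is an assumption
args = parser.parse_args()

api_url = f"http://{args.host}:{args.port}/generate"
print(api_url)  # with --port 21000 this matches the vLLM server above
```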
examples/usage/parallel_sample.py (new file, mode 100644)

```
import sglang as sgl


@sgl.function
def parallel_sample(s, question, n):
    s += (
        "Question: Compute 1 + 2 + 3\n"
        "Reasoning: I need to use a calculator.\n"
        "Tool: calculator\n"
        "Answer: 6\n"
        "Question: Compute 3 + 2 + 2\n"
        "Reasoning: I will try a calculator.\n"
        "Tool: calculator\n"
        "Answer: 7\n"
    )
    s += "Question: " + question + "\n"
    forks = s.fork(n)
    forks += "Reasoning:" + sgl.gen("reasoning", stop="\n") + "\n"
    forks += "Tool:" + sgl.gen("tool", choices=["calculator", "browser"]) + "\n"
    forks += "Answer:" + sgl.gen("answer", stop="\n") + "\n"
    forks.join()


sgl.set_default_backend(sgl.OpenAI("gpt-3.5-turbo-instruct"))
# sgl.set_default_backend(sgl.RuntimeEndpoint("http://localhost:30000"))

state = parallel_sample.run(question="Compute 5 + 2 + 4.", n=5, temperature=1.0)

for i in range(5):
    obj = {
        "reasoning": state["reasoning"][i],
        "tool": state["tool"][i],
        "answer": state["answer"][i],
    }
    print(f"[{i}], {obj}")
```
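The new example shows SGLang's fork/join parallel sampling: `s.fork(n)` splits the prompt state into `n` branches, each branch runs its own `gen` calls, and after `forks.join()` the per-branch outputs are available as lists on the returned state, which is why the loop indexes `state["reasoning"][i]`. The script defaults to the OpenAI backend; the commented-out line shows how to target a local SGLang runtime instead. A minimal sketch of that variant (it reuses the `parallel_sample` function above and assumes a local server is already listening on port 30000):

```
# Sketch: run the example above against a local SGLang runtime instead of OpenAI.
# Assumes a server is already running at http://localhost:30000 and that
# `parallel_sample` is defined as in examples/usage/parallel_sample.py above.
import sglang as sgl

sgl.set_default_backend(sgl.RuntimeEndpoint("http://localhost:30000"))

state = parallel_sample.run(question="Compute 5 + 2 + 4.", n=5, temperature=1.0)
print(state["answer"])  # n sampled answers, one per fork
```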
python/sglang/api.py

```
@@ -50,7 +50,7 @@ def gen(
     regex: Optional[str] = None,
 ):
     if choices:
-        return SglSelect(name, choices, temperature)
+        return SglSelect(name, choices, 0.0 if temperature is None else temperature)
 
     # check regex is valid
     if regex is not None:
```
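This one-line tweak is what lets the example above call `sgl.gen("tool", choices=[...])` without an explicit temperature: when `choices` is given, `gen` returns an `SglSelect`, and its temperature now falls back to 0.0 instead of forwarding `None`. A minimal sketch of the same defaulting pattern in isolation (the `Select` class below is an illustrative stand-in, not the real `SglSelect`):

```
from typing import List, Optional


class Select:
    """Illustrative stand-in for SglSelect; it expects a concrete float temperature."""

    def __init__(self, name: str, choices: List[str], temperature: float):
        self.name = name
        self.choices = choices
        self.temperature = temperature


def gen(name: str, choices: Optional[List[str]] = None,
        temperature: Optional[float] = None):
    if choices:
        # Fall back to 0.0 when the caller omits temperature,
        # mirroring the change to python/sglang/api.py above.
        return Select(name, choices, 0.0 if temperature is None else temperature)
    return None  # the real gen() continues to build a generation primitive here


print(gen("tool", choices=["calculator", "browser"]).temperature)  # prints 0.0
```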