Unverified commit ab74f8f0, authored by Jinn and committed by GitHub

Remove batches api in docs & example (#7400)

parent 5e7fdc79
@@ -13,7 +13,6 @@
"\n", "\n",
"- `chat/completions`\n", "- `chat/completions`\n",
"- `completions`\n", "- `completions`\n",
"- `batches`\n",
"\n", "\n",
"Check out other tutorials to learn about [vision APIs](https://docs.sglang.ai/backend/openai_api_vision.html) for vision-language models and [embedding APIs](https://docs.sglang.ai/backend/openai_api_embeddings.html) for embedding models." "Check out other tutorials to learn about [vision APIs](https://docs.sglang.ai/backend/openai_api_vision.html) for vision-language models and [embedding APIs](https://docs.sglang.ai/backend/openai_api_embeddings.html) for embedding models."
] ]
@@ -278,290 +277,6 @@
"For OpenAI compatible structured outputs API, refer to [Structured Outputs](https://docs.sglang.ai/backend/structured_outputs.html#OpenAI-Compatible-API) for more details.\n" "For OpenAI compatible structured outputs API, refer to [Structured Outputs](https://docs.sglang.ai/backend/structured_outputs.html#OpenAI-Compatible-API) for more details.\n"
] ]
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Batches\n",
"\n",
"Batches API for chat completions and completions are also supported. You can upload your requests in `jsonl` files, create a batch job, and retrieve the results when the batch job is completed (which takes longer but costs less).\n",
"\n",
"The batches APIs are:\n",
"\n",
"- `batches`\n",
"- `batches/{batch_id}/cancel`\n",
"- `batches/{batch_id}`\n",
"\n",
"Here is an example of a batch job for chat completions, completions are similar.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import time\n",
"from openai import OpenAI\n",
"\n",
"client = OpenAI(base_url=f\"http://127.0.0.1:{port}/v1\", api_key=\"None\")\n",
"\n",
"requests = [\n",
" {\n",
" \"custom_id\": \"request-1\",\n",
" \"method\": \"POST\",\n",
" \"url\": \"/chat/completions\",\n",
" \"body\": {\n",
" \"model\": \"qwen/qwen2.5-0.5b-instruct\",\n",
" \"messages\": [\n",
" {\"role\": \"user\", \"content\": \"Tell me a joke about programming\"}\n",
" ],\n",
" \"max_tokens\": 50,\n",
" },\n",
" },\n",
" {\n",
" \"custom_id\": \"request-2\",\n",
" \"method\": \"POST\",\n",
" \"url\": \"/chat/completions\",\n",
" \"body\": {\n",
" \"model\": \"qwen/qwen2.5-0.5b-instruct\",\n",
" \"messages\": [{\"role\": \"user\", \"content\": \"What is Python?\"}],\n",
" \"max_tokens\": 50,\n",
" },\n",
" },\n",
"]\n",
"\n",
"input_file_path = \"batch_requests.jsonl\"\n",
"\n",
"with open(input_file_path, \"w\") as f:\n",
" for req in requests:\n",
" f.write(json.dumps(req) + \"\\n\")\n",
"\n",
"with open(input_file_path, \"rb\") as f:\n",
" file_response = client.files.create(file=f, purpose=\"batch\")\n",
"\n",
"batch_response = client.batches.create(\n",
" input_file_id=file_response.id,\n",
" endpoint=\"/v1/chat/completions\",\n",
" completion_window=\"24h\",\n",
")\n",
"\n",
"print_highlight(f\"Batch job created with ID: {batch_response.id}\")"
]
},
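{
"cell_type": "markdown",
"metadata": {},
"source": [
"The batch endpoints listed above can also be called over plain HTTP. Below is a minimal sketch, not part of the original example: it uses the `requests` package and assumes the batches routes are served under `/v1/` on the same local server, mirroring `client.batches.retrieve`.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"\n",
"# Hedged sketch: a direct GET against the retrieve endpoint,\n",
"# equivalent to client.batches.retrieve(batch_response.id).\n",
"resp = requests.get(f\"http://127.0.0.1:{port}/v1/batches/{batch_response.id}\")\n",
"print_highlight(resp.json())"
]
},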
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"while batch_response.status not in [\"completed\", \"failed\", \"cancelled\"]:\n",
" time.sleep(3)\n",
" print(f\"Batch job status: {batch_response.status}...trying again in 3 seconds...\")\n",
" batch_response = client.batches.retrieve(batch_response.id)\n",
"\n",
"if batch_response.status == \"completed\":\n",
" print(\"Batch job completed successfully!\")\n",
" print(f\"Request counts: {batch_response.request_counts}\")\n",
"\n",
" result_file_id = batch_response.output_file_id\n",
" file_response = client.files.content(result_file_id)\n",
" result_content = file_response.read().decode(\"utf-8\")\n",
"\n",
" results = [\n",
" json.loads(line) for line in result_content.split(\"\\n\") if line.strip() != \"\"\n",
" ]\n",
"\n",
" for result in results:\n",
" print_highlight(f\"Request {result['custom_id']}:\")\n",
" print_highlight(f\"Response: {result['response']}\")\n",
"\n",
" print_highlight(\"Cleaning up files...\")\n",
" # Only delete the result file ID since file_response is just content\n",
" client.files.delete(result_file_id)\n",
"else:\n",
" print_highlight(f\"Batch job failed with status: {batch_response.status}\")\n",
" if hasattr(batch_response, \"errors\"):\n",
" print_highlight(f\"Errors: {batch_response.errors}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"It takes a while to complete the batch job. You can use these two APIs to retrieve the batch job status or cancel the batch job.\n",
"\n",
"1. `batches/{batch_id}`: Retrieve the batch job status.\n",
"2. `batches/{batch_id}/cancel`: Cancel the batch job.\n",
"\n",
"Here is an example to check the batch job status."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import time\n",
"from openai import OpenAI\n",
"\n",
"client = OpenAI(base_url=f\"http://127.0.0.1:{port}/v1\", api_key=\"None\")\n",
"\n",
"requests = []\n",
"for i in range(20):\n",
" requests.append(\n",
" {\n",
" \"custom_id\": f\"request-{i}\",\n",
" \"method\": \"POST\",\n",
" \"url\": \"/chat/completions\",\n",
" \"body\": {\n",
" \"model\": \"qwen/qwen2.5-0.5b-instruct\",\n",
" \"messages\": [\n",
" {\n",
" \"role\": \"system\",\n",
" \"content\": f\"{i}: You are a helpful AI assistant\",\n",
" },\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"Write a detailed story about topic. Make it very long.\",\n",
" },\n",
" ],\n",
" \"max_tokens\": 64,\n",
" },\n",
" }\n",
" )\n",
"\n",
"input_file_path = \"batch_requests.jsonl\"\n",
"with open(input_file_path, \"w\") as f:\n",
" for req in requests:\n",
" f.write(json.dumps(req) + \"\\n\")\n",
"\n",
"with open(input_file_path, \"rb\") as f:\n",
" uploaded_file = client.files.create(file=f, purpose=\"batch\")\n",
"\n",
"batch_job = client.batches.create(\n",
" input_file_id=uploaded_file.id,\n",
" endpoint=\"/v1/chat/completions\",\n",
" completion_window=\"24h\",\n",
")\n",
"\n",
"print_highlight(f\"Created batch job with ID: {batch_job.id}\")\n",
"print_highlight(f\"Initial status: {batch_job.status}\")\n",
"\n",
"time.sleep(10)\n",
"\n",
"max_checks = 5\n",
"for i in range(max_checks):\n",
" batch_details = client.batches.retrieve(batch_id=batch_job.id)\n",
"\n",
" print_highlight(\n",
" f\"Batch job details (check {i+1} / {max_checks}) // ID: {batch_details.id} // Status: {batch_details.status} // Created at: {batch_details.created_at} // Input file ID: {batch_details.input_file_id} // Output file ID: {batch_details.output_file_id}\"\n",
" )\n",
" print_highlight(\n",
" f\"<strong>Request counts: Total: {batch_details.request_counts.total} // Completed: {batch_details.request_counts.completed} // Failed: {batch_details.request_counts.failed}</strong>\"\n",
" )\n",
"\n",
" time.sleep(3)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Here is an example to cancel a batch job."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import time\n",
"from openai import OpenAI\n",
"import os\n",
"\n",
"client = OpenAI(base_url=f\"http://127.0.0.1:{port}/v1\", api_key=\"None\")\n",
"\n",
"requests = []\n",
"for i in range(5000):\n",
" requests.append(\n",
" {\n",
" \"custom_id\": f\"request-{i}\",\n",
" \"method\": \"POST\",\n",
" \"url\": \"/chat/completions\",\n",
" \"body\": {\n",
" \"model\": \"qwen/qwen2.5-0.5b-instruct\",\n",
" \"messages\": [\n",
" {\n",
" \"role\": \"system\",\n",
" \"content\": f\"{i}: You are a helpful AI assistant\",\n",
" },\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"Write a detailed story about topic. Make it very long.\",\n",
" },\n",
" ],\n",
" \"max_tokens\": 128,\n",
" },\n",
" }\n",
" )\n",
"\n",
"input_file_path = \"batch_requests.jsonl\"\n",
"with open(input_file_path, \"w\") as f:\n",
" for req in requests:\n",
" f.write(json.dumps(req) + \"\\n\")\n",
"\n",
"with open(input_file_path, \"rb\") as f:\n",
" uploaded_file = client.files.create(file=f, purpose=\"batch\")\n",
"\n",
"batch_job = client.batches.create(\n",
" input_file_id=uploaded_file.id,\n",
" endpoint=\"/v1/chat/completions\",\n",
" completion_window=\"24h\",\n",
")\n",
"\n",
"print_highlight(f\"Created batch job with ID: {batch_job.id}\")\n",
"print_highlight(f\"Initial status: {batch_job.status}\")\n",
"\n",
"time.sleep(10)\n",
"\n",
"try:\n",
" cancelled_job = client.batches.cancel(batch_id=batch_job.id)\n",
" print_highlight(f\"Cancellation initiated. Status: {cancelled_job.status}\")\n",
" assert cancelled_job.status == \"cancelling\"\n",
"\n",
" # Monitor the cancellation process\n",
" while cancelled_job.status not in [\"failed\", \"cancelled\"]:\n",
" time.sleep(3)\n",
" cancelled_job = client.batches.retrieve(batch_job.id)\n",
" print_highlight(f\"Current status: {cancelled_job.status}\")\n",
"\n",
" # Verify final status\n",
" assert cancelled_job.status == \"cancelled\"\n",
" print_highlight(\"Batch job successfully cancelled\")\n",
"\n",
"except Exception as e:\n",
" print_highlight(f\"Error during cancellation: {e}\")\n",
" raise e\n",
"\n",
"finally:\n",
" try:\n",
" del_response = client.files.delete(uploaded_file.id)\n",
" if del_response.deleted:\n",
" print_highlight(\"Successfully cleaned up input file\")\n",
" if os.path.exists(input_file_path):\n",
" os.remove(input_file_path)\n",
" print_highlight(\"Successfully deleted local batch_requests.jsonl file\")\n",
" except Exception as e:\n",
" print_highlight(f\"Error cleaning up: {e}\")\n",
" raise e"
]
},
{
"cell_type": "code",
"execution_count": null,
...
"""
Usage:
python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000
python openai_batch_chat.py
Note: Before running this script,
you should create the input.jsonl file with the following content:
{"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "gpt-3.5-turbo-0125", "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Hello world! List 3 NBA players and tell a story"}],"max_tokens": 300}}
{"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "gpt-3.5-turbo-0125", "messages": [{"role": "system", "content": "You are an assistant. "},{"role": "user", "content": "Hello world! List three capital and tell a story"}],"max_tokens": 500}}
"""
import json
import time
import openai
class OpenAIBatchProcessor:
def __init__(self):
client = openai.Client(base_url="http://127.0.0.1:30000/v1", api_key="EMPTY")
self.client = client
def process_batch(self, input_file_path, endpoint, completion_window):
# Upload the input file
with open(input_file_path, "rb") as file:
uploaded_file = self.client.files.create(file=file, purpose="batch")
# Create the batch job
batch_job = self.client.batches.create(
input_file_id=uploaded_file.id,
endpoint=endpoint,
completion_window=completion_window,
)
# Monitor the batch job status
while batch_job.status not in ["completed", "failed", "cancelled"]:
time.sleep(3) # Wait for 3 seconds before checking the status again
print(
f"Batch job status: {batch_job.status}...trying again in 3 seconds..."
)
batch_job = self.client.batches.retrieve(batch_job.id)
# Check the batch job status and errors
if batch_job.status == "failed":
print(f"Batch job failed with status: {batch_job.status}")
print(f"Batch job errors: {batch_job.errors}")
return None
# If the batch job is completed, process the results
if batch_job.status == "completed":
# print result of batch job
print("batch", batch_job.request_counts)
result_file_id = batch_job.output_file_id
# Retrieve the file content from the server
file_response = self.client.files.content(result_file_id)
result_content = file_response.read() # Read the content of the file
# Save the content to a local file
result_file_name = "batch_job_chat_results.jsonl"
with open(result_file_name, "wb") as file:
file.write(result_content) # Write the binary content to the file
# Load data from the saved JSONL file
results = []
with open(result_file_name, "r", encoding="utf-8") as file:
for line in file:
json_object = json.loads(
line.strip()
) # Parse each line as a JSON object
results.append(json_object)
return results
else:
print(f"Batch job failed with status: {batch_job.status}")
return None
# Initialize the OpenAIBatchProcessor
processor = OpenAIBatchProcessor()
# Process the batch job
input_file_path = "input.jsonl"
endpoint = "/v1/chat/completions"
completion_window = "24h"
# Process the batch job
results = processor.process_batch(input_file_path, endpoint, completion_window)
# Print the results
print(results)
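
A quick, hedged follow-up to the script above (not part of the original example): extract just the generated text from each result line. It assumes the server mirrors OpenAI's batch output schema, where each line nests the chat completion under response.body; adjust the keys if your deployment returns a different shape.

# Hedged sketch: print only the generated text from each chat result.
# Assumed line shape: {"custom_id": ..., "response": {"body": {<chat completion>}}}
if results:
    for result in results:
        body = result["response"]["body"]
        print(result["custom_id"], "->", body["choices"][0]["message"]["content"])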
"""
Usage:
python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000
python openai_batch_complete.py
Note: Before running this script,
you should create the input.jsonl file with the following content:
{"custom_id": "request-1", "method": "POST", "url": "/v1/completions", "body": {"model": "gpt-3.5-turbo-instruct", "prompt": "List 3 names of famous soccer player: ", "max_tokens": 200}}
{"custom_id": "request-2", "method": "POST", "url": "/v1/completions", "body": {"model": "gpt-3.5-turbo-instruct", "prompt": "List 6 names of famous basketball player: ", "max_tokens": 400}}
{"custom_id": "request-3", "method": "POST", "url": "/v1/completions", "body": {"model": "gpt-3.5-turbo-instruct", "prompt": "List 6 names of famous basketball player: ", "max_tokens": 400}}
"""
import json
import time
import openai
class OpenAIBatchProcessor:
def __init__(self):
client = openai.Client(base_url="http://127.0.0.1:30000/v1", api_key="EMPTY")
self.client = client
def process_batch(self, input_file_path, endpoint, completion_window):
# Upload the input file
with open(input_file_path, "rb") as file:
uploaded_file = self.client.files.create(file=file, purpose="batch")
# Create the batch job
batch_job = self.client.batches.create(
input_file_id=uploaded_file.id,
endpoint=endpoint,
completion_window=completion_window,
)
# Monitor the batch job status
while batch_job.status not in ["completed", "failed", "cancelled"]:
time.sleep(3) # Wait for 3 seconds before checking the status again
print(
f"Batch job status: {batch_job.status}...trying again in 3 seconds..."
)
batch_job = self.client.batches.retrieve(batch_job.id)
# Check the batch job status and errors
if batch_job.status == "failed":
print(f"Batch job failed with status: {batch_job.status}")
print(f"Batch job errors: {batch_job.errors}")
return None
# If the batch job is completed, process the results
if batch_job.status == "completed":
# print result of batch job
print("batch", batch_job.request_counts)
result_file_id = batch_job.output_file_id
# Retrieve the file content from the server
file_response = self.client.files.content(result_file_id)
result_content = file_response.read() # Read the content of the file
# Save the content to a local file
result_file_name = "batch_job_complete_results.jsonl"
with open(result_file_name, "wb") as file:
file.write(result_content) # Write the binary content to the file
# Load data from the saved JSONL file
results = []
with open(result_file_name, "r", encoding="utf-8") as file:
for line in file:
json_object = json.loads(
line.strip()
) # Parse each line as a JSON object
results.append(json_object)
return results
else:
print(f"Batch job failed with status: {batch_job.status}")
return None
# Initialize the OpenAIBatchProcessor
processor = OpenAIBatchProcessor()
# Process the batch job
input_file_path = "input.jsonl"
endpoint = "/v1/completions"
completion_window = "24h"
# Process the batch job
results = processor.process_batch(input_file_path, endpoint, completion_window)
# Print the results
print(results)
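
As with the chat script, a hedged sketch (not part of the original example) for pulling just the text out of each completion result, again assuming OpenAI-style batch output lines; the completions API puts generated text under a `text` field rather than a chat message.

# Hedged sketch: print only the generated text from each completion result.
# Assumed line shape: {"custom_id": ..., "response": {"body": {<completion>}}}
if results:
    for result in results:
        body = result["response"]["body"]
        print(result["custom_id"], "->", body["choices"][0]["text"])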