"git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "bdd16116f319fdc4b299f248519c17a5aeb9dd51"
Unverified Commit ced362f7 authored by Chayenne's avatar Chayenne Committed by GitHub
Browse files

Simplify our docs with complicated functions into utils (#1807)


Co-authored-by: default avatarChayenne <zhaochenyang@ucla.edu>
parent 9084a864
name: Build Documentation name: Release Documentation
on: on:
push:
branches:
- main
paths:
- 'docs/**'
- 'python/sglang/version.py'
workflow_dispatch: workflow_dispatch:
jobs: jobs:
execute-notebooks: execute-notebooks:
runs-on: 1-gpu-runner runs-on: 1-gpu-runner
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' if: github.repository == 'sgl-project/sglang'
steps: steps:
- name: Checkout code - name: Checkout code
uses: actions/checkout@v3 uses: actions/checkout@v3
...@@ -38,8 +44,9 @@ jobs: ...@@ -38,8 +44,9 @@ jobs:
done done
build-and-deploy: build-and-deploy:
needs: execute-notebooks
if: github.repository == 'sgl-project/sglang' if: github.repository == 'sgl-project/sglang'
runs-on: 1-gpu-runner runs-on: ubuntu-latest
steps: steps:
- name: Checkout code - name: Checkout code
uses: actions/checkout@v3 uses: actions/checkout@v3
...@@ -75,4 +82,4 @@ jobs: ...@@ -75,4 +82,4 @@ jobs:
git commit -m "Update $(date +'%Y-%m-%d %H:%M:%S')" git commit -m "Update $(date +'%Y-%m-%d %H:%M:%S')"
git push https://$GITHUB_TOKEN@github.com/sgl-project/sgl-project.github.io.git main git push https://$GITHUB_TOKEN@github.com/sgl-project/sgl-project.github.io.git main
cd .. cd ..
rm -rf sgl-project.github.io rm -rf sgl-project.github.io
\ No newline at end of file
# CI workflow: execute every docs notebook on a GPU runner so the published
# documentation is known to run end-to-end.
name: Execute Notebooks

on:
  pull_request:
  push:
    branches:
      - main
  workflow_dispatch:

jobs:
  run-all-notebooks:
    runs-on: 1-gpu-runner
    # Run for in-repo pushes and for any pull request (including forks).
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.9'

      - name: Install dependencies
        run: |
          bash scripts/ci_install_dependency.sh
          pip install -r docs/requirements.txt

      - name: Setup Jupyter Kernel
        run: |
          python -m ipykernel install --user --name python3 --display-name "Python 3"

      - name: Execute notebooks
        run: |
          cd docs
          for nb in *.ipynb; do
            if [ -f "$nb" ]; then
              echo "Executing $nb"
              jupyter nbconvert --to notebook --execute --inplace "$nb" \
                --ExecutePreprocessor.timeout=600 \
                --ExecutePreprocessor.kernel_name=python3
            fi
          done
...@@ -28,42 +28,16 @@ ...@@ -28,42 +28,16 @@
} }
], ],
"source": [ "source": [
"import subprocess\n",
"import time\n",
"import requests\n",
"\n",
"# Equivalent to running this in the shell:\n", "# Equivalent to running this in the shell:\n",
"# python -m sglang.launch_server --model-path Alibaba-NLP/gte-Qwen2-7B-instruct --port 30010 --host 0.0.0.0 --is-embedding --log-level error\n", "# python -m sglang.launch_server --model-path Alibaba-NLP/gte-Qwen2-7B-instruct --port 30010 --host 0.0.0.0 --is-embedding --log-level error\n",
"embedding_process = subprocess.Popen(\n", "from sglang.utils import execute_shell_command, wait_for_server, terminate_process\n",
" [\n", "\n",
" \"python\",\n", "embedding_process = execute_shell_command(\"\"\"\n",
" \"-m\",\n", "python -m sglang.launch_server --model-path Alibaba-NLP/gte-Qwen2-7B-instruct \\\n",
" \"sglang.launch_server\",\n", " --port 30010 --host 0.0.0.0 --is-embedding --log-level error\n",
" \"--model-path\",\n", "\"\"\")\n",
" \"Alibaba-NLP/gte-Qwen2-7B-instruct\",\n",
" \"--port\",\n",
" \"30010\",\n",
" \"--host\",\n",
" \"0.0.0.0\",\n",
" \"--is-embedding\",\n",
" \"--log-level\",\n",
" \"error\",\n",
" ],\n",
" text=True,\n",
" stdout=subprocess.DEVNULL,\n",
" stderr=subprocess.DEVNULL,\n",
")\n",
"\n", "\n",
"while True:\n", "wait_for_server(\"http://localhost:30010\")\n",
" try:\n",
" response = requests.get(\n",
" \"http://localhost:30010/v1/models\",\n",
" headers={\"Authorization\": \"Bearer None\"},\n",
" )\n",
" if response.status_code == 200:\n",
" break\n",
" except requests.exceptions.RequestException:\n",
" time.sleep(1)\n",
"\n", "\n",
"print(\"Embedding server is ready. Proceeding with the next steps.\")" "print(\"Embedding server is ready. Proceeding with the next steps.\")"
] ]
...@@ -134,6 +108,15 @@ ...@@ -134,6 +108,15 @@
"embedding = response.data[0].embedding[:10]\n", "embedding = response.data[0].embedding[:10]\n",
"print(embedding)" "print(embedding)"
] ]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"terminate_process(embedding_process)"
]
} }
], ],
"metadata": { "metadata": {
......
...@@ -36,41 +36,15 @@ ...@@ -36,41 +36,15 @@
} }
], ],
"source": [ "source": [
"import subprocess\n", "from sglang.utils import execute_shell_command, wait_for_server, terminate_process\n",
"import time\n",
"import requests\n",
"import os\n",
"\n", "\n",
"server_process = subprocess.Popen(\n",
" [\n",
" \"python\",\n",
" \"-m\",\n",
" \"sglang.launch_server\",\n",
" \"--model-path\",\n",
" \"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
" \"--port\",\n",
" \"30000\",\n",
" \"--host\",\n",
" \"0.0.0.0\",\n",
" \"--log-level\",\n",
" \"error\",\n",
" ],\n",
" text=True,\n",
" stdout=subprocess.DEVNULL,\n",
" stderr=subprocess.DEVNULL,\n",
")\n",
"\n", "\n",
"while True:\n", "server_process = execute_shell_command(\"\"\"\n",
" try:\n", "python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-8B-Instruct \\\n",
" response = requests.get(\n", "--port 30000 --host 0.0.0.0 --log-level warning\n",
" \"http://localhost:30000/v1/models\",\n", "\"\"\")\n",
" headers={\"Authorization\": \"Bearer None\"},\n",
" )\n",
" if response.status_code == 200:\n",
" break\n",
" except requests.exceptions.RequestException:\n",
" time.sleep(1)\n",
"\n", "\n",
"wait_for_server(\"http://localhost:30000\")\n",
"print(\"Server is ready. Proceeding with the next steps.\")" "print(\"Server is ready. Proceeding with the next steps.\")"
] ]
}, },
...@@ -92,7 +66,7 @@ ...@@ -92,7 +66,7 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"{\"id\":\"1449c9c20d4448299431a57facc68d7a\",\"object\":\"chat.completion\",\"created\":1729816891,\"model\":\"meta-llama/Meta-Llama-3.1-8B-Instruct\",\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"LLM stands for Large Language Model. It's a type of artificial intelligence (AI) designed to process and generate human-like language. LLMs are trained on vast amounts of text data, which enables them to learn patterns, relationships, and nuances of language.\\n\\nLarge Language Models are typically trained using a technique called deep learning, where multiple layers of artificial neural networks are used to analyze and understand the input data. This training process involves feeding the model massive amounts of text data, which it uses to learn and improve its language understanding and generation capabilities.\\n\\nSome key characteristics of LLMs include:\\n\\n1. **Language understanding**: LLMs can comprehend natural language, including its syntax, semantics, and context.\\n2. **Language generation**: LLMs can generate text, including responses to user input, articles, stories, and more.\\n3. **Contextual understanding**: LLMs can understand the context in which language is being used, including the topic, tone, and intent.\\n4. **Self-supervised learning**: LLMs can learn from large datasets without explicit supervision or labeling.\\n\\nLLMs have a wide range of applications, including:\\n\\n1. **Virtual assistants**: LLMs power virtual assistants like Siri, Alexa, and Google Assistant.\\n2. **Language translation**: LLMs can translate text from one language to another.\\n3. **Text summarization**: LLMs can summarize long pieces of text into shorter, more digestible versions.\\n4. **Content generation**: LLMs can generate content, such as news articles, product descriptions, and social media posts.\\n5. 
**Chatbots**: LLMs can power chatbots that can have human-like conversations with users.\\n\\nThe Large Language Model I am, is a type of LLM that has been trained on a massive dataset of text and can answer a wide range of questions and engage in conversation.\"},\"logprobs\":null,\"finish_reason\":\"stop\",\"matched_stop\":128009}],\"usage\":{\"prompt_tokens\":47,\"total_tokens\":426,\"completion_tokens\":379,\"prompt_tokens_details\":null}}" "{\"id\":\"449710eb827c49c99b82ce187e912c2a\",\"object\":\"chat.completion\",\"created\":1729962606,\"model\":\"meta-llama/Meta-Llama-3.1-8B-Instruct\",\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"LLM stands for Large Language Model. It's a type of artificial intelligence (AI) designed to process and generate human-like language. These models are trained on vast amounts of text data, allowing them to learn patterns, relationships, and context within language.\\n\\nLarge language models use various techniques, such as deep learning and natural language processing, to analyze and understand the input text. They can then use this understanding to generate coherent and context-specific text, such as:\\n\\n1. Responses to questions or prompts\\n2. Summaries of long pieces of text\\n3. Creative writing, like stories or poetry\\n4. Translation of text from one language to another\\n\\nSome popular examples of LLMs include:\\n\\n1. Chatbots: Virtual assistants that can understand and respond to user input\\n2. Virtual assistants: Like Siri, Alexa, or Google Assistant\\n3. Language translation tools: Such as Google Translate\\n4. Writing assistants: Like Grammarly or Language Tool\\n\\nThe key characteristics of LLMs include:\\n\\n1. **Scalability**: They can process large amounts of text data\\n2. **Flexibility**: They can be fine-tuned for specific tasks or domains\\n3. **Contextual understanding**: They can recognize context and nuances in language\\n4. 
**Creativity**: They can generate original text or responses\\n\\nHowever, LLMs also have limitations and potential drawbacks:\\n\\n1. **Bias**: They can perpetuate existing biases in the training data\\n2. **Misinformation**: They can spread misinformation or false information\\n3. **Dependence on data quality**: The quality of the training data directly affects the model's performance\\n\\nOverall, LLMs are powerful tools that can be used in various applications, from language translation and writing assistance to chatbots and virtual assistants.\"},\"logprobs\":null,\"finish_reason\":\"stop\",\"matched_stop\":128009}],\"usage\":{\"prompt_tokens\":47,\"total_tokens\":408,\"completion_tokens\":361,\"prompt_tokens_details\":null}}"
] ]
} }
], ],
...@@ -121,7 +95,7 @@ ...@@ -121,7 +95,7 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"ChatCompletion(id='16757c3dd6e14a6e9bafd1122f84e4c5', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Here are 3 countries and their capitals:\\n\\n1. **Country:** Japan\\n**Capital:** Tokyo\\n\\n2. **Country:** Australia\\n**Capital:** Canberra\\n\\n3. **Country:** Brazil\\n**Capital:** Brasília', refusal=None, role='assistant', function_call=None, tool_calls=None), matched_stop=128009)], created=1729816893, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=46, prompt_tokens=49, total_tokens=95, prompt_tokens_details=None))\n" "ChatCompletion(id='6bbf20fed17940739eb5cd5d685fa29a', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Here are 3 countries and their capitals:\\n\\n1. **Country:** Japan\\n**Capital:** Tokyo\\n\\n2. **Country:** Australia\\n**Capital:** Canberra\\n\\n3. **Country:** Brazil\\n**Capital:** Brasília', refusal=None, role='assistant', function_call=None, tool_calls=None), matched_stop=128009)], created=1729962608, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=46, prompt_tokens=49, total_tokens=95, prompt_tokens_details=None))\n"
] ]
} }
], ],
...@@ -155,38 +129,7 @@ ...@@ -155,38 +129,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"import signal\n", "terminate_process(server_process)"
"import gc\n",
"import torch\n",
"\n",
"def terminate_process(process):\n",
" try:\n",
" process.terminate()\n",
" try:\n",
" process.wait(timeout=5)\n",
" except subprocess.TimeoutExpired:\n",
" if os.name != 'nt':\n",
" try:\n",
" pgid = os.getpgid(process.pid)\n",
" os.killpg(pgid, signal.SIGTERM)\n",
" time.sleep(1)\n",
" if process.poll() is None:\n",
" os.killpg(pgid, signal.SIGKILL)\n",
" except ProcessLookupError:\n",
" pass\n",
" else:\n",
" process.kill()\n",
" process.wait()\n",
" except Exception as e:\n",
" print(f\"Warning: {e}\")\n",
" finally:\n",
" gc.collect()\n",
" if torch.cuda.is_available():\n",
" torch.cuda.empty_cache()\n",
" torch.cuda.ipc_collect()\n",
"\n",
"terminate_process(server_process)\n",
"time.sleep(2)"
] ]
} }
], ],
......
"""Common utilities.""" """Common utilities."""
import base64 import base64
import gc
import importlib import importlib
import json import json
import logging import logging
import os import os
import signal import signal
import subprocess
import sys import sys
import time
import traceback import traceback
import urllib.request import urllib.request
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
...@@ -16,6 +19,7 @@ from typing import Optional, Union ...@@ -16,6 +19,7 @@ from typing import Optional, Union
import numpy as np import numpy as np
import requests import requests
import torch
from tqdm import tqdm from tqdm import tqdm
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -294,3 +298,80 @@ def download_and_cache_file(url: str, filename: Optional[str] = None): ...@@ -294,3 +298,80 @@ def download_and_cache_file(url: str, filename: Optional[str] = None):
bar.update(len(chunk)) bar.update(len(chunk))
return filename return filename
def execute_shell_command(command: str) -> subprocess.Popen:
    """Launch a (possibly multi-line) shell command in the background.

    The command may contain backslash-newline line continuations, as is
    common for long ``sglang.launch_server`` invocations written in
    notebooks. Output is discarded so the child never blocks on a full pipe.

    Args:
        command: Shell command as a single string; ``\\``-newline line
            continuations are joined before tokenization.

    Returns:
        subprocess.Popen: Handle of the started process.
    """
    # Local import keeps this helper self-contained.
    import shlex

    # Join backslash-newline continuations into one logical command line.
    command = command.replace("\\\n", " ")
    # shlex honors shell quoting (e.g. --arg "a b"), which a naive
    # str.split() would incorrectly break into separate argv entries.
    args = shlex.split(command)
    # NOTE(review): the child shares this process's group; the killpg-based
    # teardown in terminate_process assumes a dedicated group — consider
    # start_new_session=True. TODO confirm before changing.
    return subprocess.Popen(
        args,
        text=True,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
def wait_for_server(base_url: str, timeout: float = None) -> None:
    """Block until the server behind *base_url* answers ``/v1/models`` with 200.

    Polls ``{base_url}/v1/models`` roughly once per second. Connection
    failures and non-200 responses are both treated as "not ready yet".

    Args:
        base_url: Root URL of the server, e.g. ``http://localhost:30000``.
        timeout: Maximum seconds to wait; ``None`` waits forever.

    Raises:
        TimeoutError: If the server is not ready within ``timeout`` seconds.
    """
    start_time = time.time()
    while True:
        try:
            req = urllib.request.Request(
                f"{base_url}/v1/models",
                # The server ignores the token value but expects the header.
                headers={"Authorization": "Bearer None"},
            )
            with urllib.request.urlopen(req, timeout=5) as response:
                if response.status == 200:
                    return
        except OSError:
            # URLError/HTTPError subclass OSError: covers refused
            # connections, DNS failures, socket timeouts, and non-2xx codes.
            pass
        # Check the deadline on EVERY iteration. The previous version only
        # checked after a successful HTTP exchange, so the timeout was
        # silently ignored while the connection was still being refused.
        if timeout is not None and time.time() - start_time > timeout:
            raise TimeoutError("Server did not become ready within timeout period")
        time.sleep(1)
def terminate_process(process):
    """Safely terminate a process and clean up GPU memory.

    Escalation order: terminate() -> wait up to 5s -> (POSIX) SIGTERM the
    process group, grace period, SIGKILL the group / (Windows) kill().
    All failures are reported but never raised; GPU caches are cleared in
    ``finally`` regardless of outcome.

    Args:
        process: subprocess.Popen object to terminate
    """
    try:
        # Polite first attempt: SIGTERM on POSIX, TerminateProcess on Windows.
        process.terminate()
        try:
            process.wait(timeout=5)
        except subprocess.TimeoutExpired:
            if os.name != "nt":
                try:
                    # Signal the whole process group so any workers the
                    # server spawned are stopped as well.
                    # NOTE(review): assumes the child leads its own process
                    # group; if it shares this interpreter's group, killpg
                    # signals us too — confirm the launcher uses
                    # start_new_session/setsid. TODO confirm.
                    pgid = os.getpgid(process.pid)
                    os.killpg(pgid, signal.SIGTERM)
                    time.sleep(1)  # grace period before escalating
                    if process.poll() is None:
                        os.killpg(pgid, signal.SIGKILL)
                except ProcessLookupError:
                    # Group already gone — nothing left to kill.
                    pass
            else:
                process.kill()
                process.wait()
    except Exception as e:
        # Best-effort teardown: report, never raise from cleanup.
        print(f"Warning: {e}")
    finally:
        # Drop Python-side references and return cached GPU memory to the
        # driver so the next server launch starts from a clean slate.
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            torch.cuda.ipc_collect()
        time.sleep(2)  # presumably lets CUDA/IPC teardown settle — TODO confirm
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment