change / sglang · Commits · c77c1e05

fix black in pre-commit (#1940)

Commit c77c1e05 (unverified) · authored Nov 07, 2024 by Chayenne, committed by GitHub on Nov 08, 2024 · parent dca87ec3

29 files changed; showing 20 changed files with 617 additions and 494 deletions (+617, -494).
Changed files:

  .pre-commit-config.yaml (+3, -3)
  docs/backend/native_api.ipynb (+61, -64)
  docs/backend/offline_engine_api.ipynb (+24, -24)
  docs/backend/openai_api_completions.ipynb (+56, -53)
  docs/backend/openai_api_embeddings.ipynb (+24, -24)
  docs/backend/openai_api_vision.ipynb (+24, -24)
  docs/conf.py (+5, -3)
  docs/start/send_request.ipynb (+35, -37)
  examples/frontend_language/usage/rag_using_parea/trace_and_evaluate_rag_using_parea.ipynb (+26, -19)
  examples/runtime/engine/input_ids.py (+1, -0)
  python/sglang/srt/configs/model_config.py (+4, -2)
  python/sglang/srt/layers/quantization/base_config.py (+4, -6)
  python/sglang/srt/layers/vocab_parallel_embedding.py (+214, -148)
  python/sglang/srt/managers/io_struct.py (+4, -2)
  python/sglang/srt/managers/schedule_batch.py (+2, -2)
  python/sglang/srt/managers/scheduler.py (+25, -22)
  python/sglang/srt/managers/tokenizer_manager.py (+11, -7)
  python/sglang/srt/metrics/metrics_collector.py (+59, -16)
  python/sglang/srt/models/gpt2.py (+30, -36)
  python/sglang/srt/server.py (+5, -2)
.pre-commit-config.yaml

@@ -30,6 +30,6 @@ repos:
     rev: 24.10.0
     hooks:
       - id: black
-        additional_dependencies: ['.[jupyter]']
-        types: [python, jupyter]
-        types_or: [python, jupyter]
+        types: [python]
+      - id: black-jupyter
+        types: [jupyter]
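Why this fixes black: pre-commit ANDs together the entries of a hook's `types` list, so the old single hook with `types: [python, jupyter]` could only match files tagged as both Python and Jupyter, which in practice is likely no files at all, so black was silently skipped. Splitting it into a `black` hook for Python sources and a `black-jupyter` hook for notebooks makes both run, and the newly formatted notebooks account for most of the churn in this commit.

Below is a minimal sketch (assuming `black` is installed, for example via `pip install black`; the snippet is illustrative and not part of the commit) reproducing the two black behaviors behind most hunks here: quote normalization and the magic trailing comma.

    # demo_black.py -- illustrative only
    import black

    mode = black.Mode()  # default settings, as the pre-commit hook uses

    # 1. Single quotes are normalized to double quotes.
    print(black.format_str("nbsphinx_execute = 'never'", mode=mode))
    # -> nbsphinx_execute = "never"

    # 2a. Without a trailing comma, a short literal that fits on one
    #     line is collapsed onto one line.
    src = 'data = {\n    "model": "m",\n    "text": prompts\n}\n'
    print(black.format_str(src, mode=mode))
    # -> data = {"model": "m", "text": prompts}

    # 2b. A trailing comma (the "magic trailing comma") forces one
    #     item per line even when the literal would fit.
    print(black.format_str('data = {"model": "m", "text": prompts,}\n', mode=mode))
    # -> data = {
    #        "model": "m",
    #        "text": prompts,
    #    }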
docs/backend/native_api.ipynb

Every hunk but one touches only notebook execution metadata: the "iopub.execute_input", "iopub.status.busy", "iopub.status.idle", and "shell.execute_reply" timestamps move from 2024-11-05 to 2024-11-07 because the notebook was re-executed. The one substantive hunk is black collapsing a dict literal onto one line:

@@ -445,10 +445,7 @@
     "prompts = tokenizer.apply_chat_template(CONVS, tokenize=False)\n",
     "\n",
     "url = \"http://localhost:30030/classify\"\n",
-    "data = {\n",
-    "    \"model\": \"Skywork/Skywork-Reward-Llama-3.1-8B-v0.2\", \n",
-    "    \"text\": prompts\n",
-    "}\n",
+    "data = {\"model\": \"Skywork/Skywork-Reward-Llama-3.1-8B-v0.2\", \"text\": prompts}\n",
     "\n",
     "responses = requests.post(url, json=data).json()\n",
     "for response in responses:\n",
docs/backend/offline_engine_api.ipynb

All hunks touch only notebook execution metadata: timestamps refreshed from 2024-11-05 to 2024-11-07.
docs/backend/openai_api_completions.ipynb

As above, all hunks but one are execution-timestamp refreshes (2024-11-05 to 2024-11-07). The substantive hunk is black exploding a message dict that exceeds the line length, putting each key on its own line:

@@ -297,7 +297,10 @@
     "response = client.chat.completions.create(\n",
     "    model=\"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
     "    messages=[\n",
-    "        {\"role\": \"user\", \"content\": \"Give me the information of the capital of France in the JSON format.\"},\n",
+    "        {\n",
+    "            \"role\": \"user\",\n",
+    "            \"content\": \"Give me the information of the capital of France in the JSON format.\",\n",
+    "        },\n",
     "    ],\n",
     "    temperature=0,\n",
     "    max_tokens=128,\n",
docs/backend/openai_api_embeddings.ipynb

All hunks touch only notebook execution metadata: timestamps refreshed from 2024-11-05 to 2024-11-07.
docs/backend/openai_api_vision.ipynb

All hunks touch only notebook execution metadata: timestamps refreshed from 2024-11-05 to 2024-11-07.
docs/conf.py

@@ -31,7 +31,7 @@ extensions = [
 ]
 
 nbsphinx_allow_errors = True
-nbsphinx_execute = 'never'
+nbsphinx_execute = "never"
 autosectionlabel_prefix_document = True
 nbsphinx_allow_directives = True
@@ -49,7 +49,7 @@ myst_enable_extensions = [
 myst_heading_anchors = 3
-nbsphinx_kernel_name = 'python3'
+nbsphinx_kernel_name = "python3"
 nbsphinx_execute_arguments = [
     "--InlineBackend.figure_formats={'svg', 'pdf'}",
     "--InlineBackend.rc={'figure.dpi': 96}",
@@ -130,8 +130,10 @@ html_context = {
 html_static_path = ["_static"]
 html_css_files = ["css/custom_log.css"]
 
+
 def setup(app):
-    app.add_css_file('css/custom_log.css')
+    app.add_css_file("css/custom_log.css")
 
+
 myst_enable_extensions = [
     "dollarmath",
docs/start/send_request.ipynb

Timestamp-only hunks (2024-11-05 to 2024-11-07) are omitted; the three substantive hunks are black reformatting cell sources:

@@ -49,7 +49,7 @@
     ")\n",
     "\n",
     "server_process = execute_shell_command(\n",
-    "\"\"\"\n",
+    "    \"\"\"\n",
     "python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-8B-Instruct \\\n",
     "--port 30000 --host 0.0.0.0\n",
     "\"\"\"\n",
@@ -115,9 +115,7 @@
     "\n",
     "data = {\n",
     "    \"model\": \"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
-    "    \"messages\": [\n",
-    "        {\"role\": \"user\", \"content\": \"What is the capital of France?\"}\n",
-    "    ]\n",
+    "    \"messages\": [{\"role\": \"user\", \"content\": \"What is the capital of France?\"}],\n",
     "}\n",
     "\n",
     "response = requests.post(url, json=data)\n",
@@ -197,7 +195,7 @@
     "# Handle the streaming output\n",
     "for chunk in response:\n",
     "    if chunk.choices[0].delta.content:\n",
-    "        print(chunk.choices[0].delta.content, end='', flush=True)"
+    "        print(chunk.choices[0].delta.content, end=\"\", flush=True)"
examples/frontend_language/usage/rag_using_parea/trace_and_evaluate_rag_using_parea.ipynb

@@ -80,7 +80,7 @@
 "if not os.path.exists(path_qca):\n",
 "    !wget https://virattt.github.io/datasets/abnb-2023-10k.json -O airbnb-2023-10k-qca.json\n",
 "\n",
-"with open(path_qca, 'r') as f:\n",
+"with open(path_qca, \"r\") as f:\n",
 "    question_context_answers = json.load(f)\n",
 "\n",
 "chroma_client = chromadb.PersistentClient()\n",
@@ -88,7 +88,7 @@
 "if collection.count() == 0:\n",
 "    collection.add(\n",
 "        documents=[qca[\"context\"] for qca in question_context_answers],\n",
-"        ids=[str(i) for i in range(len(question_context_answers))]\n",
+"        ids=[str(i) for i in range(len(question_context_answers))],\n",
 "    )"
@@ -123,7 +123,7 @@
 "\n",
 "load_dotenv()\n",
 "\n",
-"os.environ['TOKENIZERS_PARALLELISM'] = \"false\"\n",
+"os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\n",
 "\n",
 "p = Parea(api_key=os.getenv(\"PAREA_API_KEY\"), project_name=\"rag_sglang\")\n",
 "p.integrate_with_sglang()\n",
@@ -150,10 +150,7 @@
 "@trace\n",
 "def retrieval(question: str) -> List[str]:\n",
-"    return collection.query(\n",
-"        query_texts=[question],\n",
-"        n_results=1\n",
-"    )['documents'][0]"
+"    return collection.query(query_texts=[question], n_results=1)[\"documents\"][0]"
@@ -176,7 +173,9 @@
 "@function\n",
 "def generation_sglang(s, question: str, *context: str):\n",
 "    context = \"\\n\".join(context)\n",
-"    s += user(f'Given this question:\\n{question}\\n\\nAnd this context:\\n{context}\\n\\nAnswer the question.')\n",
+"    s += user(\n",
+"        f\"Given this question:\\n{question}\\n\\nAnd this context:\\n{context}\\n\\nAnswer the question.\"\n",
+"    )\n",
 "    s += assistant(gen(\"answer\"))\n",
@@ -223,7 +222,9 @@
 "    return generation(question, *contexts)\n",
 "\n",
 "\n",
-"rag_pipeline(\"When did the World Health Organization formally declare an end to the COVID-19 global health emergency?\")"
+"rag_pipeline(\n",
+"    \"When did the World Health Organization formally declare an end to the COVID-19 global health emergency?\"\n",
+")"
@@ -271,7 +272,10 @@
-"from parea.evals.rag import context_query_relevancy_factory, percent_target_supported_by_context_factory\n",
+"from parea.evals.rag import (\n",
+"    context_query_relevancy_factory,\n",
+"    percent_target_supported_by_context_factory,\n",
+")\n",
 "\n",
 "\n",
 "context_relevancy_eval = context_query_relevancy_factory()\n",
@@ -280,10 +284,7 @@
 "@trace(eval_funcs=[context_relevancy_eval, percent_target_supported_by_context])\n",
 "def retrieval(question: str) -> List[str]:\n",
-"    return collection.query(\n",
-"        query_texts=[question],\n",
-"        n_results=1\n",
-"    )['documents'][0]"
+"    return collection.query(query_texts=[question], n_results=1)[\"documents\"][0]"
@@ -310,10 +311,13 @@
 "answer_context_faithfulness = answer_context_faithfulness_statement_level_factory()\n",
 "answer_matches_target_llm_grader = answer_matches_target_llm_grader_factory()\n",
 "\n",
+"\n",
 "@function\n",
 "def generation_sglang(s, question: str, *context: str):\n",
 "    context = \"\\n\".join(context)\n",
-"    s += user(f'Given this question:\\n{question}\\n\\nAnd this context:\\n{context}\\n\\nAnswer the question.')\n",
+"    s += user(\n",
+"        f\"Given this question:\\n{question}\\n\\nAnd this context:\\n{context}\\n\\nAnswer the question.\"\n",
+"    )\n",
 "    s += assistant(gen(\"answer\", max_tokens=1_000))\n",
@@ -357,7 +361,9 @@
 "    return generation(question, *contexts)\n",
 "\n",
 "\n",
-"rag_pipeline(\"When did the World Health Organization formally declare an end to the COVID-19 global health emergency?\")"
+"rag_pipeline(\n",
+"    \"When did the World Health Organization formally declare an end to the COVID-19 global health emergency?\"\n",
+")"
@@ -402,6 +408,7 @@
 "!pip install nest-asyncio\n",
 "import nest_asyncio\n",
+"\n",
 "nest_asyncio.apply()"
@@ -461,7 +468,7 @@
 "e = p.experiment(\n",
-"    'RAG',\n",
+"    \"RAG\",\n",
 "    data=[\n",
 "        {\n",
 "            \"question\": qca[\"question\"],\n",
@@ -469,7 +476,7 @@
 "    }\n",
 "    for qca in question_context_answers\n",
 "],\n",
-"    func=rag_pipeline\n",
+"    func=rag_pipeline,\n",
 ").run()"
examples/runtime/engine/input_ids.py

@@ -7,6 +7,7 @@ from sglang.srt.hf_transformers_utils import get_tokenizer
 MODEL_PATH = "meta-llama/Llama-3.1-8B-Instruct"
 
+
 def main():
     # Sample prompts.
     prompts = [
python/sglang/srt/configs/model_config.py

@@ -39,7 +39,7 @@ class ModelConfig:
         revision: Optional[str] = None,
         context_length: Optional[int] = None,
         model_override_args: Optional[dict] = None,
-        is_embedding: Optional[bool] = None
+        is_embedding: Optional[bool] = None,
     ) -> None:
         # Parse args
         self.model_override_args = json.loads(model_override_args)
@@ -52,7 +52,9 @@ class ModelConfig:
         self.hf_text_config = get_hf_text_config(self.hf_config)
 
         # Check model type
-        self.is_generation = is_generation_model(self.hf_config.architectures, is_embedding)
+        self.is_generation = is_generation_model(
+            self.hf_config.architectures, is_embedding
+        )
         self.is_multimodal = is_multimodal_model(self.hf_config.architectures)
         self.is_encoder_decoder = is_encoder_decoder_model(self.hf_config.architectures)
python/sglang/srt/layers/quantization/base_config.py

@@ -122,16 +122,14 @@ class QuantizationConfig(ABC):
         """
         raise NotImplementedError
 
 
-def method_has_implemented_embedding(
-        method_class: Type[QuantizeMethodBase]) -> bool:
+def method_has_implemented_embedding(method_class: Type[QuantizeMethodBase]) -> bool:
     """
     Not all quant methods have embedding implemented, so we need to check that
     it exists for our given method. We check this by making sure the function
     has been changed from the base implementation.
     """
-    base_embedding = inspect.getattr_static(QuantizeMethodBase, "embedding",
-                                            None)
+    base_embedding = inspect.getattr_static(QuantizeMethodBase, "embedding", None)
     class_embedding = inspect.getattr_static(method_class, "embedding", None)
 
-    return (class_embedding is not None
-            and class_embedding is not base_embedding)
+    return class_embedding is not None and class_embedding is not base_embedding
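The pattern in this hunk is worth spelling out: inspect.getattr_static resolves an attribute without triggering descriptors or __getattr__, so comparing the statically resolved function on a subclass against the one on the base class tells you whether the subclass actually overrode it. A self-contained sketch of the idea (the class names below are illustrative, not from sglang):

    import inspect


    class Base:
        def embedding(self):
            raise NotImplementedError


    class WithEmbedding(Base):
        def embedding(self):
            return "ok"


    class WithoutEmbedding(Base):
        pass


    def has_overridden_embedding(cls) -> bool:
        # getattr_static returns the raw function object, so an inherited
        # method resolves to the very same object as on the base class.
        base = inspect.getattr_static(Base, "embedding", None)
        sub = inspect.getattr_static(cls, "embedding", None)
        return sub is not None and sub is not base


    print(has_overridden_embedding(WithEmbedding))     # True
    print(has_overridden_embedding(WithoutEmbedding))  # False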
python/sglang/srt/layers/vocab_parallel_embedding.py

(Large diff, collapsed in the original view: +214, -148.)
python/sglang/srt/managers/io_struct.py

@@ -86,8 +86,10 @@ class GenerateReqInput:
             self.parallel_sample_num = self.sampling_params.get("n", 1)
         else:  # isinstance(self.sampling_params, list):
             self.parallel_sample_num = self.sampling_params[0].get("n", 1)
-            assert all(self.parallel_sample_num == sampling_params.get("n", 1) for sampling_params in self.sampling_params), (
-                "The parallel_sample_num should be the same for all samples in sample params.")
+            assert all(
+                self.parallel_sample_num == sampling_params.get("n", 1)
+                for sampling_params in self.sampling_params
+            ), "The parallel_sample_num should be the same for all samples in sample params."
 
         if self.parallel_sample_num > 1 and self.is_single:
             self.is_single = False
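For context, the assert that black reformatted here enforces that every request in a batched input asks for the same number of parallel samples. A standalone sketch of the same check (the list of dicts is hypothetical input, not sglang's actual types):

    # Each entry mimics one request's sampling params.
    sampling_params = [{"n": 2}, {"n": 2}, {"n": 2}]

    parallel_sample_num = sampling_params[0].get("n", 1)
    assert all(
        parallel_sample_num == sp.get("n", 1) for sp in sampling_params
    ), "The parallel_sample_num should be the same for all samples in sample params."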
python/sglang/srt/managers/schedule_batch.py

@@ -911,8 +911,7 @@ class ScheduleBatch:
         keep_indices = [
             i
             for i in range(len(self.reqs))
-            if not self.reqs[i].finished()
-            and self.reqs[i] is not being_chunked_req
+            if not self.reqs[i].finished() and self.reqs[i] is not being_chunked_req
         ]
 
         if keep_indices is None or len(keep_indices) == 0:
@@ -1043,6 +1042,7 @@ class ScheduleBatch:
         for req in self.reqs:
             req.started_time = time.time()
 
+
 @dataclasses.dataclass
 class ModelWorkerBatch:
     # The batch id
python/sglang/srt/managers/scheduler.py View file @ c77c1e05
...
@@ -566,9 +566,7 @@ class Scheduler:
             and not self.last_batch.is_empty()
         ):
             if self.being_chunked_req:
-                self.last_batch.filter_batch(
-                    being_chunked_req=self.being_chunked_req
-                )
+                self.last_batch.filter_batch(being_chunked_req=self.being_chunked_req)
                 self.tree_cache.cache_unfinished_req(self.being_chunked_req)
                 # Inflight request keeps its rid but will get a new req_pool_idx.
                 self.req_to_token_pool.free(self.being_chunked_req.req_pool_idx)
...
@@ -628,9 +626,7 @@ class Scheduler:
         has_inflight = self.being_chunked_req is not None
         if has_inflight:
             self.being_chunked_req.init_next_round_input()
-            self.being_chunked_req = adder.add_inflight_req(
-                self.being_chunked_req
-            )
+            self.being_chunked_req = adder.add_inflight_req(self.being_chunked_req)

         if self.lora_paths:
             lora_set = (
...
@@ -813,7 +809,8 @@ class Scheduler:
             embeddings = self.tp_worker.forward_batch_embedding(model_worker_batch)
             ret = embeddings, model_worker_batch.bid
         return ret

     def get_stats(self, batch: ScheduleBatch):
         # TODO: get stats for chunked prefill
         now = time.time()
...
@@ -829,8 +826,8 @@ class Scheduler:
         # set stats from prefill
         if self.stats is not None:
             # new_seq=self.stats.new_seq
             cache_hit_rate = self.stats.cache_hit_rate
             token_usage = self.stats.token_usage
         # Iteration stats
         num_prompt_tokens_iter = 0
         num_generation_tokens_iter = 0
...
@@ -851,15 +848,19 @@ class Scheduler:
         # _, next_token_ids, _ = result
         if batch is not None:
             num_generation_tokens_iter = len(batch.output_ids)
-            gen_throughput = round(num_generation_tokens_iter / (now - self.last_stats_tic), 2)
+            gen_throughput = round(
+                num_generation_tokens_iter / (now - self.last_stats_tic), 2
+            )
             for i, req in enumerate(batch.reqs):
                 # NOTE: Batch forward mode is extend befor start decode,
                 if batch.forward_mode.is_extend():
-                    num_prompt_tokens_iter = len(batch.input_ids) + sum(batch.prefix_lens)
+                    num_prompt_tokens_iter = len(batch.input_ids) + sum(
+                        batch.prefix_lens
+                    )
                     time_to_first_tokens_iter.append(now - req.started_time)
                 else:
                     time_per_output_tokens_iter.append(now - self.last_stats_tic)
                     if req.finished():
                         time_e2e_requests.append(now - req.created_time)
...
@@ -869,7 +870,8 @@ class Scheduler:
                 finished_reason_requests.append(
                     req.finished_reason.to_json()
                     if req.finished_reason is not None
                     else None
                 )
         return Stats(
             new_seq=new_seq,
...
@@ -893,7 +895,7 @@ class Scheduler:
             max_running_requests=self.max_running_requests,
         )

     def log_stats(self, stats: Stats):
         self.metrics_collector.log_stats(stats)

     def process_batch_result(self, batch: ScheduleBatch, result):
...
@@ -1003,9 +1005,7 @@ class Scheduler:
             if req.is_retracted:
                 continue

-            if self.server_args.enable_overlap_schedule and (
-                req.finished()
-            ):
+            if self.server_args.enable_overlap_schedule and (req.finished()):
                 self.token_to_kv_pool.free(batch.out_cache_loc[i : i + 1])
                 continue
...
@@ -1031,7 +1031,10 @@ class Scheduler:
         self.token_to_kv_pool.free_group_end()

         self.forward_ct_decode = (self.forward_ct_decode + 1) % (1 << 30)
-        if self.tp_rank == 0 and self.forward_ct_decode % self.server_args.decode_log_interval == 0:
+        if (
+            self.tp_rank == 0
+            and self.forward_ct_decode % self.server_args.decode_log_interval == 0
+        ):
             self.print_decode_stats()

     def add_logprob_return_values(
...
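The scheduler.py hunks above are pure black reformatting: a call that fits within the default 88-column limit is collapsed onto one line, and an overlong condition is wrapped in parentheses. A minimal sketch of that behavior, assuming the black package is installed (black.format_str and black.Mode are its public API):

import black

# Black collapses a wrapped call once it fits on one line, as in the
# filter_batch and add_inflight_req hunks above.
src = """\
self.last_batch.filter_batch(
    being_chunked_req=self.being_chunked_req
)
"""
print(black.format_str(src, mode=black.Mode(line_length=88)))
# -> self.last_batch.filter_batch(being_chunked_req=self.being_chunked_req)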
python/sglang/srt/managers/tokenizer_manager.py View file @ c77c1e05
...
@@ -215,7 +215,7 @@ class TokenizerManager:
                 logprob_start_len,
                 top_logprobs_num,
                 obj.stream,
-                obj.lora_path
+                obj.lora_path,
             )
         elif isinstance(obj, EmbeddingReqInput):
             tokenized_obj = TokenizedEmbeddingReqInput(
...
@@ -290,7 +290,9 @@ class TokenizerManager:
         # Tokenize all requests
         objs = [obj[i] for i in range(batch_size)]
-        tokenized_objs = await asyncio.gather(*(self._tokenize_one_request(obj) for obj in objs))
+        tokenized_objs = await asyncio.gather(
+            *(self._tokenize_one_request(obj) for obj in objs)
+        )

         # Cache the common prefix for parallel sampling
         for i in range(batch_size):
...
@@ -322,7 +324,9 @@ class TokenizerManager:
         rid_to_index = {rid: i for i, rid in enumerate(rids)}
         task_map = {asyncio.create_task(gen.__anext__()): gen for gen in generators}
         while task_map:
-            done, _ = await asyncio.wait(task_map.keys(), return_when=asyncio.FIRST_COMPLETED)
+            done, _ = await asyncio.wait(
+                task_map.keys(), return_when=asyncio.FIRST_COMPLETED
+            )
             for task in done:
                 gen = task_map.pop(task)
...
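The task_map loop reformatted above is a fan-in over several async generators: each generator's next item is wrapped in a task, and asyncio.wait with FIRST_COMPLETED yields whichever stream produces output first. A self-contained sketch of the pattern, with toy generators standing in for sglang's per-request streams:

import asyncio


async def stream(name: str, delay: float):
    # Toy stand-in for a per-request output stream.
    for i in range(3):
        await asyncio.sleep(delay)
        yield f"{name}:{i}"


async def main():
    generators = [stream("a", 0.03), stream("b", 0.01)]
    task_map = {asyncio.create_task(g.__anext__()): g for g in generators}
    while task_map:
        done, _ = await asyncio.wait(
            task_map.keys(), return_when=asyncio.FIRST_COMPLETED
        )
        for task in done:
            gen = task_map.pop(task)
            try:
                item = task.result()
            except StopAsyncIteration:
                continue  # this generator is exhausted
            print(item)
            # Re-arm the generator so its next item competes again.
            task_map[asyncio.create_task(gen.__anext__())] = gen


asyncio.run(main())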
python/sglang/srt/metrics/metrics_collector.py View file @ c77c1e05
...
@@ -130,27 +130,65 @@ class Metrics:
         self.counter_prompt_tokens = Counter(
             name="sglang:prompt_tokens_total",
             documentation="Number of prefill tokens processed.",
-            labelnames=labelnames)
+            labelnames=labelnames,
+        )
         self.counter_generation_tokens = Counter(
             name="sglang:generation_tokens_total",
             documentation="Number of generation tokens processed.",
-            labelnames=labelnames)
+            labelnames=labelnames,
+        )
         self.histogram_time_to_first_token = Histogram(
             name="sglang:time_to_first_token_seconds",
             documentation="Histogram of time to first token in seconds.",
             labelnames=labelnames,
-            buckets=[
-                0.001, 0.005, 0.01, 0.02, 0.04, 0.06, 0.08, 0.1, 0.25, 0.5,
-                0.75, 1.0, 2.5, 5.0, 7.5, 10.0, 15.0, 20.0, 25.0, 30.0
-            ])
+            buckets=[
+                0.001,
+                0.005,
+                0.01,
+                0.02,
+                0.04,
+                0.06,
+                0.08,
+                0.1,
+                0.25,
+                0.5,
+                0.75,
+                1.0,
+                2.5,
+                5.0,
+                7.5,
+                10.0,
+                15.0,
+                20.0,
+                25.0,
+                30.0,
+            ],
+        )
         self.histogram_time_per_output_token = Histogram(
             name="sglang:time_per_output_token_seconds",
             documentation="Histogram of time per output token in seconds.",
             labelnames=labelnames,
-            buckets=[
-                0.005, 0.01, 0.015, 0.02, 0.025, 0.03, 0.04, 0.05, 0.075, 0.1,
-                0.15, 0.2, 0.3, 0.4, 0.5, 0.75, 1.0, 2.5
-            ])
+            buckets=[
+                0.005,
+                0.01,
+                0.015,
+                0.02,
+                0.025,
+                0.03,
+                0.04,
+                0.05,
+                0.075,
+                0.1,
+                0.15,
+                0.2,
+                0.3,
+                0.4,
+                0.5,
+                0.75,
+                1.0,
+                2.5,
+            ],
+        )

         # Request Stats
         # Metadata
...
@@ -245,14 +283,19 @@ class PrometheusMetricsCollector(MetricsCollector):
             stats.num_generation_tokens_requests,
         )
-        self._log_counter(self.metrics.counter_prompt_tokens,
-                          stats.num_prompt_tokens_iter)
-        self._log_counter(self.metrics.counter_generation_tokens,
-                          stats.num_generation_tokens_iter)
-        self._log_histogram(self.metrics.histogram_time_to_first_token,
-                            stats.time_to_first_tokens_iter)
-        self._log_histogram(self.metrics.histogram_time_per_output_token,
-                            stats.time_per_output_tokens_iter)
+        self._log_counter(
+            self.metrics.counter_prompt_tokens, stats.num_prompt_tokens_iter
+        )
+        self._log_counter(
+            self.metrics.counter_generation_tokens, stats.num_generation_tokens_iter
+        )
+        self._log_histogram(
+            self.metrics.histogram_time_to_first_token, stats.time_to_first_tokens_iter
+        )
+        self._log_histogram(
+            self.metrics.histogram_time_per_output_token,
+            stats.time_per_output_tokens_iter,
+        )
         # self._log_gauge(self.metrics.gpu_cache_usage_sys, stats.gpu_cache_usage_sys)
         self._log_gauge(self.metrics.num_running_sys, stats.num_running_req)
...
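The metrics above come from prometheus_client; the reformatting only adds trailing commas and puts one histogram bucket per line. A minimal usage sketch (metric names and documentation mirror the diff; the model_name label and the abbreviated bucket list are assumptions for illustration):

from prometheus_client import Counter, Histogram

counter_prompt_tokens = Counter(
    name="sglang:prompt_tokens_total",
    documentation="Number of prefill tokens processed.",
    labelnames=["model_name"],  # assumed label for this sketch
)
histogram_time_to_first_token = Histogram(
    name="sglang:time_to_first_token_seconds",
    documentation="Histogram of time to first token in seconds.",
    labelnames=["model_name"],
    buckets=[0.001, 0.01, 0.1, 0.5, 1.0, 5.0, 30.0],  # abbreviated bucket list
)

# Counters only increase; histograms record each observation into a bucket.
counter_prompt_tokens.labels(model_name="demo").inc(128)
histogram_time_to_first_token.labels(model_name="demo").observe(0.042)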
python/sglang/srt/models/gpt2.py View file @ c77c1e05
...
@@ -28,7 +28,7 @@ from vllm.model_executor.layers.activation import get_act_fn
 from vllm.model_executor.layers.vocab_parallel_embedding import VocabParallelEmbedding
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader

-#from sglang.srt.layers.activation import get_act_fn
+# from sglang.srt.layers.activation import get_act_fn
 from sglang.srt.layers.linear import (
     ColumnParallelLinear,
     QKVParallelLinear,
...
@@ -47,15 +47,14 @@ class GPT2Attention(nn.Module):
         self,
         layer_id: int,
         config: GPT2Config,
         cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
     ):
         super().__init__()
         self.hidden_size = config.hidden_size
         total_num_heads = config.num_attention_heads
-        tensor_model_parallel_world_size = (
-            get_tensor_model_parallel_world_size())
+        tensor_model_parallel_world_size = get_tensor_model_parallel_world_size()
         assert total_num_heads % tensor_model_parallel_world_size == 0
         self.num_heads = total_num_heads // tensor_model_parallel_world_size
         self.head_dim = self.hidden_size // total_num_heads
...
@@ -76,11 +75,13 @@ class GPT2Attention(nn.Module):
             quant_config=quant_config,
             prefix=f"{prefix}.c_proj",
         )
-        self.attn = RadixAttention(self.num_heads,
-                                   self.head_dim,
-                                   scaling=self.scale,
-                                   num_kv_heads=total_num_heads,
-                                   layer_id=layer_id)
+        self.attn = RadixAttention(
+            self.num_heads,
+            self.head_dim,
+            scaling=self.scale,
+            num_kv_heads=total_num_heads,
+            layer_id=layer_id,
+        )

     def forward(
         self,
...
@@ -119,10 +120,14 @@ class GPT2MLP(nn.Module):
             quant_config=quant_config,
             prefix=f"{prefix}.c_proj",
         )
-        self.act = get_act_fn(config.activation_function, quant_config,
-                              intermediate_size)
+        self.act = get_act_fn(
+            config.activation_function, quant_config, intermediate_size
+        )

-    def forward(self, hidden_states: torch.Tensor,) -> torch.Tensor:
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+    ) -> torch.Tensor:
         hidden_states, _ = self.c_fc(hidden_states)
         hidden_states = self.act(hidden_states)
         hidden_states, _ = self.c_proj(hidden_states)
...
@@ -135,27 +140,20 @@ class GPT2Block(nn.Module):
         self,
         layer_id: int,
         config: GPT2Config,
         cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
     ):
         super().__init__()
         hidden_size = config.hidden_size
-        inner_dim = (config.n_inner if config.n_inner is not None else 4 *
-                     hidden_size)
+        inner_dim = config.n_inner if config.n_inner is not None else 4 * hidden_size

         self.ln_1 = nn.LayerNorm(hidden_size, eps=config.layer_norm_epsilon)
-        self.attn = GPT2Attention(layer_id,
-                                  config,
-                                  cache_config,
-                                  quant_config,
-                                  prefix=f"{prefix}.attn")
+        self.attn = GPT2Attention(
+            layer_id, config, cache_config, quant_config, prefix=f"{prefix}.attn"
+        )
         self.ln_2 = nn.LayerNorm(hidden_size, eps=config.layer_norm_epsilon)
-        self.mlp = GPT2MLP(inner_dim,
-                           config,
-                           quant_config,
-                           prefix=f"{prefix}.mlp")
+        self.mlp = GPT2MLP(inner_dim, config, quant_config, prefix=f"{prefix}.mlp")

     def forward(
         self,
...
@@ -179,13 +177,12 @@ class GPT2Block(nn.Module):
         return hidden_states


 class GPT2Model(nn.Module):
     def __init__(
         self,
         config: GPT2Config,
         cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
     ):
...
@@ -229,16 +226,15 @@ class GPT2LMHeadModel(nn.Module):
     def __init__(
         self,
         config: GPT2Config,
         cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ):
         super().__init__()
         self.config = config
         self.quant_config = quant_config
-        self.transformer = GPT2Model(config,
-                                     cache_config,
-                                     quant_config,
-                                     prefix="transformer")
+        self.transformer = GPT2Model(
+            config, cache_config, quant_config, prefix="transformer"
+        )
         self.lm_head = self.transformer.wte
         self.logits_processor = LogitsProcessor(config)
...
@@ -254,8 +250,6 @@ class GPT2LMHeadModel(nn.Module):
             input_ids, hidden_states, self.lm_head.weight, forward_batch
         )

     def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
         params_dict = dict(self.named_parameters(remove_duplicate=False))
         for name, loaded_weight in weights:
...
@@ -280,8 +274,8 @@ class GPT2LMHeadModel(nn.Module):
             if not name.endswith(".weight"):
                 continue
             loaded_weight = loaded_weight.t()
-            weight_loader = getattr(param, "weight_loader",
-                                    default_weight_loader)
+            weight_loader = getattr(param, "weight_loader", default_weight_loader)
             weight_loader(param, loaded_weight)


 EntryClass = GPT2LMHeadModel
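Two black rules explain most of the gpt2.py hunks above: a call that exceeds the line limit is exploded to one argument per line and gains a trailing comma (the RadixAttention call), while an existing trailing comma, black's "magic trailing comma", forces a parameter list to stay exploded even when it would fit (the GPT2MLP.forward signature). A sketch, again assuming the black package:

import black

# Overlong call: black explodes it, one argument per line, trailing comma added.
src = (
    "self.attn = RadixAttention(self.num_heads, self.head_dim, "
    "scaling=self.scale, num_kv_heads=total_num_heads, layer_id=layer_id)\n"
)
print(black.format_str(src, mode=black.Mode(line_length=88)))

# Magic trailing comma: the parameter list stays exploded despite fitting.
src = "def forward(self, hidden_states: torch.Tensor,) -> torch.Tensor: ...\n"
print(black.format_str(src, mode=black.Mode(line_length=88)))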
python/sglang/srt/server.py View file @ c77c1e05
...
@@ -419,6 +419,7 @@ def launch_engine(
     for i in range(len(scheduler_pipe_readers)):
         scheduler_pipe_readers[i].recv()

+
 def add_prometheus_middleware(app: FastAPI):
     # Adapted from https://github.com/vllm-project/vllm/blob/v0.6.1/vllm/entrypoints/openai/api_server.py#L216
     from prometheus_client import CollectorRegistry, make_asgi_app, multiprocess
...
@@ -490,6 +491,7 @@ def launch_server(
     finally:
         t.join()

+
 def _set_prometheus_env():
     # Set prometheus multiprocess directory
     # sglang uses prometheus multiprocess mode
...
@@ -506,6 +508,7 @@ def _set_prometheus_env():
     os.environ["PROMETHEUS_MULTIPROC_DIR"] = prometheus_multiproc_dir.name
     logger.debug(f"PROMETHEUS_MULTIPROC_DIR: {os.environ['PROMETHEUS_MULTIPROC_DIR']}")

+
 def _set_envs_and_config(server_args: ServerArgs):
     # Set global environments
     os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
...
@@ -763,8 +766,8 @@ class Engine:
         # runtime server default log level is log
         # offline engine works in scripts, so we set it to error
-        if 'log_level' not in kwargs:
-            kwargs['log_level'] = 'error'
+        if "log_level" not in kwargs:
+            kwargs["log_level"] = "error"

         server_args = ServerArgs(*args, **kwargs)
         launch_engine(server_args=server_args)
...
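The final Engine hunk is black's string-quote normalization: single-quoted strings are rewritten with double quotes. A sketch, under the same black assumption as above:

import black

src = "if 'log_level' not in kwargs:\n    kwargs['log_level'] = 'error'\n"
print(black.format_str(src, mode=black.Mode()))
# if "log_level" not in kwargs:
#     kwargs["log_level"] = "error"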