Unverified commit 322421fa, authored by Cody Yu and committed by GitHub
Browse files

Add warmup to SRT server (#146)

parent 8ff870bf
...@@ -389,7 +389,7 @@ def launch_server(server_args, pipe_finish_writer): ...@@ -389,7 +389,7 @@ def launch_server(server_args, pipe_finish_writer):
assert proc_router.is_alive() and proc_detoken.is_alive() assert proc_router.is_alive() and proc_detoken.is_alive()
def launch_server(): def _launch_server():
# Launch api server # Launch api server
uvicorn.run( uvicorn.run(
app, app,
...@@ -400,26 +400,48 @@ def launch_server(server_args, pipe_finish_writer): ...@@ -400,26 +400,48 @@ def launch_server(server_args, pipe_finish_writer):
loop="uvloop", loop="uvloop",
) )
t = threading.Thread(target=launch_server) t = threading.Thread(target=_launch_server)
t.start() t.start()
if pipe_finish_writer: url = server_args.url()
url = server_args.url() for _ in range(60):
time.sleep(1)
success = False try:
for i in range(60): requests.get(url + "/get_model_info", timeout=5)
time.sleep(1) break
try: except requests.exceptions.RequestException as e:
res = requests.get(url + "/get_model_info", timeout=5) pass
success = True else:
break if pipe_finish_writer is not None:
except requests.exceptions.RequestException as e: pipe_finish_writer.send(str(e))
pass
if success:
pipe_finish_writer.send("init ok")
else: else:
print(e, flush=True)
return
# Warmup
try:
print("Warmup...", flush=True)
res = requests.post(
url + "/generate",
json={
"text": "Say this is a warmup request.",
"sampling_params": {
"temperature": 0,
"max_new_tokens": 16,
},
},
timeout=60,
)
print(f"Warmup done. model response: {res.json()['text']}")
except requests.exceptions.RequestException as e:
if pipe_finish_writer is not None:
pipe_finish_writer.send(str(e)) pipe_finish_writer.send(str(e))
else:
print(e, flush=True)
return
if pipe_finish_writer is not None:
pipe_finish_writer.send("init ok")
class Runtime: class Runtime:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment