Unverified commit c03cece4, authored by Lianmin Zheng and committed by GitHub
Browse files

Improve error reporting during server launch (#1390)

parent 15c75e41
...@@ -447,13 +447,12 @@ def _wait_and_warmup(server_args, pipe_finish_writer, pid): ...@@ -447,13 +447,12 @@ def _wait_and_warmup(server_args, pipe_finish_writer, pid):
time.sleep(1) time.sleep(1)
try: try:
res = requests.get(url + "/get_model_info", timeout=5, headers=headers) res = requests.get(url + "/get_model_info", timeout=5, headers=headers)
assert res.status_code == 200, f"{res}" assert res.status_code == 200, f"{res=}, {res.text=}"
success = True success = True
break break
except (AssertionError, requests.exceptions.RequestException) as e: except (AssertionError, requests.exceptions.RequestException):
last_traceback = get_exception_traceback() last_traceback = get_exception_traceback()
pass pass
model_info = res.json()
if not success: if not success:
if pipe_finish_writer is not None: if pipe_finish_writer is not None:
...@@ -462,6 +461,8 @@ def _wait_and_warmup(server_args, pipe_finish_writer, pid): ...@@ -462,6 +461,8 @@ def _wait_and_warmup(server_args, pipe_finish_writer, pid):
kill_child_process(pid, including_parent=False) kill_child_process(pid, including_parent=False)
return return
model_info = res.json()
# Send a warmup request # Send a warmup request
request_name = "/generate" if model_info["is_generation"] else "/encode" request_name = "/generate" if model_info["is_generation"] else "/encode"
max_new_tokens = 8 if model_info["is_generation"] else 1 max_new_tokens = 8 if model_info["is_generation"] else 1
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment