Unverified Commit 150d7020 authored by Liangsheng Yin, committed by GitHub

Revert removing the unused imports (#385)

parent 9acc6e35
"""Inference-only Yi-VL model.""" """Inference-only Yi-VL model."""
from typing import Optional import os
from typing import List, Optional
import torch import torch
import torch.nn as nn import torch.nn as nn
...@@ -12,6 +13,7 @@ from vllm.model_executor.weight_utils import ( ...@@ -12,6 +13,7 @@ from vllm.model_executor.weight_utils import (
from sglang.srt.models.llava import ( from sglang.srt.models.llava import (
LlavaLlamaForCausalLM, LlavaLlamaForCausalLM,
clip_vision_embed_forward,
monkey_path_clip_vision_embed_forward, monkey_path_clip_vision_embed_forward,
) )
......
@@ -10,6 +10,9 @@ import threading
 import time
 from typing import List, Optional, Union
+# Fix a Python bug
+setattr(threading, "_register_atexit", lambda *args, **kwargs: None)
 import aiohttp
 import psutil
 import pydantic
@@ -55,9 +58,6 @@ from sglang.srt.managers.tokenizer_manager import TokenizerManager
 from sglang.srt.server_args import PortArgs, ServerArgs
 from sglang.srt.utils import enable_show_time_cost, handle_port_init
-# Fix a Python bug
-setattr(threading, "_register_atexit", lambda *args, **kwargs: None)
 asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
 API_KEY_HEADER_NAME = "X-API-Key"
@@ -619,7 +619,7 @@ def launch_server(server_args, pipe_finish_writer):
         try:
             requests.get(url + "/get_model_info", timeout=5, headers=headers)
             break
-        except requests.exceptions.RequestException:
+        except requests.exceptions.RequestException as e:
             pass
     else:
         if pipe_finish_writer is not None:
...
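The first hunk above moves the threading workaround from below the sglang imports back to just after the typing imports, so the patch is in place before aiohttp and the rest of the server stack are imported. A minimal, hedged sketch of the pattern on its own (only the comment text and the setattr call are taken from the diff; the rationale is an assumption, not something the commit states):

import threading

# From the diff ("Fix a Python bug"): replace threading._register_atexit with
# a no-op. Presumably this keeps thread pools created by later imports from
# registering exit hooks that block interpreter shutdown, which would explain
# why the revert installs it before the other imports.
setattr(threading, "_register_atexit", lambda *args, **kwargs: None)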
@@ -157,6 +157,7 @@ def get_exception_traceback():
 def get_int_token_logit_bias(tokenizer, vocab_size):
+    from transformers import LlamaTokenizer, LlamaTokenizerFast
     # a bug when model's vocab size > tokenizer.vocab_size
     vocab_size = tokenizer.vocab_size
...
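The hunk above only restores a transformers import inside get_int_token_logit_bias; the function body is not part of this diff. As a rough sketch of what a helper with this signature could do, assuming (not confirmed by the commit) that it builds a logit-bias vector which suppresses non-digit tokens for constrained integer generation; the name suffix, the -1e5 penalty, and the masking rule are illustrative choices rather than sglang's actual implementation:

import torch

def get_int_token_logit_bias_sketch(tokenizer, vocab_size):
    # Hypothetical sketch, not the real sglang helper.
    # Clamp to the tokenizer's vocab size, per the "a bug when model's
    # vocab size > tokenizer.vocab_size" comment in the diff.
    vocab_size = tokenizer.vocab_size
    logit_bias = torch.zeros(vocab_size, dtype=torch.float32)
    for token_id in range(vocab_size):
        text = tokenizer.decode([token_id]).strip()
        if not text.isdigit():
            # Push non-digit tokens far down so sampling stays on digits.
            logit_bias[token_id] = -1e5
    return logit_bias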
 import argparse
 import glob
 import multiprocessing
+import os
 import time
 import unittest
...
+import json
 import unittest
 from sglang import Anthropic, set_default_backend
...
@@ -2,6 +2,7 @@
 python3 -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000
 """
+import json
 import unittest
 import sglang as sgl
@@ -12,6 +13,8 @@ from sglang.test.test_programs import (
     test_few_shot_qa,
     test_mt_bench,
     test_parallel_decoding,
+    test_parallel_encoding,
+    test_react,
     test_regex,
     test_select,
     test_stream,
...
@@ -110,7 +110,7 @@ class TestTracing(unittest.TestCase):
         forks = s.fork(3)
         for i in range(3):
             forks[i] += f"Now, expand tip {i+1} into a paragraph:\n"
-            forks[i] += sgl.gen("detailed_tip")
+            forks[i] += sgl.gen(f"detailed_tip")
         s += "Tip 1:" + forks[0]["detailed_tip"] + "\n"
         s += "Tip 2:" + forks[1]["detailed_tip"] + "\n"
...
 import argparse
+import os
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
...
 import multiprocessing
 import os
+import time
+import numpy as np
+import torch
+import torch.distributed as dist
 import transformers
 from sglang.srt.managers.router.infer_batch import Batch, ForwardMode, Req
...
 import multiprocessing
+import time
 import numpy as np
 import torch
+import torch.distributed as dist
 from sglang.srt.hf_transformers_utils import get_processor
-from sglang.srt.managers.router.model_runner import ModelRunner
+from sglang.srt.managers.router.infer_batch import ForwardMode
+from sglang.srt.managers.router.model_runner import InputMetadata, ModelRunner
 from sglang.srt.model_config import ModelConfig
 from sglang.srt.utils import load_image
...
@@ -9,8 +9,11 @@ The capital of the United Kindom is London.\nThe capital of the United Kingdom i
 import argparse
 import asyncio
+import json
+import time
 import aiohttp
+import requests
 async def send_request(url, data, delay=0):
...
@@ -10,6 +10,7 @@ The image features a man standing on the back of a yellow taxi cab, holding
 import argparse
 import asyncio
 import json
+import time
 import aiohttp
 import requests
...
@@ -6,6 +6,7 @@ The capital of France is Paris.\nThe capital of the United States is Washington,
 """
 import argparse
+import time
 import requests
...