Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
01bfb22b
Unverified
Commit
01bfb22b
authored
Mar 25, 2024
by
SangBin Cho
Committed by
GitHub
Mar 25, 2024
Browse files
[CI] Try introducing isort. (#3495)
parent
e67c295b
Changes
144
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
79 additions
and
76 deletions
+79
-76
vllm/attention/__init__.py
vllm/attention/__init__.py
+2
-1
vllm/attention/backends/flash_attn.py
vllm/attention/backends/flash_attn.py
+3
-2
vllm/attention/backends/xformers.py
vllm/attention/backends/xformers.py
+2
-1
vllm/attention/ops/paged_attn.py
vllm/attention/ops/paged_attn.py
+1
-2
vllm/attention/selector.py
vllm/attention/selector.py
+4
-2
vllm/config.py
vllm/config.py
+7
-6
vllm/core/block_manager.py
vllm/core/block_manager.py
+3
-3
vllm/core/evictor.py
vllm/core/evictor.py
+1
-1
vllm/core/scheduler.py
vllm/core/scheduler.py
+3
-3
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+2
-3
vllm/engine/async_llm_engine.py
vllm/engine/async_llm_engine.py
+3
-3
vllm/engine/llm_engine.py
vllm/engine/llm_engine.py
+5
-5
vllm/engine/metrics.py
vllm/engine/metrics.py
+7
-6
vllm/engine/ray_utils.py
vllm/engine/ray_utils.py
+2
-3
vllm/entrypoints/api_server.py
vllm/entrypoints/api_server.py
+1
-1
vllm/entrypoints/llm.py
vllm/entrypoints/llm.py
+1
-1
vllm/entrypoints/openai/api_server.py
vllm/entrypoints/openai/api_server.py
+8
-9
vllm/entrypoints/openai/protocol.py
vllm/entrypoints/openai/protocol.py
+2
-3
vllm/entrypoints/openai/serving_chat.py
vllm/entrypoints/openai/serving_chat.py
+8
-6
vllm/entrypoints/openai/serving_completion.py
vllm/entrypoints/openai/serving_completion.py
+14
-15
No files found.
vllm/attention/__init__.py
View file @
01bfb22b
from
vllm.attention.backends.abstract
import
AttentionBackend
,
AttentionMetadata
from
vllm.attention.backends.abstract
import
(
AttentionBackend
,
AttentionMetadata
)
from
vllm.attention.layer
import
Attention
from
vllm.attention.layer
import
Attention
from
vllm.attention.selector
import
get_attn_backend
from
vllm.attention.selector
import
get_attn_backend
...
...
vllm/attention/backends/flash_attn.py
View file @
01bfb22b
...
@@ -7,12 +7,13 @@ flashinfer for all the attention operations.
...
@@ -7,12 +7,13 @@ flashinfer for all the attention operations.
from
dataclasses
import
dataclass
from
dataclasses
import
dataclass
from
typing
import
Dict
,
List
,
Optional
,
Tuple
,
Type
from
typing
import
Dict
,
List
,
Optional
,
Tuple
,
Type
from
flash_attn
import
flash_attn_varlen_func
import
torch
import
torch
from
flash_attn
import
flash_attn_varlen_func
from
vllm.attention.backends.abstract
import
(
AttentionBackend
,
AttentionImpl
,
from
vllm.attention.backends.abstract
import
(
AttentionBackend
,
AttentionImpl
,
AttentionMetadata
)
AttentionMetadata
)
from
vllm.attention.ops.paged_attn
import
PagedAttention
,
PagedAttentionMetadata
from
vllm.attention.ops.paged_attn
import
(
PagedAttention
,
PagedAttentionMetadata
)
class
FlashAttentionBackend
(
AttentionBackend
):
class
FlashAttentionBackend
(
AttentionBackend
):
...
...
vllm/attention/backends/xformers.py
View file @
01bfb22b
...
@@ -11,7 +11,8 @@ from xformers.ops.fmha.attn_bias import (AttentionBias,
...
@@ -11,7 +11,8 @@ from xformers.ops.fmha.attn_bias import (AttentionBias,
from
vllm.attention.backends.abstract
import
(
AttentionBackend
,
AttentionImpl
,
from
vllm.attention.backends.abstract
import
(
AttentionBackend
,
AttentionImpl
,
AttentionMetadata
)
AttentionMetadata
)
from
vllm.attention.ops.paged_attn
import
PagedAttention
,
PagedAttentionMetadata
from
vllm.attention.ops.paged_attn
import
(
PagedAttention
,
PagedAttentionMetadata
)
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.utils
import
is_hip
from
vllm.utils
import
is_hip
...
...
vllm/attention/ops/paged_attn.py
View file @
01bfb22b
...
@@ -3,8 +3,7 @@ from typing import Dict, List, Optional, Tuple
...
@@ -3,8 +3,7 @@ from typing import Dict, List, Optional, Tuple
import
torch
import
torch
from
vllm._C
import
cache_ops
from
vllm._C
import
cache_ops
,
ops
from
vllm._C
import
ops
from
vllm.attention.ops.prefix_prefill
import
context_attention_fwd
from
vllm.attention.ops.prefix_prefill
import
context_attention_fwd
# Should be the same as PARTITION_SIZE in `paged_attention_v2_launcher`.
# Should be the same as PARTITION_SIZE in `paged_attention_v2_launcher`.
...
...
vllm/attention/selector.py
View file @
01bfb22b
...
@@ -13,11 +13,13 @@ logger = init_logger(__name__)
...
@@ -13,11 +13,13 @@ logger = init_logger(__name__)
def
get_attn_backend
(
dtype
:
torch
.
dtype
)
->
AttentionBackend
:
def
get_attn_backend
(
dtype
:
torch
.
dtype
)
->
AttentionBackend
:
if
_can_use_flash_attn
(
dtype
):
if
_can_use_flash_attn
(
dtype
):
logger
.
info
(
"Using FlashAttention backend."
)
logger
.
info
(
"Using FlashAttention backend."
)
from
vllm.attention.backends.flash_attn
import
FlashAttentionBackend
# noqa: F401
from
vllm.attention.backends.flash_attn
import
(
# noqa: F401
FlashAttentionBackend
)
return
FlashAttentionBackend
return
FlashAttentionBackend
else
:
else
:
logger
.
info
(
"Using XFormers backend."
)
logger
.
info
(
"Using XFormers backend."
)
from
vllm.attention.backends.xformers
import
XFormersBackend
# noqa: F401
from
vllm.attention.backends.xformers
import
(
# noqa: F401
XFormersBackend
)
return
XFormersBackend
return
XFormersBackend
...
...
vllm/config.py
View file @
01bfb22b
from
typing
import
TYPE_CHECKING
,
Optional
,
Union
,
ClassVar
import
json
from
dataclasses
import
dataclass
import
os
import
os
from
packaging.version
import
Version
from
dataclasses
import
dataclass
from
typing
import
TYPE_CHECKING
,
ClassVar
,
Optional
,
Union
import
json
import
torch
import
torch
from
packaging.version
import
Version
from
transformers
import
PretrainedConfig
from
transformers
import
PretrainedConfig
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.transformers_utils.config
import
get_config
from
vllm.transformers_utils.config
import
get_config
from
vllm.utils
import
get_cpu_memory
,
is_hip
,
is_neuron
,
get_nvcc_cuda_version
from
vllm.utils
import
get_cpu_memory
,
get_nvcc_cuda_version
,
is_hip
,
is_neuron
if
TYPE_CHECKING
:
if
TYPE_CHECKING
:
from
ray.util.placement_group
import
PlacementGroup
from
ray.util.placement_group
import
PlacementGroup
...
@@ -103,7 +103,8 @@ class ModelConfig:
...
@@ -103,7 +103,8 @@ class ModelConfig:
if
os
.
environ
.
get
(
"VLLM_USE_MODELSCOPE"
,
"False"
).
lower
()
==
"true"
:
if
os
.
environ
.
get
(
"VLLM_USE_MODELSCOPE"
,
"False"
).
lower
()
==
"true"
:
# download model from ModelScope hub,
# download model from ModelScope hub,
# lazy import so that modelscope is not required for normal use.
# lazy import so that modelscope is not required for normal use.
from
modelscope.hub.snapshot_download
import
snapshot_download
# pylint: disable=C
# pylint: disable=C.
from
modelscope.hub.snapshot_download
import
snapshot_download
if
not
os
.
path
.
exists
(
model
):
if
not
os
.
path
.
exists
(
model
):
model_path
=
snapshot_download
(
model_id
=
model
,
model_path
=
snapshot_download
(
model_id
=
model
,
...
...
vllm/core/block_manager.py
View file @
01bfb22b
"""A block manager that manages token blocks."""
"""A block manager that manages token blocks."""
import
enum
import
enum
from
abc
import
ABC
,
abstractmethod
from
itertools
import
count
,
takewhile
from
itertools
import
count
,
takewhile
from
os.path
import
commonprefix
from
os.path
import
commonprefix
from
typing
import
Dict
,
List
,
Optional
,
Set
,
Tuple
from
typing
import
Dict
,
List
,
Optional
,
Set
,
Tuple
from
abc
import
ABC
,
abstractmethod
from
vllm.block
import
BlockTable
,
PhysicalTokenBlock
from
vllm.block
import
BlockTable
,
PhysicalTokenBlock
from
vllm.core.evictor
import
EvictionPolicy
,
Evictor
,
make_evictor
from
vllm.logger
import
init_logger
from
vllm.sequence
import
Sequence
,
SequenceGroup
,
SequenceStatus
from
vllm.sequence
import
Sequence
,
SequenceGroup
,
SequenceStatus
from
vllm.utils
import
Device
from
vllm.utils
import
Device
from
vllm.core.evictor
import
Evictor
,
EvictionPolicy
,
make_evictor
from
vllm.logger
import
init_logger
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
...
...
vllm/core/evictor.py
View file @
01bfb22b
import
enum
import
enum
from
typing
import
OrderedDict
from
abc
import
ABC
,
abstractmethod
,
abstractproperty
from
abc
import
ABC
,
abstractmethod
,
abstractproperty
from
typing
import
OrderedDict
from
vllm.block
import
PhysicalTokenBlock
from
vllm.block
import
PhysicalTokenBlock
...
...
vllm/core/scheduler.py
View file @
01bfb22b
from
collections
import
deque
import
enum
import
enum
import
time
import
time
from
typing
import
Deque
,
Dict
,
Iterable
,
List
,
Optional
,
Tuple
,
Union
,
Set
from
collections
import
deque
from
typing
import
Deque
,
Dict
,
Iterable
,
List
,
Optional
,
Set
,
Tuple
,
Union
from
vllm.config
import
CacheConfig
,
LoRAConfig
,
SchedulerConfig
from
vllm.config
import
CacheConfig
,
LoRAConfig
,
SchedulerConfig
from
vllm.core.block_manager
import
AllocStatus
,
BlockSpaceManager
from
vllm.core.block_manager
import
AllocStatus
,
BlockSpaceManager
from
vllm.core.policy
import
PolicyFactory
from
vllm.core.policy
import
PolicyFactory
from
vllm.lora.request
import
LoRARequest
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.lora.request
import
LoRARequest
from
vllm.sequence
import
(
Sequence
,
SequenceData
,
SequenceGroup
,
from
vllm.sequence
import
(
Sequence
,
SequenceData
,
SequenceGroup
,
SequenceGroupMetadata
,
SequenceStatus
)
SequenceGroupMetadata
,
SequenceStatus
)
...
...
vllm/engine/arg_utils.py
View file @
01bfb22b
...
@@ -3,9 +3,8 @@ import dataclasses
...
@@ -3,9 +3,8 @@ import dataclasses
from
dataclasses
import
dataclass
from
dataclasses
import
dataclass
from
typing
import
Optional
,
Tuple
from
typing
import
Optional
,
Tuple
from
vllm.config
import
(
CacheConfig
,
DeviceConfig
,
ModelConfig
,
from
vllm.config
import
(
CacheConfig
,
DeviceConfig
,
LoRAConfig
,
ModelConfig
,
ParallelConfig
,
SchedulerConfig
,
LoRAConfig
,
ParallelConfig
,
SchedulerConfig
,
TokenizerPoolConfig
)
TokenizerPoolConfig
)
@
dataclass
@
dataclass
...
...
vllm/engine/async_llm_engine.py
View file @
01bfb22b
...
@@ -2,17 +2,17 @@ import asyncio
...
@@ -2,17 +2,17 @@ import asyncio
import
os
import
os
import
time
import
time
from
functools
import
partial
from
functools
import
partial
from
typing
import
(
Callable
,
Dict
,
Iterable
,
List
,
Optional
,
Set
,
Tuple
,
Type
,
from
typing
import
(
AsyncIterator
,
Callable
,
Dict
,
Iterable
,
List
,
Optional
,
Union
,
AsyncIterator
)
Set
,
Tuple
,
Type
,
Union
)
from
transformers
import
PreTrainedTokenizer
from
transformers
import
PreTrainedTokenizer
from
vllm.lora.request
import
LoRARequest
from
vllm.config
import
ModelConfig
from
vllm.config
import
ModelConfig
from
vllm.engine.arg_utils
import
AsyncEngineArgs
from
vllm.engine.arg_utils
import
AsyncEngineArgs
from
vllm.engine.llm_engine
import
LLMEngine
from
vllm.engine.llm_engine
import
LLMEngine
from
vllm.engine.ray_utils
import
initialize_ray_cluster
,
ray
from
vllm.engine.ray_utils
import
initialize_ray_cluster
,
ray
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.lora.request
import
LoRARequest
from
vllm.outputs
import
RequestOutput
from
vllm.outputs
import
RequestOutput
from
vllm.sampling_params
import
SamplingParams
from
vllm.sampling_params
import
SamplingParams
...
...
vllm/engine/llm_engine.py
View file @
01bfb22b
...
@@ -4,22 +4,22 @@ from typing import Iterable, List, Optional, Tuple, Type, Union
...
@@ -4,22 +4,22 @@ from typing import Iterable, List, Optional, Tuple, Type, Union
from
transformers
import
PreTrainedTokenizer
from
transformers
import
PreTrainedTokenizer
import
vllm
import
vllm
from
vllm.lora.request
import
LoRARequest
from
vllm.config
import
(
CacheConfig
,
DeviceConfig
,
LoRAConfig
,
ModelConfig
,
from
vllm.config
import
(
CacheConfig
,
DeviceConfig
,
ModelConfig
,
ParallelConfig
,
SchedulerConfig
)
ParallelConfig
,
SchedulerConfig
,
LoRAConfig
)
from
vllm.core.scheduler
import
Scheduler
,
SchedulerOutputs
from
vllm.core.scheduler
import
Scheduler
,
SchedulerOutputs
from
vllm.engine.arg_utils
import
EngineArgs
from
vllm.engine.arg_utils
import
EngineArgs
from
vllm.executor.executor_base
import
ExecutorBase
from
vllm.engine.metrics
import
StatLogger
,
Stats
from
vllm.engine.metrics
import
StatLogger
,
Stats
from
vllm.engine.ray_utils
import
initialize_ray_cluster
from
vllm.engine.ray_utils
import
initialize_ray_cluster
from
vllm.executor.executor_base
import
ExecutorBase
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.lora.request
import
LoRARequest
from
vllm.outputs
import
RequestOutput
from
vllm.outputs
import
RequestOutput
from
vllm.sampling_params
import
SamplingParams
from
vllm.sampling_params
import
SamplingParams
from
vllm.sequence
import
(
SamplerOutput
,
Sequence
,
SequenceGroup
,
from
vllm.sequence
import
(
SamplerOutput
,
Sequence
,
SequenceGroup
,
SequenceGroupOutput
,
SequenceOutput
,
SequenceStatus
)
SequenceGroupOutput
,
SequenceOutput
,
SequenceStatus
)
from
vllm.transformers_utils.detokenizer
import
Detokenizer
from
vllm.transformers_utils.tokenizer_group
import
(
BaseTokenizerGroup
,
from
vllm.transformers_utils.tokenizer_group
import
(
BaseTokenizerGroup
,
get_tokenizer_group
)
get_tokenizer_group
)
from
vllm.transformers_utils.detokenizer
import
Detokenizer
from
vllm.utils
import
Counter
from
vllm.utils
import
Counter
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
...
...
vllm/engine/metrics.py
View file @
01bfb22b
from
vllm.logger
import
init_logger
from
prometheus_client
import
(
Counter
,
Gauge
,
Histogram
,
Info
,
REGISTRY
,
disable_created_metrics
)
import
time
import
time
import
numpy
as
np
from
typing
import
Dict
,
List
from
dataclasses
import
dataclass
from
dataclasses
import
dataclass
from
typing
import
Dict
,
List
import
numpy
as
np
from
prometheus_client
import
(
REGISTRY
,
Counter
,
Gauge
,
Histogram
,
Info
,
disable_created_metrics
)
from
vllm.logger
import
init_logger
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
...
...
vllm/engine/ray_utils.py
View file @
01bfb22b
import
pickle
import
pickle
from
typing
import
List
,
Optional
,
Tuple
from
typing
import
Optional
,
List
,
Tuple
from
vllm.config
import
ParallelConfig
from
vllm.config
import
ParallelConfig
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.utils
import
is_hip
,
set_cuda_visible_devices
,
get_ip
from
vllm.utils
import
get_ip
,
is_hip
,
set_cuda_visible_devices
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
...
...
vllm/entrypoints/api_server.py
View file @
01bfb22b
...
@@ -11,9 +11,9 @@ import json
...
@@ -11,9 +11,9 @@ import json
import
ssl
import
ssl
from
typing
import
AsyncGenerator
from
typing
import
AsyncGenerator
import
uvicorn
from
fastapi
import
FastAPI
,
Request
from
fastapi
import
FastAPI
,
Request
from
fastapi.responses
import
JSONResponse
,
Response
,
StreamingResponse
from
fastapi.responses
import
JSONResponse
,
Response
,
StreamingResponse
import
uvicorn
from
vllm.engine.arg_utils
import
AsyncEngineArgs
from
vllm.engine.arg_utils
import
AsyncEngineArgs
from
vllm.engine.async_llm_engine
import
AsyncLLMEngine
from
vllm.engine.async_llm_engine
import
AsyncLLMEngine
...
...
vllm/entrypoints/llm.py
View file @
01bfb22b
...
@@ -3,9 +3,9 @@ from typing import List, Optional, Union
...
@@ -3,9 +3,9 @@ from typing import List, Optional, Union
from
tqdm
import
tqdm
from
tqdm
import
tqdm
from
transformers
import
PreTrainedTokenizer
,
PreTrainedTokenizerFast
from
transformers
import
PreTrainedTokenizer
,
PreTrainedTokenizerFast
from
vllm.lora.request
import
LoRARequest
from
vllm.engine.arg_utils
import
EngineArgs
from
vllm.engine.arg_utils
import
EngineArgs
from
vllm.engine.llm_engine
import
LLMEngine
from
vllm.engine.llm_engine
import
LLMEngine
from
vllm.lora.request
import
LoRARequest
from
vllm.outputs
import
RequestOutput
from
vllm.outputs
import
RequestOutput
from
vllm.sampling_params
import
SamplingParams
from
vllm.sampling_params
import
SamplingParams
from
vllm.utils
import
Counter
from
vllm.utils
import
Counter
...
...
vllm/entrypoints/openai/api_server.py
View file @
01bfb22b
import
asyncio
import
asyncio
from
contextlib
import
asynccontextmanager
import
os
import
importlib
import
importlib
import
inspect
import
inspect
import
os
from
contextlib
import
asynccontextmanager
from
http
import
HTTPStatus
from
prometheus_client
import
make_asgi_app
import
fastapi
import
fastapi
import
uvicorn
import
uvicorn
from
http
import
HTTPStatus
from
fastapi
import
Request
from
fastapi
import
Request
from
fastapi.exceptions
import
RequestValidationError
from
fastapi.exceptions
import
RequestValidationError
from
fastapi.middleware.cors
import
CORSMiddleware
from
fastapi.middleware.cors
import
CORSMiddleware
from
fastapi.responses
import
JSONResponse
,
StreamingResponse
,
Response
from
fastapi.responses
import
JSONResponse
,
Response
,
StreamingResponse
from
prometheus_client
import
make_asgi_app
import
vllm
import
vllm
from
vllm.engine.arg_utils
import
AsyncEngineArgs
from
vllm.engine.arg_utils
import
AsyncEngineArgs
from
vllm.engine.async_llm_engine
import
AsyncLLMEngine
from
vllm.engine.async_llm_engine
import
AsyncLLMEngine
from
vllm.entrypoints.openai.protocol
import
(
CompletionRequest
,
ChatCompletionRequest
,
ErrorResponse
)
from
vllm.logger
import
init_logger
from
vllm.entrypoints.openai.cli_args
import
make_arg_parser
from
vllm.entrypoints.openai.cli_args
import
make_arg_parser
from
vllm.entrypoints.openai.protocol
import
(
ChatCompletionRequest
,
CompletionRequest
,
ErrorResponse
)
from
vllm.entrypoints.openai.serving_chat
import
OpenAIServingChat
from
vllm.entrypoints.openai.serving_chat
import
OpenAIServingChat
from
vllm.entrypoints.openai.serving_completion
import
OpenAIServingCompletion
from
vllm.entrypoints.openai.serving_completion
import
OpenAIServingCompletion
from
vllm.logger
import
init_logger
TIMEOUT_KEEP_ALIVE
=
5
# seconds
TIMEOUT_KEEP_ALIVE
=
5
# seconds
...
...
vllm/entrypoints/openai/protocol.py
View file @
01bfb22b
...
@@ -3,12 +3,11 @@
...
@@ -3,12 +3,11 @@
import
time
import
time
from
typing
import
Dict
,
List
,
Literal
,
Optional
,
Union
from
typing
import
Dict
,
List
,
Literal
,
Optional
,
Union
import
torch
from
pydantic
import
BaseModel
,
Field
,
model_validator
from
pydantic
import
BaseModel
,
Field
,
model_validator
from
vllm.utils
import
random_uuid
from
vllm.sampling_params
import
SamplingParams
from
vllm.sampling_params
import
SamplingParams
from
vllm.utils
import
random_uuid
import
torch
class
ErrorResponse
(
BaseModel
):
class
ErrorResponse
(
BaseModel
):
...
...
vllm/entrypoints/openai/serving_chat.py
View file @
01bfb22b
import
time
import
codecs
import
codecs
import
time
from
typing
import
AsyncGenerator
,
AsyncIterator
,
List
,
Optional
,
Union
from
fastapi
import
Request
from
fastapi
import
Request
from
typing
import
AsyncGenerator
,
AsyncIterator
,
Optional
,
List
,
Union
from
vllm.logger
import
init_logger
from
vllm.utils
import
random_uuid
from
vllm.engine.async_llm_engine
import
AsyncLLMEngine
from
vllm.engine.async_llm_engine
import
AsyncLLMEngine
from
vllm.entrypoints.openai.protocol
import
(
from
vllm.entrypoints.openai.protocol
import
(
ChatCompletionRequest
,
ChatCompletionResponse
,
ChatCompletionRequest
,
ChatCompletionResponse
,
ChatCompletionResponseChoice
,
ChatCompletionResponseStreamChoice
,
ChatCompletionResponseChoice
,
ChatCompletionResponseStreamChoice
,
ChatCompletionStreamResponse
,
ChatMessage
,
DeltaMessage
,
ErrorResponse
,
ChatCompletionStreamResponse
,
ChatMessage
,
DeltaMessage
,
ErrorResponse
,
UsageInfo
)
UsageInfo
)
from
vllm.
outputs
import
RequestOutput
from
vllm.
entrypoints.openai.serving_engine
import
LoRA
,
OpenAIServing
from
vllm.
entrypoints.openai.serving_engine
import
OpenAIServing
,
LoRA
from
vllm.
logger
import
init_logger
from
vllm.model_executor.guided_decoding
import
(
from
vllm.model_executor.guided_decoding
import
(
get_guided_decoding_logits_processor
)
get_guided_decoding_logits_processor
)
from
vllm.outputs
import
RequestOutput
from
vllm.utils
import
random_uuid
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
...
...
vllm/entrypoints/openai/serving_completion.py
View file @
01bfb22b
import
asyncio
import
asyncio
import
time
import
time
from
typing
import
(
AsyncGenerator
,
AsyncIterator
,
Callable
,
Dict
,
List
,
Optional
,
Tuple
)
from
fastapi
import
Request
from
fastapi
import
Request
from
typing
import
(
AsyncGenerator
,
AsyncIterator
,
Callable
,
List
,
Optional
,
Dict
,
Tuple
)
from
vllm.logger
import
init_logger
from
vllm.utils
import
random_uuid
from
vllm.engine.async_llm_engine
import
AsyncLLMEngine
from
vllm.engine.async_llm_engine
import
AsyncLLMEngine
from
vllm.entrypoints.openai.protocol
import
(
from
vllm.entrypoints.openai.protocol
import
(
CompletionRequest
,
CompletionRequest
,
CompletionResponse
,
CompletionResponse
,
CompletionResponseChoice
,
CompletionResponseChoice
,
CompletionResponseStreamChoice
,
CompletionResponseStreamChoice
,
CompletionStreamResponse
,
CompletionStreamResponse
,
LogProbs
,
LogProbs
,
UsageInfo
)
UsageInfo
,
from
vllm.entrypoints.openai.serving_engine
import
LoRA
,
OpenAIServing
)
from
vllm.logger
import
init_logger
from
vllm.outputs
import
RequestOutput
from
vllm.entrypoints.openai.serving_engine
import
OpenAIServing
,
LoRA
from
vllm.model_executor.guided_decoding
import
(
from
vllm.model_executor.guided_decoding
import
(
get_guided_decoding_logits_processor
)
get_guided_decoding_logits_processor
)
from
vllm.outputs
import
RequestOutput
from
vllm.utils
import
random_uuid
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
...
...
Prev
1
2
3
4
5
6
7
8
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment