Commit 66b809cc authored by zhuwenwen
Browse files

Merge tag 'v0.7.2' into v0.7.2-dev

parents 37b63c24 0408efc6
# SPDX-License-Identifier: Apache-2.0
"""Block manager utils."""
from vllm.sequence import SequenceGroup
from vllm.utils import (STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE,
......
# SPDX-License-Identifier: Apache-2.0
"""A block manager that manages token blocks."""
from typing import Dict, List, Optional
from typing import Sequence as GenericSequence
......
# SPDX-License-Identifier: Apache-2.0
import enum
import heapq
from abc import ABC, abstractmethod
......
# SPDX-License-Identifier: Apache-2.0
import enum
from abc import ABC, abstractmethod
from typing import List
......
# SPDX-License-Identifier: Apache-2.0
from typing import List, Tuple
from vllm.core.interfaces import AllocStatus, BlockSpaceManager
......
# SPDX-License-Identifier: Apache-2.0
import enum
import os
import random
......
# SPDX-License-Identifier: Apache-2.0
# cumem-based pytorch pluggable allocator to implement sleep mode.
# other approaches tried but failed:
# - cuda-python package binding
......
# SPDX-License-Identifier: Apache-2.0
from .communication_op import *
from .parallel_state import *
from .utils import *
# SPDX-License-Identifier: Apache-2.0
from typing import Any, Dict, Optional, Union
import torch
......
# SPDX-License-Identifier: Apache-2.0
"""This file is a pure Python wrapper for the cudart library.
It avoids the need to compile a separate shared library, and is
convenient for use when we just need to call a few functions.
......
# SPDX-License-Identifier: Apache-2.0
import ctypes
from contextlib import contextmanager
from typing import List, Optional, Union
......
# SPDX-License-Identifier: Apache-2.0
import ctypes
import json
import os
......
# SPDX-License-Identifier: Apache-2.0
import torch
import torch.distributed as dist
from torch.distributed import ProcessGroup
......
# SPDX-License-Identifier: Apache-2.0
from typing import Optional, Union
# ===================== import region =====================
......
# SPDX-License-Identifier: Apache-2.0
# This file is a pure Python wrapper for the NCCL library.
# The main purpose is to use NCCL combined with CUDA graph.
# Before writing this script, we tried the following approach:
......
# SPDX-License-Identifier: Apache-2.0
import os
import pickle
import sys
......
# SPDX-License-Identifier: Apache-2.0
import os
import torch
......
# SPDX-License-Identifier: Apache-2.0
import torch
import torch.distributed as dist
from torch.distributed import ProcessGroup
......
# SPDX-License-Identifier: Apache-2.0
"""
KVConnectorBase Class for Distributed KV Cache & Hidden State communication
......
# SPDX-License-Identifier: Apache-2.0
import importlib
from typing import TYPE_CHECKING, Callable, Dict, Type
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment