Commit 66b809cc authored by zhuwenwen's avatar zhuwenwen
Browse files

Merge tag 'v0.7.2' into v0.7.2-dev

parents 37b63c24 0408efc6
# SPDX-License-Identifier: Apache-2.0
"""Block manager utils.""" """Block manager utils."""
from vllm.sequence import SequenceGroup from vllm.sequence import SequenceGroup
from vllm.utils import (STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE, from vllm.utils import (STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE,
......
# SPDX-License-Identifier: Apache-2.0
"""A block manager that manages token blocks.""" """A block manager that manages token blocks."""
from typing import Dict, List, Optional from typing import Dict, List, Optional
from typing import Sequence as GenericSequence from typing import Sequence as GenericSequence
......
# SPDX-License-Identifier: Apache-2.0
import enum import enum
import heapq import heapq
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
......
# SPDX-License-Identifier: Apache-2.0
import enum import enum
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from typing import List from typing import List
......
# SPDX-License-Identifier: Apache-2.0
from typing import List, Tuple from typing import List, Tuple
from vllm.core.interfaces import AllocStatus, BlockSpaceManager from vllm.core.interfaces import AllocStatus, BlockSpaceManager
......
# SPDX-License-Identifier: Apache-2.0
import enum import enum
import os import os
import random import random
......
# SPDX-License-Identifier: Apache-2.0
# cumem-based pytorch pluggable allocator to implement sleep mode. # cumem-based pytorch pluggable allocator to implement sleep mode.
# other approaches tried but failed: # other approaches tried but failed:
# - cuda-python package binding # - cuda-python package binding
......
# SPDX-License-Identifier: Apache-2.0
from .communication_op import * from .communication_op import *
from .parallel_state import * from .parallel_state import *
from .utils import * from .utils import *
# SPDX-License-Identifier: Apache-2.0
from typing import Any, Dict, Optional, Union from typing import Any, Dict, Optional, Union
import torch import torch
......
# SPDX-License-Identifier: Apache-2.0
"""This file is a pure Python wrapper for the cudart library. """This file is a pure Python wrapper for the cudart library.
It avoids the need to compile a separate shared library, and is It avoids the need to compile a separate shared library, and is
convenient for use when we just need to call a few functions. convenient for use when we just need to call a few functions.
......
# SPDX-License-Identifier: Apache-2.0
import ctypes import ctypes
from contextlib import contextmanager from contextlib import contextmanager
from typing import List, Optional, Union from typing import List, Optional, Union
......
# SPDX-License-Identifier: Apache-2.0
import ctypes import ctypes
import json import json
import os import os
......
# SPDX-License-Identifier: Apache-2.0
import torch import torch
import torch.distributed as dist import torch.distributed as dist
from torch.distributed import ProcessGroup from torch.distributed import ProcessGroup
......
# SPDX-License-Identifier: Apache-2.0
from typing import Optional, Union from typing import Optional, Union
# ===================== import region ===================== # ===================== import region =====================
......
# SPDX-License-Identifier: Apache-2.0
# This file is a pure Python wrapper for the NCCL library. # This file is a pure Python wrapper for the NCCL library.
# The main purpose is to use NCCL combined with CUDA graph. # The main purpose is to use NCCL combined with CUDA graph.
# Before writing this script, we tried the following approach: # Before writing this script, we tried the following approach:
......
# SPDX-License-Identifier: Apache-2.0
import os import os
import pickle import pickle
import sys import sys
......
# SPDX-License-Identifier: Apache-2.0
import os import os
import torch import torch
......
# SPDX-License-Identifier: Apache-2.0
import torch import torch
import torch.distributed as dist import torch.distributed as dist
from torch.distributed import ProcessGroup from torch.distributed import ProcessGroup
......
# SPDX-License-Identifier: Apache-2.0
""" """
KVConnectorBase Class for Distributed KV Cache & Hidden State communication KVConnectorBase Class for Distributed KV Cache & Hidden State communication
......
# SPDX-License-Identifier: Apache-2.0
import importlib import importlib
from typing import TYPE_CHECKING, Callable, Dict, Type from typing import TYPE_CHECKING, Callable, Dict, Type
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment