Unverified commit e0c6f556, authored by Yanming W and committed by GitHub
Browse files

[Build] Avoid building too many extensions (#1624)

parent de23687d
......@@ -27,7 +27,7 @@ from typing import Any, Dict, Optional, Tuple, Union
import torch
import torch.nn as nn
from vllm import pos_encoding_ops
from vllm._C import ops
class RotaryEmbedding(nn.Module):
......@@ -87,11 +87,10 @@ class RotaryEmbedding(nn.Module):
query: torch.Tensor,
key: torch.Tensor,
) -> Tuple[torch.Tensor, torch.Tensor]:
# pos_encoding_ops.rotary_embedding() is an in-place operation that
# ops.rotary_embedding() is an in-place operation that
# updates the query and key tensors.
pos_encoding_ops.rotary_embedding(positions, query, key,
self.head_size, self.cos_sin_cache,
self.is_neox_style)
ops.rotary_embedding(positions, query, key, self.head_size,
self.cos_sin_cache, self.is_neox_style)
return query, key
......
......@@ -5,7 +5,7 @@ from platform import uname
import psutil
import torch
from vllm import cuda_utils
from vllm._C import cuda_utils
class Device(enum.Enum):
......
......@@ -3,7 +3,7 @@ from typing import Dict, List, Tuple
import torch
from vllm import cache_ops
from vllm._C import cache_ops
from vllm.config import CacheConfig, ModelConfig, ParallelConfig
from vllm.logger import init_logger
from vllm.utils import in_wsl
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment