norm / vllm · Commits · 0b98ba15

Commit 0b98ba15 (unverified), authored Jun 17, 2023 by Woosuk Kwon, committed by GitHub on Jun 17, 2023. Parent: e5464ee4.

Change the name to vLLM (#150)

Changes: 88 files in this commit; this page shows 20 changed files, with 97 additions and 84 deletions (+97, -84).

vllm/model_executor/layers/__init__.py (+0, -0)
vllm/model_executor/layers/activation.py (+1, -1)
vllm/model_executor/layers/attention.py (+8, -8)
vllm/model_executor/layers/layernorm.py (+1, -1)
vllm/model_executor/layers/sampler.py (+4, -4)
vllm/model_executor/model_loader.py (+4, -4)
vllm/model_executor/models/__init__.py (+12, -0)
vllm/model_executor/models/gpt2.py (+16, -16)
vllm/model_executor/models/gpt_neox.py (+13, -12)
vllm/model_executor/models/llama.py (+14, -14)
vllm/model_executor/models/opt.py (+12, -12)
vllm/model_executor/parallel_utils/README.md (+0, -0)
vllm/model_executor/parallel_utils/__init__.py (+2, -2)
vllm/model_executor/parallel_utils/parallel_state.py (+1, -1)
vllm/model_executor/parallel_utils/tensor_parallel/__init__.py (+0, -0)
vllm/model_executor/parallel_utils/tensor_parallel/layers.py (+2, -2)
vllm/model_executor/parallel_utils/tensor_parallel/mappings.py (+2, -2)
vllm/model_executor/parallel_utils/tensor_parallel/random.py (+2, -2)
vllm/model_executor/parallel_utils/tensor_parallel/utils.py (+1, -1)
vllm/model_executor/utils.py (+2, -2)

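Aside, not part of the diff: the rename is mostly mechanical. The top-level package changes from cacheflow to vllm, and the attention classes are renamed (GPTCacheFlowAttention → PagedAttention, GPTNeoXCacheFlowAttention → PagedAttentionWithRoPE); module paths below the package root are otherwise unchanged. An illustrative before/after for downstream imports:

    # Before this commit:
    #   from cacheflow import activation_ops
    #   from cacheflow.model_executor.input_metadata import InputMetadata
    # After:
    from vllm import activation_ops
    from vllm.model_executor.input_metadata import InputMetadata
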
cacheflow/model_executor/layers/__init__.py → vllm/model_executor/layers/__init__.py
File moved

cacheflow/model_executor/layers/activation.py → vllm/model_executor/layers/activation.py

@@ -2,7 +2,7 @@
 import torch
 import torch.nn as nn

-from cacheflow import activation_ops
+from vllm import activation_ops

 _ACTIVATION_REGISTRY = {
     "gelu": nn.GELU(),

cacheflow/model_executor/layers/attention.py → vllm/model_executor/layers/attention.py

@@ -5,16 +5,16 @@ import torch
 import torch.nn as nn
 from xformers import ops as xops

-from cacheflow import attention_ops
-from cacheflow import cache_ops
-from cacheflow import pos_encoding_ops
-from cacheflow.model_executor.input_metadata import InputMetadata
+from vllm import attention_ops
+from vllm import cache_ops
+from vllm import pos_encoding_ops
+from vllm.model_executor.input_metadata import InputMetadata

 _SUPPORTED_HEAD_SIZES = [64, 80, 96, 128]


-class GPTCacheFlowAttention(nn.Module):
-    """GPT-style multi-head attention.
+class PagedAttention(nn.Module):
+    """GPT-style multi-head PagedAttention.

     This class takes flattened 1D query, key, and value tensors as input. The
     input 1D tensors can be split into three parts: the prompt tokens, the

@@ -164,8 +164,8 @@ class GPTCacheFlowAttention(nn.Module):
         return output.view(-1, self.num_heads * self.head_size)


-class GPTNeoXCacheFlowAttention(GPTCacheFlowAttention):
-    """Attention with GPT-NeoX style rotary embedding."""
+class PagedAttentionWithRoPE(PagedAttention):
+    """PagedAttention with GPT-NeoX style rotary embedding."""

     def __init__(
         self,

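Aside, not part of the diff: the docstring above refers to "flattened 1D" inputs. A minimal sketch, with illustrative shapes only, of what that packing convention means — sequences of different lengths are concatenated along a single token dimension instead of being padded into a [batch, max_len] tensor:

    import torch

    num_heads, head_size = 12, 64
    prompt_lens = [5, 3]                      # two prompts, different lengths
    num_tokens = sum(prompt_lens)             # 8 tokens total, no padding
    # Flattened inputs, as the docstring describes:
    query = torch.randn(num_tokens, num_heads * head_size)
    key = torch.randn(num_tokens, num_heads * head_size)
    value = torch.randn(num_tokens, num_heads * head_size)
    # Heads are recovered by a view, mirroring the
    # output.view(-1, num_heads * head_size) line in the hunk above:
    q = query.view(-1, num_heads, head_size)  # [num_tokens, num_heads, head_size]
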
cacheflow/model_executor/layers/layernorm.py → vllm/model_executor/layers/layernorm.py

@@ -2,7 +2,7 @@
 import torch
 import torch.nn as nn

-from cacheflow import layernorm_ops
+from vllm import layernorm_ops


 class RMSNorm(nn.Module):

cacheflow/model_executor/layers/sampler.py → vllm/model_executor/layers/sampler.py

@@ -5,11 +5,11 @@ import numpy as np
 import torch
 import torch.nn as nn

-from cacheflow.model_executor.input_metadata import InputMetadata
-from cacheflow.model_executor.parallel_utils.tensor_parallel import (
+from vllm.model_executor.input_metadata import InputMetadata
+from vllm.model_executor.parallel_utils.tensor_parallel import (
     gather_from_tensor_model_parallel_region)
-from cacheflow.sampling_params import SamplingParams
-from cacheflow.sequence import SequenceOutputs
+from vllm.sampling_params import SamplingParams
+from vllm.sequence import SequenceOutputs


 class Sampler(nn.Module):

cacheflow/model_executor/model_loader.py → vllm/model_executor/model_loader.py

@@ -5,10 +5,10 @@ import torch
 import torch.nn as nn
 from transformers import PretrainedConfig

-from cacheflow.config import ModelConfig
-from cacheflow.model_executor.models import (
-    GPT2LMHeadModel, GPTNeoXForCausalLM, LlamaForCausalLM, OPTForCausalLM)
-from cacheflow.model_executor.weight_utils import initialize_dummy_weights
+from vllm.config import ModelConfig
+from vllm.model_executor.models import (
+    GPT2LMHeadModel, GPTNeoXForCausalLM, LlamaForCausalLM, OPTForCausalLM)
+from vllm.model_executor.weight_utils import initialize_dummy_weights

 # TODO(woosuk): Lazy-load the model classes.
 _MODEL_REGISTRY = {

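Aside, not part of the diff: the registry body is truncated in this capture. A plausible shape, with hypothetical keys, of how such a registry maps Hugging Face architecture names to the model classes imported in the hunk above, so the loader can pick a class from a checkpoint's config:

    # Hypothetical entries -- the actual mapping is cut off above.
    _MODEL_REGISTRY = {
        "GPT2LMHeadModel": GPT2LMHeadModel,
        "GPTNeoXForCausalLM": GPTNeoXForCausalLM,
        "LlamaForCausalLM": LlamaForCausalLM,
        "OPTForCausalLM": OPTForCausalLM,
    }

    def _get_model_class(config: PretrainedConfig) -> type:
        # Typical lookup: match any architecture name listed in the HF config.
        for arch in getattr(config, "architectures", []):
            if arch in _MODEL_REGISTRY:
                return _MODEL_REGISTRY[arch]
        raise ValueError(f"Unsupported architectures: {config.architectures}")
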
cacheflow/model_executor/models/__init__.py → vllm/model_executor/models/__init__.py

-from cacheflow.model_executor.models.gpt_neox import GPTNeoXForCausalLM
-from cacheflow.model_executor.models.gpt2 import GPT2LMHeadModel
-from cacheflow.model_executor.models.llama import LlamaForCausalLM
-from cacheflow.model_executor.models.opt import OPTForCausalLM
+from vllm.model_executor.models.gpt_neox import GPTNeoXForCausalLM
+from vllm.model_executor.models.gpt2 import GPT2LMHeadModel
+from vllm.model_executor.models.llama import LlamaForCausalLM
+from vllm.model_executor.models.opt import OPTForCausalLM

 __all__ = [

cacheflow/model_executor/models/gpt2.py → vllm/model_executor/models/gpt2.py

 # coding=utf-8
 # Adapted from https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/gpt2/modeling_gpt2.py
-# Copyright 2023 The CacheFlow team.
+# Copyright 2023 The vLLM team.
 # Copyright 2018 The OpenAI Team Authors and HuggingFace Inc. team.
 # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 #

@@ -26,17 +26,17 @@ import torch
 from torch import nn
 from transformers import GPT2Config

-from cacheflow.model_executor.input_metadata import InputMetadata
-from cacheflow.model_executor.layers.activation import get_act_fn
-from cacheflow.model_executor.layers.attention import GPTCacheFlowAttention
-from cacheflow.model_executor.layers.sampler import Sampler
-from cacheflow.model_executor.weight_utils import (hf_model_weights_iterator,
-                                                   load_tensor_parallel_weights)
-from cacheflow.model_executor.parallel_utils.parallel_state import (
+from vllm.model_executor.input_metadata import InputMetadata
+from vllm.model_executor.layers.activation import get_act_fn
+from vllm.model_executor.layers.attention import PagedAttention
+from vllm.model_executor.layers.sampler import Sampler
+from vllm.model_executor.weight_utils import (hf_model_weights_iterator,
+                                              load_tensor_parallel_weights)
+from vllm.model_executor.parallel_utils.parallel_state import (
     get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size)
-from cacheflow.model_executor.parallel_utils.tensor_parallel import (
+from vllm.model_executor.parallel_utils.tensor_parallel import (
     VocabParallelEmbedding, ColumnParallelLinear, RowParallelLinear)
-from cacheflow.sequence import SequenceOutputs
+from vllm.sequence import SequenceOutputs

 KVCache = Tuple[torch.Tensor, torch.Tensor]

@@ -53,14 +53,14 @@ class GPT2Attention(nn.Module):
         self.head_dim = self.hidden_size // total_num_heads
         self.scale = self.head_dim ** -0.5

-        self.c_attn = ColumnParallelLinear(self.hidden_size, 3 * self.hidden_size,
-                                           bias=True, gather_output=False,
+        self.c_attn = ColumnParallelLinear(self.hidden_size, 3 * self.hidden_size,
+                                           bias=True, gather_output=False,
                                            perform_initialization=False)
-        self.c_proj = RowParallelLinear(self.hidden_size, self.hidden_size,
-                                        bias=True, input_is_parallel=True,
+        self.c_proj = RowParallelLinear(self.hidden_size, self.hidden_size,
+                                        bias=True, input_is_parallel=True,
                                         perform_initialization=False)
-        self.attn = GPTCacheFlowAttention(self.num_heads, self.head_dim,
-                                          scale=self.scale)
+        self.attn = PagedAttention(self.num_heads, self.head_dim,
+                                   scale=self.scale)

     def forward(
         self,

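Aside, not part of the diff: the gather_output=False / input_is_parallel=True pairing above is the Megatron-LM pattern — the column-parallel layer leaves each rank holding a slice of the output features, and the row-parallel layer consumes that slice, deferring communication to a single all-reduce. A self-contained sketch in plain PyTorch (two simulated "ranks" in one process) of why that composition is exact:

    import torch

    torch.manual_seed(0)
    hidden = 8
    x = torch.randn(1, hidden)
    w1 = torch.randn(hidden, hidden)  # column-parallel weight (sharded on dim 1)
    w2 = torch.randn(hidden, hidden)  # row-parallel weight (sharded on dim 0)

    full = x @ w1 @ w2                # single-GPU reference result

    partials = []
    for rank in range(2):             # each "rank" holds matching shards
        cols = w1[:, rank * 4:(rank + 1) * 4]  # ColumnParallelLinear shard
        rows = w2[rank * 4:(rank + 1) * 4, :]  # RowParallelLinear shard
        partials.append((x @ cols) @ rows)     # no cross-rank communication yet

    tp = partials[0] + partials[1]    # the all-reduce step
    assert torch.allclose(full, tp, atol=1e-5)
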
cacheflow/model_executor/models/gpt_neox.py → vllm/model_executor/models/gpt_neox.py

 # coding=utf-8
 # Adapted from https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/gpt_neox/modeling_gpt_neox.py
-# Copyright 2023 The CacheFlow team.
+# Copyright 2023 The vLLM team.
 # Copyright 2022 EleutherAI The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

@@ -25,17 +25,17 @@ import torch
 from torch import nn
 from transformers import GPTNeoXConfig

-from cacheflow.model_executor.input_metadata import InputMetadata
-from cacheflow.model_executor.layers.activation import get_act_fn
-from cacheflow.model_executor.layers.attention import GPTNeoXCacheFlowAttention
-from cacheflow.model_executor.layers.sampler import Sampler
-from cacheflow.model_executor.weight_utils import (hf_model_weights_iterator,
-                                                   load_tensor_parallel_weights)
-from cacheflow.model_executor.parallel_utils.parallel_state import (
+from vllm.model_executor.input_metadata import InputMetadata
+from vllm.model_executor.layers.activation import get_act_fn
+from vllm.model_executor.layers.attention import PagedAttentionWithRoPE
+from vllm.model_executor.layers.sampler import Sampler
+from vllm.model_executor.weight_utils import (hf_model_weights_iterator,
+                                              load_tensor_parallel_weights)
+from vllm.model_executor.parallel_utils.parallel_state import (
     get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size)
-from cacheflow.model_executor.parallel_utils.tensor_parallel import (
+from vllm.model_executor.parallel_utils.tensor_parallel import (
     VocabParallelEmbedding, ColumnParallelLinear, RowParallelLinear)
-from cacheflow.sequence import SequenceOutputs
+from vllm.sequence import SequenceOutputs

 KVCache = Tuple[torch.Tensor, torch.Tensor]

@@ -63,8 +63,8 @@ class GPTNeoXAttention(nn.Module):
         scaling = self.head_size ** -0.5
         rotary_dim = int(self.head_size * config.rotary_pct)
         assert rotary_dim % 2 == 0
-        self.attn = GPTNeoXCacheFlowAttention(self.num_heads, self.head_size,
-                                              scaling, rotary_dim)
+        self.attn = PagedAttentionWithRoPE(self.num_heads, self.head_size,
+                                           scaling, rotary_dim)

     def forward(
         self,

@@ -149,6 +149,7 @@ class GPTNeoXLayer(nn.Module):
 class GPTNeoXModel(nn.Module):

     def __init__(self, config: GPTNeoXConfig):
         super().__init__()
         self.config = config

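Aside, for orientation: GPT-NeoX applies rotary embeddings to only a rotary_pct fraction of each head's dimensions, which is what the rotary_dim line in the hunk above computes. A worked example with illustrative numbers:

    head_size = 96
    rotary_pct = 0.25
    rotary_dim = int(head_size * rotary_pct)  # = 24: only 24 of 96 dims get RoPE
    assert rotary_dim % 2 == 0                # RoPE rotates dimensions in pairs
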
cacheflow/model_executor/models/llama.py → vllm/model_executor/models/llama.py

 # coding=utf-8
 # Adapted from https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/llama/modeling_llama.py
-# Copyright 2023 The CacheFlow team.
+# Copyright 2023 The vLLM team.
 # Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
 #
 # This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX

@@ -30,19 +30,19 @@ import torch
 from torch import nn
 from transformers import LlamaConfig

-from cacheflow.sequence import SequenceOutputs
-from cacheflow.model_executor.input_metadata import InputMetadata
-from cacheflow.model_executor.layers.activation import SiluAndMul
-from cacheflow.model_executor.layers.layernorm import RMSNorm
-from cacheflow.model_executor.layers.attention import GPTNeoXCacheFlowAttention
-from cacheflow.model_executor.layers.sampler import Sampler
-from cacheflow.model_executor.weight_utils import (hf_model_weights_iterator,
-                                                   load_tensor_parallel_weights)
-from cacheflow.model_executor.parallel_utils.parallel_state import (
+from vllm.sequence import SequenceOutputs
+from vllm.model_executor.input_metadata import InputMetadata
+from vllm.model_executor.layers.activation import SiluAndMul
+from vllm.model_executor.layers.layernorm import RMSNorm
+from vllm.model_executor.layers.attention import PagedAttentionWithRoPE
+from vllm.model_executor.layers.sampler import Sampler
+from vllm.model_executor.weight_utils import (hf_model_weights_iterator,
+                                              load_tensor_parallel_weights)
+from vllm.model_executor.parallel_utils.parallel_state import (
     get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size)
-from cacheflow.model_executor.parallel_utils.tensor_parallel import (
+from vllm.model_executor.parallel_utils.tensor_parallel import (
     VocabParallelEmbedding, ColumnParallelLinear, RowParallelLinear)
-from cacheflow.sequence import SequenceOutputs
+from vllm.sequence import SequenceOutputs

 KVCache = Tuple[torch.Tensor, torch.Tensor]

@@ -104,8 +104,8 @@ class LlamaAttention(nn.Module):
             input_is_parallel=True,
             perform_initialization=False,
         )
-        self.attn = GPTNeoXCacheFlowAttention(self.num_heads, self.head_dim,
-                                              self.scaling, rotary_dim=self.head_dim)
+        self.attn = PagedAttentionWithRoPE(self.num_heads, self.head_dim,
+                                           self.scaling, rotary_dim=self.head_dim)

     def forward(
         self,

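Aside, not part of the diff: in contrast to GPT-NeoX above, LLaMA passes rotary_dim=self.head_dim, so rotary embeddings cover the whole head rather than a rotary_pct fraction. For reference, a minimal RoPE in plain PyTorch (interleaved-pair convention; vLLM's fused pos_encoding_ops kernel is the real implementation and may order dimensions differently):

    import torch

    def rope(x: torch.Tensor, pos: torch.Tensor, base: float = 10000.0) -> torch.Tensor:
        # x: [seq_len, rotary_dim] with rotary_dim even; pos: [seq_len] positions.
        dim = x.shape[-1]
        inv_freq = 1.0 / (base ** (torch.arange(0, dim, 2).float() / dim))
        angles = pos[:, None].float() * inv_freq[None, :]  # [seq_len, dim // 2]
        cos, sin = angles.cos(), angles.sin()
        x1, x2 = x[..., 0::2], x[..., 1::2]                # rotate dims in pairs
        out = torch.empty_like(x)
        out[..., 0::2] = x1 * cos - x2 * sin
        out[..., 1::2] = x1 * sin + x2 * cos
        return out
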
cacheflow/model_executor/models/opt.py → vllm/model_executor/models/opt.py

 # coding=utf-8
 # Adapted from https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/opt/modeling_opt.py
-# Copyright 2023 The CacheFlow team.
+# Copyright 2023 The vLLM team.
 # Copyright 2022 The Fairseq Authors and The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

@@ -25,17 +25,17 @@ import torch
 from torch import nn
 from transformers import OPTConfig

-from cacheflow.model_executor.input_metadata import InputMetadata
-from cacheflow.model_executor.layers.activation import get_act_fn
-from cacheflow.model_executor.layers.attention import GPTCacheFlowAttention
-from cacheflow.model_executor.layers.sampler import Sampler
-from cacheflow.model_executor.weight_utils import (hf_model_weights_iterator,
-                                                   load_tensor_parallel_weights)
-from cacheflow.model_executor.parallel_utils.parallel_state import (
+from vllm.model_executor.input_metadata import InputMetadata
+from vllm.model_executor.layers.activation import get_act_fn
+from vllm.model_executor.layers.attention import PagedAttention
+from vllm.model_executor.layers.sampler import Sampler
+from vllm.model_executor.weight_utils import (hf_model_weights_iterator,
+                                              load_tensor_parallel_weights)
+from vllm.model_executor.parallel_utils.parallel_state import (
     get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size)
-from cacheflow.model_executor.parallel_utils.tensor_parallel import (
+from vllm.model_executor.parallel_utils.tensor_parallel import (
     VocabParallelEmbedding, ColumnParallelLinear, RowParallelLinear)
-from cacheflow.sequence import SequenceOutputs
+from vllm.sequence import SequenceOutputs

 KVCache = Tuple[torch.Tensor, torch.Tensor]

@@ -75,8 +75,8 @@ class OPTAttention(nn.Module):
         self.out_proj = RowParallelLinear(embed_dim, embed_dim, bias=bias,
                                           input_is_parallel=True,
                                           perform_initialization=False)
-        self.attn = GPTCacheFlowAttention(self.num_heads, self.head_dim,
-                                          scale=self.scaling)
+        self.attn = PagedAttention(self.num_heads, self.head_dim,
+                                   scale=self.scaling)

     def forward(
         self,

cacheflow/model_executor/parallel_utils/README.md → vllm/model_executor/parallel_utils/README.md
File moved

cacheflow/model_executor/parallel_utils/__init__.py → vllm/model_executor/parallel_utils/__init__.py

-import cacheflow.model_executor.parallel_utils.parallel_state
-import cacheflow.model_executor.parallel_utils.tensor_parallel
+import vllm.model_executor.parallel_utils.parallel_state
+import vllm.model_executor.parallel_utils.tensor_parallel

 # Alias parallel_state as mpu, its legacy name
 mpu = parallel_state

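Aside, not part of the diff: the mpu alias above preserves the module's legacy Megatron-LM name, so callers written against that interface keep working. A hypothetical caller, assuming the alias resolves as the comment intends:

    from vllm.model_executor.parallel_utils import mpu

    rank = mpu.get_tensor_model_parallel_rank()  # same function parallel_state exports
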
cacheflow/model_executor/parallel_utils/parallel_state.py → vllm/model_executor/parallel_utils/parallel_state.py

-# Copyright 2023 The CacheFlow team.
+# Copyright 2023 The vLLM team.
 # Adapted from https://github.com/NVIDIA/Megatron-LM/blob/main/megatron/core/parallel_state.py
 # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

cacheflow/model_executor/parallel_utils/tensor_parallel/__init__.py → vllm/model_executor/parallel_utils/tensor_parallel/__init__.py
File moved

cacheflow/model_executor/parallel_utils/tensor_parallel/layers.py → vllm/model_executor/parallel_utils/tensor_parallel/layers.py

-# Copyright 2023 The CacheFlow team.
+# Copyright 2023 The vLLM team.
 # Adapted from https://github.com/NVIDIA/Megatron-LM/blob/main/megatron/core/tensor_parallel/layers.py
 # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

@@ -11,7 +11,7 @@ import torch.nn.functional as F
 import torch.nn.init as init
 from torch.nn.parameter import Parameter

-from cacheflow.model_executor.parallel_utils.parallel_state import (
+from vllm.model_executor.parallel_utils.parallel_state import (
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
     get_all_reduce_launcher,

cacheflow/model_executor/parallel_utils/tensor_parallel/mappings.py → vllm/model_executor/parallel_utils/tensor_parallel/mappings.py

-# Copyright 2023 The CacheFlow team.
+# Copyright 2023 The vLLM team.
 # Adapted from https://github.com/NVIDIA/Megatron-LM/blob/main/megatron/core/tensor_parallel/mappings.py
 # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

 import torch

-from cacheflow.model_executor.parallel_utils.parallel_state import (
+from vllm.model_executor.parallel_utils.parallel_state import (
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
     get_tensor_model_parallel_group,

cacheflow/model_executor/parallel_utils/tensor_parallel/random.py → vllm/model_executor/parallel_utils/tensor_parallel/random.py

-# Copyright 2023 The CacheFlow team.
+# Copyright 2023 The vLLM team.
 # Adapted from https://github.com/NVIDIA/Megatron-LM/blob/main/megatron/core/tensor_parallel/random.py
 # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

@@ -11,7 +11,7 @@ import torch
 from torch import _C
 from torch.cuda import _lazy_call, device as device_ctx_manager

-from cacheflow.model_executor.parallel_utils.parallel_state import (
+from vllm.model_executor.parallel_utils.parallel_state import (
     get_tensor_model_parallel_rank,
 )

cacheflow/model_executor/parallel_utils/tensor_parallel/utils.py → vllm/model_executor/parallel_utils/tensor_parallel/utils.py

-# Copyright 2023 The CacheFlow team.
+# Copyright 2023 The vLLM team.
 # Adapted from https://github.com/NVIDIA/Megatron-LM/blob/main/megatron/core/tensor_parallel/utils.py
 # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

cacheflow/model_executor/utils.py → vllm/model_executor/utils.py

@@ -4,8 +4,8 @@ import random
 import numpy as np
 import torch

-from cacheflow.model_executor.parallel_utils.parallel_state import model_parallel_is_initialized
-from cacheflow.model_executor.parallel_utils.tensor_parallel import model_parallel_cuda_manual_seed
+from vllm.model_executor.parallel_utils.parallel_state import model_parallel_is_initialized
+from vllm.model_executor.parallel_utils.tensor_parallel import model_parallel_cuda_manual_seed


 def set_random_seed(seed: int) -> None:

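Aside, not part of the diff: the body of set_random_seed is cut off in this capture. A plausible sketch, inferred only from the imports in the hunk above (the real implementation may differ) — seed every RNG source, and touch the tensor-parallel CUDA RNG tracker only once model parallelism is initialized:

    def set_random_seed(seed: int) -> None:
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed_all(seed)
        # Guard suggested by the imported model_parallel_is_initialized helper.
        if model_parallel_is_initialized():
            model_parallel_cuda_manual_seed(seed)
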