vllm / Commits / 0b98ba15

Commit 0b98ba15 (unverified), authored Jun 17, 2023 by Woosuk Kwon; committed via GitHub on Jun 17, 2023.

Change the name to vLLM (#150)

Parent: e5464ee4

The commit touches 88 files in total; this page shows 20 changed files, with 97 additions and 84 deletions (+97, -84).
Files changed on this page:

vllm/model_executor/layers/__init__.py                            +0  -0
vllm/model_executor/layers/activation.py                          +1  -1
vllm/model_executor/layers/attention.py                           +8  -8
vllm/model_executor/layers/layernorm.py                           +1  -1
vllm/model_executor/layers/sampler.py                             +4  -4
vllm/model_executor/model_loader.py                               +4  -4
vllm/model_executor/models/__init__.py                            +12 -0
vllm/model_executor/models/gpt2.py                                +16 -16
vllm/model_executor/models/gpt_neox.py                            +13 -12
vllm/model_executor/models/llama.py                               +14 -14
vllm/model_executor/models/opt.py                                 +12 -12
vllm/model_executor/parallel_utils/README.md                      +0  -0
vllm/model_executor/parallel_utils/__init__.py                    +2  -2
vllm/model_executor/parallel_utils/parallel_state.py              +1  -1
vllm/model_executor/parallel_utils/tensor_parallel/__init__.py    +0  -0
vllm/model_executor/parallel_utils/tensor_parallel/layers.py      +2  -2
vllm/model_executor/parallel_utils/tensor_parallel/mappings.py    +2  -2
vllm/model_executor/parallel_utils/tensor_parallel/random.py      +2  -2
vllm/model_executor/parallel_utils/tensor_parallel/utils.py       +1  -1
vllm/model_executor/utils.py                                      +2  -2
cacheflow/model_executor/layers/__init__.py → vllm/model_executor/layers/__init__.py
File moved.
cacheflow/model_executor/layers/activation.py → vllm/model_executor/layers/activation.py

@@ -2,7 +2,7 @@
 import torch
 import torch.nn as nn
 
-from cacheflow import activation_ops
+from vllm import activation_ops
 
 _ACTIVATION_REGISTRY = {
     "gelu": nn.GELU(),
cacheflow/model_executor/layers/attention.py → vllm/model_executor/layers/attention.py

@@ -5,16 +5,16 @@ import torch
 import torch.nn as nn
 from xformers import ops as xops
 
-from cacheflow import attention_ops
-from cacheflow import cache_ops
-from cacheflow import pos_encoding_ops
-from cacheflow.model_executor.input_metadata import InputMetadata
+from vllm import attention_ops
+from vllm import cache_ops
+from vllm import pos_encoding_ops
+from vllm.model_executor.input_metadata import InputMetadata
 
 _SUPPORTED_HEAD_SIZES = [64, 80, 96, 128]
 
 
-class GPTCacheFlowAttention(nn.Module):
-    """GPT-style multi-head attention.
+class PagedAttention(nn.Module):
+    """GPT-style multi-head PagedAttention.
 
     This class takes flattened 1D query, key, and value tensors as input. The
     input 1D tensors can be split into three parts: the prompt tokens, the

@@ -164,8 +164,8 @@ class GPTCacheFlowAttention(nn.Module):
         return output.view(-1, self.num_heads * self.head_size)
 
 
-class GPTNeoXCacheFlowAttention(GPTCacheFlowAttention):
-    """Attention with GPT-NeoX style rotary embedding."""
+class PagedAttentionWithRoPE(PagedAttention):
+    """PagedAttention with GPT-NeoX style rotary embedding."""
 
     def __init__(
         self,
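For orientation, the model diffs below construct the renamed classes at their call sites. A minimal sketch based only on the signatures visible on this page; the head count and size are illustrative values, not taken from the commit:

# Illustrative only; mirrors the call sites in gpt2.py/opt.py and
# gpt_neox.py/llama.py below.
from vllm.model_executor.layers.attention import PagedAttention, PagedAttentionWithRoPE

num_heads, head_size = 12, 64   # illustrative; 64 is in _SUPPORTED_HEAD_SIZES
scale = head_size ** -0.5

attn = PagedAttention(num_heads, head_size, scale=scale)                    # gpt2.py, opt.py
rope_attn = PagedAttentionWithRoPE(num_heads, head_size, scale, head_size)  # gpt_neox.py, llama.py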
cacheflow/model_executor/layers/layernorm.py → vllm/model_executor/layers/layernorm.py

@@ -2,7 +2,7 @@
 import torch
 import torch.nn as nn
 
-from cacheflow import layernorm_ops
+from vllm import layernorm_ops
 
 
 class RMSNorm(nn.Module):
cacheflow/model_executor/layers/sampler.py → vllm/model_executor/layers/sampler.py

@@ -5,11 +5,11 @@ import numpy as np
 import torch
 import torch.nn as nn
 
-from cacheflow.model_executor.input_metadata import InputMetadata
-from cacheflow.model_executor.parallel_utils.tensor_parallel import (
+from vllm.model_executor.input_metadata import InputMetadata
+from vllm.model_executor.parallel_utils.tensor_parallel import (
     gather_from_tensor_model_parallel_region)
-from cacheflow.sampling_params import SamplingParams
-from cacheflow.sequence import SequenceOutputs
+from vllm.sampling_params import SamplingParams
+from vllm.sequence import SequenceOutputs
 
 
 class Sampler(nn.Module):
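The gather import hints at the sampler's tensor-parallel step: with a vocabulary-sharded output projection, each rank's logits cover only a slice of the vocabulary, so they must be gathered before computing probabilities. A hedged sketch of that step; the variable names are hypothetical:

# Hedged sketch (variable names hypothetical): each tensor-parallel rank holds
# logits for only a vocab shard, so they are gathered before softmax/sampling.
logits = gather_from_tensor_model_parallel_region(sharded_logits)
probs = torch.softmax(logits, dim=-1)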
cacheflow/model_executor/model_loader.py → vllm/model_executor/model_loader.py

@@ -5,10 +5,10 @@ import torch
 import torch.nn as nn
 from transformers import PretrainedConfig
 
-from cacheflow.config import ModelConfig
-from cacheflow.model_executor.models import (
-    GPT2LMHeadModel, GPTNeoXForCausalLM, LlamaForCausalLM, OPTForCausalLM)
-from cacheflow.model_executor.weight_utils import initialize_dummy_weights
+from vllm.config import ModelConfig
+from vllm.model_executor.models import (
+    GPT2LMHeadModel, GPTNeoXForCausalLM, LlamaForCausalLM, OPTForCausalLM)
+from vllm.model_executor.weight_utils import initialize_dummy_weights
 
 # TODO(woosuk): Lazy-load the model classes.
 _MODEL_REGISTRY = {
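The registry body is elided in this hunk. Given the four imported classes, it presumably maps Hugging Face architecture names to vLLM model classes; a hedged sketch of the dispatch pattern this implies, with an entirely hypothetical lookup helper:

# Hypothetical sketch: keys assumed to be HF `architectures` strings
# matching the four imported classes.
_MODEL_REGISTRY = {
    "GPT2LMHeadModel": GPT2LMHeadModel,
    "GPTNeoXForCausalLM": GPTNeoXForCausalLM,
    "LlamaForCausalLM": LlamaForCausalLM,
    "OPTForCausalLM": OPTForCausalLM,
}

def _get_model_class(config: PretrainedConfig) -> type:
    # Hypothetical helper: return the first registered architecture.
    for arch in getattr(config, "architectures", []):
        if arch in _MODEL_REGISTRY:
            return _MODEL_REGISTRY[arch]
    raise ValueError(f"Unsupported architectures: {config.architectures}")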
cacheflow/model_executor/models/__init__.py → vllm/model_executor/models/__init__.py

-from cacheflow.model_executor.models.gpt_neox import GPTNeoXForCausalLM
-from cacheflow.model_executor.models.gpt2 import GPT2LMHeadModel
-from cacheflow.model_executor.models.llama import LlamaForCausalLM
-from cacheflow.model_executor.models.opt import OPTForCausalLM
+from vllm.model_executor.models.gpt_neox import GPTNeoXForCausalLM
+from vllm.model_executor.models.gpt2 import GPT2LMHeadModel
+from vllm.model_executor.models.llama import LlamaForCausalLM
+from vllm.model_executor.models.opt import OPTForCausalLM
 
 __all__ = [
cacheflow/model_executor/models/gpt2.py → vllm/model_executor/models/gpt2.py

 # coding=utf-8
 # Adapted from https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/gpt2/modeling_gpt2.py
-# Copyright 2023 The CacheFlow team.
+# Copyright 2023 The vLLM team.
 # Copyright 2018 The OpenAI Team Authors and HuggingFace Inc. team.
 # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 #

@@ -26,17 +26,17 @@ import torch
 from torch import nn
 from transformers import GPT2Config
 
-from cacheflow.model_executor.input_metadata import InputMetadata
-from cacheflow.model_executor.layers.activation import get_act_fn
-from cacheflow.model_executor.layers.attention import GPTCacheFlowAttention
-from cacheflow.model_executor.layers.sampler import Sampler
-from cacheflow.model_executor.weight_utils import (
-    hf_model_weights_iterator, load_tensor_parallel_weights)
-from cacheflow.model_executor.parallel_utils.parallel_state import (
+from vllm.model_executor.input_metadata import InputMetadata
+from vllm.model_executor.layers.activation import get_act_fn
+from vllm.model_executor.layers.attention import PagedAttention
+from vllm.model_executor.layers.sampler import Sampler
+from vllm.model_executor.weight_utils import (
+    hf_model_weights_iterator, load_tensor_parallel_weights)
+from vllm.model_executor.parallel_utils.parallel_state import (
     get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size)
-from cacheflow.model_executor.parallel_utils.tensor_parallel import (
+from vllm.model_executor.parallel_utils.tensor_parallel import (
     VocabParallelEmbedding, ColumnParallelLinear, RowParallelLinear)
-from cacheflow.sequence import SequenceOutputs
+from vllm.sequence import SequenceOutputs
 
 KVCache = Tuple[torch.Tensor, torch.Tensor]

@@ -53,14 +53,14 @@ class GPT2Attention(nn.Module):
         self.head_dim = self.hidden_size // total_num_heads
         self.scale = self.head_dim ** -0.5
 
         self.c_attn = ColumnParallelLinear(self.hidden_size, 3 * self.hidden_size, bias=True, gather_output=False,
                                            perform_initialization=False)
         self.c_proj = RowParallelLinear(self.hidden_size, self.hidden_size, bias=True, input_is_parallel=True,
                                         perform_initialization=False)
-        self.attn = GPTCacheFlowAttention(self.num_heads, self.head_dim, scale=self.scale)
+        self.attn = PagedAttention(self.num_heads, self.head_dim, scale=self.scale)
 
     def forward(
         self,
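The c_attn/c_proj pairing above follows the Megatron tensor-parallel pattern: the fused QKV projection is column-parallel (each rank computes a slice of the 3 * hidden_size output, hence gather_output=False), and the output projection is row-parallel (it consumes that still-sharded activation, hence input_is_parallel=True) before an all-reduce. A runnable sketch of the per-rank shard arithmetic, with illustrative sizes not taken from the commit:

# Illustrative shard sizes for the Megatron-style split used above.
hidden_size = 768   # e.g. GPT-2 small (illustrative)
world_size = 4      # tensor-parallel ranks (illustrative)

# ColumnParallelLinear(hidden_size, 3 * hidden_size, gather_output=False):
# the fused QKV output dimension is split across ranks.
qkv_out_per_rank = 3 * hidden_size // world_size
assert qkv_out_per_rank == 576

# RowParallelLinear(hidden_size, hidden_size, input_is_parallel=True):
# the input dimension is split; partial outputs are summed by an all-reduce.
proj_in_per_rank = hidden_size // world_size
assert proj_in_per_rank == 192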
cacheflow/model_executor/models/gpt_neox.py → vllm/model_executor/models/gpt_neox.py

 # coding=utf-8
 # Adapted from https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/gpt_neox/modeling_gpt_neox.py
-# Copyright 2023 The CacheFlow team.
+# Copyright 2023 The vLLM team.
 # Copyright 2022 EleutherAI The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

@@ -25,17 +25,17 @@ import torch
 from torch import nn
 from transformers import GPTNeoXConfig
 
-from cacheflow.model_executor.input_metadata import InputMetadata
-from cacheflow.model_executor.layers.activation import get_act_fn
-from cacheflow.model_executor.layers.attention import GPTNeoXCacheFlowAttention
-from cacheflow.model_executor.layers.sampler import Sampler
-from cacheflow.model_executor.weight_utils import (
-    hf_model_weights_iterator, load_tensor_parallel_weights)
-from cacheflow.model_executor.parallel_utils.parallel_state import (
+from vllm.model_executor.input_metadata import InputMetadata
+from vllm.model_executor.layers.activation import get_act_fn
+from vllm.model_executor.layers.attention import PagedAttentionWithRoPE
+from vllm.model_executor.layers.sampler import Sampler
+from vllm.model_executor.weight_utils import (
+    hf_model_weights_iterator, load_tensor_parallel_weights)
+from vllm.model_executor.parallel_utils.parallel_state import (
     get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size)
-from cacheflow.model_executor.parallel_utils.tensor_parallel import (
+from vllm.model_executor.parallel_utils.tensor_parallel import (
     VocabParallelEmbedding, ColumnParallelLinear, RowParallelLinear)
-from cacheflow.sequence import SequenceOutputs
+from vllm.sequence import SequenceOutputs
 
 KVCache = Tuple[torch.Tensor, torch.Tensor]

@@ -63,8 +63,8 @@ class GPTNeoXAttention(nn.Module):
         scaling = self.head_size ** -0.5
         rotary_dim = int(self.head_size * config.rotary_pct)
         assert rotary_dim % 2 == 0
-        self.attn = GPTNeoXCacheFlowAttention(self.num_heads, self.head_size, scaling, rotary_dim)
+        self.attn = PagedAttentionWithRoPE(self.num_heads, self.head_size, scaling, rotary_dim)
 
     def forward(
         self,

@@ -149,6 +149,7 @@ class GPTNeoXLayer(nn.Module):
 
 class GPTNeoXModel(nn.Module):
 
     def __init__(self, config: GPTNeoXConfig):
         super().__init__()
+        self.config = config
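The rotary_dim computation in the @@ -63 hunk decides how many dimensions of each head receive rotary position embedding, and the assert holds because RoPE rotates dimensions in pairs. A worked example with GPT-NeoX-20B-style values (illustrative here; rotary_pct = 0.25 matches that model's config):

# Worked example of the rotary_dim arithmetic above (values illustrative).
head_size = 96      # e.g. GPT-NeoX-20B: 6144 hidden / 64 heads
rotary_pct = 0.25
rotary_dim = int(head_size * rotary_pct)   # 24
assert rotary_dim % 2 == 0                 # even: RoPE rotates pairs of dims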
cacheflow/model_executor/models/llama.py → vllm/model_executor/models/llama.py

 # coding=utf-8
 # Adapted from https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/llama/modeling_llama.py
-# Copyright 2023 The CacheFlow team.
+# Copyright 2023 The vLLM team.
 # Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
 #
 # This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX

@@ -30,19 +30,19 @@ import torch
 from torch import nn
 from transformers import LlamaConfig
 
-from cacheflow.sequence import SequenceOutputs
-from cacheflow.model_executor.input_metadata import InputMetadata
-from cacheflow.model_executor.layers.activation import SiluAndMul
-from cacheflow.model_executor.layers.layernorm import RMSNorm
-from cacheflow.model_executor.layers.attention import GPTNeoXCacheFlowAttention
-from cacheflow.model_executor.layers.sampler import Sampler
-from cacheflow.model_executor.weight_utils import (
-    hf_model_weights_iterator, load_tensor_parallel_weights)
-from cacheflow.model_executor.parallel_utils.parallel_state import (
+from vllm.sequence import SequenceOutputs
+from vllm.model_executor.input_metadata import InputMetadata
+from vllm.model_executor.layers.activation import SiluAndMul
+from vllm.model_executor.layers.layernorm import RMSNorm
+from vllm.model_executor.layers.attention import PagedAttentionWithRoPE
+from vllm.model_executor.layers.sampler import Sampler
+from vllm.model_executor.weight_utils import (
+    hf_model_weights_iterator, load_tensor_parallel_weights)
+from vllm.model_executor.parallel_utils.parallel_state import (
     get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size)
-from cacheflow.model_executor.parallel_utils.tensor_parallel import (
+from vllm.model_executor.parallel_utils.tensor_parallel import (
     VocabParallelEmbedding, ColumnParallelLinear, RowParallelLinear)
-from cacheflow.sequence import SequenceOutputs
+from vllm.sequence import SequenceOutputs
 
 KVCache = Tuple[torch.Tensor, torch.Tensor]

@@ -104,8 +104,8 @@ class LlamaAttention(nn.Module):
                                          input_is_parallel=True,
                                          perform_initialization=False,
         )
-        self.attn = GPTNeoXCacheFlowAttention(self.num_heads, self.head_dim, self.scaling, rotary_dim=self.head_dim)
+        self.attn = PagedAttentionWithRoPE(self.num_heads, self.head_dim, self.scaling, rotary_dim=self.head_dim)
 
     def forward(
        self,
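Unlike GPT-NeoX, LLaMA applies rotary embedding to the full head dimension, which is why the call site above passes rotary_dim=self.head_dim rather than a fraction of it. A sketch with illustrative LLaMA-7B-like values, not taken from the commit:

# Illustrative: LLaMA rotates the whole head, so rotary_dim == head_dim.
num_heads, head_dim = 32, 128   # e.g. LLaMA-7B: 4096 hidden / 32 heads
attn = PagedAttentionWithRoPE(num_heads, head_dim, head_dim ** -0.5, rotary_dim=head_dim)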
cacheflow/model_executor/models/opt.py → vllm/model_executor/models/opt.py

 # coding=utf-8
 # Adapted from https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/opt/modeling_opt.py
-# Copyright 2023 The CacheFlow team.
+# Copyright 2023 The vLLM team.
 # Copyright 2022 The Fairseq Authors and The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

@@ -25,17 +25,17 @@ import torch
 from torch import nn
 from transformers import OPTConfig
 
-from cacheflow.model_executor.input_metadata import InputMetadata
-from cacheflow.model_executor.layers.activation import get_act_fn
-from cacheflow.model_executor.layers.attention import GPTCacheFlowAttention
-from cacheflow.model_executor.layers.sampler import Sampler
-from cacheflow.model_executor.weight_utils import (
-    hf_model_weights_iterator, load_tensor_parallel_weights)
-from cacheflow.model_executor.parallel_utils.parallel_state import (
+from vllm.model_executor.input_metadata import InputMetadata
+from vllm.model_executor.layers.activation import get_act_fn
+from vllm.model_executor.layers.attention import PagedAttention
+from vllm.model_executor.layers.sampler import Sampler
+from vllm.model_executor.weight_utils import (
+    hf_model_weights_iterator, load_tensor_parallel_weights)
+from vllm.model_executor.parallel_utils.parallel_state import (
     get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size)
-from cacheflow.model_executor.parallel_utils.tensor_parallel import (
+from vllm.model_executor.parallel_utils.tensor_parallel import (
     VocabParallelEmbedding, ColumnParallelLinear, RowParallelLinear)
-from cacheflow.sequence import SequenceOutputs
+from vllm.sequence import SequenceOutputs
 
 KVCache = Tuple[torch.Tensor, torch.Tensor]

@@ -75,8 +75,8 @@ class OPTAttention(nn.Module):
         self.out_proj = RowParallelLinear(embed_dim, embed_dim, bias=bias, input_is_parallel=True,
                                           perform_initialization=False)
-        self.attn = GPTCacheFlowAttention(self.num_heads, self.head_dim, scale=self.scaling)
+        self.attn = PagedAttention(self.num_heads, self.head_dim, scale=self.scaling)
 
     def forward(
         self,
cacheflow/model_executor/parallel_utils/README.md → vllm/model_executor/parallel_utils/README.md
File moved.
cacheflow/model_executor/parallel_utils/__init__.py → vllm/model_executor/parallel_utils/__init__.py

-import cacheflow.model_executor.parallel_utils.parallel_state
-import cacheflow.model_executor.parallel_utils.tensor_parallel
+import vllm.model_executor.parallel_utils.parallel_state
+import vllm.model_executor.parallel_utils.tensor_parallel
 
 # Alias parallel_state as mpu, its legacy name
 mpu = parallel_state
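The mpu alias preserves the module's legacy Megatron name, so Megatron-derived code can keep referring to it unmodified. A hedged usage sketch, assuming the functions seen elsewhere in this diff are exposed by parallel_state:

# Hypothetical usage of the legacy alias.
from vllm.model_executor.parallel_utils import mpu

rank = mpu.get_tensor_model_parallel_rank()  # name appears in the hunks above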
cacheflow/model_executor/parallel_utils/parallel_state.py → vllm/model_executor/parallel_utils/parallel_state.py

-# Copyright 2023 The CacheFlow team.
+# Copyright 2023 The vLLM team.
 # Adapted from https://github.com/NVIDIA/Megatron-LM/blob/main/megatron/core/parallel_state.py
 # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
cacheflow/model_executor/parallel_utils/tensor_parallel/__init__.py → vllm/model_executor/parallel_utils/tensor_parallel/__init__.py
File moved.
cacheflow/model_executor/parallel_utils/tensor_parallel/layers.py → vllm/model_executor/parallel_utils/tensor_parallel/layers.py

-# Copyright 2023 The CacheFlow team.
+# Copyright 2023 The vLLM team.
 # Adapted from https://github.com/NVIDIA/Megatron-LM/blob/main/megatron/core/tensor_parallel/layers.py
 # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

@@ -11,7 +11,7 @@ import torch.nn.functional as F
 import torch.nn.init as init
 from torch.nn.parameter import Parameter
 
-from cacheflow.model_executor.parallel_utils.parallel_state import (
+from vllm.model_executor.parallel_utils.parallel_state import (
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
     get_all_reduce_launcher,
cacheflow/model_executor/parallel_utils/tensor_parallel/mappings.py → vllm/model_executor/parallel_utils/tensor_parallel/mappings.py

-# Copyright 2023 The CacheFlow team.
+# Copyright 2023 The vLLM team.
 # Adapted from https://github.com/NVIDIA/Megatron-LM/blob/main/megatron/core/tensor_parallel/mappings.py
 # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
 
 import torch
 
-from cacheflow.model_executor.parallel_utils.parallel_state import (
+from vllm.model_executor.parallel_utils.parallel_state import (
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
     get_tensor_model_parallel_group,
cacheflow/model_executor/parallel_utils/tensor_parallel/random.py → vllm/model_executor/parallel_utils/tensor_parallel/random.py

-# Copyright 2023 The CacheFlow team.
+# Copyright 2023 The vLLM team.
 # Adapted from https://github.com/NVIDIA/Megatron-LM/blob/main/megatron/core/tensor_parallel/random.py
 # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

@@ -11,7 +11,7 @@ import torch
 from torch import _C
 from torch.cuda import _lazy_call, device as device_ctx_manager
 
-from cacheflow.model_executor.parallel_utils.parallel_state import (
+from vllm.model_executor.parallel_utils.parallel_state import (
     get_tensor_model_parallel_rank,
 )
cacheflow/model_executor/parallel_utils/tensor_parallel/utils.py → vllm/model_executor/parallel_utils/tensor_parallel/utils.py

-# Copyright 2023 The CacheFlow team.
+# Copyright 2023 The vLLM team.
 # Adapted from https://github.com/NVIDIA/Megatron-LM/blob/main/megatron/core/tensor_parallel/utils.py
 # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
cacheflow/model_executor/utils.py → vllm/model_executor/utils.py

@@ -4,8 +4,8 @@ import random
 import numpy as np
 import torch
 
-from cacheflow.model_executor.parallel_utils.parallel_state import model_parallel_is_initialized
-from cacheflow.model_executor.parallel_utils.tensor_parallel import model_parallel_cuda_manual_seed
+from vllm.model_executor.parallel_utils.parallel_state import model_parallel_is_initialized
+from vllm.model_executor.parallel_utils.tensor_parallel import model_parallel_cuda_manual_seed
 
 
 def set_random_seed(seed: int) -> None:
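The body of set_random_seed is elided in this hunk. Given the imports it carries, a plausible sketch of what the function does (an assumption based on the imports, not the commit's verbatim code):

def set_random_seed(seed: int) -> None:
    # Assumed behavior: seed every RNG the model executor touches.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    # Seed the tensor-parallel CUDA RNG streams when model parallelism is up.
    if model_parallel_is_initialized():
        model_parallel_cuda_manual_seed(seed)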
(Diff page 1 of 5; the commit's remaining changed files are on the following pages.)