SOURCES.txt 3.77 KB
Newer Older
chenzk's avatar
v1.0  
chenzk committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
LICENSE
MANIFEST.in
README.md
pyproject.toml
requirements.txt
setup.py
csrc/activation.cpp
csrc/activation_kernels.cu
csrc/attention.cpp
csrc/cache.cpp
csrc/cache_kernels.cu
csrc/cuda_utils.cpp
csrc/cuda_utils_kernels.cu
csrc/dispatch_utils.h
csrc/layernorm.cpp
csrc/layernorm_kernels.cu
csrc/pos_encoding.cpp
csrc/pos_encoding_kernels.cu
csrc/quantization.cpp
csrc/reduction_utils.cuh
csrc/attention/attention_dtypes.h
csrc/attention/attention_generic.cuh
csrc/attention/attention_kernels.cu
csrc/attention/attention_utils.cuh
csrc/attention/dtype_bfloat16.cuh
csrc/attention/dtype_float16.cuh
csrc/attention/dtype_float32.cuh
csrc/quantization/awq/dequantize.cuh
csrc/quantization/awq/gemm_kernels.cu
csrc/quantization/squeezellm/quant_cuda_kernel.cu
tests/test_regression.py
vllm/__init__.py
vllm/block.py
vllm/config.py
vllm/logger.py
vllm/outputs.py
vllm/py.typed
vllm/sampling_params.py
vllm/sequence.py
vllm/utils.py
vllm.egg-info/PKG-INFO
vllm.egg-info/SOURCES.txt
vllm.egg-info/dependency_links.txt
vllm.egg-info/requires.txt
vllm.egg-info/top_level.txt
vllm/core/__init__.py
vllm/core/block_manager.py
vllm/core/policy.py
vllm/core/scheduler.py
vllm/engine/__init__.py
vllm/engine/arg_utils.py
vllm/engine/async_llm_engine.py
vllm/engine/llm_engine.py
vllm/engine/ray_utils.py
vllm/entrypoints/__init__.py
vllm/entrypoints/api_server.py
vllm/entrypoints/llm.py
vllm/entrypoints/openai/__init__.py
vllm/entrypoints/openai/api_server.py
vllm/entrypoints/openai/protocol.py
vllm/model_executor/__init__.py
vllm/model_executor/input_metadata.py
vllm/model_executor/model_loader.py
vllm/model_executor/utils.py
vllm/model_executor/weight_utils.py
vllm/model_executor/layers/__init__.py
vllm/model_executor/layers/activation.py
vllm/model_executor/layers/attention.py
vllm/model_executor/layers/layernorm.py
vllm/model_executor/layers/linear.py
vllm/model_executor/layers/rotary_embedding.py
vllm/model_executor/layers/sampler.py
vllm/model_executor/layers/vocab_parallel_embedding.py
vllm/model_executor/layers/quantization/__init__.py
vllm/model_executor/layers/quantization/awq.py
vllm/model_executor/layers/quantization/base_config.py
vllm/model_executor/layers/quantization/squeezellm.py
vllm/model_executor/models/__init__.py
vllm/model_executor/models/aquila.py
vllm/model_executor/models/baichuan.py
vllm/model_executor/models/bloom.py
vllm/model_executor/models/chatglm.py
vllm/model_executor/models/cpm.py
vllm/model_executor/models/cpm_mistral.py
vllm/model_executor/models/cpm_old.py
vllm/model_executor/models/cpmmistral.py
vllm/model_executor/models/falcon.py
vllm/model_executor/models/gpt2.py
vllm/model_executor/models/gpt_bigcode.py
vllm/model_executor/models/gpt_j.py
vllm/model_executor/models/gpt_neox.py
vllm/model_executor/models/internlm.py
vllm/model_executor/models/llama.py
vllm/model_executor/models/mistral.py
vllm/model_executor/models/mpt.py
vllm/model_executor/models/opt.py
vllm/model_executor/models/phi_1_5.py
vllm/model_executor/models/qwen.py
vllm/model_executor/models/yi.py
vllm/model_executor/parallel_utils/__init__.py
vllm/model_executor/parallel_utils/communication_op.py
vllm/model_executor/parallel_utils/parallel_state.py
vllm/model_executor/parallel_utils/utils.py
vllm/transformers_utils/__init__.py
vllm/transformers_utils/config.py
vllm/transformers_utils/tokenizer.py
vllm/transformers_utils/configs/__init__.py
vllm/transformers_utils/configs/aquila.py
vllm/transformers_utils/configs/baichuan.py
vllm/transformers_utils/configs/chatglm.py
vllm/transformers_utils/configs/cpm.py
vllm/transformers_utils/configs/cpm_mistral.py
vllm/transformers_utils/configs/cpmmistral.py
vllm/transformers_utils/configs/falcon.py
vllm/transformers_utils/configs/mpt.py
vllm/transformers_utils/configs/qwen.py
vllm/transformers_utils/configs/yi.py
vllm/worker/__init__.py
vllm/worker/cache_engine.py
vllm/worker/worker.py