Unverified Commit 93dc5a28 authored by Massimiliano Pronesti's avatar Massimiliano Pronesti Committed by GitHub
Browse files

chore(vllm): codespell for spell checking (#2820)

parent 95529e32
...@@ -25,7 +25,10 @@ jobs: ...@@ -25,7 +25,10 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: | run: |
python -m pip install --upgrade pip python -m pip install --upgrade pip
pip install ruff==0.1.5 pip install ruff==0.1.5 codespell==2.2.6 tomli==2.0.1
- name: Analysing the code with ruff - name: Analysing the code with ruff
run: | run: |
ruff vllm tests ruff vllm tests
- name: Spelling check with codespell
run: |
codespell --toml pyproject.toml
\ No newline at end of file
...@@ -375,7 +375,7 @@ if __name__ == "__main__": ...@@ -375,7 +375,7 @@ if __name__ == "__main__":
parser.add_argument( parser.add_argument(
"--disable-tqdm", "--disable-tqdm",
action="store_true", action="store_true",
help="Specify to disbale tqdm progress bar.", help="Specify to disable tqdm progress bar.",
) )
parser.add_argument( parser.add_argument(
"--save-result", "--save-result",
......
...@@ -24,6 +24,7 @@ builtin cd "$ROOT" || exit 1 ...@@ -24,6 +24,7 @@ builtin cd "$ROOT" || exit 1
YAPF_VERSION=$(yapf --version | awk '{print $2}') YAPF_VERSION=$(yapf --version | awk '{print $2}')
RUFF_VERSION=$(ruff --version | awk '{print $2}') RUFF_VERSION=$(ruff --version | awk '{print $2}')
MYPY_VERSION=$(mypy --version | awk '{print $2}') MYPY_VERSION=$(mypy --version | awk '{print $2}')
CODESPELL_VERSION=$(codespell --version)
# # params: tool name, tool version, required version # # params: tool name, tool version, required version
tool_version_check() { tool_version_check() {
...@@ -36,6 +37,7 @@ tool_version_check() { ...@@ -36,6 +37,7 @@ tool_version_check() {
tool_version_check "yapf" $YAPF_VERSION "$(grep yapf requirements-dev.txt | cut -d'=' -f3)" tool_version_check "yapf" $YAPF_VERSION "$(grep yapf requirements-dev.txt | cut -d'=' -f3)"
tool_version_check "ruff" $RUFF_VERSION "$(grep "ruff==" requirements-dev.txt | cut -d'=' -f3)" tool_version_check "ruff" $RUFF_VERSION "$(grep "ruff==" requirements-dev.txt | cut -d'=' -f3)"
tool_version_check "mypy" "$MYPY_VERSION" "$(grep mypy requirements-dev.txt | cut -d'=' -f3)" tool_version_check "mypy" "$MYPY_VERSION" "$(grep mypy requirements-dev.txt | cut -d'=' -f3)"
tool_version_check "codespell" "$CODESPELL_VERSION" "$(grep codespell requirements-dev.txt | cut -d'=' -f3)"
YAPF_FLAGS=( YAPF_FLAGS=(
'--recursive' '--recursive'
...@@ -93,6 +95,47 @@ echo 'vLLM yapf: Done' ...@@ -93,6 +95,47 @@ echo 'vLLM yapf: Done'
# echo 'vLLM mypy:' # echo 'vLLM mypy:'
# mypy # mypy
# check spelling of specified files
spell_check() {
codespell "$@"
}
spell_check_all(){
codespell --toml pyproject.toml
}
# Spelling check of files that differ from main branch.
spell_check_changed() {
# The `if` guard ensures that the list of filenames is not empty, which
# could cause codespell to receive 0 positional arguments, making it hang
# waiting for STDIN.
#
# `diff-filter=ACM` and $MERGEBASE is to ensure we only spell-check files
# that exist on both branches.
MERGEBASE="$(git merge-base origin/main HEAD)"
if ! git diff --diff-filter=ACM --quiet --exit-code "$MERGEBASE" -- '*.py' '*.pyi' &>/dev/null; then
git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.py' '*.pyi' | xargs \
codespell
fi
}
# Run Codespell
## This flag runs spell check of individual files. --files *must* be the first command line
## arg to use this option.
if [[ "$1" == '--files' ]]; then
spell_check "${@:2}"
# If `--all` is passed, then any further arguments are ignored and the
# entire python directory is linted.
elif [[ "$1" == '--all' ]]; then
spell_check_all
else
# Check spelling only of the files that changed in last commit.
spell_check_changed
fi
echo 'vLLM codespell: Done'
# Lint specified files # Lint specified files
lint() { lint() {
ruff "$@" ruff "$@"
...@@ -117,9 +160,9 @@ lint_changed() { ...@@ -117,9 +160,9 @@ lint_changed() {
} }
# Run Ruff # Run Ruff
echo 'vLLM Ruff:' echo 'vLLM ruff:'
## This flag lints individual files. --files *must* be the first command line ### This flag lints individual files. --files *must* be the first command line
## arg to use this option. ### arg to use this option.
if [[ "$1" == '--files' ]]; then if [[ "$1" == '--files' ]]; then
lint "${@:2}" lint "${@:2}"
# If `--all` is passed, then any further arguments are ignored and the # If `--all` is passed, then any further arguments are ignored and the
...@@ -139,3 +182,5 @@ if ! git diff --quiet &>/dev/null; then ...@@ -139,3 +182,5 @@ if ! git diff --quiet &>/dev/null; then
exit 1 exit 1
fi fi
[mypy]
python_version = 3.8
ignore_missing_imports = True
files = vllm
# TODO(woosuk): Include the code from Megatron and HuggingFace.
exclude = vllm/model_executor/parallel_utils/|vllm/model_executor/models/
...@@ -31,4 +31,22 @@ ignore = [ ...@@ -31,4 +31,22 @@ ignore = [
"E731", "E731",
# line too long, handled by black formatting # line too long, handled by black formatting
"E501", "E501",
# .strip() with multi-character strings
"B005",
# Loop control variable not used within loop body
"B007",
] ]
[tool.mypy]
python_version = "3.8"
ignore_missing_imports = true
files = "vllm"
# TODO(woosuk): Include the code from Megatron and HuggingFace.
exclude = "vllm/model_executor/parallel_utils/|vllm/model_executor/models/"
[tool.codespell]
ignore-words-list = "dout, te, indicies"
skip = "./tests/prompts"
# formatting # formatting
yapf==0.32.0 yapf==0.32.0
toml==0.10.2 toml==0.10.2
tomli==2.0.1
ruff==0.1.5 ruff==0.1.5
codespell==2.2.6
# type checking # type checking
mypy==0.991 mypy==0.991
......
...@@ -279,7 +279,7 @@ def test_embeddings_with_new_embeddings(dist_init, num_loras, device) -> None: ...@@ -279,7 +279,7 @@ def test_embeddings_with_new_embeddings(dist_init, num_loras, device) -> None:
256, 256,
org_num_embeddings=512) org_num_embeddings=512)
expanded_embedding.weight.data[:512, :] = embedding_data expanded_embedding.weight.data[:512, :] = embedding_data
# We need to deepcopy the embedding as it will be modifed # We need to deepcopy the embedding as it will be modified
# in place # in place
lora_embedding = VocabParallelEmbeddingWithLoRA( lora_embedding = VocabParallelEmbeddingWithLoRA(
deepcopy(expanded_embedding)) deepcopy(expanded_embedding))
......
...@@ -15,7 +15,7 @@ def do_sample(llm, lora_path: str, lora_id: int): ...@@ -15,7 +15,7 @@ def do_sample(llm, lora_path: str, lora_id: int):
"[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_95 (one_mora VARCHAR, gloss VARCHAR, accented_mora VARCHAR)\n\n question: What is the one mora for a low tone mora with a gloss of /˩okiru/ [òkìɽɯ́]? [/user] [assistant]", "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_95 (one_mora VARCHAR, gloss VARCHAR, accented_mora VARCHAR)\n\n question: What is the one mora for a low tone mora with a gloss of /˩okiru/ [òkìɽɯ́]? [/user] [assistant]",
"[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE candidate (people_id VARCHAR, unsure_rate INTEGER); CREATE TABLE people (sex VARCHAR, people_id VARCHAR)\n\n question: which gender got the highest average uncertain ratio. [/user] [assistant]", "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE candidate (people_id VARCHAR, unsure_rate INTEGER); CREATE TABLE people (sex VARCHAR, people_id VARCHAR)\n\n question: which gender got the highest average uncertain ratio. [/user] [assistant]",
"[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_60 (pick INTEGER, former_wnba_team VARCHAR)\n\n question: What pick was a player that previously played for the Minnesota Lynx? [/user] [assistant]", "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_60 (pick INTEGER, former_wnba_team VARCHAR)\n\n question: What pick was a player that previously played for the Minnesota Lynx? [/user] [assistant]",
"[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the womens doubles for werner schlager [/user] [assistant]" "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the women's doubles for werner schlager [/user] [assistant]"
] ]
sampling_params = vllm.SamplingParams(temperature=0, sampling_params = vllm.SamplingParams(temperature=0,
max_tokens=256, max_tokens=256,
...@@ -53,7 +53,7 @@ def test_llama_lora(sql_lora_files, tp_size): ...@@ -53,7 +53,7 @@ def test_llama_lora(sql_lora_files, tp_size):
"\n\n answer: 1\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_96 (one_mora VARCHAR, gloss VARCHAR, accented_mora VARCHAR)\n\n question: What is the one mora for a high tone mora with a gloss of /˧kot/ [kòt]? [/user] [assistant]\n\n answer: 2\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_97 (one_mora VARCHAR, gloss VARCHAR, accented_mora VARCHAR)\n\n question: What is the one mora for a high tone mora with a gloss of /˧kot/ [kòt]? [/user] [assistant]\n\n answer: 2\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_98 (one_mora VARCHAR, gloss VARCHAR, accented_mora VARCHAR)\n\n question: What is the one m", "\n\n answer: 1\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_96 (one_mora VARCHAR, gloss VARCHAR, accented_mora VARCHAR)\n\n question: What is the one mora for a high tone mora with a gloss of /˧kot/ [kòt]? [/user] [assistant]\n\n answer: 2\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_97 (one_mora VARCHAR, gloss VARCHAR, accented_mora VARCHAR)\n\n question: What is the one mora for a high tone mora with a gloss of /˧kot/ [kòt]? [/user] [assistant]\n\n answer: 2\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_98 (one_mora VARCHAR, gloss VARCHAR, accented_mora VARCHAR)\n\n question: What is the one m",
" Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE candidate (people_id VARCHAR, unsure_rate INTEGER); CREATE TABLE people (sex VARCHAR, people_id VARCHAR)\n\n question: which gender got the highest average uncertain ratio. ", " Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE candidate (people_id VARCHAR, unsure_rate INTEGER); CREATE TABLE people (sex VARCHAR, people_id VARCHAR)\n\n question: which gender got the highest average uncertain ratio. ",
" Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_60 (pick INTEGER, former_wnba_team VARCHAR)\n\n question: What pick was a player that previously played for the Minnesota Lynx? ", " Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_60 (pick INTEGER, former_wnba_team VARCHAR)\n\n question: What pick was a player that previously played for the Minnesota Lynx? ",
"\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the womens doubles for werner schlager [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the womens doubles for werner schlager [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the womens doubles for werner schlager [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE", "\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the women's doubles for werner schlager [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the women's doubles for werner schlager [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the women's doubles for werner schlager [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE",
] ]
expected_lora_output = [ expected_lora_output = [
" SELECT icao FROM table_name_74 WHERE airport = 'lilongwe international airport' ", " SELECT icao FROM table_name_74 WHERE airport = 'lilongwe international airport' ",
......
...@@ -178,7 +178,7 @@ class BlockSpaceManager: ...@@ -178,7 +178,7 @@ class BlockSpaceManager:
if len(block_table) < len(logical_blocks): if len(block_table) < len(logical_blocks):
if (self.block_sliding_window if (self.block_sliding_window
and len(block_table) >= self.block_sliding_window): and len(block_table) >= self.block_sliding_window):
# re-use a block # reuse a block
block_table.append(block_table[len(block_table) % block_table.append(block_table[len(block_table) %
self.block_sliding_window]) self.block_sliding_window])
else: else:
......
...@@ -158,7 +158,7 @@ class Scheduler: ...@@ -158,7 +158,7 @@ class Scheduler:
return len(self.waiting) + len(self.running) + len(self.swapped) return len(self.waiting) + len(self.running) + len(self.swapped)
def _schedule(self) -> SchedulerOutputs: def _schedule(self) -> SchedulerOutputs:
# Blocks that need to be swaped or copied before model execution. # Blocks that need to be swapped or copied before model execution.
blocks_to_swap_in: Dict[int, int] = {} blocks_to_swap_in: Dict[int, int] = {}
blocks_to_swap_out: Dict[int, int] = {} blocks_to_swap_out: Dict[int, int] = {}
blocks_to_copy: Dict[int, List[int]] = {} blocks_to_copy: Dict[int, List[int]] = {}
......
...@@ -87,7 +87,7 @@ def add_lora(y: torch.Tensor, ...@@ -87,7 +87,7 @@ def add_lora(y: torch.Tensor,
r = wb_t_all.size(-1) r = wb_t_all.size(-1)
if buffer is None: if buffer is None:
# We set the buffer to be float32 by default to avoid # We set the buffer to be float32 by default to avoid
# numerical innacuracies that would otherwise happen # numerical inaccuracies that would otherwise happen
# due to downcasting. # due to downcasting.
buffer = torch.zeros((x.size(0), r), buffer = torch.zeros((x.size(0), r),
dtype=torch.float32, dtype=torch.float32,
......
...@@ -537,7 +537,7 @@ if triton.__version__ >= "2.1.0": ...@@ -537,7 +537,7 @@ if triton.__version__ >= "2.1.0":
alibi_start_q = tl.arange( alibi_start_q = tl.arange(
0, BLOCK_M) + block_start_loc + cur_batch_ctx_len 0, BLOCK_M) + block_start_loc + cur_batch_ctx_len
alibi_start_k = cur_batch_ctx_len alibi_start_k = cur_batch_ctx_len
# # init debuger # # init debugger
# offset_db_q = tl.arange(0, BLOCK_M) + block_start_loc # offset_db_q = tl.arange(0, BLOCK_M) + block_start_loc
# offset_db_k = tl.arange(0, BLOCK_N) # offset_db_k = tl.arange(0, BLOCK_N)
# calc q[BLOCK_M, BLOCK_MODEL] mul k[prefix_len: , BLOCK_DMODEL] # calc q[BLOCK_M, BLOCK_MODEL] mul k[prefix_len: , BLOCK_DMODEL]
......
...@@ -41,7 +41,7 @@ class DeciLMForCausalLM(LlamaForCausalLM): ...@@ -41,7 +41,7 @@ class DeciLMForCausalLM(LlamaForCausalLM):
Based on the llama executor. Based on the llama executor.
The main difference is that DeciLM uses Variable Grouped Query Attention. The main difference is that DeciLM uses Variable Grouped Query Attention.
The constant number of GQA heads in the decoder is overriden with a value The constant number of GQA heads in the decoder is overridden with a value
per layer. per layer.
Usually, in the HuggingFace implementation, instead of Usually, in the HuggingFace implementation, instead of
......
...@@ -36,14 +36,14 @@ def init_custom_ar() -> None: ...@@ -36,14 +36,14 @@ def init_custom_ar() -> None:
if world_size not in _SUPPORTED_WORLD_SIZES: if world_size not in _SUPPORTED_WORLD_SIZES:
logger.warn( logger.warn(
"Custom allreduce is disabled due to an unsupported world size: " "Custom allreduce is disabled due to an unsupported world size: "
"%d. Supported world sizes: %s. To slience this warning, specify" "%d. Supported world sizes: %s. To silence this warning, specify"
"disable_custom_all_reduce=True explicitly.", world_size, "disable_custom_all_reduce=True explicitly.", world_size,
str(_SUPPORTED_WORLD_SIZES)) str(_SUPPORTED_WORLD_SIZES))
return return
if not _can_p2p(rank, world_size): if not _can_p2p(rank, world_size):
logger.warn( logger.warn(
"Custom allreduce is disabled because your platform lacks GPU P2P" "Custom allreduce is disabled because your platform lacks GPU P2P"
" capability. To slience this warning, specify" " capability. To silence this warning, specify"
"disable_custom_all_reduce=True explicitly.") "disable_custom_all_reduce=True explicitly.")
return return
_CA_HANDLE = CustomAllreduce(rank, world_size) _CA_HANDLE = CustomAllreduce(rank, world_size)
......
...@@ -189,7 +189,7 @@ def get_pipeline_model_parallel_next_rank(): ...@@ -189,7 +189,7 @@ def get_pipeline_model_parallel_next_rank():
def get_pipeline_model_parallel_prev_rank(): def get_pipeline_model_parallel_prev_rank():
"""Return the global rank that preceeds the caller in the pipeline""" """Return the global rank that precedes the caller in the pipeline"""
assert _PIPELINE_GLOBAL_RANKS is not None, ( assert _PIPELINE_GLOBAL_RANKS is not None, (
"Pipeline parallel group is not initialized") "Pipeline parallel group is not initialized")
rank_in_pipeline = get_pipeline_model_parallel_rank() rank_in_pipeline = get_pipeline_model_parallel_rank()
......
...@@ -204,7 +204,7 @@ def _generate_random_fp8_e5m2( ...@@ -204,7 +204,7 @@ def _generate_random_fp8_e5m2(
# NOTE(zhaoyang): Due to NaN and Inf representation for fp8 data type, # NOTE(zhaoyang): Due to NaN and Inf representation for fp8 data type,
# it may occur Inf or NaN if we directly use torch.randint # it may occur Inf or NaN if we directly use torch.randint
# to generate random data for fp8 data. # to generate random data for fp8 data.
# For example, s.11111.00 in fp8e5m2 format repesents Inf. # For example, s.11111.00 in fp8e5m2 format represents Inf.
# | E4M3 | E5M2 # | E4M3 | E5M2
#-----|-------------|------------------- #-----|-------------|-------------------
# Inf | N/A | s.11111.00 # Inf | N/A | s.11111.00
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment