Unverified commit cc102814, authored by மனோஜ்குமார் பழனிச்சாமி, committed by GitHub
Browse files

[Misc] Set default value of seed to None (#14274)


Signed-off-by: மனோஜ்குமார் பழனிச்சாமி <smartmanoj42857@gmail.com>
parent 05fb6718
...@@ -25,7 +25,8 @@ llm = LLM(model="facebook/opt-125m", ...@@ -25,7 +25,8 @@ llm = LLM(model="facebook/opt-125m",
tensor_parallel_size=2, tensor_parallel_size=2,
distributed_executor_backend="external_launcher", distributed_executor_backend="external_launcher",
gpu_memory_utilization=random.uniform(0.7, 0.9), gpu_memory_utilization=random.uniform(0.7, 0.9),
swap_space=random.randint(1, 4)) swap_space=random.randint(1, 4),
seed=0)
outputs = llm.generate(prompts, sampling_params) outputs = llm.generate(prompts, sampling_params)
......
...@@ -34,7 +34,8 @@ def llm(): ...@@ -34,7 +34,8 @@ def llm():
max_num_batched_tokens=32768, max_num_batched_tokens=32768,
tensor_parallel_size=1, tensor_parallel_size=1,
gpu_memory_utilization=0.75, gpu_memory_utilization=0.75,
enforce_eager=True) enforce_eager=True,
seed=0)
with llm.deprecate_legacy_api(): with llm.deprecate_legacy_api():
yield weakref.proxy(llm) yield weakref.proxy(llm)
......
...@@ -21,7 +21,7 @@ GUIDED_DECODING_BACKENDS = ["outlines", "lm-format-enforcer", "xgrammar"] ...@@ -21,7 +21,7 @@ GUIDED_DECODING_BACKENDS = ["outlines", "lm-format-enforcer", "xgrammar"]
def llm(): def llm():
# pytest caches the fixture so we use weakref.proxy to # pytest caches the fixture so we use weakref.proxy to
# enable garbage collection # enable garbage collection
llm = LLM(model=MODEL_NAME, max_model_len=1024) llm = LLM(model=MODEL_NAME, max_model_len=1024, seed=0)
with llm.deprecate_legacy_api(): with llm.deprecate_legacy_api():
yield weakref.proxy(llm) yield weakref.proxy(llm)
......
...@@ -24,6 +24,8 @@ def server(): ...@@ -24,6 +24,8 @@ def server():
"4080", "4080",
"--chat-template", "--chat-template",
DUMMY_CHAT_TEMPLATE, DUMMY_CHAT_TEMPLATE,
"--seed",
"0",
] ]
with RemoteOpenAIServer(MODEL_NAME, args) as remote_server: with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
......
...@@ -47,6 +47,8 @@ def default_server_args(): ...@@ -47,6 +47,8 @@ def default_server_args():
"--enforce-eager", "--enforce-eager",
"--max-num-seqs", "--max-num-seqs",
"128", "128",
"--seed",
"0",
] ]
......
...@@ -30,6 +30,8 @@ def server(): ...@@ -30,6 +30,8 @@ def server():
"/" + ROOT_PATH, "/" + ROOT_PATH,
"--chat-template", "--chat-template",
DUMMY_CHAT_TEMPLATE, DUMMY_CHAT_TEMPLATE,
"--seed",
"0",
] ]
envs = os.environ.copy() envs = os.environ.copy()
......
...@@ -104,7 +104,7 @@ class EngineArgs: ...@@ -104,7 +104,7 @@ class EngineArgs:
config_format: ConfigFormat = ConfigFormat.AUTO config_format: ConfigFormat = ConfigFormat.AUTO
dtype: str = 'auto' dtype: str = 'auto'
kv_cache_dtype: str = 'auto' kv_cache_dtype: str = 'auto'
seed: int = 0 seed: Optional[int] = None
max_model_len: Optional[int] = None max_model_len: Optional[int] = None
# Note: Specifying a custom executor backend by passing a class # Note: Specifying a custom executor backend by passing a class
# is intended for expert use only. The API may change without # is intended for expert use only. The API may change without
......
...@@ -169,7 +169,7 @@ class LLM: ...@@ -169,7 +169,7 @@ class LLM:
quantization: Optional[str] = None, quantization: Optional[str] = None,
revision: Optional[str] = None, revision: Optional[str] = None,
tokenizer_revision: Optional[str] = None, tokenizer_revision: Optional[str] = None,
seed: int = 0, seed: Optional[int] = None,
gpu_memory_utilization: float = 0.9, gpu_memory_utilization: float = 0.9,
swap_space: float = 4, swap_space: float = 4,
cpu_offload_gb: float = 0, cpu_offload_gb: float = 0,
......
...@@ -644,7 +644,7 @@ def create_kv_caches_with_random_flash( ...@@ -644,7 +644,7 @@ def create_kv_caches_with_random_flash(
head_size: int, head_size: int,
cache_dtype: Optional[Union[str, torch.dtype]], cache_dtype: Optional[Union[str, torch.dtype]],
model_dtype: Optional[Union[str, torch.dtype]] = None, model_dtype: Optional[Union[str, torch.dtype]] = None,
seed: int = 0, seed: Optional[int] = None,
device: Optional[str] = "cuda", device: Optional[str] = "cuda",
) -> tuple[list[torch.Tensor], list[torch.Tensor]]: ) -> tuple[list[torch.Tensor], list[torch.Tensor]]:
from vllm.platforms import current_platform from vllm.platforms import current_platform
...@@ -681,7 +681,7 @@ def create_kv_caches_with_random( ...@@ -681,7 +681,7 @@ def create_kv_caches_with_random(
head_size: int, head_size: int,
cache_dtype: Optional[Union[str, torch.dtype]], cache_dtype: Optional[Union[str, torch.dtype]],
model_dtype: Optional[Union[str, torch.dtype]] = None, model_dtype: Optional[Union[str, torch.dtype]] = None,
seed: int = 0, seed: Optional[int] = None,
device: Optional[str] = "cuda", device: Optional[str] = "cuda",
) -> tuple[list[torch.Tensor], list[torch.Tensor]]: ) -> tuple[list[torch.Tensor], list[torch.Tensor]]:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment