Unverified commit 2c4f5cca, authored by Lianmin Zheng and committed by GitHub
Browse files

Fix minor style (#4460)

parent 15843047
...@@ -340,6 +340,7 @@ class MHATokenToKVPool(KVCache): ...@@ -340,6 +340,7 @@ class MHATokenToKVPool(KVCache):
cache_v = cache_v.view(self.store_dtype) cache_v = cache_v.view(self.store_dtype)
if self.capture_mode and cache_k.shape[0] < 4: if self.capture_mode and cache_k.shape[0] < 4:
# Overlap the copy of K and V cache for small batch size
current_stream = self.device_module.current_stream() current_stream = self.device_module.current_stream()
self.alt_stream.wait_stream(current_stream) self.alt_stream.wait_stream(current_stream)
with self.device_module.stream(self.alt_stream): with self.device_module.stream(self.alt_stream):
......
...@@ -481,6 +481,7 @@ def suppress_other_loggers(): ...@@ -481,6 +481,7 @@ def suppress_other_loggers():
logging.getLogger("vllm.distributed.device_communicators.shm_broadcast").setLevel( logging.getLogger("vllm.distributed.device_communicators.shm_broadcast").setLevel(
logging.WARN logging.WARN
) )
logging.getLogger("vllm.config").setLevel(logging.ERROR)
warnings.filterwarnings( warnings.filterwarnings(
"ignore", category=UserWarning, message="The given NumPy array is not writable" "ignore", category=UserWarning, message="The given NumPy array is not writable"
...@@ -527,10 +528,11 @@ def kill_process_tree(parent_pid, include_parent: bool = True, skip_pid: int = N ...@@ -527,10 +528,11 @@ def kill_process_tree(parent_pid, include_parent: bool = True, skip_pid: int = N
pass pass
if include_parent: if include_parent:
if parent_pid == os.getpid():
sys.exit(0)
try: try:
if parent_pid == os.getpid():
itself.kill()
sys.exit(0)
itself.kill() itself.kill()
# Sometime processes cannot be killed with SIGKILL (e.g, PID=1 launched by kubernetes), # Sometime processes cannot be killed with SIGKILL (e.g, PID=1 launched by kubernetes),
......
...@@ -36,7 +36,7 @@ class TestEvalFP8Accuracy(unittest.TestCase): ...@@ -36,7 +36,7 @@ class TestEvalFP8Accuracy(unittest.TestCase):
) )
metrics = run_eval(args) metrics = run_eval(args)
self.assertGreaterEqual(metrics["score"], 0.62) self.assertGreaterEqual(metrics["score"], 0.61)
class TestEvalFP8DynamicQuantAccuracy(unittest.TestCase): class TestEvalFP8DynamicQuantAccuracy(unittest.TestCase):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment