Unverified Commit 865233e2 authored by Ankur Neog's avatar Ankur Neog Committed by GitHub
Browse files

Add initial support for intel Gaudi accelerators (#2121)

parent 66d4859a
...@@ -31,6 +31,9 @@ srt_hip = ["sglang[runtime_common]", "torch", "vllm==0.6.3.dev13"] ...@@ -31,6 +31,9 @@ srt_hip = ["sglang[runtime_common]", "torch", "vllm==0.6.3.dev13"]
# xpu is not enabled in public vllm and torch whl, # xpu is not enabled in public vllm and torch whl,
# need to follow https://docs.vllm.ai/en/latest/getting_started/xpu-installation.html to install vllm # need to follow https://docs.vllm.ai/en/latest/getting_started/xpu-installation.html to install vllm
srt_xpu = ["sglang[runtime_common]"] srt_xpu = ["sglang[runtime_common]"]
# For Intel Gaudi (device: hpu), follow the installation guide
#https://docs.vllm.ai/en/latest/getting_started/gaudi-installation.html
srt_hpu = ["sglang[runtime_common]"]
openai = ["openai>=1.0", "tiktoken"] openai = ["openai>=1.0", "tiktoken"]
anthropic = ["anthropic>=0.20.0"] anthropic = ["anthropic>=0.20.0"]
...@@ -46,9 +49,11 @@ test = [ ...@@ -46,9 +49,11 @@ test = [
all = ["sglang[srt]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]"] all = ["sglang[srt]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]"]
all_hip = ["sglang[srt_hip]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]"] all_hip = ["sglang[srt_hip]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]"]
all_xpu = ["sglang[srt_xpu]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]"] all_xpu = ["sglang[srt_xpu]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]"]
all_hpu = ["sglang[srt_hpu]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]"]
dev = ["sglang[all]", "sglang[test]"] dev = ["sglang[all]", "sglang[test]"]
dev_hip = ["sglang[all_hip]", "sglang[test]"] dev_hip = ["sglang[all_hip]", "sglang[test]"]
dev_xpu = ["sglang[all_xpu]", "sglang[test]"] dev_xpu = ["sglang[all_xpu]", "sglang[test]"]
dev_hpu = ["sglang[all_hpu]", "sglang[test]"]
[project.urls] [project.urls]
"Homepage" = "https://github.com/sgl-project/sglang" "Homepage" = "https://github.com/sgl-project/sglang"
......
...@@ -278,10 +278,7 @@ def correctness_test( ...@@ -278,10 +278,7 @@ def correctness_test(
def synchronize(device): def synchronize(device):
if device == "cuda": torch.get_device_module(device).synchronize()
torch.cuda.synchronize()
elif device == "xpu":
torch.xpu.synchronize()
def latency_test_run_once( def latency_test_run_once(
......
...@@ -176,14 +176,15 @@ class ModelRunner: ...@@ -176,14 +176,15 @@ class ModelRunner:
def init_torch_distributed(self): def init_torch_distributed(self):
logger.info("Init torch distributed begin.") logger.info("Init torch distributed begin.")
# Init torch distributed # Init torch distributed
torch.get_device_module(self.device).set_device(self.gpu_id)
if self.device == "cuda": if self.device == "cuda":
torch.cuda.set_device(self.gpu_id)
backend = "nccl" backend = "nccl"
# TODO(liangan1): Just use gloo to bypass the initialization failure # TODO(liangan1): Just use gloo to bypass the initialization failure
# Need to use xccl for xpu backend in the future # Need to use xccl for xpu backend in the future
elif self.device == "xpu": elif self.device == "xpu":
torch.xpu.set_device(self.gpu_id)
backend = "gloo" backend = "gloo"
elif self.device == "hpu":
backend = "hccl"
if not self.server_args.enable_p2p_check: if not self.server_args.enable_p2p_check:
monkey_patch_vllm_p2p_access_check(self.gpu_id) monkey_patch_vllm_p2p_access_check(self.gpu_id)
......
...@@ -306,7 +306,7 @@ class ServerArgs: ...@@ -306,7 +306,7 @@ class ServerArgs:
"--device", "--device",
type=str, type=str,
default="cuda", default="cuda",
choices=["cuda", "xpu"], choices=["cuda", "xpu", "hpu"],
help="The device type.", help="The device type.",
) )
parser.add_argument( parser.add_argument(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment