Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
ed83e246
Commit
ed83e246
authored
Mar 17, 2025
by
Anant Sharma
Committed by
GitHub
Mar 17, 2025
Browse files
chore: update nixl github commit (#214)
Co-authored-by:
Piotr Tarasiewicz
<
ptarasiewicz@nvidia.com
>
parent
70266ec8
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
12 additions
and
13 deletions
+12
-13
container/Dockerfile.vllm
container/Dockerfile.vllm
+0
-1
container/build.sh
container/build.sh
+1
-1
container/deps/vllm/vllm_v0.7.2-dynamo-kv-disagg-patch.patch
container/deps/vllm/vllm_v0.7.2-dynamo-kv-disagg-patch.patch
+11
-11
No files found.
container/Dockerfile.vllm
View file @
ed83e246
...
@@ -157,7 +157,6 @@ ENV NIXL_PLUGIN_DIR=/usr/local/nixl/lib/x86_64-linux-gnu/plugins
...
@@ -157,7 +157,6 @@ ENV NIXL_PLUGIN_DIR=/usr/local/nixl/lib/x86_64-linux-gnu/plugins
RUN ls -l /usr/local/nixl/
RUN ls -l /usr/local/nixl/
RUN ls -l /usr/local/nixl/include/
RUN ls -l /usr/local/nixl/include/
RUN ls -l /usr/local/nixl/include/internal/
RUN ls /opt/nixl
RUN ls /opt/nixl
...
...
container/build.sh
View file @
ed83e246
...
@@ -60,7 +60,7 @@ TENSORRTLLM_PIP_WHEEL_PATH=""
...
@@ -60,7 +60,7 @@ TENSORRTLLM_PIP_WHEEL_PATH=""
VLLM_BASE_IMAGE
=
"nvcr.io/nvidia/cuda-dl-base"
VLLM_BASE_IMAGE
=
"nvcr.io/nvidia/cuda-dl-base"
VLLM_BASE_IMAGE_TAG
=
"25.01-cuda12.8-devel-ubuntu24.04"
VLLM_BASE_IMAGE_TAG
=
"25.01-cuda12.8-devel-ubuntu24.04"
NIXL_COMMIT
=
d7a2c571a60d76a3d6c8458140eaaa5025fa48c4
NIXL_COMMIT
=
f35faf8ba4e725f1724177d0772200481d1d3446
NIXL_REPO
=
ai-dynamo/nixl.git
NIXL_REPO
=
ai-dynamo/nixl.git
get_options
()
{
get_options
()
{
...
...
container/deps/vllm/vllm_v0.7.2-dynamo-kv-disagg-patch.patch
View file @
ed83e246
...
@@ -810,7 +810,7 @@ index 00000000..9b938039
...
@@ -810,7 +810,7 @@ index 00000000..9b938039
\
No newline at end of file
\
No newline at end of file
diff --git a/vllm/distributed/device_communicators/nixl.py b/vllm/distributed/device_communicators/nixl.py
diff --git a/vllm/distributed/device_communicators/nixl.py b/vllm/distributed/device_communicators/nixl.py
new file mode 100644
new file mode 100644
index 00000000..
9b757396
index 00000000..
d972252a
--- /dev/null
--- /dev/null
+++ b/vllm/distributed/device_communicators/nixl.py
+++ b/vllm/distributed/device_communicators/nixl.py
@@ -0,0 +1,400 @@
@@ -0,0 +1,400 @@
...
@@ -828,7 +828,7 @@ index 00000000..9b757396
...
@@ -828,7 +828,7 @@ index 00000000..9b757396
+
+
+# Lazy import nixl_wrapper to avoid loading nixl_bindings if nixl is not used
+# Lazy import nixl_wrapper to avoid loading nixl_bindings if nixl is not used
+try:
+try:
+ from nixl
_wrapper
import nixl_
wrapper
as NixlWrapper
# type: ignore
+ from nixl
._api
import nixl_
agent
as NixlWrapper
+ logger.info("NIXL is available")
+ logger.info("NIXL is available")
+except ImportError:
+except ImportError:
+ logger.warning("NIXL is not available")
+ logger.warning("NIXL is not available")
...
@@ -916,10 +916,10 @@ index 00000000..9b757396
...
@@ -916,10 +916,10 @@ index 00000000..9b757396
+ for agent_name in agent_names:
+ for agent_name in agent_names:
+ self.nixl_wrapper.remove_remote_agent(agent_name)
+ self.nixl_wrapper.remove_remote_agent(agent_name)
+ for src_xfer_side_handle in self.src_xfer_side_handles.values():
+ for src_xfer_side_handle in self.src_xfer_side_handles.values():
+ self.nixl_wrapper.
d
ele
te_xfer_sid
e(src_xfer_side_handle)
+ self.nixl_wrapper.
r
ele
ase_dlist_handl
e(src_xfer_side_handle)
+ for dst_xfer_side_handles in self.dst_xfer_side_handles.values():
+ for dst_xfer_side_handles in self.dst_xfer_side_handles.values():
+ for dst_xfer_side_handle in dst_xfer_side_handles.values():
+ for dst_xfer_side_handle in dst_xfer_side_handles.values():
+ self.nixl_wrapper.
d
ele
te_xfer_sid
e(dst_xfer_side_handle)
+ self.nixl_wrapper.
r
ele
ase_dlist_handl
e(dst_xfer_side_handle)
+
+
+ def get_descs_ids(self, layer_ids, block_ids):
+ def get_descs_ids(self, layer_ids, block_ids):
+ if layer_ids == "all":
+ if layer_ids == "all":
...
@@ -1092,9 +1092,9 @@ index 00000000..9b757396
...
@@ -1092,9 +1092,9 @@ index 00000000..9b757396
+
+
+
+
+ logger.debug("Time to get block descs ids: %s ms", (time.perf_counter() - start_time) * 1000)
+ logger.debug("Time to get block descs ids: %s ms", (time.perf_counter() - start_time) * 1000)
+ handle = self.nixl_wrapper.make_prepped_xfer(src_xfer_side_handle, staging_block_descs_ids,
+ handle = self.nixl_wrapper.make_prepped_xfer(
"WRITE",
src_xfer_side_handle, staging_block_descs_ids,
+ dst_xfer_side_handle, dst_block_descs_ids,
+ dst_xfer_side_handle, dst_block_descs_ids,
+ notify_msg
, "WRITE"
)
+ notify_msg)
+ self._transfers[notify_msg].append(handle)
+ self._transfers[notify_msg].append(handle)
+ logger.debug("Time to initialize xfer: %s ms", (time.perf_counter() - start_time) * 1000)
+ logger.debug("Time to initialize xfer: %s ms", (time.perf_counter() - start_time) * 1000)
+ logger.debug("Transfer handle: %s", handle)
+ logger.debug("Transfer handle: %s", handle)
...
@@ -1137,9 +1137,9 @@ index 00000000..9b757396
...
@@ -1137,9 +1137,9 @@ index 00000000..9b757396
+ logger.debug("Time to get descs: %s ms", (time.perf_counter() - start_time) * 1000)
+ logger.debug("Time to get descs: %s ms", (time.perf_counter() - start_time) * 1000)
+
+
+ logger.debug("Transfering to agent %s", self._remote_agents[dst_engine_id][self.rank * tp_multiplier + i])
+ logger.debug("Transfering to agent %s", self._remote_agents[dst_engine_id][self.rank * tp_multiplier + i])
+ handle = self.nixl_wrapper.initialize_xfer(src_descs, dst_descs,
+ handle = self.nixl_wrapper.initialize_xfer(
"WRITE",
src_descs, dst_descs,
+ self._remote_agents[dst_engine_id][self.rank * tp_multiplier + i],
+ self._remote_agents[dst_engine_id][self.rank * tp_multiplier + i],
+ notify_msg
, "WRITE"
)
+ notify_msg)
+ self._transfers[notify_msg].append(handle)
+ self._transfers[notify_msg].append(handle)
+ logger.debug("Time to initialize xfer: %s ms", (time.perf_counter() - start_time) * 1000)
+ logger.debug("Time to initialize xfer: %s ms", (time.perf_counter() - start_time) * 1000)
+ logger.debug("Transfer handle: %s", handle)
+ logger.debug("Transfer handle: %s", handle)
...
@@ -1179,7 +1179,7 @@ index 00000000..9b757396
...
@@ -1179,7 +1179,7 @@ index 00000000..9b757396
+ blocks_data.append((base_addr + block_offset + tp_multiplier_offset, dst_block_len, self.rank))
+ blocks_data.append((base_addr + block_offset + tp_multiplier_offset, dst_block_len, self.rank))
+ logger.debug("Created %s blocks for src engine %s and rank %s", len(blocks_data), self.engine_id, self.rank * tp_multiplier + i)
+ logger.debug("Created %s blocks for src engine %s and rank %s", len(blocks_data), self.engine_id, self.rank * tp_multiplier + i)
+ descs = self.nixl_wrapper.get_xfer_descs(blocks_data, "VRAM")
+ descs = self.nixl_wrapper.get_xfer_descs(blocks_data, "VRAM")
+ self.src_xfer_side_handles[tp_multiplier] = self.nixl_wrapper.prep_xfer_
side
("", descs)
+ self.src_xfer_side_handles[tp_multiplier] = self.nixl_wrapper.prep_xfer_
dlist
("", descs)
+
+
+ # create dst xfer side handles
+ # create dst xfer side handles
+ self.dst_num_blocks[engine_id] = num_blocks
+ self.dst_num_blocks[engine_id] = num_blocks
...
@@ -1192,7 +1192,7 @@ index 00000000..9b757396
...
@@ -1192,7 +1192,7 @@ index 00000000..9b757396
+ blocks_data.append((base_addr + block_offset, dst_block_len, self.rank * tp_multiplier + i))
+ blocks_data.append((base_addr + block_offset, dst_block_len, self.rank * tp_multiplier + i))
+ logger.debug("Created %s blocks for dst engine %s and rank %s", len(blocks_data), engine_id, self.rank * tp_multiplier + i)
+ logger.debug("Created %s blocks for dst engine %s and rank %s", len(blocks_data), engine_id, self.rank * tp_multiplier + i)
+ descs = self.nixl_wrapper.get_xfer_descs(blocks_data, "VRAM")
+ descs = self.nixl_wrapper.get_xfer_descs(blocks_data, "VRAM")
+ self.dst_xfer_side_handles[engine_id][i] = self.nixl_wrapper.prep_xfer_
side
(self._remote_agents[engine_id][self.rank * tp_multiplier + i], descs)
+ self.dst_xfer_side_handles[engine_id][i] = self.nixl_wrapper.prep_xfer_
dlist
(self._remote_agents[engine_id][self.rank * tp_multiplier + i], descs)
+
+
+ return agent_names
+ return agent_names
+
+
...
@@ -1203,7 +1203,7 @@ index 00000000..9b757396
...
@@ -1203,7 +1203,7 @@ index 00000000..9b757396
+ for handle in handles:
+ for handle in handles:
+ xfer_state = self.nixl_wrapper.check_xfer_state(handle)
+ xfer_state = self.nixl_wrapper.check_xfer_state(handle)
+ if xfer_state == "DONE":
+ if xfer_state == "DONE":
+ # self.nixl_wrapper.
abort_xfer
(handle) # TODO ptarasiewicz: why abort is throwing errors?
+ # self.nixl_wrapper.
release_xfer_handle
(handle) # TODO ptarasiewicz: why abort is throwing errors?
+ continue
+ continue
+ if xfer_state == "PROC":
+ if xfer_state == "PROC":
+ running_reqs.append(handle)
+ running_reqs.append(handle)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment