docs: add SVG diagrams to disaggregated inference communication guide (#7638)

Signed-off-by: Dan Gil <dagil@nvidia.com>

docs: add SVG diagrams to disaggregated inference communication guide (#7638)
Signed-off-by: Dan Gil <dagil@nvidia.com>
c1560ac9 · dagil-nvidia · GitHub · bf61a5df · c1560ac9 · c1560ac9
Unverified Commit c1560ac9 authored Mar 25, 2026 by dagil-nvidia Committed by GitHub Mar 25, 2026
5 changed files
--- a/docs/assets/img/disagg-comm-stack.svg
+++ b/docs/assets/img/disagg-comm-stack.svg
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 800 600" width="800" height="600" role="img" aria-labelledby="title desc">
+  <!-- Accessible name and description. The root element points at these through
+       aria-labelledby, so they describe what the diagram shows rather than
+       repeating the file name. -->
+  <title id="title">Dynamo disaggregated serving communication stack</title>
+  <desc id="desc">A prefill worker (Pod A) and a decode worker (Pod B) each hold a KV cache in GPU VRAM, with a transfer arrow from the prefill KV cache to the decode KV cache. Both workers connect down to the NIXL library (KV cache transfer API). Below it, a UCX zone labelled high-performance networking contains a transport layer with three transports: rc_x/dc_x (RDMA over InfiniBand or RoCE), cuda_copy (staging, GPU to host memory copy) and tcp (fallback over network sockets).</desc>
+
+  <defs>
+    <!-- Diagonal (top-left to bottom-right) green gradient. The stylesheet exposes it as
+         the surface-accent custom property, which fills the KV Cache and NIXL Library boxes. -->
+    <linearGradient id="green-fill" x1="0%" y1="0%" x2="100%" y2="100%">
+      <stop offset="0%" stop-color="#8BD420"/>
+      <stop offset="100%" stop-color="#6AAF00"/>
+    </linearGradient>
+    <!-- Vertical gradient in the accent colour: fully transparent at both ends and 65%
+         opaque between the 12% and 88% stops. Not referenced elsewhere in this file. -->
+    <linearGradient id="accent-fade" x1="0" y1="0" x2="0" y2="1">
+      <stop offset="0%" stop-color="#76B900" stop-opacity="0"/>
+      <stop offset="12%" stop-color="#76B900" stop-opacity="0.65"/>
+      <stop offset="88%" stop-color="#76B900" stop-opacity="0.65"/>
+      <stop offset="100%" stop-color="#76B900" stop-opacity="0"/>
+    </linearGradient>
+    <!-- Drop shadows: "shadow" is applied to surface boxes, "green-glow" to accent boxes
+         and "blue-glow" to zone frames. Flood colours are read from the stylesheet's
+         shadow, glow-green and glow-blue custom properties so the dark-theme overrides
+         take effect; the light-theme values are the same colours that used to be
+         hard-coded here, so light rendering is unchanged. -->
+    <filter id="shadow" x="-8%" y="-10%" width="116%" height="128%">
+      <feDropShadow dx="0" dy="3" stdDeviation="6" flood-color="var(--shadow)"/>
+    </filter>
+    <filter id="green-glow" x="-12%" y="-14%" width="124%" height="136%">
+      <feDropShadow dx="0" dy="3" stdDeviation="8" flood-color="var(--glow-green)"/>
+    </filter>
+    <filter id="blue-glow" x="-4%" y="-4%" width="108%" height="114%">
+      <feDropShadow dx="0" dy="2" stdDeviation="10" flood-color="var(--glow-blue)"/>
+    </filter>
+    <!-- 40x40 tile holding a single dot filled with the grid-dot custom property.
+         Not referenced elsewhere in this file. -->
+    <pattern id="grid-dots" width="40" height="40" patternUnits="userSpaceOnUse">
+      <circle cx="20" cy="20" r="0.5" fill="var(--grid-dot)"/>
+    </pattern>
+    <!-- Arrowhead markers. Each is a small triangle oriented automatically along its path
+         (orient="auto"); they differ mainly in fill. Connectors in this file reference only
+         the second marker below (id starting with arrow-var) and arrow-pill-kv. -->
+    <marker id="arrow" viewBox="0 0 6 6" refX="5" refY="3"
+            markerWidth="5" markerHeight="5" orient="auto">
+      <path d="M0,0.5 L6,3 L0,5.5 Z" fill="var(--connector)"/>
+    </marker>
+    <!-- Same geometry and fill as "arrow"; only the id differs. -->
+    <marker id="arrow-var--connector" viewBox="0 0 6 6" refX="5" refY="3"
+            markerWidth="5" markerHeight="5" orient="auto">
+      <path d="M0,0.5 L6,3 L0,5.5 Z" fill="var(--connector)"/>
+    </marker>
+    <marker id="arrow-pill-kv" viewBox="0 0 6 6" refX="5" refY="3"
+            markerWidth="5" markerHeight="5" orient="auto">
+      <path d="M0,0.5 L6,3 L0,5.5 Z" fill="var(--pill-kv)"/>
+    </marker>
+    <marker id="arrow-dark" viewBox="0 0 6 6" refX="5" refY="3"
+            markerWidth="5" markerHeight="5" orient="auto">
+      <path d="M0,0.5 L6,3 L0,5.5 Z" fill="var(--text-subtle)"/>
+    </marker>
+    <!-- The arrow-green pair is 6x6 instead of 5x5 and is filled with the pill-kv custom
+         property; the "rev" variant mirrors the triangle and anchors at refX="1". -->
+    <marker id="arrow-green" viewBox="0 0 6 6" refX="5" refY="3"
+            markerWidth="6" markerHeight="6" orient="auto">
+      <path d="M0,0.5 L6,3 L0,5.5 Z" fill="var(--pill-kv)"/>
+    </marker>
+    <marker id="arrow-green-rev" viewBox="0 0 6 6" refX="1" refY="3"
+            markerWidth="6" markerHeight="6" orient="auto">
+      <path d="M6,0.5 L0,3 L6,5.5 Z" fill="var(--pill-kv)"/>
+    </marker>
+    <!-- Icon symbols, stroked or filled with currentColor so they take the colour of the
+         element that instantiates them. No "use" element in this file instantiates any
+         of them. -->
+    <!-- Rounded rectangle on a stem with a short base line. -->
+    <symbol id="sym-monitor" viewBox="0 0 14 14">
+      <rect x="2" y="1" width="10" height="8" rx="1.5" fill="none" stroke="currentColor" stroke-width="1.6"/>
+      <line x1="7" y1="9" x2="7" y2="12" stroke="currentColor" stroke-width="1.6" stroke-linecap="round"/>
+      <line x1="4" y1="12" x2="10" y2="12" stroke="currentColor" stroke-width="1.6" stroke-linecap="round"/>
+    </symbol>
+    <!-- One stroke splitting into two branches, each ending in a dot. -->
+    <symbol id="sym-fork" viewBox="0 0 14 14">
+      <path d="M2,7 L7,7" stroke="currentColor" stroke-width="1.8" stroke-linecap="round"/>
+      <path d="M7,7 L12,3" stroke="currentColor" stroke-width="1.8" stroke-linecap="round"/>
+      <path d="M7,7 L12,11" stroke="currentColor" stroke-width="1.8" stroke-linecap="round"/>
+      <circle cx="12" cy="3" r="1.2" fill="currentColor"/>
+      <circle cx="12" cy="11" r="1.2" fill="currentColor"/>
+    </symbol>
+    <!-- Open arc with a needle and a hub dot. -->
+    <symbol id="sym-gauge" viewBox="0 0 14 14">
+      <path d="M2,10 A5.5,5.5 0 1,1 12,10" fill="none" stroke="currentColor" stroke-width="1.6" stroke-linecap="round"/>
+      <line x1="7" y1="9" x2="9.5" y2="4.5" stroke="currentColor" stroke-width="1.8" stroke-linecap="round"/>
+      <circle cx="7" cy="9" r="1" fill="currentColor"/>
+    </symbol>
+    <!-- A diamond outline with two chevrons stacked beneath it. -->
+    <symbol id="sym-layers" viewBox="0 0 14 14">
+      <path d="M1,8 L7,11 L13,8" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>
+      <path d="M1,5.5 L7,8.5 L13,5.5" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>
+      <path d="M1,3 L7,6 L13,3 L7,0 Z" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linejoin="round"/>
+    </symbol>
+    <!-- Three horizontal lines of different lengths next to a full-height vertical bar. -->
+    <symbol id="sym-stream" viewBox="0 0 14 14">
+      <line x1="2" y1="3" x2="8" y2="3" stroke="currentColor" stroke-width="1.5" stroke-linecap="round"/>
+      <line x1="2" y1="7" x2="10" y2="7" stroke="currentColor" stroke-width="1.5" stroke-linecap="round"/>
+      <line x1="2" y1="11" x2="6" y2="11" stroke="currentColor" stroke-width="1.5" stroke-linecap="round"/>
+      <line x1="12" y1="0" x2="12" y2="14" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" opacity="0.6"/>
+    </symbol>
+    <!-- Three outlined squares plus a fourth, tinted square offset by one unit. -->
+    <symbol id="sym-blocks" viewBox="0 0 14 14">
+      <rect x="0.5" y="0.5" width="5.5" height="5.5" rx="1" fill="none" stroke="currentColor" stroke-width="1.3"/>
+      <rect x="7" y="0.5" width="5.5" height="5.5" rx="1" fill="none" stroke="currentColor" stroke-width="1.3"/>
+      <rect x="0.5" y="7" width="5.5" height="5.5" rx="1" fill="none" stroke="currentColor" stroke-width="1.3"/>
+      <rect x="8" y="8" width="5.5" height="5.5" rx="1" fill="currentColor" opacity="0.25" stroke="currentColor" stroke-width="1.3"/>
+    </symbol>
+    <!-- Two tall outlined bars side by side. -->
+    <symbol id="sym-gate" viewBox="0 0 14 14">
+      <rect x="2" y="1" width="4" height="12" rx="1" fill="none" stroke="currentColor" stroke-width="1.4"/>
+      <rect x="8" y="1" width="4" height="12" rx="1" fill="none" stroke="currentColor" stroke-width="1.4"/>
+    </symbol>
+    <!-- Dashed centre line with two chevrons meeting at the middle; 14x12 view box. -->
+    <symbol id="sym-transfer" viewBox="0 0 14 12">
+      <line x1="0" y1="6" x2="14" y2="6" stroke="currentColor" stroke-width="0.8" stroke-dasharray="2 2"/>
+      <path d="M3,2 L7,6 L3,10" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"/>
+      <path d="M11,2 L7,6 L11,10" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"/>
+    </symbol>
+  </defs>
+
+  <style>
+    /* Light-theme design tokens (the defaults). Elements below read them through var(). */
+    :root {
+      --surface: #FFFFFF;
+      --surface-zone: rgba(219,234,254,0.25);
+      --surface-accent: url(#green-fill);
+      --text-primary: #0F172A;
+      --text-secondary: #1E293B;
+      --text-muted: #475569;
+      --text-subtle: #64748B;
+      --text-on-accent: #FFFFFF;
+      --border: rgba(226,232,240,0.6);
+      --border-zone: rgba(147,197,253,0.5);
+      --accent: #76B900;
+      --accent-start: #8BD420;
+      --accent-end: #6AAF00;
+      --error: #EF4444;
+      --info: #3B82F6;
+      --connector: #CBD5E1;
+      --connector-dark: #64748B;
+      --shadow: rgba(15,23,42,0.10);
+      --glow-green: rgba(118,185,0,0.35);
+      --glow-blue: rgba(59,130,246,0.15);
+      --grid-dot: rgba(148,163,184,0.2);
+      --pill-request: #3B82F6;
+      --pill-kv: #D97706;
+      --pill-kv-dark: #B45309;
+      --pill-perf: #64748B;
+      --zone-label: rgba(59,130,246,0.65);
+    }
+    /* Dark-theme overrides, applied when the viewer prefers a dark colour scheme. */
+    @media (prefers-color-scheme: dark) {
+      :root {
+        --surface: #1E293B;
+        --surface-zone: rgba(59,130,246,0.08);
+        --surface-accent: url(#green-fill);
+        --text-primary: #F1F5F9;
+        --text-secondary: #CBD5E1;
+        --text-muted: #94A3B8;
+        --text-subtle: #64748B;
+        --text-on-accent: #FFFFFF;
+        --border: rgba(148,163,184,0.25);
+        --border-zone: rgba(147,197,253,0.3);
+        --accent: #76B900;
+        --accent-start: #8BD420;
+        --accent-end: #6AAF00;
+        --error: #F87171;
+        --info: #60A5FA;
+        --connector: #475569;
+        --connector-dark: #94A3B8;
+        --shadow: rgba(0,0,0,0.40);
+        --glow-green: rgba(118,185,0,0.25);
+        --glow-blue: rgba(59,130,246,0.10);
+        --grid-dot: rgba(226,232,240,0.15);
+        --pill-request: #60A5FA;
+        --pill-kv: #FBBF24;
+        --pill-kv-dark: #D97706;
+        --pill-perf: #94A3B8;
+        --zone-label: rgba(96,165,250,0.7);
+      }
+    }
+    /* Shared font stack for every text element. */
+    text { font-family: -apple-system, 'Segoe UI', 'Helvetica Neue', Arial, sans-serif; }
+    /* Diagram heading styles. */
+    .title { font-size: 18px; font-weight: 700; fill: var(--text-primary); }
+    .subtitle { font-size: 11px; font-weight: 400; fill: var(--text-subtle); letter-spacing: 0.3px; }
+    /* Box captions: 14px labels and 10px sub-labels, each in left-aligned, centred and
+       white-on-accent variants. */
+    .box-label { font-size: 14px; font-weight: 600; fill: var(--text-secondary); text-anchor: start; }
+    .box-label-center { font-size: 14px; font-weight: 600; fill: var(--text-secondary); text-anchor: middle; }
+    .box-label-white { font-size: 14px; font-weight: 700; fill: var(--text-on-accent); text-anchor: middle; }
+    .box-sub { font-size: 10px; font-weight: 400; fill: var(--text-muted); text-anchor: start; }
+    .box-sub-center { font-size: 10px; font-weight: 400; fill: var(--text-muted); text-anchor: middle; }
+    .box-sub-white { font-size: 10px; font-weight: 400; fill: var(--text-on-accent); opacity: 0.75; text-anchor: middle; }
+    /* Connector label text; its colour comes from a paired .label-* class. */
+    .conn-label { font-size: 10px; font-weight: 600; text-anchor: middle; }
+    /* Zone captions. Zone texts below override fill and text-anchor with inline styles. */
+    .zone-label { font-size: 10px; font-weight: 600; fill: var(--text-subtle); text-anchor: middle; letter-spacing: 1.2px; }
+    /* Not applied to any element in this file; its fill is a literal colour, not a token. */
+    .zone-label-desc { font-size: 9px; font-weight: normal; fill: #94A3B8; text-anchor: start; }
+    /* Pill backgrounds (.pill-*) and the matching label colours (.label-*). */
+    .pill-request { fill: rgba(59,130,246,0.12); stroke: var(--glow-blue); stroke-width: 0.5; }
+    .label-request { fill: var(--info); }
+    .pill-kv { fill: rgba(217,119,6,0.12); stroke: rgba(217,119,6,0.15); stroke-width: 0.5; }
+    .label-kv { fill: var(--pill-kv-dark); }
+    .pill-perf { fill: rgba(100,116,139,0.08); stroke: rgba(100,116,139,0.12); stroke-width: 0.5; }
+    .label-perf { fill: var(--text-muted); }
+    /* Connector strokes: default, KV-coloured and dashed. */
+    .connector { fill: none; stroke: var(--connector); stroke-width: 2.0; }
+    .connector-kv { fill: none; stroke: var(--pill-kv); stroke-width: 2.0; }
+    .connector-dashed { fill: none; stroke: var(--connector); stroke-width: 2.0; stroke-dasharray: 6 4; }
+  </style>
+  <!-- Zone frames, outermost first. Bottom edges are nested in 10px steps so no dashed
+       border crosses or sits on top of another: transport boxes end at y=560, the
+       Transport Layer frame at 570, the UCX frame at 580 and the outer frame at 590,
+       all inside the 600px-high canvas. -->
+  <rect x="20" y="20" width="760" height="570" rx="14" fill="var(--surface-zone)" stroke="var(--border-zone)" stroke-width="1.0" stroke-dasharray="6 3" filter="url(#blue-glow)"/>
+  <text x="400.0" y="46.5" class="zone-label" style="fill:var(--zone-label);text-anchor:middle;">Dynamo Disaggregated Serving</text>

+  <rect x="40" y="320" width="720" height="260" rx="14" fill="var(--surface-zone)" stroke="var(--border-zone)" stroke-width="1.0" stroke-dasharray="6 3" filter="url(#blue-glow)"/>
+  <text x="52" y="340.5" class="zone-label" style="fill:var(--zone-label);text-anchor:start;">UCX</text>
+  <rect x="60" y="410" width="680" height="160" rx="14" fill="var(--surface-zone)" stroke="var(--border-zone)" stroke-width="1.0" stroke-dasharray="6 3" filter="url(#blue-glow)"/>
+  <text x="72" y="430.5" class="zone-label" style="fill:var(--zone-label);text-anchor:start;">Transport Layer</text>
+
+  <!-- Vertical connectors from the bottom edge of the NIXL Library box (y=298) to just
+       above each transport box. Stroke settings come from the .connector class, which
+       carries the same values as the inline presentation attributes it replaces. -->
+  <line class="connector" x1="175" y1="298" x2="175" y2="456" marker-end="url(#arrow-var--connector)"/>
+  <line class="connector" x1="400" y1="298" x2="400" y2="456" marker-end="url(#arrow-var--connector)"/>
+  <line class="connector" x1="625" y1="298" x2="625" y2="456" marker-end="url(#arrow-var--connector)"/>
+
+  <!-- Worker boxes. Each is two stacked rects: an accent-coloured rect shifted 2px to the
+       left, then the surface rect painted over it, which leaves a thin accent edge
+       visible on the left side. -->
+  <rect x="58" y="70" width="300" height="140" rx="10" fill="var(--accent)" opacity="0.7"/>
+  <rect x="60" y="70" width="300" height="140" rx="10" fill="var(--surface)" stroke="var(--border)" stroke-width="1.0" filter="url(#shadow)"/>
+  <text x="70" y="89.9" class="box-label">Prefill Worker</text>
+  <text x="70" y="102.9" class="box-sub">(Pod A)</text>
+  <rect x="438" y="70" width="300" height="140" rx="10" fill="var(--accent)" opacity="0.7"/>
+  <rect x="440" y="70" width="300" height="140" rx="10" fill="var(--surface)" stroke="var(--border)" stroke-width="1.0" filter="url(#shadow)"/>
+  <text x="450" y="89.9" class="box-label">Decode Worker</text>
+  <text x="450" y="102.9" class="box-sub">(Pod B)</text>
+  <!-- Green accent boxes: one KV Cache inside each worker box. -->
+  <rect x="120" y="130" width="160" height="56" rx="10" fill="var(--surface-accent)" filter="url(#green-glow)"/>
+  <text x="200.0" y="156.4" class="box-label-white">KV Cache</text>
+  <text x="200.0" y="169.4" class="box-sub-white">(GPU VRAM)</text>
+  <rect x="520" y="130" width="160" height="56" rx="10" fill="var(--surface-accent)" filter="url(#green-glow)"/>
+  <text x="600.0" y="156.4" class="box-label-white">KV Cache</text>
+  <text x="600.0" y="169.4" class="box-sub-white">(GPU VRAM)</text>
+  <!-- Full-width NIXL Library bar spanning the area beneath both workers. -->
+  <rect x="60" y="250" width="680" height="48" rx="10" fill="var(--surface-accent)" filter="url(#green-glow)"/>
+  <text x="400.0" y="272.4" class="box-label-white">NIXL Library</text>
+  <text x="400.0" y="285.4" class="box-sub-white">(KV Cache Transfer API)</text>
+  <!-- Three transport boxes inside the Transport Layer frame. -->
+  <rect x="80" y="460" width="190" height="100" rx="10" fill="var(--surface)" stroke="var(--border)" stroke-width="1.0" filter="url(#shadow)"/>
+  <text x="92" y="508.4" class="box-label">rc_x/dc_x</text>
+  <text x="92" y="521.4" class="box-sub">(RDMA)</text>
+  <rect x="305" y="460" width="190" height="100" rx="10" fill="var(--surface)" stroke="var(--border)" stroke-width="1.0" filter="url(#shadow)"/>
+  <text x="317" y="508.4" class="box-label">cuda_copy</text>
+  <text x="317" y="521.4" class="box-sub">(staging)</text>
+  <rect x="530" y="460" width="190" height="100" rx="10" fill="var(--surface)" stroke="var(--border)" stroke-width="1.0" filter="url(#shadow)"/>
+  <text x="542" y="508.4" class="box-label">tcp</text>
+  <text x="542" y="521.4" class="box-sub">(fallback)</text>
+
+  <!-- KV transfer arrow from the prefill KV Cache to the decode KV Cache, with its
+       "Transfer" pill, followed by one connector from each worker box down to the
+       NIXL Library bar. Stroke settings come from the .connector-kv and .connector
+       classes, which carry the same values as the inline attributes they replace. -->
+  <line class="connector-kv" x1="280" y1="158" x2="516" y2="158" marker-end="url(#arrow-pill-kv)"/>
+  <rect class="pill-kv" x="373.2" y="140.0" width="53.6" height="16" rx="8"/>
+  <text class="conn-label label-kv" x="400" y="151">Transfer</text>
+  <line class="connector" x1="200" y1="210" x2="200" y2="246" marker-end="url(#arrow-var--connector)"/>
+  <line class="connector" x1="600" y1="210" x2="600" y2="246" marker-end="url(#arrow-var--connector)"/>
+
+  <!-- Two-line caption under the UCX zone label; the second line is placed at an
+       absolute y (354 + 12) instead of a relative dy. -->
+  <text class="zone-label" style="fill:var(--text-subtle);font-size:9px;text-anchor:start">
+    <tspan x="52" y="354">High-Performance</tspan>
+    <tspan x="52" y="366">Networking</tspan>
+  </text>
+  <!-- Third caption line inside each transport box. -->
+  <text class="box-label-center" x="92" y="535" style="fill:var(--text-muted);font-size:10px;text-anchor:start">InfiniBand or RoCE</text>
+  <text class="box-label-center" x="317" y="535" style="fill:var(--text-muted);font-size:10px;text-anchor:start">GPU↔Host memory copy</text>
+  <text class="box-label-center" x="542" y="535" style="fill:var(--text-muted);font-size:10px;text-anchor:start">Network sockets</text>
+
+</svg>
\ No newline at end of file
--- a/docs/assets/img/disagg-cross-node.svg
+++ b/docs/assets/img/disagg-cross-node.svg
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 800 370" width="800" height="370" role="img" aria-labelledby="title desc">
+  <!-- Accessible name and description. The root element points at these through
+       aria-labelledby, so they describe what the diagram shows rather than
+       repeating the file name. -->
+  <title id="title">Disaggregated serving across two nodes</title>
+  <desc id="desc">Two nodes side by side. Node 1 contains a prefill pod with a GPU (VRAM) and, below it, an RDMA NIC (InfiniBand / RoCE). Node 2 contains a decode pod with a GPU (VRAM) and its own RDMA NIC (InfiniBand / RoCE). Arrows run from the prefill GPU down to the Node 1 NIC, across a link labelled Network to the Node 2 NIC, and from that NIC up to the decode GPU.</desc>
+
+  <defs>
+    <!-- Diagonal (top-left to bottom-right) green gradient. The stylesheet exposes it as
+         the surface-accent custom property, which fills the two GPU (VRAM) boxes. -->
+    <linearGradient id="green-fill" x1="0%" y1="0%" x2="100%" y2="100%">
+      <stop offset="0%" stop-color="#8BD420"/>
+      <stop offset="100%" stop-color="#6AAF00"/>
+    </linearGradient>
+    <!-- Vertical gradient in the accent colour: fully transparent at both ends and 65%
+         opaque between the 12% and 88% stops. Not referenced elsewhere in this file. -->
+    <linearGradient id="accent-fade" x1="0" y1="0" x2="0" y2="1">
+      <stop offset="0%" stop-color="#76B900" stop-opacity="0"/>
+      <stop offset="12%" stop-color="#76B900" stop-opacity="0.65"/>
+      <stop offset="88%" stop-color="#76B900" stop-opacity="0.65"/>
+      <stop offset="100%" stop-color="#76B900" stop-opacity="0"/>
+    </linearGradient>
+    <!-- Drop shadows: "shadow" is applied to surface boxes, "green-glow" to accent boxes
+         and "blue-glow" to node frames. Flood colours are read from the stylesheet's
+         shadow, glow-green and glow-blue custom properties so the dark-theme overrides
+         take effect; the light-theme values are the same colours that used to be
+         hard-coded here, so light rendering is unchanged. -->
+    <filter id="shadow" x="-8%" y="-10%" width="116%" height="128%">
+      <feDropShadow dx="0" dy="3" stdDeviation="6" flood-color="var(--shadow)"/>
+    </filter>
+    <filter id="green-glow" x="-12%" y="-14%" width="124%" height="136%">
+      <feDropShadow dx="0" dy="3" stdDeviation="8" flood-color="var(--glow-green)"/>
+    </filter>
+    <filter id="blue-glow" x="-4%" y="-4%" width="108%" height="114%">
+      <feDropShadow dx="0" dy="2" stdDeviation="10" flood-color="var(--glow-blue)"/>
+    </filter>
+    <!-- 40x40 tile holding a single dot filled with the grid-dot custom property.
+         Not referenced elsewhere in this file. -->
+    <pattern id="grid-dots" width="40" height="40" patternUnits="userSpaceOnUse">
+      <circle cx="20" cy="20" r="0.5" fill="var(--grid-dot)"/>
+    </pattern>
+    <!-- Arrowhead markers. Each is a small triangle oriented automatically along its path
+         (orient="auto"); they differ mainly in fill. Connectors in this file reference only
+         the second marker below (id starting with arrow-var) and arrow-info. -->
+    <marker id="arrow" viewBox="0 0 6 6" refX="5" refY="3"
+            markerWidth="5" markerHeight="5" orient="auto">
+      <path d="M0,0.5 L6,3 L0,5.5 Z" fill="var(--connector)"/>
+    </marker>
+    <!-- Same geometry and fill as "arrow"; only the id differs. -->
+    <marker id="arrow-var--connector" viewBox="0 0 6 6" refX="5" refY="3"
+            markerWidth="5" markerHeight="5" orient="auto">
+      <path d="M0,0.5 L6,3 L0,5.5 Z" fill="var(--connector)"/>
+    </marker>
+    <marker id="arrow-info" viewBox="0 0 6 6" refX="5" refY="3"
+            markerWidth="5" markerHeight="5" orient="auto">
+      <path d="M0,0.5 L6,3 L0,5.5 Z" fill="var(--info)"/>
+    </marker>
+    <marker id="arrow-dark" viewBox="0 0 6 6" refX="5" refY="3"
+            markerWidth="5" markerHeight="5" orient="auto">
+      <path d="M0,0.5 L6,3 L0,5.5 Z" fill="var(--text-subtle)"/>
+    </marker>
+    <!-- The arrow-green pair is 6x6 instead of 5x5 and is filled with the pill-kv custom
+         property; the "rev" variant mirrors the triangle and anchors at refX="1". -->
+    <marker id="arrow-green" viewBox="0 0 6 6" refX="5" refY="3"
+            markerWidth="6" markerHeight="6" orient="auto">
+      <path d="M0,0.5 L6,3 L0,5.5 Z" fill="var(--pill-kv)"/>
+    </marker>
+    <marker id="arrow-green-rev" viewBox="0 0 6 6" refX="1" refY="3"
+            markerWidth="6" markerHeight="6" orient="auto">
+      <path d="M6,0.5 L0,3 L6,5.5 Z" fill="var(--pill-kv)"/>
+    </marker>
+    <!-- Icon symbols, stroked or filled with currentColor so they take the colour of the
+         element that instantiates them. No "use" element in this file instantiates any
+         of them. -->
+    <!-- Rounded rectangle on a stem with a short base line. -->
+    <symbol id="sym-monitor" viewBox="0 0 14 14">
+      <rect x="2" y="1" width="10" height="8" rx="1.5" fill="none" stroke="currentColor" stroke-width="1.6"/>
+      <line x1="7" y1="9" x2="7" y2="12" stroke="currentColor" stroke-width="1.6" stroke-linecap="round"/>
+      <line x1="4" y1="12" x2="10" y2="12" stroke="currentColor" stroke-width="1.6" stroke-linecap="round"/>
+    </symbol>
+    <!-- One stroke splitting into two branches, each ending in a dot. -->
+    <symbol id="sym-fork" viewBox="0 0 14 14">
+      <path d="M2,7 L7,7" stroke="currentColor" stroke-width="1.8" stroke-linecap="round"/>
+      <path d="M7,7 L12,3" stroke="currentColor" stroke-width="1.8" stroke-linecap="round"/>
+      <path d="M7,7 L12,11" stroke="currentColor" stroke-width="1.8" stroke-linecap="round"/>
+      <circle cx="12" cy="3" r="1.2" fill="currentColor"/>
+      <circle cx="12" cy="11" r="1.2" fill="currentColor"/>
+    </symbol>
+    <!-- Open arc with a needle and a hub dot. -->
+    <symbol id="sym-gauge" viewBox="0 0 14 14">
+      <path d="M2,10 A5.5,5.5 0 1,1 12,10" fill="none" stroke="currentColor" stroke-width="1.6" stroke-linecap="round"/>
+      <line x1="7" y1="9" x2="9.5" y2="4.5" stroke="currentColor" stroke-width="1.8" stroke-linecap="round"/>
+      <circle cx="7" cy="9" r="1" fill="currentColor"/>
+    </symbol>
+    <!-- A diamond outline with two chevrons stacked beneath it. -->
+    <symbol id="sym-layers" viewBox="0 0 14 14">
+      <path d="M1,8 L7,11 L13,8" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>
+      <path d="M1,5.5 L7,8.5 L13,5.5" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>
+      <path d="M1,3 L7,6 L13,3 L7,0 Z" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linejoin="round"/>
+    </symbol>
+    <!-- Three horizontal lines of different lengths next to a full-height vertical bar. -->
+    <symbol id="sym-stream" viewBox="0 0 14 14">
+      <line x1="2" y1="3" x2="8" y2="3" stroke="currentColor" stroke-width="1.5" stroke-linecap="round"/>
+      <line x1="2" y1="7" x2="10" y2="7" stroke="currentColor" stroke-width="1.5" stroke-linecap="round"/>
+      <line x1="2" y1="11" x2="6" y2="11" stroke="currentColor" stroke-width="1.5" stroke-linecap="round"/>
+      <line x1="12" y1="0" x2="12" y2="14" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" opacity="0.6"/>
+    </symbol>
+    <!-- Three outlined squares plus a fourth, tinted square offset by one unit. -->
+    <symbol id="sym-blocks" viewBox="0 0 14 14">
+      <rect x="0.5" y="0.5" width="5.5" height="5.5" rx="1" fill="none" stroke="currentColor" stroke-width="1.3"/>
+      <rect x="7" y="0.5" width="5.5" height="5.5" rx="1" fill="none" stroke="currentColor" stroke-width="1.3"/>
+      <rect x="0.5" y="7" width="5.5" height="5.5" rx="1" fill="none" stroke="currentColor" stroke-width="1.3"/>
+      <rect x="8" y="8" width="5.5" height="5.5" rx="1" fill="currentColor" opacity="0.25" stroke="currentColor" stroke-width="1.3"/>
+    </symbol>
+    <!-- Two tall outlined bars side by side. -->
+    <symbol id="sym-gate" viewBox="0 0 14 14">
+      <rect x="2" y="1" width="4" height="12" rx="1" fill="none" stroke="currentColor" stroke-width="1.4"/>
+      <rect x="8" y="1" width="4" height="12" rx="1" fill="none" stroke="currentColor" stroke-width="1.4"/>
+    </symbol>
+    <!-- Dashed centre line with two chevrons meeting at the middle; 14x12 view box. -->
+    <symbol id="sym-transfer" viewBox="0 0 14 12">
+      <line x1="0" y1="6" x2="14" y2="6" stroke="currentColor" stroke-width="0.8" stroke-dasharray="2 2"/>
+      <path d="M3,2 L7,6 L3,10" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"/>
+      <path d="M11,2 L7,6 L11,10" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"/>
+    </symbol>
+  </defs>
+
+  <style>
+    /* Light-theme design tokens (the defaults). Elements below read them through var(). */
+    :root {
+      --surface: #FFFFFF;
+      --surface-zone: rgba(219,234,254,0.25);
+      --surface-accent: url(#green-fill);
+      --text-primary: #0F172A;
+      --text-secondary: #1E293B;
+      --text-muted: #475569;
+      --text-subtle: #64748B;
+      --text-on-accent: #FFFFFF;
+      --border: rgba(226,232,240,0.6);
+      --border-zone: rgba(147,197,253,0.5);
+      --accent: #76B900;
+      --accent-start: #8BD420;
+      --accent-end: #6AAF00;
+      --error: #EF4444;
+      --info: #3B82F6;
+      --connector: #CBD5E1;
+      --connector-dark: #64748B;
+      --shadow: rgba(15,23,42,0.10);
+      --glow-green: rgba(118,185,0,0.35);
+      --glow-blue: rgba(59,130,246,0.15);
+      --grid-dot: rgba(148,163,184,0.2);
+      --pill-request: #3B82F6;
+      --pill-kv: #D97706;
+      --pill-kv-dark: #B45309;
+      --pill-perf: #64748B;
+      --zone-label: rgba(59,130,246,0.65);
+    }
+    /* Dark-theme overrides, applied when the viewer prefers a dark colour scheme. */
+    @media (prefers-color-scheme: dark) {
+      :root {
+        --surface: #1E293B;
+        --surface-zone: rgba(59,130,246,0.08);
+        --surface-accent: url(#green-fill);
+        --text-primary: #F1F5F9;
+        --text-secondary: #CBD5E1;
+        --text-muted: #94A3B8;
+        --text-subtle: #64748B;
+        --text-on-accent: #FFFFFF;
+        --border: rgba(148,163,184,0.25);
+        --border-zone: rgba(147,197,253,0.3);
+        --accent: #76B900;
+        --accent-start: #8BD420;
+        --accent-end: #6AAF00;
+        --error: #F87171;
+        --info: #60A5FA;
+        --connector: #475569;
+        --connector-dark: #94A3B8;
+        --shadow: rgba(0,0,0,0.40);
+        --glow-green: rgba(118,185,0,0.25);
+        --glow-blue: rgba(59,130,246,0.10);
+        --grid-dot: rgba(226,232,240,0.15);
+        --pill-request: #60A5FA;
+        --pill-kv: #FBBF24;
+        --pill-kv-dark: #D97706;
+        --pill-perf: #94A3B8;
+        --zone-label: rgba(96,165,250,0.7);
+      }
+    }
+    /* Shared font stack for every text element. */
+    text { font-family: -apple-system, 'Segoe UI', 'Helvetica Neue', Arial, sans-serif; }
+    /* Diagram heading styles. */
+    .title { font-size: 18px; font-weight: 700; fill: var(--text-primary); }
+    .subtitle { font-size: 11px; font-weight: 400; fill: var(--text-subtle); letter-spacing: 0.3px; }
+    /* Box captions: 14px labels and 10px sub-labels, each in left-aligned, centred and
+       white-on-accent variants. */
+    .box-label { font-size: 14px; font-weight: 600; fill: var(--text-secondary); text-anchor: start; }
+    .box-label-center { font-size: 14px; font-weight: 600; fill: var(--text-secondary); text-anchor: middle; }
+    .box-label-white { font-size: 14px; font-weight: 700; fill: var(--text-on-accent); text-anchor: middle; }
+    .box-sub { font-size: 10px; font-weight: 400; fill: var(--text-muted); text-anchor: start; }
+    .box-sub-center { font-size: 10px; font-weight: 400; fill: var(--text-muted); text-anchor: middle; }
+    .box-sub-white { font-size: 10px; font-weight: 400; fill: var(--text-on-accent); opacity: 0.75; text-anchor: middle; }
+    /* Connector label text; its colour comes from a paired .label-* class. */
+    .conn-label { font-size: 10px; font-weight: 600; text-anchor: middle; }
+    /* Zone captions. The NODE labels below override fill with an inline style. */
+    .zone-label { font-size: 10px; font-weight: 600; fill: var(--text-subtle); text-anchor: middle; letter-spacing: 1.2px; }
+    /* Pill backgrounds (.pill-*) and the matching label colours (.label-*). */
+    .pill-request { fill: rgba(59,130,246,0.12); stroke: var(--glow-blue); stroke-width: 0.5; }
+    .label-request { fill: var(--info); }
+    .pill-kv { fill: rgba(217,119,6,0.12); stroke: rgba(217,119,6,0.15); stroke-width: 0.5; }
+    .label-kv { fill: var(--pill-kv-dark); }
+    .pill-perf { fill: rgba(100,116,139,0.08); stroke: rgba(100,116,139,0.12); stroke-width: 0.5; }
+    .label-perf { fill: var(--text-muted); }
+    /* Connector strokes: default, KV-coloured and dashed. */
+    .connector { fill: none; stroke: var(--connector); stroke-width: 2.0; }
+    .connector-kv { fill: none; stroke: var(--pill-kv); stroke-width: 2.0; }
+    .connector-dashed { fill: none; stroke: var(--connector); stroke-width: 2.0; stroke-dasharray: 6 4; }
+  </style>
+  <!-- Node frames: two dashed zones of equal size side by side, each with a centred
+       caption near its top edge. -->
+  <rect x="20" y="15" width="350" height="320" rx="14" filter="url(#blue-glow)" fill="var(--surface-zone)" stroke="var(--border-zone)" stroke-width="1.0" stroke-dasharray="6 3"/>
+  <text class="zone-label" x="195" y="35.5" style="fill:var(--zone-label);text-anchor:middle;">NODE 1</text>
+  <rect x="430" y="15" width="350" height="320" rx="14" filter="url(#blue-glow)" fill="var(--surface-zone)" stroke="var(--border-zone)" stroke-width="1.0" stroke-dasharray="6 3"/>
+  <text class="zone-label" x="605" y="35.5" style="fill:var(--zone-label);text-anchor:middle;">NODE 2</text>
+
+  <!-- Node 1 contents. The pod box is two stacked rects: an accent-coloured rect shifted
+       2px to the left, then the surface rect painted over it, which leaves a thin accent
+       edge visible on the left side. -->
+  <rect x="73" y="55" width="240" height="120" rx="10" fill="var(--accent)" opacity="0.7"/>
+  <rect x="75" y="55" width="240" height="120" rx="10" fill="var(--surface)" stroke="var(--border)" stroke-width="1.0" filter="url(#shadow)"/>
+  <text x="85" y="74.9" class="box-label">Prefill Pod</text>
+  <rect x="125" y="100" width="140" height="50" rx="10" fill="var(--surface-accent)" filter="url(#green-glow)"/>
+  <text x="195.0" y="129.9" class="box-label-white">GPU (VRAM)</text>
+  <!-- The NIC box sits below the pod box, inside the node frame. -->
+  <rect x="95" y="220" width="200" height="70" rx="10" fill="var(--surface)" stroke="var(--border)" stroke-width="1.0" filter="url(#shadow)"/>
+  <text x="105" y="253.4" class="box-label">RDMA NIC</text>
+  <text x="105" y="266.4" class="box-sub">InfiniBand / RoCE</text>
+  <!-- Node 2 contents: same layout shifted 410px to the right. -->
+  <rect x="483" y="55" width="240" height="120" rx="10" fill="var(--accent)" opacity="0.7"/>
+  <rect x="485" y="55" width="240" height="120" rx="10" fill="var(--surface)" stroke="var(--border)" stroke-width="1.0" filter="url(#shadow)"/>
+  <text x="495" y="74.9" class="box-label">Decode Pod</text>
+  <rect x="535" y="100" width="140" height="50" rx="10" fill="var(--surface-accent)" filter="url(#green-glow)"/>
+  <text x="605.0" y="129.9" class="box-label-white">GPU (VRAM)</text>
+  <rect x="505" y="220" width="200" height="70" rx="10" fill="var(--surface)" stroke="var(--border)" stroke-width="1.0" filter="url(#shadow)"/>
+  <text x="515" y="253.4" class="box-label">RDMA NIC</text>
+  <text x="515" y="266.4" class="box-sub">InfiniBand / RoCE</text>
+
+  <!-- Connectors: from the prefill GPU down to the Node 1 NIC, from the Node 2 NIC up to
+       the decode GPU, then the NIC-to-NIC link with its "Network" pill. The two vertical
+       connectors take their stroke from the .connector class, which carries the same
+       values as the inline attributes it replaces. -->
+  <line class="connector" x1="195" y1="150" x2="195" y2="216" marker-end="url(#arrow-var--connector)"/>
+  <line class="connector" x1="605" y1="220" x2="605" y2="154" marker-end="url(#arrow-var--connector)"/>
+  <line x1="295" y1="255" x2="501" y2="255" fill="none" stroke="var(--info)" stroke-width="2.0" marker-end="url(#arrow-info)"/>
+  <rect class="pill-request" x="375.8" y="237.0" width="48.4" height="16" rx="8"/>
+  <text class="conn-label label-request" x="400" y="248">Network</text>
+
+</svg>
\ No newline at end of file
--- a/docs/assets/img/disagg-nvlink-limitation.svg
+++ b/docs/assets/img/disagg-nvlink-limitation.svg
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 800 440" width="800" height="440" role="img" aria-labelledby="title desc">
+  <title id="title">disagg-nvlink-limitation</title>
+  <desc id="desc">Architecture diagram showing disagg-nvlink-limitation</desc>
+
+  <defs>
+    <linearGradient id="green-fill" x1="0%" y1="0%" x2="100%" y2="100%">
+      <stop offset="0%" stop-color="#8BD420"/>
+      <stop offset="100%" stop-color="#6AAF00"/>
+    </linearGradient>
+    <linearGradient id="accent-fade" x1="0" y1="0" x2="0" y2="1">
+      <stop offset="0%" stop-color="#76B900" stop-opacity="0"/>
+      <stop offset="12%" stop-color="#76B900" stop-opacity="0.65"/>
+      <stop offset="88%" stop-color="#76B900" stop-opacity="0.65"/>
+      <stop offset="100%" stop-color="#76B900" stop-opacity="0"/>
+    </linearGradient>
+    <filter id="shadow" x="-8%" y="-10%" width="116%" height="128%">
+      <feDropShadow dx="0" dy="3" stdDeviation="6" flood-color="rgba(15,23,42,0.10)"/>
+    </filter>
+    <filter id="green-glow" x="-12%" y="-14%" width="124%" height="136%">
+      <feDropShadow dx="0" dy="3" stdDeviation="8" flood-color="rgba(118,185,0,0.35)"/>
+    </filter>
+    <filter id="blue-glow" x="-4%" y="-4%" width="108%" height="114%">
+      <feDropShadow dx="0" dy="2" stdDeviation="10" flood-color="rgba(59,130,246,0.15)"/>
+    </filter>
+    <pattern id="grid-dots" width="40" height="40" patternUnits="userSpaceOnUse">
+      <circle cx="20" cy="20" r="0.5" fill="var(--grid-dot)"/>
+    </pattern>
+    <marker id="arrow" viewBox="0 0 6 6" refX="5" refY="3"
+            markerWidth="5" markerHeight="5" orient="auto">
+      <path d="M0,0.5 L6,3 L0,5.5 Z" fill="var(--connector)"/>
+    </marker>
+    <marker id="arrow-accent" viewBox="0 0 6 6" refX="5" refY="3"
+            markerWidth="5" markerHeight="5" orient="auto">
+      <path d="M0,0.5 L6,3 L0,5.5 Z" fill="var(--accent)"/>
+    </marker>
+    <marker id="arrow-dark" viewBox="0 0 6 6" refX="5" refY="3"
+            markerWidth="5" markerHeight="5" orient="auto">
+      <path d="M0,0.5 L6,3 L0,5.5 Z" fill="var(--text-subtle)"/>
+    </marker>
+    <marker id="arrow-green" viewBox="0 0 6 6" refX="5" refY="3"
+            markerWidth="6" markerHeight="6" orient="auto">
+      <path d="M0,0.5 L6,3 L0,5.5 Z" fill="var(--pill-kv)"/>
+    </marker>
+    <marker id="arrow-green-rev" viewBox="0 0 6 6" refX="1" refY="3"
+            markerWidth="6" markerHeight="6" orient="auto">
+      <path d="M6,0.5 L0,3 L6,5.5 Z" fill="var(--pill-kv)"/>
+    </marker>
+    <symbol id="sym-monitor" viewBox="0 0 14 14">
+      <rect x="2" y="1" width="10" height="8" rx="1.5" fill="none" stroke="currentColor" stroke-width="1.6"/>
+      <line x1="7" y1="9" x2="7" y2="12" stroke="currentColor" stroke-width="1.6" stroke-linecap="round"/>
+      <line x1="4" y1="12" x2="10" y2="12" stroke="currentColor" stroke-width="1.6" stroke-linecap="round"/>
+    </symbol>
+    <symbol id="sym-fork" viewBox="0 0 14 14">
+      <path d="M2,7 L7,7" stroke="currentColor" stroke-width="1.8" stroke-linecap="round"/>
+      <path d="M7,7 L12,3" stroke="currentColor" stroke-width="1.8" stroke-linecap="round"/>
+      <path d="M7,7 L12,11" stroke="currentColor" stroke-width="1.8" stroke-linecap="round"/>
+      <circle cx="12" cy="3" r="1.2" fill="currentColor"/>
+      <circle cx="12" cy="11" r="1.2" fill="currentColor"/>
+    </symbol>
+    <!-- Gauge glyph: an open arc for the dial, a needle from the pivot, and a filled pivot dot. -->
+    <symbol id="sym-gauge" viewBox="0 0 14 14">
+      <g stroke="currentColor" stroke-linecap="round">
+        <path d="M2,10 A5.5,5.5 0 1,1 12,10" fill="none" stroke-width="1.6"/>
+        <line x1="7" y1="9" x2="9.5" y2="4.5" stroke-width="1.8"/>
+      </g>
+      <circle cx="7" cy="9" r="1" fill="currentColor"/>
+    </symbol>
+    <!-- Layers glyph: a closed diamond on top with two open chevrons stacked beneath it. -->
+    <symbol id="sym-layers" viewBox="0 0 14 14">
+      <g fill="none" stroke="currentColor" stroke-width="1.5" stroke-linejoin="round">
+        <path d="M1,8 L7,11 L13,8" stroke-linecap="round"/>
+        <path d="M1,5.5 L7,8.5 L13,5.5" stroke-linecap="round"/>
+        <path d="M1,3 L7,6 L13,3 L7,0 Z"/>
+      </g>
+    </symbol>
+    <!-- Stream glyph: three horizontal bars of different lengths beside a full-height, 60%-opacity vertical bar. -->
+    <symbol id="sym-stream" viewBox="0 0 14 14">
+      <g stroke="currentColor" stroke-linecap="round">
+        <line x1="2" y1="3" x2="8" y2="3" stroke-width="1.5"/>
+        <line x1="2" y1="7" x2="10" y2="7" stroke-width="1.5"/>
+        <line x1="2" y1="11" x2="6" y2="11" stroke-width="1.5"/>
+        <line x1="12" y1="0" x2="12" y2="14" stroke-width="1.8" opacity="0.6"/>
+      </g>
+    </symbol>
+    <!-- Blocks glyph: three outlined tiles plus a fourth tile, offset to (8,8), filled and drawn at 25% opacity. -->
+    <symbol id="sym-blocks" viewBox="0 0 14 14">
+      <g stroke="currentColor" stroke-width="1.3">
+        <rect x="0.5" y="0.5" width="5.5" height="5.5" rx="1" fill="none"/>
+        <rect x="7" y="0.5" width="5.5" height="5.5" rx="1" fill="none"/>
+        <rect x="0.5" y="7" width="5.5" height="5.5" rx="1" fill="none"/>
+        <rect x="8" y="8" width="5.5" height="5.5" rx="1" fill="currentColor" opacity="0.25"/>
+      </g>
+    </symbol>
+    <!-- Gate glyph: two tall outlined bars side by side. -->
+    <symbol id="sym-gate" viewBox="0 0 14 14">
+      <g fill="none" stroke="currentColor" stroke-width="1.4">
+        <rect x="2" y="1" width="4" height="12" rx="1"/>
+        <rect x="8" y="1" width="4" height="12" rx="1"/>
+      </g>
+    </symbol>
+    <!-- Transfer glyph (14x12 box): a thin dashed midline with two chevrons whose tips meet at the centre. -->
+    <symbol id="sym-transfer" viewBox="0 0 14 12">
+      <line x1="0" y1="6" x2="14" y2="6" stroke="currentColor" stroke-width="0.8" stroke-dasharray="2 2"/>
+      <g fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round">
+        <path d="M3,2 L7,6 L3,10"/>
+        <path d="M11,2 L7,6 L11,10"/>
+      </g>
+    </symbol>
+  </defs>
+
+  <style>
+    /* Light-theme design tokens (the default). */
+    :root {
+      --surface: #FFFFFF;
+      --surface-zone: rgba(219,234,254,0.25);
+      --surface-accent: url(#green-fill);
+      --text-primary: #0F172A;
+      --text-secondary: #1E293B;
+      --text-muted: #475569;
+      --text-subtle: #64748B;
+      --text-on-accent: #FFFFFF;
+      --border: rgba(226,232,240,0.6);
+      --border-zone: rgba(147,197,253,0.5);
+      --accent: #76B900;
+      --accent-start: #8BD420;
+      --accent-end: #6AAF00;
+      --error: #EF4444;
+      --info: #3B82F6;
+      --connector: #CBD5E1;
+      --connector-dark: #64748B;
+      --shadow: rgba(15,23,42,0.10);
+      --glow-green: rgba(118,185,0,0.35);
+      --glow-blue: rgba(59,130,246,0.15);
+      --grid-dot: rgba(148,163,184,0.2);
+      --pill-request: #3B82F6;
+      --pill-kv: #D97706;
+      --pill-kv-dark: #B45309;
+      --pill-perf: #64748B;
+      --zone-label: rgba(59,130,246,0.65);
+    }
+    /* Dark-theme overrides, applied when the viewer prefers a dark colour scheme. */
+    @media (prefers-color-scheme: dark) {
+      :root {
+        --surface: #1E293B;
+        --surface-zone: rgba(59,130,246,0.08);
+        --surface-accent: url(#green-fill);
+        --text-primary: #F1F5F9;
+        --text-secondary: #CBD5E1;
+        --text-muted: #94A3B8;
+        /* Was #64748B (the light value): roughly 3:1 against --surface, too faint for the
+           9-11px annotations that use this token. #94A3B8 is the dark counterpart the
+           palette already uses for --connector-dark and --pill-perf. */
+        --text-subtle: #94A3B8;
+        --text-on-accent: #FFFFFF;
+        --border: rgba(148,163,184,0.25);
+        --border-zone: rgba(147,197,253,0.3);
+        --accent: #76B900;
+        --accent-start: #8BD420;
+        --accent-end: #6AAF00;
+        --error: #F87171;
+        --info: #60A5FA;
+        --connector: #475569;
+        --connector-dark: #94A3B8;
+        --shadow: rgba(0,0,0,0.40);
+        --glow-green: rgba(118,185,0,0.25);
+        --glow-blue: rgba(59,130,246,0.10);
+        --grid-dot: rgba(226,232,240,0.15);
+        --pill-request: #60A5FA;
+        --pill-kv: #FBBF24;
+        --pill-kv-dark: #D97706;
+        --pill-perf: #94A3B8;
+        --zone-label: rgba(96,165,250,0.7);
+      }
+    }
+    /* Typography: one font stack for every text element, then per-role size, weight, colour and anchor. */
+    text { font-family: -apple-system, 'Segoe UI', 'Helvetica Neue', Arial, sans-serif; }
+    .title { font-size: 18px; font-weight: 700; fill: var(--text-primary); }
+    .subtitle { font-size: 11px; font-weight: 400; fill: var(--text-subtle); letter-spacing: 0.3px; }
+    .box-label { font-size: 14px; font-weight: 600; fill: var(--text-secondary); text-anchor: start; }
+    .box-label-center { font-size: 14px; font-weight: 600; fill: var(--text-secondary); text-anchor: middle; }
+    .box-label-white { font-size: 14px; font-weight: 700; fill: var(--text-on-accent); text-anchor: middle; }
+    .box-sub { font-size: 10px; font-weight: 400; fill: var(--text-muted); text-anchor: start; }
+    .box-sub-center { font-size: 10px; font-weight: 400; fill: var(--text-muted); text-anchor: middle; }
+    .box-sub-white { font-size: 10px; font-weight: 400; fill: var(--text-on-accent); opacity: 0.75; text-anchor: middle; }
+    .conn-label { font-size: 10px; font-weight: 600; text-anchor: middle; }
+    .zone-label { font-size: 10px; font-weight: 600; fill: var(--text-subtle); text-anchor: middle; letter-spacing: 1.2px; }
+    /* Connector label pills and their matching label colours. */
+    .pill-request { fill: rgba(59,130,246,0.12); stroke: var(--glow-blue); stroke-width: 0.5; }
+    .label-request { fill: var(--info); }
+    .pill-kv { fill: rgba(217,119,6,0.12); stroke: rgba(217,119,6,0.15); stroke-width: 0.5; }
+    .label-kv { fill: var(--pill-kv-dark); }
+    .pill-perf { fill: rgba(100,116,139,0.08); stroke: rgba(100,116,139,0.12); stroke-width: 0.5; }
+    .label-perf { fill: var(--text-muted); }
+    /* Connector strokes: neutral, KV-coloured and dashed variants. */
+    .connector { fill: none; stroke: var(--connector); stroke-width: 2.0; }
+    .connector-kv { fill: none; stroke: var(--pill-kv); stroke-width: 2.0; }
+    .connector-dashed { fill: none; stroke: var(--connector); stroke-width: 2.0; stroke-dasharray: 6 4; }
+  </style>
+  <!-- Physical node: dashed zone that contains both pods, with its caption. -->
+  <rect x="20" y="20" width="760" height="400" rx="14" fill="var(--surface-zone)" stroke="var(--border-zone)" stroke-width="1.0" stroke-dasharray="6 3" filter="url(#blue-glow)"/>
+  <!-- The caption uses the multiplication sign, as in the "8× H100 GPUs" heading of the text diagram this image replaces. -->
+  <text x="400.0" y="40.5" class="zone-label" style="fill:var(--zone-label);text-anchor:middle;">PHYSICAL NODE (8× H100 GPUS)</text>
+
+  <!-- Pod cards: each accent rect sits 2px to the left of its card, so only a left-edge strip stays visible. -->
+  <rect x="38" y="60" width="330" height="230" rx="10" fill="var(--accent)" opacity="0.7"/>
+  <rect x="40" y="60" width="330" height="230" rx="10" fill="var(--surface)" stroke="var(--border)" stroke-width="1.0" filter="url(#shadow)"/>
+  <text x="50" y="79.9" class="box-label">Pod A (Prefill)</text>
+  <text x="50" y="92.9" class="box-sub">Process Namespace: PID 1</text>
+  <rect x="428" y="60" width="330" height="230" rx="10" fill="var(--accent)" opacity="0.7"/>
+  <rect x="430" y="60" width="330" height="230" rx="10" fill="var(--surface)" stroke="var(--border)" stroke-width="1.0" filter="url(#shadow)"/>
+  <text x="440" y="79.9" class="box-label">Pod B (Decode)</text>
+  <text x="440" y="92.9" class="box-sub">Process Namespace: PID 1</text>
+  <!-- GPU boxes: two per pod, filled with the green gradient. -->
+  <rect x="75" y="170" width="120" height="56" rx="10" fill="var(--surface-accent)" filter="url(#green-glow)"/>
+  <text x="135.0" y="202.9" class="box-label-white">GPU 0</text>
+  <rect x="215" y="170" width="120" height="56" rx="10" fill="var(--surface-accent)" filter="url(#green-glow)"/>
+  <text x="275.0" y="202.9" class="box-label-white">GPU 1</text>
+  <rect x="465" y="170" width="120" height="56" rx="10" fill="var(--surface-accent)" filter="url(#green-glow)"/>
+  <text x="525.0" y="202.9" class="box-label-white">GPU 2</text>
+  <rect x="615" y="170" width="120" height="56" rx="10" fill="var(--surface-accent)" filter="url(#green-glow)"/>
+  <text x="675.0" y="202.9" class="box-label-white">GPU 3</text>
+  <!-- Pod B is labelled with devices 2-7 but only two boxes are drawn; the ellipsis marks the
+       remaining GPUs, as the "..." did in the text diagram this image replaces. -->
+  <text x="747.5" y="202.9" class="box-label-center">…</text>
+
+  <!-- Connectors: accent arrows between the GPUs inside each pod, and a dashed error-coloured line spanning the gap between the pods. -->
+  <path d="M195,198 L211.0,198.0" fill="none" stroke="var(--accent)" stroke-width="2.0" marker-end="url(#arrow-accent)"/>
+  <path d="M585,198 L611.0,198.0" fill="none" stroke="var(--accent)" stroke-width="2.0" marker-end="url(#arrow-accent)"/>
+  <path d="M370,198 L430,198" fill="none" stroke="var(--error)" stroke-width="2.0" stroke-dasharray="4 4"/>
+
+  <!-- Annotations: device assignment above each GPU row, NVLink status below it, then the conclusion and its reasons. -->
+  <text x="205" y="150" class="box-label-center" style="font-size:9px;text-anchor:middle;fill:var(--text-subtle)">CUDA_VISIBLE_DEVICES: 0,1</text>
+  <text x="595" y="150" class="box-label-center" style="font-size:9px;text-anchor:middle;fill:var(--text-subtle)">CUDA_VISIBLE_DEVICES: 2-7</text>
+  <text x="205" y="248" class="box-label-center" style="font-size:10px;text-anchor:middle;fill:var(--accent)">NVLink (works!)</text>
+  <text x="595" y="248" class="box-label-center" style="font-size:10px;text-anchor:middle;fill:var(--accent)">NVLink (works!)</text>
+  <text x="400" y="330" class="box-label-center" style="font-size:14px;text-anchor:middle;fill:var(--error)">NO NVLink between pods</text>
+  <text class="box-label-center" style="font-size:11px;text-anchor:middle;fill:var(--text-subtle)">
+    <tspan x="400" y="365">Separate process namespaces, separate CUDA contexts,</tspan>
+    <tspan x="400" dy="18">separate GPU device assignments</tspan>
+  </text>
+
+</svg>
\ No newline at end of file
--- a/docs/assets/img/disagg-same-node.svg
+++ b/docs/assets/img/disagg-same-node.svg
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 800 250" width="800" height="250" role="img" aria-labelledby="title desc">
+  <!-- Accessible name and description (referenced by aria-labelledby on the root): they describe the drawing instead of repeating the file name. -->
+  <title id="title">Same-node RDMA communication between prefill and decode pods</title>
+  <desc id="desc">A prefill pod with GPU 0 and a decode pod with GPU 2 on the same node, joined by an arrow labelled RDMA (InfiniBand/RoCE) that runs from GPU 0 to GPU 2.</desc>
+
+  <defs>
+    <!-- Diagonal green gradient; the GPU boxes use it through the surface-accent token. -->
+    <linearGradient id="green-fill" x1="0%" y1="0%" x2="100%" y2="100%">
+      <stop offset="0%" stop-color="#8BD420"/>
+      <stop offset="100%" stop-color="#6AAF00"/>
+    </linearGradient>
+    <!-- Drop shadows read their colour from the theme tokens so the dark-scheme values apply too.
+         Each fallback is the light value that used to be hard-coded here. -->
+    <filter id="shadow" x="-8%" y="-10%" width="116%" height="128%">
+      <feDropShadow dx="0" dy="3" stdDeviation="6" flood-color="var(--shadow, rgba(15,23,42,0.10))"/>
+    </filter>
+    <filter id="green-glow" x="-12%" y="-14%" width="124%" height="136%">
+      <feDropShadow dx="0" dy="3" stdDeviation="8" flood-color="var(--glow-green, rgba(118,185,0,0.35))"/>
+    </filter>
+    <filter id="blue-glow" x="-4%" y="-4%" width="108%" height="114%">
+      <feDropShadow dx="0" dy="2" stdDeviation="10" flood-color="var(--glow-blue, rgba(59,130,246,0.15))"/>
+    </filter>
+    <!-- Arrowhead for the RDMA connector, painted with the info colour token. -->
+    <marker id="arrow-info" viewBox="0 0 6 6" refX="5" refY="3"
+            markerWidth="5" markerHeight="5" orient="auto">
+      <path d="M0,0.5 L6,3 L0,5.5 Z" fill="var(--info)"/>
+    </marker>
+  </defs>
+
+  <style>
+    :root { /* light-theme design tokens (the default) */
+      --surface: #FFFFFF;
+      --surface-zone: rgba(219,234,254,0.25);
+      --surface-accent: url(#green-fill); /* paint server: the green-fill gradient from defs */
+      --text-primary: #0F172A;
+      --text-secondary: #1E293B;
+      --text-muted: #475569;
+      --text-subtle: #64748B;
+      --text-on-accent: #FFFFFF;
+      --border: rgba(226,232,240,0.6);
+      --border-zone: rgba(147,197,253,0.5);
+      --accent: #76B900;
+      --accent-start: #8BD420;
+      --accent-end: #6AAF00;
+      --error: #EF4444;
+      --info: #3B82F6;
+      --connector: #CBD5E1;
+      --connector-dark: #64748B;
+      --shadow: rgba(15,23,42,0.10);
+      --glow-green: rgba(118,185,0,0.35);
+      --glow-blue: rgba(59,130,246,0.15);
+      --grid-dot: rgba(148,163,184,0.2);
+      --pill-request: #3B82F6;
+      --pill-kv: #D97706;
+      --pill-kv-dark: #B45309;
+      --pill-perf: #64748B;
+      --zone-label: rgba(59,130,246,0.65);
+    }
+    @media (prefers-color-scheme: dark) { /* overrides when the viewer prefers a dark colour scheme */
+      :root {
+        --surface: #1E293B;
+        --surface-zone: rgba(59,130,246,0.08);
+        --surface-accent: url(#green-fill);
+        --text-primary: #F1F5F9;
+        --text-secondary: #CBD5E1;
+        --text-muted: #94A3B8;
+        --text-subtle: #64748B; /* same value as the light theme */
+        --text-on-accent: #FFFFFF;
+        --border: rgba(148,163,184,0.25);
+        --border-zone: rgba(147,197,253,0.3);
+        --accent: #76B900;
+        --accent-start: #8BD420;
+        --accent-end: #6AAF00;
+        --error: #F87171;
+        --info: #60A5FA;
+        --connector: #475569;
+        --connector-dark: #94A3B8;
+        --shadow: rgba(0,0,0,0.40);
+        --glow-green: rgba(118,185,0,0.25);
+        --glow-blue: rgba(59,130,246,0.10);
+        --grid-dot: rgba(226,232,240,0.15);
+        --pill-request: #60A5FA;
+        --pill-kv: #FBBF24;
+        --pill-kv-dark: #D97706;
+        --pill-perf: #94A3B8;
+        --zone-label: rgba(96,165,250,0.7);
+      }
+    }
+    text { font-family: -apple-system, 'Segoe UI', 'Helvetica Neue', Arial, sans-serif; } /* one font stack for all text */
+    .title { font-size: 18px; font-weight: 700; fill: var(--text-primary); }
+    .subtitle { font-size: 11px; font-weight: 400; fill: var(--text-subtle); letter-spacing: 0.3px; }
+    .box-label { font-size: 14px; font-weight: 600; fill: var(--text-secondary); text-anchor: start; } /* pod names, left-anchored */
+    .box-label-center { font-size: 14px; font-weight: 600; fill: var(--text-secondary); text-anchor: middle; }
+    .box-label-white { font-size: 14px; font-weight: 700; fill: var(--text-on-accent); text-anchor: middle; } /* GPU labels on the green boxes */
+    .box-sub { font-size: 10px; font-weight: 400; fill: var(--text-muted); text-anchor: start; }
+    .box-sub-center { font-size: 10px; font-weight: 400; fill: var(--text-muted); text-anchor: middle; }
+    .box-sub-white { font-size: 10px; font-weight: 400; fill: var(--text-on-accent); opacity: 0.75; text-anchor: middle; }
+    .conn-label { font-size: 10px; font-weight: 600; text-anchor: middle; } /* no fill here; combined with a label-* class for colour */
+    .zone-label { font-size: 10px; font-weight: 600; fill: var(--text-subtle); text-anchor: middle; letter-spacing: 1.2px; } /* the SAME NODE caption overrides this fill inline */
+    .zone-label-desc { font-size: 9px; font-weight: normal; fill: #94A3B8; text-anchor: start; } /* hard-coded colour, not a token */
+    .pill-request { fill: rgba(59,130,246,0.12); stroke: var(--glow-blue); stroke-width: 0.5; } /* background pill behind the RDMA label */
+    .label-request { fill: var(--info); }
+    .pill-kv { fill: rgba(217,119,6,0.12); stroke: rgba(217,119,6,0.15); stroke-width: 0.5; }
+    .label-kv { fill: var(--pill-kv-dark); }
+    .pill-perf { fill: rgba(100,116,139,0.08); stroke: rgba(100,116,139,0.12); stroke-width: 0.5; }
+    .label-perf { fill: var(--text-muted); }
+    .connector { fill: none; stroke: var(--connector); stroke-width: 2.0; }
+    .connector-kv { fill: none; stroke: var(--pill-kv); stroke-width: 2.0; }
+    .connector-dashed { fill: none; stroke: var(--connector); stroke-width: 2.0; stroke-dasharray: 6 4; }
+  </style>
+  <!-- Node boundary: dashed zone with its caption. -->
+  <rect filter="url(#blue-glow)" fill="var(--surface-zone)" stroke="var(--border-zone)" stroke-width="1.0" stroke-dasharray="6 3" x="20" y="15" width="760" height="220" rx="14"/>
+  <text class="zone-label" style="fill:var(--zone-label);text-anchor:middle;" x="400.0" y="35.5">SAME NODE</text>
+
+  <!-- Prefill pod: the accent rect sits 2px left of the card so only a left-edge strip shows. -->
+  <rect fill="var(--accent)" opacity="0.7" x="58" y="55" width="260" height="130" rx="10"/>
+  <rect filter="url(#shadow)" fill="var(--surface)" stroke="var(--border)" stroke-width="1.0" x="60" y="55" width="260" height="130" rx="10"/>
+  <text class="box-label" x="70" y="74.9">Prefill Pod</text>
+  <!-- Decode pod: same construction, 420px further right. -->
+  <rect fill="var(--accent)" opacity="0.7" x="478" y="55" width="260" height="130" rx="10"/>
+  <rect filter="url(#shadow)" fill="var(--surface)" stroke="var(--border)" stroke-width="1.0" x="480" y="55" width="260" height="130" rx="10"/>
+  <text class="box-label" x="490" y="74.9">Decode Pod</text>
+  <!-- One gradient-filled GPU box per pod. -->
+  <rect filter="url(#green-glow)" fill="var(--surface-accent)" x="110" y="110" width="150" height="56" rx="10"/>
+  <text class="box-label-white" x="185.0" y="142.9">GPU 0 (VRAM)</text>
+  <rect filter="url(#green-glow)" fill="var(--surface-accent)" x="540" y="110" width="150" height="56" rx="10"/>
+  <text class="box-label-white" x="615.0" y="142.9">GPU 2 (VRAM)</text>
+
+  <!-- RDMA route: leaves the bottom of GPU 0, runs below both pods, and ends with an arrowhead under GPU 2. -->
+  <path marker-end="url(#arrow-info)" fill="none" stroke="var(--info)" stroke-width="2.0" d="M185,166 L185,220 L615,220 L615.0,170.0"/>
+  <!-- Label pill centred on x=400, just above the horizontal run of the route. -->
+  <rect class="pill-request" x="336.8" y="202.0" width="126.39999999999995" height="16" rx="8"/>
+  <text class="conn-label label-request" x="400" y="213">RDMA (InfiniBand/RoCE)</text>
+
+</svg>
\ No newline at end of file
--- a/docs/kubernetes/disagg-communication-guide.md
+++ b/docs/kubernetes/disagg-communication-guide.md
@@ -22,43 +22,9 @@ This guide explains how prefill and decode workers communicate in Dynamo's disag

 ### Communication Stack

-```text
-┌─────────────────────────────────────────────────────────────────────────┐
-│                         Dynamo Disaggregated Serving                     │
-├─────────────────────────────────────────────────────────────────────────┤
-│                                                                          │
-│  ┌──────────────────┐              ┌──────────────────┐                 │
-│  │  Prefill Worker  │              │  Decode Worker   │                 │
-│  │  (Pod A)         │              │  (Pod B)         │                 │
-│  │                  │              │                  │                 │
-│  │  ┌────────────┐  │              │  ┌────────────┐  │                 │
-│  │  │ KV Cache   │  │   Transfer   │  │ KV Cache   │  │                 │
-│  │  │ (GPU VRAM) │──┼──────────────┼─▶│ (GPU VRAM) │  │                 │
-│  │  └────────────┘  │              │  └────────────┘  │                 │
-│  └────────┬─────────┘              └────────┬─────────┘                 │
-│           │                                  │                          │
-├───────────┼──────────────────────────────────┼──────────────────────────┤
-│           │          NIXL Library            │                          │
-│           │    (KV Cache Transfer API)       │                          │
-├───────────┼──────────────────────────────────┼──────────────────────────┤
-│           │                                  │                          │
-│           │              UCX                 │                          │
-│           │   (Unified Communication X)      │                          │
-│           │                                  │                          │
-│  ┌────────┴──────────────────────────────────┴────────┐                 │
-│  │                  Transport Layer                    │                 │
-│  │                                                     │                 │
-│  │  ┌─────────────┐  ┌─────────────┐  ┌─────────────┐ │                 │
-│  │  │   rc_x/dc_x │  │  cuda_copy  │  │    tcp      │ │                 │
-│  │  │   (RDMA)    │  │  (staging)  │  │  (fallback) │ │                 │
-│  │  │             │  │             │  │             │ │                 │
-│  │  │  InfiniBand │  │ GPU↔Host    │  │  Network    │ │                 │
-│  │  │  or RoCE    │  │ memory copy │  │  sockets    │ │                 │
-│  │  └─────────────┘  └─────────────┘  └─────────────┘ │                 │
-│  └─────────────────────────────────────────────────────┘                │
-│                                                                          │
-└─────────────────────────────────────────────────────────────────────────┘
-```
+<Frame>
+  <img src="../assets/img/disagg-comm-stack.svg" alt="Disaggregated inference communication stack showing NIXL, UCX, and transport layers" />
+</Frame>

 ### Component Responsibilities

@@ -82,29 +48,9 @@ NVLink is a **direct GPU-to-GPU interconnect** that operates at the hardware lev

 **Kubernetes pods violate all three requirements:**

-```text
-┌─────────────────────────────────────────────────────────────────────────┐
-│                        Physical Node (8× H100 GPUs)                      │
-│                                                                          │
-│  ┌─────────────────────────────┐    ┌─────────────────────────────┐    │
-│  │       Pod A (Prefill)       │    │       Pod B (Decode)        │    │
-│  │                             │    │                             │    │
-│  │  Process Namespace: PID 1   │    │  Process Namespace: PID 1   │    │
-│  │  CUDA_VISIBLE_DEVICES: 0,1  │    │  CUDA_VISIBLE_DEVICES: 2-7  │    │
-│  │                             │    │                             │    │
-│  │  ┌─────┐  ┌─────┐          │    │  ┌─────┐  ┌─────┐  ...     │    │
-│  │  │GPU 0│  │GPU 1│          │    │  │GPU 2│  │GPU 3│          │    │
-│  │  └─────┘  └─────┘          │    │  └─────┘  └─────┘          │    │
-│  │       ↑ NVLink ↑            │    │       ↑ NVLink ↑            │    │
-│  │       (works!)              │    │       (works!)              │    │
-│  └─────────────────────────────┘    └─────────────────────────────┘    │
-│                                                                          │
-│            ╳ NO NVLink possible between pods ╳                          │
-│                                                                          │
-│  Reason: Separate process namespaces, separate CUDA contexts,           │
-│          separate GPU device assignments                                 │
-└─────────────────────────────────────────────────────────────────────────┘
-```
+<Frame>
+  <img src="../assets/img/disagg-nvlink-limitation.svg" alt="Why NVLink cannot work between Kubernetes pods due to process isolation" />
+</Frame>

 ### Technical Explanation

@@ -148,28 +94,15 @@ VLLMDecodeWorker:

 When prefill and decode workers are on the **same physical node**:

-```text
-┌─────────────────────────────────────────────────────────────────────────┐
-│                             Same Node                                    │
-│                                                                          │
-│  ┌────────────────────┐                    ┌────────────────────┐       │
-│  │   Prefill Pod      │                    │   Decode Pod       │       │
-│  │                    │                    │                    │       │
-│  │  ┌──────────────┐  │                    │  ┌──────────────┐  │       │
-│  │  │ GPU 0 (VRAM) │  │                    │  │ GPU 2 (VRAM) │  │       │
-│  │  └──────┬───────┘  │                    │  └──────▲───────┘  │       │
-│  └─────────┼──────────┘                    └─────────┼──────────┘       │
-│            │                                         │                   │
-│            │         RDMA (InfiniBand/RoCE)          │                   │
-│            └─────────────────────────────────────────┘                   │
-│                                                                          │
-│  Options (best to worst):                                                │
-│  1. InfiniBand RDMA with GPUDirect    → GPU-to-GPU, bypasses CPU        │
-│  2. RoCE RDMA with GPUDirect          → GPU-to-GPU, bypasses CPU        │
-│  3. Host-staged RDMA                  → GPU→CPU→RDMA→CPU→GPU            │
-│  4. TCP (fallback)                    → GPU→CPU→TCP→CPU→GPU             │
-└─────────────────────────────────────────────────────────────────────────┘
-```
+<Frame>
+  <img src="../assets/img/disagg-same-node.svg" alt="Same-node RDMA communication between prefill and decode pods" />
+</Frame>
+
+**Options (best to worst):**
+1. InfiniBand RDMA with GPUDirect → GPU-to-GPU, bypasses CPU
+2. RoCE RDMA with GPUDirect → GPU-to-GPU, bypasses CPU
+3. Host-staged RDMA → GPU→CPU→RDMA→CPU→GPU
+4. TCP (fallback) → GPU→CPU→TCP→CPU→GPU

 **Best Practice**: Use RDMA even for same-node communication. The overhead is minimal and it provides consistent behavior whether pods land on the same or different nodes.

@@ -177,24 +110,9 @@ When prefill and decode workers are on the **same physical node**:

 When prefill and decode workers are on **different nodes**:

-```text
-┌──────────────────────────────┐         ┌──────────────────────────────┐
-│           Node 1             │         │           Node 2             │
-│                              │         │                              │
-│  ┌────────────────────┐      │         │      ┌────────────────────┐  │
-│  │   Prefill Pod      │      │         │      │   Decode Pod       │  │
-│  │  ┌──────────────┐  │      │         │      │  ┌──────────────┐  │  │
-│  │  │ GPU (VRAM)   │  │      │         │      │  │ GPU (VRAM)   │  │  │
-│  │  └──────┬───────┘  │      │         │      │  └──────▲───────┘  │  │
-│  └─────────┼──────────┘      │         │      └─────────┼──────────┘  │
-│            │                 │         │                │             │
-│  ┌─────────▼─────────┐       │         │       ┌────────┴────────┐   │
-│  │    RDMA NIC       │       │         │       │    RDMA NIC     │   │
-│  │  (InfiniBand/     │◄──────┼─────────┼──────▶│  (InfiniBand/   │   │
-│  │   RoCE)           │       │ Network │       │   RoCE)         │   │
-│  └───────────────────┘       │         │       └─────────────────┘   │
-└──────────────────────────────┘         └──────────────────────────────┘
-```
+<Frame>
+  <img src="../assets/img/disagg-cross-node.svg" alt="Cross-node RDMA communication between prefill and decode pods on separate nodes" />
+</Frame>

 **Requirements for optimal cross-node performance:**
 - InfiniBand or RoCE network fabric