Unverified commit c1560ac9, authored by dagil-nvidia and committed by GitHub
Browse files

docs: add SVG diagrams to disaggregated inference communication guide (#7638)


Signed-off-by: Dan Gil <dagil@nvidia.com>
parent bf61a5df
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 800 600" width="800" height="600" role="img" aria-labelledby="title desc">
<title id="title">Disaggregated inference communication stack</title>
<desc id="desc">A Prefill Worker (Pod A) and a Decode Worker (Pod B) each hold a KV Cache in GPU VRAM, with a Transfer arrow running from the prefill cache to the decode cache. Both workers connect down to the NIXL Library (KV Cache Transfer API), which connects to three UCX transport-layer options: rc_x/dc_x (RDMA over InfiniBand or RoCE), cuda_copy (staging, GPU to host memory copy), and tcp (fallback over network sockets).</desc>
<!-- The title and desc above are the accessible name and description that the root element's aria-labelledby points at, so they describe the picture rather than repeat the file name. -->
<defs>
<!-- Diagonal two-stop green gradient (top-left to bottom-right). The stylesheet points its surface-accent variable at this id. -->
<linearGradient id="green-fill" x1="0%" y1="0%" x2="100%" y2="100%">
  <stop offset="0%" stop-color="#8BD420"/>
  <stop offset="100%" stop-color="#6AAF00"/>
</linearGradient>
<!-- Vertical green gradient that fades to fully transparent at both ends. No element in this svg references it. -->
<linearGradient id="accent-fade" x1="0" y1="0" x2="0" y2="1">
  <stop offset="0%" stop-color="#76B900" stop-opacity="0"/>
  <stop offset="12%" stop-color="#76B900" stop-opacity="0.65"/>
  <stop offset="88%" stop-color="#76B900" stop-opacity="0.65"/>
  <stop offset="100%" stop-color="#76B900" stop-opacity="0"/>
</linearGradient>
<!-- Drop-shadow filters used by the cards (shadow), the green accent boxes (green-glow) and the zone frames (blue-glow). -->
<!-- Each flood color reads the matching theme variable from the stylesheet so the dark-scheme override applies; the literal after the comma is the light-scheme value, used only if the variable is undefined. -->
<filter id="shadow" x="-8%" y="-10%" width="116%" height="128%">
  <feDropShadow dx="0" dy="3" stdDeviation="6" flood-color="var(--shadow, rgba(15,23,42,0.10))"/>
</filter>
<filter id="green-glow" x="-12%" y="-14%" width="124%" height="136%">
  <feDropShadow dx="0" dy="3" stdDeviation="8" flood-color="var(--glow-green, rgba(118,185,0,0.35))"/>
</filter>
<filter id="blue-glow" x="-4%" y="-4%" width="108%" height="114%">
  <feDropShadow dx="0" dy="2" stdDeviation="10" flood-color="var(--glow-blue, rgba(59,130,246,0.15))"/>
</filter>
<!-- 40x40 user-space tile with a single 0.5-radius dot at its center. No element in this svg references it. -->
<pattern id="grid-dots" width="40" height="40" patternUnits="userSpaceOnUse">
  <circle cx="20" cy="20" r="0.5" fill="var(--grid-dot)"/>
</pattern>
<!-- Arrowhead markers: triangles in a 6x6 viewBox that differ only in id, fill variable, rendered size and direction. -->
<!-- Within this svg only the second marker (connector color) and the third marker (pill-kv color) are referenced by marker-end. -->
<marker id="arrow" viewBox="0 0 6 6" refX="5" refY="3" markerWidth="5" markerHeight="5" orient="auto">
  <path d="M0,0.5 L6,3 L0,5.5 Z" fill="var(--connector)"/>
</marker>
<marker id="arrow-var--connector" viewBox="0 0 6 6" refX="5" refY="3" markerWidth="5" markerHeight="5" orient="auto">
  <path d="M0,0.5 L6,3 L0,5.5 Z" fill="var(--connector)"/>
</marker>
<marker id="arrow-pill-kv" viewBox="0 0 6 6" refX="5" refY="3" markerWidth="5" markerHeight="5" orient="auto">
  <path d="M0,0.5 L6,3 L0,5.5 Z" fill="var(--pill-kv)"/>
</marker>
<marker id="arrow-dark" viewBox="0 0 6 6" refX="5" refY="3" markerWidth="5" markerHeight="5" orient="auto">
  <path d="M0,0.5 L6,3 L0,5.5 Z" fill="var(--text-subtle)"/>
</marker>
<!-- Larger (6x6 rendered) forward arrowhead. -->
<marker id="arrow-green" viewBox="0 0 6 6" refX="5" refY="3" markerWidth="6" markerHeight="6" orient="auto">
  <path d="M0,0.5 L6,3 L0,5.5 Z" fill="var(--pill-kv)"/>
</marker>
<!-- Mirrored arrowhead: the tip is at x=0 and the reference point sits near the tip (refX=1). -->
<marker id="arrow-green-rev" viewBox="0 0 6 6" refX="1" refY="3" markerWidth="6" markerHeight="6" orient="auto">
  <path d="M6,0.5 L0,3 L6,5.5 Z" fill="var(--pill-kv)"/>
</marker>
<!-- Icon symbols drawn with currentColor. No use element in this svg instantiates any of them. -->
<!-- Monitor: rounded screen outline on a stem and base line. -->
<symbol id="sym-monitor" viewBox="0 0 14 14">
  <rect x="2" y="1" width="10" height="8" rx="1.5" fill="none" stroke="currentColor" stroke-width="1.6"/>
  <line x1="7" y1="9" x2="7" y2="12" stroke="currentColor" stroke-width="1.6" stroke-linecap="round"/>
  <line x1="4" y1="12" x2="10" y2="12" stroke="currentColor" stroke-width="1.6" stroke-linecap="round"/>
</symbol>
<!-- Fork: one horizontal stroke splitting into two branches that end in dots. -->
<symbol id="sym-fork" viewBox="0 0 14 14">
  <path d="M2,7 L7,7" stroke="currentColor" stroke-width="1.8" stroke-linecap="round"/>
  <path d="M7,7 L12,3" stroke="currentColor" stroke-width="1.8" stroke-linecap="round"/>
  <path d="M7,7 L12,11" stroke="currentColor" stroke-width="1.8" stroke-linecap="round"/>
  <circle cx="12" cy="3" r="1.2" fill="currentColor"/>
  <circle cx="12" cy="11" r="1.2" fill="currentColor"/>
</symbol>
<!-- Gauge: open arc with a needle and a hub dot. -->
<symbol id="sym-gauge" viewBox="0 0 14 14">
  <path d="M2,10 A5.5,5.5 0 1,1 12,10" fill="none" stroke="currentColor" stroke-width="1.6" stroke-linecap="round"/>
  <line x1="7" y1="9" x2="9.5" y2="4.5" stroke="currentColor" stroke-width="1.8" stroke-linecap="round"/>
  <circle cx="7" cy="9" r="1" fill="currentColor"/>
</symbol>
<!-- Layers: a closed diamond on top with two open chevrons stacked beneath it. -->
<symbol id="sym-layers" viewBox="0 0 14 14">
  <path d="M1,8 L7,11 L13,8" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>
  <path d="M1,5.5 L7,8.5 L13,5.5" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>
  <path d="M1,3 L7,6 L13,3 L7,0 Z" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linejoin="round"/>
</symbol>
<!-- Stream: three horizontal lines of different lengths with a semi-transparent vertical bar on the right. -->
<symbol id="sym-stream" viewBox="0 0 14 14">
  <line x1="2" y1="3" x2="8" y2="3" stroke="currentColor" stroke-width="1.5" stroke-linecap="round"/>
  <line x1="2" y1="7" x2="10" y2="7" stroke="currentColor" stroke-width="1.5" stroke-linecap="round"/>
  <line x1="2" y1="11" x2="6" y2="11" stroke="currentColor" stroke-width="1.5" stroke-linecap="round"/>
  <line x1="12" y1="0" x2="12" y2="14" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" opacity="0.6"/>
</symbol>
<!-- Blocks: three outlined squares plus a fourth, tinted square that is offset by 1 unit at the bottom right. -->
<symbol id="sym-blocks" viewBox="0 0 14 14">
  <rect x="0.5" y="0.5" width="5.5" height="5.5" rx="1" fill="none" stroke="currentColor" stroke-width="1.3"/>
  <rect x="7" y="0.5" width="5.5" height="5.5" rx="1" fill="none" stroke="currentColor" stroke-width="1.3"/>
  <rect x="0.5" y="7" width="5.5" height="5.5" rx="1" fill="none" stroke="currentColor" stroke-width="1.3"/>
  <rect x="8" y="8" width="5.5" height="5.5" rx="1" fill="currentColor" opacity="0.25" stroke="currentColor" stroke-width="1.3"/>
</symbol>
<!-- Gate: two tall outlined rectangles side by side. -->
<symbol id="sym-gate" viewBox="0 0 14 14">
  <rect x="2" y="1" width="4" height="12" rx="1" fill="none" stroke="currentColor" stroke-width="1.4"/>
  <rect x="8" y="1" width="4" height="12" rx="1" fill="none" stroke="currentColor" stroke-width="1.4"/>
</symbol>
<!-- Transfer: dashed horizontal line with two chevrons whose tips meet at the center (7,6). -->
<symbol id="sym-transfer" viewBox="0 0 14 12">
  <line x1="0" y1="6" x2="14" y2="6" stroke="currentColor" stroke-width="0.8" stroke-dasharray="2 2"/>
  <path d="M3,2 L7,6 L3,10" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"/>
  <path d="M11,2 L7,6 L11,10" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"/>
</symbol>
</defs>
<style>
/* Theme variables: light-scheme defaults. Every color below that uses var() resolves against these. */
:root {
--surface: #FFFFFF;
--surface-zone: rgba(219,234,254,0.25);
--surface-accent: url(#green-fill);
--text-primary: #0F172A;
--text-secondary: #1E293B;
--text-muted: #475569;
--text-subtle: #64748B;
--text-on-accent: #FFFFFF;
--border: rgba(226,232,240,0.6);
--border-zone: rgba(147,197,253,0.5);
--accent: #76B900;
--accent-start: #8BD420;
--accent-end: #6AAF00;
--error: #EF4444;
--info: #3B82F6;
--connector: #CBD5E1;
--connector-dark: #64748B;
--shadow: rgba(15,23,42,0.10);
--glow-green: rgba(118,185,0,0.35);
--glow-blue: rgba(59,130,246,0.15);
--grid-dot: rgba(148,163,184,0.2);
--pill-request: #3B82F6;
--pill-kv: #D97706;
--pill-kv-dark: #B45309;
--pill-perf: #64748B;
--zone-label: rgba(59,130,246,0.65);
}
/* Dark-scheme overrides. This block must stay after the light block: both use the same selector, so the later declaration wins when the media query matches. */
@media (prefers-color-scheme: dark) {
:root {
--surface: #1E293B;
--surface-zone: rgba(59,130,246,0.08);
--surface-accent: url(#green-fill);
--text-primary: #F1F5F9;
--text-secondary: #CBD5E1;
--text-muted: #94A3B8;
--text-subtle: #64748B;
--text-on-accent: #FFFFFF;
--border: rgba(148,163,184,0.25);
--border-zone: rgba(147,197,253,0.3);
--accent: #76B900;
--accent-start: #8BD420;
--accent-end: #6AAF00;
--error: #F87171;
--info: #60A5FA;
--connector: #475569;
--connector-dark: #94A3B8;
--shadow: rgba(0,0,0,0.40);
--glow-green: rgba(118,185,0,0.25);
--glow-blue: rgba(59,130,246,0.10);
--grid-dot: rgba(226,232,240,0.15);
--pill-request: #60A5FA;
--pill-kv: #FBBF24;
--pill-kv-dark: #D97706;
--pill-perf: #94A3B8;
--zone-label: rgba(96,165,250,0.7);
}
}
/* Typography: one font stack for all text, then per-role size, weight, color and anchor. */
text { font-family: -apple-system, 'Segoe UI', 'Helvetica Neue', Arial, sans-serif; }
.title { font-size: 18px; font-weight: 700; fill: var(--text-primary); }
.subtitle { font-size: 11px; font-weight: 400; fill: var(--text-subtle); letter-spacing: 0.3px; }
/* Box labels: start-anchored, centered, and white-on-accent variants. */
.box-label { font-size: 14px; font-weight: 600; fill: var(--text-secondary); text-anchor: start; }
.box-label-center { font-size: 14px; font-weight: 600; fill: var(--text-secondary); text-anchor: middle; }
.box-label-white { font-size: 14px; font-weight: 700; fill: var(--text-on-accent); text-anchor: middle; }
.box-sub { font-size: 10px; font-weight: 400; fill: var(--text-muted); text-anchor: start; }
.box-sub-center { font-size: 10px; font-weight: 400; fill: var(--text-muted); text-anchor: middle; }
.box-sub-white { font-size: 10px; font-weight: 400; fill: var(--text-on-accent); opacity: 0.75; text-anchor: middle; }
.conn-label { font-size: 10px; font-weight: 600; text-anchor: middle; }
.zone-label { font-size: 10px; font-weight: 600; fill: var(--text-subtle); text-anchor: middle; letter-spacing: 1.2px; }
/* NOTE(review): the fill here is a literal color rather than a theme variable, so it does not change under the dark scheme. */
.zone-label-desc { font-size: 9px; font-weight: normal; fill: #94A3B8; text-anchor: start; }
/* Connector label pills (background rect) and the matching label text color. */
.pill-request { fill: rgba(59,130,246,0.12); stroke: var(--glow-blue); stroke-width: 0.5; }
.label-request { fill: var(--info); }
.pill-kv { fill: rgba(217,119,6,0.12); stroke: rgba(217,119,6,0.15); stroke-width: 0.5; }
.label-kv { fill: var(--pill-kv-dark); }
.pill-perf { fill: rgba(100,116,139,0.08); stroke: rgba(100,116,139,0.12); stroke-width: 0.5; }
.label-perf { fill: var(--text-muted); }
/* Connector stroke classes: default, KV-colored, and dashed. */
.connector { fill: none; stroke: var(--connector); stroke-width: 2.0; }
.connector-kv { fill: none; stroke: var(--pill-kv); stroke-width: 2.0; }
.connector-dashed { fill: none; stroke: var(--connector); stroke-width: 2.0; stroke-dasharray: 6 4; }
</style>
<!-- Nested zone frames, drawn outermost first so inner frames paint on top. -->
<!-- Each inner frame sits 20px inside its parent on the left and right and ends 10px above its parent's bottom edge (bottoms at 590, 580, 570), so no inner border crosses or touches an outer one. The transport boxes end at y=560. -->
<rect x="20" y="20" width="760" height="570" rx="14" fill="var(--surface-zone)" stroke="var(--border-zone)" stroke-width="1.0" stroke-dasharray="6 3" filter="url(#blue-glow)"/>
<text x="400.0" y="46.5" class="zone-label" style="fill:var(--zone-label);text-anchor:middle;">Dynamo Disaggregated Serving</text>
<rect x="40" y="320" width="720" height="260" rx="14" fill="var(--surface-zone)" stroke="var(--border-zone)" stroke-width="1.0" stroke-dasharray="6 3" filter="url(#blue-glow)"/>
<text x="52" y="340.5" class="zone-label" style="fill:var(--zone-label);text-anchor:start;">UCX</text>
<rect x="60" y="410" width="680" height="160" rx="14" fill="var(--surface-zone)" stroke="var(--border-zone)" stroke-width="1.0" stroke-dasharray="6 3" filter="url(#blue-glow)"/>
<text x="72" y="430.5" class="zone-label" style="fill:var(--zone-label);text-anchor:start;">Transport Layer</text>
<!-- Three vertical connectors from the bottom edge of the NIXL Library bar (y=298) to just above the transport boxes (y=456). The x values 175, 400 and 625 are the horizontal centers of the three transport boxes. -->
<path d="M175,298 L175.0,456.0" fill="none" stroke="var(--connector)" stroke-width="2.0" marker-end="url(#arrow-var--connector)"/>
<path d="M400,298 L400.0,456.0" fill="none" stroke="var(--connector)" stroke-width="2.0" marker-end="url(#arrow-var--connector)"/>
<path d="M625,298 L625.0,456.0" fill="none" stroke="var(--connector)" stroke-width="2.0" marker-end="url(#arrow-var--connector)"/>
<!-- Prefill Worker card. The first rect is accent-colored and starts 2px left of the surface rect drawn over it, so only a 2px accent strip remains visible on the card's left edge. -->
<rect x="58" y="70" width="300" height="140" rx="10" fill="var(--accent)" opacity="0.7"/>
<rect x="60" y="70" width="300" height="140" rx="10" fill="var(--surface)" stroke="var(--border)" stroke-width="1.0" filter="url(#shadow)"/>
<text x="70" y="89.9" class="box-label">Prefill Worker</text>
<text x="70" y="102.9" class="box-sub">(Pod A)</text>
<!-- Decode Worker card, built the same way 380px to the right. -->
<rect x="438" y="70" width="300" height="140" rx="10" fill="var(--accent)" opacity="0.7"/>
<rect x="440" y="70" width="300" height="140" rx="10" fill="var(--surface)" stroke="var(--border)" stroke-width="1.0" filter="url(#shadow)"/>
<text x="450" y="89.9" class="box-label">Decode Worker</text>
<text x="450" y="102.9" class="box-sub">(Pod B)</text>
<!-- Green KV Cache boxes, one inside each worker card. -->
<rect x="120" y="130" width="160" height="56" rx="10" fill="var(--surface-accent)" filter="url(#green-glow)"/>
<text x="200.0" y="156.4" class="box-label-white">KV Cache</text>
<text x="200.0" y="169.4" class="box-sub-white">(GPU VRAM)</text>
<rect x="520" y="130" width="160" height="56" rx="10" fill="var(--surface-accent)" filter="url(#green-glow)"/>
<text x="600.0" y="156.4" class="box-label-white">KV Cache</text>
<text x="600.0" y="169.4" class="box-sub-white">(GPU VRAM)</text>
<!-- Full-width NIXL Library bar spanning x=60 to x=740 below both worker cards. -->
<rect x="60" y="250" width="680" height="48" rx="10" fill="var(--surface-accent)" filter="url(#green-glow)"/>
<text x="400.0" y="272.4" class="box-label-white">NIXL Library</text>
<text x="400.0" y="285.4" class="box-sub-white">(KV Cache Transfer API)</text>
<!-- Three transport cards (190x100 each) inside the Transport Layer zone: RDMA, CUDA staging copy, and TCP fallback. Labels are start-anchored 12px inside each card's left edge. -->
<rect x="80" y="460" width="190" height="100" rx="10" fill="var(--surface)" stroke="var(--border)" stroke-width="1.0" filter="url(#shadow)"/>
<text x="92" y="508.4" class="box-label">rc_x/dc_x</text>
<text x="92" y="521.4" class="box-sub">(RDMA)</text>
<rect x="305" y="460" width="190" height="100" rx="10" fill="var(--surface)" stroke="var(--border)" stroke-width="1.0" filter="url(#shadow)"/>
<text x="317" y="508.4" class="box-label">cuda_copy</text>
<text x="317" y="521.4" class="box-sub">(staging)</text>
<rect x="530" y="460" width="190" height="100" rx="10" fill="var(--surface)" stroke="var(--border)" stroke-width="1.0" filter="url(#shadow)"/>
<text x="542" y="508.4" class="box-label">tcp</text>
<text x="542" y="521.4" class="box-sub">(fallback)</text>
<!-- KV transfer arrow: from the right edge of the prefill KV Cache box (x=280) to just left of the decode KV Cache box (x=516), with a pill-shaped "Transfer" label centered at x=400 above the line. -->
<path d="M280,158 L516.0,158.0" fill="none" stroke="var(--pill-kv)" stroke-width="2.0" marker-end="url(#arrow-pill-kv)"/>
<rect x="373.2" y="140.0" width="53.6" height="16" rx="8" class="pill-kv"/>
<text x="400" y="151" class="conn-label label-kv">Transfer</text>
<!-- Connectors from the bottom edge of each worker card (y=210) down to just above the NIXL Library bar (y=246). -->
<path d="M200,210 L200.0,246.0" fill="none" stroke="var(--connector)" stroke-width="2.0" marker-end="url(#arrow-var--connector)"/>
<path d="M600,210 L600.0,246.0" fill="none" stroke="var(--connector)" stroke-width="2.0" marker-end="url(#arrow-var--connector)"/>
<!-- Two-line caption under the UCX zone label; each tspan resets x to 52 and the second drops 12px. -->
<text class="zone-label" style="font-size:9px;text-anchor:start;fill:var(--text-subtle)">
<tspan x="52" y="354">High-Performance</tspan>
<tspan x="52" dy="12">Networking</tspan>
</text>
<!-- Third text line of each transport card. The inline style overrides the class's font size, anchor and fill; the class's font-weight (600) still applies. -->
<text x="92" y="535" class="box-label-center" style="font-size:10px;text-anchor:start;fill:var(--text-muted)">InfiniBand or RoCE</text>
<text x="317" y="535" class="box-label-center" style="font-size:10px;text-anchor:start;fill:var(--text-muted)">GPU↔Host memory copy</text>
<text x="542" y="535" class="box-label-center" style="font-size:10px;text-anchor:start;fill:var(--text-muted)">Network sockets</text>
</svg>
\ No newline at end of file
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 800 370" width="800" height="370" role="img" aria-labelledby="title desc">
<title id="title">Cross-node disaggregated inference data path</title>
<desc id="desc">Node 1 holds a Prefill Pod with a GPU (VRAM) and an RDMA NIC (InfiniBand / RoCE); Node 2 holds a Decode Pod with a GPU (VRAM) and its own RDMA NIC. Arrows run from the prefill GPU down to the Node 1 NIC, across a link labeled Network to the Node 2 NIC, and up to the decode GPU.</desc>
<!-- The title and desc above are the accessible name and description that the root element's aria-labelledby points at, so they describe the picture rather than repeat the file name. -->
<defs>
<!-- Diagonal two-stop green gradient (top-left to bottom-right). The stylesheet points its surface-accent variable at this id. -->
<linearGradient id="green-fill" x1="0%" y1="0%" x2="100%" y2="100%">
  <stop offset="0%" stop-color="#8BD420"/>
  <stop offset="100%" stop-color="#6AAF00"/>
</linearGradient>
<!-- Vertical green gradient that fades to fully transparent at both ends. No element in this svg references it. -->
<linearGradient id="accent-fade" x1="0" y1="0" x2="0" y2="1">
  <stop offset="0%" stop-color="#76B900" stop-opacity="0"/>
  <stop offset="12%" stop-color="#76B900" stop-opacity="0.65"/>
  <stop offset="88%" stop-color="#76B900" stop-opacity="0.65"/>
  <stop offset="100%" stop-color="#76B900" stop-opacity="0"/>
</linearGradient>
<!-- Drop-shadow filters used by the cards (shadow), the green GPU boxes (green-glow) and the node frames (blue-glow). -->
<!-- Each flood color reads the matching theme variable from the stylesheet so the dark-scheme override applies; the literal after the comma is the light-scheme value, used only if the variable is undefined. -->
<filter id="shadow" x="-8%" y="-10%" width="116%" height="128%">
  <feDropShadow dx="0" dy="3" stdDeviation="6" flood-color="var(--shadow, rgba(15,23,42,0.10))"/>
</filter>
<filter id="green-glow" x="-12%" y="-14%" width="124%" height="136%">
  <feDropShadow dx="0" dy="3" stdDeviation="8" flood-color="var(--glow-green, rgba(118,185,0,0.35))"/>
</filter>
<filter id="blue-glow" x="-4%" y="-4%" width="108%" height="114%">
  <feDropShadow dx="0" dy="2" stdDeviation="10" flood-color="var(--glow-blue, rgba(59,130,246,0.15))"/>
</filter>
<!-- 40x40 user-space tile with a single 0.5-radius dot at its center. No element in this svg references it. -->
<pattern id="grid-dots" width="40" height="40" patternUnits="userSpaceOnUse">
  <circle cx="20" cy="20" r="0.5" fill="var(--grid-dot)"/>
</pattern>
<!-- Arrowhead markers: triangles in a 6x6 viewBox that differ only in id, fill variable, rendered size and direction. -->
<!-- Within this svg only the second marker (connector color) and the third marker (info color) are referenced by marker-end. -->
<marker id="arrow" viewBox="0 0 6 6" refX="5" refY="3" markerWidth="5" markerHeight="5" orient="auto">
  <path d="M0,0.5 L6,3 L0,5.5 Z" fill="var(--connector)"/>
</marker>
<marker id="arrow-var--connector" viewBox="0 0 6 6" refX="5" refY="3" markerWidth="5" markerHeight="5" orient="auto">
  <path d="M0,0.5 L6,3 L0,5.5 Z" fill="var(--connector)"/>
</marker>
<marker id="arrow-info" viewBox="0 0 6 6" refX="5" refY="3" markerWidth="5" markerHeight="5" orient="auto">
  <path d="M0,0.5 L6,3 L0,5.5 Z" fill="var(--info)"/>
</marker>
<marker id="arrow-dark" viewBox="0 0 6 6" refX="5" refY="3" markerWidth="5" markerHeight="5" orient="auto">
  <path d="M0,0.5 L6,3 L0,5.5 Z" fill="var(--text-subtle)"/>
</marker>
<!-- Larger (6x6 rendered) forward arrowhead. -->
<marker id="arrow-green" viewBox="0 0 6 6" refX="5" refY="3" markerWidth="6" markerHeight="6" orient="auto">
  <path d="M0,0.5 L6,3 L0,5.5 Z" fill="var(--pill-kv)"/>
</marker>
<!-- Mirrored arrowhead: the tip is at x=0 and the reference point sits near the tip (refX=1). -->
<marker id="arrow-green-rev" viewBox="0 0 6 6" refX="1" refY="3" markerWidth="6" markerHeight="6" orient="auto">
  <path d="M6,0.5 L0,3 L6,5.5 Z" fill="var(--pill-kv)"/>
</marker>
<!-- Icon symbols drawn with currentColor. No use element in this svg instantiates any of them. -->
<!-- Monitor: rounded screen outline on a stem and base line. -->
<symbol id="sym-monitor" viewBox="0 0 14 14">
  <rect x="2" y="1" width="10" height="8" rx="1.5" fill="none" stroke="currentColor" stroke-width="1.6"/>
  <line x1="7" y1="9" x2="7" y2="12" stroke="currentColor" stroke-width="1.6" stroke-linecap="round"/>
  <line x1="4" y1="12" x2="10" y2="12" stroke="currentColor" stroke-width="1.6" stroke-linecap="round"/>
</symbol>
<!-- Fork: one horizontal stroke splitting into two branches that end in dots. -->
<symbol id="sym-fork" viewBox="0 0 14 14">
  <path d="M2,7 L7,7" stroke="currentColor" stroke-width="1.8" stroke-linecap="round"/>
  <path d="M7,7 L12,3" stroke="currentColor" stroke-width="1.8" stroke-linecap="round"/>
  <path d="M7,7 L12,11" stroke="currentColor" stroke-width="1.8" stroke-linecap="round"/>
  <circle cx="12" cy="3" r="1.2" fill="currentColor"/>
  <circle cx="12" cy="11" r="1.2" fill="currentColor"/>
</symbol>
<!-- Gauge: open arc with a needle and a hub dot. -->
<symbol id="sym-gauge" viewBox="0 0 14 14">
  <path d="M2,10 A5.5,5.5 0 1,1 12,10" fill="none" stroke="currentColor" stroke-width="1.6" stroke-linecap="round"/>
  <line x1="7" y1="9" x2="9.5" y2="4.5" stroke="currentColor" stroke-width="1.8" stroke-linecap="round"/>
  <circle cx="7" cy="9" r="1" fill="currentColor"/>
</symbol>
<!-- Layers: a closed diamond on top with two open chevrons stacked beneath it. -->
<symbol id="sym-layers" viewBox="0 0 14 14">
  <path d="M1,8 L7,11 L13,8" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>
  <path d="M1,5.5 L7,8.5 L13,5.5" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>
  <path d="M1,3 L7,6 L13,3 L7,0 Z" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linejoin="round"/>
</symbol>
<!-- Stream: three horizontal lines of different lengths with a semi-transparent vertical bar on the right. -->
<symbol id="sym-stream" viewBox="0 0 14 14">
  <line x1="2" y1="3" x2="8" y2="3" stroke="currentColor" stroke-width="1.5" stroke-linecap="round"/>
  <line x1="2" y1="7" x2="10" y2="7" stroke="currentColor" stroke-width="1.5" stroke-linecap="round"/>
  <line x1="2" y1="11" x2="6" y2="11" stroke="currentColor" stroke-width="1.5" stroke-linecap="round"/>
  <line x1="12" y1="0" x2="12" y2="14" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" opacity="0.6"/>
</symbol>
<!-- Blocks: three outlined squares plus a fourth, tinted square that is offset by 1 unit at the bottom right. -->
<symbol id="sym-blocks" viewBox="0 0 14 14">
  <rect x="0.5" y="0.5" width="5.5" height="5.5" rx="1" fill="none" stroke="currentColor" stroke-width="1.3"/>
  <rect x="7" y="0.5" width="5.5" height="5.5" rx="1" fill="none" stroke="currentColor" stroke-width="1.3"/>
  <rect x="0.5" y="7" width="5.5" height="5.5" rx="1" fill="none" stroke="currentColor" stroke-width="1.3"/>
  <rect x="8" y="8" width="5.5" height="5.5" rx="1" fill="currentColor" opacity="0.25" stroke="currentColor" stroke-width="1.3"/>
</symbol>
<!-- Gate: two tall outlined rectangles side by side. -->
<symbol id="sym-gate" viewBox="0 0 14 14">
  <rect x="2" y="1" width="4" height="12" rx="1" fill="none" stroke="currentColor" stroke-width="1.4"/>
  <rect x="8" y="1" width="4" height="12" rx="1" fill="none" stroke="currentColor" stroke-width="1.4"/>
</symbol>
<!-- Transfer: dashed horizontal line with two chevrons whose tips meet at the center (7,6). -->
<symbol id="sym-transfer" viewBox="0 0 14 12">
  <line x1="0" y1="6" x2="14" y2="6" stroke="currentColor" stroke-width="0.8" stroke-dasharray="2 2"/>
  <path d="M3,2 L7,6 L3,10" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"/>
  <path d="M11,2 L7,6 L11,10" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"/>
</symbol>
</defs>
<style>
/* Theme variables: light-scheme defaults. Every color below that uses var() resolves against these. */
:root {
--surface: #FFFFFF;
--surface-zone: rgba(219,234,254,0.25);
--surface-accent: url(#green-fill);
--text-primary: #0F172A;
--text-secondary: #1E293B;
--text-muted: #475569;
--text-subtle: #64748B;
--text-on-accent: #FFFFFF;
--border: rgba(226,232,240,0.6);
--border-zone: rgba(147,197,253,0.5);
--accent: #76B900;
--accent-start: #8BD420;
--accent-end: #6AAF00;
--error: #EF4444;
--info: #3B82F6;
--connector: #CBD5E1;
--connector-dark: #64748B;
--shadow: rgba(15,23,42,0.10);
--glow-green: rgba(118,185,0,0.35);
--glow-blue: rgba(59,130,246,0.15);
--grid-dot: rgba(148,163,184,0.2);
--pill-request: #3B82F6;
--pill-kv: #D97706;
--pill-kv-dark: #B45309;
--pill-perf: #64748B;
--zone-label: rgba(59,130,246,0.65);
}
/* Dark-scheme overrides. This block must stay after the light block: both use the same selector, so the later declaration wins when the media query matches. */
@media (prefers-color-scheme: dark) {
:root {
--surface: #1E293B;
--surface-zone: rgba(59,130,246,0.08);
--surface-accent: url(#green-fill);
--text-primary: #F1F5F9;
--text-secondary: #CBD5E1;
--text-muted: #94A3B8;
--text-subtle: #64748B;
--text-on-accent: #FFFFFF;
--border: rgba(148,163,184,0.25);
--border-zone: rgba(147,197,253,0.3);
--accent: #76B900;
--accent-start: #8BD420;
--accent-end: #6AAF00;
--error: #F87171;
--info: #60A5FA;
--connector: #475569;
--connector-dark: #94A3B8;
--shadow: rgba(0,0,0,0.40);
--glow-green: rgba(118,185,0,0.25);
--glow-blue: rgba(59,130,246,0.10);
--grid-dot: rgba(226,232,240,0.15);
--pill-request: #60A5FA;
--pill-kv: #FBBF24;
--pill-kv-dark: #D97706;
--pill-perf: #94A3B8;
--zone-label: rgba(96,165,250,0.7);
}
}
/* Typography: one font stack for all text, then per-role size, weight, color and anchor. */
text { font-family: -apple-system, 'Segoe UI', 'Helvetica Neue', Arial, sans-serif; }
.title { font-size: 18px; font-weight: 700; fill: var(--text-primary); }
.subtitle { font-size: 11px; font-weight: 400; fill: var(--text-subtle); letter-spacing: 0.3px; }
/* Box labels: start-anchored, centered, and white-on-accent variants. */
.box-label { font-size: 14px; font-weight: 600; fill: var(--text-secondary); text-anchor: start; }
.box-label-center { font-size: 14px; font-weight: 600; fill: var(--text-secondary); text-anchor: middle; }
.box-label-white { font-size: 14px; font-weight: 700; fill: var(--text-on-accent); text-anchor: middle; }
.box-sub { font-size: 10px; font-weight: 400; fill: var(--text-muted); text-anchor: start; }
.box-sub-center { font-size: 10px; font-weight: 400; fill: var(--text-muted); text-anchor: middle; }
.box-sub-white { font-size: 10px; font-weight: 400; fill: var(--text-on-accent); opacity: 0.75; text-anchor: middle; }
.conn-label { font-size: 10px; font-weight: 600; text-anchor: middle; }
.zone-label { font-size: 10px; font-weight: 600; fill: var(--text-subtle); text-anchor: middle; letter-spacing: 1.2px; }
/* Connector label pills (background rect) and the matching label text color. */
.pill-request { fill: rgba(59,130,246,0.12); stroke: var(--glow-blue); stroke-width: 0.5; }
.label-request { fill: var(--info); }
.pill-kv { fill: rgba(217,119,6,0.12); stroke: rgba(217,119,6,0.15); stroke-width: 0.5; }
.label-kv { fill: var(--pill-kv-dark); }
.pill-perf { fill: rgba(100,116,139,0.08); stroke: rgba(100,116,139,0.12); stroke-width: 0.5; }
.label-perf { fill: var(--text-muted); }
/* Connector stroke classes: default, KV-colored, and dashed. */
.connector { fill: none; stroke: var(--connector); stroke-width: 2.0; }
.connector-kv { fill: none; stroke: var(--pill-kv); stroke-width: 2.0; }
.connector-dashed { fill: none; stroke: var(--connector); stroke-width: 2.0; stroke-dasharray: 6 4; }
</style>
<!-- Two side-by-side node frames (350x320 each) separated by a 60px gap; each label is centered on its frame. -->
<rect x="20" y="15" width="350" height="320" rx="14" fill="var(--surface-zone)" stroke="var(--border-zone)" stroke-width="1.0" stroke-dasharray="6 3" filter="url(#blue-glow)"/>
<text x="195.0" y="35.5" class="zone-label" style="fill:var(--zone-label);text-anchor:middle;">NODE 1</text>
<rect x="430" y="15" width="350" height="320" rx="14" fill="var(--surface-zone)" stroke="var(--border-zone)" stroke-width="1.0" stroke-dasharray="6 3" filter="url(#blue-glow)"/>
<text x="605.0" y="35.5" class="zone-label" style="fill:var(--zone-label);text-anchor:middle;">NODE 2</text>
<!-- Node 1 contents. The pod card is an accent rect starting 2px left of the surface rect drawn over it, leaving a 2px accent strip on the left edge. -->
<rect x="73" y="55" width="240" height="120" rx="10" fill="var(--accent)" opacity="0.7"/>
<rect x="75" y="55" width="240" height="120" rx="10" fill="var(--surface)" stroke="var(--border)" stroke-width="1.0" filter="url(#shadow)"/>
<text x="85" y="74.9" class="box-label">Prefill Pod</text>
<rect x="125" y="100" width="140" height="50" rx="10" fill="var(--surface-accent)" filter="url(#green-glow)"/>
<text x="195.0" y="129.9" class="box-label-white">GPU (VRAM)</text>
<!-- The NIC card sits below the pod card, inside the node frame but outside the pod. -->
<rect x="95" y="220" width="200" height="70" rx="10" fill="var(--surface)" stroke="var(--border)" stroke-width="1.0" filter="url(#shadow)"/>
<text x="105" y="253.4" class="box-label">RDMA NIC</text>
<text x="105" y="266.4" class="box-sub">InfiniBand / RoCE</text>
<!-- Node 2 contents: the same layout shifted 410px to the right. -->
<rect x="483" y="55" width="240" height="120" rx="10" fill="var(--accent)" opacity="0.7"/>
<rect x="485" y="55" width="240" height="120" rx="10" fill="var(--surface)" stroke="var(--border)" stroke-width="1.0" filter="url(#shadow)"/>
<text x="495" y="74.9" class="box-label">Decode Pod</text>
<rect x="535" y="100" width="140" height="50" rx="10" fill="var(--surface-accent)" filter="url(#green-glow)"/>
<text x="605.0" y="129.9" class="box-label-white">GPU (VRAM)</text>
<rect x="505" y="220" width="200" height="70" rx="10" fill="var(--surface)" stroke="var(--border)" stroke-width="1.0" filter="url(#shadow)"/>
<text x="515" y="253.4" class="box-label">RDMA NIC</text>
<text x="515" y="266.4" class="box-sub">InfiniBand / RoCE</text>
<!-- Node 1: arrow from the bottom of the GPU box (y=150) down to just above the NIC card (y=216). -->
<path d="M195,150 L195.0,216.0" fill="none" stroke="var(--connector)" stroke-width="2.0" marker-end="url(#arrow-var--connector)"/>
<!-- Node 2: arrow in the opposite direction, from the top of the NIC card (y=220) up to just below the GPU box (y=154). -->
<path d="M605,220 L605.0,154.0" fill="none" stroke="var(--connector)" stroke-width="2.0" marker-end="url(#arrow-var--connector)"/>
<!-- Cross-node link: from the right edge of the Node 1 NIC (x=295) to just left of the Node 2 NIC (x=501), with a pill-shaped "Network" label centered at x=400 above the line. -->
<path d="M295,255 L501.0,255.0" fill="none" stroke="var(--info)" stroke-width="2.0" marker-end="url(#arrow-info)"/>
<rect x="375.8" y="237.0" width="48.4" height="16" rx="8" class="pill-request"/>
<text x="400" y="248" class="conn-label label-request">Network</text>
</svg>
\ No newline at end of file
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 800 440" width="800" height="440" role="img" aria-labelledby="title desc">
<title id="title">disagg-nvlink-limitation</title>
<desc id="desc">Architecture diagram showing disagg-nvlink-limitation</desc>
<defs>
<linearGradient id="green-fill" x1="0%" y1="0%" x2="100%" y2="100%">
<stop offset="0%" stop-color="#8BD420"/>
<stop offset="100%" stop-color="#6AAF00"/>
</linearGradient>
<linearGradient id="accent-fade" x1="0" y1="0" x2="0" y2="1">
<stop offset="0%" stop-color="#76B900" stop-opacity="0"/>
<stop offset="12%" stop-color="#76B900" stop-opacity="0.65"/>
<stop offset="88%" stop-color="#76B900" stop-opacity="0.65"/>
<stop offset="100%" stop-color="#76B900" stop-opacity="0"/>
</linearGradient>
<filter id="shadow" x="-8%" y="-10%" width="116%" height="128%">
<feDropShadow dx="0" dy="3" stdDeviation="6" flood-color="rgba(15,23,42,0.10)"/>
</filter>
<filter id="green-glow" x="-12%" y="-14%" width="124%" height="136%">
<feDropShadow dx="0" dy="3" stdDeviation="8" flood-color="rgba(118,185,0,0.35)"/>
</filter>
<filter id="blue-glow" x="-4%" y="-4%" width="108%" height="114%">
<feDropShadow dx="0" dy="2" stdDeviation="10" flood-color="rgba(59,130,246,0.15)"/>
</filter>
<pattern id="grid-dots" width="40" height="40" patternUnits="userSpaceOnUse">
<circle cx="20" cy="20" r="0.5" fill="var(--grid-dot)"/>
</pattern>
<marker id="arrow" viewBox="0 0 6 6" refX="5" refY="3"
markerWidth="5" markerHeight="5" orient="auto">
<path d="M0,0.5 L6,3 L0,5.5 Z" fill="var(--connector)"/>
</marker>
<marker id="arrow-accent" viewBox="0 0 6 6" refX="5" refY="3"
markerWidth="5" markerHeight="5" orient="auto">
<path d="M0,0.5 L6,3 L0,5.5 Z" fill="var(--accent)"/>
</marker>
<marker id="arrow-dark" viewBox="0 0 6 6" refX="5" refY="3"
markerWidth="5" markerHeight="5" orient="auto">
<path d="M0,0.5 L6,3 L0,5.5 Z" fill="var(--text-subtle)"/>
</marker>
<marker id="arrow-green" viewBox="0 0 6 6" refX="5" refY="3"
markerWidth="6" markerHeight="6" orient="auto">
<path d="M0,0.5 L6,3 L0,5.5 Z" fill="var(--pill-kv)"/>
</marker>
<marker id="arrow-green-rev" viewBox="0 0 6 6" refX="1" refY="3"
markerWidth="6" markerHeight="6" orient="auto">
<path d="M6,0.5 L0,3 L6,5.5 Z" fill="var(--pill-kv)"/>
</marker>
<symbol id="sym-monitor" viewBox="0 0 14 14">
<rect x="2" y="1" width="10" height="8" rx="1.5" fill="none" stroke="currentColor" stroke-width="1.6"/>
<line x1="7" y1="9" x2="7" y2="12" stroke="currentColor" stroke-width="1.6" stroke-linecap="round"/>
<line x1="4" y1="12" x2="10" y2="12" stroke="currentColor" stroke-width="1.6" stroke-linecap="round"/>
</symbol>
<symbol id="sym-fork" viewBox="0 0 14 14">
<path d="M2,7 L7,7" stroke="currentColor" stroke-width="1.8" stroke-linecap="round"/>
<path d="M7,7 L12,3" stroke="currentColor" stroke-width="1.8" stroke-linecap="round"/>
<path d="M7,7 L12,11" stroke="currentColor" stroke-width="1.8" stroke-linecap="round"/>
<circle cx="12" cy="3" r="1.2" fill="currentColor"/>
<circle cx="12" cy="11" r="1.2" fill="currentColor"/>
</symbol>
<symbol id="sym-gauge" viewBox="0 0 14 14">
<path d="M2,10 A5.5,5.5 0 1,1 12,10" fill="none" stroke="currentColor" stroke-width="1.6" stroke-linecap="round"/>
<line x1="7" y1="9" x2="9.5" y2="4.5" stroke="currentColor" stroke-width="1.8" stroke-linecap="round"/>
<circle cx="7" cy="9" r="1" fill="currentColor"/>
</symbol>
<symbol id="sym-layers" viewBox="0 0 14 14">
<path d="M1,8 L7,11 L13,8" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>
<path d="M1,5.5 L7,8.5 L13,5.5" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>
<path d="M1,3 L7,6 L13,3 L7,0 Z" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linejoin="round"/>
</symbol>
<symbol id="sym-stream" viewBox="0 0 14 14">
<line x1="2" y1="3" x2="8" y2="3" stroke="currentColor" stroke-width="1.5" stroke-linecap="round"/>
<line x1="2" y1="7" x2="10" y2="7" stroke="currentColor" stroke-width="1.5" stroke-linecap="round"/>
<line x1="2" y1="11" x2="6" y2="11" stroke="currentColor" stroke-width="1.5" stroke-linecap="round"/>
<line x1="12" y1="0" x2="12" y2="14" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" opacity="0.6"/>
</symbol>
<symbol id="sym-blocks" viewBox="0 0 14 14">
<rect x="0.5" y="0.5" width="5.5" height="5.5" rx="1" fill="none" stroke="currentColor" stroke-width="1.3"/>
<rect x="7" y="0.5" width="5.5" height="5.5" rx="1" fill="none" stroke="currentColor" stroke-width="1.3"/>
<rect x="0.5" y="7" width="5.5" height="5.5" rx="1" fill="none" stroke="currentColor" stroke-width="1.3"/>
<rect x="8" y="8" width="5.5" height="5.5" rx="1" fill="currentColor" opacity="0.25" stroke="currentColor" stroke-width="1.3"/>
</symbol>
<symbol id="sym-gate" viewBox="0 0 14 14">
<rect x="2" y="1" width="4" height="12" rx="1" fill="none" stroke="currentColor" stroke-width="1.4"/>
<rect x="8" y="1" width="4" height="12" rx="1" fill="none" stroke="currentColor" stroke-width="1.4"/>
</symbol>
<symbol id="sym-transfer" viewBox="0 0 14 12">
<line x1="0" y1="6" x2="14" y2="6" stroke="currentColor" stroke-width="0.8" stroke-dasharray="2 2"/>
<path d="M3,2 L7,6 L3,10" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"/>
<path d="M11,2 L7,6 L11,10" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"/>
</symbol>
</defs>
<style>
/* Light scheme (default): the diagram palette, exposed as custom properties. */
:root {
  /* Surfaces */
  --surface: #FFFFFF;
  --surface-zone: rgba(219,234,254,0.25);
  --surface-accent: url(#green-fill);
  /* Borders */
  --border: rgba(226,232,240,0.6);
  --border-zone: rgba(147,197,253,0.5);
  /* Text */
  --text-primary: #0F172A;
  --text-secondary: #1E293B;
  --text-muted: #475569;
  --text-subtle: #64748B;
  --text-on-accent: #FFFFFF;
  --zone-label: rgba(59,130,246,0.65);
  /* Brand and status colours */
  --accent: #76B900;
  --accent-start: #8BD420;
  --accent-end: #6AAF00;
  --error: #EF4444;
  --info: #3B82F6;
  /* Connectors */
  --connector: #CBD5E1;
  --connector-dark: #64748B;
  /* Shadows, glows and background grid */
  --shadow: rgba(15,23,42,0.10);
  --glow-green: rgba(118,185,0,0.35);
  --glow-blue: rgba(59,130,246,0.15);
  --grid-dot: rgba(148,163,184,0.2);
  /* Pill colours */
  --pill-request: #3B82F6;
  --pill-kv: #D97706;
  --pill-kv-dark: #B45309;
  --pill-perf: #64748B;
}

/* Dark scheme: same property names, re-declared in full. */
@media (prefers-color-scheme: dark) {
  :root {
    /* Surfaces */
    --surface: #1E293B;
    --surface-zone: rgba(59,130,246,0.08);
    --surface-accent: url(#green-fill);
    /* Borders */
    --border: rgba(148,163,184,0.25);
    --border-zone: rgba(147,197,253,0.3);
    /* Text */
    --text-primary: #F1F5F9;
    --text-secondary: #CBD5E1;
    --text-muted: #94A3B8;
    --text-subtle: #64748B;
    --text-on-accent: #FFFFFF;
    --zone-label: rgba(96,165,250,0.7);
    /* Brand and status colours */
    --accent: #76B900;
    --accent-start: #8BD420;
    --accent-end: #6AAF00;
    --error: #F87171;
    --info: #60A5FA;
    /* Connectors */
    --connector: #475569;
    --connector-dark: #94A3B8;
    /* Shadows, glows and background grid */
    --shadow: rgba(0,0,0,0.40);
    --glow-green: rgba(118,185,0,0.25);
    --glow-blue: rgba(59,130,246,0.10);
    --grid-dot: rgba(226,232,240,0.15);
    /* Pill colours */
    --pill-request: #60A5FA;
    --pill-kv: #FBBF24;
    --pill-kv-dark: #D97706;
    --pill-perf: #94A3B8;
  }
}

/* Typography shared by every text element. */
text { font-family: -apple-system, 'Segoe UI', 'Helvetica Neue', Arial, sans-serif; }

/* Headings */
.title { fill: var(--text-primary); font-weight: 700; font-size: 18px; }
.subtitle { fill: var(--text-subtle); letter-spacing: 0.3px; font-weight: 400; font-size: 11px; }

/* Box labels: start-anchored, centred, and on-accent (white) variants. */
.box-label { text-anchor: start; fill: var(--text-secondary); font-weight: 600; font-size: 14px; }
.box-label-center { text-anchor: middle; fill: var(--text-secondary); font-weight: 600; font-size: 14px; }
.box-label-white { text-anchor: middle; fill: var(--text-on-accent); font-weight: 700; font-size: 14px; }
.box-sub { text-anchor: start; fill: var(--text-muted); font-weight: 400; font-size: 10px; }
.box-sub-center { text-anchor: middle; fill: var(--text-muted); font-weight: 400; font-size: 10px; }
.box-sub-white { text-anchor: middle; fill: var(--text-on-accent); opacity: 0.75; font-weight: 400; font-size: 10px; }

/* Connector and zone captions. .conn-label sets no fill; a .label-* class supplies it. */
.conn-label { text-anchor: middle; font-weight: 600; font-size: 10px; }
.zone-label { text-anchor: middle; fill: var(--text-subtle); letter-spacing: 1.2px; font-weight: 600; font-size: 10px; }

/* Pill backgrounds and their matching label colours. */
.pill-request { stroke-width: 0.5; stroke: var(--glow-blue); fill: rgba(59,130,246,0.12); }
.label-request { fill: var(--info); }
.pill-kv { stroke-width: 0.5; stroke: rgba(217,119,6,0.15); fill: rgba(217,119,6,0.12); }
.label-kv { fill: var(--pill-kv-dark); }
.pill-perf { stroke-width: 0.5; stroke: rgba(100,116,139,0.12); fill: rgba(100,116,139,0.08); }
.label-perf { fill: var(--text-muted); }

/* Connector strokes: neutral, KV-coloured, and dashed. */
.connector { stroke-width: 2.0; stroke: var(--connector); fill: none; }
.connector-kv { stroke-width: 2.0; stroke: var(--pill-kv); fill: none; }
.connector-dashed { stroke-dasharray: 6 4; stroke-width: 2.0; stroke: var(--connector); fill: none; }
</style>
<!-- Node boundary: dashed, tinted zone that encloses both pods. -->
<rect x="20" y="20" width="760" height="400" rx="14" fill="var(--surface-zone)" stroke="var(--border-zone)" stroke-width="1.0" stroke-dasharray="6 3" filter="url(#blue-glow)"/>
<text x="400.0" y="40.5" class="zone-label" style="fill:var(--zone-label)">PHYSICAL NODE (8X H100 GPUS)</text>
<!-- Pod A card. The accent rect sits 2px to the left of the surface rect, so only a thin accent strip shows along the left edge. -->
<rect x="38" y="60" width="330" height="230" rx="10" fill="var(--accent)" opacity="0.7"/>
<rect x="40" y="60" width="330" height="230" rx="10" fill="var(--surface)" stroke="var(--border)" stroke-width="1.0" filter="url(#shadow)"/>
<text x="50" y="79.9" class="box-label">Pod A (Prefill)</text>
<text x="50" y="92.9" class="box-sub">Process Namespace: PID 1</text>
<!-- Pod B card, built the same way. -->
<rect x="428" y="60" width="330" height="230" rx="10" fill="var(--accent)" opacity="0.7"/>
<rect x="430" y="60" width="330" height="230" rx="10" fill="var(--surface)" stroke="var(--border)" stroke-width="1.0" filter="url(#shadow)"/>
<text x="440" y="79.9" class="box-label">Pod B (Decode)</text>
<text x="440" y="92.9" class="box-sub">Process Namespace: PID 1</text>
<!-- GPU tiles: two drawn per pod. -->
<rect x="75" y="170" width="120" height="56" rx="10" fill="var(--surface-accent)" filter="url(#green-glow)"/>
<text x="135.0" y="202.9" class="box-label-white">GPU 0</text>
<rect x="215" y="170" width="120" height="56" rx="10" fill="var(--surface-accent)" filter="url(#green-glow)"/>
<text x="275.0" y="202.9" class="box-label-white">GPU 1</text>
<rect x="465" y="170" width="120" height="56" rx="10" fill="var(--surface-accent)" filter="url(#green-glow)"/>
<text x="525.0" y="202.9" class="box-label-white">GPU 2</text>
<rect x="615" y="170" width="120" height="56" rx="10" fill="var(--surface-accent)" filter="url(#green-glow)"/>
<text x="675.0" y="202.9" class="box-label-white">GPU 3</text>
<!-- Links between the two GPUs inside each pod. NOTE(review): the arrow-accent marker is not defined in the part of this file visible during review; confirm it exists in the defs section, otherwise no arrowhead is drawn. -->
<path d="M195,198 L211.0,198.0" fill="none" stroke="var(--accent)" stroke-width="2.0" marker-end="url(#arrow-accent)"/>
<path d="M585,198 L611.0,198.0" fill="none" stroke="var(--accent)" stroke-width="2.0" marker-end="url(#arrow-accent)"/>
<!-- Dashed error-coloured segment spanning the gap between the two pod cards. -->
<path d="M370,198 L430,198" fill="none" stroke="var(--error)" stroke-width="2.0" stroke-dasharray="4 4"/>
<!-- Captions. The box-label-center class already sets text-anchor: middle and font-size: 14px, so the inline styles carry only the values that differ from the class. -->
<text x="205" y="150" class="box-label-center" style="font-size:9px;fill:var(--text-subtle)">CUDA_VISIBLE_DEVICES: 0,1</text>
<text x="595" y="150" class="box-label-center" style="font-size:9px;fill:var(--text-subtle)">CUDA_VISIBLE_DEVICES: 2-7</text>
<text x="205" y="248" class="box-label-center" style="font-size:10px;fill:var(--accent)">NVLink (works!)</text>
<text x="595" y="248" class="box-label-center" style="font-size:10px;fill:var(--accent)">NVLink (works!)</text>
<text x="400" y="330" class="box-label-center" style="fill:var(--error)">NO NVLink between pods</text>
<text class="box-label-center" style="font-size:11px;fill:var(--text-subtle)">
<tspan x="400" y="365">Separate process namespaces, separate CUDA contexts,</tspan>
<tspan x="400" dy="18">separate GPU device assignments</tspan>
</text>
</svg>
\ No newline at end of file
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 800 250" width="800" height="250" role="img" aria-labelledby="title desc">
<!-- Accessible name and description, referenced by aria-labelledby on the root element; the ids must stay "title" and "desc". -->
<title id="title">Same-node RDMA communication between prefill and decode pods</title>
<desc id="desc">A single node containing a Prefill Pod with GPU 0 (VRAM) and a Decode Pod with GPU 2 (VRAM). An arrow labeled RDMA (InfiniBand/RoCE) runs from GPU 0 to GPU 2.</desc>
<defs>
<!-- Diagonal green gradient (top-left to bottom-right). The style sheet exposes it as the surface-accent custom property, which the GPU tiles use as their fill. -->
<linearGradient id="green-fill" x1="0%" y1="0%" x2="100%" y2="100%">
<stop offset="0%" stop-color="#8BD420"/>
<stop offset="100%" stop-color="#6AAF00"/>
</linearGradient>
<!-- Vertical gradient of a single green that is fully transparent at both ends and 65% opaque between 12% and 88%. Not referenced elsewhere in this file. -->
<linearGradient id="accent-fade" x1="0" y1="0" x2="0" y2="1">
<stop offset="0%" stop-color="#76B900" stop-opacity="0"/>
<stop offset="12%" stop-color="#76B900" stop-opacity="0.65"/>
<stop offset="88%" stop-color="#76B900" stop-opacity="0.65"/>
<stop offset="100%" stop-color="#76B900" stop-opacity="0"/>
</linearGradient>
<!-- Drop-shadow filters. The flood colours are read from the theme custom properties (shadow, glow-green, glow-blue) declared in the style sheet, so the dark-scheme values apply to the shadows as well. The light-scheme values of those properties are identical to the literals that were hard-coded here before, so light rendering is unchanged. -->
<!-- Neutral shadow used under the pod cards. -->
<filter id="shadow" x="-8%" y="-10%" width="116%" height="128%">
<feDropShadow dx="0" dy="3" stdDeviation="6" flood-color="var(--shadow)"/>
</filter>
<!-- Green glow used under the GPU tiles. -->
<filter id="green-glow" x="-12%" y="-14%" width="124%" height="136%">
<feDropShadow dx="0" dy="3" stdDeviation="8" flood-color="var(--glow-green)"/>
</filter>
<!-- Blue glow used around the node zone. -->
<filter id="blue-glow" x="-4%" y="-4%" width="108%" height="114%">
<feDropShadow dx="0" dy="2" stdDeviation="10" flood-color="var(--glow-blue)"/>
</filter>
<!-- Background tile: 40x40 user-space units with one 0.5-radius dot at its centre, coloured by the grid-dot custom property. Not referenced elsewhere in this file. -->
<pattern id="grid-dots" width="40" height="40" patternUnits="userSpaceOnUse">
<circle cx="20" cy="20" r="0.5" fill="var(--grid-dot)"/>
</pattern>
<!-- Arrowhead markers. All share one triangle in a 6x6 viewBox and orient="auto"; they differ in fill colour, marker box size and reference point. Only arrow-info is referenced by the drawing in this file (the RDMA path). -->
<!-- Default arrowhead in the connector colour; tip at x=6, reference point at x=5. -->
<marker id="arrow" viewBox="0 0 6 6" refX="5" refY="3"
markerWidth="5" markerHeight="5" orient="auto">
<path d="M0,0.5 L6,3 L0,5.5 Z" fill="var(--connector)"/>
</marker>
<!-- Same shape in the info colour. -->
<marker id="arrow-info" viewBox="0 0 6 6" refX="5" refY="3"
markerWidth="5" markerHeight="5" orient="auto">
<path d="M0,0.5 L6,3 L0,5.5 Z" fill="var(--info)"/>
</marker>
<!-- Same shape in the text-subtle colour. -->
<marker id="arrow-dark" viewBox="0 0 6 6" refX="5" refY="3"
markerWidth="5" markerHeight="5" orient="auto">
<path d="M0,0.5 L6,3 L0,5.5 Z" fill="var(--text-subtle)"/>
</marker>
<!-- Slightly larger (6x6) arrowhead. Despite the id, its fill is the pill-kv custom property, which the style sheet sets to an amber value, not a green one. -->
<marker id="arrow-green" viewBox="0 0 6 6" refX="5" refY="3"
markerWidth="6" markerHeight="6" orient="auto">
<path d="M0,0.5 L6,3 L0,5.5 Z" fill="var(--pill-kv)"/>
</marker>
<!-- Mirror image of arrow-green: tip at x=0, reference point at x=1. -->
<marker id="arrow-green-rev" viewBox="0 0 6 6" refX="1" refY="3"
markerWidth="6" markerHeight="6" orient="auto">
<path d="M6,0.5 L0,3 L6,5.5 Z" fill="var(--pill-kv)"/>
</marker>
<!-- Icon symbols. Each is drawn with currentColor, so it takes the colour of whatever instantiates it. None of them is instantiated by a use element in this file. -->
<!-- Icon: rounded rectangle with a vertical stem and a horizontal base line beneath it. -->
<symbol id="sym-monitor" viewBox="0 0 14 14">
<rect x="2" y="1" width="10" height="8" rx="1.5" fill="none" stroke="currentColor" stroke-width="1.6"/>
<line x1="7" y1="9" x2="7" y2="12" stroke="currentColor" stroke-width="1.6" stroke-linecap="round"/>
<line x1="4" y1="12" x2="10" y2="12" stroke="currentColor" stroke-width="1.6" stroke-linecap="round"/>
</symbol>
<!-- Icon: one horizontal segment that splits at (7,7) into two diagonal branches, each ending in a dot. -->
<symbol id="sym-fork" viewBox="0 0 14 14">
<path d="M2,7 L7,7" stroke="currentColor" stroke-width="1.8" stroke-linecap="round"/>
<path d="M7,7 L12,3" stroke="currentColor" stroke-width="1.8" stroke-linecap="round"/>
<path d="M7,7 L12,11" stroke="currentColor" stroke-width="1.8" stroke-linecap="round"/>
<circle cx="12" cy="3" r="1.2" fill="currentColor"/>
<circle cx="12" cy="11" r="1.2" fill="currentColor"/>
</symbol>
<!-- Icon: open arc with a needle line and a pivot dot at (7,9). -->
<symbol id="sym-gauge" viewBox="0 0 14 14">
<path d="M2,10 A5.5,5.5 0 1,1 12,10" fill="none" stroke="currentColor" stroke-width="1.6" stroke-linecap="round"/>
<line x1="7" y1="9" x2="9.5" y2="4.5" stroke="currentColor" stroke-width="1.8" stroke-linecap="round"/>
<circle cx="7" cy="9" r="1" fill="currentColor"/>
</symbol>
<!-- Icon: a closed diamond outline on top with two V-shaped lines stacked below it. -->
<symbol id="sym-layers" viewBox="0 0 14 14">
<path d="M1,8 L7,11 L13,8" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>
<path d="M1,5.5 L7,8.5 L13,5.5" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>
<path d="M1,3 L7,6 L13,3 L7,0 Z" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linejoin="round"/>
</symbol>
<!-- Icon: three horizontal lines of different lengths plus a full-height vertical line at x=12 drawn at 60% opacity. -->
<symbol id="sym-stream" viewBox="0 0 14 14">
<line x1="2" y1="3" x2="8" y2="3" stroke="currentColor" stroke-width="1.5" stroke-linecap="round"/>
<line x1="2" y1="7" x2="10" y2="7" stroke="currentColor" stroke-width="1.5" stroke-linecap="round"/>
<line x1="2" y1="11" x2="6" y2="11" stroke="currentColor" stroke-width="1.5" stroke-linecap="round"/>
<line x1="12" y1="0" x2="12" y2="14" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" opacity="0.6"/>
</symbol>
<!-- Icon: 2x2 arrangement of rounded squares. Three are outlined only; the lower-right one is filled, rendered at 25% opacity, and placed 1 unit down and right of the grid slot. -->
<symbol id="sym-blocks" viewBox="0 0 14 14">
<rect x="0.5" y="0.5" width="5.5" height="5.5" rx="1" fill="none" stroke="currentColor" stroke-width="1.3"/>
<rect x="7" y="0.5" width="5.5" height="5.5" rx="1" fill="none" stroke="currentColor" stroke-width="1.3"/>
<rect x="0.5" y="7" width="5.5" height="5.5" rx="1" fill="none" stroke="currentColor" stroke-width="1.3"/>
<rect x="8" y="8" width="5.5" height="5.5" rx="1" fill="currentColor" opacity="0.25" stroke="currentColor" stroke-width="1.3"/>
</symbol>
<!-- Icon: two outlined vertical bars side by side. -->
<symbol id="sym-gate" viewBox="0 0 14 14">
<rect x="2" y="1" width="4" height="12" rx="1" fill="none" stroke="currentColor" stroke-width="1.4"/>
<rect x="8" y="1" width="4" height="12" rx="1" fill="none" stroke="currentColor" stroke-width="1.4"/>
</symbol>
<!-- Icon: thin dashed horizontal line with two chevrons whose tips meet at the centre point (7,6). Uses a 14x12 viewBox, unlike the other icons. -->
<symbol id="sym-transfer" viewBox="0 0 14 12">
<line x1="0" y1="6" x2="14" y2="6" stroke="currentColor" stroke-width="0.8" stroke-dasharray="2 2"/>
<path d="M3,2 L7,6 L3,10" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"/>
<path d="M11,2 L7,6 L11,10" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"/>
</symbol>
</defs>
<style>
/* Light scheme (default): the diagram palette, exposed as custom properties. */
:root {
--surface: #FFFFFF;
--surface-zone: rgba(219,234,254,0.25);
--surface-accent: url(#green-fill);
--text-primary: #0F172A;
--text-secondary: #1E293B;
--text-muted: #475569;
--text-subtle: #64748B;
--text-on-accent: #FFFFFF;
--border: rgba(226,232,240,0.6);
--border-zone: rgba(147,197,253,0.5);
--accent: #76B900;
--accent-start: #8BD420;
--accent-end: #6AAF00;
--error: #EF4444;
--info: #3B82F6;
--connector: #CBD5E1;
--connector-dark: #64748B;
--shadow: rgba(15,23,42,0.10);
--glow-green: rgba(118,185,0,0.35);
--glow-blue: rgba(59,130,246,0.15);
--grid-dot: rgba(148,163,184,0.2);
--pill-request: #3B82F6;
--pill-kv: #D97706;
--pill-kv-dark: #B45309;
--pill-perf: #64748B;
--zone-label: rgba(59,130,246,0.65);
}
/* Dark scheme: same property names, re-declared in full. */
@media (prefers-color-scheme: dark) {
:root {
--surface: #1E293B;
--surface-zone: rgba(59,130,246,0.08);
--surface-accent: url(#green-fill);
--text-primary: #F1F5F9;
--text-secondary: #CBD5E1;
--text-muted: #94A3B8;
--text-subtle: #64748B;
--text-on-accent: #FFFFFF;
--border: rgba(148,163,184,0.25);
--border-zone: rgba(147,197,253,0.3);
--accent: #76B900;
--accent-start: #8BD420;
--accent-end: #6AAF00;
--error: #F87171;
--info: #60A5FA;
--connector: #475569;
--connector-dark: #94A3B8;
--shadow: rgba(0,0,0,0.40);
--glow-green: rgba(118,185,0,0.25);
--glow-blue: rgba(59,130,246,0.10);
--grid-dot: rgba(226,232,240,0.15);
--pill-request: #60A5FA;
--pill-kv: #FBBF24;
--pill-kv-dark: #D97706;
--pill-perf: #94A3B8;
--zone-label: rgba(96,165,250,0.7);
}
}
/* Typography shared by every text element. */
text { font-family: -apple-system, 'Segoe UI', 'Helvetica Neue', Arial, sans-serif; }
.title { font-size: 18px; font-weight: 700; fill: var(--text-primary); }
.subtitle { font-size: 11px; font-weight: 400; fill: var(--text-subtle); letter-spacing: 0.3px; }
/* Box labels: start-anchored, centred, and on-accent (white) variants. */
.box-label { font-size: 14px; font-weight: 600; fill: var(--text-secondary); text-anchor: start; }
.box-label-center { font-size: 14px; font-weight: 600; fill: var(--text-secondary); text-anchor: middle; }
.box-label-white { font-size: 14px; font-weight: 700; fill: var(--text-on-accent); text-anchor: middle; }
.box-sub { font-size: 10px; font-weight: 400; fill: var(--text-muted); text-anchor: start; }
.box-sub-center { font-size: 10px; font-weight: 400; fill: var(--text-muted); text-anchor: middle; }
.box-sub-white { font-size: 10px; font-weight: 400; fill: var(--text-on-accent); opacity: 0.75; text-anchor: middle; }
/* .conn-label sets no fill; a .label-* class on the same element supplies it. */
.conn-label { font-size: 10px; font-weight: 600; text-anchor: middle; }
/* Zone caption takes its colour from the dedicated --zone-label property, which
   is the value the only zone caption in this file was already forcing inline. */
.zone-label { font-size: 10px; font-weight: 600; fill: var(--zone-label); text-anchor: middle; letter-spacing: 1.2px; }
.zone-label-desc { font-size: 9px; font-weight: normal; fill: #94A3B8; text-anchor: start; }
/* Pill backgrounds and their matching label colours. */
.pill-request { fill: rgba(59,130,246,0.12); stroke: var(--glow-blue); stroke-width: 0.5; }
.label-request { fill: var(--info); }
.pill-kv { fill: rgba(217,119,6,0.12); stroke: rgba(217,119,6,0.15); stroke-width: 0.5; }
.label-kv { fill: var(--pill-kv-dark); }
.pill-perf { fill: rgba(100,116,139,0.08); stroke: rgba(100,116,139,0.12); stroke-width: 0.5; }
.label-perf { fill: var(--text-muted); }
/* Connector strokes: neutral, KV-coloured, and dashed. */
.connector { fill: none; stroke: var(--connector); stroke-width: 2.0; }
.connector-kv { fill: none; stroke: var(--pill-kv); stroke-width: 2.0; }
.connector-dashed { fill: none; stroke: var(--connector); stroke-width: 2.0; stroke-dasharray: 6 4; }
</style>
<!-- Node boundary: dashed, tinted zone that encloses both pods. The zone-label class already centres the caption, so the inline style carries only the colour. -->
<rect x="20" y="15" width="760" height="220" rx="14" fill="var(--surface-zone)" stroke="var(--border-zone)" stroke-width="1.0" stroke-dasharray="6 3" filter="url(#blue-glow)"/>
<text x="400.0" y="35.5" class="zone-label" style="fill:var(--zone-label)">SAME NODE</text>
<!-- Prefill pod card. The accent rect sits 2px to the left of the surface rect, so only a thin accent strip shows along the left edge. -->
<rect x="58" y="55" width="260" height="130" rx="10" fill="var(--accent)" opacity="0.7"/>
<rect x="60" y="55" width="260" height="130" rx="10" fill="var(--surface)" stroke="var(--border)" stroke-width="1.0" filter="url(#shadow)"/>
<text x="70" y="74.9" class="box-label">Prefill Pod</text>
<!-- Decode pod card, built the same way. -->
<rect x="478" y="55" width="260" height="130" rx="10" fill="var(--accent)" opacity="0.7"/>
<rect x="480" y="55" width="260" height="130" rx="10" fill="var(--surface)" stroke="var(--border)" stroke-width="1.0" filter="url(#shadow)"/>
<text x="490" y="74.9" class="box-label">Decode Pod</text>
<!-- One GPU tile per pod. -->
<rect x="110" y="110" width="150" height="56" rx="10" fill="var(--surface-accent)" filter="url(#green-glow)"/>
<text x="185.0" y="142.9" class="box-label-white">GPU 0 (VRAM)</text>
<rect x="540" y="110" width="150" height="56" rx="10" fill="var(--surface-accent)" filter="url(#green-glow)"/>
<text x="615.0" y="142.9" class="box-label-white">GPU 2 (VRAM)</text>
<!-- Transfer path: leaves the bottom of GPU 0, runs along y=220 below both pod cards, and ends with an arrowhead just under GPU 2. -->
<path d="M185,166 L185,220 L615,220 L615.0,170.0" fill="none" stroke="var(--info)" stroke-width="2.0" marker-end="url(#arrow-info)"/>
<!-- Label pill centred on x=400 (336.8 + 126.4 / 2), sitting just above the horizontal run of the path. -->
<rect x="336.8" y="202.0" width="126.4" height="16" rx="8" class="pill-request"/>
<text x="400" y="213" class="conn-label label-request">RDMA (InfiniBand/RoCE)</text>
</svg>
\ No newline at end of file
......@@ -22,43 +22,9 @@ This guide explains how prefill and decode workers communicate in Dynamo's disag
### Communication Stack
```text
┌─────────────────────────────────────────────────────────────────────────┐
│ Dynamo Disaggregated Serving │
├─────────────────────────────────────────────────────────────────────────┤
│ │
│ ┌──────────────────┐ ┌──────────────────┐ │
│ │ Prefill Worker │ │ Decode Worker │ │
│ │ (Pod A) │ │ (Pod B) │ │
│ │ │ │ │ │
│ │ ┌────────────┐ │ │ ┌────────────┐ │ │
│ │ │ KV Cache │ │ Transfer │ │ KV Cache │ │ │
│ │ │ (GPU VRAM) │──┼──────────────┼─▶│ (GPU VRAM) │ │ │
│ │ └────────────┘ │ │ └────────────┘ │ │
│ └────────┬─────────┘ └────────┬─────────┘ │
│ │ │ │
├───────────┼──────────────────────────────────┼──────────────────────────┤
│ │ NIXL Library │ │
│ │ (KV Cache Transfer API) │ │
├───────────┼──────────────────────────────────┼──────────────────────────┤
│ │ │ │
│ │ UCX │ │
│ │ (Unified Communication X) │ │
│ │ │ │
│ ┌────────┴──────────────────────────────────┴────────┐ │
│ │ Transport Layer │ │
│ │ │ │
│ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │
│ │ │ rc_x/dc_x │ │ cuda_copy │ │ tcp │ │ │
│ │ │ (RDMA) │ │ (staging) │ │ (fallback) │ │ │
│ │ │ │ │ │ │ │ │ │
│ │ │ InfiniBand │ │ GPU↔Host │ │ Network │ │ │
│ │ │ or RoCE │ │ memory copy │ │ sockets │ │ │
│ │ └─────────────┘ └─────────────┘ └─────────────┘ │ │
│ └─────────────────────────────────────────────────────┘ │
│ │
└─────────────────────────────────────────────────────────────────────────┘
```
<Frame>
<img src="../assets/img/disagg-comm-stack.svg" alt="Disaggregated inference communication stack showing NIXL, UCX, and transport layers" />
</Frame>
### Component Responsibilities
......@@ -82,29 +48,9 @@ NVLink is a **direct GPU-to-GPU interconnect** that operates at the hardware lev
**Kubernetes pods violate all three requirements:**
```text
┌─────────────────────────────────────────────────────────────────────────┐
│ Physical Node (8× H100 GPUs) │
│ │
│ ┌─────────────────────────────┐ ┌─────────────────────────────┐ │
│ │ Pod A (Prefill) │ │ Pod B (Decode) │ │
│ │ │ │ │ │
│ │ Process Namespace: PID 1 │ │ Process Namespace: PID 1 │ │
│ │ CUDA_VISIBLE_DEVICES: 0,1 │ │ CUDA_VISIBLE_DEVICES: 2-7 │ │
│ │ │ │ │ │
│ │ ┌─────┐ ┌─────┐ │ │ ┌─────┐ ┌─────┐ ... │ │
│ │ │GPU 0│ │GPU 1│ │ │ │GPU 2│ │GPU 3│ │ │
│ │ └─────┘ └─────┘ │ │ └─────┘ └─────┘ │ │
│ │ ↑ NVLink ↑ │ │ ↑ NVLink ↑ │ │
│ │ (works!) │ │ (works!) │ │
│ └─────────────────────────────┘ └─────────────────────────────┘ │
│ │
│ ╳ NO NVLink possible between pods ╳ │
│ │
│ Reason: Separate process namespaces, separate CUDA contexts, │
│ separate GPU device assignments │
└─────────────────────────────────────────────────────────────────────────┘
```
<Frame>
<img src="../assets/img/disagg-nvlink-limitation.svg" alt="Why NVLink cannot work between Kubernetes pods due to process isolation" />
</Frame>
### Technical Explanation
......@@ -148,28 +94,15 @@ VLLMDecodeWorker:
When prefill and decode workers are on the **same physical node**:
```text
┌─────────────────────────────────────────────────────────────────────────┐
│ Same Node │
│ │
│ ┌────────────────────┐ ┌────────────────────┐ │
│ │ Prefill Pod │ │ Decode Pod │ │
│ │ │ │ │ │
│ │ ┌──────────────┐ │ │ ┌──────────────┐ │ │
│ │ │ GPU 0 (VRAM) │ │ │ │ GPU 2 (VRAM) │ │ │
│ │ └──────┬───────┘ │ │ └──────▲───────┘ │ │
│ └─────────┼──────────┘ └─────────┼──────────┘ │
│ │ │ │
│ │ RDMA (InfiniBand/RoCE) │ │
│ └─────────────────────────────────────────┘ │
│ │
│ Options (best to worst): │
│ 1. InfiniBand RDMA with GPUDirect → GPU-to-GPU, bypasses CPU │
│ 2. RoCE RDMA with GPUDirect → GPU-to-GPU, bypasses CPU │
│ 3. Host-staged RDMA → GPU→CPU→RDMA→CPU→GPU │
│ 4. TCP (fallback) → GPU→CPU→TCP→CPU→GPU │
└─────────────────────────────────────────────────────────────────────────┘
```
<Frame>
<img src="../assets/img/disagg-same-node.svg" alt="Same-node RDMA communication between prefill and decode pods" />
</Frame>
**Options (best to worst):**
1. InfiniBand RDMA with GPUDirect → GPU-to-GPU, bypasses CPU
2. RoCE RDMA with GPUDirect → GPU-to-GPU, bypasses CPU
3. Host-staged RDMA → GPU→CPU→RDMA→CPU→GPU
4. TCP (fallback) → GPU→CPU→TCP→CPU→GPU
**Best Practice**: Use RDMA even for same-node communication. The overhead is minimal and it provides consistent behavior whether pods land on the same or different nodes.
......@@ -177,24 +110,9 @@ When prefill and decode workers are on the **same physical node**:
When prefill and decode workers are on **different nodes**:
```text
┌──────────────────────────────┐ ┌──────────────────────────────┐
│ Node 1 │ │ Node 2 │
│ │ │ │
│ ┌────────────────────┐ │ │ ┌────────────────────┐ │
│ │ Prefill Pod │ │ │ │ Decode Pod │ │
│ │ ┌──────────────┐ │ │ │ │ ┌──────────────┐ │ │
│ │ │ GPU (VRAM) │ │ │ │ │ │ GPU (VRAM) │ │ │
│ │ └──────┬───────┘ │ │ │ │ └──────▲───────┘ │ │
│ └─────────┼──────────┘ │ │ └─────────┼──────────┘ │
│ │ │ │ │ │
│ ┌─────────▼─────────┐ │ │ ┌────────┴────────┐ │
│ │ RDMA NIC │ │ │ │ RDMA NIC │ │
│ │ (InfiniBand/ │◄──────┼─────────┼──────▶│ (InfiniBand/ │ │
│ │ RoCE) │ │ Network │ │ RoCE) │ │
│ └───────────────────┘ │ │ └─────────────────┘ │
└──────────────────────────────┘ └──────────────────────────────┘
```
<Frame>
<img src="../assets/img/disagg-cross-node.svg" alt="Cross-node RDMA communication between prefill and decode pods on separate nodes" />
</Frame>
**Requirements for optimal cross-node performance:**
- InfiniBand or RoCE network fabric
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment