Commit 9df0c4a3 authored by wenjh
Browse files

Merge branch 'nv_main'

parents 0d874a4e f122b07d
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Transformer layer flow chart. Every operation is a rounded box whose CSS
  class marks its precision: per the legend at the bottom, class "hp" boxes are
  labelled "Higher Precision" and class "gemm" boxes "Lower Precision".
  The "hp", "gemm", "text" and "title" classes are not defined in this file;
  presumably they come from the imported diagram-colors.css (TODO confirm).
-->
<svg xmlns="http://www.w3.org/2000/svg"
     viewBox="0 0 850 480"
     width="850"
     height="480"
     style="display: block; margin: 0 auto;">
  <defs>
    <style>
      @import url("../_static/css/diagram-colors.css");

      /* Connector between two boxes; terminated by the "arrowhead" marker. */
      .arrow {
        stroke: #616161;
        stroke-width: 1.5;
        fill: none;
        marker-end: url(#arrowhead);
      }

      /* Left-aligned heading used for the "Memory State:" caption. */
      .section-label {
        font-family: 'Segoe UI', Arial, sans-serif;
        font-size: 16px;
        font-weight: 600;
        fill: #424242;
        text-anchor: start;
      }
    </style>

    <!-- Triangle whose tip (3, 1.5) is the reference point, so the tip sits on the path end. -->
    <marker id="arrowhead" markerWidth="3" markerHeight="3" refX="3" refY="1.5" orient="auto">
      <polygon points="0 0, 3 1.5, 0 3" fill="#616161" />
    </marker>
  </defs>

  <!-- Diagram heading, centred on x=425 (half of the 850-wide viewBox) -->
  <text x="425" y="30" class="title" style="text-anchor: middle;">Transformer Layer – default precision of operation in low precision recipe</text>

  <!-- First row, drawn left to right: Input, Layer Norm, QKV Linear, QK^T, Softmax -->
  <rect x="20" y="60" width="115" height="50" rx="5" class="hp"/>
  <text x="77" y="90" class="text">Input</text>
  <path d="M 135 85 L 158 85" class="arrow"/>

  <rect x="158" y="60" width="115" height="50" rx="5" class="hp"/>
  <text x="215" y="90" class="text">Layer Norm</text>
  <path d="M 273 85 L 296 85" class="arrow"/>

  <rect x="296" y="60" width="115" height="50" rx="5" class="gemm"/>
  <text x="353" y="90" class="text">QKV Linear</text>
  <path d="M 411 85 L 434 85" class="arrow"/>

  <rect x="434" y="60" width="115" height="50" rx="5" class="hp"/>
  <text x="491" y="90" class="text">QK^T</text>
  <path d="M 549 85 L 572 85" class="arrow"/>

  <rect x="572" y="60" width="115" height="50" rx="5" class="hp"/>
  <text x="629" y="90" class="text">Softmax</text>

  <!--
    Second row, drawn right to left: Scores * V, Output Linear, Dropout + Add.
    It is entered by a vertical arrow dropping from the Softmax box.
  -->
  <path d="M 629 110 L 629 145" class="arrow"/>
  <rect x="572" y="145" width="115" height="50" rx="5" class="hp"/>
  <text x="629" y="175" class="text">Scores * V</text>
  <path d="M 572 170 L 549 170" class="arrow"/>

  <rect x="434" y="145" width="115" height="50" rx="5" class="gemm"/>
  <text x="491" y="175" class="text">Output Linear</text>
  <!-- Long connector: no box occupies the x=296..411 column in this row -->
  <path d="M 434 170 L 273 170" class="arrow"/>

  <rect x="158" y="145" width="115" height="50" rx="5" class="hp"/>
  <text x="215" y="175" class="text">Dropout + Add</text>

  <!--
    Third row, drawn left to right: Layer Norm, FFN Linear 1, GELU, FFN Linear 2, Output.
    It is entered by a vertical arrow dropping from the Dropout + Add box.
  -->
  <path d="M 215 195 L 215 230" class="arrow"/>
  <rect x="158" y="230" width="115" height="50" rx="5" class="hp"/>
  <text x="215" y="260" class="text">Layer Norm</text>
  <path d="M 273 255 L 296 255" class="arrow"/>

  <rect x="296" y="230" width="115" height="50" rx="5" class="gemm"/>
  <text x="353" y="260" class="text">FFN Linear 1</text>
  <path d="M 411 255 L 434 255" class="arrow"/>

  <rect x="434" y="230" width="115" height="50" rx="5" class="hp"/>
  <text x="491" y="260" class="text">GELU</text>
  <path d="M 549 255 L 572 255" class="arrow"/>

  <rect x="572" y="230" width="115" height="50" rx="5" class="gemm"/>
  <text x="629" y="260" class="text">FFN Linear 2</text>
  <path d="M 687 255 L 710 255" class="arrow"/>

  <rect x="710" y="230" width="115" height="50" rx="5" class="hp"/>
  <text x="767" y="260" class="text">Output</text>

  <!-- Memory state strip: both boxes carry the "hp" class -->
  <text x="20" y="325" class="section-label">Memory State:</text>

  <!-- Parameters box -->
  <rect x="20" y="340" width="180" height="45" rx="5" class="hp"/>
  <text x="110" y="365" class="text">Parameters</text>

  <!-- Gradients box -->
  <rect x="225" y="340" width="140" height="45" rx="5" class="hp"/>
  <text x="295" y="365" class="text">Gradients</text>

  <!-- Legend: one swatch per precision class; coordinates are relative to (20, 415) -->
  <g transform="translate(20, 415)">
    <!-- Swatch and caption for the "hp" class -->
    <rect x="0" y="0" width="80" height="40" rx="5" class="hp"/>
    <text x="95" y="23" class="text" style="text-anchor: start;">Higher Precision (FP32/BF16/FP16)</text>

    <!-- Swatch and caption for the "gemm" class -->
    <rect x="400" y="0" width="80" height="40" rx="5" class="gemm"/>
    <text x="495" y="23" class="text" style="text-anchor: start;">Lower Precision (FP8, MXFP8 etc.)</text>
  </g>
</svg>
<!--
  Side-by-side sketch of a tensor split into blocks (left, blue) and the
  matching grid of scaling factors (right, orange). Only the top-left 3x3 cells
  of each grid are drawn; ellipsis glyphs in the white margin indicate that the
  grid continues to the right and downwards.
-->
<svg xmlns="http://www.w3.org/2000/svg"
     viewBox="0 0 700 300"
     width="100%"
     style="max-width: 700px;">
  <style>
    /* Filled grid areas: blue for tensor data, orange for scaling factors. */
    .tensor-fill { fill: #87CEEB; stroke: #444; stroke-width: 2; }
    .scale-fill { fill: #FFA500; stroke: #444; stroke-width: 2; }
    /* Cell separators inside a filled grid area. */
    .grid-line { stroke: #444; stroke-width: 2; fill: none; }
    /* Continuation glyphs in the white margins. */
    .dots-text { font: bold 24px sans-serif; fill: #333; text-anchor: middle; }
    /* Caption above each tensor. */
    .label { font: 12px sans-serif; fill: #333; text-anchor: middle; }
  </style>

  <!-- Left tensor (128x128 blocks) - FP8 tensor -->
  <!-- Outer outline with white background -->
  <rect x="60" y="40" width="260" height="240" fill="#FFFFFF" stroke="#444" stroke-width="2"/>
  <!-- Blue grid area (upper-left); styled by the tensor-fill class -->
  <rect x="60" y="40" width="180" height="180" class="tensor-fill"/>
  <!-- Two vertical and two horizontal separators cut the 180x180 area into 3x3 cells of 60 -->
  <line x1="120" y1="40" x2="120" y2="220" class="grid-line"/>
  <line x1="180" y1="40" x2="180" y2="220" class="grid-line"/>
  <line x1="60" y1="100" x2="240" y2="100" class="grid-line"/>
  <line x1="60" y1="160" x2="240" y2="160" class="grid-line"/>
  <!--
    Continuation glyphs. These text elements were empty, so nothing was drawn.
    NOTE(review): the glyph choice (horizontal / vertical / diagonal ellipsis)
    is a reconstruction; confirm against the intended artwork.
  -->
  <!-- Right margin: one horizontal ellipsis per block row -->
  <text x="280" y="90" class="dots-text">⋯</text>
  <text x="280" y="150" class="dots-text">⋯</text>
  <text x="280" y="210" class="dots-text">⋯</text>
  <!-- Bottom margin: one vertical ellipsis per block column, diagonal in the corner -->
  <text x="90" y="260" class="dots-text">⋮</text>
  <text x="150" y="260" class="dots-text">⋮</text>
  <text x="210" y="260" class="dots-text">⋮</text>
  <text x="280" y="260" class="dots-text">⋱</text>
  <!-- Caption, centred over the 260-wide outline -->
  <text x="190" y="20" class="label">FP8 Tensor (128×128 blocks)</text>

  <!-- Right tensor (128x4 blocks) - Scaling factors (orange) -->
  <!-- Outer outline with white background -->
  <rect x="480" y="40" width="120" height="240" fill="#FFFFFF" stroke="#444" stroke-width="2"/>
  <!-- Orange grid area (upper-left); styled by the scale-fill class -->
  <rect x="480" y="40" width="60" height="180" class="scale-fill"/>
  <!-- Separators cut the 60x180 area into 3 narrow columns of 20 and 3 rows of 60 -->
  <line x1="500" y1="40" x2="500" y2="220" class="grid-line"/>
  <line x1="520" y1="40" x2="520" y2="220" class="grid-line"/>
  <line x1="480" y1="100" x2="540" y2="100" class="grid-line"/>
  <line x1="480" y1="160" x2="540" y2="160" class="grid-line"/>
  <!-- Right margin: one horizontal ellipsis per row (same reconstruction caveat as above) -->
  <text x="565" y="90" class="dots-text">⋯</text>
  <text x="565" y="150" class="dots-text">⋯</text>
  <text x="565" y="210" class="dots-text">⋯</text>
  <!-- Bottom margin: vertical ellipses, diagonal in the corner -->
  <text x="500" y="260" class="dots-text">⋮</text>
  <text x="530" y="260" class="dots-text">⋮</text>
  <text x="565" y="260" class="dots-text">⋱</text>
  <!-- Caption, centred over the 120-wide outline -->
  <text x="540" y="20" class="label">Scaling Factors (128×4 blocks)</text>
</svg>
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment