Unverified Commit df39a7c2 authored by Paweł Gadziński's avatar Paweł Gadziński Committed by GitHub
Browse files

Docs fix (#2301)



* init
Signed-off-by: default avatarPawel Gadzinski <pgadzinski@nvidia.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci



* fix
Signed-off-by: default avatarPawel Gadzinski <pgadzinski@nvidia.com>

* lines length
Signed-off-by: default avatarPawel Gadzinski <pgadzinski@nvidia.com>

* fix
Signed-off-by: default avatarPawel Gadzinski <pgadzinski@nvidia.com>

* fix
Signed-off-by: default avatarPawel Gadzinski <pgadzinski@nvidia.com>

* fix
Signed-off-by: default avatarPawel Gadzinski <pgadzinski@nvidia.com>

* subtitle --- fix in many files:
Signed-off-by: default avatarPawel Gadzinski <pgadzinski@nvidia.com>

* cross entropy _input -> input rename
Signed-off-by: default avatarPawel Gadzinski <pgadzinski@nvidia.com>

* cross entropy _input -> input rename
Signed-off-by: default avatarPawel Gadzinski <pgadzinski@nvidia.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci



* fix
Signed-off-by: default avatarPawel Gadzinski <pgadzinski@nvidia.com>

* a lot of small fixes
Signed-off-by: default avatarPawel Gadzinski <pgadzinski@nvidia.com>

* torch_version() change
Signed-off-by: default avatarPawel Gadzinski <pgadzinski@nvidia.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci



* add missing module and fix warnings
Signed-off-by: default avatarPawel Gadzinski <pgadzinski@nvidia.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci



* fix
Signed-off-by: default avatarPawel Gadzinski <pgadzinski@nvidia.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci



* fix
Signed-off-by: default avatarPawel Gadzinski <pgadzinski@nvidia.com>

* fix
Signed-off-by: default avatarPawel Gadzinski <pgadzinski@nvidia.com>

* removed trailing whitespace:
Signed-off-by: default avatarPawel Gadzinski <pgadzinski@nvidia.com>

* Update docs/api/pytorch.rst
Co-authored-by: default avatargreptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
Signed-off-by: default avatarPaweł Gadziński <62263673+pggPL@users.noreply.github.com>

* Fix import
Signed-off-by: default avatarKirthi Shankar Sivamani <ksivamani@nvidia.com>

* Fix more imports
Signed-off-by: default avatarKirthi Shankar Sivamani <ksivamani@nvidia.com>

* Fix NumPy docstring parameter spacing and indentation

- Standardize parameter documentation to use 'param : type' format (space before and after colon) per NumPy style guide
- Fix inconsistent indentation in cpu_offload.py docstring
- Modified 51 Python files across transformer_engine/pytorch
Signed-off-by: default avatarPawel Gadzinski <pgadzinski@nvidia.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci



* fix
Signed-off-by: default avatarPawel Gadzinski <pgadzinski@nvidia.com>

---------
Signed-off-by: default avatarPawel Gadzinski <pgadzinski@nvidia.com>
Signed-off-by: default avatarPaweł Gadziński <62263673+pggPL@users.noreply.github.com>
Signed-off-by: default avatarKirthi Shankar Sivamani <ksivamani@nvidia.com>
Co-authored-by: default avatarpre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: default avatargreptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
Co-authored-by: default avatarKirthi Shankar Sivamani <ksivamani@nvidia.com>
parent ca468ebe
......@@ -382,26 +382,26 @@ class NVFP4Tensor(NVFP4TensorStorage, QuantizedTensor):
Parameters
----------
rowwise_data: torch.Tensor
rowwise_data : torch.Tensor
Raw FP4 data in a uint8 tensor (rowwise layout).
rowwise_scale_inv: torch.Tensor
rowwise_scale_inv : torch.Tensor
Reciprocal of the scaling factor applied when
casting to FP4, i.e. the scaling factor that must
be applied when casting from FP4 to higher
precision (rowwise).
columnwise_data: torch.Tensor, optional
columnwise_data : torch.Tensor, optional
Raw FP4 data in a uint8 tensor (columnwise layout).
columnwise_scale_inv: torch.Tensor, optional
columnwise_scale_inv : torch.Tensor, optional
Reciprocal of the scaling factor for columnwise FP4 data.
amax_rowwise: torch.Tensor, optional
amax_rowwise : torch.Tensor, optional
Rowwise amax tracking tensor.
amax_columnwise: torch.Tensor, optional
amax_columnwise : torch.Tensor, optional
Columnwise amax tracking tensor.
fp4_dtype: TE_DType
fp4_dtype : TE_DType
The FP4 data type used for quantization.
quantizer: Quantizer
quantizer : Quantizer
The quantizer instance used for this tensor.
dtype: torch.dtype, default = torch.float32
dtype : torch.dtype, default = torch.float32
Nominal tensor datatype, used in dequantize.
"""
......
......@@ -74,7 +74,7 @@ def cast_master_weights_to_fp8(
fsdp_shard_model_weights : list of FSDP shard model weights. If None, it means that the model weights are
not sharded. Otherwise, it means that the model weights are sharded and we get
target model weights data storage using the FSDP shard model weights.
manual_post_all_gather_processing: bool, default = `False`.
manual_post_all_gather_processing : bool, default = `False`.
If False, post processing will be automatically triggered during next forward.
If True, the timing of calling post_all_gather_processing is left to the user.
Note that users must call `post_all_gather_processing` if it's set to True,
......
# Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# See LICENSE for license information.
"""PyTorch version utilities"""
from __future__ import annotations
import functools
import torch
from packaging.version import Version as PkgVersion
@functools.lru_cache(maxsize=None)
def torch_version() -> tuple[int, ...]:
    """Return the installed PyTorch version as a release tuple.

    The result is the `(major, minor, micro, ...)` release component of
    the parsed version (local/dev suffixes are dropped by `.release`).
    Cached for the lifetime of the process since the installed PyTorch
    version cannot change at runtime.
    """
    parsed = PkgVersion(str(torch.__version__))
    return parsed.release
This diff is collapsed.
......@@ -31,18 +31,18 @@ def make_row_id_map(
Parameters
----------
routing_map: torch.Tensor
routing_map : torch.Tensor
Input tensor of shape `[num_tokens, num_experts]`. It is a mask tensor that indicates
which experts are routed to which tokens. The values in it: 1 means the token is routed to
this expert and 0 means not.
num_tokens: int
num_tokens : int
Number of tokens in the input tensor.
num_experts: int
num_experts : int
Number of experts in the input tensor.
Returns
-------
row_id_map: torch.Tensor
row_id_map : torch.Tensor
The row_id_map for the permutation of shape `[num_tokens, num_experts * 2 + 1]`.
For each token, the last item is the number of experts that are routed (n_routed).
The first n_routed items are the destination row indices in the permuted tokens.
......@@ -134,23 +134,23 @@ def permute_with_mask_map(
Parameters
----------
inp: torch.Tensor
inp : torch.Tensor
Input tensor of shape `[num_tokens, hidden_size]`, on which permutation will be applied.
row_id_map: torch.Tensor
row_id_map : torch.Tensor
The token to expert mapping tensor of shape `[num_tokens, num_experts * 2 + 1]`.
probs: torch.Tensor
probs : torch.Tensor
The probabilities of the input tensor. If it is not None, it will be permuted.
scale: torch.Tensor
scale : torch.Tensor
The scale of the input tensor. If it is not None, it will be permuted.
num_tokens: int
num_tokens : int
Number of tokens in the input tensor.
num_experts: int
num_experts : int
Number of experts in the input tensor.
num_out_tokens: int
num_out_tokens : int
Number of tokens in the permuted tensor.
hidden_size: int
hidden_size : int
Hidden size of the input tensor.
scale_hidden_dim: int
scale_hidden_dim : int
Hidden size of the scale tensor.
"""
output = torch.empty((num_out_tokens, hidden_size), dtype=inp.dtype, device="cuda")
......@@ -211,20 +211,20 @@ def unpermute_with_mask_map(
Parameters
----------
inp: torch.Tensor
inp : torch.Tensor
Input tensor of shape `[num_out_tokens, hidden_size]`.
row_id_map: torch.Tensor
row_id_map : torch.Tensor
The token to expert mapping tensor of shape `[num_tokens, num_experts * 2 + 1]`.
merging_probs: torch.Tensor
merging_probs : torch.Tensor
The merging probabilities of the input tensor. If it is not None, it will be used as weights
to reduce the unpermuted tokens.
permuted_probs: torch.Tensor
permuted_probs : torch.Tensor
The permuted probabilities of the input tensor. If it is not None, it will be unpermuted.
num_tokens: int
num_tokens : int
Number of tokens in the permuted tensor.
num_experts: int
num_experts : int
Number of experts in the permuted tensor.
hidden_size: int
hidden_size : int
Hidden size of the permuted tensor.
"""
output = torch.empty((num_tokens, hidden_size), dtype=inp.dtype, device="cuda")
......@@ -278,21 +278,21 @@ def unpermute_with_mask_map_bwd_with_merging_probs(
Parameters
----------
fwd_output_grad: torch.Tensor
fwd_output_grad : torch.Tensor
The gradient of the output tensor of shape `[num_tokens, hidden_size]`.
row_id_map: torch.Tensor
row_id_map : torch.Tensor
The token to expert mapping tensor of shape `[num_tokens, num_experts * 2 + 1]`.
fwd_input: torch.Tensor
fwd_input : torch.Tensor
The input tensor of the forward pass of shape `[num_out_tokens, hidden_size]`.
merging_probs: torch.Tensor
merging_probs : torch.Tensor
The merging probabilities of the input tensor of shape `[num_tokens, num_experts]`.
num_tokens: int
num_tokens : int
Number of tokens in the permuted tensor.
num_experts: int
num_experts : int
Number of experts in the permuted tensor.
num_out_tokens: int
num_out_tokens : int
Number of tokens in the output tensor.
hidden_size: int
hidden_size : int
Hidden size of the output tensor.
"""
act_grad = torch.empty(
......@@ -339,13 +339,13 @@ def make_chunk_sort_map(
Parameters
----------
split_sizes: torch.Tensor
split_sizes : torch.Tensor
The sizes of the chunks of shape `[num_splits,]`.
sorted_indices: torch.Tensor
sorted_indices : torch.Tensor
The indices of the sorted chunks of shape `[num_splits,]`.
num_tokens: int
num_tokens : int
Number of tokens in the input tensor.
num_splits: int
num_splits : int
Number of splits of split_sizes and sorted_indices.
"""
row_id_map = torch.empty((num_tokens,), dtype=torch.int32, device="cuda")
......@@ -373,17 +373,17 @@ def sort_chunks_by_map(
Parameters
----------
inp: torch.Tensor
inp : torch.Tensor
Input tensor of shape `[num_tokens, hidden_size]`.
row_id_map: torch.Tensor
row_id_map : torch.Tensor
The token to expert mapping tensor of shape `[num_tokens,]`.
probs: torch.Tensor
probs : torch.Tensor
The probabilities of the input tensor. If it is not None, it will be permuted.
num_tokens: int
num_tokens : int
Number of tokens in the input tensor.
hidden_size: int
hidden_size : int
Hidden size of the input tensor.
is_forward: bool
is_forward : bool
Whether the sort is for forward or backward.
"""
output = torch.empty((num_tokens, hidden_size), dtype=inp.dtype, device="cuda")
......
......@@ -12,8 +12,8 @@ from contextlib import nullcontext
import numpy as np
import torch
from . import torch_version
from .quantized_tensor import Quantizer
from .torch_version import torch_version
from ..debug.pytorch.debug_quantization import DebugQuantizedTensor
......@@ -601,7 +601,7 @@ def get_nvtx_range_context(msg: str):
Parameters
----------
msg: str
msg : str
Message to associate with profiling context.
"""
......@@ -619,7 +619,7 @@ def nvtx_range_push(msg: str) -> None:
Parameters
----------
msg: str
msg : str
Message to associate with range
"""
......@@ -637,7 +637,7 @@ def nvtx_range_pop(msg: Optional[str] = None) -> None:
Parameters
----------
msg: str, optional
msg : str, optional
Message associated with range
"""
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment