"vscode:/vscode.git/clone" did not exist on "1e85a140a358a71fbafb669dc2bc20a6589ae63b"
Commit bf323343 authored by dongcl
Browse files

support flux for mtp

parent 31e933a8
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved.
from typing import List from typing import List
import torch import torch
......
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved.
from typing import Literal from typing import Literal
import torch import torch
......
# Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. import os
import logging import logging
from dataclasses import dataclass from dataclasses import dataclass
from typing import Union, Optional, Literal from typing import Union, Optional, Literal
...@@ -137,12 +137,16 @@ class MultiTokenPredictor(MegatronModule): ...@@ -137,12 +137,16 @@ class MultiTokenPredictor(MegatronModule):
self.embedding_activation_buffer = None self.embedding_activation_buffer = None
self.grad_output_buffer = None self.grad_output_buffer = None
self.output_layer = tensor_parallel.ColumnParallelLinear( if int(os.getenv("USE_FLUX_OVERLAP", "0")):
config.hidden_size, column_parallel_linear_impl = FluxColumnParallelLinear
else:
column_parallel_linear_impl = tensor_parallel.ColumnParallelLinear
self.output_layer = column_parallel_linear_impl(
self.config.hidden_size,
self.vocab_size, self.vocab_size,
config=config, config=self.config,
init_method=config.init_method, init_method=self.config.init_method,
bias=self.add_output_layer_bias, bias=False,
skip_bias_add=False, skip_bias_add=False,
gather_output=not self.parallel_output, gather_output=not self.parallel_output,
skip_weight_param_allocation=self.share_mtp_embedding_and_output_weight, skip_weight_param_allocation=self.share_mtp_embedding_and_output_weight,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment