".github/git@developer.sourcefind.cn:Fzc7075/nunchaku.git" did not exist on "56ff6d8f81e28bb96e7fd12d40437011be2d5860"
Commit 5b1e05ab authored by dongcl's avatar dongcl
Browse files

bug fix

parent c6c5f555
@@ -73,7 +73,7 @@ def gpt_model_init_wrapper(fn):
             parallel_output=self.parallel_output,
             position_embedding_type=self.position_embedding_type,
             rotary_percent=self.rotary_percent,
-            seq_len_interpolation_factor=seq_len_interpolation_factor,
+            seq_len_interpolation_factor=kwargs.get("seq_len_interpolation_factor", None),
             share_mtp_embedding_and_output_weight=self.share_mtp_embedding_and_output_weight,
             recompute_mtp_norm=self.recompute_mtp_norm,
             recompute_mtp_layer=self.recompute_mtp_layer,
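The removed line read a bare seq_len_interpolation_factor that is never bound inside the wrapper's scope, so the wrapped __init__ raised NameError as soon as it ran; pulling the value from kwargs with a None default restores the optional-argument behavior. A minimal sketch of the pattern, where everything except seq_len_interpolation_factor is illustrative rather than the repository's actual code:

from functools import wraps

def gpt_model_init_wrapper(fn):
    # Hypothetical reduction of the wrapper around the model's __init__.
    @wraps(fn)
    def wrapper(self, *args, **kwargs):
        fn(self, *args, **kwargs)
        # Before the fix, a bare `seq_len_interpolation_factor` was referenced
        # here without being defined in this scope, raising NameError at init.
        # Reading it from kwargs falls back to None when the caller omits it.
        self.seq_len_interpolation_factor = kwargs.get("seq_len_interpolation_factor", None)
    return wrapper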
@@ -2,5 +2,5 @@ from .layers import (
     FluxColumnParallelLinear,
     FluxRowParallelLinear,
     vocab_parallel_embedding_forward,
-    vocab_parallel_embedding_init,
+    vocab_parallel_embedding_init_wrapper,
 )
\ No newline at end of file
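The package __init__ re-export has to track the name actually defined in .layers; once the helper became vocab_parallel_embedding_init_wrapper, the stale vocab_parallel_embedding_init import broke the whole package at import time. A hedged reproduction of that failure mode (the package path is assumed from the dcu_megatron imports elsewhere in this commit, not verified):

# A stale re-export breaks the package import even for callers that never
# touch the renamed symbol, because __init__.py runs on first import.
try:
    from dcu_megatron.core.tensor_parallel import FluxRowParallelLinear
except ImportError as err:
    # Before the fix: "cannot import name 'vocab_parallel_embedding_init' ..."
    print(f"package import failed: {err}")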
@@ -5,7 +5,7 @@ import warnings
 from functools import wraps
 from typing import Callable, List, Optional
-if int(os.getenv("USE_FLUX_OVERLAP", "0"))
+if int(os.getenv("USE_FLUX_OVERLAP", "0")):
     try:
         import flux
         from dcu_megatron.core.utils import is_flux_min_version
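The old if line was missing its trailing colon, which is a SyntaxError: Python rejects the file during compilation, so the module failed to import even with USE_FLUX_OVERLAP unset. A sketch of the gating idiom; the flag name and fallback branch beyond the visible lines are assumptions:

import os

# Gate the optional flux import behind an environment flag. The missing colon
# was a SyntaxError, so syntax checking failed before either branch could run.
if int(os.getenv("USE_FLUX_OVERLAP", "0")):
    try:
        import flux
        HAVE_FLUX = True  # hypothetical flag; the diff truncates the real body
    except ImportError:
        HAVE_FLUX = False
else:
    HAVE_FLUX = False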
@@ -54,7 +54,6 @@ from megatron.core.tensor_parallel.layers import (
     linear_with_grad_accumulation_and_async_allreduce
 )
 _grad_accum_fusion_available = True
 try:
     import fused_weight_gradient_mlp_cuda
...
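The surrounding context here is the usual Megatron-LM availability probe: attempt the fused-kernel import once at module load and record the outcome in a flag, rather than retrying in hot paths. A sketch of that idiom, with the except branch assumed from the upstream convention since the diff is truncated at this point:

# Probe for the fused CUDA kernel at import time; downstream code checks the
# flag instead of re-attempting the import on every call.
_grad_accum_fusion_available = True
try:
    import fused_weight_gradient_mlp_cuda
except ImportError:
    _grad_accum_fusion_available = False  # assumed fallback (not shown in the diff)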