Commit 8b656f45 authored by silencealiang
Browse files

bug fix

parent 1f73f815
......@@ -73,7 +73,7 @@ def gpt_model_init_wrapper(fn):
parallel_output=self.parallel_output,
position_embedding_type=self.position_embedding_type,
rotary_percent=self.rotary_percent,
seq_len_interpolation_factor=seq_len_interpolation_factor,
seq_len_interpolation_factor=kwargs.get("seq_len_interpolation_factor", None),
share_mtp_embedding_and_output_weight=self.share_mtp_embedding_and_output_weight,
recompute_mtp_norm=self.recompute_mtp_norm,
recompute_mtp_layer=self.recompute_mtp_layer,
......
......@@ -2,5 +2,5 @@ from .layers import (
FluxColumnParallelLinear,
FluxRowParallelLinear,
vocab_parallel_embedding_forward,
vocab_parallel_embedding_init,
vocab_parallel_embedding_init_wrapper,
)
\ No newline at end of file
......@@ -5,7 +5,7 @@ import warnings
from functools import wraps
from typing import Callable, List, Optional
if int(os.getenv("USE_FLUX_OVERLAP", "0"))
if int(os.getenv("USE_FLUX_OVERLAP", "0")):
try:
import flux
from dcu_megatron.core.utils import is_flux_min_version
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment