Commit 8b656f45 authored by silencealiang
Browse files

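bug fix: read seq_len_interpolation_factor from kwargs in gpt_model_init_wrapper, import vocab_parallel_embedding_init_wrapper instead of vocab_parallel_embedding_init, and add the missing colon after the USE_FLUX_OVERLAP check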

parent 1f73f815
...@@ -73,7 +73,7 @@ def gpt_model_init_wrapper(fn): ...@@ -73,7 +73,7 @@ def gpt_model_init_wrapper(fn):
parallel_output=self.parallel_output, parallel_output=self.parallel_output,
position_embedding_type=self.position_embedding_type, position_embedding_type=self.position_embedding_type,
rotary_percent=self.rotary_percent, rotary_percent=self.rotary_percent,
seq_len_interpolation_factor=seq_len_interpolation_factor, seq_len_interpolation_factor=kwargs.get("seq_len_interpolation_factor", None),
share_mtp_embedding_and_output_weight=self.share_mtp_embedding_and_output_weight, share_mtp_embedding_and_output_weight=self.share_mtp_embedding_and_output_weight,
recompute_mtp_norm=self.recompute_mtp_norm, recompute_mtp_norm=self.recompute_mtp_norm,
recompute_mtp_layer=self.recompute_mtp_layer, recompute_mtp_layer=self.recompute_mtp_layer,
......
...@@ -2,5 +2,5 @@ from .layers import ( ...@@ -2,5 +2,5 @@ from .layers import (
FluxColumnParallelLinear, FluxColumnParallelLinear,
FluxRowParallelLinear, FluxRowParallelLinear,
vocab_parallel_embedding_forward, vocab_parallel_embedding_forward,
vocab_parallel_embedding_init, vocab_parallel_embedding_init_wrapper,
) )
\ No newline at end of file
...@@ -5,7 +5,7 @@ import warnings ...@@ -5,7 +5,7 @@ import warnings
from functools import wraps from functools import wraps
from typing import Callable, List, Optional from typing import Callable, List, Optional
if int(os.getenv("USE_FLUX_OVERLAP", "0")) if int(os.getenv("USE_FLUX_OVERLAP", "0")):
try: try:
import flux import flux
from dcu_megatron.core.utils import is_flux_min_version from dcu_megatron.core.utils import is_flux_min_version
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment