"git@developer.sourcefind.cn:zhaoyu6/sglang.git" did not exist on "62832bb2728e0e8ac5f97dc7687eaf263aaa927f"
Commit cbe77ec0 authored by Dan Kondratyuk, committed by A. Unique TensorFlower
Browse files

Revert causal padding until new checkpoints are trained.

PiperOrigin-RevId: 382606601
parent c3b4fa95
......@@ -26,6 +26,10 @@ from official.modeling import tf_utils
# Type alias: a mapping from string keys to tensors.
States = Dict[str, tf.Tensor]
# Type alias: an activation given either as a string or as a callable.
Activation = Union[str, Callable]
# TODO(dankondratyuk): keep legacy padding until new checkpoints are trained.
# Otherwise, accuracy will be affected.
# When True, the total causal padding is `kernel_size_effective - 1` for every
# dimension and does not take spatial strides into account.
LEGACY_PADDING = True
def make_divisible(value: float,
divisor: int,
......@@ -725,7 +729,8 @@ class CausalConvMixin:
def _compute_buffered_causal_padding(self,
inputs: tf.Tensor,
use_buffered_input: bool = False,
time_axis: int = 1) -> List[List[int]]:
time_axis: int = 1,
) -> List[List[int]]:
"""Calculates padding for 'causal' option for conv layers.
Args:
......@@ -747,6 +752,10 @@ class CausalConvMixin:
(self.kernel_size[i] - 1) * (self.dilation_rate[i] - 1))
for i in range(self.rank)
]
if LEGACY_PADDING:
# Apply legacy padding that does not take into account spatial strides
pad_total = [kernel_size_effective[i] - 1 for i in range(self.rank)]
else:
pad_total = [kernel_size_effective[0] - 1]
for i in range(1, self.rank):
overlap = (input_shape[i] - 1) % self.strides[i] + 1
......
......@@ -24,6 +24,10 @@ from official.vision.beta.modeling.layers import nn_layers
class NNLayersTest(parameterized.TestCase, tf.test.TestCase):
def setUp(self):
  """Switches `nn_layers` to non-legacy padding for the duration of each test.

  The previous value of `nn_layers.LEGACY_PADDING` is restored by a cleanup
  callback once the test finishes, so the override does not leak into other
  tests that run in the same process.
  """
  super().setUp()
  # Register the restore before mutating the module-level flag; cleanups run
  # even when the test itself fails.
  self.addCleanup(
      setattr, nn_layers, 'LEGACY_PADDING', nn_layers.LEGACY_PADDING)
  nn_layers.LEGACY_PADDING = False
def test_hard_swish(self):
activation = tf.keras.layers.Activation('hard_swish')
output = activation(tf.constant([-3, -1.5, 0, 3]))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment