Unverified Commit 72f373c1 authored by Paul Johnson, committed by GitHub

Remove seed_isort_config and related dependencies. (#969)

This is no longer needed since isort is now at version 5.10.

Also pin the black version to 22.3.0 to fix an issue with the click
dependency.

Update files that now fail with the new version of black: a = 2 ** 4 ->
a = 2**4
parent 1bc96fa8
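
For context on the diffs below: black 22.x (pinned here to 22.3.0) drops the spaces around the power operator whenever both operands are simple (names, numeric literals, or attribute access), and that is the only reformatting these hunks contain. A minimal sketch, assuming black >= 22.1.0 is installed and importable as a library, that reproduces the rule:

# Minimal sketch (assumption: black >= 22.1.0 is available as a Python package).
# black.format_str() applies the same stable-style rules as the CLI:
# spaces around ** are removed only when both operands are simple.
import black

mode = black.Mode()  # default stable style, no preview features
print(black.format_str("a = 2 ** 4\n", mode=mode), end="")  # -> a = 2**4
print(black.format_str("x = 10 ** (epoch + 1)\n", mode=mode), end="")  # -> x = 10 ** (epoch + 1), spaces kept

This also explains why the last assert in the final hunk keeps its spacing: 10 ** (epoch + 1) has a parenthesized operand, which black does not treat as simple.
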
@@ -233,7 +233,7 @@ def run_ddp_parity(
 @skip_if_no_cuda
 @skip_if_single_gpu
-@pytest.mark.parametrize("reduce_buffer_size", [0, 2 ** 20])
+@pytest.mark.parametrize("reduce_buffer_size", [0, 2**20])
 @pytest.mark.parametrize("grad_accumulation", [True, False])
 @pytest.mark.parametrize("change_train_graph", [True, False])
 @pytest.mark.parametrize("fp16_reduction", _test_fp16_reduction)
@@ -347,7 +347,7 @@ def run_ddp_parity_two_optim(rank, world_size, backend, temp_file_name, reduce_b
 @skip_if_no_cuda
 @skip_if_single_gpu
-@pytest.mark.parametrize("reduce_buffer_size", [0, 2 ** 20])
+@pytest.mark.parametrize("reduce_buffer_size", [0, 2**20])
 def test_ddp_parity_two_optim(reduce_buffer_size):
     world_size = 2
     backend = dist.Backend.NCCL
@@ -38,7 +38,7 @@ def tilt_dist(input):
     # Tilt mean by single batch.
     for i, single in enumerate(input):
-        single += 2 ** i
+        single += 2**i
     return input
@@ -150,7 +150,7 @@ def test_optimize():
     dbn.eval()
     with torch.no_grad():
-        assert torch.allclose(bn(input), dbn(input), atol=1e-1 * (10 ** i))
+        assert torch.allclose(bn(input), dbn(input), atol=1e-1 * (10**i))


 def test_conv_bn():
@@ -311,14 +311,14 @@ def test_update_optim_scale():
     weight, bias, input = make_half_precision_params()
     optimizer = Adam([weight, bias], lr=1e-3, precision=Precision.PURE_FP16)
     optimizer._optim_scale_update_freq = 1
-    optimizer._optim_scale = 2 ** 15
+    optimizer._optim_scale = 2**15

     optimizer.zero_grad()
     loss = (weight.mv(input) + bias).pow(2).sum()
     loss.backward()
     optimizer.step()
-    assert optimizer._optim_scale == 2 ** 16
+    assert optimizer._optim_scale == 2**16


 @skip_if_no_cuda
@@ -602,7 +602,7 @@ def run_test_multiple_groups(rank, world_size, tempfile_name):
         # With SGD, Momentum is required to get a state to shard
         optimizer = optim.OSS(
-            model.parameters(), lr=0.1, momentum=0.99, group=process_group, broadcast_buffer_size=2 ** 20
+            model.parameters(), lr=0.1, momentum=0.99, group=process_group, broadcast_buffer_size=2**20
         )
         check(optimizer)
@@ -875,7 +875,7 @@ def run_ddp_parity(rank, world_size, backend, temp_file_name, change_train_graph
             params=oss_trainable_params,
             optim=optimizer,
             group=None,
-            broadcast_buffer_size=2 ** 10,
+            broadcast_buffer_size=2**10,
             **optimizer_settings,
         )
@@ -175,7 +175,7 @@ def test_lr_scheduler():
     model = Linear(2, 2, bias=False)
     optim = AdaScale(SGD(model.parameters(), lr=0.1), num_gradients_to_accumulate=3)
     # We use 1, not 0.1 here since scheduler.step() is called here first.
-    scheduler = LambdaLR(optim, lr_lambda=lambda epoch: 1 / 10 ** epoch)
+    scheduler = LambdaLR(optim, lr_lambda=lambda epoch: 1 / 10**epoch)
     for epoch in range(3):
         for data_idx in range(10):
             for accumulation in range(3):
@@ -186,7 +186,7 @@ def test_lr_scheduler():
             optim.step()
             optim.zero_grad()
         # asserting LR is right
-        assert np.allclose(optim.param_groups[0]["lr"], 0.1 / 10 ** epoch), optim.param_groups[0]["lr"]
+        assert np.allclose(optim.param_groups[0]["lr"], 0.1 / 10**epoch), optim.param_groups[0]["lr"]
         scheduler.step()
         # asserting LR is right
         assert np.allclose(optim.param_groups[0]["lr"], 0.1 / 10 ** (epoch + 1)), optim.param_groups[0]["lr"]