Commit 47a7d4ec authored by thomwolf

update examples from master

parents c8cba677 02173a1a
@@ -395,7 +395,7 @@ python run_squad.py \
   --num_train_epochs 2.0 \
   --max_seq_length 384 \
   --doc_stride 128 \
-  --output_dir ../debug_squad/
+  --output_dir /tmp/debug_squad/
 ```

 Training with the previous hyper-parameters gave us the following results:
...
@@ -327,11 +327,14 @@ def set_optimizer_params_grad(named_params_optimizer, named_params_model, test_nan=False):
         if name_opti != name_model:
             logger.error("name_opti != name_model: {} {}".format(name_opti, name_model))
             raise ValueError
-        if test_nan and torch.isnan(param_model.grad).sum() > 0:
-            is_nan = True
-        if param_opti.grad is None:
-            param_opti.grad = torch.nn.Parameter(param_opti.data.new().resize_(*param_opti.data.size()))
-        param_opti.grad.data.copy_(param_model.grad.data)
+        if param_model.grad is not None:
+            if test_nan and torch.isnan(param_model.grad).sum() > 0:
+                is_nan = True
+            if param_opti.grad is None:
+                param_opti.grad = torch.nn.Parameter(param_opti.data.new().resize_(*param_opti.data.size()))
+            param_opti.grad.data.copy_(param_model.grad.data)
+        else:
+            param_opti.grad = None
     return is_nan

 def main():
...
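The change guards the gradient copy against parameters that received no gradient. Below is a runnable sketch of the fixed helper plus a tiny demo; the helper body follows the hunk above (with the logging folded into the ValueError), while the ModuleDict demo at the end is illustrative and not part of the commit. A parameter that never participates in the backward pass keeps `grad is None`, which the pre-fix code would dereference at `param_model.grad`.

```python
import torch

def set_optimizer_params_grad(named_params_optimizer, named_params_model, test_nan=False):
    """Copy gradients from the training model onto the CPU/fp32 copy
    that the optimizer actually steps on (used with --optimize_on_cpu/fp16)."""
    is_nan = False
    for (name_opti, param_opti), (name_model, param_model) in zip(named_params_optimizer, named_params_model):
        if name_opti != name_model:
            raise ValueError("name_opti != name_model: {} {}".format(name_opti, name_model))
        if param_model.grad is not None:
            if test_nan and torch.isnan(param_model.grad).sum() > 0:
                is_nan = True
            if param_opti.grad is None:
                # first use: allocate a gradient buffer on the optimizer-side copy
                param_opti.grad = torch.nn.Parameter(param_opti.data.new().resize_(*param_opti.data.size()))
            param_opti.grad.data.copy_(param_model.grad.data)
        else:
            # parameter took no part in the backward pass: clear any stale gradient
            param_opti.grad = None
    return is_nan

# Demo: "unused" never contributes to the loss, so its grad stays None.
# The pre-fix version would crash on torch.isnan(param_model.grad) here.
model = torch.nn.ModuleDict({"used": torch.nn.Linear(4, 4), "unused": torch.nn.Linear(4, 4)})
cpu_copy = [(n, torch.nn.Parameter(p.detach().clone())) for n, p in model.named_parameters()]
model["used"](torch.randn(2, 4)).sum().backward()
print(set_optimizer_params_grad(cpu_copy, list(model.named_parameters()), test_nan=True))  # -> False
```

With the guard in place, partially frozen models, or layers untouched by a given batch, no longer break the CPU-copy path.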
@@ -693,6 +693,8 @@ def set_optimizer_params_grad(named_params_optimizer, named_params_model, test_nan=False):
-        if param_opti.grad is None:
-            param_opti.grad = torch.nn.Parameter(param_opti.data.new().resize_(*param_opti.data.size()))
-        param_opti.grad.data.copy_(param_model.grad.data)
+            if param_opti.grad is None:
+                param_opti.grad = torch.nn.Parameter(param_opti.data.new().resize_(*param_opti.data.size()))
+            param_opti.grad.data.copy_(param_model.grad.data)
+        else:
+            param_opti.grad = None
     return is_nan

 def main():
...
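For context, both scripts use this helper as one leg of the optimize-on-CPU round trip: gradients are copied model → CPU copy, the optimizer steps on the copy, and the updated weights are copied back. The sketch below reuses `set_optimizer_params_grad` from the previous sketch; `copy_optimizer_params_to_model` is reconstructed from the companion helper in these example scripts, so treat its exact body, and the simplified loss-scale handling, as assumptions.

```python
import torch
# Reuses set_optimizer_params_grad from the sketch above.

def copy_optimizer_params_to_model(named_params_model, named_params_optimizer):
    """Push the optimizer-side (CPU/fp32) weights back into the training model."""
    for (name_opti, param_opti), (name_model, param_model) in zip(named_params_optimizer, named_params_model):
        if name_opti != name_model:
            raise ValueError("name_opti != name_model: {} {}".format(name_opti, name_model))
        param_model.data.copy_(param_opti.data)

model = torch.nn.Linear(4, 2)
# The optimizer steps on a detached copy of the parameters, as with --optimize_on_cpu.
param_optimizer = [(n, torch.nn.Parameter(p.detach().clone())) for n, p in model.named_parameters()]
optimizer = torch.optim.SGD([p for _, p in param_optimizer], lr=0.1)

model(torch.randn(8, 4)).sum().backward()
is_nan = set_optimizer_params_grad(param_optimizer, list(model.named_parameters()), test_nan=True)
if not is_nan:  # on an fp16 overflow, the scripts halve the loss scale and retry instead
    optimizer.step()  # update the CPU copy
    copy_optimizer_params_to_model(list(model.named_parameters()), param_optimizer)
model.zero_grad()
```

Stepping on a detached copy this way trades speed for GPU memory: the master weights and optimizer state can live in RAM, which is what the `--optimize_on_cpu` option in these examples is for.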