Commit aee5aff4 authored by Deyu Fu's avatar Deyu Fu
Browse files

Merge branch 'master' into deyuf/fused_optimizer_v2

parents 007c5947 880ab925
This diff is collapsed.
This diff is collapsed.
......@@ -795,11 +795,13 @@ void cuda_layer_norm_gradient(
invvar->data<accscalar_t>(),
input,
n1,n2,
gamma->data<scalar_t_0>(),
beta->data<scalar_t_0>(),
// TMJ pass NULL argument for gamma, beta, grad_gamma and grad_beta
// if gamma Tensor is NULL on input.
gamma != NULL ? gamma->data<scalar_t_0>() : NULL,
gamma != NULL ? beta->data<scalar_t_0>() : NULL,
epsilon,
grad_input->data<scalar_t_0>(),
grad_gamma->data<scalar_t_0>(),
grad_beta->data<scalar_t_0>());
gamma != NULL ? grad_gamma->data<scalar_t_0>() : NULL,
gamma != NULL ? grad_beta->data<scalar_t_0>() : NULL);
)
}
This diff is collapsed.
This diff is collapsed.
cxxfilt>=0.2.0
tqdm>=4.28.1
numpy>=1.15.3
PyYAML>=5.1
pytest>=3.5.1
This diff is collapsed.
import test_pyprof_nvtx.TestPyProfNvtx as TestPyProfNvtx
This diff is collapsed.
import unittest
import sys
# Names of the test groups this driver covers. The list was previously assigned
# twice in a row; the first value was overwritten before anything could read it,
# so only the effective assignment is kept.
# NOTE(review): these look like sub-directory names passed to test discovery by
# the part of the script that is not shown here - confirm against that code.
test_dirs = [
    "run_amp",
    "run_fp16util",
    "run_optimizers",
    "run_fused_layer_norm",
    "run_pyprof_nvtx",
]

# verbosity=2 makes unittest print one line per test case (name and outcome)
# instead of the default dot-per-test progress output.
runner = unittest.TextTestRunner(verbosity=2)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment