Do not use sqrt_2 in device code (fixes #2208) (#2210)

* do not use sqrt_2 in device code
* use CUDART_SQRT_2PI
* sort includes better

Do not use sqrt_2 in device code (fixes #2208) (#2210)
* do not use sqrt_2 in device code
* use CUDART_SQRT_2PI
* sort includes better
a1f15837 · Adrià Arrufat · GitHub · 3ba004f8 · a1f15837 · a1f15837
Unverified Commit a1f15837 authored Oct 10, 2020 by Adrià Arrufat Committed by GitHub Oct 10, 2020
Show whitespace changes
Inline Side-by-side

Showing with 3 additions and 2 deletions

dlib/cuda/cpu_dlib.cpp dlib/cuda/cpu_dlib.cpp +1 -1

dlib/cuda/cuda_dlib.cu dlib/cuda/cuda_dlib.cu +2 -1

No files found.
--- a/dlib/cuda/cpu_dlib.cpp
+++ b/dlib/cuda/cpu_dlib.cpp
@@ -1711,7 +1711,7 @@ namespace dlib
            const tensor& gradient_input
        )
        {
-            const float beta = 1.0f / std::sqrt(pi) / sqrt_2;
+            const float beta = 1.0f / std::sqrt(2.0f * pi);
            const auto compute_gradient = [beta](float x)
            {
                const float cdf = 0.5f*(1.0f + std::erf(x/sqrt_2));

--- a/dlib/cuda/cuda_dlib.cu
+++ b/dlib/cuda/cuda_dlib.cu
@@ -4,6 +4,7 @@
 #include "cuda_utils.h"
 #include "cuda_dlib.h"
 #include "cudnn_dlibapi.h"
+#include <math_constants.h>


 namespace dlib 
@@ -1501,7 +1502,7 @@ namespace dlib

        __device__ float gelu_compute_gradient(float x)
        {
-                const float beta = 1.0f / std::sqrt(pi) / sqrt_2;
+                const float beta = 1.0f / CUDART_SQRT_2PI;
                const float cdf = normcdf(x);
                const float pdf = beta*std::exp(-0.5f*x*x);
                return cdf + x * pdf;