Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dlib
Commits
373cbb57
Commit
373cbb57
authored
Apr 04, 2017
by
Davis King
Browse files
Added a tt::inv() for computing matrix inversions on the GPU.
parent
b3dc8169
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
381 additions
and
14 deletions
+381
-14
dlib/CMakeLists.txt
dlib/CMakeLists.txt
+23
-2
dlib/dnn/cusolver_dlibapi.cu
dlib/dnn/cusolver_dlibapi.cu
+204
-0
dlib/dnn/cusolver_dlibapi.h
dlib/dnn/cusolver_dlibapi.h
+75
-0
dlib/dnn/tensor_tools.cpp
dlib/dnn/tensor_tools.cpp
+26
-11
dlib/dnn/tensor_tools.h
dlib/dnn/tensor_tools.h
+31
-0
dlib/test/cublas.cpp
dlib/test/cublas.cpp
+22
-1
No files found.
dlib/CMakeLists.txt
View file @
373cbb57
...
...
@@ -568,14 +568,24 @@ if (NOT TARGET dlib)
endif
()
endif
()
endif
()
endif
()
# Find where cuSOLVER is since the FindCUDA cmake package doesn't
# bother to look for it.
get_filename_component
(
cuda_blas_path
${
CUDA_CUBLAS_LIBRARIES
}
DIRECTORY
)
find_library
(
cusolver cusolver HINTS
${
cuda_blas_path
}
)
# Also find OpenMP since cuSOLVER needs it.
find_package
(
OpenMP
)
if
(
NOT OPENMP_FOUND
)
message
(
STATUS
"*** Didn't find OpenMP, which is required to use CUDA. ***"
)
endif
()
endif
()
if
(
CUDA_FOUND AND cudnn AND cudnn_include AND COMPILER_CAN_DO_CPP_11 AND cuda_test_compile_worked AND cudnn_test_compile_worked
)
if
(
CUDA_FOUND AND cudnn AND cudnn_include AND COMPILER_CAN_DO_CPP_11 AND cuda_test_compile_worked AND cudnn_test_compile_worked
AND OPENMP_FOUND
)
set
(
source_files
${
source_files
}
dnn/cuda_dlib.cu
dnn/cudnn_dlibapi.cpp
dnn/cublas_dlibapi.cpp
dnn/cusolver_dlibapi.cu
dnn/curand_dlibapi.cpp
dnn/cuda_data_ptr.cpp
dnn/gpu_data.cpp
...
...
@@ -584,6 +594,7 @@ if (NOT TARGET dlib)
${
CUDA_CUBLAS_LIBRARIES
}
${
cudnn
}
${
CUDA_curand_LIBRARY
}
${
cusolver
}
)
include_directories
(
${
cudnn_include
}
)
else
()
...
...
@@ -648,6 +659,11 @@ if (NOT TARGET dlib)
PUBLIC
${
dlib_needed_includes
}
)
target_link_libraries
(
dlib PRIVATE
${
dlib_needed_libraries
}
)
if
(
OPENMP_FOUND
)
# Enable OpenMP
target_compile_options
(
dlib PUBLIC
${
OpenMP_CXX_FLAGS
}
)
target_link_libraries
(
dlib PUBLIC
${
OpenMP_CXX_FLAGS
}
)
endif
()
if
(
UNIX AND NOT DLIB_IN_PROJECT_BUILD
)
if
(
DLIB_USE_CUDA
)
cuda_add_library
(
dlib_shared SHARED
${
source_files
}
)
...
...
@@ -662,6 +678,11 @@ if (NOT TARGET dlib)
PUBLIC
${
dlib_needed_includes
}
)
target_link_libraries
(
dlib_shared PRIVATE
${
dlib_needed_libraries
}
)
if
(
OPENMP_FOUND
)
# Enable OpenMP
target_compile_options
(
dlib_shared PUBLIC
${
OpenMP_CXX_FLAGS
}
)
target_link_libraries
(
dlib_shared PUBLIC
${
OpenMP_CXX_FLAGS
}
)
endif
()
endif
()
endif
()
##### end of if NOT DLIB_ISO_CPP_ONLY ##########################################################
...
...
dlib/dnn/cusolver_dlibapi.cu
0 → 100644
View file @
373cbb57
// Copyright (C) 2017 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_DNN_CuSOLVER_CU_
#define DLIB_DNN_CuSOLVER_CU_
#ifdef DLIB_USE_CUDA
#include "cusolver_dlibapi.h"
#include <cublas_v2.h>
#include <cusolverDn.h>
#include "cuda_utils.h"
// ----------------------------------------------------------------------------------------
// Convert a cuSOLVER status code into a human readable error string.
// Previously only two of the documented cusolverStatus_t values were
// decoded; the remaining documented codes now get specific messages too,
// so CHECK_CUSOLVER error text is more useful.
static const char* cusolver_get_error_string(cusolverStatus_t s)
{
    switch(s)
    {
        case CUSOLVER_STATUS_NOT_INITIALIZED:
            return "CUDA Runtime API initialization failed.";
        case CUSOLVER_STATUS_ALLOC_FAILED:
            return "CUDA Resources could not be allocated.";
        case CUSOLVER_STATUS_INVALID_VALUE:
            return "An invalid value was passed to a cuSolver function.";
        case CUSOLVER_STATUS_ARCH_MISMATCH:
            return "The device does not support this cuSolver operation.";
        case CUSOLVER_STATUS_EXECUTION_FAILED:
            return "A cuSolver kernel failed to execute.";
        case CUSOLVER_STATUS_INTERNAL_ERROR:
            return "An internal cuSolver error occurred.";
        default:
            return "A call to cuSolver failed";
    }
}
// Check the return value of a call to the cuSolver runtime for an error condition.
// On failure this throws dlib::cusolver_error with a message that names the
// failing call, the source location, the raw status code, and a readable
// description from cusolver_get_error_string().  The do/while(false) wrapper
// makes the macro behave like a single statement after an if/else.
#define CHECK_CUSOLVER(call) \
do{ \
const cusolverStatus_t error = call; \
if (error != CUSOLVER_STATUS_SUCCESS) \
{ \
std::ostringstream sout; \
sout << "Error while calling " << #call << " in file " << __FILE__ << ":" << __LINE__ << ". ";\
sout << "code: " << error << ", reason: " << cusolver_get_error_string(error);\
throw dlib::cusolver_error(sout.str()); \
} \
}while(false)
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
namespace
dlib
{
namespace
cuda
{
// -----------------------------------------------------------------------------------
// Holds one cuSOLVER handle per CUDA device and creates each handle lazily
// the first time it is requested.  Not copyable, since the handles are
// unique resources that must be destroyed exactly once.
class cusolver_context
{
public:
    cusolver_context(const cusolver_context&) = delete;
    cusolver_context& operator=(const cusolver_context&) = delete;

    cusolver_context() { handles.resize(16); }

    ~cusolver_context()
    {
        // Destroy only the handles that were actually created.
        for (auto h : handles)
            if (h)
                cusolverDnDestroy(h);
    }

    // Return the cuSOLVER handle for the currently selected CUDA device,
    // creating it on first use.
    cusolverDnHandle_t get_handle()
    {
        int dev;
        CHECK_CUDA(cudaGetDevice(&dev));
        // Grow the handle table if this device id hasn't been seen before.
        if (dev >= (long)handles.size())
            handles.resize(dev+16);
        // Lazily create this device's handle.
        if (!handles[dev])
            CHECK_CUSOLVER(cusolverDnCreate(&handles[dev]));
        return handles[dev];
    }

private:
    std::vector<cusolverDnHandle_t> handles;
};
// Return a cuSOLVER handle for the current device.  Each thread gets its
// own cusolver_context, so handles are never shared across threads.
static cusolverDnHandle_t context()
{
    thread_local cusolver_context ctx;
    return ctx.get_handle();
}
// ------------------------------------------------------------------------------------
// ------------------------------------------------------------------------------------
// ------------------------------------------------------------------------------------
// CUDA kernel that writes an nr-by-nr identity matrix into the buffer m.
// In a square matrix stored contiguously, the diagonal entries are exactly
// the elements whose flat index is a multiple of nr+1.
__global__ void _cuda_set_to_identity_matrix(float* m, size_t nr)
{
    for (auto idx : grid_stride_range(0, nr*nr))
        m[idx] = (idx%(nr+1) == 0) ? 1 : 0;
}
// Overwrite the square tensor m with the identity matrix.
void set_to_identity_matrix(tensor& m)
{
    const auto n = m.num_samples();
    // The tensor must hold exactly n*n elements, i.e. be a square matrix.
    DLIB_CASSERT(m.size() == n*n);
    launch_kernel(_cuda_set_to_identity_matrix, max_jobs(m.size()), m.device(), n);
}
// ------------------------------------------------------------------------------------
// Wait for any outstanding GPU work before the member buffers are freed by
// the implicit member destructors.
inv::~inv()
{
    sync_if_needed();
}
// ------------------------------------------------------------------------------------
// Compute the matrix inverse of m_ on the GPU, writing the result to out.
// Strategy: LU-factorize a scratch copy of the input with cusolverDnSgetrf,
// then solve A*X = I with cusolverDnSgetrs so that X = inv(A).
// requires
//     - m_.size() == m_.num_samples()*m_.num_samples()
void inv::operator() (
    const tensor& m_,
    resizable_tensor& out
)
{
    DLIB_CASSERT(m_.size() == m_.num_samples()*m_.num_samples(), "Input matrix must be square if you want to invert it.");

    // Work on a copy: the getrf factorization overwrites its input.
    m = m_;

    // The solve's right hand side starts as the identity, so the solution
    // that getrs leaves in out is the inverse.
    out.copy_size(m);
    set_to_identity_matrix(out);

    const int nc = m.num_samples();
    int Lwork;
    // Ask cuSOLVER how much workspace this problem size needs.
    CHECK_CUSOLVER(cusolverDnSgetrf_bufferSize(context(), nc, nc, m.device(), nc, &Lwork));

    // Grow the cached buffers only when a larger problem arrives.  Each
    // reallocation is preceded by sync_if_needed() so we never free device
    // memory that an in-flight kernel might still be using.
    if (Lwork > (int)workspace.size())
    {
        sync_if_needed();
        workspace = cuda_data_ptr<float>(Lwork);
    }
    if (nc > (int)Ipiv.size())
    {
        sync_if_needed();
        Ipiv = cuda_data_ptr<int>(nc);
    }
    if (info.size() != 1)
    {
        info = cuda_data_ptr<int>(1);
    }

    // LU factorization with partial pivoting, then solve against the
    // identity to obtain the inverse in out.
    CHECK_CUSOLVER(cusolverDnSgetrf(context(), nc, nc, m.device(), nc, workspace, Ipiv, info));
    CHECK_CUSOLVER(cusolverDnSgetrs(context(), CUBLAS_OP_N, nc, nc, m.device(), nc, Ipiv, out.device(), nc, info));

    // Remember that GPU work is now in flight; see sync_if_needed().
    did_work_lately = true;
}
// ------------------------------------------------------------------------------------
int
inv
::
get_last_status
(
)
{
std
::
vector
<
int
>
linfo
;
memcpy
(
linfo
,
info
);
if
(
linfo
.
size
()
!=
0
)
return
linfo
[
0
];
else
return
0
;
}
// ------------------------------------------------------------------------------------
// Block until previously launched GPU work finishes, but only if some work
// was actually launched since the last sync.  Called before device buffers
// are reallocated or destroyed.
void inv::sync_if_needed()
{
    if (!did_work_lately)
        return;
    did_work_lately = false;
    // Wait for outstanding kernel launches so it is safe to release or
    // replace the GPU memory they reference.
    cudaDeviceSynchronize();
}
// ------------------------------------------------------------------------------------
}
}
#endif // DLIB_USE_CUDA
#endif // DLIB_DNN_CuSOLVER_CU_
dlib/dnn/cusolver_dlibapi.h
0 → 100644
View file @
373cbb57
// Copyright (C) 2017 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_DNN_CuSOLVER_H_
#define DLIB_DNN_CuSOLVER_H_
#ifdef DLIB_USE_CUDA
#include "tensor.h"
#include "cuda_errors.h"
#include "cuda_data_ptr.h"
#include "../noncopyable.h"
namespace
dlib
{
namespace
cuda
{
// -----------------------------------------------------------------------------------
class inv : noncopyable
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            This is a functor for doing matrix inversion on the GPU.  The only
            reason it's an object is to avoid the reallocation of some GPU memory
            blocks if you want to do a bunch of matrix inversions in a row.
    !*/
public:

    inv() = default;
    ~inv();

    void operator() (
        const tensor& m,
        resizable_tensor& out
    );
    /*!
        requires
            - m.size() == m.num_samples()*m.num_samples()
              (i.e. mat(m) must be a square matrix)
        ensures
            - out == inv(mat(m));
    !*/

    int get_last_status(
    );
    /*!
        ensures
            - returns 0 if the last matrix inversion was successful and != 0
              otherwise.
    !*/

private:

    // Waits for in-flight GPU work before device buffers are freed/reallocated.
    void sync_if_needed();

    // True when GPU work has been launched since the last synchronization.
    bool did_work_lately = false;
    // Scratch copy of the input that is handed to cuSOLVER.
    resizable_tensor m;
    // Cached cuSOLVER workspace buffer, grown on demand and reused.
    cuda_data_ptr<float> workspace;
    // Pivot indices produced by the LU factorization.
    cuda_data_ptr<int> Ipiv;
    // Status of the last factorization (read back by get_last_status()).
    cuda_data_ptr<int> info;
};
// ------------------------------------------------------------------------------------
}
}
#endif // DLIB_USE_CUDA
#endif // DLIB_DNN_CuSOLVER_H_
dlib/dnn/tensor_tools.cpp
View file @
373cbb57
...
...
@@ -792,20 +792,35 @@ namespace dlib { namespace tt
// ------------------------------------------------------------------------------------
// Copy count_k channel-planes from src (starting at channel src_k_offset)
// into dest (starting at channel dest_k_offset), dispatching to the CUDA
// implementation when dlib is built with CUDA and to the CPU one otherwise.
// NOTE(review): the diff rendering duplicated the signature and both calls;
// this is the single intended definition.
void copy_tensor(
    tensor& dest,
    size_t dest_k_offset,
    const tensor& src,
    size_t src_k_offset,
    size_t count_k
)
{
#ifdef DLIB_USE_CUDA
    cuda::copy_tensor(dest, dest_k_offset, src, src_k_offset, count_k);
#else
    cpu::copy_tensor(dest, dest_k_offset, src, src_k_offset, count_k);
#endif
}
// ----------------------------------------------------------------------------------------
// Invert the square tensor m, writing the result to out.  Uses the cached
// cuda::inv functor when built with CUDA, otherwise falls back to dlib's
// CPU matrix inverse.
void inv::operator() (
    const tensor& m,
    resizable_tensor& out
)
{
#ifdef DLIB_USE_CUDA
    finv(m, out);
#else
    // dlib::inv() operates on matrix expressions, so view the tensor as a
    // matrix with mat() before inverting on the CPU (the header documents
    // the contract as out == inv(mat(m))).
    out = dlib::inv(mat(m));
#endif
}
// ----------------------------------------------------------------------------------------
...
...
dlib/dnn/tensor_tools.h
View file @
373cbb57
...
...
@@ -6,6 +6,7 @@
#include "tensor.h"
#include "cudnn_dlibapi.h"
#include "cublas_dlibapi.h"
#include "cusolver_dlibapi.h"
#include "curand_dlibapi.h"
#include "cpu_dlib.h"
#include "cuda_dlib.h"
...
...
@@ -123,6 +124,36 @@ namespace dlib { namespace tt
- performs: dest = alpha*L*R + beta*mat(dest)
!*/
// ----------------------------------------------------------------------------------------
class inv
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            This is a functor for doing matrix inversion on the GPU.  The only
            reason it's an object is to avoid the reallocation of some GPU memory
            blocks if you want to do a bunch of matrix inversions in a row.
    !*/
public:

    void operator() (
        const tensor& m,
        resizable_tensor& out
    );
    /*!
        requires
            - m.size() == m.num_samples()*m.num_samples()
              (i.e. mat(m) must be a square matrix)
        ensures
            - out == inv(mat(m));
    !*/

private:
#ifdef DLIB_USE_CUDA
    // GPU implementation; kept as a member so its cached device buffers are
    // reused across calls.
    cuda::inv finv;
#endif
};
// ----------------------------------------------------------------------------------------
class
tensor_rand
...
...
dlib/test/cublas.cpp
View file @
373cbb57
...
...
@@ -8,7 +8,7 @@
#include <cstdlib>
#include <ctime>
#include <vector>
#include "../dnn/
cublas_dlibapi
.h"
#include "../dnn/
tensor_tools
.h"
#include "tester.h"
...
...
@@ -25,6 +25,26 @@ namespace
logger
dlog
(
"test.cublas"
);
void
test_inv
()
{
tt
::
tensor_rand
rnd
;
dlib
::
tt
::
inv
tinv
;
dlib
::
cuda
::
inv
cinv
;
resizable_tensor
minv1
,
minv2
;
for
(
int
n
=
1
;
n
<
20
;
++
n
)
{
print_spinner
();
resizable_tensor
m
(
n
,
n
);
rnd
.
fill_uniform
(
m
);
tinv
(
m
,
minv1
);
cinv
(
m
,
minv2
);
matrix
<
float
>
mref
=
inv
(
mat
(
m
));
DLIB_TEST_MSG
(
mean
(
abs
(
mref
-
mat
(
minv1
)))
/
mean
(
abs
(
mref
))
<
1e-5
,
mean
(
abs
(
mref
-
mat
(
minv1
)))
/
mean
(
abs
(
mref
))
<<
" n: "
<<
n
);
DLIB_TEST_MSG
(
mean
(
abs
(
mref
-
mat
(
minv2
)))
/
mean
(
abs
(
mref
))
<
1e-5
,
mean
(
abs
(
mref
-
mat
(
minv2
)))
/
mean
(
abs
(
mref
))
<<
" n: "
<<
n
);
}
}
class
cublas_tester
:
public
tester
{
...
...
@@ -38,6 +58,7 @@ namespace
void
perform_test
(
)
{
test_inv
();
{
resizable_tensor
a
(
4
,
3
),
b
(
3
,
4
),
c
(
3
,
3
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment