dlib / Commit 3586d409
Authored Oct 26, 2016 by Davis King
Parent: 2a2956a2

Added tt::scale_columns(). Also optimized some other cuda kernel launches a little.
Showing 4 changed files with 73 additions and 4 deletions (+73 -4)
dlib/dnn/cuda_dlib.cu      +25 -2
dlib/dnn/cuda_dlib.h        +6 -0
dlib/dnn/tensor_tools.cpp  +26 -0
dlib/dnn/tensor_tools.h    +16 -2
dlib/dnn/cuda_dlib.cu

@@ -142,7 +142,9 @@ namespace dlib
     )
     {
         invnorms.set_size(data.num_samples());
-        launch_kernel(_cuda_inverse_norms, max_jobs(data.size()), invnorms.device(), data.device(), data.num_samples(), data.size()/data.num_samples(), eps);
+        dim3 blocks(10,1);
+        dim3 threads(32,32);  // x size must be 32 because we are using warp_reduce_atomic_add() in the kernel.
+        _cuda_inverse_norms<<<blocks,threads>>>(invnorms.device(), data.device(), data.num_samples(), data.size()/data.num_samples(), eps);
     }

 // ----------------------------------------------------------------------------------------

@@ -174,7 +176,28 @@ namespace dlib
     )
     {
         out.set_size(lhs.num_samples());
-        launch_kernel(_cuda_dot_prods, max_jobs(lhs.size()), out.device(), lhs.device(), rhs.device(), lhs.num_samples(), lhs.size()/lhs.num_samples());
+        dim3 blocks(10,1);
+        dim3 threads(32,32);  // x size must be 32 because we are using warp_reduce_atomic_add() in the kernel.
+        _cuda_dot_prods<<<blocks,threads>>>(out.device(), lhs.device(), rhs.device(), lhs.num_samples(), lhs.size()/lhs.num_samples());
     }
+
+// ----------------------------------------------------------------------------------------
+
+    __global__ void _cuda_scale_columns(float* out, const float* m, const float* v, size_t nr, size_t nc)
+    {
+        for (auto j : grid_stride_range(0, nr*nc))
+        {
+            out[j] = m[j]*v[j%nc];
+        }
+    }
+
+    void scale_columns (
+        tensor& out,
+        const tensor& m,
+        const tensor& v
+    )
+    {
+        launch_kernel(_cuda_scale_columns, max_jobs(m.size()), out.device(), m.device(), v.device(), m.num_samples(), m.size()/m.num_samples());
+    }

 // ----------------------------------------------------------------------------------------
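The two launch changes above swap the generic launch_kernel() helper for an explicit <<<blocks,threads>>> launch so that blockDim.x is exactly one warp, which is what warp_reduce_atomic_add() assumes. Below is a minimal standalone sketch of that launch shape. It is not dlib's code: it uses a plain __shfl_down_sync() reduction instead of warp_reduce_atomic_add(), and the kernel name, sizes, and test data are invented for illustration.

// warp_row_sums.cu : hedged standalone sketch of a warp-sized launch.
#include <cstdio>
#include <cuda_runtime.h>

// Each warp (the 32 x-threads sharing a threadIdx.y) sums one matrix row at a
// time: the lanes stride over the row's columns, then combine their partial
// sums with warp shuffles, and lane 0 writes the result.
__global__ void row_sums(float* out, const float* m, int nr, int nc)
{
    for (int r = blockIdx.x*blockDim.y + threadIdx.y; r < nr; r += gridDim.x*blockDim.y)
    {
        float s = 0;
        for (int c = threadIdx.x; c < nc; c += 32)
            s += m[r*nc + c];

        // Warp reduction: only valid because blockDim.x is a full warp of 32.
        for (int offset = 16; offset > 0; offset /= 2)
            s += __shfl_down_sync(0xffffffff, s, offset);

        if (threadIdx.x == 0)
            out[r] = s;
    }
}

int main()
{
    const int nr = 4, nc = 100;
    float *m, *out;
    cudaMallocManaged(&m, nr*nc*sizeof(float));
    cudaMallocManaged(&out, nr*sizeof(float));
    for (int i = 0; i < nr*nc; ++i)
        m[i] = 1.0f;

    // Same launch shape as in the diff: the x dimension must be the warp size.
    dim3 blocks(10,1);
    dim3 threads(32,32);
    row_sums<<<blocks,threads>>>(out, m, nr, nc);
    cudaDeviceSynchronize();

    for (int r = 0; r < nr; ++r)
        printf("row %d sum = %f\n", r, out[r]);  // expect 100 for each row
    cudaFree(m);
    cudaFree(out);
}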
dlib/dnn/cuda_dlib.h

@@ -120,6 +120,12 @@ namespace dlib
         const tensor& rhs
     );

+    void scale_columns (
+        tensor& out,
+        const tensor& m,
+        const tensor& v
+    );
+
     void scale_rows (
         tensor& out,
         const tensor& m,
dlib/dnn/tensor_tools.cpp

@@ -69,6 +69,27 @@ namespace dlib { namespace tt
 #endif
     }

+    void scale_columns (
+        tensor& out,
+        const tensor& m,
+        const tensor& v
+    )
+    {
+        DLIB_CASSERT(have_same_dimensions(out,m));
+        DLIB_CASSERT(is_vector(v));
+        if (m.size() == 0 && v.size() == 0)
+            return;
+        DLIB_CASSERT(m.size() != 0);
+        DLIB_CASSERT(m.size()/m.num_samples() == v.size());
+
+#ifdef DLIB_USE_CUDA
+        cuda::scale_columns(out, m, v);
+#else
+        DLIB_CASSERT(false, "shouldn't be called right now");
+        out = scale_columns(mat(m), mat(v));
+#endif
+    }
+
     void scale_rows (
         tensor& out,
         const tensor& m,

@@ -76,6 +97,11 @@ namespace dlib { namespace tt
     )
     {
         DLIB_CASSERT(have_same_dimensions(out,m));
+        DLIB_CASSERT(is_vector(v));
+        if (m.size() == 0 && v.size() == 0)
+            return;
+        DLIB_CASSERT(m.size() != 0);
+        DLIB_CASSERT(m.num_samples() == v.size());

 #ifdef DLIB_USE_CUDA
         cuda::scale_rows(out, m, v);
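For reference, the semantics that the assertions above enforce can be written as two plain loops. This is not the dlib code path (tt::scale_columns() dispatches to cuda::scale_columns(), and its non-CUDA branch in this commit still asserts); it is a hedged row-major sketch with hypothetical *_ref helper names.

// Plain-array reference for the column/row scaling semantics checked above.
#include <cassert>
#include <cstddef>
#include <vector>

// out(r,c) = m(r,c) * v(c); requires v.size() == nc, matching the
// m.size()/m.num_samples() == v.size() assert in tt::scale_columns().
void scale_columns_ref(std::vector<float>& out, const std::vector<float>& m,
                       const std::vector<float>& v, size_t nr, size_t nc)
{
    assert(out.size() == nr*nc && m.size() == nr*nc && v.size() == nc);
    for (size_t j = 0; j < nr*nc; ++j)
        out[j] = m[j] * v[j % nc];   // same indexing as _cuda_scale_columns
}

// out(r,c) = m(r,c) * v(r); requires v.size() == nr, matching the
// m.num_samples() == v.size() assert in tt::scale_rows().
void scale_rows_ref(std::vector<float>& out, const std::vector<float>& m,
                    const std::vector<float>& v, size_t nr, size_t nc)
{
    assert(out.size() == nr*nc && m.size() == nr*nc && v.size() == nr);
    for (size_t j = 0; j < nr*nc; ++j)
        out[j] = m[j] * v[j / nc];
}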
dlib/dnn/tensor_tools.h

@@ -48,6 +48,20 @@ namespace dlib { namespace tt
             - #out == sum_cols(pointwise_multiply(mat(lhs), mat(rhs)));
     !*/

+    void scale_columns (
+        tensor& out,
+        const tensor& m,
+        const tensor& v
+    );
+    /*!
+        requires
+            - have_same_dimensions(out,m) == true
+            - is_vector(v) == true
+            - v.size() == mat(m).nc()
+        ensures
+            - performs: out = scale_columns(mat(m),mat(v));
+    !*/
+
     void scale_rows (
         tensor& out,
         const tensor& m,

@@ -56,7 +70,7 @@ namespace dlib { namespace tt
     /*!
         requires
             - have_same_dimensions(out,m) == true
-            - is_vector(mat(v)) == true
+            - is_vector(v) == true
             - v.size() == m.num_samples()
         ensures
             - performs: out = scale_rows(mat(m),mat(v));

@@ -75,7 +89,7 @@ namespace dlib { namespace tt
             - have_same_dimensions(out,m1) == true
             - have_same_dimensions(out,m2) == true
             - have_same_dimensions(v1,v2) == true
-            - is_vector(mat(v1)) == true
+            - is_vector(v1) == true
             - v1.size() == m1.num_samples()
         ensures
             - performs:
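A possible caller of the new spec, assuming a CUDA build of dlib (the non-CUDA branch of tt::scale_columns() in this commit still asserts). The tensor shapes, fill values, and printed output are illustrative only.

// Hedged usage sketch for tt::scale_columns().
#include <dlib/dnn.h>
#include <iostream>

int main()
{
    using namespace dlib;

    resizable_tensor m(3, 5);   // 3 samples, 5 elements per sample
    resizable_tensor v(1, 5);   // one scale per column: v.size() == mat(m).nc()
    resizable_tensor out;
    out.copy_size(m);

    m = 1;                      // fill every element of m with 1
    float* vh = v.host();
    for (long c = 0; c < 5; ++c)
        vh[c] = c + 1;          // column scales 1,2,3,4,5

    // Per the spec block above: out(r,c) == m(r,c)*v(c).
    tt::scale_columns(out, m, v);

    std::cout << mat(out) << std::endl;   // each row should print 1 2 3 4 5
    return 0;
}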