Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dlib
Commits
2c2f9556
Commit
2c2f9556
authored
Apr 28, 2013
by
Davis King
Browse files
Added cca() bindings
parent
8770498c
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
141 additions
and
2 deletions
+141
-2
tools/python/CMakeLists.txt
tools/python/CMakeLists.txt
+1
-0
tools/python/src/cca.cpp
tools/python/src/cca.cpp
+127
-0
tools/python/src/dlib.cpp
tools/python/src/dlib.cpp
+7
-0
tools/python/src/matrix.cpp
tools/python/src/matrix.cpp
+6
-2
No files found.
tools/python/CMakeLists.txt
View file @
2c2f9556
...
@@ -12,4 +12,5 @@ add_python_module(dlib
...
@@ -12,4 +12,5 @@ add_python_module(dlib
src/decision_functions.cpp
src/decision_functions.cpp
src/other.cpp
src/other.cpp
src/basic.cpp
src/basic.cpp
src/cca.cpp
)
)
tools/python/src/cca.cpp
0 → 100644
View file @
2c2f9556
#include <boost/python.hpp>
#include <boost/shared_ptr.hpp>
#include <dlib/statistics.h>
#include "pyassert.h"
#include <boost/python/args.hpp>
using
namespace
dlib
;
using
namespace
boost
::
python
;
// Sparse vector representation used by the bindings in this file: a sequence
// of (index, value) pairs, where the index is an unsigned long and the value
// is a double.
typedef std::vector<std::pair<unsigned long, double> > sparse_vect;
// Plain aggregate bundling everything a canonical correlation analysis run
// produces, so that it can be handed back to Python as a single object.
struct cca_outputs
{
    // Estimated correlations, stored as a matrix<double,0,1>
    // (presumably a runtime-length column vector -- see the dlib matrix docs).
    matrix<double,0,1> correlations;

    // Transformation matrix associated with the left-hand ("L") input vectors.
    matrix<double> Ltrans;

    // Transformation matrix associated with the right-hand ("R") input vectors.
    matrix<double> Rtrans;
};
// Python-facing entry point for canonical correlation analysis on sparse
// vectors.
//
// Validates the arguments with pyassert() and then forwards everything to
// cca(), collecting the returned correlations together with the two
// transformation matrices (which cca() fills in through its Ltrans/Rtrans
// arguments) into a single cca_outputs value.
//
// Preconditions (checked, failure is reported as "Invalid inputs"):
//   - num_correlations > 0
//   - L and R are both non-empty and contain the same number of vectors
//   - regularization >= 0
cca_outputs _cca1 (
    const std::vector<sparse_vect>& L,
    const std::vector<sparse_vect>& R,
    unsigned long num_correlations,
    unsigned long extra_rank,
    unsigned long q,
    double regularization
)
{
    const bool inputs_ok = num_correlations > 0 &&
                           L.size() > 0 &&
                           R.size() > 0 &&
                           L.size() == R.size() &&
                           regularization >= 0;
    pyassert(inputs_ok, "Invalid inputs");

    cca_outputs result;
    result.correlations = cca(L, R,
                              result.Ltrans, result.Rtrans,
                              num_correlations, extra_rank, q, regularization);
    return result;
}
// ----------------------------------------------------------------------------------------
// Forwards to max_index_plus_one() for a sparse_vect argument and returns its
// result unchanged.  Gives the Python binding a plain function with a fixed
// signature to register.
// NOTE(review): presumably needed because max_index_plus_one is a template or
// overloaded in dlib -- confirm.
unsigned long sparse_vector_max_index_plus_one (
    const sparse_vect& v
)
{
    const unsigned long dimensionality = max_index_plus_one(v);
    return dimensionality;
}
// Applies a transformation matrix to a sparse vector, returning trans(m)*v.
//
// Precondition (checked, failure is reported as "Invalid Inputs"):
//   - max_index_plus_one(v) <= m.nr(), i.e. every index used by v must
//     address a valid row of m.
matrix<double,0,1> apply_cca_transform (
    const matrix<double>& m,
    const sparse_vect& v
)
{
    const bool vector_fits = max_index_plus_one(v) <= m.nr();
    pyassert(vector_fits, "Invalid Inputs");

    return sparse_matrix_vector_multiply(trans(m), v);
}
// Registers the CCA related functionality with the Python module:
//   - the _cca_outputs class exposing the correlations, Ltrans and Rtrans
//     members of cca_outputs as properties,
//   - max_index_plus_one(v),
//   - apply_cca_transform(m, v),
//   - cca(L, R, num_correlations, extra_rank=5, q=2, regularization=0).
//
// The docstrings are written as adjacent string literals (one per output
// line) rather than backslash-newline continuations so that the text cannot
// be silently altered by the indentation of the source file.
void bind_cca()
{
    class_<cca_outputs>("_cca_outputs")
        .add_property("correlations", &cca_outputs::correlations)
        .add_property("Ltrans", &cca_outputs::Ltrans)
        .add_property("Rtrans", &cca_outputs::Rtrans);

    def("max_index_plus_one", sparse_vector_max_index_plus_one, arg("v"),
        "ensures\n"
        "    - returns the dimensionality of the given sparse vector. That is, returns a\n"
        "      number one larger than the maximum index value in the vector. If the vector\n"
        "      is empty then returns 0. "
    );

    def("apply_cca_transform", apply_cca_transform, (arg("m"), arg("v")),
        "requires\n"
        "    - max_index_plus_one(v) <= m.nr()\n"
        "ensures\n"
        "    - returns trans(m)*v\n"
        "      (i.e. multiply m by the vector v and return the result) "
    );

    // Note: the precondition below reads len(L) == len(R); it previously
    // referred to a non-existent parameter "r".
    def("cca", _cca1,
        (arg("L"), arg("R"), arg("num_correlations"),
         arg("extra_rank")=5, arg("q")=2, arg("regularization")=0),
        "requires\n"
        "    - num_correlations > 0\n"
        "    - len(L) > 0\n"
        "    - len(R) > 0\n"
        "    - len(L) == len(R)\n"
        "    - regularization >= 0\n"
        "ensures\n"
        "    - This function performs a canonical correlation analysis between the vectors\n"
        "      in L and R. That is, it finds two transformation matrices, Ltrans and\n"
        "      Rtrans, such that row vectors in the transformed matrices L*Ltrans and\n"
        "      R*Rtrans are as correlated as possible (note that in this notation we\n"
        "      interpret L as a matrix with the input vectors in its rows). Note also that\n"
        "      this function tries to find transformations which produce num_correlations\n"
        "      dimensional output vectors.\n"
        "    - Note that you can easily apply the transformation to a vector using\n"
        "      apply_cca_transform(). So for example, like this:\n"
        "        - apply_cca_transform(Ltrans, some_sparse_vector)\n"
        "    - returns a structure containing the Ltrans and Rtrans transformation matrices\n"
        "      as well as the estimated correlations between elements of the transformed\n"
        "      vectors.\n"
        "    - No centering is applied to the L and R matrices. Therefore, if you want a\n"
        "      CCA relative to the centered vectors then you must apply centering yourself\n"
        "      before calling cca().\n"
        "    - This function works with reduced rank approximations of the L and R matrices.\n"
        "      This makes it fast when working with large matrices. In particular, we use\n"
        "      the dlib::svd_fast() routine to find reduced rank representations of the input\n"
        "      matrices by calling it as follows: svd_fast(L, U,D,V, num_correlations+extra_rank, q)\n"
        "      and similarly for R. This means that you can use the extra_rank and q\n"
        "      arguments to cca() to influence the accuracy of the reduced rank\n"
        "      approximation. However, the default values should work fine for most\n"
        "      problems.\n"
        "    - This function performs the ridge regression version of Canonical Correlation\n"
        "      Analysis when regularization is set to a value > 0. In particular, larger\n"
        "      values indicate the solution should be more heavily regularized. This can be\n"
        "      useful when the dimensionality of the data is larger than the number of\n"
        "      samples.\n"
        "    - A good discussion of CCA can be found in the paper \"Canonical Correlation\n"
        "      Analysis\" by David Weenink. In particular, this function is implemented\n"
        "      using equations 29 and 30 from his paper. We also use the idea of doing CCA\n"
        "      on a reduced rank approximation of L and R as suggested by Paramveer S.\n"
        "      Dhillon in his paper \"Two Step CCA: A new spectral method for estimating\n"
        "      vector models of words\". "
    );
}
tools/python/src/dlib.cpp
View file @
2c2f9556
...
@@ -8,10 +8,15 @@ void bind_decision_functions();
...
@@ -8,10 +8,15 @@ void bind_decision_functions();
void
bind_basic_types
();
void
bind_basic_types
();
void
bind_other
();
void
bind_other
();
void
bind_svm_rank_trainer
();
void
bind_svm_rank_trainer
();
void
bind_cca
();
BOOST_PYTHON_MODULE
(
dlib
)
BOOST_PYTHON_MODULE
(
dlib
)
{
{
// Disable printing of the C++ function signature in the python __doc__ string
// since it is full of huge amounts of template clutter.
boost
::
python
::
docstring_options
options
(
true
,
true
,
false
);
bind_matrix
();
bind_matrix
();
bind_vector
();
bind_vector
();
bind_svm_c_trainer
();
bind_svm_c_trainer
();
...
@@ -19,4 +24,6 @@ BOOST_PYTHON_MODULE(dlib)
...
@@ -19,4 +24,6 @@ BOOST_PYTHON_MODULE(dlib)
bind_basic_types
();
bind_basic_types
();
bind_other
();
bind_other
();
bind_svm_rank_trainer
();
bind_svm_rank_trainer
();
bind_cca
();
}
}
tools/python/src/matrix.cpp
View file @
2c2f9556
...
@@ -4,11 +4,13 @@
...
@@ -4,11 +4,13 @@
#include <dlib/matrix.h>
#include <dlib/matrix.h>
#include <dlib/string.h>
#include <dlib/string.h>
#include "serialize_pickle.h"
#include "serialize_pickle.h"
#include <boost/python/args.hpp>
using
namespace
dlib
;
using
namespace
dlib
;
using
namespace
std
;
using
namespace
boost
::
python
;
using
namespace
boost
::
python
;
using
std
::
string
;
using
std
::
ostringstream
;
void
matrix_set_size
(
matrix
<
double
>&
m
,
long
nr
,
long
nc
)
void
matrix_set_size
(
matrix
<
double
>&
m
,
long
nr
,
long
nc
)
...
@@ -159,10 +161,12 @@ void bind_matrix()
...
@@ -159,10 +161,12 @@ void bind_matrix()
class_
<
matrix
<
double
>
>
(
"matrix"
,
init
<>
())
class_
<
matrix
<
double
>
>
(
"matrix"
,
init
<>
())
.
def
(
"__init__"
,
make_constructor
(
&
make_matrix_from_size
))
.
def
(
"__init__"
,
make_constructor
(
&
make_matrix_from_size
))
.
def
(
"set_size"
,
&
matrix_set_size
)
.
def
(
"set_size"
,
&
matrix_set_size
,
(
arg
(
"rows"
),
arg
(
"cols"
)),
"Set the size of the matrix to the given number of rows and columns."
)
.
def
(
"__init__"
,
make_constructor
(
&
from_object
))
.
def
(
"__init__"
,
make_constructor
(
&
from_object
))
.
def
(
"__repr__"
,
&
matrix_double__repr__
)
.
def
(
"__repr__"
,
&
matrix_double__repr__
)
.
def
(
"__str__"
,
&
matrix_double__str__
)
.
def
(
"__str__"
,
&
matrix_double__str__
)
.
def
(
"nr"
,
&
matrix
<
double
>::
nr
,
"Return the number of rows in the matrix."
)
.
def
(
"nc"
,
&
matrix
<
double
>::
nc
,
"Return the number of columns in the matrix."
)
.
def
(
"__len__"
,
&
matrix_double__len__
)
.
def
(
"__len__"
,
&
matrix_double__len__
)
.
def
(
"__getitem__"
,
&
matrix_double__getitem__
,
with_custodian_and_ward_postcall
<
0
,
1
>
())
.
def
(
"__getitem__"
,
&
matrix_double__getitem__
,
with_custodian_and_ward_postcall
<
0
,
1
>
())
.
add_property
(
"shape"
,
&
get_matrix_size
)
.
add_property
(
"shape"
,
&
get_matrix_size
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment