Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dlib
Commits
26dab733
Commit
26dab733
authored
Apr 20, 2013
by
Davis King
Browse files
Added create_max_margin_projection_hash()
parent
f320932e
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
119 additions
and
1 deletion
+119
-1
dlib/lsh/create_random_projection_hash.h
dlib/lsh/create_random_projection_hash.h
+82
-0
dlib/lsh/create_random_projection_hash_abstract.h
dlib/lsh/create_random_projection_hash_abstract.h
+37
-1
No files found.
dlib/lsh/create_random_projection_hash.h
View file @
26dab733
...
...
@@ -8,6 +8,7 @@
#include "../matrix.h"
#include "../rand.h"
#include "../statistics.h"
#include "../svm.h"
#include <vector>
namespace
dlib
...
...
@@ -113,6 +114,87 @@ namespace dlib
return
projection_hash
(
proj
,
offset
);
}
// ----------------------------------------------------------------------------------------
// Builds a projection_hash with `bits` hyperplanes, where each plane is the
// decision boundary of a linear SVM trained on samples drawn from v that have
// been given random +1/-1 labels.  The samples are whitened (using the
// covariance of v) and shifted by the whitened mean before training, and that
// transform is folded back into the returned hash so it applies directly to
// vectors in the original space of v.
//
//   v    - container of equally sized, non-empty column vectors (size() > 1)
//   bits - number of hyperplanes to learn, 0 < bits <= 32
//   C    - value handed to svm_c_linear_dcd_trainer::set_c()
template <
    typename vector_type
    >
projection_hash create_max_margin_projection_hash (
    const vector_type& v,
    const int bits,
    const double C = 10
)
{
    // make sure requires clause is not broken
    DLIB_ASSERT(0 < bits && bits <= 32 && v.size() > 1,
        "\tprojection_hash create_max_margin_projection_hash()"
        << "\n\tInvalid arguments were given to this function."
        << "\n\tbits: " << bits
        << "\n\tv.size(): " << v.size()
        );

#ifdef ENABLE_ASSERTS
    // The per-element checks need a loop, so they are only compiled in when
    // asserts are enabled.
    for (unsigned long i = 0; i < v.size(); ++i)
    {
        DLIB_ASSERT(v[0].size() == v[i].size() && v[i].size() > 0 && is_col_vector(v[i]),
            "\tprojection_hash create_max_margin_projection_hash()"
            << "\n\tInvalid arguments were given to this function."
            << "\n\tv[0].size(): " << v[0].size()
            << "\n\tv[" << i << "].size(): " << v[i].size()
            << "\n\tis_col_vector(v[" << i << "]): " << is_col_vector(v[i])
            );
    }
#endif

    // Accumulate the mean and covariance of the input vectors.
    running_covariance<matrix<double> > rc;
    for (unsigned long i = 0; i < v.size(); ++i)
        rc.add(matrix_cast<double>(v[i]));

    // compute a whitening matrix
    matrix<double> whiten = trans(chol(pinv(rc.covariance())));
    // The mean expressed in the whitened space; subtracting it centers the
    // whitened samples.
    const matrix<double,0,1> meanval = whiten*rc.mean();

    dlib::rand rnd;

    typedef matrix<double,0,1> sample_type;
    random_subset_selector<sample_type> training_samples;
    random_subset_selector<double> training_labels;
    // We set this up to use enough samples to cover the vector space used by elements
    // of v.
    training_samples.set_max_size(v[0].size()*10);
    training_labels.set_max_size(v[0].size()*10);

    matrix<double> proj(bits, v[0].size());
    matrix<double,0,1> offset(bits);

    // learn the random planes and put them into proj and offset.
    for (int itr = 0; itr < offset.size(); ++itr)
    {
        training_samples.make_empty();
        training_labels.make_empty();
        // pick random training data and give each sample a random label.
        for (unsigned long i = 0; i < v.size(); ++i)
        {
            // Cast to double exactly as the covariance loop above does, so both
            // uses of v[i] agree on the element type.
            training_samples.add(whiten*matrix_cast<double>(v[i]) - meanval);
            if (rnd.get_random_double() > 0.5)
                training_labels.add(+1);
            else
                training_labels.add(-1);
        }

        svm_c_linear_dcd_trainer<linear_kernel<sample_type> > trainer;
        trainer.set_c(C);
        decision_function<linear_kernel<sample_type> > df = trainer.train(training_samples, training_labels);
        // Row itr of proj holds the learned plane's direction and offset(itr)
        // holds the negated bias term of the decision function.
        offset(itr) = -df.b;
        set_rowm(proj,itr) = trans(df.basis_vectors(0));
    }

    // Fold the whitening and centering into the final hash:
    //   proj*(whiten*x - meanval) + offset == (proj*whiten)*x + (offset - proj*meanval)
    return projection_hash(proj*whiten, offset-proj*meanval);
}
// ----------------------------------------------------------------------------------------
}
...
...
dlib/lsh/create_random_projection_hash_abstract.h
View file @
26dab733
...
...
@@ -35,7 +35,43 @@ namespace dlib
- H.num_hash_bins() == pow(2,bits)
- H will be setup so that it hashes the contents of v such that
each bin ends up with roughly the same number of elements
in it.
in it. This is accomplished by picking random hyperplanes
passing through the data.
!*/
// ----------------------------------------------------------------------------------------
template <
    typename vector_type
    >
projection_hash create_max_margin_projection_hash (
    const vector_type& v,
    const int bits,
    const double C = 10
);
/*!
    requires
        - 0 < bits <= 32
        - v.size() > 1
        - vector_type == a std::vector or compatible type containing dlib::matrix
          objects, each representing a column vector of the same size.
        - for all valid i, j:
            - is_col_vector(v[i]) == true
            - v[i].size() > 0
            - v[i].size() == v[j].size()
            - i.e. v contains only column vectors and all the column vectors
              have the same non-zero length
        - NOTE(review): C is handed straight to svm_c_linear_dcd_trainer::set_c(),
          so it presumably has to satisfy that function's own requirements
          (likely C > 0) -- confirm against the trainer's specification.
    ensures
        - returns a hash function H such that:
            - H.num_hash_bins() == pow(2,bits)
            - H will be setup so that it hashes the contents of v such that
              each bin ends up with roughly the same number of elements
              in it.  This is accomplished using a variation on the random hyperplane
              generation technique from the paper:
                Random Maximum Margin Hashing by Alexis Joly and Olivier Buisson
              In particular, we use the svm_c_linear_dcd_trainer to generate planes.
              We train it on randomly selected and randomly labeled points from v.
              The C SVM parameter is set to the given C argument.
        - One plane is trained for each of the bits hash bits, and each plane is
          trained on its own freshly drawn set of samples and random +1/-1 labels.
        - The samples are whitened using the covariance of v and centered before
          training.  That transform is folded into H, so H is applied directly to
          vectors in the same space as the elements of v.
        - If C is not supplied it defaults to 10.
!*/
// ----------------------------------------------------------------------------------------
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment