Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dlib
Commits
320d56ce
Commit
320d56ce
authored
Dec 25, 2011
by
Davis King
Browse files
Added some basic locality sensitive hashing tools.
parent
3ebf0f2e
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
253 additions
and
0 deletions
+253
-0
dlib/lsh.h
dlib/lsh.h
+12
-0
dlib/lsh/projection_hash.h
dlib/lsh/projection_hash.h
+170
-0
dlib/lsh/projection_hash_abstract.h
dlib/lsh/projection_hash_abstract.h
+71
-0
No files found.
dlib/lsh.h
0 → 100644
View file @
320d56ce
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_LSh_
#define DLIB_LSh_
#include "lsh/projection_hash.h"
#endif // DLIB_LSh_
dlib/lsh/projection_hash.h
0 → 100644
View file @
320d56ce
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_PROJECTION_HASh_H__
#define DLIB_PROJECTION_HASh_H__
#include "projection_hash_abstract.h"
#include "../matrix.h"
#include "../rand.h"
#include <algorithm>
#include <vector>
namespace
dlib
{
// ----------------------------------------------------------------------------------------
class projection_hash
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            This is a hash function object defined by a projection matrix and
            an offset vector.  A vector v is hashed by evaluating
            proj*v + offset and emitting one bit per element of the result:
            the bit is 1 when the element is > 0 and 0 otherwise.  The first
            element of the result ends up in the most significant emitted bit.
    !*/

public:

    // Leaves the projection matrix and the offset vector default constructed.
    projection_hash() {}

    // Stores copies of the given projection matrix and offset vector.
    // NOTE(review): nothing here checks that proj_.nr() matches
    // offset_.size(); a mismatch presumably surfaces inside operator() when
    // the matrix expression is evaluated -- confirm against the matrix
    // library's own checks.
    template <typename EXP1, typename EXP2>
    projection_hash(
        const matrix_exp<EXP1>& proj_,
        const matrix_exp<EXP2>& offset_
    ) : proj(proj_), offset(offset_) {}

    // Returns the projection matrix used by operator().
    const matrix<double>& get_projection_matrix() const
    {
        return proj;
    }

    // Returns the offset vector added to the projected input by operator().
    const matrix<double,0,1>& get_offset_matrix() const
    {
        return offset;
    }

    // Returns the number of distinct hash values, i.e. 2^(number of offsets).
    unsigned long size() const
    {
        // An integer shift yields the exact power of two.  The previous
        // std::pow(2, offset.size()) call took two integral arguments (which
        // is ambiguous under the C++03 overload set), depended on <cmath>
        // being pulled in indirectly, and went through floating point.
        // As before, the result is only meaningful when offset.size() is
        // smaller than the number of bits in unsigned long.
        return 1UL << offset.size();
    }

    // Hashes v: projects it, adds the offset, and packs the signs of the
    // resulting elements into an unsigned long (see do_hash()).
    template <typename EXP>
    unsigned long operator()(const matrix_exp<EXP>& v) const
    {
        return do_hash(proj*matrix_cast<double>(v) + offset);
    }

private:

    // Packs one bit per element of v into the return value.  Elements are
    // visited in index order; each step shifts the accumulated hash left by
    // one and sets the low bit when the element is strictly positive.
    template <typename EXP>
    unsigned long do_hash(const matrix_exp<EXP>& v) const
    {
        unsigned long h = 0;
        for (long i = 0; i < v.size(); ++i)
        {
            h <<= 1;
            if (v(i) > 0)
                h |= 1;
        }
        return h;
    }

    matrix<double> proj;          // projection applied to the input vector
    matrix<double,0,1> offset;    // added to the projection before thresholding at 0
};
// ----------------------------------------------------------------------------------------
inline
void
serialize
(
const
projection_hash
&
item
,
std
::
ostream
&
out
)
{
serialize
(
item
.
get_projection_matrix
(),
out
);
serialize
(
item
.
get_offset_matrix
(),
out
);
}
// Restores item from in.  The projection matrix is read first and the offset
// matrix second (the order serialize() writes them); item is only assigned
// once both reads have returned.
inline void deserialize(projection_hash& item, std::istream& in)
{
    matrix<double> loaded_proj;
    deserialize(loaded_proj, in);

    matrix<double,0,1> loaded_offset;
    deserialize(loaded_offset, in);

    item = projection_hash(loaded_proj, loaded_offset);
}
// ----------------------------------------------------------------------------------------
// Builds a projection_hash with `bits` output bits from the sample vectors in v.
//
// A random Gaussian projection matrix (bits rows, v[0].size() columns) is
// combined with a whitening transform computed from the covariance of v.  The
// offsets are then chosen one bit at a time: for each bit, the hash bucket that
// currently holds the most samples is located and the offset is set so that
// this bucket is split at the median of its projected values.
//
// Preconditions (not checked here):
//   - v contains at least one vector; v[0] is used to get the dimensionality
//     and the median lookup below needs a non-empty bucket.
//   - 0 < bits < number of bits in unsigned long, since 2^bits counters are
//     allocated and the hash is accumulated in an unsigned long.
template <typename vector_type>
projection_hash create_random_projection_hash(
    const vector_type& v,
    const int bits
)
{
    // compute a whitening matrix
    matrix<double> whiten = trans(chol(pinv(covariance(vector_to_matrix(v)))));

    // hashes
    std::vector<unsigned long> h(v.size(), 0);

    std::vector<double> vals(v.size(), 0);

    // number of hits for each hash value
    std::vector<unsigned long> counts;

    std::vector<double> temp;

    // build a random projection matrix
    dlib::rand rnd;
    matrix<double> proj(bits, v[0].size());
    for (long r = 0; r < proj.nr(); ++r)
        for (long c = 0; c < proj.nc(); ++c)
            proj(r,c) = rnd.get_random_gaussian();

    // merge whitening matrix with projection matrix
    proj = proj*whiten;

    matrix<double,0,1> offset(bits);

    // Number of possible hash values.  Computed once with an integer shift
    // rather than calling std::pow(2, bits) on every iteration: that call took
    // two integral arguments (ambiguous under C++03), relied on <cmath> being
    // included indirectly, and went through floating point.
    const unsigned long num_bins = 1UL << bits;

    // figure out what the offset values should be
    for (int itr = 0; itr < offset.size(); ++itr)
    {
        counts.assign(num_bins, 0);

        // make room for the next bit and count the popularity of each hash value
        for (unsigned long i = 0; i < h.size(); ++i)
        {
            h[i] <<= 1;
            counts[h[i]] += 1;
        }

        const unsigned long max_h = index_of_max(vector_to_matrix(counts));

        // project every sample onto the current row and collect the values
        // that belong to the most popular bucket
        temp.clear();
        for (unsigned long i = 0; i < v.size(); ++i)
        {
            vals[i] = dot(rowm(proj,itr), v[i]);
            if (h[i] == max_h)
                temp.push_back(vals[i]);
        }

        // split down the middle.  Only the element at the median position is
        // needed, so a partial ordering is enough; it yields the same value a
        // full sort would put at that index.
        const std::vector<double>::size_type mid = temp.size()/2;
        std::nth_element(temp.begin(), temp.begin() + mid, temp.end());
        const double split = temp[mid];
        offset(itr) = -split;

        // set the new low bit for every sample on the positive side of the split
        for (unsigned long i = 0; i < vals.size(); ++i)
        {
            if (vals[i] - split > 0)
                h[i] |= 1;
        }
    }

    return projection_hash(proj, offset);
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_PROJECTION_HASh_H__
dlib/lsh/projection_hash_abstract.h
0 → 100644
View file @
320d56ce
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_PROJECTION_HASh_ABSTRACT_H__
#ifdef DLIB_PROJECTION_HASh_ABSTRACT_H__
#include "../matrix.h"
#include "../rand.h"
#include <vector>
namespace
dlib
{
// ----------------------------------------------------------------------------------------
class projection_hash
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            This object is a hash function for vectors.  It is defined by a
            projection matrix and an offset vector.  To hash a vector v it
            computes get_projection_matrix()*v + get_offset_matrix() and then
            produces one bit for each element of the result, where a bit is 1
            if the corresponding element is > 0 and 0 otherwise.  The bits are
            packed into an unsigned long with the first element occupying the
            most significant of the produced bits.
    !*/

public:

    projection_hash (
    );
    /*!
        ensures
            - the projection matrix and the offset vector are default
              constructed
    !*/

    template <typename EXP1, typename EXP2>
    projection_hash (
        const matrix_exp<EXP1>& proj,
        const matrix_exp<EXP2>& offset
    );
    /*!
        ensures
            - #get_projection_matrix() == proj
            - #get_offset_matrix() == offset
    !*/

    const matrix<double>& get_projection_matrix (
    ) const;
    /*!
        ensures
            - returns the matrix that input vectors are multiplied by when
              they are hashed
    !*/

    const matrix<double,0,1>& get_offset_matrix (
    ) const;
    /*!
        ensures
            - returns the vector that is added to the projected input before
              the result is thresholded at 0
    !*/

    unsigned long size (
    ) const;
    /*!
        ensures
            - returns 2 raised to the power get_offset_matrix().size(), i.e.
              the number of distinct values operator() can produce
    !*/

    template <typename EXP>
    unsigned long operator() (
        const matrix_exp<EXP>& v
    ) const;
    /*!
        ensures
            - returns the hash of v, computed from the signs of the elements
              of get_projection_matrix()*matrix_cast<double>(v) + get_offset_matrix()
    !*/
};
// ----------------------------------------------------------------------------------------
void serialize (
    const projection_hash& item,
    std::ostream& out
);
/*!
    provides serialization support: writes the projection matrix and then
    the offset matrix of item to out
!*/
void deserialize (
    projection_hash& item,
    std::istream& in
);
/*!
    provides deserialization support: reads a projection matrix and then an
    offset matrix from in and assigns a projection_hash built from them to
    item
!*/
// ----------------------------------------------------------------------------------------
template <typename vector_type>
projection_hash create_random_projection_hash (
    const vector_type& v,
    const int bits
);
/*!
    requires
        - v.size() > 0 (v[0] is used to determine the dimensionality of the
          vectors)
        - bits > 0
    ensures
        - returns a projection_hash whose projection matrix has bits rows and
          v[0].size() columns and whose offset vector has bits elements.
        - the projection matrix is a random Gaussian matrix multiplied by a
          whitening matrix computed from the covariance of the vectors in v.
        - each offset is chosen so that the hash bucket currently holding the
          most elements of v is split at the median of its projected values.
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_PROJECTION_HASh_ABSTRACT_H__
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment