Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dlib
Commits
52e35c31
Commit
52e35c31
authored
Dec 16, 2012
by
Davis King
Browse files
Made this object properly warm-startable
parent
34a9e4f6
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
160 additions
and
78 deletions
+160
-78
dlib/svm/svm_c_linear_dcd_trainer.h
dlib/svm/svm_c_linear_dcd_trainer.h
+160
-78
No files found.
dlib/svm/svm_c_linear_dcd_trainer.h
View file @
52e35c31
...
@@ -207,6 +207,128 @@ namespace dlib
...
@@ -207,6 +207,128 @@ namespace dlib
Cneg
=
C
;
Cneg
=
C
;
}
}
// Opaque warm-start state for svm_c_linear_dcd_trainer. Callers keep one of
// these across successive train() calls so the optimizer can resume from the
// previous solution (alpha, w, cached norms) instead of starting cold.
class
optimizer_state
{
    friend
    class
    svm_c_linear_dcd_trainer
    ;

public:
    // Starts in the "never initialized" state; init() performs lazy setup on
    // the first training call and grows the state on subsequent calls.
    optimizer_state
    (
    )
    :
    did_init
    (
    false
    )
    {}

private:

    // Prepare (or warm-restart) the internal state for a training run over
    // the sample set x.
    //
    // x             - the training samples (dense matrices or sparse vectors)
    // have_bias_    - whether the trainer appends an implicit bias feature
    // last_weight_1_- whether the last weight is forced to 1
    //
    // On the first call this sizes alpha/w/Q/index from scratch.  On later
    // calls it verifies the trainer settings are unchanged, pads alpha with
    // zeros for any newly added samples, and (for sparse samples only) grows
    // w to cover newly observed feature dimensions.
    template
    <
    typename
    in_sample_vector_type
    >
    void
    init
    (
    const
    in_sample_vector_type
    &
    x
    ,
    bool
    have_bias_
    ,
    bool
    last_weight_1_
    )
    {
        const
        long
        new_dims
        =
        max_index_plus_one
        (
        x
        );
        // Index of the first sample not yet present in alpha/Q/index; samples
        // at [new_idx, x.size()) get their cached values computed below.
        long
        new_idx
        =
        0
        ;

        if
        (
        did_init
        )
        {
            // A warm start is only valid if the trainer's configuration did
            // not change between calls.
            DLIB_CASSERT
            (
            have_bias_
            ==
            have_bias
            &&
            last_weight_1_
            ==
            last_weight_1
            ,
            ""
            );
            // Dimensions may grow (sparse samples) but never shrink, and the
            // sample set must be a superset of what we trained on before.
            DLIB_CASSERT
            (
            new_dims
            >=
            dims
            ,
            ""
            );
            DLIB_CASSERT
            (
            x
            .
            size
            ()
            >=
            static_cast
            <
            long
            >
            (
            alpha
            .
            size
            ()),
            ""
            );

            // make sure we amortize the cost of growing the alpha vector.
            if
            (
            alpha
            .
            capacity
            ()
            <
            static_cast
            <
            unsigned
            long
            >
            (
            x
            .
            size
            ()))
                alpha
                .
                reserve
                (
                x
                .
                size
                ()
                *
                2
                );

            new_idx
            =
            alpha
            .
            size
            ();

            // Make sure alpha has the same length as x. So pad with extra zeros if
            // necessary to make this happen.
            alpha
            .
            resize
            (
            x
            .
            size
            (),
            0
            );

            if
            (
            new_dims
            !=
            dims
            )
            {
                // The only valid way the dimensions can be different here is if
                // you are using a sparse vector type. This is because we might
                // have had training samples which just happened to not include all
                // the features previously. Therefore, max_index_plus_one() would
                // have given too low of a result. But for dense vectors it is
                // definitely a user error if the dimensions don't match.
                DLIB_CASSERT
                (
                is_matrix
                <
                sample_type
                >::
                value
                ==
                false
                ,
                ""
                );

                // extend w by the right number of elements
                if
                (
                have_bias
                )
                {
                    // Splice some zeros into the w vector so it will have the
                    // right length. Here we are being careful to move the bias
                    // weight to the end of the resulting vector.
                    w
                    =
                    join_cols
                    (
                    join_cols
                    (
                    colm
                    (
                    w
                    ,
                    0
                    ,
                    dims
                    ),
                    zeros_matrix
                    <
                    scalar_type
                    >
                    (
                    1
                    ,
                    new_dims
                    -
                    dims
                    )),
                    uniform_matrix
                    <
                    scalar_type
                    >
                    (
                    1
                    ,
                    1
                    ,
                    w
                    (
                    dims
                    ))
                    );
                }
                else
                {
                    // Just concatenate the right number of zeros.
                    w
                    =
                    join_cols
                    (
                    w
                    ,
                    zeros_matrix
                    <
                    scalar_type
                    >
                    (
                    1
                    ,
                    new_dims
                    -
                    dims
                    ));
                }
                dims
                =
                new_dims
                ;
            }
        }
        else
        {
            // First call: size everything from scratch.
            did_init
            =
            true
            ;
            have_bias
            =
            have_bias_
            ;
            last_weight_1
            =
            last_weight_1_
            ;
            dims
            =
            new_dims
            ;

            alpha
            .
            resize
            (
            x
            .
            size
            ());

            index
            .
            reserve
            (
            x
            .
            size
            ());
            Q
            .
            reserve
            (
            x
            .
            size
            ());

            // w gets one extra element when a bias feature is in use.
            if
            (
            have_bias
            )
                w
                .
                set_size
                (
                dims
                +
                1
                );
            else
                w
                .
                set_size
                (
                dims
                );

            w
            =
            0
            ;
        }

        // Cache the squared norm of every new sample and build the active
        // index set.  Samples with zero norm are skipped (when there is no
        // bias term) since they carry no information and would later cause a
        // division by zero.
        for
        (
        long
        i
        =
        new_idx
        ;
        i
        <
        x
        .
        size
        ();
        ++
        i
        )
        {
            Q
            .
            push_back
            (
            dlib
            ::
            dot
            (
            x
            (
            i
            ),
            x
            (
            i
            )));
            if
            (
            have_bias
            )
            {
                index
                .
                push_back
                (
                i
                );
                // The implicit bias feature adds 1 to each squared norm.
                Q
                .
                back
                ()
                +=
                1
                ;
            }
            else
            if
            (
            Q
            .
            back
            ()
            !=
            0
            )
            {
                index
                .
                push_back
                (
                i
                );
            }
        }

        if
        (
        last_weight_1
        )
            w
            (
            dims
            -
            1
            )
            =
            1
            ;
    }

    bool
    did_init
    ;               // true once init() has run at least once
    bool
    have_bias
    ;               // trainer setting captured on first init()
    bool
    last_weight_1
    ;               // trainer setting captured on first init()
    std
    ::
    vector
    <
    scalar_type
    >
    alpha
    ;               // dual variables, one per training sample
    scalar_vector_type
    w
    ;               // primal weight vector (plus bias element if have_bias)
    std
    ::
    vector
    <
    scalar_type
    >
    Q
    ;               // cached squared norms of the samples
    std
    ::
    vector
    <
    long
    >
    index
    ;               // indices of samples eligible for optimization
    long
    dims
    ;               // number of feature dimensions seen so far
    dlib
    ::
    rand
    rnd
    ;               // RNG used to shuffle the active set each pass
};
template
<
template
<
typename
in_sample_vector_type
,
typename
in_sample_vector_type
,
typename
in_scalar_vector_type
typename
in_scalar_vector_type
...
@@ -216,9 +338,8 @@ namespace dlib
...
@@ -216,9 +338,8 @@ namespace dlib
const
in_scalar_vector_type
&
y
const
in_scalar_vector_type
&
y
)
const
)
const
{
{
scalar_vector_type
alpha
(
x
.
size
());
optimizer_state
state
;
alpha
=
0
;
return
do_train
(
vector_to_matrix
(
x
),
vector_to_matrix
(
y
),
state
);
return
do_train
(
vector_to_matrix
(
x
),
vector_to_matrix
(
y
),
alpha
);
}
}
template
<
template
<
...
@@ -228,24 +349,10 @@ namespace dlib
...
@@ -228,24 +349,10 @@ namespace dlib
const
decision_function
<
kernel_type
>
train
(
const
decision_function
<
kernel_type
>
train
(
const
in_sample_vector_type
&
x
,
const
in_sample_vector_type
&
x
,
const
in_scalar_vector_type
&
y
,
const
in_scalar_vector_type
&
y
,
scalar_vector_type
&
alpha
optimizer_state
&
state
)
const
)
const
{
{
DLIB_CASSERT
(
static_cast
<
long
>
(
x
.
size
())
>=
alpha
.
size
(),
return
do_train
(
vector_to_matrix
(
x
),
vector_to_matrix
(
y
),
state
);
"
\t
decision_function svm_c_linear_dcd_trainer::train(x,y,alpha)"
<<
"
\n\t
invalid inputs were given to this function"
<<
"
\n\t
x.size(): "
<<
x
.
size
()
<<
"
\n\t
alpha.size(): "
<<
alpha
.
size
()
);
if
(
static_cast
<
long
>
(
x
.
size
())
>
alpha
.
size
())
{
// Make sure alpha has the same length as x. So pad with extra zeros if
// necessary to make this happen.
alpha
=
join_cols
(
alpha
,
zeros_matrix
<
scalar_type
>
(
1
,
x
.
size
()
-
alpha
.
size
()));
}
return
do_train
(
vector_to_matrix
(
x
),
vector_to_matrix
(
y
),
alpha
);
}
}
private:
private:
...
@@ -259,12 +366,9 @@ namespace dlib
...
@@ -259,12 +366,9 @@ namespace dlib
const
decision_function
<
kernel_type
>
do_train
(
const
decision_function
<
kernel_type
>
do_train
(
const
in_sample_vector_type
&
x
,
const
in_sample_vector_type
&
x
,
const
in_scalar_vector_type
&
y
,
const
in_scalar_vector_type
&
y
,
scalar_vector_type
&
alpha
optimizer_state
&
state
)
const
)
const
{
{
// TODO, requires labels are all +1 or -1. But we don't have to see both
// types.
// make sure requires clause is not broken
// make sure requires clause is not broken
DLIB_ASSERT
(
is_learning_problem
(
x
,
y
)
==
true
,
DLIB_ASSERT
(
is_learning_problem
(
x
,
y
)
==
true
,
"
\t
decision_function svm_c_linear_dcd_trainer::train(x,y)"
"
\t
decision_function svm_c_linear_dcd_trainer::train(x,y)"
...
@@ -273,50 +377,25 @@ namespace dlib
...
@@ -273,50 +377,25 @@ namespace dlib
<<
"
\n\t
y.size(): "
<<
y
.
size
()
<<
"
\n\t
y.size(): "
<<
y
.
size
()
<<
"
\n\t
is_learning_problem(x,y): "
<<
is_learning_problem
(
x
,
y
)
<<
"
\n\t
is_learning_problem(x,y): "
<<
is_learning_problem
(
x
,
y
)
);
);
#if ENABLE_ASSERTS
const
long
dims
=
max_index_plus_one
(
x
);
for
(
long
i
=
0
;
i
<
x
.
size
();
++
i
)
// TODO, return an opaque object instead of alpha. Also, the object
// needs to verify that the trainer has the same settings from one
// call to the next.
std
::
vector
<
long
>
index
(
x
.
size
());
scalar_vector_type
Q
(
x
.
size
());
scalar_vector_type
w
;
if
(
have_bias
)
w
.
set_size
(
dims
+
1
);
else
w
.
set_size
(
dims
);
w
=
0
;
if
(
last_weight_1
)
w
(
dims
-
1
)
=
1
;
long
ii
=
0
;
for
(
long
i
=
0
;
i
<
alpha
.
size
();
++
i
)
{
{
index
[
ii
]
=
i
;
DLIB_ASSERT
(
y
(
i
)
==
+
1
||
y
(
i
)
==
-
1
,
Q
(
ii
)
=
dlib
::
dot
(
x
(
i
),
x
(
i
));
"
\t
decision_function svm_c_linear_dcd_trainer::train(x,y)"
<<
"
\n\t
invalid inputs were given to this function"
if
(
have_bias
)
<<
"
\n\t
y("
<<
i
<<
"): "
<<
y
(
i
)
{
);
Q
(
ii
)
+=
1
;
++
ii
;
}
else
if
(
Q
(
ii
)
!=
0
)
{
++
ii
;
}
}
}
#endif
// What we are doing here is ignoring x elements that have 0 norm. We
state
.
init
(
x
,
have_bias
,
last_weight_1
);
// Do this because they are impossible to classify and this also avoids
// a division by zero problem later on in the code.
const
long
max_possible_active
=
ii
;
dlib
::
rand
rnd
;
std
::
vector
<
scalar_type
>&
alpha
=
state
.
alpha
;
long
active_size
=
max_possible_active
;
scalar_vector_type
&
w
=
state
.
w
;
std
::
vector
<
long
>&
index
=
state
.
index
;
const
long
dims
=
state
.
dims
;
unsigned
long
active_size
=
index
.
size
();
scalar_type
PG_max_prev
=
std
::
numeric_limits
<
scalar_type
>::
infinity
();
scalar_type
PG_max_prev
=
std
::
numeric_limits
<
scalar_type
>::
infinity
();
scalar_type
PG_min_prev
=
-
std
::
numeric_limits
<
scalar_type
>::
infinity
();
scalar_type
PG_min_prev
=
-
std
::
numeric_limits
<
scalar_type
>::
infinity
();
...
@@ -328,15 +407,15 @@ namespace dlib
...
@@ -328,15 +407,15 @@ namespace dlib
scalar_type
PG_min
=
std
::
numeric_limits
<
scalar_type
>::
infinity
();
scalar_type
PG_min
=
std
::
numeric_limits
<
scalar_type
>::
infinity
();
// randomly shuffle the indices
// randomly shuffle the indices
for
(
long
i
=
0
;
i
<
active_size
;
++
i
)
for
(
unsigned
long
i
=
0
;
i
<
active_size
;
++
i
)
{
{
// pick a random index >= i
// pick a random index >= i
const
long
j
=
i
+
rnd
.
get_random_32bit_number
()
%
(
active_size
-
i
);
const
long
j
=
i
+
state
.
rnd
.
get_random_32bit_number
()
%
(
active_size
-
i
);
std
::
swap
(
index
[
i
],
index
[
j
]);
std
::
swap
(
index
[
i
],
index
[
j
]);
}
}
// for all the active training samples
// for all the active training samples
for
(
long
ii
=
0
;
ii
<
active_size
;
++
ii
)
for
(
unsigned
long
ii
=
0
;
ii
<
active_size
;
++
ii
)
{
{
const
long
i
=
index
[
ii
];
const
long
i
=
index
[
ii
];
...
@@ -344,7 +423,7 @@ namespace dlib
...
@@ -344,7 +423,7 @@ namespace dlib
const
scalar_type
C
=
(
y
(
i
)
>
0
)
?
Cpos
:
Cneg
;
const
scalar_type
C
=
(
y
(
i
)
>
0
)
?
Cpos
:
Cneg
;
scalar_type
PG
=
0
;
scalar_type
PG
=
0
;
if
(
alpha
(
i
)
==
0
)
if
(
alpha
[
i
]
==
0
)
{
{
if
(
G
>
PG_max_prev
)
if
(
G
>
PG_max_prev
)
{
{
...
@@ -358,7 +437,7 @@ namespace dlib
...
@@ -358,7 +437,7 @@ namespace dlib
if
(
G
<
0
)
if
(
G
<
0
)
PG
=
G
;
PG
=
G
;
}
}
else
if
(
alpha
(
i
)
==
C
)
else
if
(
alpha
[
i
]
==
C
)
{
{
if
(
G
<
PG_min_prev
)
if
(
G
<
PG_min_prev
)
{
{
...
@@ -385,9 +464,9 @@ namespace dlib
...
@@ -385,9 +464,9 @@ namespace dlib
// if PG != 0
// if PG != 0
if
(
std
::
abs
(
PG
)
>
1e-12
)
if
(
std
::
abs
(
PG
)
>
1e-12
)
{
{
const
scalar_type
alpha_old
=
alpha
(
i
)
;
const
scalar_type
alpha_old
=
alpha
[
i
]
;
alpha
(
i
)
=
std
::
min
(
std
::
max
(
alpha
(
i
)
-
G
/
Q
(
i
)
,
(
scalar_type
)
0.0
),
C
);
alpha
[
i
]
=
std
::
min
(
std
::
max
(
alpha
[
i
]
-
G
/
state
.
Q
[
i
]
,
(
scalar_type
)
0.0
),
C
);
const
scalar_type
delta
=
(
alpha
(
i
)
-
alpha_old
)
*
y
(
i
);
const
scalar_type
delta
=
(
alpha
[
i
]
-
alpha_old
)
*
y
(
i
);
add_to
(
w
,
x
(
i
),
delta
);
add_to
(
w
,
x
(
i
),
delta
);
if
(
have_bias
)
if
(
have_bias
)
w
(
w
.
size
()
-
1
)
-=
delta
;
w
(
w
.
size
()
-
1
)
-=
delta
;
...
@@ -411,12 +490,12 @@ namespace dlib
...
@@ -411,12 +490,12 @@ namespace dlib
{
{
// stop if we are within eps tolerance and the last iteration
// stop if we are within eps tolerance and the last iteration
// was over all the samples
// was over all the samples
if
(
active_size
==
max_possible_active
)
if
(
active_size
==
index
.
size
()
)
break
;
break
;
// Turn off shrinking on the next iteration. We will stop if the
// Turn off shrinking on the next iteration. We will stop if the
// tolerance is still <= eps when shrinking is off.
// tolerance is still <= eps when shrinking is off.
active_size
=
max_possible_active
;
active_size
=
index
.
size
()
;
PG_max_prev
=
std
::
numeric_limits
<
scalar_type
>::
infinity
();
PG_max_prev
=
std
::
numeric_limits
<
scalar_type
>::
infinity
();
PG_min_prev
=
-
std
::
numeric_limits
<
scalar_type
>::
infinity
();
PG_min_prev
=
-
std
::
numeric_limits
<
scalar_type
>::
infinity
();
}
}
...
@@ -429,7 +508,11 @@ namespace dlib
...
@@ -429,7 +508,11 @@ namespace dlib
if
(
PG_min_prev
>=
0
)
if
(
PG_min_prev
>=
0
)
PG_min_prev
=
-
std
::
numeric_limits
<
scalar_type
>::
infinity
();
PG_min_prev
=
-
std
::
numeric_limits
<
scalar_type
>::
infinity
();
}
}
}
}
// end of main optimization loop
// put the solution into a decision function and then return it
// put the solution into a decision function and then return it
decision_function
<
kernel_type
>
df
;
decision_function
<
kernel_type
>
df
;
...
@@ -439,10 +522,9 @@ namespace dlib
...
@@ -439,10 +522,9 @@ namespace dlib
df
.
b
=
0
;
df
.
b
=
0
;
df
.
basis_vectors
.
set_size
(
1
);
df
.
basis_vectors
.
set_size
(
1
);
// Copy the plane normal into the output basis vector. The output vector might be a
// Copy the plane normal into the output basis vector. The output vector might
// sparse vector container so we need to use this special kind of copy to handle that case.
// be a sparse vector container so we need to use this special kind of copy to
// As an aside, the reason for using max_index_plus_one() and not just w.size()-1 is because
// handle that case.
// doing it this way avoids an inane warning from gcc that can occur in some cases.
assign
(
df
.
basis_vectors
(
0
),
colm
(
w
,
0
,
dims
));
assign
(
df
.
basis_vectors
(
0
),
colm
(
w
,
0
,
dims
));
df
.
alpha
.
set_size
(
1
);
df
.
alpha
.
set_size
(
1
);
df
.
alpha
(
0
)
=
1
;
df
.
alpha
(
0
)
=
1
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment