"dgl_sparse/git@developer.sourcefind.cn:OpenDAS/dgl.git" did not exist on "b1e2695f3e8c98284b95b897f098b38fd9f336fc"
Commit a88f1bd8 authored by Davis King

Made the converter add zero padding layers when needed by Eltwise to replicate
the behavior of dlib's add_prev layers.

parent 984b6949
@@ -21,6 +21,8 @@ struct layer
     string type; // comp, loss, or input
     int idx;
 
+    matrix<long,4,1> output_tensor_shape; // (N,K,NR,NC)
+
     string detail_name; // The name of the tag inside the layer tag. e.g. fc, con, max_pool, input_rgb_image.
     std::map<string,double> attributes;
     matrix<double> params;
@@ -49,30 +51,32 @@ struct layer
 // ----------------------------------------------------------------------------------------
 
 std::vector<layer> parse_dlib_xml(
+    const matrix<long,4,1>& input_tensor_shape,
     const string& xml_filename
 );
 
 // ----------------------------------------------------------------------------------------
 
 template <typename iterator>
-string find_layer_caffe_name (
+const layer& find_layer (
     iterator i,
     long tag_id
 )
 /*!
     requires
-        - i is an iterator pointing to a layer in the list of layers produced by parse_dlib_xml().
+        - i is a reverse iterator pointing to a layer in the list of layers produced by parse_dlib_xml().
         - i is not an input layer.
     ensures
        - if (tag_id == -1) then
-            - returns the caffe string name for the previous layer to layer i.
+            - returns the previous layer (i.e. closer to the input) to layer i.
        - else
-            - returns the caffe string name for the previous layer to layer i with the given tag_id.
+            - returns the previous layer (i.e. closer to the input) to layer i with the
+              given tag_id.
 !*/
 {
     if (tag_id == -1)
     {
-        return (i-1)->caffe_layer_name();
+        return *(i-1);
     }
     else
     {
@@ -81,7 +85,7 @@ string find_layer_caffe_name (
         i--;
         // if we hit the end of the network before we found what we were looking for
         if (i->tag_id == tag_id)
-            return i->caffe_layer_name();
+            return *i;
         if (i->type == "input")
             throw dlib::error("Network definition is bad, a layer wanted to skip back to a non-existing layer.");
     }
@@ -89,7 +93,19 @@ string find_layer_caffe_name (
 }
 
 template <typename iterator>
-string find_input_layer_caffe_name (iterator i) { return find_layer_caffe_name(i, i->skip_id); }
+const layer& find_input_layer (iterator i) { return find_layer(i, i->skip_id); }
+
+template <typename iterator>
+string find_layer_caffe_name (
+    iterator i,
+    long tag_id
+)
+{
+    return find_layer(i,tag_id).caffe_layer_name();
+}
+
+template <typename iterator>
+string find_input_layer_caffe_name (iterator i) { return find_input_layer(i).caffe_layer_name(); }
 
 // ----------------------------------------------------------------------------------------
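For orientation: parse_dlib_xml() returns the layers ordered output first, with the
input layer at the back of the vector, so these helpers are meant to be called with
reverse iterators, and (i-1) steps back toward the input. A minimal usage sketch (the
input shape and filename here are hypothetical, not part of this commit):

    auto layers = parse_dlib_xml({1,3,224,224}, "net.xml");
    for (auto i = ++layers.rbegin(); i != layers.rend(); ++i)
    {
        // the layer that feeds layer i
        const layer& prev = find_input_layer(i);
        // for a skip connection such as add_prev, the tagged layer can be
        // fetched explicitly:
        // const layer& tagged = find_layer(i, i->attribute("tag"));
        cout << prev.caffe_layer_name() << "\n";
    }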
@@ -116,7 +132,8 @@ void convert_dlib_xml_to_caffe_python_code(
     cout << "Writing model to " << out_filename << endl;
     ofstream fout(out_filename);
     fout.precision(9);
-    const auto layers = parse_dlib_xml(xml_filename);
+    const auto layers = parse_dlib_xml({N,K,NR,NC}, xml_filename);
+
     fout << "#\n";
     fout << "# !!! This file was automatically generated by dlib's tools/convert_dlib_nets_to_caffe utility. !!!\n";
@@ -301,10 +318,59 @@ void convert_dlib_xml_to_caffe_python_code(
         }
         else if (i->detail_name == "add_prev")
         {
-            fout << "    n." << i->caffe_layer_name() << " = L.Eltwise(n." << find_input_layer_caffe_name(i);
-            fout << ", n." << find_layer_caffe_name(i, i->attribute("tag"));
-            fout << ", operation=P.Eltwise.SUM";
-            fout << ");\n";
+            auto in_shape1 = find_input_layer(i).output_tensor_shape;
+            auto in_shape2 = find_layer(i,i->attribute("tag")).output_tensor_shape;
+            if (in_shape1 != in_shape2)
+            {
+                // if only the number of channels differs then we will use a dummy layer to
+                // pad with zeros. But otherwise we will throw an error.
+                if (in_shape1(0) == in_shape2(0) &&
+                    in_shape1(2) == in_shape2(2) &&
+                    in_shape1(3) == in_shape2(3))
+                {
+                    fout << "    n." << i->caffe_layer_name() << "_zeropad = L.DummyData(num=" << in_shape1(0);
+                    fout << ", channels="<<std::abs(in_shape1(1)-in_shape2(1));
+                    fout << ", height="<<in_shape1(2);
+                    fout << ", width="<<in_shape1(3);
+                    fout << ");\n";
+
+                    string smaller_layer = find_input_layer_caffe_name(i);
+                    string bigger_layer = find_layer_caffe_name(i, i->attribute("tag"));
+                    if (in_shape1(1) > in_shape2(1))
+                        swap(smaller_layer, bigger_layer);
+
+                    fout << "    n." << i->caffe_layer_name() << "_concat = L.Concat(n." << smaller_layer;
+                    fout << ", n." << i->caffe_layer_name() << "_zeropad";
+                    fout << ");\n";
+
+                    fout << "    n." << i->caffe_layer_name() << " = L.Eltwise(n." << i->caffe_layer_name() << "_concat";
+                    fout << ", n." << bigger_layer;
+                    fout << ", operation=P.Eltwise.SUM";
+                    fout << ");\n";
+                }
+                else
+                {
+                    std::ostringstream sout;
+                    sout << "The dlib network contained an add_prev layer (layer idx " << i->idx << ") that adds two previous ";
+                    sout << "layers with different output tensor dimensions. Caffe's equivalent layer, Eltwise, doesn't support ";
+                    sout << "adding layers together with different dimensions. In the special case where the only difference is ";
+                    sout << "in the number of channels, this converter program will add a dummy layer that outputs a tensor full of zeros ";
+                    sout << "and concat it appropriately so this will work. However, this network you are converting has tensor dimensions ";
+                    sout << "different in values other than the number of channels. In particular, here are the two tensor shapes (batch size, channels, rows, cols): ";
+                    std::ostringstream sout2;
+                    sout2 << wrap_string(sout.str()) << endl;
+                    sout2 << trans(in_shape1);
+                    sout2 << trans(in_shape2);
+                    throw dlib::error(sout2.str());
+                }
+            }
+            else
+            {
+                fout << "    n." << i->caffe_layer_name() << " = L.Eltwise(n." << find_input_layer_caffe_name(i);
+                fout << ", n." << find_layer_caffe_name(i, i->attribute("tag"));
+                fout << ", operation=P.Eltwise.SUM";
+                fout << ");\n";
+            }
         }
         else
         {
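A worked instance of the padding rule above (all shape values hypothetical): if the
two add_prev inputs are (1,64,56,56) and (1,128,56,56), only the channel counts
differ, so the converter pads rather than throwing:

    matrix<long,4,1> in_shape1 = {1, 64, 56, 56};   // layer feeding add_prev
    matrix<long,4,1> in_shape2 = {1, 128, 56, 56};  // tagged skip-back layer
    // N, NR, and NC match, so only the channel gap needs zero filling:
    long pad_channels = std::abs(in_shape1(1) - in_shape2(1));  // 64
    // emitted python: DummyData(num=1, channels=64, height=56, width=56),
    // then Concat(64ch tensor, 64ch zeros) -> 128ch,
    // then Eltwise SUM with the 128ch tensor.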
@@ -549,7 +615,68 @@ public:
 
 // ----------------------------------------------------------------------------------------
 
+void compute_output_tensor_shapes(const matrix<long,4,1>& input_tensor_shape, std::vector<layer>& layers)
+{
+    DLIB_CASSERT(layers.back().type == "input");
+    layers.back().output_tensor_shape = input_tensor_shape;
+    for (auto i = ++layers.rbegin(); i != layers.rend(); ++i)
+    {
+        const auto input_shape = find_input_layer(i).output_tensor_shape;
+        if (i->type == "comp")
+        {
+            if (i->detail_name == "fc" || i->detail_name == "fc_no_bias")
+            {
+                long num_outputs = i->attribute("num_outputs");
+                i->output_tensor_shape = {input_shape(0), num_outputs, 1, 1};
+            }
+            else if (i->detail_name == "con")
+            {
+                long num_filters = i->attribute("num_filters");
+                long filter_nc = i->attribute("nc");
+                long filter_nr = i->attribute("nr");
+                long stride_x = i->attribute("stride_x");
+                long stride_y = i->attribute("stride_y");
+                long padding_x = i->attribute("padding_x");
+                long padding_y = i->attribute("padding_y");
+                long nr = 1+(input_shape(2) + 2*padding_y - filter_nr)/stride_y;
+                long nc = 1+(input_shape(3) + 2*padding_x - filter_nc)/stride_x;
+                i->output_tensor_shape = {input_shape(0), num_filters, nr, nc};
+            }
+            else if (i->detail_name == "max_pool" || i->detail_name == "avg_pool")
+            {
+                long filter_nc = i->attribute("nc");
+                long filter_nr = i->attribute("nr");
+                long stride_x = i->attribute("stride_x");
+                long stride_y = i->attribute("stride_y");
+                long padding_x = i->attribute("padding_x");
+                long padding_y = i->attribute("padding_y");
+                long nr = 1+(input_shape(2) + 2*padding_y - filter_nr)/stride_y;
+                long nc = 1+(input_shape(3) + 2*padding_x - filter_nc)/stride_x;
+                i->output_tensor_shape = {input_shape(0), input_shape(1), nr, nc};
+            }
+            else if (i->detail_name == "add_prev")
+            {
+                auto aux_shape = find_layer(i, i->attribute("tag")).output_tensor_shape;
+                for (long j = 0; j < input_shape.size(); ++j)
+                    i->output_tensor_shape(j) = std::max(input_shape(j), aux_shape(j));
+            }
+            else
+            {
+                i->output_tensor_shape = input_shape;
+            }
+        }
+        else
+        {
+            i->output_tensor_shape = input_shape;
+        }
+    }
+}
+
+// ----------------------------------------------------------------------------------------
+
 std::vector<layer> parse_dlib_xml(
+    const matrix<long,4,1>& input_tensor_shape,
     const string& xml_filename
 )
 {
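As a sanity check on the conv/pool shape formula in compute_output_tensor_shapes()
above, a worked instance (all values hypothetical): a 5x5 'con' layer with stride 2
and padding 2 applied to a (1,3,224,224) input:

    long nr = 1 + (224 + 2*2 - 5)/2;  // 1 + 223/2 = 112 (integer division)
    long nc = 1 + (224 + 2*2 - 5)/2;  // likewise 112
    // so output_tensor_shape becomes (1, num_filters, 112, 112)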
@@ -561,6 +688,8 @@ std::vector<layer> parse_dlib_xml(
     if (dh.layers.back().type != "input")
         throw dlib::error("The network in the XML file is missing an input layer!");
 
+    compute_output_tensor_shapes(input_tensor_shape, dh.layers);
+
     return dh.layers;
 }