Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
edea2b67
Commit
edea2b67
authored
May 11, 2018
by
Terry Koo
Browse files
Remove runtime because reasons.
parent
a4bb31d0
Changes
291
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
0 additions
and
3589 deletions
+0
-3589
research/syntaxnet/dragnn/runtime/lstm_network_test.cc
research/syntaxnet/dragnn/runtime/lstm_network_test.cc
+0
-244
research/syntaxnet/dragnn/runtime/master.cc
research/syntaxnet/dragnn/runtime/master.cc
+0
-148
research/syntaxnet/dragnn/runtime/master.h
research/syntaxnet/dragnn/runtime/master.h
+0
-97
research/syntaxnet/dragnn/runtime/master_test.cc
research/syntaxnet/dragnn/runtime/master_test.cc
+0
-531
research/syntaxnet/dragnn/runtime/math/BUILD
research/syntaxnet/dragnn/runtime/math/BUILD
+0
-257
research/syntaxnet/dragnn/runtime/math/arithmetic.h
research/syntaxnet/dragnn/runtime/math/arithmetic.h
+0
-40
research/syntaxnet/dragnn/runtime/math/arithmetic_avx.h
research/syntaxnet/dragnn/runtime/math/arithmetic_avx.h
+0
-39
research/syntaxnet/dragnn/runtime/math/arithmetic_common.h
research/syntaxnet/dragnn/runtime/math/arithmetic_common.h
+0
-113
research/syntaxnet/dragnn/runtime/math/arithmetic_neon.h
research/syntaxnet/dragnn/runtime/math/arithmetic_neon.h
+0
-39
research/syntaxnet/dragnn/runtime/math/arithmetic_sse.h
research/syntaxnet/dragnn/runtime/math/arithmetic_sse.h
+0
-39
research/syntaxnet/dragnn/runtime/math/arithmetic_test.cc
research/syntaxnet/dragnn/runtime/math/arithmetic_test.cc
+0
-176
research/syntaxnet/dragnn/runtime/math/avx_activation_functions.h
.../syntaxnet/dragnn/runtime/math/avx_activation_functions.h
+0
-167
research/syntaxnet/dragnn/runtime/math/avx_activation_functions_test.cc
...xnet/dragnn/runtime/math/avx_activation_functions_test.cc
+0
-110
research/syntaxnet/dragnn/runtime/math/avx_vector_array.h
research/syntaxnet/dragnn/runtime/math/avx_vector_array.h
+0
-732
research/syntaxnet/dragnn/runtime/math/avx_vector_array_test.cc
...ch/syntaxnet/dragnn/runtime/math/avx_vector_array_test.cc
+0
-198
research/syntaxnet/dragnn/runtime/math/eigen.h
research/syntaxnet/dragnn/runtime/math/eigen.h
+0
-104
research/syntaxnet/dragnn/runtime/math/eigen_test.cc
research/syntaxnet/dragnn/runtime/math/eigen_test.cc
+0
-135
research/syntaxnet/dragnn/runtime/math/float16_types.h
research/syntaxnet/dragnn/runtime/math/float16_types.h
+0
-87
research/syntaxnet/dragnn/runtime/math/float16_types_test.cc
research/syntaxnet/dragnn/runtime/math/float16_types_test.cc
+0
-87
research/syntaxnet/dragnn/runtime/math/sgemvv.h
research/syntaxnet/dragnn/runtime/math/sgemvv.h
+0
-246
No files found.
Too many changes to show.
To preserve performance only
291 of 291+
files are displayed.
Plain diff
Email patch
research/syntaxnet/dragnn/runtime/lstm_network_test.cc
deleted
100644 → 0
View file @
a4bb31d0
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
#include "dragnn/core/test/generic.h"
#include "dragnn/protos/runtime.pb.h"
#include "dragnn/runtime/flexible_matrix_kernel.h"
#include "dragnn/runtime/lstm_cell/cell_function.h"
#include "dragnn/runtime/network_unit.h"
#include "dragnn/runtime/test/network_test_base.h"
#include "dragnn/runtime/variable_store.h"
#include <gmock/gmock.h>
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/platform/test.h"
namespace
syntaxnet
{
namespace
dragnn
{
namespace
runtime
{
namespace
{
using
::
testing
::
Invoke
;
using
::
testing
::
_
;
// Test fixture for the "LSTMNetwork" network unit. Provides helpers to
// register the blocked weight matrices and bias vectors the cell expects,
// to run the network unit on a one-step component, and to read back the
// resulting activations.
class LstmNetworkTest : public NetworkTestBase {
 protected:
  // Adds a blocked weight matrix with the |name| with the given dimensions and
  // |fill_value|. If |is_flexible_matrix| is true, the variable is set up for
  // use by the FlexibleMatrixKernel.
  void AddWeights(const string &name, size_t input_dim, size_t output_dim,
                  float fill_value, bool is_flexible_matrix = false) {
    constexpr int kBatchSize = LstmCellFunction<>::kBatchSize;
    // Round |output_dim| up to the next multiple of the cell batch size.
    size_t output_padded =
        kBatchSize * ((output_dim + kBatchSize - 1) / kBatchSize);
    // One view per (block-row, input) pair in the column-blocked layout.
    size_t num_views = (output_padded / kBatchSize) * input_dim;
    // Flexible-matrix variables use the kernel's name suffix; plain blocked
    // matrices use the "/matrix/blocked48" naming convention.
    string var_name = tensorflow::strings::StrCat(
        kTestComponentName, "/", name,
        is_flexible_matrix ? FlexibleMatrixKernel::kSuffix
                           : "/matrix/blocked48");
    // Every block is filled uniformly with |fill_value|.
    const std::vector<float> block(kBatchSize, fill_value);
    const std::vector<std::vector<float>> blocks(num_views, block);
    variable_store_.AddOrDie(
        var_name, blocks, VariableSpec::FORMAT_COLUMN_BLOCKED_ROW_MAJOR_MATRIX);
    variable_store_.SetBlockedDimensionOverride(
        var_name, {input_dim, output_padded, kBatchSize});
  }

  // Adds a bias vector with the |name_suffix| with the given dimensions and
  // |fill_value|.
  void AddBiases(const string &name, size_t dimension, float fill_value) {
    const string biases_name =
        tensorflow::strings::StrCat(kTestComponentName, "/", name);
    AddVectorVariable(biases_name, dimension, fill_value);
  }

  // Creates a network unit, initializes it based on the |component_spec_text|,
  // and evaluates it. On error, returns non-OK.
  tensorflow::Status Run(const string &component_spec_text) {
    ComponentSpec component_spec;
    CHECK(TextFormat::ParseFromString(component_spec_text, &component_spec));
    component_spec.set_name(kTestComponentName);

    // Since LSTMNetwork uses the concatenated input, it is insensitive
    // to the particular fixed or linked embedding inputs. For simplicity, the
    // tests use a trivial network structure and a single fixed embedding.
    AddComponent(kTestComponentName);
    TF_RETURN_IF_ERROR(
        NetworkUnit::CreateOrError("LSTMNetwork", &network_unit_));
    TF_RETURN_IF_ERROR(network_unit_->Initialize(component_spec,
                                                 &variable_store_,
                                                 &network_state_manager_,
                                                 &extension_manager_));

    network_states_.Reset(&network_state_manager_);
    StartComponent(1);  // only evaluate the first step
    session_state_.extensions.Reset(&extension_manager_);

    TF_RETURN_IF_ERROR(
        network_unit_->Evaluate(0, &session_state_, &compute_session_));
    return tensorflow::Status::OK();
  }

  // Returns the activation vector of the first step of layer named |layer_name|
  // in the current component.
  Vector<float> GetActivations(const string &layer_name) const {
    Matrix<float> layer(GetLayer(kTestComponentName, layer_name));
    return layer.row(0);
  }

  // Network unit under test; created by Run().
  std::unique_ptr<NetworkUnit> network_unit_;
};
// Tests that the LSTMNetwork does not produce logits when omit_logits is
// true, even if there are actions.
TEST_F(LstmNetworkTest, NoLogitsOrSoftmaxWhenOmitLogitsTrue) {
  constexpr size_t input_dim = 32;
  // Hidden size equal to one cell batch, so no padding is required.
  constexpr int kHiddenDim = LstmCellFunction<>::kBatchSize;
  const string kSpec = R"(fixed_feature {
vocabulary_size: 50
embedding_dim: 32
size: 1
}
network_unit {
parameters {
key: 'hidden_layer_sizes'
value: '48'
}
parameters {
key: 'omit_logits'
value: 'true'
}
}
num_actions: 10)";
  const float kEmbedding = 1.25;
  const float kFeature = 0.5;
  const float kWeight = 1.5;
  AddFixedEmbeddingMatrix(0, 50, input_dim, kEmbedding);

  // No "softmax" weights or biases.
  AddWeights("x_to_ico", input_dim, 3 * kHiddenDim, kWeight);
  AddWeights("h_to_ico", kHiddenDim, 3 * kHiddenDim, kWeight);
  AddWeights("c2i", kHiddenDim, kHiddenDim, kWeight);
  AddWeights("c2o", kHiddenDim, kHiddenDim, kWeight);
  AddBiases("ico_bias", 3 * kHiddenDim, kWeight);

  EXPECT_CALL(compute_session_, GetInputFeatures(_, _, _, _, _))
      .WillOnce(Invoke(ExtractFeatures(0, {{1, kFeature}})));

  TF_EXPECT_OK(Run(kSpec));

  // No specified logits layer.
  EXPECT_TRUE(network_unit_->GetLogitsName().empty());

  // No "logits" layer.
  size_t unused_dimension = 0;
  LayerHandle<float> unused_handle;
  EXPECT_THAT(
      network_state_manager_.LookupLayer(kTestComponentName, "logits",
                                         &unused_dimension, &unused_handle),
      test::IsErrorWithSubstr(
          "Unknown layer 'logits' in component 'test_component'"));
}
// Tests end-to-end evaluation with a hidden size smaller than the cell batch
// size: logits must exist, have dimension |num_actions|, and be uniform given
// uniform weights.
TEST_F(LstmNetworkTest, NormalOperationSmallHidden) {
  constexpr size_t input_dim = 32;
  constexpr int kHiddenDim = 8;
  constexpr int num_actions = 10;
  const string kSpec = R"(fixed_feature {
vocabulary_size: 50
embedding_dim: 32
size: 1
}
network_unit {
parameters {
key: 'hidden_layer_sizes'
value: '8'
}
}
num_actions: 10)";
  const float kEmbedding = 1.25;
  const float kFeature = 0.5;
  const float kWeight = 1.5;
  AddFixedEmbeddingMatrix(0, 50, input_dim, kEmbedding);

  // Same as above, with "softmax" weights and biases.
  AddWeights("x_to_ico", input_dim, 3 * kHiddenDim, kWeight);
  AddWeights("h_to_ico", kHiddenDim, 3 * kHiddenDim, kWeight);
  AddWeights("c2i", kHiddenDim, kHiddenDim, kWeight);
  AddWeights("c2o", kHiddenDim, kHiddenDim, kWeight);
  AddWeights("weights_softmax", kHiddenDim, num_actions, kWeight,
             /*is_flexible_matrix=*/true);
  AddBiases("ico_bias", 3 * kHiddenDim, kWeight);
  AddBiases("bias_softmax", num_actions, kWeight);

  EXPECT_CALL(compute_session_, GetInputFeatures(_, _, _, _, _))
      .WillOnce(Invoke(ExtractFeatures(0, {{1, kFeature}})));

  TF_EXPECT_OK(Run(kSpec));

  // Logits should exist.
  EXPECT_EQ(network_unit_->GetLogitsName(), "logits");

  // Logits dimension matches "num_actions" above. We don't test the values very
  // precisely here, and feel free to update if the cell function changes. Most
  // value tests should be in lstm_cell/cell_function_test.cc.
  Vector<float> logits = GetActivations("logits");
  EXPECT_EQ(logits.size(), num_actions);
  EXPECT_NEAR(logits[0], 10.6391, 0.1);
  for (int i = 1; i < 10; ++i) {
    EXPECT_EQ(logits[i], logits[0])
        << "With uniform weights, all logits should be equal.";
  }
}
// Tests that initialization fails when the hidden size is not a multiple of
// the AVX width.
TEST_F(LstmNetworkTest, ErrorWithTooSmallHidden) {
  constexpr size_t input_dim = 32;
  constexpr int kHiddenDim = 4;  // deliberately not a multiple of 8
  const string kSpec = R"(fixed_feature {
vocabulary_size: 50
embedding_dim: 32
size: 1
}
network_unit {
parameters {
key: 'hidden_layer_sizes'
value: '4'
}
}
num_actions: 0)";
  const float kEmbedding = 1.25;
  const float kWeight = 1.5;
  AddFixedEmbeddingMatrix(0, 50, input_dim, kEmbedding);

  // Same setup as above, but with no "softmax" weights or biases
  // (num_actions is 0).
  AddWeights("x_to_ico", input_dim, 3 * kHiddenDim, kWeight);
  AddWeights("h_to_ico", kHiddenDim, 3 * kHiddenDim, kWeight);
  AddWeights("c2i", kHiddenDim, kHiddenDim, kWeight);
  AddWeights("c2o", kHiddenDim, kHiddenDim, kWeight);
  AddBiases("ico_bias", 3 * kHiddenDim, kWeight);

  EXPECT_THAT(
      Run(kSpec),
      test::IsErrorWithSubstr(
          "Expected hidden size (4) to be a multiple of the AVX width (8)"));
}
}
// namespace
}
// namespace runtime
}
// namespace dragnn
}
// namespace syntaxnet
research/syntaxnet/dragnn/runtime/master.cc
deleted
100644 → 0
View file @
a4bb31d0
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
#include "dragnn/runtime/master.h"
#include <utility>
#include <vector>
#include "dragnn/protos/runtime.pb.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/gtl/cleanup.h"
namespace
syntaxnet
{
namespace
dragnn
{
namespace
runtime
{
namespace
{
// Beam size passed to ComputeSession::InitializeComponentData() for every
// component.
constexpr int kMaxBeamSize = 1;

// Combines, using MergeFrom(), each step trace in the |source| with the
// corresponding step of the |target|. If |source| has more steps, then
// |target| is extended to match.
void MergeTraces(const ComponentTrace &source, ComponentTrace *target) {
  const int num_steps = source.step_trace_size();

  // Grow |target| until it has at least as many steps as |source|.
  while (target->step_trace_size() < num_steps) target->add_step_trace();

  // Merge step-by-step; any extra steps already in |target| are untouched.
  for (int step = 0; step < num_steps; ++step) {
    target->mutable_step_trace(step)->MergeFrom(source.step_trace(step));
  }
}
// Combines, using MergeTraces(), each component trace in the |source| with the
// corresponding component of the |target|. If |source| has more components,
// then |target| is extended to match.
void MergeTraces(const MasterTrace &source, MasterTrace *target) {
  const int num_components = source.component_trace_size();

  // Grow |target| until it has at least as many component traces as |source|.
  while (target->component_trace_size() < num_components) {
    target->add_component_trace();
  }

  // Delegate to the per-component overload for each aligned pair.
  for (int index = 0; index < num_components; ++index) {
    MergeTraces(source.component_trace(index),
                target->mutable_component_trace(index));
  }
}
}
// namespace
// Initializes the master from the |master_spec|: takes ownership of the
// |variable_store|, configures the session-state pool from the spec's
// performance settings, builds and initializes one Component per spec entry,
// and finally closes the variable store. On error, returns non-OK.
tensorflow::Status Master::Initialize(
    const MasterSpec &master_spec,
    std::unique_ptr<VariableStore> variable_store) {
  // A non-null |variable_store_| is the marker for "already initialized".
  if (variable_store_ != nullptr) {
    return tensorflow::errors::FailedPrecondition("Can't initialize twice");
  }
  if (variable_store == nullptr) {
    return tensorflow::errors::InvalidArgument("No VariableStore");
  }
  variable_store_ = std::move(variable_store);

  // Size the session-state pool per the master-level performance settings.
  const auto &master_performance_settings = master_spec.GetExtension(
      MasterPerformanceSettings::master_spec_extension);
  session_state_pool_.reset(new SessionStatePool(
      master_performance_settings.session_state_pool_max_free_states()));

  components_.reserve(master_spec.component_size());
  for (const ComponentSpec &component_spec : master_spec.component()) {
    const auto &component_performance_settings = component_spec.GetExtension(
        ComponentPerformanceSettings::component_spec_extension);
    components_.emplace_back();
    ComponentConfig &component = components_.back();
    component.name = component_spec.name();
    component.pre_allocate_num_steps =
        component_performance_settings.pre_allocate_num_steps();

    // Register the component with the state manager before initializing it,
    // so its layers are added under the right component.
    TF_RETURN_IF_ERROR(
        network_state_manager_.AddComponent(component_spec.name()));
    const string component_type =
        GetNormalizedComponentBuilderName(component_spec);
    TF_RETURN_IF_ERROR(
        Component::CreateOrError(component_type, &component.instance));
    TF_RETURN_IF_ERROR(component.instance->Initialize(
        component_spec, variable_store_.get(), &network_state_manager_,
        &extension_manager_));
  }

  // All variables have been retrieved; close the store.
  return variable_store_->Close();
}
// Evaluates the component pipeline on the |compute_session|. If |master_trace|
// is non-null, enables tracing for the duration of the call and overwrites
// |master_trace| with the first trace extracted from the session. On error,
// returns non-OK.
tensorflow::Status Master::Evaluate(ComputeSession *compute_session,
                                    MasterTrace *master_trace) const {
  if (variable_store_ == nullptr) {
    return tensorflow::errors::FailedPrecondition("Not initialized");
  }
  if (compute_session == nullptr) {
    return tensorflow::errors::InvalidArgument("No ComputeSession");
  }

  if (master_trace != nullptr) {
    master_trace->Clear();
    compute_session->SetTracing(true);
  }
  // Scope guard: tracing is switched off again on every exit path, including
  // early returns from the TF_RETURN_IF_ERROR macros below.
  const auto ensure_tracing_disabled = tensorflow::gtl::MakeCleanup([=] {
    if (master_trace != nullptr) compute_session->SetTracing(false);
  });

  // Borrow a session state from the pool; returned on scope exit.
  const ScopedSessionState session_state(session_state_pool_.get());
  session_state->network_states.Reset(&network_state_manager_);
  session_state->extensions.Reset(&extension_manager_);

  for (const ComponentConfig &component : components_) {
    // TODO(googleuser): Generically trace all layers?
    ComponentTrace *component_trace = nullptr;
    if (master_trace != nullptr) {
      component_trace = master_trace->add_component_trace();
      component_trace->set_name(component.name);
    }

    compute_session->InitializeComponentData(component.name, kMaxBeamSize);
    TF_RETURN_IF_ERROR(session_state->network_states.StartNextComponent(
        component.pre_allocate_num_steps));
    TF_RETURN_IF_ERROR(component.instance->Evaluate(
        session_state.get(), compute_session, component_trace));
    compute_session->FinalizeData(component.name);
  }

  if (master_trace != nullptr) {
    // Use only the first trace from the compute session.
    const std::vector<MasterTrace> traces = compute_session->GetTraceProtos();
    if (!traces.empty()) MergeTraces(traces[0], master_trace);
  }

  return tensorflow::Status::OK();
}
}
// namespace runtime
}
// namespace dragnn
}
// namespace syntaxnet
research/syntaxnet/dragnn/runtime/master.h
deleted
100644 → 0
View file @
a4bb31d0
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
#ifndef DRAGNN_RUNTIME_MASTER_H_
#define DRAGNN_RUNTIME_MASTER_H_
#include <memory>
#include <string>
#include <vector>
#include "dragnn/core/compute_session.h"
#include "dragnn/protos/spec.pb.h"
#include "dragnn/protos/trace.pb.h"
#include "dragnn/runtime/component.h"
#include "dragnn/runtime/extensions.h"
#include "dragnn/runtime/network_states.h"
#include "dragnn/runtime/session_state.h"
#include "dragnn/runtime/session_state_pool.h"
#include "dragnn/runtime/variable_store.h"
#include "syntaxnet/base.h"
#include "tensorflow/core/lib/core/status.h"
namespace
syntaxnet
{
namespace
dragnn
{
namespace
runtime
{
// A DRAGNN master, which evaluates a series of components.
class Master {
 public:
  // Creates an uninitialized master. Call Initialize() before use.
  Master() = default;

  // Initializes the components in this based on the |master_spec|, which may
  // have performance tuning settings attached (see runtime.proto). Retrieves
  // pre-trained variables from the |variable_store|, which must not be closed.
  // On error, returns non-OK.
  tensorflow::Status Initialize(const MasterSpec &master_spec,
                                std::unique_ptr<VariableStore> variable_store);

  // Evaluates the pipeline of components on the |compute_session|, which must
  // be based on the same MasterSpec as this and populated with input data. If
  // |master_trace| is non-null, overwrites it with extracted traces. On error,
  // returns non-OK.
  tensorflow::Status Evaluate(ComputeSession *compute_session,
                              MasterTrace *master_trace) const;

 private:
  // A Component with some associated configuration.
  struct ComponentConfig {
    // Name of the component.
    string name;

    // Number of steps to pre-allocate operands for the component.
    size_t pre_allocate_num_steps = 0;

    // Component instance to initialize and evaluate.
    std::unique_ptr<Component> instance;
  };

  // Store of pre-trained variables used by the |components_|. Must be declared
  // before the |components_| to ensure it outlives them.
  std::unique_ptr<VariableStore> variable_store_;

  // Manager for the network states in the |components_|.
  NetworkStateManager network_state_manager_;

  // Manager for SessionState extensions.
  ExtensionManager extension_manager_;

  // Ordered list of components to evaluate.
  std::vector<ComponentConfig> components_;

  // Pool of session states used when evaluating the |components_|. This must
  // be destroyed before the |components_|, in case there are state extensions
  // that depend on the |components_|. Declaring this after the |components_|
  // ensures the proper destructor ordering.
  std::unique_ptr<SessionStatePool> session_state_pool_;
};
}
// namespace runtime
}
// namespace dragnn
}
// namespace syntaxnet
#endif // DRAGNN_RUNTIME_MASTER_H_
research/syntaxnet/dragnn/runtime/master_test.cc
deleted
100644 → 0
View file @
a4bb31d0
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
#include "dragnn/runtime/master.h"
#include <stddef.h>
#include <algorithm>
#include <memory>
#include <string>
#include <vector>
#include "dragnn/core/test/generic.h"
#include "dragnn/core/test/mock_compute_session.h"
#include "dragnn/protos/spec.pb.h"
#include "dragnn/protos/trace.pb.h"
#include "dragnn/runtime/alignment.h"
#include "dragnn/runtime/component.h"
#include "dragnn/runtime/extensions.h"
#include "dragnn/runtime/network_states.h"
#include "dragnn/runtime/session_state.h"
#include "dragnn/runtime/test/fake_variable_store.h"
#include "dragnn/runtime/variable_store.h"
#include "syntaxnet/base.h"
#include <gmock/gmock.h>
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/test.h"
namespace
syntaxnet
{
namespace
dragnn
{
namespace
runtime
{
namespace
{
using
::
testing
::
_
;
using
::
testing
::
InSequence
;
using
::
testing
::
Invoke
;
using
::
testing
::
Return
;
// Number of steps to take in each component.
constexpr size_t kNumSteps = 123;

// Outputs a layer of all 1s.
class Ones : public Component {
 public:
  // Implements Component.
  tensorflow::Status Initialize(const ComponentSpec &component_spec,
                                VariableStore *variable_store,
                                NetworkStateManager *network_state_manager,
                                ExtensionManager *extension_manager) override {
    // Single layer named "ones" with dimension 1.
    return network_state_manager->AddLayer("ones", 1, &output_handle_);
  }

  tensorflow::Status Evaluate(SessionState *session_state,
                              ComputeSession *compute_session,
                              ComponentTrace *component_trace) const override {
    NetworkStates *network_states = &session_state->network_states;
    // Writes 1.0 into the single output cell of each of the kNumSteps steps.
    for (size_t step = 0; step < kNumSteps; ++step) {
      network_states->AddStep();
      network_states->GetLayer(output_handle_).row(step)[0] = 1.0;
    }
    return tensorflow::Status::OK();
  }

  bool Supports(const ComponentSpec &spec,
                const string &normalized_builder_name) const override {
    return normalized_builder_name == "Ones";
  }

  bool PreferredTo(const Component &other) const override { return false; }

 private:
  // Handle to the output layer.
  LayerHandle<float> output_handle_;
};

DRAGNN_RUNTIME_REGISTER_COMPONENT(Ones);
// Extends its input layer with the step-wise cumulative sum of the final entry
// in each row of the input. E.g.,
//   [[0, 1],      [[0, 1, 1 (= 1)],
//    [2, 3],  =>   [2, 3, 4 (= 1 + 3)],
//    [4, 5]]       [4, 5, 9 (= 1 + 3 + 5)]]
class ExtendWithCumulativeSum : public Component {
 public:
  // Implements Component.
  tensorflow::Status Initialize(const ComponentSpec &component_spec,
                                VariableStore *variable_store,
                                NetworkStateManager *network_state_manager,
                                ExtensionManager *extension_manager) override {
    // NB: In a real Component implementation, linked embeddings are accessed
    // using the LinkedEmbeddingManager and LinkedEmbeddings. Here, we set up
    // the link manually because it's simple and makes the test self-contained.
    CHECK_EQ(component_spec.linked_feature_size(), 1);
    const LinkedFeatureChannel &link = component_spec.linked_feature(0);
    size_t dimension = 0;
    TF_RETURN_IF_ERROR(network_state_manager->LookupLayer(
        link.source_component(), link.source_layer(), &dimension,
        &input_handle_));
    CHECK_GT(dimension, 0);
    // Output layer "sums" is one column wider than the input.
    return network_state_manager->AddLayer("sums", dimension + 1,
                                           &output_handle_);
  }

  tensorflow::Status Evaluate(SessionState *session_state,
                              ComputeSession *compute_session,
                              ComponentTrace *component_trace) const override {
    NetworkStates *network_states = &session_state->network_states;
    float sum = 0.0;
    for (size_t step = 0; step < kNumSteps; ++step) {
      network_states->AddStep();
      const Vector<float> inputs(
          network_states->GetLayer(input_handle_).row(step));
      const MutableVector<float> outputs(
          network_states->GetLayer(output_handle_).row(step));
      CHECK_EQ(outputs.size(), inputs.size() + 1);

      // Accumulate the last entry of the input row, copy the row into the
      // output, and write the running sum into the extra trailing cell
      // (std::copy returns the iterator one past the copied range).
      sum += inputs[inputs.size() - 1];
      *std::copy(inputs.begin(), inputs.end(), outputs.begin()) = sum;
    }
    return tensorflow::Status::OK();
  }

  bool Supports(const ComponentSpec &spec,
                const string &normalized_builder_name) const override {
    return normalized_builder_name == "ExtendWithCumulativeSum";
  }

  bool PreferredTo(const Component &other) const override { return false; }

 private:
  // Handles to the input and output layers.
  LayerHandle<float> input_handle_;
  LayerHandle<float> output_handle_;
};

DRAGNN_RUNTIME_REGISTER_COMPONENT(ExtendWithCumulativeSum);
// Makes predictions using its inputs.
class MakePredictions : public Component {
 public:
  // Implements Component.
  tensorflow::Status Initialize(const ComponentSpec &component_spec,
                                VariableStore *variable_store,
                                NetworkStateManager *network_state_manager,
                                ExtensionManager *extension_manager) override {
    name_ = component_spec.name();
    CHECK_EQ(component_spec.linked_feature_size(), 1);
    const LinkedFeatureChannel &link = component_spec.linked_feature(0);
    size_t dimension = 0;
    return network_state_manager->LookupLayer(link.source_component(),
                                              link.source_layer(), &dimension,
                                              &input_handle_);
  }

  tensorflow::Status Evaluate(SessionState *session_state,
                              ComputeSession *compute_session,
                              ComponentTrace *component_trace) const override {
    NetworkStates *network_states = &session_state->network_states;
    Matrix<float> inputs(network_states->GetLayer(input_handle_));
    // Feed each input row to the compute session as prediction logits.
    for (size_t step = 0; step < kNumSteps; ++step) {
      const Vector<float> logits = inputs.row(step);
      if (!compute_session->AdvanceFromPrediction(name_, logits.data(), 1,
                                                  logits.size())) {
        return tensorflow::errors::Internal(
            "Error in ComputeSession::AdvanceFromPrediction() at step ", step);
      }
    }
    return tensorflow::Status::OK();
  }

  bool Supports(const ComponentSpec &spec,
                const string &normalized_builder_name) const override {
    return normalized_builder_name == "MakePredictions";
  }

  bool PreferredTo(const Component &other) const override { return false; }

 private:
  // Name of this component.
  string name_;

  // Handle to the input layer, which is treated as prediction logits.
  LayerHandle<float> input_handle_;
};

DRAGNN_RUNTIME_REGISTER_COMPONENT(MakePredictions);
// Component whose Evaluate() always fails.
class AlwaysFails : public Component {
 public:
  // Implements Component.
  tensorflow::Status Initialize(const ComponentSpec &component_spec,
                                VariableStore *variable_store,
                                NetworkStateManager *network_state_manager,
                                ExtensionManager *extension_manager) override {
    // Initialization trivially succeeds; only Evaluate() fails.
    return tensorflow::Status::OK();
  }

  tensorflow::Status Evaluate(SessionState *session_state,
                              ComputeSession *compute_session,
                              ComponentTrace *component_trace) const override {
    return tensorflow::errors::Internal("I always fail!");
  }

  bool Supports(const ComponentSpec &spec,
                const string &normalized_builder_name) const override {
    return normalized_builder_name == "AlwaysFails";
  }

  bool PreferredTo(const Component &other) const override { return false; }
};

DRAGNN_RUNTIME_REGISTER_COMPONENT(AlwaysFails);
// Test fixture for Master. Provides a StrictMock ComputeSession, helpers to
// run a text-format MasterSpec through the Master, and captures the logits
// passed to AdvanceFromPrediction() for inspection.
class MasterTest : public ::testing::Test {
 protected:
  // Returns a new VariableStore.
  static std::unique_ptr<VariableStore> NewVariableStore() {
    // None of the tests or components look at the pre-trained variables, so
    // return an empty store.
    return std::unique_ptr<VariableStore>(new FakeVariableStore());
  }

  // Initializes and runs the |master_| using the text-format MasterSpec in
  // |master_spec_text|. The |master_trace| is overwritten with traces, if
  // specified. If |expect_success| is false, then EXPECT_CALLs that assume
  // success are disabled. On error, returns non-OK.
  tensorflow::Status TryRun(const string &master_spec_text, bool expect_success,
                            MasterTrace *master_trace = nullptr) {
    MasterSpec master_spec;
    CHECK(TextFormat::ParseFromString(master_spec_text, &master_spec));
    TF_RETURN_IF_ERROR(master_.Initialize(master_spec, NewVariableStore()));

    {
      // Add call expectations for initializing each component, in order.
      InSequence ordered_calls;
      for (const ComponentSpec &component_spec : master_spec.component()) {
        EXPECT_CALL(compute_session_,
                    InitializeComponentData(component_spec.name(), 1))
            .Times(1);
      }
    }

    // If applicable, add call expectations for making "predictions" in the
    // final component that capture the prediction logits for inspection.
    if (master_spec.component_size() > 0 && expect_success) {
      const string &last_component_name =
          master_spec.component(master_spec.component_size() - 1).name();
      EXPECT_CALL(compute_session_,
                  AdvanceFromPrediction(last_component_name, _, 1, _))
          .Times(kNumSteps)
          .WillRepeatedly(Invoke(
              [this](const string &, const float *data, int, int size) {
                // Record each row of logits for later assertions.
                logits_.emplace_back(data, data + size);
                return true;
              }));
    }

    // Add call expectations for finalizing data in all components.
    if (expect_success) {
      for (const ComponentSpec &component_spec : master_spec.component()) {
        EXPECT_CALL(compute_session_, FinalizeData(component_spec.name()))
            .Times(1);
      }
    }

    return master_.Evaluate(&compute_session_, master_trace);
  }

  // As above, but asserts that all operations succeed.
  void Run(const string &master_spec_text,
           MasterTrace *master_trace = nullptr) {
    TF_ASSERT_OK(
        TryRun(master_spec_text, /*expect_success=*/true, master_trace));
  }

  // StrictMock so unexpected ComputeSession calls fail the test.
  ::testing::StrictMock<MockComputeSession> compute_session_;

  // Logits rows captured from AdvanceFromPrediction(), one per step.
  std::vector<std::vector<float>> logits_;

  // Master instance under test.
  Master master_;
};
// Tests that Master cannot be initialized multiple times.
TEST_F(MasterTest, InitializeTwice) {
  TF_ASSERT_OK(master_.Initialize(MasterSpec(), NewVariableStore()));
  EXPECT_THAT(master_.Initialize(MasterSpec(), NewVariableStore()),
              test::IsErrorWithSubstr("Can't initialize twice"));
}
// Tests that Master requires a variable store.
TEST_F(MasterTest, NoVariableStore) {
  EXPECT_THAT(master_.Initialize(MasterSpec(), nullptr),
              test::IsErrorWithSubstr("No VariableStore"));
}
// Tests that Master must be initialized prior to session.
TEST_F(MasterTest, EvaluateWithoutInitializing) {
  EXPECT_THAT(master_.Evaluate(&compute_session_, nullptr),
              test::IsErrorWithSubstr("Not initialized"));
}
// Tests that Master requires a compute session.
TEST_F(MasterTest, NoComputeSession) {
  TF_ASSERT_OK(master_.Initialize(MasterSpec(), NewVariableStore()));
  EXPECT_THAT(master_.Evaluate(nullptr, nullptr),
              test::IsErrorWithSubstr("No ComputeSession"));
}
// Tests that Master works with an empty spec and does nothing (StrictMock would
// raise an error if any methods on the ComputeSession were called).
TEST_F(MasterTest, EmptySpec) {
  Run("");
  EXPECT_TRUE(logits_.empty());
}
// Tests that Master can run a simple pipeline that generates ones.
TEST_F(MasterTest, Ones) {
  Run(
      R"(component {
name: 'component1'
component_builder {
registered_name: 'Ones'
}
}
component {
name: 'component2'
component_builder {
registered_name: 'MakePredictions'
}
linked_feature {
source_component: 'component1'
source_layer: 'ones'
}
})");

  // Every step should have produced a single logit equal to 1.0.
  EXPECT_EQ(logits_.size(), kNumSteps);
  const std::vector<float> expected_row = {1.0};
  for (const auto &row : logits_) EXPECT_EQ(row, expected_row);
}
// Tests that Master can run a pipeline with a cumulative summation.
TEST_F(MasterTest, SingleSummation) {
  Run(
      R"(component {
name: 'component1'
component_builder {
registered_name: 'Ones'
}
}
component {
name: 'component2'
component_builder {
registered_name: 'ExtendWithCumulativeSum'
}
linked_feature {
source_component: 'component1'
source_layer: 'ones'
}
}
component {
name: 'component3'
component_builder {
registered_name: 'MakePredictions'
}
linked_feature {
source_component: 'component2'
source_layer: 'sums'
}
})");

  EXPECT_EQ(logits_.size(), kNumSteps);

  // Each step appends the running sum of the ones, so step i should produce
  // the row {1.0, i + 1}.
  float sum = 0.0;
  for (const auto &row : logits_) {
    ++sum;
    const std::vector<float> expected_row = {1.0, sum};
    EXPECT_EQ(row, expected_row);
  }
}
// Tests that Master can run a pipeline with multiple summations.
TEST_F(MasterTest, MultiSummation) {
  Run(
      R"(component {
name: 'component1'
component_builder {
registered_name: 'Ones'
}
}
component {
name: 'component2'
component_builder {
registered_name: 'ExtendWithCumulativeSum'
}
linked_feature {
source_component: 'component1'
source_layer: 'ones'
}
}
component {
name: 'component3'
component_builder {
registered_name: 'ExtendWithCumulativeSum'
}
linked_feature {
source_component: 'component2'
source_layer: 'sums'
}
}
component {
name: 'component4'
component_builder {
registered_name: 'ExtendWithCumulativeSum'
}
linked_feature {
source_component: 'component3'
source_layer: 'sums'
}
}
component {
name: 'component5'
component_builder {
registered_name: 'MakePredictions'
}
linked_feature {
source_component: 'component4'
source_layer: 'sums'
}
})");

  EXPECT_EQ(logits_.size(), kNumSteps);

  // Three stacked cumulative-sum components produce first-, second-, and
  // third-order running sums of the stream of ones.
  float sum1 = 0.0, sum2 = 0.0, sum3 = 0.0;
  for (const auto &row : logits_) {
    sum3 += sum2 += ++sum1;
    const std::vector<float> expected_row = {1.0, sum1, sum2, sum3};
    EXPECT_EQ(row, expected_row);
  }
}
// Tests that Master can run a pipeline with tracing.
TEST_F(MasterTest, SingleSummationWithTracing) {
  {
    // Expect to enable and then disable tracing, in that order.
    InSequence ordered_calls;
    EXPECT_CALL(compute_session_, SetTracing(true));
    EXPECT_CALL(compute_session_, SetTracing(false));
  }

  // Build a set of traces for the compute session to return.  Note that there
  // is one more component trace ("D") than there are components, exercising
  // the case where trace counts and component counts differ.
  std::vector<MasterTrace> traces(1);
  traces.back().add_component_trace()->add_step_trace()->set_caption("A");
  traces.back().add_component_trace()->add_step_trace()->set_caption("B");
  traces.back().add_component_trace()->add_step_trace()->set_caption("C");
  traces.back().add_component_trace()->add_step_trace()->set_caption("D");
  EXPECT_CALL(compute_session_, GetTraceProtos()).WillOnce(Return(traces));

  MasterTrace master_trace;
  Run(
      R"(component {
name: 'component1'
component_builder {
registered_name: 'Ones'
}
}
component {
name: 'component2'
component_builder {
registered_name: 'ExtendWithCumulativeSum'
}
linked_feature {
source_component: 'component1'
source_layer: 'ones'
}
}
component {
name: 'component3'
component_builder {
registered_name: 'MakePredictions'
}
linked_feature {
source_component: 'component2'
source_layer: 'sums'
}
})",
      &master_trace);

  // The first three traces get component names attached; the extra one does
  // not correspond to a configured component and stays unnamed.
  const string kExpectedTraceText = R"(
component_trace { name: 'component1' step_trace { caption: 'A' } }
component_trace { name: 'component2' step_trace { caption: 'B' } }
component_trace { name: 'component3' step_trace { caption: 'C' } }
component_trace { step_trace { caption: 'D' } }
)";
  MasterTrace expected_trace;
  ASSERT_TRUE(TextFormat::ParseFromString(kExpectedTraceText, &expected_trace));
  EXPECT_THAT(master_trace, test::EqualsProto(expected_trace));
}
// Tests that Master disables tracing even on error.
TEST_F(MasterTest, DisablesTracingOnFailure) {
  {
    // Expect to enable and then disable tracing, in that order.
    InSequence ordered_calls;
    EXPECT_CALL(compute_session_, SetTracing(true));
    EXPECT_CALL(compute_session_, SetTracing(false));
  }

  const string kMasterSpec =
      R"(component {
name: 'component1'
component_builder {
registered_name: 'AlwaysFails'
}
})";

  // The component is expected to fail, but tracing must still be torn down
  // and a (partial) trace returned.
  MasterTrace master_trace;
  EXPECT_THAT(TryRun(kMasterSpec, /*expect_success=*/false, &master_trace),
              test::IsErrorWithSubstr("I always fail!"));

  const string kExpectedTraceText = "component_trace { name: 'component1' }";
  MasterTrace expected_trace;
  ASSERT_TRUE(TextFormat::ParseFromString(kExpectedTraceText, &expected_trace));
  EXPECT_THAT(master_trace, test::EqualsProto(expected_trace));
}
}
// namespace
}
// namespace runtime
}
// namespace dragnn
}
// namespace syntaxnet
research/syntaxnet/dragnn/runtime/math/BUILD
deleted
100644 → 0
View file @
a4bb31d0
# Build rules for DRAGNN runtime math kernels: vectorized arithmetic,
# activation functions, half-precision types, and matrix-vector products.

package(
    default_visibility = ["//visibility:public"],
)

load(
    "@org_tensorflow//tensorflow:tensorflow.bzl",
    "if_linux_x86_64",
)
load(
    "//dragnn/runtime:multiarch.bzl",
    "dragnn_cc_multiarch_test",
)

# Aggressive optimization flags, applied only on linux-x86_64 where they are
# known to be supported.
FAST_MATH_COPTS = if_linux_x86_64([
    "-O3",
    "-msse4.2",
    "-ffast-math",
    "-ftree-vectorize",
])

cc_library(
    name = "avx_vector_array",
    hdrs = ["avx_vector_array.h"],
    deps = [":float16_types"],
)

cc_test(
    name = "avx_vector_array_test",
    srcs = ["avx_vector_array_test.cc"],
    deps = [
        ":avx_vector_array",
        "//dragnn/runtime/test:helpers",
        "@org_tensorflow//tensorflow/core:test",
    ],
)

cc_library(
    name = "avx_activation_functions",
    hdrs = ["avx_activation_functions.h"],
    deps = [
        ":avx_vector_array",
    ],
)

dragnn_cc_multiarch_test(
    name = "avx_activation_functions_test",
    srcs = ["avx_activation_functions_test.cc"],
    copts = FAST_MATH_COPTS,
    deps = [
        ":avx_activation_functions",
        "//dragnn/runtime/test:helpers",
        "//syntaxnet:base",
        "@org_tensorflow//tensorflow/core:test",
    ],
)

cc_library(
    name = "float16_types",
    hdrs = ["float16_types.h"],
    deps = [
        "//syntaxnet:base",
        "@org_tensorflow//tensorflow/core:lib",
    ],
)

cc_test(
    name = "float16_types_test",
    srcs = ["float16_types_test.cc"],
    deps = [
        ":float16_types",
        "//syntaxnet:test_main",
        "@org_tensorflow//tensorflow/core:test",
    ],
)

cc_library(
    name = "sgemvv",
    hdrs = ["sgemvv.h"],
    deps = [
        ":avx_vector_array",
        ":types",
        "@org_tensorflow//tensorflow/core:lib",
    ],
)

# Runs only with explicit AVX2+FMA codegen; "manual" keeps it out of wildcard
# builds on machines that lack those ISA extensions.
cc_test(
    name = "sgemvv_test",
    srcs = ["sgemvv_test.cc"],
    copts = [
        "-O3",
        "-mavx2",
        "-mfma",
    ],
    tags = [
        "manual",
    ],
    deps = [
        ":arithmetic",
        ":sgemvv",
        ":transformations",
        ":types",
        "//dragnn/core/test:generic",
        "//dragnn/runtime/test:helpers",
        "@org_tensorflow//tensorflow/core:lib",
        "@org_tensorflow//tensorflow/core:test",
    ],
)

# Same sources as sgemvv_test, but compiled without ISA-specific flags so it
# can run everywhere.
cc_test(
    name = "sgemvv_compatibility_test",
    srcs = ["sgemvv_test.cc"],
    copts = [
        "-O3",
        "-ftree-vectorize",
        "-ffast-math",
    ],
    deps = [
        ":arithmetic",
        ":sgemvv",
        ":transformations",
        ":types",
        "//dragnn/core/test:generic",
        "//dragnn/runtime/test:helpers",
        "@org_tensorflow//tensorflow/core:lib",
        "@org_tensorflow//tensorflow/core:test",
    ],
)

cc_library(
    name = "transformations",
    hdrs = ["transformations.h"],
    deps = [
        ":types",
        "@org_tensorflow//tensorflow/core:lib",
    ],
)

cc_test(
    name = "transformations_test",
    srcs = ["transformations_test.cc"],
    deps = [
        ":transformations",
        "//dragnn/runtime/test:helpers",
        "@org_tensorflow//tensorflow/core:lib",
        "@org_tensorflow//tensorflow/core:test",
    ],
)

cc_library(
    name = "types",
    hdrs = ["types.h"],
    deps = [
        "//dragnn/runtime:alignment",
        "@org_tensorflow//tensorflow/core:lib",
    ],
)

cc_test(
    name = "types_test",
    size = "small",
    srcs = ["types_test.cc"],
    deps = [
        ":types",
        "//dragnn/core/test:generic",
        "//dragnn/runtime:alignment",
        "@org_tensorflow//tensorflow/core:lib",
        "@org_tensorflow//tensorflow/core:test",
    ],
)

cc_library(
    name = "eigen",
    hdrs = ["eigen.h"],
    deps = [
        ":types",
        "//dragnn/runtime:alignment",
        "@org_tensorflow//third_party/eigen3",
    ],
)

cc_test(
    name = "eigen_test",
    size = "small",
    srcs = ["eigen_test.cc"],
    deps = [
        ":eigen",
        ":types",
        "//dragnn/core/test:generic",
        "//dragnn/runtime/test:helpers",
        "@org_tensorflow//tensorflow/core:test",
    ],
)

# The arch-specific headers are srcs (implementation details); only the
# top-level arithmetic.h is exported.
cc_library(
    name = "arithmetic",
    srcs = [
        "arithmetic_avx.h",
        "arithmetic_common.h",
        "arithmetic_neon.h",
        "arithmetic_sse.h",
    ],
    hdrs = ["arithmetic.h"],
    deps = [
        ":types",
        "@org_tensorflow//tensorflow/core:lib",
    ],
)

cc_test(
    name = "arithmetic_test",
    size = "small",
    srcs = ["arithmetic_test.cc"],
    deps = [
        ":arithmetic",
        ":types",
        "//dragnn/runtime/test:helpers",
        "//syntaxnet:test_main",
        "@org_tensorflow//tensorflow/core:lib",
        "@org_tensorflow//tensorflow/core:test",
    ],
)

cc_test(
    name = "arithmetic_avx_test",
    size = "small",
    srcs = ["arithmetic_test.cc"],
    copts = [
        "-mavx2",
        "-mfma",
    ],
    tags = [
        "manual",
    ],
    deps = [
        ":arithmetic",
        ":types",
        "//dragnn/runtime/test:helpers",
        "//syntaxnet:test_main",
        "@org_tensorflow//tensorflow/core:lib",
        "@org_tensorflow//tensorflow/core:test",
    ],
)

cc_test(
    name = "arithmetic_sse_test",
    size = "small",
    srcs = ["arithmetic_test.cc"],
    copts = ["-msse4.2"],
    deps = [
        ":arithmetic",
        ":types",
        "//dragnn/runtime/test:helpers",
        "//syntaxnet:test_main",
        "@org_tensorflow//tensorflow/core:lib",
        "@org_tensorflow//tensorflow/core:test",
    ],
)
research/syntaxnet/dragnn/runtime/math/arithmetic.h
deleted
100644 → 0
View file @
a4bb31d0
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================

// Top-level organizational header for arithmetic operations.  Users should
// include this instead of directly including the sub-headers below.  See
// arithmetic_common.h for function declarations and comments.
//
// NB: If you wish to use an architecture-specific implementation, make sure to
// add the relevant copts to the cc_library whose .cc file includes this header.

#ifndef DRAGNN_RUNTIME_MATH_ARITHMETIC_H_
#define DRAGNN_RUNTIME_MATH_ARITHMETIC_H_

// Select an architecture-specific implementation, if possible, or fall back to
// the trivial generic implementations.  The order of the clauses is important:
// in cases where architectures may overlap the newer version should be checked
// first (e.g., AVX before SSE).
#if defined(__AVX2__)
#include "dragnn/runtime/math/arithmetic_avx.h"
#elif defined(__SSE4_2__)
#include "dragnn/runtime/math/arithmetic_sse.h"
#elif defined(__ARM_NEON) || defined(__ARM_NEON__)
#include "dragnn/runtime/math/arithmetic_neon.h"
#else  // no architecture-specific implementation
#include "dragnn/runtime/math/arithmetic_common.h"
#endif

#endif  // DRAGNN_RUNTIME_MATH_ARITHMETIC_H_
research/syntaxnet/dragnn/runtime/math/arithmetic_avx.h
deleted
100644 → 0
View file @
a4bb31d0
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================

#ifndef DRAGNN_RUNTIME_MATH_ARITHMETIC_AVX_H_
#define DRAGNN_RUNTIME_MATH_ARITHMETIC_AVX_H_

#if defined(__AVX2__)

#include <stddef.h>

#include "dragnn/runtime/math/arithmetic_common.h"
#include "dragnn/runtime/math/types.h"
#include "tensorflow/core/platform/logging.h"

namespace syntaxnet {
namespace dragnn {
namespace runtime {

// TODO(googleuser): Leaving this empty means that the definitions
// from arithmetic_common.h carry through.  Provide template specializations
// that use architecture-specific intrinsics.

}  // namespace runtime
}  // namespace dragnn
}  // namespace syntaxnet

#endif  // defined(__AVX2__)

#endif  // DRAGNN_RUNTIME_MATH_ARITHMETIC_AVX_H_
research/syntaxnet/dragnn/runtime/math/arithmetic_common.h
deleted
100644 → 0
View file @
a4bb31d0
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
// Declarations of arithmetic operations and trivial generic implementations.
// Architecture-specific implementations should include this header and define
// template specializations that override the generic implementations.
#ifndef DRAGNN_RUNTIME_MATH_ARITHMETIC_COMMON_H_
#define DRAGNN_RUNTIME_MATH_ARITHMETIC_COMMON_H_
#include <stddef.h>
#include <algorithm>
#include "dragnn/runtime/math/types.h"
#include "tensorflow/core/platform/logging.h"
namespace
syntaxnet
{
namespace
dragnn
{
namespace
runtime
{
// Performs output = scale * input. Dimensions must match.
template
<
class
T
>
void
ScaleElements
(
Vector
<
T
>
input
,
T
scale
,
MutableVector
<
T
>
output
);
// Performs output += scale * input. Dimensions must match.
template
<
class
T
>
void
AddScaledElements
(
Vector
<
T
>
input
,
T
scale
,
MutableVector
<
T
>
output
);
// Performs values = max(minimum, values) in place.
template
<
class
T
>
void
MaxElements
(
T
minimum
,
MutableVector
<
T
>
values
);
// Performs output = matrix * input. All vectors are interpreted as column
// vectors. Dimensions must match.
template
<
class
T
>
void
MultiplyMatrixAndVector
(
Matrix
<
T
>
matrix
,
Vector
<
T
>
input
,
MutableVector
<
T
>
output
);
// Performs output = bias + matrix * input. All vectors are interpreted as
// column vectors. Dimensions must match.
template
<
class
T
>
void
MultiplyMatrixAndVectorWithBias
(
Matrix
<
T
>
matrix
,
Vector
<
T
>
bias
,
Vector
<
T
>
input
,
MutableVector
<
T
>
output
);
// Implementation details below.

// Generic implementation: writes scale * input[i] into output[i] for every
// element.  Architecture-specific headers may override via specialization.
template <class T>
void ScaleElements(T scale, Vector<T> input, MutableVector<T> output) {
  DCHECK_EQ(input.size(), output.size());
  const size_t num_elements = input.size();
  for (size_t index = 0; index < num_elements; ++index) {
    output[index] = scale * input[index];
  }
}
// Generic implementation: accumulates scale * input[i] into output[i] for
// every element.
template <class T>
void AddScaledElements(T scale, Vector<T> input, MutableVector<T> output) {
  DCHECK_EQ(input.size(), output.size());
  const size_t num_elements = input.size();
  for (size_t index = 0; index < num_elements; ++index) {
    output[index] += scale * input[index];
  }
}
// Generic implementation: clamps each element from below at |minimum|.
template <class T>
void MaxElements(T minimum, MutableVector<T> values) {
  for (T &value : values) {
    value = std::max(minimum, value);
  }
}
namespace internal {

// Like MultiplyMatrixAndVectorWithBias(), but if |ignore_bias| is true, then
// the |bias| is treated as zero and its dimensions are not checked.  The
// |ignore_bias| flag is a template parameter so the branch is resolved at
// compile time.
template <bool ignore_bias, class T>
void MultiplyMatrixAndVectorImpl(Matrix<T> matrix, Vector<T> bias,
                                 Vector<T> input, MutableVector<T> output) {
  DCHECK_EQ(matrix.num_columns(), input.size());
  if (!ignore_bias) DCHECK_EQ(matrix.num_rows(), bias.size());
  DCHECK_EQ(matrix.num_rows(), output.size());

  // One dot product per output element, seeded with the bias (if any).
  for (size_t row_index = 0; row_index < matrix.num_rows(); ++row_index) {
    const Vector<T> row = matrix.row(row_index);
    DCHECK_EQ(row.size(), input.size());
    T accumulator = ignore_bias ? T() : bias[row_index];
    for (size_t column = 0; column < row.size(); ++column) {
      accumulator += row[column] * input[column];
    }
    output[row_index] = accumulator;
  }
}

}  // namespace internal
// Bias-free matrix-vector product; delegates to the shared implementation
// with the bias statically disabled.
template <class T>
void MultiplyMatrixAndVector(Matrix<T> matrix, Vector<T> input,
                             MutableVector<T> output) {
  internal::MultiplyMatrixAndVectorImpl<true>(matrix, {}, input, output);
}
// Biased matrix-vector product; delegates to the shared implementation with
// the bias statically enabled.
template <class T>
void MultiplyMatrixAndVectorWithBias(Matrix<T> matrix, Vector<T> bias,
                                     Vector<T> input,
                                     MutableVector<T> output) {
  internal::MultiplyMatrixAndVectorImpl<false>(matrix, bias, input, output);
}
}
// namespace runtime
}
// namespace dragnn
}
// namespace syntaxnet
#endif // DRAGNN_RUNTIME_MATH_ARITHMETIC_COMMON_H_
research/syntaxnet/dragnn/runtime/math/arithmetic_neon.h
deleted
100644 → 0
View file @
a4bb31d0
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================

#ifndef DRAGNN_RUNTIME_MATH_ARITHMETIC_NEON_H_
#define DRAGNN_RUNTIME_MATH_ARITHMETIC_NEON_H_

#if defined(__ARM_NEON) || defined(__ARM_NEON__)

#include <stddef.h>

#include "dragnn/runtime/math/arithmetic_common.h"
#include "dragnn/runtime/math/types.h"
#include "tensorflow/core/platform/logging.h"

namespace syntaxnet {
namespace dragnn {
namespace runtime {

// TODO(googleuser): Leaving this empty means that the definitions
// from arithmetic_common.h carry through.  Provide template specializations
// that use architecture-specific intrinsics.

}  // namespace runtime
}  // namespace dragnn
}  // namespace syntaxnet

#endif  // defined(__ARM_NEON) || defined(__ARM_NEON__)

#endif  // DRAGNN_RUNTIME_MATH_ARITHMETIC_NEON_H_
research/syntaxnet/dragnn/runtime/math/arithmetic_sse.h
deleted
100644 → 0
View file @
a4bb31d0
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================

#ifndef DRAGNN_RUNTIME_MATH_ARITHMETIC_SSE_H_
#define DRAGNN_RUNTIME_MATH_ARITHMETIC_SSE_H_

#if defined(__SSE4_2__)

#include <stddef.h>

#include "dragnn/runtime/math/arithmetic_common.h"
#include "dragnn/runtime/math/types.h"
#include "tensorflow/core/platform/logging.h"

namespace syntaxnet {
namespace dragnn {
namespace runtime {

// TODO(googleuser): Leaving this empty means that the definitions
// from arithmetic_common.h carry through.  Provide template specializations
// that use architecture-specific intrinsics.

}  // namespace runtime
}  // namespace dragnn
}  // namespace syntaxnet

#endif  // defined(__SSE4_2__)

#endif  // DRAGNN_RUNTIME_MATH_ARITHMETIC_SSE_H_
research/syntaxnet/dragnn/runtime/math/arithmetic_test.cc
deleted
100644 → 0
View file @
a4bb31d0
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
#include "dragnn/runtime/math/arithmetic.h"
#include <stddef.h>
#include <vector>
#include "dragnn/runtime/math/types.h"
#include "dragnn/runtime/test/helpers.h"
#include "tensorflow/core/platform/test.h"
namespace
syntaxnet
{
namespace
dragnn
{
namespace
runtime
{
namespace
{
// Tests that ScaleElements() doesn't crash on empty vectors.
TEST(ScaleElementsTest, Empty) {
  Vector<float> input;
  MutableVector<float> output;
  ScaleElements(1.5f, input, output);
}

// Tests that ScaleElements() copies scaled values from one vector to another.
TEST(ScaleElementsTest, Populated) {
  UniqueVector<float> input({-2.0f, -3.0f, 5.0f});
  UniqueVector<float> output({7.0f, 11.0f, 13.0f});  // gets overwritten

  ScaleElements(1.5f, Vector<float>(*input), *output);

  EXPECT_EQ((*output)[0], 1.5 * -2.0);
  EXPECT_EQ((*output)[1], 1.5 * -3.0);
  EXPECT_EQ((*output)[2], 1.5 * 5.0);
}
// Tests that AddScaledElements() doesn't crash on empty vectors.
TEST(AddScaledElementsTest, Empty) {
  Vector<float> input;
  MutableVector<float> output;
  AddScaledElements(1.5f, input, output);
}

// Tests that AddScaledElements() adds scaled values from one vector to another.
TEST(AddScaledElementsTest, Populated) {
  UniqueVector<float> input({-2.0f, -3.0f, 5.0f});
  UniqueVector<float> output({7.0f, 11.0f, 13.0f});  // gets added to

  AddScaledElements(1.5f, Vector<float>(*input), *output);

  EXPECT_EQ((*output)[0], 1.5 * -2.0 + 7.0);
  EXPECT_EQ((*output)[1], 1.5 * -3.0 + 11.0);
  EXPECT_EQ((*output)[2], 1.5 * 5.0 + 13.0);
}
// Tests that MaxElements() doesn't crash on empty vectors.
TEST(MaxElementsTest, Empty) {
  MutableVector<float> values;
  MaxElements(1.5f, values);
}

// Tests that MaxElements() performs an in-place element-wise maximum.
TEST(MaxElementsTest, Populated) {
  UniqueVector<float> values({-1.0f, 2.0f, 0.25f, -0.5f, 0.375f});

  MaxElements(0.125f, *values);

  // Elements below 0.125 are raised to it; the rest are unchanged.
  EXPECT_EQ((*values)[0], 0.125);
  EXPECT_EQ((*values)[1], 2.0);
  EXPECT_EQ((*values)[2], 0.25);
  EXPECT_EQ((*values)[3], 0.125);
  EXPECT_EQ((*values)[4], 0.375);
}
// Tests that MultiplyMatrixAndVector() doesn't crash on empty inputs.
TEST(MultiplyMatrixAndVectorTest, Empty) {
  Matrix<float> matrix;
  Vector<float> input;
  MutableVector<float> output;
  MultiplyMatrixAndVector(matrix, input, output);
}

// Tests that MultiplyMatrixAndVector() computes a matrix-vector product.
TEST(MultiplyMatrixAndVectorTest, Populated) {
  UniqueMatrix<float> matrix({{2.0f, 3.0f},    //
                              {5.0f, 7.0f},    //
                              {11.0f, 13.0f}});
  UniqueVector<float> input({-0.5f, 2.0f});
  UniqueVector<float> output({9.8f, 7.6f, 5.4f});  // gets overwritten

  MultiplyMatrixAndVector(Matrix<float>(*matrix), Vector<float>(*input),
                          *output);

  EXPECT_EQ((*output)[0], 2.0 * -0.5 + 3.0 * 2.0);
  EXPECT_EQ((*output)[1], 5.0 * -0.5 + 7.0 * 2.0);
  EXPECT_EQ((*output)[2], 11.0 * -0.5 + 13.0 * 2.0);
}
// Tests that MultiplyMatrixAndVectorWithBias() doesn't crash on empty inputs.
TEST(MultiplyMatrixAndVectorWithBiasTest, Empty) {
  Matrix<float> matrix;
  Vector<float> bias;
  Vector<float> input;
  MutableVector<float> output;
  MultiplyMatrixAndVectorWithBias(matrix, bias, input, output);
}

// Tests that MultiplyMatrixAndVectorWithBias() computes a matrix-vector product
// with an additive bias.
TEST(MultiplyMatrixAndVectorWithBiasTest, Populated) {
  UniqueMatrix<float> matrix({{2.0f, 3.0f},    //
                              {5.0f, 7.0f},    //
                              {11.0f, 13.0f}});
  UniqueVector<float> bias({100.5f, 200.25f, 300.75f});
  UniqueVector<float> input({-0.5f, 2.0f});
  UniqueVector<float> output({9.8f, 7.6f, 5.4f});  // gets overwritten

  MultiplyMatrixAndVectorWithBias(Matrix<float>(*matrix), Vector<float>(*bias),
                                  Vector<float>(*input), *output);

  EXPECT_EQ((*output)[0], 100.5 + 2.0 * -0.5 + 3.0 * 2.0);
  EXPECT_EQ((*output)[1], 200.25 + 5.0 * -0.5 + 7.0 * 2.0);
  EXPECT_EQ((*output)[2], 300.75 + 11.0 * -0.5 + 13.0 * 2.0);
}
// A dummy type for the specializations below.  Specializing on this unique
// dummy type ensures we don't conflict with any existing specialization.
struct Foo {
  float value;
};

}  // namespace

// Dummy specializations for use in the subsequent tests.  Must live outside
// the anonymous namespace so it specializes the real template.
template <>
void ScaleElements(Foo scale, Vector<Foo> input, MutableVector<Foo> output) {
  for (Foo &foo : output) {
    foo.value = 777.0;
  }
}

namespace {

// Tests that the template specialization overrides the generic implementation.
TEST(ScaleElementsTest, OverriddenByTemplateSpecialization) {
  // These values are uninitialized, but it doesn't matter because the
  // specialization never looks at them.
  UniqueVector<Foo> input(3);
  UniqueVector<Foo> output(3);

  ScaleElements(Foo(), Vector<Foo>(*input), *output);

  EXPECT_EQ((*output)[0].value, 777.0);
  EXPECT_EQ((*output)[1].value, 777.0);
  EXPECT_EQ((*output)[2].value, 777.0);
}
}
// namespace
}
// namespace runtime
}
// namespace dragnn
}
// namespace syntaxnet
research/syntaxnet/dragnn/runtime/math/avx_activation_functions.h
deleted
100644 → 0
View file @
a4bb31d0
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
// Contains logic for activation functions and more-complex elementwise
// vectorized operations.
//
// Uses operator overloading to express computation that looks like regular
// code. Currently, overloaded operators are scoped away in an "internal"
// namespace so they won't be accidentally used.
#ifndef DRAGNN_RUNTIME_MATH_AVX_ACTIVATION_FUNCTIONS_H_
#define DRAGNN_RUNTIME_MATH_AVX_ACTIVATION_FUNCTIONS_H_
#if defined(__AVX2__)
#include <immintrin.h>
#endif
#include "dragnn/runtime/math/avx_vector_array.h"
#define DRAGNN_AVXAF_ATTRIBUTE_ALWAYS_INLINE __attribute__((always_inline))
#ifdef __clang__
#define DRAGNN_AVXAF_GCC_UNROLL
#else
#define DRAGNN_AVXAF_GCC_UNROLL __attribute__((optimize("unroll-loops")))
#endif
namespace
syntaxnet
{
namespace
dragnn
{
namespace
runtime
{
// Public API
namespace
activations
{
// Calculates elementwise exp(x).
inline AvxFloatVec DRAGNN_AVXAF_ATTRIBUTE_ALWAYS_INLINE DRAGNN_AVXAF_GCC_UNROLL
Exponential(AvxFloatVec x);

// Calculates elementwise sigmoid(x) = 1/(1+exp(-x)).
inline AvxFloatVec DRAGNN_AVXAF_ATTRIBUTE_ALWAYS_INLINE Sigmoid(AvxFloatVec x);

// Calculates elementwise tanh(x).
inline AvxFloatVec DRAGNN_AVXAF_ATTRIBUTE_ALWAYS_INLINE Tanh(AvxFloatVec x);
}
// namespace activations
namespace
activations
{
// Calculates e^x by representing x = m * ln(2) + r.  It does a polynomial
// expansion of e^r, and then multiplies in e^(m * ln(2)) = 2^m.
//
inline AvxFloatVec Exponential(AvxFloatVec x) {
  // EDSL-like helpers for writing vectorized code.
  auto Const = AvxFloatVec::Const;

  // Range outside which exp() saturates in single precision.
  constexpr float explo = -88.3762626647949f;
  constexpr float exphi = 88.3762626647950f;

  // Coefficients (Cephes) for the degree-6 polynomial expansion of e^r.
  const float cephes_exp_factors[] = {
      1.9875691500e-4f, 1.3981999507e-3f, 8.3334519073e-3f,
      4.1665795894e-2f, 1.6666665459e-1f, 5.0000001201e-1f,
  };

  // Clamp the input.  i.e. assume exp(-88) is close to zero and exp(88) is
  // close to infinity.
  x.Clamp(explo, exphi);

  // Calculate `m = floor(x/ln(2) + 0.5)`.
  constexpr float inv_log2e = 1.44269504088896341f;
  AvxFloatVec m = Const(0.5f);
  m += Const(inv_log2e) * x;
  m.Floor();

  // Calculate `r = x - m*ln(2)` (see function-level comment).
  constexpr float neg_ln2 = -0.6931471805599453f;
  AvxFloatVec r = x;
  r += m * Const(neg_ln2);

  // Calculate a polynomial expansion of y = exp(r) via Horner's scheme.
  AvxFloatVec r_squared(r * r);
  AvxFloatVec y = Const(cephes_exp_factors[0]);
  for (int i = 1; i < 6; ++i) {
    y = y * r + Const(cephes_exp_factors[i]);
  }
  y = y * r_squared + r;
  y += Const(1.0f);

  // Calculate `emm0 = 2^m`.  This is done by converting emm0 into an integer,
  // and shifting it into the exponent bits of the desired floating-point
  // result.  Recall that the exponent is unsigned with 127 representing 2^0.
  AvxFloatVec emm0 = m;
  emm0 += Const(127.0f);
  AvxIntVec emm0_i(emm0);
  emm0_i.LeftShift(23);

  // The final result is `2^m * exp(r)`.
  return AvxFloatVec(emm0_i.ReinterpretCastFloat() * y);
}
inline AvxFloatVec Tanh(AvxFloatVec x) {
  // EDSL-like helpers for writing vectorized code.
  auto Const = AvxFloatVec::Const;

  // Rational (Pade-style) approximation: tanh(x) ~= p(x) / q(x), where p is
  // odd in x and q is even in x.
  const float numerator_coefficients[] = {
      -2.76076847742355e-16f, 2.00018790482477e-13f, -8.60467152213735e-11f,
      5.12229709037114e-08f,  1.48572235717979e-05f, 6.37261928875436e-04f,
      4.89352455891786e-03f,
  };
  const float denominator_coefficients[] = {
      1.19825839466702e-06f,
      1.18534705686654e-04f,
      2.26843463243900e-03f,
      4.89352518554385e-03f,
  };

  // Clamp the inputs to the range [-9, 9] since anything outside this range
  // is +/-1.0 in single-precision.
  x.Clamp(-9.0f, 9.0f);

  // Compute x^2.
  AvxFloatVec x_squared(x * x);

  // Compute the numerator polynomial via Horner's scheme.
  AvxFloatVec p = Const(numerator_coefficients[0]);
  for (int i = 1; i < 7; ++i) {
    // p = p * x^2 + numerator_coefficients[i]
    p = p * x_squared + Const(numerator_coefficients[i]);
  }
  // p = p * x
  p = AvxFloatVec(p * x);

  // Compute the denominator polynomial via Horner's scheme.
  AvxFloatVec q = Const(denominator_coefficients[0]);
  for (int i = 1; i < 4; ++i) {
    // q = q * x^2 + denominator_coefficients[i]
    q = q * x_squared + Const(denominator_coefficients[i]);
  }

  // Divide the numerator by the denominator.
  return p / q;
}
// Computes sigmoid via the identity sigmoid(x) = 0.5 * tanh(x/2) + 0.5,
// reusing the vectorized Tanh() approximation above.
inline AvxFloatVec Sigmoid(AvxFloatVec x) {
  AvxFloatVec half = AvxFloatVec::Const(0.5);
  return half * Tanh(AvxFloatVec(half * x)) + half;
}
}
// namespace activations
}
// namespace runtime
}
// namespace dragnn
}
// namespace syntaxnet
#undef DRAGNN_AVXAF_ATTRIBUTE_ALWAYS_INLINE
#undef DRAGNN_AVXAF_GCC_UNROLL
#endif // DRAGNN_RUNTIME_MATH_AVX_ACTIVATION_FUNCTIONS_H_
research/syntaxnet/dragnn/runtime/math/avx_activation_functions_test.cc
deleted
100644 → 0
View file @
a4bb31d0
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
#include "dragnn/runtime/math/avx_activation_functions.h"
#include <cmath>
#include <chrono>
#include "dragnn/runtime/test/helpers.h"
#include "syntaxnet/base.h"
#include "tensorflow/core/platform/test.h"
namespace
syntaxnet
{
namespace
dragnn
{
namespace
runtime
{
namespace
{
// Fuzzes Exponential() and validates it via the libm inverse: applying
// log() to the vectorized result should recover the original input.
TEST(AvxActivationFunctionsTest, ExponentialTest) {
  const auto apply_exp = [](AvxFloatVec *vec) {
    *vec = activations::Exponential(*vec);
  };
  const auto check_via_log = [](float input_value, float actual) {
    const float inverted = log(actual);
    EXPECT_NEAR(input_value, inverted, 1e-6)
        << "exp(" << input_value << ") = " << actual
        << ", log(actual) = " << inverted;
  };
  AvxVectorFuzzTest(apply_exp, check_via_log);
}
// Fuzzes Sigmoid() against the scalar reference 1 / (1 + e^-x).
TEST(AvxActivationFunctionsTest, SigmoidTest) {
  const auto apply_sigmoid = [](AvxFloatVec *vec) {
    *vec = activations::Sigmoid(*vec);
  };
  const auto check_against_scalar = [](float input_value, float actual) {
    const float expected = 1.0f / (1.0f + exp(-input_value));
    EXPECT_NEAR(actual, expected, 1e-6)
        << "sigmoid(" << input_value << ") = " << actual
        << ", expected = " << expected;
  };
  AvxVectorFuzzTest(apply_sigmoid, check_against_scalar);
}
template
<
int
batch_size
,
class
Function
>
void
RunPerformanceTest
(
Function
activation
,
int
flops
)
{
constexpr
uint64
kIterations
=
1000000
;
UniqueVector
<
float
>
input
(
batch_size
);
UniqueVector
<
float
>
output
(
batch_size
);
InitRandomVector
(
*
input
);
InitRandomVector
(
*
output
);
AvxFloatVecArray
<
batch_size
/
kAvxWidth
>
array
;
auto
start_time
=
std
::
chrono
::
system_clock
::
now
();
for
(
int
i
=
0
;
i
<
kIterations
;
++
i
)
{
array
.
Load
(
input
->
data
());
array
.
Apply
(
activation
);
array
.
Store
(
output
->
data
());
}
auto
end_time
=
std
::
chrono
::
system_clock
::
now
();
std
::
chrono
::
duration
<
double
>
elapsed_seconds
=
end_time
-
start_time
;
double
elapsed
=
elapsed_seconds
.
count
();
double
exp_ops
=
kIterations
*
batch_size
;
double
macro_gops
=
exp_ops
/
1e9
/
elapsed
;
VLOG
(
0
)
<<
"For batch_size "
<<
batch_size
<<
" macro-GOPS (giga-ops per sec): "
<<
macro_gops
<<
", raw arithmetic: "
<<
flops
*
macro_gops
;
}
// Logs Sigmoid() throughput across a range of batch sizes. One Sigmoid
// evaluation costs roughly 26 scalar arithmetic operations.
TEST(AvxActivationFunctionsTest, SigmoidPerformanceTest) {
  constexpr int kSigmoidFlops = 26;
  RunPerformanceTest<8>(activations::Sigmoid, kSigmoidFlops);
  RunPerformanceTest<16>(activations::Sigmoid, kSigmoidFlops);
  RunPerformanceTest<32>(activations::Sigmoid, kSigmoidFlops);
  RunPerformanceTest<48>(activations::Sigmoid, kSigmoidFlops);
  RunPerformanceTest<64>(activations::Sigmoid, kSigmoidFlops);
  RunPerformanceTest<128>(activations::Sigmoid, kSigmoidFlops);
}
// Fuzzes Tanh() against the libm scalar tanh().
TEST(AvxActivationFunctionsTest, TanhTest) {
  const auto apply_tanh = [](AvxFloatVec *vec) {
    *vec = activations::Tanh(*vec);
  };
  const auto check_against_scalar = [](float input_value, float actual) {
    const float expected = tanh(input_value);
    EXPECT_NEAR(actual, expected, 1e-6)
        << "tanh(" << input_value << ") = " << actual
        << ", expected = " << expected;
  };
  AvxVectorFuzzTest(apply_tanh, check_against_scalar);
}
// Logs Tanh() throughput across a range of batch sizes. One Tanh evaluation
// costs roughly 23 scalar arithmetic operations.
TEST(AvxActivationFunctionsTest, TanhPerformanceTest) {
  constexpr int kTanhFlops = 23;
  // Bug fix: the batch-size-8 and -16 cases previously benchmarked
  // activations::Sigmoid; every batch size now measures Tanh.
  RunPerformanceTest<8>(activations::Tanh, kTanhFlops);
  RunPerformanceTest<16>(activations::Tanh, kTanhFlops);
  RunPerformanceTest<32>(activations::Tanh, kTanhFlops);
  RunPerformanceTest<48>(activations::Tanh, kTanhFlops);
  RunPerformanceTest<64>(activations::Tanh, kTanhFlops);
  RunPerformanceTest<128>(activations::Tanh, kTanhFlops);
}
}
// namespace
}
// namespace runtime
}
// namespace dragnn
}
// namespace syntaxnet
research/syntaxnet/dragnn/runtime/math/avx_vector_array.h
deleted
100644 → 0
View file @
a4bb31d0
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
// Wraps AVX vectors into convenient helper classes. This contains a class
// wrapping a single AVX register, AvxFloatVec, and a class to manipulate a
// batch of registers, AvxFloatVecArray. Use of the latter is recommended where
// applicable, since it will be unrolled into more vectorizable code.
#ifndef DRAGNN_RUNTIME_MATH_AVX_VECTOR_ARRAY_H_
#define DRAGNN_RUNTIME_MATH_AVX_VECTOR_ARRAY_H_
#include <cmath>
#include <cstring>

#if defined(__AVX__)
#include <immintrin.h>
#elif defined(__SSE4_2__)
#include <nmmintrin.h>
#endif

#include "dragnn/runtime/math/float16_types.h"
#define DRAGNN_AVXVA_ALWAYS_INLINE inline __attribute__((always_inline))
#ifdef __clang__
// Clang doesn't support __attribute__((optimize(...))).
#define DRAGNN_AVXVA_INLINED_UNROLLED inline __attribute__((always_inline))
#else
// Assume we're using GCC, which does.
#define DRAGNN_AVXVA_INLINED_UNROLLED \
inline __attribute__((always_inline)) \
__attribute__((optimize("unroll-loops")))
#endif
namespace
syntaxnet
{
namespace
dragnn
{
namespace
runtime
{
// Number of single-precision floating point numbers that fit into a single SSE
// / AVX2 register (which are 128 and 256 bits respectively).
constexpr int kSseWidth = 128 / 32;  // = 4
constexpr int kAvxWidth = 256 / 32;  // = 8

// Number of 16-bit half-precision values per SSE / AVX2 register.
constexpr int kSseWidthHalfPrecision = 128 / 16;  // = 8
constexpr int kAvxWidthHalfPrecision = 256 / 16;  // = 16

// Forward declaration; defined below. Declared with `struct` to match the
// definition (a class/struct keyword mismatch triggers MSVC warning C4099).
struct AvxFloatVec;
namespace internal {

// This struct should always be eliminated by the compiler; it only exists so we
// can write `foo += bar * baz`, and have that compiled into a single FMA
// operation.
struct AvxMultiplyExpr {
  // Left and right multiplicands. These are borrowed references; the
  // expression object must not outlive the vectors it refers to.
  const AvxFloatVec &a;
  const AvxFloatVec &b;
};

}  // namespace internal
// Allows EDSL-like programming with AVX vectors. `a * b` builds a lazy
// multiply expression; combining it with `+` (or `+=` on AvxFloatVec) lets
// the implementation fuse multiply and add into a single FMA instruction.
inline internal::AvxMultiplyExpr operator*(const AvxFloatVec &a,
                                           const AvxFloatVec &b);
// Evaluates `expr` and adds `v` (one FMA where available).
inline AvxFloatVec operator+(const internal::AvxMultiplyExpr &expr,
                             const AvxFloatVec &v);
// Element-wise addition, division, and subtraction.
inline AvxFloatVec operator+(const AvxFloatVec &a, const AvxFloatVec &b);
inline AvxFloatVec operator/(const AvxFloatVec &a, const AvxFloatVec &b);
inline AvxFloatVec operator-(const AvxFloatVec &a, const AvxFloatVec &b);
// API over a single AVX vector (register). The implementation will either use
// a real AVX vector, or a fixed array of floats for compatibility.
//
// Note that we include the "inline" directive in declarations, not just
// definitions, because it is necessary for the "always_inline" directive.
struct AvxFloatVec {
 public:
  // Leaves the vector uninitialized (matches raw-register semantics).
  AvxFloatVec() {}

  // Evaluates an AvxMultiplyExpr intermediary without adding anything. This is
  // not an implicit cast, because typically when we write `a * b` we want to
  // add it to something and use an FMA operation.
  explicit AvxFloatVec(const internal::AvxMultiplyExpr &expr);

  // Loads from an aligned region of memory.
  inline void Load(const float *source);

  // Loads a constant value, broadcast into every lane.
  inline void LoadConstVector(const float val);

  // Stores to an aligned region of memory.
  inline void Store(float *dst) const;

  // Adds `a * b` to this value, using a fused multiply-add operation.
  inline void AddProductOf(const AvxFloatVec &a, const AvxFloatVec &b);

  // Element-wise floor.
  inline void Floor();

  // Element-wise clamps values between a min and max value.
  inline void Clamp(const float min_value, const float max_value);

  // Convenience method for more complex calculations: returns a vector with
  // `value` broadcast into every lane.
  static DRAGNN_AVXVA_ALWAYS_INLINE AvxFloatVec Const(const float value) {
    AvxFloatVec result;
    result.LoadConstVector(value);
    return result;
  }

  // Syntactic sugar for computing an FMA operation.
  inline AvxFloatVec &operator+=(const internal::AvxMultiplyExpr &to_add);

  // Adds another vector element-wise.
  inline AvxFloatVec &operator+=(const AvxFloatVec &vec);

  // Subtracts another vector element-wise.
  inline AvxFloatVec &operator-=(const AvxFloatVec &vec);

  // Divides another vector element-wise.
  inline AvxFloatVec &operator/=(const AvxFloatVec &vec);

  // Underlying storage: one AVX register, two SSE registers, or a plain
  // float array for the compatibility build, selected at compile time.
#if defined(__AVX__)
  __m256 ymm;
#elif defined(__SSE4_2__)
  __m128 xmm[2];
#else
  float ymm[8];
#endif
};
// Small wrapper around integer AVX vectors, exposing only methods we need for
// implementing the activation functions.
//
// As above, `inline` is specified here for the always_inline directive.
class AvxIntVec {
 public:
  // Constructs an AVX integer vector, by converting (truncating)
  // floating-point values.
  inline explicit AvxIntVec(const AvxFloatVec &v);

  // Left-shifts integer values.
  inline void LeftShift(int bits);

  // Reinterprets the register as a floating-point register, for bitwise tricks
  // (e.g. assembling an exponent field directly).
  inline AvxFloatVec ReinterpretCastFloat();

 private:
  // Underlying register (layout mirrors AvxFloatVec's storage choices).
#if defined(__AVX__)
  __m256i ymm_;
#elif defined(__SSE4_2__)
  __m128i xmm_[2];
#else
  int ymm_[8];
#endif
};
// Implements the index permutation that is effectively applied by the
// _mm256_unpack instructions: an index whose bits 2 and 3 differ has those
// two bits swapped (equivalently, is XORed with 0b1100); every other index
// maps to itself. See the PermutationFunctionIsEqualToTable test for the
// effective permutation this encodes.
//
// We haven't done performance testing, but hopefully this is sufficiently fast
// for the compatibility routine; in its uses below the compiler should see a
// constant argument (post-unrolling) and fold it away.
inline int FastUnpackPermutation(int original_idx) {
  const int bit2 = (original_idx >> 2) & 1;
  const int bit3 = (original_idx >> 3) & 1;
  // Swap bits 2 and 3 exactly when they differ: XOR with 0b1100 flips both,
  // which turns 01 into 10 and vice versa.
  return (bit2 != bit3) ? (original_idx ^ 0xC) : original_idx;
}
// API over an array of AVX vectors (registers). The methods on this class are
// annotated such that the compiler should unroll them.
template <int N>
struct AvxFloatVecArray {
 public:
  // Loads N registers' worth of floats from the aligned region `source`.
  DRAGNN_AVXVA_INLINED_UNROLLED void Load(const float *source) {
    for (int i = 0; i < N; i++) {
      vectors[i].Load(source + 8 * i);
    }
  }

  // As above, but only loads the first `max_idx` registers.
  DRAGNN_AVXVA_INLINED_UNROLLED void Load(const float *source, int max_idx) {
    for (int i = 0; i < N; i++) {
      if (i < max_idx) {
        vectors[i].Load(source + 8 * i);
      } else {
        // When testing with a memory sanitizer, we make sure not to read
        // uninitialized values. This is usually safe in normal operation
        // because such results are never stored (via corresponding
        // store-masking logic), but of course each algorithm must be tested to
        // ensure correct operation.
        //
        // It is also worth pointing out that exceptional values (NaN, etc.) can
        // slow down AVX/FMA floating point operations considerably. So we
        // should investigate whether this is worth enabling in all cases (and
        // forcing algorithms to provide a default).
#if defined(MEMORY_SANITIZER)
        vectors[i].LoadConstVector(0);
#endif
      }
    }
  }

  // Reads and unpacks truncated half-precision values.
  //
  // Currently, only matrix coefficients use compressed/half-precision values,
  // so it's not yet necessary to support max_idx masking (which will get a bit
  // more complicated).
  DRAGNN_AVXVA_INLINED_UNROLLED void Load(const TruncatedFloat16 *source);

#if defined(__F16C__)
  // Reads and unpacks IEEE-754 half-precision values.
  //
  // Currently, only matrix coefficients use compressed/half-precision values,
  // so it's not yet necessary to support max_idx masking (which will get a bit
  // more complicated).
  //
  // TODO(googleuser): Either add non-F16C compatibility support from Eigen,
  // or delete this code if it turns out not to be helpful.
  DRAGNN_AVXVA_INLINED_UNROLLED void Load(const IeeeFloat16 *source);
#endif

  // Broadcasts `val` into every lane of every register.
  DRAGNN_AVXVA_INLINED_UNROLLED void LoadConstVector(const float val) {
    for (int i = 0; i < N; i++) {
      vectors[i].LoadConstVector(val);
    }
  }

  // Stores all N registers to the aligned region `dst`.
  DRAGNN_AVXVA_INLINED_UNROLLED void Store(float *dst) {
    for (int i = 0; i < N; i++) {
      vectors[i].Store(dst + 8 * i);
    }
  }

  // As above, but only stores the first `max_idx` registers.
  DRAGNN_AVXVA_INLINED_UNROLLED void Store(float *dst, int max_idx) {
    for (int i = 0; i < N; i++) {
      // This is equivalent to writing `i < N && i < max_idx` above, but forces
      // the compiler to produce more efficient code (it's still creating jump
      // instructions, but the branching is probably more predictable, and the
      // loops are unrolled). In the future we could switch to VMASKMOV if
      // necessary.
      if (i < max_idx) {
        vectors[i].Store(dst + 8 * i);
      }
    }
  }

  // Applies `fcn` (an AvxFloatVec -> AvxFloatVec functor) to every register.
  template <class Function>
  DRAGNN_AVXVA_INLINED_UNROLLED void Apply(const Function &fcn) {
    for (int i = 0; i < N; i++) {
      vectors[i] = fcn(vectors[i]);
    }
  }

  // Underlying registers.
  AvxFloatVec vectors[N];
};
// Implementation details.
#if defined(__AVX__)

// Native AVX implementations: each AvxFloatVec operation maps to a single
// 256-bit intrinsic.

DRAGNN_AVXVA_ALWAYS_INLINE AvxFloatVec::AvxFloatVec(
    const internal::AvxMultiplyExpr &expr) {
  ymm = _mm256_mul_ps(expr.a.ymm, expr.b.ymm);
}

DRAGNN_AVXVA_ALWAYS_INLINE void AvxFloatVec::Load(const float *source) {
  ymm = _mm256_load_ps(source);
}

DRAGNN_AVXVA_ALWAYS_INLINE void AvxFloatVec::LoadConstVector(const float val) {
  ymm = _mm256_set1_ps(val);
}

DRAGNN_AVXVA_ALWAYS_INLINE void AvxFloatVec::Store(float *dst) const {
  _mm256_store_ps(dst, ymm);
}

DRAGNN_AVXVA_ALWAYS_INLINE void AvxFloatVec::AddProductOf(
    const AvxFloatVec &a, const AvxFloatVec &b) {
#if defined(__AVX2__) && defined(__FMA__)
  // Single fused multiply-add where the hardware supports it.
  ymm = _mm256_fmadd_ps(a.ymm, b.ymm, ymm);
#else
  // Fallback: separate multiply then add.
  *this += AvxFloatVec(a * b);
#endif
}

DRAGNN_AVXVA_ALWAYS_INLINE void AvxFloatVec::Floor() {
  ymm = _mm256_floor_ps(ymm);
}

DRAGNN_AVXVA_ALWAYS_INLINE void AvxFloatVec::Clamp(const float min_value,
                                                   const float max_value) {
  // min() against the upper bound, then max() against the lower bound.
  ymm = _mm256_min_ps(ymm, Const(max_value).ymm);
  ymm = _mm256_max_ps(ymm, Const(min_value).ymm);
}

DRAGNN_AVXVA_ALWAYS_INLINE AvxFloatVec &AvxFloatVec::operator+=(
    const AvxFloatVec &vec) {
  ymm = _mm256_add_ps(vec.ymm, ymm);
  return *this;
}

DRAGNN_AVXVA_ALWAYS_INLINE AvxFloatVec &AvxFloatVec::operator-=(
    const AvxFloatVec &vec) {
  ymm = _mm256_sub_ps(ymm, vec.ymm);
  return *this;
}

DRAGNN_AVXVA_ALWAYS_INLINE AvxFloatVec &AvxFloatVec::operator/=(
    const AvxFloatVec &vec) {
  ymm = _mm256_div_ps(ymm, vec.ymm);
  return *this;
}

// Truncating float-to-int conversion of each lane.
DRAGNN_AVXVA_ALWAYS_INLINE AvxIntVec::AvxIntVec(const AvxFloatVec &v)
    : ymm_(_mm256_cvttps_epi32(v.ymm)) {}

DRAGNN_AVXVA_ALWAYS_INLINE void AvxIntVec::LeftShift(int bits) {
#if defined(__AVX2__)
  ymm_ = _mm256_slli_epi32(ymm_, bits);
#else
  // Convert to SSE and back again. This is pretty slow, so don't use this code
  // except for compatibility purposes.
  __m256i upper_bits = _mm256_permute2f128_si256(ymm_, ymm_, 1);
  __m128i first = _mm256_castsi256_si128(ymm_);          // Lower bits as SSE
  __m128i second = _mm256_castsi256_si128(upper_bits);   // Upper bits as SSE
  first = _mm_slli_epi32(first, bits);
  second = _mm_slli_epi32(second, bits);
  // Recombine the two shifted 128-bit halves into one 256-bit register.
  ymm_ = _mm256_permute2f128_si256(_mm256_castsi128_si256(first),
                                   _mm256_castsi128_si256(second), (2 << 4));
#endif
}

AvxFloatVec DRAGNN_AVXVA_ALWAYS_INLINE AvxIntVec::ReinterpretCastFloat() {
  AvxFloatVec result;
  // Pure bit reinterpretation; no conversion is performed.
  result.ymm = _mm256_castsi256_ps(ymm_);
  return result;
}

// Expands 16-bit truncated floats to 32-bit floats by zero-filling the low
// mantissa bits (unpacking zeros below each 16-bit value).
template <int N>
DRAGNN_AVXVA_INLINED_UNROLLED void AvxFloatVecArray<N>::Load(
    const TruncatedFloat16 *source) {
  static_assert(N % 2 == 0,
                "Load() from half floats requires even-sized vector arrays.");
  for (int i = 0; i < N / 2; i++) {
#if defined(__AVX2__)
    const __m256i input = _mm256_load_si256(reinterpret_cast<__m256i const *>(
        source + kAvxWidthHalfPrecision * i));
    // NOTE: the unpack instructions interleave within 128-bit lanes, which
    // applies the index permutation described by FastUnpackPermutation().
    vectors[2 * i].ymm = _mm256_castsi256_ps(
        _mm256_unpacklo_epi16(_mm256_setzero_si256(), input));
    vectors[2 * i + 1].ymm = _mm256_castsi256_ps(
        _mm256_unpackhi_epi16(_mm256_setzero_si256(), input));
#else
    // Compatibility AVX (not AVX2) implementation.
    __m128i input[2];
    input[0] = _mm_load_si128(reinterpret_cast<__m128i const *>(
        source + kAvxWidthHalfPrecision * i));
    input[1] = _mm_load_si128(reinterpret_cast<__m128i const *>(
        source + kAvxWidthHalfPrecision * i + kSseWidthHalfPrecision));
    // Unpack. This permutation is kinda cryptic and, to be honest, derived by
    // simply trying many combinations.
    vectors[2 * i].ymm = _mm256_insertf128_ps(
        _mm256_castps128_ps256(_mm_castsi128_ps(
            _mm_unpacklo_epi16(_mm_setzero_si128(), input[0]))),
        _mm_castsi128_ps(_mm_unpacklo_epi16(_mm_setzero_si128(), input[1])),
        1);
    vectors[2 * i + 1].ymm = _mm256_insertf128_ps(
        _mm256_castps128_ps256(_mm_castsi128_ps(
            _mm_unpackhi_epi16(_mm_setzero_si128(), input[0]))),
        _mm_castsi128_ps(_mm_unpackhi_epi16(_mm_setzero_si128(), input[1])),
        1);
#endif
  }
}

#if defined(__F16C__)
// Expands IEEE-754 half-precision values using the F16C conversion
// instructions; unlike the truncated format, this preserves order.
template <int N>
DRAGNN_AVXVA_INLINED_UNROLLED void AvxFloatVecArray<N>::Load(
    const IeeeFloat16 *source) {
  static_assert(N % 2 == 0,
                "Load() from half floats requires even-sized vector arrays.");
  for (int i = 0; i < N / 2; i++) {
    // TODO(googleuser): Experiment with doing a single AVX2 load and
    // dividing the result.
    __m128i first_half = _mm_load_si128(reinterpret_cast<__m128i const *>(
        source + kAvxWidthHalfPrecision * i));
    __m128i second_half = _mm_load_si128(reinterpret_cast<__m128i const *>(
        source + kAvxWidthHalfPrecision * i + kAvxWidth));
    vectors[2 * i].ymm = _mm256_cvtph_ps(first_half);
    vectors[2 * i + 1].ymm = _mm256_cvtph_ps(second_half);
  }
}
#endif
#elif defined(__SSE4_2__)

// SSE4.2 implementations: each 256-bit logical vector is emulated with a
// pair of 128-bit registers, so every operation loops over xmm[0..1].

DRAGNN_AVXVA_ALWAYS_INLINE AvxFloatVec::AvxFloatVec(
    const internal::AvxMultiplyExpr &expr) {
  for (int i = 0; i < 2; ++i) {
    xmm[i] = _mm_mul_ps(expr.a.xmm[i], expr.b.xmm[i]);
  }
}

DRAGNN_AVXVA_ALWAYS_INLINE void AvxFloatVec::Load(const float *source) {
  for (int i = 0; i < 2; ++i) {
    xmm[i] = _mm_load_ps(&source[i * kSseWidth]);
  }
}

DRAGNN_AVXVA_ALWAYS_INLINE void AvxFloatVec::LoadConstVector(const float val) {
  // Broadcast once and copy into both halves.
  xmm[1] = xmm[0] = _mm_set1_ps(val);
}

DRAGNN_AVXVA_ALWAYS_INLINE void AvxFloatVec::Store(float *dst) const {
  for (int i = 0; i < 2; ++i) {
    _mm_store_ps(&dst[i * kSseWidth], xmm[i]);
  }
}

// No FMA on this path: multiply then add.
DRAGNN_AVXVA_ALWAYS_INLINE void AvxFloatVec::AddProductOf(
    const AvxFloatVec &a, const AvxFloatVec &b) {
  *this += AvxFloatVec(a * b);
}

DRAGNN_AVXVA_ALWAYS_INLINE void AvxFloatVec::Floor() {
  for (int i = 0; i < 2; ++i) {
    xmm[i] = _mm_floor_ps(xmm[i]);
  }
}

DRAGNN_AVXVA_ALWAYS_INLINE void AvxFloatVec::Clamp(const float min_value,
                                                   const float max_value) {
  for (int i = 0; i < 2; ++i) {
    xmm[i] = _mm_min_ps(xmm[i], Const(max_value).xmm[i]);
    xmm[i] = _mm_max_ps(xmm[i], Const(min_value).xmm[i]);
  }
}

DRAGNN_AVXVA_ALWAYS_INLINE AvxFloatVec &AvxFloatVec::operator+=(
    const AvxFloatVec &vec) {
  for (int i = 0; i < 2; ++i) {
    xmm[i] = _mm_add_ps(vec.xmm[i], xmm[i]);
  }
  return *this;
}

DRAGNN_AVXVA_ALWAYS_INLINE AvxFloatVec &AvxFloatVec::operator-=(
    const AvxFloatVec &vec) {
  for (int i = 0; i < 2; ++i) {
    xmm[i] = _mm_sub_ps(xmm[i], vec.xmm[i]);
  }
  return *this;
}

DRAGNN_AVXVA_ALWAYS_INLINE AvxFloatVec &AvxFloatVec::operator/=(
    const AvxFloatVec &vec) {
  for (int i = 0; i < 2; ++i) {
    xmm[i] = _mm_div_ps(xmm[i], vec.xmm[i]);
  }
  return *this;
}

// Truncating float-to-int conversion of both halves.
DRAGNN_AVXVA_ALWAYS_INLINE AvxIntVec::AvxIntVec(const AvxFloatVec &v) {
  xmm_[0] = _mm_cvttps_epi32(v.xmm[0]);
  xmm_[1] = _mm_cvttps_epi32(v.xmm[1]);
}

DRAGNN_AVXVA_ALWAYS_INLINE void AvxIntVec::LeftShift(int bits) {
  for (int i = 0; i < 2; ++i) {
    xmm_[i] = _mm_slli_epi32(xmm_[i], bits);
  }
}

AvxFloatVec DRAGNN_AVXVA_ALWAYS_INLINE AvxIntVec::ReinterpretCastFloat() {
  AvxFloatVec result;
  for (int i = 0; i < 2; ++i) {
    // Pure bit reinterpretation; no conversion is performed.
    result.xmm[i] = _mm_castsi128_ps(xmm_[i]);
  }
  return result;
}

// Expands 16-bit truncated floats by zero-filling the low mantissa bits;
// the unpack instructions apply the FastUnpackPermutation() index order.
template <int N>
DRAGNN_AVXVA_INLINED_UNROLLED void AvxFloatVecArray<N>::Load(
    const TruncatedFloat16 *source) {
  static_assert(N % 2 == 0,
                "Load() from half floats requires even-sized vector arrays.");
  for (int i = 0; i < N / 2; i++) {
    __m128i input[2];
    input[0] = _mm_load_si128(reinterpret_cast<__m128i const *>(
        source + kAvxWidthHalfPrecision * i));
    input[1] = _mm_load_si128(reinterpret_cast<__m128i const *>(
        source + kAvxWidthHalfPrecision * i + kSseWidthHalfPrecision));
    vectors[2 * i].xmm[0] =
        _mm_castsi128_ps(_mm_unpacklo_epi16(_mm_setzero_si128(), input[0]));
    vectors[2 * i + 1].xmm[0] =
        _mm_castsi128_ps(_mm_unpackhi_epi16(_mm_setzero_si128(), input[0]));
    vectors[2 * i].xmm[1] =
        _mm_castsi128_ps(_mm_unpacklo_epi16(_mm_setzero_si128(), input[1]));
    vectors[2 * i + 1].xmm[1] =
        _mm_castsi128_ps(_mm_unpackhi_epi16(_mm_setzero_si128(), input[1]));
  }
}

#if defined(__F16C__)
// Expands IEEE-754 half-precision values with the F16C conversion
// instructions, four lanes at a time per 128-bit register.
template <int N>
DRAGNN_AVXVA_INLINED_UNROLLED void AvxFloatVecArray<N>::Load(
    const IeeeFloat16 *source) {
  static_assert(N % 2 == 0,
                "Load() from half floats requires even-sized vector arrays.");
  for (int i = 0; i < N / 2; i++) {
    __m128i first_half = _mm_load_si128(reinterpret_cast<__m128i const *>(
        source + kAvxWidthHalfPrecision * i));
    __m128i second_half = _mm_load_si128(reinterpret_cast<__m128i const *>(
        source + kAvxWidthHalfPrecision * i + kAvxWidth));
    // _mm_cvtph_ps converts the low four half-floats of its argument.
    vectors[2 * i].xmm[0] = _mm_cvtph_ps(first_half);
    vectors[2 * i + 1].xmm[0] = _mm_cvtph_ps(second_half);
    // Rotate the upper four half-floats into the low positions and convert.
    first_half = _mm_shuffle_epi32(first_half, _MM_SHUFFLE(0, 1, 3, 2));
    second_half = _mm_shuffle_epi32(second_half, _MM_SHUFFLE(0, 1, 3, 2));
    vectors[2 * i].xmm[1] = _mm_cvtph_ps(first_half);
    vectors[2 * i + 1].xmm[1] = _mm_cvtph_ps(second_half);
  }
}
#endif
#else
// Compatibility implementations. If you compile with -ftree-vectorize and
// -msse2 flags, you should still get decent performance (maybe 1/4 of the
// AVX/FMA version).
//
// See the class above for method documentation.

DRAGNN_AVXVA_ALWAYS_INLINE AvxFloatVec::AvxFloatVec(
    const internal::AvxMultiplyExpr &expr) {
  for (int i = 0; i < 8; i++) {
    ymm[i] = expr.a.ymm[i] * expr.b.ymm[i];
  }
}

DRAGNN_AVXVA_ALWAYS_INLINE void AvxFloatVec::Load(const float *source) {
  for (int i = 0; i < 8; i++) {
    ymm[i] = source[i];
  }
}

DRAGNN_AVXVA_ALWAYS_INLINE void AvxFloatVec::LoadConstVector(const float val) {
  for (int i = 0; i < 8; i++) {
    ymm[i] = val;
  }
}

DRAGNN_AVXVA_ALWAYS_INLINE void AvxFloatVec::Store(float *dst) const {
  for (int i = 0; i < 8; i++) {
    dst[i] = ymm[i];
  }
}

DRAGNN_AVXVA_ALWAYS_INLINE void AvxFloatVec::AddProductOf(
    const AvxFloatVec &a, const AvxFloatVec &b) {
  for (int i = 0; i < 8; i++) {
    ymm[i] += a.ymm[i] * b.ymm[i];
  }
}

DRAGNN_AVXVA_ALWAYS_INLINE void AvxFloatVec::Floor() {
  for (int i = 0; i < 8; i++) {
    ymm[i] = floor(ymm[i]);
  }
}

DRAGNN_AVXVA_ALWAYS_INLINE void AvxFloatVec::Clamp(const float min_value,
                                                   const float max_value) {
  for (int i = 0; i < 8; i++) {
    ymm[i] = fmin(fmax(ymm[i], min_value), max_value);
  }
}

DRAGNN_AVXVA_ALWAYS_INLINE AvxFloatVec &AvxFloatVec::operator+=(
    const AvxFloatVec &vec) {
  for (int i = 0; i < 8; i++) {
    ymm[i] += vec.ymm[i];
  }
  return *this;
}

DRAGNN_AVXVA_ALWAYS_INLINE AvxFloatVec &AvxFloatVec::operator-=(
    const AvxFloatVec &vec) {
  for (int i = 0; i < 8; i++) {
    ymm[i] -= vec.ymm[i];
  }
  return *this;
}

DRAGNN_AVXVA_ALWAYS_INLINE AvxFloatVec &AvxFloatVec::operator/=(
    const AvxFloatVec &vec) {
  for (int i = 0; i < 8; i++) {
    ymm[i] /= vec.ymm[i];
  }
  return *this;
}

// Truncating float-to-int conversion, matching _mm256_cvttps_epi32.
DRAGNN_AVXVA_ALWAYS_INLINE AvxIntVec::AvxIntVec(const AvxFloatVec &v) {
  for (int i = 0; i < 8; i++) {
    ymm_[i] = static_cast<int>(v.ymm[i]);
  }
}

DRAGNN_AVXVA_ALWAYS_INLINE void AvxIntVec::LeftShift(int bits) {
  for (int i = 0; i < 8; i++) {
    ymm_[i] = ymm_[i] << bits;
  }
}
// Reinterprets the integer lanes bit-for-bit as floats (compat path).
DRAGNN_AVXVA_ALWAYS_INLINE AvxFloatVec AvxIntVec::ReinterpretCastFloat() {
  static_assert(sizeof(float) == sizeof(int),
                "Bitwise reinterpretation requires equal sizes.");
  AvxFloatVec result;
  for (int i = 0; i < 8; i++) {
    // std::memcpy performs a well-defined bitwise reinterpretation; the
    // previous reinterpret_cast<float &> of an int lvalue violated strict
    // aliasing and was undefined behavior.
    std::memcpy(&result.ymm[i], &ymm_[i], sizeof(float));
  }
  return result;
}
// Compat expansion of truncated half-floats, reproducing (via
// FastUnpackPermutation) the lane order the AVX unpack instructions emit.
template <int N>
DRAGNN_AVXVA_INLINED_UNROLLED void AvxFloatVecArray<N>::Load(
    const TruncatedFloat16 *source) {
  static_assert(N % 2 == 0,
                "Load() from half floats requires even-sized vector arrays.");
  // Iterate through mock AVX vectors, each composed of 16 half-floats.
  for (int vec_idx = 0; vec_idx < N / 2; vec_idx++) {
    // Making this code a bit more verbose, by reading in-order to a temporary
    // array, results in faster performance. The compatibility version is still
    // pretty slow though.
    TruncatedFloat16 tmp[16];
    for (int i = 0; i < kAvxWidthHalfPrecision; ++i) {
      tmp[i] = source[i + kAvxWidthHalfPrecision * vec_idx];
    }
    float unpacked[16];
    for (int i = 0; i < kAvxWidthHalfPrecision; ++i) {
      unpacked[i] = tmp[i].DebugToFloat();
    }
    // Scatter into the two destination vectors in permuted order.
    for (int i = 0; i < kAvxWidthHalfPrecision; ++i) {
      int permuted = FastUnpackPermutation(i);
      vectors[2 * vec_idx + (i / 8)].ymm[i % 8] = unpacked[permuted];
    }
  }
}

#if defined(__F16C__)
// Compat expansion of IEEE half-floats; order-preserving (no permutation).
template <int N>
DRAGNN_AVXVA_INLINED_UNROLLED void AvxFloatVecArray<N>::Load(
    const IeeeFloat16 *source) {
  // Not actually required for the compatibility implementation, but it'd be
  // rather non-uniform if this API succeeded, and then compilation failed when
  // AVX2 was turned on.
  static_assert(N % 2 == 0,
                "Load() from half floats requires even-sized vector arrays.");
  // Iterate through mock AVX vectors, each composed of 16 half-floats.
  for (int i = 0; i < N * kAvxWidth; ++i) {
    vectors[i / 8].ymm[i % 8] = source[i].DebugToFloat();
  }
}
#endif
#endif
// The following operations are mostly syntax sugar, so they do not need
// architecture-specific implementations.

// `vec += a * b` lowers to a single FMA via AddProductOf().
DRAGNN_AVXVA_ALWAYS_INLINE AvxFloatVec &AvxFloatVec::operator+=(
    const internal::AvxMultiplyExpr &to_add) {
  AddProductOf(to_add.a, to_add.b);
  return *this;
}

// Builds the lazy multiply expression; evaluation is deferred so it can be
// fused with a subsequent add.
DRAGNN_AVXVA_ALWAYS_INLINE internal::AvxMultiplyExpr operator*(
    const AvxFloatVec &a, const AvxFloatVec &b) {
  return internal::AvxMultiplyExpr{a, b};
}

// `a * b + v`, evaluated as v += a * b (one FMA where available).
DRAGNN_AVXVA_ALWAYS_INLINE AvxFloatVec operator+(
    const internal::AvxMultiplyExpr &expr, const AvxFloatVec &v) {
  AvxFloatVec result = v;
  result += expr;
  return result;
}

// Element-wise binary operators, implemented via the compound assignments.
DRAGNN_AVXVA_ALWAYS_INLINE AvxFloatVec operator+(const AvxFloatVec &a,
                                                 const AvxFloatVec &b) {
  AvxFloatVec result = a;
  result += b;
  return result;
}

DRAGNN_AVXVA_ALWAYS_INLINE AvxFloatVec operator/(const AvxFloatVec &a,
                                                 const AvxFloatVec &b) {
  AvxFloatVec result = a;
  result /= b;
  return result;
}

DRAGNN_AVXVA_ALWAYS_INLINE AvxFloatVec operator-(const AvxFloatVec &a,
                                                 const AvxFloatVec &b) {
  AvxFloatVec result = a;
  result -= b;
  return result;
}
}
// namespace runtime
}
// namespace dragnn
}
// namespace syntaxnet
#undef DRAGNN_AVXVA_ALWAYS_INLINE
#undef DRAGNN_AVXVA_INLINED_UNROLLED
#endif // DRAGNN_RUNTIME_MATH_AVX_VECTOR_ARRAY_H_
research/syntaxnet/dragnn/runtime/math/avx_vector_array_test.cc
deleted
100644 → 0
View file @
a4bb31d0
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
#include "dragnn/runtime/math/avx_vector_array.h"
#include <cmath>
#include "dragnn/runtime/test/helpers.h"
#include "tensorflow/core/platform/test.h"
namespace
syntaxnet
{
namespace
dragnn
{
namespace
runtime
{
namespace
{
// A round trip through Load() then Store() must reproduce the input exactly.
TEST(AvxVectorTest, LoadAndStore) {
  UniqueVector<float> src(kAvxWidth);
  UniqueVector<float> dst(kAvxWidth);
  InitRandomVector(*src);
  InitRandomVector(*dst);  // Pre-fill so stale zeros can't mask a failure.
  AvxFloatVec reg;
  reg.Load(src->data());
  reg.Store(dst->data());
  for (int lane = 0; lane < kAvxWidth; ++lane) {
    EXPECT_EQ((*src)[lane], (*dst)[lane]);
  }
}
// Test flooring with assignment, just to make the compiler not erase aliases.
TEST(AvxVectorTest, AssignmentAndFloor) {
  UniqueVector<float> src(kAvxWidth);
  UniqueVector<float> copied(kAvxWidth);
  UniqueVector<float> floored(kAvxWidth);
  InitRandomVector(*src);
  InitRandomVector(*copied);
  AvxFloatVec reg;
  reg.Load(src->data());
  AvxFloatVec snapshot = reg;  // Copy taken before mutating `reg`.
  reg.Floor();
  reg.Store(floored->data());
  snapshot.Store(copied->data());
  for (int lane = 0; lane < kAvxWidth; ++lane) {
    // The copy must be unaffected by Floor() on the original register.
    EXPECT_EQ((*src)[lane], (*copied)[lane]);
    EXPECT_EQ(floor((*src)[lane]), (*floored)[lane]);
  }
}
TEST(AvxVectorTest, ClampTest) {
  // Tracks whether the fuzzer produced any value outside the clamp range;
  // without this the test could pass vacuously.
  bool modified = false;
  const auto clamp_vec = [](AvxFloatVec *vec) { vec->Clamp(-0.314f, 0.314f); };
  const auto check_clamped = [&modified](float input_value,
                                         float output_value) {
    modified = modified || input_value < -0.314 || input_value > 0.314;
    EXPECT_EQ(fmax(-0.314f, fmin(0.314f, input_value)), output_value);
  };
  AvxVectorFuzzTest(clamp_vec, check_clamped);
  EXPECT_TRUE(modified)
      << "No values fell outside test range for ClampTest().";
}
// LoadConstVector() must broadcast the scalar into every lane.
TEST(AvxVectorTest, LoadConstAndStore) {
  UniqueVector<float> dst(kAvxWidth);
  InitRandomVector(*dst);  // Pre-fill so a no-op Store can't pass by luck.
  AvxFloatVec reg;
  reg.LoadConstVector(3.14f);
  reg.Store(dst->data());
  for (int lane = 0; lane < kAvxWidth; ++lane) {
    EXPECT_EQ((*dst)[lane], 3.14f);
  }
}
// Adding a vector to itself must double every element.
TEST(AvxVectorTest, AddTest) {
  const auto double_in_place = [](AvxFloatVec *vec) { (*vec) += *vec; };
  const auto check_doubled = [](float input_value, float output_value) {
    EXPECT_EQ(input_value * 2, output_value);
  };
  AvxVectorFuzzTest(double_in_place, check_doubled);
}
// Subtracting a constant-1 vector must shift every element down by one.
TEST(AvxVectorTest, SubtractTest) {
  const auto subtract_one = [](AvxFloatVec *vec) {
    AvxFloatVec ones;
    ones.LoadConstVector(1.0f);
    (*vec) -= ones;
  };
  const auto check_shifted = [](float input_value, float output_value) {
    EXPECT_EQ(input_value - 1.0f, output_value);
  };
  AvxVectorFuzzTest(subtract_one, check_shifted);
}
// Dividing a constant-1 vector by the input must produce reciprocals.
TEST(AvxVectorTest, DivideTest) {
  const auto reciprocal = [](AvxFloatVec *vec) {
    AvxFloatVec numerator;
    numerator.LoadConstVector(1.0f);
    numerator /= *vec;
    *vec = numerator;
  };
  const auto check_reciprocal = [](float input_value, float output_value) {
    EXPECT_EQ(1.0f / input_value, output_value);
  };
  AvxVectorFuzzTest(reciprocal, check_reciprocal);
}
// This is a really basic test; half of the purpose is to ensure that the float
// API is still OK (i.e. compiles) for odd-sized arrays. If you try to add a
// call to array.Load(TruncatedFloat16 *source), it should produce a compiler
// error.
// A single-vector array must round-trip kAvxWidth floats through Load/Store.
TEST(AvxFloatVecArrayTest, SingletonArrayLoadsAndStores) {
  AvxFloatVecArray<1> singleton;
  UniqueVector<float> source(kAvxWidth);
  UniqueVector<float> destination(kAvxWidth);
  InitRandomVector(*source);
  InitRandomVector(*destination);  // Noise, so Store() has to overwrite it.
  singleton.Load(source->data());
  singleton.Store(destination->data());
  for (int lane = 0; lane < kAvxWidth; ++lane) {
    EXPECT_EQ((*source)[lane], (*destination)[lane]);
  }
}
// Packs bfloat16 values in FastUnpackPermutation() order, then checks that
// Load()/Store() decompresses them back into natural order.
TEST(AvxFloatVecArrayTest, LoadTruncatedFloat16) {
  const int kNumValues = 2 * kAvxWidth;
  AvxFloatVecArray<2> array;
  UniqueVector<TruncatedFloat16> packed(kNumValues);
  UniqueVector<float> unpacked(kNumValues);
  for (int i = 0; i < kNumValues; ++i) {
    const int permuted = FastUnpackPermutation(i);
    (*packed)[i] = TruncatedFloat16::DebugFromFloat(permuted / 10.0);
  }
  // Ensure that state persisted from other tests won't cause this test to
  // erroneously pass.
  array.LoadConstVector(-1.0f);
  array.Load(packed->data());
  array.Store(unpacked->data());
  for (int i = 0; i < kNumValues; ++i) {
    ASSERT_NEAR((*unpacked)[i], i / 10.0, 0.01);
  }
}
// Round-trips IEEE half-precision values through Load()/Store(); only runs
// when the binary was compiled with F16C support.
TEST(AvxFloatVecArrayTest, LoadIeeeFloat16) {
#if defined(__F16C__)
  const int kNumValues = 2 * kAvxWidth;
  AvxFloatVecArray<2> array;
  UniqueVector<IeeeFloat16> packed(kNumValues);
  UniqueVector<float> unpacked(kNumValues);
  for (int i = 0; i < kNumValues; ++i) {
    (*packed)[i] = IeeeFloat16::DebugFromFloat(i / 10.0);
  }
  // Ensure that state persisted from other tests won't cause this test to
  // erroneously pass.
  array.LoadConstVector(-1.0f);
  array.Load(packed->data());
  array.Store(unpacked->data());
  for (int i = 0; i < kNumValues; ++i) {
    ASSERT_NEAR((*unpacked)[i], i / 10.0, 0.01);
  }
#else
  LOG(INFO) << "Test binary wasn't compiled with F16C support, so skipping "
            << "this test.";
#endif
}
// FastUnpackPermutation() must agree with this explicit permutation table.
TEST(AvxFloatVecArrayTest, PermutationFunctionIsEqualToTable) {
  const std::vector<int> kExpected = {0, 1, 2, 3, 8,  9,  10, 11,
                                      4, 5, 6, 7, 12, 13, 14, 15};
  for (int index = 0; index < kAvxWidthHalfPrecision; ++index) {
    EXPECT_EQ(FastUnpackPermutation(index), kExpected[index]);
  }
}
}
// namespace
}
// namespace runtime
}
// namespace dragnn
}
// namespace syntaxnet
research/syntaxnet/dragnn/runtime/math/eigen.h
deleted
100644 → 0
View file @
a4bb31d0
// Copyright 2018 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
// Compatibility support for Eigen.
#ifndef DRAGNN_RUNTIME_MATH_EIGEN_H_
#define DRAGNN_RUNTIME_MATH_EIGEN_H_
#include "dragnn/runtime/alignment.h"
#include "dragnn/runtime/math/types.h"
#include "third_party/eigen3/Eigen/Core"
namespace
syntaxnet
{
namespace
dragnn
{
namespace
runtime
{
namespace
internal
{
// Returns a combination of bit-options for Eigen matrices.  AutoAlign lets
// Eigen choose aligned operations where possible; RowMajor matches the
// runtime's row-major matrix layout.
constexpr int GetEigenMatrixOptions() {
  return Eigen::AutoAlign | Eigen::RowMajor;
}

// Returns a combination of bit-options for Eigen maps of runtime types.
// Claiming Eigen::Aligned is safe because runtime buffers are at least as
// aligned as Eigen requires (verified at compile time below).
constexpr int GetEigenMapOptions() {
  static_assert(kAlignmentBytes >= EIGEN_MAX_ALIGN_BYTES,
                "Runtime alignment is not compatible with Eigen alignment.");
  return Eigen::Aligned;
}
// Eigen matrix and (row) vector types. Don't use these directly; instead use
// the public Map types and functions below to wrap runtime types.
template <class T>
using EigenVector =
    Eigen::Matrix<T, 1, Eigen::Dynamic, GetEigenMatrixOptions()>;

template <class T>
using EigenMatrix = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic,
                                  GetEigenMatrixOptions()>;

// Eigen stride for matrix types: dynamic outer (row-to-row) stride with unit
// inner stride, since runtime matrix rows may be padded for alignment.
using EigenMatrixStride = Eigen::Stride<Eigen::Dynamic, 1>;

// Returns the Eigen stride associated with the |matrix|.
template <class T>
EigenMatrixStride GetEigenMatrixStride(MatrixImpl<T> matrix) {
  return EigenMatrixStride(matrix.row_stride(), 1);
}
}
// namespace internal
// Eigen wrappers around a runtime-allocated matrix or (row) vector.  These
// maps are non-owning views; the wrapped runtime buffer must outlive the map.
template <class T>
using EigenVectorMap =
    Eigen::Map<const internal::EigenVector<T>, internal::GetEigenMapOptions()>;

template <class T>
using MutableEigenVectorMap =
    Eigen::Map<internal::EigenVector<T>, internal::GetEigenMapOptions()>;

// Matrix maps carry an explicit stride (see internal::EigenMatrixStride)
// because runtime matrix rows may be padded for alignment.
template <class T>
using EigenMatrixMap =
    Eigen::Map<const internal::EigenMatrix<T>, internal::GetEigenMapOptions(),
               internal::EigenMatrixStride>;

template <class T>
using MutableEigenMatrixMap =
    Eigen::Map<internal::EigenMatrix<T>, internal::GetEigenMapOptions(),
               internal::EigenMatrixStride>;
// Returns an Eigen wrapper around the |vector| or |matrix|.  The wrapper is
// a view; it aliases the runtime buffer and performs no copies.
template <class T>
EigenVectorMap<T> AsEigenMap(Vector<T> vector) {
  const auto size = vector.size();
  return EigenVectorMap<T>(vector.data(), size);
}

template <class T>
MutableEigenVectorMap<T> AsEigenMap(MutableVector<T> vector) {
  const auto size = vector.size();
  return MutableEigenVectorMap<T>(vector.data(), size);
}
// Matrix overloads of AsEigenMap(): wrap a runtime matrix in an Eigen map,
// propagating the runtime row stride so padded rows are handled correctly.
template <class T>
EigenMatrixMap<T> AsEigenMap(Matrix<T> matrix) {
  const auto stride = internal::GetEigenMatrixStride(matrix);
  return EigenMatrixMap<T>(matrix.data(), matrix.num_rows(),
                           matrix.num_columns(), stride);
}

template <class T>
MutableEigenMatrixMap<T> AsEigenMap(MutableMatrix<T> matrix) {
  const auto stride = internal::GetEigenMatrixStride(matrix);
  return MutableEigenMatrixMap<T>(matrix.data(), matrix.num_rows(),
                                  matrix.num_columns(), stride);
}
}
// namespace runtime
}
// namespace dragnn
}
// namespace syntaxnet
#endif // DRAGNN_RUNTIME_MATH_EIGEN_H_
research/syntaxnet/dragnn/runtime/math/eigen_test.cc
deleted
100644 → 0
View file @
a4bb31d0
// Copyright 2018 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
#include "dragnn/runtime/math/eigen.h"
#include <vector>
#include "dragnn/core/test/generic.h"
#include "dragnn/runtime/math/types.h"
#include "dragnn/runtime/test/helpers.h"
#include "tensorflow/core/platform/test.h"
namespace
syntaxnet
{
namespace
dragnn
{
namespace
runtime
{
namespace
{
// Expects that two pointers point to the same address.
void ExpectSameAddress(const void *lhs, const void *rhs) {
  EXPECT_EQ(lhs, rhs);
}
// Expects that the |vector| has the |values|.
void ExpectValues(MutableVector<float> vector,
                  const std::vector<float> &values) {
  ASSERT_EQ(vector.size(), values.size());
  for (size_t index = 0; index < values.size(); ++index) {
    EXPECT_EQ(vector[index], values[index]);
  }
}
// Expects that the Eigen |matrix| has the |values| (row-major nested lists).
template <class EigenMatrix>
void ExpectValues(const EigenMatrix &matrix,
                  const std::vector<std::vector<float>> &values) {
  ASSERT_EQ(matrix.rows(), values.size());
  for (int r = 0; r < matrix.rows(); ++r) {
    const std::vector<float> &expected_row = values[r];
    ASSERT_EQ(matrix.cols(), expected_row.size());
    for (int c = 0; c < matrix.cols(); ++c) {
      EXPECT_EQ(matrix(r, c), expected_row[c]);
    }
  }
}
// Tests that an Eigen vector map references the same memory as the underlying
// runtime vector: writes through any one view must be visible through all of
// the others.
TEST(EigenTest, Vector) {
  UniqueVector<float> vector({1.0, 2.0, 3.0, 4.0});

  // Const map: same address, same initial contents.
  EigenVectorMap<float> const_eigen_vector = AsEigenMap(Vector<float>(*vector));
  ExpectSameAddress(const_eigen_vector.data(), vector->data());
  ExpectValues(const_eigen_vector, {{1.0, 2.0, 3.0, 4.0}});

  // Mutable map over the same buffer.
  MutableEigenVectorMap<float> mutable_eigen_vector = AsEigenMap(*vector);
  ExpectSameAddress(mutable_eigen_vector.data(), vector->data());
  ExpectValues(mutable_eigen_vector, {{1.0, 2.0, 3.0, 4.0}});

  // Write into the runtime vector and read from the other views.
  (*vector)[0] = 10.0;
  (*vector)[1] = 20.0;
  (*vector)[2] = 30.0;
  (*vector)[3] = 40.0;
  ExpectValues(const_eigen_vector, {{10.0, 20.0, 30.0, 40.0}});
  ExpectValues(mutable_eigen_vector, {{10.0, 20.0, 30.0, 40.0}});

  // Write into the mutable Eigen vector and read from the other views.
  mutable_eigen_vector << 100.0, 200.0, 300.0, 400.0;
  ExpectValues(const_eigen_vector, {{100.0, 200.0, 300.0, 400.0}});
  ExpectValues(*vector, {100.0, 200.0, 300.0, 400.0});
}
// Tests that an Eigen matrix map references the same memory as the underlying
// runtime vector: writes through any one view must be visible through all of
// the others.
TEST(EigenTest, Matrix) {
  UniqueMatrix<float> matrix({{1.0, 2.0, 3.0},  //
                              {4.0, 5.0, 6.0},  //
                              {7.0, 8.0, 9.0}});

  // Const map: same address as the first row, same initial contents.
  EigenMatrixMap<float> const_eigen_matrix = AsEigenMap(Matrix<float>(*matrix));
  ExpectSameAddress(const_eigen_matrix.data(), matrix->row(0).data());
  ExpectValues(const_eigen_matrix, {{1.0, 2.0, 3.0},  //
                                    {4.0, 5.0, 6.0},  //
                                    {7.0, 8.0, 9.0}});

  // Mutable map over the same buffer.
  MutableEigenMatrixMap<float> mutable_eigen_matrix = AsEigenMap(*matrix);
  ExpectSameAddress(mutable_eigen_matrix.data(), matrix->row(0).data());
  ExpectValues(mutable_eigen_matrix, {{1.0, 2.0, 3.0},  //
                                      {4.0, 5.0, 6.0},  //
                                      {7.0, 8.0, 9.0}});

  // Write into the runtime matrix and read from the other views.
  matrix->row(0)[0] = 10.0;
  matrix->row(0)[1] = 20.0;
  matrix->row(0)[2] = 30.0;
  matrix->row(1)[0] = 40.0;
  matrix->row(1)[1] = 50.0;
  matrix->row(1)[2] = 60.0;
  matrix->row(2)[0] = 70.0;
  matrix->row(2)[1] = 80.0;
  matrix->row(2)[2] = 90.0;
  ExpectValues(const_eigen_matrix, {{10.0, 20.0, 30.0},  //
                                    {40.0, 50.0, 60.0},  //
                                    {70.0, 80.0, 90.0}});
  ExpectValues(mutable_eigen_matrix, {{10.0, 20.0, 30.0},  //
                                      {40.0, 50.0, 60.0},  //
                                      {70.0, 80.0, 90.0}});

  // Write into the mutable Eigen matrix and read from the other views.
  mutable_eigen_matrix << 100.0, 200.0, 300.0, 400.0, 500.0, 600.0, 700.0,
      800.0, 900.0;
  ExpectValues(const_eigen_matrix, {{100.0, 200.0, 300.0},  //
                                    {400.0, 500.0, 600.0},  //
                                    {700.0, 800.0, 900.0}});
  ExpectValues(matrix->row(0), {100.0, 200.0, 300.0});
  ExpectValues(matrix->row(1), {400.0, 500.0, 600.0});
  ExpectValues(matrix->row(2), {700.0, 800.0, 900.0});
}
}
// namespace
}
// namespace runtime
}
// namespace dragnn
}
// namespace syntaxnet
research/syntaxnet/dragnn/runtime/math/float16_types.h
deleted
100644 → 0
View file @
a4bb31d0
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
// Declares 16-bit floating point types.
#ifndef DRAGNN_RUNTIME_MATH_FLOAT16_TYPES_H_
#define DRAGNN_RUNTIME_MATH_FLOAT16_TYPES_H_
#if defined(__F16C__)
#include <emmintrin.h>
#endif
#include "syntaxnet/base.h"
#include "tensorflow/core/lib/core/casts.h"
namespace
syntaxnet
{
namespace
dragnn
{
namespace
runtime
{
// Represents a truncated 16-bit floating point value. This corresponds to
// `bfloat16` in TensorFlow. It just chops the last 16 least-significant bits
// off the significand of a 32-bit floating point value, leaving 7 significand
// bits, 8 exponent bits, and 1 sign bit.
struct
TruncatedFloat16
{
// Slow unpacking routine. Use avx_vector_array.h for normal operation.
float
DebugToFloat
()
const
{
uint32
upcast
=
bits
;
upcast
<<=
16
;
return
tensorflow
::
bit_cast
<
float
>
(
upcast
);
}
// Slow packing routine. Use avx_vector_array.h for normal operation.
static
TruncatedFloat16
DebugFromFloat
(
float
value
)
{
uint32
float_bits
=
tensorflow
::
bit_cast
<
uint32
>
(
value
);
return
TruncatedFloat16
{
static_cast
<
uint16
>
(
float_bits
>>
16
)};
}
uint16
bits
;
};
static_assert
(
sizeof
(
TruncatedFloat16
)
==
sizeof
(
uint16
),
"Bad struct size"
);
// Currently, only CPUs with the F16C instruction set are supported. All use of
// this struct should be flag-guarded.
//
// If this becomes a problem, we can implement this method with Eigen's
// CUDA/Half.h.
#if defined(__F16C__)
// Represents an IEEE-754 16-bit floating point value. This has 10 significand
// bits, 5 exponent bits, and 1 sign bit.
//
// TODO(googleuser): Either add compatibility support, or delete this code if
// it turns out not to be helpful.
struct
IeeeFloat16
{
// Slow unpacking routine. Use avx_vector_array.h for normal operation.
float
DebugToFloat
()
const
{
return
_cvtsh_ss
(
bits
);
}
// Slow packing routine. Use avx_vector_array.h for normal operation.
static
IeeeFloat16
DebugFromFloat
(
float
value
)
{
return
IeeeFloat16
{
_cvtss_sh
(
value
,
0
)};
}
uint16
bits
;
};
static_assert
(
sizeof
(
IeeeFloat16
)
==
sizeof
(
uint16
),
"Bad struct size"
);
#endif
}
// namespace runtime
}
// namespace dragnn
}
// namespace syntaxnet
#endif // DRAGNN_RUNTIME_MATH_FLOAT16_TYPES_H_
research/syntaxnet/dragnn/runtime/math/float16_types_test.cc
deleted
100644 → 0
View file @
a4bb31d0
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
#include "dragnn/runtime/math/float16_types.h"
#include "tensorflow/core/platform/test.h"
namespace
syntaxnet
{
namespace
dragnn
{
namespace
runtime
{
namespace
{
// C++11 doesn't support binary literals like 0b01001, so add a helper. :(
// Parses a 16-character string of '0'/'1' into the corresponding uint16,
// most-significant bit first.  CHECK-fails on malformed input.
uint16 ParseBinaryString(const string &bits) {
  CHECK_EQ(bits.size(), 16) << "ParseBinaryString expects full 16-bit values";
  uint16 result = 0;
  for (const char bit : bits) {
    CHECK(bit == '0' || bit == '1') << "String must be 0's and 1's.";
    result = (result << 1) | (bit == '1' ? 1 : 0);
  }
  return result;
}
// Round-trips a range of values through IeeeFloat16 and checks they come back
// close to the originals, and that at least one value was actually inexact
// (otherwise the precision loss isn't being exercised).
TEST(Float16TypesTest, IeeeFloat16Accuracy) {
#if defined(__F16C__)
  bool saw_rounding = false;
  for (int tenths = -100; tenths < 100; ++tenths) {
    const float original = tenths / 10.0f;
    const IeeeFloat16 packed = IeeeFloat16::DebugFromFloat(original);
    const float round_tripped = packed.DebugToFloat();
    EXPECT_NEAR(original, round_tripped, 0.01);
    saw_rounding = saw_rounding || (original != round_tripped);
  }
  EXPECT_TRUE(saw_rounding);
#else
  LOG(INFO) << "Test binary wasn't compiled with F16C support, so skipping "
            << "this test.";
#endif
}
// Same as IeeeFloat16Accuracy, but for TruncatedFloat16 (bfloat16), which has
// fewer significand bits and hence a looser tolerance.
TEST(Float16TypesTest, TruncatedAccuracy) {
  bool saw_rounding = false;
  for (int tenths = -100; tenths < 100; ++tenths) {
    const float original = tenths / 10.0f;
    const TruncatedFloat16 packed = TruncatedFloat16::DebugFromFloat(original);
    const float round_tripped = packed.DebugToFloat();
    EXPECT_NEAR(original, round_tripped, 0.06);
    saw_rounding = saw_rounding || (original != round_tripped);
  }
  EXPECT_TRUE(saw_rounding);
}
// Spot-checks TruncatedFloat16 against hand-written bit patterns for +/-1.0.
TEST(Float16TypesTest, TruncatedKnownBinaryRepresentation) {
  const uint16 minus_one_bits = ParseBinaryString("1011111110000000");
  const uint16 plus_one_bits = ParseBinaryString("0011111110000000");
  EXPECT_EQ((TruncatedFloat16{minus_one_bits}).DebugToFloat(), -1.0f);
  EXPECT_EQ((TruncatedFloat16{plus_one_bits}).DebugToFloat(), 1.0f);
}
// Spot-checks IeeeFloat16 against hand-written bit patterns for +/-1.0.
TEST(Float16TypesTest, IeeeFloat16KnownBinaryRepresentation) {
#if defined(__F16C__)
  const uint16 minus_one_bits = ParseBinaryString("1011110000000000");
  const uint16 plus_one_bits = ParseBinaryString("0011110000000000");
  EXPECT_EQ((IeeeFloat16{minus_one_bits}).DebugToFloat(), -1.0f);
  EXPECT_EQ((IeeeFloat16{plus_one_bits}).DebugToFloat(), 1.0f);
#else
  LOG(INFO) << "Test binary wasn't compiled with F16C support, so skipping "
            << "this test.";
#endif
}
}
// namespace
}
// namespace runtime
}
// namespace dragnn
}
// namespace syntaxnet
research/syntaxnet/dragnn/runtime/math/sgemvv.h
deleted
100644 → 0
View file @
a4bb31d0
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
// Computes `[y_1, y_2, ...] = M * [v_1, v_2, ...] + [b_1, b_2, ...]`, where
//
// M is a `m x n` dense matrix.
// v_i are `n`-dimensional dense vectors.
// b_i and y_i are `m`-dimensional dense vectors.
//
// Unfortunately even larger (e.g. 128x128) matrix sizes are not sufficient to
// hide the latency of a function call. So the entire implementation needs to
// live in this header file. Please make sure to use all of the optimization
// flags mentioned in the BUILD file in any client libraries.
#ifndef DRAGNN_RUNTIME_MATH_SGEMVV_H_
#define DRAGNN_RUNTIME_MATH_SGEMVV_H_
#if defined(__SSE2__)
#include <xmmintrin.h>
#endif
#include "dragnn/runtime/math/avx_vector_array.h"
#include "dragnn/runtime/math/types.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/status.h"
#define DRAGNN_SGEMVV_ATTRIBUTE_ALWAYS_INLINE __attribute__((always_inline))
#ifdef __clang__
#define DRAGNN_SGEMVV_GCC_UNROLL
#else
#define DRAGNN_SGEMVV_GCC_UNROLL __attribute__((optimize("unroll-loops")))
#endif
namespace
syntaxnet
{
namespace
dragnn
{
namespace
runtime
{
// Represents `v, b` from one operation `y = M * v + b`.
template
<
int
num_ops
>
struct
SgemvInputBatch
{
const
float
*
input
[
num_ops
];
const
float
*
initial
[
num_ops
];
};
template
<
int
num_ops
>
struct
SgemvOutputBatch
{
float
*
output
[
num_ops
];
};
// Matrix argument for the SGEMV/SGEMVV operation. Based on row-batched
// column-major matrices, but pulls the batch size into a template argument
// so code can be compiled more efficiently.
template <int sse_batch_size, typename ElementType = float>
class SgemvMatrix final {
 public:
  // Convenience type alias.
  using MatrixType =
      BlockedMatrix<ElementType, BlockedMatrixFormat::kRowBlockedColumnMajor>;

  // Creates an empty SgemvMatrix.
  SgemvMatrix() = default;

  // Initializes the new matrix. Returns an InvalidArgumentError if the block
  // size of `matrix` is not equal to `sse_batch_size.
  ::tensorflow::Status Initialize(const MatrixType &matrix);

  // Computes the matrix-vector product with a set of other inputs. See
  // top-level comment for the general algorithm.  This unmasked variant
  // requires outputs padded to sse_batch_size; -1 is passed as the (unused)
  // output_vector_elements.
  template <int num_ops, int lookahead_1 = 8, int lookahead_2 = 8>
  void DRAGNN_SGEMVV_ATTRIBUTE_ALWAYS_INLINE DRAGNN_SGEMVV_GCC_UNROLL
  MatrixMultiVectorProduct(const SgemvInputBatch<num_ops> &inputs,
                           SgemvOutputBatch<num_ops> *outputs) const {
    MatrixMultiVectorProductImpl<num_ops, /*mask_input_output=*/false,
                                 /*read_initial=*/true, lookahead_1,
                                 lookahead_2>(inputs, -1, outputs);
  }

  // Computes the matrix-vector product with a set of other inputs. See
  // top-level comment for the general algorithm. This variant allows another
  // parameter, `output_vector_elements`, to write to outputs which are a
  // multiple of kAvxWidth (8 floats, or 32 bytes) but not necessarily
  // sse_batch_size. It is slightly slower, but probably more than noise.
  //
  // |lookahead_1| and |lookahead_2| parameters control prefetching, and should
  // usually be tuned via a script. They issue prefetch instructions that are
  // `lookahead_1 * sse_batch_size` values ahead of the current matrix entry
  // being read, if `lookahead_1 != 0` (and `(lookahead_1 + lookahead_2) *
  // sse_batch_size` values, if lookahead_2 != 0). To reiterate, all prefetching
  // can be disabled by setting |lookahead_1| to 0, or the second prefetch can
  // be disabled by setting |lookahead_2| to 0.
  template <int num_ops, int lookahead_1 = 8, int lookahead_2 = 8>
  void DRAGNN_SGEMVV_ATTRIBUTE_ALWAYS_INLINE DRAGNN_SGEMVV_GCC_UNROLL
  MaskedMatrixMultiVectorProduct(const SgemvInputBatch<num_ops> &inputs,
                                 int output_vector_elements,
                                 SgemvOutputBatch<num_ops> *outputs) const {
    MatrixMultiVectorProductImpl<num_ops, /*mask_input_output=*/true,
                                 /*read_initial=*/true, lookahead_1,
                                 lookahead_2>(inputs, output_vector_elements,
                                              outputs);
  }

  // Like the above, but assumes existing values are zero instead of reading
  // them.
  template <int num_ops>
  void DRAGNN_SGEMVV_ATTRIBUTE_ALWAYS_INLINE DRAGNN_SGEMVV_GCC_UNROLL
  MaskedMatrixMultiVectorProductNoInitial(
      const SgemvInputBatch<num_ops> &inputs, int output_vector_elements,
      SgemvOutputBatch<num_ops> *outputs) const {
    MatrixMultiVectorProductImpl<num_ops, /*mask_input_output=*/true,
                                 /*read_initial=*/false>(
        inputs, output_vector_elements, outputs);
  }

  // Read-only accessor.
  const MatrixType &matrix() const { return matrix_; }

 private:
  // Shared implementation behind the three public product methods; the bool
  // template parameters select masking and initial-value behavior at compile
  // time so each public entry point compiles to a specialized loop.
  template <int num_ops, bool mask_input_output, bool read_initial,
            int lookahead_1 = 8, int lookahead_2 = 8>
  DRAGNN_SGEMVV_ATTRIBUTE_ALWAYS_INLINE DRAGNN_SGEMVV_GCC_UNROLL void
  MatrixMultiVectorProductImpl(const SgemvInputBatch<num_ops> &inputs,
                               int output_vector_elements,
                               SgemvOutputBatch<num_ops> *outputs) const;

  // The wrapped blocked matrix.
  MatrixType matrix_;
};
// Implementation details.
//
// Computes `y_i = M * v_i (+ b_i)` for all `num_ops` operations in one pass
// over the matrix, processing `sse_batch_size` output rows at a time.
template <int sse_batch_size, typename ElementType>
template <int num_ops, bool mask_input_output, bool read_initial,
          int lookahead_1, int lookahead_2>
inline void DRAGNN_SGEMVV_ATTRIBUTE_ALWAYS_INLINE DRAGNN_SGEMVV_GCC_UNROLL
SgemvMatrix<sse_batch_size, ElementType>::MatrixMultiVectorProductImpl(
    const SgemvInputBatch<num_ops> &inputs, int output_vector_elements,
    SgemvOutputBatch<num_ops> *outputs) const {
  static_assert(sse_batch_size % kAvxWidth == 0,
                "sse_batch_size must be a multiple of kAvxWidth (8).");
  if (mask_input_output) {
    DCHECK_EQ(output_vector_elements % kAvxWidth, 0)
        << "output_vector_elements must be padded to alignment";
  }
  // Walks the blocked matrix storage linearly; advanced by sse_batch_size per
  // column so each block of rows is read contiguously.
  const ElementType *curr_matrix_ptr = matrix_.vector(0).data();
  // Loop over blocks of output rows. Each block of output rows will get a
  // partial sum of the [matrix-vector] dot product, where the range of that
  // partial sum is designated by start_col and end_col.
  for (int row_start = 0; row_start < matrix_.num_rows();
       row_start += sse_batch_size) {
    // Number of AVX vectors of this block that fall inside the (masked)
    // output; only meaningful when mask_input_output is true.
    const int load_store_max_idx =
        (output_vector_elements - row_start) / kAvxWidth;
    AvxFloatVecArray<sse_batch_size / kAvxWidth> accumulators[num_ops];
    // Read inputs: seed each accumulator with b_i, or with zeros when
    // read_initial is false.
    for (int op = 0; op < num_ops; ++op) {
      if (read_initial) {
        if (mask_input_output) {
          accumulators[op].Load(&inputs.initial[op][row_start],
                                load_store_max_idx);
        } else {
          accumulators[op].Load(&inputs.initial[op][row_start]);
        }
      } else {
        accumulators[op].LoadConstVector(0.0f);
      }
    }
    // Compute matrix-vector product.
    for (int col = 0; col < matrix_.num_columns(); ++col) {
      if (lookahead_1 != 0) {
#if defined(__SSE2__)
        _mm_prefetch(curr_matrix_ptr + lookahead_1 * sse_batch_size,
                     _MM_HINT_T0);
        if (lookahead_2 != 0) {
          _mm_prefetch(
              curr_matrix_ptr + (lookahead_1 + lookahead_2) * sse_batch_size,
              _MM_HINT_T0);
        }
#endif
      }
      // These are the coefficients from each vector at column `col` (just
      // broadcast over the whole AVX array).
      AvxFloatVec weights[num_ops];
      for (int op = 0; op < num_ops; ++op) {
        weights[op].LoadConstVector(inputs.input[op][col]);
      }
      // Loop over each AVX vector and add the current sub-product.
      AvxFloatVecArray<sse_batch_size / kAvxWidth> matrix_block;
      matrix_block.Load(curr_matrix_ptr);
      curr_matrix_ptr += sse_batch_size;
      for (int row_offset = 0; row_offset < sse_batch_size / kAvxWidth;
           row_offset++) {
        for (int op = 0; op < num_ops; ++op) {
          accumulators[op].vectors[row_offset].AddProductOf(
              weights[op], matrix_block.vectors[row_offset]);
        }
      }
    }
    // Save the results.
    for (int op = 0; op < num_ops; ++op) {
      if (mask_input_output) {
        accumulators[op].Store(&outputs->output[op][row_start],
                               load_store_max_idx);
      } else {
        accumulators[op].Store(&outputs->output[op][row_start]);
      }
    }
  }
}
// Validates that |matrix| is blocked compatibly with this SgemvMatrix's
// compile-time batch size, then adopts it.  Returns InvalidArgument on a
// block-size mismatch.
template <int sse_batch_size, typename ElementType>
::tensorflow::Status SgemvMatrix<sse_batch_size, ElementType>::Initialize(
    const SgemvMatrix<sse_batch_size, ElementType>::MatrixType &matrix) {
  if (matrix.block_size() != sse_batch_size) {
    return ::tensorflow::errors::InvalidArgument(
        "Blocked matrix block_size (", matrix.block_size(),
        ") must be equal to sse_batch_size (", sse_batch_size, ")");
  }
  // Copies the (non-owning) blocked-matrix wrapper.
  matrix_ = matrix;
  return ::tensorflow::Status::OK();
}
}
// namespace runtime
}
// namespace dragnn
}
// namespace syntaxnet
#undef DRAGNN_SGEMVV_ATTRIBUTE_ALWAYS_INLINE
#undef DRAGNN_SGEMVV_GCC_UNROLL
#endif // DRAGNN_RUNTIME_MATH_SGEMVV_H_
Prev
1
2
3
4
5
6
7
8
9
…
15
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment