Commit edea2b67 authored by Terry Koo

Remove runtime because reasons.

parent a4bb31d0
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
#include "dragnn/runtime/extensions.h"
#include <algorithm>
#include <iterator>
namespace syntaxnet {
namespace dragnn {
namespace runtime {
void ExtensionManager::GetSharedImpl(Deleter deleter, size_t *index) {
// Look for a matching shared extension.
const auto it = std::find_if(
configs_.begin(), configs_.end(), [=](const ExtensionConfig &config) {
return config.is_shared && config.deleter == deleter;
});
if (it != configs_.end()) { // found; use its index
*index = std::distance(configs_.begin(), it);
} else { // missing; add it using the next index
*index = configs_.size();
configs_.emplace_back(/*is_shared=*/true, deleter);
}
}
void ExtensionManager::AddLocalImpl(Deleter deleter, size_t *index) {
*index = configs_.size();
configs_.emplace_back(/*is_shared=*/false, deleter);
}
Extensions::Extensions(Extensions &&that)
: manager_(that.manager_), extensions_(std::move(that.extensions_)) {
that.manager_ = nullptr;
that.extensions_.clear();
}
Extensions &Extensions::operator=(Extensions &&that) {
Clear();
manager_ = that.manager_;
extensions_ = std::move(that.extensions_);
that.manager_ = nullptr;
that.extensions_.clear();
return *this;
}
void Extensions::Reset(const ExtensionManager *manager) {
if (manager == manager_) return; // reuse existing extensions
// Discard current extensions before reassigning the |manager_|.
Clear();
manager_ = manager;
extensions_.assign(manager_->configs_.size(), nullptr);
}
void Extensions::Clear() {
// NB: This works even if the |manager_| is null, because that only happens
// when |extensions_| is empty.
for (size_t index = 0; index < extensions_.size(); ++index) {
manager_->configs_[index].deleter(extensions_[index]);
}
extensions_.clear();
manager_ = nullptr;
}
} // namespace runtime
} // namespace dragnn
} // namespace syntaxnet
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
// Utils for declaring, allocating, and retrieving reusable typed extensions of
// the SessionState. There are two types of extensions:
//
// * Shared extensions, which are shared by all components in a DRAGNN network,
// like the layers in NetworkStates.
//
// * Local extensions, which are private to a particular component in a DRAGNN
// network, like the local operands in NetworkStates.
//
// Extensions are reused across network invocations, so users cannot rely on
// them having any particular state when they are retrieved. For example, a
// std::vector<int> extension could be filled with values from the previous
// invocation when it is retrieved.
//
// To maximize the benefits of reuse, use shared extensions when possible. In
// addition, avoid operations that can deallocate memory. For example, avoid
// resize()-ing a std::vector<std::vector<int>> extension to a smaller size,
// because that deallocates the trailing std::vector<int>s. On the other hand,
// a std::vector<int> extension can be resize()d freely, because that does not
// shrink capacity().
//
// NOTE: Theoretically, shared extensions can be used to pass information down
// the pipeline of components. However, this usage is not supported and is
// unnecessary since components can already communicate via NetworkStates.
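//
// A minimal usage sketch (hypothetical names, for illustration only):
//
//   ExtensionManager manager;
//   SharedExtensionHandle<std::vector<int>> handle;
//   manager.GetShared(&handle);  // during component initialization
//
//   Extensions extensions;
//   extensions.Reset(&manager);
//   std::vector<int> &scratch = extensions.Get(handle);
//   scratch.clear();  // may still hold values from a previous invocation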
#ifndef DRAGNN_RUNTIME_EXTENSIONS_H_
#define DRAGNN_RUNTIME_EXTENSIONS_H_
#include <stddef.h>
#include <stdint.h>
#include <utility>
#include <vector>
#include "tensorflow/core/platform/logging.h"
namespace syntaxnet {
namespace dragnn {
namespace runtime {
// Opaque handles used to access extensions.
template <class T>
class SharedExtensionHandle;
template <class T>
class LocalExtensionHandle;
// A class that manages a set of SessionState extensions.
class ExtensionManager {
public:
// Creates an empty manager.
ExtensionManager() = default;
// Sets |handle| to refer to the shared extension of type |T|, creating it if
// it does not already exist. Calling N times with the same |T| results in N
// handles to the same extension.
template <class T>
void GetShared(SharedExtensionHandle<T> *handle);
// Sets |handle| to refer to a new local extension of type |T|. The extension
// is "local" in the sense that only the caller knows its handle. Calling N
// times with the same |T| results in N handles to N different extensions.
template <class T>
void AddLocal(LocalExtensionHandle<T> *handle);
private:
friend class Extensions;
// Function that can delete an untyped pointer using the proper type. All
// |Deleter|s are pointers to instantiations of DeleteAs<T>() below, so this
// can also be used as a type ID.
using Deleter = void (*)(void *);
// Configuration information for an extension.
struct ExtensionConfig {
ExtensionConfig(bool is_shared, Deleter deleter)
: is_shared(is_shared), deleter(deleter) {}
// Whether the extension is shared or local.
const bool is_shared;
// Extension deleter, which also serves as a type ID.
const Deleter deleter;
};
// Deletes the |object| as a |T|. Every |Deleter| is a pointer to an
// instantiation of this function.
template <class T>
static void DeleteAs(void *object);
// Implements the non-templated part of GetShared(). Sets |index| to the
// index of the extension whose type matches the |deleter|, adding it if it
// does not already exist.
void GetSharedImpl(Deleter deleter, size_t *index);
// Implements the non-templated part of AddLocal(). Adds an extension that
// uses the |deleter| and sets |index| to its index.
void AddLocalImpl(Deleter deleter, size_t *index);
// Ordered list of configurations for all extensions.
std::vector<ExtensionConfig> configs_;
};
// A set of SessionState extensions. The extensions are configured by an
// ExtensionManager, and individual extensions can be accessed using the
// handles produced by the manager.
//
// Note that this class is not thread-safe, so only one thread may access any
// particular instance at a time. In normal usage, this will be attached to a
// SessionState and thus single-threaded access is guaranteed.
class Extensions {
public:
// Creates an empty set of extensions.
Extensions() = default;
// Moves all extensions from |that| to this. Afterwards, the extensions in
// this are address-equal to the extensions originally in |that|.
Extensions(Extensions &&that);
Extensions &operator=(Extensions &&that);
~Extensions() { Clear(); }
// Resets this to an empty set configured by the |manager|. The |manager|
// must live until this is destroyed or Reset(), and should not be modified
// during that time.
void Reset(const ExtensionManager *manager);
// Returns the shared extension associated with the |handle|. Creates the
// extension first via "new T()" if it does not already exist.
template <class T>
T &Get(SharedExtensionHandle<T> handle);
// Returns the local extension associated with the |handle|. Creates the
// extension first via "new T(args)" if it does not already exist.
template <class T, class... Args>
T &Get(LocalExtensionHandle<T> handle, Args &&... args);
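//
// For example (a hypothetical sketch; the constructor arguments are ignored
// once the extension exists):
//
//   LocalExtensionHandle<std::vector<int>> handle;
//   manager.AddLocal(&handle);
//   ...
//   std::vector<int> &buffer = extensions.Get(handle, /*count=*/16, 0);
//   // |buffer| holds 16 zeros on first use, and is reused afterwards.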
private:
// Restores this to a just-default-constructed state.
void Clear();
// Manager of this set of extensions.
const ExtensionManager *manager_ = nullptr;
// Ordered list of extension objects (or null, if not yet created), aligned
// with |manager_->configs_|.
std::vector<void *> extensions_;
};
// Implementation details below.
// An opaque handle to a typed shared extension.
template <class T>
class SharedExtensionHandle {
public:
// Creates an invalid handle.
SharedExtensionHandle() = default;
private:
friend class ExtensionManager;
friend class Extensions;
// Index of this extension in the Extensions.
size_t index_ = SIZE_MAX;
};
// An opaque handle to a typed local extension.
template <class T>
class LocalExtensionHandle {
public:
// Creates an invalid handle.
LocalExtensionHandle() = default;
private:
friend class ExtensionManager;
friend class Extensions;
// Index of this extension in the Extensions.
size_t index_ = SIZE_MAX;
};
template <class T>
void ExtensionManager::DeleteAs(void *object) {
delete reinterpret_cast<T *>(object);
}
template <class T>
void ExtensionManager::GetShared(SharedExtensionHandle<T> *handle) {
GetSharedImpl(&DeleteAs<T>, &handle->index_);
}
template <class T>
void ExtensionManager::AddLocal(LocalExtensionHandle<T> *handle) {
AddLocalImpl(&DeleteAs<T>, &handle->index_);
}
template <class T>
T &Extensions::Get(SharedExtensionHandle<T> handle) {
DCHECK(manager_->configs_[handle.index_].is_shared);
DCHECK_EQ(manager_->configs_[handle.index_].deleter,
&ExtensionManager::DeleteAs<T>);
void *&extension = extensions_[handle.index_];
if (extension == nullptr) extension = new T();
return *reinterpret_cast<T *>(extension);
}
template <class T, class... Args>
T &Extensions::Get(LocalExtensionHandle<T> handle, Args &&... args) {
DCHECK(!manager_->configs_[handle.index_].is_shared);
DCHECK_EQ(manager_->configs_[handle.index_].deleter,
&ExtensionManager::DeleteAs<T>);
void *&extension = extensions_[handle.index_];
if (extension == nullptr) extension = new T(std::forward<Args>(args)...);
return *reinterpret_cast<T *>(extension);
}
} // namespace runtime
} // namespace dragnn
} // namespace syntaxnet
#endif // DRAGNN_RUNTIME_EXTENSIONS_H_
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
#include "dragnn/runtime/extensions.h"
#include <utility>
#include <vector>
#include <gmock/gmock.h>
#include "tensorflow/core/platform/test.h"
namespace syntaxnet {
namespace dragnn {
namespace runtime {
namespace {
using ::testing::ElementsAre;
// Dummy struct for tests.
struct Foo {
Foo() = default;
explicit Foo(float real, int num) : real(real) {
for (int i = 0; i < num; ++i) ints.push_back(i);
}
float real = 0.0;
std::vector<int> ints;
};
// Returns a shared extension handle from the |manager|.
template <class T>
SharedExtensionHandle<T> GetShared(ExtensionManager *manager) {
SharedExtensionHandle<T> handle;
manager->GetShared(&handle);
return handle;
}
// Returns a local extension handle from the |manager|.
template <class T>
LocalExtensionHandle<T> AddLocal(ExtensionManager *manager) {
LocalExtensionHandle<T> handle;
manager->AddLocal(&handle);
return handle;
}
// Tests that GetShared() reuses existing extensions.
TEST(ExtensionManagerTest, GetShared) {
ExtensionManager manager;
const auto foo_handle1 = GetShared<Foo>(&manager);
const auto int_handle = GetShared<int>(&manager);
const auto foo_handle2 = GetShared<Foo>(&manager);
Extensions extensions;
extensions.Reset(&manager);
Foo &foo1 = extensions.Get(foo_handle1);
Foo &foo2 = extensions.Get(foo_handle2);
EXPECT_EQ(&foo1, &foo2);
EXPECT_EQ(foo1.real, 0.0);
EXPECT_TRUE(foo1.ints.empty());
EXPECT_EQ(extensions.Get(int_handle), 0); // T() zero-initializes POD
}
// Tests that AddLocal() always adds a new extension.
TEST(ExtensionManagerTest, AddLocal) {
ExtensionManager manager;
const auto foo_handle1 = AddLocal<Foo>(&manager);
const auto int_handle = AddLocal<int>(&manager);
const auto foo_handle2 = AddLocal<Foo>(&manager);
Extensions extensions;
extensions.Reset(&manager);
Foo &foo1 = extensions.Get(foo_handle1);
Foo &foo2 = extensions.Get(foo_handle2);
EXPECT_NE(&foo1, &foo2);
EXPECT_EQ(foo1.real, 0.0);
EXPECT_EQ(foo2.real, 0.0);
EXPECT_TRUE(foo1.ints.empty());
EXPECT_TRUE(foo2.ints.empty());
EXPECT_EQ(extensions.Get(int_handle), 0); // T() zero-initializes POD
}
// Tests that Get() always returns the same object.
TEST(ExtensionManagerTest, GetReturnsSameObject) {
ExtensionManager manager;
const auto foo_shared = GetShared<Foo>(&manager);
const auto int_shared = GetShared<int>(&manager);
const auto foo_local = AddLocal<Foo>(&manager);
const auto int_local = AddLocal<int>(&manager);
Extensions extensions;
extensions.Reset(&manager);
Foo &foo_shared1 = extensions.Get(foo_shared);
int &int_shared1 = extensions.Get(int_shared);
Foo &foo_local1 = extensions.Get(foo_local);
int &int_local1 = extensions.Get(int_local);
Foo &foo_shared2 = extensions.Get(foo_shared);
int &int_shared2 = extensions.Get(int_shared);
Foo &foo_local2 = extensions.Get(foo_local);
int &int_local2 = extensions.Get(int_local);
EXPECT_EQ(&foo_shared1, &foo_shared2);
EXPECT_EQ(&int_shared1, &int_shared2);
EXPECT_EQ(&foo_local1, &foo_local2);
EXPECT_EQ(&int_local1, &int_local2);
}
// Tests that local extensions can use non-default constructors.
TEST(ExtensionManagerTest, LocalAllowsNonDefaultConstructor) {
ExtensionManager manager;
const auto foo_handle = AddLocal<Foo>(&manager);
const auto int_handle = AddLocal<int>(&manager);
Extensions extensions;
extensions.Reset(&manager);
// Use non-default constructors to get initialized values.
Foo &foo1 = extensions.Get(foo_handle, 0.5, 5);
EXPECT_EQ(foo1.real, 0.5);
EXPECT_THAT(foo1.ints, ElementsAre(0, 1, 2, 3, 4));
EXPECT_EQ(extensions.Get(int_handle, -123), -123);
// However, once created, the non-default constructor args are ignored.
Foo &foo2 = extensions.Get(foo_handle, 1.23, 1000);
EXPECT_EQ(foo2.real, 0.5);
EXPECT_THAT(foo2.ints, ElementsAre(0, 1, 2, 3, 4));
EXPECT_EQ(extensions.Get(int_handle, -456), -123);
}
// Tests that calling Reset() with the same manager is a NOP.
TEST(ExtensionManagerTest, ResetWithSameManager) {
ExtensionManager manager;
const auto foo_shared = GetShared<Foo>(&manager);
const auto int_shared = GetShared<int>(&manager);
const auto foo_local = AddLocal<Foo>(&manager);
const auto int_local = AddLocal<int>(&manager);
Extensions extensions;
extensions.Reset(&manager);
Foo &foo_shared1 = extensions.Get(foo_shared);
int &int_shared1 = extensions.Get(int_shared);
Foo &foo_local1 = extensions.Get(foo_local);
int &int_local1 = extensions.Get(int_local);
extensions.Reset(&manager);
Foo &foo_shared2 = extensions.Get(foo_shared);
int &int_shared2 = extensions.Get(int_shared);
Foo &foo_local2 = extensions.Get(foo_local);
int &int_local2 = extensions.Get(int_local);
EXPECT_EQ(&foo_shared1, &foo_shared2);
EXPECT_EQ(&int_shared1, &int_shared2);
EXPECT_EQ(&foo_local1, &foo_local2);
EXPECT_EQ(&int_local1, &int_local2);
}
// Tests that Reset() can be used to switch managers.
TEST(ExtensionManagerTest, ResetWithDifferentManager) {
ExtensionManager manager1;
const auto foo_shared = GetShared<Foo>(&manager1);
const auto foo_local = AddLocal<Foo>(&manager1);
ExtensionManager manager2;
const auto int_shared = GetShared<int>(&manager2);
const auto int_local = AddLocal<int>(&manager2);
Extensions extensions;
extensions.Reset(&manager1);
EXPECT_EQ(extensions.Get(foo_shared).real, 0.0);
EXPECT_EQ(extensions.Get(foo_local, 0.75, 3).real, 0.75);
extensions.Reset(&manager2);
EXPECT_EQ(extensions.Get(int_shared), 0);
EXPECT_EQ(extensions.Get(int_local, 5), 5);
}
// Tests that Extensions supports move construction.
TEST(ExtensionManagerTest, MoveConstruction) {
ExtensionManager manager;
const auto foo_shared = GetShared<Foo>(&manager);
const auto int_shared = GetShared<int>(&manager);
const auto foo_local = AddLocal<Foo>(&manager);
const auto int_local = AddLocal<int>(&manager);
// Add a couple more spurious extensions that are never set, to exercise
// movement of non-present extensions.
GetShared<float>(&manager);
AddLocal<float>(&manager);
Extensions extensions1;
extensions1.Reset(&manager);
Foo &foo_shared1 = extensions1.Get(foo_shared);
int &int_shared1 = extensions1.Get(int_shared);
Foo &foo_local1 = extensions1.Get(foo_local);
int &int_local1 = extensions1.Get(int_local);
Extensions extensions2 = std::move(extensions1);
Foo &foo_shared2 = extensions2.Get(foo_shared);
int &int_shared2 = extensions2.Get(int_shared);
Foo &foo_local2 = extensions2.Get(foo_local);
int &int_local2 = extensions2.Get(int_local);
EXPECT_EQ(&foo_shared1, &foo_shared2);
EXPECT_EQ(&int_shared1, &int_shared2);
EXPECT_EQ(&foo_local1, &foo_local2);
EXPECT_EQ(&int_local1, &int_local2);
}
// Tests that Extensions supports move assignment.
TEST(ExtensionManagerTest, MoveAssignment) {
ExtensionManager manager1;
const auto foo_shared = GetShared<Foo>(&manager1);
const auto foo_local = AddLocal<Foo>(&manager1);
ExtensionManager manager2;
const auto int_shared = GetShared<int>(&manager2);
const auto int_local = AddLocal<int>(&manager2);
// Add a couple more spurious extensions that are never set, to exercise
// movement of non-present extensions.
GetShared<float>(&manager1);
GetShared<float>(&manager2);
AddLocal<float>(&manager1);
AddLocal<float>(&manager2);
// Fill two sets of extensions.
Extensions extensions1;
extensions1.Reset(&manager1);
extensions1.Get(foo_shared).real = 1.0;
extensions1.Get(foo_local).real = 1.0;
Extensions extensions2;
extensions2.Reset(&manager2);
extensions2.Get(int_shared) = 2;
extensions2.Get(int_local) = 2;
// Use a third set of extensions to perform a swap.
Extensions extensions3;
extensions3 = std::move(extensions1);
extensions1 = std::move(extensions2);
extensions2 = std::move(extensions3);
EXPECT_EQ(extensions1.Get(int_shared), 2);
EXPECT_EQ(extensions1.Get(int_local), 2);
EXPECT_EQ(extensions2.Get(foo_shared).real, 1.0);
EXPECT_EQ(extensions2.Get(foo_local).real, 1.0);
}
} // namespace
} // namespace runtime
} // namespace dragnn
} // namespace syntaxnet
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
#include <stddef.h>
#include <string>
#include "dragnn/core/compute_session.h"
#include "dragnn/protos/spec.pb.h"
#include "dragnn/runtime/extensions.h"
#include "dragnn/runtime/feed_forward_network_kernel.h"
#include "dragnn/runtime/feed_forward_network_layer.h"
#include "dragnn/runtime/math/types.h"
#include "dragnn/runtime/network_states.h"
#include "dragnn/runtime/network_unit.h"
#include "dragnn/runtime/network_unit_base.h"
#include "dragnn/runtime/session_state.h"
#include "dragnn/runtime/variable_store.h"
#include "syntaxnet/base.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/strings/strcat.h"
namespace syntaxnet {
namespace dragnn {
namespace runtime {
namespace {
// A network unit that evaluates a feed-forward multi-layer perceptron.
class FeedForwardNetwork : public NetworkUnitBase {
public:
// Implements NetworkUnit.
tensorflow::Status Initialize(const ComponentSpec &component_spec,
VariableStore *variable_store,
NetworkStateManager *network_state_manager,
ExtensionManager *extension_manager) override;
string GetLogitsName() const override { return kernel_.logits_name(); }
tensorflow::Status Evaluate(size_t step_index, SessionState *session_state,
ComputeSession *compute_session) const override;
private:
// Kernel that implements the feed-forward network.
FeedForwardNetworkKernel kernel_;
};
tensorflow::Status FeedForwardNetwork::Initialize(
const ComponentSpec &component_spec, VariableStore *variable_store,
NetworkStateManager *network_state_manager,
ExtensionManager *extension_manager) {
TF_RETURN_IF_ERROR(kernel_.Initialize(component_spec, variable_store,
network_state_manager));
const bool use_concatenated_input = true;
TF_RETURN_IF_ERROR(InitializeBase(use_concatenated_input, component_spec,
variable_store, network_state_manager,
extension_manager));
// Check dimensions across layers. This must be done after InitializeBase(),
// when concatenated_input_dim() is known.
return kernel_.ValidateInputDimension(concatenated_input_dim());
}
tensorflow::Status FeedForwardNetwork::Evaluate(
size_t step_index, SessionState *session_state,
ComputeSession *compute_session) const {
Vector<float> input;
TF_RETURN_IF_ERROR(EvaluateBase(session_state, compute_session, &input));
for (const FeedForwardNetworkLayer &layer : kernel_.layers()) {
input = layer.Apply(input, session_state->network_states, step_index);
}
return tensorflow::Status::OK();
}
DRAGNN_RUNTIME_REGISTER_NETWORK_UNIT(FeedForwardNetwork);
} // namespace
} // namespace runtime
} // namespace dragnn
} // namespace syntaxnet
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
#include "dragnn/runtime/feed_forward_network_kernel.h"
#include "dragnn/runtime/activation_functions.h"
#include "dragnn/runtime/attributes.h"
#include "dragnn/runtime/transition_system_traits.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/strings/strcat.h"
namespace syntaxnet {
namespace dragnn {
namespace runtime {
namespace {
// Attributes used by the feed-forward network.
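// For reference, these attributes are parsed from the "network_unit"
// parameters of the ComponentSpec; e.g., a hypothetical spec fragment:
//
//   network_unit {
//     parameters { key: 'hidden_layer_sizes' value: '64,32' }
//     parameters { key: 'omit_logits' value: 'false' }
//   }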
struct FeedForwardNetworkAttributes : public Attributes {
// Hidden layer sizes; e.g., "64,64,32".
Optional<std::vector<size_t>> hidden_layer_sizes{
"hidden_layer_sizes", {}, this};
// Whether to omit the "logits" layer.
Optional<bool> omit_logits{"omit_logits", false, this};
// Only the default settings are supported for these attributes.
Optional<bool> layer_norm_input{"layer_norm_input", false, this};
Optional<bool> layer_norm_hidden{"layer_norm_hidden", false, this};
Optional<string> nonlinearity{"nonlinearity", "relu", this};
// Training-only attributes, ignored in the runtime.
Ignored dropout_keep_prob{"dropout_keep_prob", this};
Ignored dropout_per_sequence{"dropout_per_sequence", this};
Ignored dropout_all_layers{"dropout_all_layers", this};
Ignored initialize_bias_zero{"initialize_bias_zero", this};
Ignored initialize_softmax_zero{"initialize_softmax_zero", this};
Ignored initialize_hidden_orthogonal{"initialize_hidden_orthogonal", this};
};
} // namespace
tensorflow::Status FeedForwardNetworkKernel::Initialize(
const ComponentSpec &component_spec, VariableStore *variable_store,
NetworkStateManager *network_state_manager) {
FeedForwardNetworkAttributes attributes;
TF_RETURN_IF_ERROR(
attributes.Reset(component_spec.network_unit().parameters()));
// Check for unsupported attribute values.
if (attributes.layer_norm_input() || attributes.layer_norm_hidden()) {
return tensorflow::errors::Unimplemented("Layer norm is not supported");
}
if (attributes.nonlinearity() != "relu") {
return tensorflow::errors::Unimplemented("Non-linearity is not supported: ",
attributes.nonlinearity());
}
// Add all hidden layers.
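// Hidden layers are named "layer_0", "layer_1", etc., and their weights and
// biases are looked up using the bare layer index ("0", "1", ...) as the
// variable suffix.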
for (const size_t hidden_layer_size : attributes.hidden_layer_sizes()) {
const size_t height = layers_.size();
layers_.emplace_back();
TF_RETURN_IF_ERROR(layers_.back().Initialize(
component_spec.name(), tensorflow::strings::StrCat("layer_", height),
hidden_layer_size, ActivationFunction::kRelu,
tensorflow::strings::StrCat(height), variable_store,
network_state_manager));
}
// Add "last_layer" as an alias for the last hidden layer, if any.
if (!layers_.empty()) {
TF_RETURN_IF_ERROR(network_state_manager->AddLayerAlias(
"last_layer",
tensorflow::strings::StrCat("layer_", layers_.size() - 1)));
}
// Add a linear "logits" layer, if necessary.
const bool has_logits =
!TransitionSystemTraits(component_spec).is_deterministic &&
!attributes.omit_logits();
if (has_logits) {
logits_name_ = FeedForwardNetworkLayer::kLogitsName;
layers_.emplace_back();
TF_RETURN_IF_ERROR(layers_.back().InitializeSoftmax(
component_spec, variable_store, network_state_manager));
}
return tensorflow::Status::OK();
}
tensorflow::Status FeedForwardNetworkKernel::ValidateInputDimension(
size_t dimension) const {
for (const FeedForwardNetworkLayer &layer : layers_) {
TF_RETURN_IF_ERROR(
layer.CheckInputDimAndGetOutputDim(dimension, &dimension));
}
return tensorflow::Status::OK();
}
} // namespace runtime
} // namespace dragnn
} // namespace syntaxnet
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
#ifndef DRAGNN_RUNTIME_FEED_FORWARD_NETWORK_KERNEL_H_
#define DRAGNN_RUNTIME_FEED_FORWARD_NETWORK_KERNEL_H_
#include <stddef.h>
#include <string>
#include <vector>
#include "dragnn/protos/spec.pb.h"
#include "dragnn/runtime/feed_forward_network_layer.h"
#include "dragnn/runtime/network_states.h"
#include "dragnn/runtime/variable_store.h"
#include "syntaxnet/base.h"
#include "tensorflow/core/lib/core/status.h"
namespace syntaxnet {
namespace dragnn {
namespace runtime {
// A kernel that evaluates a multi-layer perceptron.
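// Given an input vector x, the kernel computes hidden activations
// h_1 = relu(W_1 x + b_1) and h_i = relu(W_i h_{i-1} + b_i) for each
// subsequent hidden layer, optionally followed by a linear logits layer,
// logits = W_s h_n + b_s.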
class FeedForwardNetworkKernel {
public:
// Initializes this to the configuration in the |component_spec|. Retrieves
// pre-trained variables from the |variable_store|, which must outlive this.
// Adds layers and local operands to the |network_state_manager|, which must
// be positioned at the current component. On error, returns non-OK.
tensorflow::Status Initialize(const ComponentSpec &component_spec,
VariableStore *variable_store,
NetworkStateManager *network_state_manager);
// Returns OK iff this is compatible with the input |dimension|.
tensorflow::Status ValidateInputDimension(size_t dimension) const;
// Accessors.
const std::vector<FeedForwardNetworkLayer> &layers() const { return layers_; }
const string &logits_name() const { return logits_name_; }
private:
// List of layers, including hidden layers and the logits, if any.
std::vector<FeedForwardNetworkLayer> layers_;
// Name of the logits layer.
string logits_name_;
};
} // namespace runtime
} // namespace dragnn
} // namespace syntaxnet
#endif // DRAGNN_RUNTIME_FEED_FORWARD_NETWORK_KERNEL_H_
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
#include "dragnn/runtime/feed_forward_network_kernel.h"
#include <stddef.h>
#include <algorithm>
#include <memory>
#include <string>
#include "dragnn/core/test/generic.h"
#include "dragnn/protos/spec.pb.h"
#include "dragnn/runtime/flexible_matrix_kernel.h"
#include "dragnn/runtime/math/types.h"
#include "dragnn/runtime/network_states.h"
#include "dragnn/runtime/test/network_test_base.h"
#include "dragnn/runtime/variable_store.h"
#include "syntaxnet/base.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/test.h"
namespace syntaxnet {
namespace dragnn {
namespace runtime {
namespace {
class FeedForwardNetworkKernelTest : public NetworkTestBase {
protected:
// Adds a weight matrix with the |name_suffix| with the given dimensions and
// |fill_value|.
void AddWeights(const string &name_suffix, size_t num_rows,
size_t num_columns, float fill_value) {
const string weights_name =
tensorflow::strings::StrCat(kTestComponentName, "/weights_",
name_suffix, FlexibleMatrixKernel::kSuffix);
AddMatrixVariable(weights_name, num_columns, num_rows, fill_value);
}
// Adds a bias vector with the |name_suffix| with the given dimensions and
// |fill_value|.
void AddBiases(const string &name_suffix, size_t dimension,
float fill_value) {
const string biases_name =
tensorflow::strings::StrCat(kTestComponentName, "/bias_", name_suffix);
AddVectorVariable(biases_name, dimension, fill_value);
}
// Initializes the |kernel_| based on the |component_spec_text|. On error,
// returns non-OK.
tensorflow::Status Initialize(const string &component_spec_text) {
ComponentSpec component_spec;
CHECK(TextFormat::ParseFromString(component_spec_text, &component_spec));
component_spec.set_name(kTestComponentName);
// Since FeedForwardNetwork uses the concatenated input, it is insensitive
// to the particular fixed or linked embedding inputs. For simplicity, the
// tests use a trivial network structure and a single fixed embedding.
AddComponent(kTestComponentName);
TF_RETURN_IF_ERROR(kernel_.Initialize(component_spec, &variable_store_,
&network_state_manager_));
size_t input_dimension = 0;
for (const FixedFeatureChannel &channel : component_spec.fixed_feature()) {
input_dimension += channel.embedding_dim();
}
return kernel_.ValidateInputDimension(input_dimension);
}
FeedForwardNetworkKernel kernel_;
};
// Tests that FeedForwardNetworkKernel fails when a weight matrix does not match
// the dimension of its output activations.
TEST_F(FeedForwardNetworkKernelTest, BadWeightRows) {
const size_t kInputDim = 5;
const size_t kLogitsDim = 3;
const string kBadSpec = R"(fixed_feature {
vocabulary_size: 50
embedding_dim: 5
size: 1
}
num_actions: 3)";
AddWeights("softmax", kInputDim, kLogitsDim - 1 /* bad */, 1.0);
AddBiases("softmax", kLogitsDim, 1.0);
EXPECT_THAT(
Initialize(kBadSpec),
test::IsErrorWithSubstr(
"Weight matrix shape should be output dimension plus padding"));
}
// Tests that FeedForwardNetworkKernel fails when a weight matrix does not match
// the dimension of its input activations.
TEST_F(FeedForwardNetworkKernelTest, BadWeightColumns) {
const size_t kInputDim = 5;
const size_t kLogitsDim = 3;
const string kBadSpec = R"(fixed_feature {
vocabulary_size: 50
embedding_dim: 5
size: 1
}
num_actions: 3)";
AddWeights("softmax", kInputDim + 1 /* bad */, kLogitsDim, 1.0);
AddBiases("softmax", kLogitsDim, 1.0);
EXPECT_THAT(Initialize(kBadSpec),
test::IsErrorWithSubstr(
"Weight matrix shape does not match input dimension"));
}
// Tests that FeedForwardNetworkKernel fails when a bias vector does not match
// the dimension of its output activations.
TEST_F(FeedForwardNetworkKernelTest, BadBiasDimension) {
const size_t kInputDim = 5;
const size_t kLogitsDim = 3;
const string kBadSpec = R"(fixed_feature {
vocabulary_size: 50
embedding_dim: 5
size: 1
}
num_actions: 3)";
AddWeights("softmax", kInputDim, kLogitsDim, 1.0);
AddBiases("softmax", kLogitsDim + 1 /* bad */, 1.0);
EXPECT_THAT(Initialize(kBadSpec),
test::IsErrorWithSubstr(
"Bias vector shape does not match output dimension"));
}
// Tests that FeedForwardNetworkKernel fails when the value of the
// "layer_norm_input" option is not false.
TEST_F(FeedForwardNetworkKernelTest, UnsupportedLayerNormInputOption) {
const string kBadSpec = R"(network_unit {
parameters {
key: 'layer_norm_input'
value: 'true'
}
})";
EXPECT_THAT(Initialize(kBadSpec),
test::IsErrorWithSubstr("Layer norm is not supported"));
}
// Tests that FeedForwardNetworkKernel fails when the value of the
// "layer_norm_hidden" option is not false.
TEST_F(FeedForwardNetworkKernelTest, UnsupportedLayerNormHiddenOption) {
const string kBadSpec = R"(network_unit {
parameters {
key: 'layer_norm_hidden'
value: 'true'
}
})";
EXPECT_THAT(Initialize(kBadSpec),
test::IsErrorWithSubstr("Layer norm is not supported"));
}
// Tests that FeedForwardNetworkKernel fails when the value of the
// "nonlinearity" option is not "relu".
TEST_F(FeedForwardNetworkKernelTest, UnsupportedNonlinearityOption) {
const string kBadSpec = R"(network_unit {
parameters {
key: 'nonlinearity'
value: 'elu'
}
})";
EXPECT_THAT(Initialize(kBadSpec),
test::IsErrorWithSubstr("Non-linearity is not supported"));
}
// Tests that the FeedForwardNetworkKernel works when there are no hidden
// layers, just a softmax that computes logits.
TEST_F(FeedForwardNetworkKernelTest, JustLogits) {
const size_t kInputDim = 5;
const size_t kLogitsDim = 3;
const string kSpec = R"(fixed_feature {
vocabulary_size: 50
embedding_dim: 5
size: 1
}
num_actions: 3)";
AddWeights("softmax", kInputDim, kLogitsDim, 0.0);
AddBiases("softmax", kLogitsDim, 0.0);
TF_ASSERT_OK(Initialize(kSpec));
EXPECT_EQ(kernel_.logits_name(), "logits");
EXPECT_EQ(kernel_.layers().size(), 1);
}
// Tests that the FeedForwardNetworkKernel works with multiple hidden layers as
// well as a softmax that computes logits.
TEST_F(FeedForwardNetworkKernelTest, MultiLayer) {
const size_t kDims[] = {5, 4, 3, 2};
const string kSpec = R"(fixed_feature {
vocabulary_size: 50
embedding_dim: 5
size: 1
}
network_unit {
parameters {
key: 'hidden_layer_sizes'
value: '4,3'
}
}
num_actions: 2)";
AddWeights("0", kDims[0], kDims[1], 0.0);
AddBiases("0", kDims[1], 0.0);
AddWeights("1", kDims[1], kDims[2], 0.0);
AddBiases("1", kDims[2], 0.0);
AddWeights("softmax", kDims[2], kDims[3], 0.0);
AddBiases("softmax", kDims[3], 0.0);
TF_ASSERT_OK(Initialize(kSpec));
EXPECT_EQ(kernel_.logits_name(), "logits");
EXPECT_EQ(kernel_.layers().size(), 3);
}
// Tests that the FeedForwardNetworkKernel does not produce logits and does not
// use the softmax variables when the component is deterministic.
TEST_F(FeedForwardNetworkKernelTest, NoLogitsOrSoftmaxWhenDeterministic) {
const size_t kDims[] = {5, 4};
const string kSpec = R"(num_actions: 1
fixed_feature {
vocabulary_size: 50
embedding_dim: 5
size: 1
}
network_unit {
parameters {
key: 'hidden_layer_sizes'
value: '4'
}
})";
// No "softmax" weights or biases.
AddWeights("0", kDims[0], kDims[1], 0.0);
AddBiases("0", kDims[1], 0.0);
TF_ASSERT_OK(Initialize(kSpec));
// No specified logits layer.
EXPECT_TRUE(kernel_.logits_name().empty());
EXPECT_EQ(kernel_.layers().size(), 1);
}
// Tests that the FeedForwardNetworkKernel does not produce logits when
// omit_logits is true, even if there are actions.
TEST_F(FeedForwardNetworkKernelTest, NoLogitsOrSoftmaxWhenOmitLogitsTrue) {
const size_t kDims[] = {5, 4};
const string kSpec = R"(fixed_feature {
vocabulary_size: 50
embedding_dim: 5
size: 1
}
network_unit {
parameters {
key: 'hidden_layer_sizes'
value: '4'
}
parameters {
key: 'omit_logits'
value: 'true'
}
}
num_actions: 10)";
// No "softmax" weights or biases.
AddWeights("0", kDims[0], kDims[1], 0.0);
AddBiases("0", kDims[1], 0.0);
TF_ASSERT_OK(Initialize(kSpec));
// No specified logits layer.
EXPECT_TRUE(kernel_.logits_name().empty());
EXPECT_EQ(kernel_.layers().size(), 1);
}
} // namespace
} // namespace runtime
} // namespace dragnn
} // namespace syntaxnet
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
#include "dragnn/runtime/feed_forward_network_layer.h"
#include "tensorflow/core/lib/core/errors.h"
namespace syntaxnet {
namespace dragnn {
namespace runtime {
constexpr char FeedForwardNetworkLayer::kLogitsName[];
tensorflow::Status FeedForwardNetworkLayer::Initialize(
const string &component_name, const string &layer_name,
size_t output_dimension, ActivationFunction activation_function,
const string &variable_suffix, VariableStore *variable_store,
NetworkStateManager *network_state_manager) {
debug_name_ = tensorflow::strings::StrCat(component_name, "/", layer_name);
activation_function_ = activation_function;
const string weights_name =
tensorflow::strings::StrCat(component_name, "/weights_", variable_suffix);
const string biases_name =
tensorflow::strings::StrCat(component_name, "/bias_", variable_suffix);
TF_RETURN_IF_ERROR(variable_store->Lookup(biases_name, &biases_));
TF_RETURN_IF_ERROR(matrix_kernel_.Initialize(
debug_name_, weights_name, output_dimension, variable_store));
TF_RETURN_IF_ERROR(
network_state_manager->AddLayer(layer_name, output_dimension, &handle_));
if (!matrix_kernel_.MatchesOutputDimension(output_dimension)) {
return tensorflow::errors::InvalidArgument(
"Weight matrix shape should be output dimension plus padding. ",
debug_name_, ": weights=[", matrix_kernel_.NumPaddedRows(), ", ",
matrix_kernel_.NumColumns(), "] vs output=", output_dimension);
}
// NOTE(gatoatigrado): Do we need to pad the bias vector?
if (biases_.size() != output_dimension) {
return tensorflow::errors::InvalidArgument(
"Bias vector shape does not match output dimension in ", debug_name_,
": biases=[", biases_.size(), "] vs output=", output_dimension);
}
return tensorflow::Status::OK();
}
tensorflow::Status FeedForwardNetworkLayer::InitializeSoftmax(
const ComponentSpec &component_spec, VariableStore *variable_store,
NetworkStateManager *network_state_manager) {
return Initialize(component_spec.name(), kLogitsName,
component_spec.num_actions(), ActivationFunction::kIdentity,
"softmax", variable_store, network_state_manager);
}
tensorflow::Status FeedForwardNetworkLayer::CheckInputDimAndGetOutputDim(
size_t input_dim, size_t *output_dim) const {
if (matrix_kernel_.NumColumns() != input_dim) {
return tensorflow::errors::InvalidArgument(
"Weight matrix shape does not match input dimension in ", debug_name_,
": weights=[", matrix_kernel_.NumPaddedRows(), ", ",
matrix_kernel_.NumColumns(), "] vs input=", input_dim);
}
*output_dim = matrix_kernel_.NumPaddedRows();
return tensorflow::Status::OK();
}
MutableMatrix<float> FeedForwardNetworkLayer::Apply(
Matrix<float> inputs, const NetworkStates &network_states) const {
const MutableMatrix<float> outputs = network_states.GetLayer(handle_);
size_t row = 0;
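// Process rows in adjacent pairs using the fused matrix-vector-vector
// product (SGEMVV), which computes two steps per pass.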
for (; row + 1 < inputs.num_rows(); row += 2) {
matrix_kernel_.MatrixVectorVectorProduct(
inputs.row(row), inputs.row(row + 1), biases_, biases_,
outputs.row(row), outputs.row(row + 1));
ApplyActivationFunction(activation_function_, outputs.row(row));
ApplyActivationFunction(activation_function_, outputs.row(row + 1));
}
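// Handle the last row, if the number of rows is odd.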
if (row < inputs.num_rows()) {
Vector<float> input_row = inputs.row(row);
MutableVector<float> output_row = outputs.row(row);
matrix_kernel_.MatrixVectorProduct(input_row, biases_, output_row);
ApplyActivationFunction(activation_function_, output_row);
}
return outputs;
}
} // namespace runtime
} // namespace dragnn
} // namespace syntaxnet
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
#ifndef DRAGNN_RUNTIME_FEED_FORWARD_NETWORK_LAYER_H_
#define DRAGNN_RUNTIME_FEED_FORWARD_NETWORK_LAYER_H_
#include <stddef.h>
#include <string>
#include "dragnn/protos/spec.pb.h"
#include "dragnn/runtime/activation_functions.h"
#include "dragnn/runtime/flexible_matrix_kernel.h"
#include "dragnn/runtime/math/types.h"
#include "dragnn/runtime/network_states.h"
#include "dragnn/runtime/variable_store.h"
#include "syntaxnet/base.h"
#include "tensorflow/core/lib/core/status.h"
namespace syntaxnet {
namespace dragnn {
namespace runtime {
// Configuration and parameters of a single layer of a multi-layer perceptron.
class FeedForwardNetworkLayer {
public:
// Name of the logits layer produced by a softmax.
static constexpr char kLogitsName[] = "logits";
// Creates an uninitialized layer. Call Initialize() before use.
FeedForwardNetworkLayer() = default;
// Initializes this as a layer named |layer_name| of the component named
// |component_name| that produces activations of size |output_dimension|,
// and applies the |activation_function| to the output. Adds this layer to
// the |network_state_manager| and retrieves trained parameters from the
// |variable_store| using the |variable_suffix|. On error, returns non-OK.
tensorflow::Status Initialize(const string &component_name,
const string &layer_name,
size_t output_dimension,
ActivationFunction activation_function,
const string &variable_suffix,
VariableStore *variable_store,
NetworkStateManager *network_state_manager);
// For convenience, initializes this as a softmax that produces a layer named
// |kLogitsName|.
tensorflow::Status InitializeSoftmax(
const ComponentSpec &component_spec, VariableStore *variable_store,
NetworkStateManager *network_state_manager);
// Returns OK iff this is compatible with input activation vectors of size
// |input_dim| and sets |output_dim| to the output dimension of this layer.
tensorflow::Status CheckInputDimAndGetOutputDim(size_t input_dim,
size_t *output_dim) const;
// Applies the weights and biases of this layer to the |input| activations,
// writes the resulting output activations into the |step_index|'th row of
// the relevant output layer in the |network_states|, and returns the row.
MutableVector<float> Apply(Vector<float> input,
const NetworkStates &network_states,
size_t step_index) const;
// As above, but applies to a step-wise matrix of |inputs|.
MutableMatrix<float> Apply(Matrix<float> inputs,
const NetworkStates &network_states) const;
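//
// In both overloads, each output row is computed as
// activation(weights * input + biases).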
private:
// Name of the layer, for debug purposes.
string debug_name_;
// Handle of the layer in the network states.
LayerHandle<float> handle_;
// Weight matrix and bias vector for computing the layer activations.
FlexibleMatrixKernel matrix_kernel_;
Vector<float> biases_;
// The activation function to apply to the output.
ActivationFunction activation_function_ = ActivationFunction::kIdentity;
};
// Implementation details below.
inline MutableVector<float> FeedForwardNetworkLayer::Apply(
Vector<float> input, const NetworkStates &network_states,
size_t step_index) const {
const MutableVector<float> output =
network_states.GetLayer(handle_).row(step_index);
matrix_kernel_.MatrixVectorProduct(input, biases_, output);
ApplyActivationFunction(activation_function_, output);
return output;
}
} // namespace runtime
} // namespace dragnn
} // namespace syntaxnet
#endif // DRAGNN_RUNTIME_FEED_FORWARD_NETWORK_LAYER_H_
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
#include "dragnn/runtime/feed_forward_network_layer.h"
#include <stddef.h>
#include <algorithm>
#include <string>
#include "dragnn/core/test/generic.h"
#include "dragnn/runtime/activation_functions.h"
#include "dragnn/runtime/flexible_matrix_kernel.h"
#include "dragnn/runtime/math/types.h"
#include "dragnn/runtime/network_states.h"
#include "dragnn/runtime/test/helpers.h"
#include "dragnn/runtime/test/network_test_base.h"
#include "dragnn/runtime/variable_store.h"
#include "syntaxnet/base.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/test.h"
namespace syntaxnet {
namespace dragnn {
namespace runtime {
namespace {
constexpr char kLayerName[] = "layer";
constexpr char kVariableSuffix[] = "suffix";
constexpr size_t kInputDim = 5;
constexpr size_t kLogitsDim = 3;
constexpr size_t kNumSteps = 4;
class FeedForwardNetworkLayerTest : public NetworkTestBase {
protected:
// Adds a weight matrix with the given dimensions and |fill_value|.
void AddWeights(size_t num_rows, size_t num_columns, float fill_value) {
const string weights_name = tensorflow::strings::StrCat(
kTestComponentName, "/weights_", kVariableSuffix,
FlexibleMatrixKernel::kSuffix);
AddMatrixVariable(weights_name, num_columns, num_rows, fill_value);
}
// Adds a bias vector with the given dimensions and |fill_value|.
void AddBiases(size_t dimension, float fill_value) {
const string biases_name = tensorflow::strings::StrCat(
kTestComponentName, "/bias_", kVariableSuffix);
AddVectorVariable(biases_name, dimension, fill_value);
}
// Initializes the |layer_| with the arguments on first call, then resets the
// network states and starts a component with |num_steps| steps. On error,
// returns non-OK.
tensorflow::Status Initialize(
ActivationFunction activation_function = ActivationFunction::kIdentity,
size_t num_steps = kNumSteps) {
if (!initialized_) {
AddComponent(kTestComponentName);
TF_RETURN_IF_ERROR(layer_.Initialize(
kTestComponentName, kLayerName, kLogitsDim, activation_function,
kVariableSuffix, &variable_store_, &network_state_manager_));
initialized_ = true;
}
network_states_.Reset(&network_state_manager_);
StartComponent(num_steps);
return tensorflow::Status::OK();
}
// Applies the |layer_| to the |input| and returns the result.
Vector<float> Apply(const std::vector<float> &input) {
UniqueVector<float> input_vector(input);
layer_.Apply(Vector<float>(*input_vector), network_states_,
/*step_index=*/0);
return Vector<float>(GetLayer(kTestComponentName, kLayerName).row(0));
}
// Applies the |layer_| to the |inputs| and returns the result.
Matrix<float> Apply(const std::vector<std::vector<float>> &inputs) {
UniqueMatrix<float> input_matrix(inputs);
layer_.Apply(Matrix<float>(*input_matrix), network_states_);
return Matrix<float>(GetLayer(kTestComponentName, kLayerName));
}
bool initialized_ = false;
FeedForwardNetworkLayer layer_;
};
// Tests that FeedForwardNetworkLayer fails when a weight matrix does not match
// the dimension of its output activations.
TEST_F(FeedForwardNetworkLayerTest, BadWeightRows) {
AddWeights(kInputDim, kLogitsDim - 1 /* bad */, 1.0);
AddBiases(kLogitsDim, 1.0);
EXPECT_THAT(
Initialize(),
test::IsErrorWithSubstr(
"Weight matrix shape should be output dimension plus padding"));
}
// Tests that FeedForwardNetworkLayer fails when a weight matrix does not match
// the dimension of its input activations.
TEST_F(FeedForwardNetworkLayerTest, BadWeightColumns) {
AddWeights(kInputDim + 1 /* bad */, kLogitsDim, 1.0);
AddBiases(kLogitsDim, 1.0);
TF_ASSERT_OK(Initialize());
size_t output_dim = 0;
EXPECT_THAT(layer_.CheckInputDimAndGetOutputDim(kInputDim, &output_dim),
test::IsErrorWithSubstr(
"Weight matrix shape does not match input dimension"));
}
// Tests that FeedForwardNetworkLayer fails when a bias vector does not match
// the dimension of its output activations.
TEST_F(FeedForwardNetworkLayerTest, BadBiasDimension) {
AddWeights(kInputDim, kLogitsDim, 1.0);
AddBiases(kLogitsDim + 1 /* bad */, 1.0);
EXPECT_THAT(Initialize(),
test::IsErrorWithSubstr(
"Bias vector shape does not match output dimension"));
}
// Tests that FeedForwardNetworkLayer can be used with identity activations.
TEST_F(FeedForwardNetworkLayerTest, IdentityActivations) {
AddWeights(kInputDim, kLogitsDim, 1.0);
AddBiases(kLogitsDim, 0.5);
TF_ASSERT_OK(Initialize());
size_t output_dim = 0;
TF_ASSERT_OK(layer_.CheckInputDimAndGetOutputDim(kInputDim, &output_dim));
EXPECT_EQ(output_dim, kLogitsDim);
// 0.5 + 1 + 2 + 3 + 4 + 5 = 15.5
std::vector<float> row = {1.0, 2.0, 3.0, 4.0, 5.0};
ExpectVector(Apply(row), kLogitsDim, 15.5);
ExpectMatrix(Apply(std::vector<std::vector<float>>(kNumSteps, row)),
kNumSteps, kLogitsDim, 15.5);
// 0.5 - 1 - 2 - 3 - 4 - 5 = -14.5
row = {-1.0, -2.0, -3.0, -4.0, -5.0};
ExpectVector(Apply(row), kLogitsDim, -14.5);
ExpectMatrix(Apply(std::vector<std::vector<float>>(kNumSteps, row)),
kNumSteps, kLogitsDim, -14.5);
}
// Tests that FeedForwardNetworkLayer can be used with ReLU activations.
TEST_F(FeedForwardNetworkLayerTest, ReluActivations) {
AddWeights(kInputDim, kLogitsDim, 1.0);
AddBiases(kLogitsDim, 0.5);
TF_ASSERT_OK(Initialize(ActivationFunction::kRelu));
size_t output_dim = 0;
TF_ASSERT_OK(layer_.CheckInputDimAndGetOutputDim(kInputDim, &output_dim));
EXPECT_EQ(output_dim, kLogitsDim);
// max(0.0, 0.5 + 1 + 2 + 3 + 4 + 5) = 15.5
std::vector<float> row = {1.0, 2.0, 3.0, 4.0, 5.0};
ExpectVector(Apply(row), kLogitsDim, 15.5);
ExpectMatrix(Apply(std::vector<std::vector<float>>(kNumSteps, row)),
kNumSteps, kLogitsDim, 15.5);
// max(0.0, 0.5 - 1 - 2 - 3 - 4 - 5) = 0.0
row = {-1.0, -2.0, -3.0, -4.0, -5.0};
ExpectVector(Apply(row), kLogitsDim, 0.0);
ExpectMatrix(Apply(std::vector<std::vector<float>>(kNumSteps, row)),
kNumSteps, kLogitsDim, 0.0);
}
// Tests that the fused SGEMVV path in Apply() is correct for varying numbers
// of rows, including the odd-row remainder.
TEST_F(FeedForwardNetworkLayerTest, VaryingSizes) {
AddWeights(kInputDim, kLogitsDim, 1.0);
AddBiases(kLogitsDim, 0.5);
std::vector<float> row1 = {1.0, 2.0, 3.0, 4.0, 5.0}; // relu(sum + b) = 15.5
std::vector<float> row2 = {-1.0, -2.0, -3.0, -4.0, -5.0}; // result: 0
std::vector<float> row3 = {1.0, -2.0, 3.0, -4.0, 5.0}; // result: 3.5
// Zero-row computation.
TF_ASSERT_OK(Initialize(ActivationFunction::kRelu, 0));
Matrix<float> result = Apply(std::vector<std::vector<float>>());
EXPECT_EQ(result.num_rows(), 0);
// One-row computation.
TF_ASSERT_OK(Initialize(ActivationFunction::kRelu, 1));
result = Apply(std::vector<std::vector<float>>{row1});
EXPECT_EQ(result.num_rows(), 1);
ExpectVector(result.row(0), kLogitsDim, 15.5);
// Two-row computation.
TF_ASSERT_OK(Initialize(ActivationFunction::kRelu, 2));
result = Apply({row1, row2});
EXPECT_EQ(result.num_rows(), 2);
ExpectVector(result.row(0), kLogitsDim, 15.5);
ExpectVector(result.row(1), kLogitsDim, 0.0);
// Three-row computation.
TF_ASSERT_OK(Initialize(ActivationFunction::kRelu, 3));
result = Apply({row1, row2, row3});
EXPECT_EQ(result.num_rows(), 3);
ExpectVector(result.row(0), kLogitsDim, 15.5);
ExpectVector(result.row(1), kLogitsDim, 0.0);
ExpectVector(result.row(2), kLogitsDim, 3.5);
}
} // namespace
} // namespace runtime
} // namespace dragnn
} // namespace syntaxnet
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
#include <stddef.h>
#include <algorithm>
#include <memory>
#include <string>
#include "dragnn/core/test/generic.h"
#include "dragnn/protos/spec.pb.h"
#include "dragnn/runtime/flexible_matrix_kernel.h"
#include "dragnn/runtime/math/types.h"
#include "dragnn/runtime/network_states.h"
#include "dragnn/runtime/network_unit.h"
#include "dragnn/runtime/test/network_test_base.h"
#include "dragnn/runtime/variable_store.h"
#include "syntaxnet/base.h"
#include <gmock/gmock.h>
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/test.h"
namespace syntaxnet {
namespace dragnn {
namespace runtime {
namespace {
using ::testing::_;
using ::testing::Invoke;
// Applies the ReLU activation to the |value|.
float Relu(float value) { return std::max(0.0f, value); }
class FeedForwardNetworkTest : public NetworkTestBase {
protected:
// Adds a weight matrix with the |name_suffix| with the given dimensions and
// |fill_value|.
void AddWeights(const string &name_suffix, size_t num_rows,
size_t num_columns, float fill_value) {
const string weights_name =
tensorflow::strings::StrCat(kTestComponentName, "/weights_",
name_suffix, FlexibleMatrixKernel::kSuffix);
AddMatrixVariable(weights_name, num_columns, num_rows, fill_value);
}
// Adds a bias vector with the |name_suffix| with the given dimensions and
// |fill_value|.
void AddBiases(const string &name_suffix, size_t dimension,
float fill_value) {
const string biases_name =
tensorflow::strings::StrCat(kTestComponentName, "/bias_", name_suffix);
AddVectorVariable(biases_name, dimension, fill_value);
}
// Creates a network unit, initializes it based on the |component_spec_text|,
// and evaluates it. On error, returns non-OK.
tensorflow::Status Run(const string &component_spec_text) {
ComponentSpec component_spec;
CHECK(TextFormat::ParseFromString(component_spec_text, &component_spec));
component_spec.set_name(kTestComponentName);
// Since FeedForwardNetwork uses the concatenated input, it is insensitive
// to the particular fixed or linked embedding inputs. For simplicity, the
// tests use a trivial network structure and a single fixed embedding.
AddComponent(kTestComponentName);
TF_RETURN_IF_ERROR(
NetworkUnit::CreateOrError("FeedForwardNetwork", &network_unit_));
TF_RETURN_IF_ERROR(network_unit_->Initialize(
component_spec, &variable_store_, &network_state_manager_,
&extension_manager_));
network_states_.Reset(&network_state_manager_);
StartComponent(1); // only evaluate the first step
session_state_.extensions.Reset(&extension_manager_);
TF_RETURN_IF_ERROR(
network_unit_->Evaluate(0, &session_state_, &compute_session_));
return tensorflow::Status::OK();
}
// Returns the activation vector of the first step of the layer named
// |layer_name| in the current component.
Vector<float> GetActivations(const string &layer_name) const {
Matrix<float> layer(GetLayer(kTestComponentName, layer_name));
return layer.row(0);
}
std::unique_ptr<NetworkUnit> network_unit_;
};
// Tests that FeedForwardNetwork fails when a weight matrix does not match the
// dimension of its output activations.
TEST_F(FeedForwardNetworkTest, BadWeightRows) {
const size_t kInputDim = 5;
const size_t kLogitsDim = 3;
const string kBadSpec = R"(fixed_feature {
vocabulary_size: 50
embedding_dim: 5
size: 1
}
num_actions: 3)";
AddFixedEmbeddingMatrix(0, 50, kInputDim, 1.0);
AddWeights("softmax", kInputDim, kLogitsDim - 1 /* bad */, 1.0);
AddBiases("softmax", kLogitsDim, 1.0);
EXPECT_THAT(
Run(kBadSpec),
test::IsErrorWithSubstr(
"Weight matrix shape should be output dimension plus padding"));
}
// Tests that FeedForwardNetwork fails when a weight matrix does not match the
// dimension of its input activations.
TEST_F(FeedForwardNetworkTest, BadWeightColumns) {
const size_t kInputDim = 5;
const size_t kLogitsDim = 3;
const string kBadSpec = R"(fixed_feature {
vocabulary_size: 50
embedding_dim: 5
size: 1
}
num_actions: 3)";
AddFixedEmbeddingMatrix(0, 50, kInputDim, 1.0);
AddWeights("softmax", kInputDim + 1 /* bad */, kLogitsDim, 1.0);
AddBiases("softmax", kLogitsDim, 1.0);
EXPECT_THAT(Run(kBadSpec),
test::IsErrorWithSubstr(
"Weight matrix shape does not match input dimension"));
}
// Tests that FeedForwardNetwork fails when a bias vector does not match the
// dimension of its output activations.
TEST_F(FeedForwardNetworkTest, BadBiasDimension) {
const size_t kInputDim = 5;
const size_t kLogitsDim = 3;
const string kBadSpec = R"(fixed_feature {
vocabulary_size: 50
embedding_dim: 5
size: 1
}
num_actions: 3)";
AddFixedEmbeddingMatrix(0, 50, kInputDim, 1.0);
AddWeights("softmax", kInputDim, kLogitsDim, 1.0);
AddBiases("softmax", kLogitsDim + 1 /* bad */, 1.0);
EXPECT_THAT(Run(kBadSpec),
test::IsErrorWithSubstr(
"Bias vector shape does not match output dimension"));
}
// Tests that FeedForwardNetwork fails when the value of the "layer_norm_input"
// option is not false.
TEST_F(FeedForwardNetworkTest, UnsupportedLayerNormInputOption) {
const string kBadSpec = R"(network_unit {
parameters {
key: 'layer_norm_input'
value: 'true'
}
})";
EXPECT_THAT(Run(kBadSpec),
test::IsErrorWithSubstr("Layer norm is not supported"));
}
// Tests that FeedForwardNetwork fails when the value of the "layer_norm_hidden"
// option is not false.
TEST_F(FeedForwardNetworkTest, UnsupportedLayerNormHiddenOption) {
const string kBadSpec = R"(network_unit {
parameters {
key: 'layer_norm_hidden'
value: 'true'
}
})";
EXPECT_THAT(Run(kBadSpec),
test::IsErrorWithSubstr("Layer norm is not supported"));
}
// Tests that FeedForwardNetwork fails when the value of the "nonlinearity"
// option is not "relu".
TEST_F(FeedForwardNetworkTest, UnsupportedNonlinearityOption) {
const string kBadSpec = R"(network_unit {
parameters {
key: 'nonlinearity'
value: 'elu'
}
})";
EXPECT_THAT(Run(kBadSpec),
test::IsErrorWithSubstr("Non-linearity is not supported"));
}
// Tests that the FeedForwardNetwork works when there are no hidden layers, just
// a softmax that computes logits.
TEST_F(FeedForwardNetworkTest, JustLogits) {
const size_t kInputDim = 5;
const size_t kLogitsDim = 3;
const string kSpec = R"(fixed_feature {
vocabulary_size: 50
embedding_dim: 5
size: 1
}
num_actions: 3)";
const float kEmbedding = 1.25;
const float kFeature = 0.5;
const float kWeight = 1.5;
const float kBias = 0.75;
AddFixedEmbeddingMatrix(0, 50, kInputDim, kEmbedding);
AddWeights("softmax", kInputDim, kLogitsDim, kWeight);
AddBiases("softmax", kLogitsDim, kBias);
EXPECT_CALL(compute_session_, GetInputFeatures(_, _, _, _, _))
.WillOnce(Invoke(ExtractFeatures(0, {{1, kFeature}})));
TF_ASSERT_OK(Run(kSpec));
EXPECT_EQ("logits", network_unit_->GetLogitsName());
ExpectVector(GetActivations("logits"), kLogitsDim,
kInputDim * kEmbedding * kFeature * kWeight + kBias);
}
// Tests that the FeedForwardNetwork works with multiple hidden layers as well
// as a softmax that computes logits.
TEST_F(FeedForwardNetworkTest, MultiLayer) {
const size_t kDims[] = {5, 4, 3, 2};
const string kSpec = R"(fixed_feature {
vocabulary_size: 50
embedding_dim: 5
size: 1
}
network_unit {
parameters {
key: 'hidden_layer_sizes'
value: '4,3'
}
}
num_actions: 2)";
const float kWeights[] = {-1.5, 1.0, 0.5};
const float kBiases[] = {0.75, -0.5, -1.0};
AddFixedEmbeddingMatrix(0, 50, 5, 1.0);
AddWeights("0", kDims[0], kDims[1], kWeights[0]);
AddBiases("0", kDims[1], kBiases[0]);
AddWeights("1", kDims[1], kDims[2], kWeights[1]);
AddBiases("1", kDims[2], kBiases[1]);
AddWeights("softmax", kDims[2], kDims[3], kWeights[2]);
AddBiases("softmax", kDims[3], kBiases[2]);
EXPECT_CALL(compute_session_, GetInputFeatures(_, _, _, _, _))
.WillOnce(Invoke(ExtractFeatures(0, {{1, 1.0}})));
TF_ASSERT_OK(Run(kSpec));
EXPECT_EQ("logits", network_unit_->GetLogitsName());
float expected = Relu(kDims[0] * kWeights[0] + kBiases[0]);
ExpectVector(GetActivations("layer_0"), kDims[1], expected);
expected = Relu(kDims[1] * expected * kWeights[1] + kBiases[1]);
ExpectVector(GetActivations("layer_1"), kDims[2], expected);
ExpectVector(GetActivations("last_layer"), kDims[2], expected);
expected = kDims[2] * expected * kWeights[2] + kBiases[2];
ExpectVector(GetActivations("logits"), kDims[3], expected);
}
// Tests that the FeedForwardNetwork does not produce logits and does not use
// the softmax variables when the component is deterministic.
TEST_F(FeedForwardNetworkTest, NoLogitsOrSoftmaxWhenDeterministic) {
const size_t kDims[] = {5, 4};
const string kSpec = R"(num_actions: 1
fixed_feature {
vocabulary_size: 50
embedding_dim: 5
size: 1
}
network_unit {
parameters {
key: 'hidden_layer_sizes'
value: '4'
}
})";
const float kEmbedding = 1.25;
const float kFeature = 0.5;
const float kWeight = -1.5;
const float kBias = 0.75;
AddFixedEmbeddingMatrix(0, 50, kDims[0], kEmbedding);
// No "softmax" weights or biases.
AddWeights("0", kDims[0], kDims[1], kWeight);
AddBiases("0", kDims[1], kBias);
EXPECT_CALL(compute_session_, GetInputFeatures(_, _, _, _, _))
.WillOnce(Invoke(ExtractFeatures(0, {{1, kFeature}})));
TF_ASSERT_OK(Run(kSpec));
// No specified logits layer.
EXPECT_TRUE(network_unit_->GetLogitsName().empty());
// No "logits" layer.
size_t unused_dimension = 0;
LayerHandle<float> unused_handle;
EXPECT_THAT(
network_state_manager_.LookupLayer(kTestComponentName, "logits",
&unused_dimension, &unused_handle),
test::IsErrorWithSubstr(
"Unknown layer 'logits' in component 'test_component'"));
// Hidden layer is still produced.
const float kExpected =
Relu(kDims[0] * kEmbedding * kFeature * kWeight + kBias);
ExpectVector(GetActivations("layer_0"), kDims[1], kExpected);
ExpectVector(GetActivations("last_layer"), kDims[1], kExpected);
}
// Tests that the FeedForwardNetwork does not produce logits when omit_logits is
// true, even if there are actions.
TEST_F(FeedForwardNetworkTest, NoLogitsOrSoftmaxWhenOmitLogitsTrue) {
const size_t kDims[] = {5, 4};
const string kSpec = R"(fixed_feature {
vocabulary_size: 50
embedding_dim: 5
size: 1
}
network_unit {
parameters {
key: 'hidden_layer_sizes'
value: '4'
}
parameters {
key: 'omit_logits'
value: 'true'
}
}
num_actions: 10)";
const float kEmbedding = 1.25;
const float kFeature = 0.5;
const float kWeight = 1.5;
const float kBias = 0.75;
AddFixedEmbeddingMatrix(0, 50, kDims[0], kEmbedding);
// No "softmax" weights or biases.
AddWeights("0", kDims[0], kDims[1], kWeight);
AddBiases("0", kDims[1], kBias);
EXPECT_CALL(compute_session_, GetInputFeatures(_, _, _, _, _))
.WillOnce(Invoke(ExtractFeatures(0, {{1, kFeature}})));
TF_ASSERT_OK(Run(kSpec));
// No specified logits layer.
EXPECT_TRUE(network_unit_->GetLogitsName().empty());
// No "logits" layer.
size_t unused_dimension = 0;
LayerHandle<float> unused_handle;
EXPECT_THAT(
network_state_manager_.LookupLayer(kTestComponentName, "logits",
&unused_dimension, &unused_handle),
test::IsErrorWithSubstr(
"Unknown layer 'logits' in component 'test_component'"));
// Hidden layer is still produced.
const float kExpected = kDims[0] * kEmbedding * kFeature * kWeight + kBias;
ExpectVector(GetActivations("layer_0"), kDims[1], kExpected);
ExpectVector(GetActivations("last_layer"), kDims[1], kExpected);
}
} // namespace
} // namespace runtime
} // namespace dragnn
} // namespace syntaxnet
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
#include "dragnn/runtime/file_array_variable_store.h"
#include <string.h>
#include <utility>
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/platform/env.h"
namespace syntaxnet {
namespace dragnn {
namespace runtime {
tensorflow::Status FileArrayVariableStore::Reset(
const ArrayVariableStoreSpec &spec, const string &path) {
string content;
TF_RETURN_IF_ERROR(
tensorflow::ReadFileToString(tensorflow::Env::Default(), path, &content));
UniqueAlignedArray data;
data.Reset(content.size());
memcpy(data.view().data(), content.data(), content.size());
TF_RETURN_IF_ERROR(ArrayVariableStore::Reset(spec, AlignedView(data.view())));
// Success; make modifications.
data_ = std::move(data);
return tensorflow::Status::OK();
}
} // namespace runtime
} // namespace dragnn
} // namespace syntaxnet
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
#ifndef DRAGNN_RUNTIME_FILE_ARRAY_VARIABLE_STORE_H_
#define DRAGNN_RUNTIME_FILE_ARRAY_VARIABLE_STORE_H_
#include <string>
#include "dragnn/protos/runtime.pb.h"
#include "dragnn/runtime/alignment.h"
#include "dragnn/runtime/array_variable_store.h"
#include "syntaxnet/base.h"
#include "tensorflow/core/lib/core/status.h"
namespace syntaxnet {
namespace dragnn {
namespace runtime {
// An ArrayVariableStore subclass that reads a file into a newly-allocated
// array.
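//
// Example usage (a minimal sketch; the |spec| proto and file |path| are
// assumed to come from elsewhere, e.g. a saved model):
//
//   FileArrayVariableStore store;
//   TF_RETURN_IF_ERROR(store.Reset(spec, path));
//   // The store can now serve Lookup() calls as an ArrayVariableStore.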
class FileArrayVariableStore : public ArrayVariableStore {
public:
// Creates an uninitialized store.
FileArrayVariableStore() = default;
// Resets this to represent the variables defined by the |spec|, loading the
// byte array from the |path|. On error, returns non-OK and modifies nothing.
tensorflow::Status Reset(const ArrayVariableStoreSpec &spec,
const string &path);
private:
// The byte array containing the variables.
UniqueAlignedArray data_;
};
} // namespace runtime
} // namespace dragnn
} // namespace syntaxnet
#endif // DRAGNN_RUNTIME_FILE_ARRAY_VARIABLE_STORE_H_
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
#include "dragnn/runtime/fixed_embeddings.h"
#include <string.h>
#include <algorithm>
#include <limits>
#include <utility>
#include "dragnn/runtime/math/arithmetic.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/gtl/array_slice.h"
#include "tensorflow/core/lib/strings/strcat.h"
namespace syntaxnet {
namespace dragnn {
namespace runtime {
namespace {
// Returns the name of the embedding matrix for the |channel_id|'th fixed
// feature channel of the |component_spec|.
string FixedEmbeddingMatrixVariableName(const ComponentSpec &component_spec,
int channel_id) {
// Cf. _add_hooks_for_fixed_embedding_matrix() in runtime_support.py.
return tensorflow::strings::StrCat(component_spec.name(),
"/fixed_embedding_matrix_", channel_id,
"/trimmed");
}
// Resizes |buffer| to |size| and returns the array it manages. Helper for the
// allocator functors used by ComputeSession::GetInputFeatures().
template <class T>
T *Alloc(int size, std::vector<T> *buffer) {
buffer->resize(size);
return buffer->data();
}
// Returns true if two pointers have the same address.
bool SameAddress(const void *pointer1, const void *pointer2) {
return pointer1 == pointer2;
}
// Number of IDs to allow per embedding.
constexpr size_t kMaxNumFeatureIds = 1;
} // namespace
tensorflow::Status FixedEmbeddingManager::Reset(
const ComponentSpec &component_spec, VariableStore *variable_store,
NetworkStateManager *network_state_manager) {
const int num_channels = component_spec.fixed_feature_size();
std::vector<ChannelConfig> channel_configs(num_channels);
size_t max_dimension = 0; // maximum dimension across all channels
size_t num_embeddings = 0;
for (int channel_id = 0; channel_id < num_channels; ++channel_id) {
const FixedFeatureChannel &channel_spec =
component_spec.fixed_feature(channel_id);
ChannelConfig &channel_config = channel_configs[channel_id];
if (channel_spec.size() < 1) {
return tensorflow::errors::InvalidArgument(
"Invalid channel size for channel ", channel_id, ": ",
channel_spec.ShortDebugString());
}
const size_t channel_size = channel_spec.size();
channel_config.channel_base = num_embeddings;
num_embeddings += channel_size;
channel_config.handles.resize(channel_size);
channel_config.is_embedded = channel_spec.embedding_dim() >= 0;
// Configure non-embedded channels separately.
if (!channel_config.is_embedded) {
for (size_t i = 0; i < channel_size; ++i) {
TF_RETURN_IF_ERROR(network_state_manager->AddLocal(
kMaxNumFeatureIds, &channel_config.handles[i].ids));
}
continue;
}
// The remainder of the loop configures embedded channels.
const size_t dimension = channel_spec.embedding_dim();
max_dimension = std::max(max_dimension, dimension);
for (size_t i = 0; i < channel_size; ++i) {
TF_RETURN_IF_ERROR(network_state_manager->AddLocal(
dimension, &channel_config.handles[i].sum));
}
Matrix<float> &embedding_matrix = channel_config.embedding_matrix;
TF_RETURN_IF_ERROR(variable_store->Lookup(
FixedEmbeddingMatrixVariableName(component_spec, channel_id),
&embedding_matrix));
if (embedding_matrix.num_rows() != channel_spec.vocabulary_size()) {
return tensorflow::errors::InvalidArgument(
"ComponentSpec (", channel_spec.vocabulary_size(),
") and VariableStore (", embedding_matrix.num_rows(),
") disagree on vocabulary size for channel ", channel_id, ": ",
channel_spec.ShortDebugString());
}
if (embedding_matrix.num_columns() != dimension) {
return tensorflow::errors::InvalidArgument(
"ComponentSpec (", dimension, ") and VariableStore (",
embedding_matrix.num_columns(),
") disagree on embedding dim for channel ", channel_id, ": ",
channel_spec.ShortDebugString());
}
}
// Success; make modifications.
component_name_ = component_spec.name();
num_embeddings_ = num_embeddings;
channel_configs_ = std::move(channel_configs);
zeros_.Resize(max_dimension * sizeof(float));
memset(zeros_.view().data(), 0, zeros_.view().size());
return tensorflow::Status::OK();
}
tensorflow::Status FixedEmbeddings::Reset(const FixedEmbeddingManager *manager,
const NetworkStates &network_states,
ComputeSession *compute_session) {
const AlignedView zeros(manager->zeros_.view());
const size_t num_channels = manager->num_channels();
features_.clear();
features_.reserve(manager->num_embeddings());
for (size_t channel_id = 0; channel_id < num_channels; ++channel_id) {
const FixedEmbeddingManager::ChannelConfig &channel_config =
manager->channel_configs_[channel_id];
const std::vector<FixedEmbeddingManager::Handle> &handles =
channel_config.handles;
const size_t channel_base = channel_config.channel_base;
const size_t channel_size = handles.size();
DCHECK_EQ(channel_base, features_.size());
DCHECK_LE(channel_base + channel_size, manager->num_embeddings());
const int num_features = compute_session->GetInputFeatures(
manager->component_name(),
[this](int size) { return Alloc(size, &indices_); },
[this](int size) { return Alloc(size, &ids_); },
[this](int size) { return Alloc(size, &weights_); }, channel_id);
DCHECK_EQ(num_features, indices_.size());
DCHECK_EQ(num_features, ids_.size());
DCHECK_EQ(num_features, weights_.size());
DCHECK(std::all_of(indices_.begin(), indices_.end(),
[channel_size](int32 index) {
return index >= 0 && index < channel_size;
}));
// Handle non-embedded channels separately.
if (!channel_config.is_embedded) {
for (size_t index = 0; index < channel_size; ++index) {
features_.emplace_back(/*is_embedded=*/false);
features_.back().ids = network_states.GetLocal(handles[index].ids);
features_.back().ids[0] = -1; // so we can check that all IDs are set
}
for (int feature = 0; feature < num_features; ++feature) {
const int32 index = indices_[feature];
const int64 id = ids_[feature];
if (id < 0 || id > std::numeric_limits<int32>::max()) {
return tensorflow::errors::Internal(
"Component '", manager->component_name_, "' channel ", channel_id,
" index ", index, ": Invalid non-embedded feature ID ", id);
}
const float weight = weights_[feature];
if (weight != 1.0) {
return tensorflow::errors::Internal(
"Component '", manager->component_name_, "' channel ", channel_id,
" index ", index, ": Invalid non-embedded feature weight ",
weight, " (expected 1.0)");
}
int32 &output_id = features_[channel_base + index].ids[0];
if (output_id != -1) {
return tensorflow::errors::Internal(
"Component '", manager->component_name_, "' channel ", channel_id,
" index ", index, ": Duplicate non-embedded feature ID ", id);
}
output_id = id;
}
for (size_t index = 0; index < channel_size; ++index) {
if (features_[channel_base + index].ids[0] == -1) {
return tensorflow::errors::Internal(
"Component '", manager->component_name_, "' channel ", channel_id,
" index ", index, ": Missing non-embedded feature ID");
}
}
continue;
}
// The remainder of the loop handles embedded channels.
const Matrix<float> &embedding_matrix = channel_config.embedding_matrix;
// Acquire the local sum operands and initialize embeddings to zero.
sums_.resize(channel_size);
for (size_t i = 0; i < channel_size; ++i) {
sums_[i] = network_states.GetLocal(handles[i].sum);
features_.emplace_back(/*is_embedded=*/true);
features_.back().embedding = Vector<float>(zeros, sums_[i].size());
}
// Add in a weighted embedding for each feature. The extracted features do
// not have any ordering guarantee (e.g., sorted by |indices|), which makes
// applying special-case shortcuts difficult, but not impossible. If the
// features did have an ordering guarantee, we could use a less intricate
// algorithm, but it's not clear if it would be much faster.
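// To summarize the shortcuts below, each |embedding| passes through up to
// three states: pointing at |zeros| (no features added yet), pointing at a
// row of the embedding matrix (exactly one feature with weight 1.0 so far),
// and pointing at its |sum| operand (the general case).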
for (int feature = 0; feature < num_features; ++feature) {
const int32 index = indices_[feature];
const int64 id = ids_[feature];
const float weight = weights_[feature];
const Vector<float> row = embedding_matrix.row(id);
const MutableVector<float> sum = sums_[index];
Vector<float> &embedding = features_[channel_base + index].embedding;
if (SameAddress(embedding.data(), zeros.data())) {
// If the |embedding| points at |zeros|, then this is the first addition
// so we can use simplified arithmetic.
if (weight == 1.0) {
// Trivial scaling: Point at the |row|.
embedding = row;
} else {
// Adding to zero: Scale into the |sum| and point at it.
ScaleElements(weight, row, sum);
embedding = sum;
}
} else {
if (!SameAddress(embedding.data(), sum.data())) {
// If the |embedding| does not point at |zeros| or |sum|, then this is
// the second addition and we also used the "Trivial scaling" shortcut
// in the first addition. Therefore, the |embedding| currently points
// at another row of the embedding matrix. Copy that row to |sum| and
// point at it, so we can add the current row to it.
memcpy(sum.data(), embedding.data(), sum.size() * sizeof(float));
embedding = sum;
}
// General case: Add to the |sum|, which is aliased by the |embedding|.
AddScaledElements(weight, row, sum);
}
DCHECK_EQ(embedding.size(), embedding_matrix.num_columns());
}
}
return tensorflow::Status::OK();
}
} // namespace runtime
} // namespace dragnn
} // namespace syntaxnet
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
// Utils for extracting and embedding fixed features.
//
// Fixed feature embeddings are organized into channels, where each channel
// consists of a fixed number of embedding vectors. Each embedding, in turn, is
// the feature-weighted sum of the rows of an embedding matrix. Note that a
// multi-embedding channel shares the same embedding matrix across all of its
// embedding vectors.
//
// Logically, a multi-embedding channel is the concatenation of its embedding
// vectors. For efficiency, however, the utils here do not actually perform
// this concatenation. The rationale is that almost all downstream use cases
// will concatenate the fixed and linked embeddings together, "wasting" any
// concatenation here.
//
// Instead, the utils here merge the embedding vectors of all channels into a
// single list, such that the concatenation of this list is equivalent to the
// concatenation of the channels. Individual channels can still be accessed,
// when needed, as sub-spans of the list of embedding vectors.
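//
// For example (an illustrative case): if channel A has 2 embeddings of
// dimension 3 and channel B has 1 embedding of dimension 5, the merged list
// is [A0, A1, B0], the channel bases are 0 for A and 2 for B, and the logical
// concatenation of all channels has dimension 2*3 + 1*5 = 11.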
//
// If FixedFeatureChannel.embedding_dim=-1, then the associated fixed feature
// channel is non-embedded. Instead of producing sums of embedding vectors, a
// non-embedded channel produces feature IDs. The features in a non-embedded
// channel must extract exactly one feature ID with weight=1.0.
//
// TODO(googleuser): Support zero/multiple/weighted non-embedded features?
#ifndef DRAGNN_RUNTIME_FIXED_EMBEDDINGS_H_
#define DRAGNN_RUNTIME_FIXED_EMBEDDINGS_H_
#include <stddef.h>
#include <string>
#include <vector>
#include "dragnn/core/compute_session.h"
#include "dragnn/protos/spec.pb.h"
#include "dragnn/runtime/alignment.h"
#include "dragnn/runtime/math/types.h"
#include "dragnn/runtime/network_states.h"
#include "dragnn/runtime/variable_store.h"
#include "syntaxnet/base.h"
#include "tensorflow/core/lib/core/status.h"
namespace syntaxnet {
namespace dragnn {
namespace runtime {
// A class that manages a set of embedded fixed features for some component.
// Feature embeddings can be extracted using FixedEmbeddings, defined below.
class FixedEmbeddingManager {
public:
// Creates an empty manager.
FixedEmbeddingManager() = default;
// Resets this to manage the fixed features specified by the |component_spec|.
// Retrieves embedding matrices from the |variable_store|, which must outlive
// this. Adds locals to the |network_state_manager|, which must be positioned
// at the current component. Channel ordering follows the |component_spec|.
// On error, returns non-OK and does not modify this.
tensorflow::Status Reset(const ComponentSpec &component_spec,
VariableStore *variable_store,
NetworkStateManager *network_state_manager);
// Accessors.
const string &component_name() const { return component_name_; }
size_t num_channels() const { return channel_configs_.size(); }
size_t embedding_dim(size_t channel_id) const;
size_t num_embeddings() const { return num_embeddings_; }
size_t channel_base(size_t channel_id) const;
size_t channel_size(size_t channel_id) const;
bool is_embedded(size_t channel_id) const;
LocalVectorHandle<int32> id_handle(size_t channel_id, size_t index) const;
private:
friend class FixedEmbeddings;
friend class SequenceFeatureManager;
// Handles for the features in a channel. Only one of the two handles below
// is used, as determined by |ChannelConfig.is_embedded|.
struct Handle {
// Embedding sum handle. Only used if |ChannelConfig.is_embedded| is true.
LocalVectorHandle<float> sum;
// Feature ID handle. Only used if |ChannelConfig.is_embedded| is false.
LocalVectorHandle<int32> ids;
};
// Configuration for a single fixed embedding channel.
struct ChannelConfig {
// Index of the first embedding vector in this channel.
size_t channel_base = 0;
// Whether this channel is embedded.
bool is_embedded = true;
// Handles for each embedding in the channel. The active member of each
// handle is determined by |is_embedded|.
std::vector<Handle> handles;
// Embedding matrix of this channel. Only used if |is_embedded| is true.
Matrix<float> embedding_matrix;
};
// Name of the component for which features are extracted.
string component_name_;
// Total number of embedding vectors across all channels.
size_t num_embeddings_ = 0;
// Ordered list of configurations for each channel.
std::vector<ChannelConfig> channel_configs_;
// Array of zeros that can be substituted for any embedding vector, in the
// case that no features are extracted.
UniqueAlignedArray zeros_;
};
// A set of embedded fixed features, configured via the FixedEmbeddingManager.
class FixedEmbeddings {
public:
// Creates an empty set of embedded features.
FixedEmbeddings() = default;
// Resets this to the embedded features managed by the |manager|. Retrieves
// local operands from the |network_states| and extracts features from the
// |compute_session|; both must be positioned at the relevant component. The
// |manager| must live until this is destroyed or Reset(), and should not be
// modified during that time. On error, returns non-OK.
tensorflow::Status Reset(const FixedEmbeddingManager *manager,
const NetworkStates &network_states,
ComputeSession *compute_session);
// Accessors.
size_t num_embeddings() const { return features_.size(); }
Vector<float> embedding(size_t index) const;
Vector<int32> ids(size_t index) const;
private:
// Data for a feature in a channel.
struct Feature {
// Creates a possibly-embedded feature.
explicit Feature(bool is_embedded) : is_embedded(is_embedded) {}
// Whether this feature is embedded.
const bool is_embedded;
// Weighted embedding sum. Only used if |is_embedded| is true.
Vector<float> embedding;
// Singleton vector of feature IDs. Only used if |is_embedded| is false.
// This is mutable to simplify construction. Recall that a non-embedded
// channel must extract exactly one feature ID with weight=1.0.
MutableVector<int32> ids;
};
// The following three arrays are the same length, with exactly one element
// per feature. For the i'th extracted feature, |indices_[i]| is the index of
// the embedding vector it should be added to, |ids_[i]| is its sparse ID, and
// |weights_[i]| is its weight. These are reused by each channel.
std::vector<int32> indices_;
std::vector<int64> ids_;
std::vector<float> weights_;
// List of fixed embedding sums, reused by each channel.
std::vector<MutableVector<float>> sums_;
// Ordered list of features, merged across all channels.
std::vector<Feature> features_;
};
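// Example of the manager/embeddings lifecycle (a minimal sketch; the
// |component_spec|, |variable_store|, |state_manager|, |network_states|, and
// |compute_session| are assumed to be configured as described above):
//
//   FixedEmbeddingManager manager;
//   TF_RETURN_IF_ERROR(
//       manager.Reset(component_spec, &variable_store, &state_manager));
//
//   FixedEmbeddings embeddings;
//   TF_RETURN_IF_ERROR(
//       embeddings.Reset(&manager, network_states, &compute_session));
//   for (size_t i = 0; i < embeddings.num_embeddings(); ++i) {
//     // Use embeddings.embedding(i) or embeddings.ids(i), depending on
//     // whether the channel containing the i'th embedding is embedded.
//   }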
// Implementation details below.
inline size_t FixedEmbeddingManager::embedding_dim(size_t channel_id) const {
// NB: A multi-embedding channel is logically a concatenation of its embedding
// vectors, so its dimension must be scaled accordingly. On the other hand, a
// non-embedded feature is assumed to have dimension=1, as in TF-based DRAGNN;
// see NetworkUnitInterface.__init__().
const ChannelConfig &channel = channel_configs_[channel_id];
return (channel.is_embedded ? channel.embedding_matrix.num_columns() : 1) *
channel_size(channel_id);
}
inline size_t FixedEmbeddingManager::channel_base(size_t channel_id) const {
return channel_configs_[channel_id].channel_base;
}
inline size_t FixedEmbeddingManager::channel_size(size_t channel_id) const {
return channel_configs_[channel_id].handles.size();
}
inline bool FixedEmbeddingManager::is_embedded(size_t channel_id) const {
return channel_configs_[channel_id].is_embedded;
}
inline LocalVectorHandle<int32> FixedEmbeddingManager::id_handle(
size_t channel_id, size_t index) const {
DCHECK(!is_embedded(channel_id));
return channel_configs_[channel_id].handles[index].ids;
}
inline Vector<float> FixedEmbeddings::embedding(size_t index) const {
DCHECK(features_[index].is_embedded);
return features_[index].embedding;
}
inline Vector<int32> FixedEmbeddings::ids(size_t index) const {
DCHECK(!features_[index].is_embedded);
return Vector<int32>(features_[index].ids);
}
} // namespace runtime
} // namespace dragnn
} // namespace syntaxnet
#endif // DRAGNN_RUNTIME_FIXED_EMBEDDINGS_H_
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
#include "dragnn/runtime/fixed_embeddings.h"
#include <string>
#include <utility>
#include <vector>
#include "dragnn/core/test/generic.h"
#include "dragnn/protos/spec.pb.h"
#include "dragnn/runtime/test/network_test_base.h"
#include "syntaxnet/base.h"
#include <gmock/gmock.h>
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/test.h"
namespace syntaxnet {
namespace dragnn {
namespace runtime {
namespace {
using ::testing::_;
using ::testing::Invoke;
// A working one-channel ComponentSpec.
const char kSingleSpec[] = R"(fixed_feature {
vocabulary_size: 11
embedding_dim: 35
size: 1
})";
const size_t kSingleRows = 11;
const size_t kSingleColumns = 35;
// A working multi-channel ComponentSpec.
const char kMultiSpec[] = R"(fixed_feature {
vocabulary_size: 13
embedding_dim: 11
size: 1
}
fixed_feature {
vocabulary_size: 19
embedding_dim: 17
size: 3
}
fixed_feature {
vocabulary_size: 29
embedding_dim: 23
size: 2
})";
const size_t kMultiRows[] = {13, 19, 29};
const size_t kMultiColumns[] = {11, 17, 23};
const size_t kMultiBases[] = {0, 1, 4};
const size_t kMultiSizes[] = {1, 3, 2};
const int kMultiNumChannels = 3;
const int kMultiNumEmbeddings = 6;
// A working one-channel ComponentSpec with non-embedded features.
const char kNonEmbeddedSpec[] = R"(fixed_feature {
embedding_dim: -1
size: 3
})";
class FixedEmbeddingManagerTest : public NetworkTestBase {
protected:
// Resets the |manager_| using the |component_spec_text|, |variable_store_|,
// and |network_state_manager_|, and returns the resulting status.
tensorflow::Status ResetManager(const string &component_spec_text) {
ComponentSpec component_spec;
CHECK(TextFormat::ParseFromString(component_spec_text, &component_spec));
component_spec.set_name(kTestComponentName);
AddComponent(kTestComponentName);
return manager_.Reset(component_spec, &variable_store_,
&network_state_manager_);
}
FixedEmbeddingManager manager_;
};
// Tests that FixedEmbeddingManager is empty by default.
TEST_F(FixedEmbeddingManagerTest, EmptyByDefault) {
EXPECT_EQ(manager_.num_channels(), 0);
EXPECT_EQ(manager_.num_embeddings(), 0);
}
// Tests that FixedEmbeddingManager is empty when reset to an empty spec.
TEST_F(FixedEmbeddingManagerTest, EmptySpec) {
TF_EXPECT_OK(ResetManager(""));
EXPECT_EQ(manager_.component_name(), kTestComponentName);
EXPECT_EQ(manager_.num_channels(), 0);
EXPECT_EQ(manager_.num_embeddings(), 0);
}
// Tests that FixedEmbeddingManager produces the correct embedding dimension
// when configured with a single channel.
TEST_F(FixedEmbeddingManagerTest, OneChannel) {
AddFixedEmbeddingMatrix(0, kSingleRows, kSingleColumns, 0.25);
TF_EXPECT_OK(ResetManager(kSingleSpec));
EXPECT_EQ(manager_.component_name(), kTestComponentName);
EXPECT_EQ(manager_.num_channels(), 1);
EXPECT_EQ(manager_.embedding_dim(0), kSingleColumns);
EXPECT_EQ(manager_.num_embeddings(), 1);
EXPECT_EQ(manager_.channel_base(0), 0);
EXPECT_EQ(manager_.channel_size(0), 1);
EXPECT_TRUE(manager_.is_embedded(0));
}
// Tests that FixedEmbeddingManager produces the correct embedding dimensions
// when configured with multiple channels.
TEST_F(FixedEmbeddingManagerTest, MultipleChannels) {
for (int i = 0; i < kMultiNumChannels; ++i) {
AddFixedEmbeddingMatrix(i, kMultiRows[i], kMultiColumns[i], -1.0);
}
TF_EXPECT_OK(ResetManager(kMultiSpec));
EXPECT_EQ(manager_.component_name(), kTestComponentName);
EXPECT_EQ(manager_.num_channels(), kMultiNumChannels);
EXPECT_EQ(manager_.num_embeddings(), kMultiNumEmbeddings);
for (int i = 0; i < kMultiNumChannels; ++i) {
EXPECT_EQ(manager_.embedding_dim(i), kMultiColumns[i] * kMultiSizes[i]);
EXPECT_EQ(manager_.channel_base(i), kMultiBases[i]);
EXPECT_EQ(manager_.channel_size(i), kMultiSizes[i]);
EXPECT_TRUE(manager_.is_embedded(i));
}
}
// Tests that FixedEmbeddingManager works for non-embedded features.
TEST_F(FixedEmbeddingManagerTest, NonEmbeddedFeature) {
TF_ASSERT_OK(ResetManager(kNonEmbeddedSpec));
EXPECT_EQ(manager_.component_name(), kTestComponentName);
EXPECT_EQ(manager_.num_channels(), 1);
EXPECT_EQ(manager_.embedding_dim(0), 3);
EXPECT_EQ(manager_.num_embeddings(), 3);
EXPECT_EQ(manager_.channel_base(0), 0);
EXPECT_EQ(manager_.channel_size(0), 3);
EXPECT_FALSE(manager_.is_embedded(0));
}
// Tests that FixedEmbeddingManager fails when there are no embedding matrices.
TEST_F(FixedEmbeddingManagerTest, NoEmbeddingMatrices) {
EXPECT_THAT(ResetManager(kSingleSpec),
test::IsErrorWithSubstr("Unknown variable"));
}
// Tests that FixedEmbeddingManager fails when there are embedding matrices, but
// not for the right channel.
TEST_F(FixedEmbeddingManagerTest, MissingEmbeddingMatrix) {
AddFixedEmbeddingMatrix(/* bad */ 1, kSingleRows, kSingleColumns, 0.25);
EXPECT_THAT(ResetManager(kSingleSpec),
test::IsErrorWithSubstr("Unknown variable"));
}
// Tests that FixedEmbeddingManager fails when the channel size is 0.
TEST_F(FixedEmbeddingManagerTest, InvalidChannelSize) {
const string kBadSpec = R"(fixed_feature {
vocabulary_size: 8
embedding_dim: 16
size: 0 # bad
})";
AddFixedEmbeddingMatrix(0, 8, 16, 0.25);
EXPECT_THAT(ResetManager(kBadSpec),
test::IsErrorWithSubstr("Invalid channel size"));
}
// Tests that the FixedEmbeddingManager fails when the embedding dimension does
// not match the embedding matrix.
TEST_F(FixedEmbeddingManagerTest, MismatchedEmbeddingDim) {
const string kBadSpec = R"(fixed_feature {
vocabulary_size: 8
embedding_dim: 17 # bad
size: 1
})";
AddFixedEmbeddingMatrix(0, 8, 16, 0.25);
EXPECT_THAT(ResetManager(kBadSpec),
test::IsErrorWithSubstr("ComponentSpec (17) and VariableStore "
"(16) disagree on embedding dim"));
}
// Tests that the FixedEmbeddingManager fails when the vocabulary size does not
// match the embedding matrix.
TEST_F(FixedEmbeddingManagerTest, MismatchedVocabularySize) {
const string kBadSpec = R"(fixed_feature {
vocabulary_size: 7 # bad
embedding_dim: 16
size: 1
})";
AddFixedEmbeddingMatrix(0, 8, 16, 0.25);
EXPECT_THAT(ResetManager(kBadSpec),
test::IsErrorWithSubstr("ComponentSpec (7) and VariableStore "
"(8) disagree on vocabulary size"));
}
class FixedEmbeddingsTest : public FixedEmbeddingManagerTest {
protected:
// Resets the |fixed_embeddings_| using the |manager_|, |network_states_|, and
// |compute_session_|, and returns the resulting status.
tensorflow::Status ResetFixedEmbeddings() {
network_states_.Reset(&network_state_manager_);
StartComponent(0);
return fixed_embeddings_.Reset(&manager_, network_states_,
&compute_session_);
}
// Returns a list of the expected size and value of each fixed embedding sum,
// given that the channel-wise sums are the |channel_sums|.
std::vector<std::pair<size_t, float>> ToEmbeddingSums(
const std::vector<float> &channel_sums) {
CHECK_EQ(channel_sums.size(), kMultiNumChannels);
std::vector<std::pair<size_t, float>> expected_sums;
for (int channel_id = 0; channel_id < kMultiNumChannels; ++channel_id) {
for (int i = 0; i < kMultiSizes[channel_id]; ++i) {
expected_sums.emplace_back(kMultiColumns[channel_id],
channel_sums[channel_id]);
}
}
return expected_sums;
}
// As above, but computes the channel sums as the product of |lhs| and |rhs|.
std::vector<std::pair<size_t, float>> ToEmbeddingSums(
const std::vector<float> &lhs, const std::vector<float> &rhs) {
CHECK_EQ(lhs.size(), rhs.size());
std::vector<float> channel_sums;
for (int i = 0; i < lhs.size(); ++i) {
channel_sums.push_back(lhs[i] * rhs[i]);
}
return ToEmbeddingSums(channel_sums);
}
FixedEmbeddings fixed_embeddings_;
};
// Tests that FixedEmbeddings is empty by default.
TEST_F(FixedEmbeddingsTest, EmptyByDefault) {
EXPECT_EQ(fixed_embeddings_.num_embeddings(), 0);
}
// Tests that FixedEmbeddings is empty when reset with an empty manager.
TEST_F(FixedEmbeddingsTest, EmptyManager) {
TF_ASSERT_OK(ResetManager(""));
TF_ASSERT_OK(ResetFixedEmbeddings());
EXPECT_EQ(fixed_embeddings_.num_embeddings(), 0);
}
// Tests that FixedEmbeddings produces a zero vector when no features are
// extracted.
TEST_F(FixedEmbeddingsTest, OneChannelNoFeatures) {
AddFixedEmbeddingMatrix(0, kSingleRows, kSingleColumns, 0.5);
TF_ASSERT_OK(ResetManager(kSingleSpec));
EXPECT_CALL(compute_session_, GetInputFeatures(_, _, _, _, _))
.WillOnce(Invoke(ExtractFeatures(0, {})));
TF_ASSERT_OK(ResetFixedEmbeddings());
ASSERT_EQ(fixed_embeddings_.num_embeddings(), 1);
ExpectVector(fixed_embeddings_.embedding(0), kSingleColumns, 0.0);
}
// Tests that FixedEmbeddings produces a row of the embedding matrix when
// exactly one feature with weight=1 is extracted.
TEST_F(FixedEmbeddingsTest, OneChannelOneFeature) {
AddFixedEmbeddingMatrix(0, kSingleRows, kSingleColumns, 0.125);
TF_ASSERT_OK(ResetManager(kSingleSpec));
EXPECT_CALL(compute_session_, GetInputFeatures(_, _, _, _, _))
.WillOnce(Invoke(ExtractFeatures(0, {{kSingleRows - 1, 1.0}})));
TF_ASSERT_OK(ResetFixedEmbeddings());
ASSERT_EQ(fixed_embeddings_.num_embeddings(), 1);
ExpectVector(fixed_embeddings_.embedding(0), kSingleColumns, 0.125);
}
// Tests that FixedEmbeddings produces a scaled row of the embedding matrix when
// exactly one feature with weight!=1 is extracted.
TEST_F(FixedEmbeddingsTest, OneChannelOneWeightedFeature) {
AddFixedEmbeddingMatrix(0, kSingleRows, kSingleColumns, 0.5);
TF_ASSERT_OK(ResetManager(kSingleSpec));
EXPECT_CALL(compute_session_, GetInputFeatures(_, _, _, _, _))
.WillOnce(Invoke(ExtractFeatures(0, {{0, -1.5}})));
TF_ASSERT_OK(ResetFixedEmbeddings());
ASSERT_EQ(fixed_embeddings_.num_embeddings(), 1);
ExpectVector(fixed_embeddings_.embedding(0), kSingleColumns, -0.75);
}
// Tests that FixedEmbeddings produces a weighted embedding sum when multiple
// weighted features are extracted.
TEST_F(FixedEmbeddingsTest, OneChannelManyFeatures) {
AddFixedEmbeddingMatrix(0, kSingleRows, kSingleColumns, 0.5);
TF_ASSERT_OK(ResetManager(kSingleSpec));
EXPECT_CALL(compute_session_, GetInputFeatures(_, _, _, _, _))
.WillOnce(Invoke(ExtractFeatures(0, {{0, 1.0}, {1, -2.0}, {2, 4.0}})));
const float kSum = 1.5; // = 0.5 * (1.0 - 2.0 + 4.0)
TF_ASSERT_OK(ResetFixedEmbeddings());
ASSERT_EQ(fixed_embeddings_.num_embeddings(), 1);
ExpectVector(fixed_embeddings_.embedding(0), kSingleColumns, kSum);
}
// Tests that FixedEmbeddings produces zero vectors for multiple channels that
// extract no features.
TEST_F(FixedEmbeddingsTest, ManyChannelsNoFeatures) {
const std::vector<float> kValues = {0.0, 0.0, 0.0};
for (int i = 0; i < kMultiNumChannels; ++i) {
AddFixedEmbeddingMatrix(i, kMultiRows[i], kMultiColumns[i], 1.0);
}
TF_ASSERT_OK(ResetManager(kMultiSpec));
EXPECT_CALL(compute_session_, GetInputFeatures(_, _, _, _, _))
.WillOnce(Invoke(ExtractFeatures(0, {})))
.WillOnce(Invoke(ExtractFeatures(1, {})))
.WillOnce(Invoke(ExtractFeatures(2, {})));
TF_ASSERT_OK(ResetFixedEmbeddings());
const auto kSums = ToEmbeddingSums(kValues);
ASSERT_EQ(fixed_embeddings_.num_embeddings(), kSums.size());
for (int i = 0; i < kSums.size(); ++i) {
ExpectVector(fixed_embeddings_.embedding(i), kSums[i].first,
kSums[i].second);
}
}
// Tests that FixedEmbeddings produces rows of the embedding matrix for multiple
// channels that extract exactly one feature with weight=1.
TEST_F(FixedEmbeddingsTest, ManyChannelsOneFeature) {
const std::vector<float> kValues = {1.0, -0.5, 0.75};
ASSERT_EQ(kValues.size(), kMultiNumChannels);
for (int i = 0; i < kMultiNumChannels; ++i) {
AddFixedEmbeddingMatrix(i, kMultiRows[i], kMultiColumns[i], kValues[i]);
}
TF_ASSERT_OK(ResetManager(kMultiSpec));
// NB: Sometimes the feature indices are extracted out-of-order.
EXPECT_CALL(compute_session_, GetInputFeatures(_, _, _, _, _))
.WillOnce(Invoke(ExtractFeatures(0, {{0, 10, 1.0}})))
.WillOnce(Invoke(ExtractFeatures(1, {{1, 11, 1.0}, //
{0, 11, 1.0}, //
{2, 11, 1.0}})))
.WillOnce(Invoke(ExtractFeatures(2, {{0, 12, 1.0}, //
{1, 12, 1.0}})));
TF_ASSERT_OK(ResetFixedEmbeddings());
const auto kSums = ToEmbeddingSums(kValues);
ASSERT_EQ(fixed_embeddings_.num_embeddings(), kSums.size());
for (int i = 0; i < kSums.size(); ++i) {
ExpectVector(fixed_embeddings_.embedding(i), kSums[i].first,
kSums[i].second);
}
}
// Tests that FixedEmbeddings produces scaled rows of the embedding matrix for
// multiple channels that extract exactly one feature with weight!=1.
TEST_F(FixedEmbeddingsTest, ManyChannelsOneWeightedFeature) {
const std::vector<float> kValues = {1.0, -0.5, 0.75};
const std::vector<float> kFeatures = {1.25, 0.75, -1.5};
ASSERT_EQ(kValues.size(), kMultiNumChannels);
ASSERT_EQ(kFeatures.size(), kMultiNumChannels);
for (int i = 0; i < kMultiNumChannels; ++i) {
AddFixedEmbeddingMatrix(i, kMultiRows[i], kMultiColumns[i], kValues[i]);
}
TF_ASSERT_OK(ResetManager(kMultiSpec));
// NB: Sometimes the feature indices are extracted out-of-order.
EXPECT_CALL(compute_session_, GetInputFeatures(_, _, _, _, _))
.WillOnce(Invoke(ExtractFeatures(0, {{0, 10, kFeatures[0]}})))
.WillOnce(Invoke(ExtractFeatures(1, {{0, 11, kFeatures[1]}, //
{1, 11, kFeatures[1]}, //
{2, 11, kFeatures[1]}})))
.WillOnce(Invoke(ExtractFeatures(2, {{1, 12, kFeatures[2]}, //
{0, 12, kFeatures[2]}})));
TF_ASSERT_OK(ResetFixedEmbeddings());
const auto kSums = ToEmbeddingSums(kValues, kFeatures);
ASSERT_EQ(fixed_embeddings_.num_embeddings(), kSums.size());
for (int i = 0; i < kSums.size(); ++i) {
ExpectVector(fixed_embeddings_.embedding(i), kSums[i].first,
kSums[i].second);
}
}
// Tests that FixedEmbeddings produces weighted embedding sums for multiple
// channels that extract multiple weighted features.
TEST_F(FixedEmbeddingsTest, ManyChannelsManyFeatures) {
const std::vector<float> kValues = {1.0, -0.5, 0.75};
ASSERT_EQ(kValues.size(), kMultiNumChannels);
for (int i = 0; i < kMultiNumChannels; ++i) {
AddFixedEmbeddingMatrix(i, kMultiRows[i], kMultiColumns[i], kValues[i]);
}
TF_ASSERT_OK(ResetManager(kMultiSpec));
// NB: Sometimes the feature indices are extracted out-of-order.
EXPECT_CALL(compute_session_, GetInputFeatures(_, _, _, _, _))
.WillOnce(Invoke(ExtractFeatures(0, {{0, 0, 1.0}, //
{0, 1, -2.0}, //
{0, 2, 9.0}})))
.WillOnce(Invoke(ExtractFeatures(1, {{0, 0, 2.0}, //
{1, 1, -4.0}, //
{2, 2, 8.0}, //
{1, 0, 2.0}, //
{2, 1, -4.0}, //
{0, 2, 8.0}, //
{2, 0, 2.0}, //
{0, 1, -4.0}, //
{1, 2, 8.0}})))
.WillOnce(Invoke(ExtractFeatures(2, {{0, 0, 3.0}, //
{0, 1, -6.0}, //
{0, 2, 7.0}, //
{1, 2, 7.0}, //
{1, 1, -6.0}, //
{1, 0, 3.0}})));
const std::vector<float> kFeatures = {1.0 - 2.0 + 9.0,
2.0 - 4.0 + 8.0,
3.0 - 6.0 + 7.0};
ASSERT_EQ(kFeatures.size(), kMultiNumChannels);
TF_ASSERT_OK(ResetFixedEmbeddings());
const auto kSums = ToEmbeddingSums(kValues, kFeatures);
ASSERT_EQ(fixed_embeddings_.num_embeddings(), kSums.size());
for (int i = 0; i < kSums.size(); ++i) {
ExpectVector(fixed_embeddings_.embedding(i), kSums[i].first,
kSums[i].second);
}
}
// Tests that FixedEmbeddings produces feature IDs when configured with a
// non-embedded feature channel.
TEST_F(FixedEmbeddingsTest, NonEmbeddedFeature) {
TF_ASSERT_OK(ResetManager(kNonEmbeddedSpec));
// These feature values probe the boundaries of valid feature IDs.
EXPECT_CALL(compute_session_, GetInputFeatures(_, _, _, _, _))
.WillOnce(Invoke(ExtractFeatures(0, {{2, 2147483647, 1.0}, //
{0, 0, 1.0}, //
{1, 34, 1.0}})));
TF_ASSERT_OK(ResetFixedEmbeddings());
ASSERT_EQ(fixed_embeddings_.num_embeddings(), 3);
ASSERT_EQ(fixed_embeddings_.ids(0).size(), 1);
EXPECT_EQ(fixed_embeddings_.ids(0)[0], 0);
ASSERT_EQ(fixed_embeddings_.ids(1).size(), 1);
EXPECT_EQ(fixed_embeddings_.ids(1)[0], 34);
ASSERT_EQ(fixed_embeddings_.ids(2).size(), 1);
EXPECT_EQ(fixed_embeddings_.ids(2)[0], 2147483647);
Vector<int32> ids;
ids = network_states_.GetLocal(manager_.id_handle(0, 0));
ASSERT_EQ(ids.size(), 1);
EXPECT_EQ(ids[0], 0);
ids = network_states_.GetLocal(manager_.id_handle(0, 1));
ASSERT_EQ(ids.size(), 1);
EXPECT_EQ(ids[0], 34);
ids = network_states_.GetLocal(manager_.id_handle(0, 2));
ASSERT_EQ(ids.size(), 1);
EXPECT_EQ(ids[0], 2147483647);
}
// Tests that FixedEmbeddings fails if a feature has a negative ID.
TEST_F(FixedEmbeddingsTest, NonEmbeddedFeatureNegativeId) {
TF_ASSERT_OK(ResetManager(kNonEmbeddedSpec));
EXPECT_CALL(compute_session_, GetInputFeatures(_, _, _, _, _))
.WillOnce(Invoke(ExtractFeatures(0, {{2, -1, 1.0}, //
{0, 12, 1.0}, //
{1, 34, 1.0}})));
EXPECT_THAT(ResetFixedEmbeddings(),
test::IsErrorWithSubstr(tensorflow::strings::StrCat(
"Component '", kTestComponentName,
"' channel 0 index 2: Invalid non-embedded feature ID -1")));
}
// Tests that FixedEmbeddings fails if a feature has an ID that is too large.
TEST_F(FixedEmbeddingsTest, NonEmbeddedFeatureIdTooLarge) {
TF_ASSERT_OK(ResetManager(kNonEmbeddedSpec));
EXPECT_CALL(compute_session_, GetInputFeatures(_, _, _, _, _))
.WillOnce(Invoke(ExtractFeatures(0, {{2, 56, 1.0}, //
{0, 2147483648, 1.0}, //
{1, 34, 1.0}})));
EXPECT_THAT(ResetFixedEmbeddings(),
test::IsErrorWithSubstr(tensorflow::strings::StrCat(
"Component '", kTestComponentName,
"' channel 0 index 0: Invalid non-embedded feature ID "
"2147483648")));
}
// Tests that FixedEmbeddings fails if a feature weight is not 1.0.
TEST_F(FixedEmbeddingsTest, NonEmbeddedFeatureNonIdentityWeight) {
TF_ASSERT_OK(ResetManager(kNonEmbeddedSpec));
EXPECT_CALL(compute_session_, GetInputFeatures(_, _, _, _, _))
.WillOnce(Invoke(ExtractFeatures(0, {{2, 56, 1.0}, //
{0, 12, 1.0}, //
{1, 34, 1.5}})));
EXPECT_THAT(ResetFixedEmbeddings(),
test::IsErrorWithSubstr(tensorflow::strings::StrCat(
"Component '", kTestComponentName,
"' channel 0 index 1: Invalid non-embedded feature weight "
"1.5 (expected 1.0)")));
}
// Tests that FixedEmbeddings fails if a feature ID is duplicated.
TEST_F(FixedEmbeddingsTest, NonEmbeddedFeatureDuplicateId) {
TF_ASSERT_OK(ResetManager(kNonEmbeddedSpec));
EXPECT_CALL(compute_session_, GetInputFeatures(_, _, _, _, _))
.WillOnce(Invoke(ExtractFeatures(0, {{2, 56, 1.0}, //
{2, 56, 1.0}, //
{0, 12, 1.0}, //
{1, 34, 1.0}})));
EXPECT_THAT(
ResetFixedEmbeddings(),
test::IsErrorWithSubstr(tensorflow::strings::StrCat(
"Component '", kTestComponentName,
"' channel 0 index 2: Duplicate non-embedded feature ID 56")));
}
// Tests that FixedEmbeddings fails if a feature ID is missing.
TEST_F(FixedEmbeddingsTest, NonEmbeddedFeatureMissingId) {
TF_ASSERT_OK(ResetManager(kNonEmbeddedSpec));
EXPECT_CALL(compute_session_, GetInputFeatures(_, _, _, _, _))
.WillOnce(Invoke(ExtractFeatures(0, {{2, 56, 1.0}, //
{1, 34, 1.0}})));
EXPECT_THAT(ResetFixedEmbeddings(),
test::IsErrorWithSubstr(tensorflow::strings::StrCat(
"Component '", kTestComponentName,
"' channel 0 index 0: Missing non-embedded feature ID")));
}
} // namespace
} // namespace runtime
} // namespace dragnn
} // namespace syntaxnet
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
#include "dragnn/runtime/flexible_matrix_kernel.h"
#include "dragnn/runtime/math/avx_vector_array.h"
#include "tensorflow/core/lib/strings/strcat.h"
namespace syntaxnet {
namespace dragnn {
namespace runtime {
namespace {
// Rounds a number, |rows|, up to a multiple of |multiple|. For example,
// PadRows(6, 4) will return 8, because 8 is the nearest number after 6 that is
// divisible by 4. This method requires that |multiple| be positive. It is used
// for pre-calculating the dimension of a blocked matrix, instead of having to
// read the entire matrix.
inline int PadRows(int rows, int multiple) {
return multiple * ((rows + multiple - 1) / multiple);
}
} // namespace
constexpr char FlexibleMatrixKernel::kSuffix[];
tensorflow::Status FlexibleMatrixKernel::Initialize(
const string &debug_name, const string &weights_name, int output_dimension,
VariableStore *variable_store) {
padded_output_dimension_ = PadRows(output_dimension, kAvxWidth);
// Try retrieving the flexible matrix variable using all matrix formats. Only
// one format will work (see FlexibleMatrixVariableStoreWrapper).
const string variable_name =
tensorflow::strings::StrCat(weights_name, kSuffix);
// Handle the simpler non-blocked case first.
tensorflow::Status status = variable_store->Lookup(variable_name, &weights_);
if (status.ok()) {
LOG(INFO) << "Matrix of size " << weights_.num_rows() << " x "
<< weights_.num_columns() << " for layer " << debug_name
<< " will be computed with non-blocked arithmetic";
weights_type_ = WeightsType::kNormal;
return status;
}
// Otherwise, we must have a blocked format.
BlockedMatrix<float> blocked_transpose;
TF_RETURN_IF_ERROR(variable_store->Lookup(variable_name, &blocked_transpose));
const auto blocked = blocked_transpose.Transpose();
// Blocked matrices must use a supported block size.
switch (blocked.block_size()) {
case 32:
weights_type_ = WeightsType::kBlocked32;
status = fast_weights_32_.Initialize(blocked);
break;
case 48:
weights_type_ = WeightsType::kBlocked48;
status = fast_weights_48_.Initialize(blocked);
break;
default:
return tensorflow::errors::FailedPrecondition(
"Unsupported block size: ", blocked.block_size(), " for weights ",
weights_name, " of layer ", debug_name);
}
if (status.ok()) {
LOG(INFO) << "Matrix of size " << blocked.num_rows() << " x "
<< blocked.num_columns() << " for layer " << debug_name
<< " will be computed with SGEMV<block_size="
<< blocked.block_size() << ">";
} else {
// This should (almost?) never happen, because SgemvMatrix::Initialize()
// only fails on bad block sizes, and the switch above ensures that the
// SgemvMatrix and variable agree on block size.
LOG(ERROR) << "Error formatting SGEMV matrix: " << status
<< " - matrix size " << blocked.num_rows() << " x "
<< blocked.num_columns() << " for layer " << debug_name;
}
return status;
}
int FlexibleMatrixKernel::NumPaddedRows() const {
switch (weights_type_) {
case WeightsType::kNormal:
return weights_.num_rows();
case WeightsType::kBlocked32:
return fast_weights_32_.matrix().num_rows();
case WeightsType::kBlocked48:
return fast_weights_48_.matrix().num_rows();
}
}
int FlexibleMatrixKernel::NumColumns() const {
switch (weights_type_) {
case WeightsType::kNormal:
return weights_.num_columns();
case WeightsType::kBlocked32:
return fast_weights_32_.matrix().num_columns();
case WeightsType::kBlocked48:
return fast_weights_48_.matrix().num_columns();
}
}
bool FlexibleMatrixKernel::MatchesOutputDimension(int output_dimension) const {
int max_padding = 0;
if (weights_type_ == WeightsType::kBlocked32) {
max_padding = 32;
} else if (weights_type_ == WeightsType::kBlocked48) {
max_padding = 48;
}
return (NumPaddedRows() >= output_dimension &&
NumPaddedRows() <= output_dimension + max_padding);
}
string FlexibleMatrixKernel::TypeName(WeightsType value) {
switch (value) {
case WeightsType::kNormal:
return "normal (non-blocked)";
case WeightsType::kBlocked32:
return "32-row blocked";
case WeightsType::kBlocked48:
return "48-row blocked";
}
}
} // namespace runtime
} // namespace dragnn
} // namespace syntaxnet
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
#ifndef DRAGNN_RUNTIME_FLEXIBLE_MATRIX_KERNEL_H_
#define DRAGNN_RUNTIME_FLEXIBLE_MATRIX_KERNEL_H_
#include "dragnn/runtime/alignment.h"
#include "dragnn/runtime/math/arithmetic.h"
#include "dragnn/runtime/math/sgemvv.h"
#include "dragnn/runtime/math/types.h"
#include "dragnn/runtime/variable_store.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/status.h"
#define DRAGNN_FMK_ATTRIBUTE_ALWAYS_INLINE __attribute__((always_inline)) inline
namespace syntaxnet {
namespace dragnn {
namespace runtime {
// Matrix-vector multiplication helper, which will switch the type of the
// underlying matrix based on estimates of how well it will perform. For
// example, a 32x32 matrix-multiplication might get a specialized SGEMV routine,
// while a 2x128 matrix-multiplication might use a naive (non-SSE/AVX)
// algorithm.
//
// Call Initialize() before using, then call one of the MatrixVectorProduct()
// routines.
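//
// Example usage (a minimal sketch; the variable names, dimensions, and
// vectors here are assumptions):
//
//   FlexibleMatrixKernel kernel;
//   TF_RETURN_IF_ERROR(kernel.Initialize(
//       "my_component/hidden", "my_component/weights_0",
//       /*output_dimension=*/64, &variable_store));
//   // Computes output = weights * input + bias.
//   kernel.MatrixVectorProduct(input, bias, output);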
class FlexibleMatrixKernel {
public:
// Suffix appended to variable lookups issued by the kernel.
static constexpr char kSuffix[] = "/FlexibleMatrixKernel";
FlexibleMatrixKernel() = default;
// Initializes the underlying matrices for this kernel; call this method
// before using this class. Arguments: |debug_name| is the name of the matrix
// being accessed, which usually should specify the component name and other
// relevant aspects; |weights_name| is the name of the variable in the
// TensorFlow graph to access; |output_dimension| is the real output
// dimension, which is comparable to the number of rows in the matrix but does
// not include padding; |variable_store| is the store which is queried for
// variables.
tensorflow::Status Initialize(const string &debug_name,
const string &weights_name,
int output_dimension,
VariableStore *variable_store);
// Number of rows for the matrix. This may be padded, if a blocked format
// is chosen.
int NumPaddedRows() const;
// Number of columns for the matrix. This should not be padded.
int NumColumns() const;
// Whether a layer's logical output dimension matches the shape of this class'
// underlying matrix.
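// For example (an illustrative case), a blocked matrix padded from 40 logical
// rows up to 48 matches output_dimension=40, since 40 <= 48 <= 40 + 48.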
bool MatchesOutputDimension(int output_dimension) const;
  // Computes the matrix-vector product of a single vector, with an initial
  // value (i.e., output = W * input + initial). This runs different code based
  // on which kind of blocked matrix was chosen. Aliasing is allowed; it is
  // fairly common to have initial == output.
DRAGNN_FMK_ATTRIBUTE_ALWAYS_INLINE
void MatrixVectorProduct(Vector<float> input, Vector<float> initial,
MutableVector<float> output) const;
  // Computes the matrix-vector products of two vectors at once. This is the
  // entrypoint for SGEMVV and is more efficient than two single-vector calls,
  // since loads of the matrix are amortized across both input vectors.
DRAGNN_FMK_ATTRIBUTE_ALWAYS_INLINE
void MatrixVectorVectorProduct(Vector<float> input0, Vector<float> input1,
Vector<float> initial0, Vector<float> initial1,
MutableVector<float> output0,
MutableVector<float> output1) const;
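  //
  // For instance, a caller with several pending vectors might pair them up to
  // amortize matrix loads (a sketch; |inputs|, |bias|, and |outputs| are
  // hypothetical):
  //
  //   for (int i = 0; i + 1 < num_vectors; i += 2) {
  //     kernel.MatrixVectorVectorProduct(inputs[i], inputs[i + 1], bias, bias,
  //                                      outputs[i], outputs[i + 1]);
  //   }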
// Convenience function, calculating `output += M * input`.
void AddMatrixVectorProduct(Vector<float> input,
MutableVector<float> output) const {
MatrixVectorProduct(input, Vector<float>(output), output);
}
// Same as above, without initial bias.
DRAGNN_FMK_ATTRIBUTE_ALWAYS_INLINE
void MatrixVectorProduct(Vector<float> input,
MutableVector<float> output) const;
private:
enum class WeightsType { kNormal, kBlocked32, kBlocked48 };
// Returns the human-readable name of a WeightsType.
static string TypeName(WeightsType value);
WeightsType weights_type_;
// Actual matrix data. Which matrix is active is determined by
// |weights_type_|.
Matrix<float> weights_;
SgemvMatrix<32> fast_weights_32_;
SgemvMatrix<48> fast_weights_48_;
// Output dimension padded to alignment.
int padded_output_dimension_;
};
// Implementation details below.
DRAGNN_FMK_ATTRIBUTE_ALWAYS_INLINE
void FlexibleMatrixKernel::MatrixVectorProduct(
Vector<float> input, Vector<float> initial,
MutableVector<float> output) const {
SgemvOutputBatch<1> outputs = {{output.data()}};
SgemvInputBatch<1> inputs = {{input.data()}, {initial.data()}};
switch (weights_type_) {
case WeightsType::kNormal:
MultiplyMatrixAndVectorWithBias(weights_, initial, input, output);
return;
case WeightsType::kBlocked32:
fast_weights_32_.MaskedMatrixMultiVectorProduct(
inputs, padded_output_dimension_, &outputs);
return;
case WeightsType::kBlocked48:
fast_weights_48_.MaskedMatrixMultiVectorProduct(
inputs, padded_output_dimension_, &outputs);
return;
}
}
DRAGNN_FMK_ATTRIBUTE_ALWAYS_INLINE
void FlexibleMatrixKernel::MatrixVectorVectorProduct(
Vector<float> input0, Vector<float> input1, Vector<float> initial0,
Vector<float> initial1, MutableVector<float> output0,
MutableVector<float> output1) const {
SgemvOutputBatch<2> outputs = {{output0.data(), output1.data()}};
SgemvInputBatch<2> inputs = {{input0.data(), input1.data()},
{initial0.data(), initial1.data()}};
switch (weights_type_) {
case WeightsType::kNormal:
MultiplyMatrixAndVectorWithBias(weights_, initial0, input0, output0);
MultiplyMatrixAndVectorWithBias(weights_, initial1, input1, output1);
return;
case WeightsType::kBlocked32:
fast_weights_32_.MaskedMatrixMultiVectorProduct(
inputs, padded_output_dimension_, &outputs);
return;
case WeightsType::kBlocked48:
fast_weights_48_.MaskedMatrixMultiVectorProduct(
inputs, padded_output_dimension_, &outputs);
return;
}
}
DRAGNN_FMK_ATTRIBUTE_ALWAYS_INLINE
void FlexibleMatrixKernel::MatrixVectorProduct(
Vector<float> input, MutableVector<float> output) const {
SgemvOutputBatch<1> outputs = {{output.data()}};
SgemvInputBatch<1> inputs = {{input.data()}, {nullptr}};
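  // NB: the nullptr initial pointer is assumed safe here because the
  // no-initial SGEMV variants below should never read it.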
switch (weights_type_) {
case WeightsType::kNormal:
MultiplyMatrixAndVector(weights_, input, output);
return;
case WeightsType::kBlocked32:
fast_weights_32_.MaskedMatrixMultiVectorProductNoInitial(
inputs, padded_output_dimension_, &outputs);
return;
case WeightsType::kBlocked48:
fast_weights_48_.MaskedMatrixMultiVectorProductNoInitial(
inputs, padded_output_dimension_, &outputs);
return;
}
}
} // namespace runtime
} // namespace dragnn
} // namespace syntaxnet
#undef DRAGNN_FMK_ATTRIBUTE_ALWAYS_INLINE
#endif // DRAGNN_RUNTIME_FLEXIBLE_MATRIX_KERNEL_H_
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
#include <vector>
#include "dragnn/runtime/flexible_matrix_kernel.h"
#include "dragnn/core/test/generic.h"
#include "dragnn/protos/runtime.pb.h"
#include "dragnn/runtime/math/transformations.h"
#include "dragnn/runtime/test/fake_variable_store.h"
#include "dragnn/runtime/test/helpers.h"
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/test.h"
namespace syntaxnet {
namespace dragnn {
namespace runtime {
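// Returns a 32 x |inner_dimension| matrix of test weights, with every entry
// equal to 0.1.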
std::vector<std::vector<float>> TestValues(int inner_dimension) {
std::vector<std::vector<float>> values;
for (int block = 0; block < 32; ++block) {
std::vector<float> row_values;
for (int value = 0; value < inner_dimension; ++value) {
row_values.push_back(0.1f);
}
values.push_back(row_values);
}
return values;
}
// Tests that the FlexibleMatrixKernel will use a blocked matrix if that is the
// only available format.
TEST(FlexibleMatrixKernelTest, UseBlockedMatrix) {
std::vector<std::vector<float>> values = TestValues(32);
for (int actual_rows : {24, 30, 32}) {
// Add the variable using a blocked format.
FakeVariableStore store;
store.AddOrDie(
tensorflow::strings::StrCat("weights", FlexibleMatrixKernel::kSuffix),
values, VariableSpec::FORMAT_COLUMN_BLOCKED_ROW_MAJOR_MATRIX);
FlexibleMatrixKernel kernel;
TF_EXPECT_OK(
kernel.Initialize("test_weights", "weights", actual_rows, &store));
EXPECT_EQ(kernel.NumPaddedRows(), 32);
UniqueVector<float> vector(values.back());
UniqueVector<float> output(actual_rows);
kernel.MatrixVectorProduct(Vector<float>(*vector), *output);
// Every value in `output` should be 32 * 0.1 * 0.1 = 0.32.
for (int i = 0; i < actual_rows; ++i) {
EXPECT_NEAR((*output)[i], 0.32f, 1e-6f);
}
kernel.MatrixVectorProduct(Vector<float>(*vector), Vector<float>(*output),
*output);
// Every value in `output` should be 2 * 32 * 0.1 * 0.1 = 0.64.
for (int i = 0; i < actual_rows; ++i) {
EXPECT_NEAR((*output)[i], 0.64f, 1e-6f);
}
}
}
// Tests that the FlexibleMatrixKernel will use a non-blocked matrix if that is
// the only available format.
TEST(FlexibleMatrixKernelTest, UseNonBlockedMatrix) {
const int kOutputDim = 32;
std::vector<std::vector<float>> values = TestValues(kOutputDim);
// Add the variable using a non-blocked format.
FakeVariableStore store;
store.AddOrDie(
tensorflow::strings::StrCat("weights", FlexibleMatrixKernel::kSuffix),
values, VariableSpec::FORMAT_ROW_MAJOR_MATRIX);
FlexibleMatrixKernel kernel;
TF_EXPECT_OK(
kernel.Initialize("test_weights", "weights", kOutputDim, &store));
EXPECT_EQ(kernel.NumPaddedRows(), 32);
EXPECT_EQ(kernel.NumColumns(), kOutputDim);
UniqueVector<float> vector(values.back());
UniqueVector<float> output(kOutputDim);
kernel.MatrixVectorProduct(Vector<float>(*vector), *output);
const float kExpectedFirstResult = kOutputDim * 0.1 * 0.1;
for (int i = 0; i < kOutputDim; ++i) {
EXPECT_NEAR((*output)[i], kExpectedFirstResult, 1e-6f);
}
kernel.MatrixVectorProduct(Vector<float>(*vector), Vector<float>(*output),
*output);
const float kExpectedSecondResult = 2.0 * kExpectedFirstResult;
for (int i = 0; i < kOutputDim; ++i) {
EXPECT_NEAR((*output)[i], kExpectedSecondResult, 1e-6f);
}
}
TEST(FlexibleMatrixKernelTest, MissingVariableIsFailure) {
FakeVariableStore store;
FlexibleMatrixKernel kernel;
EXPECT_THAT(kernel.Initialize("test_weights", "weights", 30, &store),
test::IsErrorWithSubstr("Unknown variable: weights"));
}
} // namespace runtime
} // namespace dragnn
} // namespace syntaxnet
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
#include "dragnn/runtime/fml_parsing.h"
#include "syntaxnet/fml_parser.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/platform/protobuf.h"
namespace syntaxnet {
namespace dragnn {
namespace runtime {
tensorflow::Status FeatureFunctionAttributes::Reset(
const FeatureFunctionDescriptor &function) {
Attributes::Mapping mapping;
for (const Parameter &parameter : function.parameter()) {
mapping[parameter.name()] = parameter.value();
}
return Attributes::Reset(mapping);
}
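// Parses |fml| as a chain of unary feature functions whose types match
// |types| in order, and copies the innermost (leaf) function into |leaf|.
// As a hypothetical illustration, types {"input", "word"} would accept FML
// like "input.word", with |leaf| receiving the "word" feature.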
tensorflow::Status ParseFeatureChainFml(const string &fml,
const std::vector<string> &types,
FeatureFunctionDescriptor *leaf) {
if (types.empty()) {
return tensorflow::errors::InvalidArgument("Empty chain of feature types");
}
const tensorflow::Status error = tensorflow::errors::InvalidArgument(
"Failed to parse feature chain [",
tensorflow::str_util::Join(types, ", "), "] from FML: ", fml);
FeatureExtractorDescriptor extractor;
FMLParser().Parse(fml, &extractor);
if (extractor.feature_size() != 1) return error;
const FeatureFunctionDescriptor *function = &extractor.feature(0);
// Check prefix of non-leaf features.
for (int i = 0; i + 1 < types.size(); ++i) {
if (function->type() != types[i]) return error;
if (function->argument() != 0) return error;
if (function->parameter_size() != 0) return error;
if (function->feature_size() != 1) return error;
function = &function->feature(0);
}
// Check leaf feature.
if (function->type() != types.back()) return error;
if (function->feature_size() != 0) return error;
  // Success; copy the leaf feature to the output.
*leaf = *function;
return tensorflow::Status::OK();
}
} // namespace runtime
} // namespace dragnn
} // namespace syntaxnet