"tests/git@developer.sourcefind.cn:OpenDAS/mmcv.git" did not exist on "a848ecfdfcd5bf7f47e20c900ee046976b1e5d69"
Commit 7d30a017 authored by Ivan Bogatyy, committed by calberti
Browse files

Release DRAGNN (#1177)

* Release DRAGNN

* Update CoNLL evaluation table & evaluator.py
parent c774cc95
.git
bazel/
Dockerfile*
tensorflow/.git
# Java baseimage, for Bazel.
FROM java:8 FROM java:8
ENV SYNTAXNETDIR=/opt/tensorflow PATH=$PATH:/root/bin ENV SYNTAXNETDIR=/opt/tensorflow PATH=$PATH:/root/bin
# Install system packages. This doesn't include everything the TensorFlow
# dockerfile specifies, so if anything goes awry, maybe install more packages
# from there. Also, running apt-get clean before further commands will make the
# Docker images smaller.
RUN mkdir -p $SYNTAXNETDIR \ RUN mkdir -p $SYNTAXNETDIR \
&& cd $SYNTAXNETDIR \ && cd $SYNTAXNETDIR \
&& apt-get update \ && apt-get update \
&& apt-get install git zlib1g-dev file swig python2.7 python-dev python-pip python-mock -y \ && apt-get install -y \
&& pip install --upgrade pip \ file \
&& pip install -U protobuf==3.0.0b2 \ git \
&& pip install asciitree \ graphviz \
&& pip install numpy \ libcurl3-dev \
&& wget https://github.com/bazelbuild/bazel/releases/download/0.4.3/bazel-0.4.3-installer-linux-x86_64.sh \ libfreetype6-dev \
libgraphviz-dev \
liblapack-dev \
libopenblas-dev \
libpng12-dev \
libxft-dev \
python-dev \
python-mock \
python-pip \
python2.7 \
swig \
vim \
zlib1g-dev \
&& apt-get clean \
&& (rm -f /var/cache/apt/archives/*.deb \
/var/cache/apt/archives/partial/*.deb /var/cache/apt/*.bin || true)
# Install common Python dependencies. Similar to above, remove caches
# afterwards to help keep Docker images smaller.
RUN pip install --ignore-installed pip \
&& python -m pip install numpy \
&& rm -rf /root/.cache/pip /tmp/pip*
RUN python -m pip install \
asciitree \
ipykernel \
jupyter \
matplotlib \
pandas \
protobuf \
scipy \
sklearn \
&& python -m ipykernel.kernelspec \
&& python -m pip install pygraphviz \
--install-option="--include-path=/usr/include/graphviz" \
--install-option="--library-path=/usr/lib/graphviz/" \
&& rm -rf /root/.cache/pip /tmp/pip*
# Installs the latest version of Bazel.
RUN wget --quiet https://github.com/bazelbuild/bazel/releases/download/0.4.3/bazel-0.4.3-installer-linux-x86_64.sh \
&& chmod +x bazel-0.4.3-installer-linux-x86_64.sh \ && chmod +x bazel-0.4.3-installer-linux-x86_64.sh \
&& ./bazel-0.4.3-installer-linux-x86_64.sh --user \ && ./bazel-0.4.3-installer-linux-x86_64.sh \
&& git clone --recursive https://github.com/tensorflow/models.git \ && rm ./bazel-0.4.3-installer-linux-x86_64.sh
&& cd $SYNTAXNETDIR/models/syntaxnet/tensorflow \
&& echo -e "\n\n\n\n\n\n\n\n\n" | ./configure \ COPY WORKSPACE $SYNTAXNETDIR/syntaxnet/WORKSPACE
&& apt-get autoremove -y \ COPY tools/bazel.rc $SYNTAXNETDIR/syntaxnet/tools/bazel.rc
&& apt-get clean COPY tensorflow $SYNTAXNETDIR/syntaxnet/tensorflow
# Compile common TensorFlow targets, which don't depend on DRAGNN / SyntaxNet
# source. This makes it more convenient to re-compile DRAGNN / SyntaxNet for
# development (though not as convenient as the docker-devel scripts).
RUN cd $SYNTAXNETDIR/syntaxnet/tensorflow \
&& tensorflow/tools/ci_build/builds/configured CPU \
&& cd $SYNTAXNETDIR/syntaxnet \
&& bazel build -c opt @org_tensorflow//tensorflow:tensorflow_py
RUN cd $SYNTAXNETDIR/models/syntaxnet \ # Build the codez.
&& bazel test --genrule_strategy=standalone syntaxnet/... util/utf8/... WORKDIR $SYNTAXNETDIR/syntaxnet
COPY dragnn $SYNTAXNETDIR/syntaxnet/dragnn
COPY syntaxnet $SYNTAXNETDIR/syntaxnet/syntaxnet
COPY third_party $SYNTAXNETDIR/syntaxnet/third_party
COPY util/utf8 $SYNTAXNETDIR/syntaxnet/util/utf8
RUN bazel build -c opt //dragnn/python:all //dragnn/tools:all
WORKDIR $SYNTAXNETDIR/models/syntaxnet # This makes the IP exposed actually "*"; we'll do host restrictions by passing
# a hostname to the `docker run` command.
COPY tensorflow/tensorflow/tools/docker/jupyter_notebook_config.py /root/.jupyter/
EXPOSE 8888
CMD [ "sh", "-c", "echo 'Bob brought the pizza to Alice.' | syntaxnet/demo.sh" ] # This does not need to be compiled, only copied.
COPY examples $SYNTAXNETDIR/syntaxnet/examples
# Todo: Move this earlier in the file (don't want to invalidate caches for now).
RUN jupyter nbextension enable --py --sys-prefix widgetsnbextension
# COMMANDS to build and run CMD /bin/bash -c "bazel-bin/dragnn/tools/oss_notebook_launcher notebook --debug --notebook-dir=/opt/tensorflow/syntaxnet/examples"
# ===============================
# mkdir build && cp Dockerfile build/ && cd build
# docker build -t syntaxnet .
# docker run syntaxnet
This diff is collapsed.
...@@ -3,10 +3,23 @@ local_repository( ...@@ -3,10 +3,23 @@ local_repository(
path = "tensorflow", path = "tensorflow",
) )
# We need to pull in @io_bazel_rules_closure for TensorFlow. Bazel design
# documentation states that this verbosity is intentional, to prevent
# TensorFlow/SyntaxNet from depending on different versions of
# @io_bazel_rules_closure.
http_archive(
name = "io_bazel_rules_closure",
sha256 = "60fc6977908f999b23ca65698c2bb70213403824a84f7904310b6000d78be9ce",
strip_prefix = "rules_closure-5ca1dab6df9ad02050f7ba4e816407f88690cf7d",
urls = [
"http://bazel-mirror.storage.googleapis.com/github.com/bazelbuild/rules_closure/archive/5ca1dab6df9ad02050f7ba4e816407f88690cf7d.tar.gz", # 2017-02-03
"https://github.com/bazelbuild/rules_closure/archive/5ca1dab6df9ad02050f7ba4e816407f88690cf7d.tar.gz",
],
)
load("@org_tensorflow//tensorflow:workspace.bzl", "tf_workspace") load("@org_tensorflow//tensorflow:workspace.bzl", "tf_workspace")
tf_workspace(path_prefix="", tf_repo_name="org_tensorflow") tf_workspace(path_prefix="", tf_repo_name="org_tensorflow")
# Test that Bazel is up-to-date. # Test that Bazel is up-to-date.
load("@org_tensorflow//tensorflow:workspace.bzl", "check_version") load("@org_tensorflow//tensorflow:workspace.bzl", "check_version")
check_version("0.4.3") check_version("0.4.2")
#!/bin/bash
#
# This file puts you in a Docker sub-shell where you can build SyntaxNet
# targets. It is intended for development, as the Dockerfile (build file) does
# not actually build any of SyntaxNet, but instead mounts it in a volume.

# Resolve the repository root from this script's real location, so the script
# also works when invoked through a symlink or from another directory.
script_path="$(readlink -f "$0")"
root_path="$(dirname "$(dirname "${script_path}")")"

# Abort on the first failing command (e.g. a failed image build).
set -e

# Build the dragnn-oss image only when it does not exist yet; rebuilding is
# left to the user so repeated invocations stay fast.
if [[ -z "$(docker images -q dragnn-oss)" ]]; then
  docker build -t dragnn-oss .
else
  echo "NOTE: dragnn-oss image already exists, not re-building." >&2
  echo "Please run \`docker build -t dragnn-oss .\` if you need." >&2
fi

echo -e "\n\nRun bazel commands like \`bazel test syntaxnet/...\`"

# NOTE: Unfortunately, we need to mount /tensorflow over /syntaxnet/tensorflow
# (which happens via devel_entrypoint.sh). This requires privileged mode.
syntaxnet_base="/opt/tensorflow/syntaxnet"
# Mount the host's syntaxnet/ and dragnn/ sources over the image's copies and
# forward Jupyter's port, bound to localhost only.
docker run --rm -ti \
  -v "${root_path}"/syntaxnet:"${syntaxnet_base}"/syntaxnet \
  -v "${root_path}"/dragnn:"${syntaxnet_base}"/dragnn \
  -p 127.0.0.1:8888:8888 \
  dragnn-oss "$@"
#!/bin/bash
#
# Convenience script to build wheel files in Docker, and copy them out of the
# container.
#
# Usage: docker-devel/build_wheels.sh (takes no arguments; run it from the base
# directory).

set -e

docker build -t dragnn-oss .

# Start building the wheels. Two wheels are built: a plain DRAGNN wheel and
# one that additionally bundles TensorFlow (--include-tensorflow).
script="bazel run //dragnn/tools:build_pip_package \
  -- --output-dir=/opt/tensorflow/syntaxnet; \
  bazel run //dragnn/tools:build_pip_package \
  -- --output-dir=/opt/tensorflow/syntaxnet --include-tensorflow"
container_id="$(docker run -d dragnn-oss /bin/bash -c "${script}")"
echo "Waiting for container ${container_id} to finish building the wheel ..."

# `docker wait` blocks until the container exits and prints its exit code.
if [[ "$(docker wait "${container_id}")" != 0 ]]; then
  echo "Container failed! Please run \`docker logs <id>\` to see errors." >&2
  exit 1
fi

# The build_pip_package.py script prints lines like "Wrote x.whl". The wheel
# names are prefixed by architecture and such, so don't guess them.
wheels=(
  $(docker logs "${container_id}" 2>/dev/null | grep Wrote | awk '{print $2;}'))
for wheel in "${wheels[@]}"; do
  # Copy each wheel out of the (stopped) container into the current directory.
  output=./"$(basename "${wheel}")"
  docker cp "${container_id}:${wheel}" "${output}"
  echo "Wrote ${output} ($(du -h "${output}" | awk '{print $1;}'))"
done

# Clean up the helper container once the artifacts are extracted.
echo "Removing ${container_id} ..."
docker rm "${container_id}" >/dev/null
# Visibility group for packages allowed to see DRAGNN internals.
# Currently empty; entries are added as internal users are whitelisted.
package_group(
    name = "dragnn_visibility",
    packages = [
    ],
)

package(default_visibility = ["//visibility:public"])

# SyntaxNet-backed implementation of the DRAGNN Component interface.
cc_library(
    name = "syntaxnet_component",
    srcs = ["syntaxnet_component.cc"],
    hdrs = ["syntaxnet_component.h"],
    deps = [
        ":syntaxnet_link_feature_extractor",
        ":syntaxnet_transition_state",
        "//dragnn/components/util:bulk_feature_extractor",
        "//dragnn/core:beam",
        "//dragnn/core:component_registry",
        "//dragnn/core:input_batch_cache",
        "//dragnn/core/interfaces:component",
        "//dragnn/core/interfaces:transition_state",
        "//dragnn/io:sentence_input_batch",
        "//dragnn/io:syntaxnet_sentence",
        "//dragnn/protos:data_proto",
        "//dragnn/protos:spec_proto",
        "//dragnn/protos:trace_proto",
        "//syntaxnet:base",
        "//syntaxnet:parser_transitions",
        "//syntaxnet:registry",
        "//syntaxnet:sparse_proto",
        "//syntaxnet:task_context",
        "//syntaxnet:task_spec_proto",
        "//syntaxnet:utils",
        "@org_tensorflow//tensorflow/core:lib",  # For tf/core/platform/logging.h
    ],
    # NOTE(review): alwayslink presumably keeps the registry registration
    # symbols from being dropped by the linker — confirm against
    # component_registry usage in the .cc file.
    alwayslink = 1,
)

# Extracts linked (cross-component) features by reusing the SyntaxNet
# embedding-feature-extractor machinery with the "link" argument prefix.
cc_library(
    name = "syntaxnet_link_feature_extractor",
    srcs = ["syntaxnet_link_feature_extractor.cc"],
    hdrs = ["syntaxnet_link_feature_extractor.h"],
    deps = [
        "//dragnn/protos:spec_proto",
        "//syntaxnet:embedding_feature_extractor",
        "//syntaxnet:parser_transitions",
        "//syntaxnet:task_context",
        "@org_tensorflow//tensorflow/core:lib",  # For tf/core/platform/logging.h
    ],
)

# Cloneable DRAGNN transition state wrapping a SyntaxNet ParserState.
cc_library(
    name = "syntaxnet_transition_state",
    srcs = ["syntaxnet_transition_state.cc"],
    hdrs = ["syntaxnet_transition_state.h"],
    deps = [
        "//dragnn/core/interfaces:cloneable_transition_state",
        "//dragnn/core/interfaces:transition_state",
        "//dragnn/io:syntaxnet_sentence",
        "//dragnn/protos:trace_proto",
        "//syntaxnet:base",
        "//syntaxnet:parser_transitions",
        "@org_tensorflow//tensorflow/core:lib",  # For tf/core/platform/logging.h
    ],
)

# Test data.
filegroup(
    name = "testdata",
    data = glob(["testdata/**"]),
)

# Tests.
cc_test(
    name = "syntaxnet_component_test",
    srcs = ["syntaxnet_component_test.cc"],
    data = [":testdata"],
    deps = [
        ":syntaxnet_component",
        "//dragnn/core:input_batch_cache",
        "//dragnn/core/test:generic",
        "//dragnn/core/test:mock_transition_state",
        "//dragnn/io:sentence_input_batch",
        "//syntaxnet:sentence_proto",
        "@org_tensorflow//tensorflow/core:lib",
        "@org_tensorflow//tensorflow/core:test",
    ],
)

cc_test(
    name = "syntaxnet_link_feature_extractor_test",
    srcs = ["syntaxnet_link_feature_extractor_test.cc"],
    deps = [
        ":syntaxnet_link_feature_extractor",
        "//dragnn/core/test:generic",
        "//dragnn/protos:spec_proto",
        "//syntaxnet:task_context",
        "//syntaxnet:test_main",
        "@org_tensorflow//tensorflow/core:test",
        "@org_tensorflow//tensorflow/core:testlib",
    ],
)

cc_test(
    name = "syntaxnet_transition_state_test",
    srcs = ["syntaxnet_transition_state_test.cc"],
    data = [":testdata"],
    deps = [
        ":syntaxnet_component",
        ":syntaxnet_transition_state",
        "//dragnn/core:input_batch_cache",
        "//dragnn/core/test:generic",
        "//dragnn/core/test:mock_transition_state",
        "//dragnn/io:sentence_input_batch",
        "//dragnn/protos:spec_proto",
        "//syntaxnet:sentence_proto",
        "//syntaxnet:test_main",
        "@org_tensorflow//tensorflow/core:lib",
        "@org_tensorflow//tensorflow/core:test",
        "@org_tensorflow//tensorflow/core:testlib",
    ],
)
This diff is collapsed.
#ifndef NLP_SAFT_OPENSOURCE_DRAGNN_COMPONENTS_SYNTAXNET_SYNTAXNET_COMPONENT_H_
#define NLP_SAFT_OPENSOURCE_DRAGNN_COMPONENTS_SYNTAXNET_SYNTAXNET_COMPONENT_H_

#include <vector>

#include "dragnn/components/syntaxnet/syntaxnet_link_feature_extractor.h"
#include "dragnn/components/syntaxnet/syntaxnet_transition_state.h"
#include "dragnn/components/util/bulk_feature_extractor.h"
#include "dragnn/core/beam.h"
#include "dragnn/core/input_batch_cache.h"
#include "dragnn/core/interfaces/component.h"
#include "dragnn/core/interfaces/transition_state.h"
#include "dragnn/protos/data.pb.h"
#include "dragnn/protos/spec.pb.h"
#include "dragnn/protos/trace.pb.h"
#include "syntaxnet/base.h"
#include "syntaxnet/parser_transitions.h"
#include "syntaxnet/registry.h"
#include "syntaxnet/task_context.h"

namespace syntaxnet {
namespace dragnn {

// DRAGNN Component backed by a SyntaxNet transition system. Holds one beam of
// SyntaxNetTransitionState per batch element (see batch_ below) and exposes
// feature extraction, transition advancing and tracing over those beams.
class SyntaxNetComponent : public Component {
 public:
  // Create a SyntaxNet-backed DRAGNN component.
  SyntaxNetComponent();

  // Initializes this component from the spec.
  void InitializeComponent(const ComponentSpec &spec) override;

  // Provides the previous beam to the component.
  void InitializeData(
      const std::vector<std::vector<const TransitionState *>> &states,
      int max_beam_size, InputBatchCache *input_data) override;

  // Returns true if the component has had InitializeData called on it since
  // the last time it was reset.
  bool IsReady() const override;

  // Returns the string name of this component.
  string Name() const override;

  // Returns the number of steps taken by the given batch in this component.
  int StepsTaken(int batch_index) const override;

  // Returns the current batch size of the component's underlying data.
  int BatchSize() const override;

  // Returns the maximum beam size of this component.
  int BeamSize() const override;

  // Return the beam index of the item which is currently at index
  // 'index', when the beam was at step 'step', for batch element 'batch'.
  int GetBeamIndexAtStep(int step, int current_index, int batch) const override;

  // Return the source index of the item which is currently at index 'index'
  // for batch element 'batch'. This index is into the final beam of the
  // Component that this Component was initialized from.
  int GetSourceBeamIndex(int current_index, int batch) const override;

  // Request a translation function based on the given method string.
  // The translation function will be called with arguments (batch, beam, value)
  // and should return the step index corresponding to the given value, for the
  // data in the given beam and batch.
  std::function<int(int, int, int)> GetStepLookupFunction(
      const string &method) override;

  // Advances this component from the given transition matrix.
  void AdvanceFromPrediction(const float transition_matrix[],
                             int transition_matrix_length) override;

  // Advances this component from the state oracles.
  void AdvanceFromOracle() override;

  // Returns true if all states within this component are terminal.
  bool IsTerminal() const override;

  // Returns the current batch of beams for this component.
  std::vector<std::vector<const TransitionState *>> GetBeam() override;

  // Extracts and populates the vector of FixedFeatures for the specified
  // channel.
  int GetFixedFeatures(std::function<int32 *(int)> allocate_indices,
                       std::function<int64 *(int)> allocate_ids,
                       std::function<float *(int)> allocate_weights,
                       int channel_id) const override;

  // Extracts and populates all FixedFeatures for all channels, advancing this
  // component via the oracle until it is terminal.
  int BulkGetFixedFeatures(const BulkFeatureExtractor &extractor) override;

  // Extracts and returns the vector of LinkFeatures for the specified
  // channel. Note: these are NOT translated.
  std::vector<LinkFeatures> GetRawLinkFeatures(int channel_id) const override;

  // Returns a vector of oracle labels for each element in the beam and
  // batch.
  std::vector<std::vector<int>> GetOracleLabels() const override;

  // Annotate the underlying data object with the results of this Component's
  // calculation.
  void FinalizeData() override;

  // Reset this component.
  void ResetComponent() override;

  // Initializes the component for tracing execution. This will typically have
  // the side effect of slowing down all subsequent Component calculations
  // and storing a trace in memory that can be returned by GetTraceProtos().
  void InitializeTracing() override;

  // Disables tracing, freeing any additional memory and avoiding triggering
  // additional computation in the future.
  void DisableTracing() override;

  // Returns the traces recorded so far, one per beam element per batch.
  std::vector<std::vector<ComponentTrace>> GetTraceProtos() const override;

  // Adds the given (already translated) link features to the trace for the
  // given channel.
  void AddTranslatedLinkFeaturesToTrace(
      const std::vector<LinkFeatures> &features, int channel_id) override;

 private:
  friend class SyntaxNetComponentTest;
  friend class SyntaxNetTransitionStateTest;

  // Permission function for this component.
  bool IsAllowed(SyntaxNetTransitionState *state, int action) const;

  // Returns true if this state is final
  bool IsFinal(SyntaxNetTransitionState *state) const;

  // Oracle function for this component.
  int GetOracleLabel(SyntaxNetTransitionState *state) const;

  // State advance function for this component.
  void Advance(SyntaxNetTransitionState *state, int action,
               Beam<SyntaxNetTransitionState> *beam);

  // Creates a new state for the given nlp_saft::SentenceExample.
  std::unique_ptr<SyntaxNetTransitionState> CreateState(
      SyntaxNetSentence *example);

  // Creates a newly initialized Beam.
  std::unique_ptr<Beam<SyntaxNetTransitionState>> CreateBeam(int max_size);

  // Transition system.
  std::unique_ptr<ParserTransitionSystem> transition_system_;

  // Label map for transition system.
  const TermFrequencyMap *label_map_;

  // Extractor for fixed features
  ParserEmbeddingFeatureExtractor feature_extractor_;

  // Extractor for linked features.
  SyntaxNetLinkFeatureExtractor link_feature_extractor_;

  // Internal workspace registry for use in feature extraction.
  WorkspaceRegistry workspace_registry_;

  // Switch for simulating legacy parser behaviour.
  bool rewrite_root_labels_;

  // The ComponentSpec used to initialize this component.
  ComponentSpec spec_;

  // State search beams, one per batch element.
  std::vector<std::unique_ptr<Beam<SyntaxNetTransitionState>>> batch_;

  // Current max beam size.
  int max_beam_size_;

  // Underlying input data (not owned).
  InputBatchCache *input_data_;

  // Whether or not to trace for each batch and beam element.
  bool do_tracing_ = false;
};

}  // namespace dragnn
}  // namespace syntaxnet

#endif  // NLP_SAFT_OPENSOURCE_DRAGNN_COMPONENTS_SYNTAXNET_SYNTAXNET_COMPONENT_H_
#include "dragnn/components/syntaxnet/syntaxnet_link_feature_extractor.h"
#include "tensorflow/core/platform/logging.h"
namespace syntaxnet {
namespace dragnn {
// Reads the linked-channel configuration from the task context. Each of the
// three parameters ("<prefix>_source_components", "…_source_layers",
// "…_source_translators") is a ';'-separated list with exactly one entry per
// embedding channel; the CHECKs below enforce that invariant.
void SyntaxNetLinkFeatureExtractor::Setup(TaskContext *context) {
  ParserEmbeddingFeatureExtractor::Setup(context);

  // Fetches "<ArgPrefix()>_<suffix>" from the context (default "") and splits
  // it on ';'.
  const auto split_channel_param = [&](const char *suffix) {
    return utils::Split(
        context->Get(tensorflow::strings::StrCat(ArgPrefix(), "_", suffix),
                     ""),
        ';');
  };

  if (NumEmbeddings() > 0) {
    channel_sources_ = split_channel_param("source_components");
    channel_layers_ = split_channel_param("source_layers");
    channel_translators_ = split_channel_param("source_translators");
  }

  // Every channel must specify a source component, layer and translator.
  CHECK_EQ(channel_sources_.size(), NumEmbeddings());
  CHECK_EQ(channel_layers_.size(), NumEmbeddings());
  CHECK_EQ(channel_translators_.size(), NumEmbeddings());
}
// Appends one LinkedFeatureChannel proto to the spec for every linked
// embedding channel, mirroring this extractor's configuration (name, FML,
// embedding dim, feature count, and source component/layer/translator).
void SyntaxNetLinkFeatureExtractor::AddLinkedFeatureChannelProtos(
    ComponentSpec *spec) const {
  const int num_channels = NumEmbeddings();
  for (int channel = 0; channel < num_channels; ++channel) {
    LinkedFeatureChannel *proto = spec->add_linked_feature();
    proto->set_name(embedding_name(channel));
    proto->set_fml(embedding_fml()[channel]);
    proto->set_embedding_dim(EmbeddingDims(channel));
    proto->set_size(FeatureSize(channel));
    proto->set_source_layer(channel_layers_[channel]);
    proto->set_source_component(channel_sources_[channel]);
    proto->set_source_translator(channel_translators_[channel]);
  }
}
} // namespace dragnn
} // namespace syntaxnet
#ifndef NLP_SAFT_OPENSOURCE_DRAGNN_COMPONENTS_SYNTAXNET_SYNTAXNET_LINK_FEATURE_EXTRACTOR_H_
#define NLP_SAFT_OPENSOURCE_DRAGNN_COMPONENTS_SYNTAXNET_SYNTAXNET_LINK_FEATURE_EXTRACTOR_H_

#include <string>
#include <vector>

#include "dragnn/protos/spec.pb.h"
#include "syntaxnet/embedding_feature_extractor.h"
#include "syntaxnet/parser_state.h"
#include "syntaxnet/parser_transitions.h"
#include "syntaxnet/task_context.h"

namespace syntaxnet {
namespace dragnn {

// Provides feature extraction for linked features in the
// WrapperParserComponent. This re-uses the EmbeddingFeatureExtractor
// architecture to get another set of feature extractors. Note that we should
// ignore predicate maps here, and we don't care about the vocabulary size
// because all the feature values will be used for translation, but this means
// we can configure the extractor from the GCL using the standard
// neurosis-lib.wf syntax.
//
// Because it uses a different prefix ("link"), it can be executed in the same
// wf.stage as the regular fixed extractor.
class SyntaxNetLinkFeatureExtractor : public ParserEmbeddingFeatureExtractor {
 public:
  SyntaxNetLinkFeatureExtractor() : ParserEmbeddingFeatureExtractor("link") {}
  ~SyntaxNetLinkFeatureExtractor() override {}

  // Prefix used to look up "link_*" parameters in the TaskContext.
  const string ArgPrefix() const override { return "link"; }

  // Parses the TaskContext to get additional information like target layers,
  // etc.
  void Setup(TaskContext *context) override;

  // Called during InitComponentProtoTask to add the specification from the
  // wrapped feature extractor as LinkedFeatureChannel protos.
  void AddLinkedFeatureChannelProtos(ComponentSpec *spec) const;

 private:
  // Source component names for each channel.
  std::vector<string> channel_sources_;

  // Source layer names for each channel.
  std::vector<string> channel_layers_;

  // Source translator name for each channel.
  std::vector<string> channel_translators_;
};

}  // namespace dragnn
}  // namespace syntaxnet

#endif  // NLP_SAFT_OPENSOURCE_DRAGNN_COMPONENTS_SYNTAXNET_SYNTAXNET_LINK_FEATURE_EXTRACTOR_H_
#include "dragnn/components/syntaxnet/syntaxnet_link_feature_extractor.h"
#include <string>
#include "dragnn/core/test/generic.h"
#include "dragnn/protos/spec.pb.h"
#include "syntaxnet/task_context.h"
#include "tensorflow/core/platform/test.h"
using syntaxnet::test::EqualsProto;
namespace syntaxnet {
namespace dragnn {
// Fixture for tests exporting linked-feature configuration as protos.
class ExportSpecTest : public ::testing::Test {
 public:
};

// Configures two linked channels via TaskContext parameters, then verifies
// that AddLinkedFeatureChannelProtos reproduces the configuration faithfully
// as LinkedFeatureChannel protos.
TEST_F(ExportSpecTest, WritesChannelSpec) {
  TaskContext context;
  // Parallel ';'-separated lists: entry i configures channel i.
  context.SetParameter("neurosis_feature_syntax_version", "2");
  context.SetParameter("link_features", "input.focus;stack.focus");
  context.SetParameter("link_embedding_names", "tagger;parser");
  context.SetParameter("link_predicate_maps", "none;none");
  context.SetParameter("link_embedding_dims", "16;16");
  context.SetParameter("link_source_components", "tagger;parser");
  context.SetParameter("link_source_layers", "hidden0;lstm");
  context.SetParameter("link_source_translators", "token;last_action");
  SyntaxNetLinkFeatureExtractor link_features;
  link_features.Setup(&context);
  link_features.Init(&context);

  ComponentSpec spec;
  link_features.AddLinkedFeatureChannelProtos(&spec);

  // Expected output: one linked_feature per channel, in channel order.
  const string expected_spec_str = R"(
linked_feature {
name: "tagger"
fml: "input.focus"
embedding_dim: 16
size: 1
source_component: "tagger"
source_translator: "token"
source_layer: "hidden0"
}
linked_feature {
name: "parser"
fml: "stack.focus"
embedding_dim: 16
size: 1
source_component: "parser"
source_translator: "last_action"
source_layer: "lstm"
}
)";
  ComponentSpec expected_spec;
  TextFormat::ParseFromString(expected_spec_str, &expected_spec);
  EXPECT_THAT(spec, EqualsProto(expected_spec));
}
} // namespace dragnn
} // namespace syntaxnet
#include "dragnn/components/syntaxnet/syntaxnet_transition_state.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/logging.h"
namespace syntaxnet {
namespace dragnn {
// Wraps the given ParserState (owned) and sentence (not owned). The three
// per-token bookkeeping vectors are sized to the sentence's token count and
// filled with -1, meaning "no entry recorded yet".
SyntaxNetTransitionState::SyntaxNetTransitionState(
    std::unique_ptr<ParserState> parser_state, SyntaxNetSentence *sentence)
    : parser_state_(std::move(parser_state)), sentence_(sentence) {
  score_ = 0;
  current_beam_index_ = -1;  // Not yet placed in any beam.
  parent_beam_index_ = 0;
  step_for_token_.resize(sentence->sentence()->token_size(), -1);
  parent_for_token_.resize(sentence->sentence()->token_size(), -1);
  parent_step_for_token_.resize(sentence->sentence()->token_size(), -1);
}
// Inherits the parent state's score and records the parent's position in its
// beam; all other members keep their constructor-assigned values.
void SyntaxNetTransitionState::Init(const TransitionState &parent) {
  score_ = parent.GetScore();
  parent_beam_index_ = parent.GetBeamIndex();
}
std::unique_ptr<SyntaxNetTransitionState> SyntaxNetTransitionState::Clone()
const {
// Create a new state from a clone of the underlying parser state.
std::unique_ptr<ParserState> cloned_state(parser_state_->Clone());
std::unique_ptr<SyntaxNetTransitionState> new_state(
new SyntaxNetTransitionState(std::move(cloned_state), sentence_));
// Copy relevant data members and set non-copied ones to flag values.
new_state->score_ = score_;
new_state->current_beam_index_ = current_beam_index_;
new_state->parent_beam_index_ = parent_beam_index_;
new_state->step_for_token_ = step_for_token_;
new_state->parent_step_for_token_ = parent_step_for_token_;
new_state->parent_for_token_ = parent_for_token_;
// Copy trace if it exists.
if (trace_) {
new_state->trace_.reset(new ComponentTrace(*trace_));
}
return new_state;
}
// Returns the index this state's parent occupied in the previous beam.
const int SyntaxNetTransitionState::ParentBeamIndex() const {
  return parent_beam_index_;
}

// Returns this state's position in the current beam (-1 until placed).
const int SyntaxNetTransitionState::GetBeamIndex() const {
  return current_beam_index_;
}

// Records this state's position in the current beam.
void SyntaxNetTransitionState::SetBeamIndex(const int index) {
  current_beam_index_ = index;
}

// Score getter/setter.
const float SyntaxNetTransitionState::GetScore() const { return score_; }

void SyntaxNetTransitionState::SetScore(const float score) { score_ = score; }
// Renders a crude HTML string showing the words currently on the parser
// stack followed by the word at the front of the input buffer.
string SyntaxNetTransitionState::HTMLRepresentation() const {
  string markup = "Stack: ";

  // Walk the stack slots from highest index to 0, skipping empty (-1) slots.
  for (int slot = parser_state_->StackSize() - 1; slot >= 0; --slot) {
    const int token_index = parser_state_->Stack(slot);
    if (token_index >= 0) {
      tensorflow::strings::StrAppend(
          &markup, parser_state_->GetToken(token_index).word(), " ");
    }
  }

  // Append the next input token, if there is one.
  tensorflow::strings::StrAppend(&markup, "| Input: ");
  const int token_index = parser_state_->Input(0);
  if (token_index >= 0) {
    tensorflow::strings::StrAppend(
        &markup, parser_state_->GetToken(token_index).word(), " ");
  }
  return markup;
}
} // namespace dragnn
} // namespace syntaxnet
#ifndef NLP_SAFT_OPENSOURCE_DRAGNN_COMPONENTS_SYNTAXNET_SYNTAXNET_TRANSITION_STATE_H_
#define NLP_SAFT_OPENSOURCE_DRAGNN_COMPONENTS_SYNTAXNET_SYNTAXNET_TRANSITION_STATE_H_

#include <memory>  // For std::unique_ptr (previously relied on a transitive include).
#include <vector>

#include "dragnn/core/interfaces/cloneable_transition_state.h"
#include "dragnn/core/interfaces/transition_state.h"
#include "dragnn/io/syntaxnet_sentence.h"
#include "dragnn/protos/trace.pb.h"
#include "syntaxnet/base.h"
#include "syntaxnet/parser_state.h"

namespace syntaxnet {
namespace dragnn {

// Cloneable DRAGNN transition state wrapping a SyntaxNet ParserState. In
// addition to the beam bookkeeping required by the interface, it tracks per
// token: the step that produced the token's representation, the step that
// assigned the token's parent, and the parent token index itself.
class SyntaxNetTransitionState
    : public CloneableTransitionState<SyntaxNetTransitionState> {
 public:
  // Create a SyntaxNetTransitionState to wrap this nlp_saft::ParserState.
  SyntaxNetTransitionState(std::unique_ptr<ParserState> parser_state,
                           SyntaxNetSentence *sentence);

  // Initialize this TransitionState from a previous TransitionState. The
  // ParentBeamIndex is the location of that previous TransitionState in the
  // provided beam.
  void Init(const TransitionState &parent) override;

  // Produces a new state with the same backing data as this state.
  std::unique_ptr<SyntaxNetTransitionState> Clone() const override;

  // Return the beam index of the state passed into the initializer of this
  // TransitionState.
  const int ParentBeamIndex() const override;

  // Get the current beam index for this state.
  const int GetBeamIndex() const override;

  // Set the current beam index for this state.
  void SetBeamIndex(const int index) override;

  // Get the score associated with this transition state.
  const float GetScore() const override;

  // Set the score associated with this transition state.
  void SetScore(const float score) override;

  // Depicts this state as an HTML-language string.
  string HTMLRepresentation() const override;

  // **** END INHERITED INTERFACE ****

  // Returns the step that produced the representation for `token`, or -1 if
  // `token` is out of range.
  int step_for_token(int token) {
    if (token < 0 || token >= step_for_token_.size()) {
      return -1;
    } else {
      return step_for_token_.at(token);
    }
  }

  // Records the step for `token`.
  // NOTE(review): insert() shifts all entries at and after `token` and grows
  // the vector past the sentence length; plain assignment
  // (step_for_token_[token] = step) looks intended — confirm before changing.
  void set_step_for_token(int token, int step) {
    step_for_token_.insert(step_for_token_.begin() + token, step);
  }

  // Returns the step during which `token` was assigned its parent, or -1 if
  // `token` is out of range.
  int parent_step_for_token(int token) {
    // Fixed: bounds-check this accessor's own vector instead of
    // step_for_token_ (copy-paste bug; the vectors can differ in size once
    // the insert-based setters have run).
    if (token < 0 || token >= parent_step_for_token_.size()) {
      return -1;
    } else {
      return parent_step_for_token_.at(token);
    }
  }

  // Records the step during which `token` received its parent.
  // NOTE(review): see set_step_for_token regarding insert() vs assignment.
  void set_parent_step_for_token(int token, int parent_step) {
    parent_step_for_token_.insert(parent_step_for_token_.begin() + token,
                                  parent_step);
  }

  // Returns the parent token index for `token`, or -1 if `token` is out of
  // range.
  int parent_for_token(int token) {
    // Fixed: bounds-check parent_for_token_, not step_for_token_.
    if (token < 0 || token >= parent_for_token_.size()) {
      return -1;
    } else {
      return parent_for_token_.at(token);
    }
  }

  // Records the parent token index for `token`.
  // NOTE(review): see set_step_for_token regarding insert() vs assignment.
  void set_parent_for_token(int token, int parent) {
    parent_for_token_.insert(parent_for_token_.begin() + token, parent);
  }

  // Accessor for the underlying nlp_saft::ParserState.
  ParserState *parser_state() { return parser_state_.get(); }

  // Accessor for the underlying sentence object.
  SyntaxNetSentence *sentence() { return sentence_; }

  // Returns the trace; CHECK-fails unless a trace was supplied via
  // set_trace().
  ComponentTrace *mutable_trace() {
    CHECK(trace_) << "Trace is not initialized";
    return trace_.get();
  }

  // Takes ownership of the given trace object.
  void set_trace(std::unique_ptr<ComponentTrace> trace) {
    trace_ = std::move(trace);
  }

 private:
  // Underlying ParserState object that is being wrapped.
  std::unique_ptr<ParserState> parser_state_;

  // Sentence object that is being examined with this state (not owned).
  SyntaxNetSentence *sentence_;

  // The current score of this state.
  float score_;

  // The current beam index of this state.
  int current_beam_index_;

  // The parent beam index for this state.
  int parent_beam_index_;

  // Maintains a list of which steps in the history correspond to
  // representations for each of the tokens on the stack.
  std::vector<int> step_for_token_;

  // Maintains a list of which steps in the history correspond to the actions
  // that assigned a parent for tokens when reduced.
  std::vector<int> parent_step_for_token_;

  // Maintain the parent index of a token in the system.
  std::vector<int> parent_for_token_;

  // Trace of the history to produce this state.
  std::unique_ptr<ComponentTrace> trace_;
};

}  // namespace dragnn
}  // namespace syntaxnet

#endif  // NLP_SAFT_OPENSOURCE_DRAGNN_COMPONENTS_SYNTAXNET_SYNTAXNET_TRANSITION_STATE_H_
#include "dragnn/components/syntaxnet/syntaxnet_transition_state.h"
#include "dragnn/components/syntaxnet/syntaxnet_component.h"
#include "dragnn/core/input_batch_cache.h"
#include "dragnn/core/test/generic.h"
#include "dragnn/core/test/mock_transition_state.h"
#include "dragnn/io/sentence_input_batch.h"
#include "dragnn/protos/spec.pb.h"
#include "syntaxnet/sentence.pb.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/io/path.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/protobuf.h"
#include "tensorflow/core/platform/test.h"
// This test suite is intended to validate the contracts that the DRAGNN
// system expects from all transition state subclasses. Developers creating
// new TransitionStates should copy this test and modify it as necessary,
// using it to ensure their state conforms to DRAGNN expectations.
namespace syntaxnet {
namespace dragnn {
namespace {
// Three-token test sentence ("Sentence 0 .") in Sentence textproto form, with
// POS tags, categories, dependency heads and labels filled in. The raw-string
// body is parsed at runtime and must not be reformatted.
const char kSentence0[] = R"(
token {
word: "Sentence" start: 0 end: 7 tag: "NN" category: "NOUN" label: "ROOT"
break_level: NO_BREAK
}
token {
word: "0" start: 9 end: 9 head: 0 tag: "CD" category: "NUM" label: "num"
break_level: SPACE_BREAK
}
token {
word: "." start: 10 end: 10 head: 0 tag: "." category: "." label: "punct"
break_level: NO_BREAK
}
)";
} // namespace
using testing::Return;
// Fixture that constructs real SyntaxNetTransitionState instances from the
// checked-in test master spec and kSentence0.
class SyntaxNetTransitionStateTest : public ::testing::Test {
 public:
  // Builds a state by loading the test master spec, repointing its resources
  // at the test data directory, and asking a freshly initialized
  // SyntaxNetComponent to create a state for kSentence0.
  std::unique_ptr<SyntaxNetTransitionState> CreateState() {
    // Get the master spec proto from the test data directory.
    MasterSpec master_spec;
    string file_name = tensorflow::io::JoinPath(
        test::GetTestDataPrefix(), "dragnn/components/syntaxnet/testdata",
        "master_spec.textproto");
    TF_CHECK_OK(tensorflow::ReadTextProto(tensorflow::Env::Default(), file_name,
                                          &master_spec));

    // Get all the resource protos from the test data directory.
    for (Resource &resource :
         *(master_spec.mutable_component(0)->mutable_resource())) {
      resource.mutable_part(0)->set_file_pattern(tensorflow::io::JoinPath(
          test::GetTestDataPrefix(), "dragnn/components/syntaxnet/testdata",
          resource.part(0).file_pattern()));
    }

    // Create an empty input batch and beam vector to initialize the parser.
    Sentence sentence_0;
    TextFormat::ParseFromString(kSentence0, &sentence_0);
    string sentence_0_str;
    sentence_0.SerializeToString(&sentence_0_str);
    data_.reset(new InputBatchCache(sentence_0_str));
    SentenceInputBatch *sentences = data_->GetAs<SentenceInputBatch>();

    // Create a parser component that will generate a parser state for this
    // test.
    SyntaxNetComponent component;
    component.InitializeComponent(*(master_spec.mutable_component(0)));
    std::vector<std::vector<const TransitionState *>> states;
    constexpr int kBeamSize = 1;
    component.InitializeData(states, kBeamSize, data_.get());

    // Get a transition state from the component.
    std::unique_ptr<SyntaxNetTransitionState> test_state =
        component.CreateState(&(sentences->data()->at(0)));
    return test_state;
  }

  // Keeps the input batch alive for the lifetime of the state under test.
  std::unique_ptr<InputBatchCache> data_;
};
// Checks that GetBeamIndex always reflects the most recent SetBeamIndex call.
TEST_F(SyntaxNetTransitionStateTest, CanSetAndGetBeamIndex) {
  // Build a state and initialize it from a mock parent.
  MockTransitionState parent;
  auto state = CreateState();
  state->Init(parent);

  // The first write should be read back verbatim...
  constexpr int kFirstIndex = 12;
  state->SetBeamIndex(kFirstIndex);
  EXPECT_EQ(state->GetBeamIndex(), kFirstIndex);

  // ...and a second write should overwrite the first.
  constexpr int kSecondIndex = 7;
  state->SetBeamIndex(kSecondIndex);
  EXPECT_EQ(state->GetBeamIndex(), kSecondIndex);
}
// Checks that GetScore always reflects the most recent SetScore call.
TEST_F(SyntaxNetTransitionStateTest, CanSetAndGetScore) {
  // Build a state and initialize it from a mock parent.
  MockTransitionState parent;
  auto state = CreateState();
  state->Init(parent);

  // The first write should be read back verbatim...
  constexpr float kFirstScore = 12.1;
  state->SetScore(kFirstScore);
  EXPECT_EQ(state->GetScore(), kFirstScore);

  // ...and a second write should overwrite the first.
  constexpr float kSecondScore = 7.2;
  state->SetScore(kSecondScore);
  EXPECT_EQ(state->GetScore(), kSecondScore);
}
// Verifies that Init() records the initializing state's current beam index
// as the parent beam index of the state being initialized.
TEST_F(SyntaxNetTransitionStateTest, ReportsParentBeamIndex) {
  // The mock parent reports a fixed beam index, which Init() should capture.
  constexpr int kExpectedParentIndex = 1138;
  MockTransitionState parent;
  EXPECT_CALL(parent, GetBeamIndex())
      .WillRepeatedly(Return(kExpectedParentIndex));

  auto state = CreateState();
  state->Init(parent);
  EXPECT_EQ(state->ParentBeamIndex(), kExpectedParentIndex);
}
// Verifies that Init() copies the initializing state's current score into
// the state being initialized.
TEST_F(SyntaxNetTransitionStateTest, InitializationCopiesParentScore) {
  // The mock parent reports a fixed score, which Init() should copy over.
  constexpr float kExpectedScore = 24.12;
  MockTransitionState parent;
  EXPECT_CALL(parent, GetScore()).WillRepeatedly(Return(kExpectedScore));

  auto state = CreateState();
  state->Init(parent);
  EXPECT_EQ(state->GetScore(), kExpectedScore);
}
// Verifies that Clone() preserves all of a state's bookkeeping data: the
// parent beam index, the score, and the beam index.
TEST_F(SyntaxNetTransitionStateTest, CloningMaintainsState) {
  // Initialize from a mock parent with a known beam index.
  constexpr int kParentIndex = 1138;
  MockTransitionState parent;
  EXPECT_CALL(parent, GetBeamIndex()).WillRepeatedly(Return(kParentIndex));

  auto state = CreateState();
  state->Init(parent);

  // Give the state a distinctive score and beam index, and sanity-check them.
  constexpr float kScore = 20.0;
  state->SetScore(kScore);
  EXPECT_EQ(state->GetScore(), kScore);
  constexpr int kIndex = 12;
  state->SetBeamIndex(kIndex);
  EXPECT_EQ(state->GetBeamIndex(), kIndex);

  // Every piece of bookkeeping data should survive the clone.
  auto copy = state->Clone();
  EXPECT_EQ(copy->ParentBeamIndex(), kParentIndex);
  EXPECT_EQ(copy->GetScore(), kScore);
  EXPECT_EQ(copy->GetBeamIndex(), kIndex);
}
// Checks step_for_token(): explicitly set tokens read back their values,
// while unset and out-of-range token indices report the default of -1.
TEST_F(SyntaxNetTransitionStateTest, CanSetAndGetStepForToken) {
  MockTransitionState parent;
  auto state = CreateState();
  state->Init(parent);

  // Set steps for tokens 0 and 2, leaving token 1 untouched.
  constexpr int kStepZero = 12;
  constexpr int kStepTwo = 34;
  state->set_step_for_token(0, kStepZero);
  state->set_step_for_token(2, kStepTwo);

  // The set tokens return their values; the unset token returns the default.
  constexpr int kDefault = -1;
  EXPECT_EQ(kStepZero, state->step_for_token(0));
  EXPECT_EQ(kDefault, state->step_for_token(1));
  EXPECT_EQ(kStepTwo, state->step_for_token(2));

  // The backing sentence has only 3 tokens, so any index outside [0, 3)
  // is out of bounds and also reports the default.
  EXPECT_EQ(kDefault, state->step_for_token(-1));
  EXPECT_EQ(kDefault, state->step_for_token(3));
}
// Checks parent_step_for_token(): explicitly set tokens read back their
// values, while unset and out-of-range token indices report the default -1.
TEST_F(SyntaxNetTransitionStateTest, CanSetAndGetParentStepForToken) {
  MockTransitionState parent;
  auto state = CreateState();
  state->Init(parent);

  // Set parent steps for tokens 0 and 2, leaving token 1 untouched.
  constexpr int kStepZero = 12;
  constexpr int kStepTwo = 34;
  state->set_parent_step_for_token(0, kStepZero);
  state->set_parent_step_for_token(2, kStepTwo);

  // The set tokens return their values; the unset token returns the default.
  constexpr int kDefault = -1;
  EXPECT_EQ(kStepZero, state->parent_step_for_token(0));
  EXPECT_EQ(kDefault, state->parent_step_for_token(1));
  EXPECT_EQ(kStepTwo, state->parent_step_for_token(2));

  // The backing sentence has only 3 tokens, so any index outside [0, 3)
  // is out of bounds and also reports the default.
  EXPECT_EQ(kDefault, state->parent_step_for_token(-1));
  EXPECT_EQ(kDefault, state->parent_step_for_token(3));
}
// Checks parent_for_token(): explicitly set tokens read back their values,
// while unset and out-of-range token indices report the default of -1.
TEST_F(SyntaxNetTransitionStateTest, CanSetAndGetParentForToken) {
  MockTransitionState parent;
  auto state = CreateState();
  state->Init(parent);

  // Set parents for tokens 0 and 2, leaving token 1 untouched.
  constexpr int kParentZero = 12;
  constexpr int kParentTwo = 34;
  state->set_parent_for_token(0, kParentZero);
  state->set_parent_for_token(2, kParentTwo);

  // The set tokens return their values; the unset token returns the default.
  constexpr int kDefault = -1;
  EXPECT_EQ(kParentZero, state->parent_for_token(0));
  EXPECT_EQ(kDefault, state->parent_for_token(1));
  EXPECT_EQ(kParentTwo, state->parent_for_token(2));

  // The backing sentence has only 3 tokens, so any index outside [0, 3)
  // is out of bounds and also reports the default.
  EXPECT_EQ(kDefault, state->parent_for_token(-1));
  EXPECT_EQ(kDefault, state->parent_for_token(3));
}
// Checks that set_trace() takes ownership of the trace proto and that the
// trace is preserved across Clone().
TEST_F(SyntaxNetTransitionStateTest, CanSetAndGetTrace) {
  MockTransitionState parent;
  auto state = CreateState();
  state->Init(parent);

  // Hand a named trace to the state.
  const string kTestComponentName = "test";
  std::unique_ptr<ComponentTrace> trace(new ComponentTrace());
  trace->set_name(kTestComponentName);
  state->set_trace(std::move(trace));

  // Ownership moved into the state, so our local pointer is now empty.
  EXPECT_EQ(trace.get(), nullptr);
  EXPECT_EQ(state->mutable_trace()->name(), kTestComponentName);

  // The trace should be preserved when cloning, for both clone and original.
  auto copy = state->Clone();
  EXPECT_EQ(copy->mutable_trace()->name(), kTestComponentName);
  EXPECT_EQ(state->mutable_trace()->name(), kTestComponentName);
}
} // namespace dragnn
} // namespace syntaxnet
component {
name: "parser"
transition_system {
registered_name: "arc-standard"
}
resource {
name: 'label-map'
part {
file_pattern: 'syntaxnet-tagger.label-map'
file_format: 'text'
}
}
resource {
name: 'tag-map'
part {
file_pattern: 'syntaxnet-tagger.tag-map'
file_format: 'text'
}
}
fixed_feature {
name: "tags"
fml: "input.tag input(1).tag"
embedding_dim: 32
vocabulary_size: 46
size: 2
predicate_map: "hashed"
}
fixed_feature {
name: "tags"
fml: "input(-1).tag input.tag input(1).tag"
embedding_dim: 32
vocabulary_size: 46
size: 3
predicate_map: "hashed"
}
linked_feature {
name: "recurrent_stack"
fml: "stack.focus stack(1).focus"
embedding_dim: 32
size: 2
source_component: "parser"
source_translator: "identity"
source_layer: "hidden_0"
}
backend {
registered_name: "SyntaxNetComponent"
}
}
46
punct 243160
prep 194627
pobj 186958
det 170592
nsubj 144821
nn 144800
amod 117242
ROOT 90592
dobj 88551
aux 76523
advmod 72893
conj 59384
cc 57532
num 36350
poss 35117
dep 34986
ccomp 29470
cop 25991
mark 25141
xcomp 25111
rcmod 16234
auxpass 15740
advcl 14996
possessive 14866
nsubjpass 14133
pcomp 12488
appos 11112
partmod 11106
neg 11090
number 10658
prt 7123
quantmod 6653
tmod 5418
infmod 5134
npadvmod 3213
parataxis 3012
mwe 2793
expl 2712
iobj 1642
acomp 1632
discourse 1381
csubj 1225
predet 1160
preconj 749
goeswith 146
csubjpass 41
component {
name: "tagger"
num_actions : 49
transition_system {
registered_name: "tagger"
parameters {
key: "join_category_to_pos"
value: "true"
}
}
resource {
name: "tag-map"
part {
file_pattern: "TESTDATA/syntaxnet-tagger.tag-map"
file_format: "text"
}
}
resource {
name: "word-map"
part {
file_pattern: "TESTDATA/syntaxnet-tagger.word-map"
file_format: "text"
}
}
resource {
name: "label-map"
part {
file_pattern: "TESTDATA/syntaxnet-tagger.label-map"
file_format: "text"
}
}
fixed_feature {
name: "words"
fml: "input(-1).word input(-2).word input(-3).word input.word input(1).word input(2).word input(3).word"
embedding_dim: 64
vocabulary_size: 39397
size: 7
}
fixed_feature {
name: "words"
fml: "input(-3).word input.word input(1).word input(2).word input(3).word"
embedding_dim: 64
vocabulary_size: 39397
size: 5
}
linked_feature {
name: "rnn"
fml: "stack.focus"
embedding_dim: 32
size: 1
source_component: "tagger"
source_translator: "shift-reduce-step"
source_layer: "layer_0"
}
backend {
registered_name: "SyntaxNetComponent"
}
network_unit {
registered_name: 'feed-forward'
parameters {
key: 'hidden_layer_sizes'
value: '64'
}
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment