Commit 32ab5a58 authored by calberti, committed by Martin Wicke

Adding SyntaxNet to tensorflow/models (#63)

parent 148a15fb
// Protocol buffers for serializing string<=>index dictionaries.
syntax = "proto2";
package syntaxnet;
// Serializable representation of a string=>string pair.
message StringToStringPair {
// String representing the key.
required string key = 1;
// String representing the value.
required string value = 2;
}
// Serializable representation of a string=>string mapping.
message StringToStringMap {
// Key=>value pairs.
repeated StringToStringPair pair = 1;
}
// Affix table entry, for serialization of the affix tables.
message AffixTableEntry {
// Nested message for serializing a single affix.
message AffixEntry {
// The affix as a string.
required string form = 1;
// The length of the affix (this is non-trivial to compute due to UTF-8).
required int32 length = 2;
// The ID of the affix that is one character shorter, or -1 if none exists.
required int32 shorter_id = 3;
}
// The type of affix table, as a string.
required string type = 1;
// The maximum affix length.
required int32 max_length = 2;
// The list of affixes, in order of affix ID.
repeated AffixEntry affix = 3;
}
// A light-weight proto to store vectors in binary format.
message TokenEmbedding {
required bytes token = 1; // Can be a word, phrase, URL, etc.
// If available, raw count of this token in the training corpus.
optional int64 count = 3;
message Vector {
repeated float values = 1 [packed = true];
}
optional Vector vector = 2;
};
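For reference, a minimal C++ sketch of populating and serializing a TokenEmbedding as defined above; the token, count, and vector values are made up, and the name of the generated header is assumed.

#include <string>
// Assumed name of the header protoc generates for the messages above.
#include "syntaxnet/dictionary.pb.h"

std::string MakeTokenEmbedding() {
  syntaxnet::TokenEmbedding embedding;
  embedding.set_token("hello");  // hypothetical token
  embedding.set_count(42);       // raw corpus count, if available
  for (float value : {0.1f, -0.2f, 0.3f}) {
    embedding.mutable_vector()->add_values(value);  // packed float vector
  }
  return embedding.SerializeAsString();
}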
/* Copyright 2016 Google Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Various utilities for handling documents.
#include <stddef.h>
#include <algorithm>
#include <memory>
#include <string>
#include <vector>
#include "syntaxnet/base.h"
#include "syntaxnet/feature_extractor.h"
#include "syntaxnet/sentence.pb.h"
#include "syntaxnet/utils.h"
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/lib/core/status.h"
using tensorflow::DEVICE_CPU;
using tensorflow::OpKernel;
using tensorflow::OpKernelConstruction;
using tensorflow::OpKernelContext;
using tensorflow::Tensor;
using tensorflow::TensorShape;
using tensorflow::errors::InvalidArgument;
namespace syntaxnet {
namespace {
void GetTaskContext(OpKernelConstruction *context, TaskContext *task_context) {
string file_path, data;
OP_REQUIRES_OK(context, context->GetAttr("task_context", &file_path));
OP_REQUIRES_OK(
context, ReadFileToString(tensorflow::Env::Default(), file_path, &data));
OP_REQUIRES(context,
TextFormat::ParseFromString(data, task_context->mutable_spec()),
InvalidArgument("Could not parse task context at ", file_path));
}
// Outputs the given batch of sentences as a tensor and deletes them.
void OutputDocuments(OpKernelContext *context,
vector<Sentence *> *document_batch) {
const int64 size = document_batch->size();
Tensor *output;
OP_REQUIRES_OK(context,
context->allocate_output(0, TensorShape({size}), &output));
for (int64 i = 0; i < size; ++i) {
output->vec<string>()(i) = (*document_batch)[i]->SerializeAsString();
}
utils::STLDeleteElements(document_batch);
}
} // namespace
class DocumentSource : public OpKernel {
public:
explicit DocumentSource(OpKernelConstruction *context) : OpKernel(context) {
GetTaskContext(context, &task_context_);
string corpus_name;
OP_REQUIRES_OK(context, context->GetAttr("corpus_name", &corpus_name));
OP_REQUIRES_OK(context, context->GetAttr("batch_size", &batch_size_));
OP_REQUIRES(context, batch_size_ > 0,
InvalidArgument("invalid batch_size provided"));
corpus_.reset(new TextReader(*task_context_.GetInput(corpus_name)));
}
void Compute(OpKernelContext *context) override {
mutex_lock lock(mu_);
Sentence *document;
vector<Sentence *> document_batch;
while ((document = corpus_->Read()) != NULL) {
document_batch.push_back(document);
if (static_cast<int>(document_batch.size()) == batch_size_) {
OutputDocuments(context, &document_batch);
OutputLast(context, false);
return;
}
}
OutputDocuments(context, &document_batch);
OutputLast(context, true);
}
private:
void OutputLast(OpKernelContext *context, bool last) {
Tensor *output;
OP_REQUIRES_OK(context,
context->allocate_output(1, TensorShape({}), &output));
output->scalar<bool>()() = last;
}
// Task context used to configure this op.
TaskContext task_context_;
// mutex to synchronize access to Compute.
mutex mu_;
std::unique_ptr<TextReader> corpus_;
string documents_path_;
int batch_size_;
};
REGISTER_KERNEL_BUILDER(Name("DocumentSource").Device(DEVICE_CPU),
DocumentSource);
class DocumentSink : public OpKernel {
public:
explicit DocumentSink(OpKernelConstruction *context) : OpKernel(context) {
GetTaskContext(context, &task_context_);
string corpus_name;
OP_REQUIRES_OK(context, context->GetAttr("corpus_name", &corpus_name));
writer_.reset(new TextWriter(*task_context_.GetInput(corpus_name)));
}
void Compute(OpKernelContext *context) override {
mutex_lock lock(mu_);
auto documents = context->input(0).vec<string>();
for (int i = 0; i < documents.size(); ++i) {
Sentence document;
OP_REQUIRES(context, document.ParseFromString(documents(i)),
InvalidArgument("failed to parse sentence"));
writer_->Write(document);
}
}
private:
// Task context used to configure this op.
TaskContext task_context_;
// mutex to synchronize access to Compute.
mutex mu_;
string documents_path_;
std::unique_ptr<TextWriter> writer_;
};
REGISTER_KERNEL_BUILDER(Name("DocumentSink").Device(DEVICE_CPU),
DocumentSink);
// Sentence filter for filtering out documents where the parse trees are not
// well-formed, i.e. they contain cycles.
class WellFormedFilter : public OpKernel {
public:
explicit WellFormedFilter(OpKernelConstruction *context) : OpKernel(context) {
GetTaskContext(context, &task_context_);
OP_REQUIRES_OK(context, context->GetAttr("keep_malformed_documents",
&keep_malformed_));
}
void Compute(OpKernelContext *context) override {
auto documents = context->input(0).vec<string>();
vector<Sentence *> output_documents;
for (int i = 0; i < documents.size(); ++i) {
Sentence *document = new Sentence;
OP_REQUIRES(context, document->ParseFromString(documents(i)),
InvalidArgument("failed to parse sentence"));
if (ShouldKeep(*document)) {
output_documents.push_back(document);
} else {
delete document;
}
}
OutputDocuments(context, &output_documents);
}
private:
bool ShouldKeep(const Sentence &doc) {
vector<int> visited(doc.token_size(), -1);
for (int i = 0; i < doc.token_size(); ++i) {
// Already visited node.
if (visited[i] != -1) continue;
int t = i;
while (t != -1) {
if (visited[t] == -1) {
// If it is not visited yet, mark it.
visited[t] = i;
} else if (visited[t] < i) {
// A value smaller than i (and not -1) means this token was already cleared
// on an earlier pass, so the chain from here is known to be loop-free.
break;
} else {
// Loop detected.
LOG(ERROR) << "Loop detected in document " << doc.DebugString();
return keep_malformed_;
}
t = doc.token(t).head();
}
}
return true;
}
private:
// Task context used to configure this op.
TaskContext task_context_;
bool keep_malformed_;
};
REGISTER_KERNEL_BUILDER(Name("WellFormedFilter").Device(DEVICE_CPU),
WellFormedFilter);
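To make the head-chain walk in ShouldKeep() above concrete, here is a standalone sketch of the same marking scheme over a plain head array; the trees in main() are made up, and -1 marks a root.

#include <iostream>
#include <vector>

// Returns true if following head pointers never revisits a token marked in
// the current pass, i.e. the head assignments contain no cycle.
bool IsLoopFree(const std::vector<int> &heads) {
  std::vector<int> visited(heads.size(), -1);
  for (int i = 0; i < static_cast<int>(heads.size()); ++i) {
    int t = i;
    while (t != -1) {
      if (visited[t] == -1) {
        visited[t] = i;  // first time the current pass reaches t
      } else if (visited[t] < i) {
        break;           // cleared on an earlier pass; chain is loop-free
      } else {
        return false;    // reached twice in the same pass: a cycle
      }
      t = heads[t];
    }
  }
  return true;
}

int main() {
  std::cout << IsLoopFree({-1, 0, 1}) << "\n";  // 1: a well-formed chain
  std::cout << IsLoopFree({1, 2, 0}) << "\n";   // 0: 0 -> 1 -> 2 -> 0 cycles
  return 0;
}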
// Sentence filter that modifies dependency trees to make them projective. This
// could be made more efficient by looping over sentences instead of the entire
// document. Assumes that the document is well-formed in the sense of having
// no looping dependencies.
//
// Task arguments:
// bool discard_non_projective (false) : If true, discards documents with
// non-projective trees instead of projectivizing them.
class ProjectivizeFilter : public OpKernel {
public:
explicit ProjectivizeFilter(OpKernelConstruction *context)
: OpKernel(context) {
GetTaskContext(context, &task_context_);
OP_REQUIRES_OK(context, context->GetAttr("discard_non_projective",
&discard_non_projective_));
}
void Compute(OpKernelContext *context) override {
auto documents = context->input(0).vec<string>();
vector<Sentence *> output_documents;
for (int i = 0; i < documents.size(); ++i) {
Sentence *document = new Sentence;
OP_REQUIRES(context, document->ParseFromString(documents(i)),
InvalidArgument("failed to parse sentence"));
if (Process(document)) {
output_documents.push_back(document);
} else {
delete document;
}
}
OutputDocuments(context, &output_documents);
}
bool Process(Sentence *doc) {
const int num_tokens = doc->token_size();
// Left and right boundaries for arcs. The left and right ends of an arc are
// bounded by the arcs that pass over it. If an arc exceeds these bounds it
// will cross an arc passing over it, making it a non-projective arc.
vector<int> left(num_tokens);
vector<int> right(num_tokens);
// Lift the shortest non-projective arc until the document is projective.
while (true) {
// Initialize boundaries to the whole document for all arcs.
for (int i = 0; i < num_tokens; ++i) {
left[i] = -1;
right[i] = num_tokens - 1;
}
// Find left and right bounds for each token.
for (int i = 0; i < num_tokens; ++i) {
int head_index = doc->token(i).head();
// Find left and right end of arc.
int l = std::min(i, head_index);
int r = std::max(i, head_index);
// Bound all tokens under the arc.
for (int j = l + 1; j < r; ++j) {
if (left[j] < l) left[j] = l;
if (right[j] > r) right[j] = r;
}
}
// Find deepest non-projective arc.
int deepest_arc = -1;
int max_depth = -1;
// The non-projective arcs are those that exceed their bounds.
for (int i = 0; i < num_tokens; ++i) {
int head_index = doc->token(i).head();
if (head_index == -1) continue; // any crossing arc must be deeper
int l = std::min(i, head_index);
int r = std::max(i, head_index);
int left_bound = std::max(left[l], left[r]);
int right_bound = std::min(right[l], right[r]);
if (l < left_bound || r > right_bound) {
// Found non-projective arc.
if (discard_non_projective_) return false;
// Pick the deepest as the best candidate for lifting.
int depth = 0;
int j = i;
while (j != -1) {
++depth;
j = doc->token(j).head();
}
if (depth > max_depth) {
deepest_arc = i;
max_depth = depth;
}
}
}
// If there are no more non-projective arcs we are done.
if (deepest_arc == -1) return true;
// Lift non-projective arc.
int lifted_head = doc->token(doc->token(deepest_arc).head()).head();
doc->mutable_token(deepest_arc)->set_head(lifted_head);
}
}
private:
// Task context used to configure this op.
TaskContext task_context_;
// Whether or not to throw away non-projective documents.
bool discard_non_projective_;
};
REGISTER_KERNEL_BUILDER(Name("ProjectivizeFilter").Device(DEVICE_CPU),
ProjectivizeFilter);
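As a worked example of the lifting step in Process() above (the four-token tree is made up): with heads {2, 3, -1, 2}, the arc 1->3 crosses the arc 0->2, and the deeper of the two (token 1, depth 3) is lifted to its grandparent, giving the projective tree {2, 2, -1, 2}. The sketch below verifies the before/after states with a simplified pairwise crossing test rather than the bound-based test used above.

#include <algorithm>
#include <iostream>
#include <vector>

// Simplified O(n^2) pairwise crossing test; equivalent in outcome to the
// bound-based non-projectivity test used by ProjectivizeFilter::Process().
bool IsProjective(const std::vector<int> &heads) {
  const int n = static_cast<int>(heads.size());
  for (int i = 0; i < n; ++i) {
    if (heads[i] == -1) continue;
    const int l1 = std::min(i, heads[i]), r1 = std::max(i, heads[i]);
    for (int j = 0; j < n; ++j) {
      if (heads[j] == -1) continue;
      const int l2 = std::min(j, heads[j]), r2 = std::max(j, heads[j]);
      // Two arcs cross if exactly one endpoint of one lies strictly inside
      // the span of the other.
      if ((l1 < l2 && l2 < r1 && r1 < r2) || (l2 < l1 && l1 < r2 && r2 < r1)) {
        return false;
      }
    }
  }
  return true;
}

int main() {
  std::vector<int> heads = {2, 3, -1, 2};    // token 1 hangs off token 3
  std::cout << IsProjective(heads) << "\n";  // 0: arc 1->3 crosses arc 0->2
  heads[1] = heads[heads[1]];                // lift token 1 to its grandparent (2)
  std::cout << IsProjective(heads) << "\n";  // 1: now projective
  return 0;
}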
} // namespace syntaxnet
/* Copyright 2016 Google Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "syntaxnet/document_format.h"
namespace syntaxnet {
// Component registry for document formatters.
REGISTER_CLASS_REGISTRY("document format", DocumentFormat);
} // namespace syntaxnet
/* Copyright 2016 Google Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// An interface for document formats.
#ifndef SYNTAXNET_DOCUMENT_FORMAT_H__
#define SYNTAXNET_DOCUMENT_FORMAT_H__
#include <string>
#include <vector>
#include "syntaxnet/utils.h"
#include "syntaxnet/registry.h"
#include "syntaxnet/sentence.pb.h"
#include "syntaxnet/task_context.h"
#include "tensorflow/core/lib/io/inputbuffer.h"
namespace syntaxnet {
// A document format component converts a key/value pair from a record to one or
// more documents. The record format is used for selecting the document format
// component. A document format component can be registered with the
// REGISTER_DOCUMENT_FORMAT macro.
class DocumentFormat : public RegisterableClass<DocumentFormat> {
public:
DocumentFormat() {}
virtual ~DocumentFormat() {}
// Reads a record from the given input buffer with format specific logic.
// Returns false if no record could be read because we reached end of file.
virtual bool ReadRecord(tensorflow::io::InputBuffer *buffer,
string *record) = 0;
// Converts a key/value pair to one or more documents.
virtual void ConvertFromString(const string &key, const string &value,
vector<Sentence *> *documents) = 0;
// Converts a document to a key/value pair.
virtual void ConvertToString(const Sentence &document,
string *key, string *value) = 0;
private:
TF_DISALLOW_COPY_AND_ASSIGN(DocumentFormat);
};
#define REGISTER_DOCUMENT_FORMAT(type, component) \
REGISTER_CLASS_COMPONENT(DocumentFormat, type, component)
} // namespace syntaxnet
#endif // SYNTAXNET_DOCUMENT_FORMAT_H__
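For orientation, a hedged sketch of what a concrete format registered against this interface could look like. The "one raw sentence per line" format below is hypothetical and not part of this commit; it assumes the Sentence proto exposes docid/text accessors and that tensorflow::io::InputBuffer::ReadLine() is available.

#include <string>
#include <vector>
#include "syntaxnet/document_format.h"
#include "syntaxnet/sentence.pb.h"

namespace syntaxnet {

class PlainTextFormat : public DocumentFormat {
 public:
  // One record per input line.
  bool ReadRecord(tensorflow::io::InputBuffer *buffer,
                  string *record) override {
    return buffer->ReadLine(record).ok();
  }

  // Uses the key as the document id and the value as the raw sentence text.
  void ConvertFromString(const string &key, const string &value,
                         vector<Sentence *> *documents) override {
    Sentence *sentence = new Sentence;
    sentence->set_docid(key);
    sentence->set_text(value);
    documents->push_back(sentence);
  }

  void ConvertToString(const Sentence &document,
                       string *key, string *value) override {
    *key = document.docid();
    *value = document.text();
  }
};

REGISTER_DOCUMENT_FORMAT("plain-text", PlainTextFormat);

}  // namespace syntaxnet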
/* Copyright 2016 Google Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "syntaxnet/embedding_feature_extractor.h"
#include <vector>
#include "syntaxnet/feature_extractor.h"
#include "syntaxnet/parser_features.h"
#include "syntaxnet/task_context.h"
#include "syntaxnet/utils.h"
namespace syntaxnet {
void GenericEmbeddingFeatureExtractor::Setup(TaskContext *context) {
// Don't use version to determine how to get feature FML.
const string features = context->Get(
tensorflow::strings::StrCat(ArgPrefix(), "_", "features"), "");
const string embedding_names =
context->Get(GetParamName("embedding_names"), "");
const string embedding_dims =
context->Get(GetParamName("embedding_dims"), "");
LOG(INFO) << "Features: " << features;
LOG(INFO) << "Embedding names: " << embedding_names;
LOG(INFO) << "Embedding dims: " << embedding_dims;
embedding_fml_ = utils::Split(features, ';');
add_strings_ = context->Get(GetParamName("add_varlen_strings"), false);
embedding_names_ = utils::Split(embedding_names, ';');
for (const string &dim : utils::Split(embedding_dims, ';')) {
embedding_dims_.push_back(utils::ParseUsing<int>(dim, utils::ParseInt32));
}
}
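For concreteness, a sketch of how the three semicolon-separated parameters line up after Setup(). The values are hypothetical (not taken from any released task context), and the snippet only assumes utils::Split() as used above.

#include <iostream>
#include <string>
#include <vector>
#include "syntaxnet/utils.h"

// Hypothetical values a task context might supply for the prefix "parser":
//   parser_features        = "input.word;input.tag"
//   parser_embedding_names = "words;tags"
//   parser_embedding_dims  = "64;32"
// Setup() splits each string on ';', so index i of every list describes the
// same embedding space: FML group i is embedded under name i with dimension i.
int main() {
  const std::vector<std::string> fml =
      syntaxnet::utils::Split("input.word;input.tag", ';');
  const std::vector<std::string> dims = syntaxnet::utils::Split("64;32", ';');
  std::cout << fml[0] << " -> " << dims[0] << " dims\n";  // input.word -> 64
  std::cout << fml[1] << " -> " << dims[1] << " dims\n";  // input.tag  -> 32
  return 0;
}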
void GenericEmbeddingFeatureExtractor::Init(TaskContext *context) {
}
vector<vector<SparseFeatures>> GenericEmbeddingFeatureExtractor::ConvertExample(
const vector<FeatureVector> &feature_vectors) const {
// Extract the features.
vector<vector<SparseFeatures>> sparse_features(feature_vectors.size());
for (size_t i = 0; i < feature_vectors.size(); ++i) {
// Convert each FeatureVector to the DistBelief-style SparseFeatures format.
sparse_features[i] =
vector<SparseFeatures>(generic_feature_extractor(i).feature_types());
for (int j = 0; j < feature_vectors[i].size(); ++j) {
const FeatureType &feature_type = *feature_vectors[i].type(j);
const FeatureValue value = feature_vectors[i].value(j);
const bool is_continuous = feature_type.name().find("continuous") == 0;
const int64 id = is_continuous ? FloatFeatureValue(value).id : value;
const int base = feature_type.base();
if (id >= 0) {
sparse_features[i][base].add_id(id);
if (is_continuous) {
sparse_features[i][base].add_weight(FloatFeatureValue(value).weight);
}
if (add_strings_) {
sparse_features[i][base].add_description(tensorflow::strings::StrCat(
feature_type.name(), "=", feature_type.GetFeatureValueName(id)));
}
}
}
}
return sparse_features;
}
} // namespace syntaxnet
/* Copyright 2016 Google Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef SYNTAXNET_EMBEDDING_FEATURE_EXTRACTOR_H_
#define SYNTAXNET_EMBEDDING_FEATURE_EXTRACTOR_H_
#include <functional>
#include <memory>
#include <string>
#include <vector>
#include "syntaxnet/utils.h"
#include "syntaxnet/feature_extractor.h"
#include "syntaxnet/feature_types.h"
#include "syntaxnet/parser_features.h"
#include "syntaxnet/sentence_features.h"
#include "syntaxnet/sparse.pb.h"
#include "syntaxnet/task_context.h"
#include "syntaxnet/workspace.h"
#include "tensorflow/core/lib/strings/strcat.h"
namespace syntaxnet {
// An EmbeddingFeatureExtractor manages the extraction of features for
// embedding-based models. It wraps a sequence of underlying classes of feature
// extractors, along with associated predicate maps. Each class of feature
// extractors is associated with a name, e.g., "words", "labels", "tags".
//
// The class is split between a generic abstract version,
// GenericEmbeddingFeatureExtractor (that can be initialized without knowing the
// signature of the ExtractFeatures method) and a typed version.
//
// The predicate maps must be initialized before use: they can be loaded using
// Read() or updated via UpdateMapsForExample.
class GenericEmbeddingFeatureExtractor {
public:
virtual ~GenericEmbeddingFeatureExtractor() {}
// Get the prefix string to put in front of all arguments, so they don't
// conflict with other embedding models.
virtual const string ArgPrefix() const = 0;
// Sets up predicate maps and embedding space names that are common for all
// embedding based feature extractors.
virtual void Setup(TaskContext *context);
virtual void Init(TaskContext *context);
// Requests workspace for the underlying feature extractors. This is
// implemented in the typed class.
virtual void RequestWorkspaces(WorkspaceRegistry *registry) = 0;
// Number of predicates for the embedding at a given index (vocabulary size).
int EmbeddingSize(int index) const {
return generic_feature_extractor(index).GetDomainSize();
}
// Returns number of embedding spaces.
int NumEmbeddings() const { return embedding_dims_.size(); }
// Returns the number of features in the embedding space.
const int FeatureSize(int idx) const {
return generic_feature_extractor(idx).feature_types();
}
// Returns the dimensionality of the embedding space.
int EmbeddingDims(int index) const { return embedding_dims_[index]; }
// Accessor for embedding dims (dimensions of the embedding spaces).
const vector<int> &embedding_dims() const { return embedding_dims_; }
const vector<string> &embedding_fml() const { return embedding_fml_; }
// Get parameter name by concatenating the prefix and the original name.
string GetParamName(const string &param_name) const {
return tensorflow::strings::StrCat(ArgPrefix(), "_", param_name);
}
protected:
// Provides the generic class with access to the templated extractors. This is
// used to get the type information out of the feature extractor without
// knowing the specific calling arguments of the extractor itself.
virtual const GenericFeatureExtractor &generic_feature_extractor(
int idx) const = 0;
// Converts a vector of extracted features into
// dist_belief::SparseFeatures. Each feature in each feature vector becomes a
// single SparseFeatures. The predicates are mapped through map_fn which
// should point to either mutable_map_fn or const_map_fn depending on whether
// or not the predicate maps should be updated.
vector<vector<SparseFeatures>> ConvertExample(
const vector<FeatureVector> &feature_vectors) const;
private:
// Embedding space names for parameter sharing.
vector<string> embedding_names_;
// FML strings for each feature extractor.
vector<string> embedding_fml_;
// Size of each of the embedding spaces (maximum predicate id).
vector<int> embedding_sizes_;
// Embedding dimensions of the embedding spaces (i.e. 32, 64 etc.)
vector<int> embedding_dims_;
// Whether or not to add string descriptions to converted examples.
bool add_strings_;
};
// Templated, object-specific implementation of the
// EmbeddingFeatureExtractor. EXTRACTOR should be a FeatureExtractor<OBJ,
// ARGS...> class that has the appropriate FeatureTraits() to ensure that
// locator type features work.
//
// Note: for backwards compatibility purposes, this always reads the FML spec
// from "<prefix>_features".
template <class EXTRACTOR, class OBJ, class... ARGS>
class EmbeddingFeatureExtractor : public GenericEmbeddingFeatureExtractor {
public:
// Sets up all predicate maps, feature extractors, and flags.
void Setup(TaskContext *context) override {
GenericEmbeddingFeatureExtractor::Setup(context);
feature_extractors_.resize(embedding_fml().size());
for (int i = 0; i < embedding_fml().size(); ++i) {
feature_extractors_[i].Parse(embedding_fml()[i]);
feature_extractors_[i].Setup(context);
}
}
// Initializes resources needed by the feature extractors.
void Init(TaskContext *context) override {
GenericEmbeddingFeatureExtractor::Init(context);
for (auto &feature_extractor : feature_extractors_) {
feature_extractor.Init(context);
}
}
// Requests workspaces from the registry. Must be called after Init(), and
// before Preprocess().
void RequestWorkspaces(WorkspaceRegistry *registry) override {
for (auto &feature_extractor : feature_extractors_) {
feature_extractor.RequestWorkspaces(registry);
}
}
// Must be called once on each object (e.g., once per parser state for a
// sentence), before any feature extraction (e.g., UpdateMapsForExample,
// ExtractSparseFeatures).
void Preprocess(WorkspaceSet *workspaces, OBJ *obj) const {
for (auto &feature_extractor : feature_extractors_) {
feature_extractor.Preprocess(workspaces, obj);
}
}
// Returns a ragged array of SparseFeatures, for 1) each feature extractor
// class e, and 2) each feature f extracted by e. Underlying predicate maps
// will not be updated and so unrecognized predicates may occur. In such a
// case the SparseFeatures object associated with a given extractor class and
// feature will be empty.
vector<vector<SparseFeatures>> ExtractSparseFeatures(
const WorkspaceSet &workspaces, const OBJ &obj, ARGS... args) const {
vector<FeatureVector> features(feature_extractors_.size());
ExtractFeatures(workspaces, obj, args..., &features);
return ConvertExample(features);
}
// Extracts features using the extractors. Note that features must already
// be initialized to the correct number of feature extractors. No predicate
// mapping is applied.
void ExtractFeatures(const WorkspaceSet &workspaces, const OBJ &obj,
ARGS... args,
vector<FeatureVector> *features) const {
DCHECK(features != nullptr);
DCHECK_EQ(features->size(), feature_extractors_.size());
for (int i = 0; i < feature_extractors_.size(); ++i) {
(*features)[i].clear();
feature_extractors_[i].ExtractFeatures(workspaces, obj, args...,
&(*features)[i]);
}
}
protected:
// Provides generic access to the feature extractors.
const GenericFeatureExtractor &generic_feature_extractor(
int idx) const override {
DCHECK_LT(idx, feature_extractors_.size());
DCHECK_GE(idx, 0);
return feature_extractors_[idx];
}
private:
// Templated feature extractor class.
vector<EXTRACTOR> feature_extractors_;
};
class ParserEmbeddingFeatureExtractor
: public EmbeddingFeatureExtractor<ParserFeatureExtractor, ParserState> {
public:
explicit ParserEmbeddingFeatureExtractor(const string &arg_prefix)
: arg_prefix_(arg_prefix) {}
private:
const string ArgPrefix() const override { return arg_prefix_; }
// Prefix for context parameters.
string arg_prefix_;
};
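A hedged sketch of the intended call order for the parser extractor declared above, given a populated TaskContext and a ParserState; the "parser" prefix and the WorkspaceSet::Reset() call reflect assumptions about the surrounding APIs rather than code in this commit.

void ExtractForState(TaskContext *task_context, ParserState *state) {
  // "parser" is an illustrative prefix; the extractor reads the
  // parser_features / parser_embedding_names / parser_embedding_dims params.
  ParserEmbeddingFeatureExtractor features("parser");
  features.Setup(task_context);
  features.Init(task_context);

  WorkspaceRegistry registry;
  features.RequestWorkspaces(&registry);    // after Init(), before Preprocess()

  WorkspaceSet workspaces;
  workspaces.Reset(registry);               // assumed WorkspaceSet API
  features.Preprocess(&workspaces, state);  // once per state

  vector<vector<SparseFeatures>> sparse =
      features.ExtractSparseFeatures(workspaces, *state);
  LOG(INFO) << "Extracted " << sparse.size() << " feature groups";
}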
} // namespace syntaxnet
#endif // SYNTAXNET_EMBEDDING_FEATURE_EXTRACTOR_H_
/* Copyright 2016 Google Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "syntaxnet/feature_extractor.h"
#include "syntaxnet/feature_types.h"
#include "syntaxnet/fml_parser.h"
namespace syntaxnet {
constexpr FeatureValue GenericFeatureFunction::kNone;
GenericFeatureExtractor::GenericFeatureExtractor() {}
GenericFeatureExtractor::~GenericFeatureExtractor() {}
void GenericFeatureExtractor::Parse(const string &source) {
// Parse feature specification into descriptor.
FMLParser parser;
parser.Parse(source, mutable_descriptor());
// Initialize feature extractor from descriptor.
InitializeFeatureFunctions();
}
void GenericFeatureExtractor::InitializeFeatureTypes() {
// Register all feature types.
GetFeatureTypes(&feature_types_);
for (size_t i = 0; i < feature_types_.size(); ++i) {
FeatureType *ft = feature_types_[i];
ft->set_base(i);
// Check for feature space overflow.
double domain_size = ft->GetDomainSize();
if (domain_size < 0) {
LOG(FATAL) << "Illegal domain size for feature " << ft->name()
<< domain_size;
}
}
vector<string> types_names;
GetFeatureTypeNames(&types_names);
CHECK_EQ(feature_types_.size(), types_names.size());
}
void GenericFeatureExtractor::GetFeatureTypeNames(
vector<string> *type_names) const {
for (size_t i = 0; i < feature_types_.size(); ++i) {
FeatureType *ft = feature_types_[i];
type_names->push_back(ft->name());
}
}
FeatureValue GenericFeatureExtractor::GetDomainSize() const {
// Domain size of the set of features is equal to:
// [largest domain size of any feature types] * [number of feature types]
FeatureValue max_feature_type_dsize = 0;
for (size_t i = 0; i < feature_types_.size(); ++i) {
FeatureType *ft = feature_types_[i];
const FeatureValue feature_type_dsize = ft->GetDomainSize();
if (feature_type_dsize > max_feature_type_dsize) {
max_feature_type_dsize = feature_type_dsize;
}
}
return max_feature_type_dsize;
}
string GenericFeatureFunction::GetParameter(const string &name) const {
// Find named parameter in feature descriptor.
for (int i = 0; i < descriptor_->parameter_size(); ++i) {
if (name == descriptor_->parameter(i).name()) {
return descriptor_->parameter(i).value();
}
}
return "";
}
GenericFeatureFunction::GenericFeatureFunction() {}
GenericFeatureFunction::~GenericFeatureFunction() {
delete feature_type_;
}
int GenericFeatureFunction::GetIntParameter(const string &name,
int default_value) const {
string value = GetParameter(name);
return utils::ParseUsing<int>(value, default_value,
tensorflow::strings::safe_strto32);
}
void GenericFeatureFunction::GetFeatureTypes(
vector<FeatureType *> *types) const {
if (feature_type_ != nullptr) types->push_back(feature_type_);
}
FeatureType *GenericFeatureFunction::GetFeatureType() const {
// If a single feature type has been registered return it.
if (feature_type_ != nullptr) return feature_type_;
// Get feature types for function.
vector<FeatureType *> types;
GetFeatureTypes(&types);
// If there is exactly one feature type return this, else return null.
if (types.size() == 1) return types[0];
return nullptr;
}
} // namespace syntaxnet
/* Copyright 2016 Google Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Generic feature extractor for extracting features from objects. The feature
// extractor can be used for extracting features from any object. The feature
// extractor and feature function classes are template classes that have to
// be instantiated for extracting feature from a specific object type.
//
// A feature extractor consists of a hierarchy of feature functions. Each
// feature function extracts one or more feature type and value pairs from the
// object.
//
// The feature extractor has a modular design where new feature functions can be
// registered as components. The feature extractor is initialized from a
// descriptor represented by a protocol buffer. The feature extractor can also
// be initialized from a text-based source specification of the feature
// extractor. Feature specification parsers can be added as components. By
// default the feature extractor can be read from an ASCII protocol buffer or in
// a simple feature modeling language (fml).
// A feature function is invoked with a focus. Nested feature functions can be
// invoked with another focus determined by the parent feature function.
#ifndef SYNTAXNET_FEATURE_EXTRACTOR_H_
#define SYNTAXNET_FEATURE_EXTRACTOR_H_
#include <memory>
#include <string>
#include <vector>
#include "syntaxnet/feature_extractor.pb.h"
#include "syntaxnet/feature_types.h"
#include "syntaxnet/proto_io.h"
#include "syntaxnet/registry.h"
#include "syntaxnet/sentence.pb.h"
#include "syntaxnet/task_context.h"
#include "syntaxnet/utils.h"
#include "syntaxnet/workspace.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/core/stringpiece.h"
#include "tensorflow/core/lib/io/inputbuffer.h"
#include "tensorflow/core/lib/io/record_reader.h"
#include "tensorflow/core/lib/io/record_writer.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/env.h"
namespace syntaxnet {
// Use the same type for feature values as is used for predicates.
typedef int64 Predicate;
typedef Predicate FeatureValue;
// Output feature model in FML format.
void ToFMLFunction(const FeatureFunctionDescriptor &function, string *output);
void ToFML(const FeatureFunctionDescriptor &function, string *output);
// A feature vector contains feature type and value pairs.
class FeatureVector {
public:
FeatureVector() {}
// Adds feature type and value pair to feature vector.
void add(FeatureType *type, FeatureValue value) {
features_.emplace_back(type, value);
}
// Removes all elements from the feature vector.
void clear() { features_.clear(); }
// Returns the number of elements in the feature vector.
int size() const { return features_.size(); }
// Reserves space in the underlying feature vector.
void reserve(int n) { features_.reserve(n); }
// Returns feature type for an element in the feature vector.
FeatureType *type(int index) const { return features_[index].type; }
// Returns feature value for an element in the feature vector.
FeatureValue value(int index) const { return features_[index].value; }
private:
// Structure for holding feature type and value pairs.
struct Element {
Element() : type(NULL), value(-1) {}
Element(FeatureType *t, FeatureValue v) : type(t), value(v) {}
FeatureType *type;
FeatureValue value;
};
// Array for storing feature vector elements.
vector<Element> features_;
TF_DISALLOW_COPY_AND_ASSIGN(FeatureVector);
};
// The generic feature extractor is the type-independent part of a feature
// extractor. This holds the descriptor for the feature extractor and the
// collection of feature types used in the feature extractor. The feature
// types are not available until FeatureExtractor<>::Init() has been called.
class GenericFeatureExtractor {
public:
GenericFeatureExtractor();
virtual ~GenericFeatureExtractor();
// Initializes the feature extractor from a source representation of the
// feature extractor. The first line is used for determining the feature
// specification language. If the first line starts with #! followed by a name
// then this name is used for instantiating a feature specification parser
// with that name. If the language cannot be detected this way it falls back
// to using the default language supplied.
void Parse(const string &source);
// Returns the feature extractor descriptor.
const FeatureExtractorDescriptor &descriptor() const { return descriptor_; }
FeatureExtractorDescriptor *mutable_descriptor() { return &descriptor_; }
// Returns the number of feature types in the feature extractor. Invalid
// before Init() has been called.
int feature_types() const { return feature_types_.size(); }
// Returns all feature types names used by the extractor. The names are
// added to the types_names array. Invalid before Init() has been called.
void GetFeatureTypeNames(vector<string> *type_names) const;
// Returns a feature type used in the extractor. Invalid before Init() has
// been called.
const FeatureType *feature_type(int index) const {
return feature_types_[index];
}
// Returns the feature domain size of this feature extractor.
// NOTE: The way that domain size is calculated is, for some, unintuitive. It
// is the largest domain size of any feature type.
FeatureValue GetDomainSize() const;
protected:
// Initializes the feature types used by the extractor. Called from
// FeatureExtractor<>::Init().
void InitializeFeatureTypes();
private:
// Initializes the top-level feature functions.
virtual void InitializeFeatureFunctions() = 0;
// Returns all feature types used by the extractor. The feature types are
// added to the result array.
virtual void GetFeatureTypes(vector<FeatureType *> *types) const = 0;
// Descriptor for the feature extractor. This is a protocol buffer that
// contains all the information about the feature extractor. The feature
// functions are initialized from the information in the descriptor.
FeatureExtractorDescriptor descriptor_;
// All feature types used by the feature extractor. The collection of all the
// feature types describes the feature space of the feature set produced by
// the feature extractor. Not owned.
vector<FeatureType *> feature_types_;
};
// The generic feature function is the type-independent part of a feature
// function. Each feature function is associated with the descriptor that it is
// instantiated from. The feature types associated with this feature function
// will be established by the time FeatureExtractor<>::Init() completes.
class GenericFeatureFunction {
public:
// A feature value that represents the absence of a value.
static constexpr FeatureValue kNone = -1;
GenericFeatureFunction();
virtual ~GenericFeatureFunction();
// Sets up the feature function. NB: FeatureTypes of nested functions are not
// guaranteed to be available until Init().
virtual void Setup(TaskContext *context) {}
// Initializes the feature function. NB: The FeatureType of this function must
// be established when this method completes.
virtual void Init(TaskContext *context) {}
// Requests workspaces from a registry to obtain indices into a WorkspaceSet
// for any Workspace objects used by this feature function. NB: This will be
// called after Init(), so it can depend on resources and arguments.
virtual void RequestWorkspaces(WorkspaceRegistry *registry) {}
// Appends the feature types produced by the feature function to types. The
// default implementation appends feature_type(), if non-null. Invalid
// before Init() has been called.
virtual void GetFeatureTypes(vector<FeatureType *> *types) const;
// Returns the feature type for features produced by this feature function. If
// the feature function produces features of different types this returns
// null. Invalid before Init() has been called.
virtual FeatureType *GetFeatureType() const;
// Returns the name of the registry used for creating the feature function.
// This can be used for checking if two feature functions are of the same
// kind.
virtual const char *RegistryName() const = 0;
// Returns the value of a named parameter in the feature function's descriptor.
// If the named parameter is not found the global parameters are searched.
string GetParameter(const string &name) const;
int GetIntParameter(const string &name, int default_value) const;
// Returns the FML function description for the feature function, i.e. the
// name and parameters without the nested features.
string FunctionName() const {
string output;
ToFMLFunction(*descriptor_, &output);
return output;
}
// Returns the prefix for nested feature functions. This is the prefix of this
// feature function concatenated with the feature function name.
string SubPrefix() const {
return prefix_.empty() ? FunctionName() : prefix_ + "." + FunctionName();
}
// Returns/sets the feature extractor this function belongs to.
GenericFeatureExtractor *extractor() const { return extractor_; }
void set_extractor(GenericFeatureExtractor *extractor) {
extractor_ = extractor;
}
// Returns/sets the feature function descriptor.
FeatureFunctionDescriptor *descriptor() const { return descriptor_; }
void set_descriptor(FeatureFunctionDescriptor *descriptor) {
descriptor_ = descriptor;
}
// Returns a descriptive name for the feature function. The name is taken from
// the descriptor for the feature function. If the name is empty or the
// feature function is a variable the name is the FML representation of the
// feature, including the prefix.
string name() const {
string output;
if (descriptor_->name().empty()) {
if (!prefix_.empty()) {
output.append(prefix_);
output.append(".");
}
ToFML(*descriptor_, &output);
} else {
output = descriptor_->name();
}
tensorflow::StringPiece stripped(output);
utils::RemoveWhitespaceContext(&stripped);
return stripped.ToString();
}
// Returns the argument from the feature function descriptor. It defaults to
// 0 if the argument has not been specified.
int argument() const {
return descriptor_->has_argument() ? descriptor_->argument() : 0;
}
// Returns/sets/clears function name prefix.
const string &prefix() const { return prefix_; }
void set_prefix(const string &prefix) { prefix_ = prefix; }
protected:
// Returns the feature type for single-type feature functions.
FeatureType *feature_type() const { return feature_type_; }
// Sets the feature type for single-type feature functions. This takes
// ownership of feature_type. Can only be called once.
void set_feature_type(FeatureType *feature_type) {
CHECK(feature_type_ == nullptr);
feature_type_ = feature_type;
}
private:
// Feature extractor this feature function belongs to. Not owned.
GenericFeatureExtractor *extractor_ = nullptr;
// Descriptor for feature function. Not owned.
FeatureFunctionDescriptor *descriptor_ = nullptr;
// Feature type for features produced by this feature function. If the
// feature function produces features of multiple feature types this is null
// and the feature function must return its feature types in
// GetFeatureTypes(). Owned.
FeatureType *feature_type_ = nullptr;
// Prefix used for sub-feature types of this function.
string prefix_;
};
// Feature function that can extract features from an object. Templated on
// two type arguments:
//
// OBJ: The "object" from which features are extracted; e.g., a sentence. This
// should be a plain type, rather than a reference or pointer.
//
// ARGS: A set of 0 or more types that are used to "index" into some part of the
// object that should be extracted, e.g. an int token index for a sentence
// object. This should not be a reference type.
template<class OBJ, class ...ARGS>
class FeatureFunction
: public GenericFeatureFunction,
public RegisterableClass< FeatureFunction<OBJ, ARGS...> > {
public:
using Self = FeatureFunction<OBJ, ARGS...>;
// Preprocesses the object. This will be called prior to calling Evaluate()
// or Compute() on that object.
virtual void Preprocess(WorkspaceSet *workspaces, OBJ *object) const {}
// Appends features computed from the object and focus to the result. The
// default implementation delegates to Compute(), adding a single value if
// available. Multi-valued feature functions must override this method.
virtual void Evaluate(const WorkspaceSet &workspaces, const OBJ &object,
ARGS... args, FeatureVector *result) const {
FeatureValue value = Compute(workspaces, object, args..., result);
if (value != kNone) result->add(feature_type(), value);
}
// Returns a feature value computed from the object and focus, or kNone if no
// value is computed. Single-valued feature functions only need to override
// this method.
virtual FeatureValue Compute(const WorkspaceSet &workspaces,
const OBJ &object,
ARGS... args,
const FeatureVector *fv) const {
return kNone;
}
// Instantiates a new feature function in a feature extractor from a feature
// descriptor.
static Self *Instantiate(GenericFeatureExtractor *extractor,
FeatureFunctionDescriptor *fd,
const string &prefix) {
Self *f = Self::Create(fd->type());
f->set_extractor(extractor);
f->set_descriptor(fd);
f->set_prefix(prefix);
return f;
}
// Returns the name of the registry for the feature function.
const char *RegistryName() const override {
return Self::registry()->name;
}
private:
// Special feature function class for resolving variable references. The type
// of the feature function is used for resolving the variable reference. When
// evaluated it will either get the feature value(s) from the variable portion
// of the feature vector, if present, or otherwise it will call the referenced
// feature extractor function directly to extract the feature(s).
class Reference;
};
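For illustration, a minimal single-valued feature function. The Document object and the capitalization semantics below are hypothetical (they are not part of this commit), and real use would additionally declare and register a class registry for FeatureFunction<Document, int>.

// Hypothetical object type used only for this sketch.
struct Document {
  vector<string> words;
};

class CapitalizationFeature : public FeatureFunction<Document, int> {
 public:
  enum Value { LOWER = 0, UPPER = 1 };

  // The feature type must be established by the time Init() completes.
  void Init(TaskContext *context) override {
    set_feature_type(new EnumFeatureType(
        "capitalization", {{LOWER, "LOWER"}, {UPPER, "UPPER"}}));
  }

  // Single-valued feature functions only need to override Compute().
  FeatureValue Compute(const WorkspaceSet &workspaces, const Document &doc,
                       int focus, const FeatureVector *fv) const override {
    if (focus < 0 || focus >= static_cast<int>(doc.words.size())) return kNone;
    const string &word = doc.words[focus];
    return (!word.empty() && word[0] >= 'A' && word[0] <= 'Z') ? UPPER : LOWER;
  }
};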
// Base class for features with nested feature functions. The nested functions
// are of type NES, which may be different from the type of the parent function.
// NB: NestedFeatureFunction will ensure that all initialization of nested
// functions takes place during Setup() and Init() -- after the nested features
// are initialized, the parent feature is initialized via SetupNested() and
// InitNested(). Alternatively, a derived class that overrides Setup() and
// Init() directly should call Parent::Setup(), Parent::Init(), etc. first.
//
// Note: NestedFeatureFunction cannot know how to call Preprocess, Evaluate, or
// Compute, since the nested functions may be of a different type.
template<class NES, class OBJ, class ...ARGS>
class NestedFeatureFunction : public FeatureFunction<OBJ, ARGS...> {
public:
using Parent = NestedFeatureFunction<NES, OBJ, ARGS...>;
// Clean up nested functions.
~NestedFeatureFunction() override { utils::STLDeleteElements(&nested_); }
// By default, just appends the nested feature types.
void GetFeatureTypes(vector<FeatureType *> *types) const override {
CHECK(!this->nested().empty())
<< "Nested features require nested features to be defined.";
for (auto *function : nested_) function->GetFeatureTypes(types);
}
// Sets up the nested features.
void Setup(TaskContext *context) override {
CreateNested(this->extractor(), this->descriptor(), &nested_,
this->SubPrefix());
for (auto *function : nested_) function->Setup(context);
SetupNested(context);
}
// Sets up this NestedFeatureFunction specifically.
virtual void SetupNested(TaskContext *context) {}
// Initializes the nested features.
void Init(TaskContext *context) override {
for (auto *function : nested_) function->Init(context);
InitNested(context);
}
// Initializes this NestedFeatureFunction specifically.
virtual void InitNested(TaskContext *context) {}
// Gets all the workspaces needed for the nested functions.
void RequestWorkspaces(WorkspaceRegistry *registry) override {
for (auto *function : nested_) function->RequestWorkspaces(registry);
}
// Returns the list of nested feature functions.
const vector<NES *> &nested() const { return nested_; }
// Instantiates nested feature functions for a feature function. Creates and
// initializes one feature function for each sub-descriptor in the feature
// descriptor.
static void CreateNested(GenericFeatureExtractor *extractor,
FeatureFunctionDescriptor *fd,
vector<NES *> *functions,
const string &prefix) {
for (int i = 0; i < fd->feature_size(); ++i) {
FeatureFunctionDescriptor *sub = fd->mutable_feature(i);
NES *f = NES::Instantiate(extractor, sub, prefix);
functions->push_back(f);
}
}
protected:
// The nested feature functions, if any, in order of declaration in the
// feature descriptor. Owned.
vector<NES *> nested_;
};
// Base class for a nested feature function that takes nested features with the
// same signature as these features, i.e. a meta feature. For this class, we can
// provide preprocessing of the nested features.
template<class OBJ, class ...ARGS>
class MetaFeatureFunction : public NestedFeatureFunction<
FeatureFunction<OBJ, ARGS...>, OBJ, ARGS...> {
public:
// Preprocesses using the nested features.
void Preprocess(WorkspaceSet *workspaces, OBJ *object) const override {
for (auto *function : this->nested_) {
function->Preprocess(workspaces, object);
}
}
};
// Template for a special type of locator: The locator of type
// FeatureFunction<OBJ, ARGS...> calls nested functions of type
// FeatureFunction<OBJ, IDX, ARGS...>, where the derived class DER is
// responsible for translating by providing the following:
//
// // Gets the new additional focus.
// IDX GetFocus(const WorkspaceSet &workspaces, const OBJ &object, ARGS... args);
//
// This is useful to e.g. add a token focus to a parser state based on some
// desired property of that state.
template<class DER, class OBJ, class IDX, class ...ARGS>
class FeatureAddFocusLocator : public NestedFeatureFunction<
FeatureFunction<OBJ, IDX, ARGS...>, OBJ, ARGS...> {
public:
void Preprocess(WorkspaceSet *workspaces, OBJ *object) const override {
for (auto *function : this->nested_) {
function->Preprocess(workspaces, object);
}
}
void Evaluate(const WorkspaceSet &workspaces, const OBJ &object,
ARGS... args, FeatureVector *result) const override {
IDX focus = static_cast<const DER *>(this)->GetFocus(
workspaces, object, args...);
for (auto *function : this->nested()) {
function->Evaluate(workspaces, object, focus, args..., result);
}
}
// Returns the first nested feature's computed value.
FeatureValue Compute(const WorkspaceSet &workspaces,
const OBJ &object,
ARGS... args,
const FeatureVector *result) const override {
IDX focus = static_cast<const DER *>(this)->GetFocus(
workspaces, object, args...);
return this->nested()[0]->Compute(
workspaces, object, focus, args..., result);
}
};
// CRTP feature locator class. This is a meta feature that modifies ARGS and
// then calls the nested feature functions with the modified ARGS. Note that in
// order for this template to work correctly, all of ARGS must be types for
// which the reference operator & can be interpreted as a pointer to the
// argument. The derived class DER must implement the UpdateArgs method, which
// takes pointers to the ARGS arguments:
//
// // Updates the current arguments.
// void UpdateArgs(const WorkspaceSet &workspaces, const OBJ &object,
// ARGS *...args) const;
template<class DER, class OBJ, class ...ARGS>
class FeatureLocator : public MetaFeatureFunction<OBJ, ARGS...> {
public:
// Feature locators have an additional check that there is no intrinsic type.
void GetFeatureTypes(vector<FeatureType *> *types) const override {
CHECK(this->feature_type() == nullptr)
<< "FeatureLocators should not have an intrinsic type.";
MetaFeatureFunction<OBJ, ARGS...>::GetFeatureTypes(types);
}
// Evaluates the locator.
void Evaluate(const WorkspaceSet &workspaces, const OBJ &object,
ARGS... args, FeatureVector *result) const override {
static_cast<const DER *>(this)->UpdateArgs(workspaces, object, &args...);
for (auto *function : this->nested()) {
function->Evaluate(workspaces, object, args..., result);
}
}
// Returns the first nested feature's computed value.
FeatureValue Compute(const WorkspaceSet &workspaces, const OBJ &object,
ARGS... args,
const FeatureVector *result) const override {
static_cast<const DER *>(this)->UpdateArgs(workspaces, object, &args...);
return this->nested()[0]->Compute(workspaces, object, args..., result);
}
};
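A minimal sketch of a concrete locator following the CRTP contract above; the offset semantics and the "offset" parameter name are hypothetical, not part of this commit.

// Shifts the focus index by a fixed offset before evaluating nested features.
template <class OBJ>
class OffsetLocator : public FeatureLocator<OffsetLocator<OBJ>, OBJ, int> {
 public:
  // Reads the (hypothetical) "offset" parameter from the feature descriptor.
  void SetupNested(TaskContext *context) override {
    offset_ = this->GetIntParameter("offset", 0);
  }

  // Called by FeatureLocator::Evaluate()/Compute() before delegating to the
  // nested functions; receives a pointer to each of the ARGS arguments.
  void UpdateArgs(const WorkspaceSet &workspaces, const OBJ &object,
                  int *focus) const {
    *focus += offset_;
  }

 private:
  int offset_ = 0;
};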
// Feature extractor for extracting features from objects of a certain class.
// Template type parameters are as defined for FeatureFunction.
template<class OBJ, class ...ARGS>
class FeatureExtractor : public GenericFeatureExtractor {
public:
// Feature function type for top-level functions in the feature extractor.
typedef FeatureFunction<OBJ, ARGS...> Function;
typedef FeatureExtractor<OBJ, ARGS...> Self;
// Feature locator type for the feature extractor.
template<class DER>
using Locator = FeatureLocator<DER, OBJ, ARGS...>;
// Initializes feature extractor.
FeatureExtractor() {}
~FeatureExtractor() override { utils::STLDeleteElements(&functions_); }
// Sets up the feature extractor. Note that only top-level functions exist
// until Setup() is called. This does not take ownership over the context,
// which must outlive this.
void Setup(TaskContext *context) {
for (Function *function : functions_) function->Setup(context);
}
// Initializes the feature extractor. Must be called after Setup(). This
// does not take ownership over the context, which must outlive this.
void Init(TaskContext *context) {
for (Function *function : functions_) function->Init(context);
this->InitializeFeatureTypes();
}
// Requests workspaces from the registry. Must be called after Init(), and
// before Preprocess(). Does not take ownership over registry. This should be
// the same registry used to initialize the WorkspaceSet used in Preprocess()
// and ExtractFeatures(). NB: This is a different ordering from that used in
// SentenceFeatureRepresentation style feature computation.
void RequestWorkspaces(WorkspaceRegistry *registry) {
for (auto *function : functions_) function->RequestWorkspaces(registry);
}
// Preprocesses the object using feature functions for the phase. Must be
// called before any calls to ExtractFeatures() on that object and phase.
void Preprocess(WorkspaceSet *workspaces, OBJ *object) const {
for (Function *function : functions_) {
function->Preprocess(workspaces, object);
}
}
// Extracts features from an object with a focus. This invokes all the
// top-level feature functions in the feature extractor. Only feature
// functions belonging to the specified phase are invoked.
void ExtractFeatures(const WorkspaceSet &workspaces, const OBJ &object,
ARGS... args, FeatureVector *result) const {
result->reserve(this->feature_types());
// Extract features.
for (int i = 0; i < functions_.size(); ++i) {
functions_[i]->Evaluate(workspaces, object, args..., result);
}
}
private:
// Creates and initializes all feature functions in the feature extractor.
void InitializeFeatureFunctions() override {
// Create all top-level feature functions.
for (int i = 0; i < descriptor().feature_size(); ++i) {
FeatureFunctionDescriptor *fd = mutable_descriptor()->mutable_feature(i);
Function *function = Function::Instantiate(this, fd, "");
functions_.push_back(function);
}
}
// Collect all feature types used in the feature extractor.
void GetFeatureTypes(vector<FeatureType *> *types) const override {
for (int i = 0; i < functions_.size(); ++i) {
functions_[i]->GetFeatureTypes(types);
}
}
// Top-level feature functions (and variables) in the feature extractor.
// Owned.
vector<Function *> functions_;
};
#define REGISTER_FEATURE_FUNCTION(base, name, component) \
REGISTER_CLASS_COMPONENT(base, name, component)
} // namespace syntaxnet
#endif // SYNTAXNET_FEATURE_EXTRACTOR_H_
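Putting the pieces together, a hedged sketch of the life cycle of a typed extractor. It reuses the hypothetical Document type and "capitalization" function from the earlier sketch, assumes the corresponding registry has been declared, and treats the FML string and the WorkspaceSet::Reset() call as illustrative.

void RunExtractor(TaskContext *context, Document *doc) {
  FeatureExtractor<Document, int> extractor;
  extractor.Parse("capitalization offset(1).capitalization");  // illustrative FML
  extractor.Setup(context);
  extractor.Init(context);                  // feature types become valid here

  WorkspaceRegistry registry;
  extractor.RequestWorkspaces(&registry);
  WorkspaceSet workspaces;
  workspaces.Reset(registry);               // assumed WorkspaceSet API
  extractor.Preprocess(&workspaces, doc);

  FeatureVector features;
  extractor.ExtractFeatures(workspaces, *doc, /*focus=*/0, &features);
  for (int i = 0; i < features.size(); ++i) {
    LOG(INFO) << features.type(i)->name() << " = " << features.value(i);
  }
}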
// Protocol buffers for feature extractor.
syntax = "proto2";
package syntaxnet;
message Parameter {
optional string name = 1;
optional string value = 2;
}
// Descriptor for feature function.
message FeatureFunctionDescriptor {
// Feature function type.
required string type = 1;
// Feature function name.
optional string name = 2;
// Default argument for feature function.
optional int32 argument = 3 [default = 0];
// Named parameters for feature descriptor.
repeated Parameter parameter = 4;
// Nested sub-feature function descriptors.
repeated FeatureFunctionDescriptor feature = 7;
};
// Descriptor for feature extractor.
message FeatureExtractorDescriptor {
// Top-level feature function for extractor.
repeated FeatureFunctionDescriptor feature = 1;
};
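For reference, a small C++ sketch of building a descriptor roughly equivalent to the FML fragment "input(1).word", using only the fields defined above: a top-level "input" function with argument 1 and a nested "word" function. The correspondence is illustrative; FMLParser is the authoritative translation.

#include "google/protobuf/text_format.h"
#include "syntaxnet/base.h"  // assumed source of the CHECK macro in this codebase
#include "syntaxnet/feature_extractor.pb.h"

syntaxnet::FeatureExtractorDescriptor MakeDescriptor() {
  syntaxnet::FeatureExtractorDescriptor descriptor;
  const char *kSpec =
      "feature {"
      "  type: 'input' argument: 1"
      "  feature { type: 'word' }"
      "}";
  CHECK(google::protobuf::TextFormat::ParseFromString(kSpec, &descriptor));
  return descriptor;
}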
/* Copyright 2016 Google Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Common feature types for parser components.
#ifndef SYNTAXNET_FEATURE_TYPES_H_
#define SYNTAXNET_FEATURE_TYPES_H_
#include <algorithm>
#include <map>
#include <string>
#include <utility>
#include "syntaxnet/utils.h"
namespace syntaxnet {
// Use the same type for feature values as is used for predicates.
typedef int64 Predicate;
typedef Predicate FeatureValue;
// Each feature value in a feature vector has a feature type. The feature type
// is used for converting feature type and value pairs to predicate values. The
// feature type can also return names for feature values and calculate the size
// of the feature value domain. The FeatureType class is abstract and must be
// specialized for the concrete feature types.
class FeatureType {
public:
// Initializes a feature type.
explicit FeatureType(const string &name)
: name_(name), base_(0) {}
virtual ~FeatureType() {}
// Converts a feature value to a name.
virtual string GetFeatureValueName(FeatureValue value) const = 0;
// Returns the size of the feature values domain.
virtual int64 GetDomainSize() const = 0;
// Returns the feature type name.
const string &name() const { return name_; }
Predicate base() const { return base_; }
void set_base(Predicate base) { base_ = base; }
private:
// Feature type name.
string name_;
// "Base" feature value: i.e. a "slot" in a global ordering of features.
Predicate base_;
};
// Templated generic resource based feature type. This feature type delegates
// look up of feature value names to an unknown resource class, which is not
// owned. Optionally, this type can also store a mapping of extra values which
// are not in the resource.
//
// Note: this class assumes that Resource->GetFeatureValueName() will return
// successfully ONLY for values in the range [0, Resource->NumValues()). Any
// feature value not in the extra value map and not in the above range will
// result in an ERROR being logged and "<INVALID>" being returned.
template<class Resource>
class ResourceBasedFeatureType : public FeatureType {
public:
// Creates a new type with given name, resource object, and a mapping of
// special values. The values must be greater than or equal to
// resource->NumValues() so as to avoid collisions; this is verified with
// CHECK at creation.
ResourceBasedFeatureType(const string &name, const Resource *resource,
const map<FeatureValue, string> &values)
: FeatureType(name), resource_(resource), values_(values) {
max_value_ = resource->NumValues() - 1;
for (const auto &pair : values) {
CHECK_GE(pair.first, resource->NumValues()) << "Invalid extra value: "
<< pair.first << "," << pair.second;
max_value_ = pair.first > max_value_ ? pair.first : max_value_;
}
}
// Creates a new type with no special values.
ResourceBasedFeatureType(const string &name, const Resource *resource)
: ResourceBasedFeatureType(name, resource, {}) {}
// Returns the feature name for a given feature value. First checks the values
// map, then checks the resource to look up the name.
string GetFeatureValueName(FeatureValue value) const override {
if (values_.find(value) != values_.end()) {
return values_.find(value)->second;
}
if (value >= 0 && value < resource_->NumValues()) {
return resource_->GetFeatureValueName(value);
} else {
LOG(ERROR) << "Invalid feature value " << value << " for " << name();
return "<INVALID>";
}
}
// Returns the number of possible values for this feature type. This is one
// greater than the largest value observed in the resource or extra values.
FeatureValue GetDomainSize() const override { return max_value_ + 1; }
protected:
// Shared resource. Not owned.
const Resource *resource_ = nullptr;
// Maximum possible value this feature could take.
FeatureValue max_value_;
// Mapping for extra feature values not in the resource.
map<FeatureValue, string> values_;
};
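// A minimal usage sketch of ResourceBasedFeatureType (illustrative only;
// "TagMap" stands for any hypothetical resource providing NumValues() and
// GetFeatureValueName()):
//
//   const TagMap *tags = ...;  // values 0 .. tags->NumValues() - 1
//   ResourceBasedFeatureType<TagMap> type(
//       "tag", tags, {{tags->NumValues(), "<ROOT>"}});
//   // type.GetDomainSize() == tags->NumValues() + 1
//   // type.GetFeatureValueName(tags->NumValues()) == "<ROOT>"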
// Feature type that is defined using an explicit map from FeatureValue to
// string values. This can reduce some of the boilerplate when defining
// features that generate enum values. Example usage:
//
//   class BeverageSizeFeature : public FeatureFunction<Beverage> {
// enum FeatureValue { SMALL, MEDIUM, LARGE }; // values for this feature
// void Init(TaskContext *context) override {
// set_feature_type(new EnumFeatureType("beverage_size",
//         {{SMALL, "SMALL"}, {MEDIUM, "MEDIUM"}, {LARGE, "LARGE"}}));
// }
// [...]
// };
class EnumFeatureType : public FeatureType {
public:
EnumFeatureType(const string &name,
const map<FeatureValue, string> &value_names)
: FeatureType(name), value_names_(value_names) {
for (const auto &pair : value_names) {
CHECK_GE(pair.first, 0)
<< "Invalid feature value: " << pair.first << ", " << pair.second;
domain_size_ = std::max(domain_size_, pair.first + 1);
}
}
// Returns the feature name for a given feature value.
string GetFeatureValueName(FeatureValue value) const override {
auto it = value_names_.find(value);
if (it == value_names_.end()) {
LOG(ERROR)
<< "Invalid feature value " << value << " for " << name();
return "<INVALID>";
}
return it->second;
}
// Returns the number of possible values for this feature type. This is one
// greater than the largest value in the value_names map.
FeatureValue GetDomainSize() const override { return domain_size_; }
protected:
// Maximum possible value this feature could take.
FeatureValue domain_size_ = 0;
// Names of feature values.
map<FeatureValue, string> value_names_;
};
} // namespace syntaxnet
#endif // $TARGETDIR_FEATURE_TYPES_H_
/* Copyright 2016 Google Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "syntaxnet/fml_parser.h"
#include <ctype.h>
#include <string>
#include "syntaxnet/utils.h"
#include "tensorflow/core/lib/strings/strcat.h"
namespace syntaxnet {
void FMLParser::Initialize(const string &source) {
// Initialize parser state.
source_ = source;
current_ = source_.begin();
item_start_ = line_start_ = current_;
line_number_ = item_line_number_ = 1;
// Read first input item.
NextItem();
}
void FMLParser::Error(const string &error_message) {
LOG(FATAL) << "Error in feature model, line " << item_line_number_
<< ", position " << (item_start_ - line_start_ + 1)
<< ": " << error_message
<< "\n " << string(line_start_, current_) << " <--HERE";
}
void FMLParser::Next() {
// Move to the next input character. If we are at a line break update line
// number and line start position.
if (*current_ == '\n') {
++line_number_;
++current_;
line_start_ = current_;
} else {
++current_;
}
}
void FMLParser::NextItem() {
// Skip white space and comments.
while (!eos()) {
if (*current_ == '#') {
// Skip comment.
while (!eos() && *current_ != '\n') Next();
} else if (isspace(*current_)) {
// Skip whitespace.
while (!eos() && isspace(*current_)) Next();
} else {
break;
}
}
// Record start position for next item.
item_start_ = current_;
item_line_number_ = line_number_;
// Check for end of input.
if (eos()) {
item_type_ = END;
return;
}
// Parse number.
if (isdigit(*current_) || *current_ == '+' || *current_ == '-') {
string::iterator start = current_;
Next();
while (isdigit(*current_) || *current_ == '.') Next();
item_text_.assign(start, current_);
item_type_ = NUMBER;
return;
}
// Parse string.
if (*current_ == '"') {
Next();
string::iterator start = current_;
while (*current_ != '"') {
if (eos()) Error("Unterminated string");
Next();
}
item_text_.assign(start, current_);
item_type_ = STRING;
Next();
return;
}
// Parse identifier name.
if (isalpha(*current_) || *current_ == '_' || *current_ == '/') {
string::iterator start = current_;
while (isalnum(*current_) || *current_ == '_' || *current_ == '-' ||
*current_ == '/') Next();
item_text_.assign(start, current_);
item_type_ = NAME;
return;
}
// Single character item.
item_type_ = *current_;
Next();
}
void FMLParser::Parse(const string &source,
FeatureExtractorDescriptor *result) {
// Initialize parser.
Initialize(source);
while (item_type_ != END) {
// Parse either a parameter name or a feature.
if (item_type_ != NAME) Error("Feature type name expected");
string name = item_text_;
NextItem();
if (item_type_ == '=') {
Error("Invalid syntax: feature expected");
} else {
// Parse feature.
FeatureFunctionDescriptor *descriptor = result->add_feature();
descriptor->set_type(name);
ParseFeature(descriptor);
}
}
}
void FMLParser::ParseFeature(FeatureFunctionDescriptor *result) {
// Parse argument and parameters.
if (item_type_ == '(') {
NextItem();
ParseParameter(result);
while (item_type_ == ',') {
NextItem();
ParseParameter(result);
}
if (item_type_ != ')') Error(") expected");
NextItem();
}
// Parse feature name.
if (item_type_ == ':') {
NextItem();
if (item_type_ != NAME && item_type_ != STRING) {
Error("Feature name expected");
}
string name = item_text_;
NextItem();
// Set feature name.
result->set_name(name);
}
// Parse sub-features.
if (item_type_ == '.') {
// Parse dotted sub-feature.
NextItem();
if (item_type_ != NAME) Error("Feature type name expected");
string type = item_text_;
NextItem();
// Parse sub-feature.
FeatureFunctionDescriptor *subfeature = result->add_feature();
subfeature->set_type(type);
ParseFeature(subfeature);
} else if (item_type_ == '{') {
// Parse sub-feature block.
NextItem();
while (item_type_ != '}') {
if (item_type_ != NAME) Error("Feature type name expected");
string type = item_text_;
NextItem();
// Parse sub-feature.
FeatureFunctionDescriptor *subfeature = result->add_feature();
subfeature->set_type(type);
ParseFeature(subfeature);
}
NextItem();
}
}
void FMLParser::ParseParameter(FeatureFunctionDescriptor *result) {
if (item_type_ == NUMBER) {
int argument =
utils::ParseUsing<int>(item_text_, tensorflow::strings::safe_strto32);
NextItem();
// Set default argument for feature.
result->set_argument(argument);
} else if (item_type_ == NAME) {
string name = item_text_;
NextItem();
if (item_type_ != '=') Error("= expected");
NextItem();
if (item_type_ >= END) Error("Parameter value expected");
string value = item_text_;
NextItem();
// Add parameter to feature.
Parameter *parameter;
parameter = result->add_parameter();
parameter->set_name(name);
parameter->set_value(value);
} else {
Error("Syntax error in parameter list");
}
}
void ToFMLFunction(const FeatureFunctionDescriptor &function, string *output) {
output->append(function.type());
if (function.argument() != 0 || function.parameter_size() > 0) {
output->append("(");
bool first = true;
if (function.argument() != 0) {
tensorflow::strings::StrAppend(output, function.argument());
first = false;
}
for (int i = 0; i < function.parameter_size(); ++i) {
if (!first) output->append(",");
output->append(function.parameter(i).name());
output->append("=");
output->append("\"");
output->append(function.parameter(i).value());
output->append("\"");
first = false;
}
output->append(")");
}
}
void ToFML(const FeatureFunctionDescriptor &function, string *output) {
ToFMLFunction(function, output);
if (function.feature_size() == 1) {
output->append(".");
ToFML(function.feature(0), output);
} else if (function.feature_size() > 1) {
output->append(" { ");
for (int i = 0; i < function.feature_size(); ++i) {
if (i > 0) output->append(" ");
ToFML(function.feature(i), output);
}
output->append(" } ");
}
}
void ToFML(const FeatureExtractorDescriptor &extractor, string *output) {
for (int i = 0; i < extractor.feature_size(); ++i) {
ToFML(extractor.feature(i), output);
output->append("\n");
}
}
string AsFML(const FeatureFunctionDescriptor &function) {
string str;
ToFML(function, &str);
return str;
}
string AsFML(const FeatureExtractorDescriptor &extractor) {
string str;
ToFML(extractor, &str);
return str;
}
void StripFML(string *fml_string) {
auto it = fml_string->begin();
while (it != fml_string->end()) {
if (*it == '"') {
it = fml_string->erase(it);
} else {
++it;
}
}
}
} // namespace syntaxnet
/* Copyright 2016 Google Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Feature modeling language (fml) parser.
//
// BNF grammar for fml:
//
// <feature model> ::= { <feature extractor> }
//
// <feature extractor> ::= <extractor spec> |
// <extractor spec> '.' <feature extractor> |
// <extractor spec> '{' { <feature extractor> } '}'
//
// <extractor spec> ::= <extractor type>
// [ '(' <parameter list> ')' ]
// [ ':' <extractor name> ]
//
// <parameter list> ::= ( <parameter> | <argument> ) { ',' <parameter> }
//
// <parameter> ::= <parameter name> '=' <parameter value>
//
// <extractor type> ::= NAME
// <extractor name> ::= NAME | STRING
// <argument> ::= NUMBER
// <parameter name> ::= NAME
// <parameter value> ::= NUMBER | STRING | NAME
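//
// Example feature models accepted by this grammar (the first two also appear
// in the parser feature specs elsewhere in this package; the last line is
// purely illustrative, with hypothetical extractor and parameter names):
//
//   stack.child(1).label
//   stack.child(1).sibling(-1).label
//   input(1).token.word(min-freq=5):rare-word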
#ifndef $TARGETDIR_FML_PARSER_H_
#define $TARGETDIR_FML_PARSER_H_
#include <string>
#include "syntaxnet/utils.h"
#include "syntaxnet/feature_extractor.pb.h"
namespace syntaxnet {
class FMLParser {
public:
// Parses fml specification into feature extractor descriptor.
void Parse(const string &source, FeatureExtractorDescriptor *result);
private:
// Initializes the parser with the source text.
void Initialize(const string &source);
// Outputs error message and exits.
void Error(const string &error_message);
// Moves to the next input character.
void Next();
// Moves to the next input item.
void NextItem();
// Parses a feature descriptor.
void ParseFeature(FeatureFunctionDescriptor *result);
// Parses a parameter specification.
void ParseParameter(FeatureFunctionDescriptor *result);
// Returns true if end of source input has been reached.
bool eos() { return current_ == source_.end(); }
// Item types.
enum ItemTypes {
END = 0,
NAME = -1,
NUMBER = -2,
STRING = -3,
};
// Source text.
string source_;
// Current input position.
string::iterator current_;
// Line number for current input position.
int line_number_;
// Start position for current item.
string::iterator item_start_;
// Start position for current line.
string::iterator line_start_;
// Line number for current item.
int item_line_number_;
// Item type for current item. If this is positive it is interpreted as a
// character. If it is negative it is interpreted as an item type.
int item_type_;
// Text for current item.
string item_text_;
};
} // namespace syntaxnet
#endif // $TARGETDIR_FML_PARSER_H_
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Builds parser models."""
import tensorflow as tf
import syntaxnet.load_parser_ops
from tensorflow.python.ops import control_flow_ops as cf
from tensorflow.python.ops import state_ops
from tensorflow.python.platform import logging
from syntaxnet.ops import gen_parser_ops
def BatchedSparseToDense(sparse_indices, output_size):
"""Batch compatible sparse to dense conversion.
This is useful for one-hot coded target labels.
Args:
sparse_indices: [batch_size] tensor containing one index per batch
output_size: needed in order to generate the correct dense output
Returns:
A [batch_size, output_size] dense tensor.
"""
eye = tf.diag(tf.fill([output_size], tf.constant(1, tf.float32)))
return tf.nn.embedding_lookup(eye, sparse_indices)
def EmbeddingLookupFeatures(params, sparse_features, allow_weights):
"""Computes embeddings for each entry of sparse features sparse_features.
Args:
params: list of 2D tensors containing vector embeddings
sparse_features: 1D tensor of strings. Each entry is a string encoding of
dist_belief.SparseFeatures, and represents a variable length list of
feature ids, and optionally, corresponding weights values.
allow_weights: boolean to control whether the weights returned from the
SparseFeatures are used to multiply the embeddings.
Returns:
A tensor representing the combined embeddings for the sparse features.
For each entry s in sparse_features, the function looks up the embeddings
    for each id and sums them into a single tensor, weighting them by the
weight of each id. It returns a tensor with each entry of sparse_features
replaced by this combined embedding.
"""
if not isinstance(params, list):
params = [params]
# Lookup embeddings.
sparse_features = tf.convert_to_tensor(sparse_features)
indices, ids, weights = gen_parser_ops.unpack_sparse_features(sparse_features)
embeddings = tf.nn.embedding_lookup(params, ids)
if allow_weights:
# Multiply by weights, reshaping to allow broadcast.
broadcast_weights_shape = tf.concat(0, [tf.shape(weights), [1]])
embeddings *= tf.reshape(weights, broadcast_weights_shape)
# Sum embeddings by index.
return tf.unsorted_segment_sum(embeddings, indices, tf.size(sparse_features))
class GreedyParser(object):
"""Builds a Chen & Manning style greedy neural net parser.
Builds a graph with an optional reader op connected at one end and
operations needed to train the network on the other. Supports multiple
network instantiations sharing the same parameters and network topology.
The following named nodes are added to the training and eval networks:
epochs: a tensor containing the current epoch number
cost: a tensor containing the current training step cost
gold_actions: a tensor containing actions from gold decoding
feature_endpoints: a list of sparse feature vectors
logits: output of the final layer before computing softmax
The training network also contains:
train_op: an op that executes a single training step
Typical usage:
parser = graph_builder.GreedyParser(num_actions, num_features,
num_feature_ids, embedding_sizes,
hidden_layer_sizes)
parser.AddTraining(task_context, batch_size=5)
with tf.Session('local') as sess:
# This works because the session uses the same default graph as the
# GraphBuilder did.
sess.run(parser.inits.values())
while True:
tf_epoch, _ = sess.run([parser.training['epoch'],
parser.training['train_op']])
if tf_epoch[0] > 0:
break
"""
def __init__(self,
num_actions,
num_features,
num_feature_ids,
embedding_sizes,
hidden_layer_sizes,
seed=None,
gate_gradients=False,
use_locking=False,
embedding_init=1.0,
relu_init=1e-4,
bias_init=0.2,
softmax_init=1e-4,
averaging_decay=0.9999,
use_averaging=True,
check_parameters=True,
check_every=1,
allow_feature_weights=False,
only_train='',
arg_prefix=None,
**unused_kwargs):
"""Initialize the graph builder with parameters defining the network.
Args:
num_actions: int size of the set of parser actions
num_features: int list of dimensions of the feature vectors
num_feature_ids: int list of same length as num_features corresponding to
the sizes of the input feature spaces
embedding_sizes: int list of same length as num_features of the desired
embedding layer sizes
hidden_layer_sizes: int list of desired relu layer sizes; may be empty
seed: optional random initializer seed to enable reproducibility
gate_gradients: if True, gradient updates are computed synchronously,
ensuring consistency and reproducibility
use_locking: if True, use locking to avoid read-write contention when
updating Variables
embedding_init: sets the std dev of normal initializer of embeddings to
embedding_init / embedding_size ** .5
relu_init: sets the std dev of normal initializer of relu weights
to relu_init
bias_init: sets constant initializer of relu bias to bias_init
      softmax_init: sets the std dev of normal initializer of softmax weights
to softmax_init
averaging_decay: decay for exponential moving average when computing
averaged parameters, set to 1 to do vanilla averaging
use_averaging: whether to use moving averages of parameters during evals
check_parameters: whether to check for NaN/Inf parameters during
training
check_every: checks numerics every check_every steps.
allow_feature_weights: whether feature weights are allowed.
only_train: the comma separated set of parameter names to train. If empty,
all model parameters will be trained.
arg_prefix: prefix for context parameters.
"""
self._num_actions = num_actions
self._num_features = num_features
self._num_feature_ids = num_feature_ids
self._embedding_sizes = embedding_sizes
self._hidden_layer_sizes = hidden_layer_sizes
self._seed = seed
self._gate_gradients = gate_gradients
self._use_locking = use_locking
self._use_averaging = use_averaging
self._check_parameters = check_parameters
self._check_every = check_every
self._allow_feature_weights = allow_feature_weights
self._only_train = set(only_train.split(',')) if only_train else None
self._feature_size = len(embedding_sizes)
self._embedding_init = embedding_init
self._relu_init = relu_init
self._softmax_init = softmax_init
self._arg_prefix = arg_prefix
# Parameters of the network with respect to which training is done.
self.params = {}
# Other variables, with respect to which no training is done, but which we
# nonetheless need to save in order to capture the state of the graph.
self.variables = {}
# Operations to initialize any nodes that require initialization.
self.inits = {}
# Training- and eval-related nodes.
self.training = {}
self.evaluation = {}
self.saver = None
# Nodes to compute moving averages of parameters, called every train step.
self._averaging = {}
self._averaging_decay = averaging_decay
# Pretrained embeddings that can be used instead of constant initializers.
self._pretrained_embeddings = {}
# After the following 'with' statement, we'll be able to re-enter the
# 'params' scope by re-using the self._param_scope member variable. See for
# instance _AddParam.
with tf.name_scope('params') as self._param_scope:
self._relu_bias_init = tf.constant_initializer(bias_init)
@property
def embedding_size(self):
size = 0
for i in range(self._feature_size):
size += self._num_features[i] * self._embedding_sizes[i]
return size
def _AddParam(self,
shape,
dtype,
name,
initializer=None,
return_average=False):
"""Add a model parameter w.r.t. we expect to compute gradients.
_AddParam creates both regular parameters (usually for training) and
averaged nodes (usually for inference). It returns one or the other based
on the 'return_average' arg.
Args:
shape: int list, tensor shape of the parameter to create
dtype: tf.DataType, data type of the parameter
name: string, name of the parameter in the TF graph
      initializer: optional initializer for the parameter
      return_average: if False, return the parameter; otherwise return its
        moving average
Returns:
parameter or averaged parameter
"""
if name not in self.params:
step = tf.cast(self.GetStep(), tf.float32)
# Put all parameters and their initializing ops in their own scope
# irrespective of the current scope (training or eval).
with tf.name_scope(self._param_scope):
self.params[name] = tf.get_variable(name, shape, dtype, initializer)
param = self.params[name]
if initializer is not None:
self.inits[name] = state_ops.init_variable(param, initializer)
if self._averaging_decay == 1:
logging.info('Using vanilla averaging of parameters.')
ema = tf.train.ExponentialMovingAverage(decay=(step / (step + 1.0)),
num_updates=None)
else:
ema = tf.train.ExponentialMovingAverage(decay=self._averaging_decay,
num_updates=step)
self._averaging[name + '_avg_update'] = ema.apply([param])
self.variables[name + '_avg_var'] = ema.average(param)
self.inits[name + '_avg_init'] = state_ops.init_variable(
ema.average(param), tf.zeros_initializer)
return (self.variables[name + '_avg_var'] if return_average else
self.params[name])
def GetStep(self):
def OnesInitializer(shape, dtype=tf.float32):
return tf.ones(shape, dtype)
return self._AddVariable([], tf.int32, 'step', OnesInitializer)
def _AddVariable(self, shape, dtype, name, initializer=None):
if name in self.variables:
return self.variables[name]
self.variables[name] = tf.get_variable(name, shape, dtype, initializer)
if initializer is not None:
self.inits[name] = state_ops.init_variable(self.variables[name],
initializer)
return self.variables[name]
def _ReluWeightInitializer(self):
with tf.name_scope(self._param_scope):
return tf.random_normal_initializer(stddev=self._relu_init,
seed=self._seed)
def _EmbeddingMatrixInitializer(self, index, embedding_size):
if index in self._pretrained_embeddings:
return self._pretrained_embeddings[index]
else:
return tf.random_normal_initializer(
stddev=self._embedding_init / embedding_size**.5,
seed=self._seed)
def _AddEmbedding(self,
features,
num_features,
num_ids,
embedding_size,
index,
return_average=False):
"""Adds an embedding matrix and passes the `features` vector through it."""
embedding_matrix = self._AddParam(
[num_ids, embedding_size],
tf.float32,
'embedding_matrix_%d' % index,
self._EmbeddingMatrixInitializer(index, embedding_size),
return_average=return_average)
embedding = EmbeddingLookupFeatures(embedding_matrix,
tf.reshape(features,
[-1],
name='feature_%d' % index),
self._allow_feature_weights)
return tf.reshape(embedding, [-1, num_features * embedding_size])
def _BuildNetwork(self, feature_endpoints, return_average=False):
"""Builds a feed-forward part of the net given features as input.
    The network topology is already defined in the constructor, so multiple
    calls to _BuildNetwork build multiple networks whose parameters are all
shared. It is the source of the input features and the use of the output
that distinguishes each network.
Args:
feature_endpoints: tensors with input features to the network
return_average: whether to use moving averages as model parameters
Returns:
logits: output of the final layer before computing softmax
"""
assert len(feature_endpoints) == self._feature_size
# Create embedding layer.
embeddings = []
for i in range(self._feature_size):
embeddings.append(self._AddEmbedding(feature_endpoints[i],
self._num_features[i],
self._num_feature_ids[i],
self._embedding_sizes[i],
i,
return_average=return_average))
last_layer = tf.concat(1, embeddings)
last_layer_size = self.embedding_size
# Create ReLU layers.
for i, hidden_layer_size in enumerate(self._hidden_layer_sizes):
weights = self._AddParam(
[last_layer_size, hidden_layer_size],
tf.float32,
'weights_%d' % i,
self._ReluWeightInitializer(),
return_average=return_average)
bias = self._AddParam([hidden_layer_size],
tf.float32,
'bias_%d' % i,
self._relu_bias_init,
return_average=return_average)
last_layer = tf.nn.relu_layer(last_layer,
weights,
bias,
name='layer_%d' % i)
last_layer_size = hidden_layer_size
# Create softmax layer.
softmax_weight = self._AddParam(
[last_layer_size, self._num_actions],
tf.float32,
'softmax_weight',
tf.random_normal_initializer(stddev=self._softmax_init,
seed=self._seed),
return_average=return_average)
softmax_bias = self._AddParam(
[self._num_actions],
tf.float32,
'softmax_bias',
tf.zeros_initializer,
return_average=return_average)
logits = tf.nn.xw_plus_b(last_layer,
softmax_weight,
softmax_bias,
name='logits')
return {'logits': logits}
def _AddGoldReader(self, task_context, batch_size, corpus_name):
features, epochs, gold_actions = (
gen_parser_ops.gold_parse_reader(task_context,
self._feature_size,
batch_size,
corpus_name=corpus_name,
arg_prefix=self._arg_prefix))
return {'gold_actions': tf.identity(gold_actions,
name='gold_actions'),
'epochs': tf.identity(epochs,
name='epochs'),
'feature_endpoints': features}
def _AddDecodedReader(self, task_context, batch_size, transition_scores,
corpus_name):
features, epochs, eval_metrics, documents = (
gen_parser_ops.decoded_parse_reader(transition_scores,
task_context,
self._feature_size,
batch_size,
corpus_name=corpus_name,
arg_prefix=self._arg_prefix))
return {'eval_metrics': eval_metrics,
'epochs': tf.identity(epochs,
name='epochs'),
'feature_endpoints': features,
'documents': documents}
def _AddCostFunction(self, batch_size, gold_actions, logits):
"""Cross entropy plus L2 loss on weights and biases of the hidden layers."""
dense_golden = BatchedSparseToDense(gold_actions, self._num_actions)
cross_entropy = tf.div(
tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(
logits, dense_golden)), batch_size)
regularized_params = [tf.nn.l2_loss(p)
for k, p in self.params.items()
if k.startswith('weights') or k.startswith('bias')]
l2_loss = 1e-4 * tf.add_n(regularized_params) if regularized_params else 0
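    # Spelled out (a sketch of what the graph above computes), for batch size
    # B, gold one-hot targets y_i and logits x_i:
    #   cost = (1/B) * sum_i softmax_cross_entropy(x_i, y_i)
    #          + 1e-4 * sum_p tf.nn.l2_loss(p)
    # where p ranges over the hidden-layer 'weights_*' and 'bias_*' params
    # and tf.nn.l2_loss(p) = sum(p ** 2) / 2.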
return {'cost': tf.add(cross_entropy, l2_loss, name='cost')}
def AddEvaluation(self,
task_context,
batch_size,
evaluation_max_steps=300,
corpus_name='documents'):
"""Builds the forward network only without the training operation.
Args:
task_context: file path from which to read the task context.
batch_size: batch size to request from reader op.
evaluation_max_steps: max number of parsing actions during evaluation,
only used in beam parsing.
corpus_name: name of the task input to read parses from.
Returns:
Dictionary of named eval nodes.
"""
def _AssignTransitionScores():
return tf.assign(nodes['transition_scores'],
nodes['logits'], validate_shape=False)
def _Pass():
return tf.constant(-1.0)
unused_evaluation_max_steps = evaluation_max_steps
with tf.name_scope('evaluation'):
nodes = self.evaluation
nodes['transition_scores'] = self._AddVariable(
[batch_size, self._num_actions], tf.float32, 'transition_scores',
tf.constant_initializer(-1.0))
nodes.update(self._AddDecodedReader(task_context, batch_size, nodes[
'transition_scores'], corpus_name))
nodes.update(self._BuildNetwork(nodes['feature_endpoints'],
return_average=self._use_averaging))
nodes['eval_metrics'] = cf.with_dependencies(
[tf.cond(tf.greater(tf.size(nodes['logits']), 0),
_AssignTransitionScores, _Pass)],
nodes['eval_metrics'], name='eval_metrics')
return nodes
def _IncrementCounter(self, counter):
return state_ops.assign_add(counter, 1, use_locking=True)
def _AddLearningRate(self, initial_learning_rate, decay_steps):
"""Returns a learning rate that decays by 0.96 every decay_steps.
Args:
initial_learning_rate: initial value of the learning rate
decay_steps: decay by 0.96 every this many steps
Returns:
learning rate variable.
"""
step = self.GetStep()
return cf.with_dependencies(
[self._IncrementCounter(step)],
tf.train.exponential_decay(initial_learning_rate,
step,
decay_steps,
0.96,
staircase=True))
def AddPretrainedEmbeddings(self, index, embeddings_path, task_context):
"""Embeddings at the given index will be set to pretrained values."""
def _Initializer(shape, dtype=tf.float32):
unused_dtype = dtype
t = gen_parser_ops.word_embedding_initializer(
vectors=embeddings_path,
task_context=task_context,
embedding_init=self._embedding_init)
t.set_shape(shape)
return t
self._pretrained_embeddings[index] = _Initializer
def AddTraining(self,
task_context,
batch_size,
learning_rate=0.1,
decay_steps=4000,
momentum=0.9,
corpus_name='documents'):
"""Builds a trainer to minimize the cross entropy cost function.
Args:
task_context: file path from which to read the task context
batch_size: batch size to request from reader op
learning_rate: initial value of the learning rate
decay_steps: decay learning rate by 0.96 every this many steps
momentum: momentum parameter used when training with momentum
corpus_name: name of the task input to read parses from
Returns:
Dictionary of named training nodes.
"""
with tf.name_scope('training'):
nodes = self.training
nodes.update(self._AddGoldReader(task_context, batch_size, corpus_name))
nodes.update(self._BuildNetwork(nodes['feature_endpoints'],
return_average=False))
nodes.update(self._AddCostFunction(batch_size, nodes['gold_actions'],
nodes['logits']))
# Add the optimizer
if self._only_train:
trainable_params = [v
for k, v in self.params.iteritems()
if k in self._only_train]
else:
trainable_params = self.params.values()
lr = self._AddLearningRate(learning_rate, decay_steps)
optimizer = tf.train.MomentumOptimizer(lr,
momentum,
use_locking=self._use_locking)
train_op = optimizer.minimize(nodes['cost'], var_list=trainable_params)
for param in trainable_params:
slot = optimizer.get_slot(param, 'momentum')
self.inits[slot.name] = state_ops.init_variable(slot,
tf.zeros_initializer)
self.variables[slot.name] = slot
numerical_checks = [
tf.check_numerics(param,
message='Parameter is not finite.')
for param in trainable_params
if param.dtype.base_dtype in [tf.float32, tf.float64]
]
check_op = tf.group(*numerical_checks)
avg_update_op = tf.group(*self._averaging.values())
train_ops = [train_op]
if self._check_parameters:
train_ops.append(check_op)
if self._use_averaging:
train_ops.append(avg_update_op)
nodes['train_op'] = tf.group(*train_ops, name='train_op')
return nodes
def AddSaver(self, slim_model=False):
"""Adds ops to save and restore model parameters.
Args:
slim_model: whether only averaged variables are saved.
Returns:
the saver object.
"""
# We have to put the save op in the root scope otherwise running
# "save/restore_all" won't find the "save/Const" node it expects.
with tf.name_scope(None):
variables_to_save = self.params.copy()
variables_to_save.update(self.variables)
if slim_model:
for key in variables_to_save.keys():
if not key.endswith('avg_var'):
del variables_to_save[key]
self.saver = tf.train.Saver(variables_to_save)
return self.saver
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for graph_builder."""
# pylint: disable=no-name-in-module,unused-import,g-bad-import-order,maybe-no-member
import os.path
import tensorflow as tf
from tensorflow.python.framework import test_util
from tensorflow.python.ops import variables
from tensorflow.python.platform import googletest
from syntaxnet import graph_builder
from syntaxnet import sparse_pb2
from syntaxnet.ops import gen_parser_ops
FLAGS = tf.app.flags.FLAGS
if not hasattr(FLAGS, 'test_srcdir'):
FLAGS.test_srcdir = ''
if not hasattr(FLAGS, 'test_tmpdir'):
FLAGS.test_tmpdir = tf.test.get_temp_dir()
class GraphBuilderTest(test_util.TensorFlowTestCase):
def setUp(self):
# Creates a task context with the correct testing paths.
initial_task_context = os.path.join(
FLAGS.test_srcdir,
'syntaxnet/'
'testdata/context.pbtxt')
self._task_context = os.path.join(FLAGS.test_tmpdir, 'context.pbtxt')
with open(initial_task_context, 'r') as fin:
with open(self._task_context, 'w') as fout:
fout.write(fin.read().replace('SRCDIR', FLAGS.test_srcdir)
.replace('OUTPATH', FLAGS.test_tmpdir))
# Creates necessary term maps.
with self.test_session() as sess:
gen_parser_ops.lexicon_builder(task_context=self._task_context,
corpus_name='training-corpus').run()
self._num_features, self._num_feature_ids, _, self._num_actions = (
sess.run(gen_parser_ops.feature_size(task_context=self._task_context,
arg_prefix='brain_parser')))
def MakeBuilder(self, use_averaging=True, **kw_args):
# Set the seed and gate_gradients to ensure reproducibility.
return graph_builder.GreedyParser(
self._num_actions, self._num_features, self._num_feature_ids,
embedding_sizes=[8, 8, 8], hidden_layer_sizes=[32, 32], seed=42,
gate_gradients=True, use_averaging=use_averaging, **kw_args)
def FindNode(self, name):
for node in tf.get_default_graph().as_graph_def().node:
if node.name == name:
return node
return None
def NodeFound(self, name):
return self.FindNode(name) is not None
def testScope(self):
# Set up the network topology
graph = tf.Graph()
with graph.as_default():
parser = self.MakeBuilder()
parser.AddTraining(self._task_context,
batch_size=10,
corpus_name='training-corpus')
parser.AddEvaluation(self._task_context,
batch_size=2,
corpus_name='tuning-corpus')
parser.AddSaver()
# Check that the node ids we may rely on are there with the expected
# names.
self.assertEqual(parser.training['logits'].name, 'training/logits:0')
self.assertTrue(self.NodeFound('training/logits'))
self.assertTrue(self.NodeFound('training/feature_0'))
self.assertTrue(self.NodeFound('training/feature_1'))
self.assertTrue(self.NodeFound('training/feature_2'))
self.assertFalse(self.NodeFound('training/feature_3'))
self.assertEqual(parser.evaluation['logits'].name, 'evaluation/logits:0')
self.assertTrue(self.NodeFound('evaluation/logits'))
# The saver node is expected to be in the root scope.
self.assertTrue(self.NodeFound('save/restore_all'))
# Also check that the parameters have the scope we expect.
self.assertTrue(self.NodeFound('embedding_matrix_0'))
self.assertTrue(self.NodeFound('embedding_matrix_1'))
self.assertTrue(self.NodeFound('embedding_matrix_2'))
self.assertFalse(self.NodeFound('embedding_matrix_3'))
def testNestedScope(self):
# It's OK to put the whole graph in a scope of its own.
graph = tf.Graph()
with graph.as_default():
with graph.name_scope('top'):
parser = self.MakeBuilder()
parser.AddTraining(self._task_context,
batch_size=10,
corpus_name='training-corpus')
parser.AddSaver()
self.assertTrue(self.NodeFound('top/training/logits'))
self.assertTrue(self.NodeFound('top/training/feature_0'))
# The saver node is expected to be in the root scope no matter what.
self.assertFalse(self.NodeFound('top/save/restore_all'))
self.assertTrue(self.NodeFound('save/restore_all'))
def testUseCustomGraphs(self):
batch_size = 10
# Use separate custom graphs.
custom_train_graph = tf.Graph()
with custom_train_graph.as_default():
train_parser = self.MakeBuilder()
train_parser.AddTraining(self._task_context,
batch_size,
corpus_name='training-corpus')
custom_eval_graph = tf.Graph()
with custom_eval_graph.as_default():
eval_parser = self.MakeBuilder()
eval_parser.AddEvaluation(self._task_context,
batch_size,
corpus_name='tuning-corpus')
# The following session runs should not fail.
with self.test_session(graph=custom_train_graph) as sess:
self.assertTrue(self.NodeFound('training/logits'))
sess.run(train_parser.inits.values())
sess.run(['training/logits:0'])
with self.test_session(graph=custom_eval_graph) as sess:
self.assertFalse(self.NodeFound('training/logits'))
self.assertTrue(self.NodeFound('evaluation/logits'))
sess.run(eval_parser.inits.values())
sess.run(['evaluation/logits:0'])
def testTrainingAndEvalAreIndependent(self):
batch_size = 10
graph = tf.Graph()
with graph.as_default():
parser = self.MakeBuilder(use_averaging=False)
parser.AddTraining(self._task_context,
batch_size,
corpus_name='training-corpus')
parser.AddEvaluation(self._task_context,
batch_size,
corpus_name='tuning-corpus')
with self.test_session(graph=graph) as sess:
sess.run(parser.inits.values())
# Before any training updates are performed, both training and eval nets
# should return the same computations.
eval_logits, = sess.run([parser.evaluation['logits']])
training_logits, = sess.run([parser.training['logits']])
self.assertNear(abs((eval_logits - training_logits).sum()), 0, 1e-6)
# After training, activations should differ.
for _ in range(5):
eval_logits = parser.evaluation['logits'].eval()
for _ in range(5):
training_logits, _ = sess.run([parser.training['logits'],
parser.training['train_op']])
self.assertGreater(abs((eval_logits - training_logits).sum()), 0, 1e-3)
def testReproducibility(self):
batch_size = 10
def ComputeACost(graph):
with graph.as_default():
parser = self.MakeBuilder(use_averaging=False)
parser.AddTraining(self._task_context,
batch_size,
corpus_name='training-corpus')
parser.AddEvaluation(self._task_context,
batch_size,
corpus_name='tuning-corpus')
with self.test_session(graph=graph) as sess:
sess.run(parser.inits.values())
for _ in range(5):
cost, _ = sess.run([parser.training['cost'],
parser.training['train_op']])
return cost
cost1 = ComputeACost(tf.Graph())
cost2 = ComputeACost(tf.Graph())
self.assertNear(cost1, cost2, 1e-8)
def testAddTrainingAndEvalOrderIndependent(self):
batch_size = 10
graph1 = tf.Graph()
with graph1.as_default():
parser = self.MakeBuilder(use_averaging=False)
parser.AddTraining(self._task_context,
batch_size,
corpus_name='training-corpus')
parser.AddEvaluation(self._task_context,
batch_size,
corpus_name='tuning-corpus')
with self.test_session(graph=graph1) as sess:
sess.run(parser.inits.values())
metrics1 = None
for _ in range(500):
cost1, _ = sess.run([parser.training['cost'],
parser.training['train_op']])
em1 = parser.evaluation['eval_metrics'].eval()
metrics1 = metrics1 + em1 if metrics1 is not None else em1
# Reverse the order in which Training and Eval stacks are added.
graph2 = tf.Graph()
with graph2.as_default():
parser = self.MakeBuilder(use_averaging=False)
parser.AddEvaluation(self._task_context,
batch_size,
corpus_name='tuning-corpus')
parser.AddTraining(self._task_context,
batch_size,
corpus_name='training-corpus')
with self.test_session(graph=graph2) as sess:
sess.run(parser.inits.values())
metrics2 = None
for _ in range(500):
cost2, _ = sess.run([parser.training['cost'],
parser.training['train_op']])
em2 = parser.evaluation['eval_metrics'].eval()
metrics2 = metrics2 + em2 if metrics2 is not None else em2
self.assertNear(cost1, cost2, 1e-8)
self.assertEqual(abs(metrics1 - metrics2).sum(), 0)
def testEvalMetrics(self):
batch_size = 10
graph = tf.Graph()
with graph.as_default():
parser = self.MakeBuilder()
parser.AddEvaluation(self._task_context,
batch_size,
corpus_name='tuning-corpus')
with self.test_session(graph=graph) as sess:
sess.run(parser.inits.values())
tokens = 0
correct_heads = 0
for _ in range(100):
eval_metrics = sess.run(parser.evaluation['eval_metrics'])
tokens += eval_metrics[0]
correct_heads += eval_metrics[1]
self.assertGreater(tokens, 0)
self.assertGreaterEqual(tokens, correct_heads)
self.assertGreaterEqual(correct_heads, 0)
def MakeSparseFeatures(self, ids, weights):
f = sparse_pb2.SparseFeatures()
for i, w in zip(ids, weights):
f.id.append(i)
f.weight.append(w)
return f.SerializeToString()
def testEmbeddingOp(self):
graph = tf.Graph()
with self.test_session(graph=graph):
params = tf.constant([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]],
tf.float32)
var = variables.Variable([self.MakeSparseFeatures([1, 2], [1.0, 1.0]),
self.MakeSparseFeatures([], [])])
var.initializer.run()
embeddings = graph_builder.EmbeddingLookupFeatures(params, var,
True).eval()
self.assertAllClose([[8.0, 10.0], [0.0, 0.0]], embeddings)
var = variables.Variable([self.MakeSparseFeatures([], []),
self.MakeSparseFeatures([0, 2],
[0.5, 2.0])])
var.initializer.run()
embeddings = graph_builder.EmbeddingLookupFeatures(params, var,
True).eval()
self.assertAllClose([[0.0, 0.0], [10.5, 13.0]], embeddings)
def testOnlyTrainSomeParameters(self):
batch_size = 10
graph = tf.Graph()
with graph.as_default():
parser = self.MakeBuilder(use_averaging=False, only_train='softmax_bias')
parser.AddTraining(self._task_context,
batch_size,
corpus_name='training-corpus')
with self.test_session(graph=graph) as sess:
sess.run(parser.inits.values())
# Before training, save the state of two of the parameters.
bias0, weight0 = sess.run([parser.params['softmax_bias'],
parser.params['softmax_weight']])
for _ in range(5):
bias, weight, _ = sess.run([parser.params['softmax_bias'],
parser.params['softmax_weight'],
parser.training['train_op']])
# After training, only one of the parameters should have changed.
self.assertAllEqual(weight, weight0)
self.assertGreater(abs(bias - bias0).sum(), 0, 1e-5)
if __name__ == '__main__':
googletest.main()
// K-best part-of-speech and dependency annotations for tokens.
syntax = "proto2";
import "syntaxnet/sentence.proto";
package syntaxnet;
// A list of alternative (k-best) syntax analyses, grouped by sentences.
message KBestSyntaxAnalyses {
extend Sentence {
optional KBestSyntaxAnalyses extension = 60366242;
}
// Alternative analyses for each sentence. Sentences are listed in the
// order visited by a SentenceIterator.
repeated KBestSyntaxAnalysesForSentence sentence = 1;
// Alternative analyses for each token.
repeated KBestSyntaxAnalysesForToken token = 2;
}
// A list of alternative (k-best) analyses for a sentence spanning from a start
// token index to an end token index. The alternative analyses are ordered by
// decreasing model score from best to worst. The first analysis is the 1-best
// analysis, which is typically also stored in the document tokens.
message KBestSyntaxAnalysesForSentence {
// First token of sentence.
optional int32 start = 1 [default = -1];
// Last token of sentence.
optional int32 end = 2 [default = -1];
// K-best analyses for the tokens in this sentence. All of the analyses in
// the list have the same "type"; e.g., k-best taggings,
// k-best {tagging+parse}s, etc.
// Note also that the type of analysis stored in this list can change
// depending on where we are in the document processing pipeline; e.g.,
// may initially be taggings, and then switch to parses. The first
// token_analysis would be the 1-best analysis, which is typically also stored
// in the document. Note: some post-processors will update the document's
// syntax trees, but will leave these unchanged.
repeated AlternativeTokenAnalysis token_analysis = 3;
}
// A list of scored alternative (k-best) analyses for a particular token. These
// are all distinct from each other and ordered by decreasing model score. The
// first is the 1-best analysis, which may or may not match the document tokens
// depending on how the k-best analyses are selected.
message KBestSyntaxAnalysesForToken {
// All token analyses in this repeated field refer to the same token.
// Each alternative analysis will contain a single entry for repeated fields
// such as head, tag, category and label.
repeated AlternativeTokenAnalysis token_analysis = 3;
}
// An alternative analysis of tokens in the document. The repeated fields
// are indexed relative to the beginning of a sentence. Fields not
// represented in the alternative analysis are assumed to be unchanged.
// Currently only alternatives for tags, categories and (labeled) dependency
// heads are supported.
// Each repeated field should either have length=0 or length=number of tokens.
message AlternativeTokenAnalysis {
// Head of this token in the dependency tree: the id of the token which has
// an arc going to this one. If it is the root token of a sentence, then it
// is set to -1.
repeated int32 head = 1;
// Part-of-speech tag for token.
repeated string tag = 2;
// Coarse-grained word category for token.
repeated string category = 3;
// Label for dependency relation between this token and its head.
repeated string label = 4;
// The score of this analysis, where bigger values typically indicate better
// quality, but there are no guarantees and there is also no pre-defined
// range.
optional double score = 5;
}
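// An illustrative AlternativeTokenAnalysis in text format (all values are
// made up) for a three-token sentence whose second token is the root:
//   head: 1 head: -1 head: 1
//   tag: "NN" tag: "VB" tag: "NN"
//   label: "nsubj" label: "ROOT" label: "dobj"
//   score: -12.3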
/* Copyright 2016 Google Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <stddef.h>
#include <string>
#include "syntaxnet/utils.h"
#include "syntaxnet/affix.h"
#include "syntaxnet/dictionary.pb.h"
#include "syntaxnet/feature_extractor.h"
#include "syntaxnet/sentence_batch.h"
#include "syntaxnet/sentence.pb.h"
#include "syntaxnet/term_frequency_map.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/env.h"
// A task that collects term statistics over a corpus and saves a set of
// term maps; these saved mappings are used to map strings to ints in both the
// chunker trainer and the chunker processors.
using tensorflow::DEVICE_CPU;
using tensorflow::DT_INT32;
using tensorflow::OpKernel;
using tensorflow::OpKernelConstruction;
using tensorflow::OpKernelContext;
using tensorflow::Tensor;
using tensorflow::TensorShape;
using tensorflow::errors::InvalidArgument;
namespace syntaxnet {
// A workflow task that creates term maps (e.g., word, tag, etc.).
//
// Non-flag task parameters:
// int lexicon_max_prefix_length (3):
// The maximum prefix length for lexicon words.
// int lexicon_max_suffix_length (3):
// The maximum suffix length for lexicon words.
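//
// From Python, this op is typically invoked via the generated wrapper (see
// lexicon_builder_test.py):
//   gen_parser_ops.lexicon_builder(task_context=context_file,
//                                  corpus_name='training-corpus').run()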
class LexiconBuilder : public OpKernel {
public:
explicit LexiconBuilder(OpKernelConstruction *context) : OpKernel(context) {
OP_REQUIRES_OK(context, context->GetAttr("corpus_name", &corpus_name_));
OP_REQUIRES_OK(context, context->GetAttr("lexicon_max_prefix_length",
&max_prefix_length_));
OP_REQUIRES_OK(context, context->GetAttr("lexicon_max_suffix_length",
&max_suffix_length_));
string file_path, data;
OP_REQUIRES_OK(context, context->GetAttr("task_context", &file_path));
OP_REQUIRES_OK(context, ReadFileToString(tensorflow::Env::Default(),
file_path, &data));
OP_REQUIRES(context,
TextFormat::ParseFromString(data, task_context_.mutable_spec()),
InvalidArgument("Could not parse task context at ", file_path));
}
// Counts term frequencies.
void Compute(OpKernelContext *context) override {
// Term frequency maps to be populated by the corpus.
TermFrequencyMap words;
TermFrequencyMap lcwords;
TermFrequencyMap tags;
TermFrequencyMap categories;
TermFrequencyMap labels;
// Affix tables to be populated by the corpus.
AffixTable prefixes(AffixTable::PREFIX, max_prefix_length_);
AffixTable suffixes(AffixTable::SUFFIX, max_suffix_length_);
// Tag-to-category mapping.
TagToCategoryMap tag_to_category;
// Make a pass over the corpus.
int64 num_tokens = 0;
int64 num_documents = 0;
Sentence *document;
TextReader corpus(*task_context_.GetInput(corpus_name_));
while ((document = corpus.Read()) != NULL) {
// Gather token information.
for (int t = 0; t < document->token_size(); ++t) {
// Get token and lowercased word.
const Token &token = document->token(t);
string word = token.word();
utils::NormalizeDigits(&word);
string lcword = tensorflow::str_util::Lowercase(word);
// Make sure the token does not contain a newline.
CHECK(lcword.find('\n') == string::npos);
// Increment frequencies (only for terms that exist).
if (!word.empty() && !HasSpaces(word)) words.Increment(word);
if (!lcword.empty() && !HasSpaces(lcword)) lcwords.Increment(lcword);
if (!token.tag().empty()) tags.Increment(token.tag());
if (!token.category().empty()) categories.Increment(token.category());
if (!token.label().empty()) labels.Increment(token.label());
// Add prefixes/suffixes for the current word.
prefixes.AddAffixesForWord(word.c_str(), word.size());
suffixes.AddAffixesForWord(word.c_str(), word.size());
// Add mapping from tag to category.
tag_to_category.SetCategory(token.tag(), token.category());
// Update the number of processed tokens.
++num_tokens;
}
delete document;
++num_documents;
}
LOG(INFO) << "Term maps collected over " << num_tokens << " tokens from "
<< num_documents << " documents";
// Write mappings to disk.
words.Save(TaskContext::InputFile(*task_context_.GetInput("word-map")));
lcwords.Save(TaskContext::InputFile(*task_context_.GetInput("lcword-map")));
tags.Save(TaskContext::InputFile(*task_context_.GetInput("tag-map")));
categories.Save(
TaskContext::InputFile(*task_context_.GetInput("category-map")));
labels.Save(TaskContext::InputFile(*task_context_.GetInput("label-map")));
// Write affixes to disk.
WriteAffixTable(prefixes, TaskContext::InputFile(
*task_context_.GetInput("prefix-table")));
WriteAffixTable(suffixes, TaskContext::InputFile(
*task_context_.GetInput("suffix-table")));
// Write tag-to-category mapping to disk.
tag_to_category.Save(
TaskContext::InputFile(*task_context_.GetInput("tag-to-category")));
}
private:
// Returns true if the word contains spaces.
static bool HasSpaces(const string &word) {
for (char c : word) {
if (c == ' ') return true;
}
return false;
}
// Writes an affix table to a task output.
static void WriteAffixTable(const AffixTable &affixes,
const string &output_file) {
ProtoRecordWriter writer(output_file);
affixes.Write(&writer);
}
// Name of the context input to compute lexicons.
string corpus_name_;
// Max length for prefix table.
int max_prefix_length_;
// Max length for suffix table.
int max_suffix_length_;
// Task context used to configure this op.
TaskContext task_context_;
};
REGISTER_KERNEL_BUILDER(Name("LexiconBuilder").Device(DEVICE_CPU),
LexiconBuilder);
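// Op that computes, for each embedding group of the parser feature extractor
// configured by the given task context and arg_prefix, the number of
// features, the feature domain size, and the embedding dimension, plus the
// number of actions in the transition system. From Python it is typically
// invoked as (see graph_builder_test.py):
//   feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
//       gen_parser_ops.feature_size(task_context=...,
//                                   arg_prefix='brain_parser'))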
class FeatureSize : public OpKernel {
public:
explicit FeatureSize(OpKernelConstruction *context) : OpKernel(context) {
string task_context_path;
OP_REQUIRES_OK(context,
context->GetAttr("task_context", &task_context_path));
OP_REQUIRES_OK(context, context->GetAttr("arg_prefix", &arg_prefix_));
OP_REQUIRES_OK(context, context->MatchSignature(
{}, {DT_INT32, DT_INT32, DT_INT32, DT_INT32}));
string data;
OP_REQUIRES_OK(context, ReadFileToString(tensorflow::Env::Default(),
task_context_path, &data));
OP_REQUIRES(
context,
TextFormat::ParseFromString(data, task_context_.mutable_spec()),
InvalidArgument("Could not parse task context at ", task_context_path));
string label_map_path =
TaskContext::InputFile(*task_context_.GetInput("label-map"));
label_map_ = SharedStoreUtils::GetWithDefaultName<TermFrequencyMap>(
label_map_path, 0, 0);
}
~FeatureSize() override { SharedStore::Release(label_map_); }
void Compute(OpKernelContext *context) override {
// Computes feature sizes.
ParserEmbeddingFeatureExtractor features(arg_prefix_);
features.Setup(&task_context_);
features.Init(&task_context_);
const int num_embeddings = features.NumEmbeddings();
Tensor *feature_sizes = nullptr;
Tensor *domain_sizes = nullptr;
Tensor *embedding_dims = nullptr;
Tensor *num_actions = nullptr;
TF_CHECK_OK(context->allocate_output(0, TensorShape({num_embeddings}),
&feature_sizes));
TF_CHECK_OK(context->allocate_output(1, TensorShape({num_embeddings}),
&domain_sizes));
TF_CHECK_OK(context->allocate_output(2, TensorShape({num_embeddings}),
&embedding_dims));
TF_CHECK_OK(context->allocate_output(3, TensorShape({}), &num_actions));
for (int i = 0; i < num_embeddings; ++i) {
feature_sizes->vec<int32>()(i) = features.FeatureSize(i);
domain_sizes->vec<int32>()(i) = features.EmbeddingSize(i);
embedding_dims->vec<int32>()(i) = features.EmbeddingDims(i);
}
// Computes number of actions in the transition system.
std::unique_ptr<ParserTransitionSystem> transition_system(
ParserTransitionSystem::Create(task_context_.Get(
features.GetParamName("transition_system"), "arc-standard")));
transition_system->Setup(&task_context_);
transition_system->Init(&task_context_);
num_actions->scalar<int32>()() =
transition_system->NumActions(label_map_->Size());
}
private:
// Task context used to configure this op.
TaskContext task_context_;
// Dependency label map used in transition system.
const TermFrequencyMap *label_map_;
// Prefix for context parameters.
string arg_prefix_;
};
REGISTER_KERNEL_BUILDER(Name("FeatureSize").Device(DEVICE_CPU), FeatureSize);
} // namespace syntaxnet
# coding=utf-8
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for lexicon_builder."""
# pylint: disable=no-name-in-module,unused-import,g-bad-import-order,maybe-no-member
import os.path
import tensorflow as tf
import syntaxnet.load_parser_ops
from tensorflow.python.framework import test_util
from tensorflow.python.platform import googletest
from tensorflow.python.platform import logging
from syntaxnet import sentence_pb2
from syntaxnet import task_spec_pb2
from syntaxnet.ops import gen_parser_ops
FLAGS = tf.app.flags.FLAGS
CONLL_DOC1 = u'''1 बात _ n NN _ _ _ _ _
2 गलत _ adj JJ _ _ _ _ _
3 हो _ v VM _ _ _ _ _
4 तो _ avy CC _ _ _ _ _
5 गुस्सा _ n NN _ _ _ _ _
6 सेलेब्रिटिज _ n NN _ _ _ _ _
7 को _ psp PSP _ _ _ _ _
8 भी _ avy RP _ _ _ _ _
9 आना _ v VM _ _ _ _ _
10 लाजमी _ adj JJ _ _ _ _ _
11 है _ v VM _ _ _ _ _
12 । _ punc SYM _ _ _ _ _'''
CONLL_DOC2 = u'''1 लेकिन _ avy CC _ _ _ _ _
2 अभिनेत्री _ n NN _ _ _ _ _
3 के _ psp PSP _ _ _ _ _
4 इस _ pn DEM _ _ _ _ _
5 कदम _ n NN _ _ _ _ _
6 से _ psp PSP _ _ _ _ _
7 वहां _ pn PRP _ _ _ _ _
8 रंग _ n NN _ _ _ _ _
9 में _ psp PSP _ _ _ _ _
10 भंग _ adj JJ _ _ _ _ _
11 पड़ _ v VM _ _ _ _ _
12 गया _ v VAUX _ _ _ _ _
13 । _ punc SYM _ _ _ _ _'''
TAGS = ['NN', 'JJ', 'VM', 'CC', 'PSP', 'RP', 'JJ', 'SYM', 'DEM', 'PRP', 'VAUX']
CATEGORIES = ['n', 'adj', 'v', 'avy', 'n', 'psp', 'punc', 'pn']
TOKENIZED_DOCS = u'''बात गलत हो तो गुस्सा सेलेब्रिटिज को भी आना लाजमी है ।
लेकिन अभिनेत्री के इस कदम से वहां रंग में भंग पड़ गया ।
'''
COMMENTS = u'# Line with fake comments.'
class LexiconBuilderTest(test_util.TensorFlowTestCase):
def setUp(self):
if not hasattr(FLAGS, 'test_srcdir'):
FLAGS.test_srcdir = ''
if not hasattr(FLAGS, 'test_tmpdir'):
FLAGS.test_tmpdir = tf.test.get_temp_dir()
self.corpus_file = os.path.join(FLAGS.test_tmpdir, 'documents.conll')
self.context_file = os.path.join(FLAGS.test_tmpdir, 'context.pbtxt')
def AddInput(self, name, file_pattern, record_format, context):
inp = context.input.add()
inp.name = name
inp.record_format.append(record_format)
inp.part.add().file_pattern = file_pattern
def WriteContext(self, corpus_format):
context = task_spec_pb2.TaskSpec()
self.AddInput('documents', self.corpus_file, corpus_format, context)
for name in ('word-map', 'lcword-map', 'tag-map',
'category-map', 'label-map', 'prefix-table',
'suffix-table', 'tag-to-category'):
self.AddInput(name, os.path.join(FLAGS.test_tmpdir, name), '', context)
logging.info('Writing context to: %s', self.context_file)
with open(self.context_file, 'w') as f:
f.write(str(context))
def ReadNextDocument(self, sess, doc_source):
doc_str, last = sess.run(doc_source)
if doc_str:
doc = sentence_pb2.Sentence()
doc.ParseFromString(doc_str[0])
else:
doc = None
return doc, last
def ValidateDocuments(self):
doc_source = gen_parser_ops.document_source(self.context_file, batch_size=1)
with self.test_session() as sess:
logging.info('Reading document1')
doc, last = self.ReadNextDocument(sess, doc_source)
self.assertEqual(len(doc.token), 12)
self.assertEqual(u'लाजमी', doc.token[9].word)
self.assertFalse(last)
logging.info('Reading document2')
doc, last = self.ReadNextDocument(sess, doc_source)
self.assertEqual(len(doc.token), 13)
self.assertEqual(u'भंग', doc.token[9].word)
self.assertFalse(last)
logging.info('Hitting end of the dataset')
doc, last = self.ReadNextDocument(sess, doc_source)
self.assertTrue(doc is None)
self.assertTrue(last)
def ValidateTagToCategoryMap(self):
    with open(os.path.join(FLAGS.test_tmpdir, 'tag-to-category'), 'r') as f:
entries = [line.strip().split('\t') for line in f.readlines()]
for tag, category in entries:
self.assertIn(tag, TAGS)
self.assertIn(category, CATEGORIES)
def BuildLexicon(self):
with self.test_session():
gen_parser_ops.lexicon_builder(task_context=self.context_file).run()
def testCoNLLFormat(self):
self.WriteContext('conll-sentence')
logging.info('Writing conll file to: %s', self.corpus_file)
with open(self.corpus_file, 'w') as f:
f.write((CONLL_DOC1 + u'\n\n' + CONLL_DOC2 + u'\n')
.replace(' ', '\t').encode('utf-8'))
self.ValidateDocuments()
self.BuildLexicon()
self.ValidateTagToCategoryMap()
def testCoNLLFormatExtraNewlinesAndComments(self):
self.WriteContext('conll-sentence')
with open(self.corpus_file, 'w') as f:
f.write((u'\n\n\n' + CONLL_DOC1 + u'\n\n\n' + COMMENTS +
u'\n\n' + CONLL_DOC2).replace(' ', '\t').encode('utf-8'))
self.ValidateDocuments()
self.BuildLexicon()
self.ValidateTagToCategoryMap()
def testTokenizedTextFormat(self):
self.WriteContext('tokenized-text')
with open(self.corpus_file, 'w') as f:
f.write(TOKENIZED_DOCS.encode('utf-8'))
self.ValidateDocuments()
self.BuildLexicon()
def testTokenizedTextFormatExtraNewlines(self):
self.WriteContext('tokenized-text')
with open(self.corpus_file, 'w') as f:
f.write((u'\n\n\n' + TOKENIZED_DOCS + u'\n\n\n').encode('utf-8'))
self.ValidateDocuments()
self.BuildLexicon()
if __name__ == '__main__':
googletest.main()
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Loads parser_ops shared library."""
import os.path
import tensorflow as tf
tf.load_op_library(
os.path.join(tf.resource_loader.get_data_files_path(),
'parser_ops.so'))
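The lexicon builder test above relies on importing this module purely for its side effect; a minimal sketch of that pattern, with a placeholder task context path:

import syntaxnet.load_parser_ops  # side effect only: loads parser_ops.so
from syntaxnet.ops import gen_parser_ops

# Once the shared library is loaded, the generated wrappers are available,
# e.g. the document reader exercised in lexicon_builder_test:
doc_source = gen_parser_ops.document_source('context.pbtxt', batch_size=1)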
Parameter {
name: "brain_parser_embedding_dims"
value: "32;32;64"
}
Parameter {
name: "brain_parser_embedding_names"
value: "labels;tags;words"
}
Parameter {
name: 'brain_parser_scoring'
value: 'default'
}
Parameter {
name: "brain_parser_features"
value:
'stack.child(1).label '
'stack.child(1).sibling(-1).label '
'stack.child(-1).label '
'stack.child(-1).sibling(1).label '
'stack.child(2).label '
'stack.child(-2).label '
'stack(1).child(1).label '
'stack(1).child(1).sibling(-1).label '
'stack(1).child(-1).label '
'stack(1).child(-1).sibling(1).label '
'stack(1).child(2).label '
'stack(1).child(-2).label; '
'input.token.tag '
'input(1).token.tag '
'input(2).token.tag '
'input(3).token.tag '
'stack.token.tag '
'stack.child(1).token.tag '
'stack.child(1).sibling(-1).token.tag '
'stack.child(-1).token.tag '
'stack.child(-1).sibling(1).token.tag '
'stack.child(2).token.tag '
'stack.child(-2).token.tag '
'stack(1).token.tag '
'stack(1).child(1).token.tag '
'stack(1).child(1).sibling(-1).token.tag '
'stack(1).child(-1).token.tag '
'stack(1).child(-1).sibling(1).token.tag '
'stack(1).child(2).token.tag '
'stack(1).child(-2).token.tag '
'stack(2).token.tag '
'stack(3).token.tag; '
'input.token.word '
'input(1).token.word '
'input(2).token.word '
'input(3).token.word '
'stack.token.word '
'stack.child(1).token.word '
'stack.child(1).sibling(-1).token.word '
'stack.child(-1).token.word '
'stack.child(-1).sibling(1).token.word '
'stack.child(2).token.word '
'stack.child(-2).token.word '
'stack(1).token.word '
'stack(1).child(1).token.word '
'stack(1).child(1).sibling(-1).token.word '
'stack(1).child(-1).token.word '
'stack(1).child(-1).sibling(1).token.word '
'stack(1).child(2).token.word '
'stack(1).child(-2).token.word '
'stack(2).token.word '
'stack(3).token.word '
}
Parameter {
name: "brain_parser_transition_system"
value: "arc-standard"
}
Parameter {
name: "brain_tagger_embedding_dims"
value: "8;16;16;16;16;64"
}
Parameter {
name: "brain_tagger_embedding_names"
value: "other;prefix2;prefix3;suffix2;suffix3;words"
}
Parameter {
name: "brain_tagger_features"
value:
'input.digit '
'input.hyphen; '
'input.prefix(length="2") '
'input(1).prefix(length="2") '
'input(2).prefix(length="2") '
'input(3).prefix(length="2") '
'input(-1).prefix(length="2") '
'input(-2).prefix(length="2") '
'input(-3).prefix(length="2") '
'input(-4).prefix(length="2"); '
'input.prefix(length="3") '
'input(1).prefix(length="3") '
'input(2).prefix(length="3") '
'input(3).prefix(length="3") '
'input(-1).prefix(length="3") '
'input(-2).prefix(length="3") '
'input(-3).prefix(length="3") '
'input(-4).prefix(length="3"); '
'input.suffix(length="2") '
'input(1).suffix(length="2") '
'input(2).suffix(length="2") '
'input(3).suffix(length="2") '
'input(-1).suffix(length="2") '
'input(-2).suffix(length="2") '
'input(-3).suffix(length="2") '
'input(-4).suffix(length="2"); '
'input.suffix(length="3") '
'input(1).suffix(length="3") '
'input(2).suffix(length="3") '
'input(3).suffix(length="3") '
'input(-1).suffix(length="3") '
'input(-2).suffix(length="3") '
'input(-3).suffix(length="3") '
'input(-4).suffix(length="3"); '
'input.token.word '
'input(1).token.word '
'input(2).token.word '
'input(3).token.word '
'input(-1).token.word '
'input(-2).token.word '
'input(-3).token.word '
'input(-4).token.word '
}
Parameter {
name: "brain_tagger_transition_system"
value: "tagger"
}
input {
name: "tag-map"
Part {
file_pattern: "syntaxnet/models/parsey_mcparseface/tag-map"
}
}
input {
name: "tag-to-category"
Part {
file_pattern: "syntaxnet/models/parsey_mcparseface/fine-to-universal.map"
}
}
input {
name: "word-map"
Part {
file_pattern: "syntaxnet/models/parsey_mcparseface/word-map"
}
}
input {
name: "label-map"
Part {
file_pattern: "syntaxnet/models/parsey_mcparseface/label-map"
}
}
input {
name: "prefix-table"
Part {
file_pattern: "syntaxnet/models/parsey_mcparseface/prefix-table"
}
}
input {
name: "suffix-table"
Part {
file_pattern: "syntaxnet/models/parsey_mcparseface/suffix-table"
}
}
input {
name: 'stdin'
record_format: 'english-text'
Part {
file_pattern: '-'
}
}
input {
name: 'stdin-conll'
record_format: 'conll-sentence'
Part {
file_pattern: '-'
}
}
input {
name: 'stdout-conll'
record_format: 'conll-sentence'
Part {
file_pattern: '-'
}
}
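The three '-' file patterns above bind standard input and output to named corpora. A sketch of selecting one of them from Python, under the assumption (not shown in this commit) that the document_source op accepts a corpus_name attribute naming an input in this spec; the context path is a placeholder for wherever this file is installed:

import tensorflow as tf

import syntaxnet.load_parser_ops  # registers the parser ops
from syntaxnet.ops import gen_parser_ops

with tf.Session() as sess:
  # Read CoNLL-formatted sentences from stdin via the 'stdin-conll' input.
  documents, last = sess.run(gen_parser_ops.document_source(
      'context.pbtxt', corpus_name='stdin-conll', batch_size=32))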
# .
$ .
'' .
-LRB- .
-RRB- .
, .
. .
: .
ADD X
AFX PRT
CC CONJ
CD NUM
DT DET
EX DET
FW X
GW X
HYPH .
IN ADP
JJ ADJ
JJR ADJ
JJS ADJ
LS X
MD VERB
NFP .
NN NOUN
NNP NOUN
NNPS NOUN
NNS NOUN
PDT DET
POS PRT
PRP PRON
PRP$ PRON
RB ADV
RBR ADV
RBS ADV
RP PRT
SYM X
TO PRT
UH X
VB VERB
VBD VERB
VBG VERB
VBN VERB
VBP VERB
VBZ VERB
WDT DET
WP PRON
WP$ PRON
WRB ADV
`` .
X X
XX X
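The two-column mapping above takes fine-grained Penn Treebank tags to universal categories; the lexicon builder test's ValidateTagToCategoryMap reads a file of the same layout (splitting on tabs). A small reader sketch, with a placeholder local path and generic whitespace splitting so it also covers this listing:

# Parse the fine-to-universal tag map into a dict.
tag_to_category = {}
with open('fine-to-universal.map') as f:
  for line in f:
    # Each non-empty line is "<fine tag> <universal category>".
    fields = line.split()
    if len(fields) == 2:
      tag, category = fields
      tag_to_category[tag] = category
# e.g. tag_to_category.get('NN') == 'NOUN'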