"vscode:/vscode.git/clone" did not exist on "207171fffd746ef408bc6e51bb434c4cd07dfcca"
Commit d66941ac authored by Ivan Bogatyy's avatar Ivan Bogatyy Committed by calberti
Browse files

Sync w TF r0.12 & Bazel 0.4.3, internal updates (#953)

parent efa4a6cf
......@@ -73,8 +73,8 @@ class SentenceFeaturesTest : public ::testing::Test {
// Extracts a vector of string representations from evaluating the prepared
// set feature (returning multiple values) at the given index.
virtual vector<string> ExtractMultiFeature(int index) {
vector<string> values;
virtual std::vector<string> ExtractMultiFeature(int index) {
std::vector<string> values;
FeatureVector result;
extractor_->ExtractFeatures(workspaces_, sentence_, index,
&result);
......@@ -97,8 +97,8 @@ class SentenceFeaturesTest : public ::testing::Test {
// Checks that a vector workspace is equal to a target vector.
void CheckVectorWorkspace(const VectorIntWorkspace &workspace,
vector<int> target) {
vector<int> src;
std::vector<int> target) {
std::vector<int> src;
for (int i = 0; i < workspace.size(); ++i) {
src.push_back(workspace.element(i));
}
......
......@@ -36,7 +36,8 @@ def AddCrossEntropy(batch_size, n):
return tf.constant(0, dtype=tf.float32, shape=[1])
for beam_id in range(batch_size):
beam_gold_slot = tf.reshape(tf.slice(n['gold_slot'], [beam_id], [1]), [1])
beam_gold_slot = tf.reshape(
tf.strided_slice(n['gold_slot'], [beam_id], [beam_id + 1], [1]), [1])
def _ComputeCrossEntropy():
"""Adds ops to compute cross entropy of the gold path in a beam."""
# Requires a cast so that UnsortedSegmentSum, in the gradient,
......@@ -48,8 +49,9 @@ def AddCrossEntropy(batch_size, n):
beam_scores = tf.reshape(tf.gather(n['all_path_scores'], idx), [1, -1])
num = tf.shape(idx)
return tf.nn.softmax_cross_entropy_with_logits(
beam_scores, tf.expand_dims(
tf.sparse_to_dense(beam_gold_slot, num, [1.], 0.), 0))
labels=tf.expand_dims(
tf.sparse_to_dense(beam_gold_slot, num, [1.], 0.), 0),
logits=beam_scores)
# The conditional here is needed to deal with the last few batches of the
# corpus which can contain -1 in beam_gold_slot for empty batch slots.
cross_entropies.append(cf.cond(
......
......@@ -128,10 +128,10 @@ class TaggerTransitionState : public ParserTransitionState {
private:
// Currently assigned POS tags for each token in this sentence.
vector<int> tag_;
std::vector<int> tag_;
// Gold POS tags from the input document.
vector<int> gold_tag_;
std::vector<int> gold_tag_;
// Tag map used for conversions between integer and string representations
// of part of speech tags. Not owned.
......
......@@ -72,7 +72,7 @@ class TaskContext {
// Vector of parameters required by this task. These must be specified in the
// task rather than relying on default values.
vector<string> required_parameters_;
std::vector<string> required_parameters_;
};
} // namespace syntaxnet
......
......@@ -32,7 +32,7 @@ int TermFrequencyMap::Increment(const string &term) {
const TermIndex::const_iterator it = term_index_.find(term);
if (term_index_.find(term) != term_index_.end()) {
// Increment the existing term.
pair<string, int64> &data = term_data_[it->second];
std::pair<string, int64> &data = term_data_[it->second];
CHECK_EQ(term, data.first);
++(data.second);
return it->second;
......@@ -41,7 +41,7 @@ int TermFrequencyMap::Increment(const string &term) {
const int index = term_index_.size();
CHECK_LT(index, std::numeric_limits<int32>::max()); // overflow
term_index_[term] = index;
term_data_.push_back(pair<string, int64>(term, 1));
term_data_.push_back(std::pair<string, int64>(term, 1));
return index;
}
}
......@@ -74,7 +74,7 @@ void TermFrequencyMap::Load(const string &filename, int min_frequency,
int64 last_frequency = -1;
for (int i = 0; i < total && i < max_num_terms; ++i) {
TF_CHECK_OK(buffer.ReadLine(&line));
vector<string> elements = utils::Split(line, ' ');
std::vector<string> elements = utils::Split(line, ' ');
CHECK_EQ(2, elements.size());
CHECK(!elements[0].empty());
CHECK(!elements[1].empty());
......@@ -97,7 +97,7 @@ void TermFrequencyMap::Load(const string &filename, int min_frequency,
// Assign the next available index.
const int index = term_index_.size();
term_index_[term] = index;
term_data_.push_back(pair<string, int64>(term, frequency));
term_data_.push_back(std::pair<string, int64>(term, frequency));
}
CHECK_EQ(term_index_.size(), term_data_.size());
LOG(INFO) << "Loaded " << term_index_.size() << " terms from " << filename
......@@ -107,8 +107,8 @@ void TermFrequencyMap::Load(const string &filename, int min_frequency,
struct TermFrequencyMap::SortByFrequencyThenTerm {
// Returns true when a should sort before b: descending order of frequency,
// with ties broken by lexicographic order on term.
bool operator()(const pair<string, int64> &a,
const pair<string, int64> &b) const {
bool operator()(const std::pair<string, int64> &a,
const std::pair<string, int64> &b) const {
return (a.second > b.second || (a.second == b.second && a.first < b.first));
}
};
......@@ -117,7 +117,7 @@ void TermFrequencyMap::Save(const string &filename) const {
CHECK_EQ(term_index_.size(), term_data_.size());
// Copy and sort the term data.
vector<pair<string, int64>> sorted_data(term_data_);
std::vector<std::pair<string, int64>> sorted_data(term_data_);
std::sort(sorted_data.begin(), sorted_data.end(), SortByFrequencyThenTerm());
// Write the number of terms.
......@@ -149,7 +149,7 @@ TagToCategoryMap::TagToCategoryMap(const string &filename) {
tensorflow::io::BufferedInputStream buffer(&stream, kInputBufferSize);
string line;
while (buffer.ReadLine(&line) == tensorflow::Status::OK()) {
vector<string> pair = utils::Split(line, '\t');
std::vector<string> pair = utils::Split(line, '\t');
CHECK(line.empty() || pair.size() == 2) << line;
tag_to_category_[pair[0]] = pair[1];
}
......
......@@ -83,7 +83,7 @@ class TermFrequencyMap {
TermIndex term_index_;
// Mapping from indices to term and frequency.
vector<pair<string, int64>> term_data_;
std::vector<std::pair<string, int64>> term_data_;
TF_DISALLOW_COPY_AND_ASSIGN(TermFrequencyMap);
};
......@@ -107,7 +107,7 @@ class TagToCategoryMap {
void Save(const string &filename) const;
private:
map<string, string> tag_to_category_;
std::map<string, string> tag_to_category_;
TF_DISALLOW_COPY_AND_ASSIGN(TagToCategoryMap);
};
......
......@@ -83,16 +83,16 @@ class CoNLLSyntaxFormat : public DocumentFormat {
}
void ConvertFromString(const string &key, const string &value,
vector<Sentence *> *sentences) override {
std::vector<Sentence *> *sentences) override {
// Create new sentence.
Sentence *sentence = new Sentence();
// Each line corresponds to one token.
string text;
vector<string> lines = utils::Split(value, '\n');
std::vector<string> lines = utils::Split(value, '\n');
// Add each token to the sentence.
vector<string> fields;
std::vector<string> fields;
int expected_id = 1;
for (size_t i = 0; i < lines.size(); ++i) {
// Split line into tab-separated fields.
......@@ -166,12 +166,12 @@ class CoNLLSyntaxFormat : public DocumentFormat {
void ConvertToString(const Sentence &sentence, string *key,
string *value) override {
*key = sentence.docid();
vector<string> lines;
std::vector<string> lines;
for (int i = 0; i < sentence.token_size(); ++i) {
Token token = sentence.token(i);
if (join_category_to_pos_) SplitCategoryFromPos(&token);
if (add_pos_as_attribute_) RemovePosFromAttributes(&token);
vector<string> fields(10);
std::vector<string> fields(10);
fields[0] = tensorflow::strings::Printf("%d", i + 1);
fields[1] = UnderscoreIfEmpty(token.word());
fields[2] = "_";
......@@ -198,14 +198,14 @@ class CoNLLSyntaxFormat : public DocumentFormat {
void AddMorphAttributes(const string &attributes, Token *token) {
TokenMorphology *morph =
token->MutableExtension(TokenMorphology::morphology);
vector<string> att_vals = utils::Split(attributes, '|');
std::vector<string> att_vals = utils::Split(attributes, '|');
for (int i = 0; i < att_vals.size(); ++i) {
vector<string> att_val = utils::SplitOne(att_vals[i], '=');
std::vector<string> att_val = utils::SplitOne(att_vals[i], '=');
// Format is either:
// 1) a1=v1|a2=v2..., e.g., Czech CoNLL data, or,
// 2) v1|v2|..., e.g., German CoNLL data.
const pair<string, string> name_value =
const std::pair<string, string> name_value =
att_val.size() == 2 ? std::make_pair(att_val[0], att_val[1])
: std::make_pair(att_val[0], "on");
......@@ -282,7 +282,7 @@ class CoNLLSyntaxFormat : public DocumentFormat {
TF_DISALLOW_COPY_AND_ASSIGN(CoNLLSyntaxFormat);
};
REGISTER_DOCUMENT_FORMAT("conll-sentence", CoNLLSyntaxFormat);
REGISTER_SYNTAXNET_DOCUMENT_FORMAT("conll-sentence", CoNLLSyntaxFormat);
// Reader for segmentation training data format. This reader assumes the input
// format is similar to CoNLL format but with only two fields:
......@@ -325,16 +325,16 @@ class SegmentationTrainingDataFormat : public CoNLLSyntaxFormat {
// to SPACE_BREAK to indicate that the corresponding gold transition for that
// character token is START. Otherwise NO_BREAK to indicate MERGE.
void ConvertFromString(const string &key, const string &value,
vector<Sentence *> *sentences) override {
std::vector<Sentence *> *sentences) override {
// Create new sentence.
Sentence *sentence = new Sentence();
// Each line corresponds to one token.
string text;
vector<string> lines = utils::Split(value, '\n');
std::vector<string> lines = utils::Split(value, '\n');
// Add each token to the sentence.
vector<string> fields;
std::vector<string> fields;
for (size_t i = 0; i < lines.size(); ++i) {
// Split line into tab-separated fields.
fields.clear();
......@@ -362,7 +362,7 @@ class SegmentationTrainingDataFormat : public CoNLLSyntaxFormat {
}
// Add character-based token to sentence.
vector<tensorflow::StringPiece> chars;
std::vector<tensorflow::StringPiece> chars;
SegmenterUtils::GetUTF8Chars(word, &chars);
bool is_first_char = true;
for (auto utf8char : chars) {
......@@ -398,7 +398,8 @@ class SegmentationTrainingDataFormat : public CoNLLSyntaxFormat {
}
};
REGISTER_DOCUMENT_FORMAT("segment-train-data", SegmentationTrainingDataFormat);
REGISTER_SYNTAXNET_DOCUMENT_FORMAT("segment-train-data",
SegmentationTrainingDataFormat);
// Reader for tokenized text. This reader expects every sentence to be on a
// single line and tokens on that line to be separated by single spaces.
......@@ -414,7 +415,7 @@ class TokenizedTextFormat : public DocumentFormat {
}
void ConvertFromString(const string &key, const string &value,
vector<Sentence *> *sentences) override {
std::vector<Sentence *> *sentences) override {
Sentence *sentence = new Sentence();
string text;
for (const string &word : utils::Split(value, ' ')) {
......@@ -463,7 +464,7 @@ class TokenizedTextFormat : public DocumentFormat {
TF_DISALLOW_COPY_AND_ASSIGN(TokenizedTextFormat);
};
REGISTER_DOCUMENT_FORMAT("tokenized-text", TokenizedTextFormat);
REGISTER_SYNTAXNET_DOCUMENT_FORMAT("tokenized-text", TokenizedTextFormat);
// Reader for un-tokenized text. This reader expects every sentence to be on a
// single line. For each line in the input, a sentence proto will be created,
......@@ -474,9 +475,9 @@ class UntokenizedTextFormat : public TokenizedTextFormat {
UntokenizedTextFormat() {}
void ConvertFromString(const string &key, const string &value,
vector<Sentence *> *sentences) override {
std::vector<Sentence *> *sentences) override {
Sentence *sentence = new Sentence();
vector<tensorflow::StringPiece> chars;
std::vector<tensorflow::StringPiece> chars;
SegmenterUtils::GetUTF8Chars(value, &chars);
int start = 0;
for (auto utf8char : chars) {
......@@ -502,7 +503,7 @@ class UntokenizedTextFormat : public TokenizedTextFormat {
TF_DISALLOW_COPY_AND_ASSIGN(UntokenizedTextFormat);
};
REGISTER_DOCUMENT_FORMAT("untokenized-text", UntokenizedTextFormat);
REGISTER_SYNTAXNET_DOCUMENT_FORMAT("untokenized-text", UntokenizedTextFormat);
// Text reader that attempts to perform Penn Treebank tokenization on arbitrary
// raw text. Adapted from https://www.cis.upenn.edu/~treebank/tokenizer.sed
......@@ -514,8 +515,8 @@ class EnglishTextFormat : public TokenizedTextFormat {
EnglishTextFormat() {}
void ConvertFromString(const string &key, const string &value,
vector<Sentence *> *sentences) override {
vector<pair<string, string>> preproc_rules = {
std::vector<Sentence *> *sentences) override {
std::vector<std::pair<string, string>> preproc_rules = {
// Punctuation.
{"’", "'"},
{"…", "..."},
......@@ -570,7 +571,7 @@ class EnglishTextFormat : public TokenizedTextFormat {
{"♦", ""},
};
vector<pair<string, string>> rules = {
std::vector<std::pair<string, string>> rules = {
// attempt to get correct directional quotes
{R"re(^")re", "`` "},
{R"re(([ \([{<])")re", "\\1 `` "},
......@@ -639,10 +640,10 @@ class EnglishTextFormat : public TokenizedTextFormat {
};
string rewritten = value;
for (const pair<string, string> &rule : preproc_rules) {
for (const std::pair<string, string> &rule : preproc_rules) {
RE2::GlobalReplace(&rewritten, rule.first, rule.second);
}
for (const pair<string, string> &rule : rules) {
for (const std::pair<string, string> &rule : rules) {
RE2::GlobalReplace(&rewritten, rule.first, rule.second);
}
TokenizedTextFormat::ConvertFromString(key, rewritten, sentences);
......@@ -652,6 +653,6 @@ class EnglishTextFormat : public TokenizedTextFormat {
TF_DISALLOW_COPY_AND_ASSIGN(EnglishTextFormat);
};
REGISTER_DOCUMENT_FORMAT("english-text", EnglishTextFormat);
REGISTER_SYNTAXNET_DOCUMENT_FORMAT("english-text", EnglishTextFormat);
} // namespace syntaxnet
......@@ -37,7 +37,7 @@ VectorIntWorkspace::VectorIntWorkspace(int size) : elements_(size) {}
VectorIntWorkspace::VectorIntWorkspace(int size, int value)
: elements_(size, value) {}
VectorIntWorkspace::VectorIntWorkspace(const vector<int> &elements)
VectorIntWorkspace::VectorIntWorkspace(const std::vector<int> &elements)
: elements_(elements) {}
string VectorIntWorkspace::TypeName() { return "Vector"; }
......
......@@ -57,7 +57,7 @@ class WorkspaceRegistry {
int Request(const string &name) {
const std::type_index id = std::type_index(typeid(W));
workspace_types_[id] = W::TypeName();
vector<string> &names = workspace_names_[id];
std::vector<string> &names = workspace_names_[id];
for (int i = 0; i < names.size(); ++i) {
if (names[i] == name) return i;
}
......@@ -65,8 +65,8 @@ class WorkspaceRegistry {
return names.size() - 1;
}
const std::unordered_map<std::type_index, vector<string> > &WorkspaceNames()
const {
const std::unordered_map<std::type_index, std::vector<string> >
&WorkspaceNames() const {
return workspace_names_;
}
......@@ -78,7 +78,7 @@ class WorkspaceRegistry {
std::unordered_map<std::type_index, string> workspace_types_;
// Workspace names, indexed as workspace_names_[typeid][workspace].
std::unordered_map<std::type_index, vector<string> > workspace_names_;
std::unordered_map<std::type_index, std::vector<string> > workspace_names_;
TF_DISALLOW_COPY_AND_ASSIGN(WorkspaceRegistry);
};
......@@ -137,7 +137,7 @@ class WorkspaceSet {
private:
// The set of workspaces, indexed as workspaces_[typeid][index].
std::unordered_map<std::type_index, vector<Workspace *> > workspaces_;
std::unordered_map<std::type_index, std::vector<Workspace *> > workspaces_;
};
// A workspace that wraps around a single int.
......@@ -170,7 +170,7 @@ class VectorIntWorkspace : public Workspace {
explicit VectorIntWorkspace(int size);
// Creates a vector initialized with the given array.
explicit VectorIntWorkspace(const vector<int> &elements);
explicit VectorIntWorkspace(const std::vector<int> &elements);
// Creates a vector of the given size, with each element initialized to the
// given value.
......@@ -189,7 +189,7 @@ class VectorIntWorkspace : public Workspace {
private:
// The enclosed vector.
vector<int> elements_;
std::vector<int> elements_;
};
// A workspace that wraps around a vector of vector of int.
......@@ -202,14 +202,14 @@ class VectorVectorIntWorkspace : public Workspace {
static string TypeName();
// Returns the i'th vector of elements.
const vector<int> &elements(int i) const { return elements_[i]; }
const std::vector<int> &elements(int i) const { return elements_[i]; }
// Mutable access to the i'th vector of elements.
vector<int> *mutable_elements(int i) { return &(elements_[i]); }
std::vector<int> *mutable_elements(int i) { return &(elements_[i]); }
private:
// The enclosed vector of vector of elements.
vector<vector<int> > elements_;
std::vector<std::vector<int> > elements_;
};
} // namespace syntaxnet
......
Subproject commit aab099711d7e04034cf742ddb9b00dd15edbe99c
Subproject commit 45ab528211c962b19e12f6b77165848310271624
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment