Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
4364390a
Commit
4364390a
authored
Nov 13, 2017
by
Ivan Bogatyy
Committed by
calberti
Nov 13, 2017
Browse files
Release DRAGNN bulk networks (#2785)
* Release DRAGNN bulk networks
parent
638fd759
Changes
166
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1762 additions
and
571 deletions
+1762
-571
research/syntaxnet/syntaxnet/document_format.h
research/syntaxnet/syntaxnet/document_format.h
+3
-0
research/syntaxnet/syntaxnet/embedding_feature_extractor.cc
research/syntaxnet/syntaxnet/embedding_feature_extractor.cc
+1
-1
research/syntaxnet/syntaxnet/feature_extractor.h
research/syntaxnet/syntaxnet/feature_extractor.h
+36
-0
research/syntaxnet/syntaxnet/feature_types.h
research/syntaxnet/syntaxnet/feature_types.h
+20
-7
research/syntaxnet/syntaxnet/generic_features.cc
research/syntaxnet/syntaxnet/generic_features.cc
+103
-0
research/syntaxnet/syntaxnet/generic_features.h
research/syntaxnet/syntaxnet/generic_features.h
+856
-0
research/syntaxnet/syntaxnet/generic_features_test.cc
research/syntaxnet/syntaxnet/generic_features_test.cc
+387
-0
research/syntaxnet/syntaxnet/graph_builder.py
research/syntaxnet/syntaxnet/graph_builder.py
+1
-0
research/syntaxnet/syntaxnet/head_label_transitions.cc
research/syntaxnet/syntaxnet/head_label_transitions.cc
+148
-0
research/syntaxnet/syntaxnet/head_label_transitions.h
research/syntaxnet/syntaxnet/head_label_transitions.h
+96
-0
research/syntaxnet/syntaxnet/head_label_transitions_test.cc
research/syntaxnet/syntaxnet/head_label_transitions_test.cc
+105
-0
research/syntaxnet/syntaxnet/head_transitions.h
research/syntaxnet/syntaxnet/head_transitions.h
+2
-5
research/syntaxnet/syntaxnet/head_transitions_test.cc
research/syntaxnet/syntaxnet/head_transitions_test.cc
+2
-1
research/syntaxnet/syntaxnet/lexicon_builder.cc
research/syntaxnet/syntaxnet/lexicon_builder.cc
+1
-0
research/syntaxnet/syntaxnet/models/parsey_universal/context-tokenize-zh.pbtxt
...ntaxnet/models/parsey_universal/context-tokenize-zh.pbtxt
+0
-64
research/syntaxnet/syntaxnet/models/parsey_universal/context.pbtxt
...syntaxnet/syntaxnet/models/parsey_universal/context.pbtxt
+0
-362
research/syntaxnet/syntaxnet/models/parsey_universal/parse.sh
...arch/syntaxnet/syntaxnet/models/parsey_universal/parse.sh
+0
-68
research/syntaxnet/syntaxnet/models/parsey_universal/tokenize.sh
...h/syntaxnet/syntaxnet/models/parsey_universal/tokenize.sh
+0
-31
research/syntaxnet/syntaxnet/models/parsey_universal/tokenize_zh.sh
...yntaxnet/syntaxnet/models/parsey_universal/tokenize_zh.sh
+0
-30
research/syntaxnet/syntaxnet/morphology_label_set.h
research/syntaxnet/syntaxnet/morphology_label_set.h
+1
-2
No files found.
research/syntaxnet/syntaxnet/document_format.h
View file @
4364390a
...
...
@@ -60,6 +60,9 @@ class DocumentFormat : public RegisterableClass<DocumentFormat> {
#define REGISTER_SYNTAXNET_DOCUMENT_FORMAT(type, component) \
REGISTER_SYNTAXNET_CLASS_COMPONENT(DocumentFormat, type, component)
// Component registry for document formatters.
DECLARE_SYNTAXNET_CLASS_REGISTRY
(
"document format"
,
DocumentFormat
);
}
// namespace syntaxnet
#endif // SYNTAXNET_DOCUMENT_FORMAT_H__
research/syntaxnet/syntaxnet/embedding_feature_extractor.cc
View file @
4364390a
...
...
@@ -94,7 +94,7 @@ GenericEmbeddingFeatureExtractor::ConvertExample(
for
(
int
j
=
0
;
j
<
feature_vectors
[
i
].
size
();
++
j
)
{
const
FeatureType
&
feature_type
=
*
feature_vectors
[
i
].
type
(
j
);
const
FeatureValue
value
=
feature_vectors
[
i
].
value
(
j
);
const
bool
is_continuous
=
feature_type
.
name
().
find
(
"
continuous
"
)
==
0
;
const
bool
is_continuous
=
feature_type
.
is_
continuous
()
;
const
int64
id
=
is_continuous
?
FloatFeatureValue
(
value
).
id
:
value
;
const
int
base
=
feature_type
.
base
();
if
(
id
>=
0
)
{
...
...
research/syntaxnet/syntaxnet/feature_extractor.h
View file @
4364390a
...
...
@@ -80,6 +80,42 @@ class FeatureVector {
// Returns the number of elements in the feature vector.
int
size
()
const
{
return
features_
.
size
();
}
// Truncates the feature vector. Requires that new_size <= size().
void
Truncate
(
int
new_size
)
{
DCHECK_GE
(
new_size
,
0
);
DCHECK_LE
(
new_size
,
size
());
features_
.
resize
(
new_size
);
}
// Returns string representation of feature vector.
string
ToString
()
const
{
string
str
;
str
.
append
(
"["
);
for
(
int
i
=
0
;
i
<
size
();
++
i
)
{
if
(
i
>
0
)
str
.
append
(
","
);
if
(
!
type
(
i
)
->
name
().
empty
())
{
// Get the name and erase any quotation characters.
string
name_str
=
type
(
i
)
->
name
();
auto
it
=
name_str
.
begin
();
while
(
it
!=
name_str
.
end
())
{
if
(
*
it
==
'"'
)
{
it
=
name_str
.
erase
(
it
);
}
else
{
++
it
;
}
}
str
.
append
(
name_str
);
str
.
append
(
"="
);
}
str
.
append
(
type
(
i
)
->
GetFeatureValueName
(
value
(
i
)));
}
str
.
append
(
"]"
);
return
str
;
}
// Reserves space in the underlying feature vector.
void
reserve
(
int
n
)
{
features_
.
reserve
(
n
);
}
...
...
research/syntaxnet/syntaxnet/feature_types.h
View file @
4364390a
...
...
@@ -40,9 +40,14 @@ class FeatureType {
public:
// Initializes a feature type.
explicit
FeatureType
(
const
string
&
name
)
:
name_
(
name
),
base_
(
0
)
{}
:
name_
(
name
),
base_
(
0
),
is_continuous_
(
name
.
find
(
"continuous"
)
!=
string
::
npos
)
{
// TODO(googleuser): Switch to explicitly setting is_continuous.
VLOG
(
2
)
<<
"Feature: "
<<
name
<<
":"
<<
is_continuous_
;
}
virtual
~
FeatureType
()
{}
virtual
~
FeatureType
()
=
default
;
// Converts a feature value to a name.
virtual
string
GetFeatureValueName
(
FeatureValue
value
)
const
=
0
;
...
...
@@ -56,12 +61,21 @@ class FeatureType {
Predicate
base
()
const
{
return
base_
;
}
void
set_base
(
Predicate
base
)
{
base_
=
base
;
}
// True if the underlying feature is continuous.
bool
is_continuous
()
const
{
return
is_continuous_
;
}
// Sets whenther the underlying feature should be represented as continuous.
void
set_is_continuous
(
bool
is_continuous
)
{
is_continuous_
=
is_continuous
;
}
private:
// Feature type name.
string
name_
;
// "Base" feature value: i.e. a "slot" in a global ordering of features.
Predicate
base_
;
// True if this feature is continuous.
bool
is_continuous_
;
};
// Templated generic resource based feature type. This feature type delegates
...
...
@@ -73,7 +87,7 @@ class FeatureType {
// successfully for values ONLY in the range [0, Resource->NumValues()) Any
// feature value not in the extra value map and not in the above range of
// Resource will result in a ERROR and return of "<INVALID>".
template
<
class
Resource
>
template
<
class
Resource
>
class
ResourceBasedFeatureType
:
public
FeatureType
{
public:
// Creates a new type with given name, resource object, and a mapping of
...
...
@@ -85,8 +99,8 @@ class ResourceBasedFeatureType : public FeatureType {
:
FeatureType
(
name
),
resource_
(
resource
),
values_
(
values
)
{
max_value_
=
resource
->
NumValues
()
-
1
;
for
(
const
auto
&
pair
:
values
)
{
CHECK_GE
(
pair
.
first
,
resource
->
NumValues
())
<<
"Invalid extra value: "
<<
pair
.
first
<<
","
<<
pair
.
second
;
CHECK_GE
(
pair
.
first
,
resource
->
NumValues
())
<<
"Invalid extra value: "
<<
pair
.
first
<<
","
<<
pair
.
second
;
max_value_
=
pair
.
first
>
max_value_
?
pair
.
first
:
max_value_
;
}
}
...
...
@@ -152,8 +166,7 @@ class EnumFeatureType : public FeatureType {
string
GetFeatureValueName
(
FeatureValue
value
)
const
override
{
auto
it
=
value_names_
.
find
(
value
);
if
(
it
==
value_names_
.
end
())
{
LOG
(
ERROR
)
<<
"Invalid feature value "
<<
value
<<
" for "
<<
name
();
LOG
(
ERROR
)
<<
"Invalid feature value "
<<
value
<<
" for "
<<
name
();
return
"<INVALID>"
;
}
return
it
->
second
;
...
...
research/syntaxnet/syntaxnet/generic_features.cc
0 → 100644
View file @
4364390a
/* Copyright 2016 Google Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "syntaxnet/generic_features.h"
#include <limits>
#include <string>
#include "syntaxnet/base.h"
using
tensorflow
::
strings
::
StrAppend
;
using
tensorflow
::
strings
::
StrCat
;
namespace
syntaxnet
{
GenericFeatureTypes
::
TupleFeatureTypeBase
::
TupleFeatureTypeBase
(
const
string
&
prefix
,
const
std
::
vector
<
FeatureType
*>
&
sub_types
)
:
FeatureType
(
CreateTypeName
(
prefix
,
sub_types
)),
types_
(
sub_types
.
begin
(),
sub_types
.
end
())
{
CHECK
(
!
types_
.
empty
());
}
string
GenericFeatureTypes
::
TupleFeatureTypeBase
::
GetFeatureValueName
(
FeatureValue
value
)
const
{
if
(
value
<
0
||
value
>=
size_
)
return
"<INVALID>"
;
string
name
=
"("
;
for
(
uint32
i
=
0
;
i
<
types_
.
size
();
++
i
)
{
const
FeatureType
*
sub_type
=
types_
[
i
];
const
FeatureValue
sub_size
=
sub_type
->
GetDomainSize
();
const
FeatureValue
sub_value
=
value
%
sub_size
;
const
string
sub_name
=
sub_type
->
GetFeatureValueName
(
sub_value
);
const
string
delimiter
=
i
+
1
<
types_
.
size
()
?
","
:
")"
;
StrAppend
(
&
name
,
sub_name
,
delimiter
);
value
/=
sub_size
;
}
return
name
;
}
FeatureValue
GenericFeatureTypes
::
TupleFeatureTypeBase
::
GetDomainSize
()
const
{
return
size_
;
}
void
GenericFeatureTypes
::
TupleFeatureTypeBase
::
InitDomainSizes
(
vector
<
FeatureValue
>
*
sizes
)
{
CHECK_EQ
(
sizes
->
size
(),
types_
.
size
());
// Populate sub-sizes.
for
(
uint32
i
=
0
;
i
<
types_
.
size
();
++
i
)
{
sizes
->
at
(
i
)
=
types_
[
i
]
->
GetDomainSize
();
}
// Compute the cardinality of the tuple.
size_
=
1
;
double
real_size
=
1.0
;
// for overflow detection
for
(
const
FeatureValue
sub_size
:
*
sizes
)
{
size_
*=
sub_size
;
real_size
*=
static_cast
<
double
>
(
sub_size
);
}
// Check for overflow.
if
(
real_size
>
std
::
numeric_limits
<
FeatureValue
>::
max
())
{
string
message
;
for
(
uint32
i
=
0
;
i
<
types_
.
size
();
++
i
)
{
StrAppend
(
&
message
,
"
\n
"
,
types_
[
i
]
->
name
(),
")="
,
sizes
->
at
(
i
));
}
LOG
(
FATAL
)
<<
"Feature space overflow in feature "
<<
name
()
<<
message
;
}
}
string
GenericFeatureTypes
::
TupleFeatureTypeBase
::
CreateTypeName
(
const
string
&
prefix
,
const
std
::
vector
<
FeatureType
*>
&
sub_types
)
{
string
prefix_to_strip
=
prefix
.
empty
()
?
""
:
StrCat
(
prefix
,
"."
);
string
name
=
StrCat
(
prefix
,
" {"
);
for
(
const
FeatureType
*
type
:
sub_types
)
{
string
stripped_name
=
type
->
name
();
if
(
stripped_name
.
find_first_of
(
prefix_to_strip
)
==
0
)
{
stripped_name
=
stripped_name
.
substr
(
prefix_to_strip
.
length
());
}
StrAppend
(
&
name
,
" "
,
stripped_name
);
}
StrAppend
(
&
name
,
" }"
);
return
name
;
}
GenericFeatureTypes
::
DynamicTupleFeatureType
::
DynamicTupleFeatureType
(
const
string
&
prefix
,
const
std
::
vector
<
FeatureType
*>
&
sub_types
)
:
TupleFeatureTypeBase
(
prefix
,
sub_types
),
sizes_
(
sub_types
.
size
())
{
CHECK_GE
(
sizes_
.
size
(),
2
);
InitDomainSizes
(
&
sizes_
);
}
}
// namespace syntaxnet
research/syntaxnet/syntaxnet/generic_features.h
0 → 100644
View file @
4364390a
/* Copyright 2016 Google Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Generic feature functions. These feature functions are independent of the
// feature function template types.
//
// The generic features should be instantiated and registered using the
// REGISTER_SYNTAXNET_GENERIC_FEATURES() macro:
//
// typedef GenericFeatures<Foo, int> GenericFooFeatures;
// REGISTER_SYNTAXNET_GENERIC_FEATURES(GenericFooFeatures);
//
#ifndef SYNTAXNET_GENERIC_FEATURES_H_
#define SYNTAXNET_GENERIC_FEATURES_H_
#include <string>
#include <utility>
#include <vector>
#include "syntaxnet/base.h"
#include "syntaxnet/feature_extractor.h"
namespace
syntaxnet
{
class
TaskContext
;
class
WorkspaceSet
;
// A class encapsulating all generic feature types.
class
GenericFeatureTypes
{
public:
// Base class for tuple feature types.
class
TupleFeatureTypeBase
:
public
FeatureType
{
public:
// Creates a tuple whose elements are defined by the sub-types. This does
// not take ownership of the sub-types, which must remain live while this
// is in use.
TupleFeatureTypeBase
(
const
string
&
prefix
,
const
std
::
vector
<
FeatureType
*>
&
sub_types
);
// Returns a string representation of the tuple value.
string
GetFeatureValueName
(
FeatureValue
value
)
const
override
;
// Returns the domain size of this feature.
FeatureValue
GetDomainSize
()
const
override
;
protected:
// Sets the feature domain sizes and computes the total domain size of the
// tuple. Derived classes should call this method from their constructor.
void
InitDomainSizes
(
vector
<
FeatureValue
>
*
sizes
);
private:
// Returns a string name for a type using the prefix and sub-types.
static
string
CreateTypeName
(
const
string
&
prefix
,
const
std
::
vector
<
FeatureType
*>
&
sub_types
);
// The types of the sub-features. Not owned.
const
std
::
vector
<
const
FeatureType
*>
types_
;
// The domain size of the tuple.
FeatureValue
size_
=
0
;
};
// Feature type for tuples of fixed size.
template
<
int
kNumElements
>
class
StaticTupleFeatureType
:
public
TupleFeatureTypeBase
{
public:
static_assert
(
kNumElements
>=
2
,
"At least two elements required"
);
// Creates a fixed-size tuple of sub-types. This does not take ownership
// of the sub-types, which must remain live while this is in use.
StaticTupleFeatureType
(
const
string
&
prefix
,
const
std
::
vector
<
FeatureType
*>
&
sub_types
)
:
TupleFeatureTypeBase
(
prefix
,
sub_types
)
{
CHECK_EQ
(
sub_types
.
size
(),
kNumElements
);
sizes_
.
resize
(
kNumElements
);
InitDomainSizes
(
&
sizes_
);
}
// Returns the conjoined tuple value for a list of sub-values. The range
// values[0,kNumElements) must be valid and non-absent.
FeatureValue
Conjoin
(
const
FeatureValue
*
values
)
const
{
DCHECK_GE
(
values
[
kNumElements
-
1
],
0
);
DCHECK_LT
(
values
[
kNumElements
-
1
],
sizes_
[
kNumElements
-
1
]);
DCHECK_NE
(
values
[
kNumElements
-
1
],
GenericFeatureFunction
::
kNone
);
FeatureValue
conjoined
=
values
[
kNumElements
-
1
];
for
(
int
i
=
kNumElements
-
2
;
i
>=
0
;
--
i
)
{
DCHECK_GE
(
values
[
i
],
0
);
DCHECK_LT
(
values
[
i
],
sizes_
[
i
]);
DCHECK_NE
(
values
[
i
],
GenericFeatureFunction
::
kNone
);
conjoined
=
values
[
i
]
+
conjoined
*
sizes_
[
i
];
}
return
conjoined
;
}
private:
// The domain sizes of the sub-types.
vector
<
FeatureValue
>
sizes_
;
};
// Feature type for tuples of dynamic size.
class
DynamicTupleFeatureType
:
public
TupleFeatureTypeBase
{
public:
// Creates a tuple of sub-types. This does not take ownership of the
// sub-types, which must remain live while this is in use.
DynamicTupleFeatureType
(
const
string
&
prefix
,
const
std
::
vector
<
FeatureType
*>
&
sub_types
);
// Returns the conjoined tuple value for a list of sub-values, which must
// be the same size as the number of elements and non-absent.
FeatureValue
Conjoin
(
const
std
::
vector
<
FeatureValue
>
&
values
)
const
{
DCHECK_EQ
(
values
.
size
(),
sizes_
.
size
());
DCHECK_GE
(
values
.
back
(),
0
);
DCHECK_LT
(
values
.
back
(),
sizes_
.
back
());
DCHECK_NE
(
values
.
back
(),
GenericFeatureFunction
::
kNone
);
FeatureValue
conjoined
=
values
.
back
();
for
(
int
i
=
static_cast
<
int
>
(
sizes_
.
size
())
-
2
;
i
>=
0
;
--
i
)
{
DCHECK_GE
(
values
[
i
],
0
);
DCHECK_LT
(
values
[
i
],
sizes_
[
i
]);
DCHECK_NE
(
values
[
i
],
GenericFeatureFunction
::
kNone
);
conjoined
=
values
[
i
]
+
conjoined
*
sizes_
[
i
];
}
return
conjoined
;
}
private:
// The domain sizes of the sub-types.
std
::
vector
<
FeatureValue
>
sizes_
;
};
// A wrapper which simply delegates to the sub-type. This does not take
// ownership of the sub-type, which must remain live while this is in use.
class
WrappedFeatureType
:
public
FeatureType
{
public:
explicit
WrappedFeatureType
(
FeatureType
*
sub_type
)
:
FeatureType
(
sub_type
->
name
()),
sub_type_
(
sub_type
)
{}
string
GetFeatureValueName
(
FeatureValue
value
)
const
override
{
return
sub_type_
->
GetFeatureValueName
(
value
);
}
FeatureValue
GetDomainSize
()
const
override
{
return
sub_type_
->
GetDomainSize
();
}
private:
FeatureType
*
sub_type_
;
};
};
// A class encapsulating all generic feature functions.
template
<
class
OBJ
,
class
...
ARGS
>
class
GenericFeatures
{
public:
// Base class for feature functions.
typedef
FeatureFunction
<
OBJ
,
ARGS
...
>
Base
;
// Base class for nested feature functions: these still have their own feature
// type, so make sure not to pass to the nested ones.
class
MetaBase
:
public
MetaFeatureFunction
<
OBJ
,
ARGS
...
>
{
public:
// Don't use the nested logic for feature types by default.
void
GetFeatureTypes
(
std
::
vector
<
FeatureType
*>
*
types
)
const
override
{
GenericFeatureFunction
::
GetFeatureTypes
(
types
);
}
};
// Feature function that adds a bias value to the feature vector.
class
Bias
:
public
Base
{
enum
BiasFeatureValue
{
ON
};
public:
// Initializes the feature.
void
Init
(
TaskContext
*
context
)
override
{
this
->
set_feature_type
(
new
EnumFeatureType
(
this
->
name
(),
{{
BiasFeatureValue
::
ON
,
"ON"
}}));
}
// Returns the bias value.
FeatureValue
Compute
(
const
WorkspaceSet
&
workspaces
,
const
OBJ
&
object
,
ARGS
...
args
,
const
FeatureVector
*
fv
)
const
override
{
return
0
;
}
};
// Feature function that returns a constant value.
class
Constant
:
public
Base
{
public:
// Initializes the feature.
void
Init
(
TaskContext
*
context
)
override
{
value_
=
this
->
GetIntParameter
(
"value"
,
0
);
this
->
set_feature_type
(
new
NumericFeatureType
(
this
->
name
(),
value_
+
1
));
}
// Returns the constant's value.
FeatureValue
Compute
(
const
WorkspaceSet
&
workspaces
,
const
OBJ
&
object
,
ARGS
...
args
,
const
FeatureVector
*
fv
)
const
override
{
return
value_
;
}
private:
int
value_
=
0
;
};
// A feature function that tests equality between two nested features. This
// can be used, for example, to check morphological agreement.
class
Equals
:
public
MetaBase
{
enum
EqualsFeatureValue
{
DIFFERENT
,
EQUAL
};
public:
// Initializes the feature.
void
InitNested
(
TaskContext
*
context
)
override
{
const
auto
&
nested
=
this
->
nested
();
CHECK_EQ
(
nested
.
size
(),
2
)
<<
"The 'equals' feature requires two nested features."
;
this
->
set_feature_type
(
new
EnumFeatureType
(
this
->
name
(),
{{
EqualsFeatureValue
::
DIFFERENT
,
"DIFFERENT"
},
{
EqualsFeatureValue
::
EQUAL
,
"EQUAL"
}}));
}
// Returns the equality value, or kNone if either value is absent.
FeatureValue
Compute
(
const
WorkspaceSet
&
workspaces
,
const
OBJ
&
object
,
ARGS
...
args
,
const
FeatureVector
*
fv
)
const
override
{
const
auto
&
nested
=
this
->
nested
();
const
FeatureValue
a
=
nested
[
0
]
->
Compute
(
workspaces
,
object
,
args
...,
fv
);
if
(
a
==
Base
::
kNone
)
return
Base
::
kNone
;
const
FeatureValue
b
=
nested
[
1
]
->
Compute
(
workspaces
,
object
,
args
...,
fv
);
if
(
b
==
Base
::
kNone
)
return
Base
::
kNone
;
return
a
==
b
?
1
:
0
;
}
};
// Abstract base class for features that compare a nested feature's value
// to a target value (specified via the 'value' parameter).
//
// Subclasses must implement InitTypes() and ComputeValue().
class
CompareValue
:
public
MetaBase
{
public:
// Initialize the type information.
virtual
void
InitTypes
()
=
0
;
// Compute the feature value given the nested feature value and the target
// value (i.e., what was passed as the 'value' parameter).
virtual
FeatureValue
ComputeValue
(
FeatureValue
nested_feature_value
,
FeatureValue
target_value
)
const
=
0
;
// Initializes the feature.
void
InitNested
(
TaskContext
*
context
)
override
{
string
value_str
=
this
->
GetParameter
(
"value"
);
CHECK_GT
(
value_str
.
size
(),
0
)
<<
"The '"
<<
this
->
FunctionName
()
<<
"' feature requires a 'value' parameter."
;
const
auto
&
nested
=
this
->
nested
();
CHECK_EQ
(
nested
.
size
(),
1
)
<<
"The '"
<<
this
->
FunctionName
()
<<
"' feature requires one nested feature."
;
// Only allow nested features with exactly one feature type.
FeatureType
*
nested_feature_type
=
CHECK_NOTNULL
(
nested
.
front
()
->
GetFeatureType
());
for
(
int
i
=
0
;
i
<
nested_feature_type
->
GetDomainSize
();
++
i
)
{
if
(
nested_feature_type
->
GetFeatureValueName
(
i
)
==
value_str
)
{
value_
=
i
;
break
;
}
}
CHECK_NE
(
value_
,
-
1
)
<<
"Unknown feature value specified: "
<<
value_str
<<
"."
;
InitTypes
();
}
// Extracts the nested feature value, and delegates computation of the
// final feature value to ComputeValue().
// Returns kNone if the nested feature value is absent.
FeatureValue
Compute
(
const
WorkspaceSet
&
workspaces
,
const
OBJ
&
object
,
ARGS
...
args
,
const
FeatureVector
*
fv
)
const
override
{
const
auto
&
nested
=
this
->
nested
();
FeatureValue
feature_value
=
nested
.
front
()
->
Compute
(
workspaces
,
object
,
args
...,
fv
);
if
(
feature_value
==
Base
::
kNone
)
return
Base
::
kNone
;
return
ComputeValue
(
feature_value
,
value_
);
}
private:
// The value to compare the feature against.
int
value_
=
-
1
;
};
// A feature function that fires if and only if the nested feature has the
// given value.
class
Filter
:
public
CompareValue
{
enum
FilterFeatureValue
{
ON
};
public:
void
InitTypes
()
override
{
this
->
set_feature_type
(
new
EnumFeatureType
(
this
->
name
(),
{{
FilterFeatureValue
::
ON
,
"ON"
}}));
}
FeatureValue
ComputeValue
(
FeatureValue
nested_feature_value
,
FeatureValue
target_value
)
const
override
{
return
nested_feature_value
==
target_value
?
0
:
Base
::
kNone
;
}
};
// A feature function that tests equality between a feature and a value.
class
Is
:
public
CompareValue
{
enum
IsFeatureValue
{
FALSE
,
TRUE
};
public:
void
InitTypes
()
override
{
this
->
set_feature_type
(
new
EnumFeatureType
(
this
->
name
(),
{{
IsFeatureValue
::
FALSE
,
"FALSE"
},
{
IsFeatureValue
::
TRUE
,
"TRUE"
}}));
}
FeatureValue
ComputeValue
(
FeatureValue
nested_feature_value
,
FeatureValue
target_value
)
const
override
{
return
nested_feature_value
==
target_value
;
}
};
// A feature function that forwards the nested feature value, unless it equals
// the target value (in which case, the feature doesn't fire).
class
Ignore
:
public
CompareValue
{
public:
void
InitTypes
()
override
{
this
->
set_feature_type
(
new
GenericFeatureTypes
::
WrappedFeatureType
(
this
->
nested
().
front
()
->
GetFeatureType
()));
}
FeatureValue
ComputeValue
(
FeatureValue
nested_feature_value
,
FeatureValue
target_value
)
const
override
{
return
nested_feature_value
==
target_value
?
GenericFeatureFunction
::
kNone
:
nested_feature_value
;
}
};
// Abstract base class for features that reduce several binary values to a
// to a single binary value.
//
// Subclasses must implement Compute().
class
BinaryReduce
:
public
MetaBase
{
enum
BinaryReduceFeatureValue
{
FALSE
,
TRUE
};
public:
// Initializes the feature.
// Checks that all the nested features are binary, and sets the output
// feature type to binary.
void
InitNested
(
TaskContext
*
context
)
override
{
for
(
const
Base
*
function
:
this
->
nested
())
{
FeatureType
*
nested_type
=
CHECK_NOTNULL
(
function
->
GetFeatureType
());
CHECK_EQ
(
nested_type
->
GetDomainSize
(),
2
)
<<
this
->
name
()
<<
" requires nested binary feature types only."
;
}
this
->
set_feature_type
(
new
EnumFeatureType
(
this
->
name
(),
{{
BinaryReduceFeatureValue
::
FALSE
,
"FALSE"
},
{
BinaryReduceFeatureValue
::
TRUE
,
"TRUE"
}}));
}
};
// A feature function that takes any number of binary nested features, and
// returns whether they all evaluate to 1.
class
All
:
public
BinaryReduce
{
public:
// Returns whether all nested feature values are 1, or kNone if any of them
// are unavailable.
FeatureValue
Compute
(
const
WorkspaceSet
&
workspaces
,
const
OBJ
&
object
,
ARGS
...
args
,
const
FeatureVector
*
fv
)
const
override
{
for
(
const
Base
*
function
:
this
->
nested
())
{
const
FeatureValue
value
=
function
->
Compute
(
workspaces
,
object
,
args
...,
fv
);
if
(
value
==
Base
::
kNone
)
return
Base
::
kNone
;
if
(
value
==
0
)
return
0
;
}
return
1
;
}
};
// A feature function that takes any number of binary nested features, and
// returns whether any of them evaluate to 1.
class
Any
:
public
BinaryReduce
{
public:
// Returns whether any nested feature values are 1, or kNone if any of them
// are unavailable.
FeatureValue
Compute
(
const
WorkspaceSet
&
workspaces
,
const
OBJ
&
object
,
ARGS
...
args
,
const
FeatureVector
*
fv
)
const
override
{
for
(
const
Base
*
function
:
this
->
nested
())
{
const
FeatureValue
value
=
function
->
Compute
(
workspaces
,
object
,
args
...,
fv
);
if
(
value
==
Base
::
kNone
)
return
Base
::
kNone
;
if
(
value
==
1
)
return
1
;
}
return
0
;
}
};
// A feature function that computes a fixed-size tuple.
template
<
int
kNumElements
>
class
StaticTuple
:
public
MetaBase
{
public:
// The associated fixed-size tuple type.
typedef
GenericFeatureTypes
::
StaticTupleFeatureType
<
kNumElements
>
Type
;
// Initializes the feature.
void
InitNested
(
TaskContext
*
context
)
override
{
std
::
vector
<
FeatureType
*>
sub_types
;
for
(
const
Base
*
function
:
this
->
nested
())
{
sub_types
.
push_back
(
CHECK_NOTNULL
(
function
->
GetFeatureType
()));
}
this
->
set_feature_type
(
new
Type
(
this
->
SubPrefix
(),
sub_types
));
}
// Returns the tuple value, or kNone if any sub-value is unavailable.
FeatureValue
Compute
(
const
WorkspaceSet
&
workspaces
,
const
OBJ
&
object
,
ARGS
...
args
,
const
FeatureVector
*
fv
)
const
override
{
const
auto
&
nested
=
this
->
nested
();
FeatureValue
values
[
kNumElements
];
for
(
int
i
=
0
;
i
<
kNumElements
;
++
i
)
{
const
FeatureValue
value
=
nested
[
i
]
->
Compute
(
workspaces
,
object
,
args
...,
fv
);
if
(
value
==
Base
::
kNone
)
return
Base
::
kNone
;
values
[
i
]
=
value
;
}
return
static_cast
<
Type
*>
(
this
->
feature_type
())
->
Conjoin
(
values
);
}
};
// Convenience aliases for common fixed-size tuples.
typedef
StaticTuple
<
2
>
Pair
;
typedef
StaticTuple
<
3
>
Triple
;
typedef
StaticTuple
<
4
>
Quad
;
typedef
StaticTuple
<
5
>
Quint
;
// A feature function that computes a dynamically-sized tuple.
class
Tuple
:
public
MetaBase
{
public:
// The associated tuple type.
typedef
GenericFeatureTypes
::
DynamicTupleFeatureType
Type
;
// Initializes the feature.
void
InitNested
(
TaskContext
*
context
)
override
{
std
::
vector
<
FeatureType
*>
sub_types
;
for
(
const
Base
*
function
:
this
->
nested
())
{
sub_types
.
push_back
(
CHECK_NOTNULL
(
function
->
GetFeatureType
()));
}
this
->
set_feature_type
(
new
Type
(
this
->
SubPrefix
(),
sub_types
));
}
// Returns the tuple value, or kNone if any sub-value is unavailable.
FeatureValue
Compute
(
const
WorkspaceSet
&
workspaces
,
const
OBJ
&
object
,
ARGS
...
args
,
const
FeatureVector
*
fv
)
const
override
{
std
::
vector
<
FeatureValue
>
values
;
for
(
const
Base
*
function
:
this
->
nested
())
{
const
FeatureValue
value
=
function
->
Compute
(
workspaces
,
object
,
args
...,
fv
);
if
(
value
==
Base
::
kNone
)
return
Base
::
kNone
;
values
.
push_back
(
value
);
}
return
static_cast
<
Type
*>
(
this
->
feature_type
())
->
Conjoin
(
values
);
}
};
// A feature function that creates all pairs of the features extracted by the
// nested feature functions. All the nested feature functions must return
// single valued features.
//
// Parameters:
// bool unary (false):
// If true, then unary features are also emitted.
class
Pairs
:
public
MetaBase
{
public:
// The pair feature type.
typedef
GenericFeatureTypes
::
StaticTupleFeatureType
<
2
>
Type
;
// Discards the pair types.
~
Pairs
()
override
{
for
(
Type
*
type
:
pairs_
)
delete
type
;
}
// Initializes the feature.
void
InitNested
(
TaskContext
*
context
)
override
{
unary_
=
this
->
GetParameter
(
"unary"
)
==
"true"
;
const
auto
&
nested
=
this
->
nested
();
CHECK_GE
(
nested
.
size
(),
2
)
<<
"The 'pairs' feature requires at least two sub-features."
;
// Get the types of all nested features.
types_
.
clear
();
for
(
const
Base
*
function
:
nested
)
{
types_
.
push_back
(
CHECK_NOTNULL
(
function
->
GetFeatureType
()));
}
// Initialize the pair types for all features.
pairs_
.
resize
(
NumPairs
(
nested
.
size
()));
for
(
int
right
=
1
;
right
<
nested
.
size
();
++
right
)
{
for
(
int
left
=
0
;
left
<
right
;
++
left
)
{
pairs_
[
PairIndex
(
left
,
right
)]
=
new
Type
(
this
->
SubPrefix
(),
{
types_
[
left
],
types_
[
right
]});
}
}
}
// Produces all feature types.
void
GetFeatureTypes
(
std
::
vector
<
FeatureType
*>
*
types
)
const
override
{
if
(
unary_
)
types
->
insert
(
types
->
end
(),
types_
.
begin
(),
types_
.
end
());
types
->
insert
(
types
->
end
(),
pairs_
.
begin
(),
pairs_
.
end
());
}
// Evaluates the feature.
void
Evaluate
(
const
WorkspaceSet
&
workspaces
,
const
OBJ
&
object
,
ARGS
...
args
,
FeatureVector
*
result
)
const
override
{
const
auto
&
nested
=
this
->
nested
();
// Collect all active feature sub-values.
std
::
vector
<
FeatureValue
>
values
(
nested
.
size
());
std
::
vector
<
int
>
active_indices
;
active_indices
.
reserve
(
nested
.
size
());
for
(
int
i
=
0
;
i
<
nested
.
size
();
++
i
)
{
values
[
i
]
=
nested
[
i
]
->
Compute
(
workspaces
,
object
,
args
...,
result
);
if
(
values
[
i
]
!=
Base
::
kNone
)
active_indices
.
push_back
(
i
);
}
// Optionally generate unary features.
if
(
unary_
)
{
for
(
int
index
:
active_indices
)
{
result
->
add
(
types_
[
index
],
values
[
index
]);
}
}
// Generate all feature pairs.
FeatureValue
pair_values
[
2
];
for
(
int
right
=
1
;
right
<
active_indices
.
size
();
++
right
)
{
int
right_index
=
active_indices
[
right
];
pair_values
[
1
]
=
values
[
right_index
];
for
(
int
left
=
0
;
left
<
right
;
++
left
)
{
int
left_index
=
active_indices
[
left
];
pair_values
[
0
]
=
values
[
left_index
];
Type
*
type
=
pairs_
[
PairIndex
(
left_index
,
right_index
)];
result
->
add
(
type
,
type
->
Conjoin
(
pair_values
));
}
}
}
private:
// Returns the number of pairs (i,j) where 0 <= i < j < size.
static
int
NumPairs
(
int
size
)
{
DCHECK_GE
(
size
,
0
);
return
(
size
*
(
size
-
1
))
/
2
;
}
// Returns the index for a pair (left,right) where left < right. The
// indices are suitable for densely linearizing pairs into an array.
static
int
PairIndex
(
int
left
,
int
right
)
{
DCHECK_LE
(
0
,
left
);
DCHECK_LT
(
left
,
right
);
return
left
+
NumPairs
(
right
);
}
// Whether to also emit unary features.
bool
unary_
=
false
;
// Feature types for all nested features. Not owned.
std
::
vector
<
FeatureType
*>
types_
;
// Feature types for all pairs. Indexed according to PairIndex(). Owned.
std
::
vector
<
Type
*>
pairs_
;
};
// Feature function for conjoining the first sub-feature with each of the
// rest of the sub-features.  For sub-features f0, f1, ..., fk it emits the
// pairs (f0,f1), (f0,f2), ..., (f0,fk), skipping any pair whose member is
// absent.
//
// Parameters:
//   bool unary (false):
//     If true, then unary features are also emitted.
class Conjoin : public MetaBase {
 public:
  // The pair feature type.
  typedef GenericFeatureTypes::StaticTupleFeatureType<2> Type;

  // Discards the pair types.  (The leading nullptr sentinel in pairs_ is
  // harmless: deleting a null pointer is a no-op.)
  ~Conjoin() override {
    for (Type *type : pairs_) delete type;
  }

  // Initializes the feature.  Requires at least two sub-features; reads the
  // "unary" parameter and builds one pair type per non-first sub-feature.
  void InitNested(TaskContext *context) override {
    unary_ = this->GetParameter("unary") == "true";
    const auto &nested = this->nested();
    CHECK_GE(nested.size(), 2)
        << "The 'conjoin' feature requires at least two sub-features.";

    // Get the types of the rest of the nested features.
    types_.clear();
    for (const Base *function : nested) {
      types_.push_back(CHECK_NOTNULL(function->GetFeatureType()));
    }

    // Initialize the pair types.  The first slot is a nullptr placeholder so
    // that pairs_[i] corresponds to types_[i] for i >= 1.
    pairs_.assign(1, nullptr);
    for (int i = 1; i < types_.size(); ++i) {
      pairs_.push_back(new Type(this->SubPrefix(), {types_[0], types_[i]}));
    }
  }

  // Produces all feature types.  Note that the first sub-feature's type is
  // never emitted as a unary feature (see Evaluate()), so both insertions
  // start at begin() + 1.
  void GetFeatureTypes(std::vector<FeatureType *> *types) const override {
    if (unary_)
      types->insert(types->end(), types_.begin() + 1, types_.end());
    types->insert(types->end(), pairs_.begin() + 1, pairs_.end());
  }

  // Evaluates the feature.  values[0] holds the first sub-feature's value,
  // values[1] the current non-first sub-feature's value.
  void Evaluate(const WorkspaceSet &workspaces, const OBJ &object,
                ARGS... args, FeatureVector *result) const override {
    const auto &nested = this->nested();
    FeatureValue values[2];
    values[0] = nested[0]->Compute(workspaces, object, args..., result);

    // Stop early if the first feature is absent.  No pairs can be produced,
    // but unary features for the remaining sub-features are still emitted
    // when requested.
    if (values[0] == Base::kNone) {
      if (unary_) {
        for (int i = 1; i < nested.size(); ++i) {
          values[1] = nested[i]->Compute(workspaces, object, args..., result);
          if (values[1] == Base::kNone) continue;
          result->add(types_[i], values[1]);
        }
      }
      return;
    }

    // Otherwise, the first feature exists; conjoin it with the rest.
    // Sub-features that are absent are skipped entirely.
    for (int i = 1; i < nested.size(); ++i) {
      values[1] = nested[i]->Compute(workspaces, object, args..., result);
      if (values[1] == Base::kNone) continue;
      if (unary_) result->add(types_[i], values[1]);
      result->add(pairs_[i], pairs_[i]->Conjoin(values));
    }
  }

 private:
  // Whether to also emit unary features.
  bool unary_ = false;

  // Feature types for all nested features. Not owned.
  std::vector<FeatureType *> types_;

  // Feature types for all pairs. The first element is null, in order to
  // align this list with types_. Owned.
  std::vector<Type *> pairs_;
};
// Feature function for creating pairs of multi-valued features. By default,
// the feature computes the Cartesian product of the extracted sub-features,
// but a parallel product can be specified via the options.
//
// Parameters:
//   bool parallel (false):
//     If true, output features for parallel pairs, like a dot product. The
//     two sub-features must produce identical numbers of features.
class MultiPair : public MetaBase {
 public:
  // The pair feature type.
  typedef GenericFeatureTypes::StaticTupleFeatureType<2> Type;

  // Initializes the feature: reads the "parallel" parameter and builds a
  // single tuple type over the sub-feature types.
  void InitNested(TaskContext *context) override {
    parallel_ = this->GetParameter("parallel") == "true";
    std::vector<FeatureType *> sub_types;
    for (const Base *function : this->nested()) {
      sub_types.push_back(CHECK_NOTNULL(function->GetFeatureType()));
    }
    this->set_feature_type(new Type(this->SubPrefix(), sub_types));
  }

  // Evaluates the feature.  Each half is evaluated into |result| and then
  // copied out and truncated away; only the combined pairs remain in the
  // final |result|.
  void Evaluate(const WorkspaceSet &workspaces, const OBJ &object,
                ARGS... args, FeatureVector *result) const override {
    const auto &nested = this->nested();
    const int orig_size = result->size();

    // Extract features from left half. Values are extracted directly into
    // the result so that optimized variable references are handled properly.
    nested[0]->Evaluate(workspaces, object, args..., result);
    if (orig_size == result->size()) return;  // no left features
    std::vector<FeatureValue> left;
    for (int i = orig_size; i < result->size(); ++i) {
      left.push_back(result->value(i));
    }
    result->Truncate(orig_size);

    // Extract features from right half.
    nested[1]->Evaluate(workspaces, object, args..., result);
    if (orig_size == result->size()) return;  // no right features
    std::vector<FeatureValue> right;
    for (int i = orig_size; i < result->size(); ++i) {
      right.push_back(result->value(i));
    }
    result->Truncate(orig_size);

    // Compute the pair values.
    FeatureValue values[2];
    Type *type = static_cast<Type *>(this->feature_type());
    if (parallel_) {
      // Produce parallel pairs: (left[i], right[i]) for each i.  The two
      // halves must have extracted the same number of values.
      CHECK_EQ(left.size(), right.size());
      for (int i = 0; i < left.size(); ++i) {
        values[0] = left[i];
        values[1] = right[i];
        result->add(type, type->Conjoin(values));
      }
    } else {
      // Produce all pairs (Cartesian product).
      for (const FeatureValue left_value : left) {
        values[0] = left_value;
        for (const FeatureValue right_value : right) {
          values[1] = right_value;
          result->add(type, type->Conjoin(values));
        }
      }
    }
  }

 private:
  // Whether to do a parallel product instead of a Cartesian product.
  bool parallel_ = false;
};
// Feature function for conjoining the first multi-valued sub-feature with
// each of the rest of the multi-valued sub-features: for sub-features
// f0, f1, ..., fk it emits the Cartesian products f0 x f1, f0 x f2, ...,
// f0 x fk.
class MultiConjoin : public MetaBase {
 public:
  // The pair feature type.
  typedef GenericFeatureTypes::StaticTupleFeatureType<2> Type;

  // Discards the pair types.
  ~MultiConjoin() override {
    for (Type *type : pairs_) delete type;
  }

  // Initializes the feature.  Requires at least two sub-features and builds
  // one pair type per non-first sub-feature; pairs_[i-1] corresponds to the
  // pair (sub-feature 0, sub-feature i).
  void InitNested(TaskContext *context) override {
    const auto &nested = this->nested();
    CHECK_GE(nested.size(), 2)
        << "The 'multiconjoin' feature requires at least two sub-features.";

    // Get the types of the rest of the nested features.
    std::vector<FeatureType *> types;
    types.reserve(nested.size());
    for (const Base *function : nested) {
      types.push_back(CHECK_NOTNULL(function->GetFeatureType()));
    }

    // Initialize the pair types.
    pairs_.clear();
    for (int i = 1; i < types.size(); ++i) {
      pairs_.push_back(new Type(this->SubPrefix(), {types[0], types[i]}));
    }
  }

  // Produces all feature types (one tuple type per non-first sub-feature).
  void GetFeatureTypes(std::vector<FeatureType *> *types) const override {
    types->insert(types->end(), pairs_.begin(), pairs_.end());
  }

  // Evaluates the feature.
  void Evaluate(const WorkspaceSet &workspaces, const OBJ &object,
                ARGS... args, FeatureVector *result) const override {
    const auto &nested = this->nested();
    const int orig_size = result->size();

    // Gather the lists of sub-values for each nested feature. Sub-values
    // are extracted directly into the result so that optimized variable
    // references are handled properly, then copied out and truncated away.
    std::vector<std::vector<FeatureValue> > sub_values(nested.size());
    for (int i = 0; i < nested.size(); ++i) {
      nested[i]->Evaluate(workspaces, object, args..., result);
      if (orig_size == result->size()) {
        if (i == 0) {
          return;  // no first values; nothing will be extracted
        } else {
          continue;  // no non-first values; skip to next feature
        }
      }
      std::vector<FeatureValue> &values = sub_values[i];
      for (int j = orig_size; j < result->size(); ++j) {
        values.push_back(result->value(j));
      }
      result->Truncate(orig_size);
    }

    // Produce conjoined features: first_values x other_values for each
    // non-first sub-feature that extracted anything.
    const std::vector<FeatureValue> &first_values = sub_values[0];
    FeatureValue values[2];
    for (int i = 1; i < sub_values.size(); ++i) {
      const std::vector<FeatureValue> &other_values = sub_values[i];
      if (other_values.empty()) continue;
      Type *type = pairs_[i - 1];
      for (const FeatureValue first_value : first_values) {
        values[0] = first_value;
        for (const FeatureValue other_value : other_values) {
          values[1] = other_value;
          result->add(type, type->Conjoin(values));
        }
      }
    }
  }

 private:
  // Feature types for all pairs. Owned.
  std::vector<Type *> pairs_;
};
};
// Registers a single generic feature class |generics|::|type| under |name|
// in the registry of |generics|::Base feature functions.
#define REGISTER_SYNTAXNET_GENERIC_FEATURE(generics, name, type) \
  typedef generics::type __##type##generics; \
  REGISTER_SYNTAXNET_FEATURE_FUNCTION(generics::Base, name, __##type##generics)

// Registers the full suite of generic features for an instantiation of the
// GenericFeatures template.
#define REGISTER_SYNTAXNET_GENERIC_FEATURES(generics) \
  REGISTER_SYNTAXNET_GENERIC_FEATURE(generics, "bias", Bias); \
  REGISTER_SYNTAXNET_GENERIC_FEATURE(generics, "constant", Constant); \
  REGISTER_SYNTAXNET_GENERIC_FEATURE(generics, "equals", Equals); \
  REGISTER_SYNTAXNET_GENERIC_FEATURE(generics, "filter", Filter); \
  REGISTER_SYNTAXNET_GENERIC_FEATURE(generics, "is", Is); \
  REGISTER_SYNTAXNET_GENERIC_FEATURE(generics, "all", All); \
  REGISTER_SYNTAXNET_GENERIC_FEATURE(generics, "any", Any); \
  REGISTER_SYNTAXNET_GENERIC_FEATURE(generics, "pair", Pair); \
  REGISTER_SYNTAXNET_GENERIC_FEATURE(generics, "triple", Triple); \
  REGISTER_SYNTAXNET_GENERIC_FEATURE(generics, "quad", Quad); \
  REGISTER_SYNTAXNET_GENERIC_FEATURE(generics, "quint", Quint); \
  REGISTER_SYNTAXNET_GENERIC_FEATURE(generics, "tuple", Tuple); \
  REGISTER_SYNTAXNET_GENERIC_FEATURE(generics, "pairs", Pairs); \
  REGISTER_SYNTAXNET_GENERIC_FEATURE(generics, "conjoin", Conjoin); \
  REGISTER_SYNTAXNET_GENERIC_FEATURE(generics, "multipair", MultiPair); \
  REGISTER_SYNTAXNET_GENERIC_FEATURE(generics, "ignore", Ignore); \
  REGISTER_SYNTAXNET_GENERIC_FEATURE(generics, "multiconjoin", MultiConjoin)
}
// namespace syntaxnet
#endif // SYNTAXNET_GENERIC_FEATURES_H_
research/syntaxnet/syntaxnet/generic_features_test.cc
0 → 100644
View file @
4364390a
/* Copyright 2016 Google Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "syntaxnet/generic_features.h"
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "syntaxnet/registry.h"
#include "syntaxnet/task_context.h"
#include <gmock/gmock.h>
namespace
syntaxnet
{
// Test feature extractor.  The "object" is a std::vector<int> and the
// extraction argument is an int focus index into that vector.
class TestFeatureExtractor
    : public FeatureExtractor<std::vector<int>, int> {};
// Registration macro for test feature functions over std::vector<int>.
#define REGISTER_TEST_FEATURE_FUNCTION(name, component) \
  REGISTER_SYNTAXNET_FEATURE_FUNCTION(TestFeatureExtractor::Function, name, \
                                      component)

// The registry must be declared in the global namespace.
REGISTER_SYNTAXNET_CLASS_REGISTRY("syntaxnet test feature function",
                                  syntaxnet::TestFeatureExtractor::Function);

// Instantiate and register the generic features for the test extractor.
typedef GenericFeatures<std::vector<int>, int> GenericTestFeatures;
REGISTER_SYNTAXNET_GENERIC_FEATURES(GenericTestFeatures);
// Test feature "f" that reads elements of the vector at one or more offsets
// from the focus.  The offsets are encoded as the decimal digits of the
// feature's integer argument, e.g. f(12) reads offsets 1 and 2; f or f(0)
// reads offset 0.
class TestVectorFeatureFunction : public TestFeatureExtractor::Function {
 public:
  // Initializes the feature: decodes the argument's digits into offsets_
  // (most-significant digit first) and declares a numeric feature type with
  // 10 possible values.
  void Init(TaskContext *context) override {
    int arg = argument();
    while (arg > 0) {
      offsets_.push_back(arg % 10);
      arg /= 10;
    }
    // Digits were collected least-significant first; restore textual order.
    std::reverse(offsets_.begin(), offsets_.end());
    if (offsets_.empty()) offsets_.push_back(0);
    set_feature_type(new NumericFeatureType(name(), 10));
  }

  // Evaluates the feature: emits object[focus + offset] for each configured
  // offset that lands inside the vector.  Note that focus + offset is stored
  // as uint32, so a negative focus wraps to a huge value and is rejected by
  // the size check.
  void Evaluate(const WorkspaceSet &workspace, const std::vector<int> &object,
                int focus, FeatureVector *features) const override {
    for (const uint32 offset : offsets_) {
      const uint32 index = focus + offset;
      if (index >= object.size()) continue;
      features->add(feature_type(), object[index]);
    }
  }

  // Returns the first extracted feature, if available.  Only valid for
  // single-offset configurations (enforced by the CHECK).
  FeatureValue Compute(const WorkspaceSet &workspace,
                       const std::vector<int> &object, int focus,
                       const FeatureVector *fv) const override {
    CHECK_EQ(1, offsets_.size());
    FeatureVector features;
    Evaluate(workspace, object, focus, &features);
    return features.size() == 0 ? kNone : features.value(0);
  }

 private:
  // A list of offsets extracted from the feature's argument.
  std::vector<uint32> offsets_;
};
REGISTER_TEST_FEATURE_FUNCTION
(
"f"
,
TestVectorFeatureFunction
);
class
TestParityFeatureFunction
:
public
TestFeatureExtractor
::
Function
{
public:
// Initializes the feature.
void
Init
(
TaskContext
*
context
)
override
{
// "even" corresponds to feature value 0, "odd" to 1.
enum
ParityFeatureValue
{
EVEN
,
ODD
};
set_feature_type
(
new
EnumFeatureType
(
name
(),
{{
EVEN
,
"even"
},
{
ODD
,
"odd"
}}));
// Check the "offset" parameter.
for
(
const
auto
&
param
:
this
->
descriptor
()
->
parameter
())
{
if
(
param
.
name
()
==
"offset"
)
{
offset_
=
std
::
stoi
(
param
.
value
());
CHECK
(
&
offset_
);
}
}
}
// Evaluates the feature.
void
Evaluate
(
const
WorkspaceSet
&
workspace
,
const
std
::
vector
<
int
>
&
object
,
int
focus
,
FeatureVector
*
features
)
const
override
{
uint32
offset_focus
=
focus
+=
offset_
;
if
(
offset_focus
<
object
.
size
())
{
features
->
add
(
feature_type
(),
object
[
offset_focus
]
&
1
);
}
}
// Returns the first extracted feature, if available.
FeatureValue
Compute
(
const
WorkspaceSet
&
workspace
,
const
std
::
vector
<
int
>
&
object
,
int
focus
,
const
FeatureVector
*
fv
)
const
override
{
FeatureVector
features
;
Evaluate
(
workspace
,
object
,
focus
,
&
features
);
return
features
.
size
()
==
0
?
kNone
:
features
.
value
(
0
);
}
private:
int
offset_
=
0
;
};
REGISTER_TEST_FEATURE_FUNCTION
(
"parity"
,
TestParityFeatureFunction
);
// Testing rig.  Each test calls Init() with a feature spec and an input
// vector, then TestExtract() with a focus and the expected feature string.
class GenericFeaturesTest : public ::testing::Test {
 public:
  // Deallocates test state.
  void TearDown() override {
    object_.reset();
    extractor_.reset();
    context_.reset();
  }

  // Initializes the test: parses and initializes the feature extractor from
  // the |spec| and makes a local copy of the input |object|.
  void Init(const string &spec, const std::vector<int> &object) {
    context_.reset(new TaskContext());
    extractor_.reset(new TestFeatureExtractor());
    extractor_->Parse(spec);
    extractor_->Setup(context_.get());
    extractor_->Init(context_.get());
    object_.reset(new std::vector<int>(object));
  }

  // Tests extraction on the current object: extracts features at |focus| and
  // compares their string representation to |feature_string|.
  void TestExtract(int focus, const string &feature_string) const {
    FeatureVector features;
    WorkspaceSet workspace;
    extractor_->Preprocess(&workspace, object_.get());
    extractor_->ExtractFeatures(workspace, *object_, focus, &features);
    EXPECT_EQ(feature_string, features.ToString());
  }

 private:
  // The task context for tests.
  std::unique_ptr<TaskContext> context_;

  // Feature extractor for tests.
  std::unique_ptr<TestFeatureExtractor> extractor_;

  // Object for tests.
  std::unique_ptr<std::vector<int>> object_;
};
// A lone 'f' extracts the element at the focus; out-of-range foci extract
// nothing.
TEST_F(GenericFeaturesTest, Singleton) {
  Init("f", {5, 3, 2, 4, 6});
  TestExtract(0, "[f=5]");
  TestExtract(1, "[f=3]");
  TestExtract(4, "[f=6]");
  TestExtract(5, "[]");
}

// Two independent features are both extracted.
TEST_F(GenericFeaturesTest, TwoFeatures) {
  Init("f(0) f(1)", {5, 3, 2, 4, 6});
  TestExtract(0, "[f=5,f(1)=3]");
}

// 'bias' always fires, regardless of the input.
TEST_F(GenericFeaturesTest, Bias) {
  Init("bias", {0, 1});
  TestExtract(0, "[bias=ON]");
}

// 'constant' emits its configured value, regardless of the input.
TEST_F(GenericFeaturesTest, Constant) {
  Init("constant(value=2)", {0, 1});
  TestExtract(0, "[constant(value=2)=2]");
}

// 'equals' compares the values of its two sub-features.
TEST_F(GenericFeaturesTest, Equals) {
  Init("equals { f(0) f(1) }", {0, 1, 0});
  TestExtract(0, "[equals { f f(1) }=DIFFERENT]");
  Init("equals { f(0) f(2) }", {0, 1, 0});
  TestExtract(0, "[equals { f f(2) }=EQUAL]");
}

// 'filter' fires only when the sub-feature matches the configured value.
TEST_F(GenericFeaturesTest, Filter) {
  Init("filter(value=5).f", {3, 5});
  TestExtract(0, "[]");
  TestExtract(1, "[filter(value=5).f=ON]");

  // Check that we are actually parsing feature value names.
  Init("filter(value=odd).parity", {3, 4});
  TestExtract(0, "[filter(value=odd).parity=ON]");
  TestExtract(1, "[]");
  Init("filter(value=even).parity", {3, 4});
  TestExtract(0, "[]");
  TestExtract(1, "[filter(value=even).parity=ON]");
}

// 'is' always fires, reporting whether the sub-feature matches the value.
TEST_F(GenericFeaturesTest, Is) {
  Init("is(value=5).f", {3, 5});
  TestExtract(0, "[is(value=5).f=FALSE]");
  TestExtract(1, "[is(value=5).f=TRUE]");

  // Check that we are actually parsing feature value names.
  Init("is(value=odd).parity", {3, 4});
  TestExtract(0, "[is(value=odd).parity=TRUE]");
  TestExtract(1, "[is(value=odd).parity=FALSE]");
  Init("is(value=even).parity", {3, 4});
  TestExtract(0, "[is(value=even).parity=FALSE]");
  TestExtract(1, "[is(value=even).parity=TRUE]");
}

// 'ignore' suppresses the sub-feature when it matches the configured value,
// and otherwise passes it through.
TEST_F(GenericFeaturesTest, Ignore) {
  Init("ignore(value=5).f", {3, 5});
  TestExtract(0, "[ignore(value=5).f=3]");
  TestExtract(1, "[]");

  // Check that we are actually parsing feature value names.
  Init("ignore(value=odd).parity", {3, 4});
  TestExtract(0, "[]");
  TestExtract(1, "[ignore(value=odd).parity=even]");
  Init("ignore(value=even).parity", {3, 4});
  TestExtract(0, "[ignore(value=even).parity=odd]");
  TestExtract(1, "[]");
}
// 'all' fires TRUE only when every sub-feature fires.
TEST_F(GenericFeaturesTest, All) {
  Init("all { parity parity(offset=1) }", {2, 2});
  TestExtract(0, "[all { parity parity(offset=1) }=FALSE]");
  Init("all { parity parity(offset=1) }", {2, 3});
  TestExtract(0, "[all { parity parity(offset=1) }=FALSE]");
  Init("all { parity parity(offset=1) }", {3, 2});
  TestExtract(0, "[all { parity parity(offset=1) }=FALSE]");
  Init("all { parity parity(offset=1) }", {3, 3});
  TestExtract(0, "[all { parity parity(offset=1) }=TRUE]");
}

// 'any' fires TRUE when at least one sub-feature fires.
TEST_F(GenericFeaturesTest, Any) {
  Init("any { parity parity(offset=1) }", {2, 2});
  TestExtract(0, "[any { parity parity(offset=1) }=FALSE]");
  Init("any { parity parity(offset=1) }", {2, 3});
  TestExtract(0, "[any { parity parity(offset=1) }=TRUE]");
  Init("any { parity parity(offset=1) }", {3, 2});
  TestExtract(0, "[any { parity parity(offset=1) }=TRUE]");
  Init("any { parity parity(offset=1) }", {3, 3});
  TestExtract(0, "[any { parity parity(offset=1) }=TRUE]");
}

// 'pair' conjoins two sub-features into one tuple value.
TEST_F(GenericFeaturesTest, Pair) {
  Init("pair { f(0) f(1) }", {5, 3, 2, 4, 6});
  TestExtract(0, "[pair { f f(1) }=(5,3)]");
}

// Tuple features compose: a pair of pairs yields nested tuple values.
TEST_F(GenericFeaturesTest, NestedPair) {
  Init("pair { pair { f(0) f(1) } pair { f(2) f(3) } }", {5, 3, 2, 4, 6});
  TestExtract(0, "[pair { pair { f f(1) } pair { f(2) f(3) } }=((5,3),(2,4))]");
}

// 'triple', 'quad', and 'quint' are fixed-arity tuples.
TEST_F(GenericFeaturesTest, Triple) {
  Init("triple { f(0) f(1) f(2) }", {5, 3, 2, 4, 6});
  TestExtract(0, "[triple { f f(1) f(2) }=(5,3,2)]");
}

TEST_F(GenericFeaturesTest, Quad) {
  Init("quad { f(0) f(1) f(2) f(3) }", {5, 3, 2, 4, 6});
  TestExtract(0, "[quad { f f(1) f(2) f(3) }=(5,3,2,4)]");
}

TEST_F(GenericFeaturesTest, Quint) {
  Init("quint { f(0) f(1) f(2) f(3) f(4) }", {5, 3, 2, 4, 6});
  TestExtract(0, "[quint { f f(1) f(2) f(3) f(4) }=(5,3,2,4,6)]");
}

// 'tuple' is the variable-arity tuple.
TEST_F(GenericFeaturesTest, Tuple) {
  Init("tuple { f(0) f(1) f(2) f(3) f(4) }", {5, 3, 2, 4, 6});
  TestExtract(0, "[tuple { f f(1) f(2) f(3) f(4) }=(5,3,2,4,6)]");
}

// 'pairs' emits every unordered pair of sub-features.
TEST_F(GenericFeaturesTest, Pairs) {
  Init("pairs { f(0) f(1) f(2) f(3) }", {0, 1, 2, 3, 4});
  TestExtract(0,
              "[pairs { f f(1) }=(0,1)"
              ",pairs { f f(2) }=(0,2)"
              ",pairs { f(1) f(2) }=(1,2)"
              ",pairs { f f(3) }=(0,3)"
              ",pairs { f(1) f(3) }=(1,3)"
              ",pairs { f(2) f(3) }=(2,3)]");
}

// With unary=true, 'pairs' also emits each sub-feature on its own.
TEST_F(GenericFeaturesTest, PairsWithUnary) {
  Init("pairs(unary=true) { f(0) f(1) f(2) }", {0, 1, 2, 3, 4});
  TestExtract(0,
              "[pairs(unary=true).f=0"
              ",pairs(unary=true).f(1)=1"
              ",pairs(unary=true).f(2)=2"
              ",pairs(unary=true) { f f(1) }=(0,1)"
              ",pairs(unary=true) { f f(2) }=(0,2)"
              ",pairs(unary=true) { f(1) f(2) }=(1,2)]");
}

// 'conjoin' pairs the first sub-feature with each of the rest.
TEST_F(GenericFeaturesTest, Conjoin) {
  Init("conjoin { f(0) f(1) f(2) f(3) }", {0, 1, 2, 3, 4});
  TestExtract(0,
              "[conjoin { f f(1) }=(0,1)"
              ",conjoin { f f(2) }=(0,2)"
              ",conjoin { f f(3) }=(0,3)]");
}

// With unary=true, 'conjoin' also emits the non-first sub-features alone.
TEST_F(GenericFeaturesTest, ConjoinWithUnary) {
  Init("conjoin(unary=true) { f(0) f(1) f(2) f(3) }", {0, 1, 2, 3, 4});
  TestExtract(0,
              "[conjoin(unary=true).f(1)=1"
              ",conjoin(unary=true) { f f(1) }=(0,1)"
              ",conjoin(unary=true).f(2)=2"
              ",conjoin(unary=true) { f f(2) }=(0,2)"
              ",conjoin(unary=true).f(3)=3"
              ",conjoin(unary=true) { f f(3) }=(0,3)]");
}
// f(12) has two offsets and so produces two values per focus.
TEST_F(GenericFeaturesTest, SingletonMultiValue) {
  Init("f(12)", {0, 1, 2, 3, 4});
  TestExtract(0, "[f(12)=1,f(12)=2]");
}

// 'multipair' with one multi-valued side pairs each left value with the
// single right value.
TEST_F(GenericFeaturesTest, MultiPairOneSided) {
  Init("multipair { f(12) f(3) }", {0, 1, 2, 3, 4});
  TestExtract(0,
              "[multipair { f(12) f(3) }=(1,3)"
              ",multipair { f(12) f(3) }=(2,3)]");
}

// With both sides multi-valued, 'multipair' emits the Cartesian product.
TEST_F(GenericFeaturesTest, MultiPairTwoSided) {
  Init("multipair { f(12) f(34) }", {0, 1, 2, 3, 4});
  TestExtract(0,
              "[multipair { f(12) f(34) }=(1,3)"
              ",multipair { f(12) f(34) }=(1,4)"
              ",multipair { f(12) f(34) }=(2,3)"
              ",multipair { f(12) f(34) }=(2,4)]");
}

// With parallel=true, 'multipair' pairs values position-by-position.
TEST_F(GenericFeaturesTest, MultiPairParallel) {
  Init("multipair(parallel=true) { f(12) f(34) }", {0, 1, 2, 3, 4});
  TestExtract(0,
              "[multipair(parallel=true) { f(12) f(34) }=(1,3)"
              ",multipair(parallel=true) { f(12) f(34) }=(2,4)]");
}

// 'multiconjoin' crosses the first sub-feature's values with each of the
// other sub-features' values.
TEST_F(GenericFeaturesTest, MultiConjoinFirstOnly) {
  Init("multiconjoin { f(12) f(3) f(0) }", {0, 1, 2, 3, 4});
  TestExtract(0,
              "[multiconjoin { f(12) f(3) }=(1,3)"
              ",multiconjoin { f(12) f(3) }=(2,3)"
              ",multiconjoin { f(12) f }=(1,0)"
              ",multiconjoin { f(12) f }=(2,0)]");
}

TEST_F(GenericFeaturesTest, MultiConjoinFirstAndRest) {
  Init("multiconjoin { f(12) f(34) f(0) }", {0, 1, 2, 3, 4});
  TestExtract(0,
              "[multiconjoin { f(12) f(34) }=(1,3)"
              ",multiconjoin { f(12) f(34) }=(1,4)"
              ",multiconjoin { f(12) f(34) }=(2,3)"
              ",multiconjoin { f(12) f(34) }=(2,4)"
              ",multiconjoin { f(12) f }=(1,0)"
              ",multiconjoin { f(12) f }=(2,0)]");
}
}
// namespace syntaxnet
research/syntaxnet/syntaxnet/graph_builder.py
View file @
4364390a
...
...
@@ -485,6 +485,7 @@ class GreedyParser(object):
vectors
=
embeddings_path
,
task_context
=
task_context
,
embedding_init
=
self
.
_embedding_init
,
cache_vectors_locally
=
False
,
seed
=
seed1
,
seed2
=
seed2
)
...
...
research/syntaxnet/syntaxnet/head_label_transitions.cc
0 → 100644
View file @
4364390a
/* Copyright 2017 Google Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "syntaxnet/head_label_transitions.h"
#include "syntaxnet/base.h"
using
tensorflow
::
strings
::
StrAppend
;
using
tensorflow
::
strings
::
StrCat
;
namespace
syntaxnet
{
// Parser transition state for head & label transitions.  It stores no extra
// data of its own; it defines how a finished parse is written back to the
// Sentence proto and how per-token correctness is judged.
class HeadLabelTransitionSystem::State : public ParserTransitionState {
 public:
  // Returns a copy of this state.
  State *Clone() const override { return new State(*this); }

  // Does nothing; no need for additional initialization.
  void Init(ParserState *state) override {}

  // Copies the selected heads and labels to the |sentence|.
  void AddParseToDocument(const ParserState &state, bool rewrite_root_labels,
                          Sentence *sentence) const override {
    for (int i = 0; i < state.NumTokens(); ++i) {
      const int head = state.Head(i);
      int label = state.Label(i);
      // Root tokens (head == -1) optionally get the designated root label.
      if (rewrite_root_labels && head == -1) label = state.RootLabel();
      Token *token = sentence->mutable_token(i);
      token->set_head(head);
      token->set_label(state.LabelAsString(label));
    }
  }

  // A token is correct iff its predicted head matches the gold head.
  bool IsTokenCorrect(const ParserState &state, int index) const override {
    return state.Head(index) == state.GoldHead(index);
  }

  // Renders the |state| as a space-separated, bracketed list of heads.
  string ToString(const ParserState &state) const override {
    string str = "[";
    const char *separator = "";
    for (int i = 0; i < state.NumTokens(); ++i) {
      StrAppend(&str, separator, state.Head(i));
      separator = " ";
    }
    StrAppend(&str, "]");
    return str;
  }
};
ParserAction HeadLabelTransitionSystem::GetDefaultAction(
    const ParserState &state) const {
  // Default: make the next token a root (encoded as a self-loop) carrying
  // the root label.
  return EncodeActionWithState(state.Next(), state.RootLabel(), state);
}
ParserAction HeadLabelTransitionSystem::GetNextGoldAction(
    const ParserState &state) const {
  if (state.EndOfInput()) {
    LOG(ERROR) << "Oracle called on invalid state: " << state.ToString();
    return 0;
  }

  const int current = state.Next();
  const int gold_head = state.GoldHead(current);
  const int gold_label = state.GoldLabel(current);

  // In syntaxnet.Sentence, root arcs are token.head() == -1, whereas here
  // roots are represented as self-loops, so translate before encoding.
  const int head = (gold_head == -1) ? current : gold_head;
  return EncodeActionWithState(head, gold_label, state);
}
void HeadLabelTransitionSystem::PerformActionWithoutHistory(
    ParserAction action, ParserState *state) const {
  CHECK(IsAllowedAction(action, *state))
      << "Illegal action " << action << " at state: " << state->ToString();

  const int current = state->Next();
  int head = 0;
  int label = 0;
  DecodeActionWithState(action, *state, &head, &label);
  VLOG(2) << "Adding arc: " << label << " (" << current << " <- " << head
          << ")";

  // A self-loop encodes a root arc, which ParserState stores as head == -1.
  const int stored_head = (head == current) ? -1 : head;
  state->AddArc(current, stored_head, label);
  state->Advance();
}
bool HeadLabelTransitionSystem::IsAllowedAction(
    ParserAction action, const ParserState &state) const {
  if (state.EndOfInput()) return false;

  // Any (head, label) combination over the sentence's tokens is legal.
  // Unlike the labels transition system, we allow root tokens to receive
  // non-root dependency labels and vice versa.
  const int num_actions = state.NumTokens() * state.NumLabels();
  return 0 <= action && action < num_actions;
}
// Parsing is complete once the input pointer has advanced past the last
// token, i.e. every token has been assigned a head and label.
bool HeadLabelTransitionSystem::IsFinalState(const ParserState &state) const {
  return state.EndOfInput();
}
string HeadLabelTransitionSystem::ActionAsString(
    ParserAction action, const ParserState &state) const {
  if (!IsAllowedAction(action, state)) return StrCat("INVALID:", action);

  int head = 0;
  int label = 0;
  DecodeActionWithState(action, state, &head, &label);
  const auto &sentence = state.sentence();
  const int current = state.Next();

  // A self-loop encodes a root arc; display "ROOT" instead of a head word.
  const string head_word =
      (head == current) ? "ROOT" : sentence.token(head).word();
  return StrCat(state.LabelAsString(label), "(",
                sentence.token(current).word(), "<-", head_word, ")");
}
// Returns a fresh transition state; |training_mode| is unused because State
// carries no mode-dependent data.
ParserTransitionState *HeadLabelTransitionSystem::NewTransitionState(
    bool training_mode) const {
  return new State();
}
void HeadLabelTransitionSystem::DecodeActionWithState(
    ParserAction action, const ParserState &state, ParserAction *base_action,
    int *label) const {
  // Inverse of EncodeActionWithState(): actions are laid out as
  // base_action * NumLabels() + label.
  const int num_labels = state.NumLabels();
  *label = action % num_labels;
  *base_action = action / num_labels;
}
ParserAction HeadLabelTransitionSystem::EncodeActionWithState(
    ParserAction base_action, int label, const ParserState &state) const {
  // Row-major layout over (base_action, label); see DecodeActionWithState().
  const int num_labels = state.NumLabels();
  return num_labels * base_action + label;
}
REGISTER_TRANSITION_SYSTEM
(
"heads_labels"
,
HeadLabelTransitionSystem
);
}
// namespace syntaxnet
research/syntaxnet/syntaxnet/head_label_transitions.h
0 → 100644
View file @
4364390a
/* Copyright 2017 Google Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef SYNTAXNET_HEAD_LABEL_TRANSITIONS_H_
#define SYNTAXNET_HEAD_LABEL_TRANSITIONS_H_
#include "syntaxnet/parser_state.h"
#include "syntaxnet/parser_transitions.h"
namespace
syntaxnet
{
// Heads and labels transition system. Predicts the syntactic heads and labels
// of a sentence directly.
//
// In this transition system actions encode heads and their labels, so the
// space of actions is num_labels*N (for a sentence with N tokens.) A token
// that points to itself is interpreted as a root. Unlike the heads transition
// system followed by labels, we allow root arcs to receive non-root
// dependency labels and vice versa since, unlike in the labels transition
// system, it is unclear whether the arc or label prediction should take
// precedence.
//
// Actions are interpreted as follows:
//
// For input pointer at position i:
// head = A / num_labels
// label = A % num_labels
// if head == i : Add a root arc to token i (with given label)
// if head != i : Add an arc head -> i (with given label)
//
// Note that in syntaxnet.Sentence, root arcs are token.head() == -1, whereas
// here, we use a self-loop to represent roots.
class HeadLabelTransitionSystem : public ParserTransitionSystem {
 public:
  class State;  // defined in the .cc file

  // There is a single action type: pick (head, label) for the next token.
  int NumActionTypes() const override { return 1; }

  // The action space is num_labels * num_tokens and therefore depends on the
  // sentence, so it cannot be declared statically.
  int NumActions(int num_labels) const override { return kDynamicNumActions; }

  // The default action is to assign itself as root.
  ParserAction GetDefaultAction(const ParserState &state) const override;

  // Returns the next gold action for a given state according to the
  // underlying annotated sentence.
  ParserAction GetNextGoldAction(const ParserState &state) const override;

  // Checks if the action is allowed in a given parser state.
  bool IsAllowedAction(ParserAction action,
                       const ParserState &state) const override;

  // Performs the specified action on a given parser state, without adding the
  // action to the state's history.
  void PerformActionWithoutHistory(ParserAction action,
                                   ParserState *state) const override;

  // Returns true if the state is at the end of the input.
  bool IsFinalState(const ParserState &state) const override;

  // Returns a string representation of a parser action.
  string ActionAsString(ParserAction action,
                        const ParserState &state) const override;

  // Returns a new transition state to be used to enhance the parser state.
  ParserTransitionState *NewTransitionState(bool training_mode) const override;

  // Returns false, since no states are deterministic.
  bool IsDeterministicState(const ParserState &state) const override {
    return false;
  }

 private:
  // Given a ParseState, decodes an action into a base action and a label.
  void DecodeActionWithState(ParserAction action, const ParserState &state,
                             ParserAction *base_action, int *label) const;

  // Given a ParseState, encodes a base action and a label into a single-valued
  // function.
  ParserAction EncodeActionWithState(ParserAction base_action, int label,
                                     const ParserState &state) const;
};
}
// namespace syntaxnet
#endif // SYNTAXNET_HEAD_LABEL_TRANSITIONS_H_
research/syntaxnet/syntaxnet/head_label_transitions_test.cc
0 → 100644
View file @
4364390a
/* Copyright 2017 Google Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <memory>
#include "syntaxnet/base.h"
#include "syntaxnet/parser_state.h"
#include "syntaxnet/parser_transitions.h"
#include "syntaxnet/sentence.pb.h"
#include "syntaxnet/task_context.h"
#include "syntaxnet/term_frequency_map.h"
#include "tensorflow/core/platform/test.h"
namespace
syntaxnet
{
namespace
{
const
char
kSentence
[]
=
R"(
text: 'I saw a man with a telescope.'
token { word: 'I' start: 0 end: 0 tag: 'PRP' category: 'PRON'
head: 1 label: 'nsubj' break_level: NO_BREAK }
token { word: 'saw' start: 2 end: 4 tag: 'VBD' category: 'VERB'
label: 'ROOT' break_level: SPACE_BREAK }
token { word: 'a' start: 6 end: 6 tag: 'DT' category: 'DET'
head: 3 label: 'det' break_level: SPACE_BREAK }
token { word: 'man' start: 8 end: 10 tag: 'NN' category: 'NOUN'
head: 1 label: 'dobj' break_level: SPACE_BREAK }
token { word: 'with' start: 12 end: 15 tag: 'IN' category: 'ADP'
head: 1 label: 'prep' break_level: SPACE_BREAK }
token { word: 'a' start: 17 end: 17 tag: 'DT' category: 'DET'
head: 6 label: 'det' break_level: SPACE_BREAK }
token { word: 'telescope' start: 19 end: 27 tag: 'NN' category: 'NOUN'
head: 4 label: 'pobj' break_level: SPACE_BREAK }
token { word: '.' start: 28 end: 28 tag: '.' category: '.'
head: 1 label: 'p' break_level: NO_BREAK }
)"
;
// Test fixture for the "heads_labels" transition system. Builds a parser
// state over kSentence with a label map derived from the gold labels.
class HeadLabelTransitionTest : public ::testing::Test {
 public:
  HeadLabelTransitionTest() {
    // Setup() and Init() are invoked, in that order, before any state is
    // created from the transition system.
    transition_system_->Setup(&context_);
    transition_system_->Init(&context_);
    CHECK(TextFormat::ParseFromString(kSentence, &sentence_));
    // Populate the label map from the gold labels so that every label in
    // kSentence has a dense index.
    for (auto &token : sentence_.token()) label_map_.Increment(token.label());
    // NewTransitionState(true) requests a training-mode transition state.
    state_.reset(new ParserState(
        &sentence_, transition_system_->NewTransitionState(true),
        &label_map_));
  }

 protected:
  // Maps label strings (e.g. 'nsubj') to dense indices.
  TermFrequencyMap label_map_;
  TaskContext context_;
  // The transition system under test, created by registry name.
  std::unique_ptr<ParserTransitionSystem> transition_system_{
      ParserTransitionSystem::Create("heads_labels")};
  // The parsed test sentence and the parser state built over it.
  Sentence sentence_;
  std::unique_ptr<ParserState> state_;
};
// A self-loop (head == token) combined with the root label must produce a
// root arc: the parser state stores roots as head == -1.
TEST_F(HeadLabelTransitionTest, TestPerformActionSelfRoot) {
  const int token = state_->Next();
  const int root_label = state_->RootLabel();
  // Actions are encoded as head * num_labels + label.
  const int encoded = token * state_->NumLabels() + root_label;
  transition_system_->PerformActionWithoutHistory(encoded, state_.get());
  EXPECT_EQ(state_->Head(token), -1);
  EXPECT_EQ(state_->Label(token), root_label);
}
// A self-loop must produce a root arc even when the label is not the root
// label, and the requested label must be preserved on the token.
TEST_F(HeadLabelTransitionTest, TestPerformActionAssignRootOtherLabel) {
  const int det_label = label_map_.LookupIndex("det", -1);
  const int token = state_->Next();
  // Actions are encoded as head * num_labels + label.
  const int encoded = token * state_->NumLabels() + det_label;
  transition_system_->PerformActionWithoutHistory(encoded, state_.get());
  EXPECT_EQ(state_->Head(token), -1);
  EXPECT_EQ(state_->Label(token), det_label);
}
// Drives the transition system with its own gold actions until the final
// state and checks that the resulting parse matches the gold heads/labels.
TEST_F(HeadLabelTransitionTest, GoldParsesCorrectly) {
  LOG(INFO) << "Initial parser state: " << state_->ToString();
  while (!transition_system_->IsFinalState(*state_)) {
    const ParserAction action = transition_system_->GetNextGoldAction(*state_);
    // Every gold action must also be an allowed action.
    EXPECT_TRUE(transition_system_->IsAllowedAction(action, *state_));
    LOG(INFO) << "Performing action " << action << ": "
              << transition_system_->ActionAsString(action, *state_);
    transition_system_->PerformActionWithoutHistory(action, state_.get());
    LOG(INFO) << "Parser state: " << state_->ToString();
  }
  const int num_tokens = state_->NumTokens();
  for (int token = 0; token < num_tokens; ++token) {
    EXPECT_EQ(state_->GoldHead(token), state_->Head(token));
    EXPECT_EQ(state_->GoldLabel(token), state_->Label(token));
  }
}
}
// namespace
}
// namespace syntaxnet
research/syntaxnet/syntaxnet/head_transitions.h
View file @
4364390a
...
...
@@ -30,17 +30,14 @@ namespace syntaxnet {
// Action A == i : Add a root arc to token i.
// Action A != i : Add an arc A -> i.
//
// Note that in
nlp_saft.Document
, root arcs are token.head() == -1, whereas
// Note that in
the Sentence proto
, root arcs are token.head() == -1, whereas
// here, we use a self-loop to represent roots.
class
HeadTransitionSystem
:
public
ParserTransitionSystem
{
public:
class
State
;
// defined in the .cc file
// Returns 1 for number of actions. This is because each action should be
// scored separately; e.g. instead of a fixed output set, we have a single
// scoring function.
int
NumActionTypes
()
const
override
{
return
1
;
}
int
NumActions
(
int
num_labels
)
const
override
{
return
1
;
}
int
NumActions
(
int
num_labels
)
const
override
{
return
kDynamicNumActions
;
}
// Returns the default action, which is to assign itself as root.
ParserAction
GetDefaultAction
(
const
ParserState
&
state
)
const
override
;
...
...
research/syntaxnet/syntaxnet/head_transitions_test.cc
View file @
4364390a
...
...
@@ -68,7 +68,8 @@ class HeadTransitionSystemTest : public ::testing::Test {
TEST_F
(
HeadTransitionSystemTest
,
Characteristics
)
{
EXPECT_EQ
(
1
,
transition_system_
->
NumActionTypes
());
EXPECT_EQ
(
1
,
transition_system_
->
NumActions
(
10
));
EXPECT_EQ
(
ParserTransitionSystem
::
kDynamicNumActions
,
transition_system_
->
NumActions
(
10
));
}
TEST_F
(
HeadTransitionSystemTest
,
GoldParsesCorrectly
)
{
...
...
research/syntaxnet/syntaxnet/lexicon_builder.cc
View file @
4364390a
...
...
@@ -26,6 +26,7 @@ limitations under the License.
#include "syntaxnet/utils.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/platform/env.h"
// A task that collects term statistics over a corpus and saves a set of
...
...
research/syntaxnet/syntaxnet/models/parsey_universal/context-tokenize-zh.pbtxt
deleted
100644 → 0
View file @
638fd759
# Task context for the traditional Chinese tokenizer model: character-based
# features plus last-word features, driven by binary segmentation
# transitions. Reads untokenized text on stdin; writes CoNLL on stdout.

# Two embedding channels: characters and (last-)words, 32 dims each.
Parameter {
  name: "brain_tokenizer_zh_embedding_dims"
  value: "32;32"
}
Parameter {
  name: "brain_tokenizer_zh_embedding_names"
  value: "chars;words"
}
# Feature groups are separated by ';' and map 1:1 onto the embedding
# channels above (char features; last-word features).
Parameter {
  name: "brain_tokenizer_zh_features"
  value: "input.char "
         "input(1).char "
         "input(2).char "
         "input(3).char "
         "input(-1).char "
         "input(-2).char "
         "input(-3).char "
         "stack.char "
         "stack.offset(1).char "
         "stack.offset(-1).char "
         "stack(1).char "
         "stack(1).offset(1).char "
         "stack(1).offset(-1).char "
         "stack(2).char; "
         "last-word(1,min-freq=2) "
         "last-word(2,min-freq=2) "
         "last-word(3,min-freq=2)"
}
Parameter {
  name: "brain_tokenizer_zh_transition_system"
  value: "binary-segment-transitions"
}

# Term-frequency map resources consumed by the features above.
input {
  name: "word-map"
  Part {
    file_pattern: "last-word-map"
  }
}
input {
  name: "char-map"
  Part {
    file_pattern: "char-map"
  }
}
input {
  name: "label-map"
  Part {
    file_pattern: "label-map"
  }
}
# '-' as file_pattern means the process's stdin/stdout.
input {
  name: 'stdin-untoken'
  record_format: 'untokenized-text'
  Part {
    file_pattern: '-'
  }
}
input {
  name: 'stdout-conll'
  record_format: 'conll-sentence'
  Part {
    file_pattern: '-'
  }
}
research/syntaxnet/syntaxnet/models/parsey_universal/context.pbtxt
deleted
100644 → 0
View file @
638fd759
# Task context for the Parsey Universal pipeline. Defines four model stages
# (tokenizer, morphological analyzer, POS tagger, dependency parser), each
# with its own embedding channels, feature groups (';'-separated, mapping
# 1:1 onto the embedding channels), and transition system, followed by the
# shared resource inputs and stdin/stdout endpoints.

# --- Tokenizer: char/digit/punctuation features, binary segmentation. ---
Parameter {
  name: "brain_tokenizer_embedding_dims"
  value: "16;16;16"
}
Parameter {
  name: "brain_tokenizer_embedding_names"
  value: "chars;digits;puncts"
}
Parameter {
  name: "brain_tokenizer_features"
  value: "input.char "
         "input(-1).char "
         "input(1).char; "
         "input.digit "
         "input(-1).digit "
         "input(1).digit; "
         "input.punctuation-amount "
         "input(-1).punctuation-amount "
         "input(1).punctuation-amount "
}
Parameter {
  name: "brain_tokenizer_transition_system"
  value: "binary-segment-transitions"
}

# --- Morphological analyzer. ---
Parameter {
  name: "brain_morpher_embedding_dims"
  value: "2;16;8;16;16;16;16;16;64"
}
Parameter {
  name: "brain_morpher_embedding_names"
  value: "capitalization;char_ngram;other;prefix2;prefix3;suffix2;suffix3;tags;words"
}
Parameter {
  name: "brain_morpher_features"
  value: "input.capitalization "
         "input(1).capitalization "
         "input(2).capitalization "
         "input(3).capitalization "
         "input(-1).capitalization "
         "input(-2).capitalization "
         "input(-3).capitalization "
         "input(-4).capitalization; "
         "input.token.char-ngram "
         "input(1).token.char-ngram "
         "input(2).token.char-ngram "
         "input(3).token.char-ngram "
         "input(-1).token.char-ngram "
         "input(-2).token.char-ngram "
         "input(-3).token.char-ngram "
         "input(-4).token.char-ngram; "
         "input.digit "
         "input.hyphen "
         "input.token.punctuation-amount "
         "input.token.quote; "
         "input.token.prefix(length=2) "
         "input(1).token.prefix(length=2) "
         "input(2).token.prefix(length=2) "
         "input(3).token.prefix(length=2) "
         "input(-1).token.prefix(length=2) "
         "input(-2).token.prefix(length=2) "
         "input(-3).token.prefix(length=2) "
         "input(-4).token.prefix(length=2); "
         "input.token.prefix(length=3) "
         "input(1).token.prefix(length=3) "
         "input(2).token.prefix(length=3) "
         "input(3).token.prefix(length=3) "
         "input(-1).token.prefix(length=3) "
         "input(-2).token.prefix(length=3) "
         "input(-3).token.prefix(length=3) "
         "input(-4).token.prefix(length=3); "
         "input.token.suffix(length=2) "
         "input(1).token.suffix(length=2) "
         "input(2).token.suffix(length=2) "
         "input(3).token.suffix(length=2) "
         "input(-1).token.suffix(length=2) "
         "input(-2).token.suffix(length=2) "
         "input(-3).token.suffix(length=2) "
         "input(-4).token.suffix(length=2); "
         "input.token.suffix(length=3) "
         "input(1).token.suffix(length=3) "
         "input(2).token.suffix(length=3) "
         "input(3).token.suffix(length=3) "
         "input(-1).token.suffix(length=3) "
         "input(-2).token.suffix(length=3) "
         "input(-3).token.suffix(length=3) "
         "input(-4).token.suffix(length=3); "
         "input(-1).pred-morph-tag "
         "input(-2).pred-morph-tag "
         "input(-3).pred-morph-tag "
         "input(-4).pred-morph-tag; "
         "input.token.word "
         "input(1).token.word "
         "input(2).token.word "
         "input(3).token.word "
         "input(-1).token.word "
         "input(-2).token.word "
         "input(-3).token.word "
         "input(-4).token.word"
}
Parameter {
  name: "brain_morpher_transition_system"
  value: "morpher"
}

# --- POS tagger: same channels/features as the morpher, except the
# predicted-tag history features use pred-tag instead of pred-morph-tag. ---
Parameter {
  name: "brain_tagger_embedding_dims"
  value: "2;16;8;16;16;16;16;16;64"
}
Parameter {
  name: "brain_tagger_embedding_names"
  value: "capitalization;char_ngram;other;prefix2;prefix3;suffix2;suffix3;tags;words"
}
Parameter {
  name: "brain_tagger_features"
  value: "input.capitalization "
         "input(1).capitalization "
         "input(2).capitalization "
         "input(3).capitalization "
         "input(-1).capitalization "
         "input(-2).capitalization "
         "input(-3).capitalization "
         "input(-4).capitalization; "
         "input.token.char-ngram "
         "input(1).token.char-ngram "
         "input(2).token.char-ngram "
         "input(3).token.char-ngram "
         "input(-1).token.char-ngram "
         "input(-2).token.char-ngram "
         "input(-3).token.char-ngram "
         "input(-4).token.char-ngram; "
         "input.digit "
         "input.hyphen "
         "input.token.punctuation-amount "
         "input.token.quote; "
         "input.token.prefix(length=2) "
         "input(1).token.prefix(length=2) "
         "input(2).token.prefix(length=2) "
         "input(3).token.prefix(length=2) "
         "input(-1).token.prefix(length=2) "
         "input(-2).token.prefix(length=2) "
         "input(-3).token.prefix(length=2) "
         "input(-4).token.prefix(length=2); "
         "input.token.prefix(length=3) "
         "input(1).token.prefix(length=3) "
         "input(2).token.prefix(length=3) "
         "input(3).token.prefix(length=3) "
         "input(-1).token.prefix(length=3) "
         "input(-2).token.prefix(length=3) "
         "input(-3).token.prefix(length=3) "
         "input(-4).token.prefix(length=3); "
         "input.token.suffix(length=2) "
         "input(1).token.suffix(length=2) "
         "input(2).token.suffix(length=2) "
         "input(3).token.suffix(length=2) "
         "input(-1).token.suffix(length=2) "
         "input(-2).token.suffix(length=2) "
         "input(-3).token.suffix(length=2) "
         "input(-4).token.suffix(length=2); "
         "input.token.suffix(length=3) "
         "input(1).token.suffix(length=3) "
         "input(2).token.suffix(length=3) "
         "input(3).token.suffix(length=3) "
         "input(-1).token.suffix(length=3) "
         "input(-2).token.suffix(length=3) "
         "input(-3).token.suffix(length=3) "
         "input(-4).token.suffix(length=3); "
         "input(-1).pred-tag "
         "input(-2).pred-tag "
         "input(-3).pred-tag "
         "input(-4).pred-tag; "
         "input.token.word "
         "input(1).token.word "
         "input(2).token.word "
         "input(3).token.word "
         "input(-1).token.word "
         "input(-2).token.word "
         "input(-3).token.word "
         "input(-4).token.word"
}
Parameter {
  name: "brain_tagger_transition_system"
  value: "tagger"
}

# --- Dependency parser: arc-standard transitions over stack/input features. ---
Parameter {
  name: "brain_parser_embedding_dims"
  value: "32;32;32;64"
}
Parameter {
  name: "brain_parser_embedding_names"
  value: "labels;morphology;tags;words"
}
Parameter {
  name: "brain_parser_features"
  value: "stack.child(1).label "
         "stack.child(1).sibling(-1).label "
         "stack.child(-1).label "
         "stack.child(-1).sibling(1).label "
         "stack.child(2).label "
         "stack.child(-2).label "
         "stack(1).child(1).label "
         "stack(1).child(1).sibling(-1).label "
         "stack(1).child(-1).label "
         "stack(1).child(-1).sibling(1).label "
         "stack(1).child(2).label "
         "stack(1).child(-2).label; "
         "input.token.morphology-set "
         "input(1).token.morphology-set "
         "input(2).token.morphology-set "
         "input(3).token.morphology-set "
         "stack.token.morphology-set "
         "stack.child(1).token.morphology-set "
         "stack.child(1).sibling(-1).token.morphology-set "
         "stack.child(-1).token.morphology-set "
         "stack.child(-1).sibling(1).token.morphology-set "
         "stack.child(2).token.morphology-set "
         "stack.child(-2).token.morphology-set "
         "stack(1).token.morphology-set "
         "stack(1).child(1).token.morphology-set "
         "stack(1).child(1).sibling(-1).token.morphology-set "
         "stack(1).child(-1).token.morphology-set "
         "stack(1).child(-1).sibling(1).token.morphology-set "
         "stack(1).child(2).token.morphology-set "
         "stack(1).child(-2).token.morphology-set "
         "stack(2).token.morphology-set "
         "stack(3).token.morphology-set; "
         "input.token.tag "
         "input(1).token.tag "
         "input(2).token.tag "
         "input(3).token.tag "
         "stack.token.tag "
         "stack.child(1).token.tag "
         "stack.child(1).sibling(-1).token.tag "
         "stack.child(-1).token.tag "
         "stack.child(-1).sibling(1).token.tag "
         "stack.child(2).token.tag "
         "stack.child(-2).token.tag "
         "stack(1).token.tag "
         "stack(1).child(1).token.tag "
         "stack(1).child(1).sibling(-1).token.tag "
         "stack(1).child(-1).token.tag "
         "stack(1).child(-1).sibling(1).token.tag "
         "stack(1).child(2).token.tag "
         "stack(1).child(-2).token.tag "
         "stack(2).token.tag "
         "stack(3).token.tag; "
         "input.token.word "
         "input(1).token.word "
         "input(2).token.word "
         "input(3).token.word "
         "stack.token.word "
         "stack.child(1).token.word "
         "stack.child(1).sibling(-1).token.word "
         "stack.child(-1).token.word "
         "stack.child(-1).sibling(1).token.word "
         "stack.child(2).token.word "
         "stack.child(-2).token.word "
         "stack(1).token.word "
         "stack(1).child(1).token.word "
         "stack(1).child(1).sibling(-1).token.word "
         "stack(1).child(-1).token.word "
         "stack(1).child(-1).sibling(1).token.word "
         "stack(1).child(2).token.word "
         "stack(1).child(-2).token.word "
         "stack(2).token.word "
         "stack(3).token.word "
}
Parameter {
  name: "brain_parser_transition_system"
  value: "arc-standard"
}
Parameter {
  name: "join_category_to_pos"
  value: "true"
}

# --- Shared resources (term maps and affix tables). ---
input {
  name: "word-map"
  Part {
    file_pattern: "word-map"
  }
}
input {
  name: "char-map"
  Part {
    file_pattern: "char-map"
  }
}
input {
  name: "tag-map"
  Part {
    file_pattern: "tag-map"
  }
}
input {
  name: "tag-to-category"
  Part {
    file_pattern: "tag-to-category"
  }
}
input {
  name: "label-map"
  Part {
    file_pattern: "label-map"
  }
}
input {
  name: "char-ngram-map"
  Part {
    file_pattern: "char-ngram-map"
  }
}
input {
  name: "prefix-table"
  Part {
    file_pattern: "prefix-table"
  }
}
input {
  name: "suffix-table"
  Part {
    file_pattern: "suffix-table"
  }
}
input {
  name: "morph-label-set"
  Part {
    file_pattern: "morph-label-set"
  }
}
input {
  name: "morphology-map"
  Part {
    file_pattern: "morphology-map"
  }
}

# --- Stdin/stdout endpoints; '-' means the process's stdin/stdout. ---
input {
  name: 'stdin'
  record_format: 'tokenized-text'
  Part {
    file_pattern: '-'
  }
}
input {
  name: 'stdin-conll'
  record_format: 'conll-sentence'
  Part {
    file_pattern: '-'
  }
}
input {
  name: 'stdin-untoken'
  record_format: 'untokenized-text'
  Part {
    file_pattern: '-'
  }
}
input {
  name: 'stdout-conll'
  record_format: 'conll-sentence'
  Part {
    file_pattern: '-'
  }
}
research/syntaxnet/syntaxnet/models/parsey_universal/parse.sh
deleted
100755 → 0
View file @
638fd759
# A script that runs a morphological analyzer, a part-of-speech tagger and a
# dependency parser on a text file, with one sentence per line.
#
# Example usage:
#  bazel build syntaxnet:parser_eval
#  cat sentences.txt |
#    syntaxnet/models/parsey_universal/parse.sh \
#    $MODEL_DIRECTORY > output.conll
#
# To run on a conll formatted file, add the --conll command line argument:
#  cat sentences.conll |
#    syntaxnet/models/parsey_universal/parse.sh \
#    --conll $MODEL_DIRECTORY > output.conll
#
# Models can be downloaded from
#  http://download.tensorflow.org/models/parsey_universal/<language>.zip
# for the languages listed at
#  https://github.com/tensorflow/models/blob/master/research/syntaxnet/universal.md
#

PARSER_EVAL=bazel-bin/syntaxnet/parser_eval
CONTEXT=syntaxnet/models/parsey_universal/context.pbtxt

# Select the input endpoint (defined in $CONTEXT): CoNLL input if the first
# argument is --conll (which is then consumed), otherwise one sentence per
# line on stdin.
if [[ "$1" == "--conll" ]]; then
  INPUT_FORMAT=stdin-conll
  shift
else
  INPUT_FORMAT=stdin
fi

MODEL_DIR=$1

# Three-stage pipeline, passing CoNLL between stages on stdout/stdin:
# morphological analysis -> POS tagging -> dependency parsing.
$PARSER_EVAL \
  --input=$INPUT_FORMAT \
  --output=stdout-conll \
  --hidden_layer_sizes=64 \
  --arg_prefix=brain_morpher \
  --graph_builder=structured \
  --task_context=$CONTEXT \
  --resource_dir=$MODEL_DIR \
  --model_path=$MODEL_DIR/morpher-params \
  --slim_model \
  --batch_size=1024 \
  --alsologtostderr \
  | \
$PARSER_EVAL \
  --input=stdin-conll \
  --output=stdout-conll \
  --hidden_layer_sizes=64 \
  --arg_prefix=brain_tagger \
  --graph_builder=structured \
  --task_context=$CONTEXT \
  --resource_dir=$MODEL_DIR \
  --model_path=$MODEL_DIR/tagger-params \
  --slim_model \
  --batch_size=1024 \
  --alsologtostderr \
  | \
$PARSER_EVAL \
  --input=stdin-conll \
  --output=stdout-conll \
  --hidden_layer_sizes=512,512 \
  --arg_prefix=brain_parser \
  --graph_builder=structured \
  --task_context=$CONTEXT \
  --resource_dir=$MODEL_DIR \
  --model_path=$MODEL_DIR/parser-params \
  --slim_model \
  --batch_size=1024 \
  --alsologtostderr
research/syntaxnet/syntaxnet/models/parsey_universal/tokenize.sh
deleted
100755 → 0
View file @
638fd759
# A script that runs a tokenizer on a text file with one sentence per line.
#
# Example usage:
# bazel build syntaxnet:parser_eval
# cat untokenized-sentences.txt |
# syntaxnet/models/parsey_universal/tokenize.sh \
# $MODEL_DIRECTORY > output.conll
#
# Models can be downloaded from
# http://download.tensorflow.org/models/parsey_universal/<language>.zip
# for the languages listed at
# https://github.com/tensorflow/models/blob/master/research/syntaxnet/universal.md
#
PARSER_EVAL
=
bazel-bin/syntaxnet/parser_eval
CONTEXT
=
syntaxnet/models/parsey_universal/context.pbtxt
INPUT_FORMAT
=
stdin-untoken
MODEL_DIR
=
$1
$PARSER_EVAL
\
--input
=
$INPUT_FORMAT
\
--output
=
stdin-untoken
\
--hidden_layer_sizes
=
128,128
\
--arg_prefix
=
brain_tokenizer
\
--graph_builder
=
greedy
\
--task_context
=
$CONTEXT
\
--resource_dir
=
$MODEL_DIR
\
--model_path
=
$MODEL_DIR
/tokenizer-params
\
--batch_size
=
32
\
--alsologtostderr
\
--slim_model
research/syntaxnet/syntaxnet/models/parsey_universal/tokenize_zh.sh
deleted
100755 → 0
View file @
638fd759
# A script that runs a traditional Chinese tokenizer on a text file with one
# sentence per line.
#
# Example usage:
# bazel build syntaxnet:parser_eval
# cat untokenized-sentences.txt |
# syntaxnet/models/parsey_universal/tokenize_zh.sh \
# $MODEL_DIRECTORY > output.conll
#
# The traditional Chinese model can be downloaded from
# http://download.tensorflow.org/models/parsey_universal/Chinese.zip
#
PARSER_EVAL
=
bazel-bin/syntaxnet/parser_eval
CONTEXT
=
syntaxnet/models/parsey_universal/context-tokenize-zh.pbtxt
INPUT_FORMAT
=
stdin-untoken
MODEL_DIR
=
$1
$PARSER_EVAL
\
--input
=
$INPUT_FORMAT
\
--output
=
stdin-untoken
\
--hidden_layer_sizes
=
256,256
\
--arg_prefix
=
brain_tokenizer_zh
\
--graph_builder
=
structured
\
--task_context
=
$CONTEXT
\
--resource_dir
=
$MODEL_DIR
\
--model_path
=
$MODEL_DIR
/tokenizer-params
\
--batch_size
=
1024
\
--alsologtostderr
\
--slim_model
research/syntaxnet/syntaxnet/morphology_label_set.h
View file @
4364390a
...
...
@@ -43,8 +43,7 @@ class MorphologyLabelSet {
int
Add
(
const
TokenMorphology
&
morph
);
// Look up an existing TokenMorphology. If it is not present, return -1.
// Note: This is slow, and should not be called outside of training workflow
// or init.
// Note: This is slow, and should not be called outside of training or init.
int
LookupExisting
(
const
TokenMorphology
&
morph
)
const
;
// Return the TokenMorphology at position i. The input i should be in the
...
...
Prev
1
…
3
4
5
6
7
8
9
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment