Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
52040517
"doc/git@developer.sourcefind.cn:wangsen/paddle_dbnet.git" did not exist on "aaae49584f7d336c80a714e067d870b2a6f69493"
Unverified
Commit
52040517
authored
Nov 09, 2020
by
Sylvain Gugger
Committed by
GitHub
Nov 09, 2020
Browse files
Deprecate old data/metrics functions (#8420)
parent
d4d1fbfc
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
101 additions
and
6 deletions
+101
-6
src/transformers/data/datasets/glue.py
src/transformers/data/datasets/glue.py
+7
-0
src/transformers/data/datasets/language_modeling.py
src/transformers/data/datasets/language_modeling.py
+32
-6
src/transformers/data/metrics/__init__.py
src/transformers/data/metrics/__init__.py
+14
-0
src/transformers/data/processors/glue.py
src/transformers/data/processors/glue.py
+48
-0
No files found.
src/transformers/data/datasets/glue.py
View file @
52040517
import
os
import
time
import
warnings
from
dataclasses
import
dataclass
,
field
from
enum
import
Enum
from
typing
import
List
,
Optional
,
Union
...
...
@@ -69,6 +70,12 @@ class GlueDataset(Dataset):
mode
:
Union
[
str
,
Split
]
=
Split
.
train
,
cache_dir
:
Optional
[
str
]
=
None
,
):
warnings
.
warn
(
"This dataset will be removed from the library soon, preprocessing should be handled with the 🤗 Datasets "
"library. You can have a look at this example script for pointers: "
"https://github.com/huggingface/transformers/blob/master/examples/text-classification/run_glue.py"
,
FutureWarning
,
)
self
.
args
=
args
self
.
processor
=
glue_processors
[
args
.
task_name
]()
self
.
output_mode
=
glue_output_modes
[
args
.
task_name
]
...
...
src/transformers/data/datasets/language_modeling.py
View file @
52040517
...
...
@@ -19,7 +19,8 @@ logger = logging.get_logger(__name__)
# Template for the deprecation notice raised by the dataset classes in this
# module. ``{0}`` is a placeholder for the URL of the example script that
# replaces the deprecated dataset, so the template must always be passed
# through ``.format(url)`` before being handed to ``warnings.warn``.
DEPRECATION_WARNING = (
    "This dataset will be removed from the library soon, preprocessing should be handled with the 🤗 Datasets "
    "library. You can have a look at this example script for pointers: {0}"
)
...
...
@@ -36,7 +37,12 @@ class TextDataset(Dataset):
overwrite_cache
=
False
,
cache_dir
:
Optional
[
str
]
=
None
,
):
# Emit the deprecation notice exactly once, with the replacement example
# script substituted into the ``{0}`` placeholder of the template (passing
# the raw template to ``warnings.warn`` would show a literal "{0}").
warnings.warn(
    DEPRECATION_WARNING.format(
        "https://github.com/huggingface/transformers/blob/master/examples/language-modeling/run_mlm.py"
    ),
    FutureWarning,
)
assert
os
.
path
.
isfile
(
file_path
),
f
"Input file path
{
file_path
}
not found"
block_size
=
block_size
-
tokenizer
.
num_special_tokens_to_add
(
pair
=
False
)
...
...
@@ -101,7 +107,12 @@ class LineByLineTextDataset(Dataset):
"""
def
__init__
(
self
,
tokenizer
:
PreTrainedTokenizer
,
file_path
:
str
,
block_size
:
int
):
# Emit the deprecation notice exactly once, with the replacement example
# script substituted into the ``{0}`` placeholder of the template (passing
# the raw template to ``warnings.warn`` would show a literal "{0}").
warnings.warn(
    DEPRECATION_WARNING.format(
        "https://github.com/huggingface/transformers/blob/master/examples/language-modeling/run_mlm.py"
    ),
    FutureWarning,
)
assert
os
.
path
.
isfile
(
file_path
),
f
"Input file path
{
file_path
}
not found"
# Here, we do not cache the features, operating under the assumption
# that we will soon use fast multithreaded tokenizers from the
...
...
@@ -128,7 +139,12 @@ class LineByLineWithRefDataset(Dataset):
"""
def
__init__
(
self
,
tokenizer
:
PreTrainedTokenizer
,
file_path
:
str
,
block_size
:
int
,
ref_path
:
str
):
# Emit the deprecation notice exactly once, pointing at the whole-word-masking
# example script (passing the raw template to ``warnings.warn`` would show a
# literal "{0}" instead of the URL).
warnings.warn(
    DEPRECATION_WARNING.format(
        "https://github.com/huggingface/transformers/blob/master/examples/language-modeling/run_mlm_wwm.py"
    ),
    FutureWarning,
)
assert
os
.
path
.
isfile
(
file_path
),
f
"Input file path
{
file_path
}
not found"
# The condition checks ``ref_path``, so the failure message must name that
# same path rather than the (already validated) input file path.
assert os.path.isfile(ref_path), f"Ref file path {ref_path} not found"
# Here, we do not cache the features, operating under the assumption
...
...
@@ -165,7 +181,12 @@ class LineByLineWithSOPTextDataset(Dataset):
"""
def
__init__
(
self
,
tokenizer
:
PreTrainedTokenizer
,
file_dir
:
str
,
block_size
:
int
):
# Emit the deprecation notice exactly once, with the replacement example
# script substituted into the ``{0}`` placeholder of the template (passing
# the raw template to ``warnings.warn`` would show a literal "{0}").
warnings.warn(
    DEPRECATION_WARNING.format(
        "https://github.com/huggingface/transformers/blob/master/examples/language-modeling/run_mlm.py"
    ),
    FutureWarning,
)
assert
os
.
path
.
isdir
(
file_dir
)
logger
.
info
(
f
"Creating features from dataset file folder at
{
file_dir
}
"
)
self
.
examples
=
[]
...
...
@@ -315,7 +336,12 @@ class TextDatasetForNextSentencePrediction(Dataset):
short_seq_probability
=
0.1
,
nsp_probability
=
0.5
,
):
# Emit the deprecation notice exactly once, with the replacement example
# script substituted into the ``{0}`` placeholder of the template (passing
# the raw template to ``warnings.warn`` would show a literal "{0}").
warnings.warn(
    DEPRECATION_WARNING.format(
        "https://github.com/huggingface/transformers/blob/master/examples/language-modeling/run_mlm.py"
    ),
    FutureWarning,
)
assert
os
.
path
.
isfile
(
file_path
),
f
"Input file path
{
file_path
}
not found"
self
.
block_size
=
block_size
-
tokenizer
.
num_special_tokens_to_add
(
pair
=
True
)
...
...
src/transformers/data/metrics/__init__.py
View file @
52040517
...
...
@@ -14,6 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import
warnings
from
...file_utils
import
is_sklearn_available
,
requires_sklearn
...
...
@@ -23,12 +25,21 @@ if is_sklearn_available():
from
scipy.stats
import
pearsonr
,
spearmanr
# Message passed to ``warnings.warn`` by the deprecated metric helpers in this
# module. It must be a plain ``str``: a trailing comma before the closing
# parenthesis would turn the value into a 1-tuple, and the warning would then
# display the tuple's repr instead of the sentence itself.
DEPRECATION_WARNING = (
    "This metric will be removed from the library soon, metrics should be handled with the 🤗 Datasets "
    "library. You can have a look at this example script for pointers: "
    "https://github.com/huggingface/transformers/blob/master/examples/text-classification/run_glue.py"
)
def
simple_accuracy
(
preds
,
labels
):
warnings
.
warn
(
DEPRECATION_WARNING
,
FutureWarning
)
requires_sklearn
(
simple_accuracy
)
return
(
preds
==
labels
).
mean
()
def
acc_and_f1
(
preds
,
labels
):
warnings
.
warn
(
DEPRECATION_WARNING
,
FutureWarning
)
requires_sklearn
(
acc_and_f1
)
acc
=
simple_accuracy
(
preds
,
labels
)
f1
=
f1_score
(
y_true
=
labels
,
y_pred
=
preds
)
...
...
@@ -40,6 +51,7 @@ def acc_and_f1(preds, labels):
def
pearson_and_spearman
(
preds
,
labels
):
warnings
.
warn
(
DEPRECATION_WARNING
,
FutureWarning
)
requires_sklearn
(
pearson_and_spearman
)
pearson_corr
=
pearsonr
(
preds
,
labels
)[
0
]
spearman_corr
=
spearmanr
(
preds
,
labels
)[
0
]
...
...
@@ -51,6 +63,7 @@ def pearson_and_spearman(preds, labels):
def
glue_compute_metrics
(
task_name
,
preds
,
labels
):
warnings
.
warn
(
DEPRECATION_WARNING
,
FutureWarning
)
requires_sklearn
(
glue_compute_metrics
)
assert
len
(
preds
)
==
len
(
labels
),
f
"Predictions and labels have mismatched lengths
{
len
(
preds
)
}
and
{
len
(
labels
)
}
"
if
task_name
==
"cola"
:
...
...
@@ -80,6 +93,7 @@ def glue_compute_metrics(task_name, preds, labels):
def
xnli_compute_metrics
(
task_name
,
preds
,
labels
):
warnings
.
warn
(
DEPRECATION_WARNING
,
FutureWarning
)
requires_sklearn
(
xnli_compute_metrics
)
assert
len
(
preds
)
==
len
(
labels
),
f
"Predictions and labels have mismatched lengths
{
len
(
preds
)
}
and
{
len
(
labels
)
}
"
if
task_name
==
"xnli"
:
...
...
src/transformers/data/processors/glue.py
View file @
52040517
...
...
@@ -16,6 +16,7 @@
""" GLUE processors and helpers """
import
os
import
warnings
from
dataclasses
import
asdict
from
enum
import
Enum
from
typing
import
List
,
Optional
,
Union
...
...
@@ -31,6 +32,12 @@ if is_tf_available():
logger
=
logging
.
get_logger
(
__name__
)
DEPRECATION_WARNING
=
(
"This {0} will be removed from the library soon, preprocessing should be handled with the 🤗 Datasets "
"library. You can have a look at this example script for pointers: "
"https://github.com/huggingface/transformers/blob/master/examples/text-classification/run_glue.py"
)
def
glue_convert_examples_to_features
(
examples
:
Union
[
List
[
InputExample
],
"tf.data.Dataset"
],
...
...
@@ -57,6 +64,7 @@ def glue_convert_examples_to_features(
``InputFeatures`` which can be fed to the model.
"""
warnings
.
warn
(
DEPRECATION_WARNING
.
format
(
"function"
),
FutureWarning
)
if
is_tf_available
()
and
isinstance
(
examples
,
tf
.
data
.
Dataset
):
if
task
is
None
:
raise
ValueError
(
"When calling glue_convert_examples_to_features from TF, the task parameter is required."
)
...
...
@@ -162,6 +170,10 @@ class OutputMode(Enum):
class
MrpcProcessor
(
DataProcessor
):
"""Processor for the MRPC data set (GLUE version)."""
def
__init__
(
self
,
*
args
,
**
kwargs
):
super
().
__init__
(
*
args
,
**
kwargs
)
warnings
.
warn
(
DEPRECATION_WARNING
.
format
(
"processor"
),
FutureWarning
)
def
get_example_from_tensor_dict
(
self
,
tensor_dict
):
"""See base class."""
return
InputExample
(
...
...
@@ -205,6 +217,10 @@ class MrpcProcessor(DataProcessor):
class
MnliProcessor
(
DataProcessor
):
"""Processor for the MultiNLI data set (GLUE version)."""
def
__init__
(
self
,
*
args
,
**
kwargs
):
super
().
__init__
(
*
args
,
**
kwargs
)
warnings
.
warn
(
DEPRECATION_WARNING
.
format
(
"processor"
),
FutureWarning
)
def
get_example_from_tensor_dict
(
self
,
tensor_dict
):
"""See base class."""
return
InputExample
(
...
...
@@ -247,6 +263,10 @@ class MnliProcessor(DataProcessor):
class
MnliMismatchedProcessor
(
MnliProcessor
):
"""Processor for the MultiNLI Mismatched data set (GLUE version)."""
def __init__(self, *args, **kwargs):
    """Initialize the processor by delegating to ``MnliProcessor.__init__``.

    All positional and keyword arguments are forwarded unchanged. The parent
    initializer already emits the deprecation ``FutureWarning``, so it is not
    raised a second time here; doing so would show the same notice twice for
    a single instantiation.
    """
    super().__init__(*args, **kwargs)
def
get_dev_examples
(
self
,
data_dir
):
"""See base class."""
return
self
.
_create_examples
(
self
.
_read_tsv
(
os
.
path
.
join
(
data_dir
,
"dev_mismatched.tsv"
)),
"dev_mismatched"
)
...
...
@@ -259,6 +279,10 @@ class MnliMismatchedProcessor(MnliProcessor):
class
ColaProcessor
(
DataProcessor
):
"""Processor for the CoLA data set (GLUE version)."""
def
__init__
(
self
,
*
args
,
**
kwargs
):
super
().
__init__
(
*
args
,
**
kwargs
)
warnings
.
warn
(
DEPRECATION_WARNING
.
format
(
"processor"
),
FutureWarning
)
def
get_example_from_tensor_dict
(
self
,
tensor_dict
):
"""See base class."""
return
InputExample
(
...
...
@@ -302,6 +326,10 @@ class ColaProcessor(DataProcessor):
class
Sst2Processor
(
DataProcessor
):
"""Processor for the SST-2 data set (GLUE version)."""
def
__init__
(
self
,
*
args
,
**
kwargs
):
super
().
__init__
(
*
args
,
**
kwargs
)
warnings
.
warn
(
DEPRECATION_WARNING
.
format
(
"processor"
),
FutureWarning
)
def
get_example_from_tensor_dict
(
self
,
tensor_dict
):
"""See base class."""
return
InputExample
(
...
...
@@ -344,6 +372,10 @@ class Sst2Processor(DataProcessor):
class
StsbProcessor
(
DataProcessor
):
"""Processor for the STS-B data set (GLUE version)."""
def
__init__
(
self
,
*
args
,
**
kwargs
):
super
().
__init__
(
*
args
,
**
kwargs
)
warnings
.
warn
(
DEPRECATION_WARNING
.
format
(
"processor"
),
FutureWarning
)
def
get_example_from_tensor_dict
(
self
,
tensor_dict
):
"""See base class."""
return
InputExample
(
...
...
@@ -386,6 +418,10 @@ class StsbProcessor(DataProcessor):
class
QqpProcessor
(
DataProcessor
):
"""Processor for the QQP data set (GLUE version)."""
def
__init__
(
self
,
*
args
,
**
kwargs
):
super
().
__init__
(
*
args
,
**
kwargs
)
warnings
.
warn
(
DEPRECATION_WARNING
.
format
(
"processor"
),
FutureWarning
)
def
get_example_from_tensor_dict
(
self
,
tensor_dict
):
"""See base class."""
return
InputExample
(
...
...
@@ -434,6 +470,10 @@ class QqpProcessor(DataProcessor):
class
QnliProcessor
(
DataProcessor
):
"""Processor for the QNLI data set (GLUE version)."""
def
__init__
(
self
,
*
args
,
**
kwargs
):
super
().
__init__
(
*
args
,
**
kwargs
)
warnings
.
warn
(
DEPRECATION_WARNING
.
format
(
"processor"
),
FutureWarning
)
def
get_example_from_tensor_dict
(
self
,
tensor_dict
):
"""See base class."""
return
InputExample
(
...
...
@@ -476,6 +516,10 @@ class QnliProcessor(DataProcessor):
class
RteProcessor
(
DataProcessor
):
"""Processor for the RTE data set (GLUE version)."""
def
__init__
(
self
,
*
args
,
**
kwargs
):
super
().
__init__
(
*
args
,
**
kwargs
)
warnings
.
warn
(
DEPRECATION_WARNING
.
format
(
"processor"
),
FutureWarning
)
def
get_example_from_tensor_dict
(
self
,
tensor_dict
):
"""See base class."""
return
InputExample
(
...
...
@@ -518,6 +562,10 @@ class RteProcessor(DataProcessor):
class
WnliProcessor
(
DataProcessor
):
"""Processor for the WNLI data set (GLUE version)."""
def
__init__
(
self
,
*
args
,
**
kwargs
):
super
().
__init__
(
*
args
,
**
kwargs
)
warnings
.
warn
(
DEPRECATION_WARNING
.
format
(
"processor"
),
FutureWarning
)
def
get_example_from_tensor_dict
(
self
,
tensor_dict
):
"""See base class."""
return
InputExample
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment