Commit 36f592cc authored by LysandreJik's avatar LysandreJik Committed by Lysandre Debut
Browse files

Updated doc for `InputExample` and `InputFeatures`

parent ad4a393e
......@@ -9,7 +9,9 @@ Processors
All processors follow the same architecture which is that of the
:class:`~pytorch_transformers.data.processors.utils.DataProcessor`. The processor returns a list
of :class:`~pytorch_transformers.data.processors.utils.InputExample`.
of :class:`~pytorch_transformers.data.processors.utils.InputExample`. These
:class:`~pytorch_transformers.data.processors.utils.InputExample` objects can be converted to
:class:`~pytorch_transformers.data.processors.utils.InputFeatures` in order to be fed to the model.
.. autoclass:: pytorch_transformers.data.processors.utils.DataProcessor
:members:
......@@ -19,6 +21,10 @@ of :class:`~pytorch_transformers.data.processors.utils.InputExample`.
:members:
.. autoclass:: pytorch_transformers.data.processors.utils.InputFeatures
:members:
GLUE
~~~~~~~~~~~~~~~~~~~~~
......
......@@ -20,9 +20,8 @@ import copy
import json
class InputExample(object):
"""A single training/test example for simple sequence classification."""
def __init__(self, guid, text_a, text_b=None, label=None):
"""Constructs a InputExample.
"""
A single training/test example for simple sequence classification.
Args:
guid: Unique id for the example.
......@@ -33,6 +32,7 @@ class InputExample(object):
label: (Optional) string. The label of the example. This should be
specified for train and dev examples, but not for test examples.
"""
def __init__(self, guid, text_a, text_b=None, label=None):
self.guid = guid
self.text_a = text_a
self.text_b = text_b
......@@ -52,7 +52,17 @@ class InputExample(object):
class InputFeatures(object):
"""A single set of features of data."""
"""
A single set of features of data.
Args:
input_ids: Indices of input sequence tokens in the vocabulary.
attention_mask: Mask to avoid performing attention on padding token indices.
Mask values selected in ``[0, 1]``:
Usually ``1`` for tokens that are NOT MASKED, ``0`` for MASKED (padded) tokens.
token_type_ids: Segment token indices to indicate first and second portions of the inputs.
label: Label corresponding to the input.
"""
def __init__(self, input_ids, attention_mask, token_type_ids, label):
self.input_ids = input_ids
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment