Updated doc for `InputExample` and `InputFeatures`

36f592cc · LysandreJik · Lysandre Debut · ad4a393e · 36f592cc · 36f592cc
Commit 36f592cc authored Sep 25, 2019 by LysandreJik Committed by Lysandre Debut Sep 26, 2019
Show whitespace changes
Inline Side-by-side

Showing with 30 additions and 14 deletions

docs/source/main_classes/processors.rst docs/source/main_classes/processors.rst +7 -1

transformers/data/processors/utils.py transformers/data/processors/utils.py +23 -13

No files found.
--- a/docs/source/main_classes/processors.rst
+++ b/docs/source/main_classes/processors.rst
@@ -9,7 +9,9 @@ Processors

 All processors follow the same architecture which is that of the
 :class:`~pytorch_transformers.data.processors.utils.DataProcessor`. The processor returns a list
-of :class:`~pytorch_transformers.data.processors.utils.InputExample`.
+of :class:`~pytorch_transformers.data.processors.utils.InputExample`. These
+:class:`~pytorch_transformers.data.processors.utils.InputExample` instances can be converted to
+:class:`~pytorch_transformers.data.processors.utils.InputFeatures` in order to be fed to the model.

 .. autoclass:: pytorch_transformers.data.processors.utils.DataProcessor
    :members:
@@ -19,6 +21,10 @@ of :class:`~pytorch_transformers.data.processors.utils.InputExample`.
    :members:


+.. autoclass:: pytorch_transformers.data.processors.utils.InputFeatures
+    :members:
+
+
 GLUE
 ~~~~~~~~~~~~~~~~~~~~~


--- a/transformers/data/processors/utils.py
+++ b/transformers/data/processors/utils.py
@@ -20,9 +20,8 @@ import copy
 import json

 class InputExample(object):
-    """A single training/test example for simple sequence classification."""
-    def __init__(self, guid, text_a, text_b=None, label=None):
-        """Constructs a InputExample.
+    """
+    A single training/test example for simple sequence classification.

    Args:
        guid: Unique id for the example.
@@ -33,6 +32,7 @@ class InputExample(object):
        label: (Optional) string. The label of the example. This should be
        specified for train and dev examples, but not for test examples.
    """
+    def __init__(self, guid, text_a, text_b=None, label=None):
        self.guid = guid
        self.text_a = text_a
        self.text_b = text_b
@@ -52,7 +52,17 @@ class InputExample(object):


 class InputFeatures(object):
-    """A single set of features of data."""
+    """
+    A single set of features of data.
+
+    Args:
+        input_ids: Indices of input sequence tokens in the vocabulary.
+        attention_mask: Mask to avoid performing attention on padding token indices.
+            Mask values selected in ``[0, 1]``:
+            Usually ``1`` for tokens that are NOT MASKED, ``0`` for MASKED (padded) tokens.
+        token_type_ids: Segment token indices to indicate first and second portions of the inputs.
+        label: Label corresponding to the input.
+    """

    def __init__(self, input_ids, attention_mask, token_type_ids, label):
        self.input_ids = input_ids