Commit 9c391277 authored by Morgan Funtowicz

Allow tensor placement on a specific device through the CLI and pipelines.

parent 955d7ecb
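In practice the new keyword flows through the pipeline() factory patched in the first hunk below, and the CLI exposes the same knob as --device on the run subcommand. A minimal usage sketch — the task and model names here are illustrative placeholders, only the device argument comes from this commit:

    from transformers import pipeline

    # device=-1 (the default) stays on CPU; device >= 0 selects a GPU.
    # With PyTorch the model is moved to 'cuda:<device>' once at construction;
    # with TensorFlow the forward pass runs under a tf.device scope instead.
    nlp = pipeline(task='sentiment-analysis',
                   model='distilbert-base-uncased-finetuned-sst-2-english',
                   device=0)
    print(nlp('Device placement through the pipeline works.'))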
@@ -16,7 +16,7 @@ def try_infer_format_from_ext(path: str):
 
 def run_command_factory(args):
-    nlp = pipeline(task=args.task, model=args.model, tokenizer=args.tokenizer)
+    nlp = pipeline(task=args.task, model=args.model, tokenizer=args.tokenizer, device=args.device)
     format = try_infer_format_from_ext(args.input) if args.format == 'infer' else args.format
     reader = PipelineDataFormat.from_str(format, args.output, args.input, args.column)
     return RunCommand(nlp, reader)
@@ -31,6 +31,7 @@ class RunCommand(BaseTransformersCLICommand):
     @staticmethod
     def register_subcommand(parser: ArgumentParser):
         run_parser = parser.add_parser('run', help="Run a pipeline through the CLI")
+        run_parser.add_argument('--device', type=int, default=-1, help='Indicate the device to run onto, -1 indicates CPU, >= 0 indicates GPU')
         run_parser.add_argument('--task', choices=SUPPORTED_TASKS.keys(), help='Task to run')
         run_parser.add_argument('--model', type=str, required=True, help='Name or path to the model to instantiate.')
         run_parser.add_argument('--tokenizer', type=str, help='Name of the tokenizer to use. (default: same as the model name)')
@@ -18,6 +18,7 @@ import csv
 import json
 import os
 from abc import ABC, abstractmethod
+from contextlib import contextmanager
 from itertools import groupby
 from typing import Union, Optional, Tuple, List, Dict
@@ -152,11 +153,18 @@ class JsonPipelineDataFormat(PipelineDataFormat):
 
 class Pipeline(_ScikitCompat):
-    def __init__(self, model, tokenizer: PreTrainedTokenizer = None, args_parser: ArgumentHandler = None, **kwargs):
+    def __init__(self, model, tokenizer: PreTrainedTokenizer = None,
+                 args_parser: ArgumentHandler = None, device: int = -1, **kwargs):
         self.model = model
         self.tokenizer = tokenizer
+        self.device = device
         self._args_parser = args_parser or DefaultArgumentHandler()
 
+        # Special handling
+        if self.device >= 0 and not is_tf_available():
+            self.model = self.model.to('cuda:{}'.format(self.device))
+
     def save_pretrained(self, save_directory):
         if not os.path.isdir(save_directory):
             logger.error("Provided path ({}) should be a directory".format(save_directory))
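The constructor-time move above only fires on the PyTorch path; TensorFlow placement is handled per call via the context manager added in the next hunk. A minimal sketch of what that branch amounts to, with a toy module standing in for the pretrained model:

    import torch
    from torch import nn

    model = nn.Linear(4, 2)  # stand-in for the real pretrained model
    device = 0               # as passed via device=0 or --device 0
    if device >= 0:
        # nn.Module.to() moves the parameters and returns the module,
        # so reassigning keeps the idiom uniform with plain tensors.
        model = model.to('cuda:{}'.format(device))

With the default of -1, the move is skipped entirely and the model stays on CPU.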
@@ -176,11 +184,25 @@ class Pipeline(_ScikitCompat):
         inputs = self._args_parser(*texts, **kwargs)
 
         # Encode for forward
-        inputs = self.tokenizer.batch_encode_plus(
-            inputs, add_special_tokens=True, return_tensors='tf' if is_tf_available() else 'pt'
-        )
-
-        return self._forward(inputs)
+        with self.device_placement():
+            inputs = self.tokenizer.batch_encode_plus(
+                inputs, add_special_tokens=True, return_tensors='tf' if is_tf_available() else 'pt'
+            )
+
+            return self._forward(inputs)
+
+    @contextmanager
+    def device_placement(self):
+        if is_tf_available():
+            import tensorflow as tf
+            with tf.device('/CPU:0' if self.device == -1 else '/device:GPU:{}'.format(self.device)):
+                yield
+        else:
+            import torch
+            if self.device >= 0:
+                torch.cuda.set_device(self.device)
+
+            yield
 
     def _forward(self, inputs):
         if is_tf_available():
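The @contextmanager helper above is the heart of the change: TensorFlow needs an explicit tf.device scope around the forward pass, whereas PyTorch only needs the current CUDA device selected, since the model was already moved in __init__. A stripped-down sketch of the PyTorch side of the pattern, as a standalone function with illustrative names:

    from contextlib import contextmanager
    import torch

    @contextmanager
    def device_placement(device: int = -1):
        # Code before `yield` runs when the `with` block is entered;
        # selecting the current CUDA device here is enough for PyTorch,
        # as the body of the block then runs against that device.
        if device >= 0:
            torch.cuda.set_device(device)
        yield

    with device_placement(device=-1):
        x = torch.ones(2, 2)  # tokenization and forward would happen here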
@@ -225,14 +247,17 @@ class NerPipeline(Pipeline):
             for i, w in enumerate(words):
                 tokens = self.tokenizer.tokenize(w)
                 token_to_word += [i] * len(tokens)
-            tokens = self.tokenizer.encode_plus(sentence, return_attention_mask=False, return_tensors='tf' if is_tf_available() else 'pt')
-
-            # Forward
-            if is_torch_available():
-                with torch.no_grad():
-                    entities = self.model(**tokens)[0][0].cpu().numpy()
-            else:
-                entities = self.model(tokens)[0][0].numpy()
+            # Manage correct placement of the tensors
+            with self.device_placement():
+                tokens = self.tokenizer.encode_plus(sentence, return_attention_mask=False, return_tensors='tf' if is_tf_available() else 'pt')
+
+                # Forward
+                if is_torch_available():
+                    with torch.no_grad():
+                        entities = self.model(**tokens)[0][0].cpu().numpy()
+                else:
+                    entities = self.model(tokens)[0][0].numpy()
 
             # Normalize scores
             answer, token_start = [], 1
@@ -352,18 +377,20 @@ class QuestionAnsweringPipeline(Pipeline):
         features = squad_convert_examples_to_features(examples, self.tokenizer, kwargs['max_seq_len'], kwargs['doc_stride'], kwargs['max_question_len'], False)
         fw_args = self.inputs_for_model(features)
 
-        if is_tf_available():
-            import tensorflow as tf
-            fw_args = {k: tf.constant(v) for (k, v) in fw_args.items()}
-            start, end = self.model(fw_args)
-            start, end = start.numpy(), end.numpy()
-        else:
-            import torch
-            with torch.no_grad():
-                # Retrieve the score for the context tokens only (removing question tokens)
-                fw_args = {k: torch.tensor(v) for (k, v) in fw_args.items()}
-                start, end = self.model(**fw_args)
-                start, end = start.cpu().numpy(), end.cpu().numpy()
+        # Manage tensor allocation on correct device
+        with self.device_placement():
+            if is_tf_available():
+                import tensorflow as tf
+                fw_args = {k: tf.constant(v) for (k, v) in fw_args.items()}
+                start, end = self.model(fw_args)
+                start, end = start.numpy(), end.numpy()
+            else:
+                import torch
+                with torch.no_grad():
+                    # Retrieve the score for the context tokens only (removing question tokens)
+                    fw_args = {k: torch.tensor(v) for (k, v) in fw_args.items()}
+                    start, end = self.model(**fw_args)
+                    start, end = start.cpu().numpy(), end.cpu().numpy()
 
         answers = []
         for (example, feature, start_, end_) in zip(examples, features, start, end):
@@ -374,7 +401,7 @@ class QuestionAnsweringPipeline(Pipeline):
             # Mask padding and question
             start_, end_ = start_ * np.abs(np.array(feature.p_mask) - 1), end_ * np.abs(np.array(feature.p_mask) - 1)
 
-            # TODO : What happend if not possible
+            # TODO : What happens if not possible
             # Mask CLS
             start_[0] = end_[0] = 0