Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
9c391277
"...googletest/xcode/Config/ReleaseProject.xcconfig" did not exist on "e3f120b99de7bad9801b51c7e1fffea82d3c4f41"
Commit
9c391277
authored
Dec 16, 2019
by
Morgan Funtowicz
Browse files
Allow tensors placement on specific device through CLI and pipeline.
parent
955d7ecb
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
54 additions
and
26 deletions
+54
-26
transformers/commands/run.py
transformers/commands/run.py
+2
-1
transformers/pipelines.py
transformers/pipelines.py
+52
-25
No files found.
transformers/commands/run.py
View file @
9c391277
...
@@ -16,7 +16,7 @@ def try_infer_format_from_ext(path: str):
...
@@ -16,7 +16,7 @@ def try_infer_format_from_ext(path: str):
def run_command_factory(args):
    """Build a :class:`RunCommand` from parsed CLI arguments.

    Instantiates the pipeline (placing tensors on the device requested
    via ``--device``: -1 for CPU, >= 0 for a GPU index), infers the input
    data format from the file extension when ``--format infer`` was given,
    and wires up the corresponding data reader.
    """
    nlp = pipeline(task=args.task, model=args.model, tokenizer=args.tokenizer, device=args.device)
    # Renamed from `format`: that name shadowed the `format` builtin.
    data_format = try_infer_format_from_ext(args.input) if args.format == 'infer' else args.format
    reader = PipelineDataFormat.from_str(data_format, args.output, args.input, args.column)
    return RunCommand(nlp, reader)
...
@@ -31,6 +31,7 @@ class RunCommand(BaseTransformersCLICommand):
...
@@ -31,6 +31,7 @@ class RunCommand(BaseTransformersCLICommand):
@
staticmethod
@
staticmethod
def
register_subcommand
(
parser
:
ArgumentParser
):
def
register_subcommand
(
parser
:
ArgumentParser
):
run_parser
=
parser
.
add_parser
(
'run'
,
help
=
"Run a pipeline through the CLI"
)
run_parser
=
parser
.
add_parser
(
'run'
,
help
=
"Run a pipeline through the CLI"
)
run_parser
.
add_argument
(
'--device'
,
type
=
int
,
default
=-
1
,
help
=
'Indicate the device to run onto, -1 indicates CPU, >= 0 indicates GPU'
)
run_parser
.
add_argument
(
'--task'
,
choices
=
SUPPORTED_TASKS
.
keys
(),
help
=
'Task to run'
)
run_parser
.
add_argument
(
'--task'
,
choices
=
SUPPORTED_TASKS
.
keys
(),
help
=
'Task to run'
)
run_parser
.
add_argument
(
'--model'
,
type
=
str
,
required
=
True
,
help
=
'Name or path to the model to instantiate.'
)
run_parser
.
add_argument
(
'--model'
,
type
=
str
,
required
=
True
,
help
=
'Name or path to the model to instantiate.'
)
run_parser
.
add_argument
(
'--tokenizer'
,
type
=
str
,
help
=
'Name of the tokenizer to use. (default: same as the model name)'
)
run_parser
.
add_argument
(
'--tokenizer'
,
type
=
str
,
help
=
'Name of the tokenizer to use. (default: same as the model name)'
)
...
...
transformers/pipelines.py
View file @
9c391277
...
@@ -18,6 +18,7 @@ import csv
...
@@ -18,6 +18,7 @@ import csv
import
json
import
json
import
os
import
os
from
abc
import
ABC
,
abstractmethod
from
abc
import
ABC
,
abstractmethod
from
contextlib
import
contextmanager
from
itertools
import
groupby
from
itertools
import
groupby
from
typing
import
Union
,
Optional
,
Tuple
,
List
,
Dict
from
typing
import
Union
,
Optional
,
Tuple
,
List
,
Dict
...
@@ -152,11 +153,18 @@ class JsonPipelineDataFormat(PipelineDataFormat):
...
@@ -152,11 +153,18 @@ class JsonPipelineDataFormat(PipelineDataFormat):
class
Pipeline
(
_ScikitCompat
):
class
Pipeline
(
_ScikitCompat
):
def
__init__
(
self
,
model
,
tokenizer
:
PreTrainedTokenizer
=
None
,
args_parser
:
ArgumentHandler
=
None
,
**
kwargs
):
def
__init__
(
self
,
model
,
tokenizer
:
PreTrainedTokenizer
=
None
,
args_parser
:
ArgumentHandler
=
None
,
device
:
int
=
-
1
,
**
kwargs
):
self
.
model
=
model
self
.
model
=
model
self
.
tokenizer
=
tokenizer
self
.
tokenizer
=
tokenizer
self
.
device
=
device
self
.
_args_parser
=
args_parser
or
DefaultArgumentHandler
()
self
.
_args_parser
=
args_parser
or
DefaultArgumentHandler
()
# Special handling
if
self
.
device
>=
0
and
not
is_tf_available
():
self
.
model
=
self
.
model
.
to
(
'cuda:{}'
.
format
(
self
.
device
))
def
save_pretrained
(
self
,
save_directory
):
def
save_pretrained
(
self
,
save_directory
):
if
not
os
.
path
.
isdir
(
save_directory
):
if
not
os
.
path
.
isdir
(
save_directory
):
logger
.
error
(
"Provided path ({}) should be a directory"
.
format
(
save_directory
))
logger
.
error
(
"Provided path ({}) should be a directory"
.
format
(
save_directory
))
...
@@ -176,12 +184,26 @@ class Pipeline(_ScikitCompat):
...
@@ -176,12 +184,26 @@ class Pipeline(_ScikitCompat):
inputs
=
self
.
_args_parser
(
*
texts
,
**
kwargs
)
inputs
=
self
.
_args_parser
(
*
texts
,
**
kwargs
)
# Encode for forward
# Encode for forward
with
self
.
device_placement
():
inputs
=
self
.
tokenizer
.
batch_encode_plus
(
inputs
=
self
.
tokenizer
.
batch_encode_plus
(
inputs
,
add_special_tokens
=
True
,
return_tensors
=
'tf'
if
is_tf_available
()
else
'pt'
inputs
,
add_special_tokens
=
True
,
return_tensors
=
'tf'
if
is_tf_available
()
else
'pt'
)
)
return
self
.
_forward
(
inputs
)
return
self
.
_forward
(
inputs
)
@
contextmanager
def
device_placement
(
self
):
if
is_tf_available
():
import
tensorflow
as
tf
with
tf
.
device
(
'/CPU:0'
if
self
.
device
==
-
1
else
'/device:GPU:{}'
.
format
(
self
.
device
)):
yield
else
:
import
torch
if
self
.
device
>=
0
:
torch
.
cuda
.
set_device
(
self
.
device
)
yield
def
_forward
(
self
,
inputs
):
def
_forward
(
self
,
inputs
):
if
is_tf_available
():
if
is_tf_available
():
# TODO trace model
# TODO trace model
...
@@ -225,6 +247,9 @@ class NerPipeline(Pipeline):
...
@@ -225,6 +247,9 @@ class NerPipeline(Pipeline):
for
i
,
w
in
enumerate
(
words
):
for
i
,
w
in
enumerate
(
words
):
tokens
=
self
.
tokenizer
.
tokenize
(
w
)
tokens
=
self
.
tokenizer
.
tokenize
(
w
)
token_to_word
+=
[
i
]
*
len
(
tokens
)
token_to_word
+=
[
i
]
*
len
(
tokens
)
# Manage correct placement of the tensors
with
self
.
device_placement
():
tokens
=
self
.
tokenizer
.
encode_plus
(
sentence
,
return_attention_mask
=
False
,
return_tensors
=
'tf'
if
is_tf_available
()
else
'pt'
)
tokens
=
self
.
tokenizer
.
encode_plus
(
sentence
,
return_attention_mask
=
False
,
return_tensors
=
'tf'
if
is_tf_available
()
else
'pt'
)
# Forward
# Forward
...
@@ -352,6 +377,8 @@ class QuestionAnsweringPipeline(Pipeline):
...
@@ -352,6 +377,8 @@ class QuestionAnsweringPipeline(Pipeline):
features
=
squad_convert_examples_to_features
(
examples
,
self
.
tokenizer
,
kwargs
[
'max_seq_len'
],
kwargs
[
'doc_stride'
],
kwargs
[
'max_question_len'
],
False
)
features
=
squad_convert_examples_to_features
(
examples
,
self
.
tokenizer
,
kwargs
[
'max_seq_len'
],
kwargs
[
'doc_stride'
],
kwargs
[
'max_question_len'
],
False
)
fw_args
=
self
.
inputs_for_model
(
features
)
fw_args
=
self
.
inputs_for_model
(
features
)
# Manage tensor allocation on correct device
with
self
.
device_placement
():
if
is_tf_available
():
if
is_tf_available
():
import
tensorflow
as
tf
import
tensorflow
as
tf
fw_args
=
{
k
:
tf
.
constant
(
v
)
for
(
k
,
v
)
in
fw_args
.
items
()}
fw_args
=
{
k
:
tf
.
constant
(
v
)
for
(
k
,
v
)
in
fw_args
.
items
()}
...
@@ -374,7 +401,7 @@ class QuestionAnsweringPipeline(Pipeline):
...
@@ -374,7 +401,7 @@ class QuestionAnsweringPipeline(Pipeline):
# Mask padding and question
# Mask padding and question
start_
,
end_
=
start_
*
np
.
abs
(
np
.
array
(
feature
.
p_mask
)
-
1
),
end_
*
np
.
abs
(
np
.
array
(
feature
.
p_mask
)
-
1
)
start_
,
end_
=
start_
*
np
.
abs
(
np
.
array
(
feature
.
p_mask
)
-
1
),
end_
*
np
.
abs
(
np
.
array
(
feature
.
p_mask
)
-
1
)
# TODO : What happen
d
if not possible
# TODO : What happen
s
if not possible
# Mask CLS
# Mask CLS
start_
[
0
]
=
end_
[
0
]
=
0
start_
[
0
]
=
end_
[
0
]
=
0
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment