".circleci/git@developer.sourcefind.cn:OpenDAS/torchaudio.git" did not exist on "07d9bc217b1e70aa37cc606c5bb4b68d60c8197d"
Commit b31ba239 authored by thomwolf

cuda on in the examples by default

parent 0a9860da
````diff
@@ -187,8 +187,14 @@ Let's see how to use `BertModel` to get hidden states
 model = BertModel.from_pretrained('bert-base-uncased')
 model.eval()
 
+# If you have a GPU, put everything on cuda
+tokens_tensor = tokens_tensor.to('cuda')
+segments_tensors = segments_tensors.to('cuda')
+model.to('cuda')
+
 # Predict hidden states features for each layer
-encoded_layers, _ = model(tokens_tensor, segments_tensors)
+with torch.no_grad():
+    encoded_layers, _ = model(tokens_tensor, segments_tensors)
 
 # We have hidden states for each of the 12 layers in model bert-base-uncased
 assert len(encoded_layers) == 12
 ```
````
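Taken out of diff form, the updated example reads as below. This is a minimal sketch, assuming the `pytorch_pretrained_bert` package this README documents and the quick-start's Jim Henson sentence; unlike the README snippet it picks the device conditionally, so it still runs on a CPU-only machine.

```python
import torch
from pytorch_pretrained_bert import BertTokenizer, BertModel

# Pick the device once; every tensor and the model are moved to it explicitly.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
tokenized_text = tokenizer.tokenize(text)
indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)

# Tokens up to and including the first [SEP] form segment 0, the rest segment 1.
sep_index = tokenized_text.index('[SEP]')
segments_ids = [0] * (sep_index + 1) + [1] * (len(tokenized_text) - sep_index - 1)

tokens_tensor = torch.tensor([indexed_tokens]).to(device)
segments_tensors = torch.tensor([segments_ids]).to(device)

model = BertModel.from_pretrained('bert-base-uncased')
model.eval()
model.to(device)

# torch.no_grad() skips building the autograd graph during pure inference.
with torch.no_grad():
    encoded_layers, _ = model(tokens_tensor, segments_tensors)

assert len(encoded_layers) == 12  # one hidden-states tensor per BERT layer
```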
````diff
@@ -200,8 +206,14 @@ And how to use `BertForMaskedLM`
 model = BertForMaskedLM.from_pretrained('bert-base-uncased')
 model.eval()
 
+# If you have a GPU, put everything on cuda
+tokens_tensor = tokens_tensor.to('cuda')
+segments_tensors = segments_tensors.to('cuda')
+model.to('cuda')
+
 # Predict all tokens
-predictions = model(tokens_tensor, segments_tensors)
+with torch.no_grad():
+    predictions = model(tokens_tensor, segments_tensors)
 
 # confirm we were able to predict 'henson'
 predicted_index = torch.argmax(predictions[0, masked_index]).item()
````
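The same treatment applied to the masked-LM example, completed end to end; again a sketch assuming `pytorch_pretrained_bert` and that `'henson'` is a single wordpiece in the `bert-base-uncased` vocabulary (the quick-start's own assertion earlier in the README relies on this too).

```python
import torch
from pytorch_pretrained_bert import BertTokenizer, BertForMaskedLM

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
tokenized_text = tokenizer.tokenize(text)

# Mask the second 'henson' and ask the model to recover it.
masked_index = tokenized_text.index('henson', tokenized_text.index('henson') + 1)
tokenized_text[masked_index] = '[MASK]'

indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
sep_index = tokenized_text.index('[SEP]')
segments_ids = [0] * (sep_index + 1) + [1] * (len(tokenized_text) - sep_index - 1)

tokens_tensor = torch.tensor([indexed_tokens]).to(device)
segments_tensors = torch.tensor([segments_ids]).to(device)

model = BertForMaskedLM.from_pretrained('bert-base-uncased')
model.eval()
model.to(device)

with torch.no_grad():
    predictions = model(tokens_tensor, segments_tensors)

# predictions has shape (batch, sequence, vocab); read off the masked position.
predicted_index = torch.argmax(predictions[0, masked_index]).item()
predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
print(predicted_token)  # expected: 'henson'
```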
````diff
@@ -240,8 +252,13 @@ Let's see how to use `OpenAIGPTModel` to get hidden states
 model = OpenAIGPTModel.from_pretrained('openai-gpt')
 model.eval()
 
+# If you have a GPU, put everything on cuda
+tokens_tensor = tokens_tensor.to('cuda')
+model.to('cuda')
+
 # Predict hidden states features for each layer
-hidden_states = model(tokens_tensor)
+with torch.no_grad():
+    hidden_states = model(tokens_tensor)
 ```
````
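A standalone version of this step, under the same package assumption and additionally assuming `OpenAIGPTModel` returns the top-layer hidden states as a single tensor, as the snippet above implies. Note that GPT's BPE tokenizer takes the raw sentence directly, with no [CLS]/[SEP] markers.

```python
import torch
from pytorch_pretrained_bert import OpenAIGPTTokenizer, OpenAIGPTModel

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# GPT uses byte-pair encoding; the raw sentence is tokenized directly.
tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
text = "Who was Jim Henson ? Jim Henson was a puppeteer"
indexed_tokens = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(text))
tokens_tensor = torch.tensor([indexed_tokens]).to(device)

model = OpenAIGPTModel.from_pretrained('openai-gpt')
model.eval()
model.to(device)

with torch.no_grad():
    hidden_states = model(tokens_tensor)

# Move the features back to the CPU before handing them to numpy-based tooling.
features = hidden_states[0].cpu()  # batch element 0: (sequence, hidden size)
```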
And how to use `OpenAIGPTLMHeadModel`
````diff
@@ -251,19 +268,25 @@ And how to use `OpenAIGPTLMHeadModel`
 model = OpenAIGPTLMHeadModel.from_pretrained('openai-gpt')
 model.eval()
 
+# If you have a GPU, put everything on cuda
+tokens_tensor = tokens_tensor.to('cuda')
+model.to('cuda')
+
 # Predict all tokens
-predictions = model(tokens_tensor)
+with torch.no_grad():
+    predictions = model(tokens_tensor)
 
 # get the predicted last token
-predicted_index = torch.argmax(predictions[0, masked_index]).item()
+predicted_index = torch.argmax(predictions[0, -1, :]).item()
 predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
+assert predicted_token == '.</w>'
 ```
````
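`predictions[0, -1, :]` is the vocabulary distribution for the position following the last input token. Iterating that step yields greedy decoding; a sketch under the same assumptions (the continuation is whatever the model happens to prefer):

```python
import torch
from pytorch_pretrained_bert import OpenAIGPTTokenizer, OpenAIGPTLMHeadModel

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
model = OpenAIGPTLMHeadModel.from_pretrained('openai-gpt')
model.eval()
model.to(device)

indexed_tokens = tokenizer.convert_tokens_to_ids(
    tokenizer.tokenize("Who was Jim Henson ? Jim Henson was"))

# Greedily append the argmax token a few times, re-encoding the full prefix.
for _ in range(5):
    tokens_tensor = torch.tensor([indexed_tokens]).to(device)
    with torch.no_grad():
        predictions = model(tokens_tensor)
    indexed_tokens.append(torch.argmax(predictions[0, -1, :]).item())

print(tokenizer.convert_ids_to_tokens(indexed_tokens))
```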
````diff
 ### Transformer-XL
 
-Here is a quick-start example using `OpenAIGPTTokenizer`, `OpenAIGPTModel` and `OpenAIGPTLMHeadModel` class with OpenAI's pre-trained model. See the [doc section](#doc) below for all the details on these classes.
+Here is a quick-start example using the `TransfoXLTokenizer`, `TransfoXLModel` and `TransfoXLLMHeadModel` classes with the Transformer-XL model pre-trained on WikiText-103. See the [doc section](#doc) below for all the details on these classes.
 
-First let's prepare a tokenized input with `OpenAIGPTTokenizer`
+First let's prepare a tokenized input with `TransfoXLTokenizer`
 
 ```python
 import torch
````
````diff
@@ -294,27 +317,40 @@ Let's see how to use `TransfoXLModel` to get hidden states
 model = TransfoXLModel.from_pretrained('transfo-xl-wt103')
 model.eval()
 
-# Predict hidden states features for each layer
-hidden_states_1, mems_1 = model(tokens_tensor_1)
-# We can re-use the memory cells in a subsequent call to attend a longer context
-hidden_states_2, mems_2 = model(tokens_tensor_2, mems_1)
+# If you have a GPU, put everything on cuda
+tokens_tensor_1 = tokens_tensor_1.to('cuda')
+tokens_tensor_2 = tokens_tensor_2.to('cuda')
+model.to('cuda')
+
+with torch.no_grad():
+    # Predict hidden states features for each layer
+    hidden_states_1, mems_1 = model(tokens_tensor_1)
+    # We can re-use the memory cells in a subsequent call to attend a longer context
+    hidden_states_2, mems_2 = model(tokens_tensor_2, mems=mems_1)
 ```
````
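`mems` is the list of cached hidden states, one entry per layer; passing it back in lets the next call attend over earlier segments without recomputing them. A sketch of threading it through arbitrarily many chunks (same package assumption; the chunk size is arbitrary, for illustration only):

```python
import torch
from pytorch_pretrained_bert import TransfoXLTokenizer, TransfoXLModel

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
tokenizer = TransfoXLTokenizer.from_pretrained('transfo-xl-wt103')
model = TransfoXLModel.from_pretrained('transfo-xl-wt103')
model.eval()
model.to(device)

text = "Who was Jim Henson ? Jim Henson was a puppeteer"
indexed_tokens = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(text))

# Feed the sequence chunk by chunk, carrying the memory forward each time.
chunk_size = 4
mems = None
with torch.no_grad():
    for start in range(0, len(indexed_tokens), chunk_size):
        chunk = torch.tensor([indexed_tokens[start:start + chunk_size]]).to(device)
        hidden_states, mems = model(chunk, mems=mems)
```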
````diff
-And how to use `OpenAIGPTLMHeadModel`
+And how to use `TransfoXLLMHeadModel`
 
 ```python
 # Load pre-trained model (weights)
-model = OpenAIGPTLMHeadModel.from_pretrained('openai-gpt')
+model = TransfoXLLMHeadModel.from_pretrained('transfo-xl-wt103')
 model.eval()
 
-# Predict all tokens
-predictions_1, mems_1 = model(tokens_tensor_1)
-# We can re-use the memory cells in a subsequent call to attend a longer context
-predictions_2, mems_2 = model(tokens_tensor_2, mems_1)
+# If you have a GPU, put everything on cuda
+tokens_tensor_1 = tokens_tensor_1.to('cuda')
+tokens_tensor_2 = tokens_tensor_2.to('cuda')
+model.to('cuda')
+
+with torch.no_grad():
+    # Predict all tokens
+    predictions_1, mems_1 = model(tokens_tensor_1)
+    # We can re-use the memory cells in a subsequent call to attend a longer context
+    predictions_2, mems_2 = model(tokens_tensor_2, mems=mems_1)
 
 # get the predicted last token
-predicted_index = torch.argmax(predictions_1[0, masked_index]).item()
+predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
 predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
+assert predicted_token == '.</w>'
 ```
 
 ## Doc
````
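Because the memory already encodes the past, generation can feed a single new token per step instead of the whole growing prefix. A sketch, assuming as above that `TransfoXLLMHeadModel` returns per-position vocabulary scores when called without targets:

```python
import torch
from pytorch_pretrained_bert import TransfoXLTokenizer, TransfoXLLMHeadModel

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
tokenizer = TransfoXLTokenizer.from_pretrained('transfo-xl-wt103')
model = TransfoXLLMHeadModel.from_pretrained('transfo-xl-wt103')
model.eval()
model.to(device)

tokens = tokenizer.convert_tokens_to_ids(
    tokenizer.tokenize("Who was Jim Henson ? Jim Henson was"))
tokens_tensor = torch.tensor([tokens]).to(device)

generated = []
mems = None
with torch.no_grad():
    for _ in range(5):
        # After the first step, only the newest token is fed; mems covers the rest.
        predictions, mems = model(tokens_tensor, mems=mems)
        predicted_index = torch.argmax(predictions[0, -1, :]).item()
        generated.append(predicted_index)
        tokens_tensor = torch.tensor([[predicted_index]]).to(device)

print(tokenizer.convert_ids_to_tokens(generated))
```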
````diff
@@ -52,8 +52,8 @@ def main():
                         help='length of the retained previous heads')
     parser.add_argument('--clamp_len', type=int, default=1000,
                         help='max positional embedding index')
-    parser.add_argument('--cuda', action='store_true',
-                        help='use CUDA')
+    parser.add_argument('--no_cuda', action='store_true',
+                        help='Do not use CUDA even though CUDA is available')
     parser.add_argument('--work_dir', type=str, required=True,
                         help='path to the work_dir')
     parser.add_argument('--no_log', action='store_true',
````
````diff
@@ -63,7 +63,8 @@ def main():
     args = parser.parse_args()
     assert args.ext_len >= 0, 'extended context length must be non-negative'
 
-    device = torch.device("cuda" if args.cuda else "cpu")
+    device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
+    logger.info("device: {}".format(device))
 
     # Load a pre-processed dataset
     # You can also build the corpus yourself using TransfoXLCorpus methods
````
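The script thus switches from an opt-in `--cuda` flag to CUDA-by-default with an opt-out, matching the README examples. The pattern in isolation, runnable as-is:

```python
import argparse
import torch

parser = argparse.ArgumentParser()
parser.add_argument('--no_cuda', action='store_true',
                    help='Do not use CUDA even though CUDA is available')
args = parser.parse_args()

# Use CUDA whenever it is available, unless the user explicitly opts out.
device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
print("device:", device)

x = torch.ones(2, 2, device=device)  # tensors can be created directly on the device
```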