"vscode:/vscode.git/clone" did not exist on "087798b7fa6dc82f34a7462fddd313d38b251bda"
Commit 7ac47bfe authored by Morgan Funtowicz's avatar Morgan Funtowicz
Browse files

Updated notebook dependencies for Colab.


Signed-off-by: default avatarMorgan Funtowicz <morgan@huggingface.co>
parent be02176a
...@@ -77,21 +77,93 @@ ...@@ -77,21 +77,93 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 1,
"outputs": [],
"source": [
"!pip install transformers"
],
"metadata": { "metadata": {
"collapsed": false,
"pycharm": { "pycharm": {
"is_executing": false,
"name": "#%% code\n" "name": "#%% code\n"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: transformers in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (2.5.1)\n",
"Requirement already satisfied: filelock in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from transformers) (3.0.12)\n",
"Requirement already satisfied: sentencepiece in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from transformers) (0.1.83)\n",
"Requirement already satisfied: boto3 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from transformers) (1.12.0)\n",
"Requirement already satisfied: requests in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from transformers) (2.22.0)\n",
"Requirement already satisfied: numpy in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from transformers) (1.18.1)\n",
"Requirement already satisfied: sacremoses in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from transformers) (0.0.35)\n",
"Requirement already satisfied: tokenizers==0.5.2 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from transformers) (0.5.2)\n",
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from transformers) (2020.1.8)\n",
"Requirement already satisfied: tqdm>=4.27 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from transformers) (4.42.1)\n",
"Requirement already satisfied: s3transfer<0.4.0,>=0.3.0 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from boto3->transformers) (0.3.3)\n",
"Requirement already satisfied: botocore<1.16.0,>=1.15.0 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from boto3->transformers) (1.15.0)\n",
"Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from boto3->transformers) (0.9.4)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from requests->transformers) (2019.11.28)\n",
"Requirement already satisfied: idna<2.9,>=2.5 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from requests->transformers) (2.8)\n",
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from requests->transformers) (1.25.8)\n",
"Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from requests->transformers) (3.0.4)\n",
"Requirement already satisfied: joblib in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from sacremoses->transformers) (0.14.0)\n",
"Requirement already satisfied: click in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from sacremoses->transformers) (7.0)\n",
"Requirement already satisfied: six in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from sacremoses->transformers) (1.14.0)\n",
"Requirement already satisfied: docutils<0.16,>=0.10 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from botocore<1.16.0,>=1.15.0->boto3->transformers) (0.15.2)\n",
"Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from botocore<1.16.0,>=1.15.0->boto3->transformers) (2.8.1)\n",
"Requirement already satisfied: tensorflow==2.1.0 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (2.1.0)\n",
"Requirement already satisfied: termcolor>=1.1.0 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorflow==2.1.0) (1.1.0)\n",
"Requirement already satisfied: keras-preprocessing>=1.1.0 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorflow==2.1.0) (1.1.0)\n",
"Requirement already satisfied: opt-einsum>=2.3.2 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorflow==2.1.0) (3.1.0)\n",
"Requirement already satisfied: protobuf>=3.8.0 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorflow==2.1.0) (3.11.4)\n",
"Requirement already satisfied: numpy<2.0,>=1.16.0 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorflow==2.1.0) (1.18.1)\n",
"Requirement already satisfied: tensorboard<2.2.0,>=2.1.0 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorflow==2.1.0) (2.1.0)\n",
"Requirement already satisfied: keras-applications>=1.0.8 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorflow==2.1.0) (1.0.8)\n",
"Requirement already satisfied: wrapt>=1.11.1 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorflow==2.1.0) (1.11.2)\n",
"Requirement already satisfied: six>=1.12.0 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorflow==2.1.0) (1.14.0)\n",
"Requirement already satisfied: tensorflow-estimator<2.2.0,>=2.1.0rc0 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorflow==2.1.0) (2.1.0)\n",
"Requirement already satisfied: scipy==1.4.1; python_version >= \"3\" in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorflow==2.1.0) (1.4.1)\n",
"Requirement already satisfied: google-pasta>=0.1.6 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorflow==2.1.0) (0.1.8)\n",
"Requirement already satisfied: wheel>=0.26; python_version >= \"3\" in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorflow==2.1.0) (0.34.2)\n",
"Requirement already satisfied: grpcio>=1.8.6 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorflow==2.1.0) (1.16.1)\n",
"Requirement already satisfied: absl-py>=0.7.0 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorflow==2.1.0) (0.9.0)\n",
"Requirement already satisfied: gast==0.2.2 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorflow==2.1.0) (0.2.2)\n",
"Requirement already satisfied: astor>=0.6.0 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorflow==2.1.0) (0.8.0)\n",
"Requirement already satisfied: setuptools in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from protobuf>=3.8.0->tensorflow==2.1.0) (45.2.0.post20200210)\n",
"Requirement already satisfied: google-auth<2,>=1.6.3 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorboard<2.2.0,>=2.1.0->tensorflow==2.1.0) (1.11.2)\n",
"Requirement already satisfied: google-auth-oauthlib<0.5,>=0.4.1 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorboard<2.2.0,>=2.1.0->tensorflow==2.1.0) (0.4.1)\n",
"Requirement already satisfied: markdown>=2.6.8 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorboard<2.2.0,>=2.1.0->tensorflow==2.1.0) (3.1.1)\n",
"Requirement already satisfied: werkzeug>=0.11.15 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorboard<2.2.0,>=2.1.0->tensorflow==2.1.0) (1.0.0)\n",
"Requirement already satisfied: requests<3,>=2.21.0 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from tensorboard<2.2.0,>=2.1.0->tensorflow==2.1.0) (2.22.0)\n",
"Requirement already satisfied: h5py in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from keras-applications>=1.0.8->tensorflow==2.1.0) (2.10.0)\n",
"Requirement already satisfied: rsa<4.1,>=3.1.4 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from google-auth<2,>=1.6.3->tensorboard<2.2.0,>=2.1.0->tensorflow==2.1.0) (4.0)\n",
"Requirement already satisfied: cachetools<5.0,>=2.0.0 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from google-auth<2,>=1.6.3->tensorboard<2.2.0,>=2.1.0->tensorflow==2.1.0) (4.0.0)\n",
"Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from google-auth<2,>=1.6.3->tensorboard<2.2.0,>=2.1.0->tensorflow==2.1.0) (0.2.8)\n",
"Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard<2.2.0,>=2.1.0->tensorflow==2.1.0) (1.3.0)\n",
"Requirement already satisfied: idna<2.9,>=2.5 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from requests<3,>=2.21.0->tensorboard<2.2.0,>=2.1.0->tensorflow==2.1.0) (2.8)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from requests<3,>=2.21.0->tensorboard<2.2.0,>=2.1.0->tensorflow==2.1.0) (2019.11.28)\n",
"Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from requests<3,>=2.21.0->tensorboard<2.2.0,>=2.1.0->tensorflow==2.1.0) (3.0.4)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from requests<3,>=2.21.0->tensorboard<2.2.0,>=2.1.0->tensorflow==2.1.0) (1.25.8)\r\n",
"Requirement already satisfied: pyasn1>=0.1.3 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from rsa<4.1,>=3.1.4->google-auth<2,>=1.6.3->tensorboard<2.2.0,>=2.1.0->tensorflow==2.1.0) (0.4.8)\r\n",
"Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/Caskroom/miniconda/base/envs/huggingface/lib/python3.7/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard<2.2.0,>=2.1.0->tensorflow==2.1.0) (3.1.0)\r\n"
]
} }
} ],
"source": [
"!pip install transformers\n",
"!pip install tensorflow==2.1.0"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 74, "execution_count": 2,
"metadata": { "metadata": {
"pycharm": { "pycharm": {
"is_executing": false, "is_executing": false,
...@@ -102,10 +174,10 @@ ...@@ -102,10 +174,10 @@
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"<torch.autograd.grad_mode.set_grad_enabled at 0x1af62fd450>" "<torch.autograd.grad_mode.set_grad_enabled at 0x102c0ce10>"
] ]
}, },
"execution_count": 74, "execution_count": 2,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
...@@ -119,7 +191,7 @@ ...@@ -119,7 +191,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 75, "execution_count": 3,
"metadata": { "metadata": {
"pycharm": { "pycharm": {
"is_executing": false, "is_executing": false,
...@@ -133,7 +205,7 @@ ...@@ -133,7 +205,7 @@
"\n", "\n",
"# We need to create the model and tokenizer\n", "# We need to create the model and tokenizer\n",
"model = AutoModel.from_pretrained(MODEL_NAME)\n", "model = AutoModel.from_pretrained(MODEL_NAME)\n",
"tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) " "tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)"
] ]
}, },
{ {
...@@ -151,7 +223,7 @@ ...@@ -151,7 +223,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 76, "execution_count": 4,
"metadata": { "metadata": {
"pycharm": { "pycharm": {
"is_executing": false, "is_executing": false,
...@@ -163,10 +235,10 @@ ...@@ -163,10 +235,10 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Tokens: ['[CLS]', 'This', 'is', 'an', 'input', 'example', '[SEP]']\n", "Tokens: ['This', 'is', 'an', 'input', 'example']\n",
"Tokens id: [101, 1188, 1110, 1126, 7758, 1859, 102]\n", "Tokens id: [1188, 1110, 1126, 7758, 1859]\n",
"Tokens PyTorch: tensor([[ 101, 1188, 1110, 1126, 7758, 1859, 102]])\n", "Tokens PyTorch: tensor([[ 101, 1188, 1110, 1126, 7758, 1859, 102]])\n",
"Tokenwise output: torch.Size([1, 7, 768]), Pooled output: torch.Size([1, 768])\n" "Tokenw ise output: torch.Size([1, 7, 768]), Pooled output: torch.Size([1, 768])\n"
] ]
} }
], ],
...@@ -180,13 +252,16 @@ ...@@ -180,13 +252,16 @@
"tokens_ids = tokenizer.convert_tokens_to_ids(tokens)\n", "tokens_ids = tokenizer.convert_tokens_to_ids(tokens)\n",
"print(\"Tokens id: {}\".format(tokens_ids))\n", "print(\"Tokens id: {}\".format(tokens_ids))\n",
"\n", "\n",
"# Add the required special tokens\n",
"tokens_ids = tokenizer.build_inputs_with_special_tokens(tokens_ids)\n",
"\n",
"# We need to convert to a Deep Learning framework specific format, let's use PyTorch for now.\n", "# We need to convert to a Deep Learning framework specific format, let's use PyTorch for now.\n",
"tokens_pt = torch.tensor([tokens_ids])\n", "tokens_pt = torch.tensor([tokens_ids])\n",
"print(\"Tokens PyTorch: {}\".format(tokens_pt))\n", "print(\"Tokens PyTorch: {}\".format(tokens_pt))\n",
"\n", "\n",
"# Now we're ready to go through BERT with out input\n", "# Now we're ready to go through BERT with out input\n",
"outputs, pooled = model(tokens_pt)\n", "outputs, pooled = model(tokens_pt)\n",
"print(\"Tokenwise output: {}, Pooled output: {}\".format(outputs.shape, pooled.shape))" "print(\"Tokenw ise output: {}, Pooled output: {}\".format(outputs.shape, pooled.shape))"
] ]
}, },
{ {
...@@ -223,7 +298,7 @@ ...@@ -223,7 +298,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 77, "execution_count": 5,
"metadata": { "metadata": {
"pycharm": { "pycharm": {
"is_executing": false, "is_executing": false,
...@@ -275,8 +350,12 @@ ...@@ -275,8 +350,12 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 78, "execution_count": 6,
"metadata": {}, "metadata": {
"pycharm": {
"is_executing": false
}
},
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
...@@ -312,8 +391,12 @@ ...@@ -312,8 +391,12 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 79, "execution_count": 7,
"metadata": {}, "metadata": {
"pycharm": {
"is_executing": false
}
},
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
...@@ -358,8 +441,12 @@ ...@@ -358,8 +441,12 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 80, "execution_count": null,
"metadata": {}, "metadata": {
"pycharm": {
"is_executing": false
}
},
"outputs": [], "outputs": [],
"source": [ "source": [
"from transformers import TFBertModel, BertModel\n", "from transformers import TFBertModel, BertModel\n",
...@@ -371,18 +458,13 @@ ...@@ -371,18 +458,13 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 81, "execution_count": null,
"metadata": {}, "metadata": {
"outputs": [ "pycharm": {
{ "is_executing": false
"name": "stdout",
"output_type": "stream",
"text": [
"output differences: 2.971128560602665e-05\n",
"pooled differences: -8.576549589633942e-06\n"
]
} }
], },
"outputs": [],
"source": [ "source": [
"# transformers generates a ready to use dictionary with all the required parameters for the specific framework.\n", "# transformers generates a ready to use dictionary with all the required parameters for the specific framework.\n",
"input_tf = tokenizer.encode_plus(\"This is a sample input\", return_tensors=\"tf\")\n", "input_tf = tokenizer.encode_plus(\"This is a sample input\", return_tensors=\"tf\")\n",
...@@ -422,20 +504,13 @@ ...@@ -422,20 +504,13 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 82, "execution_count": null,
"metadata": {}, "metadata": {
"outputs": [ "pycharm": {
{ "is_executing": false
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 57.1 ms, sys: 2.44 ms, total: 59.5 ms\n",
"Wall time: 35.5 ms\n",
"CPU times: user 98.8 ms, sys: 725 µs, total: 99.5 ms\n",
"Wall time: 50 ms\n"
]
} }
], },
"outputs": [],
"source": [ "source": [
"from transformers import DistilBertModel\n", "from transformers import DistilBertModel\n",
"\n", "\n",
...@@ -466,8 +541,12 @@ ...@@ -466,8 +541,12 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 83, "execution_count": null,
"metadata": {}, "metadata": {
"pycharm": {
"is_executing": false
}
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Let's load German BERT from the Bavarian State Library\n", "# Let's load German BERT from the Bavarian State Library\n",
...@@ -503,10 +582,10 @@ ...@@ -503,10 +582,10 @@
"pycharm": { "pycharm": {
"stem_cell": { "stem_cell": {
"cell_type": "raw", "cell_type": "raw",
"source": [],
"metadata": { "metadata": {
"collapsed": false "collapsed": false
} },
"source": []
} }
} }
}, },
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment