Unverified commit 40d98ebf authored by Patrick von Platen, committed by GitHub

Update benchmark notebook (#5603)

* Created with Colaboratory

* delete old file
parent 281e3948
@@ -6,7 +6,7 @@
       "name": "05-benchmark",
       "provenance": [],
       "collapsed_sections": [],
-      "authorship_tag": "ABX9TyNQ2BQG0erOGhTFF/2Mdn5a",
+      "authorship_tag": "ABX9TyOAUMA92fdE4FM6A349/FWI",
       "include_colab_link": true
     },
     "kernelspec": {
@@ -272,7 +272,7 @@
         "colab_type": "text"
       },
       "source": [
-        "<a href=\"https://colab.research.google.com/github/huggingface/transformers/blob/add_benchmark_notebook/05_benchmark.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+        "<a href=\"https://colab.research.google.com/github/huggingface/transformers/blob/update_notebook/notebooks/05_benchmark.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
       ]
     },
     {
@@ -407,7 +407,7 @@
         "  print(\"GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total {3:.0f}MB\".format(gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil*100, gpu.memoryTotal))\n",
         "printm()"
       ],
-      "execution_count": 1,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -431,7 +431,7 @@
         "# If GPU RAM Util > 0% => crash notebook on purpose\n",
         "# !kill -9 -1"
       ],
-      "execution_count": 2,
+      "execution_count": null,
       "outputs": []
     },
     {
@@ -499,7 +499,7 @@
       "source": [
         "!python run_benchmark.py --help"
       ],
-      "execution_count": 4,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -615,7 +615,7 @@
         "# create plots folder in content\n",
         "!mkdir -p plots_pt"
       ],
-      "execution_count": 5,
+      "execution_count": null,
       "outputs": []
     },
     {
@@ -638,7 +638,7 @@
         " --inference_memory_csv_file plots_pt/required_memory.csv \\\n",
         " --env_info_csv_file plots_pt/env.csv >/dev/null 2>&1 # redirect all prints"
       ],
-      "execution_count": 6,
+      "execution_count": null,
       "outputs": []
     },
     {
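The benchmark command in the hunk above is truncated by the diff context; only the two CSV flags are visible. For orientation, a hypothetical invocation is sketched below. The model list and grid are illustrative, not the hidden lines of the actual cell; `--models`, `--batch_sizes`, `--sequence_lengths`, and `--save_to_csv` mirror the like-named `PyTorchBenchmarkArguments` fields, and the two CSV flags are confirmed by the hunk.

```python
# Hypothetical invocation -- the flags hidden by the diff context are filled in
# here for illustration only. --inference_memory_csv_file and
# --env_info_csv_file are confirmed by the hunk above.
!python run_benchmark.py \
    --models deepset/roberta-base-squad2 \
    --batch_sizes 32 \
    --sequence_lengths 32 128 512 \
    --save_to_csv \
    --inference_memory_csv_file plots_pt/required_memory.csv \
    --env_info_csv_file plots_pt/env.csv >/dev/null 2>&1  # redirect all prints
```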
@@ -666,7 +666,7 @@
         "df = pd.read_csv('plots_pt/required_memory.csv')\n",
         "df"
       ],
-      "execution_count": 7,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -901,7 +901,7 @@
         "df = pd.read_csv('plots_pt/env.csv')\n",
         "df"
       ],
-      "execution_count": 8,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -1086,7 +1086,7 @@
         "colab_type": "code",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 514
+          "height": 534
         },
         "outputId": "22499f33-bafc-42b3-f1b7-fcb202df9cd2"
       },
@@ -1098,7 +1098,7 @@
         "from IPython.display import Image\n",
         "Image('plots_pt/required_memory_plot.png')"
       ],
-      "execution_count": 9,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -1133,7 +1133,7 @@
         "In short, all memory that is allocated for a given *model identifier*, *batch size* and *sequence length* is measured in a separate process. This way it can be ensured that there is no previously unreleased memory falsely included in the measurement. One should also note that the measured memory even includes the memory allocated by the CUDA driver to load PyTorch and TensorFlow and is, therefore, higher than library-specific memory measurement function, *e.g.* this one for [PyTorch](https://pytorch.org/docs/stable/cuda.html#torch.cuda.max_memory_allocated).\n",
         "\n",
         "Alright, let's analyze the results. It can be noted that the models `aodiniz/bert_uncased_L-10_H-512_A-8_cord19-200616_squad2` and `deepset/roberta-base-squad2` require significantly less memory than the other three models. Besides `mrm8488/longformer-base-4096-finetuned-squadv2` all models more or less follow the same memory consumption pattern with `aodiniz/bert_uncased_L-10_H-512_A-8_cord19-200616_squad2` seemingly being able to better scale to larger sequence lengths. \n",
-        "`mrm8488/longformer-base-4096-finetuned-squadv2` is a *Longformer* model, which makes use of *LocalAttention* (check this blog post to learn more about local attention) so that the model scales much better to longer input sequences.\n",
+        "`mrm8488/longformer-base-4096-finetuned-squadv2` is a *Longformer* model, which makes use of *LocalAttention* (check [this](https://huggingface.co/blog/reformer) blog post to learn more about local attention) so that the model scales much better to longer input sequences.\n",
         "\n",
         "For the sake of this notebook, we assume that the longest required input will be less than 512 tokens so that we settle on the models `aodiniz/bert_uncased_L-10_H-512_A-8_cord19-200616_squad2` and `deepset/roberta-base-squad2`. \n",
         "\n",
@@ -1161,7 +1161,7 @@
         " --batch_sizes 64 128 256 512\\\n",
         " --no_env_print"
       ],
-      "execution_count": 10,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -1207,7 +1207,7 @@
         "colab_type": "code",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 514
+          "height": 534
         },
         "outputId": "092c4dac-5002-4603-8eba-cd4bca727744"
       },
@@ -1223,7 +1223,7 @@
         "from IPython.display import Image\n",
         "Image('plots_pt/required_memory_plot_2.png')"
       ],
-      "execution_count": 11,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -1283,7 +1283,7 @@
         " --batch_sizes 64 128 256 512 \\\n",
         " --no_env_print \\"
       ],
-      "execution_count": 12,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -1351,7 +1351,7 @@
         "colab_type": "code",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 514
+          "height": 534
         },
         "outputId": "3947ccf0-b91c-43bf-8569-d6afe0232185"
       },
@@ -1363,7 +1363,7 @@
         "from IPython.display import Image\n",
         "Image('plots_tf/required_memory_plot_2.png')"
       ],
-      "execution_count": 13,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -1420,7 +1420,7 @@
         " --batch_sizes 256 \\\n",
         " --no_env_print \\"
       ],
-      "execution_count": 14,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -1454,7 +1454,7 @@
         "colab_type": "code",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 514
+          "height": 534
         },
         "outputId": "152f14c7-288a-4471-9cc0-5108cb24804c"
       },
@@ -1466,7 +1466,7 @@
         "from IPython.display import Image\n",
         "Image('plots_tf/time_plot_2.png')"
       ],
-      "execution_count": 15,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -1523,7 +1523,7 @@
         " --no_env_print \\\n",
         " --use_xla"
       ],
-      "execution_count": 16,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -1584,7 +1584,7 @@
         "# Imports\n",
         "from transformers import BartConfig, PyTorchBenchmark, PyTorchBenchmarkArguments"
       ],
-      "execution_count": 17,
+      "execution_count": null,
       "outputs": []
     },
     {
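The imports in the hunk above also allow driving the benchmark from Python instead of the `run_benchmark.py` CLI. A minimal sketch following the documented pattern; the model and grid values are illustrative choices, not taken from the notebook.

```python
# Minimal in-Python benchmark sketch using the classes imported above.
# Model and grid values are illustrative.
from transformers import PyTorchBenchmark, PyTorchBenchmarkArguments

args = PyTorchBenchmarkArguments(
    models=["deepset/roberta-base-squad2"],
    batch_sizes=[8],
    sequence_lengths=[32, 128, 512],
)
benchmark = PyTorchBenchmark(args)
result = benchmark.run()  # prints the speed/memory tables and returns the results
```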
@@ -1622,7 +1622,7 @@
       "source": [
         "BartConfig.from_pretrained(\"facebook/bart-large-mnli\").to_diff_dict()"
       ],
-      "execution_count": 18,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "display_data",
@@ -1720,7 +1720,7 @@
         "config_10000_vocab = BartConfig.from_pretrained(\"facebook/bart-large-mnli\", vocab_size=10000)\n",
         "config_8_layers = BartConfig.from_pretrained(\"facebook/bart-large-mnli\", encoder_layers=8, decoder_layers=8)"
       ],
-      "execution_count": 19,
+      "execution_count": null,
       "outputs": []
     },
     {
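Configurations like the ones created in the hunk above can be benchmarked without pretrained weights by passing them via the documented `configs` argument, one per entry in `models`. A sketch under that assumption; the report labels and grid values are illustrative.

```python
# Benchmark randomly initialized configurations instead of pretrained
# checkpoints: pass one config per name in `models` via `configs`.
# Labels and grid values are illustrative.
from transformers import BartConfig, PyTorchBenchmark, PyTorchBenchmarkArguments

config_base = BartConfig.from_pretrained("facebook/bart-large-mnli")
config_8_layers = BartConfig.from_pretrained("facebook/bart-large-mnli",
                                             encoder_layers=8, decoder_layers=8)

args = PyTorchBenchmarkArguments(
    models=["bart-base", "bart-8-layers"],  # report labels matching `configs`
    batch_sizes=[8],
    sequence_lengths=[128],
)
benchmark = PyTorchBenchmark(args, configs=[config_base, config_8_layers])
result = benchmark.run()
```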
@@ -1770,7 +1770,7 @@
         "# run benchmark\n",
         "result = benchmark.run()"
       ],
-      "execution_count": 20,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -1829,7 +1829,7 @@
         "colab_type": "code",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 514
+          "height": 534
         },
         "outputId": "5dbeb7f7-c996-4db2-a560-735354a5b76f"
       },
@@ -1841,7 +1841,7 @@
         "from IPython.display import Image\n",
         "Image('plots_pt/training_mem_fp16.png')"
       ],
-      "execution_count": 21,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -1916,7 +1916,7 @@
         "# run benchmark\n",
         "result = benchmark.run()"
       ],
-      "execution_count": 22,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -1961,7 +1961,7 @@
         "colab_type": "code",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 514
+          "height": 534
         },
         "outputId": "8a4b4db7-abed-47c4-da61-c3b1ccae66f1"
       },
@@ -1973,7 +1973,7 @@
         "from IPython.display import Image\n",
         "Image('plots_pt/training_speed_fp16.png')"
       ],
-      "execution_count": 23,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -2017,7 +2017,7 @@
       "source": [
         "Alright, that's it! Now you should be able to benchmark your favorite models on your favorite configurations. \n",
         "\n",
-        "Transparency for the computational cost of a model is becoming more and more important. Feel free to share your results with the community on a shared spreadsheet or by tweeting us @huggingface 🤗."
+        "Feel free to share your results with the community [here](https://github.com/huggingface/transformers/blob/master/examples/benchmarking/README.md) or by tweeting us https://twitter.com/HuggingFace 🤗."
       ]
     }
   ]