Commit 459ecd48 authored by Sugon_ldc

add transformers model

MIT License
Copyright (c) 2020 Kentaro Yoshioka
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
# vision-transformers-cifar10
Let's train vision transformers for CIFAR-10!
This is an unofficial and elementary implementation of `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale`.
I use PyTorch for the implementation.
### Updates
* Added [ConvMixer](https://openreview.net/forum?id=TVHS5Y4dNvM) implementation. Really simple! (2021/10)
* Added wandb train log to reproduce results. (2022/3)
* Added CaiT and ViT-small. (2022/3)
* Added SwinTransformers. (2022/3)
* Added MLP mixer. (2022/6)
* Changed default training settings for ViT.
# Usage examples
`python train_cifar10.py` # vit-patchsize-4
`python train_cifar10.py --size 48` # vit-patchsize-4-imsize-48
`python train_cifar10.py --patch 2` # vit-patchsize-2
`python train_cifar10.py --net vit_small --n_epochs 400` # vit-small
`python train_cifar10.py --net vit_timm` # train with pretrained vit
`python train_cifar10.py --net convmixer --n_epochs 400` # train with convmixer
`python train_cifar10.py --net mlpmixer --n_epochs 500 --aug --lr 1e-3` # train with mlp mixer
`python train_cifar10.py --net cait --n_epochs 200` # train with cait
`python train_cifar10.py --net swin --n_epochs 400` # train with SwinTransformers
`python train_cifar10.py --net res18` # resnet18+randaug
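For reference, here is a minimal sketch of how the flags used above could be parsed with `argparse`. The flag names mirror the commands shown; the defaults and help strings are assumptions and may differ from the actual `train_cifar10.py`.

```python
# Hypothetical CLI sketch; flag names follow the usage examples above,
# defaults are assumptions and may differ from train_cifar10.py.
import argparse

parser = argparse.ArgumentParser(description="Train a model on CIFAR-10")
parser.add_argument("--net", default="vit",
                    help="vit, vit_small, vit_timm, convmixer, mlpmixer, cait, swin, res18")
parser.add_argument("--patch", type=int, default=4, help="ViT patch size")
parser.add_argument("--size", type=int, default=32, help="input image size")
parser.add_argument("--n_epochs", type=int, default=200, help="training epochs")
parser.add_argument("--lr", type=float, default=1e-4, help="learning rate")
parser.add_argument("--aug", action="store_true", help="enable extra augmentation")
args = parser.parse_args()
print(args)
```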
# Results
| Model | Accuracy | Train Log |
|:-----------:|:--------:|:--------:|
| ViT patch=2 | 80% | |
| ViT patch=4 Epoch@200 | 80% | [Log](https://wandb.ai/arutema47/cifar10-challange/reports/Untitled-Report--VmlldzoxNjU3MTU2?accessToken=3y3ib62e8b9ed2m2zb22dze8955fwuhljl5l4po1d5a3u9b7yzek1tz7a0d4i57r) |
| ViT patch=4 Epoch@500 | 88% | [Log](https://wandb.ai/arutema47/cifar10-challange/reports/Untitled-Report--VmlldzoxNjU3MTU2?accessToken=3y3ib62e8b9ed2m2zb22dze8955fwuhljl5l4po1d5a3u9b7yzek1tz7a0d4i57r) |
| ViT patch=8 | 30% | |
| ViT small | 80% | |
| MLP mixer | 88% | |
| CaiT | 80% | |
| Swin-t | 90% | |
| ViT small (timm transfer) | 97.5% | |
| ViT base (timm transfer) | 98.5% | |
| [ConvMixerTiny(no pretrain)](https://openreview.net/forum?id=TVHS5Y4dNvM) | 96.3% |[Log](https://wandb.ai/arutema47/cifar10-challange/reports/convmixer--VmlldzoyMjEyOTk1?accessToken=2w9nox10so11ixf7t0imdhxq1rf1ftgzyax4r9h896iekm2byfifz3b7hkv3klrt)|
| resnet18 | 93% | |
| resnet18+randaug | 95% | [Log](https://wandb.ai/arutema47/cifar10-challange/reports/Untitled-Report--VmlldzoxNjU3MTYz?accessToken=968duvoqt6xq7ep75ob0yppkzbxd0q03gxy2apytryv04a84xvj8ysdfvdaakij2) |
# Used in
* Vision Transformer Pruning [arxiv](https://arxiv.org/abs/2104.08500) [github](https://github.com/Cydia2018/ViT-cifar10-pruning)
epoch:0
Wed Oct 28 01:49:11 2020 Epoch 0, lr: 0.0010000, val loss: 118.90921, acc: 58.60000
Wed Oct 28 01:49:30 2020 Epoch 1, lr: 0.0010000, val loss: 87.35475, acc: 70.25000
Wed Oct 28 01:49:49 2020 Epoch 2, lr: 0.0010000, val loss: 80.21718, acc: 72.61000
Wed Oct 28 01:50:16 2020 Epoch 3, lr: 0.0010000, val loss: 64.91081, acc: 78.19000
Wed Oct 28 01:50:53 2020 Epoch 4, lr: 0.0010000, val loss: 58.25699, acc: 80.36000
Wed Oct 28 01:51:30 2020 Epoch 5, lr: 0.0010000, val loss: 48.68425, acc: 84.14000
Wed Oct 28 01:52:07 2020 Epoch 6, lr: 0.0010000, val loss: 53.80388, acc: 82.70000
Wed Oct 28 01:52:45 2020 Epoch 7, lr: 0.0010000, val loss: 41.47698, acc: 85.79000
Wed Oct 28 01:53:22 2020 Epoch 8, lr: 0.0010000, val loss: 42.50140, acc: 85.42000
Wed Oct 28 01:53:59 2020 Epoch 9, lr: 0.0010000, val loss: 44.76749, acc: 85.55000
Wed Oct 28 01:54:36 2020 Epoch 10, lr: 0.0010000, val loss: 37.57323, acc: 87.38000
Wed Oct 28 01:55:13 2020 Epoch 11, lr: 0.0010000, val loss: 36.98403, acc: 87.91000
Wed Oct 28 01:55:50 2020 Epoch 12, lr: 0.0010000, val loss: 43.49086, acc: 86.18000
Wed Oct 28 01:56:27 2020 Epoch 13, lr: 0.0010000, val loss: 37.91397, acc: 87.79000
Wed Oct 28 01:57:04 2020 Epoch 14, lr: 0.0010000, val loss: 38.68858, acc: 87.64000
Wed Oct 28 01:57:42 2020 Epoch 15, lr: 0.0010000, val loss: 35.02082, acc: 89.31000
Wed Oct 28 01:58:19 2020 Epoch 16, lr: 0.0010000, val loss: 38.61485, acc: 88.27000
Wed Oct 28 01:58:55 2020 Epoch 17, lr: 0.0010000, val loss: 34.67143, acc: 89.52000
Wed Oct 28 01:59:20 2020 Epoch 18, lr: 0.0010000, val loss: 34.27875, acc: 89.72000
Wed Oct 28 01:59:49 2020 Epoch 19, lr: 0.0010000, val loss: 35.57404, acc: 89.55000
Wed Oct 28 02:00:27 2020 Epoch 20, lr: 0.0010000, val loss: 34.90507, acc: 89.82000
Wed Oct 28 02:00:54 2020 Epoch 21, lr: 0.0010000, val loss: 32.06855, acc: 90.63000
Wed Oct 28 02:01:25 2020 Epoch 22, lr: 0.0010000, val loss: 34.56692, acc: 90.49000
Wed Oct 28 02:02:02 2020 Epoch 23, lr: 0.0010000, val loss: 37.21982, acc: 90.03000
Wed Oct 28 02:02:40 2020 Epoch 24, lr: 0.0010000, val loss: 33.62259, acc: 91.13000
Wed Oct 28 02:03:16 2020 Epoch 25, lr: 0.0010000, val loss: 33.70737, acc: 90.56000
Wed Oct 28 02:03:54 2020 Epoch 26, lr: 0.0001000, val loss: 34.69689, acc: 91.27000
Wed Oct 28 02:04:31 2020 Epoch 27, lr: 0.0001000, val loss: 28.49893, acc: 92.42000
Wed Oct 28 02:05:08 2020 Epoch 28, lr: 0.0001000, val loss: 28.81415, acc: 92.77000
Wed Oct 28 02:05:46 2020 Epoch 29, lr: 0.0001000, val loss: 29.26207, acc: 92.73000
Wed Oct 28 02:06:23 2020 Epoch 30, lr: 0.0001000, val loss: 30.33475, acc: 92.77000
Wed Oct 28 02:07:00 2020 Epoch 31, lr: 0.0001000, val loss: 31.09149, acc: 92.78000
Wed Oct 28 02:07:37 2020 Epoch 32, lr: 0.0000100, val loss: 31.47846, acc: 92.86000
Wed Oct 28 02:08:14 2020 Epoch 33, lr: 0.0000100, val loss: 31.23287, acc: 92.79000
Wed Oct 28 02:08:51 2020 Epoch 34, lr: 0.0000100, val loss: 32.13335, acc: 92.86000
Wed Oct 28 02:09:28 2020 Epoch 35, lr: 0.0000100, val loss: 31.42652, acc: 92.88000
Wed Oct 28 02:10:05 2020 Epoch 36, lr: 0.0000100, val loss: 31.88379, acc: 92.85000
Wed Oct 28 02:10:43 2020 Epoch 37, lr: 0.0000010, val loss: 31.71775, acc: 92.73000
Wed Oct 28 02:11:19 2020 Epoch 38, lr: 0.0000010, val loss: 31.56442, acc: 93.00000
Wed Oct 28 02:11:56 2020 Epoch 39, lr: 0.0000010, val loss: 31.77023, acc: 93.01000
Wed Oct 28 02:12:33 2020 Epoch 40, lr: 0.0000010, val loss: 31.19953, acc: 92.92000
Wed Oct 28 02:13:10 2020 Epoch 41, lr: 0.0000010, val loss: 31.44710, acc: 92.97000
152.6886649131775,140.83905494213104,114.57930248975754,106.85598981380463,109.70434707403183,96.92400288581848,97.92824339866638,91.22835493087769,87.82749193906784,90.12244093418121,84.00897711515427,82.01755094528198,81.23839092254639,80.56400829553604,80.28153949975967,77.53050893545151,77.80443352460861,78.48978531360626,77.36746674776077,73.70677381753922,72.44489425420761,72.05933248996735,73.17760559916496,75.43363177776337,70.52083346247673,75.43264067173004,72.16524285078049,69.90382590889931,71.30116939544678,70.3159114420414,72.00254860520363,74.87856948375702,72.4836990237236,68.41016402840614,67.92861634492874,68.53646919131279,68.91035690903664,68.83064901828766,68.79765149950981,69.44012692570686
44.89,50.78,58.38,61.61,61.26,66.13,65.61,67.63,69.01,68.4,70.46,71.16,71.23,71.96,72.54,73.17,73.43,72.98,72.63,75.13,75.14,75.47,75.02,74.84,76.11,75.3,76.02,76.51,76.42,76.78,76.42,75.79,76.59,78.17,78.44,78.95,78.77,78.98,79.09,79.2
Wed Oct 28 02:19:24 2020 Epoch 0, lr: 0.0001000, val loss: 152.68866, acc: 44.89000
Wed Oct 28 02:23:22 2020 Epoch 1, lr: 0.0001000, val loss: 140.83905, acc: 50.78000
Wed Oct 28 02:27:21 2020 Epoch 2, lr: 0.0001000, val loss: 114.57930, acc: 58.38000
Wed Oct 28 02:31:19 2020 Epoch 3, lr: 0.0001000, val loss: 106.85599, acc: 61.61000
Wed Oct 28 02:35:18 2020 Epoch 4, lr: 0.0001000, val loss: 109.70435, acc: 61.26000
Wed Oct 28 02:39:15 2020 Epoch 5, lr: 0.0001000, val loss: 96.92400, acc: 66.13000
Wed Oct 28 02:43:14 2020 Epoch 6, lr: 0.0001000, val loss: 97.92824, acc: 65.61000
Wed Oct 28 02:47:12 2020 Epoch 7, lr: 0.0001000, val loss: 91.22835, acc: 67.63000
Wed Oct 28 02:51:11 2020 Epoch 8, lr: 0.0001000, val loss: 87.82749, acc: 69.01000
Wed Oct 28 02:55:09 2020 Epoch 9, lr: 0.0001000, val loss: 90.12244, acc: 68.40000
Wed Oct 28 02:59:09 2020 Epoch 10, lr: 0.0001000, val loss: 84.00898, acc: 70.46000
Wed Oct 28 03:03:07 2020 Epoch 11, lr: 0.0001000, val loss: 82.01755, acc: 71.16000
Wed Oct 28 03:07:06 2020 Epoch 12, lr: 0.0001000, val loss: 81.23839, acc: 71.23000
Wed Oct 28 03:11:05 2020 Epoch 13, lr: 0.0001000, val loss: 80.56401, acc: 71.96000
Wed Oct 28 03:15:02 2020 Epoch 14, lr: 0.0001000, val loss: 80.28154, acc: 72.54000
Wed Oct 28 03:17:21 2020 Epoch 15, lr: 0.0001000, val loss: 77.53051, acc: 73.17000
Wed Oct 28 03:19:19 2020 Epoch 16, lr: 0.0001000, val loss: 77.80443, acc: 73.43000
Wed Oct 28 03:21:17 2020 Epoch 17, lr: 0.0001000, val loss: 78.48979, acc: 72.98000
Wed Oct 28 03:23:14 2020 Epoch 18, lr: 0.0001000, val loss: 77.36747, acc: 72.63000
Wed Oct 28 03:25:12 2020 Epoch 19, lr: 0.0001000, val loss: 73.70677, acc: 75.13000
Wed Oct 28 03:27:10 2020 Epoch 20, lr: 0.0001000, val loss: 72.44489, acc: 75.14000
Wed Oct 28 03:29:07 2020 Epoch 21, lr: 0.0001000, val loss: 72.05933, acc: 75.47000
Wed Oct 28 03:31:05 2020 Epoch 22, lr: 0.0001000, val loss: 73.17761, acc: 75.02000
Wed Oct 28 03:33:02 2020 Epoch 23, lr: 0.0001000, val loss: 75.43363, acc: 74.84000
Wed Oct 28 03:35:00 2020 Epoch 24, lr: 0.0001000, val loss: 70.52083, acc: 76.11000
Wed Oct 28 03:36:58 2020 Epoch 25, lr: 0.0001000, val loss: 75.43264, acc: 75.30000
Wed Oct 28 03:38:55 2020 Epoch 26, lr: 0.0001000, val loss: 72.16524, acc: 76.02000
Wed Oct 28 03:40:53 2020 Epoch 27, lr: 0.0001000, val loss: 69.90383, acc: 76.51000
Wed Oct 28 03:42:51 2020 Epoch 28, lr: 0.0001000, val loss: 71.30117, acc: 76.42000
Wed Oct 28 03:44:48 2020 Epoch 29, lr: 0.0001000, val loss: 70.31591, acc: 76.78000
Wed Oct 28 03:46:46 2020 Epoch 30, lr: 0.0001000, val loss: 72.00255, acc: 76.42000
Wed Oct 28 03:48:44 2020 Epoch 31, lr: 0.0001000, val loss: 74.87857, acc: 75.79000
Wed Oct 28 03:50:43 2020 Epoch 32, lr: 0.0000100, val loss: 72.48370, acc: 76.59000
Wed Oct 28 03:54:25 2020 Epoch 33, lr: 0.0000100, val loss: 68.41016, acc: 78.17000
Wed Oct 28 03:58:23 2020 Epoch 34, lr: 0.0000100, val loss: 67.92862, acc: 78.44000
Wed Oct 28 04:02:23 2020 Epoch 35, lr: 0.0000100, val loss: 68.53647, acc: 78.95000
Wed Oct 28 04:06:21 2020 Epoch 36, lr: 0.0000100, val loss: 68.91036, acc: 78.77000
Wed Oct 28 04:10:21 2020 Epoch 37, lr: 0.0000100, val loss: 68.83065, acc: 78.98000
Wed Oct 28 04:14:20 2020 Epoch 38, lr: 0.0000100, val loss: 68.79765, acc: 79.09000
Wed Oct 28 04:18:19 2020 Epoch 39, lr: 0.0000010, val loss: 69.44013, acc: 79.20000
145.21276545524597,130.1471005678177,119.99731481075287,113.94353991746902,106.74877518415451,104.91727715730667,98.78972041606903,96.17003297805786,94.82091355323792,96.35868710279465,90.63396579027176,86.54807388782501,83.59144073724747,84.07160407304764,82.46806985139847,79.90045547485352,79.96950954198837,78.79997432231903,78.8913185596466,74.17493376135826,73.21486473083496,73.42624083161354,73.11926263570786
48.18,53.22,57.05,59.23,61.69,62.95,64.83,65.46,66.54,66.62,67.32,69.44,70.03,70.15,70.91,72.24,72.31,72.31,72.27,74.16,74.63,74.36,74.8
Wed Oct 28 02:19:41 2020 Epoch 0, lr: 0.0001000, val loss: 149.25757, acc: 45.81000
Wed Oct 28 02:20:50 2020 Epoch 1, lr: 0.0001000, val loss: 130.75906, acc: 52.83000
Wed Oct 28 02:21:58 2020 Epoch 2, lr: 0.0001000, val loss: 119.69856, acc: 57.07000
Wed Oct 28 02:23:07 2020 Epoch 3, lr: 0.0001000, val loss: 114.19812, acc: 59.59000
Wed Oct 28 02:24:16 2020 Epoch 4, lr: 0.0001000, val loss: 108.78305, acc: 60.96000
Wed Oct 28 02:25:25 2020 Epoch 5, lr: 0.0001000, val loss: 102.78246, acc: 62.86000
Wed Oct 28 02:26:33 2020 Epoch 6, lr: 0.0001000, val loss: 101.42338, acc: 63.98000
Wed Oct 28 02:27:41 2020 Epoch 7, lr: 0.0001000, val loss: 96.00607, acc: 65.40000
Wed Oct 28 02:29:59 2020 Epoch 9, lr: 0.0001000, val loss: 89.91241, acc: 67.90000
Wed Oct 28 02:31:08 2020 Epoch 10, lr: 0.0001000, val loss: 88.34163, acc: 68.80000
Wed Oct 28 02:32:16 2020 Epoch 11, lr: 0.0001000, val loss: 86.33140, acc: 69.83000
Wed Oct 28 02:33:25 2020 Epoch 12, lr: 0.0001000, val loss: 83.79393, acc: 70.81000
Wed Oct 28 02:34:34 2020 Epoch 13, lr: 0.0001000, val loss: 82.32496, acc: 71.04000
Wed Oct 28 02:35:42 2020 Epoch 14, lr: 0.0001000, val loss: 82.08312, acc: 70.56000
Wed Oct 28 02:36:51 2020 Epoch 15, lr: 0.0001000, val loss: 77.11475, acc: 72.95000
Wed Oct 28 02:38:00 2020 Epoch 16, lr: 0.0001000, val loss: 76.48433, acc: 73.32000
Wed Oct 28 02:39:08 2020 Epoch 17, lr: 0.0001000, val loss: 75.59556, acc: 73.51000
Wed Oct 28 02:40:17 2020 Epoch 18, lr: 0.0001000, val loss: 74.29323, acc: 74.17000
Wed Oct 28 02:41:26 2020 Epoch 19, lr: 0.0001000, val loss: 73.56699, acc: 74.06000
Wed Oct 28 02:42:35 2020 Epoch 20, lr: 0.0001000, val loss: 72.80090, acc: 75.08000
Wed Oct 28 02:43:43 2020 Epoch 21, lr: 0.0001000, val loss: 75.17080, acc: 74.19000
Wed Oct 28 02:44:52 2020 Epoch 22, lr: 0.0001000, val loss: 71.65324, acc: 75.47000
Wed Oct 28 02:46:01 2020 Epoch 23, lr: 0.0001000, val loss: 69.89486, acc: 76.17000
Wed Oct 28 02:47:09 2020 Epoch 24, lr: 0.0001000, val loss: 71.35829, acc: 75.00000
Wed Oct 28 02:48:18 2020 Epoch 25, lr: 0.0001000, val loss: 70.00906, acc: 76.46000
Wed Oct 28 02:49:27 2020 Epoch 26, lr: 0.0001000, val loss: 68.82054, acc: 76.74000
Wed Oct 28 02:50:36 2020 Epoch 27, lr: 0.0001000, val loss: 67.50249, acc: 77.24000
Wed Oct 28 02:51:44 2020 Epoch 28, lr: 0.0001000, val loss: 67.71045, acc: 77.08000
Wed Oct 28 02:52:53 2020 Epoch 29, lr: 0.0001000, val loss: 68.44057, acc: 76.84000
Wed Oct 28 02:54:01 2020 Epoch 30, lr: 0.0001000, val loss: 67.50337, acc: 77.33000
Wed Oct 28 02:55:10 2020 Epoch 31, lr: 0.0001000, val loss: 67.98517, acc: 77.42000
Wed Oct 28 02:56:18 2020 Epoch 32, lr: 0.0001000, val loss: 67.19535, acc: 77.34000
Wed Oct 28 02:57:27 2020 Epoch 33, lr: 0.0001000, val loss: 67.21230, acc: 78.01000
Wed Oct 28 02:58:36 2020 Epoch 34, lr: 0.0001000, val loss: 69.19893, acc: 77.84000
Wed Oct 28 02:59:44 2020 Epoch 35, lr: 0.0001000, val loss: 64.88638, acc: 78.87000
Wed Oct 28 03:00:53 2020 Epoch 36, lr: 0.0001000, val loss: 66.69895, acc: 78.46000
Wed Oct 28 03:02:02 2020 Epoch 37, lr: 0.0001000, val loss: 66.10147, acc: 78.60000
Wed Oct 28 03:03:10 2020 Epoch 38, lr: 0.0001000, val loss: 64.60483, acc: 79.13000
Wed Oct 28 03:04:18 2020 Epoch 39, lr: 0.0001000, val loss: 68.31437, acc: 78.62000
Wed Oct 28 03:05:27 2020 Epoch 40, lr: 0.0001000, val loss: 64.75209, acc: 79.57000
Wed Oct 28 03:06:36 2020 Epoch 41, lr: 0.0001000, val loss: 64.46018, acc: 79.63000
Wed Oct 28 03:07:44 2020 Epoch 42, lr: 0.0001000, val loss: 67.03657, acc: 78.78000
Wed Oct 28 03:08:53 2020 Epoch 43, lr: 0.0001000, val loss: 67.35336, acc: 79.11000
Wed Oct 28 03:10:02 2020 Epoch 44, lr: 0.0001000, val loss: 67.63423, acc: 79.02000
Wed Oct 28 03:11:10 2020 Epoch 45, lr: 0.0001000, val loss: 67.07969, acc: 79.66000
Wed Oct 28 03:12:19 2020 Epoch 46, lr: 0.0000100, val loss: 69.18692, acc: 79.11000
Wed Oct 28 03:13:28 2020 Epoch 47, lr: 0.0000100, val loss: 64.23234, acc: 80.77000
Wed Oct 28 03:14:36 2020 Epoch 48, lr: 0.0000100, val loss: 64.81049, acc: 80.81000
Wed Oct 28 03:15:44 2020 Epoch 49, lr: 0.0000100, val loss: 65.04099, acc: 80.91000
Wed Oct 28 03:52:28 2020 Epoch 0, lr: 0.0001000, val loss: 145.21277, acc: 48.18000
Wed Oct 28 03:53:37 2020 Epoch 1, lr: 0.0001000, val loss: 130.14710, acc: 53.22000
Wed Oct 28 03:54:45 2020 Epoch 2, lr: 0.0001000, val loss: 119.99731, acc: 57.05000
Wed Oct 28 03:55:53 2020 Epoch 3, lr: 0.0001000, val loss: 113.94354, acc: 59.23000
Wed Oct 28 03:57:02 2020 Epoch 4, lr: 0.0001000, val loss: 106.74878, acc: 61.69000
Wed Oct 28 03:58:10 2020 Epoch 5, lr: 0.0001000, val loss: 104.91728, acc: 62.95000
Wed Oct 28 03:59:18 2020 Epoch 6, lr: 0.0001000, val loss: 98.78972, acc: 64.83000
Wed Oct 28 04:00:27 2020 Epoch 7, lr: 0.0001000, val loss: 96.17003, acc: 65.46000
Wed Oct 28 04:01:35 2020 Epoch 8, lr: 0.0001000, val loss: 94.82091, acc: 66.54000
Wed Oct 28 04:02:43 2020 Epoch 9, lr: 0.0001000, val loss: 96.35869, acc: 66.62000
Wed Oct 28 04:03:52 2020 Epoch 10, lr: 0.0001000, val loss: 90.63397, acc: 67.32000
Wed Oct 28 04:05:01 2020 Epoch 11, lr: 0.0001000, val loss: 86.54807, acc: 69.44000
Wed Oct 28 04:06:09 2020 Epoch 12, lr: 0.0001000, val loss: 83.59144, acc: 70.03000
Wed Oct 28 04:07:17 2020 Epoch 13, lr: 0.0001000, val loss: 84.07160, acc: 70.15000
Wed Oct 28 04:08:26 2020 Epoch 14, lr: 0.0001000, val loss: 82.46807, acc: 70.91000
Wed Oct 28 04:09:35 2020 Epoch 15, lr: 0.0001000, val loss: 79.90046, acc: 72.24000
Wed Oct 28 04:10:43 2020 Epoch 16, lr: 0.0001000, val loss: 79.96951, acc: 72.31000
Wed Oct 28 04:11:51 2020 Epoch 17, lr: 0.0001000, val loss: 78.79997, acc: 72.31000
Wed Oct 28 04:13:00 2020 Epoch 18, lr: 0.0001000, val loss: 78.89132, acc: 72.27000
Wed Oct 28 04:14:08 2020 Epoch 19, lr: 0.0001000, val loss: 74.17493, acc: 74.16000
Wed Oct 28 04:15:17 2020 Epoch 20, lr: 0.0001000, val loss: 73.21486, acc: 74.63000
Wed Oct 28 04:16:25 2020 Epoch 21, lr: 0.0001000, val loss: 73.42624, acc: 74.36000
Wed Oct 28 04:17:34 2020 Epoch 22, lr: 0.0001000, val loss: 73.11926, acc: 74.80000
# -*- coding: utf-8 -*-
from models.vgg import *
#from dpn import *
#from lenet import *
#from senet import *
#from pnasnet import *
#from densenet import *
#from googlenet import *
#from shufflenet import *
from models.resnet import *
#from resnext import *
#from preact_resnet import *
#from mobilenet import *
#from mobilenetv2 import *
# https://github.com/lucidrains/vit-pytorch/blob/main/vit_pytorch/cait.py
from random import randrange
import torch
from torch import nn, einsum
import torch.nn.functional as F
from einops import rearrange, repeat
from einops.layers.torch import Rearrange
# helpers
def exists(val):
return val is not None
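# dropout_layers implements layer dropout (stochastic depth): each block is
# independently dropped with probability `dropout`, but at least one block
# is always kept.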
def dropout_layers(layers, dropout):
if dropout == 0:
return layers
num_layers = len(layers)
to_drop = torch.zeros(num_layers).uniform_(0., 1.) < dropout
# make sure at least one layer makes it
if all(to_drop):
rand_index = randrange(num_layers)
to_drop[rand_index] = False
layers = [layer for (layer, drop) in zip(layers, to_drop) if not drop]
return layers
# classes
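# LayerScale (from the CaiT paper): multiplies each residual branch by a
# learnable per-channel scale, initialized to a small depth-dependent epsilon.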
class LayerScale(nn.Module):
def __init__(self, dim, fn, depth):
super().__init__()
if depth <= 18: # epsilon detailed in section 2 of paper
init_eps = 0.1
elif depth > 18 and depth <= 24:
init_eps = 1e-5
else:
init_eps = 1e-6
scale = torch.zeros(1, 1, dim).fill_(init_eps)
self.scale = nn.Parameter(scale)
self.fn = fn
def forward(self, x, **kwargs):
return self.fn(x, **kwargs) * self.scale
class PreNorm(nn.Module):
def __init__(self, dim, fn):
super().__init__()
self.norm = nn.LayerNorm(dim)
self.fn = fn
def forward(self, x, **kwargs):
return self.fn(self.norm(x), **kwargs)
class FeedForward(nn.Module):
def __init__(self, dim, hidden_dim, dropout = 0.):
super().__init__()
self.net = nn.Sequential(
nn.Linear(dim, hidden_dim),
nn.GELU(),
nn.Dropout(dropout),
nn.Linear(hidden_dim, dim),
nn.Dropout(dropout)
)
def forward(self, x):
return self.net(x)
class Attention(nn.Module):
def __init__(self, dim, heads = 8, dim_head = 64, dropout = 0.):
super().__init__()
inner_dim = dim_head * heads
self.heads = heads
self.scale = dim_head ** -0.5
self.to_q = nn.Linear(dim, inner_dim, bias = False)
self.to_kv = nn.Linear(dim, inner_dim * 2, bias = False)
self.attend = nn.Softmax(dim = -1)
self.mix_heads_pre_attn = nn.Parameter(torch.randn(heads, heads))
self.mix_heads_post_attn = nn.Parameter(torch.randn(heads, heads))
self.to_out = nn.Sequential(
nn.Linear(inner_dim, dim),
nn.Dropout(dropout)
)
def forward(self, x, context = None):
b, n, _, h = *x.shape, self.heads
context = x if not exists(context) else torch.cat((x, context), dim = 1)
qkv = (self.to_q(x), *self.to_kv(context).chunk(2, dim = -1))
q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h = h), qkv)
dots = einsum('b h i d, b h j d -> b h i j', q, k) * self.scale
dots = einsum('b h i j, h g -> b g i j', dots, self.mix_heads_pre_attn) # talking heads, pre-softmax
attn = self.attend(dots)
attn = einsum('b h i j, h g -> b g i j', attn, self.mix_heads_post_attn) # talking heads, post-softmax
out = einsum('b h i j, b h j d -> b h i d', attn, v)
out = rearrange(out, 'b h n d -> b n (h d)')
return self.to_out(out)
class Transformer(nn.Module):
def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout = 0., layer_dropout = 0.):
super().__init__()
self.layers = nn.ModuleList([])
self.layer_dropout = layer_dropout
for ind in range(depth):
self.layers.append(nn.ModuleList([
LayerScale(dim, PreNorm(dim, Attention(dim, heads = heads, dim_head = dim_head, dropout = dropout)), depth = ind + 1),
LayerScale(dim, PreNorm(dim, FeedForward(dim, mlp_dim, dropout = dropout)), depth = ind + 1)
]))
def forward(self, x, context = None):
layers = dropout_layers(self.layers, dropout = self.layer_dropout)
for attn, ff in layers:
x = attn(x, context = context) + x
x = ff(x) + x
return x
class CaiT(nn.Module):
def __init__(
self,
*,
image_size,
patch_size,
num_classes,
dim,
depth,
cls_depth,
heads,
mlp_dim,
dim_head = 64,
dropout = 0.,
emb_dropout = 0.,
layer_dropout = 0.
):
super().__init__()
assert image_size % patch_size == 0, 'Image dimensions must be divisible by the patch size.'
num_patches = (image_size // patch_size) ** 2
patch_dim = 3 * patch_size ** 2
self.to_patch_embedding = nn.Sequential(
Rearrange('b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1 = patch_size, p2 = patch_size),
nn.Linear(patch_dim, dim),
)
self.pos_embedding = nn.Parameter(torch.randn(1, num_patches, dim))
self.cls_token = nn.Parameter(torch.randn(1, 1, dim))
self.dropout = nn.Dropout(emb_dropout)
self.patch_transformer = Transformer(dim, depth, heads, dim_head, mlp_dim, dropout, layer_dropout)
self.cls_transformer = Transformer(dim, cls_depth, heads, dim_head, mlp_dim, dropout, layer_dropout)
self.mlp_head = nn.Sequential(
nn.LayerNorm(dim),
nn.Linear(dim, num_classes)
)
def forward(self, img):
x = self.to_patch_embedding(img)
b, n, _ = x.shape
x += self.pos_embedding[:, :n]
x = self.dropout(x)
x = self.patch_transformer(x)
cls_tokens = repeat(self.cls_token, '() n d -> b n d', b = b)
x = self.cls_transformer(cls_tokens, context = x)
return self.mlp_head(x[:, 0])
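# Illustrative usage sketch (assumption: the hyperparameters below are picked
# only for a quick shape check and are not the settings used by train_cifar10.py).
if __name__ == "__main__":
    model = CaiT(
        image_size=32, patch_size=4, num_classes=10,
        dim=192, depth=6, cls_depth=2, heads=8, mlp_dim=384,
        dropout=0.1, emb_dropout=0.1, layer_dropout=0.05,
    )
    out = model(torch.randn(2, 3, 32, 32))  # CIFAR-10-sized batch
    print(out.shape)  # torch.Size([2, 10])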
# https://openreview.net/forum?id=TVHS5Y4dNvM
import torch.nn as nn
class Residual(nn.Module):
def __init__(self, fn):
super().__init__()
self.fn = fn
def forward(self, x):
return self.fn(x) + x
def ConvMixer(dim, depth, kernel_size=9, patch_size=7, n_classes=1000):
return nn.Sequential(
nn.Conv2d(3, dim, kernel_size=patch_size, stride=patch_size),
nn.GELU(),
nn.BatchNorm2d(dim),
*[nn.Sequential(
Residual(nn.Sequential(
nn.Conv2d(dim, dim, kernel_size, groups=dim, padding="same"),
nn.GELU(),
nn.BatchNorm2d(dim)
)),
nn.Conv2d(dim, dim, kernel_size=1),
nn.GELU(),
nn.BatchNorm2d(dim)
) for i in range(depth)],
nn.AdaptiveAvgPool2d((1,1)),
nn.Flatten(),
nn.Linear(dim, n_classes)
)
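# Illustrative usage sketch (assumption: dim/depth/kernel_size/patch_size are
# arbitrary small values for a shape check, not the repo's training settings).
if __name__ == "__main__":
    import torch
    net = ConvMixer(dim=256, depth=8, kernel_size=5, patch_size=2, n_classes=10)
    print(net(torch.randn(2, 3, 32, 32)).shape)  # torch.Size([2, 10])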
# https://github.com/lucidrains/mlp-mixer-pytorch/blob/main/mlp_mixer_pytorch/mlp_mixer_pytorch.py
from torch import nn
from functools import partial
from einops.layers.torch import Rearrange, Reduce
pair = lambda x: x if isinstance(x, tuple) else (x, x)
class PreNormResidual(nn.Module):
def __init__(self, dim, fn):
super().__init__()
self.fn = fn
self.norm = nn.LayerNorm(dim)
def forward(self, x):
return self.fn(self.norm(x)) + x
def FeedForward(dim, expansion_factor = 4, dropout = 0., dense = nn.Linear):
inner_dim = int(dim * expansion_factor)
return nn.Sequential(
dense(dim, inner_dim),
nn.GELU(),
nn.Dropout(dropout),
dense(inner_dim, dim),
nn.Dropout(dropout)
)
def MLPMixer(*, image_size, channels, patch_size, dim, depth, num_classes, expansion_factor = 4, expansion_factor_token = 0.5, dropout = 0.):
image_h, image_w = pair(image_size)
assert (image_h % patch_size) == 0 and (image_w % patch_size) == 0, 'image must be divisible by patch size'
num_patches = (image_h // patch_size) * (image_w // patch_size)
chan_first, chan_last = partial(nn.Conv1d, kernel_size = 1), nn.Linear
return nn.Sequential(
Rearrange('b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1 = patch_size, p2 = patch_size),
nn.Linear((patch_size ** 2) * channels, dim),
*[nn.Sequential(
PreNormResidual(dim, FeedForward(num_patches, expansion_factor, dropout, chan_first)),
PreNormResidual(dim, FeedForward(dim, expansion_factor_token, dropout, chan_last))
) for _ in range(depth)],
nn.LayerNorm(dim),
Reduce('b n c -> b c', 'mean'),
nn.Linear(dim, num_classes)
)
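# Illustrative usage sketch (assumption: the hyperparameters are arbitrary
# small values for a shape check, not the repo's training settings).
if __name__ == "__main__":
    import torch
    mixer = MLPMixer(image_size=32, channels=3, patch_size=4,
                     dim=256, depth=6, num_classes=10)
    print(mixer(torch.randn(2, 3, 32, 32)).shape)  # torch.Size([2, 10])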
# -*- coding: utf-8 -*-
'''ResNet in PyTorch.
For Pre-activation ResNet, see 'preact_resnet.py'.
Reference:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
Deep Residual Learning for Image Recognition. arXiv:1512.03385
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, in_planes, planes, stride=1):
super(BasicBlock, self).__init__()
self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.shortcut = nn.Sequential()
if stride != 1 or in_planes != self.expansion*planes:
self.shortcut = nn.Sequential(
nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(self.expansion*planes)
)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.bn2(self.conv2(out))
out += self.shortcut(x)
out = F.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, in_planes, planes, stride=1):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(self.expansion*planes)
self.shortcut = nn.Sequential()
if stride != 1 or in_planes != self.expansion*planes:
self.shortcut = nn.Sequential(
nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(self.expansion*planes)
)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = F.relu(self.bn2(self.conv2(out)))
out = self.bn3(self.conv3(out))
out += self.shortcut(x)
out = F.relu(out)
return out
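# Note: this ResNet is the common CIFAR adaptation: a single 3x3 stem
# convolution (no 7x7 conv / max-pool) and a 4x4 average pool before the
# classifier, matching 32x32 inputs.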
class ResNet(nn.Module):
def __init__(self, block, num_blocks, num_classes=10):
super(ResNet, self).__init__()
self.in_planes = 64
self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
self.linear = nn.Linear(512*block.expansion, num_classes)
def _make_layer(self, block, planes, num_blocks, stride):
strides = [stride] + [1]*(num_blocks-1)
layers = []
for stride in strides:
layers.append(block(self.in_planes, planes, stride))
self.in_planes = planes * block.expansion
return nn.Sequential(*layers)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
out = self.layer4(out)
out = F.avg_pool2d(out, 4)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out
def ResNet18():
return ResNet(BasicBlock, [2,2,2,2])
def ResNet34():
return ResNet(BasicBlock, [3,4,6,3])
def ResNet50():
return ResNet(Bottleneck, [3,4,6,3])
def ResNet101():
return ResNet(Bottleneck, [3,4,23,3])
def ResNet152():
return ResNet(Bottleneck, [3,8,36,3])
def test():
net = ResNet18()
y = net(torch.randn(1,3,32,32))
print(y.size())
# test()