help='Save folder name if any special information is to be included.')
### General Training Parameters
# NOTE(review): in the original file this option block was duplicated wholesale
# (every add_argument appeared two or three times, e.g. --kernels three times and
# the whole second half repeated the first). argparse raises
# "ArgumentError: conflicting option string" on the second registration, so the
# script could not start. Exactly one copy of each option is kept below. Where
# the duplicates disagreed on a default (--decay: 0.001 vs 0.0004) the later
# copy's value was kept -- TODO confirm the intended default.
parser.add_argument('--kernels',default=8,type=int,help='Number of workers for pytorch dataloader.')
parser.add_argument('--bs',default=112,type=int,help='Mini-Batchsize to use.')
parser.add_argument('--samples_per_class',default=4,type=int,help='Number of samples in one class drawn before choosing the next class. Set to >1 for losses other than ProxyNCA.')
####### Main Parameter: Dataset to use for Training
parser.add_argument('--dataset',default='Inaturalist',type=str,help='Dataset to use.',choices=['Inaturalist','semi_fungi'])
### General Training Parameters
parser.add_argument('--lr',default=0.00001,type=float,help='Learning Rate for network parameters.')
parser.add_argument('--fc_lr_mul',default=5,type=float,help='OPTIONAL: Multiply the embedding layer learning rate by this value. If set to 0, the embedding layer shares the same learning rate.')
parser.add_argument('--n_epochs',default=400,type=int,help='Number of training epochs.')
parser.add_argument('--seed',default=1,type=int,help='Random seed for reproducibility.')
parser.add_argument('--scheduler',default='step',type=str,help='Type of learning rate scheduling. Currently: step & exp.')
parser.add_argument('--gamma',default=0.3,type=float,help='Learning rate reduction after tau epochs.')
parser.add_argument('--decay',default=0.0004,type=float,help='Weight decay for optimizer.')
parser.add_argument('--tau',default=[200,300],nargs='+',type=int,help='Stepsize(s) before reducing learning rate.')
parser.add_argument('--infrequent_eval',default=0,type=int,help='only compute evaluation metrics every 10 epochs')
parser.add_argument('--opt',default='adam',help='adam or sgd')
parser.add_argument('--resume',default='',type=str,help='path to checkpoint to load weights from (if empty then ImageNet pre-trained weights are loaded')
##### Network parameters
parser.add_argument('--embed_dim',default=512,type=int,help='Embedding dimensionality of the network')
parser.add_argument('--grad_measure',action='store_true',help='If added, gradients passed from embedding layer to the last conv-layer are stored in each iteration.')
parser.add_argument('--dist_measure',action='store_true',help='If added, the ratio between intra- and interclass distances is stored after each epoch.')
parser.add_argument('--not_pretrained',action='store_true',help='If added, the network will be trained WITHOUT ImageNet-pretrained weights.')
##### Setup Parameters
parser.add_argument('--gpu',default=0,type=int,help='GPU-id for GPU to use.')
# NOTE(review): --savename is deliberately omitted here -- it is already
# registered immediately above this block (its help string closes on the
# preceding line), and re-registering it would conflict.
### Paths to datasets and storage folder
parser.add_argument('--source_path',default='/scratch/shared/beegfs/abrown/datasets',type=str,help='Path to data')
parser.add_argument('--save_path',default=os.getcwd()+'/Training_Results',type=str,help='Where to save the checkpoints')
The following options can be specified via command line arguments:
```
optional arguments:
-h, --help show this help message and exit
--dropout DROPOUT dropout probability
--n-hidden N_HIDDEN number of hidden units
--lr LR learning rate
-e N_EPOCHS, --n-epochs N_EPOCHS
number of training epochs
--runs RUNS
```
...
...
ParameterDict(
The input features are passed to a modified version of the R-GCN architecture. As in the R-GCN paper, each _edge-type_ has its own linear projection matrix (the "weight" ModuleDict below). Different from the original paper, however, each _node-type_ has its own "self" linear projection matrix (the "loop_weights" ModuleDict below). There are 7 edge-types: 4 natural edge-types ("cites", "affiliated_with", "has_topic" and "writes") and 3 manufactured reverse edge-types ("rev-affiliated_with", "rev-has_topic", "rev-writes"). As mentioned above, note that there is _not_ a reverse edge type like "rev-cites", and instead the reverse edges are given the same type of "cites". This exception was presumably made because the source and destination nodes are of type "paper". Whereas the 7 "relation" linear layers do not have a bias, the 4 "self" linear layers do.
With two of these layers, a hidden dimension size of 64 and 349 output classes, we end up with 337,460 R-GCN model parameters.