Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
OpenPCDet
Commits
71a45080
Commit
71a45080
authored
Jul 28, 2020
by
Shaoshuai Shi
Browse files
specify epochs and batch size in the configs
parent
e15c16a7
Changes
11
Show whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
49 additions
and
33 deletions
+49
-33
docs/GETTING_STARTED.md
docs/GETTING_STARTED.md
+13
-17
docs/INSTALL.md
docs/INSTALL.md
+1
-1
pcdet/utils/common_utils.py
pcdet/utils/common_utils.py
+6
-10
tools/cfgs/kitti_models/PartA2.yaml
tools/cfgs/kitti_models/PartA2.yaml
+3
-0
tools/cfgs/kitti_models/PartA2_free.yaml
tools/cfgs/kitti_models/PartA2_free.yaml
+3
-0
tools/cfgs/kitti_models/pointpillar.yaml
tools/cfgs/kitti_models/pointpillar.yaml
+3
-0
tools/cfgs/kitti_models/pointrcnn.yaml
tools/cfgs/kitti_models/pointrcnn.yaml
+3
-0
tools/cfgs/kitti_models/pv_rcnn.yaml
tools/cfgs/kitti_models/pv_rcnn.yaml
+3
-0
tools/cfgs/kitti_models/second.yaml
tools/cfgs/kitti_models/second.yaml
+3
-0
tools/cfgs/kitti_models/second_multihead.yaml
tools/cfgs/kitti_models/second_multihead.yaml
+3
-0
tools/train.py
tools/train.py
+8
-5
No files found.
docs/GETTING_STARTED.md
View file @
71a45080
#
# Getting Started
The dataset configs are located within
[
tools/cfgs/dataset_configs
](
tools/cfgs/dataset_configs
)
,
and the model configs are located within
[
tools/cfgs
](
tools/cfgs
)
for different datasets
, like
[
tools/cfgs/kitti_models/
](
tools/cfgs/kitti_models/
)
.
# Getting Started
The dataset configs are located within
[
tools/cfgs/dataset_configs
](
../
tools/cfgs/dataset_configs
)
,
and the model configs are located within
[
tools/cfgs
](
../
tools/cfgs
)
for different datasets.
## Dataset Preparation
...
...
@@ -73,34 +73,30 @@ python test.py --cfg_file ${CONFIG_FILE} --batch_size ${BATCH_SIZE} --eval_all
*
To test with multiple GPUs:
```
shell script
sh scripts/
slurm_test_mgpu.sh ${PARTITION}
${NUM_GPUS} \
sh scripts/
dist_test.sh
${NUM_GPUS} \
--cfg_file ${CONFIG_FILE} --batch_size ${BATCH_SIZE}
# or
sh scripts/
dist_test.sh
${NUM_GPUS} \
sh scripts/
slurm_test_mgpu.sh ${PARTITION}
${NUM_GPUS} \
--cfg_file ${CONFIG_FILE} --batch_size ${BATCH_SIZE}
```
### Train a model
Note that the
`--batch_size`
depends on the number of your training GPUs,
please refer to
`Model Zoo`
of
[
README.md
](
../README.md
)
for the setting of batch_size for different models.
You can optionally add the extra command-line parameters
`--batch_size ${BATCH_SIZE}`
and
`--epochs ${EPOCHS}`
to specify your preferred parameters.
*
Train with multiple GPUs:
```
shell script
sh scripts/dist_train.sh ${NUM_GPUS} \
--cfg_file ${CONFIG_FILE} --batch_size ${BATCH_SIZE} --epochs 80
```
*
Train with multiple machines
:
*
Train with multiple
GPUs or multiple
machines
```
shell script
sh scripts/slurm_train.sh ${PARTITION} ${JOB_NAME} ${NUM_GPUS} \
--cfg_file ${CONFIG_FILE} --batch_size ${BATCH_SIZE} --epochs 80
sh scripts/dist_train.sh ${NUM_GPUS} --cfg_file ${CONFIG_FILE}
# or
sh scripts/slurm_train.sh ${PARTITION} ${JOB_NAME} ${NUM_GPUS} --cfg_file ${CONFIG_FILE}
```
*
Train with a single GPU:
```
shell script
python train.py --cfg_file ${CONFIG_FILE}
--batch_size ${BATCH_SIZE} --epochs 50
python train.py --cfg_file ${CONFIG_FILE}
```
docs/INSTALL.md
View file @
71a45080
...
...
@@ -10,7 +10,7 @@ All the codes are tested in the following environment:
### Install `pcdet v0.3`
NOTE: Please re-install
`pcdet v0.3`
by running
`python setup.py develop`
if you have already installed
`pcdet v0.1`
previously
.
NOTE: Please re-install
`pcdet v0.3`
by running
`python setup.py develop`
even
if you have already installed
a previous version
.
a. Clone this repository.
```
shell
...
...
pcdet/utils/common_utils.py
View file @
71a45080
...
...
@@ -110,11 +110,10 @@ def keep_arrays_by_name(gt_names, used_classes):
return
inds
def
init_dist_slurm
(
batch_size
,
tcp_port
,
local_rank
,
backend
=
'nccl'
):
def
init_dist_slurm
(
tcp_port
,
local_rank
,
backend
=
'nccl'
):
"""
modified from https://github.com/open-mmlab/mmdetection
Args:
batch_size:
tcp_port:
backend:
...
...
@@ -134,13 +133,10 @@ def init_dist_slurm(batch_size, tcp_port, local_rank, backend='nccl'):
dist
.
init_process_group
(
backend
=
backend
)
total_gpus
=
dist
.
get_world_size
()
assert
batch_size
%
total_gpus
==
0
,
'Batch size should be matched with GPUS: (%d, %d)'
%
(
batch_size
,
total_gpus
)
batch_size_each_gpu
=
batch_size
//
total_gpus
rank
=
dist
.
get_rank
()
return
batch_size_each_gpu
,
rank
return
total_gpus
,
rank
def
init_dist_pytorch
(
batch_size
,
tcp_port
,
local_rank
,
backend
=
'nccl'
):
def
init_dist_pytorch
(
tcp_port
,
local_rank
,
backend
=
'nccl'
):
if
mp
.
get_start_method
(
allow_none
=
True
)
is
None
:
mp
.
set_start_method
(
'spawn'
)
...
...
@@ -152,10 +148,9 @@ def init_dist_pytorch(batch_size, tcp_port, local_rank, backend='nccl'):
rank
=
local_rank
,
world_size
=
num_gpus
)
assert
batch_size
%
num_gpus
==
0
,
'Batch size should be matched with GPUS: (%d, %d)'
%
(
batch_size
,
num_gpus
)
batch_size_each_gpu
=
batch_size
//
num_gpus
rank
=
dist
.
get_rank
()
return
batch_size_each_gpu
,
rank
return
num_gpus
,
rank
def
get_dist_info
():
if
torch
.
__version__
<
'1.0'
:
...
...
@@ -173,6 +168,7 @@ def get_dist_info():
world_size
=
1
return
rank
,
world_size
def
merge_results_dist
(
result_part
,
size
,
tmpdir
):
rank
,
world_size
=
get_dist_info
()
os
.
makedirs
(
tmpdir
,
exist_ok
=
True
)
...
...
tools/cfgs/kitti_models/PartA2.yaml
View file @
71a45080
...
...
@@ -170,6 +170,9 @@ MODEL:
OPTIMIZATION
:
BATCH_SIZE_PER_GPU
:
4
NUM_EPOCHS
:
80
OPTIMIZER
:
adam_onecycle
LR
:
0.01
WEIGHT_DECAY
:
0.01
...
...
tools/cfgs/kitti_models/PartA2_free.yaml
View file @
71a45080
...
...
@@ -114,6 +114,9 @@ MODEL:
OPTIMIZATION
:
BATCH_SIZE_PER_GPU
:
4
NUM_EPOCHS
:
80
OPTIMIZER
:
adam_onecycle
LR
:
0.003
WEIGHT_DECAY
:
0.01
...
...
tools/cfgs/kitti_models/pointpillar.yaml
View file @
71a45080
...
...
@@ -143,6 +143,9 @@ MODEL:
OPTIMIZATION
:
BATCH_SIZE_PER_GPU
:
4
NUM_EPOCHS
:
80
OPTIMIZER
:
adam_onecycle
LR
:
0.003
WEIGHT_DECAY
:
0.01
...
...
tools/cfgs/kitti_models/pointrcnn.yaml
View file @
71a45080
...
...
@@ -139,6 +139,9 @@ MODEL:
OPTIMIZATION
:
BATCH_SIZE_PER_GPU
:
2
NUM_EPOCHS
:
80
OPTIMIZER
:
adam_onecycle
LR
:
0.01
WEIGHT_DECAY
:
0.01
...
...
tools/cfgs/kitti_models/pv_rcnn.yaml
View file @
71a45080
...
...
@@ -228,6 +228,9 @@ MODEL:
OPTIMIZATION
:
BATCH_SIZE_PER_GPU
:
2
NUM_EPOCHS
:
80
OPTIMIZER
:
adam_onecycle
LR
:
0.01
WEIGHT_DECAY
:
0.01
...
...
tools/cfgs/kitti_models/second.yaml
View file @
71a45080
...
...
@@ -100,6 +100,9 @@ MODEL:
OPTIMIZATION
:
BATCH_SIZE_PER_GPU
:
4
NUM_EPOCHS
:
80
OPTIMIZER
:
adam_onecycle
LR
:
0.003
WEIGHT_DECAY
:
0.01
...
...
tools/cfgs/kitti_models/second_multihead.yaml
View file @
71a45080
...
...
@@ -116,6 +116,9 @@ MODEL:
OPTIMIZATION
:
BATCH_SIZE_PER_GPU
:
4
NUM_EPOCHS
:
80
OPTIMIZER
:
adam_onecycle
LR
:
0.003
WEIGHT_DECAY
:
0.01
...
...
tools/train.py
View file @
71a45080
...
...
@@ -20,8 +20,8 @@ def parse_config():
parser
=
argparse
.
ArgumentParser
(
description
=
'arg parser'
)
parser
.
add_argument
(
'--cfg_file'
,
type
=
str
,
default
=
None
,
help
=
'specify the config for training'
)
parser
.
add_argument
(
'--batch_size'
,
type
=
int
,
default
=
16
,
required
=
False
,
help
=
'batch size for training'
)
parser
.
add_argument
(
'--epochs'
,
type
=
int
,
default
=
30
,
required
=
False
,
help
=
'number of epochs to train for'
)
parser
.
add_argument
(
'--batch_size'
,
type
=
int
,
default
=
None
,
required
=
False
,
help
=
'batch size for training'
)
parser
.
add_argument
(
'--epochs'
,
type
=
int
,
default
=
None
,
required
=
False
,
help
=
'number of epochs to train for'
)
parser
.
add_argument
(
'--workers'
,
type
=
int
,
default
=
4
,
help
=
'number of workers for dataloader'
)
parser
.
add_argument
(
'--extra_tag'
,
type
=
str
,
default
=
'default'
,
help
=
'extra tag for this experiment'
)
parser
.
add_argument
(
'--ckpt'
,
type
=
str
,
default
=
None
,
help
=
'checkpoint to start from'
)
...
...
@@ -58,10 +58,14 @@ def main():
if
args
.
launcher
==
'none'
:
dist_train
=
False
else
:
args
.
batch_size
,
cfg
.
LOCAL_RANK
=
getattr
(
common_utils
,
'init_dist_%s'
%
args
.
launcher
)(
args
.
batch_size
,
args
.
tcp_port
,
args
.
local_rank
,
backend
=
'nccl'
total_gpus
,
cfg
.
LOCAL_RANK
=
getattr
(
common_utils
,
'init_dist_%s'
%
args
.
launcher
)(
args
.
tcp_port
,
args
.
local_rank
,
backend
=
'nccl'
)
dist_train
=
True
args
.
batch_size
=
cfg
.
OPTIMIZATION
.
BATCH_SIZE_PER_GPU
if
args
.
batch_size
is
None
else
args
.
batch_size
args
.
epochs
=
cfg
.
OPTIMIZATION
.
NUM_EPOCHS
if
args
.
epochs
is
None
else
args
.
epochs
if
args
.
fix_random_seed
:
common_utils
.
set_random_seed
(
666
)
...
...
@@ -79,7 +83,6 @@ def main():
logger
.
info
(
'CUDA_VISIBLE_DEVICES=%s'
%
gpu_list
)
if
dist_train
:
total_gpus
=
dist
.
get_world_size
()
logger
.
info
(
'total_batch_size: %d'
%
(
total_gpus
*
args
.
batch_size
))
for
key
,
val
in
vars
(
args
).
items
():
logger
.
info
(
'{:16} {}'
.
format
(
key
,
val
))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment