Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dgl
Commits
46455328
Unverified
Commit
46455328
authored
Jan 11, 2023
by
Rhett Ying
Committed by
GitHub
Jan 11, 2023
Browse files
[CI] fix bugs for multigpu benchmarks (#5140)
parent
d8370299
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
7 additions
and
57 deletions
+7
-57
benchmarks/benchmarks/multigpu/bench_multigpu_rgcn.py
benchmarks/benchmarks/multigpu/bench_multigpu_rgcn.py
+2
-51
benchmarks/benchmarks/utils.py
benchmarks/benchmarks/utils.py
+1
-1
benchmarks/scripts/build_dgl_asv.sh
benchmarks/scripts/build_dgl_asv.sh
+1
-1
benchmarks/scripts/publish.sh
benchmarks/scripts/publish.sh
+3
-3
docker/Dockerfile.ci_benchmark
docker/Dockerfile.ci_benchmark
+0
-1
No files found.
benchmarks/benchmarks/multigpu/bench_multigpu_rgcn.py
View file @
46455328
...
@@ -48,7 +48,6 @@ class EntityClassify(nn.Module):
...
@@ -48,7 +48,6 @@ class EntityClassify(nn.Module):
num_hidden_layers
=
1
,
num_hidden_layers
=
1
,
dropout
=
0
,
dropout
=
0
,
use_self_loop
=
False
,
use_self_loop
=
False
,
low_mem
=
True
,
layer_norm
=
False
,
layer_norm
=
False
,
):
):
super
(
EntityClassify
,
self
).
__init__
()
super
(
EntityClassify
,
self
).
__init__
()
...
@@ -61,7 +60,6 @@ class EntityClassify(nn.Module):
...
@@ -61,7 +60,6 @@ class EntityClassify(nn.Module):
self
.
num_hidden_layers
=
num_hidden_layers
self
.
num_hidden_layers
=
num_hidden_layers
self
.
dropout
=
dropout
self
.
dropout
=
dropout
self
.
use_self_loop
=
use_self_loop
self
.
use_self_loop
=
use_self_loop
self
.
low_mem
=
low_mem
self
.
layer_norm
=
layer_norm
self
.
layer_norm
=
layer_norm
self
.
layers
=
nn
.
ModuleList
()
self
.
layers
=
nn
.
ModuleList
()
...
@@ -75,7 +73,6 @@ class EntityClassify(nn.Module):
...
@@ -75,7 +73,6 @@ class EntityClassify(nn.Module):
self
.
num_bases
,
self
.
num_bases
,
activation
=
F
.
relu
,
activation
=
F
.
relu
,
self_loop
=
self
.
use_self_loop
,
self_loop
=
self
.
use_self_loop
,
low_mem
=
self
.
low_mem
,
dropout
=
self
.
dropout
,
dropout
=
self
.
dropout
,
layer_norm
=
layer_norm
,
layer_norm
=
layer_norm
,
)
)
...
@@ -91,7 +88,6 @@ class EntityClassify(nn.Module):
...
@@ -91,7 +88,6 @@ class EntityClassify(nn.Module):
self
.
num_bases
,
self
.
num_bases
,
activation
=
F
.
relu
,
activation
=
F
.
relu
,
self_loop
=
self
.
use_self_loop
,
self_loop
=
self
.
use_self_loop
,
low_mem
=
self
.
low_mem
,
dropout
=
self
.
dropout
,
dropout
=
self
.
dropout
,
layer_norm
=
layer_norm
,
layer_norm
=
layer_norm
,
)
)
...
@@ -106,7 +102,6 @@ class EntityClassify(nn.Module):
...
@@ -106,7 +102,6 @@ class EntityClassify(nn.Module):
self
.
num_bases
,
self
.
num_bases
,
activation
=
None
,
activation
=
None
,
self_loop
=
self
.
use_self_loop
,
self_loop
=
self
.
use_self_loop
,
low_mem
=
self
.
low_mem
,
layer_norm
=
layer_norm
,
layer_norm
=
layer_norm
,
)
)
)
)
...
@@ -236,7 +231,6 @@ def run(proc_id, n_gpus, n_cpus, args, devices, dataset, split, queue=None):
...
@@ -236,7 +231,6 @@ def run(proc_id, n_gpus, n_cpus, args, devices, dataset, split, queue=None):
num_hidden_layers
=
args
.
n_layers
-
2
,
num_hidden_layers
=
args
.
n_layers
-
2
,
dropout
=
args
.
dropout
,
dropout
=
args
.
dropout
,
use_self_loop
=
args
.
use_self_loop
,
use_self_loop
=
args
.
use_self_loop
,
low_mem
=
args
.
low_mem
,
layer_norm
=
args
.
layer_norm
,
layer_norm
=
args
.
layer_norm
,
)
)
...
@@ -373,14 +367,12 @@ def run(proc_id, n_gpus, n_cpus, args, devices, dataset, split, queue=None):
...
@@ -373,14 +367,12 @@ def run(proc_id, n_gpus, n_cpus, args, devices, dataset, split, queue=None):
@
utils
.
skip_if_not_4gpu
()
@
utils
.
skip_if_not_4gpu
()
@
utils
.
benchmark
(
"time"
,
timeout
=
600
)
@
utils
.
benchmark
(
"time"
,
timeout
=
600
)
@
utils
.
parametrize
(
"data"
,
[
"am"
,
"ogbn-mag"
])
@
utils
.
parametrize
(
"data"
,
[
"am"
,
"ogbn-mag"
])
@
utils
.
parametrize
(
"low_mem"
,
[
True
,
False
])
@
utils
.
parametrize
(
"dgl_sparse"
,
[
True
,
False
])
@
utils
.
parametrize
(
"dgl_sparse"
,
[
True
,
False
])
def
track_time
(
data
,
low_mem
,
dgl_sparse
):
def
track_time
(
data
,
dgl_sparse
):
# load graph data
# load graph data
dataset
=
utils
.
process_data
(
data
)
dataset
=
utils
.
process_data
(
data
)
args
=
config
()
args
=
config
()
devices
=
[
0
,
1
,
2
,
3
]
devices
=
[
0
,
1
,
2
,
3
]
args
.
low_mem
=
low_mem
args
.
dgl_sparse
=
dgl_sparse
args
.
dgl_sparse
=
dgl_sparse
args
.
dataset
=
dataset
args
.
dataset
=
dataset
ogb_dataset
=
False
ogb_dataset
=
False
...
@@ -572,49 +564,8 @@ def config():
...
@@ -572,49 +564,8 @@ def config():
node_feats
=
False
,
node_feats
=
False
,
num_workers
=
0
,
num_workers
=
0
,
dgl_sparse
=
False
,
dgl_sparse
=
False
,
low_mem
=
False
,
)
)
# parser.add_argument("--dropout", type=float, default=0,
# help="dropout probability")
# parser.add_argument("--n-hidden", type=int, default=16,
# help="number of hidden units")
# parser.add_argument("--gpu", type=str, default='0',
# help="gpu")
# parser.add_argument("--lr", type=float, default=1e-2,
# help="learning rate")
# parser.add_argument("--sparse-lr", type=float, default=2e-2,
# help="sparse embedding learning rate")
# parser.add_argument("--n-bases", type=int, default=-1,
# help="number of filter weight matrices, default: -1 [use all]")
# parser.add_argument("--n-layers", type=int, default=2,
# help="number of propagation rounds")
# parser.add_argument("-e", "--n-epochs", type=int, default=50,
# help="number of training epochs")
# parser.add_argument("-d", "--dataset", type=str, required=True,
# help="dataset to use")
# parser.add_argument("--l2norm", type=float, default=0,
# help="l2 norm coef")
# parser.add_argument("--fanout", type=str, default="4, 4",
# help="Fan-out of neighbor sampling.")
# parser.add_argument("--use-self-loop", default=False, action='store_true',
# help="include self feature as a special relation")
# fp = parser.add_mutually_exclusive_group(required=False)
# parser.add_argument("--batch-size", type=int, default=100,
# help="Mini-batch size. ")
# parser.add_argument("--eval-batch-size", type=int, default=32,
# help="Mini-batch size. ")
# parser.add_argument("--num-workers", type=int, default=0,
# help="Number of workers for dataloader.")
# parser.add_argument("--low-mem", default=False, action='store_true',
# help="Whether use low mem RelGraphCov")
# parser.add_argument("--dgl-sparse", default=False, action='store_true',
# help='Use sparse embedding for node embeddings.')
# parser.add_argument('--node-feats', default=False, action='store_true',
# help='Whether use node features')
# parser.add_argument('--layer-norm', default=False, action='store_true',
# help='Use layer norm')
# parser.set_defaults(validation=True)
# args = parser.parse_args()
return
args
return
args
...
...
benchmarks/benchmarks/utils.py
View file @
46455328
...
@@ -534,7 +534,7 @@ def skip_if_not_4gpu():
...
@@ -534,7 +534,7 @@ def skip_if_not_4gpu():
"""skip if DGL_BENCH_DEVICE is gpu"""
"""skip if DGL_BENCH_DEVICE is gpu"""
def
_wrapper
(
func
):
def
_wrapper
(
func
):
if
GPU_COUNT
!=
4
:
if
GPU_COUNT
<
4
:
# skip if not enabled
# skip if not enabled
print
(
"Skip {}"
.
format
(
func
.
__name__
))
print
(
"Skip {}"
.
format
(
func
.
__name__
))
func
.
benchmark_name
=
"skip_"
+
func
.
__name__
func
.
benchmark_name
=
"skip_"
+
func
.
__name__
...
...
benchmarks/scripts/build_dgl_asv.sh
View file @
46455328
...
@@ -10,7 +10,7 @@ pip install -r /asv/torch_gpu_pip.txt
...
@@ -10,7 +10,7 @@ pip install -r /asv/torch_gpu_pip.txt
# build
# build
CMAKE_VARS
=
"-DUSE_OPENMP=ON -DBUILD_TORCH=ON -DBUILD_SPARSE=ON -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda"
CMAKE_VARS
=
"-DUSE_OPENMP=ON -DBUILD_TORCH=ON -DBUILD_SPARSE=ON -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda"
if
[[
$DEVICE
==
"gpu"
]]
;
then
if
[[
$DEVICE
==
"gpu"
]]
;
then
CMAKE_VARS
=
"-DUSE_CUDA=ON
$CMAKE_VARS
"
CMAKE_VARS
=
"-DUSE_CUDA=ON
-DUSE_NCCL=ON
$CMAKE_VARS
"
fi
fi
arch
=
`
uname
-m
`
arch
=
`
uname
-m
`
if
[[
$arch
==
*
"x86"
*
]]
;
then
if
[[
$arch
==
*
"x86"
*
]]
;
then
...
...
benchmarks/scripts/publish.sh
View file @
46455328
...
@@ -26,7 +26,7 @@ else
...
@@ -26,7 +26,7 @@ else
fi
fi
WS_ROOT
=
/asv/dgl
WS_ROOT
=
/asv/dgl
docker pull public.ecr.aws/s1o7b3d9/benchmark_test:cu116
docker pull public.ecr.aws/s1o7b3d9/benchmark_test:cu116
_v230110
if
[
-z
"
$DGL_REG_CONF
"
]
;
then
if
[
-z
"
$DGL_REG_CONF
"
]
;
then
DOCKER_ENV_OPT
=
"
$DOCKER_ENV_OPT
"
DOCKER_ENV_OPT
=
"
$DOCKER_ENV_OPT
"
else
else
...
@@ -56,14 +56,14 @@ if [[ $DEVICE == "cpu" ]]; then
...
@@ -56,14 +56,14 @@ if [[ $DEVICE == "cpu" ]]; then
$DOCKER_MOUNT_OPT
\
$DOCKER_MOUNT_OPT
\
$DOCKER_ENV_OPT
\
$DOCKER_ENV_OPT
\
--shm-size
=
"16g"
\
--shm-size
=
"16g"
\
--hostname
=
$MACHINE
-dit
public.ecr.aws/s1o7b3d9/benchmark_test:cu116 /bin/bash
--hostname
=
$MACHINE
-dit
public.ecr.aws/s1o7b3d9/benchmark_test:cu116
_v230110
/bin/bash
else
else
docker run
--name
dgl-reg
\
docker run
--name
dgl-reg
\
--rm
--gpus
all
\
--rm
--gpus
all
\
$DOCKER_MOUNT_OPT
\
$DOCKER_MOUNT_OPT
\
$DOCKER_ENV_OPT
\
$DOCKER_ENV_OPT
\
--shm-size
=
"16g"
\
--shm-size
=
"16g"
\
--hostname
=
$MACHINE
-dit
public.ecr.aws/s1o7b3d9/benchmark_test:cu116 /bin/bash
--hostname
=
$MACHINE
-dit
public.ecr.aws/s1o7b3d9/benchmark_test:cu116
_v230110
/bin/bash
fi
fi
pwd
pwd
...
...
docker/Dockerfile.ci_benchmark
View file @
46455328
...
@@ -27,5 +27,4 @@ ENV CPLUS_INCLUDE_PATH=/usr/local/cuda/include:${CPLUS_INCLUDE_PATH}
...
@@ -27,5 +27,4 @@ ENV CPLUS_INCLUDE_PATH=/usr/local/cuda/include:${CPLUS_INCLUDE_PATH}
ENV C_INCLUDE_PATH=/usr/local/cuda/include:${C_INCLUDE_PATH}
ENV C_INCLUDE_PATH=/usr/local/cuda/include:${C_INCLUDE_PATH}
ENV LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib64:${LIBRARY_PATH}
ENV LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib64:${LIBRARY_PATH}
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib64:${LD_LIBRARY_PATH}
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib64:${LD_LIBRARY_PATH}
ENV CUDA_VISIBLE_DEVICES=0
ENV TF_FORCE_GPU_ALLOW_GROWTH=true
ENV TF_FORCE_GPU_ALLOW_GROWTH=true
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment