Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
maskrcnn
Commits
d06288b2
Unverified
Commit
d06288b2
authored
Jan 14, 2025
by
Shriya Rishab
Committed by
GitHub
Jan 14, 2025
Browse files
Merge pull request #13 from Ribin-Baby/dgx/bugfix
minor bug fixes
parents
5b62935b
b93fbddc
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
55 additions
and
9 deletions
+55
-9
NVIDIA/benchmarks/bert/implementations/pytorch/run_with_docker.sh
...enchmarks/bert/implementations/pytorch/run_with_docker.sh
+7
-1
NVIDIA/benchmarks/dlrm_dcnv2/implementations/hugectr/run_with_docker.sh
...rks/dlrm_dcnv2/implementations/hugectr/run_with_docker.sh
+7
-1
NVIDIA/benchmarks/maskrcnn/implementations/pytorch/run_with_docker.sh
...marks/maskrcnn/implementations/pytorch/run_with_docker.sh
+7
-1
NVIDIA/benchmarks/resnet/implementations/mxnet/common/fit.py
NVIDIA/benchmarks/resnet/implementations/mxnet/common/fit.py
+5
-2
NVIDIA/benchmarks/resnet/implementations/mxnet/common/optimizer.py
...nchmarks/resnet/implementations/mxnet/common/optimizer.py
+2
-1
NVIDIA/benchmarks/resnet/implementations/mxnet/run_with_docker.sh
...enchmarks/resnet/implementations/mxnet/run_with_docker.sh
+7
-1
NVIDIA/benchmarks/unet3d/implementations/mxnet/run_with_docker.sh
...enchmarks/unet3d/implementations/mxnet/run_with_docker.sh
+7
-1
NVIDIA/benchmarks/unet3d/implementations/mxnet/runtime/arguments.py
...chmarks/unet3d/implementations/mxnet/runtime/arguments.py
+13
-1
No files found.
NVIDIA/benchmarks/bert/implementations/pytorch/run_with_docker.sh
View file @
d06288b2
...
@@ -92,7 +92,13 @@ cleanup_docker
...
@@ -92,7 +92,13 @@ cleanup_docker
trap
'set -eux; cleanup_docker'
EXIT
trap
'set -eux; cleanup_docker'
EXIT
# Setup container
# Setup container
nvidia-docker run
--rm
--init
--detach
--gpus
=
'"'
device
=
${
NV_GPU
}
'"'
\
if
[
-z
"
${
NV_GPU
-
}
"
]
;
then
readonly
_docker_gpu_args
=
"--gpus all"
else
readonly
_docker_gpu_args
=
'--gpus="'
device
=
${
NV_GPU
}
'" -e NVIDIA_VISIBLE_DEVICES='
"
${
NV_GPU
}
"
fi
docker run
${
_docker_gpu_args
}
--rm
--init
--detach
\
--net
=
host
--uts
=
host
--ipc
=
host
--security-opt
=
seccomp
=
unconfined
\
--net
=
host
--uts
=
host
--ipc
=
host
--security-opt
=
seccomp
=
unconfined
\
--ulimit
=
stack
=
67108864
--ulimit
=
memlock
=
-1
\
--ulimit
=
stack
=
67108864
--ulimit
=
memlock
=
-1
\
--name
=
"
${
_cont_name
}
"
${
_cont_mounts
[@]
}
\
--name
=
"
${
_cont_name
}
"
${
_cont_mounts
[@]
}
\
...
...
NVIDIA/benchmarks/dlrm_dcnv2/implementations/hugectr/run_with_docker.sh
View file @
d06288b2
...
@@ -60,7 +60,13 @@ cleanup_docker
...
@@ -60,7 +60,13 @@ cleanup_docker
trap
'set -eux; cleanup_docker'
EXIT
trap
'set -eux; cleanup_docker'
EXIT
# Setup container
# Setup container
nvidia-docker run
--rm
--init
--detach
\
if
[
-z
"
${
NV_GPU
-
}
"
]
;
then
readonly
_docker_gpu_args
=
"--gpus all"
else
readonly
_docker_gpu_args
=
'--gpus="'
device
=
${
NV_GPU
}
'" -e NVIDIA_VISIBLE_DEVICES='
"
${
NV_GPU
}
"
fi
docker run
${
_docker_gpu_args
}
--rm
--init
--detach
\
--net
=
host
--uts
=
host
--ipc
=
host
--security-opt
=
seccomp
=
unconfined
\
--net
=
host
--uts
=
host
--ipc
=
host
--security-opt
=
seccomp
=
unconfined
\
--name
=
"
${
_cont_name
}
"
"
${
_cont_mounts
[@]
}
"
\
--name
=
"
${
_cont_name
}
"
"
${
_cont_mounts
[@]
}
"
\
"
${
CONT
}
"
sleep
infinity
"
${
CONT
}
"
sleep
infinity
...
...
NVIDIA/benchmarks/maskrcnn/implementations/pytorch/run_with_docker.sh
View file @
d06288b2
...
@@ -60,7 +60,13 @@ cleanup_docker
...
@@ -60,7 +60,13 @@ cleanup_docker
trap
'set -eux; cleanup_docker'
EXIT
trap
'set -eux; cleanup_docker'
EXIT
# Setup container
# Setup container
nvidia-docker run
--rm
--init
--detach
\
if
[
-z
"
${
NV_GPU
-
}
"
]
;
then
readonly
_docker_gpu_args
=
"--gpus all"
else
readonly
_docker_gpu_args
=
'--gpus="'
device
=
${
NV_GPU
}
'" -e NVIDIA_VISIBLE_DEVICES='
"
${
NV_GPU
}
"
fi
docker run
${
_docker_gpu_args
}
--rm
--init
--detach
\
--net
=
host
--uts
=
host
--ipc
=
host
--security-opt
=
seccomp
=
unconfined
\
--net
=
host
--uts
=
host
--ipc
=
host
--security-opt
=
seccomp
=
unconfined
\
--ulimit
=
stack
=
67108864
--ulimit
=
memlock
=
-1
\
--ulimit
=
stack
=
67108864
--ulimit
=
memlock
=
-1
\
--name
=
"
${
_cont_name
}
"
"
${
_cont_mounts
[@]
}
"
\
--name
=
"
${
_cont_name
}
"
"
${
_cont_mounts
[@]
}
"
\
...
...
NVIDIA/benchmarks/resnet/implementations/mxnet/common/fit.py
View file @
d06288b2
...
@@ -53,7 +53,9 @@ import cuda_graphs.graph_wrapper as graph_wrapper
...
@@ -53,7 +53,9 @@ import cuda_graphs.graph_wrapper as graph_wrapper
from
common.data
import
SyntheticDataIter
from
common.data
import
SyntheticDataIter
from
scaleoutbridge
import
init_bridge
,
ScaleoutBridge
as
SBridge
# from scaleoutbridge import init_bridge, ScaleoutBridge as SBridge
from
mlperf_common.scaleoutbridge
import
init_bridge
,
ScaleoutBridgeBase
as
SBridge
from
mlperf_common.frameworks.mxnet
import
MXNetProfilerHandler
,
MPICommunicationHandler
TRAIN_CUDA_GRAPH_ID
=
0
TRAIN_CUDA_GRAPH_ID
=
0
...
@@ -907,7 +909,8 @@ def mlperf_fit(self, args, train_data,
...
@@ -907,7 +909,8 @@ def mlperf_fit(self, args, train_data,
key
=
mllogger
.
constants
.
BLOCK_START
,
key
=
mllogger
.
constants
.
BLOCK_START
,
metadata
=
{
'first_epoch_num'
:
block_epoch_start
+
1
,
'epoch_count'
:
block_epoch_count
})
metadata
=
{
'first_epoch_num'
:
block_epoch_start
+
1
,
'epoch_count'
:
block_epoch_count
})
sbridge
=
init_bridge
(
hvd
.
rank
())
#sbridge = init_bridge(hvd.rank())
sbridge
=
init_bridge
(
MXNetProfilerHandler
(),
MPICommunicationHandler
(),
mllogger
)
################################################################################
################################################################################
# training loop with dali overlap with fwd
# training loop with dali overlap with fwd
...
...
NVIDIA/benchmarks/resnet/implementations/mxnet/common/optimizer.py
View file @
d06288b2
...
@@ -48,7 +48,8 @@ from mxnet.ndarray import sparse
...
@@ -48,7 +48,8 @@ from mxnet.ndarray import sparse
#####
#####
from
mxnet
import
cuda_utils
as
cu
from
mxnet
import
cuda_utils
as
cu
from
scaleoutbridge
import
ScaleoutBridge
as
SBridge
# from scaleoutbridge import ScaleoutBridge as SBridge
from
mlperf_common.scaleoutbridge
import
ScaleoutBridgeBase
as
SBridge
from
common.data
import
SyntheticDataIter
from
common.data
import
SyntheticDataIter
...
...
NVIDIA/benchmarks/resnet/implementations/mxnet/run_with_docker.sh
View file @
d06288b2
...
@@ -70,7 +70,13 @@ cleanup_docker
...
@@ -70,7 +70,13 @@ cleanup_docker
trap
'set -eux; cleanup_docker'
EXIT
trap
'set -eux; cleanup_docker'
EXIT
# Setup container
# Setup container
nvidia-docker run
--rm
--init
--detach
\
if
[
-z
"
${
NV_GPU
-
}
"
]
;
then
readonly
_docker_gpu_args
=
"--gpus all"
else
readonly
_docker_gpu_args
=
'--gpus="'
device
=
${
NV_GPU
}
'" -e NVIDIA_VISIBLE_DEVICES='
"
${
NV_GPU
}
"
fi
docker run
${
_docker_gpu_args
}
--rm
--init
--detach
\
--net
=
host
--uts
=
host
--ipc
=
host
--security-opt
=
seccomp
=
unconfined
\
--net
=
host
--uts
=
host
--ipc
=
host
--security-opt
=
seccomp
=
unconfined
\
--ulimit
=
stack
=
67108864
--ulimit
=
memlock
=
-1
\
--ulimit
=
stack
=
67108864
--ulimit
=
memlock
=
-1
\
--name
=
"
${
_cont_name
}
"
"
${
_cont_mounts
[@]
}
"
\
--name
=
"
${
_cont_name
}
"
"
${
_cont_mounts
[@]
}
"
\
...
...
NVIDIA/benchmarks/unet3d/implementations/mxnet/run_with_docker.sh
View file @
d06288b2
...
@@ -51,7 +51,13 @@ cleanup_docker
...
@@ -51,7 +51,13 @@ cleanup_docker
trap
'set -eux; cleanup_docker'
EXIT
trap
'set -eux; cleanup_docker'
EXIT
# Setup container
# Setup container
nvidia-docker run
--rm
--init
--detach
\
if
[
-z
"
${
NV_GPU
-
}
"
]
;
then
readonly
_docker_gpu_args
=
"--gpus all"
else
readonly
_docker_gpu_args
=
'--gpus="'
device
=
${
NV_GPU
}
'" -e NVIDIA_VISIBLE_DEVICES='
"
${
NV_GPU
}
"
fi
docker run
${
_docker_gpu_args
}
--rm
--init
--detach
\
--net
=
host
--uts
=
host
--ipc
=
host
--security-opt
=
seccomp
=
unconfined
\
--net
=
host
--uts
=
host
--ipc
=
host
--security-opt
=
seccomp
=
unconfined
\
--ulimit
=
stack
=
67108864
--ulimit
=
memlock
=
-1
\
--ulimit
=
stack
=
67108864
--ulimit
=
memlock
=
-1
\
--name
=
"
${
_cont_name
}
"
"
${
_cont_mounts
[@]
}
"
\
--name
=
"
${
_cont_name
}
"
"
${
_cont_mounts
[@]
}
"
\
...
...
NVIDIA/benchmarks/unet3d/implementations/mxnet/runtime/arguments.py
View file @
d06288b2
...
@@ -90,4 +90,16 @@ PARSER.add_argument('--input_batch_multiplier', '-ibm', dest='input_batch_multip
...
@@ -90,4 +90,16 @@ PARSER.add_argument('--input_batch_multiplier', '-ibm', dest='input_batch_multip
PARSER
.
add_argument
(
'--use_cached_loader'
,
'-ucl'
,
dest
=
'use_cached_loader'
,
action
=
'store_true'
,
default
=
False
)
PARSER
.
add_argument
(
'--use_cached_loader'
,
'-ucl'
,
dest
=
'use_cached_loader'
,
action
=
'store_true'
,
default
=
False
)
PARSER
.
add_argument
(
'--stick_to_shard'
,
'-sts'
,
dest
=
'stick_to_shard'
,
action
=
'store_true'
,
default
=
False
)
PARSER
.
add_argument
(
'--stick_to_shard'
,
'-sts'
,
dest
=
'stick_to_shard'
,
action
=
'store_true'
,
default
=
False
)
PARSER
.
add_argument
(
'--use_nvshmem'
,
dest
=
'use_nvshmem'
,
action
=
'store_true'
,
default
=
False
)
PARSER
.
add_argument
(
'--use_nvshmem'
,
dest
=
'use_nvshmem'
,
action
=
'store_true'
,
default
=
False
)
\ No newline at end of file
## Additional arguments passed
PARSER
.
add_argument
(
'--dense_seq_output'
,
action
=
'store_true'
,
help
=
'Enable dense sequential output'
)
PARSER
.
add_argument
(
'--pad_fmha'
,
action
=
'store_true'
,
help
=
'Enable padding for FMHA'
)
PARSER
.
add_argument
(
'--fused_bias_fc'
,
action
=
'store_true'
,
help
=
'Enable fused bias for FC'
)
PARSER
.
add_argument
(
'--fused_bias_mha'
,
action
=
'store_true'
,
help
=
'Enable fused bias for MHA'
)
PARSER
.
add_argument
(
'--fused_dropout_add'
,
action
=
'store_true'
,
help
=
'Enable fused dropout and add'
)
PARSER
.
add_argument
(
'--fused_gemm_gelu'
,
action
=
'store_true'
,
help
=
'Enable fused GEMM and GELU'
)
PARSER
.
add_argument
(
'--packed_samples'
,
action
=
'store_true'
,
help
=
'Enable packed samples'
)
PARSER
.
add_argument
(
'--use_transformer_engine2'
,
action
=
'store_true'
,
help
=
'Enable transformer engine v2'
)
PARSER
.
add_argument
(
'--cuda_graph_mode'
,
type
=
str
,
help
=
'CUDA graph mode'
)
PARSER
.
add_argument
(
'--use_cuda_graph'
,
action
=
'store_true'
,
help
=
'Use CUDA graph'
)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment