Commit b93fbddc authored by Ribin-Baby's avatar Ribin-Baby
Browse files

bug fix update

parent 5b62935b
...@@ -92,7 +92,13 @@ cleanup_docker ...@@ -92,7 +92,13 @@ cleanup_docker
trap 'set -eux; cleanup_docker' EXIT trap 'set -eux; cleanup_docker' EXIT
# Setup container # Setup container
nvidia-docker run --rm --init --detach --gpus='"'device=${NV_GPU}'"' \ if [ -z "${NV_GPU-}" ]; then
readonly _docker_gpu_args="--gpus all"
else
readonly _docker_gpu_args='--gpus="'device=${NV_GPU}'" -e NVIDIA_VISIBLE_DEVICES='"${NV_GPU}"
fi
docker run ${_docker_gpu_args} --rm --init --detach \
--net=host --uts=host --ipc=host --security-opt=seccomp=unconfined \ --net=host --uts=host --ipc=host --security-opt=seccomp=unconfined \
--ulimit=stack=67108864 --ulimit=memlock=-1 \ --ulimit=stack=67108864 --ulimit=memlock=-1 \
--name="${_cont_name}" ${_cont_mounts[@]} \ --name="${_cont_name}" ${_cont_mounts[@]} \
......
...@@ -60,7 +60,13 @@ cleanup_docker ...@@ -60,7 +60,13 @@ cleanup_docker
trap 'set -eux; cleanup_docker' EXIT trap 'set -eux; cleanup_docker' EXIT
# Setup container # Setup container
nvidia-docker run --rm --init --detach \ if [ -z "${NV_GPU-}" ]; then
readonly _docker_gpu_args="--gpus all"
else
readonly _docker_gpu_args='--gpus="'device=${NV_GPU}'" -e NVIDIA_VISIBLE_DEVICES='"${NV_GPU}"
fi
docker run ${_docker_gpu_args} --rm --init --detach \
--net=host --uts=host --ipc=host --security-opt=seccomp=unconfined \ --net=host --uts=host --ipc=host --security-opt=seccomp=unconfined \
--name="${_cont_name}" "${_cont_mounts[@]}" \ --name="${_cont_name}" "${_cont_mounts[@]}" \
"${CONT}" sleep infinity "${CONT}" sleep infinity
......
...@@ -60,7 +60,13 @@ cleanup_docker ...@@ -60,7 +60,13 @@ cleanup_docker
trap 'set -eux; cleanup_docker' EXIT trap 'set -eux; cleanup_docker' EXIT
# Setup container # Setup container
nvidia-docker run --rm --init --detach \ if [ -z "${NV_GPU-}" ]; then
readonly _docker_gpu_args="--gpus all"
else
readonly _docker_gpu_args='--gpus="'device=${NV_GPU}'" -e NVIDIA_VISIBLE_DEVICES='"${NV_GPU}"
fi
docker run ${_docker_gpu_args} --rm --init --detach \
--net=host --uts=host --ipc=host --security-opt=seccomp=unconfined \ --net=host --uts=host --ipc=host --security-opt=seccomp=unconfined \
--ulimit=stack=67108864 --ulimit=memlock=-1 \ --ulimit=stack=67108864 --ulimit=memlock=-1 \
--name="${_cont_name}" "${_cont_mounts[@]}" \ --name="${_cont_name}" "${_cont_mounts[@]}" \
......
...@@ -53,7 +53,9 @@ import cuda_graphs.graph_wrapper as graph_wrapper ...@@ -53,7 +53,9 @@ import cuda_graphs.graph_wrapper as graph_wrapper
from common.data import SyntheticDataIter from common.data import SyntheticDataIter
from scaleoutbridge import init_bridge, ScaleoutBridge as SBridge # from scaleoutbridge import init_bridge, ScaleoutBridge as SBridge
from mlperf_common.scaleoutbridge import init_bridge, ScaleoutBridgeBase as SBridge
from mlperf_common.frameworks.mxnet import MXNetProfilerHandler, MPICommunicationHandler
TRAIN_CUDA_GRAPH_ID = 0 TRAIN_CUDA_GRAPH_ID = 0
...@@ -907,7 +909,8 @@ def mlperf_fit(self, args, train_data, ...@@ -907,7 +909,8 @@ def mlperf_fit(self, args, train_data,
key=mllogger.constants.BLOCK_START, key=mllogger.constants.BLOCK_START,
metadata={'first_epoch_num': block_epoch_start + 1, 'epoch_count': block_epoch_count}) metadata={'first_epoch_num': block_epoch_start + 1, 'epoch_count': block_epoch_count})
sbridge = init_bridge(hvd.rank()) #sbridge = init_bridge(hvd.rank())
sbridge = init_bridge(MXNetProfilerHandler(), MPICommunicationHandler(), mllogger)
################################################################################ ################################################################################
# training loop with dali overlap with fwd # training loop with dali overlap with fwd
......
...@@ -48,7 +48,8 @@ from mxnet.ndarray import sparse ...@@ -48,7 +48,8 @@ from mxnet.ndarray import sparse
##### #####
from mxnet import cuda_utils as cu from mxnet import cuda_utils as cu
from scaleoutbridge import ScaleoutBridge as SBridge # from scaleoutbridge import ScaleoutBridge as SBridge
from mlperf_common.scaleoutbridge import ScaleoutBridgeBase as SBridge
from common.data import SyntheticDataIter from common.data import SyntheticDataIter
......
...@@ -70,7 +70,13 @@ cleanup_docker ...@@ -70,7 +70,13 @@ cleanup_docker
trap 'set -eux; cleanup_docker' EXIT trap 'set -eux; cleanup_docker' EXIT
# Setup container # Setup container
nvidia-docker run --rm --init --detach \ if [ -z "${NV_GPU-}" ]; then
readonly _docker_gpu_args="--gpus all"
else
readonly _docker_gpu_args='--gpus="'device=${NV_GPU}'" -e NVIDIA_VISIBLE_DEVICES='"${NV_GPU}"
fi
docker run ${_docker_gpu_args} --rm --init --detach \
--net=host --uts=host --ipc=host --security-opt=seccomp=unconfined \ --net=host --uts=host --ipc=host --security-opt=seccomp=unconfined \
--ulimit=stack=67108864 --ulimit=memlock=-1 \ --ulimit=stack=67108864 --ulimit=memlock=-1 \
--name="${_cont_name}" "${_cont_mounts[@]}" \ --name="${_cont_name}" "${_cont_mounts[@]}" \
......
...@@ -51,7 +51,13 @@ cleanup_docker ...@@ -51,7 +51,13 @@ cleanup_docker
trap 'set -eux; cleanup_docker' EXIT trap 'set -eux; cleanup_docker' EXIT
# Setup container # Setup container
nvidia-docker run --rm --init --detach \ if [ -z "${NV_GPU-}" ]; then
readonly _docker_gpu_args="--gpus all"
else
readonly _docker_gpu_args='--gpus="'device=${NV_GPU}'" -e NVIDIA_VISIBLE_DEVICES='"${NV_GPU}"
fi
docker run ${_docker_gpu_args} --rm --init --detach \
--net=host --uts=host --ipc=host --security-opt=seccomp=unconfined \ --net=host --uts=host --ipc=host --security-opt=seccomp=unconfined \
--ulimit=stack=67108864 --ulimit=memlock=-1 \ --ulimit=stack=67108864 --ulimit=memlock=-1 \
--name="${_cont_name}" "${_cont_mounts[@]}" \ --name="${_cont_name}" "${_cont_mounts[@]}" \
......
...@@ -90,4 +90,16 @@ PARSER.add_argument('--input_batch_multiplier', '-ibm', dest='input_batch_multip ...@@ -90,4 +90,16 @@ PARSER.add_argument('--input_batch_multiplier', '-ibm', dest='input_batch_multip
PARSER.add_argument('--use_cached_loader', '-ucl', dest='use_cached_loader', action='store_true', default=False) PARSER.add_argument('--use_cached_loader', '-ucl', dest='use_cached_loader', action='store_true', default=False)
PARSER.add_argument('--stick_to_shard', '-sts', dest='stick_to_shard', action='store_true', default=False) PARSER.add_argument('--stick_to_shard', '-sts', dest='stick_to_shard', action='store_true', default=False)
PARSER.add_argument('--use_nvshmem', dest='use_nvshmem', action='store_true', default=False) PARSER.add_argument('--use_nvshmem', dest='use_nvshmem', action='store_true', default=False)
\ No newline at end of file
## Additional arguments passed
PARSER.add_argument('--dense_seq_output', action='store_true', help='Enable dense sequential output')
PARSER.add_argument('--pad_fmha', action='store_true', help='Enable padding for FMHA')
PARSER.add_argument('--fused_bias_fc', action='store_true', help='Enable fused bias for FC')
PARSER.add_argument('--fused_bias_mha', action='store_true', help='Enable fused bias for MHA')
PARSER.add_argument('--fused_dropout_add', action='store_true', help='Enable fused dropout and add')
PARSER.add_argument('--fused_gemm_gelu', action='store_true', help='Enable fused GEMM and GELU')
PARSER.add_argument('--packed_samples', action='store_true', help='Enable packed samples')
PARSER.add_argument('--use_transformer_engine2', action='store_true', help='Enable transformer engine v2')
PARSER.add_argument('--cuda_graph_mode', type=str, help='CUDA graph mode')
PARSER.add_argument('--use_cuda_graph', action='store_true', help='Use CUDA graph')
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment