"official/vision/dataloaders/tf_example_decoder.py" did not exist on "d5afcc72373febee1c58c5dfcd0166f37bfe6cb3"
Commit 2ac5586e authored by Rayyyyy's avatar Rayyyyy
Browse files

first commit

parents
Pipeline #784 canceled with stages
# Training options for the HATModel x4 run named below.
# No pretrained generator is loaded (path.pretrain_network_g is null).

# general settings
name: train_HAT_SRx4_ImageNet_from_scratch
model_type: HATModel
scale: 4
num_gpu: auto
manual_seed: 0

# dataset and data loader settings
datasets:
  train:
    name: ImageNet
    type: ImageNetPairedDataset
    dataroot_gt: datasets/ImageNet/GT
    meta_info_file: hat/data/meta_info/meta_info_ImageNet_GT.txt
    io_backend:
      type: disk

    gt_size: 256
    use_hflip: true
    use_rot: true

    # data loader
    use_shuffle: true
    num_worker_per_gpu: 6
    batch_size_per_gpu: 4
    dataset_enlarge_ratio: 1
    prefetch_mode: ~

  val_1:
    name: Set5
    type: PairedImageDataset
    dataroot_gt: ./datasets/Set5/GTmod4
    dataroot_lq: ./datasets/Set5/LRbicx4
    io_backend:
      type: disk

  val_2:
    name: Set14
    type: PairedImageDataset
    dataroot_gt: ./datasets/Set14/GTmod4
    dataroot_lq: ./datasets/Set14/LRbicx4
    io_backend:
      type: disk

  # val_3:
  #   name: Urban100
  #   type: PairedImageDataset
  #   dataroot_gt: ./datasets/urban100/GTmod4
  #   dataroot_lq: ./datasets/urban100/LRbicx4
  #   io_backend:
  #     type: disk

# network structures
network_g:
  type: HAT
  upscale: 4
  in_chans: 3
  img_size: 64
  window_size: 16
  compress_ratio: 3
  squeeze_factor: 30
  conv_scale: 0.01
  overlap_ratio: 0.5
  img_range: 1.
  depths: [6, 6, 6, 6, 6, 6]
  embed_dim: 180
  num_heads: [6, 6, 6, 6, 6, 6]
  mlp_ratio: 2
  upsampler: 'pixelshuffle'
  resi_connection: '1conv'

# path
path:
  pretrain_network_g: ~
  strict_load_g: true
  resume_state: ~

# training settings
train:
  ema_decay: 0.999
  optim_g:
    type: Adam
    lr: !!float 2e-4
    weight_decay: 0
    betas: [0.9, 0.99]

  scheduler:
    type: MultiStepLR
    milestones: [300000, 500000, 650000, 700000, 750000]
    gamma: 0.5

  total_iter: 800000
  warmup_iter: -1  # no warm up

  # losses
  pixel_opt:
    type: L1Loss
    loss_weight: 1.0
    reduction: mean

# validation settings
val:
  val_freq: !!float 1e4
  save_img: false
  pbar: false

  metrics:
    psnr:
      type: calculate_psnr
      crop_border: 4
      test_y_channel: true
      better: higher  # the higher, the better. Default: higher
    ssim:
      type: calculate_ssim
      crop_border: 4
      test_y_channel: true
      better: higher  # the higher, the better. Default: higher

# logging settings
logger:
  print_freq: 100
  save_checkpoint_freq: !!float 1e4
  use_tb_logger: true
  wandb:
    project: ~
    resume_id: ~

# dist training settings
dist_params:
  backend: nccl
  port: 29500
# Training options for the HATModel x4 fine-tuning run named below.
# The generator starts from the 'params_ema' weights of the checkpoint in path.pretrain_network_g.

# general settings
name: train_HAT_SRx4_finetune_from_ImageNet_pretrain
model_type: HATModel
scale: 4
num_gpu: auto
manual_seed: 0

# dataset and data loader settings
datasets:
  train:
    name: DF2K
    type: PairedImageDataset
    dataroot_gt: datasets/DF2K/DF2K_HR_sub
    dataroot_lq: datasets/DF2K/DF2K_bicx4_sub
    meta_info_file: hat/data/meta_info/meta_info_DF2Ksub_GT.txt
    io_backend:
      type: disk

    gt_size: 256
    use_hflip: true
    use_rot: true

    # data loader
    use_shuffle: true
    num_worker_per_gpu: 6
    batch_size_per_gpu: 4
    dataset_enlarge_ratio: 1
    prefetch_mode: ~

  val_1:
    name: Set5
    type: PairedImageDataset
    dataroot_gt: ./datasets/Set5/GTmod4
    dataroot_lq: ./datasets/Set5/LRbicx4
    io_backend:
      type: disk

  val_2:
    name: Set14
    type: PairedImageDataset
    dataroot_gt: ./datasets/Set14/GTmod4
    dataroot_lq: ./datasets/Set14/LRbicx4
    io_backend:
      type: disk

  # val_3:
  #   name: Urban100
  #   type: PairedImageDataset
  #   dataroot_gt: ./datasets/urban100/GTmod4
  #   dataroot_lq: ./datasets/urban100/LRbicx4
  #   io_backend:
  #     type: disk

# network structures
network_g:
  type: HAT
  upscale: 4
  in_chans: 3
  img_size: 64
  window_size: 16
  compress_ratio: 3
  squeeze_factor: 30
  conv_scale: 0.01
  overlap_ratio: 0.5
  img_range: 1.
  depths: [6, 6, 6, 6, 6, 6]
  embed_dim: 180
  num_heads: [6, 6, 6, 6, 6, 6]
  mlp_ratio: 2
  upsampler: 'pixelshuffle'
  resi_connection: '1conv'

# path
path:
  pretrain_network_g: ./experiments/train_HAT_SRx4_ImageNet_from_scratch/models/net_g_latest.pth
  param_key_g: 'params_ema'
  strict_load_g: true
  resume_state: ~

# training settings
train:
  ema_decay: 0.999
  optim_g:
    type: Adam
    lr: !!float 1e-5
    weight_decay: 0
    betas: [0.9, 0.99]

  scheduler:
    type: MultiStepLR
    milestones: [125000, 200000, 225000, 240000]
    gamma: 0.5

  total_iter: 250000
  warmup_iter: -1  # no warm up

  # losses
  pixel_opt:
    type: L1Loss
    loss_weight: 1.0
    reduction: mean

# validation settings
val:
  val_freq: !!float 5e3
  save_img: false
  pbar: false

  metrics:
    psnr:
      type: calculate_psnr
      crop_border: 4
      test_y_channel: true
      better: higher  # the higher, the better. Default: higher
    ssim:
      type: calculate_ssim
      crop_border: 4
      test_y_channel: true
      better: higher  # the higher, the better. Default: higher

# logging settings
logger:
  print_freq: 100
  save_checkpoint_freq: !!float 5e3
  use_tb_logger: true
  wandb:
    project: ~
    resume_id: ~

# dist training settings
dist_params:
  backend: nccl
  port: 29500
# Training options for the HATModel x4 fine-tuning run named below.
# The generator starts from the 'params_ema' weights of the checkpoint in path.pretrain_network_g.

# general settings
name: train_HAT_SRx4_finetune_from_SRx2
model_type: HATModel
scale: 4
num_gpu: auto
manual_seed: 0

# dataset and data loader settings
datasets:
  train:
    name: DF2K
    type: PairedImageDataset
    dataroot_gt: datasets/DF2K/DF2K_HR_sub
    dataroot_lq: datasets/DF2K/DF2K_bicx4_sub
    meta_info_file: hat/data/meta_info/meta_info_DF2Ksub_GT.txt
    io_backend:
      type: disk

    gt_size: 256
    use_hflip: true
    use_rot: true

    # data loader
    use_shuffle: true
    num_worker_per_gpu: 6
    batch_size_per_gpu: 4
    dataset_enlarge_ratio: 1
    prefetch_mode: ~

  val_1:
    name: Set5
    type: PairedImageDataset
    dataroot_gt: ./datasets/Set5/GTmod4
    dataroot_lq: ./datasets/Set5/LRbicx4
    io_backend:
      type: disk

  val_2:
    name: Set14
    type: PairedImageDataset
    dataroot_gt: ./datasets/Set14/GTmod4
    dataroot_lq: ./datasets/Set14/LRbicx4
    io_backend:
      type: disk

  # val_3:
  #   name: Urban100
  #   type: PairedImageDataset
  #   dataroot_gt: ./datasets/urban100/GTmod4
  #   dataroot_lq: ./datasets/urban100/LRbicx4
  #   io_backend:
  #     type: disk

# network structures
network_g:
  type: HAT
  upscale: 4
  in_chans: 3
  img_size: 64
  window_size: 16
  compress_ratio: 3
  squeeze_factor: 30
  conv_scale: 0.01
  overlap_ratio: 0.5
  img_range: 1.
  depths: [6, 6, 6, 6, 6, 6]
  embed_dim: 180
  num_heads: [6, 6, 6, 6, 6, 6]
  mlp_ratio: 2
  upsampler: 'pixelshuffle'
  resi_connection: '1conv'

# path
path:
  pretrain_network_g: ./experiments/train_HAT_SRx2_from_scratch/models/net_g_latest.pth
  param_key_g: 'params_ema'
  strict_load_g: true
  resume_state: ~

# training settings
train:
  ema_decay: 0.999
  optim_g:
    type: Adam
    lr: !!float 1e-4
    weight_decay: 0
    betas: [0.9, 0.99]

  scheduler:
    type: MultiStepLR
    milestones: [125000, 200000, 225000, 240000]
    gamma: 0.5

  total_iter: 250000
  warmup_iter: -1  # no warm up

  # losses
  pixel_opt:
    type: L1Loss
    loss_weight: 1.0
    reduction: mean

# validation settings
val:
  val_freq: !!float 5e3
  save_img: false
  pbar: false

  metrics:
    psnr:
      type: calculate_psnr
      crop_border: 4
      test_y_channel: true
      better: higher  # the higher, the better. Default: higher
    ssim:
      type: calculate_ssim
      crop_border: 4
      test_y_channel: true
      better: higher  # the higher, the better. Default: higher

# logging settings
logger:
  print_freq: 100
  save_checkpoint_freq: !!float 5e3
  use_tb_logger: true
  wandb:
    project: ~
    resume_id: ~

# dist training settings
dist_params:
  backend: nccl
  port: 29500
# Training options for the RealHATGANModel x4 run named below.
# Defines both a generator (network_g) and a discriminator (network_d), and
# combines pixel, perceptual and GAN losses.

# general settings
name: train_Real_HAT_GAN_SRx4_finetune_from_mse_model
model_type: RealHATGANModel
scale: 4
num_gpu: auto
manual_seed: 0

# ----------------- options for synthesizing training data in RealESRGANModel ----------------- #
# USM the ground-truth
l1_gt_usm: true
percep_gt_usm: true
gan_gt_usm: false

# the first degradation process
resize_prob: [0.2, 0.7, 0.1]  # up, down, keep
resize_range: [0.15, 1.5]
gaussian_noise_prob: 0.5
noise_range: [1, 30]
poisson_scale_range: [0.05, 3]
gray_noise_prob: 0.4
jpeg_range: [30, 95]

# the second degradation process
second_blur_prob: 0.8
resize_prob2: [0.3, 0.4, 0.3]  # up, down, keep
resize_range2: [0.3, 1.2]
gaussian_noise_prob2: 0.5
noise_range2: [1, 25]
poisson_scale_range2: [0.05, 2.5]
gray_noise_prob2: 0.4
jpeg_range2: [30, 95]

gt_size: 256
queue_size: 180

# dataset and data loader settings
datasets:
  train:
    name: DF2K+OST
    type: RealESRGANDataset
    dataroot_gt: datasets/DFO/DFO_sub  # Refer to Real-ESRGAN for OST dataset. Only DF2K is OK.
    meta_info_file: hat/data/meta_info/meta_info_DFOsub_GT.txt
    io_backend:
      type: disk

    blur_kernel_size: 21
    kernel_list: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
    kernel_prob: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
    sinc_prob: 0.1
    blur_sigma: [0.2, 3]
    betag_range: [0.5, 4]
    betap_range: [1, 2]

    blur_kernel_size2: 21
    kernel_list2: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
    kernel_prob2: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
    sinc_prob2: 0.1
    blur_sigma2: [0.2, 1.5]
    betag_range2: [0.5, 4]
    betap_range2: [1, 2]

    final_sinc_prob: 0.8

    gt_size: 256
    use_hflip: true
    use_rot: false

    # data loader
    use_shuffle: true
    num_worker_per_gpu: 5
    batch_size_per_gpu: 4
    dataset_enlarge_ratio: 1
    prefetch_mode: ~

  # Uncomment these for validation
  # val:
  #   name: validation
  #   type: SingleImageDataset
  #   dataroot_lq: datasets/RealSRSet+5images
  #   io_backend:
  #     type: disk

# network structures
network_g:
  type: HAT
  upscale: 4
  in_chans: 3
  img_size: 64
  window_size: 16
  compress_ratio: 3
  squeeze_factor: 30
  conv_scale: 0.01
  overlap_ratio: 0.5
  img_range: 1.
  depths: [6, 6, 6, 6, 6, 6]
  embed_dim: 180
  num_heads: [6, 6, 6, 6, 6, 6]
  mlp_ratio: 2
  upsampler: 'pixelshuffle'
  resi_connection: '1conv'

network_d:
  type: UNetDiscriminatorSN
  num_in_ch: 3
  num_feat: 64
  skip_connection: true

# path
path:
  # use the pre-trained Real-ESRNet model
  pretrain_network_g: experiments/pretrained_models/Real_HAT_x4.pth  # train the MSE-based model 'Real_HAT' first.
  param_key_g: params_ema
  strict_load_g: true
  resume_state: ~

# training settings
train:
  ema_decay: 0.999
  optim_g:
    type: Adam
    lr: !!float 1e-4
    weight_decay: 0
    betas: [0.9, 0.99]
  optim_d:
    type: Adam
    lr: !!float 1e-4
    weight_decay: 0
    betas: [0.9, 0.99]

  scheduler:
    type: MultiStepLR
    milestones: [400000]
    gamma: 0.5

  total_iter: 400000
  warmup_iter: -1  # no warm up

  # losses
  pixel_opt:
    type: L1Loss
    loss_weight: 1.0
    reduction: mean
  # perceptual loss (content and style losses)
  perceptual_opt:
    type: PerceptualLoss
    layer_weights:
      # before relu
      'conv1_2': 0.1
      'conv2_2': 0.1
      'conv3_4': 1
      'conv4_4': 1
      'conv5_4': 1
    vgg_type: vgg19
    use_input_norm: true
    perceptual_weight: !!float 1.0
    style_weight: 0
    range_norm: false
    criterion: l1
  # gan loss
  gan_opt:
    type: GANLoss
    gan_type: vanilla
    real_label_val: 1.0
    fake_label_val: 0.0
    loss_weight: !!float 1e-1

  net_d_iters: 1
  net_d_init_iters: 0

# Uncomment these for validation
# validation settings
# val:
#   val_freq: !!float 1e4
#   save_img: True
#
#   metrics:
#     psnr: # metric name
#       type: calculate_psnr
#       crop_border: 4
#       test_y_channel: false

# logging settings
logger:
  print_freq: 200
  save_checkpoint_freq: !!float 1e4
  use_tb_logger: true
  wandb:
    project: ~
    resume_id: ~

# dist training settings
dist_params:
  backend: nccl
  port: 29500
\ No newline at end of file
# Training options for the RealHATMSEModel x4 run named below.
# Only a generator (network_g) is defined and the only loss is the L1 pixel loss.

# general settings
name: train_Real_HAT_mse_model
model_type: RealHATMSEModel
scale: 4
num_gpu: auto
manual_seed: 0

# ----------------- options for synthesizing training data in RealESRNetModel ----------------- #
gt_usm: true  # USM the ground-truth

# the first degradation process
resize_prob: [0.2, 0.7, 0.1]  # up, down, keep
resize_range: [0.15, 1.5]
gaussian_noise_prob: 0.5
noise_range: [1, 30]
poisson_scale_range: [0.05, 3]
gray_noise_prob: 0.4
jpeg_range: [30, 95]

# the second degradation process
second_blur_prob: 0.8
resize_prob2: [0.3, 0.4, 0.3]  # up, down, keep
resize_range2: [0.3, 1.2]
gaussian_noise_prob2: 0.5
noise_range2: [1, 25]
poisson_scale_range2: [0.05, 2.5]
gray_noise_prob2: 0.4
jpeg_range2: [30, 95]

gt_size: 256
queue_size: 180

# dataset and data loader settings
datasets:
  train:
    name: DF2K+OST
    type: RealESRGANDataset
    dataroot_gt: datasets/DFO/DFO_sub  # Refer to Real-ESRGAN for OST dataset. Only DF2K is OK.
    meta_info_file: hat/data/meta_info/meta_info_DFOsub_GT.txt
    io_backend:
      type: disk

    blur_kernel_size: 21
    kernel_list: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
    kernel_prob: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
    sinc_prob: 0.1
    blur_sigma: [0.2, 3]
    betag_range: [0.5, 4]
    betap_range: [1, 2]

    blur_kernel_size2: 21
    kernel_list2: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
    kernel_prob2: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
    sinc_prob2: 0.1
    blur_sigma2: [0.2, 1.5]
    betag_range2: [0.5, 4]
    betap_range2: [1, 2]

    final_sinc_prob: 0.8

    gt_size: 256
    use_hflip: true
    use_rot: false

    # data loader
    use_shuffle: true
    num_worker_per_gpu: 5
    batch_size_per_gpu: 4
    dataset_enlarge_ratio: 1
    prefetch_mode: ~

  # Uncomment these for validation
  # val:
  #   name: validation
  #   type: SingleImageDataset
  #   dataroot_lq: datasets/RealSRSet+5images
  #   io_backend:
  #     type: disk

# network structures
network_g:
  type: HAT
  upscale: 4
  in_chans: 3
  img_size: 64
  window_size: 16
  compress_ratio: 3
  squeeze_factor: 30
  conv_scale: 0.01
  overlap_ratio: 0.5
  img_range: 1.
  depths: [6, 6, 6, 6, 6, 6]
  embed_dim: 180
  num_heads: [6, 6, 6, 6, 6, 6]
  mlp_ratio: 2
  upsampler: 'pixelshuffle'
  resi_connection: '1conv'

# path
path:
  pretrain_network_g: ~
  param_key_g: params_ema
  strict_load_g: true
  resume_state: ~

# training settings
train:
  ema_decay: 0.999
  optim_g:
    type: Adam
    lr: !!float 1e-4
    weight_decay: 0
    betas: [0.9, 0.99]
  # NOTE(review): optim_d is configured although this file defines no network_d;
  # confirm whether RealHATMSEModel reads it or it is a leftover.
  optim_d:
    type: Adam
    lr: !!float 1e-4
    weight_decay: 0
    betas: [0.9, 0.99]

  scheduler:
    type: MultiStepLR
    milestones: [1000000]
    gamma: 0.5

  total_iter: 1000000
  warmup_iter: -1  # no warm up

  # losses
  pixel_opt:
    type: L1Loss
    loss_weight: 1.0
    reduction: mean

# Uncomment these for validation
# validation settings
# val:
#   val_freq: !!float 1e4
#   save_img: True
#
#   metrics:
#     psnr: # metric name
#       type: calculate_psnr
#       crop_border: 4
#       test_y_channel: false

# logging settings
logger:
  print_freq: 200
  save_checkpoint_freq: !!float 1e4
  use_tb_logger: true
  wandb:
    project: ~
    resume_id: ~

# dist training settings
dist_params:
  backend: nccl
  port: 29500
import numpy as np
import tempfile
import shutil
import os
from PIL import Image
import subprocess
from cog import BasePredictor, Input, Path
class Predictor(BasePredictor):
    """Cog predictor that processes one image by shelling out to ``hat/test.py``."""

    def predict(
        self,
        image: Path = Input(
            description="Input Image.",
        ),
    ) -> Path:
        """Run the HAT test script on ``image`` and return the saved result.

        The image is copied into a scratch ``input_dir`` directory, then
        ``hat/test.py`` is run with ``options/test/HAT_SRx4_ImageNet-LR.yml``.
        The single file found under
        ``results/HAT_SRx4_ImageNet-LR/visualization/custom`` is re-saved as
        ``output.png`` in a fresh temporary directory.
        (Presumably the options file points the test dataset at ``input_dir``;
        confirm against that YAML.)

        Args:
            image: Path of the input image.

        Returns:
            Path of the PNG written to a new temporary directory.

        Raises:
            subprocess.CalledProcessError: if the test script exits non-zero.
            AssertionError: if the result directory does not hold exactly one file.
        """
        input_dir = "input_dir"
        results_dir = "results"
        output_path = Path(tempfile.mkdtemp()) / "output.png"

        try:
            # Clear leftovers of an earlier (possibly crashed) run. Each
            # directory is tested by its own name.
            for d in [input_dir, results_dir]:
                if os.path.exists(d):
                    shutil.rmtree(d)
            os.makedirs(input_dir, exist_ok=False)

            input_path = os.path.join(input_dir, os.path.basename(image))
            shutil.copy(str(image), input_path)

            # check_call surfaces a failing test script immediately instead of
            # letting it show up later as a confusing missing-directory error.
            subprocess.check_call(
                [
                    "python",
                    "hat/test.py",
                    "-opt",
                    "options/test/HAT_SRx4_ImageNet-LR.yml",
                ]
            )

            res_dir = os.path.join(
                results_dir, "HAT_SRx4_ImageNet-LR", "visualization", "custom"
            )
            produced = os.listdir(res_dir)
            assert (
                len(produced) == 1
            ), "Should contain only one result for Single prediction."

            # Context manager closes the underlying file handle after saving.
            with Image.open(os.path.join(res_dir, produced[0])) as res:
                res.save(str(output_path))
        finally:
            # Best-effort cleanup: a directory that was never created must not
            # mask the exception that actually caused the failure.
            shutil.rmtree(input_dir, ignore_errors=True)
            shutil.rmtree(results_dir, ignore_errors=True)

        return output_path
# Multi-node launcher: starts single_process.sh once per rank through mpirun.
ulimit -u 200000

echo "START TIME: $(date)"

hostfile=./hostfile

# Total rank count = number of unique lines in the hostfile times 8.
np=$(sort "$hostfile" | uniq | wc -l)
np=$((np * 8))
echo "$np"

# The first field of the first hostfile line is used as dist_url.
nodename=$(sed -n "1p" "$hostfile")
dist_url=$(echo "$nodename" | awk '{print $1}')
# `mpirun -x NAME` forwards NAME from mpirun's own environment, so the
# variable has to be exported; a plain shell variable would not be passed on.
export dist_url

which mpirun

# Add the Python library directory; change this to the path of your own environment.
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/path/of/conda/envs/{env_name}/lib
export PYTHON=python3
# NOTE(review): the rank count above assumes 8 per host while this is 4; confirm which is intended.
export NPROC_PER_NODE=4

# Use mpirun for multi-GPU training, launching one process per device.
#   -np  number of ranks (devices)
#   -x   pass the named variable through to single_process.sh
mpirun -np "$np" --allow-run-as-root --hostfile "$hostfile" --bind-to none -x dist_url -x PYTHON -x NPROC_PER_NODE "$(pwd)/single_process.sh"

echo "END TIME: $(date)"
# Lint / formatter settings shared by flake8, yapf and isort (line width 120).
[flake8]
# Continuation lines of a multi-line value must be indented to belong to it.
ignore =
    # line break before binary operator (W503)
    W503,
    # line break after binary operator (W504)
    W504,
max-line-length=120

[yapf]
based_on_style = pep8
column_limit = 120
blank_line_before_nested_class_or_def = true
split_before_expression_after_opening_paren = true

[isort]
line_length = 120
multi_line_output = 0
known_standard_library = pkg_resources,setuptools
known_first_party = basicsr
known_third_party = cv2,requests,torch,torchvision
no_lines_before = STDLIB,LOCALFOLDER
default_section = THIRDPARTY
#!/usr/bin/env python
from setuptools import find_packages, setup
import os
import subprocess
import time
# Path of the generated version module: written by write_version_py() and read back by get_version().
version_file = 'hat/version.py'
def readme():
    """Return the full text of ``README.md`` (opened as UTF-8, relative to the current directory)."""
    with open('README.md', encoding='utf-8') as readme_file:
        return readme_file.read()
def get_git_hash():
    """Return the full commit hash of ``HEAD``, or ``'unknown'`` if it cannot be read.

    ``git rev-parse HEAD`` is run in a minimal, C-locale environment.
    ``'unknown'`` is returned when git cannot be started, when it exits with
    a non-zero status (for example outside a work tree), or when it prints
    nothing, so callers never receive an empty hash.
    """

    def _minimal_ext_cmd(cmd):
        # construct minimal environment
        env = {k: os.environ[k] for k in ('SYSTEMROOT', 'PATH', 'HOME') if k in os.environ}
        # LANGUAGE is used on win32
        env['LANGUAGE'] = 'C'
        env['LANG'] = 'C'
        env['LC_ALL'] = 'C'
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, env=env)
        out = proc.communicate()[0]
        if proc.returncode != 0:
            # Route command failure through the same path as "git not installed".
            raise OSError(f'{cmd[0]} exited with status {proc.returncode}')
        return out

    try:
        out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD'])
        sha = out.strip().decode('ascii')
    except OSError:
        sha = 'unknown'

    # An empty answer is no more useful than a failure.
    return sha or 'unknown'
def get_hash():
    """Return the first 7 characters of the git hash, or ``'unknown'`` when no ``.git`` entry exists here."""
    if not os.path.exists('.git'):
        return 'unknown'
    return get_git_hash()[:7]
def write_version_py():
    """Generate the version module at ``version_file``.

    Reads the short version string from the ``VERSION`` file in the current
    directory, combines it with the current time and the value of
    ``get_hash()``, and writes ``__version__``, ``__gitsha__`` and
    ``version_info`` assignments to ``version_file``.
    """
    template = """# GENERATED VERSION FILE
# TIME: {}
__version__ = '{}'
__gitsha__ = '{}'
version_info = ({})
"""
    git_sha = get_hash()
    with open('VERSION', 'r') as version_src:
        short_version = version_src.read().strip()

    # Numeric components stay bare, everything else is emitted double-quoted.
    components = []
    for piece in short_version.split('.'):
        components.append(piece if piece.isdigit() else f'"{piece}"')
    version_info = ', '.join(components)

    rendered = template.format(time.asctime(), short_version, git_sha, version_info)
    with open(version_file, 'w') as version_dst:
        version_dst.write(rendered)
def get_version():
    """Return ``__version__`` as defined by the module at ``version_file``.

    The file is executed into an explicit namespace dict. Reading the result
    back through ``locals()`` after a bare ``exec()`` stops working on Python
    3.13+ (PEP 667), where names assigned by ``exec()`` no longer appear in a
    later ``locals()`` call inside a function.

    Raises:
        KeyError: if the executed file does not define ``__version__``.
    """
    namespace = {}
    with open(version_file, 'r') as f:
        exec(compile(f.read(), version_file, 'exec'), namespace)
    return namespace['__version__']
def get_requirements(filename='requirements.txt'):
    """Return the lines of ``filename`` with newline characters removed.

    Args:
        filename: Requirements file, resolved relative to the directory that
            contains this script.

    Returns:
        list[str]: one entry per line of the file, in file order.
    """
    base_dir = os.path.dirname(os.path.realpath(__file__))
    req_path = os.path.join(base_dir, filename)
    with open(req_path, 'r') as req_file:
        return [row.replace('\n', '') for row in req_file]
if __name__ == '__main__':
    # The version module must be generated before get_version() reads it below.
    write_version_py()

    # Top-level directories that must not be picked up as packages.
    excluded_dirs = ('options', 'datasets', 'experiments', 'results', 'tb_logger', 'wandb')

    # NOTE(review): the license classifier says Apache while `license` below
    # says MIT; confirm which one matches the project's LICENSE file.
    trove_classifiers = [
        'Development Status :: 4 - Beta',
        'License :: OSI Approved :: Apache Software License',
        'Operating System :: OS Independent',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3.8',
    ]

    setup(
        name='hat',
        version=get_version(),
        description='HAT',
        long_description=readme(),
        long_description_content_type='text/markdown',
        author='Xiangyu Chen',
        author_email='chxy95@gmail.com',
        keywords='computer vision, pytorch, basicsr, image restoration, super-resolution',
        url='https://github.com/chxy95/HAT',
        include_package_data=True,
        packages=find_packages(exclude=excluded_dirs),
        classifiers=trove_classifiers,
        license='MIT License',
        setup_requires=['cython', 'numpy'],
        install_requires=get_requirements(),
        zip_safe=False)
#!/bin/bash
# Per-rank launcher started by mpirun: exports the communication settings,
# then runs the training command under numactl with a NIC and NUMA node
# chosen from the Open MPI local rank.

# Show NCCL debug information.
export NCCL_DEBUG=INFO
export HSA_FORCE_FINE_GRAIN_PCIE=1
export NCCL_P2P_LEVEL=5
export WORLD_SIZE=$OMPI_COMM_WORLD_SIZE
export NCCL_PLUGIN_P2P=ucx
export NCCL_SHM_DISABLE=1
export NCCL_IB_DISABLE=0  # 0 leaves the InfiniBand transport enabled
export NCCL_IB_HCA=mlx5_0
export NCCL_CROSS_NIC=1
export RCCL_NCHANNELS=4
export MASTER_ADDR=$dist_url
export MASTER_PORT=4321
export RANK=$OMPI_COMM_WORLD_RANK
export NCCL_SOCKET_IFNAME=ib0
lrank=$OMPI_COMM_WORLD_LOCAL_RANK

# Path of the YAML options file to train with.
YAML='options/train/train_HAT_SRx4_finetune_from_ImageNet_pretrain.yml'
APP="python3 -u hat/train.py -opt ${YAML} --launcher pytorch "

# Local ranks 0-3 are bound to NUMA node 0, ranks 4-7 to NUMA node 3.
case ${lrank} in
    0|1|2|3)
        numa_node=0
        ;;
    4|5|6|7)
        numa_node=3
        ;;
    *)
        # Fail loudly: a rank that silently does nothing would leave the
        # remaining ranks waiting for it.
        echo "single_process.sh: unsupported local rank '${lrank}' (expected 0-7)" >&2
        exit 1
        ;;
esac

# Every rank sees all eight devices; the NIC index follows the local rank.
export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export UCX_NET_DEVICES="mlx5_${lrank}:1"
export UCX_IB_PCI_BW="mlx5_${lrank}:50Gbs"
# ${APP} is intentionally unquoted so it splits into command and arguments.
numactl --cpunodebind=${numa_node} --membind=${numa_node} ${APP}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment