Commit 63567b0c authored by Sugon_ldc

add model mobilenetv2

# DDP.py
import os

import torch


def init_ddp(visible_devices='0,1,2,3'):
    """Initialise single-node DDP; returns the local rank, or None when only one device is visible."""
    if torch.cuda.device_count() > 1:
        # note: to actually restrict devices this must be set before the first CUDA call
        os.environ['HIP_VISIBLE_DEVICES'] = visible_devices
        # LOCAL_RANK is set by the launcher (torch.distributed.run), see run_multi_onenode.sh
        local_rank = int(os.environ['LOCAL_RANK'])
        print('local_rank:' + str(local_rank))
        torch.distributed.init_process_group(backend='nccl')
        torch.cuda.set_device(local_rank)
        return local_rank
    else:
        return None
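
For reference, a minimal usage sketch (hypothetical, not part of the commit): it assumes the process was launched with torch.distributed.run as in run_multi_onenode.sh below, which sets LOCAL_RANK in the environment.

# usage sketch -- hypothetical
import torch
import DDP

local_rank = DDP.init_ddp()  # None on a single-device machine
if local_rank is not None:
    device = torch.device('cuda', local_rank)
else:
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = torch.nn.Linear(8, 2).to(device)
if local_rank is not None:
    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[local_rank])
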
# datawork.py
import csv

import cv2
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset
from torchvision import transforms


class DataFile():
    """Loads the pixel-string CSV: column 0 = label, column 1 = space-separated pixels, column 2 = usage."""

    def __init__(self, path, local_rank):
        self.labels = []
        self.pics = []
        self.usage = []
        self.local_rank = local_rank
        ts_proc = transforms.Compose(
            [transforms.ToTensor(),
             transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))])
        with open(path, 'r') as f:
            ln = 0
            for row in csv.reader(f):
                if ln != 0:  # skip the header row
                    self.labels.append(int(row[0]))
                    arr = [int(x) for x in row[1].split(' ')]
                    nparr = np.array(arr, dtype=np.uint8)
                    # the pixel string is flat; reshape to a square frame before resizing
                    side = int(np.sqrt(nparr.size))
                    nparr = nparr.reshape(side, side)
                    rmk_img = cv2.resize(nparr, (224, 224))
                    rmk_img = cv2.cvtColor(rmk_img, cv2.COLOR_GRAY2RGB)
                    self.pics.append(ts_proc(rmk_img))
                    self.usage.append(row[2])
                ln += 1
                if ln % 5000 == 0 and (local_rank is None or local_rank == 0):
                    print("{} pics loaded.".format(ln))

    def to_file(self):
        pass

    def get_data(self):
        return self.labels, self.pics, self.usage
class LabelFile():
    """Loads a label CSV whose columns 2+ are per-class counts; the label is the argmax."""

    def __init__(self, path, local_rank):
        self.labels = []
        with open(path, 'r') as f:
            ln = 0
            for row in csv.reader(f, delimiter=','):
                if ln != 0:  # skip the header row
                    lab_cells = np.array(row[2:], dtype=np.uint8)
                    self.labels.append(np.argmax(lab_cells))
                ln += 1
                if ln % 5000 == 0 and (local_rank is None or local_rank == 0):
                    print("{} labels loaded.".format(ln))

    def get_labels(self):
        return self.labels
class Fer2013Dataset(Dataset):
    def __init__(self, local_rank):
        print('local_rank_datawork:', local_rank)
        # earlier data sources: data/fer2013/fer2013.csv (+ fer2013new_ms_labs.csv labels)
        self.datafile = DataFile('data/fer2013/DDP_data_231017.csv', local_rank)
        self.mode = 'train'
        self.X_train = None
        self.X_test = None
        self.y_train = None
        self.y_test = None
        if local_rank is None:
            self.randomization(0)
        else:
            self.randomization(local_rank)

    def randomization(self, seed):
        # note: `seed` is currently unused -- the split below is fixed by random_state=0,
        # so every rank sees the same train/test partition
        labels, pics, usage = self.datafile.get_data()
        tarpics = []
        tarlabs = []
        # keep only source labels 1..3 and remap them to 0..2
        for i in range(0, len(labels)):
            if labels[i] in (1, 2, 3):
                tarpics.append(pics[i])
                tarlabs.append(labels[i] - 1)
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            tarpics, tarlabs, test_size=0.2, random_state=0, stratify=tarlabs)

    def __len__(self):
        if self.mode == 'train':
            return len(self.y_train)
        elif self.mode == 'test':
            return len(self.y_test)

    def __getitem__(self, index):
        if self.mode == 'train':
            return self.X_train[index], torch.tensor(self.y_train[index])
        elif self.mode == 'test':
            return self.X_test[index], torch.tensor(self.y_test[index])

    def set_mode(self, mode):
        self.mode = mode
def show_pic(pixels):
    cv2.imshow('Show', pixels)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


if __name__ == '__main__':
    x = Fer2013Dataset(0)
    print('done')
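
A minimal consumption sketch (hypothetical, not part of the commit; assumes the hard-coded CSV path above exists and the run is single-process, so local_rank is None):

from torch.utils.data import DataLoader
from datawork import Fer2013Dataset

ds = Fer2013Dataset(None)              # None -> not distributed, seed 0
ds.set_mode('train')
loader = DataLoader(ds, batch_size=64, shuffle=True)
images, labels = next(iter(loader))
print(images.shape, labels.shape)      # expected: [64, 3, 224, 224] and [64]
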
# driver.py -- single-node / torchrun variant (invoked by the sbatch scripts below)
import random
import time

import torch
import torch.nn as nn
import torchvision.models.mobilenet as mobilenet
from sklearn.metrics import accuracy_score
from torch.utils.data import DataLoader

import DDP
from datawork import *
from fitlog import FitLog
# from peselibs_config import get_lib_path  # optional local library path helper
# sys.path.append(get_lib_path())

g_debug = False
class MobileNetV2Driver():
    def __init__(self, local_rank):  # DDP: system initialization
        self.nclass = 9
        self.batch_size = 64
        self.local_rank = local_rank
        self.nepoch = 500
        self.nround = 10
        self.lr = 0.00001  # note: unused -- the Adam lr below is 0.001
        self.loader = None
        self.test_loader = None
        self.dataset = None
        self.device = None
        # model & device
        self.model = mobilenet.MobileNetV2(num_classes=self.nclass)
        self._init_device()
        self.model.to(self.device)
        if self.local_rank is not None:
            self.model = nn.parallel.DistributedDataParallel(
                self.model, device_ids=[self.local_rank], find_unused_parameters=True)
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=0.001, betas=(0.9, 0.999))
        self.dataset = Fer2013Dataset(local_rank)
        try:
            self.sampler = torch.utils.data.distributed.DistributedSampler(self.dataset)
        except Exception:  # no initialized process group -> plain sampling
            self.sampler = None
        if self.local_rank is not None:
            self.loader = DataLoader(
                self.dataset, batch_size=self.batch_size, sampler=self.sampler, shuffle=False)
        else:
            self.loader = DataLoader(
                self.dataset, batch_size=self.batch_size, shuffle=True)
        self.test_loader = DataLoader(
            self.dataset, batch_size=self.batch_size, shuffle=True)

    def _init_device(self):
        if self.local_rank is not None:
            self.device = torch.device('cuda', self.local_rank)
        else:
            self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def init_dataset(self, seed):
        self.dataset.randomization(seed)
    def train(self):
        best_acc = 0
        best_acc_at = 0
        if self.local_rank == 0 or self.local_rank is None:
            self.fitlog = FitLog("./logs/")
            self.jishilog = FitLog("./logs/", prefix='jishi')  # jishi = timing log
            self.dlog = FitLog("./logs/", prefix='pred')
        st_time = time.time()
        for epoch in range(self.nepoch):
            self.dataset.set_mode("train")
            self.model.train()
            all_loss = []
            for batch_idx, (data, target) in enumerate(self.loader):
                data, target = data.to(self.device), target.to(self.device)
                # ---- timed training step ----
                if self.local_rank == 0 or self.local_rank is None:
                    jishi1 = time.time()
                self.optimizer.zero_grad()
                output = self.model(data)
                loss = self.criterion(output, target)
                loss.backward()
                self.optimizer.step()
                if self.local_rank == 0 or self.local_rank is None:
                    jishi2 = time.time()
                    jishi2_log = '****epc:{},process:{}/{},best_acc:{},start:{},end:{},duration:{}****'.format(
                        epoch, batch_idx * len(data), len(self.loader.dataset),
                        best_acc, jishi1, jishi2, jishi2 - jishi1)
                    self.jishilog.append(jishi2_log)
                    print(jishi2_log)
                all_loss.append(loss.item())
                t1 = time.time()
                duration = t1 - st_time
                if (batch_idx % 10 == 0) and (self.local_rank == 0 or self.local_rank is None):
                    btstr = 'epc: {} [{}/{} ({:.0f}%)] loss: {:.6f} b-acc: {:.3f} @:{},curtime:{},duration:{}'.format(
                        epoch, batch_idx * len(data), len(self.loader.dataset),
                        100. * batch_idx / len(self.loader), loss.item(), best_acc, best_acc_at, t1, duration)
                    self.fitlog.append(btstr)
                if g_debug:
                    break
            if self.local_rank == 0 or self.local_rank is None:
                # save from one process only, to avoid concurrent writes to the same file
                torch.save(self.model, './mobilenet.pth')
                t1 = time.time()
                duration = t1 - st_time
                acc, vloss, vloss_std, all_pred, all_tar = self._validate()
                epcstr = '****epc:{},loss:{:.6f},loss_std:{:.6f},vloss:{:.6f},vloss_std:{:.6f},acc:{:.3f},duration:{}****'.format(
                    epoch, np.mean(all_loss), np.std(all_loss), vloss, vloss_std, acc, duration)
                self.dlog.append(epcstr)
                self.dlog.append("pred" + str(all_pred))
                self.dlog.append("tar" + str(all_tar))
                if acc > best_acc:
                    best_acc = acc
                    best_acc_at = epoch
                print(epcstr)
            if g_debug:
                break
        if self.local_rank == 0 or self.local_rank is None:
            self.fitlog.close()
            self.dlog.close()
            self.jishilog.close()
    def _validate(self):
        self.model.eval()
        self.dataset.set_mode('test')
        all_pred = []
        all_tar = []
        all_loss = []
        with torch.no_grad():
            for i, (ft, labs) in enumerate(self.test_loader):
                ft, labs = ft.to(self.device), labs.to(self.device)
                output = self.model(ft)
                loss = self.criterion(output, labs)
                preds = torch.argmax(output, dim=1).cpu().numpy().tolist()
                all_pred.extend(preds)
                all_tar.extend(labs.cpu().numpy().tolist())
                all_loss.append(loss.item())
                if i % 100 == 0:
                    print('validating @ batch {}'.format(i))
                if g_debug:
                    break
        # accuracy over the whole test pass (a running mean of cumulative
        # accuracies would over-weight early batches)
        acc = accuracy_score(all_tar, all_pred)
        return acc, np.mean(all_loss), np.std(all_loss), all_pred, all_tar
    def run(self, iround):
        self.init_dataset(iround)
        self.train()
if __name__ == '__main__':
    print(torch.cuda.is_available())
    # make runs repeatable across the python / numpy / torch RNGs
    seed = 0
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    t0 = time.time()
    local_rank = DDP.init_ddp()
    print("local_rank=", local_rank)
    driver = MobileNetV2Driver(local_rank=local_rank)
    iround = 1
    driver.run(iround)
    t1 = time.time()
    print("result_time(s)=", t1 - t0)  # time.time() is in seconds; no /1000 needed
# multi-node variant of the driver (file name not visible in this diff); each process
# is started with explicit --rank/--world-size/--dist-url arguments, see run_mpi.sh
import argparse
import time

import torch
import torch.nn as nn
import torchvision.models.mobilenet as mobilenet
from sklearn.metrics import accuracy_score
from torch.utils.data import DataLoader

from datawork import *
from fitlog import FitLog

parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
parser.add_argument('--world-size', default=-1, type=int,
                    help='number of nodes for distributed training')
parser.add_argument('--rank', default=-1, type=int,
                    help='node rank for distributed training')
parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str,
                    help='url used to set up distributed training')
parser.add_argument('--dist-backend', default='nccl', type=str,
                    help='distributed backend')

g_debug = False
class MobileNetV2Driver():
    def __init__(self, args):  # DDP: system initialization
        self.nclass = 9
        self.batch_size = 128  # was 64
        self.nepoch = 50
        self.nround = 10
        self.lr = 0.00001
        self.loader = None
        self.test_loader = None
        self.dataset = None
        self.device = None
        self.args = args
        self.local_rank = args.rank  # global rank; the device index is rank % 4 (4 DCUs per node)
        # model & device
        self.model = mobilenet.MobileNetV2(num_classes=self.nclass)
        print("local_rank:{}".format(self.local_rank))
        self._init_device()
        self.model.to(self.device)
        print('device:', self.device)
        if self.local_rank is not None:
            self.model = nn.parallel.DistributedDataParallel(
                self.model, device_ids=[self.local_rank % 4],
                output_device=self.local_rank % 4, find_unused_parameters=True)
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=0.00001, betas=(0.9, 0.999))
        print('loading dataset ...')
        self.dataset = Fer2013Dataset(self.local_rank % 4)
        print('dataset loaded')
        try:
            self.sampler = torch.utils.data.distributed.DistributedSampler(
                self.dataset, num_replicas=args.world_size, rank=args.rank)
        except Exception:  # no initialized process group -> plain sampling
            self.sampler = None
        if self.local_rank is not None:
            self.loader = DataLoader(
                self.dataset, batch_size=self.batch_size, sampler=self.sampler, shuffle=False)
        else:
            self.loader = DataLoader(
                self.dataset, batch_size=self.batch_size, shuffle=True)
        self.test_loader = DataLoader(
            self.dataset, batch_size=self.batch_size, shuffle=True)
    def _init_device(self):
        if self.local_rank is not None:
            self.device = torch.device('cuda', self.local_rank % 4)
        else:
            self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def init_dataset(self, seed):
        self.dataset.randomization(seed)
    def train(self):
        best_acc = 0
        best_acc_at = 0
        if self.local_rank == 0 or self.local_rank is None:
            self.fitlog = FitLog("logs/")
            self.jishilog = FitLog("logs/", prefix='jishi')  # jishi = timing log
            self.dlog = FitLog("logs/", prefix='pred')
        st_time = time.time()
        for epoch in range(self.nepoch):
            self.dataset.set_mode("train")
            self.model.train()
            all_loss = []
            for batch_idx, (data, target) in enumerate(self.loader):
                data, target = data.to(self.device), target.to(self.device)
                if self.local_rank == 0 or self.local_rank is None:
                    jishi1 = time.time()
                self.optimizer.zero_grad()
                output = self.model(data)
                loss = self.criterion(output, target)
                loss.backward()
                self.optimizer.step()
                if self.local_rank == 0 or self.local_rank is None:
                    jishi2 = time.time()
                    jishi2_log = '****epc:{},process:{}/{},start:{},end:{},duration:{}****'.format(
                        epoch, batch_idx * len(data), len(self.loader.dataset),
                        jishi1, jishi2, jishi2 - jishi1)
                    self.jishilog.append(jishi2_log)
                    print(jishi2_log)
                all_loss.append(loss.item())
                t1 = time.time()
                duration = t1 - st_time
                if (batch_idx % 10 == 0) and (self.local_rank == 0 or self.local_rank is None):
                    btstr = 'epc: {} [{}/{} ({:.0f}%)] loss: {:.6f} b-acc: {:.3f} @:{},curtime:{},duration:{}'.format(
                        epoch, batch_idx * len(data), len(self.loader.dataset),
                        100. * batch_idx / len(self.loader), loss.item(), best_acc, best_acc_at, t1, duration)
                    self.fitlog.append(btstr)
                if g_debug:
                    break
            if self.local_rank == 0 or self.local_rank is None:
                # save from one process only, to avoid concurrent writes to the same file
                torch.save(self.model, './mobilenet.pth')
                t1 = time.time()
                duration = t1 - st_time
                acc, vloss, vloss_std, all_pred, all_tar = self._validate()
                epcstr = '****epc:{},loss:{:.6f},loss_std:{:.6f},vloss:{:.6f},vloss_std:{:.6f},acc:{:.3f},duration:{}****'.format(
                    epoch, np.mean(all_loss), np.std(all_loss), vloss, vloss_std, acc, duration)
                self.dlog.append(epcstr + ",preds:{},plabs:{}".format(all_pred, all_tar))
                if acc > best_acc:
                    best_acc = acc
                    best_acc_at = epoch
                print(epcstr)
            if g_debug:
                break
        if self.local_rank == 0 or self.local_rank is None:
            self.fitlog.close()
            self.dlog.close()
            self.jishilog.close()
    def _validate(self):
        self.model.eval()
        self.dataset.set_mode('test')
        all_pred = []
        all_tar = []
        all_loss = []
        with torch.no_grad():
            for i, (ft, labs) in enumerate(self.test_loader):
                ft, labs = ft.to(self.device), labs.to(self.device)
                output = self.model(ft)
                loss = self.criterion(output, labs)
                preds = torch.argmax(output, dim=1).cpu().numpy().tolist()
                all_pred.extend(preds)
                all_tar.extend(labs.cpu().numpy().tolist())
                all_loss.append(loss.item())
                if i % 100 == 0:
                    print('validating @ batch {}'.format(i))
                if g_debug:
                    break
        # accuracy over the whole test pass (see the note in the single-node variant)
        acc = accuracy_score(all_tar, all_pred)
        return acc, np.mean(all_loss), np.std(all_loss), all_pred, all_tar
    def run(self, iround):
        self.init_dataset(iround)
        self.train()
def init_ddp(args, visible_devices='0,1,2,3'):
    """Initialise multi-node DDP from explicit args; returns this process's global rank."""
    if torch.cuda.device_count() > 1:
        local_rank = args.rank
        print("local_rank:" + str(local_rank))
        print('backend={}, url={}, world_size={}, rank={}'.format(
            args.dist_backend, args.dist_url, args.world_size, args.rank))
        torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                             world_size=args.world_size, rank=args.rank)
        torch.cuda.set_device(local_rank % 4)  # 4 DCUs per node
        return local_rank
    else:
        return None
if __name__ == '__main__':
    print("torch.cuda.is_available", torch.cuda.is_available())
    args = parser.parse_args()
    t0 = time.time()
    print(torch.cuda.device_count())
    local_rank = init_ddp(args)
    print(local_rank)
    driver = MobileNetV2Driver(args)
    iround = 1
    driver.run(iround)
    t1 = time.time()
    print("result_time(s)=", t1 - t0)  # time.time() is in seconds; no /1000 needed
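
Both the device selection and init_ddp above hard-code a modulo 4, matching the 4 DCUs per node that the sbatch scripts request (--gres=dcu:4). A standalone sketch of that mapping (names are mine, for illustration):

def rank_to_device(global_rank: int, gpus_per_node: int = 4) -> int:
    # one process per accelerator: the local device index is the
    # global rank modulo the per-node accelerator count
    return global_rank % gpus_per_node

assert rank_to_device(0) == 0
assert rank_to_device(5) == 1   # second device on the second node
assert rank_to_device(7) == 3
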
# fitlog.py
import datetime
import os


class FitLog:
    """Tiny append-only log file; the file name defaults to a timestamp, optionally prefixed."""

    def __init__(self, folderpath="", fname=None, prefix=''):
        self.fname = fname
        if self.fname is None:
            self.fname = prefix + datetime.datetime.now().strftime("%y%m%d%H%M%S") + ".log"
        self.fh = open(folderpath + self.fname, 'w', newline='')

    def append(self, line, with_time=False, change_line=True):
        str2append = line if with_time is False else str(datetime.datetime.now()) + " " + line
        if change_line is True:
            str2append += os.linesep
        self.fh.write(str2append)
        self.fh.flush()

    def close(self):
        self.fh.flush()
        self.fh.close()
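
Usage sketch (hypothetical): the drivers create three of these, with the 'jishi' and 'pred' prefixes distinguishing the timing and prediction logs.

import os
from fitlog import FitLog

os.makedirs('logs', exist_ok=True)     # FitLog does not create the folder itself
log = FitLog('logs/', prefix='demo')
log.append('run started', with_time=True)
log.append('epoch 0 done')
log.close()
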
#!/bin/bash
#SBATCH -J test
#SBATCH -p wzhdexclu03
#SBATCH -N 1
##SBATCH -n 32
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=32
#SBATCH --gres=dcu:1
source ~/miniconda3/etc/profile.d/conda.sh
conda activate torch1.10-dtk22.10-py38
#conda activate base
module purge
module load compiler/devtoolset/7.3.1 mpi/hpcx/gcc-7.3.1 compiler/dtk/23.04
module list
python -u driver.py # the program to run
export LD_LIBRARY_PATH=${UCX_HOME}/lib:$LD_LIBRARY_PATH
env > env_$SLURM_JOBID
node_list=(`nodeset -e ${SLURM_NODELIST}`)
master_node=${node_list[0]}
for((i=0;i<${SLURM_NNODES};i++))
do
echo ${node_list[$i]} slots=1 >> hostfile-$SLURM_JOB_ID
done
echo mpirun -np ${SLURM_NNODES} --hostfile hostfile-$SLURM_JOB_ID ./single_process.sh ${SLURM_NNODES} ${master_node}
mpirun -np ${SLURM_NNODES} --hostfile hostfile-$SLURM_JOB_ID ./single_process.sh ${SLURM_NNODES} ${master_node}
#!/bin/bash
sbatch run_multi_onenode.sh
# peselibs_config.py
def get_lib_path():
    return "F:/SynologyDrive/ProjectsExtend/PESELibs/"
# quick viewer for the FER2013 pixel strings
import numpy as np
import pandas as pd
from PIL import Image

df = pd.read_csv("data/fer2013/fer2013.csv", chunksize=200)
for d in df:
    pixels = d['pixels'].apply(lambda x: x.split(" ")).tolist()
    pix = [int(x) for x in pixels[0]]            # first image of each chunk
    side = int(np.sqrt(len(pix)))                # FER2013 frames are square (48x48)
    img = Image.fromarray(np.array(pix, dtype=np.uint8).reshape(side, side))
    img.show()
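
Sanity check for the reshape above (stand-in values, for illustration): a FER2013 'pixels' cell holds 2304 integers, and 48 x 48 = 2304.

import numpy as np

pix = np.zeros(2304, dtype=np.uint8)   # stand-in for one parsed 'pixels' row
side = int(np.sqrt(pix.size))
assert side * side == pix.size and side == 48
print(pix.reshape(side, side).shape)   # (48, 48)
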
blinker==1.6.3
certifi==2023.7.22
charset-normalizer==3.2.0
click==8.1.7
docopt==0.6.2
filelock==3.12.4
Flask==3.0.0
gitdb==4.0.10
GitPython==3.1.37
idna==3.4
importlib-metadata==6.8.0
itsdangerous==2.1.2
Jinja2==3.1.2
joblib==1.3.2
MarkupSafe==2.1.3
numpy==1.24.4
opencv-python==4.8.1.78
Pillow==10.0.0
pip==23.2.1
requests==2.31.0
scikit-learn==1.3.1
scipy==1.10.1
setuptools==68.0.0
smmap==5.0.1
threadpoolctl==3.2.0
typing_extensions==4.7.1
urllib3==2.0.4
Werkzeug==3.0.0
wheel==0.38.4
zipp==3.17.0
#!/bin/bash
mkdir -p logs
#rm -rf log/*
mkdir -p hostfile
sbatch run_mpi.sh
#!/bin/bash
#SBATCH -J test
#SBATCH -p wzhdexclu03
#SBATCH -N 4
#SBATCH -n 32
##SBATCH --ntasks-per-node=4
##SBATCH --cpus-per-task=8
#SBATCH --gres=dcu:4
source ~/miniconda3/etc/profile.d/conda.sh
#conda activate torch1.10-dtk22.10-py38
conda activate base
module switch compiler/dtk/22.10
python -u driver.py # the program to run
#!/bin/bash
#SBATCH -p wzhdexclu03
#SBATCH -N 2
#SBATCH --cpus-per-task=1
#SBATCH --ntasks-per-node=32
#SBATCH --gres=dcu:4
#SBATCH -J multi_machine_dcu
#SBATCH -o logs/pt-%j.out
#SBATCH -e logs/pt-%j.err
echo "START TIME: $(date)"
hostfile=./hostfile/$SLURM_JOB_ID
scontrol show hostnames $SLURM_JOB_NODELIST > ${hostfile}
for i in `cat $hostfile`
do
echo ${i} slots=4 >> `pwd`/hostfile/hostfile-dl-$SLURM_JOB_ID
done
np=$(cat $hostfile|sort|uniq |wc -l)
np=$(($np*4))
nodename=$(cat $hostfile |sed -n "1p")
dist_url=`echo $nodename | awk '{print $1}'`
echo ${dist_url}
source ~/miniconda3/etc/profile.d/conda.sh
conda activate torch1.10-dtk23.04.1-py38
#conda activate base
module purge
module load compiler/devtoolset/7.3.1 mpi/hpcx/gcc-7.3.1 compiler/dtk/23.04.1
module list
mpirun -np $np --hostfile hostfile/hostfile-dl-$SLURM_JOB_ID --bind-to none `pwd`/single_process.sh $dist_url
#!/bin/bash
#SBATCH -p wzhdexclu03
#SBATCH -N 1
#SBATCH --cpus-per-task=1
#SBATCH --ntasks-per-node=32
#SBATCH --gres=dcu:4
#SBATCH -J onenode_4dcu
#SBATCH -o logs/pt-%j.out
#SBATCH -e logs/pt-%j.err
source ~/miniconda3/etc/profile.d/conda.sh
conda activate torch1.10-dtk22.10-py38
#conda activate base
module purge
module load compiler/devtoolset/7.3.1 mpi/hpcx/gcc-7.3.1 compiler/dtk/23.04
module list
export HIP_VISIBLE_DEVICES=0,1,2,3
python3 -m torch.distributed.run --nproc_per_node 4 driver.py # the program to run
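
torch.distributed.run sets LOCAL_RANK, RANK, WORLD_SIZE, MASTER_ADDR and MASTER_PORT for every worker; DDP.init_ddp above reads only LOCAL_RANK. A throwaway probe one could drop into driver.py when debugging the launch (hypothetical):

import os

for var in ('LOCAL_RANK', 'RANK', 'WORLD_SIZE', 'MASTER_ADDR', 'MASTER_PORT'):
    print(var, '=', os.environ.get(var))
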
#!/bin/bash
#SBATCH -p wzhdexclu03
#SBATCH -N 1
#SBATCH --cpus-per-task=1
#SBATCH --ntasks-per-node=32
#SBATCH --gres=dcu:4
#SBATCH -J single_dcu
#SBATCH -o logs/pt-%j.out
#SBATCH -e logs/pt-%j.err
source ~/miniconda3/etc/profile.d/conda.sh
conda activate torch1.10-dtk22.10-py38
#conda activate base
module purge
module load compiler/devtoolset/7.3.1 mpi/hpcx/gcc-7.3.1 compiler/dtk/23.04
module list
export HIP_VISIBLE_DEVICES=0
python -u driver.py # the program to run