Commit 41b18fd8 authored by zhe chen

Use pre-commit to reformat code
parent ff20ea39
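
The changes below are consistent with a standard pre-commit setup: imports are re-sorted (isort), double-quoted strings become single-quoted (double-quote-string-fixer), and missing trailing newlines are added (end-of-file-fixer). The actual .pre-commit-config.yaml is not shown in this diff; the following is a minimal sketch, under the assumption that hooks like these were used (the hook revisions are illustrative, not taken from the repository):

# .pre-commit-config.yaml (illustrative sketch; not the committed file)
repos:
  - repo: https://github.com/PyCQA/isort
    rev: 5.12.0              # assumed revision
    hooks:
      - id: isort            # re-orders and groups imports
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.4.0              # assumed revision
    hooks:
      - id: double-quote-string-fixer   # rewrites "..." strings as '...'
      - id: end-of-file-fixer           # ensures files end with a newline
      - id: trailing-whitespace         # strips trailing whitespace

Running `pre-commit run --all-files` once would apply such hooks to the whole codebase, which matches the scope of this commit.
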
......@@ -4,12 +4,12 @@
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
import io
import os
import zipfile
import io
import numpy as np
from PIL import Image
from PIL import ImageFile
from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
......@@ -96,7 +96,7 @@ class ZipReader(object):
try:
im = Image.open(io.BytesIO(data))
except:
print("ERROR IMG LOADED: ", path_img)
print('ERROR IMG LOADED: ', path_img)
random_img = np.random.rand(224, 224, 3) * 255
im = Image.fromarray(np.uint8(random_img))
return im
......@@ -11,7 +11,7 @@ import torch.distributed as dist
def _allreduce_fut(process_group: dist.ProcessGroup,
tensor: torch.Tensor) -> torch.futures.Future[torch.Tensor]:
"Averages the input gradient tensor by allreduce and returns a future."
'Averages the input gradient tensor by allreduce and returns a future.'
group_to_use = process_group if process_group is not None else dist.group.WORLD
# Apply the division first to avoid overflow, especially for FP16.
......
from contextlib import contextmanager
import deepspeed
import torch
import torch.nn as nn
import deepspeed
from deepspeed.runtime.zero import GatheredParameters
from contextlib import contextmanager
class EMADeepspeed(nn.Module):
......@@ -49,7 +50,7 @@ class EMADeepspeed(nn.Module):
shadow_params[sname] = shadow_params[sname].type_as(m_param[key])
shadow_params[sname].sub_(one_minus_decay * (shadow_params[sname] - m_param[key]))
else:
assert not key in self.m_name2s_name
assert key not in self.m_name2s_name
def copy_to(self, model):
shadow_params = dict(self.named_buffers())
......@@ -60,7 +61,7 @@ class EMADeepspeed(nn.Module):
if m_param[key].requires_grad:
m_param[key].data.copy_(shadow_params[self.m_name2s_name[key]].data)
else:
assert not key in self.m_name2s_name
assert key not in self.m_name2s_name
def store(self, model):
"""
......
......@@ -4,15 +4,15 @@
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
import argparse
import os
import time
import argparse
import torch
from tqdm import tqdm
from config import get_config
from models import build_model
from tqdm import tqdm
def get_args():
parser = argparse.ArgumentParser()
......@@ -31,6 +31,7 @@ def get_args():
cfg = get_config(args)
return args, cfg
def get_model(args, cfg):
model = build_model(cfg)
ckpt = torch.load(args.ckpt, map_location='cpu')['model']
......@@ -38,6 +39,7 @@ def get_model(args, cfg):
model.load_state_dict(ckpt)
return model
def speed_test(model, input):
# warmup
for _ in tqdm(range(100)):
......@@ -50,7 +52,8 @@ def speed_test(model, input):
_ = model(input)
end = time.time()
th = 100 / (end - start)
print(f"using time: {end - start}, throughput {th}")
print(f'using time: {end - start}, throughput {th}')
def torch2onnx(args, cfg):
model = get_model(args, cfg).cuda()
......@@ -66,6 +69,7 @@ def torch2onnx(args, cfg):
return model
def onnx2trt(args):
from mmdeploy.backend.tensorrt import from_onnx
......@@ -83,6 +87,7 @@ def onnx2trt(args):
max_workspace_size=2**30,
)
def check(args, cfg):
from mmdeploy.backend.tensorrt.wrapper import TRTWrapper
......@@ -105,6 +110,7 @@ def check(args, cfg):
speed_test(model, x)
speed_test(trt_model, dict(input=x))
def main():
args, cfg = get_args()
......@@ -117,5 +123,6 @@ def main():
print('onnx -> trt: success')
check(args, cfg)
if __name__ == '__main__':
main()
......@@ -71,8 +71,8 @@ class IntermediateLayerGetter:
def main(args, config):
from models import build_model
import torchvision.transforms as T
from models import build_model
from PIL import Image
model = build_model(config)
......@@ -110,13 +110,14 @@ def main(args, config):
if __name__ == '__main__':
import argparse
import torch
from config import get_config
parser = argparse.ArgumentParser('Get Intermediate Layer Output')
parser.add_argument('--cfg', type=str, required=True, metavar="FILE", help='Path to config file')
parser.add_argument('--img', type=str, required=True, metavar="FILE", help='Path to img file')
parser.add_argument("--keys", default=None, nargs='+', help="The intermediate layer's keys you want to save.")
parser.add_argument('--cfg', type=str, required=True, metavar='FILE', help='Path to config file')
parser.add_argument('--img', type=str, required=True, metavar='FILE', help='Path to img file')
parser.add_argument('--keys', default=None, nargs='+', help="The intermediate layer's keys you want to save.")
parser.add_argument('--resume', help='resume from checkpoint')
parser.add_argument('--save', action='store_true', help='Save the results.')
args = parser.parse_args()
......@@ -125,4 +126,4 @@ if __name__ == '__main__':
mid_outputs, model_output = main(args, config)
if args.save:
torch.save(mid_outputs, args.img[:-3] + '.pth')
\ No newline at end of file
torch.save(mid_outputs, args.img[:-3] + '.pth')
......@@ -4,10 +4,11 @@
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
import functools
import logging
import os
import sys
import logging
import functools
from termcolor import colored
......
......@@ -6,8 +6,8 @@
import torch
from timm.scheduler.cosine_lr import CosineLRScheduler
from timm.scheduler.step_lr import StepLRScheduler
from timm.scheduler.scheduler import Scheduler
from timm.scheduler.step_lr import StepLRScheduler
def build_scheduler(config, optimizer, n_iter_per_epoch):
......@@ -67,7 +67,7 @@ class LinearLRScheduler(Scheduler):
initialize=True,
) -> None:
super().__init__(optimizer,
param_group_field="lr",
param_group_field='lr',
noise_range_t=noise_range_t,
noise_pct=noise_pct,
noise_std=noise_std,
......
......@@ -4,34 +4,32 @@
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
import os
import time
import random
import argparse
import datetime
import numpy as np
import os
import random
import subprocess
import time
from contextlib import suppress
import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.distributed as dist
from timm.utils import ModelEma, ApexScaler
from timm.loss import LabelSmoothingCrossEntropy, SoftTargetCrossEntropy
from timm.utils import accuracy, AverageMeter
from config import get_config
from models import build_model
from dataset import build_loader
from ddp_hooks import fp16_compress_hook
from logger import create_logger
from lr_scheduler import build_scheduler
from models import build_model
from optimizer import build_optimizer
from logger import create_logger
from timm.loss import LabelSmoothingCrossEntropy, SoftTargetCrossEntropy
from timm.utils import ApexScaler, AverageMeter, ModelEma, accuracy
from utils import MyAverageMeter
from utils import NativeScalerWithGradNormCount as NativeScaler
from utils import (load_checkpoint, load_pretrained, save_checkpoint,
get_grad_norm, auto_resume_helper, reduce_tensor,
load_ema_checkpoint, MyAverageMeter)
from contextlib import suppress
from ddp_hooks import fp16_compress_hook
from utils import (auto_resume_helper, get_grad_norm, load_checkpoint,
load_ema_checkpoint, load_pretrained, reduce_tensor,
save_checkpoint)
try:
from apex import amp
......@@ -60,10 +58,10 @@ def parse_option():
parser.add_argument('--cfg',
type=str,
required=True,
metavar="FILE",
metavar='FILE',
help='path to config file')
parser.add_argument(
"--opts",
'--opts',
help="Modify config options by adding 'KEY VALUE' pairs. ",
default=None,
nargs='+')
......@@ -71,7 +69,7 @@ def parse_option():
# easy config modification
parser.add_argument('--batch-size',
type=int,
help="batch size for single GPU")
help='batch size for single GPU')
parser.add_argument('--dataset',
type=str,
help='dataset name',
......@@ -98,11 +96,11 @@ def parse_option():
parser.add_argument('--accumulation-steps',
type=int,
default=1,
help="gradient accumulation steps")
help='gradient accumulation steps')
parser.add_argument(
'--use-checkpoint',
action='store_true',
help="whether to use gradient checkpointing to save memory")
help='whether to use gradient checkpointing to save memory')
parser.add_argument(
'--amp-opt-level',
type=str,
......@@ -128,10 +126,10 @@ def parse_option():
parser.add_argument(
'--use-zero',
action='store_true',
help="whether to use ZeroRedundancyOptimizer (ZeRO) to save memory")
help='whether to use ZeroRedundancyOptimizer (ZeRO) to save memory')
# distributed training
parser.add_argument("--local-rank",
parser.add_argument('--local-rank',
type=int,
required=True,
help='local rank for DistributedDataParallel')
......@@ -152,14 +150,14 @@ def throughput(data_loader, model, logger):
for i in range(50):
model(images)
torch.cuda.synchronize()
logger.info(f"throughput averaged with 30 times")
logger.info(f'throughput averaged with 30 times')
tic1 = time.time()
for i in range(30):
model(images)
torch.cuda.synchronize()
tic2 = time.time()
logger.info(
f"batch_size {batch_size} throughput {30 * batch_size / (tic2 - tic1)}"
f'batch_size {batch_size} throughput {30 * batch_size / (tic2 - tic1)}'
)
return
......@@ -170,7 +168,7 @@ def main(config):
data_loader_val, data_loader_test, mixup_fn = build_loader(config)
# build runner
logger.info(f"Creating model:{config.MODEL.TYPE}/{config.MODEL.NAME}")
logger.info(f'Creating model:{config.MODEL.TYPE}/{config.MODEL.NAME}')
model = build_model(config)
model.cuda()
logger.info(str(model))
......@@ -178,7 +176,7 @@ def main(config):
# build optimizer
optimizer = build_optimizer(config, model)
if config.AMP_OPT_LEVEL != "O0":
if config.AMP_OPT_LEVEL != 'O0':
config.defrost()
if has_native_amp:
config.native_amp = True
......@@ -189,14 +187,14 @@ def main(config):
else:
use_amp = None
logger.warning(
"Neither APEX or native Torch AMP is available, using float32. "
"Install NVIDA apex or upgrade to PyTorch 1.6")
'Neither APEX or native Torch AMP is available, using float32. '
'Install NVIDA apex or upgrade to PyTorch 1.6')
config.freeze()
# setup automatic mixed-precision (AMP) loss scaling and op casting
amp_autocast = suppress # do nothing
loss_scaler = None
if config.AMP_OPT_LEVEL != "O0":
if config.AMP_OPT_LEVEL != 'O0':
if use_amp == 'apex':
model, optimizer = amp.initialize(model,
optimizer,
......@@ -223,16 +221,16 @@ def main(config):
model.register_comm_hook(state=None, hook=fp16_compress_hook)
logger.info('using fp16_compress_hook!')
except:
logger.info("cannot register fp16_compress_hook!")
logger.info('cannot register fp16_compress_hook!')
model_without_ddp = model.module
n_parameters = sum(p.numel() for p in model.parameters()
if p.requires_grad)
logger.info(f"number of params: {n_parameters}")
logger.info(f'number of params: {n_parameters}')
if hasattr(model_without_ddp, 'flops'):
flops = model_without_ddp.flops()
logger.info(f"number of GFLOPs: {flops / 1e9}")
logger.info(f'number of GFLOPs: {flops / 1e9}')
# build learning rate scheduler
lr_scheduler = build_scheduler(config, optimizer, len(data_loader_train)) \
......@@ -256,7 +254,7 @@ def main(config):
if resume_file:
if config.MODEL.RESUME:
logger.warning(
f"auto-resume changing resume file from {config.MODEL.RESUME} to {resume_file}"
f'auto-resume changing resume file from {config.MODEL.RESUME} to {resume_file}'
)
config.defrost()
config.MODEL.RESUME = resume_file
......@@ -274,14 +272,14 @@ def main(config):
if data_loader_val is not None:
acc1, acc5, loss = validate(config, data_loader_val, model)
logger.info(
f"Accuracy of the network on the {len(dataset_val)} test images: {acc1:.1f}%"
f'Accuracy of the network on the {len(dataset_val)} test images: {acc1:.1f}%'
)
elif config.MODEL.PRETRAINED:
load_pretrained(config, model_without_ddp, logger)
if data_loader_val is not None:
acc1, acc5, loss = validate(config, data_loader_val, model)
logger.info(
f"Accuracy of the network on the {len(dataset_val)} test images: {acc1:.1f}%"
f'Accuracy of the network on the {len(dataset_val)} test images: {acc1:.1f}%'
)
# evaluate EMA
......@@ -289,12 +287,12 @@ def main(config):
if config.TRAIN.EMA.ENABLE:
# Important to create EMA model after cuda(), DP wrapper, and AMP but before SyncBN and DDP wrapper
model_ema = ModelEma(model, decay=config.TRAIN.EMA.DECAY)
print("Using EMA with decay = %.8f" % config.TRAIN.EMA.DECAY)
print('Using EMA with decay = %.8f' % config.TRAIN.EMA.DECAY)
if config.MODEL.RESUME:
load_ema_checkpoint(config, model_ema, logger)
acc1, acc5, loss = validate(config, data_loader_val, model_ema.ema)
logger.info(
f"Accuracy of the ema network on the {len(dataset_val)} test images: {acc1:.1f}%"
f'Accuracy of the ema network on the {len(dataset_val)} test images: {acc1:.1f}%'
)
if config.THROUGHPUT_MODE:
......@@ -304,7 +302,7 @@ def main(config):
return
# train
logger.info("Start training")
logger.info('Start training')
start_time = time.time()
for epoch in range(config.TRAIN.START_EPOCH, config.TRAIN.EPOCHS):
data_loader_train.sampler.set_epoch(epoch)
......@@ -337,7 +335,7 @@ def main(config):
if data_loader_val is not None and epoch % config.EVAL_FREQ == 0:
acc1, acc5, loss = validate(config, data_loader_val, model, epoch)
logger.info(
f"Accuracy of the network on the {len(dataset_val)} test images: {acc1:.1f}%"
f'Accuracy of the network on the {len(dataset_val)} test images: {acc1:.1f}%'
)
if dist.get_rank() == 0 and acc1 > max_accuracy:
save_checkpoint(config,
......@@ -357,7 +355,7 @@ def main(config):
acc1, acc5, loss = validate(config, data_loader_val,
model_ema.ema, epoch)
logger.info(
f"Accuracy of the ema network on the {len(dataset_val)} test images: {acc1:.1f}%"
f'Accuracy of the ema network on the {len(dataset_val)} test images: {acc1:.1f}%'
)
if dist.get_rank() == 0 and acc1 > max_ema_accuracy:
save_checkpoint(config,
......@@ -411,7 +409,7 @@ def train_one_epoch(config,
samples, targets = mixup_fn(samples, targets)
if not obsolete_torch_version(TORCH_VERSION,
(1, 9)) and config.AMP_OPT_LEVEL != "O0":
(1, 9)) and config.AMP_OPT_LEVEL != 'O0':
with amp_autocast(dtype=amp_type):
outputs = model(samples)
else:
......@@ -420,7 +418,7 @@ def train_one_epoch(config,
if config.TRAIN.ACCUMULATION_STEPS > 1:
if not obsolete_torch_version(
TORCH_VERSION, (1, 9)) and config.AMP_OPT_LEVEL != "O0":
TORCH_VERSION, (1, 9)) and config.AMP_OPT_LEVEL != 'O0':
with amp_autocast(dtype=amp_type):
loss = criterion(outputs, targets)
loss = loss / config.TRAIN.ACCUMULATION_STEPS
......@@ -428,7 +426,7 @@ def train_one_epoch(config,
with amp_autocast():
loss = criterion(outputs, targets)
loss = loss / config.TRAIN.ACCUMULATION_STEPS
if config.AMP_OPT_LEVEL != "O0":
if config.AMP_OPT_LEVEL != 'O0':
is_second_order = hasattr(optimizer, 'is_second_order') and \
optimizer.is_second_order
grad_norm = loss_scaler(loss,
......@@ -458,14 +456,14 @@ def train_one_epoch(config,
lr_scheduler.step_update(epoch * num_steps + idx)
else:
if not obsolete_torch_version(
TORCH_VERSION, (1, 9)) and config.AMP_OPT_LEVEL != "O0":
TORCH_VERSION, (1, 9)) and config.AMP_OPT_LEVEL != 'O0':
with amp_autocast(dtype=amp_type):
loss = criterion(outputs, targets)
else:
with amp_autocast():
loss = criterion(outputs, targets)
optimizer.zero_grad()
if config.AMP_OPT_LEVEL != "O0":
if config.AMP_OPT_LEVEL != 'O0':
is_second_order = hasattr(optimizer, 'is_second_order') and \
optimizer.is_second_order
grad_norm = loss_scaler(loss,
......@@ -513,7 +511,7 @@ def train_one_epoch(config,
f'mem {memory_used:.0f}MB')
epoch_time = time.time() - start
logger.info(
f"EPOCH {epoch} training takes {datetime.timedelta(seconds=int(epoch_time))}"
f'EPOCH {epoch} training takes {datetime.timedelta(seconds=int(epoch_time))}'
)
......@@ -578,35 +576,35 @@ def validate(config, data_loader, model, epoch=None):
if __name__ == '__main__':
_, config = parse_option()
if config.AMP_OPT_LEVEL != "O0":
assert has_native_amp, "Please update pytorch(1.6+) to support amp!"
if config.AMP_OPT_LEVEL != 'O0':
assert has_native_amp, 'Please update pytorch(1.6+) to support amp!'
# init distributed env
if 'SLURM_PROCID' in os.environ and int(os.environ['SLURM_TASKS_PER_NODE']) != 1:
print("\nDist init: SLURM")
print('\nDist init: SLURM')
rank = int(os.environ['SLURM_PROCID'])
gpu = rank % torch.cuda.device_count()
config.defrost()
config.LOCAL_RANK = gpu
config.freeze()
world_size = int(os.environ["SLURM_NTASKS"])
if "MASTER_PORT" not in os.environ:
os.environ["MASTER_PORT"] = "29501"
node_list = os.environ["SLURM_NODELIST"]
world_size = int(os.environ['SLURM_NTASKS'])
if 'MASTER_PORT' not in os.environ:
os.environ['MASTER_PORT'] = '29501'
node_list = os.environ['SLURM_NODELIST']
addr = subprocess.getoutput(
f"scontrol show hostname {node_list} | head -n1")
if "MASTER_ADDR" not in os.environ:
os.environ["MASTER_ADDR"] = addr
f'scontrol show hostname {node_list} | head -n1')
if 'MASTER_ADDR' not in os.environ:
os.environ['MASTER_ADDR'] = addr
os.environ['RANK'] = str(rank)
os.environ['LOCAL_RANK'] = str(gpu)
os.environ['LOCAL_SIZE'] = str(torch.cuda.device_count())
os.environ['WORLD_SIZE'] = str(world_size)
if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:
rank = int(os.environ["RANK"])
rank = int(os.environ['RANK'])
world_size = int(os.environ['WORLD_SIZE'])
print(f"RANK and WORLD_SIZE in environ: {rank}/{world_size}")
print(f'RANK and WORLD_SIZE in environ: {rank}/{world_size}')
else:
rank = -1
world_size = -1
......@@ -647,13 +645,13 @@ if __name__ == '__main__':
os.makedirs(config.OUTPUT, exist_ok=True)
logger = create_logger(output_dir=config.OUTPUT,
dist_rank=dist.get_rank(),
name=f"{config.MODEL.NAME}")
name=f'{config.MODEL.NAME}')
if dist.get_rank() == 0:
path = os.path.join(config.OUTPUT, "config.json")
with open(path, "w") as f:
path = os.path.join(config.OUTPUT, 'config.json')
with open(path, 'w') as f:
f.write(config.dump())
logger.info(f"Full config saved to {path}")
logger.info(f'Full config saved to {path}')
# print config
logger.info(config.dump())
......
......@@ -4,31 +4,29 @@
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
import datetime
import argparse
import os
import time
import datetime
import logging
import os
import random
import time
import warnings
import numpy as np
import torch
import torch.backends.cudnn as cudnn
import numpy as np
from accelerate import Accelerator
from accelerate import GradScalerKwargs
from accelerate import Accelerator, GradScalerKwargs
from accelerate.logging import get_logger
from timm.loss import LabelSmoothingCrossEntropy, SoftTargetCrossEntropy
from timm.utils import AverageMeter, accuracy, ModelEma
from tqdm import tqdm
import warnings
from config import get_config
from models import build_model
from dataset import build_loader2
from ddp_hooks import fp16_compress_hook
from lr_scheduler import build_scheduler
from models import build_model
from optimizer import build_optimizer
from utils import load_pretrained, load_ema_checkpoint
from ddp_hooks import fp16_compress_hook
from timm.loss import LabelSmoothingCrossEntropy, SoftTargetCrossEntropy
from timm.utils import AverageMeter, ModelEma, accuracy
from tqdm import tqdm
from utils import load_ema_checkpoint, load_pretrained
logger = get_logger(__name__)
warnings.filterwarnings('ignore')
......@@ -37,11 +35,11 @@ warnings.filterwarnings('ignore')
def parse_option():
parser = argparse.ArgumentParser(
'InternImage training and evaluation script', add_help=False)
parser.add_argument('--cfg', type=str, required=True, metavar="FILE", help='path to config file')
parser.add_argument("--opts", help="Modify config options by adding 'KEY VALUE' pairs. ", default=None, nargs='+')
parser.add_argument('--cfg', type=str, required=True, metavar='FILE', help='path to config file')
parser.add_argument('--opts', help="Modify config options by adding 'KEY VALUE' pairs. ", default=None, nargs='+')
# easy config modification
parser.add_argument('--batch-size', type=int, help="batch size for single GPU")
parser.add_argument('--batch-size', type=int, help='batch size for single GPU')
parser.add_argument('--dataset', type=str, help='dataset name', default=None)
parser.add_argument('--data-path', type=str, help='path to dataset')
parser.add_argument('--zip', action='store_true', help='use zipped dataset instead of folder dataset')
......@@ -58,16 +56,16 @@ def parse_option():
parser.add_argument('--eval', action='store_true', help='Perform evaluation only')
parser.add_argument('--throughput', action='store_true', help='Test throughput only')
parser.add_argument('--save-ckpt-num', default=1, type=int)
parser.add_argument('--accumulation-steps', type=int, default=1, help="gradient accumulation steps")
parser.add_argument('--accumulation-steps', type=int, default=1, help='gradient accumulation steps')
parser.add_argument('--disable-grad-scalar', action='store_true', help='disable Grad Scalar')
parser.add_argument(
"--logger",
'--logger',
type=str,
default="tensorboard",
choices=["tensorboard", "wandb"],
default='tensorboard',
choices=['tensorboard', 'wandb'],
help=(
"Whether to use [tensorboard](https://www.tensorflow.org/tensorboard) or [wandb](https://www.wandb.ai)"
" for experiment tracking and logging of model metrics and model checkpoints"
'Whether to use [tensorboard](https://www.tensorflow.org/tensorboard) or [wandb](https://www.wandb.ai)'
' for experiment tracking and logging of model metrics and model checkpoints'
),
)
......@@ -91,10 +89,10 @@ def seed_everything(seed, rank):
def save_config(config):
path = os.path.join(config.OUTPUT, "config.json")
with open(path, "w") as f:
path = os.path.join(config.OUTPUT, 'config.json')
with open(path, 'w') as f:
f.write(config.dump())
logger.info(f"Full config saved to {path}")
logger.info(f'Full config saved to {path}')
def build_criterion(config):
......@@ -140,7 +138,7 @@ def setup_autoresume(config):
if resume_file:
if config.MODEL.RESUME:
logger.warning(f"auto-resume changing resume file from {config.MODEL.RESUME} to {resume_file}")
logger.warning(f'auto-resume changing resume file from {config.MODEL.RESUME} to {resume_file}')
config.defrost()
config.MODEL.RESUME = resume_file
config.freeze()
......@@ -200,10 +198,10 @@ def load_checkpoint_if_needed(accelerator, config, lr_scheduler=None):
def log_model_statistic(model_wo_ddp):
n_parameters = sum(p.numel() for p in model_wo_ddp.parameters()
if p.requires_grad)
logger.info(f"number of params: {n_parameters}")
logger.info(f'number of params: {n_parameters}')
if hasattr(model_wo_ddp, 'flops'):
flops = model_wo_ddp.flops()
logger.info(f"number of GFLOPs: {flops / 1e9}")
logger.info(f'number of GFLOPs: {flops / 1e9}')
def train_epoch(*, model, optimizer, data_loader, scheduler, criterion, mixup_fn,
......@@ -316,15 +314,15 @@ def train(config, accelerator: Accelerator):
model.register_comm_hook(state=None, hook=fp16_compress_hook)
logger.info('using fp16_compress_hook!')
except:
logger.info("cannot register fp16_compress_hook!")
logger.info('cannot register fp16_compress_hook!')
max_acc = load_checkpoint_if_needed(accelerator, config, lr_scheduler)
logger.info(f"Created model:{config.MODEL.TYPE}/{config.MODEL.NAME}")
logger.info(f'Created model:{config.MODEL.TYPE}/{config.MODEL.NAME}')
logger.info(str(model))
logger.info("Effective Optimizer Steps: {}".format(effective_update_steps_per_epoch))
logger.info("Start training")
logger.info("Max accuracy: {}".format(max_acc))
logger.info('Effective Optimizer Steps: {}'.format(effective_update_steps_per_epoch))
logger.info('Start training')
logger.info('Max accuracy: {}'.format(max_acc))
log_model_statistic(accelerator.unwrap_model(model))
for epoch in range(config.TRAIN.START_EPOCH, config.TRAIN.EPOCHS):
......@@ -346,8 +344,8 @@ def main():
args, config = parse_option()
os.makedirs(config.OUTPUT, exist_ok=True)
logging.basicConfig(
format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
datefmt="%m/%d/%Y %H:%M:%S",
format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
datefmt='%m/%d/%Y %H:%M:%S',
filename=os.path.join(config.OUTPUT, 'run.log'),
level=logging.INFO,
)
......
......@@ -4,40 +4,39 @@
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
import os
import time
import random
import argparse
import datetime
import numpy as np
import os
import random
import subprocess
import time
import deepspeed
import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import deepspeed
from timm.loss import LabelSmoothingCrossEntropy, SoftTargetCrossEntropy
from timm.utils import accuracy, AverageMeter
from config import get_config
from models import build_model
from dataset import build_loader
from lr_scheduler import build_scheduler
from optimizer import set_weight_decay_and_lr
from logger import create_logger
from utils import load_pretrained, reduce_tensor, MyAverageMeter
from ddp_hooks import fp16_compress_hook
from ema_deepspeed import EMADeepspeed
from logger import create_logger
from lr_scheduler import build_scheduler
from models import build_model
from optimizer import set_weight_decay_and_lr
from timm.loss import LabelSmoothingCrossEntropy, SoftTargetCrossEntropy
from timm.utils import AverageMeter, accuracy
from utils import MyAverageMeter, load_pretrained, reduce_tensor
def parse_option():
parser = argparse.ArgumentParser(
'InternImage training and evaluation script', add_help=False)
parser.add_argument('--cfg', type=str, required=True, metavar="FILE", help='path to config file')
parser.add_argument("--opts", help="Modify config options by adding 'KEY VALUE' pairs. ", default=None, nargs='+')
parser.add_argument('--cfg', type=str, required=True, metavar='FILE', help='path to config file')
parser.add_argument('--opts', help="Modify config options by adding 'KEY VALUE' pairs. ", default=None, nargs='+')
# easy config modification
parser.add_argument('--batch-size', type=int, help="batch size for single GPU")
parser.add_argument('--batch-size', type=int, help='batch size for single GPU')
parser.add_argument('--dataset', type=str, help='dataset name', default=None)
parser.add_argument('--data-path', type=str, help='path to dataset')
parser.add_argument('--zip', action='store_true', help='use zipped dataset instead of folder dataset')
......@@ -56,10 +55,10 @@ def parse_option():
parser.add_argument('--eval', action='store_true', help='Perform evaluation only')
parser.add_argument('--throughput', action='store_true', help='Test throughput only')
parser.add_argument('--save-ckpt-num', default=1, type=int)
parser.add_argument('--accumulation-steps', type=int, default=1, help="gradient accumulation steps")
parser.add_argument('--accumulation-steps', type=int, default=1, help='gradient accumulation steps')
# distributed training
parser.add_argument("--local-rank", type=int, required=True, help='local rank for DistributedDataParallel')
parser.add_argument('--local-rank', type=int, required=True, help='local rank for DistributedDataParallel')
# deepspeed config
parser.add_argument('--disable-grad-scalar', action='store_true', help='disable Grad Scalar')
......@@ -69,7 +68,7 @@ def parse_option():
help='enable model offloading')
# To use Zero3, Please use main_accelerate.py instead.
# For this script, we are facing a similar issue as https://github.com/microsoft/DeepSpeed/issues/3068
parser.add_argument("--zero-stage", type=int, default=1, choices=[1, 2], help='deep speed zero stage')
parser.add_argument('--zero-stage', type=int, default=1, choices=[1, 2], help='deep speed zero stage')
args, unparsed = parser.parse_known_args()
config = get_config(args)
......@@ -87,10 +86,10 @@ def seed_everything(seed, rank):
def save_config(config):
path = os.path.join(config.OUTPUT, "config.json")
with open(path, "w") as f:
path = os.path.join(config.OUTPUT, 'config.json')
with open(path, 'w') as f:
f.write(config.dump())
logger.info(f"Full config saved to {path}")
logger.info(f'Full config saved to {path}')
def build_criterion(config):
......@@ -132,10 +131,10 @@ def scale_learning_rate(config, num_processes):
def log_model_statistic(model_wo_ddp):
n_parameters = sum(p.numel() for p in model_wo_ddp.parameters()
if p.requires_grad)
logger.info(f"number of params: {n_parameters / 1e6} M")
logger.info(f'number of params: {n_parameters / 1e6} M')
if hasattr(model_wo_ddp, 'flops'):
flops = model_wo_ddp.flops()
logger.info(f"number of GFLOPs: {flops / 1e9}")
logger.info(f'number of GFLOPs: {flops / 1e9}')
def get_parameter_groups(model, config):
......@@ -171,37 +170,37 @@ def build_ds_config(config, args):
opt_lower = config.TRAIN.OPTIMIZER.NAME.lower()
if opt_lower == 'adamw':
optimizer = {
"type": "AdamW",
"params": {
"lr": config.TRAIN.BASE_LR,
"eps": config.TRAIN.OPTIMIZER.EPS,
"betas": config.TRAIN.OPTIMIZER.BETAS,
"weight_decay": config.TRAIN.WEIGHT_DECAY
'type': 'AdamW',
'params': {
'lr': config.TRAIN.BASE_LR,
'eps': config.TRAIN.OPTIMIZER.EPS,
'betas': config.TRAIN.OPTIMIZER.BETAS,
'weight_decay': config.TRAIN.WEIGHT_DECAY
}
}
else:
return NotImplemented
ds_config = {
"train_micro_batch_size_per_gpu": config.DATA.BATCH_SIZE,
"optimizer": optimizer,
"fp16": {
"enabled": True,
"auto_cast": True,
"loss_scale": 1 if args.disable_grad_scalar else 0
'train_micro_batch_size_per_gpu': config.DATA.BATCH_SIZE,
'optimizer': optimizer,
'fp16': {
'enabled': True,
'auto_cast': True,
'loss_scale': 1 if args.disable_grad_scalar else 0
},
"zero_optimization": {
"stage": args.zero_stage,
"offload_optimizer": {
"device": args.offload_optimizer
'zero_optimization': {
'stage': args.zero_stage,
'offload_optimizer': {
'device': args.offload_optimizer
},
"offload_param": {
"device": args.offload_param
'offload_param': {
'device': args.offload_param
}
},
"steps_per_print": 1e10,
"gradient_accumulation_steps": config.TRAIN.ACCUMULATION_STEPS,
"gradient_clipping": config.TRAIN.CLIP_GRAD,
'steps_per_print': 1e10,
'gradient_accumulation_steps': config.TRAIN.ACCUMULATION_STEPS,
'gradient_clipping': config.TRAIN.CLIP_GRAD,
}
return ds_config
......@@ -216,14 +215,14 @@ def throughput(data_loader, model, logger):
for i in range(50):
model(images)
torch.cuda.synchronize()
logger.info(f"throughput averaged with 30 times")
logger.info(f'throughput averaged with 30 times')
tic1 = time.time()
for i in range(30):
model(images)
torch.cuda.synchronize()
tic2 = time.time()
logger.info(
f"batch_size {batch_size} throughput {30 * batch_size / (tic2 - tic1)}"
f'batch_size {batch_size} throughput {30 * batch_size / (tic2 - tic1)}'
)
return
......@@ -281,7 +280,7 @@ def train_epoch(config, model, criterion, data_loader, optimizer, epoch, mixup_f
f'mem {memory_used:.0f}MB')
epoch_time = time.time() - start
logger.info(f"EPOCH {epoch} training takes {datetime.timedelta(seconds=int(epoch_time))}")
logger.info(f'EPOCH {epoch} training takes {datetime.timedelta(seconds=int(epoch_time))}')
@torch.no_grad()
......@@ -361,7 +360,7 @@ def train(config, ds_config):
model.register_comm_hook(state=None, hook=fp16_compress_hook)
logger.info('using fp16_compress_hook!')
except:
logger.info("cannot register fp16_compress_hook!")
logger.info('cannot register fp16_compress_hook!')
model_without_ddp = model.module
......@@ -399,10 +398,10 @@ def train(config, ds_config):
# -------------- training ---------------- #
logger.info(f"Creating model:{config.MODEL.TYPE}/{config.MODEL.NAME}")
logger.info(f'Creating model:{config.MODEL.TYPE}/{config.MODEL.NAME}')
logger.info(str(model))
logger.info(get_optimizer_state_str(optimizer))
logger.info("Start training")
logger.info('Start training')
logger.info('max_accuracy: {}'.format(max_accuracy))
log_model_statistic(model_without_ddp)
......@@ -429,7 +428,7 @@ def train(config, ds_config):
if epoch % config.EVAL_FREQ == 0:
acc1, _, _ = eval_epoch(config, data_loader_val, model, epoch)
logger.info(f"Accuracy of the network on the {len(dataset_val)} test images: {acc1:.1f}%")
logger.info(f'Accuracy of the network on the {len(dataset_val)} test images: {acc1:.1f}%')
if acc1 > max_accuracy:
model.save_checkpoint(
......@@ -451,7 +450,7 @@ def train(config, ds_config):
if model_ema is not None:
with model_ema.activate(model):
acc1_ema, _, _ = eval_epoch(config, data_loader_val, model, epoch)
logger.info(f"[EMA] Accuracy of the network on the {len(dataset_val)} test images: {acc1_ema:.1f}%")
logger.info(f'[EMA] Accuracy of the network on the {len(dataset_val)} test images: {acc1_ema:.1f}%')
max_accuracy_ema = max(max_accuracy_ema, acc1_ema)
logger.info(f'[EMA] Max accuracy: {max_accuracy_ema:.2f}%')
......@@ -475,7 +474,8 @@ def eval(config):
logger.info(msg)
except:
try:
from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint
from deepspeed.utils.zero_to_fp32 import \
get_fp32_state_dict_from_zero_checkpoint
ckpt_dir = os.path.dirname(config.MODEL.RESUME)
tag = os.path.basename(config.MODEL.RESUME)
state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir=ckpt_dir, tag=tag)
......@@ -498,30 +498,30 @@ if __name__ == '__main__':
# init distributed env
if 'SLURM_PROCID' in os.environ and int(os.environ['SLURM_TASKS_PER_NODE']) != 1:
print("\nDist init: SLURM")
print('\nDist init: SLURM')
rank = int(os.environ['SLURM_PROCID'])
gpu = rank % torch.cuda.device_count()
config.defrost()
config.LOCAL_RANK = gpu
config.freeze()
world_size = int(os.environ["SLURM_NTASKS"])
if "MASTER_PORT" not in os.environ:
os.environ["MASTER_PORT"] = "29501"
node_list = os.environ["SLURM_NODELIST"]
world_size = int(os.environ['SLURM_NTASKS'])
if 'MASTER_PORT' not in os.environ:
os.environ['MASTER_PORT'] = '29501'
node_list = os.environ['SLURM_NODELIST']
addr = subprocess.getoutput(
f"scontrol show hostname {node_list} | head -n1")
if "MASTER_ADDR" not in os.environ:
os.environ["MASTER_ADDR"] = addr
f'scontrol show hostname {node_list} | head -n1')
if 'MASTER_ADDR' not in os.environ:
os.environ['MASTER_ADDR'] = addr
os.environ['RANK'] = str(rank)
os.environ['LOCAL_RANK'] = str(gpu)
os.environ['LOCAL_SIZE'] = str(torch.cuda.device_count())
os.environ['WORLD_SIZE'] = str(world_size)
if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:
rank = int(os.environ["RANK"])
rank = int(os.environ['RANK'])
world_size = int(os.environ['WORLD_SIZE'])
print(f"RANK and WORLD_SIZE in environ: {rank}/{world_size}")
print(f'RANK and WORLD_SIZE in environ: {rank}/{world_size}')
else:
rank = -1
world_size = -1
......@@ -535,7 +535,7 @@ if __name__ == '__main__':
os.makedirs(config.OUTPUT, exist_ok=True)
logger = create_logger(output_dir=config.OUTPUT,
dist_rank=dist.get_rank(),
name=f"{config.MODEL.NAME}")
name=f'{config.MODEL.NAME}')
logger.info(config.dump())
if dist.get_rank() == 0: save_config(config)
......
......@@ -997,4 +997,4 @@
21194
21198
21367
21815
\ No newline at end of file
21815
......@@ -4,4 +4,4 @@
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
from .build import build_model
\ No newline at end of file
from .build import build_model
......@@ -31,6 +31,6 @@ def build_model(config):
remove_center=config.MODEL.INTERN_IMAGE.REMOVE_CENTER,
)
else:
raise NotImplementedError(f"Unkown model: {model_type}")
raise NotImplementedError(f'Unkown model: {model_type}')
return model
......@@ -6,10 +6,10 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.checkpoint as checkpoint
from timm.models.layers import trunc_normal_, DropPath
from ops_dcnv3 import modules as opsm
import torch.nn.functional as F
from timm.models.layers import DropPath, trunc_normal_
class to_channels_first(nn.Module):
......@@ -80,7 +80,7 @@ class CrossAttention(nn.Module):
attn_head_dim (int, optional): Dimension of attention head.
out_dim (int, optional): Dimension of output.
"""
def __init__(self,
dim,
num_heads=8,
......@@ -172,7 +172,7 @@ class AttentiveBlock(nn.Module):
attn_head_dim (int, optional): Dimension of attention head. Default: None.
out_dim (int, optional): Dimension of output. Default: None.
"""
def __init__(self,
dim,
num_heads,
......@@ -181,7 +181,7 @@ class AttentiveBlock(nn.Module):
drop=0.,
attn_drop=0.,
drop_path=0.,
norm_layer="LN",
norm_layer='LN',
attn_head_dim=None,
out_dim=None):
super().__init__()
......@@ -593,10 +593,10 @@ class InternImage(nn.Module):
print(f'using activation layer: {act_layer}')
print(f'using main norm layer: {norm_layer}')
print(f'using dpr: {drop_path_type}, {drop_path_rate}')
print(f"level2_post_norm: {level2_post_norm}")
print(f"level2_post_norm_block_ids: {level2_post_norm_block_ids}")
print(f"res_post_norm: {res_post_norm}")
print(f"remove_center: {remove_center}")
print(f'level2_post_norm: {level2_post_norm}')
print(f'level2_post_norm_block_ids: {level2_post_norm_block_ids}')
print(f'res_post_norm: {res_post_norm}')
print(f'remove_center: {remove_center}')
in_chans = 3
self.patch_embed = StemLayer(in_chans=in_chans,
......@@ -638,7 +638,7 @@ class InternImage(nn.Module):
remove_center=remove_center, # for InternImage-H/G
)
self.levels.append(level)
if not use_clip_projector: # for InternImage-T/S/B/L/XL
self.conv_head = nn.Sequential(
nn.Conv2d(self.num_features,
......@@ -671,7 +671,7 @@ class InternImage(nn.Module):
self.fc_norm = build_norm_layer(clip_embed_dim, norm_layer, eps=1e-6)
self.head = nn.Linear(
clip_embed_dim, num_classes) if num_classes > 0 else nn.Identity()
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.num_layers = len(depths)
self.apply(self._init_weights)
......@@ -705,16 +705,16 @@ class InternImage(nn.Module):
lr_ratios[tag] = decay
idx += 1
# patch_embed (before stage-1)
lr_ratios["patch_embed"] = lr_ratios['levels.0.blocks.0.']
lr_ratios['patch_embed'] = lr_ratios['levels.0.blocks.0.']
# levels.0.downsample (between stage-1 and stage-2)
lr_ratios["levels.0.downsample"] = lr_ratios['levels.1.blocks.0.']
lr_ratios["levels.0.norm"] = lr_ratios['levels.1.blocks.0.']
lr_ratios['levels.0.downsample'] = lr_ratios['levels.1.blocks.0.']
lr_ratios['levels.0.norm'] = lr_ratios['levels.1.blocks.0.']
# levels.1.downsample (between stage-2 and stage-3)
lr_ratios["levels.1.downsample"] = lr_ratios['levels.2.blocks.0.']
lr_ratios["levels.1.norm"] = lr_ratios['levels.2.blocks.0.']
lr_ratios['levels.1.downsample'] = lr_ratios['levels.2.blocks.0.']
lr_ratios['levels.1.norm'] = lr_ratios['levels.2.blocks.0.']
# levels.2.downsample (between stage-3 and stage-4)
lr_ratios["levels.2.downsample"] = lr_ratios['levels.3.blocks.0.']
lr_ratios["levels.2.norm"] = lr_ratios['levels.3.blocks.0.']
lr_ratios['levels.2.downsample'] = lr_ratios['levels.3.blocks.0.']
lr_ratios['levels.2.norm'] = lr_ratios['levels.3.blocks.0.']
return lr_ratios
def forward_features(self, x):
......@@ -738,11 +738,11 @@ class InternImage(nn.Module):
x, x_ = level(x, return_wo_downsample=True)
seq_out.append(x_)
return seq_out
def forward_clip_projector(self, x): # for InternImage-H/G
xs = self.forward_features_seq_out(x)
x1, x2, x3, x4 = xs
x1 = x1.permute(0, 3, 1, 2) # NHWC -> NCHW
x2 = x2.permute(0, 3, 1, 2) # NHWC -> NCHW
x3 = x3.permute(0, 3, 1, 2) # NHWC -> NCHW
......@@ -756,9 +756,9 @@ class InternImage(nn.Module):
x = x.flatten(-2).transpose(1, 2).contiguous()
x = self.clip_projector(x)
x = self.fc_norm(x)
return x
def forward(self, x):
if self.use_clip_projector: # for InternImage-H/G
x = self.forward_clip_projector(x)
......
......@@ -4,18 +4,16 @@
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import, division, print_function
import DCNv3
import pkg_resources
import torch
import torch.nn.functional as F
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.cuda.amp import custom_bwd, custom_fwd
import DCNv3
import pkg_resources
dcn_version = float(pkg_resources.get_distribution('DCNv3').version)
......@@ -169,6 +167,7 @@ def remove_center_sampling_locations(sampling_locations, kernel_w, kernel_h):
sampling_locations = sampling_locations[:,:,:,idx, :]
return sampling_locations
def dcnv3_core_pytorch(
input, offset, mask, kernel_h,
kernel_w, stride_h, stride_w, pad_h,
......
......@@ -4,4 +4,4 @@
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
from .dcnv3 import DCNv3, DCNv3_pytorch
\ No newline at end of file
from .dcnv3 import DCNv3, DCNv3_pytorch
......@@ -4,15 +4,15 @@
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import, division, print_function
import warnings
import torch
from torch import nn
import torch.nn.functional as F
from torch.nn.init import xavier_uniform_, constant_
from torch import nn
from torch.nn.init import constant_, xavier_uniform_
from ..functions import DCNv3Function, dcnv3_core_pytorch
......@@ -72,7 +72,7 @@ def build_act_layer(act_layer):
def _is_power_of_2(n):
if (not isinstance(n, int)) or (n < 0):
raise ValueError(
"invalid input for _is_power_of_2: {} (type: {})".format(n, type(n)))
'invalid input for _is_power_of_2: {} (type: {})'.format(n, type(n)))
return (n & (n - 1) == 0) and n != 0
......@@ -126,7 +126,7 @@ class DCNv3_pytorch(nn.Module):
if not _is_power_of_2(_d_per_group):
warnings.warn(
"You'd better set channels in DCNv3 to make the dimension of each attention head a power of 2 "
"which is more efficient in our CUDA implementation.")
'which is more efficient in our CUDA implementation.')
self.offset_scale = offset_scale
self.channels = channels
......@@ -164,7 +164,7 @@ class DCNv3_pytorch(nn.Module):
self.input_proj = nn.Linear(channels, channels)
self.output_proj = nn.Linear(channels, channels)
self._reset_parameters()
if center_feature_scale:
self.center_feature_scale_proj_weight = nn.Parameter(
torch.zeros((group, channels), dtype=torch.float))
......@@ -256,7 +256,7 @@ class DCNv3(nn.Module):
if not _is_power_of_2(_d_per_group):
warnings.warn(
"You'd better set channels in DCNv3 to make the dimension of each attention head a power of 2 "
"which is more efficient in our CUDA implementation.")
'which is more efficient in our CUDA implementation.')
self.offset_scale = offset_scale
self.channels = channels
......@@ -297,7 +297,7 @@ class DCNv3(nn.Module):
self.input_proj = nn.Linear(channels, channels)
self.output_proj = nn.Linear(channels, channels)
self._reset_parameters()
if center_feature_scale:
self.center_feature_scale_proj_weight = nn.Parameter(
torch.zeros((group, channels), dtype=torch.float))
......@@ -332,7 +332,7 @@ class DCNv3(nn.Module):
mask = self.mask(x1).reshape(N, H, W, self.group, -1)
mask = F.softmax(mask, -1)
mask = mask.reshape(N, H, W, -1).type(dtype)
x = DCNv3Function.apply(
x, offset, mask,
self.kernel_size, self.kernel_size,
......@@ -343,7 +343,7 @@ class DCNv3(nn.Module):
self.offset_scale,
256,
self.remove_center)
if self.center_feature_scale:
center_feature_scale = self.center_feature_scale_module(
x1, self.center_feature_scale_proj_weight, self.center_feature_scale_proj_bias)
......
......@@ -4,39 +4,34 @@
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
import os
import glob
import os
import torch
from setuptools import find_packages, setup
from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension
from torch.utils.cpp_extension import CUDA_HOME
from torch.utils.cpp_extension import CppExtension
from torch.utils.cpp_extension import CUDAExtension
from setuptools import find_packages
from setuptools import setup
requirements = ["torch", "torchvision"]
requirements = ['torch', 'torchvision']
def get_extensions():
this_dir = os.path.dirname(os.path.abspath(__file__))
extensions_dir = os.path.join(this_dir, "src")
extensions_dir = os.path.join(this_dir, 'src')
main_file = glob.glob(os.path.join(extensions_dir, "*.cpp"))
source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp"))
source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu"))
main_file = glob.glob(os.path.join(extensions_dir, '*.cpp'))
source_cpu = glob.glob(os.path.join(extensions_dir, 'cpu', '*.cpp'))
source_cuda = glob.glob(os.path.join(extensions_dir, 'cuda', '*.cu'))
sources = main_file + source_cpu
extension = CppExtension
extra_compile_args = {"cxx": []}
extra_compile_args = {'cxx': []}
define_macros = []
if torch.cuda.is_available() and CUDA_HOME is not None:
extension = CUDAExtension
sources += source_cuda
define_macros += [("WITH_CUDA", None)]
extra_compile_args["nvcc"] = [
define_macros += [('WITH_CUDA', None)]
extra_compile_args['nvcc'] = [
# "-DCUDA_HAS_FP16=1",
# "-D__CUDA_NO_HALF_OPERATORS__",
# "-D__CUDA_NO_HALF_CONVERSIONS__",
......@@ -49,7 +44,7 @@ def get_extensions():
include_dirs = [extensions_dir]
ext_modules = [
extension(
"DCNv3",
'DCNv3',
sources,
include_dirs=include_dirs,
define_macros=define_macros,
......@@ -60,16 +55,16 @@ def get_extensions():
setup(
name="DCNv3",
version="1.1",
author="InternImage",
url="https://github.com/OpenGVLab/InternImage",
name='DCNv3',
version='1.1',
author='InternImage',
url='https://github.com/OpenGVLab/InternImage',
description=
"PyTorch Wrapper for CUDA Functions of DCNv3",
'PyTorch Wrapper for CUDA Functions of DCNv3',
packages=find_packages(exclude=(
"configs",
"tests",
'configs',
'tests',
)),
ext_modules=get_extensions(),
cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
cmdclass={'build_ext': torch.utils.cpp_extension.BuildExtension},
)
......@@ -171,4 +171,4 @@ dcnv3_cuda_backward(const at::Tensor &input, const at::Tensor &offset,
} else {
return {grad_input, grad_offset, grad_mask};
}
}
\ No newline at end of file
}
......@@ -1091,4 +1091,4 @@ void dcnv3_col2im_cuda(
if (err != cudaSuccess) {
printf("error in dcnv3_col2im_cuda: %s\n", cudaGetErrorString(err));
}
}
\ No newline at end of file
}