Commit f37f9c2a authored by zhe chen

Release code for iNaturalist 2018 (#197)

parent cfd24625
@@ -167,6 +167,27 @@ We use standard ImageNet dataset, you can download it from http://image-net.org/
</details>
<details>
<summary>iNaturalist 2018</summary>
- For iNaturalist 2018, please download the dataset from the [official repository](https://github.com/visipedia/inat_comp/blob/master/2018/README.md).
The file structure should look like:
```bash
$ tree inat2018/
inat2018/
├── categories.json
├── test2018
├── test2018.json
├── train2018.json
├── train2018_locations.json
├── val2018
├── val2018.json
└── val2018_locations.json
```
</details>
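The loader added in this commit reads these files directly: the COCO-style `train2018.json`/`val2018.json` (with `images`, `annotations`, and `categories`) plus the parallel `*_locations.json` files carrying capture date and coordinates. A quick sanity check of a fresh download, using only fields the new dataset code relies on (the path is illustrative):

```python
import json

root = 'data/inat2018'  # wherever the dataset was extracted

with open(f'{root}/train2018.json') as f:
    train = json.load(f)
with open(f'{root}/train2018_locations.json') as f:
    locations = json.load(f)

print(len(train['categories']))                    # 8142 classes
print(len(train['annotations']), len(locations))   # one location record per annotation
print(train['images'][0]['file_name'])             # relative image path
print(locations[0]['date'], locations[0]['lat'], locations[0]['lon'])
```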
## Released Models
<details open>
@@ -204,6 +225,19 @@ We use standard ImageNet dataset, you can download it from http://image-net.org/
</details>
<details open>
<summary> iNaturalist 2018 Image Classification </summary>
<br>
<div>
| name | pretrain | resolution | acc@1 | #param | download |
| :-----------: | :--------: | :--------: | :---: | :----: | :-----------------------------------------------------------------------------: |
| InternImage-H | Joint 427M | 384x384 | 92.6 | 1.1B | [ckpt](<>) \| [cfg](configs/inaturalist2018/internimage_h_22ktoinat18_384.yaml) |
</div>
</details>
## Evaluation
To evaluate a pretrained `InternImage` on ImageNet val, run:
...
DATA:
  IMG_SIZE: 384
  IMG_ON_MEMORY: False
  DATASET: inat18
AUG:
  MIXUP: 0.0
  CUTMIX: 0.0
  REPROB: 0.0
MODEL:
  TYPE: intern_image_meta_former
  DROP_PATH_RATE: 0.6
  LABEL_SMOOTHING: 0.3
  INTERN_IMAGE:
    CORE_OP: 'DCNv3'
    ...
    LEVEL2_POST_NORM_BLOCK_IDS: [5, 11, 17, 23, 29]
    CENTER_FEATURE_SCALE: True
    USE_CLIP_PROJECTOR: True
  PRETRAINED: 'pretrained/internimage_h_jointto22k_384.pth'
TRAIN:
  EMA:
    ENABLE: true
    DECAY: 0.9999
  EPOCHS: 100
  WARMUP_EPOCHS: 0
  WEIGHT_DECAY: 0.05
  BASE_LR: 2e-05 # 512
  WARMUP_LR: .0
  MIN_LR: .0
  LR_LAYER_DECAY: true
  LR_LAYER_DECAY_RATIO: 0.9
  RAND_INIT_FT_HEAD: true
  USE_CHECKPOINT: false
  OPTIMIZER:
    DCN_LR_MUL: 0.1
AMP_OPT_LEVEL: O0
EVAL_FREQ: 1
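This YAML (presumably the configs/inaturalist2018/internimage_h_22ktoinat18_384.yaml file referenced in the README above) fine-tunes the joint-22k InternImage-H checkpoint for 100 epochs with EMA, layer-wise LR decay, and a randomly re-initialized classification head. The `# 512` next to `BASE_LR` suggests the usual linear learning-rate scaling against a reference total batch size of 512; a minimal sketch of that convention, assuming the repo scales linearly (check the optimizer/scheduler code before relying on it):

```python
# Assumed convention: BASE_LR is defined for a total batch size of 512 and
# scaled linearly to the batch size actually used.
BASE_LR = 2e-05
REFERENCE_BATCH = 512

def scaled_lr(batch_per_gpu: int, num_gpus: int, accumulation_steps: int = 1) -> float:
    total_batch = batch_per_gpu * num_gpus * accumulation_steps
    return BASE_LR * total_batch / REFERENCE_BATCH

print(scaled_lr(batch_per_gpu=16, num_gpus=8))  # 128/512 of the base rate -> 5e-06
```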
@@ -12,7 +12,9 @@ import torch.distributed as dist
from timm.data import Mixup, create_transform
from torchvision import transforms

from .cached_image_folder import (CachedImageFolder, ImageCephDataset,
                                  INat18ImageCephDataset,
                                  INat18ParserCephImage)
from .samplers import NodeDistributedSampler, SubsetRandomSampler

try:
@@ -229,6 +231,15 @@ def build_dataset(split, config):
            root = os.path.join(config.DATA.DATA_PATH, 'val')
            dataset = ImageCephDataset(root, 'val', transform=transform)
        nb_classes = 1000
    elif config.DATA.DATASET == 'inat18':
        if prefix == 'train' and not config.EVAL_MODE:
            root = config.DATA.DATA_PATH
            dataset = INat18ImageCephDataset(
                root, 'train', transform=transform, on_memory=config.DATA.IMG_ON_MEMORY)
        elif prefix == 'val':
            root = config.DATA.DATA_PATH
            dataset = INat18ImageCephDataset(root, 'val', transform=transform)
        nb_classes = 8142
    else:
        raise NotImplementedError(
            f'build_dataset does support {config.DATA.DATASET}')
...
@@ -340,6 +340,55 @@ class ImageCephDataset(data.Dataset):
        return self.parser.filenames(basename, absolute)


class INat18ImageCephDataset(data.Dataset):

    def __init__(self,
                 root,
                 split,
                 parser=None,
                 transform=None,
                 target_transform=None,
                 on_memory=False):
        if split == 'train':
            annotation_root = osp.join(root, 'train2018.json')
        elif split == 'val':
            annotation_root = osp.join(root, 'val2018.json')
        elif split == 'test':
            annotation_root = osp.join(root, 'test2018.json')
        if parser is None or isinstance(parser, str):
            parser = INat18ParserCephImage(root=root,
                                           split=split,
                                           annotation_root=annotation_root,
                                           on_memory=on_memory)
        self.parser = parser
        self.transform = transform
        self.target_transform = target_transform
        self._consecutive_errors = 0

    def __getitem__(self, index):
        img, temporal_info, spatial_info, target = self.parser[index]
        self._consecutive_errors = 0
        if self.transform is not None:
            img = self.transform(img)
        if target is None:
            target = -1
        elif self.target_transform is not None:
            target = self.target_transform(target)
        temporal_info = torch.tensor(temporal_info).to(torch.float32)
        spatial_info = torch.tensor(spatial_info).to(torch.float32)
        return [img, temporal_info, spatial_info], target

    def __len__(self):
        return len(self.parser)

    def filename(self, index, basename=False, absolute=False):
        return self.parser.filename(index, basename, absolute)

    def filenames(self, basename=False, absolute=False):
        return self.parser.filenames(basename, absolute)
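A minimal usage sketch of the new dataset class (the real pipeline builds the transform from the config; the path and transform here are illustrative):

```python
from torchvision import transforms

# INat18ImageCephDataset as defined above; the exact import path depends on
# where cached_image_folder.py sits in the repo.
transform = transforms.Compose([
    transforms.Resize((384, 384)),
    transforms.ToTensor(),
])

dataset = INat18ImageCephDataset('data/inat2018', 'val', transform=transform)
(img, temporal_info, spatial_info), target = dataset[0]
print(img.shape, temporal_info.shape, spatial_info.shape, target)  # target in [0, 8141]
```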
class Parser:

    def __init__(self):
@@ -372,7 +421,7 @@ class ParserCephImage(Parser):
        self.file_client = None
        self.kwargs = kwargs
        self.root = root
        if '22k' in root:
            self.io_backend = 'petrel'
            with open(osp.join(annotation_root, '22k_class_to_idx.json'),
@@ -497,7 +546,7 @@ class ParserCephImage(Parser):
            else:
                target = int(target)
        except:
            print(filepath, target)
            exit()
        return img, target
@@ -512,6 +561,87 @@ class ParserCephImage(Parser):
        return filename


class INat18ParserCephImage(Parser):

    def __init__(self,
                 root,
                 split,
                 annotation_root,
                 on_memory=False,
                 **kwargs):
        super().__init__()
        self.file_client = None
        self.kwargs = kwargs
        self.split = split
        self.root = root
        self.io_backend = 'disk'

        data = mmcv.load(annotation_root)
        self.samples = data['annotations']
        self.file_names = [each['file_name'] for each in data['images']]
        self.meta_data = mmcv.load(
            annotation_root.replace('2018.json', '2018_locations.json'))
        self.class_to_idx = {}
        for i, each in enumerate(data['categories']):
            self.class_to_idx[each['id']] = i

        self.on_memory = on_memory
        self._consecutive_errors = 0
        # TODO: support on_memory function

    def __getitem__(self, index):
        if self.file_client is None:
            self.file_client = FileClient(self.io_backend, **self.kwargs)

        anns = self.samples[index]
        filename = self.file_names[index]
        img_id = anns['image_id']
        target = anns['category_id']

        # load meta information from json file
        meta = self.meta_data[index]
        date = meta['date']
        latitude = meta['lat']
        longitude = meta['lon']
        location_uncertainty = meta['loc_uncert']
        temporal_info = get_temporal_info(date, miss_hour=True)
        spatial_info = get_spatial_info(latitude, longitude)

        filepath = osp.join(self.root, filename)
        try:
            if self.on_memory:
                img_bytes = self.holder[filepath]
            else:
                img_bytes = self.file_client.get(filepath)
            img = mmcv.imfrombytes(img_bytes)[:, :, ::-1]
        except Exception as e:
            _logger.warning(
                f'Skipped sample (index {index}, file {filepath}). {str(e)}')
            self._consecutive_errors += 1
            if self._consecutive_errors < _ERROR_RETRY:
                return self.__getitem__((index + 1) % len(self))
            else:
                raise e
        self._consecutive_errors = 0

        img = Image.fromarray(img)
        if self.class_to_idx is not None:
            target = self.class_to_idx[target]
        else:
            target = int(target)
        return img, temporal_info, spatial_info, target

    def __len__(self):
        return len(self.samples)
    def _filename(self, index, basename=False, absolute=False):
        # self.samples holds annotation dicts, so take the relative path from file_names
        filename = self.file_names[index]
        filename = osp.join(self.root, filename)
        return filename


def get_temporal_info(date, miss_hour=False):
    try:
        if date:
...
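The bodies of `get_temporal_info` and `get_spatial_info` are truncated here. Encoders of this kind commonly map the capture date and latitude/longitude onto periodic sin/cos features; the snippet below is a generic illustration of that idea with made-up helper names, not the repository's implementation:

```python
import math
from datetime import datetime

def cyclical_date_features(date_str):
    # Illustrative: encode day-of-year on the unit circle so Dec 31 and Jan 1 stay close.
    if not date_str:
        return [0.0, 0.0]
    day = datetime.strptime(date_str[:10], '%Y-%m-%d').timetuple().tm_yday
    angle = 2 * math.pi * (day - 1) / 365.0
    return [math.sin(angle), math.cos(angle)]

def cyclical_location_features(lat, lon):
    # Illustrative: periodic encoding of latitude/longitude.
    if lat is None or lon is None:
        return [0.0, 0.0, 0.0, 0.0]
    lat_r = math.pi * lat / 90.0
    lon_r = math.pi * lon / 180.0
    return [math.sin(lat_r), math.cos(lat_r), math.sin(lon_r), math.cos(lon_r)]
```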
@@ -74,8 +74,7 @@ def parse_option():
                        type=str,
                        help='dataset name',
                        default=None)
    parser.add_argument('--data-path', type=str, help='path to dataset')
    parser.add_argument('--zip',
                        action='store_true',
                        help='use zipped dataset instead of folder dataset')
@@ -146,6 +145,9 @@ def throughput(data_loader, model, logger):
    model.eval()
    for idx, (images, _) in enumerate(data_loader):
        if type(images) == list:
            images = [item.cuda(non_blocking=True) for item in images]
        else:
            images = images.cuda(non_blocking=True)
        batch_size = images.shape[0]
        for i in range(50):
@@ -403,6 +405,9 @@ def train_one_epoch(config,
    amp_type = torch.float16 if config.AMP_TYPE == 'float16' else torch.bfloat16
    for idx, (samples, targets) in enumerate(data_loader):
        iter_begin_time = time.time()
        if type(samples) == list:
            samples = [item.cuda(non_blocking=True) for item in samples]
        else:
            samples = samples.cuda(non_blocking=True)
        targets = targets.cuda(non_blocking=True)
@@ -528,6 +533,9 @@ def validate(config, data_loader, model, epoch=None):
    end = time.time()
    for idx, (images, target) in enumerate(data_loader):
        if type(images) == list:
            images = [item.cuda(non_blocking=True) for item in images]
        else:
            images = images.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)
        output = model(images)
...
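Because the iNaturalist loader yields `[images, temporal_info, spatial_info]` instead of a single tensor, the three loops above branch on `type(images) == list` before moving data to the GPU and then hand the list to the model unchanged. The same pattern, factored into a helper purely for illustration (not code from the commit):

```python
def to_cuda(batch):
    # Move a tensor, or every tensor in an [img, temporal, spatial] list, to the GPU.
    if isinstance(batch, list):
        return [item.cuda(non_blocking=True) for item in batch]
    return batch.cuda(non_blocking=True)
```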
@@ -5,6 +5,7 @@
# --------------------------------------------------------
from .intern_image import InternImage
from .intern_image_meta_former import InternImageMetaFormer


def build_model(config):
@@ -30,6 +31,27 @@ def build_model(config):
            center_feature_scale=config.MODEL.INTERN_IMAGE.CENTER_FEATURE_SCALE, # for InternImage-H/G
            remove_center=config.MODEL.INTERN_IMAGE.REMOVE_CENTER,
        )
    elif model_type == 'intern_image_meta_former':
        model = InternImageMetaFormer(
            core_op=config.MODEL.INTERN_IMAGE.CORE_OP,
            num_classes=config.MODEL.NUM_CLASSES,
            channels=config.MODEL.INTERN_IMAGE.CHANNELS,
            depths=config.MODEL.INTERN_IMAGE.DEPTHS,
            groups=config.MODEL.INTERN_IMAGE.GROUPS,
            layer_scale=config.MODEL.INTERN_IMAGE.LAYER_SCALE,
            offset_scale=config.MODEL.INTERN_IMAGE.OFFSET_SCALE,
            post_norm=config.MODEL.INTERN_IMAGE.POST_NORM,
            mlp_ratio=config.MODEL.INTERN_IMAGE.MLP_RATIO,
            with_cp=config.TRAIN.USE_CHECKPOINT,
            drop_path_rate=config.MODEL.DROP_PATH_RATE,
            res_post_norm=config.MODEL.INTERN_IMAGE.RES_POST_NORM, # for InternImage-H/G
            dw_kernel_size=config.MODEL.INTERN_IMAGE.DW_KERNEL_SIZE, # for InternImage-H/G
            use_clip_projector=config.MODEL.INTERN_IMAGE.USE_CLIP_PROJECTOR, # for InternImage-H/G
            level2_post_norm=config.MODEL.INTERN_IMAGE.LEVEL2_POST_NORM, # for InternImage-H/G
            level2_post_norm_block_ids=config.MODEL.INTERN_IMAGE.LEVEL2_POST_NORM_BLOCK_IDS, # for InternImage-H/G
            center_feature_scale=config.MODEL.INTERN_IMAGE.CENTER_FEATURE_SCALE, # for InternImage-H/G
            remove_center=config.MODEL.INTERN_IMAGE.REMOVE_CENTER,
        )
    else:
        raise NotImplementedError(f'Unkown model: {model_type}')
...
This diff is collapsed.
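The new `intern_image_meta_former` module itself is collapsed in this view. From what is visible elsewhere in the commit, the dataset yields `[img, temporal_info, spatial_info]` and the training/validation loops pass that list straight to the model, so the model presumably unpacks it and fuses the metadata with the image features before classification. The sketch below only illustrates that idea; the names, dimensions, and additive fusion are assumptions, not the released implementation:

```python
import torch
import torch.nn as nn

class MetaFusionHead(nn.Module):
    """Illustrative head: embed capture-time/location metadata and fuse it
    with pooled image features before the classifier (assumed design)."""

    def __init__(self, feat_dim: int, temporal_dim: int, spatial_dim: int, num_classes: int):
        super().__init__()
        self.meta_mlp = nn.Sequential(
            nn.Linear(temporal_dim + spatial_dim, feat_dim),
            nn.ReLU(inplace=True),
            nn.Linear(feat_dim, feat_dim),
        )
        self.fc = nn.Linear(feat_dim, num_classes)

    def forward(self, image_feat, temporal_info, spatial_info):
        meta = self.meta_mlp(torch.cat([temporal_info, spatial_info], dim=1))
        return self.fc(image_feat + meta)  # simple additive fusion, illustrative only
```

Under this reading, a forward pass of the real model would look like `logits = model([img, temporal_info, spatial_info])`, matching `output = model(images)` in `validate`.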
#!/usr/bin/env bash
set -x
PARTITION=$1
JOB_NAME=$2
CONFIG=$3
GPUS=${GPUS:-8}
GPUS_PER_NODE=${GPUS_PER_NODE:-8}
CPUS_PER_TASK=${CPUS_PER_TASK:-12}
SRUN_ARGS=${SRUN_ARGS:-""}
PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
srun -p ${PARTITION} \
--job-name=${JOB_NAME} \
--gres=gpu:${GPUS_PER_NODE} \
--ntasks=${GPUS} \
--ntasks-per-node=${GPUS_PER_NODE} \
--cpus-per-task=${CPUS_PER_TASK} \
--kill-on-bad-exit=1 \
--quotatype=reserved \
${SRUN_ARGS} \
python -u main.py \
--cfg ${CONFIG} \
--accumulation-steps 1 \
--local-rank 0 \
--data-path data/inat2018 \
--output work_dirs ${@:4}
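A typical launch on a Slurm cluster would override the environment variables and pass the released config, for example `GPUS=32 GPUS_PER_NODE=8 bash <this script> <partition> inat18_ft configs/inaturalist2018/internimage_h_22ktoinat18_384.yaml`; anything after the config argument is forwarded to `main.py` via `${@:4}`.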