first init

c6a27e0b · panhb · e4b993b1 · c6a27e0b · c6a27e0b · c6a27e0b
Commit c6a27e0b authored Jan 07, 2025 by panhb
20 changed files
--- a/ppdet/core/config/yaml_helpers.py
+++ b/ppdet/core/config/yaml_helpers.py
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import importlib
+import inspect
+
+import yaml
+from .schema import SharedConfig
+
+__all__ = ['serializable', 'Callable']
+
+
+def represent_dictionary_order(self, dict_data):
+    """Represent ``dict_data`` as a plain YAML map built from its items."""
+    pairs = dict_data.items()
+    return self.represent_mapping('tag:yaml.org,2002:map', pairs)
+
+
+def setup_orderdict():
+    """Register `represent_dictionary_order` as the YAML representer for
+    ``collections.OrderedDict``."""
+    import collections
+    yaml.add_representer(collections.OrderedDict, represent_dictionary_order)
+
+
+def _make_python_constructor(cls):
+    """
+    Build a YAML constructor callback that instantiates ``cls``.
+
+    Args:
+        cls: class to instantiate from a YAML node
+
+    Returns: a ``(loader, node)`` callable returning a ``cls`` instance
+    """
+
+    def python_constructor(loader, node):
+        # A sequence node supplies positional arguments.
+        if isinstance(node, yaml.SequenceNode):
+            return cls(*loader.construct_sequence(node, deep=True))
+
+        # Any other node is read as a mapping of keyword arguments.
+        kwargs = loader.construct_mapping(node, deep=True)
+        try:
+            instance = cls(**kwargs)
+        except Exception as ex:
+            message = "Error when construct {} instance from yaml config"
+            print(message.format(cls.__name__))
+            raise ex
+        return instance
+
+    return python_constructor
+
+
+def _make_python_representer(cls):
+    """
+    Build a YAML representer callback that dumps ``cls`` instances as a
+    mapping tagged ``!<ClassName>``.
+
+    The mapping holds one entry per named constructor argument (read from
+    the instance with ``getattr``). When the constructor names no arguments
+    besides ``self``, the instance ``__dict__`` is dumped instead. An
+    ``_id`` entry is never dumped.
+
+    Args:
+        cls: class whose instances should be dumped
+
+    Returns: a ``(dumper, obj)`` callable returning the YAML node
+    """
+    if hasattr(inspect, 'getfullargspec'):
+        argspec = inspect.getfullargspec(cls)
+    else:
+        # python 2 compatibility: only `getargspec` exists there
+        argspec = inspect.getargspec(cls.__init__)
+    argnames = [arg for arg in argspec.args if arg != 'self']
+
+    def python_representer(dumper, obj):
+        if argnames:
+            data = {name: getattr(obj, name) for name in argnames}
+        else:
+            # Work on a copy: dropping `_id` below must not remove the
+            # attribute from the object being dumped.
+            data = dict(obj.__dict__)
+        data.pop('_id', None)
+        return dumper.represent_mapping(u'!{}'.format(cls.__name__), data)
+
+    return python_representer
+
+
+def serializable(cls):
+    """
+    Class decorator that registers a YAML constructor and representer for
+    ``cls`` under the tag ``!<ClassName>``. The class must be
+    "trivially serializable".
+
+    Args:
+        cls: class to be serialized
+
+    Returns: cls
+    """
+    tag = u'!{}'.format(cls.__name__)
+
+    constructor = _make_python_constructor(cls)
+    yaml.add_constructor(tag, constructor)
+
+    representer = _make_python_representer(cls)
+    yaml.add_representer(cls, representer)
+
+    return cls
+
+
+# Dump a SharedConfig object as whatever its `default_value` dumps to.
+yaml.add_representer(SharedConfig,
+                     lambda d, o: d.represent_data(o.default_value))
+
+
+@serializable
+class Callable(object):
+    """
+    Helper to be used in Yaml for creating arbitrary class objects
+
+    Args:
+        full_type (str): the full module path to target function; a name
+            without a dot is looked up in the builtins module
+        args (list): positional arguments for the target, empty if omitted
+        kwargs (dict): keyword arguments for the target, empty if omitted
+    """
+
+    def __init__(self, full_type, args=None, kwargs=None):
+        super(Callable, self).__init__()
+        self.full_type = full_type
+        # Fresh containers per instance: mutable default arguments would be
+        # shared (and mutated) across every Callable created without them.
+        self.args = [] if args is None else args
+        self.kwargs = {} if kwargs is None else kwargs
+
+    def __call__(self):
+        """Import the target named by `full_type` and call it with the
+        stored `args` and `kwargs`, returning its result."""
+        module_name, dot, func_name = self.full_type.rpartition('.')
+        if dot:
+            module = importlib.import_module(module_name)
+        else:
+            # bare name: resolve it in the builtins module
+            try:
+                module = importlib.import_module('builtins')
+            except Exception:
+                # python 2 name of the builtins module
+                module = importlib.import_module('__builtin__')
+            func_name = self.full_type
+
+        func = getattr(module, func_name)
+        return func(*self.args, **self.kwargs)
--- a/ppdet/core/workspace.py
+++ b/ppdet/core/workspace.py
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import print_function
+from __future__ import division
+
+import importlib
+import os
+import sys
+
+import yaml
+import collections
+
+# Use `collections.abc` when it is available and fall back to the
+# `collections` module itself when the attribute is missing.
+try:
+    collectionsAbc = collections.abc
+except AttributeError:
+    collectionsAbc = collections
+
+from .config.schema import SchemaDict, SharedConfig, extract_schema
+from .config.yaml_helpers import serializable
+
+__all__ = [
+    'global_config',
+    'load_config',
+    'merge_config',
+    'get_registered_modules',
+    'create',
+    'register',
+    'serializable',
+    'dump_value',
+]
+
+
+def dump_value(value):
+    """
+    Render ``value`` as a single-line string.
+
+    Objects with a ``__dict__`` and dict/tuple/list values are dumped as
+    flow-style YAML with newlines and ``...`` removed, wrapped in single
+    quotes. Everything else is returned as ``str(value)``.
+    """
+    # XXX this is hackish, but collections.abc is not available in python 2
+    is_primitive = not (hasattr(value, '__dict__') or
+                        isinstance(value, (dict, tuple, list)))
+    if is_primitive:
+        return str(value)
+
+    dumped = yaml.dump(value, default_flow_style=True)
+    for junk in ('\n', '...'):
+        dumped = dumped.replace(junk, '')
+    return "'{}'".format(dumped)
+
+
+class AttrDict(dict):
+    """Single level attribute dict, NOT recursive"""
+
+    def __init__(self, **kwargs):
+        super(AttrDict, self).__init__(**kwargs)
+
+    def __getattr__(self, key):
+        # Only reached when normal attribute lookup fails.
+        if key not in self:
+            raise AttributeError("object has no attribute '{}'".format(key))
+        return self[key]
+
+    def __setattr__(self, key, value):
+        # Attribute writes are stored as dict items.
+        self[key] = value
+
+    def copy(self):
+        """Return a shallow copy that is itself an ``AttrDict``."""
+        clone = AttrDict()
+        clone.update(self.items())
+        return clone
+
+
+# Module-level config store; the functions in this module read and update it.
+global_config = AttrDict()
+
+# Key under which a config file lists the base config files to load first.
+BASE_KEY = '_BASE_'
+
+
+# parse and load _BASE_ recursively
+def _load_config_with_base(file_path):
+    """
+    Load one YAML config file together with the base files it lists.
+
+    Base files are named under the `_BASE_` key. A leading `~` is expanded
+    and relative paths are resolved against the directory of `file_path`.
+    Bases are loaded recursively and merged in listing order, then the
+    file's own entries are merged on top.
+
+    Args:
+        file_path (str): path of the config file
+
+    Returns: the loaded config with `_BASE_` resolved and removed
+    """
+    # NOTE(review): yaml.Loader can construct arbitrary Python objects. It
+    # looks required for the custom `!Tag` constructors registered by
+    # `serializable`, so only load config files you trust.
+    with open(file_path) as f:
+        file_cfg = yaml.load(f, Loader=yaml.Loader)
+
+    if BASE_KEY not in file_cfg:
+        return file_cfg
+
+    # NOTE: cfgs outside have higher priority than cfgs in _BASE_
+    all_base_cfg = AttrDict()
+    for base_yml in list(file_cfg[BASE_KEY]):
+        if base_yml.startswith("~"):
+            base_yml = os.path.expanduser(base_yml)
+        # `isabs` also recognises absolute paths that do not start with '/'
+        # (e.g. drive-letter paths); the '/' test keeps the old behaviour.
+        if not (base_yml.startswith('/') or os.path.isabs(base_yml)):
+            base_yml = os.path.join(os.path.dirname(file_path), base_yml)
+
+        # The recursive call opens the file itself, so no handle is opened
+        # here.
+        base_cfg = _load_config_with_base(base_yml)
+        all_base_cfg = merge_config(base_cfg, all_base_cfg)
+
+    del file_cfg[BASE_KEY]
+    return merge_config(file_cfg, all_base_cfg)
+
+
+def load_config(file_path):
+    """
+    Load a YAML config file and merge it into the global config.
+
+    The file name without directory and extension is stored under the
+    `filename` key.
+
+    Args:
+        file_path (str): Path of the config file to be loaded.
+
+    Returns: global config
+    """
+    ext = os.path.splitext(file_path)[1]
+    assert ext in ['.yml', '.yaml'], "only support yaml files for now"
+
+    # load config from file and merge into global config
+    cfg = _load_config_with_base(file_path)
+    base_name = os.path.basename(file_path)
+    cfg['filename'] = os.path.splitext(base_name)[0]
+    merge_config(cfg)
+
+    return global_config
+
+
+def dict_merge(dct, merge_dct):
+    """ Recursively merge ``merge_dct`` into ``dct`` in place.
+
+    Unlike :meth:``dict.update()``, a key whose value is a dict in ``dct``
+    and a mapping in ``merge_dct`` is merged key by key, to an arbitrary
+    depth. Any other key is overwritten with the value from ``merge_dct``.
+
+    Args:
+        dct: dict onto which the merge is executed
+        merge_dct: dct merged into dct
+
+    Returns: dct
+    """
+    for key, incoming in merge_dct.items():
+        both_nested = (key in dct and isinstance(dct[key], dict) and
+                       isinstance(incoming, collectionsAbc.Mapping))
+        if not both_nested:
+            dct[key] = incoming
+            continue
+        dict_merge(dct[key], incoming)
+    return dct
+
+
+def merge_config(config, another_cfg=None):
+    """
+    Merge config into global config or another_cfg.
+
+    Args:
+        config (dict): Config to be merged.
+        another_cfg (dict): Target of the merge. When omitted (None) the
+            global config is the target.
+
+    Returns: the merge target (global config or another_cfg)
+    """
+    global global_config
+    # Test for None instead of truthiness: an empty `another_cfg` is still
+    # the caller's target and must not be swapped for the global config.
+    dct = global_config if another_cfg is None else another_cfg
+    return dict_merge(dct, config)
+
+
+def get_registered_modules():
+    """Return a dict of the global config entries that are `SchemaDict`."""
+    registered = {}
+    for name, entry in global_config.items():
+        if isinstance(entry, SchemaDict):
+            registered[name] = entry
+    return registered
+
+
+def make_partial(cls):
+    """
+    Turn ``cls`` into a wrapper around the callable named by ``cls.__op__``.
+
+    The op is re-imported from its module by name. Unless the class sets
+    ``__append_doc__`` to a false value, ``cls.__call__`` is replaced so
+    that calling an instance calls the op with the instance attributes as
+    keyword arguments (call-time keyword arguments win).
+
+    Returns: cls
+    """
+    op_module = importlib.import_module(cls.__op__.__module__)
+    op = getattr(op_module, cls.__op__.__name__)
+    category = getattr(cls, '__category__', None)
+    cls.__category__ = category if category else 'op'
+
+    def partial_apply(self, *args, **kwargs):
+        merged = self.__dict__.copy()
+        merged.update(kwargs)
+        return op(*args, **merged)
+
+    # XXX should default to True?
+    if not getattr(cls, '__append_doc__', True):
+        return cls
+
+    if sys.version_info[0] > 2:
+        cls.__doc__ = "Wrapper for `{}` OP".format(op.__name__)
+        cls.__init__.__doc__ = op.__doc__
+        cls.__call__ = partial_apply
+        cls.__call__.__doc__ = op.__doc__
+    else:
+        # XXX work around for python 2
+        partial_apply.__doc__ = op.__doc__
+        cls.__call__ = partial_apply
+    return cls
+
+
+def register(cls):
+    """
+    Register a given module class.
+
+    The schema extracted from the class is stored in the global config
+    under the class name. Classes that define `__op__` are first passed
+    through `make_partial`.
+
+    Args:
+        cls (type): Module class to be registered.
+
+    Returns: cls
+
+    Raises:
+        ValueError: if the class name is already in the global config.
+    """
+    name = cls.__name__
+    if name in global_config:
+        raise ValueError("Module class already registered: {}".format(name))
+    if hasattr(cls, '__op__'):
+        cls = make_partial(cls)
+    global_config[cls.__name__] = extract_schema(cls)
+    return cls
+
+
+def create(cls_or_name, **kwargs):
+    """
+    Create an instance of given module class.
+
+    The constructor arguments start from the module's entry in
+    `global_config`, then `shared` values, `from_config` results and
+    `inject` dependencies are applied on top, in that order.
+
+    Args:
+        cls_or_name (type or str): Class of which to create instance.
+        **kwargs: forwarded to `cls.from_config` when the class defines it.
+
+    Returns: instance of type `cls_or_name`; when the entry stored under
+        that name is not a `SchemaDict` but has a `__dict__`, that stored
+        object is returned as is.
+
+    Raises:
+        ValueError: if the name is not registered, if an injected
+            dependency is missing from `global_config`, or if an injection
+            value has an unsupported type.
+    """
+    assert type(cls_or_name) in [type, str
+                                 ], "should be a class or name of a class"
+    name = type(cls_or_name) == str and cls_or_name or cls_or_name.__name__
+    if name in global_config:
+        if isinstance(global_config[name], SchemaDict):
+            pass
+        elif hasattr(global_config[name], "__dict__"):
+            # support instance return directly
+            return global_config[name]
+        else:
+            raise ValueError("The module {} is not registered".format(name))
+    else:
+        raise ValueError("The module {} is not registered".format(name))
+
+    config = global_config[name]
+    cls = getattr(config.pymodule, name)
+    # start from the values currently stored for this module
+    cls_kwargs = {}
+    cls_kwargs.update(global_config[name])
+
+    # parse `shared` annotation of registered modules
+    if getattr(config, 'shared', None):
+        for k in config.shared:
+            target_key = config[k]
+            shared_conf = config.schema[k].default
+            assert isinstance(shared_conf, SharedConfig)
+            if target_key is not None and not isinstance(target_key,
+                                                         SharedConfig):
+                continue  # value is given for the module
+            elif shared_conf.key in global_config:
+                # `key` is present in config
+                cls_kwargs[k] = global_config[shared_conf.key]
+            else:
+                cls_kwargs[k] = shared_conf.default_value
+
+    # let the class derive extra arguments through its `from_config` hook
+    if getattr(cls, 'from_config', None):
+        cls_kwargs.update(cls.from_config(config, **kwargs))
+
+    # parse `inject` annotation of registered modules
+    if getattr(config, 'inject', None):
+        for k in config.inject:
+            target_key = config[k]
+            # optional dependency
+            if target_key is None:
+                continue
+
+            if isinstance(target_key, dict) or hasattr(target_key, '__dict__'):
+                # mapping form: `name` selects the module to inject, every
+                # other item overrides a value of that module's config
+                if 'name' not in target_key.keys():
+                    continue
+                inject_name = str(target_key['name'])
+                if inject_name not in global_config:
+                    # NOTE(review): the message formats `k` (the argument
+                    # name), not `inject_name` -- confirm which is intended
+                    raise ValueError(
+                        "Missing injection name {} and check it's name in cfg file".
+                        format(k))
+                target = global_config[inject_name]
+                for i, v in target_key.items():
+                    if i == 'name':
+                        continue
+                    target[i] = v
+                if isinstance(target, SchemaDict):
+                    cls_kwargs[k] = create(inject_name)
+            elif isinstance(target_key, str):
+                # string form: the value is the name of the module to inject
+                if target_key not in global_config:
+                    raise ValueError("Missing injection config:", target_key)
+                target = global_config[target_key]
+                if isinstance(target, SchemaDict):
+                    cls_kwargs[k] = create(target_key)
+                elif hasattr(target, '__dict__'):  # serialized object
+                    cls_kwargs[k] = target
+            else:
+                raise ValueError("Unsupported injection type:", target_key)
+    # prevent modification of global config values of reference types
+    # (e.g., list, dict) from within the created module instances
+    # NOTE(review): the copy below is commented out, so no copy is made
+    #kwargs = copy.deepcopy(kwargs)
+    return cls(**cls_kwargs)
--- a/ppdet/data/__init__.py
+++ b/ppdet/data/__init__.py
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
+# limitations under the License.
+
+from . import source
+from . import transform
+from . import reader
+
+from .source import *
+from .transform import *
+from .reader import *
--- a/ppdet/data/__pycache__/__init__.cpython-310.pyc
+++ b/ppdet/data/__pycache__/__init__.cpython-310.pyc
--- a/ppdet/data/__pycache__/reader.cpython-310.pyc
+++ b/ppdet/data/__pycache__/reader.cpython-310.pyc
--- a/ppdet/data/__pycache__/shm_utils.cpython-310.pyc
+++ b/ppdet/data/__pycache__/shm_utils.cpython-310.pyc
--- a/ppdet/data/__pycache__/utils.cpython-310.pyc
+++ b/ppdet/data/__pycache__/utils.cpython-310.pyc
--- a/ppdet/data/reader.py
+++ b/ppdet/data/reader.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import copy
+import os
+import traceback
+import six
+import sys
+# NOTE(review): both branches are empty, so this version check does nothing.
+if sys.version_info >= (3, 0):
+    pass
+else:
+    pass
+import numpy as np
+import paddle
+import paddle.nn.functional as F
+
+from copy import deepcopy
+
+from paddle.io import DataLoader, DistributedBatchSampler
+from .utils import default_collate_fn
+
+from ppdet.core.workspace import register
+from . import transform
+from .shm_utils import _get_shared_memory_size_in_M
+
+from ppdet.utils.logger import setup_logger
+# logger for this module, created under the name 'reader'
+logger = setup_logger('reader')
+
+# pid of the process that imported this module
+MAIN_PID = os.getpid()
+
+
+class Compose(object):
+    """
+    Apply a list of transform ops to a sample, one after another.
+
+    Args:
+        transforms (list): list of dicts mapping an op name (looked up in
+            the `transform` module) to its keyword arguments
+        num_classes (int): written to every op that has a `num_classes`
+            attribute, default 80
+    """
+
+    def __init__(self, transforms, num_classes=80):
+        self.transforms = transforms
+        self.transforms_cls = []
+        for op_conf in self.transforms:
+            for op_name, op_kwargs in op_conf.items():
+                op = getattr(transform, op_name)(**op_kwargs)
+                if hasattr(op, 'num_classes'):
+                    op.num_classes = num_classes
+                self.transforms_cls.append(op)
+
+    def __call__(self, data):
+        for op in self.transforms_cls:
+            try:
+                data = op(data)
+            except Exception as e:
+                # log which op failed, then let the error propagate
+                stack_info = traceback.format_exc()
+                logger.warning("fail to map sample transform [{}] "
+                               "with error: {} and stack:\n{}".format(
+                                   op, e, str(stack_info)))
+                raise e
+
+        return data
+
+
+class BatchCompose(Compose):
+    """
+    Apply batch transform ops to a list of samples and assemble the batch.
+
+    Args:
+        transforms (list): op configs, handled as in `Compose`
+        num_classes (int): forwarded to `Compose`, default 80
+        collate_batch (bool): when True the batch is built with
+            `default_collate_fn`; otherwise values are gathered per key and
+            only keys without 'gt_', 'is_crowd' or 'difficult' in their
+            name are stacked
+    """
+
+    def __init__(self, transforms, num_classes=80, collate_batch=True):
+        super(BatchCompose, self).__init__(transforms, num_classes)
+        self.collate_batch = collate_batch
+
+    def __call__(self, data):
+        for op in self.transforms_cls:
+            try:
+                data = op(data)
+            except Exception as e:
+                stack_info = traceback.format_exc()
+                logger.warning("fail to map batch transform [{}] "
+                               "with error: {} and stack:\n{}".format(
+                                   op, e, str(stack_info)))
+                raise e
+
+        # remove keys which is not needed by model
+        for k in ['h', 'w', 'flipped']:
+            for sample in data:
+                sample.pop(k, None)
+
+        # batch data, if user-define batch function needed
+        # use user-defined here
+        if self.collate_batch:
+            return default_collate_fn(data)
+
+        batch_data = {}
+        for key in data[0].keys():
+            column = [sample[key] for sample in data]
+            if not ('gt_' in key or 'is_crowd' in key or 'difficult' in key):
+                column = np.stack(column, axis=0)
+            batch_data[key] = column
+        return batch_data
+
+
+class BaseDataLoader(object):
+    """
+    Base DataLoader implementation for detection models
+
+    Args:
+        sample_transforms (list): a list of transforms to perform
+                                  on each sample
+        batch_transforms (list): a list of transforms to perform
+                                 on batch
+        batch_size (int): batch size for batch collating, default 1.
+        shuffle (bool): whether to shuffle samples
+        drop_last (bool): whether to drop the last incomplete batch,
+                          default False
+        num_classes (int): class number of dataset, default 80
+        collate_batch (bool): whether to collate batch in dataloader.
+            If set to True, the samples will collate into batch according
+            to the batch size. Otherwise, the ground-truth will not collate,
+            which is used when the number of ground-truth is different in
+            samples.
+        use_shared_memory (bool): whether to use shared memory to
+                accelerate data loading, enable this only if you
+                are sure that the shared memory size of your OS
+                is larger than memory cost of input datas of model.
+                Note that shared memory will be automatically
+                disabled if the shared memory of OS is less than
+                1G, which is not enough for detection models.
+                Default False.
+        **kwargs: kept and later passed to the dataset's `set_kwargs`.
+    """
+
+    def __init__(self,
+                 sample_transforms=[],
+                 batch_transforms=[],
+                 batch_size=1,
+                 shuffle=False,
+                 drop_last=False,
+                 num_classes=80,
+                 collate_batch=True,
+                 use_shared_memory=False,
+                 **kwargs):
+        # sample transform
+        self._sample_transforms = Compose(
+            sample_transforms, num_classes=num_classes)
+
+        # batch transform
+        self._batch_transforms = BatchCompose(batch_transforms, num_classes,
+                                              collate_batch)
+        self.batch_size = batch_size
+        self.shuffle = shuffle
+        self.drop_last = drop_last
+        self.use_shared_memory = use_shared_memory
+        self.kwargs = kwargs
+
+    def __call__(self,
+                 dataset,
+                 worker_num,
+                 batch_sampler=None,
+                 return_list=False):
+        """
+        Bind this loader to `dataset` and build the underlying DataLoader.
+
+        Args:
+            dataset: dataset object; it is checked/downloaded, parsed and
+                given the sample transforms and the stored kwargs
+            worker_num (int): passed to DataLoader as `num_workers`
+            batch_sampler: sampler to use; when None a
+                DistributedBatchSampler is built from this loader's
+                batch_size, shuffle and drop_last
+            return_list (bool): passed through to DataLoader
+
+        Returns: self
+        """
+        self.dataset = dataset
+        self.dataset.check_or_download_dataset()
+        self.dataset.parse_dataset()
+        # get data
+        self.dataset.set_transform(self._sample_transforms)
+        # set kwargs
+        self.dataset.set_kwargs(**self.kwargs)
+        # batch sampler
+        if batch_sampler is None:
+            self._batch_sampler = DistributedBatchSampler(
+                self.dataset,
+                batch_size=self.batch_size,
+                shuffle=self.shuffle,
+                drop_last=self.drop_last)
+        else:
+            self._batch_sampler = batch_sampler
+
+        # DataLoader do not start sub-process in Windows and Mac
+        # system, do not need to use shared memory
+        use_shared_memory = self.use_shared_memory and \
+                            sys.platform not in ['win32', 'darwin']
+        # check whether shared memory size is bigger than 1G(1024M)
+        if use_shared_memory:
+            shm_size = _get_shared_memory_size_in_M()
+            if shm_size is not None and shm_size < 1024.:
+                logger.warning("Shared memory size is less than 1G, "
+                               "disable shared_memory in DataLoader")
+                use_shared_memory = False
+
+        # the batch transforms object doubles as the collate function
+        self.dataloader = DataLoader(
+            dataset=self.dataset,
+            batch_sampler=self._batch_sampler,
+            collate_fn=self._batch_transforms,
+            num_workers=worker_num,
+            return_list=return_list,
+            use_shared_memory=use_shared_memory)
+        self.loader = iter(self.dataloader)
+
+        return self
+
+    def __len__(self):
+        # length of the batch sampler set up in __call__
+        return len(self._batch_sampler)
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        try:
+            return next(self.loader)
+        except StopIteration:
+            # start a fresh iterator over the dataloader for the next pass,
+            # then re-raise the StopIteration to end the current one
+            self.loader = iter(self.dataloader)
+            six.reraise(*sys.exc_info())
+
+    def next(self):
+        # python2 compatibility
+        return self.__next__()
+
+
+# Registered reader whose defaults shuffle the samples and drop the last
+# batch; every argument is handed on to BaseDataLoader.
+@register
+class TrainReader(BaseDataLoader):
+    __shared__ = ['num_classes']
+
+    def __init__(self,
+                 sample_transforms=[],
+                 batch_transforms=[],
+                 batch_size=1,
+                 shuffle=True,
+                 drop_last=True,
+                 num_classes=80,
+                 collate_batch=True,
+                 **kwargs):
+        super(TrainReader, self).__init__(
+            sample_transforms=sample_transforms,
+            batch_transforms=batch_transforms,
+            batch_size=batch_size,
+            shuffle=shuffle,
+            drop_last=drop_last,
+            num_classes=num_classes,
+            collate_batch=collate_batch,
+            **kwargs)
+
+
+# Registered reader whose defaults keep sample order and drop the last
+# batch; every argument is handed on to BaseDataLoader.
+@register
+class EvalReader(BaseDataLoader):
+    __shared__ = ['num_classes']
+
+    def __init__(self,
+                 sample_transforms=[],
+                 batch_transforms=[],
+                 batch_size=1,
+                 shuffle=False,
+                 drop_last=True,
+                 num_classes=80,
+                 **kwargs):
+        super(EvalReader, self).__init__(
+            sample_transforms=sample_transforms,
+            batch_transforms=batch_transforms,
+            batch_size=batch_size,
+            shuffle=shuffle,
+            drop_last=drop_last,
+            num_classes=num_classes,
+            **kwargs)
+
+
+# Registered reader whose defaults keep sample order and keep the last
+# batch; every argument is handed on to BaseDataLoader.
+@register
+class TestReader(BaseDataLoader):
+    __shared__ = ['num_classes']
+
+    def __init__(self,
+                 sample_transforms=[],
+                 batch_transforms=[],
+                 batch_size=1,
+                 shuffle=False,
+                 drop_last=False,
+                 num_classes=80,
+                 **kwargs):
+        super(TestReader, self).__init__(
+            sample_transforms=sample_transforms,
+            batch_transforms=batch_transforms,
+            batch_size=batch_size,
+            shuffle=shuffle,
+            drop_last=drop_last,
+            num_classes=num_classes,
+            **kwargs)
+
+
+# Registered reader with `num_classes` defaulting to 1, no shuffling and
+# the last batch kept; every argument is handed on to BaseDataLoader.
+@register
+class EvalMOTReader(BaseDataLoader):
+    __shared__ = ['num_classes']
+
+    def __init__(self,
+                 sample_transforms=[],
+                 batch_transforms=[],
+                 batch_size=1,
+                 shuffle=False,
+                 drop_last=False,
+                 num_classes=1,
+                 **kwargs):
+        super(EvalMOTReader, self).__init__(
+            sample_transforms=sample_transforms,
+            batch_transforms=batch_transforms,
+            batch_size=batch_size,
+            shuffle=shuffle,
+            drop_last=drop_last,
+            num_classes=num_classes,
+            **kwargs)
+
+
+# Registered reader with `num_classes` defaulting to 1, no shuffling and
+# the last batch kept; every argument is handed on to BaseDataLoader.
+@register
+class TestMOTReader(BaseDataLoader):
+    __shared__ = ['num_classes']
+
+    def __init__(self,
+                 sample_transforms=[],
+                 batch_transforms=[],
+                 batch_size=1,
+                 shuffle=False,
+                 drop_last=False,
+                 num_classes=1,
+                 **kwargs):
+        super(TestMOTReader, self).__init__(
+            sample_transforms=sample_transforms,
+            batch_transforms=batch_transforms,
+            batch_size=batch_size,
+            shuffle=shuffle,
+            drop_last=drop_last,
+            num_classes=num_classes,
+            **kwargs)
+
+
+# For Semi-Supervised Object Detection (SSOD)
+class Compose_SSOD(object):
+    """
+    Sample transform pipeline producing a weak and a strong view.
+
+    The base transforms run first. Their result is deep-copied twice: one
+    copy goes through the weak augmentations, the other through the strong
+    ones. The strong result is stored in the weak result under the key
+    'strong_aug'.
+
+    Args:
+        base_transforms (list): op configs applied to every sample
+        weak_aug (list): op configs of the weak branch
+        strong_aug (list): op configs of the strong branch
+        num_classes (int): written to every op that has a `num_classes`
+            attribute, default 80
+    """
+
+    def __init__(self, base_transforms, weak_aug, strong_aug, num_classes=80):
+        def build(op_confs):
+            # instantiate each configured op from the `transform` module
+            ops = []
+            for conf in op_confs:
+                for op_name, op_kwargs in conf.items():
+                    op = getattr(transform, op_name)(**op_kwargs)
+                    if hasattr(op, 'num_classes'):
+                        op.num_classes = num_classes
+                    ops.append(op)
+            return ops
+
+        self.base_transforms = base_transforms
+        self.base_transforms_cls = build(self.base_transforms)
+
+        self.weak_augs = weak_aug
+        self.weak_augs_cls = build(self.weak_augs)
+
+        self.strong_augs = strong_aug
+        self.strong_augs_cls = build(self.strong_augs)
+
+    def __call__(self, data):
+        for op in self.base_transforms_cls:
+            try:
+                data = op(data)
+            except Exception as e:
+                stack_info = traceback.format_exc()
+                logger.warning("fail to map sample transform [{}] "
+                               "with error: {} and stack:\n{}".format(
+                                   op, e, str(stack_info)))
+                raise e
+
+        # both branches start from independent copies of the base result
+        weak_data, strong_data = deepcopy(data), deepcopy(data)
+
+        for op in self.weak_augs_cls:
+            try:
+                weak_data = op(weak_data)
+            except Exception as e:
+                stack_info = traceback.format_exc()
+                logger.warning("fail to map weak aug [{}] "
+                               "with error: {} and stack:\n{}".format(
+                                   op, e, str(stack_info)))
+                raise e
+
+        for op in self.strong_augs_cls:
+            try:
+                strong_data = op(strong_data)
+            except Exception as e:
+                stack_info = traceback.format_exc()
+                logger.warning("fail to map strong aug [{}] "
+                               "with error: {} and stack:\n{}".format(
+                                   op, e, str(stack_info)))
+                raise e
+
+        weak_data['strong_aug'] = strong_data
+        return weak_data
+
+
+class BatchCompose_SSOD(Compose):
+    """
+    Batch transform pipeline for samples carrying a 'strong_aug' entry.
+
+    The strong samples are split off from the weak ones, every batch op is
+    applied to both lists, and two batches are returned.
+
+    Args:
+        transforms (list): op configs, handled as in `Compose`
+        num_classes (int): forwarded to `Compose`, default 80
+        collate_batch (bool): when True both batches are built with
+            `default_collate_fn`; otherwise values are gathered per key and
+            only keys without 'gt_', 'is_crowd' or 'difficult' in their
+            name are stacked
+    """
+
+    def __init__(self, transforms, num_classes=80, collate_batch=True):
+        super(BatchCompose_SSOD, self).__init__(transforms, num_classes)
+        self.collate_batch = collate_batch
+
+    def __call__(self, data):
+        # split strong_data from data(weak_data)
+        strong_data = [sample.pop('strong_aug') for sample in data]
+
+        for op in self.transforms_cls:
+            try:
+                data = op(data)
+                strong_data = op(strong_data)
+            except Exception as e:
+                stack_info = traceback.format_exc()
+                logger.warning("fail to map batch transform [{}] "
+                               "with error: {} and stack:\n{}".format(
+                                   op, e, str(stack_info)))
+                raise e
+
+        # remove keys which is not needed by model
+        for k in ['h', 'w', 'flipped']:
+            for sample in data:
+                sample.pop(k, None)
+            for sample in strong_data:
+                sample.pop(k, None)
+
+        # batch data, if user-define batch function needed
+        # use user-defined here
+        if self.collate_batch:
+            batch_data = default_collate_fn(data)
+            strong_batch_data = default_collate_fn(strong_data)
+            return batch_data, strong_batch_data
+
+        def gather(samples):
+            # one list per key; stacked unless the key is ground-truth like
+            gathered = {}
+            for key in samples[0].keys():
+                column = [sample[key] for sample in samples]
+                if not ('gt_' in key or 'is_crowd' in key or
+                        'difficult' in key):
+                    column = np.stack(column, axis=0)
+                gathered[key] = column
+            return gathered
+
+        batch_data = gather(data)
+        strong_batch_data = gather(strong_data)
+        return batch_data, strong_batch_data
+
+
+class CombineSSODLoader(object):
+    """
+    Endless iterator pairing a labeled loader with an unlabeled loader.
+
+    Every step yields a 4-tuple built from elements 0 and 1 of the next
+    labeled item followed by elements 0 and 1 of the next unlabeled item.
+    A loader that runs out is restarted with a fresh iterator. Any other
+    error raised by a loader is propagated instead of being swallowed.
+
+    Args:
+        label_loader: iterable yielding indexable labeled items
+        unlabel_loader: iterable yielding indexable unlabeled items
+    """
+
+    def __init__(self, label_loader, unlabel_loader):
+        self.label_loader = label_loader
+        self.unlabel_loader = unlabel_loader
+        # created on first use, replaced whenever a loader is exhausted
+        self.label_loader_iter = None
+        self.unlabel_loader_iter = None
+
+    def _next_from(self, loader, iter_attr):
+        """Return the next item of `loader`, restarting it when exhausted."""
+        current = getattr(self, iter_attr, None)
+        if current is not None:
+            try:
+                return next(current)
+            except StopIteration:
+                # exhausted: fall through and start over
+                pass
+        current = iter(loader)
+        setattr(self, iter_attr, current)
+        return next(current)
+
+    def __iter__(self):
+        while True:
+            label_samples = self._next_from(self.label_loader,
+                                            'label_loader_iter')
+            unlabel_samples = self._next_from(self.unlabel_loader,
+                                              'unlabel_loader_iter')
+
+            yield (
+                label_samples[0],  # sup weak
+                label_samples[1],  # sup strong
+                unlabel_samples[0],  # unsup weak
+                unlabel_samples[1]  # unsup strong
+            )
+
+    def __call__(self):
+        return self.__iter__()
+
+
+class BaseSemiDataLoader(object):
+    """Paired labeled/unlabeled data loader for semi-supervised training.
+
+    The constructor only builds the sample-level and batch-level transform
+    pipelines. The datasets are bound later by calling the instance, which
+    creates one DataLoader per branch, combines them with
+    CombineSSODLoader and returns `self`, ready to be iterated.
+
+    Args:
+        sample_transforms (list): ops forwarded to Compose_SSOD.
+        weak_aug (list): weak augmentation ops forwarded to Compose_SSOD.
+        strong_aug (list): strong augmentation ops forwarded to
+            Compose_SSOD.
+        sup_batch_transforms (list): batch ops of the labeled branch.
+        unsup_batch_transforms (list): batch ops of the unlabeled branch.
+        sup_batch_size (int): batch size of the default labeled sampler.
+        unsup_batch_size (int): batch size of the default unlabeled sampler.
+        shuffle (bool): forwarded to the default batch samplers.
+        drop_last (bool): forwarded to the default batch samplers.
+        num_classes (int): forwarded to both transform pipelines.
+        collate_batch (bool): forwarded to BatchCompose_SSOD.
+        use_shared_memory (bool): whether the DataLoaders may use shared
+            memory; ignored on win32/darwin and when less than 1G of
+            shared memory is reported.
+        **kwargs: kept and forwarded to `set_kwargs` of both datasets.
+    """
+
+    def __init__(self,
+                 sample_transforms=[],
+                 weak_aug=[],
+                 strong_aug=[],
+                 sup_batch_transforms=[],
+                 unsup_batch_transforms=[],
+                 sup_batch_size=1,
+                 unsup_batch_size=1,
+                 shuffle=True,
+                 drop_last=True,
+                 num_classes=80,
+                 collate_batch=True,
+                 use_shared_memory=False,
+                 **kwargs):
+        # supervised (labeled) branch
+        label_sample_ops = Compose_SSOD(
+            sample_transforms, weak_aug, strong_aug, num_classes=num_classes)
+        label_batch_ops = BatchCompose_SSOD(sup_batch_transforms, num_classes,
+                                            collate_batch)
+        self._sample_transforms_label = label_sample_ops
+        self._batch_transforms_label = label_batch_ops
+        self.batch_size_label = sup_batch_size
+
+        # unsupervised (unlabeled) branch
+        unlabel_sample_ops = Compose_SSOD(
+            sample_transforms, weak_aug, strong_aug, num_classes=num_classes)
+        unlabel_batch_ops = BatchCompose_SSOD(unsup_batch_transforms,
+                                              num_classes, collate_batch)
+        self._sample_transforms_unlabel = unlabel_sample_ops
+        self._batch_transforms_unlabel = unlabel_batch_ops
+        self.batch_size_unlabel = unsup_batch_size
+
+        # options shared by both branches
+        self.kwargs = kwargs
+        self.use_shared_memory = use_shared_memory
+        self.drop_last = drop_last
+        self.shuffle = shuffle
+
+    def _bind_dataset(self, dataset, sample_ops):
+        """Download/parse `dataset` and attach transforms and extra kwargs."""
+        dataset.check_or_download_dataset()
+        dataset.parse_dataset()
+        dataset.set_transform(sample_ops)
+        dataset.set_kwargs(**self.kwargs)
+
+    def _pick_sampler(self, sampler, dataset, batch_size):
+        """Return the given sampler, or a default DistributedBatchSampler."""
+        if sampler is not None:
+            return sampler
+        return DistributedBatchSampler(
+            dataset,
+            batch_size=batch_size,
+            shuffle=self.shuffle,
+            drop_last=self.drop_last)
+
+    def __call__(self,
+                 dataset_label,
+                 dataset_unlabel,
+                 worker_num,
+                 batch_sampler_label=None,
+                 batch_sampler_unlabel=None,
+                 return_list=False):
+        """Bind both datasets, build the loaders and return `self`."""
+        # supervised dataset
+        self.dataset_label = dataset_label
+        self._bind_dataset(dataset_label, self._sample_transforms_label)
+        self._batch_sampler_label = self._pick_sampler(
+            batch_sampler_label, dataset_label, self.batch_size_label)
+
+        # unsupervised dataset: its `length` is set to the labeled dataset
+        # length before it gets parsed
+        self.dataset_unlabel = dataset_unlabel
+        dataset_unlabel.length = dataset_label.__len__()
+        self._bind_dataset(dataset_unlabel, self._sample_transforms_unlabel)
+        self._batch_sampler_unlabel = self._pick_sampler(
+            batch_sampler_unlabel, dataset_unlabel, self.batch_size_unlabel)
+
+        # DataLoader do not start sub-process in Windows and Mac
+        # system, do not need to use shared memory
+        shm_enabled = self.use_shared_memory and \
+            sys.platform not in ['win32', 'darwin']
+        if shm_enabled:
+            # shared memory smaller than 1G(1024M) is considered too small
+            shm_size = _get_shared_memory_size_in_M()
+            if shm_size is not None and shm_size < 1024.:
+                logger.warning("Shared memory size is less than 1G, "
+                               "disable shared_memory in DataLoader")
+                shm_enabled = False
+
+        loader_options = dict(
+            num_workers=worker_num,
+            return_list=return_list,
+            use_shared_memory=shm_enabled)
+        self.dataloader_label = DataLoader(
+            dataset=self.dataset_label,
+            batch_sampler=self._batch_sampler_label,
+            collate_fn=self._batch_transforms_label,
+            **loader_options)
+        self.dataloader_unlabel = DataLoader(
+            dataset=self.dataset_unlabel,
+            batch_sampler=self._batch_sampler_unlabel,
+            collate_fn=self._batch_transforms_unlabel,
+            **loader_options)
+
+        combined = CombineSSODLoader(self.dataloader_label,
+                                     self.dataloader_unlabel)
+        self.dataloader = combined
+        self.loader = iter(combined)
+        return self
+
+    def __len__(self):
+        # one epoch is as long as the labeled batch sampler
+        return len(self._batch_sampler_label)
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        return next(self.loader)
+
+    def next(self):
+        # python2 compatibility
+        return self.__next__()
+
+
+@register
+class SemiTrainReader(BaseSemiDataLoader):
+    """Registered reader for semi-supervised training.
+
+    Accepts the same options as BaseSemiDataLoader and forwards them
+    unchanged; anything given through `**kwargs` (for example
+    `use_shared_memory`) is passed on as well. `num_classes` is declared
+    as a shared config value.
+    """
+    __shared__ = ['num_classes']
+
+    def __init__(self,
+                 sample_transforms=[],
+                 weak_aug=[],
+                 strong_aug=[],
+                 sup_batch_transforms=[],
+                 unsup_batch_transforms=[],
+                 sup_batch_size=1,
+                 unsup_batch_size=1,
+                 shuffle=True,
+                 drop_last=True,
+                 num_classes=80,
+                 collate_batch=True,
+                 **kwargs):
+        super(SemiTrainReader, self).__init__(
+            sample_transforms=sample_transforms,
+            weak_aug=weak_aug,
+            strong_aug=strong_aug,
+            sup_batch_transforms=sup_batch_transforms,
+            unsup_batch_transforms=unsup_batch_transforms,
+            sup_batch_size=sup_batch_size,
+            unsup_batch_size=unsup_batch_size,
+            shuffle=shuffle,
+            drop_last=drop_last,
+            num_classes=num_classes,
+            collate_batch=collate_batch,
+            **kwargs)
--- a/ppdet/data/shm_utils.py
+++ b/ppdet/data/shm_utils.py
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+# Size unit suffixes understood by _parse_size_in_M, ordered by increasing
+# magnitude (each step is a factor of 1024, 'M' being the reference unit).
+SIZE_UNIT = ['K', 'M', 'G', 'T']
+# Shell command whose output lists the mounted filesystems.
+SHM_QUERY_CMD = 'df -h'
+# Substring identifying shared-memory rows in the command output.
+SHM_KEY = 'shm'
+# Mount point preferred when several shared-memory rows are found.
+SHM_DEFAULT_MOUNT = '/dev/shm'
+
+# [ shared memory size check ]
+# In detection models, image/target data occupies a lot of memory and
+# therefore a lot of shared memory in a multi-process DataLoader. The
+# following code queries the shared memory size and checks it, so that
+# shared memory can be disabled when there is not enough of it.
+# The shared memory size is obtained as follows:
+# 1. use `df -h` to get all mount info
+# 2. pick the entries whose mount info contains 'shm'
+# 3. if there is only one 'shm' entry, return its size
+# 4. if there are multiple 'shm' entries, prefer the default mount
+#    directory '/dev/shm' of Linux-like systems, otherwise return the
+#    biggest size.
+
+
+def _parse_size_in_M(size_str):
+    """Convert a human-readable size printed by `df -h` into M units.
+
+    Args:
+        size_str (str): size such as '64M', '1.5G', '512K', '2GB' or
+            '8Gi'. A bare number (e.g. '0' or '512', optionally followed
+            by 'B') is interpreted as a byte count.
+
+    Returns:
+        float: the size expressed in M (1M = 1024K = 1024 * 1024 bytes).
+
+    Raises:
+        AssertionError: if the unit suffix is not listed in SIZE_UNIT.
+        ValueError: if the numeric part cannot be parsed as a float.
+    """
+    text = size_str.strip()
+    # reduce '2GB', '2GiB' and '2Gi' to '2G' (and '512B' to '512')
+    if text.endswith('B'):
+        text = text[:-1]
+    if text.endswith('i'):
+        text = text[:-1]
+
+    # no unit suffix left: plain byte count, e.g. the '0' shown for an
+    # empty or completely full filesystem
+    if text and (text[-1].isdigit() or text[-1] == '.'):
+        return float(text) / (1024**2)
+
+    num, unit = text[:-1], text[-1:]
+    # raised explicitly (instead of `assert`) so that the check is not
+    # stripped when python runs with -O
+    if unit not in SIZE_UNIT:
+        raise AssertionError("unknown shm size unit {}".format(unit))
+    return float(num) * \
+            (1024 ** (SIZE_UNIT.index(unit) - 1))
+
+
+def _get_shared_memory_size_in_M():
+    """Query the shared memory size through `df -h`.
+
+    Rows of the command output containing SHM_KEY are collected and the
+    4th column of each row is parsed with _parse_size_in_M. Rows that are
+    too short or whose size cannot be parsed are skipped.
+
+    Returns:
+        float or None: the size in M of the only shared-memory row; when
+        several rows exist, the size of the one mounted on
+        SHM_DEFAULT_MOUNT if present, otherwise the biggest size. None is
+        returned when the command cannot be run or no usable row exists.
+    """
+    try:
+        # the context manager closes the pipe and reaps the sub-process
+        with os.popen(SHM_QUERY_CMD) as pipe:
+            df_infos = pipe.readlines()
+    except (OSError, ValueError):
+        return None
+
+    shm_sizes = []  # (mount point, size in M) of every usable row
+    for df_info in df_infos:
+        if SHM_KEY not in df_info:
+            continue
+        fields = df_info.split()
+        if len(fields) < 4:
+            # incomplete row, e.g. a long filesystem name wrapped by df
+            continue
+        try:
+            size = _parse_size_in_M(fields[3])
+        except (AssertionError, ValueError, IndexError):
+            # unknown size format: ignore the row rather than abort the
+            # DataLoader construction
+            continue
+        shm_sizes.append((fields[-1], size))
+
+    if not shm_sizes:
+        return None
+    if len(shm_sizes) == 1:
+        return shm_sizes[0][1]
+
+    for mount, size in shm_sizes:
+        if mount == SHM_DEFAULT_MOUNT:
+            return size
+    return max(size for _, size in shm_sizes)
--- a/ppdet/data/source/__init__.py
+++ b/ppdet/data/source/__init__.py
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from . import coco
+from . import voc
+from . import category
+
+from .coco import *
+from .voc import *
+from .category import *
+from .dataset import ImageFolder
--- a/ppdet/data/source/__pycache__/__init__.cpython-310.pyc
+++ b/ppdet/data/source/__pycache__/__init__.cpython-310.pyc
--- a/ppdet/data/source/__pycache__/category.cpython-310.pyc
+++ b/ppdet/data/source/__pycache__/category.cpython-310.pyc
--- a/ppdet/data/source/__pycache__/coco.cpython-310.pyc
+++ b/ppdet/data/source/__pycache__/coco.cpython-310.pyc
--- a/ppdet/data/source/__pycache__/dataset.cpython-310.pyc
+++ b/ppdet/data/source/__pycache__/dataset.cpython-310.pyc
--- a/ppdet/data/source/__pycache__/voc.cpython-310.pyc
+++ b/ppdet/data/source/__pycache__/voc.cpython-310.pyc
--- a/ppdet/data/source/category.py
+++ b/ppdet/data/source/category.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+from ppdet.data.source.voc import pascalvoc_label
+from ppdet.utils.logger import setup_logger
+logger = setup_logger(__name__)
+
+__all__ = ['get_categories']
+
+
+def _load_txt_categories(anno_file):
+    """Read one category name per line from a text label file.
+
+    A leading 'background' entry is dropped. An empty file yields two
+    empty mappings.
+
+    Returns:
+        tuple(dict, dict): (clsid2catid, catid2name), both keyed by the
+        0-based line index of the category.
+    """
+    with open(anno_file) as f:
+        cats = [line.strip() for line in f]
+    if cats and cats[0] == 'background':
+        cats = cats[1:]
+
+    clsid2catid = {i: i for i in range(len(cats))}
+    catid2name = {i: name for i, name in enumerate(cats)}
+    return clsid2catid, catid2name
+
+
+def get_categories(metric_type, anno_file=None, arch=None):
+    """
+    Get class id to category id map and category id
+    to category name map from annotation file.
+
+    Args:
+        metric_type (str): metric type, 'coco' or 'voc' (case-insensitive).
+        anno_file (str): annotation file path. For 'coco' a json file
+            (read with pycocotools) or a txt label list; for 'voc' a txt
+            label list. When it is missing or not a file, the default
+            categories of the metric type are returned.
+        arch (str): unused by this function.
+
+    Returns:
+        tuple(dict, dict): (clsid2catid, catid2name).
+
+    Raises:
+        ValueError: if metric_type is unknown, or if a 'coco' annotation
+            file is neither json nor txt.
+    """
+    has_anno_file = bool(anno_file) and os.path.isfile(anno_file)
+    if not has_anno_file:
+        logger.warning(
+            "anno_file '{}' is None or not set or not exist, "
+            "please recheck TrainDataset/EvalDataset/TestDataset.anno_path, "
+            "otherwise the default categories will be used by metric_type.".
+            format(anno_file))
+
+    metric = metric_type.lower()
+    if metric == 'coco':
+        if not has_anno_file:
+            # anno file not exist, load default categories of COCO17
+            logger.warning("metric_type: {}, load default categories of COCO.".
+                           format(metric_type))
+            return _coco17_category()
+
+        if anno_file.endswith('json'):
+            # lazy import pycocotools here
+            from pycocotools.coco import COCO
+            coco = COCO(anno_file)
+            cats = coco.loadCats(coco.getCatIds())
+
+            clsid2catid = {i: cat['id'] for i, cat in enumerate(cats)}
+            catid2name = {cat['id']: cat['name'] for cat in cats}
+            return clsid2catid, catid2name
+
+        if anno_file.endswith('txt'):
+            return _load_txt_categories(anno_file)
+
+        raise ValueError("anno_file {} should be json or txt.".format(
+            anno_file))
+
+    if metric == 'voc':
+        if has_anno_file:
+            return _load_txt_categories(anno_file)
+
+        # anno file not exist, load default categories of
+        # VOC all 20 categories
+        logger.warning("metric_type: {}, load default categories of VOC.".
+                       format(metric_type))
+        return _vocall_category()
+
+    raise ValueError("unknown metric type {}".format(metric_type))
+
+
+def _coco17_category():
+    """
+    Get class id to category id map and category id
+    to category name map of COCO2017 dataset
+
+    Returns:
+        tuple(dict, dict): clsid2catid maps the 0-based class id (position
+        in the table below) to the COCO category id, catid2name maps the
+        COCO category id to its name. 'background' is not included.
+    """
+    # (category id, category name), ordered by category id
+    coco17 = (
+        (1, 'person'),
+        (2, 'bicycle'),
+        (3, 'car'),
+        (4, 'motorcycle'),
+        (5, 'airplane'),
+        (6, 'bus'),
+        (7, 'train'),
+        (8, 'truck'),
+        (9, 'boat'),
+        (10, 'traffic light'),
+        (11, 'fire hydrant'),
+        (13, 'stop sign'),
+        (14, 'parking meter'),
+        (15, 'bench'),
+        (16, 'bird'),
+        (17, 'cat'),
+        (18, 'dog'),
+        (19, 'horse'),
+        (20, 'sheep'),
+        (21, 'cow'),
+        (22, 'elephant'),
+        (23, 'bear'),
+        (24, 'zebra'),
+        (25, 'giraffe'),
+        (27, 'backpack'),
+        (28, 'umbrella'),
+        (31, 'handbag'),
+        (32, 'tie'),
+        (33, 'suitcase'),
+        (34, 'frisbee'),
+        (35, 'skis'),
+        (36, 'snowboard'),
+        (37, 'sports ball'),
+        (38, 'kite'),
+        (39, 'baseball bat'),
+        (40, 'baseball glove'),
+        (41, 'skateboard'),
+        (42, 'surfboard'),
+        (43, 'tennis racket'),
+        (44, 'bottle'),
+        (46, 'wine glass'),
+        (47, 'cup'),
+        (48, 'fork'),
+        (49, 'knife'),
+        (50, 'spoon'),
+        (51, 'bowl'),
+        (52, 'banana'),
+        (53, 'apple'),
+        (54, 'sandwich'),
+        (55, 'orange'),
+        (56, 'broccoli'),
+        (57, 'carrot'),
+        (58, 'hot dog'),
+        (59, 'pizza'),
+        (60, 'donut'),
+        (61, 'cake'),
+        (62, 'chair'),
+        (63, 'couch'),
+        (64, 'potted plant'),
+        (65, 'bed'),
+        (67, 'dining table'),
+        (70, 'toilet'),
+        (72, 'tv'),
+        (73, 'laptop'),
+        (74, 'mouse'),
+        (75, 'remote'),
+        (76, 'keyboard'),
+        (77, 'cell phone'),
+        (78, 'microwave'),
+        (79, 'oven'),
+        (80, 'toaster'),
+        (81, 'sink'),
+        (82, 'refrigerator'),
+        (84, 'book'),
+        (85, 'clock'),
+        (86, 'vase'),
+        (87, 'scissors'),
+        (88, 'teddy bear'),
+        (89, 'hair drier'),
+        (90, 'toothbrush'), )
+
+    catid2name = dict(coco17)
+    clsid2catid = {
+        clsid: catid
+        for clsid, (catid, _) in enumerate(coco17)
+    }
+
+    return clsid2catid, catid2name
+
+
+def _vocall_category():
+    """
+    Get class id to category id map and category id
+    to category name map of mixup voc dataset
+
+    The category names come from pascalvoc_label() and are ordered by
+    their label value; both returned maps are keyed by that 0-based rank.
+    """
+    name2label = pascalvoc_label()
+    ranked = sorted(name2label.items(), key=lambda pair: pair[1])
+
+    clsid2catid = {}
+    catid2name = {}
+    for idx, (name, _) in enumerate(ranked):
+        clsid2catid[idx] = idx
+        catid2name[idx] = name
+
+    return clsid2catid, catid2name
--- a/ppdet/data/source/coco.py
+++ b/ppdet/data/source/coco.py
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
+# limitations under the License.
+
+import os
+import copy
+try:
+    from collections.abc import Sequence
+except Exception:
+    from collections import Sequence
+import numpy as np
+from ppdet.core.workspace import register, serializable
+from .dataset import DetDataset
+
+from ppdet.utils.logger import setup_logger
+logger = setup_logger(__name__)
+
+__all__ = ['COCODataSet', 'SlicedCOCODataSet', 'SemiCOCODataSet']
+
+
+@register
+@serializable
+class COCODataSet(DetDataset):
+    """
+    Load dataset with COCO format.
+
+    Args:
+        dataset_dir (str): root directory for dataset.
+        image_dir (str): directory for images.
+        anno_path (str): coco annotation file path.
+        data_fields (list): key name of data dictionary, at least have 'image'.
+        sample_num (int): number of samples to load, -1 means all.
+        load_crowd (bool): whether to load crowded ground-truth. 
+            False as default
+        allow_empty (bool): whether to load empty entry. False as default
+        empty_ratio (float): the ratio of empty record number to total 
+            record's, if empty_ratio is out of [0. ,1.), do not sample the 
+            records and use all the empty entries. 1. as default
+        repeat (int): repeat times for dataset, use in benchmark.
+    """
+
+    def __init__(self,
+                 dataset_dir=None,
+                 image_dir=None,
+                 anno_path=None,
+                 data_fields=['image'],
+                 sample_num=-1,
+                 load_crowd=False,
+                 allow_empty=False,
+                 empty_ratio=1.,
+                 repeat=1):
+        super(COCODataSet, self).__init__(
+            dataset_dir,
+            image_dir,
+            anno_path,
+            data_fields,
+            sample_num,
+            repeat=repeat)
+        self.load_image_only = False
+        self.load_semantic = False
+        self.load_crowd = load_crowd
+        self.allow_empty = allow_empty
+        self.empty_ratio = empty_ratio
+
+    def _sample_empty(self, records, num):
+        """Randomly keep a subset of the empty records.
+
+        Args:
+            records (list): records without any ground-truth box.
+            num (int): number of non-empty records.
+
+        Returns:
+            list: all of `records` when empty_ratio is out of [0., 1.),
+            otherwise a random sample sized so that empty records make up
+            about `empty_ratio` of the total.
+        """
+        # if empty_ratio is out of [0. ,1.), do not sample the records
+        if self.empty_ratio < 0. or self.empty_ratio >= 1.:
+            return records
+        import random
+        sample_num = min(
+            int(num * self.empty_ratio / (1 - self.empty_ratio)), len(records))
+        records = random.sample(records, sample_num)
+        return records
+
+    def parse_dataset(self):
+        """Parse the COCO annotation file and fill `self.roidbs`.
+
+        Also sets `self.catid2clsid` and `self.cname2cid`. Images whose
+        file is missing, whose annotated size is negative, or (unless
+        allow_empty is set) that have no valid box are skipped.
+
+        Raises:
+            AssertionError: if the annotation path does not end with
+                '.json' or no record could be loaded.
+        """
+        anno_path = os.path.join(self.dataset_dir, self.anno_path)
+        image_dir = os.path.join(self.dataset_dir, self.image_dir)
+
+        assert anno_path.endswith('.json'), \
+            'invalid coco annotation file: ' + anno_path
+        from pycocotools.coco import COCO
+        coco = COCO(anno_path)
+        img_ids = coco.getImgIds()
+        img_ids.sort()
+        cat_ids = coco.getCatIds()
+        records = []
+        empty_records = []
+        ct = 0
+
+        self.catid2clsid = dict({catid: i for i, catid in enumerate(cat_ids)})
+        self.cname2cid = dict({
+            coco.loadCats(catid)[0]['name']: clsid
+            for catid, clsid in self.catid2clsid.items()
+        })
+
+        if 'annotations' not in coco.dataset:
+            self.load_image_only = True
+            logger.warning('Annotation file: {} does not contains ground truth '
+                           'and load image information only.'.format(anno_path))
+
+        for img_id in img_ids:
+            img_anno = coco.loadImgs([img_id])[0]
+            im_fname = img_anno['file_name']
+            im_w = float(img_anno['width'])
+            im_h = float(img_anno['height'])
+
+            im_path = os.path.join(image_dir,
+                                   im_fname) if image_dir else im_fname
+            is_empty = False
+            if not os.path.exists(im_path):
+                logger.warning('Illegal image file: {}, and it will be '
+                               'ignored'.format(im_path))
+                continue
+
+            if im_w < 0 or im_h < 0:
+                logger.warning('Illegal width: {} or height: {} in annotation, '
+                               'and im_id: {} will be ignored'.format(
+                                   im_w, im_h, img_id))
+                continue
+
+            coco_rec = {
+                'im_file': im_path,
+                'im_id': np.array([img_id]),
+                'h': im_h,
+                'w': im_w,
+            } if 'image' in self.data_fields else {}
+
+            if not self.load_image_only:
+                ins_anno_ids = coco.getAnnIds(
+                    imgIds=[img_id], iscrowd=None if self.load_crowd else False)
+                instances = coco.loadAnns(ins_anno_ids)
+
+                bboxes = []
+                for inst in instances:
+                    # check gt bbox
+                    if inst.get('ignore', False):
+                        continue
+                    if 'bbox' not in inst.keys():
+                        continue
+                    else:
+                        if not any(np.array(inst['bbox'])):
+                            continue
+
+                    # convert [x, y, w, h] to corner format [x1, y1, x2, y2]
+                    x1, y1, box_w, box_h = inst['bbox']
+                    x2 = x1 + box_w
+                    y2 = y1 + box_h
+                    eps = 1e-5
+                    if inst['area'] > 0 and x2 - x1 > eps and y2 - y1 > eps:
+                        inst['clean_bbox'] = [
+                            round(float(x), 3) for x in [x1, y1, x2, y2]
+                        ]
+                        bboxes.append(inst)
+                    else:
+                        logger.warning(
+                            'Found an invalid bbox in annotations: im_id: {}, '
+                            'area: {} x1: {}, y1: {}, x2: {}, y2: {}.'.format(
+                                img_id, float(inst['area']), x1, y1, x2, y2))
+
+                num_bbox = len(bboxes)
+                if num_bbox <= 0 and not self.allow_empty:
+                    continue
+                elif num_bbox <= 0:
+                    is_empty = True
+
+                gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32)
+                gt_class = np.zeros((num_bbox, 1), dtype=np.int32)
+                is_crowd = np.zeros((num_bbox, 1), dtype=np.int32)
+                gt_poly = [None] * num_bbox
+                gt_track_id = -np.ones((num_bbox, 1), dtype=np.int32)
+
+                has_segmentation = False
+                has_track_id = False
+                # instances whose segmentation holds no coordinates; they
+                # are removed after the loop so that neither `bboxes` nor
+                # the row indices change while iterating
+                drop_ids = set()
+                for i, box in enumerate(bboxes):
+                    catid = box['category_id']
+                    gt_class[i][0] = self.catid2clsid[catid]
+                    gt_bbox[i, :] = box['clean_bbox']
+                    is_crowd[i][0] = box['iscrowd']
+                    # check RLE format 
+                    if 'segmentation' in box and box['iscrowd'] == 1:
+                        gt_poly[i] = [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]
+                    elif 'segmentation' in box and box['segmentation']:
+                        if not np.array(
+                                box['segmentation'],
+                                dtype=object).size > 0 and not self.allow_empty:
+                            drop_ids.add(i)
+                        else:
+                            gt_poly[i] = box['segmentation']
+                        has_segmentation = True
+
+                    if 'track_id' in box:
+                        gt_track_id[i][0] = box['track_id']
+                        has_track_id = True
+
+                if drop_ids:
+                    keep = np.array(
+                        [i for i in range(num_bbox) if i not in drop_ids],
+                        dtype=np.int64)
+                    gt_bbox = gt_bbox[keep]
+                    gt_class = gt_class[keep]
+                    is_crowd = is_crowd[keep]
+                    gt_track_id = gt_track_id[keep]
+                    gt_poly = [gt_poly[i] for i in keep]
+
+                if has_segmentation and not any(
+                        gt_poly) and not self.allow_empty:
+                    continue
+
+                gt_rec = {
+                    'is_crowd': is_crowd,
+                    'gt_class': gt_class,
+                    'gt_bbox': gt_bbox,
+                    'gt_poly': gt_poly,
+                }
+                if has_track_id:
+                    gt_rec.update({'gt_track_id': gt_track_id})
+
+                # only export the fields requested through data_fields
+                for k, v in gt_rec.items():
+                    if k in self.data_fields:
+                        coco_rec[k] = v
+
+                # TODO: remove load_semantic
+                if self.load_semantic and 'semantic' in self.data_fields:
+                    seg_path = os.path.join(self.dataset_dir, 'stuffthingmaps',
+                                            'train2017', im_fname[:-3] + 'png')
+                    coco_rec.update({'semantic': seg_path})
+
+            logger.debug('Load file: {}, im_id: {}, h: {}, w: {}.'.format(
+                im_path, img_id, im_h, im_w))
+            if is_empty:
+                empty_records.append(coco_rec)
+            else:
+                records.append(coco_rec)
+            ct += 1
+            if self.sample_num > 0 and ct >= self.sample_num:
+                break
+        assert ct > 0, 'not found any coco record in %s' % (anno_path)
+        logger.info('Load [{} samples valid, {} samples invalid] in file {}.'.
+                    format(ct, len(img_ids) - ct, anno_path))
+        if self.allow_empty and len(empty_records) > 0:
+            empty_records = self._sample_empty(empty_records, len(records))
+            records += empty_records
+        self.roidbs = records
+
+
+@register
+@serializable
+class SlicedCOCODataSet(COCODataSet):
+    """COCO-format dataset whose images are cut into slices with sahi.
+
+    Accepts the same arguments as COCODataSet, plus:
+
+    Args:
+        sliced_size (list): slice size; item 0 is passed as slice height
+            and item 1 as slice width.
+        overlap_ratio (list): overlap between slices; item 0 is passed as
+            height overlap ratio and item 1 as width overlap ratio.
+    """
+
+    def __init__(
+            self,
+            dataset_dir=None,
+            image_dir=None,
+            anno_path=None,
+            data_fields=['image'],
+            sample_num=-1,
+            load_crowd=False,
+            allow_empty=False,
+            empty_ratio=1.,
+            repeat=1,
+            sliced_size=[640, 640],
+            overlap_ratio=[0.25, 0.25], ):
+        super(SlicedCOCODataSet, self).__init__(
+            dataset_dir=dataset_dir,
+            image_dir=image_dir,
+            anno_path=anno_path,
+            data_fields=data_fields,
+            sample_num=sample_num,
+            load_crowd=load_crowd,
+            allow_empty=allow_empty,
+            empty_ratio=empty_ratio,
+            repeat=repeat, )
+        self.sliced_size = sliced_size
+        self.overlap_ratio = overlap_ratio
+
+    def parse_dataset(self):
+        """Slice every image of the annotation file and fill `self.roidbs`.
+
+        One record is created per slice; no ground-truth annotation is
+        attached to the records. Also sets `self.catid2clsid` and
+        `self.cname2cid`.
+
+        Raises:
+            AssertionError: if the annotation path does not end with
+                '.json' or no image could be loaded.
+            Exception: re-raised when sahi cannot be imported.
+        """
+        anno_path = os.path.join(self.dataset_dir, self.anno_path)
+        image_dir = os.path.join(self.dataset_dir, self.image_dir)
+
+        assert anno_path.endswith('.json'), \
+            'invalid coco annotation file: ' + anno_path
+        from pycocotools.coco import COCO
+        coco = COCO(anno_path)
+        img_ids = coco.getImgIds()
+        img_ids.sort()
+        cat_ids = coco.getCatIds()
+        records = []
+        # NOTE(review): nothing is ever appended to empty_records in this
+        # method, so the allow_empty handling at the end never runs.
+        empty_records = []
+        ct = 0  # number of source images that were sliced
+        ct_sub = 0  # total number of slices produced
+
+        self.catid2clsid = dict({catid: i for i, catid in enumerate(cat_ids)})
+        self.cname2cid = dict({
+            coco.loadCats(catid)[0]['name']: clsid
+            for catid, clsid in self.catid2clsid.items()
+        })
+
+        if 'annotations' not in coco.dataset:
+            self.load_image_only = True
+            logger.warning('Annotation file: {} does not contains ground truth '
+                           'and load image information only.'.format(anno_path))
+        # sahi is an optional dependency, only needed for sliced datasets
+        try:
+            import sahi
+            from sahi.slicing import slice_image
+        except Exception as e:
+            logger.error(
+                'sahi not found, plaese install sahi. '
+                'for example: `pip install sahi`, see https://github.com/obss/sahi.'
+            )
+            raise e
+
+        # NOTE(review): sub_img_ids is never incremented, so 'im_id' below
+        # restarts from 0 for every source image -- confirm this is intended.
+        sub_img_ids = 0
+        for img_id in img_ids:
+            img_anno = coco.loadImgs([img_id])[0]
+            im_fname = img_anno['file_name']
+            im_w = float(img_anno['width'])
+            im_h = float(img_anno['height'])
+
+            im_path = os.path.join(image_dir,
+                                   im_fname) if image_dir else im_fname
+            # NOTE(review): is_empty is assigned but never read here.
+            is_empty = False
+            if not os.path.exists(im_path):
+                logger.warning('Illegal image file: {}, and it will be '
+                               'ignored'.format(im_path))
+                continue
+
+            if im_w < 0 or im_h < 0:
+                logger.warning('Illegal width: {} or height: {} in annotation, '
+                               'and im_id: {} will be ignored'.format(
+                                   im_w, im_h, img_id))
+                continue
+
+            slice_image_result = sahi.slicing.slice_image(
+                image=im_path,
+                slice_height=self.sliced_size[0],
+                slice_width=self.sliced_size[1],
+                overlap_height_ratio=self.overlap_ratio[0],
+                overlap_width_ratio=self.overlap_ratio[1])
+
+            sub_img_num = len(slice_image_result)
+            for _ind in range(sub_img_num):
+                im = slice_image_result.images[_ind]
+                # each record carries the slice itself, the id of the
+                # source image, the slice starting pixels and a flag
+                # marking the last slice of the source image
+                coco_rec = {
+                    'image': im,
+                    'im_id': np.array([sub_img_ids + _ind]),
+                    'h': im.shape[0],
+                    'w': im.shape[1],
+                    'ori_im_id': np.array([img_id]),
+                    'st_pix': np.array(
+                        slice_image_result.starting_pixels[_ind],
+                        dtype=np.float32),
+                    'is_last': 1 if _ind == sub_img_num - 1 else 0,
+                } if 'image' in self.data_fields else {}
+                records.append(coco_rec)
+            ct_sub += sub_img_num
+            ct += 1
+            # sample_num limits the number of source images, not slices
+            if self.sample_num > 0 and ct >= self.sample_num:
+                break
+        assert ct > 0, 'not found any coco record in %s' % (anno_path)
+        logger.info('{} samples and slice to {} sub_samples in file {}'.format(
+            ct, ct_sub, anno_path))
+        if self.allow_empty and len(empty_records) > 0:
+            empty_records = self._sample_empty(empty_records, len(records))
+            records += empty_records
+        self.roidbs = records
+
+
+@register
+@serializable
+class SemiCOCODataSet(COCODataSet):
+    """Semi-COCODataSet used for supervised and unsupervised dataSet.
+
+    Args:
+        dataset_dir (str): root directory for dataset.
+        image_dir (str): directory for images.
+        anno_path (str): coco annotation file path, must end with '.json'.
+        data_fields (list): key name of data dictionary, at least have 'image'.
+        sample_num (int): number of samples to load, -1 means all.
+        load_crowd (bool): whether to load crowded ground-truth.
+        allow_empty (bool): whether to load empty entry. False as default
+        empty_ratio (float): forwarded unchanged to COCODataSet.
+        repeat (int): repeat times for dataset.
+        supervised (bool): when False only the image information is loaded
+            and, if ``self.length`` > 0, the records are re-sampled (with
+            replacement) to exactly ``self.length`` entries.
+    """
+
+    def __init__(self,
+                 dataset_dir=None,
+                 image_dir=None,
+                 anno_path=None,
+                 data_fields=['image'],
+                 sample_num=-1,
+                 load_crowd=False,
+                 allow_empty=False,
+                 empty_ratio=1.,
+                 repeat=1,
+                 supervised=True):
+        super(SemiCOCODataSet, self).__init__(
+            dataset_dir, image_dir, anno_path, data_fields, sample_num,
+            load_crowd, allow_empty, empty_ratio, repeat)
+        self.supervised = supervised
+        self.length = -1  # default -1 means all
+
+    def parse_dataset(self):
+        """Read the COCO annotation file and build ``self.roidbs``.
+
+        Also fills ``self.catid2clsid`` and ``self.cname2cid``. Images whose
+        file is missing or whose annotated size is negative are skipped.
+        For an unsupervised dataset the final records are re-sampled to
+        ``self.length`` entries when ``self.length`` > 0.
+
+        Raises:
+            AssertionError: if the annotation path does not end with '.json'
+                or no record could be loaded.
+        """
+        anno_path = os.path.join(self.dataset_dir, self.anno_path)
+        image_dir = os.path.join(self.dataset_dir, self.image_dir)
+
+        assert anno_path.endswith('.json'), \
+            'invalid coco annotation file: ' + anno_path
+        from pycocotools.coco import COCO
+        coco = COCO(anno_path)
+        img_ids = coco.getImgIds()
+        img_ids.sort()
+        cat_ids = coco.getCatIds()
+        records = []
+        empty_records = []
+        ct = 0
+
+        self.catid2clsid = dict({catid: i for i, catid in enumerate(cat_ids)})
+        self.cname2cid = dict({
+            coco.loadCats(catid)[0]['name']: clsid
+            for catid, clsid in self.catid2clsid.items()
+        })
+
+        # Unlabeled data never uses ground truth, even if the file has some.
+        if 'annotations' not in coco.dataset or not self.supervised:
+            self.load_image_only = True
+            logger.warning('Annotation file: {} does not contains ground truth '
+                           'and load image information only.'.format(anno_path))
+
+        for img_id in img_ids:
+            img_anno = coco.loadImgs([img_id])[0]
+            im_fname = img_anno['file_name']
+            im_w = float(img_anno['width'])
+            im_h = float(img_anno['height'])
+
+            im_path = os.path.join(image_dir,
+                                   im_fname) if image_dir else im_fname
+            is_empty = False
+            if not os.path.exists(im_path):
+                logger.warning('Illegal image file: {}, and it will be '
+                               'ignored'.format(im_path))
+                continue
+
+            if im_w < 0 or im_h < 0:
+                logger.warning('Illegal width: {} or height: {} in annotation, '
+                               'and im_id: {} will be ignored'.format(
+                                   im_w, im_h, img_id))
+                continue
+
+            coco_rec = {
+                'im_file': im_path,
+                'im_id': np.array([img_id]),
+                'h': im_h,
+                'w': im_w,
+            } if 'image' in self.data_fields else {}
+
+            if not self.load_image_only:
+                ins_anno_ids = coco.getAnnIds(
+                    imgIds=[img_id], iscrowd=None if self.load_crowd else False)
+                instances = coco.loadAnns(ins_anno_ids)
+
+                bboxes = []
+                for inst in instances:
+                    # check gt bbox: skip ignored, missing or all-zero boxes
+                    if inst.get('ignore', False):
+                        continue
+                    if 'bbox' not in inst or not any(np.array(inst['bbox'])):
+                        continue
+
+                    x1, y1, box_w, box_h = inst['bbox']
+                    x2 = x1 + box_w
+                    y2 = y1 + box_h
+                    eps = 1e-5
+                    if inst['area'] > 0 and x2 - x1 > eps and y2 - y1 > eps:
+                        inst['clean_bbox'] = [
+                            round(float(x), 3) for x in [x1, y1, x2, y2]
+                        ]
+                        bboxes.append(inst)
+                    else:
+                        logger.warning(
+                            'Found an invalid bbox in annotations: im_id: {}, '
+                            'area: {} x1: {}, y1: {}, x2: {}, y2: {}.'.format(
+                                img_id, float(inst['area']), x1, y1, x2, y2))
+
+                num_bbox = len(bboxes)
+                if num_bbox <= 0 and not self.allow_empty:
+                    continue
+                elif num_bbox <= 0:
+                    is_empty = True
+
+                gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32)
+                gt_class = np.zeros((num_bbox, 1), dtype=np.int32)
+                is_crowd = np.zeros((num_bbox, 1), dtype=np.int32)
+                gt_poly = [None] * num_bbox
+
+                has_segmentation = False
+                # Indices of the instances that survive the segmentation check.
+                kept = []
+                for i, box in enumerate(bboxes):
+                    catid = box['category_id']
+                    gt_class[i][0] = self.catid2clsid[catid]
+                    gt_bbox[i, :] = box['clean_bbox']
+                    is_crowd[i][0] = box['iscrowd']
+                    # check RLE format
+                    if 'segmentation' in box and box['iscrowd'] == 1:
+                        gt_poly[i] = [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]
+                    elif 'segmentation' in box and box['segmentation']:
+                        has_segmentation = True
+                        # dtype=object keeps ragged polygon lists (polygons of
+                        # different lengths) from raising in recent NumPy.
+                        seg_size = np.array(
+                            box['segmentation'], dtype=object).size
+                        if seg_size == 0 and not self.allow_empty:
+                            # Reject this instance; it is filtered out of all
+                            # gt containers after the loop instead of being
+                            # popped while the list is still being iterated.
+                            continue
+                        gt_poly[i] = box['segmentation']
+                    kept.append(i)
+
+                # Drop rejected instances so every gt_* container stays aligned.
+                if len(kept) < num_bbox:
+                    kept_idx = np.asarray(kept, dtype=np.int64)
+                    gt_bbox = gt_bbox[kept_idx]
+                    gt_class = gt_class[kept_idx]
+                    is_crowd = is_crowd[kept_idx]
+                    gt_poly = [gt_poly[k] for k in kept]
+
+                if has_segmentation and not any(
+                        gt_poly) and not self.allow_empty:
+                    continue
+
+                gt_rec = {
+                    'is_crowd': is_crowd,
+                    'gt_class': gt_class,
+                    'gt_bbox': gt_bbox,
+                    'gt_poly': gt_poly,
+                }
+
+                # Only the fields requested through data_fields are exported.
+                for k, v in gt_rec.items():
+                    if k in self.data_fields:
+                        coco_rec[k] = v
+
+                # TODO: remove load_semantic
+                if self.load_semantic and 'semantic' in self.data_fields:
+                    seg_path = os.path.join(self.dataset_dir, 'stuffthingmaps',
+                                            'train2017', im_fname[:-3] + 'png')
+                    coco_rec.update({'semantic': seg_path})
+
+            logger.debug('Load file: {}, im_id: {}, h: {}, w: {}.'.format(
+                im_path, img_id, im_h, im_w))
+            if is_empty:
+                empty_records.append(coco_rec)
+            else:
+                records.append(coco_rec)
+            ct += 1
+            if self.sample_num > 0 and ct >= self.sample_num:
+                break
+        assert ct > 0, 'not found any coco record in %s' % (anno_path)
+        logger.info('Load [{} samples valid, {} samples invalid] in file {}.'.
+                    format(ct, len(img_ids) - ct, anno_path))
+        if self.allow_empty and len(empty_records) > 0:
+            empty_records = self._sample_empty(empty_records, len(records))
+            records += empty_records
+        self.roidbs = records
+
+        if self.supervised:
+            logger.info(f'Use {len(self.roidbs)} sup_samples data as LABELED')
+        else:
+            if self.length > 0:  # unsup length will be decide by sup length
+                # Sample with replacement until exactly `length` records exist.
+                all_roidbs = self.roidbs.copy()
+                selected_idxs = [
+                    np.random.choice(len(all_roidbs))
+                    for _ in range(self.length)
+                ]
+                self.roidbs = [all_roidbs[i] for i in selected_idxs]
+            logger.info(
+                f'Use {len(self.roidbs)} unsup_samples data as UNLABELED')
+
+    def __getitem__(self, idx):
+        """Return the transformed record(s) for ``idx``.
+
+        While a mixup / cutmix / mosaic epoch window is active, extra randomly
+        chosen records are attached so the transform can combine them. Every
+        returned record is a deep copy tagged with the current iteration.
+        """
+        n = len(self.roidbs)
+        if self.repeat > 1:
+            idx %= n
+        # data batch
+        roidb = copy.deepcopy(self.roidbs[idx])
+        if self.mixup_epoch == 0 or self._epoch < self.mixup_epoch:
+            idx = np.random.randint(n)
+            roidb = [roidb, copy.deepcopy(self.roidbs[idx])]
+        elif self.cutmix_epoch == 0 or self._epoch < self.cutmix_epoch:
+            idx = np.random.randint(n)
+            roidb = [roidb, copy.deepcopy(self.roidbs[idx])]
+        elif self.mosaic_epoch == 0 or self._epoch < self.mosaic_epoch:
+            roidb = [roidb, ] + [
+                copy.deepcopy(self.roidbs[np.random.randint(n)])
+                for _ in range(4)
+            ]
+        if isinstance(roidb, Sequence):
+            for r in roidb:
+                r['curr_iter'] = self._curr_iter
+        else:
+            roidb['curr_iter'] = self._curr_iter
+        self._curr_iter += 1
+
+        return self.transform(roidb)
--- a/ppdet/data/source/dataset.py
+++ b/ppdet/data/source/dataset.py
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+# 
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
+# limitations under the License.
+
+import os
+import copy
+import numpy as np
+try:
+    from collections.abc import Sequence
+except Exception:
+    from collections import Sequence
+from paddle.io import Dataset
+from ppdet.core.workspace import register, serializable
+from ppdet.utils.download import get_dataset_path
+from ppdet.data import source
+
+from ppdet.utils.logger import setup_logger
+logger = setup_logger(__name__)
+
+
+@serializable
+class DetDataset(Dataset):
+    """
+    Base class of the detection datasets: stores where the data lives and
+    serves records from ``self.roidbs`` through ``self.transform``.
+
+    Args:
+        dataset_dir (str): root directory for dataset.
+        image_dir (str): directory for images.
+        anno_path (str): annotation file path.
+        data_fields (list): key name of data dictionary, at least have 'image'.
+        sample_num (int): number of samples to load, -1 means all.
+        use_default_label (bool): whether to load default label list.
+        repeat (int): repeat times for dataset, use in benchmark.
+    """
+
+    def __init__(self,
+                 dataset_dir=None,
+                 image_dir=None,
+                 anno_path=None,
+                 data_fields=['image'],
+                 sample_num=-1,
+                 use_default_label=None,
+                 repeat=1,
+                 **kwargs):
+        super(DetDataset, self).__init__()
+        # A missing directory is stored as '' so that os.path.join still works.
+        if dataset_dir is None:
+            dataset_dir = ''
+        if image_dir is None:
+            image_dir = ''
+        self.dataset_dir = dataset_dir
+        self.anno_path = anno_path
+        self.image_dir = image_dir
+        self.data_fields = data_fields
+        self.sample_num = sample_num
+        self.use_default_label = use_default_label
+        self.repeat = repeat
+        self._epoch = 0
+        self._curr_iter = 0
+
+    def __len__(self):
+        """Number of records multiplied by the repeat factor."""
+        record_count = len(self.roidbs)
+        return record_count * self.repeat
+
+    def __call__(self, *args, **kwargs):
+        """Calling the dataset returns the dataset itself."""
+        return self
+
+    def __getitem__(self, idx):
+        """Return the transformed record(s) for ``idx``.
+
+        Depending on which epoch window is active (see ``set_kwargs``), one
+        or more extra records are attached to the requested one. Every record
+        handed to the transform is a deep copy tagged with ``curr_iter``.
+        """
+        total = len(self.roidbs)
+        if self.repeat > 1:
+            idx %= total
+
+        def _active(limit):
+            # A window is active when its limit is 0 or not reached yet.
+            return limit == 0 or self._epoch < limit
+
+        # data batch
+        sample = copy.deepcopy(self.roidbs[idx])
+        if _active(self.mixup_epoch) or _active(self.cutmix_epoch):
+            # mixup and cutmix both pair the record with one random partner
+            idx = np.random.randint(total)
+            sample = [sample, copy.deepcopy(self.roidbs[idx])]
+        elif _active(self.mosaic_epoch):
+            sample = [sample]
+            for _ in range(4):
+                pick = np.random.randint(total)
+                sample.append(copy.deepcopy(self.roidbs[pick]))
+        elif _active(self.pre_img_epoch):
+            # Add previous image as input, only used in CenterTrack
+            neighbour = idx - 1 if idx >= 1 else idx + 1
+            sample = [sample, copy.deepcopy(self.roidbs[neighbour])]
+
+        tagged = sample if isinstance(sample, Sequence) else [sample]
+        for rec in tagged:
+            rec['curr_iter'] = self._curr_iter
+        self._curr_iter += 1
+
+        return self.transform(sample)
+
+    def check_or_download_dataset(self):
+        """Refresh ``dataset_dir`` with the result of ``get_dataset_path``."""
+        resolved = get_dataset_path(self.dataset_dir, self.anno_path,
+                                    self.image_dir)
+        self.dataset_dir = resolved
+
+    def set_kwargs(self, **kwargs):
+        """Store the augmentation epoch limits; each defaults to -1."""
+        for key in ('mixup_epoch', 'cutmix_epoch', 'mosaic_epoch',
+                    'pre_img_epoch'):
+            setattr(self, key, kwargs.get(key, -1))
+
+    def set_transform(self, transform):
+        """Set the callable applied to every record in ``__getitem__``."""
+        self.transform = transform
+
+    def set_epoch(self, epoch_id):
+        """Record the current epoch, compared against the epoch limits."""
+        self._epoch = epoch_id
+
+    def parse_dataset(self):
+        """Subclasses must override this method."""
+        raise NotImplementedError(
+            "Need to implement parse_dataset method of Dataset")
+
+    def get_anno(self):
+        """Return ``dataset_dir`` joined with ``anno_path``, or None."""
+        if self.anno_path is not None:
+            return os.path.join(self.dataset_dir, self.anno_path)
+        return None
+
+
+def _is_valid_file(f, extensions=('.jpg', '.jpeg', '.png', '.bmp')):
+    """Tell whether path ``f`` ends with one of ``extensions``.
+
+    The path is lower-cased before the comparison; ``extensions`` is used
+    as given.
+    """
+    lowered = f.lower()
+    return lowered.endswith(extensions)
+
+
+def _make_dataset(dir):
+    """Recursively collect the image files found under ``dir``.
+
+    Args:
+        dir (str): directory to walk; ``~`` is expanded and symbolic links
+            are followed.
+
+    Returns:
+        list: paths accepted by ``_is_valid_file``, sorted per directory.
+
+    Raises:
+        ValueError: if ``dir`` is not an existing directory.
+    """
+    dir = os.path.expanduser(dir)
+    if not os.path.isdir(dir):
+        # Raising a bare string is itself a TypeError; raise a real exception.
+        raise ValueError('{} should be a dir'.format(dir))
+    images = []
+    for root, _, fnames in sorted(os.walk(dir, followlinks=True)):
+        for fname in sorted(fnames):
+            path = os.path.join(root, fname)
+            if _is_valid_file(path):
+                images.append(path)
+    return images
+
+
+@register
+@serializable
+class ImageFolder(DetDataset):
+    """
+    Dataset built from image files or image directories, without ground
+    truth: every record only holds an image id and an image path (or an
+    image slice, see ``set_slice_images``).
+
+    Args:
+        dataset_dir (str): root directory for dataset.
+        image_dir (str|list): image directory / file, or a list of them.
+        anno_path (str): annotation file path.
+        sample_num (int): number of samples to load, -1 means all.
+        use_default_label (bool): whether to load default label list.
+    """
+
+    def __init__(self,
+                 dataset_dir=None,
+                 image_dir=None,
+                 anno_path=None,
+                 sample_num=-1,
+                 use_default_label=None,
+                 **kwargs):
+        super(ImageFolder, self).__init__(
+            dataset_dir,
+            image_dir,
+            anno_path,
+            sample_num=sample_num,
+            use_default_label=use_default_label)
+        self._imid2path = {}
+        self.roidbs = None
+        self.sample_num = sample_num
+
+    def check_or_download_dataset(self):
+        """Nothing to check or download for an image folder."""
+        return
+
+    def get_anno(self):
+        """Return the annotation path, joined with dataset_dir when set."""
+        if self.anno_path is None:
+            return
+        if self.dataset_dir:
+            return os.path.join(self.dataset_dir, self.anno_path)
+        else:
+            return self.anno_path
+
+    def parse_dataset(self, ):
+        """Load the records from ``image_dir`` unless they already exist."""
+        if not self.roidbs:
+            self.roidbs = self._load_images()
+
+    def _parse(self):
+        """Expand ``self.image_dir`` into a flat list of image paths.
+
+        Directories (resolved against ``dataset_dir``) are walked
+        recursively; plain files are kept when their extension is a known
+        image extension. Entries that are neither are skipped.
+        """
+        image_dir = self.image_dir
+        # A str is itself a Sequence: wrap it so that it is handled as one
+        # path instead of being iterated character by character.
+        if isinstance(image_dir, str) or not isinstance(image_dir, Sequence):
+            image_dir = [image_dir]
+        images = []
+        for im_dir in image_dir:
+            if not im_dir:
+                continue
+            # Test the same path that is walked afterwards.
+            full_dir = os.path.join(self.dataset_dir, im_dir)
+            if os.path.isdir(full_dir):
+                images.extend(_make_dataset(full_dir))
+            elif os.path.isfile(im_dir) and _is_valid_file(im_dir):
+                images.append(im_dir)
+        return images
+
+    def _load_images(self):
+        """Build one record per image and fill the id -> path mapping.
+
+        Image ids are consecutive integers starting at 0; loading stops once
+        ``sample_num`` records exist (when ``sample_num`` > 0).
+
+        Raises:
+            AssertionError: if an image path is not a file or nothing is
+                found.
+        """
+        images = self._parse()
+        ct = 0
+        records = []
+        for image in images:
+            assert image != '' and os.path.isfile(image), \
+                    "Image {} not found".format(image)
+            if self.sample_num > 0 and ct >= self.sample_num:
+                break
+            rec = {'im_id': np.array([ct]), 'im_file': image}
+            self._imid2path[ct] = image
+            ct += 1
+            records.append(rec)
+        assert len(records) > 0, "No image file found"
+        return records
+
+    def get_imid2path(self):
+        """Return the mapping from image id to image path."""
+        return self._imid2path
+
+    def set_images(self, images):
+        """Replace the image source and reload the records from it."""
+        self.image_dir = images
+        self.roidbs = self._load_images()
+
+    def set_slice_images(self,
+                         images,
+                         slice_size=[640, 640],
+                         overlap_ratio=[0.25, 0.25]):
+        """Replace the image source and store one record per image slice.
+
+        Args:
+            images (str|list): image directory / file, or a list of them.
+            slice_size (list): slice height and width passed to sahi.
+            overlap_ratio (list): height and width overlap ratios passed to
+                sahi.
+
+        Raises:
+            Exception: re-raised after logging when sahi cannot be imported.
+        """
+        self.image_dir = images
+        ori_records = self._load_images()
+        try:
+            import sahi
+            from sahi.slicing import slice_image
+        except Exception as e:
+            logger.error(
+                'sahi not found, plaese install sahi. '
+                'for example: `pip install sahi`, see https://github.com/obss/sahi.'
+            )
+            raise e
+
+        # NOTE(review): sub_img_ids is never advanced, so slice ids restart
+        # at 0 for every original image; 'ori_im_id' is what tells slices of
+        # different images apart. Confirm this is what consumers expect.
+        sub_img_ids = 0
+        ct = 0
+        ct_sub = 0
+        records = []
+        for i, ori_rec in enumerate(ori_records):
+            im_path = ori_rec['im_file']
+            slice_image_result = sahi.slicing.slice_image(
+                image=im_path,
+                slice_height=slice_size[0],
+                slice_width=slice_size[1],
+                overlap_height_ratio=overlap_ratio[0],
+                overlap_width_ratio=overlap_ratio[1])
+
+            sub_img_num = len(slice_image_result)
+            for _ind in range(sub_img_num):
+                im = slice_image_result.images[_ind]
+                rec = {
+                    'image': im,
+                    'im_id': np.array([sub_img_ids + _ind]),
+                    'h': im.shape[0],
+                    'w': im.shape[1],
+                    'ori_im_id': np.array([ori_rec['im_id'][0]]),
+                    'st_pix': np.array(
+                        slice_image_result.starting_pixels[_ind],
+                        dtype=np.float32),
+                    # marks the final slice of each original image
+                    'is_last': 1 if _ind == sub_img_num - 1 else 0,
+                } if 'image' in self.data_fields else {}
+                records.append(rec)
+            ct_sub += sub_img_num
+            ct += 1
+        logger.info('{} samples and slice to {} sub_samples.'.format(ct,
+                                                                     ct_sub))
+        self.roidbs = records
+
+    def get_label_list(self):
+        """Return ``anno_path``, which serves as the label list here."""
+        # Only VOC dataset needs label list in ImageFolder
+        return self.anno_path
+
+
+@register
+class CommonDataset(object):
+    """Wrapper that builds a dataset of ``ppdet.data.source`` from keyword
+    arguments: the ``name`` entry selects the class, the remaining entries
+    are passed to its constructor.
+    """
+
+    def __init__(self, **dataset_args):
+        super(CommonDataset, self).__init__()
+        # Work on a copy so the caller's arguments are left untouched.
+        ctor_args = copy.deepcopy(dataset_args)
+        dataset_cls = getattr(source, ctor_args.pop("name"))
+        self.dataset = dataset_cls(**ctor_args)
+
+    def __call__(self):
+        """Return the wrapped dataset instance."""
+        return self.dataset
+
+
+@register
+class TrainDataset(CommonDataset):
+    """Registered CommonDataset variant that adds no behavior of its own."""
+
+
+@register
+class EvalMOTDataset(CommonDataset):
+    """Registered CommonDataset variant that adds no behavior of its own."""
+
+
+@register
+class TestMOTDataset(CommonDataset):
+    """Registered CommonDataset variant that adds no behavior of its own."""
+
+
+@register
+class EvalDataset(CommonDataset):
+    """Registered CommonDataset variant that adds no behavior of its own."""
+
+
+@register
+class TestDataset(CommonDataset):
+    """Registered CommonDataset variant that adds no behavior of its own."""
--- a/ppdet/data/source/voc.py
+++ b/ppdet/data/source/voc.py
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import numpy as np
+
+import xml.etree.ElementTree as ET
+
+from ppdet.core.workspace import register, serializable
+
+from .dataset import DetDataset
+
+from ppdet.utils.logger import setup_logger
+logger = setup_logger(__name__)
+
+
+@register
+@serializable
+class VOCDataSet(DetDataset):
+    """
+    Load dataset with PascalVOC format.
+
+    Notes:
+    `anno_path` must contains xml file and image file path for annotations.
+
+    Args:
+        dataset_dir (str): root directory for dataset.
+        image_dir (str): directory for images.
+        anno_path (str): voc annotation file path.
+        data_fields (list): key name of data dictionary, at least have 'image'.
+        sample_num (int): number of samples to load, -1 means all.
+        label_list (str): if use_default_label is False, will load
+            mapping between category and class index.
+        allow_empty (bool): whether to load empty entry. False as default
+        empty_ratio (float): the ratio of empty record number to total
+            record's, if empty_ratio is out of [0. ,1.), do not sample the
+            records and use all the empty entries. 1. as default
+        repeat (int): repeat times for dataset, use in benchmark.
+    """
+
+    def __init__(self,
+                 dataset_dir=None,
+                 image_dir=None,
+                 anno_path=None,
+                 data_fields=['image'],
+                 sample_num=-1,
+                 label_list=None,
+                 allow_empty=False,
+                 empty_ratio=1.,
+                 repeat=1):
+        super(VOCDataSet, self).__init__(
+            dataset_dir=dataset_dir,
+            image_dir=image_dir,
+            anno_path=anno_path,
+            data_fields=data_fields,
+            sample_num=sample_num,
+            repeat=repeat)
+        self.label_list = label_list
+        self.allow_empty = allow_empty
+        self.empty_ratio = empty_ratio
+
+    def _sample_empty(self, records, num):
+        """Randomly keep part of the empty ``records``.
+
+        Args:
+            records (list): records without any object.
+            num (int): number of non-empty records.
+
+        Returns:
+            list: all ``records`` when empty_ratio is outside [0., 1.),
+            otherwise a random sample sized so that empty records make up
+            about ``empty_ratio`` of the total.
+        """
+        # if empty_ratio is out of [0. ,1.), do not sample the records
+        if self.empty_ratio < 0. or self.empty_ratio >= 1.:
+            return records
+        import random
+        sample_num = min(
+            int(num * self.empty_ratio / (1 - self.empty_ratio)), len(records))
+        records = random.sample(records, sample_num)
+        return records
+
+    def parse_dataset(self, ):
+        """Read the annotation list and build ``self.roidbs``.
+
+        Every line of the annotation file holds an image path and an xml
+        path, both relative to the image directory. Blank or incomplete
+        lines, missing files and images with a negative size are skipped.
+        Also sets ``self.cname2cid``.
+
+        Raises:
+            ValueError: if ``label_list`` is given but the file is missing.
+            AssertionError: if no record could be loaded.
+        """
+        anno_path = os.path.join(self.dataset_dir, self.anno_path)
+        image_dir = os.path.join(self.dataset_dir, self.image_dir)
+
+        # mapping category name to class id
+        # first_class:0, second_class:1, ...
+        records = []
+        empty_records = []
+        ct = 0
+        cname2cid = {}
+        if self.label_list:
+            label_path = os.path.join(self.dataset_dir, self.label_list)
+            if not os.path.exists(label_path):
+                raise ValueError("label_list {} does not exists".format(
+                    label_path))
+            with open(label_path, 'r') as fr:
+                for label_id, line in enumerate(fr):
+                    cname2cid[line.strip()] = label_id
+        else:
+            cname2cid = pascalvoc_label()
+
+        with open(anno_path, 'r') as fr:
+            for line in fr:
+                parts = line.strip().split()[:2]
+                if len(parts) < 2:
+                    # A blank line (e.g. at the end of the file) or a line
+                    # without an xml path cannot be unpacked; skip it.
+                    if parts:
+                        logger.warning(
+                            'Illegal annotation line: {}, and it will be '
+                            'ignored'.format(line.strip()))
+                    continue
+                img_file, xml_file = [
+                    os.path.join(image_dir, x) for x in parts
+                ]
+                if not os.path.exists(img_file):
+                    logger.warning(
+                        'Illegal image file: {}, and it will be ignored'.format(
+                            img_file))
+                    continue
+                if not os.path.isfile(xml_file):
+                    logger.warning(
+                        'Illegal xml file: {}, and it will be ignored'.format(
+                            xml_file))
+                    continue
+                tree = ET.parse(xml_file)
+                # Without an <id> node the running record count is the id.
+                if tree.find('id') is None:
+                    im_id = np.array([ct])
+                else:
+                    im_id = np.array([int(tree.find('id').text)])
+
+                objs = tree.findall('object')
+                im_w = float(tree.find('size').find('width').text)
+                im_h = float(tree.find('size').find('height').text)
+                if im_w < 0 or im_h < 0:
+                    logger.warning(
+                        'Illegal width: {} or height: {} in annotation, '
+                        'and {} will be ignored'.format(im_w, im_h, xml_file))
+                    continue
+
+                # i counts the valid boxes; arrays are trimmed to it below.
+                num_bbox, i = len(objs), 0
+                gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32)
+                gt_class = np.zeros((num_bbox, 1), dtype=np.int32)
+                gt_score = np.zeros((num_bbox, 1), dtype=np.float32)
+                difficult = np.zeros((num_bbox, 1), dtype=np.int32)
+                for obj in objs:
+                    cname = obj.find('name').text
+
+                    # user dataset may not contain difficult field
+                    _difficult = obj.find('difficult')
+                    _difficult = int(
+                        _difficult.text) if _difficult is not None else 0
+
+                    x1 = float(obj.find('bndbox').find('xmin').text)
+                    y1 = float(obj.find('bndbox').find('ymin').text)
+                    x2 = float(obj.find('bndbox').find('xmax').text)
+                    y2 = float(obj.find('bndbox').find('ymax').text)
+                    # clip the box to the image
+                    x1 = max(0, x1)
+                    y1 = max(0, y1)
+                    x2 = min(im_w - 1, x2)
+                    y2 = min(im_h - 1, y2)
+                    if x2 > x1 and y2 > y1:
+                        gt_bbox[i, :] = [x1, y1, x2, y2]
+                        gt_class[i, 0] = cname2cid[cname]
+                        gt_score[i, 0] = 1.
+                        difficult[i, 0] = _difficult
+                        i += 1
+                    else:
+                        logger.warning(
+                            'Found an invalid bbox in annotations: xml_file: {}'
+                            ', x1: {}, y1: {}, x2: {}, y2: {}.'.format(
+                                xml_file, x1, y1, x2, y2))
+                gt_bbox = gt_bbox[:i, :]
+                gt_class = gt_class[:i, :]
+                gt_score = gt_score[:i, :]
+                difficult = difficult[:i, :]
+
+                voc_rec = {
+                    'im_file': img_file,
+                    'im_id': im_id,
+                    'h': im_h,
+                    'w': im_w
+                } if 'image' in self.data_fields else {}
+
+                gt_rec = {
+                    'gt_class': gt_class,
+                    'gt_score': gt_score,
+                    'gt_bbox': gt_bbox,
+                    'difficult': difficult
+                }
+                # Only the fields requested through data_fields are exported.
+                for k, v in gt_rec.items():
+                    if k in self.data_fields:
+                        voc_rec[k] = v
+
+                if len(objs) == 0:
+                    empty_records.append(voc_rec)
+                else:
+                    records.append(voc_rec)
+
+                ct += 1
+                if self.sample_num > 0 and ct >= self.sample_num:
+                    break
+        assert ct > 0, 'not found any voc record in %s' % (self.anno_path)
+        logger.debug('{} samples in file {}'.format(ct, anno_path))
+        if self.allow_empty and len(empty_records) > 0:
+            empty_records = self._sample_empty(empty_records, len(records))
+            records += empty_records
+        self.roidbs, self.cname2cid = records, cname2cid
+
+    def get_label_list(self):
+        """Return ``label_list`` joined with ``dataset_dir``."""
+        return os.path.join(self.dataset_dir, self.label_list)
+
+
+def pascalvoc_label():
+    """Return a new dict mapping the 20 Pascal VOC class names to ids 0-19."""
+    class_names = (
+        'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car',
+        'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike',
+        'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
+    )
+    # The position in the tuple is the class id.
+    return {cname: cid for cid, cname in enumerate(class_names)}
--- a/ppdet/data/transform/__init__.py
+++ b/ppdet/data/transform/__init__.py
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from . import operators
+from . import batch_operators
+
+from .operators import *
+from .batch_operators import *
+
+# Public names of this package: everything listed in `registered_ops`, which
+# is not defined in this file (presumably it comes from the star imports
+# above; confirm against operators / batch_operators).
+__all__ = []
+__all__ += registered_ops