# coding=utf-8
# Copyright 2021 The OneFlow Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys

import oneflow as flow
from omegaconf import OmegaConf
from oneflow.utils.data import DataLoader, TensorDataset

# Make the repo root importable so that `libai` and `tests` resolve.
sys.path.append(".")
from libai.config import LazyCall, default_argument_parser
from libai.engine import DefaultTrainer, default_setup
from libai.optim import get_default_optimizer_params
from libai.scheduler import WarmupMultiStepLR
from tests.layers.test_trainer_model import build_graph, build_model


def setup(args):
    """
    Create configs and perform basic setups.
    """
    cfg = OmegaConf.create()
    cfg.train = dict(
        output_dir="./demo_output",
        train_micro_batch_size=32,
        test_micro_batch_size=32,
        dist=dict(
            data_parallel_size=1,
            tensor_parallel_size=1,
            pipeline_parallel_size=1,
            pipeline_num_layers=4,
        ),
        start_iter=0,
        train_iter=20,
        train_epoch=1,
        warmup_ratio=0.05,
        lr_warmup_fraction=0.01,
        lr_decay_iter=6000,
        eval_period=1000,
        log_period=1,
        checkpointer=dict(period=100),
        nccl_fusion_threshold_mb=16,
        nccl_fusion_max_ops=24,
        scheduler=LazyCall(WarmupMultiStepLR)(
            warmup_factor=0.001,
            # alpha=0.01,
            warmup_method="linear",
            milestones=[0.1, 0.2],
        ),
    )
    cfg.optim = LazyCall(flow.optim.AdamW)(
        parameters=LazyCall(get_default_optimizer_params)(
            # parameters.model is meant to be set to the model object, before
            # instantiating the optimizer.
            clip_grad_max_norm=1.0,
            clip_grad_norm_type=2.0,
            weight_decay_norm=0.0,
            weight_decay_bias=0.0,
        ),
        lr=1e-4,
        weight_decay=0.01,
        betas=(0.9, 0.999),
        do_bias_correction=True,
    )
    # Train in static graph (nn.Graph) mode rather than eager mode.
    cfg.graph = dict(
        enabled=True,
    )
    default_setup(cfg, args)
    return cfg


class DemoTrainer(DefaultTrainer):
    @classmethod
    def build_model(cls, cfg):
        """
        Returns:
            flow.nn.Module

        Calls :func:`tests.layers.test_trainer_model.build_model`.
        Override this if you'd like a different model.
        """
        model = build_model(cfg)
        return model

    @classmethod
    def build_graph(cls, cfg, model, optimizer=None, lr_scheduler=None, is_train=True):
        return build_graph(cfg, model, optimizer, lr_scheduler)

    @classmethod
    def get_batch(cls, data):
        # Ignore the loader output and feed a fixed-shape random global batch,
        # split along the batch dimension and placed on GPU 0.
        return [
            flow.randn(
                32,
                512,
                sbp=flow.sbp.split(0),
                placement=flow.placement("cuda", [0]),
            )
        ]

    @classmethod
    def build_train_loader(cls, cfg, tokenizer=None):
        # Dummy loader over random data; its batches are replaced by
        # get_batch() above. The trailing Nones fill the unused
        # validation and test loader slots.
        return (
            DataLoader(
                TensorDataset(flow.randn(1000)), batch_size=cfg.train.train_micro_batch_size
            ),
            None,
            None,
        )

    @classmethod
    def build_test_loader(cls, cfg):
        return []


def main(args):
    cfg = setup(args)

    trainer = DemoTrainer(cfg)
    # trainer.resume_or_load(resume=args.resume)
    return trainer.train()


if __name__ == "__main__":
    args = default_argument_parser().parse_args()
    main(args)
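
# Usage sketch (assumptions: the script is saved inside the LiBai repo so
# that the `libai` and `tests.layers` imports above resolve, and GPU 0 is
# available for the `flow.placement("cuda", [0])` batches). The file name
# below is illustrative; run whatever name it is saved under, from the
# repo root:
#
#     python tests/test_trainer.py
#
# `default_argument_parser()` supplies LiBai's standard launch flags,
# including the `--resume` flag read by the commented-out
# `trainer.resume_or_load(resume=args.resume)` call above; none are
# required here, since the config is built in `setup()` rather than
# loaded from a file.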