# coding=utf-8
# Copyright 2021 The OneFlow Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging

import oneflow as flow
from oneflow import nn

from libai.layers import Linear
from libai.models.bert_model import BertModel
from libai.models.utils import init_method_normal
from libai.utils import distributed as dist

logger = logging.getLogger("libai." + __name__)


class ClassificationLoss(nn.Module):
    """Cross-entropy loss whose output sbp is adjusted for 2D parallelism."""

    def __init__(self):
        super().__init__()
        self.cross_entropy = nn.CrossEntropyLoss()

    def forward(self, classification_logits, label):
        loss = self.cross_entropy(classification_logits, label)
        # NOTE: convert the loss sbp signature from [P, P] to [P, B] so it can
        # be summed with losses whose sbp signature is [P, B] (e.g. the SOP loss).
        loss = loss.to_global(sbp=dist.get_nd_sbp([flow.sbp.partial_sum, flow.sbp.broadcast]))
        return loss


class ModelForSequenceClassification(nn.Module):
    """BERT encoder with a linear classification head on the pooled output."""

    def __init__(self, cfg):
        super().__init__()
        self.num_classes = cfg.num_classes
        self.model = BertModel(cfg)

        # Optionally initialize the encoder from a pretrained Megatron-LM checkpoint.
        if cfg.pretrain_megatron_weight is not None:
            from .load_megatron_weight import load_megatron_bert

            logger.info(f"Loading pretrained weights from {cfg.pretrain_megatron_weight}")
            load_megatron_bert(self.model, cfg.pretrain_megatron_weight)
            logger.info("Load succeeded")

        init_method = init_method_normal(cfg.initializer_range)
        self.dropout = nn.Dropout(cfg.hidden_dropout_prob)
        # Row-parallel linear head, placed on the last pipeline stage (layer_idx=-1).
        self.classifier = Linear(
            cfg.hidden_size,
            self.num_classes,
            bias=True,
            parallel="row",
            init_method=init_method,
            layer_idx=-1,
        )
        self.loss_fct = ClassificationLoss()

    def forward(self, input_ids, attention_mask, token_type_ids=None, labels=None):
        # Classification uses only the pooled ([CLS]) representation; the
        # per-token encoder output is not needed by this head.
        encoder_output, pooled_output = self.model(input_ids, attention_mask, token_type_ids)
        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)

        if self.training and labels is not None:
            loss = self.loss_fct(logits.view(-1, self.num_classes), labels.view(-1))
            return {"loss": loss}
        return {"prediction_scores": logits}
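

# ---------------------------------------------------------------------------
# Usage sketch (illustrative, commented out): a minimal, hedged example of how
# this head might be driven from a project config. The field names below
# mirror the attributes read in __init__; the cfg object, the tokenized batch,
# and any extra fields BertModel(cfg) itself requires are hypothetical
# stand-ins, not an API this module provides.
#
#   cfg.num_classes = 2
#   cfg.pretrain_megatron_weight = None        # or a path to a Megatron ckpt
#   cfg.initializer_range = 0.02
#   cfg.hidden_dropout_prob = 0.1
#   cfg.hidden_size = 768
#   # ... plus the encoder fields BertModel(cfg) expects ...
#
#   model = ModelForSequenceClassification(cfg)
#   model.train()
#   out = model(input_ids, attention_mask, token_type_ids, labels)
#   out["loss"].backward()                     # training: loss dict
#
#   model.eval()
#   scores = model(input_ids, attention_mask)["prediction_scores"]
#   # scores: [batch_size, num_classes] global tensor
# ---------------------------------------------------------------------------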