# Copyright (c) 2024 westlake-repl
# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates
# SPDX-License-Identifier: MIT
# This file has been modified by Junyi Chen.
#
# Original file was released under MIT, with the full license text
# available at https://choosealicense.com/licenses/mit/.
#
# This modified file is released under the same license.
from torch.utils.data import Dataset
import torch
import pandas as pd
from transformers import AutoTokenizer
import logging
class BatchTextDataset(Dataset):
def __init__(self, config, dataload):
self.item_num = dataload.item_num
self.item_list = dataload.id2token['item_id']
self.max_text_length = config['MAX_TEXT_LENGTH']
self.device = config['device']
self.text_path = config['text_path']
self.text_keys = config['text_keys']
self.tokenizer = AutoTokenizer.from_pretrained(config['item_pretrain_dir'], trust_remote_code=True)
# self.pad_id = self.tokenizer.pad_token_id
# assert self.pad_id is not None, f"pad_token_id can't be {self.pad_id}"
self.item_prompt = config['item_prompt']
self.item_emb_token_n = config['item_emb_token_n']
self.logger = logging.getLogger()
self.load_content()
def __len__(self):
return self.item_num
def load_content(self):
self.env = pd.read_csv(self.text_path, delimiter=',', dtype={'item_id': str})
self.env = self.env[self.text_keys + ['item_id']]
self.env = self.env.set_index('item_id').T.to_dict()
self.logger.info(f"Text Item num: {len(self.env)}")
def __getitem__(self, index):
def process_item(item):
if item != self.item_list[0] and item not in self.env:
self.logger.info(f"{item} not in self.env")
item_i = self.env.get(item, {})
text_str = ""
if len(item_i):
text_str = f"{self.item_prompt}"
for key in self.text_keys:
value = item_i[key]
if value and str(value) != 'nan':
text_str += f"{key}: {value}"
ids = self.tokenizer.encode(text_str)
ids = ids[:self.max_text_length]
mask = [1] * len(ids)
return ids, mask
if index == 0 or index == self.item_num:
item_token_i = ""
else:
item_token_i = self.item_list[index]
pos_input_ids, pos_cu_input_lens, pos_position_ids = [], [], []
ids, _ = process_item(item_token_i)
pos_input_ids.extend(ids + [0] * self.item_emb_token_n)
pos_cu_input_lens.append(len(ids) + self.item_emb_token_n)
pos_position_ids.extend((torch.arange(len(ids) + self.item_emb_token_n) + (self.max_text_length - len(ids))).tolist())
outputs = {
"pos_item_ids": torch.as_tensor(index, dtype=torch.int64),
"pos_input_ids": torch.as_tensor(pos_input_ids, dtype=torch.int64),
"pos_cu_input_lens": torch.as_tensor(pos_cu_input_lens, dtype=torch.int64),
"pos_position_ids": torch.as_tensor(pos_position_ids, dtype=torch.int64)
}
return outputs
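
# A minimal illustrative sketch (hypothetical values, not from any real config):
# load_content expects a CSV with an 'item_id' column plus the columns listed in
# config['text_keys']. With text_keys ['title', 'brand'] and an item_prompt of
# 'Item: ', process_item would tokenize the string built below; note that the
# key/value fields are concatenated without a separator, mirroring the loop in
# __getitem__ above.
def _example_item_text():
    row = {'item_id': 'B0001', 'title': 'Wireless Mouse', 'brand': 'Acme'}  # hypothetical CSV row
    text_keys = ['title', 'brand']  # hypothetical config['text_keys']
    text_str = 'Item: '  # hypothetical config['item_prompt']
    for key in text_keys:
        value = row[key]
        if value and str(value) != 'nan':
            text_str += f"{key}: {value}"
    return text_str  # 'Item: title: Wireless Mousebrand: Acme'
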
# Copyright (c) 2024 westlake-repl
# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates
# SPDX-License-Identifier: MIT
# This file has been modified by Junyi Chen.
#
# Original file was released under MIT, with the full license text
# available at https://choosealicense.com/licenses/mit/.
#
# This modified file is released under the same license.
import torch
import numpy as np
from torch.utils.data._utils.collate import default_collate
import re
try:
from torch._six import string_classes
except ImportError:  # torch._six was removed in newer PyTorch versions
string_classes = str
import collections
np_str_obj_array_pattern = re.compile(r"[SaUO]")
default_collate_err_msg_format = (
"default_collate: batch must contain tensors, numpy arrays, numbers, "
"dicts or lists; found {}"
)
def customize_collate(batch):
r"""Puts each data field into a tensor with outer dimension batch size"""
elem = batch[0]
elem_type = type(elem)
if isinstance(elem, torch.Tensor):
out = None
if torch.utils.data.get_worker_info() is not None:
# If we're in a background process, concatenate directly into a
# shared memory tensor to avoid an extra copy
numel = sum(x.numel() for x in batch)
storage = elem.storage()._new_shared(numel)
out = elem.new(storage)
return torch.stack(batch, 0, out=out)
elif (
elem_type.__module__ == "numpy"
and elem_type.__name__ != "str_"
and elem_type.__name__ != "string_"
):
if elem_type.__name__ == "ndarray" or elem_type.__name__ == "memmap":
# array of string classes and object
if np_str_obj_array_pattern.search(elem.dtype.str) is not None:
raise TypeError(default_collate_err_msg_format.format(elem.dtype))
return default_collate([torch.as_tensor(b) for b in batch])
elif elem.shape == (): # scalars
return torch.as_tensor(batch)
elif isinstance(elem, float):
return torch.tensor(batch, dtype=torch.float64)
elif isinstance(elem, int):
return torch.tensor(batch)
elif isinstance(elem, string_classes):
return batch
elif isinstance(elem, collections.abc.Mapping):
return {key: customize_collate([d[key] for d in batch]) for key in elem}
elif isinstance(elem, collections.abc.Sequence):
        # sequences are returned as-is without further collation
return batch
def seq_eval_collate(batch):
item_seq = []
item_target = []
time_seq = []
history_i = []
for item in batch:
history_i.append(item[0])
item_seq.append(item[1])
item_target.append(item[2])
time_seq.append(item[3])
history_u = torch.cat([torch.full_like(hist_iid, i) for i, hist_iid in enumerate(history_i)])
history_i = torch.cat(history_i)
item_seq = torch.tensor(item_seq) # [batch, len]
item_target = torch.tensor(item_target) # [batch]
time_seq = torch.tensor(time_seq) # [batch]
positive_u = torch.arange(item_seq.shape[0]) # [batch]
# return item_seq, None, positive_u, item_target
return item_seq, time_seq, (history_u, history_i), positive_u, item_target
def customize_rmpad_collate(batch):
r"""Puts each data field into a tensor with outer dimension batch size"""
elem = batch[0]
elem_type = type(elem)
if isinstance(elem, torch.Tensor):
out = None
if torch.utils.data.get_worker_info() is not None:
# If we're in a background process, concatenate directly into a
# shared memory tensor to avoid an extra copy
numel = sum(x.numel() for x in batch)
storage = elem.storage()._new_shared(numel)
out = elem.new(storage)
return torch.stack(batch, 0, out=out)
elif (
elem_type.__module__ == "numpy"
and elem_type.__name__ != "str_"
and elem_type.__name__ != "string_"
):
if elem_type.__name__ == "ndarray" or elem_type.__name__ == "memmap":
# array of string classes and object
if np_str_obj_array_pattern.search(elem.dtype.str) is not None:
raise TypeError(default_collate_err_msg_format.format(elem.dtype))
return default_collate([torch.as_tensor(b) for b in batch])
elif elem.shape == (): # scalars
return torch.as_tensor(batch)
elif isinstance(elem, float):
return torch.tensor(batch, dtype=torch.float64)
elif isinstance(elem, int):
return torch.tensor(batch)
elif isinstance(elem, string_classes):
return batch
elif isinstance(elem, collections.abc.Mapping):
output = {}
for key in elem:
if any(['_input_ids' in key, '_cu_input_lens' in key, '_position_ids' in key]):
output[key] = torch.concat([d[key] for d in batch], dim=0)
else:
output[key] = customize_collate([d[key] for d in batch])
return output
elif isinstance(elem, collections.abc.Sequence):
        # sequences are returned as-is without further collation
return batch
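
# A minimal illustrative sketch with toy tensors: for dict samples,
# customize_rmpad_collate concatenates the variable-length text fields
# (*_input_ids, *_cu_input_lens, *_position_ids) along dim 0 so items of
# different token lengths are packed back to back, while every other field is
# stacked into a regular batch dimension via customize_collate.
def _example_rmpad_collate():
    sample_a = {
        'pos_input_ids': torch.tensor([11, 12, 13]),
        'pos_cu_input_lens': torch.tensor([3]),
        'pos_item_ids': torch.tensor(5),
    }
    sample_b = {
        'pos_input_ids': torch.tensor([21, 22]),
        'pos_cu_input_lens': torch.tensor([2]),
        'pos_item_ids': torch.tensor(9),
    }
    batch = customize_rmpad_collate([sample_a, sample_b])
    # batch['pos_input_ids']     -> tensor([11, 12, 13, 21, 22])  (packed)
    # batch['pos_cu_input_lens'] -> tensor([3, 2])                (per-item lengths)
    # batch['pos_item_ids']      -> tensor([5, 9])                (stacked)
    return batch
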
# Copyright (c) 2024 westlake-repl
# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates
# SPDX-License-Identifier: MIT
# This file has been modified by Junyi Chen.
#
# Original file was released under MIT, with the full license text
# available at https://choosealicense.com/licenses/mit/.
#
# This modified file is released under the same license.
import torch
from torch.utils.data import Dataset
import numpy as np
import datetime
import pytz
class SeqEvalDataset(Dataset):
def __init__(self, config, dataload, phase='valid'):
self.dataload = dataload
self.max_item_list_length = config['MAX_ITEM_LIST_LENGTH_TEST'] if config['MAX_ITEM_LIST_LENGTH_TEST'] else config['MAX_ITEM_LIST_LENGTH']
self.user_seq = list(dataload.user_seq.values())
self.time_seq = list(dataload.time_seq.values())
self.use_time = config['use_time']
self.phase = phase
self.length = len(self.user_seq)
self.item_num = dataload.item_num
def __len__(self):
return self.length
def _padding_sequence(self, sequence, max_length):
sequence = list(sequence)
pad_len = max_length - len(sequence)
sequence = [0] * pad_len + sequence
sequence = sequence[-max_length:]
return sequence
def _padding_time_sequence(self, sequence, max_length):
sequence = list(sequence)
pad_len = max_length - len(sequence)
sequence = [0] * pad_len + sequence
sequence = sequence[-max_length:]
vq_time = []
for time in sequence:
dt = datetime.datetime.fromtimestamp(time, pytz.timezone('UTC'))
vq_time.append([dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second])
return vq_time
def __getitem__(self, index):
last_num = 2 if self.phase == 'valid' else 1
history_seq = self.user_seq[index][:-last_num]
item_seq = self._padding_sequence(history_seq, self.max_item_list_length)
item_target = self.user_seq[index][-last_num]
if self.use_time:
history_time_seq = self.time_seq[index][:-last_num]
else:
history_time_seq = []
time_seq = self._padding_time_sequence(history_time_seq, self.max_item_list_length)
return torch.tensor(history_seq), item_seq, item_target, time_seq # , item_length
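
# A minimal illustrative sketch of the evaluation split (hypothetical values):
# sequences are left-padded with 0 and truncated from the left, and the target
# is the second-to-last interaction for phase='valid' and the last one for
# phase='test'.
def _example_eval_split():
    user_seq = [3, 7, 9, 12, 15]
    max_len = 6
    history = user_seq[:-2]                       # phase='valid'; use [:-1] for 'test'
    target = user_seq[-2]                         # 12 for 'valid'; 15 for 'test'
    padded = ([0] * (max_len - len(history)) + history)[-max_len:]
    return padded, target                         # ([0, 0, 0, 3, 7, 9], 12)
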
# Copyright (c) 2024 westlake-repl
# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates
# SPDX-License-Identifier: MIT
# This file has been modified by Junyi Chen.
#
# Original file was released under MIT, with the full license text
# available at https://choosealicense.com/licenses/mit/.
#
# This modified file is released under the same license.
from asyncio.log import logger
from torch.utils.data import Dataset
import torch
import numpy as np
import pandas as pd
from transformers import AutoTokenizer
import random
import datetime
import pytz
import math
import torch.distributed as dist
# Data layout: [[user_seq], [neg_item_seq]], [mask]
class SEQTrainDataset(Dataset):
def __init__(self, config, dataload):
self.dataload = dataload
self.config = config
self.item_num = dataload.item_num
self.train_seq = dataload.train_feat['item_seq']
self.length = len(self.train_seq)
self.max_seq_length = config['MAX_ITEM_LIST_LENGTH']+1
self.device = config['device']
self.random_sample = True if config['loss'] and config['loss'] == 'nce' else False
self.num_negatives = config['num_negatives']
if self.num_negatives:
self.num_negatives = math.ceil(self.num_negatives / dist.get_world_size() / config['train_batch_size'])
logger.info(f"Use random sample {self.random_sample} for mask id")
def __len__(self):
return self.length
def _neg_sample(self, item_set):
item = random.randint(1, self.item_num - 1)
while item in item_set:
item = random.randint(1, self.item_num - 1)
return item
def _padding_sequence(self, sequence, max_length, random_sample=False):
pad_len = max_length - len(sequence)
if random_sample:
pad_seq = [self._neg_sample(sequence) for _ in range(pad_len)]
sequence = pad_seq + sequence
else:
sequence = [0] * pad_len + sequence
sequence = sequence[-max_length:]
return torch.tensor(sequence, dtype=torch.long)
def reconstruct_train_data(self, item_seq):
masked_index = []
neg_item = []
item_seq_len = len(item_seq)
for i in range(item_seq_len - 1):
neg_item.append(self._neg_sample(item_seq))
masked_index.append(1)
item_seq = self._padding_sequence(list(item_seq), self.max_seq_length, random_sample=self.random_sample)
if self.num_negatives:
neg_item = []
for _ in range(self.num_negatives):
neg_item.append(self._neg_sample(item_seq))
else:
neg_item = self._padding_sequence(neg_item, self.max_seq_length, random_sample=self.random_sample)
masked_index = self._padding_sequence(masked_index, self.max_seq_length-1)
return torch.as_tensor(item_seq, dtype=torch.int64), torch.as_tensor(neg_item, dtype=torch.int64), torch.as_tensor(masked_index, dtype=torch.int64)
def __getitem__(self, index):
        # The maximum length is max_len + 1; e.g. if max_len is 5,
        # the sequence 1,2,3,4,5,6 gives
        # pos 2,3,4,5,6
        # neg 0,8,9,7,9,8
        # mask_index 1,1,1,1,1
item_seq = self.train_seq[index]
item_seq, neg_item, masked_index = self.reconstruct_train_data(item_seq)
return item_seq, neg_item, masked_index
class TextSEQTrainDataset(Dataset):
def __init__(self, config, dataload):
self.dataload = dataload
self.config = config
self.item_num = dataload.item_num
self.train_seq = dataload.train_feat['item_seq']
self.length = len(self.train_seq)
self.train_time_seq = dataload.train_feat['time_seq']
self.id2token = dataload.id2token['item_id']
self.max_seq_length = config['MAX_ITEM_LIST_LENGTH']+1
self.max_text_length = config['MAX_TEXT_LENGTH']
self.device = config['device']
self.text_path = config['text_path']
self.text_keys = config['text_keys']
self.tokenizer = AutoTokenizer.from_pretrained(config['item_pretrain_dir'], trust_remote_code=True)
# self.pad_id = self.tokenizer.pad_token_id
# assert self.pad_id is not None, f"pad_token_id can't be {self.pad_id}"
self.item_prompt = config['item_prompt']
self.item_emb_token_n = config['item_emb_token_n']
self.num_negatives = config['num_negatives']
self.random_sample = True if config['loss'] and config['loss'] == 'nce' else False
if self.num_negatives:
self.num_negatives = math.ceil(self.num_negatives / dist.get_world_size() / config['train_batch_size']) # for llm only
logger.info(f"Use random sample {self.random_sample} for mask id")
logger.info(f"Text path: {self.text_path}")
logger.info(f"Text keys: {self.text_keys}")
logger.info(f"Item prompt: {self.item_prompt}")
self.load_content()
def __len__(self):
return self.length
def load_content(self):
self.env = pd.read_csv(self.text_path, delimiter=',', dtype={'item_id': str})
self.env = self.env[self.text_keys + ['item_id']]
self.env = self.env.set_index('item_id').T.to_dict()
logger.info(f"Text Item num: {len(self.env)}")
def _neg_sample(self, item_set):
item = random.randint(1, self.item_num - 1)
while item in item_set:
item = random.randint(1, self.item_num - 1)
return item
def _padding_sequence(self, sequence, max_length, random_sample=False):
pad_len = max_length - len(sequence)
if random_sample:
pad_seq = [self._neg_sample(sequence) for _ in range(pad_len)]
sequence = pad_seq + sequence
else:
sequence = [0] * pad_len + sequence
sequence = sequence[-max_length:]
return torch.tensor(sequence, dtype=torch.long)
def reconstruct_train_data(self, item_seq):
masked_index = []
neg_item = []
item_seq_len = len(item_seq)
for i in range(item_seq_len - 1):
neg_item.append(self._neg_sample(item_seq))
masked_index.append(1)
item_seq = self._padding_sequence(list(item_seq), self.max_seq_length, random_sample=self.random_sample)
masked_index = self._padding_sequence(masked_index, self.max_seq_length-1)
if self.num_negatives:
neg_item = []
for _ in range(self.num_negatives):
neg_item.append(self._neg_sample([]))
else:
neg_item = self._padding_sequence(neg_item, self.max_seq_length, random_sample=self.random_sample)
return item_seq, neg_item, masked_index
def _padding_time_sequence(self, sequence, max_length):
pad_len = max_length - len(sequence)
sequence = [0] * pad_len + sequence
sequence = sequence[-max_length:]
vq_time = []
for time in sequence:
dt = datetime.datetime.fromtimestamp(time, pytz.timezone('UTC'))
vq_time.append([dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second])
return torch.tensor(vq_time, dtype=torch.long)
def __getitem__(self, index):
item_seq = self.train_seq[index]
item_seq, neg_item, masked_index = self.reconstruct_train_data(item_seq)
time_seq = self.train_time_seq[index]
time_seq = self._padding_time_sequence(list(time_seq), self.max_seq_length)
item_seq_token = self.id2token[item_seq]
neg_items_token = self.id2token[neg_item]
pos_input_ids, pos_cu_input_lens, pos_position_ids = [], [], []
neg_input_ids, neg_cu_input_lens, neg_position_ids = [], [], []
def process_item(item):
if item != self.id2token[0] and item not in self.env:
# assert item in self.env, f"{item}"
logger.info(f"{item} not in self.env")
item_i = self.env.get(item, {})
text_str = ""
if len(item_i):
text_str = f"{self.item_prompt}"
for key in self.text_keys:
value = item_i[key]
if value and str(value) != 'nan':
text_str += f"{key}: {value}"
ids = self.tokenizer.encode(text_str)
ids = ids[:self.max_text_length]
mask = [1] * len(ids)
return ids, mask
for item in item_seq_token:
ids, _ = process_item(item)
pos_input_ids.extend(ids + [0] * self.item_emb_token_n)
pos_cu_input_lens.append(len(ids) + self.item_emb_token_n)
pos_position_ids.extend((torch.arange(len(ids) + self.item_emb_token_n) + (self.max_text_length - len(ids))).tolist())
for neg in neg_items_token:
ids, _ = process_item(neg)
neg_input_ids.extend(ids + [0] * self.item_emb_token_n)
neg_cu_input_lens.append(len(ids) + self.item_emb_token_n)
neg_position_ids.extend((torch.arange(len(ids) + self.item_emb_token_n) + (self.max_text_length - len(ids))).tolist())
outputs = {
"pos_item_ids": torch.as_tensor(item_seq, dtype=torch.int64),
"neg_item_ids": torch.as_tensor(neg_item, dtype=torch.int64),
"pos_input_ids": torch.as_tensor(pos_input_ids, dtype=torch.int64),
"pos_cu_input_lens": torch.as_tensor(pos_cu_input_lens, dtype=torch.int64),
"pos_position_ids": torch.as_tensor(pos_position_ids, dtype=torch.int64),
"neg_input_ids": torch.as_tensor(neg_input_ids, dtype=torch.int64),
"neg_cu_input_lens": torch.as_tensor(neg_cu_input_lens, dtype=torch.int64),
"neg_position_ids": torch.as_tensor(neg_position_ids, dtype=torch.int64),
"attention_mask": torch.as_tensor(masked_index, dtype=torch.int64),
"time_ids": torch.as_tensor(time_seq, dtype=torch.int64),
}
return outputs
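
# A minimal illustrative sketch of the position-id scheme used above: each
# item's position ids are right-aligned against MAX_TEXT_LENGTH, so shorter
# texts start at a larger offset and the item_emb_token_n placeholder slots
# always land on the largest positions (max_text_length=8, item_emb_token_n=1
# and the 5 token ids below are hypothetical).
def _example_position_ids():
    max_text_length, item_emb_token_n = 8, 1
    ids = [101, 2009, 2003, 1037, 102]
    positions = (torch.arange(len(ids) + item_emb_token_n)
                 + (max_text_length - len(ids))).tolist()
    return positions  # [3, 4, 5, 6, 7, 8]; the final slot 8 is the embedding token
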
# Copyright (c) 2024 westlake-repl
# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates
# SPDX-License-Identifier: MIT
# This file has been modified by Junyi Chen.
#
# Original file was released under MIT, with the full license text
# available at https://choosealicense.com/licenses/mit/.
#
# This modified file is released under the same license.
import copy
import importlib
import os
import pickle
from logging import getLogger
from REC.data.dataset import *
from REC.utils import set_color
from functools import partial
from .dataload import Data
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
import random
import math
import copy
def load_data(config):
dataload = Data(config)
return dataload
def bulid_dataloader(config, dataload):
'''
split dataset, generate user history sequence, train/valid/test dataset
'''
dataset_dict = {
'SASRec': ('SEQTrainDataset', 'SeqEvalDataset', 'seq_eval_collate'),
'HSTU': ('SEQTrainDataset', 'SeqEvalDataset', 'seq_eval_collate'),
'LLMIDRec': ('SEQTrainDataset', 'SeqEvalDataset', 'seq_eval_collate'),
'HLLM': (('TextSEQTrainDataset', 'customize_rmpad_collate'), 'SeqEvalDataset', 'seq_eval_collate')
}
model_name = config['model']
dataload.build()
dataset_module = importlib.import_module('REC.data.dataset')
train_set_name, test_set_name, collate_fn_name = dataset_dict[model_name]
if isinstance(train_set_name, tuple):
train_set_class = getattr(dataset_module, train_set_name[0])
train_collate_fn = getattr(dataset_module, train_set_name[1])
else:
train_set_class = getattr(dataset_module, train_set_name)
train_collate_fn = None
test_set_class = getattr(dataset_module, test_set_name)
eval_collate_fn = getattr(dataset_module, collate_fn_name)
train_data = train_set_class(config, dataload)
valid_data = test_set_class(config, dataload, phase='valid')
test_data = test_set_class(config, dataload, phase='test')
logger = getLogger()
logger.info(
set_color('[Training]: ', 'pink') + set_color('train_batch_size', 'cyan') + ' = ' +
set_color(f'[{config["train_batch_size"]}]', 'yellow')
)
logger.info(
set_color('[Evaluation]: ', 'pink') + set_color('eval_batch_size', 'cyan') + ' = ' +
set_color(f'[{config["eval_batch_size"]}]', 'yellow')
)
train_sampler = torch.utils.data.distributed.DistributedSampler(train_data)
valid_sampler = NonConsecutiveSequentialDistributedSampler(valid_data)
test_sampler = NonConsecutiveSequentialDistributedSampler(test_data)
num_workers = 8
rank = torch.distributed.get_rank()
seed = torch.initial_seed()
init_fn = partial(
worker_init_fn, num_workers=num_workers, rank=rank,
seed=seed
)
if train_collate_fn:
train_loader = DataLoader(train_data, batch_size=config['train_batch_size'], num_workers=num_workers,
pin_memory=True, sampler=train_sampler, collate_fn=train_collate_fn, worker_init_fn=init_fn)
else:
train_loader = DataLoader(train_data, batch_size=config['train_batch_size'], num_workers=num_workers,
pin_memory=True, sampler=train_sampler, worker_init_fn=init_fn)
valid_loader = DataLoader(valid_data, batch_size=config['eval_batch_size'], num_workers=num_workers,
pin_memory=True, sampler=valid_sampler, collate_fn=eval_collate_fn)
test_loader = DataLoader(test_data, batch_size=config['eval_batch_size'], num_workers=num_workers,
pin_memory=True, sampler=test_sampler, collate_fn=eval_collate_fn)
return train_loader, valid_loader, test_loader
def worker_init_fn(worker_id, num_workers, rank, seed):
# The seed of each worker equals to
# num_worker * rank + worker_id + user_seed
worker_seed = num_workers * rank + worker_id + seed
np.random.seed(worker_seed)
random.seed(worker_seed)
def worker_init_reset_seed(worker_id):
initial_seed = torch.initial_seed() % 2 ** 31
worker_seed = initial_seed + worker_id + torch.distributed.get_rank()
random.seed(worker_seed)
np.random.seed(worker_seed)
class NonConsecutiveSequentialDistributedSampler(torch.utils.data.sampler.Sampler):
def __init__(self, dataset, rank=None, num_replicas=None):
if num_replicas is None:
if not torch.distributed.is_available():
raise RuntimeError("Requires distributed package to be available")
num_replicas = torch.distributed.get_world_size()
if rank is None:
if not torch.distributed.is_available():
raise RuntimeError("Requires distributed package to be available")
rank = torch.distributed.get_rank()
self.dataset = dataset
self.num_replicas = num_replicas
self.rank = rank
self.total_size = len(self.dataset)
self.num_samples = math.ceil(
(self.total_size-self.rank)/self.num_replicas
)
def __iter__(self):
indices = list(range(len(self.dataset)))
indices = indices[self.rank:self.total_size:self.num_replicas]
return iter(indices)
def __len__(self):
return self.num_samples
class ConsecutiveSequentialDistributedSampler(torch.utils.data.sampler.Sampler):
def __init__(self, dataset, batch_size, rank=None, num_replicas=None):
if num_replicas is None:
if not torch.distributed.is_available():
raise RuntimeError("Requires distributed package to be available")
num_replicas = torch.distributed.get_world_size()
if rank is None:
if not torch.distributed.is_available():
raise RuntimeError("Requires distributed package to be available")
rank = torch.distributed.get_rank()
self.dataset = dataset
self.num_replicas = num_replicas
self.rank = rank
self.batch_size = batch_size
self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.batch_size / self.num_replicas)) * self.batch_size
self.total_size = self.num_samples * self.num_replicas
def __iter__(self):
indices = list(range(len(self.dataset)))
# add extra samples to make it evenly divisible
indices += [indices[-1]] * (self.total_size - len(indices))
# subsample
indices = indices[self.rank * self.num_samples: (self.rank + 1) * self.num_samples]
return iter(indices)
def __len__(self):
return self.num_samples
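
# A minimal illustrative sketch (10 samples, 3 ranks, hypothetical numbers):
# NonConsecutiveSequentialDistributedSampler gives each rank a strided slice
# with no padding, so shard sizes may differ by one, while
# ConsecutiveSequentialDistributedSampler (batch_size=2) repeats the last index
# until the total is evenly divisible and hands out contiguous blocks.
def _example_eval_samplers():
    total, world_size, batch_size = 10, 3, 2
    indices = list(range(total))
    non_consecutive = {r: indices[r:total:world_size] for r in range(world_size)}
    # {0: [0, 3, 6, 9], 1: [1, 4, 7], 2: [2, 5, 8]}
    num_samples = -(-total // (batch_size * world_size)) * batch_size  # ceil, = 4
    padded = indices + [indices[-1]] * (num_samples * world_size - total)
    consecutive = {r: padded[r * num_samples:(r + 1) * num_samples] for r in range(world_size)}
    # {0: [0, 1, 2, 3], 1: [4, 5, 6, 7], 2: [8, 9, 9, 9]}
    return non_consecutive, consecutive
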
from .base_metric import *
from .metrics import *
from .evaluator import *
from .register import *
from .collector import *
# Copyright (c) 2024 westlake-repl
# SPDX-License-Identifier: MIT
import torch
from REC.utils import EvaluatorType
class AbstractMetric(object):
""":class:`AbstractMetric` is the base object of all metrics. If you want to
implement a metric, you should inherit this class.
Args:
config (Config): the config of evaluator.
"""
smaller = False
def __init__(self, config):
self.decimal_place = config['metric_decimal_place'] + 2 if config['metric_decimal_place'] else 7
def calculate_metric(self, dataobject):
"""Get the dictionary of a metric.
Args:
dataobject(DataStruct): it contains all the information needed to calculate metrics.
Returns:
            dict: such as ``{'metric@10': 0.3153, 'metric@20': 0.3824}``
"""
raise NotImplementedError('Method [calculate_metric] should be implemented.')
class TopkMetric(AbstractMetric):
""":class:`TopkMetric` is a base object of top-k metrics. If you want to
implement an top-k metric, you can inherit this class.
Args:
config (Config): The config of evaluator.
"""
metric_type = EvaluatorType.RANKING
metric_need = ['rec.topk']
def __init__(self, config):
super().__init__(config)
self.topk = config['topk']
def used_info(self, dataobject):
"""Get the bool matrix indicating whether the corresponding item is positive
and number of positive items for each user.
"""
rec_mat = dataobject.get('rec.topk')
topk_idx, pos_len_list = torch.split(rec_mat, [max(self.topk), 1], dim=1)
return topk_idx.to(torch.bool).numpy(), pos_len_list.squeeze(-1).numpy()
def topk_result(self, metric, value):
"""Match the metric value to the `k` and put them in `dictionary` form.
Args:
metric(str): the name of calculated metric.
value(numpy.ndarray): metrics for each user, including values from `metric@1` to `metric@max(self.topk)`.
Returns:
dict: metric values required in the configuration.
"""
metric_dict = {}
avg_result = value.sum(axis=0)
for k in self.topk:
key = '{}@{}'.format(metric, k)
# metric_dict[key] = round(avg_result[k - 1], self.decimal_place)
metric_dict[key] = avg_result[k - 1]
return metric_dict
def metric_info(self, pos_index, pos_len=None):
"""Calculate the value of the metric.
Args:
pos_index(numpy.ndarray): a bool matrix, shape of ``n_users * max(topk)``. The item with the (j+1)-th \
highest score of i-th user is positive if ``pos_index[i][j] == True`` and negative otherwise.
pos_len(numpy.ndarray): a vector representing the number of positive items per user, shape of ``(n_users,)``.
Returns:
numpy.ndarray: metrics for each user, including values from `metric@1` to `metric@max(self.topk)`.
"""
raise NotImplementedError('Method [metric_info] of top-k metric should be implemented.')
class LossMetric(AbstractMetric):
""":class:`LossMetric` is a base object of loss based metrics and AUC. If you want to
implement an loss based metric, you can inherit this class.
Args:
config (Config): The config of evaluator.
"""
metric_type = EvaluatorType.VALUE
metric_need = ['rec.score', 'data.label']
def __init__(self, config):
super().__init__(config)
def used_info(self, dataobject):
"""Get scores that model predicted and the ground truth."""
preds = dataobject.get('rec.score')
trues = dataobject.get('data.label')
return preds.squeeze(-1).numpy(), trues.squeeze(-1).numpy()
def output_metric(self, metric, dataobject):
preds, trues = self.used_info(dataobject)
result = self.metric_info(preds, trues)
return {metric: round(result, self.decimal_place)}
def metric_info(self, preds, trues):
"""Calculate the value of the metric.
Args:
preds (numpy.ndarray): the scores predicted by model, a one-dimensional vector.
trues (numpy.ndarray): the label of items, which has the same shape as ``preds``.
Returns:
float: The value of the metric.
"""
raise NotImplementedError('Method [metric_info] of loss-based metric should be implemented.')
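
# A minimal illustrative sketch of used_info: each row of 'rec.topk' holds the
# 0/1 hit indicators for the top-max(topk) ranked items followed by the number
# of ground-truth positives, so splitting off the last column recovers both
# (the two-user matrix below is hypothetical, with max(topk) = 3).
def _example_topk_used_info():
    rec_mat = torch.tensor([[0, 1, 0, 1],   # user 0: hit at rank 2, 1 positive
                            [1, 0, 0, 1]])  # user 1: hit at rank 1, 1 positive
    topk_idx, pos_len_list = torch.split(rec_mat, [3, 1], dim=1)
    return topk_idx.to(torch.bool).numpy(), pos_len_list.squeeze(-1).numpy()
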
# Copyright (c) 2024 westlake-repl
# SPDX-License-Identifier: MIT
from .register import Register
import torch
import copy
import numpy as np
class DataStruct(object):
def __init__(self):
self._data_dict = {}
def __getitem__(self, name: str):
return self._data_dict[name]
def __setitem__(self, name: str, value):
self._data_dict[name] = value
def __delitem__(self, name: str):
self._data_dict.pop(name)
def __contains__(self, key: str):
return key in self._data_dict
def get(self, name: str):
if name not in self._data_dict:
raise IndexError("Can not load the data without registration !")
return self[name]
def set(self, name: str, value):
self._data_dict[name] = value
def update_tensor(self, name: str, value: torch.Tensor):
if name not in self._data_dict:
self._data_dict[name] = value.cpu().clone().detach()
else:
if not isinstance(self._data_dict[name], torch.Tensor):
raise ValueError("{} is not a tensor.".format(name))
self._data_dict[name] = torch.cat((self._data_dict[name], value.cpu().clone().detach()), dim=0)
def __str__(self):
data_info = '\nContaining:\n'
for data_key in self._data_dict.keys():
data_info += data_key + '\n'
return data_info
class Collector(object):
"""The collector is used to collect the resource for evaluator.
As the evaluation metrics are various, the needed resource not only contain the recommended result
but also other resource from data and model. They all can be collected by the collector during the training
and evaluation process.
This class is only used in Trainer.
"""
def __init__(self, config):
self.config = config
self.data_struct = DataStruct()
self.register = Register(config)
self.full = True
self.topk = self.config['topk']
self.device = self.config['device']
def data_collect(self, train_data):
""" Collect the evaluation resource from training data.
Args:
train_data (AbstractDataLoader): the training dataloader which contains the training data.
"""
if self.register.need('data.num_items'):
item_id = 'item_id'
self.data_struct.set('data.num_items', train_data.dataset.item_num)
if self.register.need('data.num_users'):
user_id = 'user_id'
self.data_struct.set('data.num_users', train_data.dataset.user_num)
if self.register.need('data.count_items'):
self.data_struct.set('data.count_items', train_data.dataset.item_counter)
if self.register.need('data.count_users'):
            self.data_struct.set('data.count_users', train_data.dataset.user_counter)
def _average_rank(self, scores):
"""Get the ranking of an ordered tensor, and take the average of the ranking for positions with equal values.
Args:
scores(tensor): an ordered tensor, with size of `(N, )`
Returns:
torch.Tensor: average_rank
Example:
>>> average_rank(tensor([[1,2,2,2,3,3,6],[2,2,2,2,4,5,5]]))
tensor([[1.0000, 3.0000, 3.0000, 3.0000, 5.5000, 5.5000, 7.0000],
[2.5000, 2.5000, 2.5000, 2.5000, 5.0000, 6.5000, 6.5000]])
Reference:
https://github.com/scipy/scipy/blob/v0.17.1/scipy/stats/stats.py#L5262-L5352
"""
length, width = scores.shape
true_tensor = torch.full((length, 1), True, dtype=torch.bool, device=self.device)
obs = torch.cat([true_tensor, scores[:, 1:] != scores[:, :-1]], dim=1)
# bias added to dense
bias = torch.arange(0, length, device=self.device).repeat(width).reshape(width, -1). \
transpose(1, 0).reshape(-1)
dense = obs.view(-1).cumsum(0) + bias
# cumulative counts of each unique value
count = torch.where(torch.cat([obs, true_tensor], dim=1))[1]
# get average rank
avg_rank = .5 * (count[dense] + count[dense - 1] + 1).view(length, -1)
return avg_rank
def eval_batch_collect(
self, scores_tensor: torch.Tensor, positive_u: torch.Tensor, positive_i: torch.Tensor, interaction=None
):
""" Collect the evaluation resource from batched eval data and batched model output.
Args:
scores_tensor (Torch.Tensor): the output tensor of model with the shape of `(N, )`
interaction(Interaction): batched eval data.
positive_u(Torch.Tensor): the row index of positive items for each user.
positive_i(Torch.Tensor): the positive item id for each user.
"""
if self.register.need('rec.items'):
# get topk
_, topk_idx = torch.topk(scores_tensor, max(self.topk), dim=-1) # n_users x k
self.data_struct.update_tensor('rec.items', topk_idx)
if self.register.need('rec.topk'):
_, topk_idx = torch.topk(scores_tensor, max(self.topk), dim=-1) # n_users x k
pos_matrix = torch.zeros_like(scores_tensor, dtype=torch.int)
pos_matrix[positive_u, positive_i] = 1
pos_len_list = pos_matrix.sum(dim=1, keepdim=True)
pos_idx = torch.gather(pos_matrix, dim=1, index=topk_idx)
result = torch.cat((pos_idx, pos_len_list), dim=1)
self.data_struct.update_tensor('rec.topk', result)
if self.register.need('rec.meanrank'):
desc_scores, desc_index = torch.sort(scores_tensor, dim=-1, descending=True)
# get the index of positive items in the ranking list
pos_matrix = torch.zeros_like(scores_tensor)
pos_matrix[positive_u, positive_i] = 1
pos_index = torch.gather(pos_matrix, dim=1, index=desc_index)
avg_rank = self._average_rank(desc_scores)
pos_rank_sum = torch.where(pos_index == 1, avg_rank, torch.zeros_like(avg_rank)).sum(dim=-1, keepdim=True)
pos_len_list = pos_matrix.sum(dim=1, keepdim=True)
user_len_list = desc_scores.argmin(dim=1, keepdim=True)
result = torch.cat((pos_rank_sum, user_len_list, pos_len_list), dim=1)
self.data_struct.update_tensor('rec.meanrank', result)
if self.register.need('rec.score'):
self.data_struct.update_tensor('rec.score', scores_tensor)
# if self.register.need('data.label'):
# self.label_field = self.config['LABEL_FIELD']
# self.data_struct.update_tensor('data.label', interaction[self.label_field].to(self.device))
def model_collect(self, model: torch.nn.Module):
""" Collect the evaluation resource from model.
Args:
model (nn.Module): the trained recommendation model.
"""
pass
# TODO:
def eval_collect(self, eval_pred: torch.Tensor, data_label: torch.Tensor):
""" Collect the evaluation resource from total output and label.
It was designed for those models that can not predict with batch.
Args:
eval_pred (torch.Tensor): the output score tensor of model.
data_label (torch.Tensor): the label tensor.
"""
if self.register.need('rec.score'):
self.data_struct.update_tensor('rec.score', eval_pred)
if self.register.need('data.label'):
self.label_field = self.config['LABEL_FIELD']
self.data_struct.update_tensor('data.label', data_label.to(self.device))
def distributed_concat(self, tensor, num_total_examples):
output_tensors = [tensor.clone() for _ in range(torch.distributed.get_world_size())]
torch.distributed.all_gather(output_tensors, tensor)
concat = torch.cat(output_tensors, dim=0)
# truncate the dummy elements added by SequentialDistributedSampler
return concat[:num_total_examples]
def get_data_struct(self):
""" Get all the evaluation resource that been collected.
And reset some of outdated resource.
"""
returned_struct = copy.deepcopy(self.data_struct)
for key in ['rec.topk', 'rec.meanrank', 'rec.score', 'rec.items', 'data.label']:
if key in self.data_struct:
del self.data_struct[key]
return returned_struct
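
# A minimal illustrative sketch of how eval_batch_collect assembles 'rec.topk':
# ground-truth items are marked in a (n_users, n_items) 0/1 matrix, those marks
# are gathered at the top-k ranked item ids, and the per-user positive count is
# appended as a final column (TopkMetric.used_info later splits it off again).
# Argument names mirror eval_batch_collect; k=3 is a hypothetical choice.
def _example_rec_topk(scores_tensor, positive_u, positive_i, k=3):
    _, topk_idx = torch.topk(scores_tensor, k, dim=-1)
    pos_matrix = torch.zeros_like(scores_tensor, dtype=torch.int)
    pos_matrix[positive_u, positive_i] = 1
    pos_len_list = pos_matrix.sum(dim=1, keepdim=True)
    pos_idx = torch.gather(pos_matrix, dim=1, index=topk_idx)
    # each row: [hit@1, hit@2, hit@3, n_positives]
    return torch.cat((pos_idx, pos_len_list), dim=1)
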
# Copyright (c) 2024 westlake-repl
# SPDX-License-Identifier: MIT
from .register import metrics_dict
from .collector import DataStruct
from collections import OrderedDict
class Evaluator(object):
"""Evaluator is used to check parameter correctness, and summarize the results of all metrics.
"""
def __init__(self, config):
self.config = config
self.metrics = [metric.lower() for metric in self.config['metrics']]
self.metric_class = {}
for metric in self.metrics:
self.metric_class[metric] = metrics_dict[metric](self.config)
def evaluate(self, dataobject: DataStruct):
"""calculate all the metrics. It is called at the end of each epoch
Args:
dataobject (DataStruct): It contains all the information needed for metrics.
Returns:
collections.OrderedDict: such as ``{'hit@20': 0.3824, 'recall@20': 0.0527, 'hit@10': 0.3153, 'recall@10': 0.0329, 'gauc': 0.9236}``
"""
result_dict = OrderedDict()
for metric in self.metrics:
metric_val = self.metric_class[metric].calculate_metric(dataobject)
result_dict.update(metric_val)
return result_dict
# Copyright (c) 2024 westlake-repl
# SPDX-License-Identifier: MIT
from logging import getLogger
import numpy as np
from collections import Counter
from sklearn.metrics import auc as sk_auc
from sklearn.metrics import mean_absolute_error, mean_squared_error
from .utils import _binary_clf_curve
from .base_metric import AbstractMetric, TopkMetric, LossMetric
from REC.utils import EvaluatorType
# TopK Metrics
class Hit(TopkMetric):
r"""HR_ (also known as truncated Hit-Ratio) is a way of calculating how many 'hits'
you have in an n-sized list of ranked items. If there is at least one item that falls in the ground-truth set,
we call it a hit.
.. _HR: https://medium.com/@rishabhbhatia315/recommendation-system-evaluation-metrics-3f6739288870
.. math::
\mathrm {HR@K} = \frac{1}{|U|}\sum_{u \in U} \delta(\hat{R}(u) \cap R(u) \neq \emptyset),
:math:`\delta(·)` is an indicator function. :math:`\delta(b)` = 1 if :math:`b` is true and 0 otherwise.
:math:`\emptyset` denotes the empty set.
"""
def __init__(self, config):
super().__init__(config)
def calculate_metric(self, dataobject):
pos_index, _ = self.used_info(dataobject)
result = self.metric_info(pos_index)
metric_dict = self.topk_result('hit', result)
return metric_dict
def metric_info(self, pos_index):
result = np.cumsum(pos_index, axis=1)
return (result > 0).astype(int)
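
# A minimal illustrative sketch of the cumulative-hit trick above: once any
# prefix of the ranked list contains a positive item, every larger k also
# counts as a hit (the 2-user, top-3 matrix below is hypothetical).
def _example_hit():
    pos_index = np.array([[False, True, False],
                          [False, False, False]])
    result = (np.cumsum(pos_index, axis=1) > 0).astype(int)
    return result  # [[0, 1, 1], [0, 0, 0]]: user 0 scores hit@2 and hit@3
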
class MRR(TopkMetric):
r"""The MRR_ (also known as Mean Reciprocal Rank) computes the reciprocal rank
of the first relevant item found by an algorithm.
.. _MRR: https://en.wikipedia.org/wiki/Mean_reciprocal_rank
.. math::
\mathrm {MRR@K} = \frac{1}{|U|}\sum_{u \in U} \frac{1}{\operatorname{rank}_{u}^{*}}
:math:`{rank}_{u}^{*}` is the rank position of the first relevant item found by an algorithm for a user :math:`u`.
"""
def __init__(self, config):
super().__init__(config)
def calculate_metric(self, dataobject):
pos_index, _ = self.used_info(dataobject)
result = self.metric_info(pos_index)
metric_dict = self.topk_result('mrr', result)
return metric_dict
def metric_info(self, pos_index):
idxs = pos_index.argmax(axis=1)
result = np.zeros_like(pos_index, dtype=np.float64)
for row, idx in enumerate(idxs):
if pos_index[row, idx] > 0:
result[row, idx:] = 1 / (idx + 1)
else:
result[row, idx:] = 0
return result
class MAP(TopkMetric):
r"""MAP_ (also known as Mean Average Precision) is meant to calculate
average precision for the relevant items.
Note:
In this case the normalization factor used is :math:`\frac{1}{min(|\hat R(u)|, K)}`, which prevents your
AP score from being unfairly suppressed when your number of recommendations couldn't possibly capture
all the correct ones.
.. _MAP: http://sdsawtelle.github.io/blog/output/mean-average-precision-MAP-for-recommender-systems.html#MAP-for-Recommender-Algorithms
.. math::
\mathrm{MAP@K} = \frac{1}{|U|}\sum_{u \in U} (\frac{1}{min(|\hat R(u)|, K)} \sum_{j=1}^{|\hat{R}(u)|} I\left(\hat{R}_{j}(u) \in R(u)\right) \cdot Precision@j)
    :math:`\hat{R}_{j}(u)` is the j-th item in the recommendation list of :math:`\hat{R}(u)`.
"""
def __init__(self, config):
super().__init__(config)
self.config = config
def calculate_metric(self, dataobject):
pos_index, pos_len = self.used_info(dataobject)
result = self.metric_info(pos_index, pos_len)
metric_dict = self.topk_result('map', result)
return metric_dict
def metric_info(self, pos_index, pos_len):
pre = pos_index.cumsum(axis=1) / np.arange(1, pos_index.shape[1] + 1)
sum_pre = np.cumsum(pre * pos_index.astype(np.float64), axis=1)
len_rank = np.full_like(pos_len, pos_index.shape[1])
actual_len = np.where(pos_len > len_rank, len_rank, pos_len)
result = np.zeros_like(pos_index, dtype=np.float64)
for row, lens in enumerate(actual_len):
ranges = np.arange(1, pos_index.shape[1] + 1)
ranges[lens:] = ranges[lens - 1]
result[row] = sum_pre[row] / ranges
return result
class Recall(TopkMetric):
r"""Recall_ is a measure for computing the fraction of relevant items out of all relevant items.
.. _recall: https://en.wikipedia.org/wiki/Precision_and_recall#Recall
.. math::
\mathrm {Recall@K} = \frac{1}{|U|}\sum_{u \in U} \frac{|\hat{R}(u) \cap R(u)|}{|R(u)|}
:math:`|R(u)|` represents the item count of :math:`R(u)`.
"""
def __init__(self, config):
super().__init__(config)
def calculate_metric(self, dataobject):
pos_index, pos_len = self.used_info(dataobject)
result = self.metric_info(pos_index, pos_len)
metric_dict = self.topk_result('recall', result)
return metric_dict
def metric_info(self, pos_index, pos_len):
return np.cumsum(pos_index, axis=1) / pos_len.reshape(-1, 1)
class NDCG(TopkMetric):
r"""NDCG_ (also known as normalized discounted cumulative gain) is a measure of ranking quality,
where positions are discounted logarithmically. It accounts for the position of the hit by assigning
higher scores to hits at top ranks.
.. _NDCG: https://en.wikipedia.org/wiki/Discounted_cumulative_gain#Normalized_DCG
.. math::
\mathrm {NDCG@K} = \frac{1}{|U|}\sum_{u \in U} (\frac{1}{\sum_{i=1}^{\min (|R(u)|, K)}
\frac{1}{\log _{2}(i+1)}} \sum_{i=1}^{K} \delta(i \in R(u)) \frac{1}{\log _{2}(i+1)})
:math:`\delta(·)` is an indicator function.
"""
def __init__(self, config):
super().__init__(config)
def calculate_metric(self, dataobject):
pos_index, pos_len = self.used_info(dataobject)
result = self.metric_info(pos_index, pos_len)
metric_dict = self.topk_result('ndcg', result)
return metric_dict
def metric_info(self, pos_index, pos_len):
len_rank = np.full_like(pos_len, pos_index.shape[1])
idcg_len = np.where(pos_len > len_rank, len_rank, pos_len)
iranks = np.zeros_like(pos_index, dtype=np.float64)
iranks[:, :] = np.arange(1, pos_index.shape[1] + 1)
idcg = np.cumsum(1.0 / np.log2(iranks + 1), axis=1)
for row, idx in enumerate(idcg_len):
idcg[row, idx:] = idcg[row, idx - 1]
ranks = np.zeros_like(pos_index, dtype=np.float64)
ranks[:, :] = np.arange(1, pos_index.shape[1] + 1)
dcg = 1.0 / np.log2(ranks + 1)
dcg = np.cumsum(np.where(pos_index, dcg, 0), axis=1)
result = dcg / idcg
return result
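
# A minimal illustrative sketch (one hypothetical user, 2 relevant items, hits
# at ranks 1 and 3): the ideal DCG is truncated at min(|R(u)|, k), so
# NDCG@3 = 1.5 / (1 + 1/log2(3)) ~= 0.92, matching metric_info above.
def _example_ndcg():
    pos_index = np.array([[True, False, True]])
    pos_len = np.array([2])
    ranks = np.arange(1, 4)
    dcg = np.cumsum(np.where(pos_index, 1.0 / np.log2(ranks + 1), 0), axis=1)
    idcg = np.cumsum(1.0 / np.log2(ranks + 1))
    idcg[pos_len[0]:] = idcg[pos_len[0] - 1]  # only 2 positives exist in the ideal list
    return dcg / idcg  # [[1.0, 0.613..., 0.919...]]
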
class Precision(TopkMetric):
r"""Precision_ (also called positive predictive value) is a measure for computing the fraction of relevant items
out of all the recommended items. We average the metric for each user :math:`u` get the final result.
.. _precision: https://en.wikipedia.org/wiki/Precision_and_recall#Precision
.. math::
\mathrm {Precision@K} = \frac{1}{|U|}\sum_{u \in U} \frac{|\hat{R}(u) \cap R(u)|}{|\hat {R}(u)|}
:math:`|\hat R(u)|` represents the item count of :math:`\hat R(u)`.
"""
def __init__(self, config):
super().__init__(config)
def calculate_metric(self, dataobject):
pos_index, _ = self.used_info(dataobject)
result = self.metric_info(pos_index)
metric_dict = self.topk_result('precision', result)
return metric_dict
def metric_info(self, pos_index):
return pos_index.cumsum(axis=1) / np.arange(1, pos_index.shape[1] + 1)
# CTR Metrics
class GAUC(AbstractMetric):
r"""GAUC (also known as Grouped Area Under Curve) is used to evaluate the two-class model, referring to
the area under the ROC curve grouped by user. We weighted the index of each user :math:`u` by the number of positive
samples of users to get the final result.
For further details, please refer to the `paper <https://dl.acm.org/doi/10.1145/3219819.3219823>`__
Note:
It calculates the AUC score of each user, and finally obtains GAUC by weighting the user AUC.
It is also not limited to k. Due to our padding for `scores_tensor` with `-np.inf`, the padding
        value will influence the ranks of original items. Therefore, we use descending sort here and make
an identity transformation to the formula of `AUC`, which is shown in `auc_` function.
For readability, we didn't do simplification in the code.
.. math::
\begin{align*}
\mathrm {AUC(u)} &= \frac {{{|R(u)|} \times {(n+1)} - \frac{|R(u)| \times (|R(u)|+1)}{2}} -
\sum\limits_{i=1}^{|R(u)|} rank_{i}} {{|R(u)|} \times {(n - |R(u)|)}} \\
\mathrm{GAUC} &= \frac{1}{\sum_{u \in U} |R(u)|}\sum_{u \in U} |R(u)| \cdot(\mathrm {AUC(u)})
\end{align*}
:math:`rank_i` is the descending rank of the i-th items in :math:`R(u)`.
"""
metric_type = EvaluatorType.RANKING
metric_need = ['rec.meanrank']
def __init__(self, config):
super().__init__(config)
def calculate_metric(self, dataobject):
mean_rank = dataobject.get('rec.meanrank').numpy()
pos_rank_sum, user_len_list, pos_len_list = np.split(mean_rank, 3, axis=1)
user_len_list, pos_len_list = user_len_list.squeeze(-1), pos_len_list.squeeze(-1)
result = self.metric_info(pos_rank_sum, user_len_list, pos_len_list)
return {'gauc': round(result, self.decimal_place)}
def metric_info(self, pos_rank_sum, user_len_list, pos_len_list):
"""Get the value of GAUC metric.
Args:
pos_rank_sum (numpy.ndarray): sum of descending rankings for positive items of each users.
user_len_list (numpy.ndarray): the number of predicted items for users.
pos_len_list (numpy.ndarray): the number of positive items for users.
Returns:
float: The value of the GAUC.
"""
neg_len_list = user_len_list - pos_len_list
# check positive and negative samples
any_without_pos = np.any(pos_len_list == 0)
any_without_neg = np.any(neg_len_list == 0)
        non_zero_idx = np.full(len(user_len_list), True, dtype=bool)
if any_without_pos:
logger = getLogger()
logger.warning(
"No positive samples in some users, "
"true positive value should be meaningless, "
"these users have been removed from GAUC calculation"
)
non_zero_idx *= (pos_len_list != 0)
if any_without_neg:
logger = getLogger()
logger.warning(
"No negative samples in some users, "
"false positive value should be meaningless, "
"these users have been removed from GAUC calculation"
)
non_zero_idx *= (neg_len_list != 0)
if any_without_pos or any_without_neg:
item_list = user_len_list, neg_len_list, pos_len_list, pos_rank_sum
user_len_list, neg_len_list, pos_len_list, pos_rank_sum = map(lambda x: x[non_zero_idx], item_list)
pair_num = (user_len_list + 1) * pos_len_list - pos_len_list * (pos_len_list + 1) / 2 - np.squeeze(pos_rank_sum)
user_auc = pair_num / (neg_len_list * pos_len_list)
result = (user_auc * pos_len_list).sum() / pos_len_list.sum()
return result
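
# A minimal illustrative sketch of the per-user descending-rank identity used
# above: with 5 hypothetical scored items and positives ranked 1st and 3rd,
# there are 5 concordant (positive, negative) pairs out of 2 * 3, so
# AUC(u) = 5 / 6.
def _example_gauc_user():
    n_items, pos_len = 5, 2
    pos_rank_sum = 1 + 3  # descending ranks of the positive items
    pair_num = (n_items + 1) * pos_len - pos_len * (pos_len + 1) / 2 - pos_rank_sum
    return pair_num / ((n_items - pos_len) * pos_len)  # 5.0 / 6 ~= 0.833
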
class AUC(LossMetric):
r"""AUC_ (also known as Area Under Curve) is used to evaluate the two-class model, referring to
the area under the ROC curve.
.. _AUC: https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve
Note:
This metric does not calculate group-based AUC which considers the AUC scores
averaged across users. It is also not limited to k. Instead, it calculates the
scores on the entire prediction results regardless the users. We call the interface
in `scikit-learn`, and code calculates the metric using the variation of following formula.
.. math::
\mathrm {AUC} = \frac {{{M} \times {(N+1)} - \frac{M \times (M+1)}{2}} -
\sum\limits_{i=1}^{M} rank_{i}} {{M} \times {(N - M)}}
:math:`M` denotes the number of positive items.
:math:`N` denotes the total number of user-item interactions.
:math:`rank_i` denotes the descending rank of the i-th positive item.
"""
def __init__(self, config):
super().__init__(config)
def calculate_metric(self, dataobject):
return self.output_metric('auc', dataobject)
def metric_info(self, preds, trues):
fps, tps = _binary_clf_curve(trues, preds)
if len(fps) > 2:
optimal_idxs = np.where(np.r_[True, np.logical_or(np.diff(fps, 2), np.diff(tps, 2)), True])[0]
fps = fps[optimal_idxs]
tps = tps[optimal_idxs]
tps = np.r_[0, tps]
fps = np.r_[0, fps]
if fps[-1] <= 0:
logger = getLogger()
logger.warning("No negative samples in y_true, " "false positive value should be meaningless")
fpr = np.repeat(np.nan, fps.shape)
else:
fpr = fps / fps[-1]
if tps[-1] <= 0:
logger = getLogger()
logger.warning("No positive samples in y_true, " "true positive value should be meaningless")
tpr = np.repeat(np.nan, tps.shape)
else:
tpr = tps / tps[-1]
result = sk_auc(fpr, tpr)
return result
# Loss-based Metrics
class MAE(LossMetric):
r"""MAE_ (also known as Mean Absolute Error regression loss) is used to evaluate the difference between
the score predicted by the model and the actual behavior of the user.
.. _MAE: https://en.wikipedia.org/wiki/Mean_absolute_error
.. math::
\mathrm{MAE}=\frac{1}{|{S}|} \sum_{(u, i) \in {S}}\left|\hat{r}_{u i}-r_{u i}\right|
:math:`|S|` represents the number of pairs in :math:`S`.
"""
smaller = True
def __init__(self, config):
super().__init__(config)
def calculate_metric(self, dataobject):
return self.output_metric('mae', dataobject)
def metric_info(self, preds, trues):
return mean_absolute_error(trues, preds)
class RMSE(LossMetric):
r"""RMSE_ (also known as Root Mean Squared Error) is another error metric like `MAE`.
.. _RMSE: https://en.wikipedia.org/wiki/Root-mean-square_deviation
.. math::
\mathrm{RMSE} = \sqrt{\frac{1}{|{S}|} \sum_{(u, i) \in {S}}(\hat{r}_{u i}-r_{u i})^{2}}
"""
smaller = True
def __init__(self, config):
super().__init__(config)
def calculate_metric(self, dataobject):
return self.output_metric('rmse', dataobject)
def metric_info(self, preds, trues):
return np.sqrt(mean_squared_error(trues, preds))
class LogLoss(LossMetric):
r"""Logloss_ (also known as logistic loss or cross-entropy loss) is used to evaluate the probabilistic
output of the two-class classifier.
.. _Logloss: http://wiki.fast.ai/index.php/Log_Loss
.. math::
LogLoss = \frac{1}{|S|} \sum_{(u,i) \in S}(-((r_{u i} \ \log{\hat{r}_{u i}}) + {(1 - r_{u i})}\ \log{(1 - \hat{r}_{u i})}))
"""
smaller = True
def __init__(self, config):
super().__init__(config)
def calculate_metric(self, dataobject):
return self.output_metric('logloss', dataobject)
def metric_info(self, preds, trues):
eps = 1e-15
        preds = np.float64(preds)
preds = np.clip(preds, eps, 1 - eps)
loss = np.sum(-trues * np.log(preds) - (1 - trues) * np.log(1 - preds))
return loss / len(preds)
class ItemCoverage(AbstractMetric):
r"""ItemCoverage_ computes the coverage of recommended items over all items.
.. _ItemCoverage: https://en.wikipedia.org/wiki/Coverage_(information_systems)
For further details, please refer to the `paper <https://dl.acm.org/doi/10.1145/1864708.1864761>`__
and `paper <https://link.springer.com/article/10.1007/s13042-017-0762-9>`__.
.. math::
\mathrm{Coverage@K}=\frac{\left| \bigcup_{u \in U} \hat{R}(u) \right|}{|I|}
"""
metric_type = EvaluatorType.RANKING
metric_need = ['rec.items', 'data.num_items']
def __init__(self, config):
super().__init__(config)
self.topk = config['topk']
def used_info(self, dataobject):
"""Get the matrix of recommendation items and number of items in total item set"""
item_matrix = dataobject.get('rec.items')
num_items = dataobject.get('data.num_items')
return item_matrix.numpy(), num_items
def calculate_metric(self, dataobject):
item_matrix, num_items = self.used_info(dataobject)
metric_dict = {}
for k in self.topk:
key = '{}@{}'.format('itemcoverage', k)
metric_dict[key] = round(self.get_coverage(item_matrix[:, :k], num_items), self.decimal_place)
return metric_dict
def get_coverage(self, item_matrix, num_items):
"""Get the coverage of recommended items over all items
Args:
item_matrix(numpy.ndarray): matrix of items recommended to users.
num_items(int): the total number of items.
Returns:
float: the `coverage` metric.
"""
unique_count = np.unique(item_matrix).shape[0]
return unique_count / num_items
class AveragePopularity(AbstractMetric):
r"""AveragePopularity computes the average popularity of recommended items.
For further details, please refer to the `paper <https://arxiv.org/abs/1205.6700>`__
and `paper <https://link.springer.com/article/10.1007/s13042-017-0762-9>`__.
.. math::
\mathrm{AveragePopularity@K}=\frac{1}{|U|} \sum_{u \in U } \frac{\sum_{i \in R_{u}} \phi(i)}{|R_{u}|}
:math:`\phi(i)` is the number of interaction of item i in training data.
"""
metric_type = EvaluatorType.RANKING
smaller = True
metric_need = ['rec.items', 'data.count_items']
def __init__(self, config):
super().__init__(config)
self.topk = config['topk']
def used_info(self, dataobject):
"""Get the matrix of recommendation items and the popularity of items in training data"""
item_counter = dataobject.get('data.count_items')
item_matrix = dataobject.get('rec.items')
return item_matrix.numpy(), dict(item_counter)
def calculate_metric(self, dataobject):
item_matrix, item_count = self.used_info(dataobject)
result = self.metric_info(self.get_pop(item_matrix, item_count))
metric_dict = self.topk_result('averagepopularity', result)
return metric_dict
def get_pop(self, item_matrix, item_count):
"""Convert the matrix of item id to the matrix of item popularity using a dict:{id,count}.
Args:
item_matrix(numpy.ndarray): matrix of items recommended to users.
item_count(dict): the number of interaction of items in training data.
Returns:
numpy.ndarray: the popularity of items in the recommended list.
"""
value = np.zeros_like(item_matrix)
for i in range(item_matrix.shape[0]):
row = item_matrix[i, :]
for j in range(row.shape[0]):
value[i][j] = item_count.get(row[j], 0)
return value
def metric_info(self, values):
return values.cumsum(axis=1) / np.arange(1, values.shape[1] + 1)
def topk_result(self, metric, value):
"""Match the metric value to the `k` and put them in `dictionary` form
Args:
metric(str): the name of calculated metric.
value(numpy.ndarray): metrics for each user, including values from `metric@1` to `metric@max(self.topk)`.
Returns:
dict: metric values required in the configuration.
"""
metric_dict = {}
avg_result = value.mean(axis=0)
for k in self.topk:
key = '{}@{}'.format(metric, k)
metric_dict[key] = round(avg_result[k - 1], self.decimal_place)
return metric_dict
class ShannonEntropy(AbstractMetric):
r"""ShannonEntropy_ presents the diversity of the recommendation items.
It is the entropy over items' distribution.
.. _ShannonEntropy: https://en.wikipedia.org/wiki/Entropy_(information_theory)
For further details, please refer to the `paper <https://arxiv.org/abs/1205.6700>`__
and `paper <https://link.springer.com/article/10.1007/s13042-017-0762-9>`__
.. math::
\mathrm {ShannonEntropy@K}=-\sum_{i=1}^{|I|} p(i) \log p(i)
:math:`p(i)` is the probability of recommending item i
which is the number of item i in recommended list over all items.
"""
metric_type = EvaluatorType.RANKING
metric_need = ['rec.items']
def __init__(self, config):
super().__init__(config)
self.topk = config['topk']
def used_info(self, dataobject):
"""Get the matrix of recommendation items.
"""
item_matrix = dataobject.get('rec.items')
return item_matrix.numpy()
def calculate_metric(self, dataobject):
item_matrix = self.used_info(dataobject)
metric_dict = {}
for k in self.topk:
key = '{}@{}'.format('shannonentropy', k)
metric_dict[key] = round(self.get_entropy(item_matrix[:, :k]), self.decimal_place)
return metric_dict
def get_entropy(self, item_matrix):
"""Get shannon entropy through the top-k recommendation list.
Args:
item_matrix(numpy.ndarray): matrix of items recommended to users.
Returns:
float: the shannon entropy.
"""
item_count = dict(Counter(item_matrix.flatten()))
total_num = item_matrix.shape[0] * item_matrix.shape[1]
result = 0.0
for cnt in item_count.values():
p = cnt / total_num
result += -p * np.log(p)
return result / len(item_count)
class GiniIndex(AbstractMetric):
r"""GiniIndex presents the diversity of the recommendation items.
It is used to measure the inequality of a distribution.
.. _GiniIndex: https://en.wikipedia.org/wiki/Gini_coefficient
For further details, please refer to the `paper <https://dl.acm.org/doi/10.1145/3308560.3317303>`__.
.. math::
\mathrm {GiniIndex@K}=\left(\frac{\sum_{i=1}^{|I|}(2 i-|I|-1) P{(i)}}{|I| \sum_{i=1}^{|I|} P{(i)}}\right)
:math:`P{(i)}` represents the number of times all items appearing in the recommended list,
    which is indexed in non-decreasing order (:math:`P_{(i)} \leq P_{(i+1)}`).
"""
metric_type = EvaluatorType.RANKING
smaller = True
metric_need = ['rec.items', 'data.num_items']
def __init__(self, config):
super().__init__(config)
self.topk = config['topk']
def used_info(self, dataobject):
"""Get the matrix of recommendation items and number of items in total item set"""
item_matrix = dataobject.get('rec.items')
num_items = dataobject.get('data.num_items')
return item_matrix.numpy(), num_items
def calculate_metric(self, dataobject):
item_matrix, num_items = self.used_info(dataobject)
metric_dict = {}
for k in self.topk:
key = '{}@{}'.format('giniindex', k)
metric_dict[key] = round(self.get_gini(item_matrix[:, :k], num_items), self.decimal_place)
return metric_dict
def get_gini(self, item_matrix, num_items):
"""Get gini index through the top-k recommendation list.
Args:
item_matrix(numpy.ndarray): matrix of items recommended to users.
num_items(int): the total number of items.
Returns:
float: the gini index.
"""
item_count = dict(Counter(item_matrix.flatten()))
sorted_count = np.array(sorted(item_count.values()))
num_recommended_items = sorted_count.shape[0]
total_num = item_matrix.shape[0] * item_matrix.shape[1]
idx = np.arange(num_items - num_recommended_items + 1, num_items + 1)
gini_index = np.sum((2 * idx - num_items - 1) * sorted_count) / total_num
gini_index /= num_items
return gini_index
class TailPercentage(AbstractMetric):
r"""TailPercentage_ computes the percentage of long-tail items in recommendation items.
.. _TailPercentage: https://en.wikipedia.org/wiki/Long_tail#Criticisms
For further details, please refer to the `paper <https://arxiv.org/pdf/2007.12329.pdf>`__.
.. math::
\mathrm {TailPercentage@K}=\frac{1}{|U|} \sum_{u \in U} \frac{\sum_{i \in R_{u}} {\delta(i \in T)}}{|R_{u}|}
:math:`\delta(·)` is an indicator function.
:math:`T` is the set of long-tail items,
    which is a portion of items that appear only rarely in the training data.
Note:
If you want to use this metric, please set the parameter 'tail_ratio' in the config
which can be an integer or a float in (0,1]. Otherwise it will default to 0.1.
"""
metric_type = EvaluatorType.RANKING
metric_need = ['rec.items', 'data.count_items']
def __init__(self, config):
super().__init__(config)
self.topk = config['topk']
self.tail = config['tail_ratio']
if self.tail is None or self.tail <= 0:
self.tail = 0.1
def used_info(self, dataobject):
"""Get the matrix of recommendation items and number of items in total item set."""
item_matrix = dataobject.get('rec.items')
count_items = dataobject.get('data.count_items')
return item_matrix.numpy(), dict(count_items)
def get_tail(self, item_matrix, count_items):
"""Get long-tail percentage through the top-k recommendation list.
Args:
item_matrix(numpy.ndarray): matrix of items recommended to users.
count_items(dict): the number of interaction of items in training data.
Returns:
float: long-tail percentage.
"""
if self.tail > 1:
tail_items = [item for item, cnt in count_items.items() if cnt <= self.tail]
else:
count_items = sorted(count_items.items(), key=lambda kv: (kv[1], kv[0]))
cut = max(int(len(count_items) * self.tail), 1)
count_items = count_items[:cut]
tail_items = [item for item, cnt in count_items]
value = np.zeros_like(item_matrix)
for i in range(item_matrix.shape[0]):
row = item_matrix[i, :]
for j in range(row.shape[0]):
value[i][j] = 1 if row[j] in tail_items else 0
return value
def calculate_metric(self, dataobject):
item_matrix, count_items = self.used_info(dataobject)
result = self.metric_info(self.get_tail(item_matrix, count_items))
metric_dict = self.topk_result('tailpercentage', result)
return metric_dict
def metric_info(self, values):
return values.cumsum(axis=1) / np.arange(1, values.shape[1] + 1)
def topk_result(self, metric, value):
"""Match the metric value to the `k` and put them in `dictionary` form.
Args:
metric(str): the name of calculated metric.
value(numpy.ndarray): metrics for each user, including values from `metric@1` to `metric@max(self.topk)`.
Returns:
dict: metric values required in the configuration.
"""
metric_dict = {}
avg_result = value.mean(axis=0)
for k in self.topk:
key = '{}@{}'.format(metric, k)
metric_dict[key] = round(avg_result[k - 1], self.decimal_place)
return metric_dict