import math

from lm_eval.api.register import register_aggregation


def weighted_mean(items):
    """Return the sum of first components divided by the sum of second ones.

    ``items`` must be an iterable of 2-element pairs; it is transposed with
    ``zip(*items)`` into two parallel tuples.
    NOTE(review): presumably each pair is (value, weight) -- confirm against
    the callers.

    Raises:
        ValueError: if ``items`` is empty or its elements are not pairs
            (the two-way unpacking fails).
        ZeroDivisionError: if the second components sum to zero.
    """
    numerators, denominators = zip(*items)
    return sum(numerators) / sum(denominators)


@register_aggregation("mean")
def mean(arr):
    """Return the arithmetic mean of ``arr``.

    Raises:
        ZeroDivisionError: if ``arr`` is empty.
    """
    return sum(arr) / len(arr)


@register_aggregation("median")
def median(arr):
    """Return the (upper) median of ``arr``.

    The values are sorted first, so the result no longer depends on the
    order of the input. For an even number of elements the upper of the two
    middle values is returned, which matches what the previous
    implementation produced for already-sorted input.

    Raises:
        IndexError: if ``arr`` is empty.
    """
    # sorted() builds a new list, so the caller's sequence is left untouched.
    ordered = sorted(arr)
    return ordered[len(ordered) // 2]


@register_aggregation("perplexity")
def perplexity(items):
    """Return ``exp(-mean(items))``.

    NOTE(review): presumably ``items`` are natural-log likelihoods -- confirm
    against the callers.
    """
    return math.exp(-mean(items))


@register_aggregation("weighted_perplexity")
def weighted_perplexity(items):
    """Return ``exp(-weighted_mean(items))`` for an iterable of pairs.

    NOTE(review): presumably pairs are (log-likelihood, weight) -- confirm
    against the callers.
    """
    return math.exp(-weighted_mean(items))


@register_aggregation("bits_per_byte")
def bits_per_byte(items):
    """Return ``-weighted_mean(items) / ln(2)`` for an iterable of pairs.

    Dividing by ``math.log(2)`` rescales a natural-log quantity to base 2.
    NOTE(review): presumably pairs are (log-likelihood, byte count) --
    confirm against the callers.
    """
    return -weighted_mean(items) / math.log(2)