context("Learning to rank")

# numerical tolerance to use when checking metric values
TOLERANCE <- 1e-06

test_that("learning-to-rank with lgb.train() works as expected", {
    set.seed(708L)
    data(agaricus.train, package = "lightgbm")
    # keep only a few features, to generate a model with an imperfect fit
    train <- agaricus.train
    train_data <- train$data[1L:6000L, 1L:20L]
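    # the 6000 rows are split into 40 query groups of 150 documents each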
    dtrain <- lgb.Dataset(
        train_data
        , label = train$label[1L:6000L]
        , group = rep(150L, 40L)
    )
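    # evaluate NDCG at positions 1, 2, and 3; results are reported as "ndcg@k"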
    ndcg_at <- "1,2,3"
    eval_names <- paste0("ndcg@", strsplit(ndcg_at, ",")[[1L]])
    params <- list(
        objective = "lambdarank"
        , metric = "ndcg"
        , ndcg_at = ndcg_at
        , metric_freq = 1L
        , max_position = 3L
        , learning_rate = 0.001
    )
    model <- lgb.train(
        params = params
        , data = dtrain
        , nrounds = 10L
    )
    expect_true(lgb.is.Booster(model))

    dumped_model <- jsonlite::fromJSON(
        model$dump_model()
    )
    expect_equal(dumped_model[["objective"]], "lambdarank")
    expect_equal(dumped_model[["max_feature_idx"]], ncol(train_data) - 1L)

    # check that evaluation results make sense (0.0 < nDCG < 1.0)
    eval_results <- model$eval_train()
    expect_equal(length(eval_results), length(eval_names))
    for (result in eval_results) {
        expect_true(result[["value"]] > 0.0 && result[["value"]] < 1.0)
        expect_true(result[["higher_better"]])
        expect_identical(result[["data_name"]], "training")
    }
    expect_identical(sapply(eval_results, function(x) {x$name}), eval_names)
    expect_equal(eval_results[[1L]][["value"]], 0.825)
    expect_true(abs(eval_results[[2L]][["value"]] - 0.7766434) < TOLERANCE)
    expect_true(abs(eval_results[[3L]][["value"]] - 0.7527939) < TOLERANCE)
})

test_that("learning-to-rank with lgb.cv() works as expected", {
    set.seed(708L)
    data(agaricus.train, package = "lightgbm")
    # keep only a few features, to generate a model with an imperfect fit
    train <- agaricus.train
    train_data <- train$data[1L:6000L, 1L:20L]
    dtrain <- lgb.Dataset(
        train_data
        , label = train$label[1L:6000L]
        , group = rep(150L, 40L)
    )
    ndcg_at <- "1,2,3"
    eval_names <- paste0("ndcg@", strsplit(ndcg_at, ",")[[1L]])
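    # label_gain assigns the gain used for label values 0, 1, and 2 when computing NDCG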
    params <- list(
        objective = "lambdarank"
        , metric = "ndcg"
        , ndcg_at = ndcg_at
        , metric_freq = 1L
        , max_position = 3L
        , label_gain = "0,1,3"
    )
    nfold <- 4L
    nrounds <- 10L
    cv_bst <- lgb.cv(
        params = params
        , data = dtrain
        , nrounds = nrounds
        , nfold = nfold
        , min_data = 1L
        , learning_rate = 0.01
    )
    expect_is(cv_bst, "lgb.CVBooster")
    expect_equal(length(cv_bst$boosters), nfold)

    # "valid" should contain results for each metric
    eval_results <- cv_bst$record_evals[["valid"]]
    eval_names <- c("ndcg@1", "ndcg@2", "ndcg@3")
    expect_identical(names(eval_results), eval_names)

    # check that best score and iter make sense (0.0 < nDCG < 1.0)
    best_iter <- cv_bst$best_iter
    best_score <- cv_bst$best_score
    expect_true(best_iter > 0L && best_iter <= nrounds)
    expect_true(best_score > 0.0 && best_score < 1.0)
    expect_true(abs(best_score - 0.775) < TOLERANCE)

    # best_score should be set for the first metric
    first_metric <- eval_names[[1L]]
    expect_equal(best_score, eval_results[[first_metric]][["eval"]][[best_iter]])

    for (eval_name in eval_names) {
        results_for_this_metric <- eval_results[[eval_name]]

        # each set of metrics should have eval and eval_err
        expect_identical(names(results_for_this_metric), c("eval", "eval_err"))

        # there should be one "eval" and "eval_err" per round
        expect_equal(length(results_for_this_metric[["eval"]]), nrounds)
        expect_equal(length(results_for_this_metric[["eval_err"]]), nrounds)

        # check that evaluation results make sense (0.0 < nDCG < 1.0)
        all_evals <- unlist(results_for_this_metric[["eval"]])
        expect_true(all(all_evals > 0.0 & all_evals < 1.0))
    }

    # first and last value of each metric should be as expected
    ndcg1_values <- c(0.725, 0.75, 0.75, 0.775, 0.75, 0.75, 0.75, 0.75, 0.75, 0.75)
    expect_true(all(abs(unlist(eval_results[["ndcg@1"]][["eval"]]) - ndcg1_values) < TOLERANCE))

    ndcg2_values <- c(
        0.6863147, 0.720986, 0.7306574, 0.745986, 0.7306574,
        0.720986, 0.7403287, 0.7403287, 0.7403287, 0.7306574
    )
    expect_true(all(abs(unlist(eval_results[["ndcg@2"]][["eval"]]) - ndcg2_values) < TOLERANCE))

    ndcg3_values <- c(
        0.6777939, 0.6984639, 0.711732, 0.7234639, 0.711732,
        0.7101959, 0.719134, 0.719134, 0.725, 0.711732
    )
    expect_true(all(abs(unlist(eval_results[["ndcg@3"]][["eval"]]) - ndcg3_values) < TOLERANCE))

    # check details of each booster
    for (bst in cv_bst$boosters) {
        dumped_model <- jsonlite::fromJSON(
            bst$booster$dump_model()
        )
        expect_equal(dumped_model[["objective"]], "lambdarank")
        expect_equal(dumped_model[["max_feature_idx"]], ncol(train_data) - 1L)
    }
})