# test_learning_to_rank.R
# LightGBM verbosity used by all tests in this file; defaults to -1
# (silent) unless overridden via the LIGHTGBM_TEST_VERBOSITY environment
# variable.
VERBOSITY <- as.integer(Sys.getenv("LIGHTGBM_TEST_VERBOSITY", unset = "-1"))

context("Learning to rank")

# absolute numerical tolerance used when comparing metric values
# against hard-coded expected results
TOLERANCE <- 1e-06

# Platform flags: several hard-coded expected metric values are not
# reproducible on Solaris or on 32-bit Windows, so those checks are
# guarded or skipped on these platforms.
ON_SOLARIS <- Sys.info()["sysname"] == "SunOS"
ON_32_BIT_WINDOWS <- .Machine$sizeof.pointer != 8L && .Platform$OS.type == "windows"
test_that("learning-to-rank with lgb.train() works as expected", {
    set.seed(708L)
    data(agaricus.train, package = "lightgbm")
    # keep only a few features, to generate a model with imperfect fit
    train <- agaricus.train
    train_data <- train$data[1L:6000L, 1L:20L]
    # 40 query groups of 150 documents each (40 * 150 = 6000 rows)
    dtrain <- lgb.Dataset(
        train_data
        , label = train$label[1L:6000L]
        , group = rep(150L, 40L)
    )
    ndcg_at <- "1,2,3"
    eval_names <- paste0("ndcg@", strsplit(ndcg_at, ",")[[1L]])
    params <- list(
        objective = "lambdarank"
        , metric = "ndcg"
        , ndcg_at = ndcg_at
        , lambdarank_truncation_level = 3L
        , learning_rate = 0.001
        , verbose = VERBOSITY
    )
    model <- lgb.train(
        params = params
        , data = dtrain
        , nrounds = 10L
    )
    expect_true(lgb.is.Booster(model))

    dumped_model <- jsonlite::fromJSON(
        model$dump_model()
    )
    expect_equal(dumped_model[["objective"]], "lambdarank")
    expect_equal(dumped_model[["max_feature_idx"]], ncol(train_data) - 1L)

    # check that evaluation results make sense (0.0 < nDCG < 1.0)
    eval_results <- model$eval_train()
    expect_equal(length(eval_results), length(eval_names))
    for (result in eval_results) {
        expect_true(result[["value"]] > 0.0 && result[["value"]] < 1.0)
        expect_true(result[["higher_better"]])
        expect_identical(result[["data_name"]], "training")
    }
    # vapply() is type-stable (always returns character()), unlike sapply()
    expect_identical(
        vapply(eval_results, function(x) x[["name"]], character(1L))
        , eval_names
    )
    expect_equal(eval_results[[1L]][["value"]], 0.775)
    # these expected values are not reproducible on Solaris or 32-bit Windows
    if (!(ON_SOLARIS || ON_32_BIT_WINDOWS)) {
        expect_true(abs(eval_results[[2L]][["value"]] - 0.745986) < TOLERANCE)
        expect_true(abs(eval_results[[3L]][["value"]] - 0.7351959) < TOLERANCE)
    }
})

test_that("learning-to-rank with lgb.cv() works as expected", {
    # the hard-coded expected metric values below are not reproducible
    # on Solaris or 32-bit Windows
    testthat::skip_if(
        ON_SOLARIS || ON_32_BIT_WINDOWS
        , message = "Skipping on Solaris and 32-bit Windows"
    )
    set.seed(708L)
    data(agaricus.train, package = "lightgbm")
    # keep only a few features, to generate a model with imperfect fit
    train <- agaricus.train
    train_data <- train$data[1L:6000L, 1L:20L]
    # 40 query groups of 150 documents each (40 * 150 = 6000 rows)
    dtrain <- lgb.Dataset(
        train_data
        , label = train$label[1L:6000L]
        , group = rep(150L, 40L)
    )
    ndcg_at <- "1,2,3"
    # c("ndcg@1", "ndcg@2", "ndcg@3")
    eval_names <- paste0("ndcg@", strsplit(ndcg_at, ",")[[1L]])
    params <- list(
        objective = "lambdarank"
        , metric = "ndcg"
        , ndcg_at = ndcg_at
        , lambdarank_truncation_level = 3L
        , label_gain = "0,1,3"
        , min_data = 1L
        , learning_rate = 0.01
        , verbose = VERBOSITY
    )
    nfold <- 4L
    nrounds <- 10L
    cv_bst <- lgb.cv(
        params = params
        , data = dtrain
        , nrounds = nrounds
        , nfold = nfold
    )
    expect_is(cv_bst, "lgb.CVBooster")
    expect_equal(length(cv_bst$boosters), nfold)

    # "valid" should contain results for each metric
    eval_results <- cv_bst$record_evals[["valid"]]
    expect_identical(names(eval_results), eval_names)

    # check that best score and iter make sense (0.0 < nDCG < 1.0)
    best_iter <- cv_bst$best_iter
    best_score <- cv_bst$best_score
    expect_true(best_iter > 0L && best_iter <= nrounds)
    expect_true(best_score > 0.0 && best_score < 1.0)
    expect_true(abs(best_score - 0.75) < TOLERANCE)

    # best_score should be set for the first metric
    first_metric <- eval_names[[1L]]
    expect_equal(best_score, eval_results[[first_metric]][["eval"]][[best_iter]])

    for (eval_name in eval_names) {
        results_for_this_metric <- eval_results[[eval_name]]

        # each set of metrics should have eval and eval_err
        expect_identical(names(results_for_this_metric), c("eval", "eval_err"))

        # there should be one "eval" and "eval_err" per round
        expect_equal(length(results_for_this_metric[["eval"]]), nrounds)
        expect_equal(length(results_for_this_metric[["eval_err"]]), nrounds)

        # check that evaluation results make sense (0.0 < nDCG < 1.0)
        all_evals <- unlist(results_for_this_metric[["eval"]])
        expect_true(all(all_evals > 0.0 & all_evals < 1.0))
    }

    # per-round values of each metric should match the expected sequences
    ndcg1_values <- c(0.675, 0.725, 0.65, 0.725, 0.75, 0.725, 0.75, 0.725, 0.75, 0.75)
    expect_true(all(abs(unlist(eval_results[["ndcg@1"]][["eval"]]) - ndcg1_values) < TOLERANCE))

    ndcg2_values <- c(
        0.6556574, 0.6669721, 0.6306574, 0.6476294, 0.6629581,
        0.6476294, 0.6629581, 0.6379581, 0.7113147, 0.6823008
    )
    expect_true(all(abs(unlist(eval_results[["ndcg@2"]][["eval"]]) - ndcg2_values) < TOLERANCE))

    ndcg3_values <- c(
        0.6484639, 0.6571238, 0.6469279, 0.6540516, 0.6481857,
        0.6481857, 0.6481857, 0.6466496, 0.7027939, 0.6629898
    )
    expect_true(all(abs(unlist(eval_results[["ndcg@3"]][["eval"]]) - ndcg3_values) < TOLERANCE))

    # check details of each booster
    for (bst in cv_bst$boosters) {
        dumped_model <- jsonlite::fromJSON(
            bst$booster$dump_model()
        )
        expect_equal(dumped_model[["objective"]], "lambdarank")
        expect_equal(dumped_model[["max_feature_idx"]], ncol(train_data) - 1L)
    }
})