-
Notifications
You must be signed in to change notification settings - Fork 5
/
rank_objective.hpp
298 lines (280 loc) · 10.6 KB
/
rank_objective.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
/*!
* Copyright (c) 2016 Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#ifndef LIGHTGBM_OBJECTIVE_RANK_OBJECTIVE_HPP_
#define LIGHTGBM_OBJECTIVE_RANK_OBJECTIVE_HPP_
#include <LightGBM/metric.h>
#include <LightGBM/objective_function.h>
#include <limits>
#include <string>
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <vector>
namespace LightGBM {
/*!
* \brief Objective function for Lambdrank with NDCG
*/
class LambdarankNDCG: public ObjectiveFunction {
public:
explicit LambdarankNDCG(const Config& config) {
sigmoid_ = static_cast<double>(config.sigmoid);
label_gain_ = config.label_gain;
// initialize DCG calculator
DCGCalculator::DefaultLabelGain(&label_gain_);
DCGCalculator::Init(label_gain_);
// will optimize NDCG@optimize_pos_at_
optimize_pos_at_ = config.max_position;
sigmoid_table_.clear();
inverse_max_dcgs_.clear();
if (sigmoid_ <= 0.0) {
Log::Fatal("Sigmoid param %f should be greater than zero", sigmoid_);
}
}
explicit LambdarankNDCG(const std::vector<std::string>&) {
}
~LambdarankNDCG() {
}
void Init(const Metadata& metadata, data_size_t num_data) override {
num_data_ = num_data;
// get label
label_ = metadata.label();
DCGCalculator::CheckLabel(label_, num_data_);
// get weights
weights_ = metadata.weights();
// get boundries
query_boundaries_ = metadata.query_boundaries();
if (query_boundaries_ == nullptr) {
Log::Fatal("Lambdarank tasks require query information");
}
num_queries_ = metadata.num_queries();
// cache inverse max DCG, avoid computation many times
inverse_max_dcgs_.resize(num_queries_);
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_queries_; ++i) {
inverse_max_dcgs_[i] = DCGCalculator::CalMaxDCGAtK(optimize_pos_at_,
label_ + query_boundaries_[i],
query_boundaries_[i + 1] - query_boundaries_[i]);
if (inverse_max_dcgs_[i] > 0.0) {
inverse_max_dcgs_[i] = 1.0f / inverse_max_dcgs_[i];
}
}
// construct sigmoid table to speed up sigmoid transform
ConstructSigmoidTable();
}
void GetGradients(const double* score, score_t* gradients,
score_t* hessians) const override {
#pragma omp parallel for schedule(guided)
for (data_size_t i = 0; i < num_queries_; ++i) {
GetGradientsForOneQuery(score, gradients, hessians, i);
}
}
// grank with fixed size bucket
inline void GetGradientsForOneQuery(const double* score,
score_t* lambdas, score_t* hessians, data_size_t query_id) const {
// get doc boundary for current query
const data_size_t start = query_boundaries_[query_id];
const data_size_t cnt =
query_boundaries_[query_id + 1] - query_boundaries_[query_id];
// get max DCG on current query
// const double inverse_max_dcg = inverse_max_dcgs_[query_id];
// add pointers with offset
const label_t* label = label_ + start;
score += start;
lambdas += start;
hessians += start;
// initialize with zero
for (data_size_t i = 0; i < cnt; ++i) {
lambdas[i] = 0.0f;
hessians[i] = 0.0f;
}
label_t all_ratings[5];
label_t worst_label = 5;
label_t best_label = 0;
// get sorted indices for scores
std::vector<data_size_t> sorted_idx;
for (int i = 0; i < cnt; ++i) {
sorted_idx.emplace_back(i);
all_ratings[static_cast<int>(label[i])] = 1;
if (label[i] < worst_label) {
worst_label = label[i];
}
if (label[i] > best_label) {
best_label = label[i];
}
}
// calculate the discount for all ratings
// if all_ratings[i] == 0, no such rating for this query, discount = 0
int position = 0;
for (data_size_t i = 4; i >= 0; --i) {
if (all_ratings[i]) {
all_ratings[i] = DCGCalculator::GetDiscount(position);
position++;
}
}
if (best_label == worst_label) {return; }
std::stable_sort(sorted_idx.begin(), sorted_idx.end(),
[score](data_size_t a, data_size_t b) { return score[a] > score[b]; });
// get best and worst score
const double best_score = score[sorted_idx[0]];
data_size_t worst_idx = cnt - 1;
if (worst_idx > 0 && score[sorted_idx[worst_idx]] == kMinScore) {
worst_idx -= 1;
}
const double worst_score = score[sorted_idx[worst_idx]];
// get max DCG on current query
const double inverse_max_dcg = inverse_max_dcgs_[query_id];
const int bucket_size = 2;
const double sigma = 2.0f;
// start accmulate lambdas by pairs
for (data_size_t i = 0; i < cnt; ++i) {
const data_size_t high = sorted_idx[i];
const int high_label = static_cast<int>(label[high]);
const double high_score = score[high];
if (high_score == kMinScore || high_label == worst_label) { continue; }
const double high_label_gain = label_gain_[high_label];
const double high_discount = DCGCalculator::GetDiscount(i);
double exp_high_score = std::exp(sigma * (high_score-best_score));
double sum_exp_low_score = 0.0;
double high_sum_lambda = 0.0;
double high_sum_hessian = 0.0;
int num_low = 0;
std::vector<double> sum_exp_low_scores;
for (data_size_t j = 0; j < cnt; ++j) {
// skip same data
if (i == j) { continue; }
const data_size_t low = sorted_idx[j];
const int low_label = static_cast<int>(label[low]);
const double low_score = score[low];
// only consider pair with different label
if (low_label < high_label && low_score > kMinScore) {
sum_exp_low_score += std::exp(sigma * (low_score-best_score));
num_low++;
}
if (num_low == bucket_size) {
sum_exp_low_scores.push_back(sum_exp_low_score);
sum_exp_low_score = 0.0;
num_low = 0;
}
}
if (num_low < bucket_size) {
sum_exp_low_scores.push_back(sum_exp_low_score);
}
num_low = 0;
for (data_size_t j = 0; j < cnt; ++j) {
// skip same data
if (i == j) { continue; }
const data_size_t low = sorted_idx[j];
const int low_label = static_cast<int>(label[low]);
const double low_score = score[low];
// only consider pair with different label
if (low_label < high_label && low_score > kMinScore)
{
sum_exp_low_score = sum_exp_low_scores.at(num_low / bucket_size);
num_low++;
if (sum_exp_low_score == 0.0) {continue; }
const double low_label_gain = label_gain_[low_label];
const double low_discount = DCGCalculator::GetDiscount(j);
// get dcg gap
const double dcg_gap = high_label_gain - low_label_gain;
// get discount of this pair
const double paired_discount = fabs(high_discount - low_discount);
const double delta_score = high_score - low_score;
double sum_exp = exp_high_score + sum_exp_low_score;
double p_high = exp_high_score / sum_exp;
double exp_low_score = std::exp(sigma * (low_score-best_score));
double p_low = exp_low_score / sum_exp;
double l_p_lambda = p_low * sigma;//positive
double l_p_hessian = l_p_lambda * (1 - p_low) * sigma * sigma;
double fancy = dcg_gap * paired_discount * inverse_max_dcg;
// regular the delta_pair_NDCG by score distance
if (high_label != low_label && best_score != worst_score) {
fancy /= (0.01f + fabs(delta_score));
}
l_p_lambda *= fancy;
l_p_hessian *= fancy;
lambdas[low] += static_cast<score_t>(l_p_lambda);
hessians[low] += static_cast<score_t>(l_p_hessian);
high_sum_lambda += -l_p_lambda;
high_sum_hessian += l_p_lambda * p_high * sigma * sigma;//positive
}
}
lambdas[high] += static_cast<score_t>(high_sum_lambda);
hessians[high] += static_cast<score_t>(high_sum_hessian);
}
// if need weights
if (weights_ != nullptr) {
for (data_size_t i = 0; i < cnt; ++i) {
lambdas[i] = static_cast<score_t>(lambdas[i] * weights_[start + i]);
hessians[i] = static_cast<score_t>(hessians[i] * weights_[start + i]);
}
}
}
inline double GetSigmoid(double score) const {
if (score <= min_sigmoid_input_) {
// too small, use lower bound
return sigmoid_table_[0];
} else if (score >= max_sigmoid_input_) {
// too big, use upper bound
return sigmoid_table_[_sigmoid_bins - 1];
} else {
return sigmoid_table_[static_cast<size_t>((score - min_sigmoid_input_) * sigmoid_table_idx_factor_)];
}
}
void ConstructSigmoidTable() {
// get boundary
min_sigmoid_input_ = min_sigmoid_input_ / sigmoid_ / 2;
max_sigmoid_input_ = -min_sigmoid_input_;
sigmoid_table_.resize(_sigmoid_bins);
// get score to bin factor
sigmoid_table_idx_factor_ =
_sigmoid_bins / (max_sigmoid_input_ - min_sigmoid_input_);
// cache
for (size_t i = 0; i < _sigmoid_bins; ++i) {
const double score = i / sigmoid_table_idx_factor_ + min_sigmoid_input_;
sigmoid_table_[i] = 2.0f / (1.0f + std::exp(2.0f * score * sigmoid_));
}
}
const char* GetName() const override {
return "lambdarank";
}
std::string ToString() const override {
std::stringstream str_buf;
str_buf << GetName();
return str_buf.str();
}
bool NeedAccuratePrediction() const override { return false; }
private:
/*! \brief Gains for labels */
std::vector<double> label_gain_;
/*! \brief Cache inverse max DCG, speed up calculation */
std::vector<double> inverse_max_dcgs_;
/*! \brief Simgoid param */
double sigmoid_;
/*! \brief Optimized NDCG@ */
int optimize_pos_at_;
/*! \brief Number of queries */
data_size_t num_queries_;
/*! \brief Number of data */
data_size_t num_data_;
/*! \brief Pointer of label */
const label_t* label_;
/*! \brief Pointer of weights */
const label_t* weights_;
/*! \brief Query boundries */
const data_size_t* query_boundaries_;
/*! \brief Cache result for sigmoid transform to speed up */
std::vector<double> sigmoid_table_;
/*! \brief Number of bins in simoid table */
size_t _sigmoid_bins = 1024 * 1024;
/*! \brief Minimal input of sigmoid table */
double min_sigmoid_input_ = -50;
/*! \brief Maximal input of sigmoid table */
double max_sigmoid_input_ = 50;
/*! \brief Factor that covert score to bin in sigmoid table */
double sigmoid_table_idx_factor_;
};
} // namespace LightGBM
#endif // LightGBM_OBJECTIVE_RANK_OBJECTIVE_HPP_