Commit f0ef3442 authored by yuguo960516yuguo's avatar yuguo960516yuguo
Browse files

2.3.2-dtk-22.10.1

parent ad08b8ce
Pipeline #227 failed with stages
in 0 seconds
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <stdint.h>
#include <stdio.h>
#include <vector>
#include "paddle/fluid/distributed/common/registerer.h"
#include "paddle/fluid/distributed/ps/table/accessor.h"
#include "paddle/fluid/distributed/ps/table/sparse_sgd_rule.h"
#include "paddle/fluid/distributed/the_one_ps.pb.h"
namespace paddle {
namespace distributed {
// DownpourUnitAccessor
// Accessor for CTR sparse feature values whose fields (statistics, embed
// weight, optimizer state, embedx weights) are all stored as float slots of
// one flat array. The nested structs only compute slot indices into that
// array; they do not own any storage.
class CtrCommonAccessor : public ValueAccessor {
 public:
  // Index helper for the stored feature value. The optimizer-state widths
  // (embed_sgd_dim / embedx_sgd_dim) and embedx_dim are runtime values held in
  // the data members at the bottom of the struct, so the helpers are
  // non-static.
  struct CtrCommonFeatureValue {
    /*
       float slot;
       float unseen_days;
       float delta_score;
       float show;
       float click;
       float embed_w;
       std::vector<float> embed_g2sum;
       std::vector<float> embedx_w;
       std::vector<float> embedx_g2sum;
       */
    // Total number of float slots in one feature value.
    int Dim() { return 6 + embed_sgd_dim + embedx_sgd_dim + embedx_dim; }
    // Every slot is one float wide; both arguments are ignored.
    int DimSize(size_t dim, int embedx_dim) { return sizeof(float); }
    // Size of one feature value in bytes.
    int Size() { return Dim() * sizeof(float); }
    int SlotIndex() { return 0; }
    int UnseenDaysIndex() { return SlotIndex() + 1; }
    int DeltaScoreIndex() { return UnseenDaysIndex() + 1; }
    int ShowIndex() { return DeltaScoreIndex() + 1; }
    int ClickIndex() { return ShowIndex() + 1; }
    int EmbedWIndex() { return ClickIndex() + 1; }
    int EmbedG2SumIndex() { return EmbedWIndex() + 1; }
    int EmbedxWIndex() { return EmbedG2SumIndex() + embed_sgd_dim; }
    int EmbedxG2SumIndex() { return EmbedxWIndex() + embedx_dim; }
    // Reference accessors into a caller-owned value array. For the
    // multi-slot fields they return a reference to the first slot.
    float& UnseenDays(float* val) { return val[UnseenDaysIndex()]; }
    float& DeltaScore(float* val) { return val[DeltaScoreIndex()]; }
    float& Show(float* val) { return val[ShowIndex()]; }
    float& Click(float* val) { return val[ClickIndex()]; }
    float& Slot(float* val) { return val[SlotIndex()]; }
    float& EmbedW(float* val) { return val[EmbedWIndex()]; }
    float& EmbedG2Sum(float* val) { return val[EmbedG2SumIndex()]; }
    float& EmbedxW(float* val) { return val[EmbedxWIndex()]; }
    float& EmbedxG2Sum(float* val) { return val[EmbedxG2SumIndex()]; }
    // Zero-initialized so Dim()/Size() and the index helpers are well defined
    // even before the owning accessor has filled these in.
    int embed_sgd_dim = 0;
    int embedx_dim = 0;
    int embedx_sgd_dim = 0;
  };
  // Index helper for the gradient record pushed by workers.
  struct CtrCommonPushValue {
    /*
       float slot;
       float show;
       float click;
       float embed_g;
       std::vector<float> embedx_g;
       */
    static int Dim(int embedx_dim) { return 4 + embedx_dim; }
    // Every slot is one float wide; both arguments are ignored.
    static int DimSize(int dim, int embedx_dim) { return sizeof(float); }
    static int Size(int embedx_dim) { return Dim(embedx_dim) * sizeof(float); }
    static int SlotIndex() { return 0; }
    static int ShowIndex() { return CtrCommonPushValue::SlotIndex() + 1; }
    static int ClickIndex() { return CtrCommonPushValue::ShowIndex() + 1; }
    static int EmbedGIndex() { return CtrCommonPushValue::ClickIndex() + 1; }
    static int EmbedxGIndex() { return CtrCommonPushValue::EmbedGIndex() + 1; }
    static float& Slot(float* val) {
      return val[CtrCommonPushValue::SlotIndex()];
    }
    static float& Show(float* val) {
      return val[CtrCommonPushValue::ShowIndex()];
    }
    static float& Click(float* val) {
      return val[CtrCommonPushValue::ClickIndex()];
    }
    static float& EmbedG(float* val) {
      return val[CtrCommonPushValue::EmbedGIndex()];
    }
    // Pointer to the first embedx gradient slot.
    static float* EmbedxG(float* val) {
      return val + CtrCommonPushValue::EmbedxGIndex();
    }
  };
  // Index helper for the record returned to workers on pull.
  struct CtrCommonPullValue {
    /*
       float show;
       float click;
       float embed_w;
       std::vector<float> embedx_w;
       */
    static int Dim(int embedx_dim) { return 3 + embedx_dim; }
    // Every slot is one float wide; the argument is ignored.
    static int DimSize(size_t dim) { return sizeof(float); }
    static int Size(int embedx_dim) { return Dim(embedx_dim) * sizeof(float); }
    static int ShowIndex() { return 0; }
    static int ClickIndex() { return 1; }
    static int EmbedWIndex() { return 2; }
    static int EmbedxWIndex() { return 3; }
    static float& Show(float* val) {
      return val[CtrCommonPullValue::ShowIndex()];
    }
    static float& Click(float* val) {
      return val[CtrCommonPullValue::ClickIndex()];
    }
    static float& EmbedW(float* val) {
      return val[CtrCommonPullValue::EmbedWIndex()];
    }
    // Pointer to the first embedx weight slot.
    static float* EmbedxW(float* val) {
      return val + CtrCommonPullValue::EmbedxWIndex();
    }
  };
  CtrCommonAccessor() {}
  virtual ~CtrCommonAccessor() {}
  virtual int Initialize();
  // Initialize AccessorInfo.
  virtual void InitAccessorInfo();
  // Decide whether this value should be shrunk (removed).
  virtual bool Shrink(float* value);
  // Decide whether this value should be saved to SSD.
  // virtual bool save_ssd(float* value);
  virtual bool NeedExtendMF(float* value);
  virtual bool HasMF(int size);
  // Decide whether this value is dumped during the save stage.
  // param identifies the save stage, e.g. downpour's xbox and batch_model:
  // param = 0, save all feature
  // param = 1, save delta feature
  // param = 2, save xbox base feature
  bool Save(float* value, int param) override;
  bool SaveCache(float* value,
                 int param,
                 double global_cache_threshold) override;
  bool SaveSSD(float* value) override;
  // update delta_score and unseen_days after save
  void UpdateStatAfterSave(float* value, int param) override;
  // When keys do not exist yet, generate random values for them.
  // The memory behind each value must already be allocated by the caller.
  virtual int32_t Create(float** value, size_t num);
  // Select fields from values into select_values.
  virtual int32_t Select(float** select_values,
                         const float** values,
                         size_t num);
  // Merge other_update_values into update_values.
  virtual int32_t Merge(float** update_values,
                        const float** other_update_values,
                        size_t num);
  // Merge update_values, using it.next to decide when to move to the next key.
  // virtual int32_t Merge(float** update_values, iterator it);
  // Apply update_values to values.
  virtual int32_t Update(float** values,
                         const float** update_values,
                         size_t num);
  std::string ParseToString(const float* value, int param) override;
  int32_t ParseFromString(const std::string& str, float* v) override;
  virtual bool CreateValue(int type, const float* value);
  // This interface is currently only used to fetch "show"; any other name
  // yields 0.
  float GetField(float* value, const std::string& name) override {
    // CHECK(name == "show");
    if (name == "show") {
      return common_feature_value.Show(value);
    }
    return 0.0;
  }

 private:
  // float ShowClickScore(float show, float click);
  // SparseValueSGDRule* _embed_sgd_rule;
  // SparseValueSGDRule* _embedx_sgd_rule;
  // CtrCommonFeatureValue common_feature_value;
  // Default-initialized so the object is in a defined state before
  // Initialize() is called.
  float _show_click_decay_rate = 0.0f;
  int32_t _ssd_unseenday_threshold = 0;
  bool _show_scale = false;

 public:  // TODO(zhaocaibei123): it should be private, but we make it public
          // for unit test
  CtrCommonFeatureValue common_feature_value;
  float ShowClickScore(float show, float click);
  // Null until assigned; the assignment is not part of this header.
  SparseValueSGDRule* _embed_sgd_rule = nullptr;
  SparseValueSGDRule* _embedx_sgd_rule = nullptr;
};
} // namespace distributed
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/distributed/ps/table/ctr_double_accessor.h"
#include <gflags/gflags.h>
#include "glog/logging.h"
#include "paddle/fluid/string/string_helper.h"
namespace paddle {
namespace distributed {
// Creates the embed / embedx SGD rules named in the table config, caches the
// CTR accessor parameters used by later calls, and fills in _accessor_info.
// Always returns 0.
int CtrDoubleAccessor::Initialize() {
  // Rule for embed_w, loaded with a dimension argument of 1.
  const auto& embed_param = _config.embed_sgd_param();
  auto rule_name = embed_param.name();
  _embed_sgd_rule = CREATE_PSCORE_CLASS(SparseValueSGDRule, rule_name);
  _embed_sgd_rule->LoadConfig(embed_param, 1);

  // Rule for embedx_w, loaded with the configured embedx dimension.
  const auto& embedx_param = _config.embedx_sgd_param();
  rule_name = embedx_param.name();
  _embedx_sgd_rule = CREATE_PSCORE_CLASS(SparseValueSGDRule, rule_name);
  _embedx_sgd_rule->LoadConfig(embedx_param, _config.embedx_dim());

  const auto& ctr_param = _config.ctr_accessor_param();
  _show_click_decay_rate = ctr_param.show_click_decay_rate();
  _ssd_unseenday_threshold = ctr_param.ssd_unseenday_threshold();
  // Only ever switched on here; a false config leaves the member untouched.
  if (ctr_param.show_scale()) {
    _show_scale = true;
  }

  InitAccessorInfo();
  return 0;
}
void CtrDoubleAccessor::InitAccessorInfo() {
auto embedx_dim = _config.embedx_dim();
_accessor_info.dim = CtrDoubleFeatureValue::Dim(embedx_dim);
_accessor_info.size = CtrDoubleFeatureValue::Size(embedx_dim);
_accessor_info.select_dim = 3 + embedx_dim;
_accessor_info.select_size = _accessor_info.select_dim * sizeof(float);
_accessor_info.update_dim = 4 + embedx_dim;
_accessor_info.update_size = _accessor_info.update_dim * sizeof(float);
_accessor_info.mf_size = (embedx_dim + 1) * sizeof(float);
}
// Applies time decay to show/click in place, then reports whether the value
// should be deleted: true when its decayed show/click score is below
// delete_threshold or it has been unseen for more than
// delete_after_unseen_days.
bool CtrDoubleAccessor::Shrink(float* value) {
  const auto& ctr_param = _config.ctr_accessor_param();

  // Decay first so the score below is computed from the decayed statistics.
  double& show = CtrDoubleFeatureValue::Show(value);
  double& click = CtrDoubleFeatureValue::Click(value);
  show *= _show_click_decay_rate;
  click *= _show_click_decay_rate;

  const auto score = ShowClickScore(show, click);
  const auto unseen_days = CtrDoubleFeatureValue::UnseenDays(value);
  return score < ctr_param.delete_threshold() ||
         unseen_days > ctr_param.delete_after_unseen_days();
}
// Returns true when the value's unseen_days exceeds the cached SSD threshold.
bool CtrDoubleAccessor::SaveSSD(float* value) {
  return CtrDoubleFeatureValue::UnseenDays(value) > _ssd_unseenday_threshold;
}
// Returns true when the value passes the base score threshold, has been seen
// within delta_keep_days, and its show count exceeds global_cache_threshold.
// `param` is not used.
bool CtrDoubleAccessor::SaveCache(float* value,
                                  int param,
                                  double global_cache_threshold) {
  const auto& ctr_param = _config.ctr_accessor_param();
  const auto score = ShowClickScore(CtrDoubleFeatureValue::Show(value),
                                    CtrDoubleFeatureValue::Click(value));
  const bool eligible =
      score >= ctr_param.base_threshold() &&
      CtrDoubleFeatureValue::UnseenDays(value) <= ctr_param.delta_keep_days();
  if (!eligible) {
    return false;
  }
  return CtrDoubleFeatureValue::Show(value) > global_cache_threshold;
}
// Decides whether `value` is dumped in the save stage identified by `param`.
//   param == 1 (xbox delta) and param == 2 (xbox base) filter on the
//   show/click score, delta_score and unseen_days; every other param saves
//   the value unconditionally.
// For param == 2 the delta threshold is treated as 0 and delta_score is reset
// to 0 on a value that passes the filter.
bool CtrDoubleAccessor::Save(float* value, int param) {
  const bool xbox_delta = (param == 1);
  const bool xbox_base = (param == 2);
  if (!xbox_delta && !xbox_base) {
    // Covers "save all" (0), stage 3 (already decayed in Shrink) and any
    // unrecognized stage.
    return true;
  }

  const auto& ctr_param = _config.ctr_accessor_param();
  auto delta_threshold = ctr_param.delta_threshold();
  if (xbox_base) {
    delta_threshold = 0;
  }

  const auto score = ShowClickScore(CtrDoubleFeatureValue::Show(value),
                                    CtrDoubleFeatureValue::Click(value));
  const bool keep =
      score >= ctr_param.base_threshold() &&
      CtrDoubleFeatureValue::DeltaScore(value) >= delta_threshold &&
      CtrDoubleFeatureValue::UnseenDays(value) <= ctr_param.delta_keep_days();
  if (keep && xbox_base) {
    CtrDoubleFeatureValue::DeltaScore(value) = 0;
  }
  return keep;
}
// Post-save bookkeeping:
//   param == 1: reset delta_score to 0 on values that pass the same filter
//               Save() applies for stage 1.
//   param == 3: increment unseen_days.
//   otherwise : no-op.
void CtrDoubleAccessor::UpdateStatAfterSave(float* value, int param) {
  if (param == 3) {
    CtrDoubleFeatureValue::UnseenDays(value)++;
    return;
  }
  if (param != 1) {
    return;
  }

  const auto& ctr_param = _config.ctr_accessor_param();
  const auto score = ShowClickScore(CtrDoubleFeatureValue::Show(value),
                                    CtrDoubleFeatureValue::Click(value));
  const bool was_saved =
      score >= ctr_param.base_threshold() &&
      CtrDoubleFeatureValue::DeltaScore(value) >=
          ctr_param.delta_threshold() &&
      CtrDoubleFeatureValue::UnseenDays(value) <= ctr_param.delta_keep_days();
  if (was_saved) {
    CtrDoubleFeatureValue::DeltaScore(value) = 0;
  }
}
// Initializes `num` caller-allocated feature values: statistics are zeroed,
// slot is set to -1, and the embed / embedx weights and optimizer state are
// initialized by their SGD rules. Always returns 0.
int32_t CtrDoubleAccessor::Create(float** values, size_t num) {
  for (size_t idx = 0; idx < num; ++idx) {
    float* feature = values[idx];

    CtrDoubleFeatureValue::UnseenDays(feature) = 0;
    CtrDoubleFeatureValue::DeltaScore(feature) = 0;
    // show and click are stored as doubles spanning two float slots each.
    CtrDoubleFeatureValue::Show(feature) = 0;
    CtrDoubleFeatureValue::Click(feature) = 0;
    CtrDoubleFeatureValue::Slot(feature) = -1;

    // embed_w honors the configured zero_init flag; embedx_w never does.
    const bool zero_init = _config.ctr_accessor_param().zero_init();
    _embed_sgd_rule->InitValue(&CtrDoubleFeatureValue::EmbedW(feature),
                               &CtrDoubleFeatureValue::EmbedG2Sum(feature),
                               zero_init);
    _embedx_sgd_rule->InitValue(CtrDoubleFeatureValue::EmbedxW(feature),
                                &CtrDoubleFeatureValue::EmbedxG2Sum(feature),
                                false);
  }
  return 0;
}
bool CtrDoubleAccessor::NeedExtendMF(float* value) {
auto show = ((double*)(value + CtrDoubleFeatureValue::ShowIndex()))[0];
auto click = ((double*)(value + CtrDoubleFeatureValue::ClickIndex()))[0];
// float score = (show - click) * _config.ctr_accessor_param().nonclk_coeff()
auto score = (show - click) * _config.ctr_accessor_param().nonclk_coeff() +
click * _config.ctr_accessor_param().click_coeff();
//+ click * _config.ctr_accessor_param().click_coeff();
return score >= _config.embedx_threshold();
}
// Converts stored values (CtrDoubleFeatureValue layout) into pull records
// (CtrDoublePullValue layout) for `num` items. show/click are narrowed from
// double to float; embed_w and the embedx_w run are copied as-is.
// Always returns 0.
int32_t CtrDoubleAccessor::Select(float** select_values,
                                  const float** values,
                                  size_t num) {
  const size_t embedx_bytes = _config.embedx_dim() * sizeof(float);
  for (size_t idx = 0; idx < num; ++idx) {
    const float* src = values[idx];
    float* dst = select_values[idx];

    const double show = *reinterpret_cast<const double*>(
        src + CtrDoubleFeatureValue::ShowIndex());
    const double click = *reinterpret_cast<const double*>(
        src + CtrDoubleFeatureValue::ClickIndex());

    CtrDoublePullValue::Show(dst) = static_cast<float>(show);
    CtrDoublePullValue::Click(dst) = static_cast<float>(click);
    CtrDoublePullValue::EmbedW(dst) = src[CtrDoubleFeatureValue::EmbedWIndex()];
    memcpy(CtrDoublePullValue::EmbedxW(dst),
           src + CtrDoubleFeatureValue::EmbedxWIndex(),
           embedx_bytes);
  }
  return 0;
}
// Accumulates one set of push records into another (both in
// CtrDoublePushValue layout).
// first dim: item
// second dim: field num
// Every field except the slot field is summed element-wise; the slot field of
// update_values is left unchanged. Always returns 0.
int32_t CtrDoubleAccessor::Merge(float** update_values,
                                 const float** other_update_values,
                                 size_t num) {
  const size_t field_count = CtrDoublePushValue::Dim(_config.embedx_dim());
  const size_t slot_field = CtrDoublePushValue::SlotIndex();
  for (size_t item = 0; item < num; ++item) {
    float* dst = update_values[item];
    const float* src = other_update_values[item];
    for (size_t field = 0; field < field_count; ++field) {
      if (field == slot_field) {
        continue;
      }
      dst[field] += src[field];
    }
  }
  return 0;
}
// Applies push records (CtrDoublePushValue layout) to stored values
// (CtrDoubleFeatureValue layout).
// first dim: item
// second dim: field num
// For each item: show/click are accumulated, slot is overwritten,
// delta_score grows by the pushed show/click score, unseen_days is reset to 0,
// and both SGD rules update their weights from the pushed gradients.
// Always returns 0.
int32_t CtrDoubleAccessor::Update(float** update_values,
                                  const float** push_values,
                                  size_t num) {
  for (size_t item = 0; item < num; ++item) {
    float* feature = update_values[item];
    const float* pushed = push_values[item];

    const float pushed_show = pushed[CtrDoublePushValue::ShowIndex()];
    const float pushed_click = pushed[CtrDoublePushValue::ClickIndex()];
    const float pushed_slot = pushed[CtrDoublePushValue::SlotIndex()];

    CtrDoubleFeatureValue::Show(feature) += static_cast<double>(pushed_show);
    CtrDoubleFeatureValue::Click(feature) += static_cast<double>(pushed_click);
    CtrDoubleFeatureValue::Slot(feature) = pushed_slot;
    CtrDoubleFeatureValue::DeltaScore(feature) +=
        (pushed_show - pushed_click) *
            _config.ctr_accessor_param().nonclk_coeff() +
        pushed_click * _config.ctr_accessor_param().click_coeff();
    CtrDoubleFeatureValue::UnseenDays(feature) = 0;

    // The SGD rules receive the pushed show count only when show scaling is
    // enabled; otherwise they receive 1.
    const float sgd_show = _show_scale ? pushed_show : 1;
    VLOG(3) << "accessor show scale:" << _show_scale
            << ", push_show:" << sgd_show;

    _embed_sgd_rule->UpdateValue(
        feature + CtrDoubleFeatureValue::EmbedWIndex(),
        feature + CtrDoubleFeatureValue::EmbedG2SumIndex(),
        pushed + CtrDoublePushValue::EmbedGIndex(),
        sgd_show);
    _embedx_sgd_rule->UpdateValue(
        feature + CtrDoubleFeatureValue::EmbedxWIndex(),
        feature + CtrDoubleFeatureValue::EmbedxG2SumIndex(),
        pushed + CtrDoublePushValue::EmbedxGIndex(),
        sgd_show);
  }
  return 0;
}
// Decides whether a missing key should be created.
//   stage == 0 (pull): always.
//   stage == 1 (push): `value` is a push record; create with probability
//                      equal to its show/click score clamped to [0, 1].
//   any other stage  : always.
bool CtrDoubleAccessor::CreateValue(int stage, const float* value) {
  if (stage != 1) {
    return true;
  }

  const float show = value[CtrDoublePushValue::ShowIndex()];
  const float click = value[CtrDoublePushValue::ClickIndex()];
  const auto score = ShowClickScore(show, click);
  if (score <= 0) {
    return false;
  }
  if (score >= 1) {
    return true;
  }
  // 0 < score < 1: sample.
  const float draw =
      local_uniform_real_distribution<float>()(local_random_engine());
  return draw < score;
}
// Weighted score of a show/click pair:
//   (show - click) * nonclk_coeff + click * click_coeff
// with both coefficients read from the CTR accessor config.
double CtrDoubleAccessor::ShowClickScore(double show, double click) {
  const auto& ctr_param = _config.ctr_accessor_param();
  const double non_click = show - click;
  return non_click * ctr_param.nonclk_coeff() +
         click * ctr_param.click_coeff();
}
// Serializes a stored value to a space-separated string in the order
//   unseen_days delta_score show click embed_w embed_g2sum slot
// optionally followed by
//   embedx_g2sum embedx_w[0..embedx_dim)
// show/click are narrowed to float for printing. The embedx part is emitted
// only when the show/click score has reached embedx_threshold and param_size
// is greater than the index of embedx_g2sum.
std::string CtrDoubleAccessor::ParseToString(const float* v, int param_size) {
  // Reused per thread; reset both the error flags and the buffer.
  thread_local std::ostringstream os;
  os.clear();
  os.str("");

  float* mutable_v = const_cast<float*>(v);  // accessors take float*
  const double show = CtrDoubleFeatureValue::Show(mutable_v);
  const double click = CtrDoubleFeatureValue::Click(mutable_v);

  os << v[CtrDoubleFeatureValue::UnseenDaysIndex()] << " "
     << v[CtrDoubleFeatureValue::DeltaScoreIndex()] << " "
     << static_cast<float>(show) << " " << static_cast<float>(click) << " "
     << v[CtrDoubleFeatureValue::EmbedWIndex()] << " "
     << v[CtrDoubleFeatureValue::EmbedG2SumIndex()] << " "
     << v[CtrDoubleFeatureValue::SlotIndex()];

  const auto score = ShowClickScore(show, click);
  const bool has_embedx =
      score >= _config.embedx_threshold() &&
      param_size > CtrDoubleFeatureValue::EmbedxG2SumIndex();
  if (has_embedx) {
    os << " " << v[CtrDoubleFeatureValue::EmbedxG2SumIndex()];
    const float* embedx_w = v + CtrDoubleFeatureValue::EmbedxWIndex();
    for (size_t k = 0; k < _config.embedx_dim(); ++k) {
      os << " " << embedx_w[k];
    }
  }
  return os.str();
}
// Parses a space-separated float string into a stored value
// (CtrDoubleFeatureValue layout) and returns the number of float slots
// consumed in `value`.
//
// In the text form show and click are single floats, so every text field after
// them sits two slots earlier than in the stored layout. Two text forms are
// handled:
//   * exactly dim - 1 fields: no slot field; embedx_g2sum + embedx_w follow
//     embed_g2sum directly, and slot is left at -1.
//   * anything else: fields from embed_w onward are copied contiguously.
//
// The staging buffer is a std::vector instead of a variable-length array,
// which is a GNU extension rather than standard C++.
// NOTE(review): str_to_float receives no buffer length here, so an input with
// more than dim + 2 fields would presumably overrun the buffer — confirm
// against the helper.
int CtrDoubleAccessor::ParseFromString(const std::string& str, float* value) {
  const int embedx_dim = _config.embedx_dim();
  const int value_dim = _accessor_info.dim;

  std::vector<float> data_buff(_accessor_info.dim + 2);
  float* data_buff_ptr = data_buff.data();
  _embedx_sgd_rule->InitValue(
      data_buff_ptr + CtrDoubleFeatureValue::EmbedxWIndex(),
      data_buff_ptr + CtrDoubleFeatureValue::EmbedxG2SumIndex());

  auto str_len = paddle::string::str_to_float(str.data(), data_buff_ptr);
  CHECK(str_len >= 6) << "expect more than 6 real:" << str_len;

  const int show_index = CtrDoubleFeatureValue::ShowIndex();
  const int click_index = CtrDoubleFeatureValue::ClickIndex();
  const bool no_slot_format = (str_len == (value_dim - 1));

  // Default slot; overwritten below only when the text carries a slot field.
  value[CtrDoubleFeatureValue::SlotIndex()] = -1;

  // Common prefix: unseen_days..delta_score, then show & click widened to
  // double.
  memcpy(value, data_buff_ptr, show_index * sizeof(float));
  *reinterpret_cast<double*>(value + show_index) =
      static_cast<double>(data_buff_ptr[2]);
  *reinterpret_cast<double*>(value + click_index) =
      static_cast<double>(data_buff_ptr[3]);

  if (no_slot_format) {
    // embed_w, embed_g2sum, then skip the slot and copy
    // embedx_g2sum + embedx_w.
    value[CtrDoubleFeatureValue::EmbedWIndex()] = data_buff_ptr[4];
    value[CtrDoubleFeatureValue::EmbedG2SumIndex()] = data_buff_ptr[5];
    memcpy(value + CtrDoubleFeatureValue::EmbedxG2SumIndex(),
           data_buff_ptr + 6,
           (embedx_dim + 1) * sizeof(float));
  } else {
    // embed_w onward is contiguous in both layouts.
    memcpy(value + CtrDoubleFeatureValue::EmbedWIndex(),
           data_buff_ptr + 4,
           (str_len - 4) * sizeof(float));
  }

  // Account for the slot field that was absent from the text.
  if (no_slot_format || str_len == 6) {
    str_len += 1;
  }
  // +2: show and click each take one extra float slot in the stored layout.
  return str_len + 2;
}
} // namespace distributed
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <stdint.h>
#include <stdio.h>
#include <vector>
#include "paddle/fluid/distributed/common/registerer.h"
#include "paddle/fluid/distributed/ps/table/accessor.h"
#include "paddle/fluid/distributed/ps/table/sparse_sgd_rule.h"
#include "paddle/fluid/distributed/the_one_ps.pb.h"
namespace paddle {
namespace distributed {
class CtrDoubleAccessor : public ValueAccessor {
public:
struct CtrDoubleFeatureValue {
/*
float unseen_days;
float delta_score;
double show;
double click;
float embed_w;
float embed_g2sum;
float slot;
float embedx_g2sum;
std::vector<float> embedx_w;
*/
static int Dim(int embedx_dim) { return 8 + embedx_dim; }
static int DimSize(size_t dim, int embedx_dim) { return sizeof(float); }
static int Size(int embedx_dim) {
return (Dim(embedx_dim) + 2) * sizeof(float);
}
static int UnseenDaysIndex() { return 0; }
static int DeltaScoreIndex() {
return CtrDoubleFeatureValue::UnseenDaysIndex() + 1;
}
static int ShowIndex() {
return CtrDoubleFeatureValue::DeltaScoreIndex() + 1;
}
// show is double
static int ClickIndex() { return CtrDoubleFeatureValue::ShowIndex() + 2; }
// click is double
static int EmbedWIndex() { return CtrDoubleFeatureValue::ClickIndex() + 2; }
static int EmbedG2SumIndex() {
return CtrDoubleFeatureValue::EmbedWIndex() + 1;
}
static int SlotIndex() {
return CtrDoubleFeatureValue::EmbedG2SumIndex() + 1;
}
static int EmbedxG2SumIndex() {
return CtrDoubleFeatureValue::SlotIndex() + 1;
}
static int EmbedxWIndex() {
return CtrDoubleFeatureValue::EmbedxG2SumIndex() + 1;
}
static float& UnseenDays(float* val) {
return val[CtrDoubleFeatureValue::UnseenDaysIndex()];
}
static float& DeltaScore(float* val) {
return val[CtrDoubleFeatureValue::DeltaScoreIndex()];
}
static double& Show(float* val) {
return ((double*)(val + CtrDoubleFeatureValue::ShowIndex()))[0];
}
static double& Click(float* val) {
return ((double*)(val + CtrDoubleFeatureValue::ClickIndex()))[0];
}
static float& Slot(float* val) {
return val[CtrDoubleFeatureValue::SlotIndex()];
}
static float& EmbedW(float* val) {
return val[CtrDoubleFeatureValue::EmbedWIndex()];
}
static float& EmbedG2Sum(float* val) {
return val[CtrDoubleFeatureValue::EmbedG2SumIndex()];
}
static float& EmbedxG2Sum(float* val) {
return val[CtrDoubleFeatureValue::EmbedxG2SumIndex()];
}
static float* EmbedxW(float* val) {
return (val + CtrDoubleFeatureValue::EmbedxWIndex());
}
};
struct CtrDoublePushValue {
/*
float slot;
float show;
float click;
float embed_g;
std::vector<float> embedx_g;
*/
static int Dim(int embedx_dim) { return 4 + embedx_dim; }
static int DimSize(int dim, int embedx_dim) { return sizeof(float); }
static int Size(int embedx_dim) { return Dim(embedx_dim) * sizeof(float); }
static int SlotIndex() { return 0; }
static int ShowIndex() { return CtrDoublePushValue::SlotIndex() + 1; }
static int ClickIndex() { return CtrDoublePushValue::ShowIndex() + 1; }
static int EmbedGIndex() { return CtrDoublePushValue::ClickIndex() + 1; }
static int EmbedxGIndex() { return CtrDoublePushValue::EmbedGIndex() + 1; }
static float& Slot(float* val) {
return val[CtrDoublePushValue::SlotIndex()];
}
static float& Show(float* val) {
return val[CtrDoublePushValue::ShowIndex()];
}
static float& Click(float* val) {
return val[CtrDoublePushValue::ClickIndex()];
}
static float& EmbedG(float* val) {
return val[CtrDoublePushValue::EmbedGIndex()];
}
static float* EmbedxG(float* val) {
return val + CtrDoublePushValue::EmbedxGIndex();
}
};
struct CtrDoublePullValue {
/*
float show;
float click;
float embed_w;
std::vector<float> embedx_w;
*/
static int Dim(int embedx_dim) { return 3 + embedx_dim; }
static int DimSize(size_t dim) { return sizeof(float); }
static int Size(int embedx_dim) { return Dim(embedx_dim) * sizeof(float); }
static int ShowIndex() { return 0; }
static int ClickIndex() { return 1; }
static int EmbedWIndex() { return 2; }
static int EmbedxWIndex() { return 3; }
static float& Show(float* val) {
return val[CtrDoublePullValue::ShowIndex()];
}
static float& Click(float* val) {
return val[CtrDoublePullValue::ClickIndex()];
}
static float& EmbedW(float* val) {
return val[CtrDoublePullValue::EmbedWIndex()];
}
static float* EmbedxW(float* val) {
return val + CtrDoublePullValue::EmbedxWIndex();
}
};
CtrDoubleAccessor() {}
virtual ~CtrDoubleAccessor() {}
virtual int Initialize();
// 初始化AccessorInfo
virtual void InitAccessorInfo();
// 判断该value是否进行shrink
virtual bool Shrink(float* value);
virtual bool NeedExtendMF(float* value);
// 判断该value是否在save阶段dump,
// param作为参数用于标识save阶段,如downpour的xbox与batch_model
// param = 0, save all feature
// param = 1, save delta feature
// param = 3, save all feature with time decay
virtual bool Save(float* value, int param) override;
bool SaveCache(float* value,
int param,
double global_cache_threshold) override;
// update delta_score and unseen_days after save
virtual void UpdateStatAfterSave(float* value, int param) override;
// 判断该value是否保存到ssd
virtual bool SaveSSD(float* value);
// virtual bool save_cache(float* value, int param, double
// global_cache_threshold) override;
// keys不存在时,为values生成随机值
// 要求value的内存由外部调用者分配完毕
virtual int32_t Create(float** value, size_t num);
// 从values中选取到select_values中
virtual int32_t Select(float** select_values,
const float** values,
size_t num);
// 将update_values聚合到一起
virtual int32_t Merge(float** update_values,
const float** other_update_values,
size_t num);
// 将update_values聚合到一起,通过it.next判定是否进入下一个key
// virtual int32_t Merge(float** update_values, iterator it);
// 将update_values更新应用到values中
virtual int32_t Update(float** values,
const float** update_values,
size_t num);
virtual std::string ParseToString(const float* value, int param) override;
virtual int32_t ParseFromString(const std::string& str, float* v) override;
virtual bool CreateValue(int type, const float* value);
//这个接口目前只用来取show
virtual float GetField(float* value, const std::string& name) override {
CHECK(name == "show");
if (name == "show") {
return (float)CtrDoubleFeatureValue::Show(value);
}
return 0.0;
}
// DEFINE_GET_INDEX(CtrDoubleFeatureValue, show)
// DEFINE_GET_INDEX(CtrDoubleFeatureValue, click)
// DEFINE_GET_INDEX(CtrDoubleFeatureValue, embed_w)
// DEFINE_GET_INDEX(CtrDoubleFeatureValue, embedx_w)
private:
double ShowClickScore(double show, double click);
private:
SparseValueSGDRule* _embed_sgd_rule;
SparseValueSGDRule* _embedx_sgd_rule;
float _show_click_decay_rate;
int32_t _ssd_unseenday_threshold;
bool _show_scale = false;
};
} // namespace distributed
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/distributed/ps/table/ctr_dymf_accessor.h"
#include <gflags/gflags.h>
#include "glog/logging.h"
#include "paddle/fluid/string/string_helper.h"
namespace paddle {
namespace distributed {
// Creates the embed / embedx SGD rules named in the table config, records
// their dimensions and the embedx optimizer name in common_feature_value,
// caches the CTR accessor parameters, and fills in _accessor_info.
// Always returns 0.
int CtrDymfAccessor::Initialize() {
  const auto& embed_param = _config.embed_sgd_param();
  auto rule_name = embed_param.name();
  _embed_sgd_rule = CREATE_PSCORE_CLASS(SparseValueSGDRule, rule_name);
  _embed_sgd_rule->LoadConfig(embed_param, 1);

  const auto& embedx_param = _config.embedx_sgd_param();
  rule_name = embedx_param.name();
  _embedx_sgd_rule = CREATE_PSCORE_CLASS(SparseValueSGDRule, rule_name);
  _embedx_sgd_rule->LoadConfig(embedx_param, _config.embedx_dim());

  // rule_name holds the embedx rule's name at this point.
  common_feature_value.optimizer_name = rule_name;
  common_feature_value.embed_sgd_dim = _embed_sgd_rule->Dim();
  common_feature_value.embedx_dim = _config.embedx_dim();
  common_feature_value.embedx_sgd_dim = _embedx_sgd_rule->Dim();

  const auto& ctr_param = _config.ctr_accessor_param();
  _show_click_decay_rate = ctr_param.show_click_decay_rate();
  _ssd_unseenday_threshold = ctr_param.ssd_unseenday_threshold();
  // Only ever switched on here; a false config leaves the member untouched.
  if (ctr_param.show_scale()) {
    _show_scale = true;
  }

  VLOG(0) << " INTO CtrDymfAccessor::Initialize(); embed_sgd_dim:"
          << common_feature_value.embed_sgd_dim
          << " embedx_dim:" << common_feature_value.embedx_dim
          << " embedx_sgd_dim:" << common_feature_value.embedx_sgd_dim;

  InitAccessorInfo();
  return 0;
}
// Fills _accessor_info with the slot counts and byte sizes of the stored
// value, the pull (select) record and the push (update) record.
void CtrDymfAccessor::InitAccessorInfo() {
  // Stored feature value, as described by common_feature_value.
  _accessor_info.dim = common_feature_value.Dim();
  _accessor_info.size = common_feature_value.Size();

  const auto embedx_dim = _config.embedx_dim();
  VLOG(0) << "InitAccessorInfo embedx_dim:" << embedx_dim;

  // Pull record: 4 fixed floats plus embedx.
  _accessor_info.select_dim = 4 + embedx_dim;
  _accessor_info.select_size = _accessor_info.select_dim * sizeof(float);

  // Push record: 5 fixed floats plus embedx.
  _accessor_info.update_dim = 5 + embedx_dim;
  _accessor_info.update_size = _accessor_info.update_dim * sizeof(float);

  // embedx weights plus their optimizer state.
  _accessor_info.mf_size =
      (embedx_dim + common_feature_value.embedx_sgd_dim) * sizeof(float);
}
// Applies time decay to show/click in place, then reports whether the value
// should be deleted: true when its decayed show/click score is below
// delete_threshold or it has been unseen for more than
// delete_after_unseen_days.
bool CtrDymfAccessor::Shrink(float* value) {
  const auto& ctr_param = _config.ctr_accessor_param();

  // Decay first so the score below is computed from the decayed statistics.
  common_feature_value.Show(value) *= _show_click_decay_rate;
  common_feature_value.Click(value) *= _show_click_decay_rate;

  const auto score = ShowClickScore(common_feature_value.Show(value),
                                    common_feature_value.Click(value));
  const auto unseen_days = common_feature_value.UnseenDays(value);
  return score < ctr_param.delete_threshold() ||
         unseen_days > ctr_param.delete_after_unseen_days();
}
// Returns true when the value passes the base score threshold, has been seen
// within delta_keep_days, and its show count exceeds global_cache_threshold.
// `param` is not used.
bool CtrDymfAccessor::SaveCache(float* value,
                                int param,
                                double global_cache_threshold) {
  const auto& ctr_param = _config.ctr_accessor_param();
  const auto score = ShowClickScore(common_feature_value.Show(value),
                                    common_feature_value.Click(value));
  const bool eligible =
      score >= ctr_param.base_threshold() &&
      common_feature_value.UnseenDays(value) <= ctr_param.delta_keep_days();
  if (!eligible) {
    return false;
  }
  return common_feature_value.Show(value) > global_cache_threshold;
}
// Returns true when the value's unseen_days exceeds the cached SSD threshold.
bool CtrDymfAccessor::SaveSSD(float* value) {
  return common_feature_value.UnseenDays(value) > _ssd_unseenday_threshold;
}
// Decides whether `value` is dumped in the save stage identified by `param`.
//   param == 1 (xbox delta) and param == 2 (xbox base) filter on the
//   show/click score, delta_score and unseen_days; every other param saves
//   the value unconditionally.
// For param == 2 the delta threshold is treated as 0 and delta_score is reset
// to 0 on a value that passes the filter.
bool CtrDymfAccessor::Save(float* value, int param) {
  const bool xbox_delta = (param == 1);
  const bool xbox_base = (param == 2);
  if (!xbox_delta && !xbox_base) {
    // Covers "save all" (0), stage 3 (already decayed in Shrink), the
    // revert batch_model stage (5) and any unrecognized stage.
    return true;
  }

  const auto& ctr_param = _config.ctr_accessor_param();
  auto delta_threshold = ctr_param.delta_threshold();
  if (xbox_base) {
    delta_threshold = 0;
  }

  const auto score = ShowClickScore(common_feature_value.Show(value),
                                    common_feature_value.Click(value));
  const bool keep =
      score >= ctr_param.base_threshold() &&
      common_feature_value.DeltaScore(value) >= delta_threshold &&
      common_feature_value.UnseenDays(value) <= ctr_param.delta_keep_days();
  if (keep && xbox_base) {
    common_feature_value.DeltaScore(value) = 0;
  }
  return keep;
}
// Post-save bookkeeping for save mode `param`.
//   param == 1: reset the delta score of values that pass the delta filter
//               (base score, delta score and unseen-days checks).
//   param == 3: increment the unseen-days counter.
//   otherwise:  nothing to do.
void CtrDymfAccessor::UpdateStatAfterSave(float* value, int param) {
  if (param == 3) {
    common_feature_value.UnseenDays(value) += 1;
    return;
  }
  if (param != 1) {
    return;
  }

  const auto& ctr_param = _config.ctr_accessor_param();
  const bool passes_delta_filter =
      ShowClickScore(common_feature_value.Show(value),
                     common_feature_value.Click(value)) >=
          ctr_param.base_threshold() &&
      common_feature_value.DeltaScore(value) >= ctr_param.delta_threshold() &&
      common_feature_value.UnseenDays(value) <= ctr_param.delta_keep_days();
  if (passes_delta_filter) {
    common_feature_value.DeltaScore(value) = 0;
  }
}
// Initializes `num` freshly created feature values. Statistics are zeroed,
// slot and mf_dim are set to -1, and the embed / embedx weights and optimizer
// state are initialized through their SGD rules. Always returns 0.
int32_t CtrDymfAccessor::Create(float** values, size_t num) {
  for (size_t idx = 0; idx != num; ++idx) {
    float* row = values[idx];

    // Statistics start from zero.
    common_feature_value.UnseenDays(row) = 0;
    common_feature_value.DeltaScore(row) = 0;
    common_feature_value.Show(row) = 0;
    common_feature_value.Click(row) = 0;

    // -1 marks slot and mf_dim as not yet assigned.
    common_feature_value.Slot(row) = -1;
    common_feature_value.MfDim(row) = -1;

    // Original note: adam embed init not zero, adagrad embed init zero.
    _embed_sgd_rule->InitValue(row + common_feature_value.EmbedWIndex(),
                               row + common_feature_value.EmbedG2SumIndex(),
                               false);
    _embedx_sgd_rule->InitValue(row + common_feature_value.EmbedxWIndex(),
                                row + common_feature_value.EmbedxG2SumIndex(),
                                false);
  }
  return 0;
}
// Returns true when the show/click score of `value` has reached the
// configured embedx threshold.
bool CtrDymfAccessor::NeedExtendMF(float* value) {
  const auto& ctr_param = _config.ctr_accessor_param();
  const float click = common_feature_value.Click(value);
  const float nonclick = common_feature_value.Show(value) - click;
  const float score =
      nonclick * ctr_param.nonclk_coeff() + click * ctr_param.click_coeff();
  return score >= _config.embedx_threshold();
}
// Returns true when `size` is larger than the index at which the embedx
// section (starting with embedx_g2sum) begins.
bool CtrDymfAccessor::HasMF(int size) {
  const int embedx_begin = common_feature_value.EmbedxG2SumIndex();
  return embedx_begin < size;
}
// Converts stored feature values (CtrDymfFeatureValue layout) into pull values
// (CtrDymfPullValue layout): show, click, embed_w and the first
// `_config.embedx_dim()` embedx weights are copied for each of the `num` items.
// NOTE(review): the mf_dim slot of the pull value is not written here —
// confirm whether callers fill it.
int32_t CtrDymfAccessor::Select(float** select_values,
                                const float** values,
                                size_t num) {
  const size_t embedx_bytes = _config.embedx_dim() * sizeof(float);
  const int src_show = common_feature_value.ShowIndex();
  const int src_click = common_feature_value.ClickIndex();
  const int src_embed_w = common_feature_value.EmbedWIndex();
  const int src_embedx_w = common_feature_value.EmbedxWIndex();

  for (size_t idx = 0; idx != num; ++idx) {
    const float* src = values[idx];
    float* dst = select_values[idx];
    dst[CtrDymfPullValue::ShowIndex()] = src[src_show];
    dst[CtrDymfPullValue::ClickIndex()] = src[src_click];
    dst[CtrDymfPullValue::EmbedWIndex()] = src[src_embed_w];
    memcpy(dst + CtrDymfPullValue::EmbedxWIndex(),
           src + src_embedx_w,
           embedx_bytes);
  }
  return 0;
}
// Would merge CtrDymfPushValue rows into CtrDymfPushValue rows
// (first dimension: item, second dimension: field). Merging on the CPU is
// currently not supported, so this is a no-op that returns 0.
int32_t CtrDymfAccessor::Merge(float** update_values,
                               const float** other_update_values,
                               size_t num) {
  // The arguments are intentionally ignored.
  static_cast<void>(update_values);
  static_cast<void>(other_update_values);
  static_cast<void>(num);
  return 0;
}
// Would apply CtrDymfPushValue rows to stored CtrDymfFeatureValue rows
// (first dimension: item, second dimension: field). Updating on the CPU is
// currently not supported, so this is a no-op that returns 0.
int32_t CtrDymfAccessor::Update(float** update_values,
                                const float** push_values,
                                size_t num) {
  // The arguments are intentionally ignored.
  static_cast<void>(update_values);
  static_cast<void>(push_values);
  static_cast<void>(num);
  return 0;
}
// Decides whether a missing key should get a new value.
//   stage == 0 (pull): always create.
//   stage == 1 (push): `value` is read as a CtrDymfPushValue; a score <= 0
//                      never creates, a score >= 1 always creates, and a score
//                      in between creates when a uniform random draw falls
//                      below the score.
//   other stages:      always create.
bool CtrDymfAccessor::CreateValue(int stage, const float* value) {
  if (stage != 1) {
    return true;
  }

  // The push-value accessors take a mutable pointer; the data is only read.
  float* push_value = const_cast<float*>(value);
  const auto score = ShowClickScore(CtrDymfPushValue::Show(push_value),
                                    CtrDymfPushValue::Click(push_value));
  if (score <= 0) {
    return false;
  }
  if (score >= 1) {
    return true;
  }
  return local_uniform_real_distribution<float>()(local_random_engine()) <
         score;
}
// Weighted show/click score:
//   (show - click) * nonclk_coeff + click * click_coeff
// with both coefficients taken from the accessor configuration.
float CtrDymfAccessor::ShowClickScore(float show, float click) {
  const auto& ctr_param = _config.ctr_accessor_param();
  const auto nonclick_weight = ctr_param.nonclk_coeff();
  const auto click_weight = ctr_param.click_coeff();
  return (show - click) * nonclick_weight + click * click_weight;
}
// Serializes a stored feature value into a space-separated string.
//
// Value layout (all floats):
//   unseen_days, delta_score, show, click, embed_w,
//   embed_g2sum[embed_sgd_dim], slot, mf_dim,
//   embedx_g2sum[...], embedx_w[...]
//
// Everything before the embedx section is always written. The embedx section
// is appended only when the show/click score reaches the embedx threshold and
// `param` is greater than the index where that section begins.
std::string CtrDymfAccessor::ParseToString(const float* v, int param) {
  // Reused per thread; reset both the error state and the buffer.
  thread_local std::ostringstream os;
  os.clear();
  os.str("");

  // The five leading scalar fields.
  os << v[0];
  for (int field = 1; field < 5; ++field) {
    os << " " << v[field];
  }

  // embed_g2sum, slot and mf_dim.
  const int embedx_begin = common_feature_value.EmbedxG2SumIndex();
  for (int field = common_feature_value.EmbedG2SumIndex();
       field < embedx_begin;
       ++field) {
    os << " " << v[field];
  }

  // The accessors take a mutable pointer; the data is only read here.
  float* row = const_cast<float*>(v);
  const auto score = ShowClickScore(common_feature_value.Show(row),
                                    common_feature_value.Click(row));
  int mf_dim = static_cast<int>(common_feature_value.MfDim(row));
  if (score >= _config.embedx_threshold() && param > embedx_begin) {
    const int total_dim = common_feature_value.Dim(mf_dim);
    for (int field = embedx_begin; field < total_dim; ++field) {
      os << " " << v[field];
    }
  }
  return os.str();
}
// Parses the floats in `str` into `value` and returns how many were read.
// CHECK-fails when fewer than 7 floats are parsed.
int CtrDymfAccessor::ParseFromString(const std::string& str, float* value) {
  const auto parsed = paddle::string::str_to_float(str.data(), value);
  CHECK(parsed >= 7) << "expect more than 7 real:" << parsed;
  return parsed;
}
} // namespace distributed
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <stdint.h>
#include <stdio.h>
#include <vector>
#include "paddle/fluid/distributed/common/registerer.h"
#include "paddle/fluid/distributed/ps/table/accessor.h"
#include "paddle/fluid/distributed/ps/table/sparse_sgd_rule.h"
#include "paddle/fluid/distributed/the_one_ps.pb.h"
namespace paddle {
namespace distributed {
// Value accessor for CTR sparse features whose embedx ("mf") dimension is
// stored per value. It declares three flat float layouts: the stored feature
// value, the push value and the pull value.
class CtrDymfAccessor : public ValueAccessor {
public:
// Layout of a stored feature value. Index helpers return positions in a flat
// float array; the accessor helpers return references into that array.
struct CtrDymfFeatureValue {
/*
float unseen_days;
float delta_score;
float show;
float click;
float embed_w;
// float embed_g2sum;
std::vector<float> embed_g2sum;
float slot;
float mf_dim;
std::vector<float> embedx_g2sum;
// float embedx_g2sum;
std::vector<float> embedx_w;
*/
// Total number of floats, using the configured embedx_dim / embedx_sgd_dim.
// The constant 7 covers the seven scalar fields listed above.
int Dim() { return 7 + embed_sgd_dim + embedx_sgd_dim + embedx_dim; }
// Every field is one float wide; both arguments are ignored.
int DimSize(size_t dim, int embedx_dim) { return sizeof(float); }
// Total size in bytes, using the configured dimensions.
int Size() { return Dim() * sizeof(float); }
// Field positions; each one is defined relative to the previous field.
int UnseenDaysIndex() { return 0; }
int DeltaScoreIndex() { return UnseenDaysIndex() + 1; }
int ShowIndex() { return DeltaScoreIndex() + 1; }
int ClickIndex() { return ShowIndex() + 1; }
int EmbedWIndex() { return ClickIndex() + 1; }
int EmbedG2SumIndex() { return EmbedWIndex() + 1; }
int SlotIndex() { return EmbedG2SumIndex() + embed_sgd_dim; }
int MfDimIndex() { return SlotIndex() + 1; }
int EmbedxG2SumIndex() { return MfDimIndex() + 1; }
int EmbedxWIndex() { return EmbedxG2SumIndex() + embedx_sgd_dim; }
// Total number of floats for a value with the given mf_dim. The size of the
// embedx optimizer state depends on optimizer_name: mf_dim * 2 + 2 for
// "SparseAdamSGDRule", 4 for "SparseSharedAdamSGDRule", 1 otherwise.
int Dim(int& mf_dim) {
int tmp_embedx_sgd_dim = 1;
if (optimizer_name == "SparseAdamSGDRule") { // adam
tmp_embedx_sgd_dim = mf_dim * 2 + 2;
} else if (optimizer_name == "SparseSharedAdamSGDRule") { // shared_adam
tmp_embedx_sgd_dim = 4;
}
return 7 + embed_sgd_dim + tmp_embedx_sgd_dim + mf_dim;
}
// Total size in bytes for a value with the given mf_dim.
int Size(int& mf_dim) { return (Dim(mf_dim)) * sizeof(float); }
// Field accessors: references into `val` at the corresponding index. For the
// vector-like fields they refer to the first element.
float& UnseenDays(float* val) { return val[UnseenDaysIndex()]; }
float& DeltaScore(float* val) { return val[DeltaScoreIndex()]; }
float& Show(float* val) { return val[ShowIndex()]; }
float& Click(float* val) { return val[ClickIndex()]; }
float& Slot(float* val) { return val[SlotIndex()]; }
float& MfDim(float* val) { return val[MfDimIndex()]; }
float& EmbedW(float* val) { return val[EmbedWIndex()]; }
float& EmbedG2Sum(float* val) { return val[EmbedG2SumIndex()]; }
float& EmbedxG2Sum(float* val) { return val[EmbedxG2SumIndex()]; }
float& EmbedxW(float* val) { return val[EmbedxWIndex()]; }
// Layout parameters. They have no default initializers here.
// NOTE(review): presumably filled in by Initialize() — confirm.
int embed_sgd_dim;
int embedx_dim;
int embedx_sgd_dim;
std::string optimizer_name;
};
// Layout of a push value: five scalar fields followed by the embedx gradient.
struct CtrDymfPushValue {
/*
float slot;
float show;
float click;
float mf_dim;
float embed_g;
std::vector<float> embedx_g;
*/
// Number of floats / bytes for a push value with the given embedx_dim.
static int Dim(int embedx_dim) { return 5 + embedx_dim; }
static int DimSize(int dim, int embedx_dim) { return sizeof(float); }
static int Size(int embedx_dim) { return Dim(embedx_dim) * sizeof(float); }
// Field positions.
static int SlotIndex() { return 0; }
static int ShowIndex() { return CtrDymfPushValue::SlotIndex() + 1; }
static int ClickIndex() { return CtrDymfPushValue::ShowIndex() + 1; }
static int MfDimIndex() { return CtrDymfPushValue::ClickIndex() + 1; }
static int EmbedGIndex() { return CtrDymfPushValue::MfDimIndex() + 1; }
static int EmbedxGIndex() { return CtrDymfPushValue::EmbedGIndex() + 1; }
// Field accessors; EmbedxG returns a pointer to the start of the gradient.
static float& Slot(float* val) {
return val[CtrDymfPushValue::SlotIndex()];
}
static float& Show(float* val) {
return val[CtrDymfPushValue::ShowIndex()];
}
static float& Click(float* val) {
return val[CtrDymfPushValue::ClickIndex()];
}
static float& MfDim(float* val) {
return val[CtrDymfPushValue::MfDimIndex()];
}
static float& EmbedG(float* val) {
return val[CtrDymfPushValue::EmbedGIndex()];
}
static float* EmbedxG(float* val) {
return val + CtrDymfPushValue::EmbedxGIndex();
}
};
// Layout of a pull value: four scalar fields followed by the embedx weights.
struct CtrDymfPullValue {
/*
float show;
float click;
float mf_dim;
float embed_w;
std::vector<float> embedx_w;
*/
// Number of floats / bytes for a pull value with the given embedx_dim.
static int Dim(int embedx_dim) { return 4 + embedx_dim; }
static int DimSize(size_t dim) { return sizeof(float); }
static int Size(int embedx_dim) { return Dim(embedx_dim) * sizeof(float); }
// Field positions.
static int ShowIndex() { return 0; }
static int ClickIndex() { return 1; }
static int MfDimIndex() { return 2; }
static int EmbedWIndex() { return 3; }
static int EmbedxWIndex() { return 4; }
// Field accessors; EmbedxW returns a pointer to the start of the weights.
static float& Show(float* val) {
return val[CtrDymfPullValue::ShowIndex()];
}
static float& Click(float* val) {
return val[CtrDymfPullValue::ClickIndex()];
}
static float& MfDim(float* val) {
return val[CtrDymfPullValue::MfDimIndex()];
}
static float& EmbedW(float* val) {
return val[CtrDymfPullValue::EmbedWIndex()];
}
static float* EmbedxW(float* val) {
return val + CtrDymfPullValue::EmbedxWIndex();
}
};
CtrDymfAccessor() {}
virtual ~CtrDymfAccessor() {}
virtual int Initialize();
// Initialize AccessorInfo.
virtual void InitAccessorInfo();
// Decide whether this value should be shrunk (removed).
virtual bool Shrink(float* value);
// Decide whether this value should be saved to SSD.
// virtual bool save_ssd(float* value);
virtual bool NeedExtendMF(float* value);
virtual bool HasMF(int size);
// Decide whether this value is dumped during the save stage.
// param identifies the save stage, e.g. downpour's xbox and batch_model:
// param = 0, save all feature
// param = 1, save delta feature
// param = 2, save xbox base feature
bool Save(float* value, int param) override;
bool SaveCache(float* value,
int param,
double global_cache_threshold) override;
bool SaveSSD(float* value) override;
// update delta_score and unseen_days after save
void UpdateStatAfterSave(float* value, int param) override;
// Generate initial values for keys that do not exist yet.
// The memory behind each value must already be allocated by the caller.
virtual int32_t Create(float** value, size_t num);
// Select fields from values into select_values.
virtual int32_t Select(float** select_values,
const float** values,
size_t num);
// Aggregate update_values together.
virtual int32_t Merge(float** update_values,
const float** other_update_values,
size_t num);
// Aggregate update_values together; it.next decides whether to move on to
// the next key.
// virtual int32_t Merge(float** update_values, iterator it);
// Apply update_values to values.
virtual int32_t Update(float** values,
const float** update_values,
size_t num);
std::string ParseToString(const float* value, int param) override;
int32_t ParseFromString(const std::string& str, float* v) override;
virtual bool CreateValue(int type, const float* value);
// This interface is currently only used to fetch "show"; any other name
// yields 0.0.
float GetField(float* value, const std::string& name) override {
// CHECK(name == "show");
if (name == "show") {
return common_feature_value.Show(value);
}
return 0.0;
}
private:
// float ShowClickScore(float show, float click);
// SparseValueSGDRule* _embed_sgd_rule;
// SparseValueSGDRule* _embedx_sgd_rule;
// CtrDymfFeatureValue common_feature_value;
float _show_click_decay_rate;
int32_t _ssd_unseenday_threshold;
bool _show_scale = false;
public: // TODO(zhaocaibei123): it should be private, but we make it public
// for unit test
CtrDymfFeatureValue common_feature_value;
float ShowClickScore(float show, float click);
SparseValueSGDRule* _embed_sgd_rule;
SparseValueSGDRule* _embedx_sgd_rule;
};
} // namespace distributed
} // namespace paddle
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <math.h> // for sqrt in CPU and CUDA
#include <functional>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "gflags/gflags.h"
#include "paddle/fluid/distributed/common/utils.h"
namespace paddle {
namespace distributed {
// Base class for dense-parameter optimizers.
// TODO(tangwei12) integrate with sparse optimizer later.
class DenseOptimizer {
 public:
  DenseOptimizer() {}
  // The base class does not use `accessor` or `values`; the concrete
  // optimizers in this file look up the buffers they need by name.
  explicit DenseOptimizer(const CommonAccessorParameter& accessor,
                          std::vector<std::vector<float>>* values) {}
  // Virtual so that destroying a concrete optimizer through a
  // DenseOptimizer pointer is well defined.
  virtual ~DenseOptimizer() = default;

  // Applies the update for elements [begin, end). The implementations in this
  // file read `update_values + begin` and do not use `num`.
  virtual void Update(const float* update_values,
                      size_t num,
                      int begin,
                      int end) = 0;

  // Stores a non-owning pointer to the global learning rate.
  virtual void SetGlobalLR(float* lr) { global_learning_rate_ = lr; }

 protected:
  // Null until SetGlobalLR() is called, so an unset rate is detectable
  // instead of being an indeterminate pointer.
  float* global_learning_rate_ = nullptr;
};
// Summing "optimizer" for dense tensors: each update is added element-wise
// onto the parameter buffer.
class DSUM : public DenseOptimizer {
 public:
  // Finds the buffer named "Param" in `accessor.params()` and keeps a pointer
  // to the matching entry of `values`.
  explicit DSUM(const CommonAccessorParameter& accessor,
                std::vector<std::vector<float>>* values) {
    const auto& field_names = accessor.params();
    const int field_count = static_cast<int>(field_names.size());
    for (int idx = 0; idx != field_count; ++idx) {
      if (field_names[idx] == "Param") {
        param = (*values)[idx].data();
      }
    }
  }

  // param[begin, end) += update_values[begin, end). `num` is unused.
  void Update(const float* update_values,
              size_t num,
              int begin,
              int end) override {
    const auto count = end - begin;
    float* target = param + begin;
    GetBlas<float>().VADD(count, update_values + begin, target, target);
  }

  float* param;
};
// Plain SGD for dense tensors:
//   param -= (global_lr * learning_rate) * gradient
class DSGD : public DenseOptimizer {
 public:
  // Looks up the "LearningRate" and "Param" buffers by name in
  // `accessor.params()` and keeps pointers to the matching entries of
  // `values`.
  explicit DSGD(const CommonAccessorParameter& accessor,
                std::vector<std::vector<float>>* values) {
    const auto& field_names = accessor.params();
    const int field_count = static_cast<int>(field_names.size());
    for (int idx = 0; idx != field_count; ++idx) {
      const auto& field = field_names[idx];
      if (field == "LearningRate") {
        learning_rate = (*values)[idx].data();
      }
      if (field == "Param") {
        param = (*values)[idx].data();
      }
    }
  }

  // Applies one SGD step to elements [begin, end). `num` is unused.
  void Update(const float* update_values,
              size_t num,
              int begin,
              int end) override {
    const auto count = end - begin;
    auto blas = GetBlas<float>();
    const float step = *(global_learning_rate_) * (*learning_rate);

    // Scale a private copy of the gradient so the input stays untouched.
    std::vector<float> scaled(count);
    blas.VCOPY(count, update_values + begin, scaled.data());
    blas.SCAL(count, step, scaled.data());

    float* target = param + begin;
    blas.VSUB(count, target, scaled.data(), target);
  }

  float* learning_rate;
  float* param;
};
// adam optimizer for dense tensor
// TODO(zhaocaibei123): add CHECK(memory_dense_table.task_pool_size_) == 1
class DAdam : public DenseOptimizer {
public:
// Looks up the optimizer buffers by name in `accessor.params()` and keeps
// pointers to the matching entries of `values`. beta1, beta2 and epsilon are
// fixed constants for now.
explicit DAdam(const CommonAccessorParameter& accessor,
std::vector<std::vector<float>>* values) {
auto& names = accessor.params();
for (int x = 0; x < static_cast<int>(names.size()); ++x) {
if (names[x] == "LearningRate") {
learning_rate = (*values)[x].data();
}
if (names[x] == "Param") {
param = (*values)[x].data();
}
if (names[x] == "Moment1") {
moment1 = (*values)[x].data();
}
if (names[x] == "Moment2") {
moment2 = (*values)[x].data();
}
if (names[x] == "Beta1Pow") {
beta1_pow = (*values)[x].data();
}
if (names[x] == "Beta2Pow") {
beta2_pow = (*values)[x].data();
}
}
// add attr later
beta1 = 0.9;
beta2 = 0.999;
epsilon = 1.0e-8;
}
// make sure memory_dense_table.task_pool_size_ == 1;
// otherwise, task_pool_size_ times beta1_pow/beta2_pow multiplication
//
// One Adam step on elements [begin, end); `num` is unused. In order:
//   moment1 = beta1 * moment1 + (1 - beta1) * g
//   moment2 = beta2 * moment2 + (1 - beta2) * g^2
//   beta1_pow *= beta1; beta2_pow *= beta2   (shared scalars, once per call)
//   lr  = global_lr * learning_rate * sqrt(1 - beta2_pow) / (1 - beta1_pow)
//   eps = epsilon * sqrt(1 - beta2_pow)
//   param -= lr * moment1 / (sqrt(moment2) + eps)
// NOTE(review): the last line assumes SQRT/ADD are element-wise helpers and
// that VDIV(n, x, y, z) computes z = x / y — confirm against their definitions.
void Update(const float* update_values,
size_t num,
int begin,
int end) override {
auto update_numel = end - begin;
std::vector<float> grad, grad2, tmp;
grad.resize(update_numel);
grad2.resize(update_numel);
tmp.resize(update_numel);
auto blas = GetBlas<float>();
// grad = (1 - beta1) * g, grad2 = (1 - beta2) * g^2
blas.VCOPY(update_numel, update_values + begin, grad.data());
blas.VCOPY(update_numel, update_values + begin, grad2.data());
blas.SCAL(update_numel, 1 - beta1, grad.data());
blas.VSQUARE(update_numel, grad2.data(), grad2.data());
blas.SCAL(update_numel, 1 - beta2, grad2.data());
// Decay the stored moments in place, then add the new contributions.
blas.SCAL(update_numel, beta1, moment1 + begin);
blas.VADD(update_numel, moment1 + begin, grad.data(), moment1 + begin);
blas.SCAL(update_numel, beta2, moment2 + begin);
blas.VADD(update_numel, moment2 + begin, grad2.data(), moment2 + begin);
// Advance the running powers of beta1 / beta2.
beta1_pow[0] = beta1_pow[0] * beta1;
beta2_pow[0] = beta2_pow[0] * beta2;
// Bias-corrected learning rate and epsilon.
float lr_ = *(global_learning_rate_)*learning_rate[0];
lr_ *= sqrt(1 - beta2_pow[0]) / (1 - beta1_pow[0]);
float* tmp_ = tmp.data();
float eps_ = epsilon * sqrt(1 - beta2_pow[0]);
// tmp = lr * moment1 / (sqrt(moment2) + eps); param -= tmp
SQRT<float>(update_numel, moment2 + begin, tmp_);
ADD<float>(update_numel, tmp_, eps_, tmp_);
blas.VDIV(update_numel, moment1 + begin, tmp_, tmp_);
blas.SCAL(update_numel, lr_, tmp_);
blas.VSUB(update_numel, param + begin, tmp_, param + begin);
}
// Non-owning pointers into the buffers passed to the constructor.
float* learning_rate;
float* param;
float* moment1;
float* moment2;
float* beta1_pow;
float* beta2_pow;
// Hyper-parameters fixed in the constructor.
float beta1;
float beta2;
float epsilon;
};
// Dense-tensor optimizer that combines a momentum term with decayed g2sum /
// d2sum statistics (see Update for the exact formulas).
class DAdamD2Sum : public DenseOptimizer {
public:
// Looks up the optimizer buffers by name in `accessor.params()` and keeps
// pointers to the matching entries of `values`.
explicit DAdamD2Sum(const CommonAccessorParameter& accessor,
std::vector<std::vector<float>>* values) {
lr_hardcode = 5e-6;
auto& names = accessor.params();
for (int x = 0; x < static_cast<int>(names.size()); ++x) {
if (names[x] == "LearningRate") {
learning_rate = (*values)[x].data();
} else if (names[x] == "Param") {
param = (*values)[x].data();
} else if (names[x] == "Moment") {
mom_velocity = (*values)[x].data();
} else if (names[x] == "G2Sum") {
ada_g2sum = (*values)[x].data();
} else if (names[x] == "D2Sum") {
ada_d2sum = (*values)[x].data();
} else if (names[x] == "MomentDecayRate") {
mom_decay_rate = (*values)[x].data();
} else if (names[x] == "AdaDecayRate") {
ada_decay_rate = (*values)[x].data();
} else if (names[x] == "AdaEpsilon") {
ada_epsilon = (*values)[x].data();
}
}
}
// One step on elements [begin, end); `num` is unused. With g the gradient:
//   d2sum    = d2sum * ada_decay_rate + 1
//   g2sum    = g2sum * ada_decay_rate + g * g
//   scale    = sqrt((d2sum + eps * d2sum) / (g2sum + eps * d2sum))
//   velocity = (velocity + g) * mom_decay_rate - g
//   param   += learning_rate * velocity * scale      (element-wise)
// where eps is ada_epsilon[0].
void Update(const float* update_values,
size_t num,
int begin,
int end) override {
auto update_numel = end - begin;
// View the raw buffers as 1 x update_numel Eigen matrices (no copies).
Eigen::Map<Eigen::MatrixXf> mat_ada_g2sum(
ada_g2sum + begin, 1, update_numel);
Eigen::Map<Eigen::MatrixXf> mat_ada_d2sum(
ada_d2sum + begin, 1, update_numel);
Eigen::Map<Eigen::MatrixXf> mat_mom_velocity(
mom_velocity + begin, 1, update_numel);
Eigen::Map<Eigen::MatrixXf> mat_w(param + begin, 1, update_numel);
Eigen::Map<const Eigen::MatrixXf> mat_grad(
update_values + begin, 1, update_numel);
mat_ada_d2sum = (mat_ada_d2sum * ada_decay_rate[0]).array() + 1;
mat_ada_g2sum =
(mat_ada_g2sum * ada_decay_rate[0]) + mat_grad.cwiseProduct(mat_grad);
// Per-thread scratch buffer reused across calls.
thread_local std::vector<float> scale_vec;
scale_vec.resize(update_numel);
Eigen::Map<Eigen::MatrixXf> scale(scale_vec.data(), 1, update_numel);
// scale starts as a copy of the updated d2sum and becomes eps * d2sum.
memcpy(
scale_vec.data(), mat_ada_d2sum.data(), sizeof(float) * update_numel);
scale = scale.array() * ada_epsilon[0];
scale = (mat_ada_d2sum + scale).cwiseQuotient(mat_ada_g2sum + scale);
scale = scale.cwiseSqrt();
mat_mom_velocity =
(mat_mom_velocity + mat_grad) * mom_decay_rate[0] - mat_grad;
mat_w += learning_rate[0] * mat_mom_velocity.cwiseProduct(scale);
}
// Non-owning pointers into the buffers passed to the constructor.
float* learning_rate;
// Set in the constructor; not read by Update().
float lr_hardcode;
float* param;
float* mom_velocity;
float* ada_g2sum;
float* ada_d2sum;
float* mom_decay_rate;
float* ada_decay_rate;
float* ada_epsilon;
};
// Summary accumulator used for data_norm:
//   param = param * summary_decay_rate_d + update
class DSummary : public DenseOptimizer {
 public:
  // Looks up the "Param" and "SummaryDecayRate" buffers by name in
  // `accessor.params()` and keeps pointers to the matching entries of
  // `values`.
  explicit DSummary(const CommonAccessorParameter& accessor,
                    std::vector<std::vector<float>>* values) {
    const auto& field_names = accessor.params();
    const int field_count = static_cast<int>(field_names.size());
    for (int idx = 0; idx != field_count; ++idx) {
      const auto& field = field_names[idx];
      if (field == "Param") {
        param = (*values)[idx].data();
      } else if (field == "SummaryDecayRate") {
        summary_decay_rate = (*values)[idx].data();
      }
    }
  }

  // Decays elements [begin, end) of the summary and adds the update.
  // `num` is unused. The decay applied is the constant summary_decay_rate_d,
  // not the value behind the summary_decay_rate pointer.
  void Update(const float* update_values,
              size_t num,
              int begin,
              int end) override {
    const auto count = end - begin;
    Eigen::Map<const Eigen::MatrixXf> mat_grad(update_values + begin, 1, count);
    Eigen::Map<Eigen::MatrixXf> mat_w(param + begin, 1, count);
    mat_w = mat_w * summary_decay_rate_d + mat_grad;
  }

  float* summary_decay_rate;
  double summary_decay_rate_d = 0.999999;
  float* param;
};
} // namespace distributed
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <mct/hash-map.hpp>
#include <vector>
#include "gflags/gflags.h"
#include "paddle/fluid/distributed/common/chunk_allocator.h"
namespace paddle {
namespace distributed {
// Number of hash bits used to pick a bucket inside a shard, and the resulting
// bucket count (2^bits).
static constexpr int CTR_SPARSE_SHARD_BUCKET_NUM_BITS = 6;
static constexpr size_t CTR_SPARSE_SHARD_BUCKET_NUM =
    size_t{1} << CTR_SPARSE_SHARD_BUCKET_NUM_BITS;
// Thin wrapper around a std::vector<float> that holds the flat float array of
// one feature value.
class FixedFeatureValue {
 public:
  FixedFeatureValue() {}
  ~FixedFeatureValue() {}

  // Pointer to the first float; the const overload gives read-only access.
  float* data() { return _data.data(); }
  const float* data() const { return _data.data(); }

  // Number of floats currently stored.
  size_t size() const { return _data.size(); }

  // Resizes to `size` floats; new elements are value-initialized to 0.
  void resize(size_t size) { _data.resize(size); }

  // Asks the underlying vector to release unused capacity.
  void shrink_to_fit() { _data.shrink_to_fit(); }

 private:
  std::vector<float> _data;
};
// One shard of a sparse table: a hash map from KEY to VALUE split into
// CTR_SPARSE_SHARD_BUCKET_NUM closed hash maps ("buckets"). The maps store
// mct::Pointer handles; the VALUE objects themselves are acquired from and
// released to a ChunkAllocator.
template <class KEY, class VALUE>
struct alignas(64) SparseTableShard {
public:
typedef typename mct::closed_hash_map<KEY, mct::Pointer, std::hash<KEY>>
map_type;
// Iterator over the whole shard: walks one bucket and moves on to the next
// bucket when the current one is exhausted.
struct iterator {
typename map_type::iterator it;
size_t bucket;
map_type* buckets;
// Equality compares only the underlying map iterator.
friend bool operator==(const iterator& a, const iterator& b) {
return a.it == b.it;
}
friend bool operator!=(const iterator& a, const iterator& b) {
return a.it != b.it;
}
const KEY& key() const { return it->first; }
// The stored handle is reinterpreted as a pointer to VALUE.
VALUE& value() const { return *(VALUE*)(void*)it->second; } // NOLINT
VALUE* value_ptr() const { return (VALUE*)(void*)it->second; } // NOLINT
// Advances within the bucket; on reaching its end, skips forward through
// empty buckets until an element or the last bucket's end is reached.
iterator& operator++() {
++it;
while (it == buckets[bucket].end() &&
bucket + 1 < CTR_SPARSE_SHARD_BUCKET_NUM) {
it = buckets[++bucket].begin();
}
return *this;
}
iterator operator++(int) {
iterator ret = *this;
++*this;
return ret;
}
};
// Iterator restricted to a single bucket.
struct local_iterator {
typename map_type::iterator it;
friend bool operator==(const local_iterator& a, const local_iterator& b) {
return a.it == b.it;
}
friend bool operator!=(const local_iterator& a, const local_iterator& b) {
return a.it != b.it;
}
const KEY& key() const { return it->first; }
VALUE& value() const { return *(VALUE*)(void*)it->second; } // NOLINT
local_iterator& operator++() {
++it;
return *this;
}
local_iterator operator++(int) { return {it++}; }
};
// Releases every stored value back to the allocator.
~SparseTableShard() { clear(); }
// Element count is taken from the allocator, not from the maps.
bool empty() { return _alloc.size() == 0; }
size_t size() { return _alloc.size(); }
// Applies the same max load factor to every bucket.
void set_max_load_factor(float x) {
for (size_t bucket = 0; bucket < CTR_SPARSE_SHARD_BUCKET_NUM; bucket++) {
_buckets[bucket].max_load_factor(x);
}
}
size_t bucket_count() { return CTR_SPARSE_SHARD_BUCKET_NUM; }
size_t bucket_size(size_t bucket) { return _buckets[bucket].size(); }
// Releases all values and empties every bucket.
void clear() {
for (size_t bucket = 0; bucket < CTR_SPARSE_SHARD_BUCKET_NUM; bucket++) {
map_type& data = _buckets[bucket];
for (auto it = data.begin(); it != data.end(); ++it) {
_alloc.release((VALUE*)(void*)it->second); // NOLINT
}
data.clear();
}
}
// First element of the first non-empty bucket, or the end of the last bucket
// when the shard is empty.
iterator begin() {
auto it = _buckets[0].begin();
size_t bucket = 0;
while (it == _buckets[bucket].end() &&
bucket + 1 < CTR_SPARSE_SHARD_BUCKET_NUM) {
it = _buckets[++bucket].begin();
}
return {it, bucket, _buckets};
}
// The shard-wide end is the end of the last bucket.
iterator end() {
return {_buckets[CTR_SPARSE_SHARD_BUCKET_NUM - 1].end(),
CTR_SPARSE_SHARD_BUCKET_NUM - 1,
_buckets};
}
local_iterator begin(size_t bucket) { return {_buckets[bucket].begin()}; }
local_iterator end(size_t bucket) { return {_buckets[bucket].end()}; }
// Looks `key` up in the bucket chosen by its hash; returns end() if absent.
iterator find(const KEY& key) {
size_t hash = _hasher(key);
size_t bucket = compute_bucket(hash);
auto it = _buckets[bucket].find_with_hash(key, hash);
if (it == _buckets[bucket].end()) {
return end();
}
return {it, bucket, _buckets};
}
// Returns the value for `key`, creating it via emplace(key) if it is missing.
VALUE& operator[](const KEY& key) { return emplace(key).first.value(); }
std::pair<iterator, bool> insert(const KEY& key, const VALUE& val) {
return emplace(key, val);
}
std::pair<iterator, bool> insert(const KEY& key, VALUE&& val) {
return emplace(key, std::move(val));
}
// Inserts `key` if it is not present. A VALUE is acquired from the allocator
// (forwarding `args`) only when the key was newly inserted. Returns the
// iterator to the entry and whether an insertion happened.
template <class... ARGS>
std::pair<iterator, bool> emplace(const KEY& key, ARGS&&... args) {
size_t hash = _hasher(key);
size_t bucket = compute_bucket(hash);
auto res = _buckets[bucket].insert_with_hash({key, NULL}, hash);
if (res.second) {
res.first->second = _alloc.acquire(std::forward<ARGS>(args)...);
}
return {{res.first, bucket, _buckets}, res.second};
}
// Releases the value, erases the entry and returns an iterator to the next
// element, skipping over empty buckets.
iterator erase(iterator it) {
_alloc.release((VALUE*)(void*)it.it->second); // NOLINT
size_t bucket = it.bucket;
auto it2 = _buckets[bucket].erase(it.it);
while (it2 == _buckets[bucket].end() &&
bucket + 1 < CTR_SPARSE_SHARD_BUCKET_NUM) {
it2 = _buckets[++bucket].begin();
}
return {it2, bucket, _buckets};
}
// Releases the value and erases the entry without producing a successor
// iterator.
void quick_erase(iterator it) {
_alloc.release((VALUE*)(void*)it.it->second); // NOLINT
_buckets[it.bucket].quick_erase(it.it);
}
// Bucket-local variants of erase / quick_erase.
local_iterator erase(size_t bucket, local_iterator it) {
_alloc.release((VALUE*)(void*)it.it->second); // NOLINT
return {_buckets[bucket].erase(it.it)};
}
void quick_erase(size_t bucket, local_iterator it) {
_alloc.release((VALUE*)(void*)it.it->second); // NOLINT
_buckets[bucket].quick_erase(it.it);
}
// Erases by key; returns the number of elements removed (0 or 1).
size_t erase(const KEY& key) {
auto it = find(key);
if (it == end()) {
return 0;
}
quick_erase(it);
return 1;
}
// Bucket index is taken from the top CTR_SPARSE_SHARD_BUCKET_NUM_BITS bits of
// the hash. The single-bucket case is handled separately so that the shift
// amount never equals the full width of size_t.
size_t compute_bucket(size_t hash) {
if (CTR_SPARSE_SHARD_BUCKET_NUM == 1) {
return 0;
} else {
return hash >> (sizeof(size_t) * 8 - CTR_SPARSE_SHARD_BUCKET_NUM_BITS);
}
}
private:
map_type _buckets[CTR_SPARSE_SHARD_BUCKET_NUM];
ChunkAllocator<VALUE> _alloc;
std::hash<KEY> _hasher;
};
} // namespace distributed
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <ThreadPool.h>
#include <future> // NOLINT
#include <memory>
#include <unordered_set>
#include <vector>
namespace paddle {
namespace distributed {
// Set of row ids whose reads and writes are all funneled through tasks on a
// private thread pool, so callers never touch `set_` directly.
class ConcurrentSet {
 public:
  // NOTE(review): the pool is constructed with argument 1, presumably a
  // single worker thread so that tasks run one at a time — confirm against
  // ThreadPool.
  ConcurrentSet() : pool_(new ::ThreadPool(1)) {}
  ~ConcurrentSet() {}

  // Schedules insertion of `rows` into the set. The rows are copied into the
  // task, so the caller's vector may be destroyed before the task runs.
  // Wait on the returned future to know the insertion has completed.
  std::future<void> Update(const std::vector<uint64_t>& rows) {
    auto task = [this, rows] {
      for (auto row : rows) {
        set_.insert(row);
      }
    };
    return pool_->enqueue(std::move(task));
  }

  // Schedules moving the current contents of the set into `*result` (which
  // is cleared first) and emptying the set. `result` must stay valid until
  // the returned future is ready.
  std::future<void> GetAndClear(std::vector<uint64_t>* result) {
    // Capture the pointer by value: the task runs after this function has
    // returned, when a reference to the `result` parameter would dangle.
    auto task = [this, result] {
      result->assign(set_.begin(), set_.end());
      set_.clear();
    };
    return pool_->enqueue(std::move(task));
  }

 private:
  std::unordered_set<uint64_t> set_;
  std::unique_ptr<::ThreadPool> pool_{nullptr};
};
// Tracks, per trainer, the set of row ids updated since that trainer last
// fetched them. Each trainer has its own ConcurrentSet.
class GeoRecorder {
 public:
  // Creates one ConcurrentSet per trainer.
  explicit GeoRecorder(int trainer_num) : trainer_num_(trainer_num) {
    trainer_rows_.reserve(trainer_num);
    int remaining = trainer_num;
    while (remaining-- > 0) {
      trainer_rows_.emplace_back(new ConcurrentSet());
    }
  }

  ~GeoRecorder() = default;

  // Records `update_rows` for every trainer and blocks until all per-trainer
  // insertions have finished.
  void Update(const std::vector<uint64_t>& update_rows) {
    VLOG(3) << " row size: " << update_rows.size();
    std::vector<std::future<void>> pending;
    for (const auto& rows : trainer_rows_) {
      pending.push_back(rows->Update(update_rows));
    }
    for (auto& done : pending) {
      done.wait();
    }
  }

  // Moves the rows recorded for `trainer_id` into `*result` and clears them,
  // blocking until done. An out-of-range id throws via vector::at.
  void GetAndClear(uint32_t trainer_id, std::vector<uint64_t>* result) {
    VLOG(3) << "GetAndClear for trainer: " << trainer_id;
    auto& rows = trainer_rows_.at(trainer_id);
    rows->GetAndClear(result).wait();
  }

 private:
  const int trainer_num_;
  std::vector<std::unique_ptr<ConcurrentSet>> trainer_rows_;
};
} // namespace distributed
} // namespace paddle
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <functional>
#include <memory>
#include <random>
#include <string>
#include <utility>
#include <vector>
#include "gflags/gflags.h"
#include "paddle/fluid/framework/generator.h"
#include "paddle/fluid/operators/truncated_gaussian_random_op.h"
namespace paddle {
namespace distributed {
// Base class for value initializers. Subclasses implement GetValue() to
// produce one float; the bulk overloads call it repeatedly by default.
class Initializer {
 public:
  Initializer() {}
  // `attrs` is ignored by the base class.
  explicit Initializer(const std::vector<std::string> &attrs) {}

  // Produces a single initial value.
  virtual float GetValue() = 0;

  // Appends `numel` generated values to `*values`.
  virtual void GetValue(std::vector<float> *values, int numel) {
    for (int remaining = numel; remaining > 0; --remaining) {
      values->push_back(GetValue());
    }
  }

  // Writes `numel` generated values to value[0 .. numel).
  virtual void GetValue(float *value, int numel) {
    float *cursor = value;
    for (int remaining = numel; remaining > 0; --remaining) {
      *cursor++ = GetValue();
    }
  }

  virtual ~Initializer() {}

 protected:
  std::string name_;
  unsigned int seed_;
};
class UniformInitializer : public Initializer {
public:
explicit UniformInitializer(const std::vector<std::string> &attrs) {
name_ = attrs[0];
seed_ = static_cast<unsigned int>(std::stoi(attrs[1]));
min_ = std::stof(attrs[2]);
max_ = std::stof(attrs[3]);
dist_ = std::uniform_real_distribution<float>(min_, max_);
random_engine_ = framework::GetCPURandomEngine(seed_);
}
float GetValue() override { return dist_(*random_engine_); }
void GetValue(float *value, int numel) {
for (int x = 0; x < numel; ++x) {
value[x] = dist_(*random_engine_);
}
}
private:
float min_;
float max_;
std::shared_ptr<std::mt19937_64> random_engine_;
std::uniform_real_distribution<float> dist_;
};
// Initializer that draws values from std::normal_distribution<float>.
// Attribute layout read by the constructor:
//   attrs[0] name, attrs[1] seed (parsed with std::stoi),
//   attrs[2] mean, attrs[3] standard deviation (parsed with std::stof).
class GaussianInitializer : public Initializer {
 public:
  explicit GaussianInitializer(const std::vector<std::string> &attrs) {
    name_ = attrs[0];
    const int parsed_seed = std::stoi(attrs[1]);
    seed_ = static_cast<unsigned int>(parsed_seed);
    const float mean = std::stof(attrs[2]);
    const float stddev = std::stof(attrs[3]);
    mean_ = mean;
    std_ = stddev;
    random_engine_ = framework::GetCPURandomEngine(seed_);
    dist_ = std::normal_distribution<float>(mean_, std_);
  }

  // Draws a single sample.
  float GetValue() override { return dist_(*random_engine_); }

  // Fills value[0 .. numel) with samples, sampling the distribution directly
  // instead of going through the scalar virtual call per element.
  void GetValue(float *value, int numel) {
    int idx = 0;
    while (idx < numel) {
      value[idx] = dist_(*random_engine_);
      ++idx;
    }
  }

 private:
  float std_;
  float mean_;
  std::shared_ptr<std::mt19937_64> random_engine_;
  std::normal_distribution<float> dist_;
};
// Initializer that feeds uniform samples through
// paddle::operators::TruncatedNormal<float>(mean, std).
// Attribute layout read by the constructor:
//   attrs[0] name, attrs[1] seed (parsed with std::stoi),
//   attrs[2] mean, attrs[3] standard deviation (parsed with std::stof).
class TruncatedGaussianInitializer : public Initializer {
 public:
  explicit TruncatedGaussianInitializer(const std::vector<std::string> &attrs) {
    name_ = attrs[0];
    seed_ = static_cast<unsigned int>(std::stoi(attrs[1]));
    mean_ = std::stof(attrs[2]);
    std_ = std::stof(attrs[3]);
    // Assign the MEMBER distribution. Declaring a local variable named
    // `dist_` here would shadow the member and leave it default-constructed
    // with range [0, 1), silently dropping the intended lower bound.
    dist_ = std::uniform_real_distribution<float>(
        std::numeric_limits<float>::min(), 1.0f);
    random_engine_ = framework::GetCPURandomEngine(seed_);
  }

  // Draws one uniform sample and maps it through the truncated normal.
  float GetValue() override {
    paddle::operators::TruncatedNormal<float> truncated_normal(mean_, std_);
    float value = truncated_normal(dist_(*random_engine_));
    return value;
  }

  // Fills value[0 .. numel) with samples; the TruncatedNormal functor is
  // built once and reused for every element.
  void GetValue(float *value, int numel) {
    paddle::operators::TruncatedNormal<float> truncated_normal(mean_, std_);
    for (int x = 0; x < numel; ++x) {
      value[x] = truncated_normal(dist_(*random_engine_));
    }
  }

 private:
  float std_;
  float mean_;
  std::shared_ptr<std::mt19937_64> random_engine_;
  // Source of the uniform inputs, configured in the constructor as
  // [numeric_limits<float>::min(), 1).
  std::uniform_real_distribution<float> dist_;
};
// Initializer that always yields the same constant.
// Attribute layout read by the constructor:
//   attrs[0] name, attrs[1] the constant (parsed with std::stof).
class FillConstantInitializer : public Initializer {
 public:
  explicit FillConstantInitializer(const std::vector<std::string> &attrs) {
    name_ = attrs[0];
    const float parsed = std::stof(attrs[1]);
    value_ = parsed;
  }

  // Returns the configured constant.
  float GetValue() override { return value_; }

  // Writes the constant into value[0 .. numel); a non-positive `numel`
  // writes nothing.
  void GetValue(float *value, int numel) {
    for (int idx = 0; idx < numel; ++idx) {
      value[idx] = value_;
    }
  }

 private:
  float value_;
};
} // namespace distributed
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <glog/logging.h>
#include <rocksdb/db.h>
#include <rocksdb/filter_policy.h>
#include <rocksdb/options.h>
#include <rocksdb/slice.h>
#include <rocksdb/table.h>
#include <rocksdb/write_batch.h>
#include <iostream>
#include <string>
namespace paddle {
namespace distributed {
class RocksDBHandler {
public:
RocksDBHandler() {}
~RocksDBHandler() {}
static RocksDBHandler* GetInstance() {
static RocksDBHandler handler;
return &handler;
}
int initialize(const std::string& db_path, const int colnum) {
VLOG(3) << "db path: " << db_path << " colnum: " << colnum;
rocksdb::Options options;
rocksdb::BlockBasedTableOptions bbto;
bbto.block_size = 4 * 1024;
bbto.block_cache = rocksdb::NewLRUCache(64 * 1024 * 1024);
bbto.block_cache_compressed = rocksdb::NewLRUCache(64 * 1024 * 1024);
bbto.cache_index_and_filter_blocks = false;
bbto.filter_policy.reset(rocksdb::NewBloomFilterPolicy(20, false));
bbto.whole_key_filtering = true;
options.table_factory.reset(rocksdb::NewBlockBasedTableFactory(bbto));
options.keep_log_file_num = 100;
options.max_log_file_size = 50 * 1024 * 1024; // 50MB
options.create_if_missing = true;
options.use_direct_reads = true;
options.max_background_flushes = 5;
options.max_background_compactions = 5;
options.base_background_compactions = 10;
options.write_buffer_size = 256 * 1024 * 1024; // 256MB
options.max_write_buffer_number = 8;
options.max_bytes_for_level_base =
options.max_write_buffer_number * options.write_buffer_size;
options.min_write_buffer_number_to_merge = 1;
options.target_file_size_base = 1024 * 1024 * 1024; // 1024MB
options.memtable_prefix_bloom_size_ratio = 0.02;
options.num_levels = 4;
options.max_open_files = -1;
options.compression = rocksdb::kNoCompression;
options.level0_file_num_compaction_trigger = 8;
options.level0_slowdown_writes_trigger =
1.8 * options.level0_file_num_compaction_trigger;
options.level0_stop_writes_trigger =
3.6 * options.level0_file_num_compaction_trigger;
if (!db_path.empty()) {
std::string rm_cmd = "rm -rf " + db_path;
system(rm_cmd.c_str());
}
rocksdb::Status s = rocksdb::DB::Open(options, db_path, &_db);
assert(s.ok());
_handles.resize(colnum);
for (int i = 0; i < colnum; i++) {
s = _db->CreateColumnFamily(
options, "shard_" + std::to_string(i), &_handles[i]);
assert(s.ok());
}
LOG(INFO) << "DB initialize success, colnum:" << colnum;
return 0;
}
int put(
int id, const char* key, int key_len, const char* value, int value_len) {
rocksdb::WriteOptions options;
options.disableWAL = true;
rocksdb::Status s = _db->Put(options,
_handles[id],
rocksdb::Slice(key, key_len),
rocksdb::Slice(value, value_len));
assert(s.ok());
return 0;
}
int put_batch(int id,
std::vector<std::pair<char*, int>>& ssd_keys,
std::vector<std::pair<char*, int>>& ssd_values,
int n) {
rocksdb::WriteOptions options;
options.disableWAL = true;
rocksdb::WriteBatch batch(n * 128);
for (int i = 0; i < n; i++) {
batch.Put(_handles[id],
rocksdb::Slice(ssd_keys[i].first, ssd_keys[i].second),
rocksdb::Slice(ssd_values[i].first, ssd_values[i].second));
}
rocksdb::Status s = _db->Write(options, &batch);
assert(s.ok());
return 0;
}
int get(int id, const char* key, int key_len, std::string& value) {
rocksdb::Status s = _db->Get(rocksdb::ReadOptions(),
_handles[id],
rocksdb::Slice(key, key_len),
&value);
if (s.IsNotFound()) {
return 1;
}
assert(s.ok());
return 0;
}
int del_data(int id, const char* key, int key_len) {
rocksdb::WriteOptions options;
options.disableWAL = true;
rocksdb::Status s =
_db->Delete(options, _handles[id], rocksdb::Slice(key, key_len));
assert(s.ok());
return 0;
}
int flush(int id) {
rocksdb::Status s = _db->Flush(rocksdb::FlushOptions(), _handles[id]);
assert(s.ok());
return 0;
}
rocksdb::Iterator* get_iterator(int id) {
return _db->NewIterator(rocksdb::ReadOptions(), _handles[id]);
}
int get_estimate_key_num(uint64_t& num_keys) {
_db->GetAggregatedIntProperty("rocksdb.estimate-num-keys", &num_keys);
return 0;
}
private:
std::vector<rocksdb::ColumnFamilyHandle*> _handles;
rocksdb::DB* _db;
};
} // namespace distributed
} // namespace paddle
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <functional>
#include <memory>
#include <string>
#include <utility>
#include <vector>
namespace paddle {
namespace distributed {
// Non-owning view of a sparse pull request: `numel_` 64-bit feature signs,
// one 32-bit frequency per feature sign, a training flag and `dim_`.
// The pointers refer to storage owned by the caller (a vector passed to the
// constructor or the buffer passed to DeserializeFromBytes) and must not
// outlive it.
struct PullSparseValue {
  // Leaves the view empty: zero elements, null arrays, training mode on.
  PullSparseValue() {}

  // Records only the element count and dim; the arrays stay null until
  // DeserializeFromBytes() points them at a buffer.
  explicit PullSparseValue(int numel, int dim)
      : numel_(numel),
        dim_(dim),
        is_training_(true),
        feasigns_(nullptr),
        frequencies_(nullptr) {}

  // Views the contents of two caller-owned vectors. The element count is
  // taken from `feasigns`; `frequencies` is not size-checked.
  explicit PullSparseValue(std::vector<uint64_t>& feasigns,     // NOLINT
                           std::vector<uint32_t>& frequencies,  // NOLINT
                           int dim)
      : numel_(static_cast<int>(feasigns.size())),
        dim_(dim),
        is_training_(true),
        feasigns_(feasigns.data()),
        frequencies_(frequencies.data()) {}

  // Points this view at a serialized request. `numel_` must already hold the
  // element count, because the frequency array's offset is derived from it.
  void DeserializeFromBytes(void* bytes) {
    /*
    |---isTraining--------------|
    |---8*{num}B(keysData)------|
    |---4*{num}B(Frequencies)---|
    */
    auto* begin = reinterpret_cast<char*>(bytes);
    is_training_ = reinterpret_cast<bool*>(begin)[0];
    // NOTE(review): the key array starts sizeof(bool) bytes into the buffer,
    // so it is generally not aligned for uint64_t -- confirm that all target
    // platforms tolerate the unaligned access.
    feasigns_ = reinterpret_cast<uint64_t*>(begin + sizeof(bool));
    frequencies_ = reinterpret_cast<uint32_t*>(begin + sizeof(bool) +
                                               sizeof(uint64_t) * numel_);
  }

  // Appends to `*offset_shard` the index of every feature sign for which
  // feasign % shard_num == shard_id, in ascending index order.
  // Appends nothing when the view is empty or `shard_num` is not positive
  // (the modulo would otherwise divide by zero).
  void Fission(const int shard_id,
               const int shard_num,
               std::vector<int>* offset_shard) const {
    if (shard_num <= 0 || numel_ <= 0) {
      return;
    }
    offset_shard->reserve(numel_ / shard_num + 1);
    const uint64_t modulus = static_cast<uint64_t>(shard_num);
    for (int x = 0; x < numel_; ++x) {
      // The remainder is below shard_num, so it always fits in an int.
      if (static_cast<int>(feasigns_[x] % modulus) == shard_id) {
        offset_shard->push_back(x);
      }
    }
  }

  int numel_ = 0;
  int dim_ = 0;
  bool is_training_ = true;
  uint64_t* feasigns_ = nullptr;
  uint32_t* frequencies_ = nullptr;
};
} // namespace distributed
} // namespace paddle
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
// Helpers that expand to a run of `friend class X;` declarations.
//
// DECLARE_GRAPH_FRIEND_CLASS(cls) emits one friend declaration.
// DECLARE_<k>_FRIEND_CLASS(cls, ...) emits a friend declaration for `cls` and
// hands the remaining arguments to DECLARE_<k-1>_FRIEND_CLASS, so it consumes
// exactly k class names; chains are provided for k = 1 .. 11.
// REGISTER_GRAPH_FRIEND_CLASS(count, ...) pastes `count` into the helper name,
// so `count` must be a literal 1 .. 11 equal to the number of names given.
#define DECLARE_GRAPH_FRIEND_CLASS(cls) friend class cls;
#define DECLARE_1_FRIEND_CLASS(cls, ...) DECLARE_GRAPH_FRIEND_CLASS(cls)
#define DECLARE_2_FRIEND_CLASS(cls, ...) \
  DECLARE_GRAPH_FRIEND_CLASS(cls) \
  DECLARE_1_FRIEND_CLASS(__VA_ARGS__)
#define DECLARE_3_FRIEND_CLASS(cls, ...) \
  DECLARE_GRAPH_FRIEND_CLASS(cls) \
  DECLARE_2_FRIEND_CLASS(__VA_ARGS__)
#define DECLARE_4_FRIEND_CLASS(cls, ...) \
  DECLARE_GRAPH_FRIEND_CLASS(cls) \
  DECLARE_3_FRIEND_CLASS(__VA_ARGS__)
#define DECLARE_5_FRIEND_CLASS(cls, ...) \
  DECLARE_GRAPH_FRIEND_CLASS(cls) \
  DECLARE_4_FRIEND_CLASS(__VA_ARGS__)
#define DECLARE_6_FRIEND_CLASS(cls, ...) \
  DECLARE_GRAPH_FRIEND_CLASS(cls) \
  DECLARE_5_FRIEND_CLASS(__VA_ARGS__)
#define DECLARE_7_FRIEND_CLASS(cls, ...) \
  DECLARE_GRAPH_FRIEND_CLASS(cls) \
  DECLARE_6_FRIEND_CLASS(__VA_ARGS__)
#define DECLARE_8_FRIEND_CLASS(cls, ...) \
  DECLARE_GRAPH_FRIEND_CLASS(cls) \
  DECLARE_7_FRIEND_CLASS(__VA_ARGS__)
#define DECLARE_9_FRIEND_CLASS(cls, ...) \
  DECLARE_GRAPH_FRIEND_CLASS(cls) \
  DECLARE_8_FRIEND_CLASS(__VA_ARGS__)
#define DECLARE_10_FRIEND_CLASS(cls, ...) \
  DECLARE_GRAPH_FRIEND_CLASS(cls) \
  DECLARE_9_FRIEND_CLASS(__VA_ARGS__)
#define DECLARE_11_FRIEND_CLASS(cls, ...) \
  DECLARE_GRAPH_FRIEND_CLASS(cls) \
  DECLARE_10_FRIEND_CLASS(__VA_ARGS__)
#define REGISTER_GRAPH_FRIEND_CLASS(count, ...) \
  DECLARE_##count##_FRIEND_CLASS(__VA_ARGS__)
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/distributed/ps/table/graph/graph_edge.h"
#include <cstring>
namespace paddle {
namespace distributed {
// Records the neighbor `id`. This blob keeps no weights, so `weight` is
// accepted but not stored.
void GraphEdgeBlob::add_edge(int64_t id, float weight = 1) {
  id_arr.emplace_back(id);
}
// Records the neighbor `id` together with its `weight`; both arrays grow in
// lock-step so index i of one matches index i of the other.
void WeightedGraphEdgeBlob::add_edge(int64_t id, float weight = 1) {
  id_arr.emplace_back(id);
  weight_arr.emplace_back(weight);
}
} // namespace distributed
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cstddef>
#include <cstdint>
#include <vector>
namespace paddle {
namespace distributed {
// Container of neighbor ids for unweighted edges. Every stored edge reports a
// weight of 1.
class GraphEdgeBlob {
 public:
  GraphEdgeBlob() = default;
  virtual ~GraphEdgeBlob() = default;

  // Number of stored edges.
  size_t size() {
    const size_t count = id_arr.size();
    return count;
  }

  // Adds an edge; defined out of line.
  virtual void add_edge(int64_t id, float weight);

  // Neighbor id at position `idx` (no bounds check).
  int64_t get_id(int idx) {
    const int64_t neighbor = id_arr[idx];
    return neighbor;
  }

  // Unweighted edges all share the same weight; `idx` is not consulted.
  virtual float get_weight(int idx) { return 1.0f; }

  // Mutable access to the underlying id storage.
  std::vector<int64_t>& export_id_array() { return id_arr; }

 protected:
  std::vector<int64_t> id_arr;
};
// Edge container that stores a per-edge weight array alongside the inherited
// id array.
class WeightedGraphEdgeBlob : public GraphEdgeBlob {
 public:
  WeightedGraphEdgeBlob() = default;
  virtual ~WeightedGraphEdgeBlob() = default;

  // Adds an edge; defined out of line.
  virtual void add_edge(int64_t id, float weight);

  // Weight stored at position `idx` (no bounds check).
  virtual float get_weight(int idx) {
    const float stored = weight_arr[idx];
    return stored;
  }

 protected:
  std::vector<float> weight_arr;
};
} // namespace distributed
} // namespace paddle
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment