Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Paddle
Commits
f0ef3442
Commit
f0ef3442
authored
Apr 26, 2023
by
yuguo960516yuguo
Browse files
2.3.2-dtk-22.10.1
parent
ad08b8ce
Pipeline
#227
failed with stages
in 0 seconds
Changes
274
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
2490 additions
and
0 deletions
+2490
-0
paddle/fluid/distributed/ps/table/ctr_accessor.h
paddle/fluid/distributed/ps/table/ctr_accessor.h
+208
-0
paddle/fluid/distributed/ps/table/ctr_double_accessor.cc
paddle/fluid/distributed/ps/table/ctr_double_accessor.cc
+379
-0
paddle/fluid/distributed/ps/table/ctr_double_accessor.h
paddle/fluid/distributed/ps/table/ctr_double_accessor.h
+224
-0
paddle/fluid/distributed/ps/table/ctr_dymf_accessor.cc
paddle/fluid/distributed/ps/table/ctr_dymf_accessor.cc
+324
-0
paddle/fluid/distributed/ps/table/ctr_dymf_accessor.h
paddle/fluid/distributed/ps/table/ctr_dymf_accessor.h
+238
-0
paddle/fluid/distributed/ps/table/depends/dense.h
paddle/fluid/distributed/ps/table/depends/dense.h
+307
-0
paddle/fluid/distributed/ps/table/depends/feature_value.h
paddle/fluid/distributed/ps/table/depends/feature_value.h
+202
-0
paddle/fluid/distributed/ps/table/depends/geo_recorder.h
paddle/fluid/distributed/ps/table/depends/geo_recorder.h
+91
-0
paddle/fluid/distributed/ps/table/depends/initializers.h
paddle/fluid/distributed/ps/table/depends/initializers.h
+161
-0
paddle/fluid/distributed/ps/table/depends/rocksdb_warpper.h
paddle/fluid/distributed/ps/table/depends/rocksdb_warpper.h
+163
-0
paddle/fluid/distributed/ps/table/depends/sparse_utils.h
paddle/fluid/distributed/ps/table/depends/sparse_utils.h
+77
-0
paddle/fluid/distributed/ps/table/graph/class_macro.h
paddle/fluid/distributed/ps/table/graph/class_macro.h
+39
-0
paddle/fluid/distributed/ps/table/graph/graph_edge.cc
paddle/fluid/distributed/ps/table/graph/graph_edge.cc
+30
-0
paddle/fluid/distributed/ps/table/graph/graph_edge.h
paddle/fluid/distributed/ps/table/graph/graph_edge.h
+47
-0
No files found.
Too many changes to show.
To preserve performance only
274 of 274+
files are displayed.
Plain diff
Email patch
paddle/fluid/distributed/ps/table/ctr_accessor.h
0 → 100644
View file @
f0ef3442
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <stdint.h>
#include <stdio.h>
#include <vector>
#include "paddle/fluid/distributed/common/registerer.h"
#include "paddle/fluid/distributed/ps/table/accessor.h"
#include "paddle/fluid/distributed/ps/table/sparse_sgd_rule.h"
#include "paddle/fluid/distributed/the_one_ps.pb.h"
namespace
paddle
{
namespace
distributed
{
// DownpourUnitAccessor
class
CtrCommonAccessor
:
public
ValueAccessor
{
public:
// Index and size helpers for one feature value stored as a flat float array.
// The positions depend on the three dimension members at the bottom of the
// struct, which the owning accessor is expected to fill in before use.
struct CtrCommonFeatureValue {
  /*
     float slot;
     float unseen_days;
     float delta_score;
     float show;
     float click;
     float embed_w;
     std::vector<float> embed_g2sum;
     std::vector<float> embedx_w;
     std::vector<float> embedx_g2sum;
  */

  // Number of floats in one value: six scalar fields plus the three
  // variable-length sections.
  int Dim() const { return 6 + embed_sgd_dim + embedx_sgd_dim + embedx_dim; }
  // Byte size of a single element; both arguments are ignored because every
  // element is a float.
  int DimSize(size_t dim, int embedx_dim) const { return sizeof(float); }
  // Byte size of one whole value.
  int Size() const { return Dim() * sizeof(float); }

  // Element offsets, each defined relative to the preceding field.
  int SlotIndex() const { return 0; }
  int UnseenDaysIndex() const { return SlotIndex() + 1; }
  int DeltaScoreIndex() const { return UnseenDaysIndex() + 1; }
  int ShowIndex() const { return DeltaScoreIndex() + 1; }
  int ClickIndex() const { return ShowIndex() + 1; }
  int EmbedWIndex() const { return ClickIndex() + 1; }
  int EmbedG2SumIndex() const { return EmbedWIndex() + 1; }
  int EmbedxWIndex() const { return EmbedG2SumIndex() + embed_sgd_dim; }
  int EmbedxG2SumIndex() const { return EmbedxWIndex() + embedx_dim; }

  // Field accessors: each returns a reference to the (first) element of the
  // named field inside the caller-owned array `val`.
  float& UnseenDays(float* val) const { return val[UnseenDaysIndex()]; }
  float& DeltaScore(float* val) const { return val[DeltaScoreIndex()]; }
  float& Show(float* val) const { return val[ShowIndex()]; }
  float& Click(float* val) const { return val[ClickIndex()]; }
  float& Slot(float* val) const { return val[SlotIndex()]; }
  float& EmbedW(float* val) const { return val[EmbedWIndex()]; }
  float& EmbedG2Sum(float* val) const { return val[EmbedG2SumIndex()]; }
  float& EmbedxW(float* val) const { return val[EmbedxWIndex()]; }
  float& EmbedxG2Sum(float* val) const { return val[EmbedxG2SumIndex()]; }

  // Section lengths. Zero-initialized so that Dim() and the index helpers are
  // well defined even before the owner assigns the real dimensions.
  int embed_sgd_dim = 0;
  int embedx_dim = 0;
  int embedx_sgd_dim = 0;
};
// Stateless index helpers for a push value laid out as a flat float array.
struct CtrCommonPushValue {
  /*
     float slot;
     float show;
     float click;
     float embed_g;
     std::vector<float> embedx_g;
  */

  // Four scalar fields followed by embedx_dim gradient entries.
  static int Dim(int embedx_dim) { return 4 + embedx_dim; }
  // Every element is a float, so the arguments do not affect the result.
  static int DimSize(int dim, int embedx_dim) { return sizeof(float); }
  // Byte size of one push value.
  static int Size(int embedx_dim) { return Dim(embedx_dim) * sizeof(float); }

  // Element offsets; each one follows the previous field.
  static int SlotIndex() { return 0; }
  static int ShowIndex() { return SlotIndex() + 1; }
  static int ClickIndex() { return ShowIndex() + 1; }
  static int EmbedGIndex() { return ClickIndex() + 1; }
  static int EmbedxGIndex() { return EmbedGIndex() + 1; }

  // Scalar field accessors into the caller-owned array.
  static float& Slot(float* val) { return val[SlotIndex()]; }
  static float& Show(float* val) { return val[ShowIndex()]; }
  static float& Click(float* val) { return val[ClickIndex()]; }
  static float& EmbedG(float* val) { return val[EmbedGIndex()]; }
  // Pointer to the first embedx gradient entry.
  static float* EmbedxG(float* val) { return val + EmbedxGIndex(); }
};
// Stateless index helpers for a pull value laid out as a flat float array.
struct CtrCommonPullValue {
  /*
     float show;
     float click;
     float embed_w;
     std::vector<float> embedx_w;
  */

  // Three scalar fields followed by embedx_dim weights.
  static int Dim(int embedx_dim) { return 3 + embedx_dim; }
  // Every element is a float, so the argument does not affect the result.
  static int DimSize(size_t dim) { return sizeof(float); }
  // Byte size of one pull value.
  static int Size(int embedx_dim) { return Dim(embedx_dim) * sizeof(float); }

  // Fixed element offsets.
  static int ShowIndex() { return 0; }
  static int ClickIndex() { return 1; }
  static int EmbedWIndex() { return 2; }
  static int EmbedxWIndex() { return 3; }

  // Scalar field accessors into the caller-owned array.
  static float& Show(float* val) { return val[ShowIndex()]; }
  static float& Click(float* val) { return val[ClickIndex()]; }
  static float& EmbedW(float* val) { return val[EmbedWIndex()]; }
  // Pointer to the first embedx weight.
  static float* EmbedxW(float* val) { return val + EmbedxWIndex(); }
};
CtrCommonAccessor
()
{}
virtual
~
CtrCommonAccessor
()
{}
virtual
int
Initialize
();
// Initialize AccessorInfo
virtual
void
InitAccessorInfo
();
// Decide whether this value should be shrunk
virtual
bool
Shrink
(
float
*
value
);
// Decide whether this value should be saved to SSD
// virtual bool save_ssd(float* value);
virtual
bool
NeedExtendMF
(
float
*
value
);
virtual
bool
HasMF
(
int
size
);
// Decide whether this value is dumped during the save stage.
// param identifies the save stage, e.g. downpour's xbox and batch_model
// param = 0, save all feature
// param = 1, save delta feature
// param = 2, save xbox base feature
bool
Save
(
float
*
value
,
int
param
)
override
;
bool
SaveCache
(
float
*
value
,
int
param
,
double
global_cache_threshold
)
override
;
bool
SaveSSD
(
float
*
value
)
override
;
// update delta_score and unseen_days after save
void
UpdateStatAfterSave
(
float
*
value
,
int
param
)
override
;
// When the keys do not exist yet, generate random values for them.
// The memory behind value must already be allocated by the caller.
virtual
int32_t
Create
(
float
**
value
,
size_t
num
);
// Select from values into select_values
virtual
int32_t
Select
(
float
**
select_values
,
const
float
**
values
,
size_t
num
);
// Merge update_values together
virtual
int32_t
Merge
(
float
**
update_values
,
const
float
**
other_update_values
,
size_t
num
);
// Merge update_values together; it.next decides whether to move to the next key
// virtual int32_t Merge(float** update_values, iterator it);
// Apply the update_values to values
virtual
int32_t
Update
(
float
**
values
,
const
float
**
update_values
,
size_t
num
);
std
::
string
ParseToString
(
const
float
*
value
,
int
param
)
override
;
int32_t
ParseFromString
(
const
std
::
string
&
str
,
float
*
v
)
override
;
virtual
bool
CreateValue
(
int
type
,
const
float
*
value
);
// This interface is currently only used to fetch "show".
// Returns the show field of `value` when `name` is "show", and 0 for any
// other field name.
float GetField(float* value, const std::string& name) override {
  // CHECK(name == "show");
  const bool wants_show = (name == "show");
  return wants_show ? common_feature_value.Show(value) : 0.0f;
}
private:
// float ShowClickScore(float show, float click);
// SparseValueSGDRule* _embed_sgd_rule;
// SparseValueSGDRule* _embedx_sgd_rule;
// CtrCommonFeatureValue common_feature_value;
float
_show_click_decay_rate
;
int32_t
_ssd_unseenday_threshold
;
bool
_show_scale
=
false
;
public:
// TODO(zhaocaibei123): it should be private, but we make it public
// for unit test
CtrCommonFeatureValue
common_feature_value
;
float
ShowClickScore
(
float
show
,
float
click
);
SparseValueSGDRule
*
_embed_sgd_rule
;
SparseValueSGDRule
*
_embedx_sgd_rule
;
};
}
// namespace distributed
}
// namespace paddle
paddle/fluid/distributed/ps/table/ctr_double_accessor.cc
0 → 100644
View file @
f0ef3442
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/distributed/ps/table/ctr_double_accessor.h"
#include <gflags/gflags.h>
#include "glog/logging.h"
#include "paddle/fluid/string/string_helper.h"
namespace
paddle
{
namespace
distributed
{
// Creates the two SGD rules named in _config, caches the CTR accessor
// parameters used by the other methods, and fills in _accessor_info.
// Always returns 0.
int CtrDoubleAccessor::Initialize() {
  // Rule for the single embed weight (loaded with dimension 1).
  auto embed_rule_name = _config.embed_sgd_param().name();
  _embed_sgd_rule = CREATE_PSCORE_CLASS(SparseValueSGDRule, embed_rule_name);
  _embed_sgd_rule->LoadConfig(_config.embed_sgd_param(), 1);

  // Rule for the embedx vector (loaded with the configured embedx dimension).
  auto embedx_rule_name = _config.embedx_sgd_param().name();
  _embedx_sgd_rule = CREATE_PSCORE_CLASS(SparseValueSGDRule, embedx_rule_name);
  _embedx_sgd_rule->LoadConfig(_config.embedx_sgd_param(),
                               _config.embedx_dim());

  _show_click_decay_rate =
      _config.ctr_accessor_param().show_click_decay_rate();
  _ssd_unseenday_threshold =
      _config.ctr_accessor_param().ssd_unseenday_threshold();

  // The flag is only ever switched on here; a false config value leaves the
  // member at whatever it already was.
  if (_config.ctr_accessor_param().show_scale()) {
    _show_scale = true;
  }

  InitAccessorInfo();
  return 0;
}
// Fills _accessor_info with the element counts and byte sizes of the stored
// value, the pull (select) value, the push (update) value and the mf part,
// all derived from the configured embedx dimension.
void CtrDoubleAccessor::InitAccessorInfo() {
  auto embedx_dim = _config.embedx_dim();
  auto& info = _accessor_info;

  // Stored feature value.
  info.dim = CtrDoubleFeatureValue::Dim(embedx_dim);
  info.size = CtrDoubleFeatureValue::Size(embedx_dim);

  // Pull value: three scalars plus the embedx weights.
  info.select_dim = 3 + embedx_dim;
  info.select_size = info.select_dim * sizeof(float);

  // Push value: four scalars plus the embedx gradients.
  info.update_dim = 4 + embedx_dim;
  info.update_size = info.update_dim * sizeof(float);

  // embedx weights plus one extra float.
  info.mf_size = (embedx_dim + 1) * sizeof(float);
}
// Applies the show/click decay to `value` in place and then reports whether
// the value should be removed: true when its decayed score is below
// delete_threshold or it has been unseen for more than
// delete_after_unseen_days.
bool CtrDoubleAccessor::Shrink(float* value) {
  auto delete_after_unseen_days =
      _config.ctr_accessor_param().delete_after_unseen_days();
  auto delete_threshold = _config.ctr_accessor_param().delete_threshold();

  // Time decay comes first so the score below uses the decayed counters.
  double& show = CtrDoubleFeatureValue::Show(value);
  double& click = CtrDoubleFeatureValue::Click(value);
  show *= _show_click_decay_rate;
  click *= _show_click_decay_rate;

  auto score = ShowClickScore(show, click);
  auto unseen_days = CtrDoubleFeatureValue::UnseenDays(value);
  return score < delete_threshold || unseen_days > delete_after_unseen_days;
}
// Returns true when the value's unseen-days counter exceeds the configured
// SSD threshold.
bool CtrDoubleAccessor::SaveSSD(float* value) {
  return CtrDoubleFeatureValue::UnseenDays(value) > _ssd_unseenday_threshold;
}
// Returns true when the value passes the base-score and delta-keep-days
// filters and its show count is strictly greater than
// global_cache_threshold. `param` is not used.
bool CtrDoubleAccessor::SaveCache(float* value,
                                  int param,
                                  double global_cache_threshold) {
  auto base_threshold = _config.ctr_accessor_param().base_threshold();
  auto delta_keep_days = _config.ctr_accessor_param().delta_keep_days();

  const bool eligible =
      ShowClickScore(CtrDoubleFeatureValue::Show(value),
                     CtrDoubleFeatureValue::Click(value)) >= base_threshold &&
      CtrDoubleFeatureValue::UnseenDays(value) <= delta_keep_days;
  if (!eligible) {
    return false;
  }
  return CtrDoubleFeatureValue::Show(value) > global_cache_threshold;
}
// Decides whether `value` is written out for save stage `param`.
//   param 1 (xbox delta) and param 2 (xbox base): the value is kept only if
//     its score, delta score and unseen days pass the configured thresholds;
//     for param 2 the delta threshold is 0 and the delta score is reset to 0
//     when the value is kept.
//   any other param (0 = save all, 3 = already decayed in Shrink, ...):
//     always kept.
bool CtrDoubleAccessor::Save(float* value, int param) {
  const bool is_xbox_delta = (param == 1);
  const bool is_xbox_base = (param == 2);
  if (!is_xbox_delta && !is_xbox_base) {
    return true;
  }

  auto base_threshold = _config.ctr_accessor_param().base_threshold();
  auto delta_threshold = _config.ctr_accessor_param().delta_threshold();
  auto delta_keep_days = _config.ctr_accessor_param().delta_keep_days();
  if (is_xbox_base) {
    delta_threshold = 0;
  }

  const bool keep =
      ShowClickScore(CtrDoubleFeatureValue::Show(value),
                     CtrDoubleFeatureValue::Click(value)) >= base_threshold &&
      CtrDoubleFeatureValue::DeltaScore(value) >= delta_threshold &&
      CtrDoubleFeatureValue::UnseenDays(value) <= delta_keep_days;
  if (!keep) {
    return false;
  }

  // For the xbox delta stage the delta score is left untouched here so that
  // a retry sees the same value; only the base stage clears it.
  if (is_xbox_base) {
    CtrDoubleFeatureValue::DeltaScore(value) = 0;
  }
  return true;
}
// Post-save bookkeeping for save stage `param`.
//   param 1: reset the delta score of values that pass the same filter Save
//            applies for that stage.
//   param 3: increment the unseen-days counter.
//   other:   no change.
void CtrDoubleAccessor::UpdateStatAfterSave(float* value, int param) {
  if (param == 3) {
    CtrDoubleFeatureValue::UnseenDays(value)++;
    return;
  }
  if (param != 1) {
    return;
  }

  auto base_threshold = _config.ctr_accessor_param().base_threshold();
  auto delta_threshold = _config.ctr_accessor_param().delta_threshold();
  auto delta_keep_days = _config.ctr_accessor_param().delta_keep_days();

  const bool passes_filter =
      ShowClickScore(CtrDoubleFeatureValue::Show(value),
                     CtrDoubleFeatureValue::Click(value)) >= base_threshold &&
      CtrDoubleFeatureValue::DeltaScore(value) >= delta_threshold &&
      CtrDoubleFeatureValue::UnseenDays(value) <= delta_keep_days;
  if (passes_filter) {
    CtrDoubleFeatureValue::DeltaScore(value) = 0;
  }
}
// Initializes `num` caller-allocated feature values: counters and scores are
// zeroed, slot is set to -1, and the embed / embedx sections are initialized
// by their SGD rules. Always returns 0.
int32_t CtrDoubleAccessor::Create(float** values, size_t num) {
  for (size_t idx = 0; idx < num; ++idx) {
    float* value = values[idx];

    CtrDoubleFeatureValue::UnseenDays(value) = 0;
    CtrDoubleFeatureValue::DeltaScore(value) = 0;
    // show and click are stored as doubles inside the float array.
    CtrDoubleFeatureValue::Show(value) = 0;
    CtrDoubleFeatureValue::Click(value) = 0;
    CtrDoubleFeatureValue::Slot(value) = -1;

    bool zero_init = _config.ctr_accessor_param().zero_init();
    _embed_sgd_rule->InitValue(
        value + CtrDoubleFeatureValue::EmbedWIndex(),
        value + CtrDoubleFeatureValue::EmbedG2SumIndex(),
        zero_init);
    // The embedx section is never zero-initialized here.
    _embedx_sgd_rule->InitValue(
        value + CtrDoubleFeatureValue::EmbedxWIndex(),
        value + CtrDoubleFeatureValue::EmbedxG2SumIndex(),
        false);
  }
  return 0;
}
// Returns true when the show/click score of `value` reaches the configured
// embedx threshold.
bool CtrDoubleAccessor::NeedExtendMF(float* value) {
  auto show = CtrDoubleFeatureValue::Show(value);
  auto click = CtrDoubleFeatureValue::Click(value);

  auto nonclk_coeff = _config.ctr_accessor_param().nonclk_coeff();
  auto click_coeff = _config.ctr_accessor_param().click_coeff();
  auto score = (show - click) * nonclk_coeff + click * click_coeff;

  return score >= _config.embedx_threshold();
}
// from CtrDoubleFeatureValue to CtrDoublePullValue
// Copies show, click, embed_w and the embedx weights of each of the `num`
// stored values into the matching pull value. show and click are narrowed
// from double to float. Always returns 0.
int32_t CtrDoubleAccessor::Select(float** select_values,
                                  const float** values,
                                  size_t num) {
  auto embedx_dim = _config.embedx_dim();
  for (size_t idx = 0; idx < num; ++idx) {
    float* dst = select_values[idx];
    // The accessors take a non-const pointer; the source is only read.
    float* src = const_cast<float*>(values[idx]);

    CtrDoublePullValue::Show(dst) =
        static_cast<float>(CtrDoubleFeatureValue::Show(src));
    CtrDoublePullValue::Click(dst) =
        static_cast<float>(CtrDoubleFeatureValue::Click(src));
    CtrDoublePullValue::EmbedW(dst) = CtrDoubleFeatureValue::EmbedW(src);
    memcpy(CtrDoublePullValue::EmbedxW(dst),
           CtrDoubleFeatureValue::EmbedxW(src),
           embedx_dim * sizeof(float));
  }
  return 0;
}
// from CtrDoublePushValue to CtrDoublePushValue
// first dim: item
// second dim: field num
// Adds every field of other_update_values[i] except the slot field into
// update_values[i], for each of the `num` items. Always returns 0.
int32_t CtrDoubleAccessor::Merge(float** update_values,
                                 const float** other_update_values,
                                 size_t num) {
  auto embedx_dim = _config.embedx_dim();
  size_t total_dim = CtrDoublePushValue::Dim(embedx_dim);

  for (size_t idx = 0; idx < num; ++idx) {
    float* dst = update_values[idx];
    const float* src = other_update_values[idx];
    for (size_t field = 0; field < total_dim; ++field) {
      // The slot is an identifier, not an accumulated quantity.
      if (static_cast<int>(field) == CtrDoublePushValue::SlotIndex()) {
        continue;
      }
      dst[field] += src[field];
    }
  }
  return 0;
}
// from CtrDoublePushValue to CtrDoubleFeatureValue
// first dim: item
// second dim: field num
// Applies each of the `num` push values to the matching stored value:
// accumulates show/click and the delta score, overwrites the slot, resets
// unseen days, and lets the SGD rules update the embed / embedx sections.
// Always returns 0.
int32_t CtrDoubleAccessor::Update(float** update_values,
                                  const float** push_values,
                                  size_t num) {
  for (size_t idx = 0; idx < num; ++idx) {
    float* update_value = update_values[idx];
    const float* push_value = push_values[idx];

    float push_show = push_value[CtrDoublePushValue::ShowIndex()];
    float push_click = push_value[CtrDoublePushValue::ClickIndex()];
    float slot = push_value[CtrDoublePushValue::SlotIndex()];

    // show and click are accumulated in double precision.
    CtrDoubleFeatureValue::Show(update_value) +=
        static_cast<double>(push_show);
    CtrDoubleFeatureValue::Click(update_value) +=
        static_cast<double>(push_click);
    CtrDoubleFeatureValue::Slot(update_value) = slot;
    CtrDoubleFeatureValue::DeltaScore(update_value) +=
        (push_show - push_click) *
            _config.ctr_accessor_param().nonclk_coeff() +
        push_click * _config.ctr_accessor_param().click_coeff();
    CtrDoubleFeatureValue::UnseenDays(update_value) = 0;

    // Without show scaling the rules receive a scale of 1.
    if (!_show_scale) {
      push_show = 1;
    }
    VLOG(3) << "accessor show scale:" << _show_scale
            << ", push_show:" << push_show;

    _embed_sgd_rule->UpdateValue(
        update_value + CtrDoubleFeatureValue::EmbedWIndex(),
        update_value + CtrDoubleFeatureValue::EmbedG2SumIndex(),
        push_value + CtrDoublePushValue::EmbedGIndex(),
        push_show);
    _embedx_sgd_rule->UpdateValue(
        update_value + CtrDoubleFeatureValue::EmbedxWIndex(),
        update_value + CtrDoubleFeatureValue::EmbedxG2SumIndex(),
        push_value + CtrDoublePushValue::EmbedxGIndex(),
        push_show);
  }
  return 0;
}
// Decides whether a value should be created for the given stage.
//   stage == 1 (push): scores the pushed show/click; a score <= 0 is
//     rejected, a score >= 1 is accepted, and anything in between is accepted
//     when a freshly drawn random float is below the score.
//   any other stage (0 = pull, ...): always true.
bool CtrDoubleAccessor::CreateValue(int stage, const float* value) {
  if (stage != 1) {
    return true;
  }

  // The push-value accessors take a non-const pointer; the data is only read.
  float* push_value = const_cast<float*>(value);
  const float show = CtrDoublePushValue::Show(push_value);
  const float click = CtrDoublePushValue::Click(push_value);
  auto score = ShowClickScore(show, click);

  if (score <= 0) {
    return false;
  }
  if (score >= 1) {
    return true;
  }
  return local_uniform_real_distribution<float>()(local_random_engine()) <
         score;
}
// Weighted score of a show/click pair: non-clicked shows are weighted by
// nonclk_coeff and clicks by click_coeff, both read from the CTR accessor
// config.
double CtrDoubleAccessor::ShowClickScore(double show, double click) {
  const auto& ctr_param = _config.ctr_accessor_param();
  auto nonclk_coeff = ctr_param.nonclk_coeff();
  auto click_coeff = ctr_param.click_coeff();
  return (show - click) * nonclk_coeff + click * click_coeff;
}
// Serializes a stored feature value to a space-separated string. The seven
// leading fields are always written (show and click narrowed to float). The
// embedx g2sum and weights are appended only when the value's score reaches
// the embedx threshold and param_size is greater than 9.
std::string CtrDoubleAccessor::ParseToString(const float* v, int param_size) {
  // One stream per thread, reset on every call.
  thread_local std::ostringstream os;
  os.clear();
  os.str("");

  // The accessors take a non-const pointer; the data is only read.
  float* value = const_cast<float*>(v);

  os << v[CtrDoubleFeatureValue::UnseenDaysIndex()] << " "
     << v[CtrDoubleFeatureValue::DeltaScoreIndex()] << " "
     << static_cast<float>(CtrDoubleFeatureValue::Show(value)) << " "
     << static_cast<float>(CtrDoubleFeatureValue::Click(value)) << " "
     << v[CtrDoubleFeatureValue::EmbedWIndex()] << " "
     << v[CtrDoubleFeatureValue::EmbedG2SumIndex()] << " "
     << v[CtrDoubleFeatureValue::SlotIndex()];

  auto show = CtrDoubleFeatureValue::Show(value);
  auto click = CtrDoubleFeatureValue::Click(value);
  auto score = ShowClickScore(show, click);

  const bool has_embedx =
      score >= _config.embedx_threshold() && param_size > 9;
  if (has_embedx) {
    os << " " << v[CtrDoubleFeatureValue::EmbedxG2SumIndex()];
    const float* embedx_w = v + CtrDoubleFeatureValue::EmbedxWIndex();
    for (size_t i = 0; i < _config.embedx_dim(); ++i) {
      os << " " << embedx_w[i];
    }
  }
  return os.str();
}
// Parses a space-separated line of floats into a stored feature value.
//
// str:   text holding at least 6 numbers (enforced with CHECK).
// value: caller-allocated destination laid out as CtrDoubleFeatureValue.
//
// The slot field is first set to -1. unseen_days and delta_score are copied
// as-is, show and click are widened from float to double. When exactly
// (value dim - 1) numbers were parsed, embed_w, embed_g2sum and the embedx
// section are copied field by field; otherwise everything after click is
// copied contiguously starting at embed_w.
//
// Returns the parsed count plus 2, with one more added when the count was
// (value dim - 1) or 6.
int CtrDoubleAccessor::ParseFromString(const std::string& str, float* value) {
  int embedx_dim = _config.embedx_dim();

  // Scratch buffer for the parsed numbers. A std::vector is used instead of a
  // variable-length array, which is not standard C++ and would place a
  // runtime-sized buffer on the stack.
  // NOTE(review): str_to_float is not given the buffer length here, so a line
  // with more than dim + 2 numbers presumably still overruns it - confirm.
  std::vector<float> data_buff(_accessor_info.dim + 2);
  float* data_buff_ptr = data_buff.data();

  _embedx_sgd_rule->InitValue(
      data_buff_ptr + CtrDoubleFeatureValue::EmbedxWIndex(),
      data_buff_ptr + CtrDoubleFeatureValue::EmbedxG2SumIndex());
  auto str_len = paddle::string::str_to_float(str.data(), data_buff_ptr);
  CHECK(str_len >= 6) << "expect more than 6 real:" << str_len;

  int show_index = CtrDoubleFeatureValue::ShowIndex();
  int click_index = CtrDoubleFeatureValue::ClickIndex();
  int embed_w_index = CtrDoubleFeatureValue::EmbedWIndex();
  // no slot, embedx
  int value_dim = _accessor_info.dim;
  int embedx_g2sum_index = CtrDoubleFeatureValue::EmbedxG2SumIndex();

  value[CtrDoubleFeatureValue::SlotIndex()] = -1;

  // Common to both input shapes: copy unseen_days..delta_score, then widen
  // show and click into their double slots.
  memcpy(value, data_buff_ptr, show_index * sizeof(float));
  *reinterpret_cast<double*>(value + show_index) =
      static_cast<double>(data_buff_ptr[2]);
  *reinterpret_cast<double*>(value + click_index) =
      static_cast<double>(data_buff_ptr[3]);

  if (str_len == (value_dim - 1)) {
    // The text has no slot column, so embedx must skip over the slot field.
    value[CtrDoubleFeatureValue::EmbedWIndex()] = data_buff_ptr[4];
    value[CtrDoubleFeatureValue::EmbedG2SumIndex()] = data_buff_ptr[5];
    memcpy(value + embedx_g2sum_index,
           data_buff_ptr + 6,
           (embedx_dim + 1) * sizeof(float));
  } else {
    // copy embed_w..embedx_w
    memcpy(value + embed_w_index,
           data_buff_ptr + 4,
           (str_len - 4) * sizeof(float));
  }

  if (str_len == (value_dim - 1) || str_len == 6) {
    str_len += 1;
  }
  return str_len + 2;
}
}
// namespace distributed
}
// namespace paddle
paddle/fluid/distributed/ps/table/ctr_double_accessor.h
0 → 100644
View file @
f0ef3442
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <stdint.h>
#include <stdio.h>
#include <vector>
#include "paddle/fluid/distributed/common/registerer.h"
#include "paddle/fluid/distributed/ps/table/accessor.h"
#include "paddle/fluid/distributed/ps/table/sparse_sgd_rule.h"
#include "paddle/fluid/distributed/the_one_ps.pb.h"
namespace
paddle
{
namespace
distributed
{
class
CtrDoubleAccessor
:
public
ValueAccessor
{
public:
// Stateless index helpers for a stored feature value laid out as a flat
// float array in which show and click each occupy two float slots and are
// accessed as doubles.
struct CtrDoubleFeatureValue {
  /*
     float unseen_days;
     float delta_score;
     double show;
     double click;
     float embed_w;
     float embed_g2sum;
     float slot;
     float embedx_g2sum;
     std::vector<float> embedx_w;
  */

  // Number of fields: eight named fields plus embedx_dim weights.
  static int Dim(int embedx_dim) { return 8 + embedx_dim; }
  // Always sizeof(float); the arguments are ignored.
  static int DimSize(size_t dim, int embedx_dim) { return sizeof(float); }
  // Byte size of one value; two extra floats account for the two doubles.
  static int Size(int embedx_dim) {
    return (Dim(embedx_dim) + 2) * sizeof(float);
  }

  // Offsets measured in floats from the start of the value.
  static int UnseenDaysIndex() { return 0; }
  static int DeltaScoreIndex() { return UnseenDaysIndex() + 1; }
  static int ShowIndex() { return DeltaScoreIndex() + 1; }
  // show is double
  static int ClickIndex() { return ShowIndex() + 2; }
  // click is double
  static int EmbedWIndex() { return ClickIndex() + 2; }
  static int EmbedG2SumIndex() { return EmbedWIndex() + 1; }
  static int SlotIndex() { return EmbedG2SumIndex() + 1; }
  static int EmbedxG2SumIndex() { return SlotIndex() + 1; }
  static int EmbedxWIndex() { return EmbedxG2SumIndex() + 1; }

  // Field accessors into the caller-owned array.
  static float& UnseenDays(float* val) { return val[UnseenDaysIndex()]; }
  static float& DeltaScore(float* val) { return val[DeltaScoreIndex()]; }
  // show and click reinterpret two consecutive floats as one double.
  static double& Show(float* val) {
    return *reinterpret_cast<double*>(val + ShowIndex());
  }
  static double& Click(float* val) {
    return *reinterpret_cast<double*>(val + ClickIndex());
  }
  static float& Slot(float* val) { return val[SlotIndex()]; }
  static float& EmbedW(float* val) { return val[EmbedWIndex()]; }
  static float& EmbedG2Sum(float* val) { return val[EmbedG2SumIndex()]; }
  static float& EmbedxG2Sum(float* val) { return val[EmbedxG2SumIndex()]; }
  // Pointer to the first embedx weight.
  static float* EmbedxW(float* val) { return val + EmbedxWIndex(); }
};
// Stateless index helpers for a push value laid out as a flat float array.
struct CtrDoublePushValue {
  /*
     float slot;
     float show;
     float click;
     float embed_g;
     std::vector<float> embedx_g;
  */

  // Four scalar fields followed by embedx_dim gradient entries.
  static int Dim(int embedx_dim) { return 4 + embedx_dim; }
  // Every element is a float, so the arguments do not affect the result.
  static int DimSize(int dim, int embedx_dim) { return sizeof(float); }
  // Byte size of one push value.
  static int Size(int embedx_dim) { return Dim(embedx_dim) * sizeof(float); }

  // Element offsets; each one follows the previous field.
  static int SlotIndex() { return 0; }
  static int ShowIndex() { return SlotIndex() + 1; }
  static int ClickIndex() { return ShowIndex() + 1; }
  static int EmbedGIndex() { return ClickIndex() + 1; }
  static int EmbedxGIndex() { return EmbedGIndex() + 1; }

  // Scalar field accessors into the caller-owned array.
  static float& Slot(float* val) { return val[SlotIndex()]; }
  static float& Show(float* val) { return val[ShowIndex()]; }
  static float& Click(float* val) { return val[ClickIndex()]; }
  static float& EmbedG(float* val) { return val[EmbedGIndex()]; }
  // Pointer to the first embedx gradient entry.
  static float* EmbedxG(float* val) { return val + EmbedxGIndex(); }
};
// Stateless index helpers for a pull value laid out as a flat float array.
struct CtrDoublePullValue {
  /*
     float show;
     float click;
     float embed_w;
     std::vector<float> embedx_w;
  */

  // Three scalar fields followed by embedx_dim weights.
  static int Dim(int embedx_dim) { return 3 + embedx_dim; }
  // Every element is a float, so the argument does not affect the result.
  static int DimSize(size_t dim) { return sizeof(float); }
  // Byte size of one pull value.
  static int Size(int embedx_dim) { return Dim(embedx_dim) * sizeof(float); }

  // Fixed element offsets.
  static int ShowIndex() { return 0; }
  static int ClickIndex() { return 1; }
  static int EmbedWIndex() { return 2; }
  static int EmbedxWIndex() { return 3; }

  // Scalar field accessors into the caller-owned array.
  static float& Show(float* val) { return val[ShowIndex()]; }
  static float& Click(float* val) { return val[ClickIndex()]; }
  static float& EmbedW(float* val) { return val[EmbedWIndex()]; }
  // Pointer to the first embedx weight.
  static float* EmbedxW(float* val) { return val + EmbedxWIndex(); }
};
CtrDoubleAccessor
()
{}
virtual
~
CtrDoubleAccessor
()
{}
virtual
int
Initialize
();
// Initialize AccessorInfo
virtual
void
InitAccessorInfo
();
// Decide whether this value should be shrunk
virtual
bool
Shrink
(
float
*
value
);
virtual
bool
NeedExtendMF
(
float
*
value
);
// Decide whether this value is dumped during the save stage.
// param identifies the save stage, e.g. downpour's xbox and batch_model
// param = 0, save all feature
// param = 1, save delta feature
// param = 3, save all feature with time decay
virtual
bool
Save
(
float
*
value
,
int
param
)
override
;
bool
SaveCache
(
float
*
value
,
int
param
,
double
global_cache_threshold
)
override
;
// update delta_score and unseen_days after save
virtual
void
UpdateStatAfterSave
(
float
*
value
,
int
param
)
override
;
// Decide whether this value should be saved to SSD
virtual
bool
SaveSSD
(
float
*
value
);
// virtual bool save_cache(float* value, int param, double
// global_cache_threshold) override;
// When the keys do not exist yet, generate random values for them.
// The memory behind value must already be allocated by the caller.
virtual
int32_t
Create
(
float
**
value
,
size_t
num
);
// Select from values into select_values
virtual
int32_t
Select
(
float
**
select_values
,
const
float
**
values
,
size_t
num
);
// Merge update_values together
virtual
int32_t
Merge
(
float
**
update_values
,
const
float
**
other_update_values
,
size_t
num
);
// Merge update_values together; it.next decides whether to move to the next key
// virtual int32_t Merge(float** update_values, iterator it);
// Apply the update_values to values
virtual
int32_t
Update
(
float
**
values
,
const
float
**
update_values
,
size_t
num
);
virtual
std
::
string
ParseToString
(
const
float
*
value
,
int
param
)
override
;
virtual
int32_t
ParseFromString
(
const
std
::
string
&
str
,
float
*
v
)
override
;
virtual
bool
CreateValue
(
int
type
,
const
float
*
value
);
// This interface is currently only used to fetch "show".
// CHECKs that `name` is "show" and returns the stored show count narrowed
// from double to float.
virtual float GetField(float* value, const std::string& name) override {
  CHECK(name == "show");
  const bool wants_show = (name == "show");
  return wants_show
             ? static_cast<float>(CtrDoubleFeatureValue::Show(value))
             : 0.0f;
}
// DEFINE_GET_INDEX(CtrDoubleFeatureValue, show)
// DEFINE_GET_INDEX(CtrDoubleFeatureValue, click)
// DEFINE_GET_INDEX(CtrDoubleFeatureValue, embed_w)
// DEFINE_GET_INDEX(CtrDoubleFeatureValue, embedx_w)
private:
double
ShowClickScore
(
double
show
,
double
click
);
private:
SparseValueSGDRule
*
_embed_sgd_rule
;
SparseValueSGDRule
*
_embedx_sgd_rule
;
float
_show_click_decay_rate
;
int32_t
_ssd_unseenday_threshold
;
bool
_show_scale
=
false
;
};
}
// namespace distributed
}
// namespace paddle
paddle/fluid/distributed/ps/table/ctr_dymf_accessor.cc
0 → 100644
View file @
f0ef3442
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/distributed/ps/table/ctr_dymf_accessor.h"
#include <gflags/gflags.h>
#include "glog/logging.h"
#include "paddle/fluid/string/string_helper.h"
namespace
paddle
{
namespace
distributed
{
// Creates the embed and embedx SGD rules named in the table config, copies
// their dimensions into common_feature_value, caches the decay / SSD settings
// and finally fills in the accessor info. Always returns 0.
int CtrDymfAccessor::Initialize() {
  // Rule for the embed weight; it is configured with a dimension of 1.
  const auto& embed_param = _config.embed_sgd_param();
  auto embed_rule_name = embed_param.name();
  _embed_sgd_rule = CREATE_PSCORE_CLASS(SparseValueSGDRule, embed_rule_name);
  _embed_sgd_rule->LoadConfig(embed_param, 1);

  // Rule for the embedx vector; it is configured with the embedx dimension.
  const auto& embedx_param = _config.embedx_sgd_param();
  auto embedx_rule_name = embedx_param.name();
  _embedx_sgd_rule = CREATE_PSCORE_CLASS(SparseValueSGDRule, embedx_rule_name);
  _embedx_sgd_rule->LoadConfig(embedx_param, _config.embedx_dim());

  // The value layout is driven by the embedx rule name and both rule sizes.
  common_feature_value.optimizer_name = embedx_rule_name;
  common_feature_value.embed_sgd_dim = _embed_sgd_rule->Dim();
  common_feature_value.embedx_dim = _config.embedx_dim();
  common_feature_value.embedx_sgd_dim = _embedx_sgd_rule->Dim();

  const auto& ctr_param = _config.ctr_accessor_param();
  _show_click_decay_rate = ctr_param.show_click_decay_rate();
  _ssd_unseenday_threshold = ctr_param.ssd_unseenday_threshold();
  // Only ever switched on here; a false config leaves the flag as it was.
  if (ctr_param.show_scale()) {
    _show_scale = true;
  }

  VLOG(0) << " INTO CtrDymfAccessor::Initialize(); embed_sgd_dim:"
          << common_feature_value.embed_sgd_dim
          << " embedx_dim:" << common_feature_value.embedx_dim
          << " embedx_sgd_dim:" << common_feature_value.embedx_sgd_dim;

  InitAccessorInfo();
  return 0;
}
// Fills _accessor_info with the element counts and byte sizes of the stored
// value, the pull (select) value, the push (update) value and the mf part.
void CtrDymfAccessor::InitAccessorInfo() {
  auto& info = _accessor_info;
  info.dim = common_feature_value.Dim();
  info.size = common_feature_value.Size();

  auto mf_width = _config.embedx_dim();
  VLOG(0) << "InitAccessorInfo embedx_dim:" << mf_width;

  // Pull value: 4 leading fields followed by the embedx weights.
  info.select_dim = 4 + mf_width;
  info.select_size = info.select_dim * sizeof(float);

  // Push value: 5 leading fields followed by the embedx gradients.
  info.update_dim = 5 + mf_width;
  info.update_size = info.update_dim * sizeof(float);

  // mf part: embedx weights plus the embedx optimizer state.
  info.mf_size =
      (mf_width + common_feature_value.embedx_sgd_dim) * sizeof(float);
}
// Applies the show/click time decay to `value` in place and reports whether
// the feature should be dropped: true when the decayed score is below the
// delete threshold or the feature has been unseen for too many days.
bool CtrDymfAccessor::Shrink(float* value) {
  const auto& ctr_param = _config.ctr_accessor_param();
  auto max_unseen_days = ctr_param.delete_after_unseen_days();
  auto min_score = ctr_param.delete_threshold();

  // Decay first, so the score below is computed from the decayed counters.
  float& show = common_feature_value.Show(value);
  float& click = common_feature_value.Click(value);
  show *= _show_click_decay_rate;
  click *= _show_click_decay_rate;

  auto score = ShowClickScore(show, click);
  auto unseen_days = common_feature_value.UnseenDays(value);
  return score < min_score || unseen_days > max_unseen_days;
}
// Decides whether `value` goes into the cache dump. The feature must reach
// the base score threshold and be recent enough; among those, only features
// whose show count exceeds `global_cache_threshold` are kept.
// `param` is not used.
bool CtrDymfAccessor::SaveCache(float* value,
                                int param,
                                double global_cache_threshold) {
  const auto& ctr_param = _config.ctr_accessor_param();
  auto base_threshold = ctr_param.base_threshold();
  auto delta_keep_days = ctr_param.delta_keep_days();

  const bool eligible =
      ShowClickScore(common_feature_value.Show(value),
                     common_feature_value.Click(value)) >= base_threshold &&
      common_feature_value.UnseenDays(value) <= delta_keep_days;
  if (!eligible) {
    return false;
  }
  return common_feature_value.Show(value) > global_cache_threshold;
}
// True when the feature has been unseen for more days than the configured
// SSD threshold.
bool CtrDymfAccessor::SaveSSD(float* value) {
  const auto unseen_days = common_feature_value.UnseenDays(value);
  return unseen_days > _ssd_unseenday_threshold;
}
// Decides whether `value` is dumped for save stage `param`.
//   param == 1 (xbox delta) / param == 2 (xbox base): dumped only when the
//     show/click score, the delta score and the unseen days all pass their
//     thresholds; for the base stage the delta threshold is forced to 0.
//   any other param (0 = save all, 3, 5, ...): always dumped.
// Side effect: a base-stage (param == 2) feature that passes the filter has
// its delta score reset to 0 here.
bool CtrDymfAccessor::Save(float* value, int param) {
  const bool is_xbox_base = (param == 2);
  if (param != 1 && !is_xbox_base) {
    return true;
  }

  const auto& ctr_param = _config.ctr_accessor_param();
  auto base_threshold = ctr_param.base_threshold();
  auto delta_threshold = ctr_param.delta_threshold();
  auto delta_keep_days = ctr_param.delta_keep_days();
  if (is_xbox_base) {
    delta_threshold = 0;
  }

  const bool passes =
      ShowClickScore(common_feature_value.Show(value),
                     common_feature_value.Click(value)) >= base_threshold &&
      common_feature_value.DeltaScore(value) >= delta_threshold &&
      common_feature_value.UnseenDays(value) <= delta_keep_days;
  if (!passes) {
    return false;
  }

  // NOTE(review): the original comment at this point says the reset should
  // happen after save so that retries see unmodified data, yet the base stage
  // resets the delta score right here. The delta stage is reset in
  // UpdateStatAfterSave instead. Behaviour kept as is; confirm intent.
  if (is_xbox_base) {
    common_feature_value.DeltaScore(value) = 0;
  }
  return true;
}
// Post-save bookkeeping for save stage `param`:
//   param == 1: a feature that passes the same filter Save() uses for the
//               delta stage gets its delta score reset to 0.
//   param == 3: the unseen-days counter is incremented.
//   otherwise : nothing happens.
void CtrDymfAccessor::UpdateStatAfterSave(float* value, int param) {
  if (param == 3) {
    common_feature_value.UnseenDays(value)++;
    return;
  }
  if (param != 1) {
    return;
  }

  const auto& ctr_param = _config.ctr_accessor_param();
  auto base_threshold = ctr_param.base_threshold();
  auto delta_threshold = ctr_param.delta_threshold();
  auto delta_keep_days = ctr_param.delta_keep_days();

  const bool was_saved =
      ShowClickScore(common_feature_value.Show(value),
                     common_feature_value.Click(value)) >= base_threshold &&
      common_feature_value.DeltaScore(value) >= delta_threshold &&
      common_feature_value.UnseenDays(value) <= delta_keep_days;
  if (was_saved) {
    common_feature_value.DeltaScore(value) = 0;
  }
}
// Initializes `num` freshly allocated feature values. The statistics are
// zeroed, slot and mf_dim are set to -1, and the embed / embedx weights and
// optimizer state are initialized by their SGD rules. The memory behind every
// values[i] must already be allocated by the caller. Always returns 0.
int32_t CtrDymfAccessor::Create(float** values, size_t num) {
  for (size_t idx = 0; idx < num; ++idx) {
    float* feature = values[idx];

    common_feature_value.UnseenDays(feature) = 0;
    common_feature_value.DeltaScore(feature) = 0;
    common_feature_value.Show(feature) = 0;
    common_feature_value.Click(feature) = 0;
    common_feature_value.Slot(feature) = -1;
    common_feature_value.MfDim(feature) = -1;

    _embed_sgd_rule->InitValue(
        feature + common_feature_value.EmbedWIndex(),
        feature + common_feature_value.EmbedG2SumIndex(),
        false);
    // adam embed init not zero, adagrad embed init zero
    _embedx_sgd_rule->InitValue(
        feature + common_feature_value.EmbedxWIndex(),
        feature + common_feature_value.EmbedxG2SumIndex(),
        false);
  }
  return 0;
}
// True when the weighted show/click score of `value` reaches the configured
// embedx threshold.
bool CtrDymfAccessor::NeedExtendMF(float* value) {
  const float shows = value[common_feature_value.ShowIndex()];
  const float clicks = value[common_feature_value.ClickIndex()];
  const float weighted =
      (shows - clicks) * _config.ctr_accessor_param().nonclk_coeff() +
      clicks * _config.ctr_accessor_param().click_coeff();
  return weighted >= _config.embedx_threshold();
}
// True when `size` is greater than the index at which the embedx optimizer
// state starts in the stored value layout.
bool CtrDymfAccessor::HasMF(int size) {
  const int mf_start = common_feature_value.EmbedxG2SumIndex();
  return size > mf_start;
}
// from CommonFeatureValue to CtrDymfPullValue
// Copies the pull fields of `num` stored values into `select_values`: show,
// click, embed_w and embedx_dim embedx weights.
// NOTE(review): the mf_dim slot of the pull value is not written here;
// confirm that callers do not rely on it.
// Always returns 0.
int32_t CtrDymfAccessor::Select(float** select_values,
                                const float** values,
                                size_t num) {
  const auto embedx_bytes = _config.embedx_dim() * sizeof(float);
  for (size_t idx = 0; idx < num; ++idx) {
    float* pull = select_values[idx];
    const float* stored = values[idx];

    CtrDymfPullValue::Show(pull) = stored[common_feature_value.ShowIndex()];
    CtrDymfPullValue::Click(pull) = stored[common_feature_value.ClickIndex()];
    CtrDymfPullValue::EmbedW(pull) =
        stored[common_feature_value.EmbedWIndex()];
    memcpy(CtrDymfPullValue::EmbedxW(pull),
           stored + common_feature_value.EmbedxWIndex(),
           embedx_bytes);
  }
  return 0;
}
// from CtrDymfPushValue to CtrDymfPushValue
// first dim: item
// second dim: field num
// Placeholder: merging push values on the CPU is not supported for this
// accessor, so the arguments are left untouched and 0 is returned.
int32_t CtrDymfAccessor::Merge(float** update_values,
                               const float** other_update_values,
                               size_t num) {
  return 0;
}
// from CtrDymfPushValue to CommonFeatureValue
// first dim: item
// second dim: field num
// Placeholder: applying push values on the CPU is not supported for this
// accessor, so the arguments are left untouched and 0 is returned.
int32_t CtrDymfAccessor::Update(float** update_values,
                                const float** push_values,
                                size_t num) {
  return 0;
}
// Decides whether a missing key should get a value created.
//   stage == 1 (push): `value` is read as a push value; a score <= 0 never
//     creates, a score >= 1 always creates, and anything in between creates
//     when a uniform random draw is below the score.
//   any other stage (0 = pull included): always true.
bool CtrDymfAccessor::CreateValue(int stage, const float* value) {
  if (stage != 1) {
    return true;
  }

  const float show = value[CtrDymfPushValue::ShowIndex()];
  const float click = value[CtrDymfPushValue::ClickIndex()];
  const auto score = ShowClickScore(show, click);
  if (score <= 0) {
    return false;
  }
  if (score >= 1) {
    return true;
  }
  return local_uniform_real_distribution<float>()(local_random_engine()) <
         score;
}
// Weighted score of a feature: non-clicked shows are weighted by nonclk_coeff
// and clicks by click_coeff, both taken from the accessor config.
float CtrDymfAccessor::ShowClickScore(float show, float click) {
  const auto& ctr_param = _config.ctr_accessor_param();
  auto nonclk_weight = ctr_param.nonclk_coeff();
  auto click_weight = ctr_param.click_coeff();
  return (show - click) * nonclk_weight + click * click_weight;
}
// Serializes a stored feature value as space-separated floats.
//
// Always written: v[0]..v[4], then every element from EmbedG2SumIndex() up to
// (but excluding) EmbedxG2SumIndex().
// Written only when the show/click score reaches the embedx threshold and
// `param` is greater than EmbedxG2SumIndex(): every element from
// EmbedxG2SumIndex() up to Dim(mf_dim), where mf_dim is read from the value.
//
// The stream is thread_local and reset on every call, so the returned string
// only contains the current value.
std::string CtrDymfAccessor::ParseToString(const float* v, int param) {
  thread_local std::ostringstream os;
  os.clear();
  os.str("");

  os << v[0] << " " << v[1] << " " << v[2] << " " << v[3] << " " << v[4];

  const int mf_begin = common_feature_value.EmbedxG2SumIndex();
  for (int i = common_feature_value.EmbedG2SumIndex(); i < mf_begin; i++) {
    os << " " << v[i];
  }

  const float show = v[common_feature_value.ShowIndex()];
  const float click = v[common_feature_value.ClickIndex()];
  const auto score = ShowClickScore(show, click);
  // Dim(int&) takes a non-const reference, so this has to stay a mutable
  // lvalue.
  int mf_dim = static_cast<int>(v[common_feature_value.MfDimIndex()]);

  if (score >= _config.embedx_threshold() && param > mf_begin) {
    // Computed once: Dim(mf_dim) compares the optimizer name on every call.
    const int mf_end = common_feature_value.Dim(mf_dim);
    for (int i = mf_begin; i < mf_end; ++i) {
      os << " " << v[i];
    }
  }
  return os.str();
}
// Parses the floats in `str` into `value` and returns how many were read.
// The CHECK fails when fewer than 7 numbers were parsed.
int CtrDymfAccessor::ParseFromString(const std::string& str, float* value) {
  const auto parsed = paddle::string::str_to_float(str.data(), value);
  CHECK(parsed >= 7) << "expect more than 7 real:" << parsed;
  return parsed;
}
}
// namespace distributed
}
// namespace paddle
paddle/fluid/distributed/ps/table/ctr_dymf_accessor.h
0 → 100644
View file @
f0ef3442
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <stdint.h>
#include <stdio.h>
#include <vector>
#include "paddle/fluid/distributed/common/registerer.h"
#include "paddle/fluid/distributed/ps/table/accessor.h"
#include "paddle/fluid/distributed/ps/table/sparse_sgd_rule.h"
#include "paddle/fluid/distributed/the_one_ps.pb.h"
namespace
paddle
{
namespace
distributed
{
// DownpourUnitAccessor
// Value accessor for CTR features whose embedx (mf) part has a per-feature
// dimension. It defines three flat float layouts (stored value, push value,
// pull value) and the hooks that create, select, save and shrink values.
class CtrDymfAccessor : public ValueAccessor {
 public:
  // Layout of a value as stored in the table. Index helpers depend on the
  // optimizer state sizes held in the data members below, which
  // CtrDymfAccessor::Initialize() fills in.
  struct CtrDymfFeatureValue {
    /*
       float unseen_days;
       float delta_score;
       float show;
       float click;
       float embed_w;
       std::vector<float> embed_g2sum;   // embed_sgd_dim floats
       float slot;
       float mf_dim;
       std::vector<float> embedx_g2sum;  // embedx_sgd_dim floats
       std::vector<float> embedx_w;      // embedx_dim floats
    */

    // Total number of floats, using the configured embedx_dim.
    int Dim() { return 7 + embed_sgd_dim + embedx_sgd_dim + embedx_dim; }
    // Size in bytes of one element; both arguments are ignored.
    int DimSize(size_t dim, int embedx_dim) { return sizeof(float); }
    // Total size in bytes, using the configured embedx_dim.
    int Size() { return Dim() * sizeof(float); }

    int UnseenDaysIndex() { return 0; }
    int DeltaScoreIndex() { return UnseenDaysIndex() + 1; }
    int ShowIndex() { return DeltaScoreIndex() + 1; }
    int ClickIndex() { return ShowIndex() + 1; }
    int EmbedWIndex() { return ClickIndex() + 1; }
    int EmbedG2SumIndex() { return EmbedWIndex() + 1; }
    int SlotIndex() { return EmbedG2SumIndex() + embed_sgd_dim; }
    int MfDimIndex() { return SlotIndex() + 1; }
    int EmbedxG2SumIndex() { return MfDimIndex() + 1; }
    int EmbedxWIndex() { return EmbedxG2SumIndex() + embedx_sgd_dim; }

    // Total number of floats for a value whose embedx width is `mf_dim`. The
    // embedx optimizer state size is derived from optimizer_name:
    // 2 * mf_dim + 2 for SparseAdamSGDRule, 4 for SparseSharedAdamSGDRule,
    // 1 otherwise.
    int Dim(int& mf_dim) {
      int tmp_embedx_sgd_dim = 1;
      if (optimizer_name == "SparseAdamSGDRule") {  // adam
        tmp_embedx_sgd_dim = mf_dim * 2 + 2;
      } else if (optimizer_name == "SparseSharedAdamSGDRule") {  // shared_adam
        tmp_embedx_sgd_dim = 4;
      }
      return 7 + embed_sgd_dim + tmp_embedx_sgd_dim + mf_dim;
    }

    // Total number of bytes for a value whose embedx width is `mf_dim`.
    int Size(int& mf_dim) { return (Dim(mf_dim)) * sizeof(float); }

    // Field accessors: each returns a reference into the value array.
    float& UnseenDays(float* val) { return val[UnseenDaysIndex()]; }
    float& DeltaScore(float* val) { return val[DeltaScoreIndex()]; }
    float& Show(float* val) { return val[ShowIndex()]; }
    float& Click(float* val) { return val[ClickIndex()]; }
    float& Slot(float* val) { return val[SlotIndex()]; }
    float& MfDim(float* val) { return val[MfDimIndex()]; }
    float& EmbedW(float* val) { return val[EmbedWIndex()]; }
    float& EmbedG2Sum(float* val) { return val[EmbedG2SumIndex()]; }
    float& EmbedxG2Sum(float* val) { return val[EmbedxG2SumIndex()]; }
    float& EmbedxW(float* val) { return val[EmbedxWIndex()]; }

    int embed_sgd_dim;
    int embedx_dim;
    int embedx_sgd_dim;
    std::string optimizer_name;
  };

  // Layout of a value pushed to the table.
  struct CtrDymfPushValue {
    /*
       float slot;
       float show;
       float click;
       float mf_dim;
       float embed_g;
       std::vector<float> embedx_g;
    */

    static int Dim(int embedx_dim) { return 5 + embedx_dim; }
    // Size in bytes of one element; both arguments are ignored.
    static int DimSize(int dim, int embedx_dim) { return sizeof(float); }
    static int Size(int embedx_dim) { return Dim(embedx_dim) * sizeof(float); }

    static int SlotIndex() { return 0; }
    static int ShowIndex() { return CtrDymfPushValue::SlotIndex() + 1; }
    static int ClickIndex() { return CtrDymfPushValue::ShowIndex() + 1; }
    static int MfDimIndex() { return CtrDymfPushValue::ClickIndex() + 1; }
    static int EmbedGIndex() { return CtrDymfPushValue::MfDimIndex() + 1; }
    static int EmbedxGIndex() { return CtrDymfPushValue::EmbedGIndex() + 1; }

    static float& Slot(float* val) {
      return val[CtrDymfPushValue::SlotIndex()];
    }
    static float& Show(float* val) {
      return val[CtrDymfPushValue::ShowIndex()];
    }
    static float& Click(float* val) {
      return val[CtrDymfPushValue::ClickIndex()];
    }
    static float& MfDim(float* val) {
      return val[CtrDymfPushValue::MfDimIndex()];
    }
    static float& EmbedG(float* val) {
      return val[CtrDymfPushValue::EmbedGIndex()];
    }
    // Pointer to the first embedx gradient.
    static float* EmbedxG(float* val) {
      return val + CtrDymfPushValue::EmbedxGIndex();
    }
  };

  // Layout of a value pulled from the table.
  struct CtrDymfPullValue {
    /*
       float show;
       float click;
       float mf_dim;
       float embed_w;
       std::vector<float> embedx_w;
    */

    static int Dim(int embedx_dim) { return 4 + embedx_dim; }
    // Size in bytes of one element; the argument is ignored.
    static int DimSize(size_t dim) { return sizeof(float); }
    static int Size(int embedx_dim) { return Dim(embedx_dim) * sizeof(float); }

    static int ShowIndex() { return 0; }
    static int ClickIndex() { return 1; }
    static int MfDimIndex() { return 2; }
    static int EmbedWIndex() { return 3; }
    static int EmbedxWIndex() { return 4; }

    static float& Show(float* val) {
      return val[CtrDymfPullValue::ShowIndex()];
    }
    static float& Click(float* val) {
      return val[CtrDymfPullValue::ClickIndex()];
    }
    static float& MfDim(float* val) {
      return val[CtrDymfPullValue::MfDimIndex()];
    }
    static float& EmbedW(float* val) {
      return val[CtrDymfPullValue::EmbedWIndex()];
    }
    // Pointer to the first embedx weight.
    static float* EmbedxW(float* val) {
      return val + CtrDymfPullValue::EmbedxWIndex();
    }
  };

  CtrDymfAccessor() {}
  // NOTE(review): this destructor does not release _embed_sgd_rule or
  // _embedx_sgd_rule; confirm who owns the objects they point to.
  virtual ~CtrDymfAccessor() {}

  virtual int Initialize();
  // Initializes AccessorInfo.
  virtual void InitAccessorInfo();
  // Decides whether this value should be shrunk (removed).
  virtual bool Shrink(float* value);
  // Decides whether this value is saved to SSD.
  // virtual bool save_ssd(float* value);
  virtual bool NeedExtendMF(float* value);
  virtual bool HasMF(int size);
  // Decides whether this value is dumped during the save stage. `param`
  // identifies the save stage, e.g. downpour's xbox and batch_model:
  // param = 0, save all feature
  // param = 1, save delta feature
  // param = 2, save xbox base feature
  bool Save(float* value, int param) override;
  bool SaveCache(float* value,
                 int param,
                 double global_cache_threshold) override;
  bool SaveSSD(float* value) override;
  // update delta_score and unseen_days after save
  void UpdateStatAfterSave(float* value, int param) override;
  // Generates initial values for keys that do not exist yet.
  // The memory behind `value` must already be allocated by the caller.
  virtual int32_t Create(float** value, size_t num);
  // Selects fields from `values` into `select_values`.
  virtual int32_t Select(float** select_values,
                         const float** values,
                         size_t num);
  // Aggregates update_values together.
  virtual int32_t Merge(float** update_values,
                        const float** other_update_values,
                        size_t num);
  // Aggregates update_values together, using it.next to decide whether to
  // move on to the next key.
  // virtual int32_t Merge(float** update_values, iterator it);
  // Applies update_values to values.
  virtual int32_t Update(float** values,
                         const float** update_values,
                         size_t num);

  std::string ParseToString(const float* value, int param) override;
  int32_t ParseFromString(const std::string& str, float* v) override;
  virtual bool CreateValue(int type, const float* value);

  // This interface is currently only used to fetch show; any other name
  // yields 0.0.
  float GetField(float* value, const std::string& name) override {
    // CHECK(name == "show");
    if (name == "show") {
      return common_feature_value.Show(value);
    }
    return 0.0;
  }

 private:
  // float ShowClickScore(float show, float click);

  // SparseValueSGDRule* _embed_sgd_rule;
  // SparseValueSGDRule* _embedx_sgd_rule;
  // CtrDymfFeatureValue common_feature_value;
  // Defaults only give these a defined value before Initialize() runs.
  float _show_click_decay_rate = 0.0f;
  int32_t _ssd_unseenday_threshold = 0;
  bool _show_scale = false;

 public:  // TODO(zhaocaibei123): it should be private, but we make it public
          // for unit test
  CtrDymfFeatureValue common_feature_value;
  float ShowClickScore(float show, float click);
  // Set by Initialize(); null until then.
  SparseValueSGDRule* _embed_sgd_rule = nullptr;
  SparseValueSGDRule* _embedx_sgd_rule = nullptr;
};
}
// namespace distributed
}
// namespace paddle
paddle/fluid/distributed/ps/table/depends/dense.h
0 → 100644
View file @
f0ef3442
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <math.h> // for sqrt in CPU and CUDA
#include <functional>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "gflags/gflags.h"
#include "paddle/fluid/distributed/common/utils.h"
namespace
paddle
{
namespace
distributed
{
// dense optimizer
// TODO(tangwei12) integrate with sparse optimizer later.
// Abstract base of the dense-table optimizers. A concrete optimizer binds
// itself to rows of the table's value storage in its constructor and applies
// one slice of a pushed gradient in Update().
class DenseOptimizer {
 public:
  DenseOptimizer() {}
  explicit DenseOptimizer(const CommonAccessorParameter& accessor,
                          std::vector<std::vector<float>>* values) {}
  // Virtual so that destroying a derived optimizer through a DenseOptimizer
  // pointer is well defined.
  virtual ~DenseOptimizer() = default;

  // Applies the elements [begin, end) of `update_values` to the bound
  // parameters.
  virtual void Update(const float* update_values,
                      size_t num,
                      int begin,
                      int end) = 0;

  // Stores the pointer only; `lr` must stay valid for as long as Update() may
  // read it.
  virtual void SetGlobalLR(float* lr) { global_learning_rate_ = lr; }

 protected:
  // Null until SetGlobalLR() is called.
  float* global_learning_rate_ = nullptr;
};
// sum calc for dense tensor
// "Sum" optimizer for dense tensors: Update() passes the pushed slice and the
// parameter slice to VADD and writes the result back into the parameter.
class DSUM : public DenseOptimizer {
 public:
  // Binds `param` to the row of `values` whose name in accessor.params() is
  // "Param".
  explicit DSUM(const CommonAccessorParameter& accessor,
                std::vector<std::vector<float>>* values) {
    auto& names = accessor.params();
    for (int x = 0; x < static_cast<int>(names.size()); ++x) {
      if (names[x] == "Param") {
        param = (*values)[x].data();
      }
    }
  }

  // Processes elements [begin, end); `num` is not used.
  void Update(const float* update_values,
              size_t num,
              int begin,
              int end) override {
    auto update_numel = end - begin;
    GetBlas<float>().VADD(
        update_numel, update_values + begin, param + begin, param + begin);
  }

  // Stays null when the config has no "Param" entry.
  float* param = nullptr;
};
// sgd optimizer for dense tensor
// SGD optimizer for dense tensors.
class DSGD : public DenseOptimizer {
 public:
  // Binds `learning_rate` and `param` to the rows of `values` named
  // "LearningRate" and "Param" in accessor.params().
  explicit DSGD(const CommonAccessorParameter& accessor,
                std::vector<std::vector<float>>* values) {
    auto& names = accessor.params();
    for (int x = 0; x < static_cast<int>(names.size()); ++x) {
      if (names[x] == "LearningRate") {
        learning_rate = (*values)[x].data();
      }
      if (names[x] == "Param") {
        param = (*values)[x].data();
      }
    }
  }

  // Processes elements [begin, end); `num` is not used. The step size is the
  // product of the global learning rate and the table's own learning rate.
  // The gradient slice is copied, passed to SCAL with that step size, and
  // then passed to VSUB together with the parameter slice.
  void Update(const float* update_values,
              size_t num,
              int begin,
              int end) override {
    auto update_numel = end - begin;
    std::vector<float> grads(update_numel);

    auto blas = GetBlas<float>();
    float lr = *(global_learning_rate_) * (*learning_rate);
    blas.VCOPY(update_numel, update_values + begin, grads.data());
    blas.SCAL(update_numel, lr, grads.data());
    blas.VSUB(update_numel, param + begin, grads.data(), param + begin);
  }

  // Both stay null when the matching config entry is missing.
  float* learning_rate = nullptr;
  float* param = nullptr;
};
// adam optimizer for dense tensor
// TODO(zhaocaibei123): add CHECK(memory_dense_table.task_pool_size_) == 1
// Adam optimizer for dense tensors. beta1, beta2 and epsilon are fixed in the
// constructor; the running beta powers live in the table rows "Beta1Pow" and
// "Beta2Pow".
class DAdam : public DenseOptimizer {
 public:
  // Binds the state pointers to the rows of `values` named "LearningRate",
  // "Param", "Moment1", "Moment2", "Beta1Pow" and "Beta2Pow".
  explicit DAdam(const CommonAccessorParameter& accessor,
                 std::vector<std::vector<float>>* values) {
    auto& names = accessor.params();
    for (int x = 0; x < static_cast<int>(names.size()); ++x) {
      if (names[x] == "LearningRate") {
        learning_rate = (*values)[x].data();
      }
      if (names[x] == "Param") {
        param = (*values)[x].data();
      }
      if (names[x] == "Moment1") {
        moment1 = (*values)[x].data();
      }
      if (names[x] == "Moment2") {
        moment2 = (*values)[x].data();
      }
      if (names[x] == "Beta1Pow") {
        beta1_pow = (*values)[x].data();
      }
      if (names[x] == "Beta2Pow") {
        beta2_pow = (*values)[x].data();
      }
    }

    // add attr later
    beta1 = 0.9;
    beta2 = 0.999;
    epsilon = 1.0e-8;
  }

  // make sure memory_dense_table.task_pool_size_ == 1;
  // otherwise, task_pool_size_ times beta1_pow/beta2_pow multiplication
  //
  // Processes elements [begin, end); `num` is not used. Every call multiplies
  // beta1_pow[0] / beta2_pow[0] by beta1 / beta2 once, regardless of the
  // slice size.
  void Update(const float* update_values,
              size_t num,
              int begin,
              int end) override {
    auto update_numel = end - begin;
    std::vector<float> grad(update_numel);
    std::vector<float> grad2(update_numel);
    std::vector<float> tmp(update_numel);

    auto blas = GetBlas<float>();
    // Two scratch copies of the gradient slice, one for each moment.
    blas.VCOPY(update_numel, update_values + begin, grad.data());
    blas.VCOPY(update_numel, update_values + begin, grad2.data());

    blas.SCAL(update_numel, 1 - beta1, grad.data());
    blas.VSQUARE(update_numel, grad2.data(), grad2.data());
    blas.SCAL(update_numel, 1 - beta2, grad2.data());

    // Moment updates, written back into the table rows.
    blas.SCAL(update_numel, beta1, moment1 + begin);
    blas.VADD(update_numel, moment1 + begin, grad.data(), moment1 + begin);
    blas.SCAL(update_numel, beta2, moment2 + begin);
    blas.VADD(update_numel, moment2 + begin, grad2.data(), moment2 + begin);

    beta1_pow[0] = beta1_pow[0] * beta1;
    beta2_pow[0] = beta2_pow[0] * beta2;

    // Step size with the bias-correction factor folded in.
    float lr_ = *(global_learning_rate_)*learning_rate[0];
    lr_ *= sqrt(1 - beta2_pow[0]) / (1 - beta1_pow[0]);

    float* tmp_ = tmp.data();
    float eps_ = epsilon * sqrt(1 - beta2_pow[0]);

    SQRT<float>(update_numel, moment2 + begin, tmp_);
    ADD<float>(update_numel, tmp_, eps_, tmp_);

    blas.VDIV(update_numel, moment1 + begin, tmp_, tmp_);
    blas.SCAL(update_numel, lr_, tmp_);
    blas.VSUB(update_numel, param + begin, tmp_, param + begin);
  }

  // Table-backed state; each pointer stays null when its config entry is
  // missing.
  float* learning_rate = nullptr;
  float* param = nullptr;
  float* moment1 = nullptr;
  float* moment2 = nullptr;
  float* beta1_pow = nullptr;
  float* beta2_pow = nullptr;

  // Hyper-parameters, assigned in the constructor.
  float beta1;
  float beta2;
  float epsilon;
};
// AdamD2Sum optimizer for dense tensor
// AdamD2Sum optimizer for dense tensors, implemented with Eigen maps laid
// over the table rows.
class DAdamD2Sum : public DenseOptimizer {
 public:
  // Binds the state pointers to the rows of `values` named "LearningRate",
  // "Param", "Moment", "G2Sum", "D2Sum", "MomentDecayRate", "AdaDecayRate" and
  // "AdaEpsilon". lr_hardcode is set here but not read anywhere in this
  // class.
  explicit DAdamD2Sum(const CommonAccessorParameter& accessor,
                      std::vector<std::vector<float>>* values) {
    lr_hardcode = 5e-6;
    auto& names = accessor.params();
    for (int x = 0; x < static_cast<int>(names.size()); ++x) {
      if (names[x] == "LearningRate") {
        learning_rate = (*values)[x].data();
      } else if (names[x] == "Param") {
        param = (*values)[x].data();
      } else if (names[x] == "Moment") {
        mom_velocity = (*values)[x].data();
      } else if (names[x] == "G2Sum") {
        ada_g2sum = (*values)[x].data();
      } else if (names[x] == "D2Sum") {
        ada_d2sum = (*values)[x].data();
      } else if (names[x] == "MomentDecayRate") {
        mom_decay_rate = (*values)[x].data();
      } else if (names[x] == "AdaDecayRate") {
        ada_decay_rate = (*values)[x].data();
      } else if (names[x] == "AdaEpsilon") {
        ada_epsilon = (*values)[x].data();
      }
    }
  }

  // Processes elements [begin, end); `num` is not used. The global learning
  // rate is not read here; only learning_rate[0] is applied.
  void Update(const float* update_values,
              size_t num,
              int begin,
              int end) override {
    auto update_numel = end - begin;
    // 1 x update_numel views over the table rows and the gradient slice.
    Eigen::Map<Eigen::MatrixXf> mat_ada_g2sum(
        ada_g2sum + begin, 1, update_numel);
    Eigen::Map<Eigen::MatrixXf> mat_ada_d2sum(
        ada_d2sum + begin, 1, update_numel);
    Eigen::Map<Eigen::MatrixXf> mat_mom_velocity(
        mom_velocity + begin, 1, update_numel);
    Eigen::Map<Eigen::MatrixXf> mat_w(param + begin, 1, update_numel);
    Eigen::Map<const Eigen::MatrixXf> mat_grad(
        update_values + begin, 1, update_numel);

    // Decayed update count and decayed sum of squared gradients.
    mat_ada_d2sum = (mat_ada_d2sum * ada_decay_rate[0]).array() + 1;
    mat_ada_g2sum =
        (mat_ada_g2sum * ada_decay_rate[0]) + mat_grad.cwiseProduct(mat_grad);

    // Scratch buffer reused across calls on the same thread.
    thread_local std::vector<float> scale_vec;
    scale_vec.resize(update_numel);
    Eigen::Map<Eigen::MatrixXf> scale(scale_vec.data(), 1, update_numel);
    memcpy(
        scale_vec.data(), mat_ada_d2sum.data(), sizeof(float) * update_numel);

    // scale = sqrt((d2sum + eps * d2sum) / (g2sum + eps * d2sum))
    scale = scale.array() * ada_epsilon[0];
    scale = (mat_ada_d2sum + scale).cwiseQuotient(mat_ada_g2sum + scale);
    scale = scale.cwiseSqrt();

    mat_mom_velocity =
        (mat_mom_velocity + mat_grad) * mom_decay_rate[0] - mat_grad;
    mat_w += learning_rate[0] * mat_mom_velocity.cwiseProduct(scale);
  }

  // Table-backed state; each pointer stays null when its config entry is
  // missing.
  float* learning_rate = nullptr;
  float lr_hardcode;

  float* param = nullptr;
  float* mom_velocity = nullptr;
  float* ada_g2sum = nullptr;
  float* ada_d2sum = nullptr;

  float* mom_decay_rate = nullptr;
  float* ada_decay_rate = nullptr;
  float* ada_epsilon = nullptr;
};
// for data_norm
// Summary optimizer (the original comment says "for data_norm"): decays the
// stored summary and adds the pushed values.
class DSummary : public DenseOptimizer {
 public:
  // Binds `param` and `summary_decay_rate` to the rows of `values` named
  // "Param" and "SummaryDecayRate" in accessor.params().
  explicit DSummary(const CommonAccessorParameter& accessor,
                    std::vector<std::vector<float>>* values) {
    auto& names = accessor.params();
    for (int x = 0; x < static_cast<int>(names.size()); ++x) {
      if (names[x] == "Param") {
        param = (*values)[x].data();
      } else if (names[x] == "SummaryDecayRate") {
        summary_decay_rate = (*values)[x].data();
      }
    }
  }

  // Processes elements [begin, end); `num` is not used.
  // The decay factor applied is the member constant summary_decay_rate_d; the
  // table-backed summary_decay_rate pointer is bound above but not read here.
  void Update(const float* update_values,
              size_t num,
              int begin,
              int end) override {
    auto update_numel = end - begin;
    Eigen::Map<Eigen::MatrixXf> mat_w(param + begin, 1, update_numel);
    Eigen::Map<const Eigen::MatrixXf> mat_grad(
        update_values + begin, 1, update_numel);
    mat_w = mat_w * summary_decay_rate_d + mat_grad;
  }

  // Both pointers stay null when the matching config entry is missing.
  float* summary_decay_rate = nullptr;
  double summary_decay_rate_d = 0.999999;
  float* param = nullptr;
};
}
// namespace distributed
}
// namespace paddle
paddle/fluid/distributed/ps/table/depends/feature_value.h
0 → 100644
View file @
f0ef3442
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <mct/hash-map.hpp>
#include <vector>
#include "gflags/gflags.h"
#include "paddle/fluid/distributed/common/chunk_allocator.h"
namespace
paddle
{
namespace
distributed
{
// Number of bits used to pick a bucket inside one sparse-table shard.
static const int CTR_SPARSE_SHARD_BUCKET_NUM_BITS = 6;
// Bucket count per shard: 2 ^ CTR_SPARSE_SHARD_BUCKET_NUM_BITS.
static const size_t CTR_SPARSE_SHARD_BUCKET_NUM =
    size_t{1} << CTR_SPARSE_SHARD_BUCKET_NUM_BITS;
// Resizable float buffer holding one feature value; a thin wrapper around
// std::vector<float>.
class FixedFeatureValue {
 public:
  FixedFeatureValue() {}
  ~FixedFeatureValue() {}

  // Pointer to the first float. As with std::vector::data(), the pointer may
  // become invalid after resize() or shrink_to_fit().
  float* data() { return _data.data(); }
  // Read-only view for callers that hold a const reference.
  const float* data() const { return _data.data(); }

  // Number of floats currently stored.
  size_t size() const { return _data.size(); }

  // Grows or shrinks to `size` floats; new elements are zero-initialized.
  void resize(size_t size) { _data.resize(size); }

  // Forwards to std::vector::shrink_to_fit().
  void shrink_to_fit() { _data.shrink_to_fit(); }

 private:
  std::vector<float> _data;
};
template
<
class
KEY
,
class
VALUE
>
struct
alignas
(
64
)
SparseTableShard
{
public:
// Hash map used for every bucket: it maps a KEY to an mct::Pointer, which the
// shard casts to VALUE* when it is read back.
using map_type =
    typename mct::closed_hash_map<KEY, mct::Pointer, std::hash<KEY>>;
// Forward iterator over all buckets of a shard. It is an aggregate of the
// position inside the current bucket, the index of that bucket and the bucket
// array, and is built with brace initialization in that member order.
struct iterator {
  typename map_type::iterator it;
  size_t bucket;
  map_type* buckets;

  // Equality only looks at the in-bucket position.
  friend bool operator==(const iterator& a, const iterator& b) {
    return a.it == b.it;
  }
  friend bool operator!=(const iterator& a, const iterator& b) {
    return a.it != b.it;
  }

  const KEY& key() const { return it->first; }
  // The map stores an opaque pointer; it is cast back to the value type.
  VALUE* value_ptr() const { return (VALUE*)(void*)it->second; }  // NOLINT
  VALUE& value() const { return *(VALUE*)(void*)it->second; }     // NOLINT

  // Advances by one element and then skips over exhausted buckets. The last
  // bucket is never left, so its end() is the overall end position.
  iterator& operator++() {
    ++it;
    for (; it == buckets[bucket].end() &&
           bucket + 1 < CTR_SPARSE_SHARD_BUCKET_NUM;) {
      ++bucket;
      it = buckets[bucket].begin();
    }
    return *this;
  }

  // Post-increment: returns the position held before advancing.
  iterator operator++(int) {
    iterator previous = *this;
    ++*this;
    return previous;
  }
};
// Iterator confined to a single bucket; a thin aggregate around the bucket
// map's own iterator.
struct local_iterator {
  typename map_type::iterator it;

  friend bool operator==(const local_iterator& a, const local_iterator& b) {
    return a.it == b.it;
  }
  friend bool operator!=(const local_iterator& a, const local_iterator& b) {
    return a.it != b.it;
  }

  const KEY& key() const { return it->first; }
  // The map stores an opaque pointer; it is cast back to the value type.
  VALUE& value() const { return *(VALUE*)(void*)it->second; }  // NOLINT

  local_iterator& operator++() {
    ++it;
    return *this;
  }

  // Post-increment: returns the position held before advancing.
  local_iterator operator++(int) {
    local_iterator previous{it++};
    return previous;
  }
};
// Releases every stored value and empties all buckets through clear().
~SparseTableShard() {
  clear();
}
// True when the allocator reports no values.
bool empty() {
  const auto live_values = _alloc.size();
  return live_values == 0;
}
// Number of values in the shard, as reported by the allocator.
size_t size() {
  return _alloc.size();
}
void
set_max_load_factor
(
float
x
)
{
for
(
size_t
bucket
=
0
;
bucket
<
CTR_SPARSE_SHARD_BUCKET_NUM
;
bucket
++
)
{
_buckets
[
bucket
].
max_load_factor
(
x
);
}
}
// Number of buckets in a shard; a compile-time constant.
size_t bucket_count() {
  return CTR_SPARSE_SHARD_BUCKET_NUM;
}
size_t
bucket_size
(
size_t
bucket
)
{
return
_buckets
[
bucket
].
size
();
}
void
clear
()
{
for
(
size_t
bucket
=
0
;
bucket
<
CTR_SPARSE_SHARD_BUCKET_NUM
;
bucket
++
)
{
map_type
&
data
=
_buckets
[
bucket
];
for
(
auto
it
=
data
.
begin
();
it
!=
data
.
end
();
++
it
)
{
_alloc
.
release
((
VALUE
*
)(
void
*
)
it
->
second
);
// NOLINT
}
data
.
clear
();
}
}
// Iterator to the first entry of the shard: starts at bucket 0 and skips
// empty buckets. If every bucket is empty the result is the last bucket's
// end position, which is what end() returns.
iterator begin() {
  size_t bucket = 0;
  auto it = _buckets[bucket].begin();
  while (true) {
    if (!(it == _buckets[bucket].end())) {
      break;
    }
    if (!(bucket + 1 < CTR_SPARSE_SHARD_BUCKET_NUM)) {
      break;
    }
    it = _buckets[++bucket].begin();
  }
  return {it, bucket, _buckets};
}
// Past-the-end iterator for the whole shard: the end of the last bucket.
iterator end() {
  const size_t last_bucket = CTR_SPARSE_SHARD_BUCKET_NUM - 1;
  return {_buckets[last_bucket].end(), last_bucket, _buckets};
}
// First position inside bucket `bucket`. The index is not range-checked.
local_iterator begin(size_t bucket) {
  local_iterator first = {_buckets[bucket].begin()};
  return first;
}
// Past-the-end position of bucket `bucket`. The index is not range-checked.
local_iterator end(size_t bucket) {
  local_iterator last = {_buckets[bucket].end()};
  return last;
}
// Looks up `key`. The hash is computed once and reused both to choose the
// bucket and for the lookup inside that bucket's map.
// Returns end() when the key is absent.
iterator find(const KEY& key) {
  size_t hash = _hasher(key);
  size_t bucket = compute_bucket(hash);
  map_type& bucket_map = _buckets[bucket];
  auto hit = bucket_map.find_with_hash(key, hash);
  if (!(hit == bucket_map.end())) {
    return {hit, bucket, _buckets};
  }
  return end();
}
// Reference to the value stored under `key`; when the key is new, emplace()
// creates the value with no constructor arguments first.
VALUE& operator[](const KEY& key) {
  std::pair<iterator, bool> slot = emplace(key);
  return slot.first.value();
}
// Copy-insert: forwards to emplace(). The bool in the result is true when a
// new entry was created.
std::pair<iterator, bool> insert(const KEY& key, const VALUE& val) {
  std::pair<iterator, bool> outcome = emplace(key, val);
  return outcome;
}
// Move-insert: forwards to emplace() with `val` passed on as an rvalue. The
// bool in the result is true when a new entry was created.
std::pair<iterator, bool> insert(const KEY& key, VALUE&& val) {
  std::pair<iterator, bool> outcome = emplace(key, std::move(val));
  return outcome;
}
// Inserts `key` if absent. A null placeholder is stored in the map first;
// only when the insertion really happened is a VALUE acquired from the
// allocator (built from `args`) and written into the slot. For an existing
// key, `args` are left unused and the existing entry is returned.
// Returns {iterator to the entry, true if newly inserted}.
template <class... ARGS>
std::pair<iterator, bool> emplace(const KEY& key, ARGS&&... args) {
  size_t hash = _hasher(key);
  size_t bucket = compute_bucket(hash);
  auto inserted = _buckets[bucket].insert_with_hash({key, NULL}, hash);
  if (inserted.second) {
    inserted.first->second = _alloc.acquire(std::forward<ARGS>(args)...);
  }
  return {{inserted.first, bucket, _buckets}, inserted.second};
}
// Removes the entry at `it`: the value goes back to the allocator, then the
// map entry is erased. Returns an iterator to the next entry of the shard,
// skipping exhausted buckets in the same way operator++ does.
iterator erase(iterator it) {
  _alloc.release(it.value_ptr());
  size_t bucket = it.bucket;
  auto next = _buckets[bucket].erase(it.it);
  while (true) {
    if (!(next == _buckets[bucket].end())) {
      break;
    }
    if (!(bucket + 1 < CTR_SPARSE_SHARD_BUCKET_NUM)) {
      break;
    }
    next = _buckets[++bucket].begin();
  }
  return {next, bucket, _buckets};
}
// Removes the entry at `it` without producing a successor iterator: releases
// the value, then calls the bucket map's quick_erase.
void quick_erase(iterator it) {
  _alloc.release(it.value_ptr());
  map_type& owner = _buckets[it.bucket];
  owner.quick_erase(it.it);
}
// Removes the entry at `it` from bucket `bucket` and returns the position the
// bucket map's erase yields. The caller supplies the bucket that owns `it`.
local_iterator erase(size_t bucket, local_iterator it) {
  VALUE* doomed = (VALUE*)(void*)it.it->second;  // NOLINT
  _alloc.release(doomed);
  local_iterator next = {_buckets[bucket].erase(it.it)};
  return next;
}
void
quick_erase
(
size_t
bucket
,
local_iterator
it
)
{
_alloc
.
release
((
VALUE
*
)(
void
*
)
it
.
it
->
second
);
// NOLINT
_buckets
[
bucket
].
quick_erase
(
it
.
it
);
}
// Removes `key` if present. Returns how many entries were removed (0 or 1).
size_t erase(const KEY& key) {
  auto position = find(key);
  if (!(position == end())) {
    quick_erase(position);
    return 1;
  }
  return 0;
}
// Maps a hash to a bucket index using its most significant
// CTR_SPARSE_SHARD_BUCKET_NUM_BITS bits. With a single bucket the answer is
// always 0 and no shift is performed.
size_t compute_bucket(size_t hash) {
  if (CTR_SPARSE_SHARD_BUCKET_NUM == 1) {
    return 0;
  }
  const size_t shift = sizeof(size_t) * 8 - CTR_SPARSE_SHARD_BUCKET_NUM_BITS;
  return hash >> shift;
}
// Data members of the shard.
private:
// One hash map per bucket; find()/emplace() index it with compute_bucket().
map_type
_buckets
[
CTR_SPARSE_SHARD_BUCKET_NUM
];
// Allocator for VALUE objects: emplace() acquires from it, erase()/clear()
// release to it, and size()/empty() report its element count.
ChunkAllocator
<
VALUE
>
_alloc
;
// Hash functor applied to a key before bucket selection and lookup.
std
::
hash
<
KEY
>
_hasher
;
};
}
// namespace distributed
}
// namespace paddle
paddle/fluid/distributed/ps/table/depends/geo_recorder.h
0 → 100644
View file @
f0ef3442
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <ThreadPool.h>
#include <future> // NOLINT
#include <memory>
#include <unordered_set>
#include <vector>
namespace
paddle
{
namespace
distributed
{
// A set of row ids whose mutations are all funnelled through a one-thread
// pool, so accesses to `set_` are serialised by the task queue rather than by
// a lock. Both operations are asynchronous and return a future that becomes
// ready once the queued task has run.
class ConcurrentSet {
 public:
  ConcurrentSet() : pool_(new ::ThreadPool(1)) {}
  ~ConcurrentSet() {}

  // Queues insertion of every id in `rows`. The vector is copied into the
  // task, so the caller's vector need not outlive the call.
  std::future<void> Update(const std::vector<uint64_t>& rows) {
    auto task = [this, rows] {
      for (auto row : rows) {
        set_.insert(row);
      }
    };
    return pool_->enqueue(std::move(task));
  }

  // Queues a task that overwrites `*result` with the ids currently in the set
  // and then empties the set. `result` must stay valid until the returned
  // future is ready.
  std::future<void> GetAndClear(std::vector<uint64_t>* result) {
    // Capture the pointer BY VALUE. Capturing `&result` would reference this
    // function's parameter, which is gone by the time the task runs on the
    // pool thread.
    auto task = [this, result] {
      result->clear();
      for (auto& id : set_) {
        result->push_back(id);
      }
      set_.clear();
    };
    return pool_->enqueue(std::move(task));
  }

 private:
  // Declared before pool_, so it is destroyed after the pool.
  std::unordered_set<uint64_t> set_;
  std::unique_ptr<::ThreadPool> pool_{nullptr};
};
// Keeps one ConcurrentSet of row ids per trainer. Update() records the same
// rows in every trainer's set; GetAndClear() drains one trainer's set.
class GeoRecorder {
 public:
  // Creates `trainer_num` empty per-trainer sets.
  explicit GeoRecorder(int trainer_num) : trainer_num_(trainer_num) {
    trainer_rows_.reserve(trainer_num);
    int remaining = trainer_num;
    while (remaining > 0) {
      trainer_rows_.emplace_back(new ConcurrentSet());
      --remaining;
    }
  }

  ~GeoRecorder() = default;

  // Records `update_rows` in every trainer's set and blocks until all of the
  // queued updates have completed.
  void Update(const std::vector<uint64_t>& update_rows) {
    VLOG(3) << " row size: " << update_rows.size();

    std::vector<std::future<void>> pending;
    for (const auto& trainer_set : trainer_rows_) {
      pending.push_back(trainer_set->Update(update_rows));
    }
    for (size_t i = 0; i < pending.size(); ++i) {
      pending[i].wait();
    }
  }

  // Moves the ids recorded for `trainer_id` into `*result`, leaving that
  // trainer's set empty; blocks until done. An out-of-range id makes the
  // vector's at() throw.
  void GetAndClear(uint32_t trainer_id, std::vector<uint64_t>* result) {
    VLOG(3) << "GetAndClear for trainer: " << trainer_id;
    std::future<void> done = trainer_rows_.at(trainer_id)->GetAndClear(result);
    done.wait();
  }

 private:
  const int trainer_num_;
  std::vector<std::unique_ptr<ConcurrentSet>> trainer_rows_;
};
}
// namespace distributed
}
// namespace paddle
paddle/fluid/distributed/ps/table/depends/initializers.h
0 → 100644
View file @
f0ef3442
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <functional>
#include <memory>
#include <random>
#include <string>
#include <utility>
#include <vector>
#include "gflags/gflags.h"
#include "paddle/fluid/framework/generator.h"
#include "paddle/fluid/operators/truncated_gaussian_random_op.h"
namespace
paddle
{
namespace
distributed
{
// Abstract source of initial parameter values. Subclasses implement the
// single-value GetValue(); the two bulk overloads are built on top of it.
class Initializer {
 public:
  Initializer() {}

  // The base class ignores `attrs`.
  explicit Initializer(const std::vector<std::string> &attrs) {}

  // Produces one value.
  virtual float GetValue() = 0;

  // Appends `numel` freshly produced values to `*values`.
  virtual void GetValue(std::vector<float> *values, int numel) {
    int produced = 0;
    while (produced < numel) {
      values->push_back(GetValue());
      ++produced;
    }
  }

  // Writes `numel` freshly produced values into value[0 .. numel-1].
  virtual void GetValue(float *value, int numel) {
    int index = 0;
    while (index < numel) {
      value[index] = GetValue();
      ++index;
    }
  }

  virtual ~Initializer() {}

 protected:
  std::string name_;
  unsigned int seed_;
};
// Initializer drawing values from a uniform real distribution.
// attrs, as parsed below: [0] name, [1] seed, [2] lower bound, [3] upper
// bound. The vector is indexed without a size check.
class UniformInitializer : public Initializer {
 public:
  explicit UniformInitializer(const std::vector<std::string> &attrs) {
    name_ = attrs[0];
    seed_ = static_cast<unsigned int>(std::stoi(attrs[1]));
    min_ = std::stof(attrs[2]);
    max_ = std::stof(attrs[3]);

    random_engine_ = framework::GetCPURandomEngine(seed_);
    dist_ = std::uniform_real_distribution<float>(min_, max_);
  }

  // Draws one sample.
  float GetValue() override {
    std::mt19937_64 &engine = *random_engine_;
    return dist_(engine);
  }

  // Fills value[0 .. numel-1] with samples.
  void GetValue(float *value, int numel) {
    int index = 0;
    while (index < numel) {
      value[index] = dist_(*random_engine_);
      ++index;
    }
  }

 private:
  float min_;
  float max_;

  std::shared_ptr<std::mt19937_64> random_engine_;
  std::uniform_real_distribution<float> dist_;
};
// Initializer drawing values from a normal distribution.
// attrs, as parsed below: [0] name, [1] seed, [2] mean, [3] standard
// deviation. The vector is indexed without a size check.
class GaussianInitializer : public Initializer {
 public:
  explicit GaussianInitializer(const std::vector<std::string> &attrs) {
    name_ = attrs[0];
    seed_ = static_cast<unsigned int>(std::stoi(attrs[1]));
    mean_ = std::stof(attrs[2]);
    std_ = std::stof(attrs[3]);

    dist_ = std::normal_distribution<float>(mean_, std_);
    random_engine_ = framework::GetCPURandomEngine(seed_);
  }

  // Draws one sample.
  float GetValue() override {
    std::mt19937_64 &engine = *random_engine_;
    return dist_(engine);
  }

  // Fills value[0 .. numel-1] with samples.
  void GetValue(float *value, int numel) {
    int index = 0;
    while (index < numel) {
      value[index] = dist_(*random_engine_);
      ++index;
    }
  }

 private:
  float std_;
  float mean_;

  std::shared_ptr<std::mt19937_64> random_engine_;
  std::normal_distribution<float> dist_;
};
class
TruncatedGaussianInitializer
:
public
Initializer
{
public:
explicit
TruncatedGaussianInitializer
(
const
std
::
vector
<
std
::
string
>
&
attrs
)
{
name_
=
attrs
[
0
];
seed_
=
static_cast
<
unsigned
int
>
(
std
::
stoi
(
attrs
[
1
]));
mean_
=
std
::
stof
(
attrs
[
2
]);
std_
=
std
::
stof
(
attrs
[
3
]);
std
::
uniform_real_distribution
<
float
>
dist_
(
std
::
numeric_limits
<
float
>::
min
(),
1.0
);
random_engine_
=
framework
::
GetCPURandomEngine
(
seed_
);
}
float
GetValue
()
override
{
paddle
::
operators
::
TruncatedNormal
<
float
>
truncated_normal
(
mean_
,
std_
);
float
value
=
truncated_normal
(
dist_
(
*
random_engine_
));
return
value
;
}
void
GetValue
(
float
*
value
,
int
numel
)
{
paddle
::
operators
::
TruncatedNormal
<
float
>
truncated_normal
(
mean_
,
std_
);
for
(
int
x
=
0
;
x
<
numel
;
++
x
)
{
value
[
x
]
=
truncated_normal
(
dist_
(
*
random_engine_
));
}
}
private:
float
std_
;
float
mean_
;
std
::
shared_ptr
<
std
::
mt19937_64
>
random_engine_
;
std
::
uniform_real_distribution
<
float
>
dist_
;
};
// Initializer that always yields the same constant.
// attrs, as parsed below: [0] name, [1] the constant. The vector is indexed
// without a size check.
class FillConstantInitializer : public Initializer {
 public:
  explicit FillConstantInitializer(const std::vector<std::string> &attrs) {
    name_ = attrs[0];
    value_ = std::stof(attrs[1]);
  }

  // Returns the constant.
  float GetValue() override { return value_; }

  // Writes the constant into value[0 .. numel-1]; nothing is written when
  // numel is not positive.
  void GetValue(float *value, int numel) {
    for (int index = 0; index < numel; ++index) {
      value[index] = value_;
    }
  }

 private:
  float value_;
};
}
// namespace distributed
}
// namespace paddle
paddle/fluid/distributed/ps/table/depends/rocksdb_warpper.h
0 → 100644
View file @
f0ef3442
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <glog/logging.h>
#include <rocksdb/db.h>
#include <rocksdb/filter_policy.h>
#include <rocksdb/options.h>
#include <rocksdb/slice.h>
#include <rocksdb/table.h>
#include <rocksdb/write_batch.h>
#include <iostream>
#include <string>
namespace
paddle
{
namespace
distributed
{
class
RocksDBHandler
{
public:
RocksDBHandler
()
{}
~
RocksDBHandler
()
{}
static
RocksDBHandler
*
GetInstance
()
{
static
RocksDBHandler
handler
;
return
&
handler
;
}
int
initialize
(
const
std
::
string
&
db_path
,
const
int
colnum
)
{
VLOG
(
3
)
<<
"db path: "
<<
db_path
<<
" colnum: "
<<
colnum
;
rocksdb
::
Options
options
;
rocksdb
::
BlockBasedTableOptions
bbto
;
bbto
.
block_size
=
4
*
1024
;
bbto
.
block_cache
=
rocksdb
::
NewLRUCache
(
64
*
1024
*
1024
);
bbto
.
block_cache_compressed
=
rocksdb
::
NewLRUCache
(
64
*
1024
*
1024
);
bbto
.
cache_index_and_filter_blocks
=
false
;
bbto
.
filter_policy
.
reset
(
rocksdb
::
NewBloomFilterPolicy
(
20
,
false
));
bbto
.
whole_key_filtering
=
true
;
options
.
table_factory
.
reset
(
rocksdb
::
NewBlockBasedTableFactory
(
bbto
));
options
.
keep_log_file_num
=
100
;
options
.
max_log_file_size
=
50
*
1024
*
1024
;
// 50MB
options
.
create_if_missing
=
true
;
options
.
use_direct_reads
=
true
;
options
.
max_background_flushes
=
5
;
options
.
max_background_compactions
=
5
;
options
.
base_background_compactions
=
10
;
options
.
write_buffer_size
=
256
*
1024
*
1024
;
// 256MB
options
.
max_write_buffer_number
=
8
;
options
.
max_bytes_for_level_base
=
options
.
max_write_buffer_number
*
options
.
write_buffer_size
;
options
.
min_write_buffer_number_to_merge
=
1
;
options
.
target_file_size_base
=
1024
*
1024
*
1024
;
// 1024MB
options
.
memtable_prefix_bloom_size_ratio
=
0.02
;
options
.
num_levels
=
4
;
options
.
max_open_files
=
-
1
;
options
.
compression
=
rocksdb
::
kNoCompression
;
options
.
level0_file_num_compaction_trigger
=
8
;
options
.
level0_slowdown_writes_trigger
=
1.8
*
options
.
level0_file_num_compaction_trigger
;
options
.
level0_stop_writes_trigger
=
3.6
*
options
.
level0_file_num_compaction_trigger
;
if
(
!
db_path
.
empty
())
{
std
::
string
rm_cmd
=
"rm -rf "
+
db_path
;
system
(
rm_cmd
.
c_str
());
}
rocksdb
::
Status
s
=
rocksdb
::
DB
::
Open
(
options
,
db_path
,
&
_db
);
assert
(
s
.
ok
());
_handles
.
resize
(
colnum
);
for
(
int
i
=
0
;
i
<
colnum
;
i
++
)
{
s
=
_db
->
CreateColumnFamily
(
options
,
"shard_"
+
std
::
to_string
(
i
),
&
_handles
[
i
]);
assert
(
s
.
ok
());
}
LOG
(
INFO
)
<<
"DB initialize success, colnum:"
<<
colnum
;
return
0
;
}
int
put
(
int
id
,
const
char
*
key
,
int
key_len
,
const
char
*
value
,
int
value_len
)
{
rocksdb
::
WriteOptions
options
;
options
.
disableWAL
=
true
;
rocksdb
::
Status
s
=
_db
->
Put
(
options
,
_handles
[
id
],
rocksdb
::
Slice
(
key
,
key_len
),
rocksdb
::
Slice
(
value
,
value_len
));
assert
(
s
.
ok
());
return
0
;
}
int
put_batch
(
int
id
,
std
::
vector
<
std
::
pair
<
char
*
,
int
>>&
ssd_keys
,
std
::
vector
<
std
::
pair
<
char
*
,
int
>>&
ssd_values
,
int
n
)
{
rocksdb
::
WriteOptions
options
;
options
.
disableWAL
=
true
;
rocksdb
::
WriteBatch
batch
(
n
*
128
);
for
(
int
i
=
0
;
i
<
n
;
i
++
)
{
batch
.
Put
(
_handles
[
id
],
rocksdb
::
Slice
(
ssd_keys
[
i
].
first
,
ssd_keys
[
i
].
second
),
rocksdb
::
Slice
(
ssd_values
[
i
].
first
,
ssd_values
[
i
].
second
));
}
rocksdb
::
Status
s
=
_db
->
Write
(
options
,
&
batch
);
assert
(
s
.
ok
());
return
0
;
}
int
get
(
int
id
,
const
char
*
key
,
int
key_len
,
std
::
string
&
value
)
{
rocksdb
::
Status
s
=
_db
->
Get
(
rocksdb
::
ReadOptions
(),
_handles
[
id
],
rocksdb
::
Slice
(
key
,
key_len
),
&
value
);
if
(
s
.
IsNotFound
())
{
return
1
;
}
assert
(
s
.
ok
());
return
0
;
}
int
del_data
(
int
id
,
const
char
*
key
,
int
key_len
)
{
rocksdb
::
WriteOptions
options
;
options
.
disableWAL
=
true
;
rocksdb
::
Status
s
=
_db
->
Delete
(
options
,
_handles
[
id
],
rocksdb
::
Slice
(
key
,
key_len
));
assert
(
s
.
ok
());
return
0
;
}
int
flush
(
int
id
)
{
rocksdb
::
Status
s
=
_db
->
Flush
(
rocksdb
::
FlushOptions
(),
_handles
[
id
]);
assert
(
s
.
ok
());
return
0
;
}
rocksdb
::
Iterator
*
get_iterator
(
int
id
)
{
return
_db
->
NewIterator
(
rocksdb
::
ReadOptions
(),
_handles
[
id
]);
}
int
get_estimate_key_num
(
uint64_t
&
num_keys
)
{
_db
->
GetAggregatedIntProperty
(
"rocksdb.estimate-num-keys"
,
&
num_keys
);
return
0
;
}
private:
std
::
vector
<
rocksdb
::
ColumnFamilyHandle
*>
_handles
;
rocksdb
::
DB
*
_db
;
};
}
// namespace distributed
}
// namespace paddle
paddle/fluid/distributed/ps/table/depends/sparse_utils.h
0 → 100644
View file @
f0ef3442
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <functional>
#include <memory>
#include <string>
#include <utility>
#include <vector>
namespace
paddle
{
namespace
distributed
{
// Non-owning description of a sparse pull request: `numel_` feature signs
// with one frequency each, plus the embedding dimension. The two pointers
// refer to storage owned elsewhere (the vectors handed to the constructor or
// the buffer handed to DeserializeFromBytes).
struct PullSparseValue {
  // Leaves an empty request; see the default member initialisers below.
  PullSparseValue() {}

  // Request for `numel` keys of dimension `dim`, with no key storage attached.
  explicit PullSparseValue(int numel, int dim)
      : numel_(numel),
        dim_(dim),
        is_training_(true),
        feasigns_(nullptr),
        frequencies_(nullptr) {}

  // Views the caller's vectors. They must outlive this object and must not
  // reallocate while it is in use.
  explicit PullSparseValue(std::vector<uint64_t>& feasigns,     // NOLINT
                           std::vector<uint32_t>& frequencies,  // NOLINT
                           int dim) {
    numel_ = feasigns.size();
    dim_ = dim;
    is_training_ = true;
    feasigns_ = feasigns.data();
    frequencies_ = frequencies.data();
  }

  // Points this object into a serialized buffer. `numel_` must already hold
  // the key count, because it fixes where the frequencies begin.
  // NOTE(review): the key array starts sizeof(bool) bytes into `bytes`, so it
  // is only suitably aligned for uint64_t if the caller arranges that.
  void DeserializeFromBytes(void* bytes) {
    /*
    |---isTraining--------------|
    |---8*{num}B(keysData)------|
    |---4*{num}B(Frequencies)---|
    */
    auto* begin = reinterpret_cast<char*>(bytes);
    is_training_ = reinterpret_cast<bool*>(begin)[0];
    feasigns_ = reinterpret_cast<uint64_t*>(begin + sizeof(bool));
    frequencies_ = reinterpret_cast<uint32_t*>(begin + sizeof(bool) +
                                               sizeof(uint64_t) * numel_);
  }

  // Appends to `*offset_shard` the index of every key for which
  // key % shard_num == shard_id. `shard_num` must be non-zero.
  void Fission(const int shard_id,
               const int shard_num,
               std::vector<int>* offset_shard) const {
    offset_shard->reserve(numel_ / shard_num + 1);
    for (int x = 0; x < numel_; ++x) {
      if (int(feasigns_[x] % shard_num) == shard_id) {
        offset_shard->push_back(x);
      }
    }
  }

  // Default initialisers make a default-constructed object well defined;
  // previously all five members were indeterminate.
  int numel_ = 0;
  int dim_ = 0;
  bool is_training_ = true;
  uint64_t* feasigns_ = nullptr;
  uint32_t* frequencies_ = nullptr;
};
}
// namespace distributed
}
// namespace paddle
paddle/fluid/distributed/ps/table/graph/class_macro.h
0 → 100644
View file @
f0ef3442
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
// Helper macros that expand to a run of `friend class X;` declarations.
// Emits a single friend declaration for class `a`.
#define DECLARE_GRAPH_FRIEND_CLASS(a) friend class a;
// DECLARE_<k>_FRIEND_CLASS befriends its first argument and hands the rest to
// DECLARE_<k-1>_FRIEND_CLASS; the chain ends at DECLARE_1_FRIEND_CLASS, which
// ignores anything after its first argument. Variants exist for k = 1..11.
#define DECLARE_1_FRIEND_CLASS(a, ...) DECLARE_GRAPH_FRIEND_CLASS(a)
#define DECLARE_2_FRIEND_CLASS(a, ...) \
DECLARE_GRAPH_FRIEND_CLASS(a) DECLARE_1_FRIEND_CLASS(__VA_ARGS__)
#define DECLARE_3_FRIEND_CLASS(a, ...) \
DECLARE_GRAPH_FRIEND_CLASS(a) DECLARE_2_FRIEND_CLASS(__VA_ARGS__)
#define DECLARE_4_FRIEND_CLASS(a, ...) \
DECLARE_GRAPH_FRIEND_CLASS(a) DECLARE_3_FRIEND_CLASS(__VA_ARGS__)
#define DECLARE_5_FRIEND_CLASS(a, ...) \
DECLARE_GRAPH_FRIEND_CLASS(a) DECLARE_4_FRIEND_CLASS(__VA_ARGS__)
#define DECLARE_6_FRIEND_CLASS(a, ...) \
DECLARE_GRAPH_FRIEND_CLASS(a) DECLARE_5_FRIEND_CLASS(__VA_ARGS__)
#define DECLARE_7_FRIEND_CLASS(a, ...) \
DECLARE_GRAPH_FRIEND_CLASS(a) DECLARE_6_FRIEND_CLASS(__VA_ARGS__)
#define DECLARE_8_FRIEND_CLASS(a, ...) \
DECLARE_GRAPH_FRIEND_CLASS(a) DECLARE_7_FRIEND_CLASS(__VA_ARGS__)
#define DECLARE_9_FRIEND_CLASS(a, ...) \
DECLARE_GRAPH_FRIEND_CLASS(a) DECLARE_8_FRIEND_CLASS(__VA_ARGS__)
#define DECLARE_10_FRIEND_CLASS(a, ...) \
DECLARE_GRAPH_FRIEND_CLASS(a) DECLARE_9_FRIEND_CLASS(__VA_ARGS__)
#define DECLARE_11_FRIEND_CLASS(a, ...) \
DECLARE_GRAPH_FRIEND_CLASS(a) DECLARE_10_FRIEND_CLASS(__VA_ARGS__)
// Entry point: `n` is pasted into the helper's name, so it must be a literal
// integer token (1..11) and should equal the number of class names that
// follow, e.g. REGISTER_GRAPH_FRIEND_CLASS(2, A, B).
#define REGISTER_GRAPH_FRIEND_CLASS(n, ...) \
DECLARE_##n##_FRIEND_CLASS(__VA_ARGS__)
paddle/fluid/distributed/ps/table/graph/graph_edge.cc
0 → 100644
View file @
f0ef3442
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/distributed/ps/table/graph/graph_edge.h"
#include <cstring>
namespace
paddle
{
namespace
distributed
{
// Records neighbour `id`. The unweighted blob stores no weights, so `weight`
// is accepted only to match the interface and is ignored.
void GraphEdgeBlob::add_edge(int64_t id, float weight = 1) {
  id_arr.emplace_back(id);
}
// Records neighbour `id` together with its edge weight; the two arrays grow
// in lockstep, so an index refers to the same edge in both.
void WeightedGraphEdgeBlob::add_edge(int64_t id, float weight = 1) {
  id_arr.emplace_back(id);
  weight_arr.emplace_back(weight);
}
}
// namespace distributed
}
// namespace paddle
paddle/fluid/distributed/ps/table/graph/graph_edge.h
0 → 100644
View file @
f0ef3442
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cstddef>
#include <cstdint>
#include <vector>
namespace
paddle
{
namespace
distributed
{
// Unweighted adjacency list: stores neighbour ids only, and every edge
// reports a weight of 1.
class GraphEdgeBlob {
 public:
  GraphEdgeBlob() {}
  virtual ~GraphEdgeBlob() {}

  // Number of stored edges.
  size_t size() {
    const size_t edge_count = id_arr.size();
    return edge_count;
  }

  // Adds an edge; defined out of line.
  virtual void add_edge(int64_t id, float weight);

  // Neighbour id at position `idx` (not range-checked).
  int64_t get_id(int idx) {
    const int64_t neighbor = id_arr[idx];
    return neighbor;
  }

  // Weight at position `idx`; constant 1 here, `idx` is not consulted.
  virtual float get_weight(int idx) { return 1.0f; }

  // Mutable access to the underlying id array.
  std::vector<int64_t>& export_id_array() {
    std::vector<int64_t>& ids = id_arr;
    return ids;
  }

 protected:
  std::vector<int64_t> id_arr;
};
class
WeightedGraphEdgeBlob
:
public
GraphEdgeBlob
{
public:
WeightedGraphEdgeBlob
()
{}
virtual
~
WeightedGraphEdgeBlob
()
{}
virtual
void
add_edge
(
int64_t
id
,
float
weight
);
virtual
float
get_weight
(
int
idx
)
{
return
weight_arr
[
idx
];
}
protected:
std
::
vector
<
float
>
weight_arr
;
};
}
// namespace distributed
}
// namespace paddle
Prev
1
…
10
11
12
13
14
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment