Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Paddle
Commits
de2e6515
Commit
de2e6515
authored
Apr 26, 2023
by
yuguo960516yuguo
Browse files
2.4.1-dtk-23.04
parent
ad08b8ce
Pipeline
#228
failed with stages
in 0 seconds
Changes
272
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
1401 additions
and
0 deletions
+1401
-0
paddle/fluid/distributed/ps/table/depends/feature_value.h
paddle/fluid/distributed/ps/table/depends/feature_value.h
+202
-0
paddle/fluid/distributed/ps/table/depends/geo_recorder.h
paddle/fluid/distributed/ps/table/depends/geo_recorder.h
+91
-0
paddle/fluid/distributed/ps/table/depends/initializers.h
paddle/fluid/distributed/ps/table/depends/initializers.h
+161
-0
paddle/fluid/distributed/ps/table/depends/rocksdb_warpper.h
paddle/fluid/distributed/ps/table/depends/rocksdb_warpper.h
+163
-0
paddle/fluid/distributed/ps/table/depends/sparse_utils.h
paddle/fluid/distributed/ps/table/depends/sparse_utils.h
+77
-0
paddle/fluid/distributed/ps/table/graph/class_macro.h
paddle/fluid/distributed/ps/table/graph/class_macro.h
+39
-0
paddle/fluid/distributed/ps/table/graph/graph_edge.cc
paddle/fluid/distributed/ps/table/graph/graph_edge.cc
+30
-0
paddle/fluid/distributed/ps/table/graph/graph_edge.h
paddle/fluid/distributed/ps/table/graph/graph_edge.h
+47
-0
paddle/fluid/distributed/ps/table/graph/graph_node.cc
paddle/fluid/distributed/ps/table/graph/graph_node.cc
+121
-0
paddle/fluid/distributed/ps/table/graph/graph_node.h
paddle/fluid/distributed/ps/table/graph/graph_node.h
+242
-0
paddle/fluid/distributed/ps/table/graph/graph_weighted_sampler.cc
...luid/distributed/ps/table/graph/graph_weighted_sampler.cc
+164
-0
paddle/fluid/distributed/ps/table/graph/graph_weighted_sampler.h
...fluid/distributed/ps/table/graph/graph_weighted_sampler.h
+64
-0
No files found.
Too many changes to show.
To preserve performance only
272 of 272+
files are displayed.
Plain diff
Email patch
paddle/fluid/distributed/ps/table/depends/feature_value.h
0 → 100644
View file @
de2e6515
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <mct/hash-map.hpp>
#include <vector>
#include "gflags/gflags.h"
#include "paddle/fluid/distributed/common/chunk_allocator.h"
namespace
paddle
{
namespace
distributed
{
// Number of high-order hash bits used to select a bucket inside one shard.
// Header-scope constants: use constexpr (guaranteed compile-time, usable in
// array bounds below) instead of `static const`.
static constexpr int CTR_SPARSE_SHARD_BUCKET_NUM_BITS = 6;
// Buckets per shard: 1 << 6 == 64.
static constexpr std::size_t CTR_SPARSE_SHARD_BUCKET_NUM =
    static_cast<std::size_t>(1) << CTR_SPARSE_SHARD_BUCKET_NUM_BITS;
// A thin wrapper around a float buffer holding one sparse feature's values.
// Exposes only the operations the sparse tables need: raw access, size,
// resize, and capacity trimming.
class FixedFeatureValue {
 public:
  FixedFeatureValue() = default;
  ~FixedFeatureValue() = default;

  // Raw pointer to the underlying contiguous storage.
  float *data() { return _data.data(); }

  // Current number of floats stored.
  size_t size() { return _data.size(); }

  // Grow/shrink the buffer to exactly `size` elements.
  void resize(size_t size) { _data.resize(size); }

  // Release excess capacity back to the allocator.
  void shrink_to_fit() { _data.shrink_to_fit(); }

 private:
  std::vector<float> _data;
};
// One cache-line-aligned shard of a sparse table. Keys are spread over
// CTR_SPARSE_SHARD_BUCKET_NUM closed-hash-map buckets (selected from the top
// hash bits); values live in a ChunkAllocator and the maps store opaque
// pointers to them, hence the pointer casts below.
template <class KEY, class VALUE>
struct alignas(64) SparseTableShard {
 public:
  typedef typename mct::closed_hash_map<KEY, mct::Pointer, std::hash<KEY>>
      map_type;

  // Whole-shard iterator: walks a bucket to its end, then hops to the next
  // non-empty bucket.
  struct iterator {
    typename map_type::iterator it;
    size_t bucket;
    map_type *buckets;
    friend bool operator==(const iterator &a, const iterator &b) {
      return a.it == b.it;
    }
    friend bool operator!=(const iterator &a, const iterator &b) {
      return a.it != b.it;
    }
    const KEY &key() const { return it->first; }
    VALUE &value() const { return *(VALUE *)(void *)it->second; }   // NOLINT
    VALUE *value_ptr() const { return (VALUE *)(void *)it->second; }  // NOLINT
    iterator &operator++() {
      ++it;
      // Skip exhausted buckets until a live element or the final bucket.
      while (it == buckets[bucket].end() &&
             bucket + 1 < CTR_SPARSE_SHARD_BUCKET_NUM) {
        it = buckets[++bucket].begin();
      }
      return *this;
    }
    iterator operator++(int) {
      iterator ret = *this;
      ++*this;
      return ret;
    }
  };

  // Iterator confined to a single bucket (no hopping).
  struct local_iterator {
    typename map_type::iterator it;
    friend bool operator==(const local_iterator &a, const local_iterator &b) {
      return a.it == b.it;
    }
    friend bool operator!=(const local_iterator &a, const local_iterator &b) {
      return a.it != b.it;
    }
    const KEY &key() const { return it->first; }
    VALUE &value() const { return *(VALUE *)(void *)it->second; }  // NOLINT
    local_iterator &operator++() {
      ++it;
      return *this;
    }
    local_iterator operator++(int) { return {it++}; }
  };

  ~SparseTableShard() { clear(); }

  bool empty() { return _alloc.size() == 0; }
  // Total elements across all buckets (the allocator tracks every live value).
  size_t size() { return _alloc.size(); }

  void set_max_load_factor(float x) {
    for (size_t bucket = 0; bucket < CTR_SPARSE_SHARD_BUCKET_NUM; bucket++) {
      _buckets[bucket].max_load_factor(x);
    }
  }

  size_t bucket_count() { return CTR_SPARSE_SHARD_BUCKET_NUM; }
  size_t bucket_size(size_t bucket) { return _buckets[bucket].size(); }

  // Release every value back to the allocator and empty all buckets.
  void clear() {
    for (size_t bucket = 0; bucket < CTR_SPARSE_SHARD_BUCKET_NUM; bucket++) {
      map_type &data = _buckets[bucket];
      for (auto it = data.begin(); it != data.end(); ++it) {
        _alloc.release((VALUE *)(void *)it->second);  // NOLINT
      }
      data.clear();
    }
  }

  iterator begin() {
    auto it = _buckets[0].begin();
    size_t bucket = 0;
    while (it == _buckets[bucket].end() &&
           bucket + 1 < CTR_SPARSE_SHARD_BUCKET_NUM) {
      it = _buckets[++bucket].begin();
    }
    return {it, bucket, _buckets};
  }
  iterator end() {
    return {_buckets[CTR_SPARSE_SHARD_BUCKET_NUM - 1].end(),
            CTR_SPARSE_SHARD_BUCKET_NUM - 1,
            _buckets};
  }
  local_iterator begin(size_t bucket) { return {_buckets[bucket].begin()}; }
  local_iterator end(size_t bucket) { return {_buckets[bucket].end()}; }

  iterator find(const KEY &key) {
    size_t hash = _hasher(key);
    size_t bucket = compute_bucket(hash);
    auto it = _buckets[bucket].find_with_hash(key, hash);
    if (it == _buckets[bucket].end()) {
      return end();
    }
    return {it, bucket, _buckets};
  }

  // Inserts a default-constructed value when the key is absent.
  VALUE &operator[](const KEY &key) { return emplace(key).first.value(); }

  std::pair<iterator, bool> insert(const KEY &key, const VALUE &val) {
    return emplace(key, val);
  }
  std::pair<iterator, bool> insert(const KEY &key, VALUE &&val) {
    return emplace(key, std::move(val));
  }

  // Insert key with a value constructed in the chunk allocator from `args`.
  // Returns {iterator, inserted?}; existing keys are left untouched.
  template <class... ARGS>
  std::pair<iterator, bool> emplace(const KEY &key, ARGS &&...args) {
    size_t hash = _hasher(key);
    size_t bucket = compute_bucket(hash);
    auto res = _buckets[bucket].insert_with_hash({key, NULL}, hash);
    if (res.second) {
      res.first->second = _alloc.acquire(std::forward<ARGS>(args)...);
    }
    return {{res.first, bucket, _buckets}, res.second};
  }

  // Erase and return an iterator to the next element (may hop buckets).
  iterator erase(iterator it) {
    _alloc.release((VALUE *)(void *)it.it->second);  // NOLINT
    size_t bucket = it.bucket;
    auto it2 = _buckets[bucket].erase(it.it);
    while (it2 == _buckets[bucket].end() &&
           bucket + 1 < CTR_SPARSE_SHARD_BUCKET_NUM) {
      it2 = _buckets[++bucket].begin();
    }
    return {it2, bucket, _buckets};
  }

  // Erase without computing a successor iterator (cheaper).
  void quick_erase(iterator it) {
    _alloc.release((VALUE *)(void *)it.it->second);  // NOLINT
    _buckets[it.bucket].quick_erase(it.it);
  }

  local_iterator erase(size_t bucket, local_iterator it) {
    _alloc.release((VALUE *)(void *)it.it->second);  // NOLINT
    return {_buckets[bucket].erase(it.it)};
  }
  void quick_erase(size_t bucket, local_iterator it) {
    _alloc.release((VALUE *)(void *)it.it->second);  // NOLINT
    _buckets[bucket].quick_erase(it.it);
  }

  // Erase by key; returns number of elements removed (0 or 1).
  size_t erase(const KEY &key) {
    auto it = find(key);
    if (it == end()) {
      return 0;
    }
    quick_erase(it);
    return 1;
  }

  // Bucket index from the top CTR_SPARSE_SHARD_BUCKET_NUM_BITS of the hash.
  size_t compute_bucket(size_t hash) {
    if (CTR_SPARSE_SHARD_BUCKET_NUM == 1) {
      return 0;
    } else {
      return hash >> (sizeof(size_t) * 8 - CTR_SPARSE_SHARD_BUCKET_NUM_BITS);
    }
  }

 private:
  map_type _buckets[CTR_SPARSE_SHARD_BUCKET_NUM];
  ChunkAllocator<VALUE> _alloc;
  std::hash<KEY> _hasher;
};
}
// namespace distributed
}
// namespace paddle
paddle/fluid/distributed/ps/table/depends/geo_recorder.h
0 → 100644
View file @
de2e6515
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <ThreadPool.h>
#include <future> // NOLINT
#include <memory>
#include <unordered_set>
#include <vector>
namespace
paddle
{
namespace
distributed
{
// A uint64 id set serialized through a single-thread pool: all mutations run
// on the pool's one worker, so no lock is needed on `set_` itself.
class ConcurrentSet {
 public:
  ConcurrentSet() : pool_(new ::ThreadPool(1)) {}
  ~ConcurrentSet() {}

  // Asynchronously insert every id in `rows`. `rows` is captured by copy so
  // the task stays valid after the caller's vector goes away.
  std::future<void> Update(const std::vector<uint64_t>& rows) {
    auto task = [this, rows] {
      for (auto row : rows) {
        set_.insert(row);
      }
    };
    return pool_->enqueue(std::move(task));
  }

  // Asynchronously dump the set into *result and clear it.
  // BUG FIX: the task previously captured `result` by reference
  // ([this, &result]); the lambda runs on the pool thread after this function
  // returns, by which time the `result` parameter slot is gone — a dangling
  // reference. Capturing the pointer by value is correct and free.
  std::future<void> GetAndClear(std::vector<uint64_t>* result) {
    auto task = [this, result] {
      result->clear();
      for (auto& id : set_) {
        result->push_back(id);
      }
      set_.clear();
    };
    return pool_->enqueue(std::move(task));
  }

 private:
  std::unordered_set<uint64_t> set_;
  std::unique_ptr<::ThreadPool> pool_{nullptr};
};
// Records, per trainer, the set of sparse rows updated since that trainer
// last pulled (GEO-SGD bookkeeping). One ConcurrentSet per trainer.
class GeoRecorder {
 public:
  // Allocates one id set for each of the `trainer_num` trainers.
  explicit GeoRecorder(int trainer_num) : trainer_num_(trainer_num) {
    trainer_rows_.reserve(trainer_num);
    for (auto i = 0; i < trainer_num; ++i) {
      trainer_rows_.emplace_back(new ConcurrentSet());
    }
  }

  ~GeoRecorder() = default;

  // Broadcast `update_rows` into every trainer's pending set, then block
  // until all of the asynchronous inserts have completed.
  void Update(const std::vector<uint64_t>& update_rows) {
    VLOG(3) << " row size: " << update_rows.size();
    std::vector<std::future<void>> pending;
    for (auto& set : trainer_rows_) {
      pending.push_back(set->Update(update_rows));
    }
    for (auto& fut : pending) {
      fut.wait();
    }
  }

  // Move trainer `trainer_id`'s pending row ids into *result and clear them;
  // blocks until done. Throws std::out_of_range for an invalid id (via .at).
  void GetAndClear(uint32_t trainer_id, std::vector<uint64_t>* result) {
    VLOG(3) << "GetAndClear for trainer: " << trainer_id;
    trainer_rows_.at(trainer_id)->GetAndClear(result).wait();
  }

 private:
  const int trainer_num_;
  std::vector<std::unique_ptr<ConcurrentSet>> trainer_rows_;
};
}
// namespace distributed
}
// namespace paddle
paddle/fluid/distributed/ps/table/depends/initializers.h
0 → 100644
View file @
de2e6515
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <functional>
#include <memory>
#include <random>
#include <string>
#include <utility>
#include <vector>
#include "gflags/gflags.h"
#include "paddle/fluid/framework/generator.h"
#include "paddle/fluid/operators/truncated_gaussian_random_op.h"
namespace
paddle
{
namespace
distributed
{
class
Initializer
{
public:
Initializer
()
{}
explicit
Initializer
(
const
std
::
vector
<
std
::
string
>
&
attrs
)
{}
virtual
float
GetValue
()
=
0
;
virtual
void
GetValue
(
std
::
vector
<
float
>
*
values
,
int
numel
)
{
for
(
int
x
=
0
;
x
<
numel
;
++
x
)
{
values
->
push_back
(
GetValue
());
}
}
virtual
void
GetValue
(
float
*
value
,
int
numel
)
{
for
(
int
x
=
0
;
x
<
numel
;
++
x
)
{
value
[
x
]
=
GetValue
();
}
}
virtual
~
Initializer
()
{}
protected:
std
::
string
name_
;
unsigned
int
seed_
;
};
class
UniformInitializer
:
public
Initializer
{
public:
explicit
UniformInitializer
(
const
std
::
vector
<
std
::
string
>
&
attrs
)
{
name_
=
attrs
[
0
];
seed_
=
static_cast
<
unsigned
int
>
(
std
::
stoi
(
attrs
[
1
]));
min_
=
std
::
stof
(
attrs
[
2
]);
max_
=
std
::
stof
(
attrs
[
3
]);
dist_
=
std
::
uniform_real_distribution
<
float
>
(
min_
,
max_
);
random_engine_
=
framework
::
GetCPURandomEngine
(
seed_
);
}
float
GetValue
()
override
{
return
dist_
(
*
random_engine_
);
}
void
GetValue
(
float
*
value
,
int
numel
)
{
for
(
int
x
=
0
;
x
<
numel
;
++
x
)
{
value
[
x
]
=
dist_
(
*
random_engine_
);
}
}
private:
float
min_
;
float
max_
;
std
::
shared_ptr
<
std
::
mt19937_64
>
random_engine_
;
std
::
uniform_real_distribution
<
float
>
dist_
;
};
class
GaussianInitializer
:
public
Initializer
{
public:
explicit
GaussianInitializer
(
const
std
::
vector
<
std
::
string
>
&
attrs
)
{
name_
=
attrs
[
0
];
seed_
=
static_cast
<
unsigned
int
>
(
std
::
stoi
(
attrs
[
1
]));
mean_
=
std
::
stof
(
attrs
[
2
]);
std_
=
std
::
stof
(
attrs
[
3
]);
random_engine_
=
framework
::
GetCPURandomEngine
(
seed_
);
dist_
=
std
::
normal_distribution
<
float
>
(
mean_
,
std_
);
}
float
GetValue
()
override
{
return
dist_
(
*
random_engine_
);
}
void
GetValue
(
float
*
value
,
int
numel
)
{
for
(
int
x
=
0
;
x
<
numel
;
++
x
)
{
value
[
x
]
=
dist_
(
*
random_engine_
);
}
}
private:
float
std_
;
float
mean_
;
std
::
shared_ptr
<
std
::
mt19937_64
>
random_engine_
;
std
::
normal_distribution
<
float
>
dist_
;
};
class
TruncatedGaussianInitializer
:
public
Initializer
{
public:
explicit
TruncatedGaussianInitializer
(
const
std
::
vector
<
std
::
string
>
&
attrs
)
{
name_
=
attrs
[
0
];
seed_
=
static_cast
<
unsigned
int
>
(
std
::
stoi
(
attrs
[
1
]));
mean_
=
std
::
stof
(
attrs
[
2
]);
std_
=
std
::
stof
(
attrs
[
3
]);
std
::
uniform_real_distribution
<
float
>
dist_
(
std
::
numeric_limits
<
float
>::
min
(),
1.0
);
random_engine_
=
framework
::
GetCPURandomEngine
(
seed_
);
}
float
GetValue
()
override
{
paddle
::
operators
::
TruncatedNormal
<
float
>
truncated_normal
(
mean_
,
std_
);
float
value
=
truncated_normal
(
dist_
(
*
random_engine_
));
return
value
;
}
void
GetValue
(
float
*
value
,
int
numel
)
{
paddle
::
operators
::
TruncatedNormal
<
float
>
truncated_normal
(
mean_
,
std_
);
for
(
int
x
=
0
;
x
<
numel
;
++
x
)
{
value
[
x
]
=
truncated_normal
(
dist_
(
*
random_engine_
));
}
}
private:
float
std_
;
float
mean_
;
std
::
shared_ptr
<
std
::
mt19937_64
>
random_engine_
;
std
::
uniform_real_distribution
<
float
>
dist_
;
};
class
FillConstantInitializer
:
public
Initializer
{
public:
explicit
FillConstantInitializer
(
const
std
::
vector
<
std
::
string
>
&
attrs
)
{
name_
=
attrs
[
0
];
value_
=
std
::
stof
(
attrs
[
1
]);
}
float
GetValue
()
override
{
return
value_
;
}
void
GetValue
(
float
*
value
,
int
numel
)
{
std
::
fill_n
(
value
,
numel
,
value_
);
}
private:
float
value_
;
};
}
// namespace distributed
}
// namespace paddle
paddle/fluid/distributed/ps/table/depends/rocksdb_warpper.h
0 → 100644
View file @
de2e6515
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <glog/logging.h>
#include <rocksdb/db.h>
#include <rocksdb/filter_policy.h>
#include <rocksdb/options.h>
#include <rocksdb/slice.h>
#include <rocksdb/table.h>
#include <rocksdb/write_batch.h>
#include <iostream>
#include <string>
namespace
paddle
{
namespace
distributed
{
class
RocksDBHandler
{
public:
RocksDBHandler
()
{}
~
RocksDBHandler
()
{}
static
RocksDBHandler
*
GetInstance
()
{
static
RocksDBHandler
handler
;
return
&
handler
;
}
int
initialize
(
const
std
::
string
&
db_path
,
const
int
colnum
)
{
VLOG
(
3
)
<<
"db path: "
<<
db_path
<<
" colnum: "
<<
colnum
;
rocksdb
::
Options
options
;
rocksdb
::
BlockBasedTableOptions
bbto
;
bbto
.
block_size
=
4
*
1024
;
bbto
.
block_cache
=
rocksdb
::
NewLRUCache
(
64
*
1024
*
1024
);
bbto
.
block_cache_compressed
=
rocksdb
::
NewLRUCache
(
64
*
1024
*
1024
);
bbto
.
cache_index_and_filter_blocks
=
false
;
bbto
.
filter_policy
.
reset
(
rocksdb
::
NewBloomFilterPolicy
(
20
,
false
));
bbto
.
whole_key_filtering
=
true
;
options
.
table_factory
.
reset
(
rocksdb
::
NewBlockBasedTableFactory
(
bbto
));
options
.
keep_log_file_num
=
100
;
options
.
max_log_file_size
=
50
*
1024
*
1024
;
// 50MB
options
.
create_if_missing
=
true
;
options
.
use_direct_reads
=
true
;
options
.
max_background_flushes
=
5
;
options
.
max_background_compactions
=
5
;
options
.
base_background_compactions
=
10
;
options
.
write_buffer_size
=
256
*
1024
*
1024
;
// 256MB
options
.
max_write_buffer_number
=
8
;
options
.
max_bytes_for_level_base
=
options
.
max_write_buffer_number
*
options
.
write_buffer_size
;
options
.
min_write_buffer_number_to_merge
=
1
;
options
.
target_file_size_base
=
1024
*
1024
*
1024
;
// 1024MB
options
.
memtable_prefix_bloom_size_ratio
=
0.02
;
options
.
num_levels
=
4
;
options
.
max_open_files
=
-
1
;
options
.
compression
=
rocksdb
::
kNoCompression
;
options
.
level0_file_num_compaction_trigger
=
8
;
options
.
level0_slowdown_writes_trigger
=
1.8
*
options
.
level0_file_num_compaction_trigger
;
options
.
level0_stop_writes_trigger
=
3.6
*
options
.
level0_file_num_compaction_trigger
;
if
(
!
db_path
.
empty
())
{
std
::
string
rm_cmd
=
"rm -rf "
+
db_path
;
system
(
rm_cmd
.
c_str
());
}
rocksdb
::
Status
s
=
rocksdb
::
DB
::
Open
(
options
,
db_path
,
&
_db
);
assert
(
s
.
ok
());
_handles
.
resize
(
colnum
);
for
(
int
i
=
0
;
i
<
colnum
;
i
++
)
{
s
=
_db
->
CreateColumnFamily
(
options
,
"shard_"
+
std
::
to_string
(
i
),
&
_handles
[
i
]);
assert
(
s
.
ok
());
}
LOG
(
INFO
)
<<
"DB initialize success, colnum:"
<<
colnum
;
return
0
;
}
int
put
(
int
id
,
const
char
*
key
,
int
key_len
,
const
char
*
value
,
int
value_len
)
{
rocksdb
::
WriteOptions
options
;
options
.
disableWAL
=
true
;
rocksdb
::
Status
s
=
_db
->
Put
(
options
,
_handles
[
id
],
rocksdb
::
Slice
(
key
,
key_len
),
rocksdb
::
Slice
(
value
,
value_len
));
assert
(
s
.
ok
());
return
0
;
}
int
put_batch
(
int
id
,
std
::
vector
<
std
::
pair
<
char
*
,
int
>>&
ssd_keys
,
std
::
vector
<
std
::
pair
<
char
*
,
int
>>&
ssd_values
,
int
n
)
{
rocksdb
::
WriteOptions
options
;
options
.
disableWAL
=
true
;
rocksdb
::
WriteBatch
batch
(
n
*
128
);
for
(
int
i
=
0
;
i
<
n
;
i
++
)
{
batch
.
Put
(
_handles
[
id
],
rocksdb
::
Slice
(
ssd_keys
[
i
].
first
,
ssd_keys
[
i
].
second
),
rocksdb
::
Slice
(
ssd_values
[
i
].
first
,
ssd_values
[
i
].
second
));
}
rocksdb
::
Status
s
=
_db
->
Write
(
options
,
&
batch
);
assert
(
s
.
ok
());
return
0
;
}
int
get
(
int
id
,
const
char
*
key
,
int
key_len
,
std
::
string
&
value
)
{
rocksdb
::
Status
s
=
_db
->
Get
(
rocksdb
::
ReadOptions
(),
_handles
[
id
],
rocksdb
::
Slice
(
key
,
key_len
),
&
value
);
if
(
s
.
IsNotFound
())
{
return
1
;
}
assert
(
s
.
ok
());
return
0
;
}
int
del_data
(
int
id
,
const
char
*
key
,
int
key_len
)
{
rocksdb
::
WriteOptions
options
;
options
.
disableWAL
=
true
;
rocksdb
::
Status
s
=
_db
->
Delete
(
options
,
_handles
[
id
],
rocksdb
::
Slice
(
key
,
key_len
));
assert
(
s
.
ok
());
return
0
;
}
int
flush
(
int
id
)
{
rocksdb
::
Status
s
=
_db
->
Flush
(
rocksdb
::
FlushOptions
(),
_handles
[
id
]);
assert
(
s
.
ok
());
return
0
;
}
rocksdb
::
Iterator
*
get_iterator
(
int
id
)
{
return
_db
->
NewIterator
(
rocksdb
::
ReadOptions
(),
_handles
[
id
]);
}
int
get_estimate_key_num
(
uint64_t
&
num_keys
)
{
_db
->
GetAggregatedIntProperty
(
"rocksdb.estimate-num-keys"
,
&
num_keys
);
return
0
;
}
private:
std
::
vector
<
rocksdb
::
ColumnFamilyHandle
*>
_handles
;
rocksdb
::
DB
*
_db
;
};
}
// namespace distributed
}
// namespace paddle
paddle/fluid/distributed/ps/table/depends/sparse_utils.h
0 → 100644
View file @
de2e6515
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <functional>
#include <memory>
#include <string>
#include <utility>
#include <vector>
namespace
paddle
{
namespace
distributed
{
// A non-owning view over one sparse pull request: `numel_` feature signs
// (keys) with their pull frequencies, each value being `dim_` floats wide.
// feasigns_/frequencies_ point into caller-owned or deserialized buffers.
struct PullSparseValue {
  // FIX: the default constructor left every member indeterminate; default
  // member initializers below give it a safe empty state.
  PullSparseValue() {}

  explicit PullSparseValue(int numel, int dim)
      : numel_(numel),
        dim_(dim),
        is_training_(true),
        feasigns_(nullptr),
        frequencies_(nullptr) {}

  // Borrows the vectors' storage — they must outlive this object.
  explicit PullSparseValue(std::vector<uint64_t>& feasigns,     // NOLINT
                           std::vector<uint32_t>& frequencies,  // NOLINT
                           int dim) {
    numel_ = feasigns.size();
    dim_ = dim;
    is_training_ = true;
    feasigns_ = feasigns.data();
    frequencies_ = frequencies.data();
  }

  // Points the view into a serialized buffer. `numel_` must already be set.
  // Layout:
  //   |---isTraining--------------|
  //   |---8*{num}B(keysData)------|
  //   |---4*{num}B(Frequencies)---|
  void DeserializeFromBytes(void* bytes) {
    auto* begin = reinterpret_cast<char*>(bytes);
    is_training_ = reinterpret_cast<bool*>(begin)[0];
    feasigns_ = reinterpret_cast<uint64_t*>(begin + sizeof(bool));
    frequencies_ = reinterpret_cast<uint32_t*>(
        begin + sizeof(bool) + sizeof(uint64_t) * numel_);
  }

  // Collects into *offset_shard the indices of keys that hash (mod shard_num)
  // onto `shard_id`.
  void Fission(const int shard_id,
               const int shard_num,
               std::vector<int>* offset_shard) const {
    offset_shard->reserve(numel_ / shard_num + 1);
    for (int x = 0; x < numel_; ++x) {
      if (static_cast<int>(feasigns_[x] % shard_num) == shard_id) {
        offset_shard->push_back(x);
      }
    }
  }

  int numel_ = 0;                   // number of keys in this request
  int dim_ = 0;                     // value width per key
  bool is_training_ = true;         // matches what the other ctors set
  uint64_t* feasigns_ = nullptr;    // borrowed key array
  uint32_t* frequencies_ = nullptr; // borrowed frequency array
};
}
// namespace distributed
}
// namespace paddle
paddle/fluid/distributed/ps/table/graph/class_macro.h
0 → 100644
View file @
de2e6515
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
// Helpers that expand to a sequence of `friend class X;` declarations.
// DECLARE_GRAPH_FRIEND_CLASS emits one declaration; each DECLARE_<n>_ variant
// peels its first argument and recurses into DECLARE_<n-1>_, so up to 11
// friends can be declared in one invocation.
#define DECLARE_GRAPH_FRIEND_CLASS(a) friend class a;
#define DECLARE_1_FRIEND_CLASS(a, ...) DECLARE_GRAPH_FRIEND_CLASS(a)
#define DECLARE_2_FRIEND_CLASS(a, ...) \
DECLARE_GRAPH_FRIEND_CLASS(a) DECLARE_1_FRIEND_CLASS(__VA_ARGS__)
#define DECLARE_3_FRIEND_CLASS(a, ...) \
DECLARE_GRAPH_FRIEND_CLASS(a) DECLARE_2_FRIEND_CLASS(__VA_ARGS__)
#define DECLARE_4_FRIEND_CLASS(a, ...) \
DECLARE_GRAPH_FRIEND_CLASS(a) DECLARE_3_FRIEND_CLASS(__VA_ARGS__)
#define DECLARE_5_FRIEND_CLASS(a, ...) \
DECLARE_GRAPH_FRIEND_CLASS(a) DECLARE_4_FRIEND_CLASS(__VA_ARGS__)
#define DECLARE_6_FRIEND_CLASS(a, ...) \
DECLARE_GRAPH_FRIEND_CLASS(a) DECLARE_5_FRIEND_CLASS(__VA_ARGS__)
#define DECLARE_7_FRIEND_CLASS(a, ...) \
DECLARE_GRAPH_FRIEND_CLASS(a) DECLARE_6_FRIEND_CLASS(__VA_ARGS__)
#define DECLARE_8_FRIEND_CLASS(a, ...) \
DECLARE_GRAPH_FRIEND_CLASS(a) DECLARE_7_FRIEND_CLASS(__VA_ARGS__)
#define DECLARE_9_FRIEND_CLASS(a, ...) \
DECLARE_GRAPH_FRIEND_CLASS(a) DECLARE_8_FRIEND_CLASS(__VA_ARGS__)
#define DECLARE_10_FRIEND_CLASS(a, ...) \
DECLARE_GRAPH_FRIEND_CLASS(a) DECLARE_9_FRIEND_CLASS(__VA_ARGS__)
#define DECLARE_11_FRIEND_CLASS(a, ...) \
DECLARE_GRAPH_FRIEND_CLASS(a) DECLARE_10_FRIEND_CLASS(__VA_ARGS__)
// Entry point: `n` must literally equal the number of class names passed.
#define REGISTER_GRAPH_FRIEND_CLASS(n, ...) \
DECLARE_##n##_FRIEND_CLASS(__VA_ARGS__)
paddle/fluid/distributed/ps/table/graph/graph_edge.cc
0 → 100644
View file @
de2e6515
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/distributed/ps/table/graph/graph_edge.h"
#include <cstring>
namespace
paddle
{
namespace
distributed
{
// Record an outgoing edge; the unweighted blob stores only the id and
// ignores `weight`.
// FIX: the definition carried a default argument (`float weight = 1`) that is
// absent from the header declaration. Out-of-line defaults are TU-local and,
// on a virtual function, statically bound at the call site — a classic trap.
// Removed to match the declaration in graph_edge.h.
void GraphEdgeBlob::add_edge(int64_t id, float weight) {
  id_arr.push_back(id);
}
// Record an outgoing edge together with its weight (parallel arrays: entry i
// of weight_arr belongs to entry i of id_arr).
// FIX: dropped the out-of-line default argument (`float weight = 1`) — it was
// TU-local, absent from the header declaration, and statically bound on a
// virtual function. Matches the declaration in graph_edge.h.
void WeightedGraphEdgeBlob::add_edge(int64_t id, float weight) {
  id_arr.push_back(id);
  weight_arr.push_back(weight);
}
}
// namespace distributed
}
// namespace paddle
paddle/fluid/distributed/ps/table/graph/graph_edge.h
0 → 100644
View file @
de2e6515
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cstddef>
#include <cstdint>
#include <vector>
namespace
paddle
{
namespace
distributed
{
// Adjacency list for one graph node, ids only. The base class reports a
// constant weight of 1 for every edge; WeightedGraphEdgeBlob overrides that.
class GraphEdgeBlob {
 public:
  GraphEdgeBlob() {}
  virtual ~GraphEdgeBlob() {}

  // Number of edges recorded so far.
  size_t size() { return id_arr.size(); }

  // Appends one edge; defined in graph_edge.cc.
  virtual void add_edge(int64_t id, float weight);

  // Neighbor id at position `idx` (unchecked).
  int64_t get_id(int idx) { return id_arr[idx]; }

  // Unweighted blob: every edge weighs 1.
  virtual float get_weight(int idx) { return 1; }

  // Mutable access to the raw id array (used by samplers/serializers).
  std::vector<int64_t>& export_id_array() { return id_arr; }

 protected:
  std::vector<int64_t> id_arr;  // neighbor ids, in insertion order
};
// Adjacency list that also stores a per-edge weight, kept in a parallel
// array to the inherited id array.
class WeightedGraphEdgeBlob : public GraphEdgeBlob {
 public:
  WeightedGraphEdgeBlob() {}
  virtual ~WeightedGraphEdgeBlob() {}

  // Appends one edge with its weight; defined in graph_edge.cc.
  virtual void add_edge(int64_t id, float weight);

  // Weight of the edge at position `idx` (unchecked).
  virtual float get_weight(int idx) { return weight_arr[idx]; }

 protected:
  std::vector<float> weight_arr;  // weight_arr[i] pairs with id_arr[i]
};
}
// namespace distributed
}
// namespace paddle
paddle/fluid/distributed/ps/table/graph/graph_node.cc
0 → 100644
View file @
de2e6515
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/distributed/ps/table/graph/graph_node.h"
#include <cstring>
namespace
paddle
{
namespace
distributed
{
// Releases the owned sampler and edge blob. `delete` on nullptr is a no-op,
// so no guards are needed; pointers are nulled defensively afterwards.
GraphNode::~GraphNode() {
  delete sampler;
  sampler = nullptr;
  delete edges;
  edges = nullptr;
}
// Out-of-line definitions of Node's serialized-field sizes (bytes).
int Node::weight_size = sizeof(float);   // one edge weight on the wire
int Node::id_size = sizeof(uint64_t);    // one node id on the wire
int Node::int_size = sizeof(int);        // one count/length field on the wire
// Serialized size of a featureless node: the id plus a (zero) feature count.
// `need_feature` is ignored here because a plain Node carries no features.
int Node::get_size(bool need_feature) { return id_size + int_size; }
// Serialize this node into `buffer`: the id followed by a feature count of 0
// (a plain Node has no features, whatever `need_feature` says). The buffer
// must hold at least get_size() bytes.
void Node::to_buffer(char* buffer, bool need_feature) {
  memcpy(buffer, &id, id_size);
  buffer += id_size;
  int feat_num = 0;
  memcpy(buffer, &feat_num, sizeof(int));
}
// Deserialize a plain node: only the leading id is read; the feature-count
// field that to_buffer wrote is ignored.
void Node::recover_from_buffer(char* buffer) { memcpy(&id, buffer, id_size); }
// Serialized size of a feature node: id + feature count, plus — when features
// are requested — one length field and the bytes of every feature string.
int FeatureNode::get_size(bool need_feature) {
  int size = id_size + int_size;  // id, feat_num
  if (need_feature) {
    size += feature.size() * int_size;  // one length prefix per feature
    for (const std::string& fea : feature) {
      size += fea.size();
    }
  }
  return size;
}
// Lazily allocate the adjacency blob; weighted requests get the subtype that
// also stores per-edge weights. A second call is a no-op, even with a
// different `is_weighted` — the first caller decides the representation.
void GraphNode::build_edges(bool is_weighted) {
  if (edges != nullptr) {
    return;
  }
  if (is_weighted) {
    edges = new WeightedGraphEdgeBlob();
  } else {
    edges = new GraphEdgeBlob();
  }
}
// Lazily build the neighbor sampler over `edges`. Known types: "random"
// (uniform) and "weighted" (weight-proportional). Idempotent: a sampler that
// already exists is kept.
// FIX: an unrecognized `sample_type` used to leave `sampler` null and then
// unconditionally call sampler->build(edges) — a guaranteed null dereference.
// Unknown types are now a no-op (the node simply stays without a sampler).
void GraphNode::build_sampler(std::string sample_type) {
  if (sampler != nullptr) {
    return;
  }
  if (sample_type == "random") {
    sampler = new RandomSampler();
  } else if (sample_type == "weighted") {
    sampler = new WeightedSampler();
  }
  if (sampler != nullptr) {
    sampler->build(edges);
  }
}
// Serialize this node into `buffer` (which must hold get_size(need_feature)
// bytes): id, then feature count, then each feature as a length-prefixed byte
// string. With need_feature == false only id and a zero count are written.
void FeatureNode::to_buffer(char* buffer, bool need_feature) {
  memcpy(buffer, &id, id_size);
  buffer += id_size;
  int feat_num = 0;
  int feat_len;
  if (!need_feature) {
    memcpy(buffer, &feat_num, sizeof(int));
    return;
  }
  feat_num += feature.size();
  memcpy(buffer, &feat_num, sizeof(int));
  buffer += sizeof(int);
  for (int i = 0; i < feat_num; ++i) {
    feat_len = feature[i].size();
    memcpy(buffer, &feat_len, sizeof(int));
    buffer += sizeof(int);
    memcpy(buffer, feature[i].c_str(), feature[i].size());
    buffer += feature[i].size();
  }
}
// Deserialize the wire format written by to_buffer: id, feature count, then
// each feature as a length-prefixed byte string.
// FIX: the original copied each feature into a variable-length array
// (`char str[feat_len + 1]` — a VLA, not standard C++) and then built the
// string with std::string(str), which truncates at any embedded '\0'.
// Constructing std::string directly from (pointer, length) is standard,
// allocation-exact, and preserves embedded NUL bytes.
void FeatureNode::recover_from_buffer(char* buffer) {
  int feat_num, feat_len;
  memcpy(&id, buffer, id_size);
  buffer += id_size;
  memcpy(&feat_num, buffer, sizeof(int));
  buffer += sizeof(int);
  feature.clear();
  for (int i = 0; i < feat_num; ++i) {
    memcpy(&feat_len, buffer, sizeof(int));
    buffer += sizeof(int);
    feature.emplace_back(buffer, feat_len);
    buffer += feat_len;
  }
}
}
// namespace distributed
}
// namespace paddle
paddle/fluid/distributed/ps/table/graph/graph_node.h
0 → 100644
View file @
de2e6515
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cstring>
#include <iostream>
#include <memory>
#include <set>
#include <sstream>
#include <vector>
#include "glog/logging.h"
#include "paddle/fluid/distributed/ps/table/graph/graph_weighted_sampler.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/string/string_helper.h"
namespace
paddle
{
namespace
distributed
{
// Base class for a graph vertex. Provides the id plus a virtual interface
// for edges, sampling, features and (de)serialization; most methods are
// no-op defaults overridden by GraphNode / FeatureNode below.
class Node {
 public:
  Node() {}
  Node(uint64_t id) : id(id) {}
  virtual ~Node() {}
  // Byte widths used by the serialization format; static definitions live
  // in graph_node.cc.
  static int id_size, int_size, weight_size;
  uint64_t get_id() { return id; }
  // Same id reinterpreted as signed, for exposure to Python bindings.
  int64_t get_py_id() { return (int64_t)id; }
  void set_id(uint64_t id) { this->id = id; }
  // Edge/sampler hooks: no-ops here, implemented by GraphNode.
  virtual void build_edges(bool is_weighted) {}
  virtual void build_sampler(std::string sample_type) {}
  virtual void add_edge(uint64_t id, float weight) {}
  // Default sampling returns no neighbors.
  virtual std::vector<int> sample_k(
      int k, const std::shared_ptr<std::mt19937_64> rng) {
    return std::vector<int>();
  }
  virtual uint64_t get_neighbor_id(int idx) { return 0; }
  virtual float get_neighbor_weight(int idx) { return 1.; }
  // Serialization; defined in graph_node.cc.
  virtual int get_size(bool need_feature);
  virtual void to_buffer(char* buffer, bool need_feature);
  virtual void recover_from_buffer(char* buffer);
  // Feature hooks: empty defaults, implemented by FeatureNode.
  virtual std::string get_feature(int idx) { return std::string(""); }
  virtual int get_feature_ids(std::vector<uint64_t>* res) const { return 0; }
  virtual int get_feature_ids(int slot_idx, std::vector<uint64_t>* res) const {
    return 0;
  }
  virtual void set_feature(int idx, const std::string& str) {}
  virtual void set_feature_size(int size) {}
  virtual int get_feature_size() { return 0; }
  virtual size_t get_neighbor_size() { return 0; }

 protected:
  uint64_t id;
  // NOTE(review): set by subclasses' build_edges; not initialized here —
  // reading it before build_edges() yields an indeterminate value.
  bool is_weighted;
};
// Node with adjacency stored in a GraphEdgeBlob and an optional Sampler
// used for neighbor sampling. build_edges()/build_sampler() (defined in
// graph_node.cc) must run before the forwarding accessors below are used;
// they dereference `edges` / `sampler` without null checks.
class GraphNode : public Node {
 public:
  GraphNode() : Node(), sampler(nullptr), edges(nullptr) {}
  GraphNode(uint64_t id) : Node(id), sampler(nullptr), edges(nullptr) {}
  virtual ~GraphNode();
  virtual void build_edges(bool is_weighted);
  virtual void build_sampler(std::string sample_type);
  // Forwards to the edge blob; requires build_edges() to have been called.
  virtual void add_edge(uint64_t id, float weight) {
    edges->add_edge(id, weight);
  }
  // Requires build_sampler() to have been called.
  virtual std::vector<int> sample_k(
      int k, const std::shared_ptr<std::mt19937_64> rng) {
    return sampler->sample_k(k, rng);
  }
  virtual uint64_t get_neighbor_id(int idx) { return edges->get_id(idx); }
  virtual float get_neighbor_weight(int idx) { return edges->get_weight(idx); }
  virtual size_t get_neighbor_size() { return edges->size(); }

 protected:
  Sampler* sampler;
  GraphEdgeBlob* edges;
};
// Node subclass that stores per-slot feature strings. Feature payloads may
// be binary (packed uint64 ids), so nothing below may treat them as
// NUL-terminated C strings.
class FeatureNode : public Node {
 public:
  FeatureNode() : Node() {}
  FeatureNode(uint64_t id) : Node(id) {}
  virtual ~FeatureNode() {}
  // Serialization; definitions live in graph_node.cc.
  virtual int get_size(bool need_feature);
  virtual void to_buffer(char* buffer, bool need_feature);
  virtual void recover_from_buffer(char* buffer);

  // Returns the idx-th feature string, or "" if idx is out of range.
  virtual std::string get_feature(int idx) {
    if (idx < static_cast<int>(this->feature.size())) {
      return this->feature[idx];
    } else {
      return std::string("");
    }
  }

  // Appends to *res the uint64 ids packed inside every feature slot.
  // Each slot's byte length must be a multiple of sizeof(uint64_t).
  virtual int get_feature_ids(std::vector<uint64_t>* res) const {
    PADDLE_ENFORCE_NOT_NULL(res,
                            paddle::platform::errors::InvalidArgument(
                                "get_feature_ids res should not be null"));
    errno = 0;
    // const auto& avoids copying each feature string per iteration.
    for (const auto& feature_item : feature) {
      const uint64_t* feas =
          reinterpret_cast<const uint64_t*>(feature_item.c_str());
      size_t num = feature_item.length() / sizeof(uint64_t);
      CHECK((feature_item.length() % sizeof(uint64_t)) == 0)
          << "bad feature_item: [" << feature_item << "]";
      size_t n = res->size();
      res->resize(n + num);
      for (size_t i = 0; i < num; ++i) {
        (*res)[n + i] = feas[i];
      }
    }
    PADDLE_ENFORCE_EQ(
        errno,
        0,
        paddle::platform::errors::InvalidArgument(
            "get_feature_ids get errno should be 0, but got %d.", errno));
    return 0;
  }

  // Overwrites *res with the uint64 ids packed inside slot `slot_idx`;
  // *res is left empty when the slot does not exist.
  virtual int get_feature_ids(int slot_idx, std::vector<uint64_t>* res) const {
    PADDLE_ENFORCE_NOT_NULL(res,
                            paddle::platform::errors::InvalidArgument(
                                "get_feature_ids res should not be null"));
    res->clear();
    errno = 0;
    if (slot_idx < static_cast<int>(this->feature.size())) {
      const std::string& s = this->feature[slot_idx];
      const uint64_t* feas = reinterpret_cast<const uint64_t*>(s.c_str());
      size_t num = s.length() / sizeof(uint64_t);
      CHECK((s.length() % sizeof(uint64_t)) == 0)
          << "bad feature_item: [" << s << "]";
      res->resize(num);
      for (size_t i = 0; i < num; ++i) {
        (*res)[i] = feas[i];
      }
    }
    PADDLE_ENFORCE_EQ(
        errno,
        0,
        paddle::platform::errors::InvalidArgument(
            "get_feature_ids get errno should be 0, but got %d.", errno));
    return 0;
  }

  // Returns a writable pointer to slot idx, growing the slot vector if
  // needed.
  virtual std::string* mutable_feature(int idx) {
    if (idx >= static_cast<int>(this->feature.size())) {
      this->feature.resize(idx + 1);
    }
    return &(this->feature[idx]);
  }

  // Stores str into slot idx, growing the slot vector if needed.
  virtual void set_feature(int idx, const std::string& str) {
    if (idx >= static_cast<int>(this->feature.size())) {
      this->feature.resize(idx + 1);
    }
    this->feature[idx] = str;
  }
  virtual void set_feature_size(int size) { this->feature.resize(size); }
  virtual int get_feature_size() { return this->feature.size(); }

  // Parses each decimal string in feat_str as a T and returns the values'
  // raw bytes concatenated.
  template <typename T>
  static std::string parse_value_to_bytes(std::vector<std::string> feat_str) {
    T v;
    size_t Tsize = sizeof(T) * feat_str.size();
    // std::string buffer instead of the old VLA: VLAs are non-standard in
    // C++, and the string tracks the exact byte length (interior NULs
    // included).
    std::string out(Tsize, '\0');
    for (size_t i = 0; i < feat_str.size(); i++) {
      std::stringstream ss(feat_str[i]);
      ss >> v;
      std::memcpy(&out[sizeof(T) * i], reinterpret_cast<char*>(&v), sizeof(T));
    }
    return out;
  }

  // Iterator-range overload writing into *output.
  template <typename T>
  static void parse_value_to_bytes(
      std::vector<std::string>::iterator feat_str_begin,
      std::vector<std::string>::iterator feat_str_end,
      std::string* output) {
    T v;
    size_t feat_str_size = feat_str_end - feat_str_begin;
    size_t Tsize = sizeof(T) * feat_str_size;
    std::string buffer(Tsize, '\0');
    for (size_t i = 0; i < feat_str_size; i++) {
      std::stringstream ss(*(feat_str_begin + i));
      ss >> v;
      std::memcpy(
          &buffer[sizeof(T) * i], reinterpret_cast<char*>(&v), sizeof(T));
    }
    // BUGFIX: the old code did output->assign(char_array), which stops at
    // the first NUL byte and truncated binary payloads (any T value with a
    // zero byte). Moving the std::string keeps all Tsize bytes.
    *output = std::move(buffer);
  }

  // Reinterprets feat_str as a packed array of T and unpacks it.
  template <typename T>
  static std::vector<T> parse_bytes_to_array(std::string feat_str) {
    T v;
    std::vector<T> out;
    size_t start = 0;
    const char* buffer = feat_str.data();
    while (start < feat_str.size()) {
      std::memcpy(reinterpret_cast<char*>(&v), buffer + start, sizeof(T));
      start += sizeof(T);
      out.push_back(v);
    }
    return out;
  }

  // str_ptr overload: parses directly into *output's tail, avoiding
  // per-element temporaries.
  template <typename T>
  static void parse_value_to_bytes(
      std::vector<paddle::string::str_ptr>::iterator feat_str_begin,
      std::vector<paddle::string::str_ptr>::iterator feat_str_end,
      std::string* output) {
    size_t feat_str_size = feat_str_end - feat_str_begin;
    size_t Tsize = sizeof(T) * feat_str_size;
    size_t num = output->length();
    output->resize(num + Tsize);
    // NOTE(review): assumes &(*output)[num] is suitably aligned for T;
    // holds in practice when num is a multiple of sizeof(T) but is not
    // guaranteed by the type system — confirm for new call sites.
    T* fea_ptrs = reinterpret_cast<T*>(&(*output)[num]);
    thread_local paddle::string::str_ptr_stream ss;
    for (size_t i = 0; i < feat_str_size; i++) {
      ss.reset(*(feat_str_begin + i));
      ss >> fea_ptrs[i];
    }
  }

 protected:
  // feature[slot] holds that slot's payload; may contain arbitrary bytes.
  std::vector<std::string> feature;
};
}
// namespace distributed
}
// namespace paddle
paddle/fluid/distributed/ps/table/graph/graph_weighted_sampler.cc
0 → 100644
View file @
de2e6515
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/distributed/ps/table/graph/graph_weighted_sampler.h"
#include <iostream>
#include <memory>
#include <unordered_map>
#include "paddle/fluid/framework/generator.h"
namespace
paddle
{
namespace
distributed
{
void
RandomSampler
::
build
(
GraphEdgeBlob
*
edges
)
{
this
->
edges
=
edges
;
}
// Uniformly samples k distinct edge indices from [0, edges->size()).
// If k >= n, every index is returned. Otherwise a sparse partial
// Fisher-Yates is used: replace_map plays the role of the shuffled array,
// recording only slots whose "value" differs from their position, so the
// whole pass is O(k) expected time and memory regardless of n.
// The sequence of RNG draws is identical to the previous implementation.
std::vector<int> RandomSampler::sample_k(
    int k, const std::shared_ptr<std::mt19937_64> rng) {
  int n = edges->size();
  std::vector<int> sample_result;
  if (k >= n) {
    // Fewer edges than requested: return all of them.
    sample_result.reserve(n);
    for (int i = 0; i < n; i++) {
      sample_result.push_back(i);
    }
    return sample_result;
  }
  sample_result.reserve(k);  // avoids reallocation during the draw loop
  std::unordered_map<int, int> replace_map;
  while (k--) {
    std::uniform_int_distribution<int> distrib(0, n - 1);
    int rand_int = distrib(*rng);
    auto iter = replace_map.find(rand_int);
    if (iter == replace_map.end()) {
      sample_result.push_back(rand_int);
    } else {
      sample_result.push_back(iter->second);
    }
    // Move the value living at the (virtual) last slot into the slot just
    // picked, so the shrunken range [0, n-1) still covers every unpicked
    // index exactly once.
    iter = replace_map.find(n - 1);
    if (iter == replace_map.end()) {
      replace_map[rand_int] = n - 1;
    } else {
      replace_map[rand_int] = iter->second;
    }
    --n;
  }
  return sample_result;
}
// A freshly constructed tree node is detached: no children, no edge blob.
// (weight/count/idx are set later by build_one().)
WeightedSampler::WeightedSampler()
    : left(nullptr), right(nullptr), edges(nullptr) {}
// Recursively releases both subtrees. `delete` on a null pointer is a
// no-op, so no explicit guards are needed.
WeightedSampler::~WeightedSampler() {
  delete left;
  left = nullptr;
  delete right;
  right = nullptr;
}
// (Re)builds the weight segment tree over `edges`. Any subtrees from a
// previous build are released first so rebuilding does not leak.
void WeightedSampler::build(GraphEdgeBlob* edges) {
  delete left;  // delete on nullptr is a no-op
  left = nullptr;
  delete right;
  right = nullptr;
  if (edges->size() == 0) {
    // BUGFIX: build_one(edges, 0, 0) never reaches the single-element leaf
    // case and recursed forever (stack overflow). An empty blob yields an
    // empty tree instead.
    this->edges = edges;
    weight = 0;
    count = 0;
    return;
  }
  // The blob passed to a weighted sampler is always a WeightedGraphEdgeBlob;
  // static_cast makes the downcast explicit (was a C-style cast).
  build_one(static_cast<WeightedGraphEdgeBlob*>(edges), 0, edges->size());
}
// Recursively builds the subtree covering edge indices [start, end).
// A leaf (start + 1 == end) stores a single edge's index and weight; an
// interior node splits the range in half and caches the aggregated weight
// and edge count of its children. Requires start < end.
void WeightedSampler::build_one(WeightedGraphEdgeBlob* edges,
                                int start,
                                int end) {
  count = 0;
  this->edges = edges;
  if (start + 1 == end) {
    // Leaf: exactly one edge in range.
    left = right = nullptr;
    idx = start;
    count = 1;
    weight = edges->get_weight(idx);
  } else {
    // Interior: build both halves, then aggregate their totals.
    left = new WeightedSampler();
    right = new WeightedSampler();
    left->build_one(edges, start, start + (end - start) / 2);
    right->build_one(edges, start + (end - start) / 2, end);
    weight = left->weight + right->weight;
    count = left->count + right->count;
  }
}
// Draws k distinct edge indices, each chosen with probability proportional
// to its weight among the edges not yet picked (weighted sampling without
// replacement). If k >= count, all indices are returned.
std::vector<int> WeightedSampler::sample_k(
    int k, const std::shared_ptr<std::mt19937_64> rng) {
  if (k >= count) {
    k = count;
    std::vector<int> sample_result;
    for (int i = 0; i < k; i++) {
      sample_result.push_back(i);
    }
    return sample_result;
  }
  std::vector<int> sample_result;
  // Out-parameter of sample(): the weight of the edge just drawn. It is
  // always written at the leaf before being read, so no initializer is
  // required here.
  float subtract;
  // Per-subtree totals of weight/count consumed by earlier draws; absent
  // keys read as 0 via operator[], which is exactly the wanted default.
  std::unordered_map<WeightedSampler*, float> subtract_weight_map;
  std::unordered_map<WeightedSampler*, int> subtract_count_map;
  std::uniform_real_distribution<float> distrib(0, 1.0);
  while (k--) {
    float query_weight = distrib(*rng);
    // Rescale the uniform [0,1) draw onto the still-available weight mass
    // of the whole tree.
    query_weight *= weight - subtract_weight_map[this];
    sample_result.push_back(sample(
        query_weight, subtract_weight_map, subtract_count_map, subtract));
  }
  return sample_result;
}
// Descends the segment tree to the edge whose cumulative *available*
// weight (built weight minus what earlier draws consumed) covers
// query_weight, then marks that edge as consumed by updating the subtract
// maps along the path back up.
//   query_weight        - remaining target mass within this subtree
//   subtract_weight_map - per-subtree weight already consumed
//   subtract_count_map  - per-subtree number of edges already drawn
//   subtract (out)      - weight of the edge that was picked
// Returns the picked edge's index.
int WeightedSampler::sample(
    float query_weight,
    std::unordered_map<WeightedSampler*, float>& subtract_weight_map,
    std::unordered_map<WeightedSampler*, int>& subtract_count_map,
    float& subtract) {
  if (left == nullptr) {
    // Leaf: this edge is now fully consumed.
    subtract_weight_map[this] = weight;
    subtract = weight;
    subtract_count_map[this] = 1;
    return idx;
  }
  int left_count = left->count - subtract_count_map[left];
  int right_count = right->count - subtract_count_map[right];
  float left_subtract = subtract_weight_map[left];
  int return_idx;
  // Go left when the right side is exhausted, or when the left side still
  // has unpicked edges and its remaining weight covers the query.
  // NOTE(review): `a || b && c` parses as `a || (b && c)`; that grouping
  // matches the intent described above, but explicit parentheses would
  // silence the compiler warning and make it unambiguous.
  if (right_count == 0 ||
      left_count > 0 && left->weight - left_subtract >= query_weight) {
    return_idx = left->sample(
        query_weight, subtract_weight_map, subtract_count_map, subtract);
  } else {
    // Descend right with the left subtree's remaining mass peeled off.
    return_idx = right->sample(query_weight - (left->weight - left_subtract),
                               subtract_weight_map,
                               subtract_count_map,
                               subtract);
  }
  // Propagate the consumed weight/count up the path.
  subtract_weight_map[this] += subtract;
  subtract_count_map[this]++;
  return return_idx;
}
}
// namespace distributed
}
// namespace paddle
paddle/fluid/distributed/ps/table/graph/graph_weighted_sampler.h
0 → 100644
View file @
de2e6515
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <ctime>
#include <memory>
#include <random>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/distributed/ps/table/graph/graph_edge.h"
namespace
paddle
{
namespace
distributed
{
// Abstract strategy for sampling neighbor indices out of an edge blob.
class Sampler {
 public:
  virtual ~Sampler() {}
  // Prepares the sampler for the given edge blob (e.g. builds auxiliary
  // structures). Must be called before sample_k().
  virtual void build(GraphEdgeBlob* edges) = 0;
  // Returns k sampled edge indices (implementations may return fewer when
  // the blob holds fewer than k edges).
  virtual std::vector<int> sample_k(
      int k, const std::shared_ptr<std::mt19937_64> rng) = 0;
};
// Uniform (unweighted) neighbor sampler; definitions live in
// graph_weighted_sampler.cc.
class RandomSampler : public Sampler {
 public:
  virtual ~RandomSampler() {}
  virtual void build(GraphEdgeBlob* edges);
  virtual std::vector<int> sample_k(
      int k, const std::shared_ptr<std::mt19937_64> rng);
  // The blob being sampled; set by build(), not owned by this class.
  GraphEdgeBlob* edges;
};
// Weight-proportional sampler backed by a binary segment tree over the
// edge list: every node caches the total weight and edge count of its
// range, letting sample_k() draw without replacement via logarithmic
// descents. Definitions live in graph_weighted_sampler.cc.
class WeightedSampler : public Sampler {
 public:
  WeightedSampler();
  virtual ~WeightedSampler();
  // Children of this tree node (both null at a leaf); owned — released by
  // the destructor.
  WeightedSampler *left, *right;
  float weight;  // total weight of the covered edge range
  int count;     // number of edges in the covered range
  int idx;       // edge index; meaningful only at leaves
  // Backing edge blob; set by build()/build_one(), not deleted by the
  // destructor.
  GraphEdgeBlob* edges;
  virtual void build(GraphEdgeBlob* edges);
  // Builds the subtree covering edge indices [start, end).
  virtual void build_one(WeightedGraphEdgeBlob* edges, int start, int end);
  virtual std::vector<int> sample_k(
      int k, const std::shared_ptr<std::mt19937_64> rng);

 private:
  // One weighted draw; the subtract maps track mass consumed by earlier
  // draws so repeats are excluded. See the .cc for details.
  int sample(float query_weight,
             std::unordered_map<WeightedSampler*, float>& subtract_weight_map,
             std::unordered_map<WeightedSampler*, int>& subtract_count_map,
             float& subtract);
};
}
// namespace distributed
}
// namespace paddle
Prev
1
…
10
11
12
13
14
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment