OpenDAS / Paddle · Commits

Commit f0ef3442, authored Apr 26, 2023 by yuguo960516yuguo

2.3.2-dtk-22.10.1

parent ad08b8ce
Pipeline #227 failed with stages in 0 seconds
Changes: 274 · Pipelines: 1
Showing 20 changed files with 1873 additions and 0 deletions (+1873 -0):

paddle/fluid/distributed/fleet_executor/task_node.h (+140 -0)
paddle/fluid/distributed/fleet_executor/test/CMakeLists.txt (+72 -0)
paddle/fluid/distributed/fleet_executor/test/compute_interceptor_run_op_test.cc (+115 -0)
paddle/fluid/distributed/fleet_executor/test/compute_interceptor_test.cc (+85 -0)
paddle/fluid/distributed/fleet_executor/test/interceptor_ping_pong_test.cc (+81 -0)
paddle/fluid/distributed/fleet_executor/test/interceptor_ping_pong_with_brpc_test.cc (+140 -0)
paddle/fluid/distributed/fleet_executor/test/interceptor_pipeline_long_path_test.cc (+112 -0)
paddle/fluid/distributed/fleet_executor/test/interceptor_pipeline_short_path_test.cc (+123 -0)
paddle/fluid/distributed/fleet_executor/test/sink_interceptor_test.cc (+91 -0)
paddle/fluid/distributed/fleet_executor/test/source_interceptor_test.cc (+85 -0)
paddle/fluid/distributed/index_dataset/CMakeLists.txt (+19 -0)
paddle/fluid/distributed/index_dataset/index_dataset.proto (+33 -0)
paddle/fluid/distributed/index_dataset/index_sampler.cc (+138 -0)
paddle/fluid/distributed/index_dataset/index_sampler.h (+139 -0)
paddle/fluid/distributed/index_dataset/index_wrapper.cc (+202 -0)
paddle/fluid/distributed/index_dataset/index_wrapper.h (+125 -0)
paddle/fluid/distributed/ps/CMakeLists.txt (+4 -0)
paddle/fluid/distributed/ps/README.md (+39 -0)
paddle/fluid/distributed/ps/service/CMakeLists.txt (+122 -0)
paddle/fluid/distributed/ps/service/README.md (+8 -0)
Note: too many changes to show; to preserve performance only 274 of 274+ files are displayed.
paddle/fluid/distributed/fleet_executor/task_node.h
0 → 100644

// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <cstdint>
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/platform/macros.h"

namespace paddle {
namespace framework {
class OperatorBase;
class OpDesc;
}  // namespace framework
namespace distributed {

class TaskNode final {
 public:
  using OperatorBase = paddle::framework::OperatorBase;
  TaskNode(int64_t rank, int64_t task_id, int64_t max_run_times);
  TaskNode(int32_t role,
           int64_t rank,
           int64_t task_id,
           int64_t max_run_times,
           int64_t max_slot_nums);
  TaskNode(int32_t role,
           const std::vector<framework::OpDesc*>& op_descs,
           int64_t rank,
           int64_t task_id,
           int64_t max_run_times,
           int64_t max_slot_nums);
  TaskNode(int32_t role,
           const std::vector<framework::OperatorBase*>& ops,
           int64_t rank,
           int64_t task_id,
           int64_t max_run_times,
           int64_t max_slot_nums);
  TaskNode(paddle::framework::ProgramDesc* program,
           int64_t rank,
           int64_t max_run_times,
           int64_t max_slot_nums);
  TaskNode(paddle::framework::ProgramDesc* program, int64_t rank);
  // TODO(liyurui): This will be the only constructor for task node
  TaskNode(paddle::framework::ProgramDesc* program,
           int64_t task_id,
           int64_t rank,
           int64_t max_run_times,
           int64_t max_slot_nums);
  ~TaskNode() = default;

  void SetProgram(paddle::framework::ProgramDesc* program);
  void Init(bool use_feed_fetch_ops = true);
  int64_t rank() const { return rank_; }
  int64_t task_id() const { return task_id_; }
  int32_t role() const { return role_; }
  int64_t max_run_times() const { return max_run_times_; }
  int64_t max_slot_nums() const { return max_slot_nums_; }
  int64_t run_per_steps() const { return run_per_steps_; }
  int64_t run_at_offset() const { return run_at_offset_; }
  int64_t reply_up_per_steps() const { return reply_up_per_steps_; }
  int64_t send_down_per_steps() const { return send_down_per_steps_; }
  const std::unordered_map<int64_t, int64_t>& upstream() const {
    return upstream_;
  }
  const std::unordered_map<int64_t, int64_t>& downstream() const {
    return downstream_;
  }
  const std::string& type() const { return type_; }
  const paddle::framework::ProgramDesc* program() const { return program_; }
  const std::vector<OperatorBase*>& ops() const { return ops_; }
  const std::vector<std::unique_ptr<OperatorBase>>& unique_ops() const {
    return ops_vec_;
  }
  const std::unordered_map<const OperatorBase*, std::vector<std::string>>&
  unused_vars() const {
    return unused_vars_;
  }

  void SetRunPerSteps(int64_t value);
  void SetRunAtOffset(int64_t value);
  void SetReplyUpPerSteps(int64_t value);
  void SetSendDownPerSteps(int64_t value);
  void SetType(const std::string& type) { type_ = type; }
  void SetUnusedVars(
      const std::unordered_map<const OperatorBase*, std::vector<std::string>>&
          unused_vars) {
    unused_vars_ = unused_vars;
  }

  // upstream need buffs?
  bool AddUpstreamTask(int64_t task_id, int64_t buff_size = 1);
  bool AddDownstreamTask(int64_t task_id, int64_t buff_size = 1);
  std::string DebugString() const;

 private:
  DISABLE_COPY_AND_ASSIGN(TaskNode);
  TaskNode() = default;
  // ops_ will be removed in the future
  std::vector<OperatorBase*> ops_;
  // task_id-->buff_size
  std::unordered_map<int64_t, int64_t> upstream_;
  std::unordered_map<int64_t, int64_t> downstream_;
  framework::ProgramDesc* program_;
  std::vector<std::unique_ptr<OperatorBase>> ops_vec_;
  std::unordered_map<const OperatorBase*, std::vector<std::string>>
      unused_vars_;

  int32_t role_;
  int64_t rank_;
  int64_t task_id_;
  int64_t max_run_times_;
  int64_t max_slot_nums_;

  int64_t run_per_steps_{1};
  int64_t run_at_offset_{0};
  // one input produces multi times output
  int64_t reply_up_per_steps_{1};
  // one output need multi times input
  int64_t send_down_per_steps_{1};

  std::string type_;
};

}  // namespace distributed
}  // namespace paddle
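The constructors and edge-building methods declared above are what the tests later in this commit exercise. As a minimal sketch of how two task nodes are wired into a pipeline using only the API in this header (the argument values and the helper function name are illustrative, not part of the commit):

#include "paddle/fluid/distributed/fleet_executor/task_node.h"

using paddle::distributed::TaskNode;

// Hypothetical helper: wire a two-stage pipeline where task 0 feeds task 1
// through a buffer of size 2. Arguments follow the
// (role, rank, task_id, max_run_times, max_slot_nums) overload above.
void BuildTwoStagePipeline() {
  TaskNode* upstream = new TaskNode(/*role=*/0, /*rank=*/0, /*task_id=*/0,
                                    /*max_run_times=*/2, /*max_slot_nums=*/0);
  TaskNode* downstream = new TaskNode(0, 0, 1, 2, 0);
  // Both endpoints record the edge and its buffer size (task_id->buff_size).
  upstream->AddDownstreamTask(/*task_id=*/1, /*buff_size=*/2);
  downstream->AddUpstreamTask(/*task_id=*/0, /*buff_size=*/2);
}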
paddle/fluid/distributed/fleet_executor/test/CMakeLists.txt
0 → 100644

set_source_files_properties(
  interceptor_ping_pong_test.cc PROPERTIES COMPILE_FLAGS
                                           ${DISTRIBUTE_COMPILE_FLAGS})
cc_test(
  interceptor_ping_pong_test
  SRCS interceptor_ping_pong_test.cc
  DEPS fleet_executor ${BRPC_DEPS})

set_source_files_properties(
  compute_interceptor_test.cc PROPERTIES COMPILE_FLAGS
                                         ${DISTRIBUTE_COMPILE_FLAGS})
cc_test(
  compute_interceptor_test
  SRCS compute_interceptor_test.cc
  DEPS fleet_executor ${BRPC_DEPS})

set_source_files_properties(
  source_interceptor_test.cc PROPERTIES COMPILE_FLAGS
                                        ${DISTRIBUTE_COMPILE_FLAGS})
cc_test(
  source_interceptor_test
  SRCS source_interceptor_test.cc
  DEPS fleet_executor ${BRPC_DEPS})

set_source_files_properties(
  sink_interceptor_test.cc PROPERTIES COMPILE_FLAGS
                                      ${DISTRIBUTE_COMPILE_FLAGS})
cc_test(
  sink_interceptor_test
  SRCS sink_interceptor_test.cc
  DEPS fleet_executor ${BRPC_DEPS})

set_source_files_properties(
  interceptor_pipeline_short_path_test.cc
  PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
cc_test(
  interceptor_pipeline_short_path_test
  SRCS interceptor_pipeline_short_path_test.cc
  DEPS fleet_executor ${BRPC_DEPS})

set_source_files_properties(
  interceptor_pipeline_long_path_test.cc PROPERTIES COMPILE_FLAGS
                                                    ${DISTRIBUTE_COMPILE_FLAGS})
cc_test(
  interceptor_pipeline_long_path_test
  SRCS interceptor_pipeline_long_path_test.cc
  DEPS fleet_executor ${BRPC_DEPS})

set_source_files_properties(
  compute_interceptor_run_op_test.cc PROPERTIES COMPILE_FLAGS
                                                ${DISTRIBUTE_COMPILE_FLAGS})
cc_test(
  compute_interceptor_run_op_test
  SRCS compute_interceptor_run_op_test.cc
  DEPS fleet_executor
       ${BRPC_DEPS}
       op_registry
       fill_constant_op
       elementwise_add_op
       scope
       device_context)

if(WITH_DISTRIBUTE
   AND WITH_PSCORE
   AND NOT (WITH_ASCEND OR WITH_ASCEND_CL))
  set_source_files_properties(
    interceptor_ping_pong_with_brpc_test.cc
    PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
  cc_test(
    interceptor_ping_pong_with_brpc_test
    SRCS interceptor_ping_pong_with_brpc_test.cc
    DEPS fleet_executor ${BRPC_DEPS})
endif()
paddle/fluid/distributed/fleet_executor/test/compute_interceptor_run_op_test.cc
0 → 100644

/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <iostream>
#include <unordered_map>

#include "gtest/gtest.h"
#include "paddle/fluid/distributed/fleet_executor/carrier.h"
#include "paddle/fluid/distributed/fleet_executor/global.h"
#include "paddle/fluid/distributed/fleet_executor/interceptor.h"
#include "paddle/fluid/distributed/fleet_executor/message_bus.h"
#include "paddle/fluid/distributed/fleet_executor/task_node.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/phi/core/kernel_registry.h"

USE_OP_ITSELF(elementwise_add);
USE_OP_ITSELF(fill_constant);

PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT);

namespace paddle {
namespace distributed {

std::vector<framework::OperatorBase*> GetOps() {
  framework::AttributeMap attrs;
  attrs["dtype"] = framework::proto::VarType::FP32;
  attrs["shape"] = phi::vectorize<int>({2, 3});
  attrs["value"] = 1.0f;

  auto zero_op = framework::OpRegistry::CreateOp(
      "fill_constant", {}, {{"Out", {"x"}}}, attrs);

  auto op = framework::OpRegistry::CreateOp("elementwise_add",
                                            {{"X", {"x"}}, {"Y", {"x"}}},
                                            {{"Out", {"out"}}},
                                            framework::AttributeMap());

  // NOTE: don't delete
  return {zero_op.release(), op.release()};
}

framework::Scope* GetScope() {
  framework::Scope* scope = new framework::Scope();
  scope->Var("x")->GetMutable<phi::DenseTensor>();
  scope->Var("out")->GetMutable<phi::DenseTensor>();
  return scope;
}

TEST(ComputeInterceptor, Compute) {
  std::vector<framework::OperatorBase*> ops = GetOps();
  framework::Scope* scope = GetScope();
  std::vector<framework::Scope*> scopes = {scope, scope};
  platform::Place place = platform::CPUPlace();

  std::string carrier_id = "0";
  Carrier* carrier =
      GlobalMap<std::string, Carrier>::Create(carrier_id, carrier_id);
  carrier->Init(0, {{SOURCE_ID, 0}, {0, 0}, {1, 0}, {SINK_ID, 0}});

  MessageBus* msg_bus = GlobalVal<MessageBus>::Create();
  msg_bus->Init(0, {{0, "127.0.0.0:0"}}, "");

  // FIXME: don't delete, otherwise interceptor will use undefined node
  TaskNode* source =
      new TaskNode(0, SOURCE_ID, 2);  // rank, task_id, max_run_times
  TaskNode* node_a = new TaskNode(0, ops, 0, 0, 2, 0);  // role, ops, rank, task_id
  TaskNode* node_b = new TaskNode(0, 0, 1, 2, 0);
  TaskNode* sink = new TaskNode(0, SINK_ID, 2);

  // source->a->b->sink
  source->AddDownstreamTask(0);
  node_a->AddUpstreamTask(SOURCE_ID);
  node_a->AddDownstreamTask(1);
  node_b->AddUpstreamTask(0);
  sink->AddUpstreamTask(1);
  node_b->AddDownstreamTask(SINK_ID);

  carrier->SetInterceptor(
      SOURCE_ID, InterceptorFactory::Create("Source", SOURCE_ID, source));
  auto* a = carrier->SetInterceptor(
      0, InterceptorFactory::Create("Compute", 0, node_a));
  carrier->SetInterceptor(1, InterceptorFactory::Create("Compute", 1, node_b));
  carrier->SetInterceptor(
      SINK_ID, InterceptorFactory::Create("Sink", SINK_ID, sink));

  a->SetPlace(place);
  a->SetMicroBatchScope(scopes);

  // start
  InterceptorMessage msg;
  msg.set_message_type(START);
  msg.set_dst_id(SOURCE_ID);
  carrier->EnqueueInterceptorMessage(msg);

  carrier->Wait();
  carrier->Release();
}

}  // namespace distributed
}  // namespace paddle
paddle/fluid/distributed/fleet_executor/test/compute_interceptor_test.cc
0 → 100644

/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <iostream>
#include <unordered_map>

#include "gtest/gtest.h"
#include "paddle/fluid/distributed/fleet_executor/carrier.h"
#include "paddle/fluid/distributed/fleet_executor/global.h"
#include "paddle/fluid/distributed/fleet_executor/interceptor.h"
#include "paddle/fluid/distributed/fleet_executor/message_bus.h"
#include "paddle/fluid/distributed/fleet_executor/task_node.h"

namespace paddle {
namespace distributed {

class StartInterceptor : public Interceptor {
 public:
  StartInterceptor(int64_t interceptor_id, TaskNode* node)
      : Interceptor(interceptor_id, node) {
    RegisterMsgHandle([this](const InterceptorMessage& msg) { NOP(msg); });
  }

  void NOP(const InterceptorMessage& msg) {
    if (msg.message_type() == STOP) {
      stop_ = true;
      InterceptorMessage stop;
      stop.set_message_type(STOP);
      Send(1, stop);  // stop 1, compute
      return;
    }
    std::cout << GetInterceptorId() << " recv msg from " << msg.src_id()
              << std::endl;
  }
};

TEST(ComputeInterceptor, Compute) {
  std::string carrier_id = "0";
  Carrier* carrier =
      GlobalMap<std::string, Carrier>::Create(carrier_id, carrier_id);
  carrier->Init(0, {{0, 0}, {1, 0}, {2, 0}});
  MessageBus* msg_bus = GlobalVal<MessageBus>::Create();
  msg_bus->Init(0, {{0, "127.0.0.0:0"}}, "");

  // NOTE: don't delete, otherwise interceptor will use undefined node
  TaskNode* node_a = new TaskNode(0, 0, 0, 3, 0);  // role, rank, task_id
  TaskNode* node_b = new TaskNode(0, 0, 1, 3, 0);
  TaskNode* node_c = new TaskNode(0, 0, 2, 3, 0);

  // a->b->c
  node_a->AddDownstreamTask(1, 3);
  node_b->AddUpstreamTask(0, 3);
  node_b->AddDownstreamTask(2);
  node_c->AddUpstreamTask(1);

  Interceptor* a = carrier->SetInterceptor(
      0, std::make_unique<StartInterceptor>(0, node_a));
  carrier->SetInterceptor(1, InterceptorFactory::Create("Compute", 1, node_b));
  carrier->SetInterceptor(2, InterceptorFactory::Create("Compute", 2, node_c));

  InterceptorMessage msg;
  msg.set_message_type(DATA_IS_READY);
  // test run three times
  a->Send(1, msg);
  a->Send(1, msg);
  a->Send(1, msg);

  carrier->Wait();
  carrier->Release();
}

}  // namespace distributed
}  // namespace paddle
paddle/fluid/distributed/fleet_executor/test/interceptor_ping_pong_test.cc
0 → 100644

/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <iostream>
#include <unordered_map>

#include "gtest/gtest.h"
#include "paddle/fluid/distributed/fleet_executor/carrier.h"
#include "paddle/fluid/distributed/fleet_executor/global.h"
#include "paddle/fluid/distributed/fleet_executor/interceptor.h"
#include "paddle/fluid/distributed/fleet_executor/message_bus.h"

namespace paddle {
namespace distributed {

class PingPongInterceptor : public Interceptor {
 public:
  PingPongInterceptor(int64_t interceptor_id, TaskNode* node)
      : Interceptor(interceptor_id, node) {
    RegisterMsgHandle([this](const InterceptorMessage& msg) { PingPong(msg); });
  }

  void PingPong(const InterceptorMessage& msg) {
    if (msg.message_type() == STOP) {
      stop_ = true;
      return;
    }
    std::cout << GetInterceptorId() << " recv msg, count=" << count_
              << std::endl;
    ++count_;
    if (count_ == 20) {
      InterceptorMessage stop;
      stop.set_message_type(STOP);
      Send(0, stop);
      Send(1, stop);
      StopCarrier();
      return;
    }

    InterceptorMessage resp;
    Send(msg.src_id(), resp);
  }

 private:
  int count_{0};
};

REGISTER_INTERCEPTOR(PingPong, PingPongInterceptor);

TEST(InterceptorTest, PingPong) {
  std::string carrier_id = "0";
  Carrier* carrier =
      GlobalMap<std::string, Carrier>::Create(carrier_id, carrier_id);
  carrier->Init(0, {{0, 0}, {1, 0}});
  MessageBus* msg_bus = GlobalVal<MessageBus>::Create();
  msg_bus->Init(0, {{0, "127.0.0.0:0"}}, "");

  Interceptor* a = carrier->SetInterceptor(
      0, InterceptorFactory::Create("PingPong", 0, nullptr));
  carrier->SetInterceptor(1, std::make_unique<PingPongInterceptor>(1, nullptr));

  InterceptorMessage msg;
  a->Send(1, msg);

  carrier->Wait();
}

}  // namespace distributed
}  // namespace paddle
paddle/fluid/distributed/fleet_executor/test/interceptor_ping_pong_with_brpc_test.cc
0 → 100644

/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <netinet/in.h>
#include <sys/socket.h>
#include <time.h>
#include <unistd.h>

#include <iostream>
#include <unordered_map>

#include "gtest/gtest.h"
#include "paddle/fluid/distributed/fleet_executor/carrier.h"
#include "paddle/fluid/distributed/fleet_executor/global.h"
#include "paddle/fluid/distributed/fleet_executor/interceptor.h"
#include "paddle/fluid/distributed/fleet_executor/message_bus.h"

namespace paddle {
namespace distributed {

class PingPongInterceptor : public Interceptor {
 public:
  PingPongInterceptor(int64_t interceptor_id, TaskNode* node)
      : Interceptor(interceptor_id, node) {
    RegisterMsgHandle([this](const InterceptorMessage& msg) { PingPong(msg); });
  }

  void PingPong(const InterceptorMessage& msg) {
    if (msg.message_type() == STOP) {
      stop_ = true;
      StopCarrier();
      return;
    }
    std::cout << GetInterceptorId() << " recv msg, count=" << count_
              << std::endl;
    ++count_;
    if (count_ == 20 && GetInterceptorId() == 0) {
      InterceptorMessage stop;
      stop.set_message_type(STOP);
      Send(0, stop);
      Send(1, stop);
      return;
    }

    InterceptorMessage resp;
    int64_t dst = GetInterceptorId() == 0 ? 1 : 0;
    Send(dst, resp);
  }

 private:
  int count_{0};
};

REGISTER_INTERCEPTOR(PingPong, PingPongInterceptor);

TEST(InterceptorTest, PingPong) {
  std::cout << "Ping pong test through brpc" << std::endl;
  unsigned int seed = time(0);
  // randomly generate two ports in the range [6000, 9000)
  int port0 = 6000 + rand_r(&seed) % 3000;
  int port1 = port0 + 1;

  // use a socket to check the availability of the first port
  int server_fd = -1;
  server_fd = socket(AF_INET, SOCK_STREAM, 0);
  int opt = 1;
  linger ling;
  ling.l_onoff = 1;
  ling.l_linger = 0;
  setsockopt(server_fd, SOL_SOCKET, SO_LINGER, &ling, sizeof(ling));
  setsockopt(server_fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt));
  struct sockaddr_in address;
  address.sin_family = AF_INET;
  address.sin_addr.s_addr = INADDR_ANY;
  address.sin_port = htons(port0);
  while (bind(server_fd, (struct sockaddr*)&address, sizeof(address)) == -1) {
    port0++;
    address.sin_port = htons(port0);
  }
  close(server_fd);

  // use another socket to check another port
  server_fd = socket(AF_INET, SOCK_STREAM, 0);
  setsockopt(server_fd, SOL_SOCKET, SO_LINGER, &ling, sizeof(ling));
  setsockopt(server_fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt));
  port1 = port0 + 1;
  address.sin_port = htons(port1);
  while (bind(server_fd, (struct sockaddr*)&address, sizeof(address)) == -1) {
    port1++;
    address.sin_port = htons(port1);
  }
  close(server_fd);

  std::string ip0 = "127.0.0.1:" + std::to_string(port0);
  std::string ip1 = "127.0.0.1:" + std::to_string(port1);
  std::cout << "ip0: " << ip0 << std::endl;
  std::cout << "ip1: " << ip1 << std::endl;

  std::unordered_map<int64_t, int64_t> interceptor_id_to_rank = {{0, 0},
                                                                 {1, 1}};
  std::string carrier_id = "0";

  int pid = fork();
  if (pid == 0) {
    Carrier* carrier =
        GlobalMap<std::string, Carrier>::Create(carrier_id, carrier_id);
    GlobalVal<std::string>::Set(new std::string(carrier_id));
    MessageBus* msg_bus = GlobalVal<MessageBus>::Create();
    msg_bus->Init(0, {{0, ip0}, {1, ip1}}, ip0);
    carrier->Init(0, interceptor_id_to_rank);
    Interceptor* a = carrier->SetInterceptor(
        0, InterceptorFactory::Create("PingPong", 0, nullptr));
    msg_bus->Barrier();

    InterceptorMessage msg;
    a->Send(1, msg);
    carrier->Wait();
  } else {
    Carrier* carrier =
        GlobalMap<std::string, Carrier>::Create(carrier_id, carrier_id);
    GlobalVal<std::string>::Set(new std::string(carrier_id));
    MessageBus* msg_bus = GlobalVal<MessageBus>::Create();
    msg_bus->Init(1, {{0, ip0}, {1, ip1}}, ip1);
    carrier->Init(1, interceptor_id_to_rank);
    carrier->SetInterceptor(
        1, InterceptorFactory::Create("PingPong", 1, nullptr));
    msg_bus->Barrier();
    carrier->Wait();
  }
}

}  // namespace distributed
}  // namespace paddle
paddle/fluid/distributed/fleet_executor/test/interceptor_pipeline_long_path_test.cc
0 → 100644

/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <iostream>
#include <unordered_map>

#include "gtest/gtest.h"
#include "paddle/fluid/distributed/fleet_executor/carrier.h"
#include "paddle/fluid/distributed/fleet_executor/global.h"
#include "paddle/fluid/distributed/fleet_executor/interceptor.h"
#include "paddle/fluid/distributed/fleet_executor/message_bus.h"
#include "paddle/fluid/distributed/fleet_executor/task_node.h"

namespace paddle {
namespace distributed {

void LinkNodes(const std::vector<TaskNode*>& nodes) {
  size_t size = nodes.size();
  if (size <= 1) return;

  {  // i = 0
    TaskNode* now = nodes[0];
    TaskNode* next = nodes[1];
    now->AddDownstreamTask(next->task_id());
  }
  {  // i = size - 1
    TaskNode* prev = nodes[size - 2];
    TaskNode* now = nodes[size - 1];
    now->AddUpstreamTask(prev->task_id());
  }

  for (size_t i = 1; i < size - 1; ++i) {
    TaskNode* prev = nodes[i - 1];
    TaskNode* now = nodes[i];
    TaskNode* next = nodes[i + 1];
    now->AddUpstreamTask(prev->task_id());
    now->AddDownstreamTask(next->task_id());
  }
}

TEST(AmplifierInterceptor, Amplifier) {
  std::string carrier_id = "0";
  Carrier* carrier =
      GlobalMap<std::string, Carrier>::Create(carrier_id, carrier_id);
  carrier->Init(0,
                {{SOURCE_ID, 0},
                 {0, 0},
                 {1, 0},
                 {2, 0},
                 {3, 0},
                 {4, 0},
                 {5, 0},
                 {SINK_ID, 0}});
  MessageBus* msg_bus = GlobalVal<MessageBus>::Create();
  msg_bus->Init(0, {{0, "127.0.0.0:0"}}, "127.0.0.0:0");

  int64_t micro_steps = 3;

  // NOTE: don't delete, otherwise interceptor will use undefined node
  TaskNode* source =
      new TaskNode(0, SOURCE_ID, micro_steps);  // rank, task_id, max_run_times
  TaskNode* node_a = new TaskNode(0, 0, 0, 1, 0);  // role, rank, task_id
  TaskNode* node_b = new TaskNode(0, 0, 1, 1, 0);
  TaskNode* node_c = new TaskNode(0, 0, 2, 1, 0);
  TaskNode* node_d = new TaskNode(0, 0, 3, 1, 0);
  TaskNode* node_e = new TaskNode(0, 0, 4, 1, 0);
  TaskNode* node_f = new TaskNode(0, 0, 5, 1, 0);
  TaskNode* sink = new TaskNode(0, SINK_ID, micro_steps);

  // source->a->b->c->d->e->f->sink
  LinkNodes({source, node_a, node_b, node_c, node_d, node_e, node_f, sink});

  // LR->b(1:3)->F->B->e(3:1)->U
  node_b->SetReplyUpPerSteps(micro_steps);
  node_e->SetSendDownPerSteps(micro_steps);

  carrier->SetInterceptor(
      SOURCE_ID, InterceptorFactory::Create("Source", SOURCE_ID, source));
  carrier->SetInterceptor(0, InterceptorFactory::Create("Compute", 0, node_a));
  carrier->SetInterceptor(1,
                          InterceptorFactory::Create("Amplifier", 1, node_b));
  carrier->SetInterceptor(2, InterceptorFactory::Create("Compute", 2, node_c));
  carrier->SetInterceptor(3, InterceptorFactory::Create("Compute", 3, node_d));
  carrier->SetInterceptor(4,
                          InterceptorFactory::Create("Amplifier", 4, node_e));
  carrier->SetInterceptor(5, InterceptorFactory::Create("Compute", 5, node_f));
  carrier->SetInterceptor(
      SINK_ID, InterceptorFactory::Create("Sink", SINK_ID, sink));

  // start
  InterceptorMessage msg;
  msg.set_message_type(START);
  msg.set_dst_id(SOURCE_ID);
  carrier->EnqueueInterceptorMessage(msg);

  carrier->Wait();
  carrier->Release();
}

}  // namespace distributed
}  // namespace paddle
paddle/fluid/distributed/fleet_executor/test/interceptor_pipeline_short_path_test.cc
0 → 100644

/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <iostream>
#include <map>
#include <unordered_map>

#include "gtest/gtest.h"
#include "paddle/fluid/distributed/fleet_executor/carrier.h"
#include "paddle/fluid/distributed/fleet_executor/global.h"
#include "paddle/fluid/distributed/fleet_executor/interceptor.h"
#include "paddle/fluid/distributed/fleet_executor/message_bus.h"
#include "paddle/fluid/distributed/fleet_executor/task_node.h"

namespace paddle {
namespace distributed {

int64_t GetBuffSize(
    const std::map<std::pair<TaskNode*, TaskNode*>, int64_t> buffs,
    TaskNode* from,
    TaskNode* to) {
  if (buffs.find({from, to}) != buffs.end()) {
    return buffs.at({from, to});
  }
  if (buffs.find({to, from}) != buffs.end()) {
    return buffs.at({to, from});
  }
  return 2;  // set default 2
}

void LinkNodes(const std::vector<TaskNode*>& nodes,
               const std::map<std::pair<TaskNode*, TaskNode*>, int64_t> buffs) {
  size_t size = nodes.size();
  if (size <= 1) return;

  {  // i = 0
    TaskNode* now = nodes[0];
    TaskNode* next = nodes[1];
    auto buff_size = GetBuffSize(buffs, now, next);
    now->AddDownstreamTask(next->task_id(), buff_size);
  }
  {  // i = size - 1
    TaskNode* prev = nodes[size - 2];
    TaskNode* now = nodes[size - 1];
    auto buff_size = GetBuffSize(buffs, prev, now);
    now->AddUpstreamTask(prev->task_id(), buff_size);
  }
  for (size_t i = 1; i < size - 1; ++i) {
    TaskNode* prev = nodes[i - 1];
    TaskNode* now = nodes[i];
    TaskNode* next = nodes[i + 1];
    auto buff_size = GetBuffSize(buffs, prev, now);
    now->AddUpstreamTask(prev->task_id(), buff_size);
    buff_size = GetBuffSize(buffs, now, next);
    now->AddDownstreamTask(next->task_id(), buff_size);
  }
}

TEST(AmplifierInterceptor, Amplifier) {
  std::string carrier_id = "0";
  Carrier* carrier =
      GlobalMap<std::string, Carrier>::Create(carrier_id, carrier_id);
  carrier->Init(
      0, {{SOURCE_ID, 0}, {0, 0}, {1, 0}, {2, 0}, {3, 0}, {SINK_ID, 0}});
  MessageBus* msg_bus = GlobalVal<MessageBus>::Create();
  msg_bus->Init(0, {{0, ""}}, "");

  int64_t micro_steps = 6;

  // NOTE: don't delete, otherwise interceptor will use undefined node
  TaskNode* source =
      new TaskNode(0, SOURCE_ID, micro_steps);  // rank, task_id, max_run_times
  TaskNode* node_a =
      new TaskNode(0, 0, 0, micro_steps, 0);  // role, rank, task_id
  TaskNode* node_b = new TaskNode(0, 0, 1, 3, 0);
  TaskNode* node_c = new TaskNode(0, 0, 2, 3, 0);
  TaskNode* node_d = new TaskNode(0, 0, 3, micro_steps, 0);
  TaskNode* sink = new TaskNode(0, SINK_ID, micro_steps);

  // source->a->b->c->d->sink
  // LR->F->B->U
  LinkNodes({source, node_a, node_b, node_c, node_d, sink},
            {{{node_b, node_c}, 1}});

  node_a->SetRunPerSteps(micro_steps);
  node_d->SetRunPerSteps(micro_steps);
  node_d->SetRunAtOffset(micro_steps - 1);

  carrier->SetInterceptor(
      SOURCE_ID, InterceptorFactory::Create("Source", SOURCE_ID, source));
  carrier->SetInterceptor(0,
                          InterceptorFactory::Create("Amplifier", 0, node_a));
  carrier->SetInterceptor(1, InterceptorFactory::Create("Compute", 1, node_b));
  carrier->SetInterceptor(2, InterceptorFactory::Create("Compute", 2, node_c));
  carrier->SetInterceptor(3,
                          InterceptorFactory::Create("Amplifier", 3, node_d));
  carrier->SetInterceptor(
      SINK_ID, InterceptorFactory::Create("Sink", SINK_ID, sink));

  // start
  InterceptorMessage msg;
  msg.set_message_type(START);
  msg.set_dst_id(SOURCE_ID);
  carrier->EnqueueInterceptorMessage(msg);

  carrier->Wait();
  carrier->Release();
}

}  // namespace distributed
}  // namespace paddle
paddle/fluid/distributed/fleet_executor/test/sink_interceptor_test.cc
0 → 100644

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <iostream>
#include <unordered_map>

#include "gtest/gtest.h"
#include "paddle/fluid/distributed/fleet_executor/carrier.h"
#include "paddle/fluid/distributed/fleet_executor/global.h"
#include "paddle/fluid/distributed/fleet_executor/interceptor.h"
#include "paddle/fluid/distributed/fleet_executor/message_bus.h"
#include "paddle/fluid/distributed/fleet_executor/task_node.h"

namespace paddle {
namespace distributed {

class FakeInterceptor : public Interceptor {
 public:
  FakeInterceptor(int64_t interceptor_id, TaskNode* node)
      : Interceptor(interceptor_id, node) {
    RegisterMsgHandle([this](const InterceptorMessage& msg) { NOP(msg); });
  }

  void NOP(const InterceptorMessage& msg) {
    if (msg.message_type() == DATA_IS_READY) {
      std::cout << "FakeInterceptor run in scope " << msg.scope_idx()
                << std::endl;
      InterceptorMessage reply;
      reply.set_message_type(DATA_IS_USELESS);
      Send(SOURCE_ID, reply);
      InterceptorMessage ready;
      ready.set_message_type(DATA_IS_READY);
      Send(SINK_ID, ready);
    } else if (msg.message_type() == DATA_IS_USELESS) {
      std::cout << "FakeInterceptor remove result in scope " << msg.scope_idx()
                << std::endl;
    }
  }

 private:
  int64_t step_;
};

TEST(SourceInterceptor, Source) {
  std::string carrier_id = "0";
  Carrier* carrier =
      GlobalMap<std::string, Carrier>::Create(carrier_id, carrier_id);
  carrier->Init(0, {{SOURCE_ID, 0}, {0, 0}, {SINK_ID, 0}});
  MessageBus* msg_bus = GlobalVal<MessageBus>::Create();
  msg_bus->Init(0, {{0, "127.0.0.0:0"}}, "");

  // NOTE: don't delete, otherwise interceptor will use undefined node
  TaskNode* source =
      new TaskNode(0, SOURCE_ID, 0, 3, 0);  // role, rank, task_id
  TaskNode* node_a = new TaskNode(0, 0, 0, 3, 0);      // role, rank, task_id
  TaskNode* sink = new TaskNode(0, SINK_ID, 0, 3, 0);  // role, rank, task_id

  source->AddDownstreamTask(0, 1);
  node_a->AddUpstreamTask(SOURCE_ID, 1);
  node_a->AddDownstreamTask(SINK_ID, 1);
  sink->AddUpstreamTask(0, 1);

  carrier->SetInterceptor(
      SOURCE_ID, InterceptorFactory::Create("Source", SOURCE_ID, source));
  carrier->SetInterceptor(0, std::make_unique<FakeInterceptor>(0, node_a));
  carrier->SetInterceptor(
      SINK_ID, InterceptorFactory::Create("Sink", SINK_ID, sink));

  // start
  InterceptorMessage msg;
  msg.set_message_type(START);
  msg.set_dst_id(SOURCE_ID);
  carrier->EnqueueInterceptorMessage(msg);

  carrier->Wait();
  carrier->Release();
}

}  // namespace distributed
}  // namespace paddle
paddle/fluid/distributed/fleet_executor/test/source_interceptor_test.cc
0 → 100644

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <iostream>
#include <unordered_map>

#include "gtest/gtest.h"
#include "paddle/fluid/distributed/fleet_executor/carrier.h"
#include "paddle/fluid/distributed/fleet_executor/global.h"
#include "paddle/fluid/distributed/fleet_executor/interceptor.h"
#include "paddle/fluid/distributed/fleet_executor/message_bus.h"
#include "paddle/fluid/distributed/fleet_executor/task_node.h"

namespace paddle {
namespace distributed {

class FakeInterceptor : public Interceptor {
 public:
  FakeInterceptor(int64_t interceptor_id, TaskNode* node)
      : Interceptor(interceptor_id, node) {
    step_ = 0;
    RegisterMsgHandle([this](const InterceptorMessage& msg) { NOP(msg); });
  }

  void NOP(const InterceptorMessage& msg) {
    if (msg.message_type() == DATA_IS_READY) {
      std::cout << "FakeInterceptor run in scope " << msg.scope_idx()
                << std::endl;
      InterceptorMessage reply;
      reply.set_message_type(DATA_IS_USELESS);
      Send(SOURCE_ID, reply);
      step_++;
      if (step_ == node_->max_run_times()) {
        carrier_->WakeUp();
      }
    }
  }

 private:
  int64_t step_;
};

TEST(SourceInterceptor, Source) {
  std::string carrier_id = "0";
  Carrier* carrier =
      GlobalMap<std::string, Carrier>::Create(carrier_id, carrier_id);
  carrier->Init(0, {{SOURCE_ID, 0}, {0, 0}});
  MessageBus* msg_bus = GlobalVal<MessageBus>::Create();
  msg_bus->Init(0, {{0, "127.0.0.0:0"}}, "");

  // NOTE: don't delete, otherwise interceptor will use undefined node
  TaskNode* source =
      new TaskNode(0, SOURCE_ID, 0, 3, 0);  // role, rank, task_id
  TaskNode* node_a = new TaskNode(0, 0, 0, 3, 0);  // role, rank, task_id

  source->AddDownstreamTask(0, 1);
  node_a->AddUpstreamTask(SOURCE_ID, 1);
  carrier->SetInterceptor(
      SOURCE_ID, InterceptorFactory::Create("Source", SOURCE_ID, source));
  carrier->SetInterceptor(0, std::make_unique<FakeInterceptor>(0, node_a));

  // start
  InterceptorMessage msg;
  msg.set_message_type(START);
  msg.set_dst_id(SOURCE_ID);
  carrier->EnqueueInterceptorMessage(msg);

  carrier->Wait();
  carrier->Release();
}

}  // namespace distributed
}  // namespace paddle
paddle/fluid/distributed/index_dataset/CMakeLists.txt
0 → 100644

proto_library(index_dataset_proto SRCS index_dataset.proto)
cc_library(
  index_wrapper
  SRCS index_wrapper.cc
  DEPS index_dataset_proto fs)
if(WITH_MKLDNN)
  cc_library(
    index_sampler
    SRCS index_sampler.cc
    DEPS xxhash index_wrapper eigen3 mkldnn)
else()
  cc_library(
    index_sampler
    SRCS index_sampler.cc
    DEPS xxhash index_wrapper eigen3)
endif()
if(WITH_PYTHON)
  py_proto_compile(index_dataset_py_proto SRCS index_dataset.proto)
endif()
paddle/fluid/distributed/index_dataset/index_dataset.proto
0 → 100644

// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";
package paddle.distributed;

message IndexNode {
  required uint64 id = 1;
  required bool is_leaf = 2;
  required float probability = 3;
  optional string item_name = 4;
}

message TreeMeta {
  required int32 height = 1;
  required int32 branch = 2;
}

message KVItem {
  required bytes key = 1;
  required bytes value = 2;
}
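For reference, a hedged sketch of how these proto2 messages are produced with the standard protobuf-generated C++ API; the setters below are confirmed by their use in index_wrapper.cc later in this commit, while the concrete values are illustrative. TreeIndex::Load expects each record's KVItem key to be the node's tree code and its value to be the serialized IndexNode:

#include "paddle/fluid/distributed/index_dataset/index_dataset.pb.h"

// Build one leaf node and wrap it into the KVItem record that
// TreeIndex::Load parses (key = tree code, value = serialized node).
void MakeLeafItem(paddle::distributed::KVItem* item) {
  paddle::distributed::IndexNode node;
  node.set_id(42);             // required: item id (illustrative value)
  node.set_is_leaf(true);      // required: leaves populate id_codes_map_
  node.set_probability(0.5f);  // required
  item->set_key("85");         // tree code of this node, stored as bytes
  item->set_value(node.SerializeAsString());
}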
paddle/fluid/distributed/index_dataset/index_sampler.cc
0 → 100644

// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/distributed/index_dataset/index_sampler.h"

#include "paddle/fluid/framework/data_feed.h"

namespace paddle {
namespace distributed {

std::vector<std::vector<uint64_t>> LayerWiseSampler::sample(
    const std::vector<std::vector<uint64_t>>& user_inputs,
    const std::vector<uint64_t>& target_ids,
    bool with_hierarchy) {
  auto input_num = target_ids.size();
  auto user_feature_num = user_inputs[0].size();
  std::vector<std::vector<uint64_t>> outputs(
      input_num * layer_counts_sum_,
      std::vector<uint64_t>(user_feature_num + 2));

  auto max_layer = tree_->Height();
  size_t idx = 0;
  for (size_t i = 0; i < input_num; i++) {
    auto travel_codes =
        tree_->GetTravelCodes(target_ids[i], start_sample_layer_);
    auto travel_path = tree_->GetNodes(travel_codes);
    for (size_t j = 0; j < travel_path.size(); j++) {
      // user
      if (j > 0 && with_hierarchy) {
        auto ancestor_codes =
            tree_->GetAncestorCodes(user_inputs[i], max_layer - j - 1);
        auto hierarchical_user = tree_->GetNodes(ancestor_codes);
        for (int idx_offset = 0; idx_offset <= layer_counts_[j];
             idx_offset++) {
          for (size_t k = 0; k < user_feature_num; k++) {
            outputs[idx + idx_offset][k] = hierarchical_user[k].id();
          }
        }
      } else {
        for (int idx_offset = 0; idx_offset <= layer_counts_[j];
             idx_offset++) {
          for (size_t k = 0; k < user_feature_num; k++) {
            outputs[idx + idx_offset][k] = user_inputs[i][k];
          }
        }
      }

      // sampler ++
      outputs[idx][user_feature_num] = travel_path[j].id();
      outputs[idx][user_feature_num + 1] = 1.0;
      idx += 1;
      for (int idx_offset = 0; idx_offset < layer_counts_[j]; idx_offset++) {
        int sample_res = 0;
        do {
          sample_res = sampler_vec_[j]->Sample();
        } while (layer_ids_[j][sample_res].id() == travel_path[j].id());
        outputs[idx + idx_offset][user_feature_num] =
            layer_ids_[j][sample_res].id();
        outputs[idx + idx_offset][user_feature_num + 1] = 0;
      }
      idx += layer_counts_[j];
    }
  }
  return outputs;
}

void LayerWiseSampler::sample_from_dataset(
    const uint16_t sample_slot,
    std::vector<paddle::framework::Record>* src_datas,
    std::vector<paddle::framework::Record>* sample_results) {
  sample_results->clear();
  for (auto& data : *src_datas) {
    VLOG(1) << "src data size = " << src_datas->size();
    VLOG(1) << "float data size = " << data.float_feasigns_.size();
    // data.Print();
    uint64_t start_idx = sample_results->size();
    VLOG(1) << "before sample, sample_results.size = " << start_idx;
    uint64_t sample_feasign_idx = -1;
    bool sample_sign = false;
    for (unsigned int i = 0; i < data.uint64_feasigns_.size(); i++) {
      VLOG(1) << "slot" << i << " = " << data.uint64_feasigns_[i].slot();
      if (data.uint64_feasigns_[i].slot() == sample_slot) {
        sample_sign = true;
        sample_feasign_idx = i;
      }
      if (sample_sign) break;
    }

    VLOG(1) << "sample_feasign_idx: " << sample_feasign_idx;
    if (sample_sign) {
      auto target_id =
          data.uint64_feasigns_[sample_feasign_idx].sign().uint64_feasign_;
      auto travel_codes = tree_->GetTravelCodes(target_id, start_sample_layer_);
      auto travel_path = tree_->GetNodes(travel_codes);
      for (unsigned int j = 0; j < travel_path.size(); j++) {
        paddle::framework::Record instance(data);
        instance.uint64_feasigns_[sample_feasign_idx].sign().uint64_feasign_ =
            travel_path[j].id();
        sample_results->push_back(instance);
        for (int idx_offset = 0; idx_offset < layer_counts_[j]; idx_offset++) {
          int sample_res = 0;
          do {
            sample_res = sampler_vec_[j]->Sample();
          } while (layer_ids_[j][sample_res].id() == travel_path[j].id());
          paddle::framework::Record instance(data);
          instance.uint64_feasigns_[sample_feasign_idx].sign().uint64_feasign_ =
              layer_ids_[j][sample_res].id();
          VLOG(1) << "layer id :" << layer_ids_[j][sample_res].id();
          // sample_feasign_idx + 1 == label's id
          instance.uint64_feasigns_[sample_feasign_idx + 1]
              .sign()
              .uint64_feasign_ = 0;
          sample_results->push_back(instance);
        }
        VLOG(1) << "layer end!!!!!!!!!!!!!!!!!!";
      }
    }
  }
  VLOG(1) << "after sample, sample_results.size = " << sample_results->size();
  return;
}

std::vector<uint64_t> float2int(std::vector<double> tmp) {
  std::vector<uint64_t> tmp_int;
  for (auto i : tmp) tmp_int.push_back(uint64_t(i));
  return tmp_int;
}

}  // end namespace distributed
}  // end namespace paddle
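To make the row layout produced by sample() above concrete, here is a worked example, assuming user_feature_num = 2 and layer_counts_ = {2, 1} (so layer_counts_sum_ = (2 + 1) + (1 + 1) = 5 rows per target, per the accounting in init_layerwise_conf in index_sampler.h):

// Each output row is: [user feature 0, user feature 1, node id, label].
// Per traversed layer j, sample() emits:
//   1 positive row             -> {u0, u1, travel_path[j].id(), 1}
//   layer_counts_[j] negatives -> {u0, u1, id of a non-path node,  0}
// With layer_counts_ = {2, 1}: (1 + 2) + (1 + 1) = 5 rows for one target.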
paddle/fluid/distributed/index_dataset/index_sampler.h
0 → 100644

// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <vector>

#include "paddle/fluid/distributed/index_dataset/index_wrapper.h"
#include "paddle/fluid/framework/data_feed.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/operators/math/sampler.h"
#include "paddle/fluid/platform/enforce.h"

namespace paddle {
namespace distributed {

class IndexSampler {
 public:
  virtual ~IndexSampler() {}
  IndexSampler() {}

  template <typename T>
  static std::shared_ptr<IndexSampler> Init(const std::string& name) {
    std::shared_ptr<IndexSampler> instance = nullptr;
    instance.reset(new T(name));
    return instance;
  }

  virtual void init_layerwise_conf(
      const std::vector<uint16_t>& layer_sample_counts,
      uint16_t start_sample_layer = 1,
      uint16_t seed = 0) {}
  virtual void init_beamsearch_conf(const int64_t k) {}
  virtual std::vector<std::vector<uint64_t>> sample(
      const std::vector<std::vector<uint64_t>>& user_inputs,
      const std::vector<uint64_t>& input_targets,
      bool with_hierarchy = false) = 0;

  virtual void sample_from_dataset(
      const uint16_t sample_slot,
      std::vector<paddle::framework::Record>* src_datas,
      std::vector<paddle::framework::Record>* sample_results) = 0;
};

class LayerWiseSampler : public IndexSampler {
 public:
  virtual ~LayerWiseSampler() {}
  explicit LayerWiseSampler(const std::string& name) {
    tree_ = IndexWrapper::GetInstance()->get_tree_index(name);
  }

  void init_layerwise_conf(const std::vector<uint16_t>& layer_sample_counts,
                           uint16_t start_sample_layer,
                           uint16_t seed) override {
    seed_ = seed;
    start_sample_layer_ = start_sample_layer;

    PADDLE_ENFORCE_GT(
        start_sample_layer_,
        0,
        paddle::platform::errors::InvalidArgument(
            "start sampler layer = [%d], it should be greater than 0.",
            start_sample_layer_));
    PADDLE_ENFORCE_LT(start_sample_layer_,
                      tree_->Height(),
                      paddle::platform::errors::InvalidArgument(
                          "start sampler layer = [%d], it should be less than "
                          "max_layer, which is [%d].",
                          start_sample_layer_,
                          tree_->Height()));

    size_t i = 0;
    layer_counts_sum_ = 0;
    layer_counts_.clear();
    int cur_layer = start_sample_layer_;
    while (cur_layer < tree_->Height()) {
      int layer_sample_num = 1;
      if (i < layer_sample_counts.size()) {
        layer_sample_num = layer_sample_counts[i];
      }
      layer_counts_sum_ += layer_sample_num + 1;
      layer_counts_.push_back(layer_sample_num);
      VLOG(3) << "[INFO] level " << cur_layer
              << " sample_layer_counts.push_back: " << layer_sample_num;
      cur_layer += 1;
      i += 1;
    }
    reverse(layer_counts_.begin(), layer_counts_.end());
    VLOG(3) << "sample counts sum: " << layer_counts_sum_;

    auto max_layer = tree_->Height();
    sampler_vec_.clear();
    layer_ids_.clear();

    auto layer_index = max_layer - 1;
    size_t idx = 0;
    while (layer_index >= start_sample_layer_) {
      auto layer_codes = tree_->GetLayerCodes(layer_index);
      layer_ids_.push_back(tree_->GetNodes(layer_codes));
      auto sampler_temp =
          std::make_shared<paddle::operators::math::UniformSampler>(
              layer_ids_[idx].size() - 1, seed_);
      sampler_vec_.push_back(sampler_temp);
      layer_index--;
      idx++;
    }
  }

  std::vector<std::vector<uint64_t>> sample(
      const std::vector<std::vector<uint64_t>>& user_inputs,
      const std::vector<uint64_t>& target_ids,
      bool with_hierarchy) override;

  void sample_from_dataset(
      const uint16_t sample_slot,
      std::vector<paddle::framework::Record>* src_datas,
      std::vector<paddle::framework::Record>* sample_results) override;

 private:
  std::vector<int> layer_counts_;
  int64_t layer_counts_sum_{0};
  std::shared_ptr<TreeIndex> tree_{nullptr};
  int seed_{0};
  int start_sample_layer_{1};
  std::vector<std::shared_ptr<paddle::operators::math::Sampler>> sampler_vec_;
  std::vector<std::vector<IndexNode>> layer_ids_;
};

}  // end namespace distributed
}  // end namespace paddle
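A hedged end-to-end usage sketch of the sampler API declared above; the tree name and feature values are hypothetical, and the tree must already have been registered via IndexWrapper::insert_tree_index (see index_wrapper.h):

#include <cstdint>
#include <vector>

#include "paddle/fluid/distributed/index_dataset/index_sampler.h"

void DemoLayerWiseSampling() {
  using paddle::distributed::IndexSampler;
  using paddle::distributed::LayerWiseSampler;

  // "demo_tree" is a placeholder name for an already-registered tree.
  auto sampler = IndexSampler::Init<LayerWiseSampler>("demo_tree");
  // One negative per layer, start sampling at layer 1, fixed seed.
  sampler->init_layerwise_conf({1}, /*start_sample_layer=*/1, /*seed=*/0);

  std::vector<std::vector<uint64_t>> user_inputs = {{1001, 1002}};
  std::vector<uint64_t> targets = {42};
  auto rows = sampler->sample(user_inputs, targets, /*with_hierarchy=*/false);
  (void)rows;  // each row: user features + sampled node id + 0/1 label
}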
paddle/fluid/distributed/index_dataset/index_wrapper.cc
0 → 100644

/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/distributed/index_dataset/index_wrapper.h"

#include <memory>
#include <string>
#include <thread>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include "paddle/fluid/framework/io/fs.h"

namespace paddle {
namespace distributed {

std::shared_ptr<IndexWrapper> IndexWrapper::s_instance_(nullptr);

int TreeIndex::Load(const std::string filename) {
  int err_no;
  auto fp = paddle::framework::fs_open_read(filename, &err_no, "");
  PADDLE_ENFORCE_NE(
      fp,
      nullptr,
      platform::errors::InvalidArgument(
          "Open file %s failed. Please check whether the file exists.",
          filename));

  int num = 0;
  max_id_ = 0;
  fake_node_.set_id(0);
  fake_node_.set_is_leaf(false);
  fake_node_.set_probability(0.0);
  max_code_ = 0;
  size_t ret = fread(&num, sizeof(num), 1, fp.get());
  while (ret == 1 && num > 0) {
    std::string content(num, '\0');
    size_t read_num =
        fread(const_cast<char*>(content.data()), 1, num, fp.get());
    PADDLE_ENFORCE_EQ(
        read_num,
        static_cast<size_t>(num),
        platform::errors::InvalidArgument(
            "Read from file: %s failed. Valid format is "
            "an integer representing the length of the following string, "
            "and the string itself. We got an integer [%d], "
            "but the following string's length is [%d].",
            filename,
            num,
            read_num));

    KVItem item;
    PADDLE_ENFORCE_EQ(
        item.ParseFromString(content),
        true,
        platform::errors::InvalidArgument("Parse from file: %s failed. Its "
                                          "content can't be parsed by KVItem.",
                                          filename));

    if (item.key() == ".tree_meta") {
      meta_.ParseFromString(item.value());
    } else {
      auto code = std::stoull(item.key());
      IndexNode node;
      node.ParseFromString(item.value());
      // PADDLE_ENFORCE_NE(node.id(), 0,
      //                   platform::errors::InvalidArgument(
      //                       "Node's id should not be equal to zero."));
      if (node.is_leaf()) {
        id_codes_map_[node.id()] = code;
      }
      data_[code] = node;
      if (node.id() > max_id_) {
        max_id_ = node.id();
      }
      if (code > max_code_) {
        max_code_ = code;
      }
    }
    ret = fread(&num, sizeof(num), 1, fp.get());
  }
  total_nodes_num_ = data_.size();
  max_code_ += 1;
  return 0;
}

std::vector<IndexNode> TreeIndex::GetNodes(const std::vector<uint64_t>& codes) {
  std::vector<IndexNode> nodes;
  nodes.reserve(codes.size());
  for (size_t i = 0; i < codes.size(); i++) {
    if (CheckIsValid(codes[i])) {
      nodes.push_back(data_.at(codes[i]));
    } else {
      nodes.push_back(fake_node_);
    }
  }
  return nodes;
}

std::vector<uint64_t> TreeIndex::GetLayerCodes(int level) {
  uint64_t level_num = static_cast<uint64_t>(std::pow(meta_.branch(), level));
  uint64_t level_offset = level_num - 1;

  std::vector<uint64_t> res;
  res.reserve(level_num);
  for (uint64_t i = 0; i < level_num; i++) {
    auto code = level_offset + i;
    if (CheckIsValid(code)) {
      res.push_back(code);
    }
  }
  return res;
}

std::vector<uint64_t> TreeIndex::GetAncestorCodes(
    const std::vector<uint64_t>& ids, int level) {
  std::vector<uint64_t> res;
  res.reserve(ids.size());

  int cur_level;
  for (size_t i = 0; i < ids.size(); i++) {
    if (id_codes_map_.find(ids[i]) == id_codes_map_.end()) {
      res.push_back(max_code_);
    } else {
      auto code = id_codes_map_.at(ids[i]);
      cur_level = meta_.height() - 1;
      while (level >= 0 && cur_level > level) {
        code = (code - 1) / meta_.branch();
        cur_level--;
      }
      res.push_back(code);
    }
  }
  return res;
}

std::vector<uint64_t> TreeIndex::GetChildrenCodes(uint64_t ancestor,
                                                  int level) {
  auto level_code_num = static_cast<uint64_t>(std::pow(meta_.branch(), level));
  auto code_min = level_code_num - 1;
  auto code_max = meta_.branch() * level_code_num - 1;

  std::vector<uint64_t> parent;
  parent.push_back(ancestor);
  std::vector<uint64_t> res;
  size_t p_idx = 0;
  while (true) {
    size_t p_size = parent.size();
    for (; p_idx < p_size; p_idx++) {
      for (int i = 0; i < meta_.branch(); i++) {
        auto code = parent[p_idx] * meta_.branch() + i + 1;
        if (data_.find(code) != data_.end()) parent.push_back(code);
      }
    }
    if ((code_min <= parent[p_idx]) && (parent[p_idx] < code_max)) {
      break;
    }
  }

  return std::vector<uint64_t>(parent.begin() + p_idx, parent.end());
}

std::vector<uint64_t> TreeIndex::GetTravelCodes(uint64_t id, int start_level) {
  std::vector<uint64_t> res;
  PADDLE_ENFORCE_NE(id_codes_map_.find(id),
                    id_codes_map_.end(),
                    paddle::platform::errors::InvalidArgument(
                        "id = %d doesn't exist in Tree.", id));
  auto code = id_codes_map_.at(id);
  int level = meta_.height() - 1;

  while (level >= start_level) {
    res.push_back(code);
    code = (code - 1) / meta_.branch();
    level--;
  }
  return res;
}

std::vector<IndexNode> TreeIndex::GetAllLeafs() {
  std::vector<IndexNode> res;
  res.reserve(id_codes_map_.size());
  for (auto& ite : id_codes_map_) {
    auto code = ite.second;
    res.push_back(data_.at(code));
  }
  return res;
}

}  // end namespace distributed
}  // end namespace paddle
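The code arithmetic in GetTravelCodes, GetAncestorCodes, and GetChildrenCodes above is the standard array encoding of a complete branch-ary tree; a small worked example for branch = 2 (the node values are purely illustrative):

// Complete binary tree (branch = 2), codes assigned level by level:
//
//            0            level 0 (codes start at 2^0 - 1 = 0)
//          1   2          level 1 (codes start at 2^1 - 1 = 1)
//         3 4 5 6         level 2 (codes start at 2^2 - 1 = 3)
//
// parent(code)   = (code - 1) / branch,   e.g. parent(5) = (5 - 1) / 2 = 2
// children(code) = code * branch + 1 .. code * branch + branch,
//                  e.g. children(2) = {5, 6}
// Level k holds branch^k codes starting at branch^k - 1, which is exactly
// how GetLayerCodes enumerates one layer.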
paddle/fluid/distributed/index_dataset/index_wrapper.h
0 → 100644
View file @
f0ef3442
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <cmath>
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
#include "paddle/fluid/distributed/index_dataset/index_dataset.pb.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace distributed {

class Index {
 public:
  Index() {}
  ~Index() {}
};

class TreeIndex : public Index {
 public:
  TreeIndex() {}
  ~TreeIndex() {}

  int Height() { return meta_.height(); }
  int Branch() { return meta_.branch(); }
  uint64_t TotalNodeNums() { return total_nodes_num_; }
  uint64_t EmbSize() { return max_id_ + 1; }
  int Load(const std::string path);

  inline bool CheckIsValid(int code) {
    if (data_.find(code) != data_.end()) {
      return true;
    } else {
      return false;
    }
  }

  std::vector<IndexNode> GetNodes(const std::vector<uint64_t>& codes);
  std::vector<uint64_t> GetLayerCodes(int level);
  std::vector<uint64_t> GetAncestorCodes(const std::vector<uint64_t>& ids,
                                         int level);
  std::vector<uint64_t> GetChildrenCodes(uint64_t ancestor, int level);
  std::vector<uint64_t> GetTravelCodes(uint64_t id, int start_level);
  std::vector<IndexNode> GetAllLeafs();

  std::unordered_map<uint64_t, IndexNode> data_;
  std::unordered_map<uint64_t, uint64_t> id_codes_map_;
  uint64_t total_nodes_num_;
  TreeMeta meta_;
  uint64_t max_id_;
  uint64_t max_code_;
  IndexNode fake_node_;
};

using TreePtr = std::shared_ptr<TreeIndex>;

class IndexWrapper {
 public:
  virtual ~IndexWrapper() {}
  IndexWrapper() {}

  void clear_tree() { tree_map.clear(); }

  TreePtr get_tree_index(const std::string name) {
    PADDLE_ENFORCE_NE(tree_map.find(name),
                      tree_map.end(),
                      paddle::platform::errors::InvalidArgument(
                          "tree [%s] doesn't exist. Please insert it firstly "
                          "by API[\'insert_tree_index\'].",
                          name));
    return tree_map[name];
  }

  void insert_tree_index(const std::string name, const std::string tree_path) {
    if (tree_map.find(name) != tree_map.end()) {
      VLOG(0) << "Tree " << name << " has already existed.";
      return;
    }
    TreePtr tree = std::make_shared<TreeIndex>();
    int ret = tree->Load(tree_path);
    PADDLE_ENFORCE_EQ(ret,
                      0,
                      paddle::platform::errors::InvalidArgument(
                          "Load tree[%s] from path[%s] failed. Please "
                          "check whether the file exists.",
                          name,
                          tree_path));
    tree_map.insert(std::pair<std::string, TreePtr>{name, tree});
  }

  static std::shared_ptr<IndexWrapper> GetInstancePtr() {
    if (NULL == s_instance_) {
      s_instance_.reset(new paddle::distributed::IndexWrapper());
    }
    return s_instance_;
  }

  static IndexWrapper* GetInstance() {
    if (NULL == s_instance_) {
      s_instance_.reset(new paddle::distributed::IndexWrapper());
    }
    return s_instance_.get();
  }

 private:
  static std::shared_ptr<IndexWrapper> s_instance_;
  std::unordered_map<std::string, TreePtr> tree_map;
};

}  // end namespace distributed
}  // end namespace paddle
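A hypothetical usage sketch of the wrapper above; the tree name and file path are invented for illustration:

#include "paddle/fluid/distributed/index_dataset/index_wrapper.h"

void LoadAndQueryTree() {
  auto* wrapper = paddle::distributed::IndexWrapper::GetInstance();
  // Loading is idempotent per name: a second insert with the same name only
  // logs a message and returns.
  wrapper->insert_tree_index("demo_tree", "/path/to/tree_index.pb");
  auto tree = wrapper->get_tree_index("demo_tree");
  auto leaves = tree->GetAllLeafs();  // all leaf nodes of the loaded tree
}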
paddle/fluid/distributed/ps/CMakeLists.txt
0 → 100644
View file @
f0ef3442
set_property(GLOBAL PROPERTY RPC_DEPS sendrecv_rpc ${BRPC_DEPS} string_helper)

add_subdirectory(table)
add_subdirectory(service)
add_subdirectory(wrapper)
paddle/fluid/distributed/ps/README.md
0 → 100644
View file @
f0ef3442
# Directory Overview

Table: for param storage and update
-----MemorySparseTable: table for sparse params, used in cpu async mode
-----MemoryDenseTable: table for dense params, used in cpu async/geo mode
-----MemorySparseGeoTable: table for sparse params, used in cpu geo mode
-----CommonGraphTable: table used for graph learning
-----BarrierTable: table for the barrier function, used in cpu sync mode
-----TensorTable: table which runs a program, used for learning rate decay only

ValueAccessor: for pulling params and pushing gradients
-----CtrCommonAccessor: pull/push value with show/click, float type
-----CtrDoubleAccessor: same as CtrCommonAccessor, except show/click are double type
-----SparseAccessor: used for common embeddings; pulls value without show/click, pushes value with show/click
-----CommMergeAccessor: used for dense tables only, to get the param dim

PsService(proto): for the server to handle requests
-----PsBaseService
----------BrpcPsService: for cpu dnn training tasks
----------GraphBrpcService: for graph learning
-----HeterService: for dnn training tasks with heterogeneous computing resources

PSServer: receives requests from trainers and handles them via the service
-----BrpcPsServer: for cpu dnn training tasks
-----GraphBrpcServer: for graph learning
-----PsLocalServer: for GpuPS
HeterServer: for HeterPS

PSClient: pulls params and pushes gradients for the trainer
-----BrpcPsClient: for cpu dnn training tasks
----------GraphBrpcClient: for graph learning
-----PsLocalClient: for GpuPS
HeterClient: for HeterPS

PSCore: wrapper for InitServer

GraphPyService: for graph learning
paddle/fluid/distributed/ps/service/CMakeLists.txt
0 → 100644
View file @
f0ef3442
set(BRPC_SRCS ps_client.cc server.cc)
set_source_files_properties(${BRPC_SRCS})

if(WITH_HETERPS)
  set(BRPC_DEPS
      brpc
      ssl
      crypto
      protobuf
      gflags
      glog
      zlib
      leveldb
      snappy
      gflags
      glog
      device_context
      rocksdb)
else()
  set(BRPC_DEPS
      brpc
      ssl
      crypto
      protobuf
      gflags
      glog
      zlib
      leveldb
      snappy
      gflags
      glog
      device_context)
endif()

brpc_library(
  sendrecv_rpc
  SRCS
  ${BRPC_SRCS}
  PROTO
  sendrecv.proto
  DEPS
  ${BRPC_DEPS})

#set_property(GLOBAL PROPERTY RPC_DEPS sendrecv_rpc ${BRPC_DEPS} string_helper)
get_property(RPC_DEPS GLOBAL PROPERTY RPC_DEPS)

set_source_files_properties(
  communicator/communicator.cc PROPERTIES COMPILE_FLAGS
  ${DISTRIBUTE_COMPILE_FLAGS})
set_source_files_properties(
  ps_service/service.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
set_source_files_properties(
  brpc_ps_server.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
set_source_files_properties(
  brpc_ps_client.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
set_source_files_properties(
  ps_local_client.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
set_source_files_properties(
  brpc_utils.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
set_source_files_properties(
  heter_server.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
set_source_files_properties(
  heter_client.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
set_source_files_properties(
  client.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
set_source_files_properties(
  ps_client.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
set_source_files_properties(
  server.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
set_source_files_properties(
  graph_brpc_server.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
set_source_files_properties(
  graph_brpc_client.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
set_source_files_properties(
  coordinator_client.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
set_source_files_properties(
  ps_service/graph_py_service.cc PROPERTIES COMPILE_FLAGS
  ${DISTRIBUTE_COMPILE_FLAGS})

cc_library(
  brpc_utils
  SRCS brpc_utils.cc
  DEPS tensor device_context ${COMMON_DEPS} ${RPC_DEPS})

cc_library(
  ps_service
  SRCS graph_brpc_server.cc
       brpc_ps_server.cc
       server.cc
       graph_brpc_client.cc
       brpc_ps_client.cc
       ps_local_client.cc
       coordinator_client.cc
       ps_client.cc
       communicator/communicator.cc
       ps_service/service.cc
       ps_service/graph_py_service.cc
  DEPS eigen3
       table
       brpc_utils
       simple_threadpool
       scope
       math_function
       selected_rows_functor
       ${RPC_DEPS})

cc_library(
  heter_client
  SRCS heter_client.cc
  DEPS brpc_utils ${COMMON_DEPS} ${RPC_DEPS})

cc_library(
  heter_server
  SRCS heter_server.cc
  DEPS heter_client brpc_utils ${COMMON_DEPS} ${RPC_DEPS})
paddle/fluid/distributed/ps/service/README.md
0 → 100644
View file @
f0ef3442
# Directory Overview

* PSServer
* PSClient
* PsService
* Communicator
* MessageBusFramework
* *.proto