Commit f0ef3442 authored by yuguo960516yuguo

2.3.2-dtk-22.10.1

parent ad08b8ce
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
DIRNAME=$(dirname "$0")
source "$DIRNAME"/.common_test_util.sh
set_port "$@"
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# A simple test driver for cmake.
# Set PYTHONPATH before running a command.
# Usage:
#   ./.set_python_path.sh -d YOUR_PYTHON_PATH {exec...}
#
# It is the same as PYTHONPATH=${YOUR_PYTHON_PATH}:$PYTHONPATH {exec...}
#
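# Example (illustrative; the paths below are placeholders):
#   ./.set_python_path.sh -d /path/to/build/python python my_test.py
#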
PYPATH=""
set -x
while getopts "d:" opt; do
  case $opt in
    d)
      PYPATH=$OPTARG
      ;;
  esac
done
shift $(($OPTIND - 1))
export PYTHONPATH=$PYPATH:$PYTHONPATH
"$@"
add_subdirectory(utils)
add_subdirectory(scripts)
add_subdirectory(testing)
set(PYTHON_TESTS_DIR
    ${PADDLE_BINARY_DIR}/python/paddle/fluid/tests
    CACHE INTERNAL "python tests directory")
add_subdirectory(phi)
add_subdirectory(infrt)
add_subdirectory(fluid)
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
// All Paddle APIs in the C++ frontend.
#include "paddle/phi/api/all.h"
paddle.fluid.optimizer.PipelineOptimizer (paddle.fluid.optimizer.PipelineOptimizer, ('document', '2e55a29dbeb874934f7a1a1af3a22b8c'))
paddle.fluid.optimizer.PipelineOptimizer.__init__ (ArgSpec(args=['self', 'optimizer', 'num_microbatches', 'start_cpu_core_id'], varargs=None, keywords=None, defaults=(1, 0)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.PipelineOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.audio.features (ArgSpec(), ('document', 'd41d8cd98f00b204e9800998ecf8427e'))
paddle.audio.features.layers.LogMelSpectrogram (ArgSpec(), ('document', 'c38b53606aa89215c4f00d3833e158b8'))
paddle.audio.features.layers.LogMelSpectrogram.forward (ArgSpec(args=['self', 'x'], varargs=None, varkw=None, defaults=None, kwonlyargs=[], kwonlydefaults=None, annotations={'return': <class 'paddle.Tensor'>, 'x': <class 'paddle.Tensor'>}), ('document', '6c14f6f78dc697a6981cf90412e2f1ea'))
paddle.audio.features.layers.LogMelSpectrogram.load_dict (ArgSpec(args=[], varargs='args', varkw='kwargs', defaults=None, kwonlyargs=[], kwonlydefaults=None, annotations={}), ('document', '01221a60445ee437f439a8cbe293f759'))
paddle.audio.features.layers.LogMelSpectrogram.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers', 'structured_name_prefix', 'use_hook'], varargs=None, varkw=None, defaults=(None, True, '', True), kwonlyargs=[], kwonlydefaults=None, annotations={}), ('document', '0c01cb0c12220c9426ae49549b145b0b'))
paddle.audio.features.layers.MFCC (ArgSpec(), ('document', 'bcbe6499830d9228a4f746ddd63b6c0f'))
paddle.audio.features.layers.MFCC.forward (ArgSpec(args=['self', 'x'], varargs=None, varkw=None, defaults=None, kwonlyargs=[], kwonlydefaults=None, annotations={'return': <class 'paddle.Tensor'>, 'x': <class 'paddle.Tensor'>}), ('document', 'd86bcaa345f26851089bfdb3efecd9e7'))
paddle.audio.features.layers.MelSpectrogram (ArgSpec(), ('document', 'adf4012310984568ae9da6170aa89f91'))
paddle.audio.features.layers.MelSpectrogram.forward (ArgSpec(args=['self', 'x'], varargs=None, varkw=None, defaults=None, kwonlyargs=[], kwonlydefaults=None, annotations={'return': <class 'paddle.Tensor'>, 'x': <class 'paddle.Tensor'>}), ('document', '458e9d454c8773091567c6b400f48cf5'))
paddle.audio.features.layers.Spectrogram (ArgSpec(), ('document', '83811af6da032099bf147e3e01a458e1'))
paddle.audio.features.layers.Spectrogram.forward (ArgSpec(args=['self', 'x'], varargs=None, varkw=None, defaults=None, kwonlyargs=[], kwonlydefaults=None, annotations={'return': <class 'paddle.Tensor'>, 'x': <class 'paddle.Tensor'>}), ('document', 'ab11e318fca1410f743b5432394dea35'))
paddle.audio.functional (ArgSpec(), ('document', 'd41d8cd98f00b204e9800998ecf8427e'))
paddle.audio.functional.functional.compute_fbank_matrix (ArgSpec(args=['sr', 'n_fft', 'n_mels', 'f_min', 'f_max', 'htk', 'norm', 'dtype'], varargs=None, varkw=None, defaults=(64, 0.0, None, False, 'slaney', 'float32'), kwonlyargs=[], kwonlydefaults=None, annotations={'return': <class 'paddle.Tensor'>, 'sr': <class 'int'>, 'n_fft': <class 'int'>, 'n_mels': <class 'int'>, 'f_min': <class 'float'>, 'f_max': typing.Union[float, NoneType], 'htk': <class 'bool'>, 'norm': typing.Union[str, float], 'dtype': <class 'str'>}), ('document', '3c5411caa6baedb68860b09c81e0147c'))
paddle.audio.functional.functional.create_dct (ArgSpec(args=['n_mfcc', 'n_mels', 'norm', 'dtype'], varargs=None, varkw=None, defaults=('ortho', 'float32'), kwonlyargs=[], kwonlydefaults=None, annotations={'return': <class 'paddle.Tensor'>, 'n_mfcc': <class 'int'>, 'n_mels': <class 'int'>, 'norm': typing.Union[str, NoneType], 'dtype': <class 'str'>}), ('document', 'c9c57550671f9725b053769411d2f65a'))
paddle.audio.functional.functional.fft_frequencies (ArgSpec(args=['sr', 'n_fft', 'dtype'], varargs=None, varkw=None, defaults=('float32',), kwonlyargs=[], kwonlydefaults=None, annotations={'return': <class 'paddle.Tensor'>, 'sr': <class 'int'>, 'n_fft': <class 'int'>, 'dtype': <class 'str'>}), ('document', '057b990e79c9c780622407267c0a43c6'))
paddle.audio.functional.functional.hz_to_mel (ArgSpec(args=['freq', 'htk'], varargs=None, varkw=None, defaults=(False,), kwonlyargs=[], kwonlydefaults=None, annotations={'return': typing.Union[paddle.Tensor, float], 'freq': typing.Union[paddle.Tensor, float], 'htk': <class 'bool'>}), ('document', '7ca01521dd0bf26cd3f72c67f7168dc4'))
paddle.audio.functional.functional.mel_frequencies (ArgSpec(args=['n_mels', 'f_min', 'f_max', 'htk', 'dtype'], varargs=None, varkw=None, defaults=(64, 0.0, 11025.0, False, 'float32'), kwonlyargs=[], kwonlydefaults=None, annotations={'return': <class 'paddle.Tensor'>, 'n_mels': <class 'int'>, 'f_min': <class 'float'>, 'f_max': <class 'float'>, 'htk': <class 'bool'>, 'dtype': <class 'str'>}), ('document', '2af3cf997ed1274214ec240b2b59a98d'))
paddle.audio.functional.functional.mel_to_hz (ArgSpec(args=['mel', 'htk'], varargs=None, varkw=None, defaults=(False,), kwonlyargs=[], kwonlydefaults=None, annotations={'return': typing.Union[float, paddle.Tensor], 'mel': typing.Union[float, paddle.Tensor], 'htk': <class 'bool'>}), ('document', 'e93b432d382f98c60d7c7599489e7072'))
paddle.audio.functional.functional.power_to_db (ArgSpec(args=['spect', 'ref_value', 'amin', 'top_db'], varargs=None, varkw=None, defaults=(1.0, 1e-10, 80.0), kwonlyargs=[], kwonlydefaults=None, annotations={'return': <class 'paddle.Tensor'>, 'spect': <class 'paddle.Tensor'>, 'ref_value': <class 'float'>, 'amin': <class 'float'>, 'top_db': typing.Union[float, NoneType]}), ('document', '28bbb1973e8399e856bfaea0415cecb9'))
paddle.audio.functional.window.get_window (ArgSpec(args=['window', 'win_length', 'fftbins', 'dtype'], varargs=None, varkw=None, defaults=(True, 'float64'), kwonlyargs=[], kwonlydefaults=None, annotations={'return': <class 'paddle.Tensor'>, 'window': typing.Union[str, typing.Tuple[str, float]], 'win_length': <class 'int'>, 'fftbins': <class 'bool'>, 'dtype': <class 'str'>}), ('document', '2418d63da10c0cd5da9ecf0a88ddf783'))
add_subdirectory(memory)
add_subdirectory(platform)
add_subdirectory(distributed)
add_subdirectory(framework)
add_subdirectory(imperative)
add_subdirectory(operators)
add_subdirectory(pybind)
add_subdirectory(eager)
add_subdirectory(jit)
# NOTE: please add subdirectory inference at last.
add_subdirectory(inference)
add_subdirectory(auto_parallel)
add_subdirectory(collective)
add_subdirectory(store)
if(WITH_PYTHON)
  py_proto_compile(ps_py_proto SRCS the_one_ps.proto)
  add_custom_target(
    ps_py_proto_init ALL
    COMMAND ${CMAKE_COMMAND} -E make_directory
            ${PADDLE_BINARY_DIR}/python/paddle/distributed/fleet/proto)
  add_dependencies(ps_py_proto ps_py_proto_init)
  if(NOT WIN32)
    add_custom_command(
      TARGET ps_py_proto
      POST_BUILD
      COMMAND mv the_one_ps_pb2.py
              ${PADDLE_BINARY_DIR}/python/paddle/distributed/fleet/proto/)
  else()
    string(
      REPLACE "/" "\\" fleet_proto_dstpath
              "${PADDLE_BINARY_DIR}/python/paddle/distributed/fleet/proto/")
    add_custom_command(
      TARGET ps_py_proto
      POST_BUILD
      COMMAND copy /Y the_one_ps_pb2.py ${fleet_proto_dstpath}
      COMMENT
        "Copy generated python the_one_ps_pb2 into directory ${fleet_proto_dstpath}."
    )
  endif()
endif()

if(NOT WITH_PSCORE)
  add_subdirectory(fleet_executor)
  return()
endif()
proto_library(ps_framework_proto SRCS the_one_ps.proto)
set(DISTRIBUTE_COMPILE_FLAGS
    "-Wno-error=unused-value -Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor -Wno-error=sign-compare -Wno-error=unused-variable -Wno-error=return-type -Wno-error=unused-but-set-variable -Wno-error=unknown-pragmas -Wno-error=parentheses -Wno-error=unused-result"
)
if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0)
  set(DISTRIBUTE_COMPILE_FLAGS "${DISTRIBUTE_COMPILE_FLAGS} -faligned-new")
endif()
if(LINUX)
  add_subdirectory(rpc)
endif()
add_subdirectory(common)
add_subdirectory(ps)
add_subdirectory(test)
add_subdirectory(index_dataset)
add_subdirectory(fleet_executor)
proto_library(auto_parallel_proto SRCS auto_parallel.proto)
cc_library(
  device_mesh
  SRCS device_mesh.cc
  DEPS auto_parallel_proto phi_enforce)
cc_library(
  process_mesh
  SRCS process_mesh.cc
  DEPS auto_parallel_proto phi_enforce)
cc_library(
  dist_attr
  SRCS dist_attr.cc
  DEPS process_mesh auto_parallel_proto proto_desc phi_enforce)
cc_library(
  dist_mapper
  SRCS dist_mapper.cc
  DEPS device_mesh auto_parallel_proto phi_enforce)
cc_library(auto_parallel DEPS device_mesh process_mesh dist_attr dist_mapper)
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
syntax = "proto2";
package paddle.distributed.auto_parallel;
// ProcessMesh is used to organize processes, like an n-dimensional array.
message ProcessMeshProto {
// The size of each dimension.
repeated int64 shape = 1;
// These process ids are stored in row-major order.
// There are no duplicate process ids within one process mesh.
repeated int64 process_ids = 2;
// The name of each dimension.
repeated string dim_names = 3;
}
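// Illustrative example (values assumed for demonstration): a 2x3 mesh of six
// processes in row-major order (row 0 holds ids 0-2, row 1 holds ids 3-5)
// could be written in proto text format as:
//   shape: [2, 3]
//   process_ids: [0, 1, 2, 3, 4, 5]
//   dim_names: ["x", "y"]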
// This distributed attribute describes how to distribute the corresponding tensor
// and stores any other information needed by auto parallel.
message TensorDistAttrProto {
// The process mesh where a tensor is distributed.
optional ProcessMeshProto process_mesh = 1;
// The length of dims_mapping is the same as the length of the tensor shape.
// The i-th dimension of the tensor will be sharded by the dims_mapping[i]-th dimension
// of the above process mesh. If dims_mapping[i] is -1, the i-th dimension of the tensor
// will not be sharded. For example, given a tensor shape [2, 6, 12], a process mesh
// shape [2, 3] and a dims_mapping [-1, 1, 0], each sharded tensor will have a shape [2, 2, 6].
repeated int64 dims_mapping = 2;
// The batch dimension of the corresponding tensor.
optional int64 batch_dim = 3;
// If dynamic_dims[i] is True, the i-th dimension of the corresponding tensor
// changes dynamically. Otherwise, the i-th dimension of the tensor is statically determined.
repeated bool dynamic_dims = 4;
}
// This distributed attribute describes how to distribute the corresponding operator
// and stores any other information needed by auto parallel.
message OperatorDistAttrProto {
message TensorDistAttrMappingEntryProto {
optional string name = 1;
optional TensorDistAttrProto tensor_dist_attr = 2;
}
// The key of this map is the input tensor name and the value is the distributed attribute
// of the input tensor required by the corresponding operator.
// The distributed attribute of the actual tensor may not be the same as that within
// the distributed attribute of the operator.
repeated TensorDistAttrMappingEntryProto input_dist_attrs = 1;
// The key of this map is the output tensor name and the value is the distributed attribute
// of the output tensor required by the corresponding operator.
// The distributed attribute of the actual tensor may not be the same as that within
// the distributed attribute of the operator.
repeated TensorDistAttrMappingEntryProto output_dist_attrs = 2;
// The process mesh where an op is distributed.
optional ProcessMeshProto process_mesh = 3;
// An operator ideally has a distributed operator, which may have multiple distributed implementations.
// This field is usually the same as the operator type. However, some operators, such as the
// element-wise operators, may share the same distributed operator; this field is used for that scenario.
optional string impl_type = 4;
// This field tells which distributed implementation of the corresponding operator
// will be selected for the actual computation.
optional int64 impl_idx = 5;
}
// This proto describes the capability of one device such as the computation and memory.
message DeviceCapabilityProto {
optional double single_precision_flops = 1;
optional double double_precision_flops = 2;
optional double memory_size_in_bytes = 3;
optional double clock_rate_in_ghz = 4;
}
// This proto represents a device.
message DeviceProto {
// The global id of this device within the cluster.
optional int64 global_id = 1;
// The local id of this device within the machine.
optional int64 local_id = 2;
// The id of the machine that owns this device.
optional int64 machine_id = 3;
// The type of this device.
optional string type = 4;
// The capability of this device.
optional DeviceCapabilityProto capability = 5;
}
// This proto describes the capability of the link between two devices.
message LinkCapabilityProto {
optional int64 bandwidth = 1; // Bytes/s
optional int64 latency = 2;
}
message LinkProto {
// The global id of the source device.
optional int64 source_id = 1;
// The global id of the target device.
optional int64 target_id = 2;
// Represent the link type.
optional string type = 3;
// The capability of this link.
optional LinkCapabilityProto capability = 4;
}
// DeviceMesh is used to organize devices, like an n-dimensional array.
message DeviceMeshProto {
// The name of this mesh.
optional string name = 1;
// The size of each dimension.
repeated int64 shape = 2;
// These device ids are stored in row-major order.
// There are no duplicate device ids within one device mesh.
repeated int64 device_ids = 3;
// The name of each dimension.
repeated string dim_names = 4;
// The devices of this mesh.
repeated DeviceProto devices = 5;
// The links between devices in this mesh.
repeated LinkProto links = 6;
}
// Record the mapping between the logical processes and the physical devices.
message DistributedMapperProto {
// The device meshes used by this distributed computation,
// which may be shared by multiple distributed computations.
repeated DeviceMeshProto device_meshes = 1;
message MapperEntryProto {
optional int64 process_id = 1;
optional string device_mesh_name = 2;
repeated int64 device_ids = 3;
}
// The mapping from process ids to device ids.
// It is also possible for one process to use multiple devices,
// and for one device to be shared by multiple processes.
repeated MapperEntryProto process_id_to_device_ids = 2;
}
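// Illustrative example (values assumed for demonstration): two processes
// mapped one-to-one onto the two devices of a mesh named "mesh0", in proto
// text format:
//   device_meshes {
//     name: "mesh0" shape: [1, 2] device_ids: [0, 1] dim_names: ["x", "y"]
//   }
//   process_id_to_device_ids { process_id: 0 device_mesh_name: "mesh0" device_ids: [0] }
//   process_id_to_device_ids { process_id: 1 device_mesh_name: "mesh0" device_ids: [1] }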
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <algorithm>
#include <iterator>
#include "paddle/fluid/distributed/auto_parallel/device_mesh.h"
#include "paddle/fluid/distributed/auto_parallel/utils.h"
namespace paddle {
namespace distributed {
namespace auto_parallel {
std::string DeviceCapability::to_string() const {
std::string str;
str += "{sflops: " + to_string_with_precision(single_precision_flops) + ", ";
str += "dflops: " + to_string_with_precision(double_precision_flops) + ", ";
str += "memory: " + to_string_with_precision(memory_size_in_bytes) + ", ";
str += "rate: " + to_string_with_precision(clock_rate_in_ghz) + "}";
return str;
}
DeviceCapability DeviceCapability::from_proto(
const DeviceCapabilityProto &proto) {
DeviceCapability capability;
capability.single_precision_flops = proto.single_precision_flops();
capability.double_precision_flops = proto.double_precision_flops();
capability.memory_size_in_bytes = proto.memory_size_in_bytes();
capability.clock_rate_in_ghz = proto.clock_rate_in_ghz();
return capability;
}
DeviceCapabilityProto DeviceCapability::to_proto() const {
DeviceCapabilityProto proto;
proto.set_single_precision_flops(single_precision_flops);
proto.set_double_precision_flops(double_precision_flops);
proto.set_memory_size_in_bytes(memory_size_in_bytes);
proto.set_clock_rate_in_ghz(clock_rate_in_ghz);
return proto;
}
std::string Device::to_string() const {
std::string str = "{global_id: " + std::to_string(global_id_) + ", ";
str += "local_id: " + std::to_string(local_id_) + ", ";
str += "machine_id: " + std::to_string(machine_id_) + ", ";
str += "type: " + type_ + ", ";
str += "capability: " + capability_.to_string() + "}";
return str;
}
Device Device::from_proto(const DeviceProto &proto) {
Device device;
device.global_id_ = proto.global_id();
device.local_id_ = proto.local_id();
device.machine_id_ = proto.machine_id();
device.type_ = proto.type();
device.capability_ = DeviceCapability::from_proto(proto.capability());
return device;
}
DeviceProto Device::to_proto() const {
DeviceProto proto;
proto.set_global_id(global_id_);
proto.set_local_id(local_id_);
proto.set_machine_id(machine_id_);
proto.set_type(type_);
proto.mutable_capability()->CopyFrom(capability_.to_proto());
return proto;
}
bool operator==(const Device &lhs, const Device &rhs) {
if (lhs.global_id() != rhs.global_id()) {
return false;
}
if (lhs.local_id() != rhs.local_id()) {
return false;
}
if (lhs.machine_id() != rhs.machine_id()) {
return false;
}
if (lhs.type() != rhs.type()) {
return false;
}
return true;
}
std::string LinkCapability::to_string() const {
std::string str;
str += "{bandwidth: " + to_string_with_precision(bandwidth) + ",";
str += "latency: " + to_string_with_precision(latency) + "}";
return str;
}
LinkCapability LinkCapability::from_proto(const LinkCapabilityProto &proto) {
LinkCapability capability;
capability.bandwidth = proto.bandwidth();
capability.latency = proto.latency();
return capability;
}
LinkCapabilityProto LinkCapability::to_proto() const {
LinkCapabilityProto proto;
proto.set_bandwidth(bandwidth);
proto.set_latency(latency);
return proto;
}
std::string Link::to_string() const {
std::string str = "{source_id:" + std::to_string(source_id_) + ",";
str += "target_id:" + std::to_string(target_id_) + ",";
str += "type:" + type_ + ",";
str += "capability:" + capability_.to_string() + "}";
return str;
}
Link Link::from_proto(const LinkProto &proto) {
Link link;
link.source_id_ = proto.source_id();
link.target_id_ = proto.target_id();
link.type_ = proto.type();
link.capability_ = LinkCapability::from_proto(proto.capability());
return link;
}
LinkProto Link::to_proto() const {
LinkProto proto;
proto.set_source_id(source_id_);
proto.set_target_id(target_id_);
proto.set_type(type_);
proto.mutable_capability()->CopyFrom(capability_.to_proto());
return proto;
}
bool operator==(const Link &lhs, const Link &rhs) {
if (lhs.source_id() != rhs.source_id()) {
return false;
}
if (lhs.target_id() != rhs.target_id()) {
return false;
}
if (lhs.type() != rhs.type()) {
return false;
}
return true;
}
bool Machine::contains(int64_t device_id) const {
if (devices_.count(device_id) == 1) {
return true;
} else {
return false;
}
}
void Machine::add_device(const Device &device) {
if (id() == -1) {
set_id(device.machine_id());
} else {
PADDLE_ENFORCE_EQ(device.machine_id(),
id(),
platform::errors::InvalidArgument(
"The machine id [%d] of the device should be equal "
"to this machine id [%d].",
device.machine_id(),
id_));
}
devices_[device.global_id()] = &device;
}
void Machine::add_link(const Link &link) {
PADDLE_ENFORCE_EQ(contains(link.source_id()),
true,
platform::errors::InvalidArgument(
"The source device id of the added link [%s] "
"cannot be found in the device_ids. Please add the "
"source device before adding this link",
std::to_string(link.source_id())));
links_[link.source_id()][link.target_id()] = &link;
}
std::string Machine::to_string() const {
std::string str = "{devices: [";
for (const auto &device : devices_) {
str += device.second->to_string() + ", ";
}
str.replace(str.size() - 2, 2, "], ");
str += "links: [";
for (const auto &item : links_) {
str += "{";
str += "source_id: " + std::to_string(item.first) + ", neighbors: [";
for (const auto &link : item.second) {
str += link.second->to_string() + ", ";
}
str.replace(str.size() - 2, 2, "]}, ");
}
str.replace(str.size() - 4, 4, "]}");
return str;
}
DeviceMesh::DeviceMesh(const std::string &name,
const std::vector<int64_t> &shape,
const std::vector<int64_t> &device_ids,
const std::vector<std::string> &dim_names) {
name_ = name;
shape_ = shape;
int64_t size = this->size();
PADDLE_ENFORCE_EQ(size,
device_ids.size(),
platform::errors::InvalidArgument(
"The size %d of this device mesh must be "
"equal to the size %d of its device ids.",
size,
device_ids.size()));
PADDLE_ENFORCE_EQ(
has_duplicates(device_ids),
false,
platform::errors::InvalidArgument("The device ids [%s] must be unique.",
str_join(device_ids)));
device_ids_ = device_ids;
PADDLE_ENFORCE_EQ(
shape_.size(),
dim_names.size(),
platform::errors::InvalidArgument(
"The size %d of mesh shape must be equal to the size %d "
"of the dimension names.",
shape_.size(),
dim_names.size()));
PADDLE_ENFORCE_EQ(has_duplicates(dim_names),
false,
platform::errors::InvalidArgument(
"The names [%s] of each dimension must be unique.",
str_join(dim_names)));
dim_names_ = dim_names;
}
int64_t DeviceMesh::size() const {
if (shape_.empty()) return 0;
int64_t size = 1;
for (const int64_t dim_size : shape_) size *= dim_size;
return size;
}
bool DeviceMesh::contains(int64_t device_id) const {
auto result =
std::find(std::begin(device_ids_), std::end(device_ids_), device_id);
if (result != std::end(device_ids_)) {
return true;
} else {
return false;
}
}
void DeviceMesh::add_device(const Device &device) {
PADDLE_ENFORCE_EQ(
contains(device.global_id()),
true,
platform::errors::InvalidArgument(
"The added device id [%s] cannot be found in the device_ids.",
std::to_string(device.global_id())));
// operator[] will create a new object if it cannot find one,
// so we add default constructors for Device and Machine
// to make sure the new object can be created.
devices_[device.global_id()] = device;
machines_[device.machine_id()].add_device(devices_[device.global_id()]);
}
void DeviceMesh::add_link(const Link &link) {
PADDLE_ENFORCE_EQ(
contains(link.source_id()),
true,
platform::errors::InvalidArgument("The source id of the added link [%s] "
"cannot be found in the device_ids.",
std::to_string(link.source_id())));
PADDLE_ENFORCE_EQ(
contains(link.target_id()),
true,
platform::errors::InvalidArgument("The source id of the added link [%s] "
"cannot be found in the device_ids.",
std::to_string(link.target_id())));
// operator[] will create a new object if it cannot find one,
// so we add default constructors for Device and Machine
// to make sure the new object can be created.
links_[link.source_id()][link.target_id()] = link;
const Device &source_device = devices_[link.source_id()];
machines_[source_device.machine_id()].add_link(
links_[link.source_id()][link.target_id()]);
}
std::string DeviceMesh::to_string() const {
std::string mesh_str = "{name: " + name_ + ", ";
mesh_str += "shape: [" + str_join(shape_) + "], ";
mesh_str += "device_ids: [" + str_join(device_ids_) + "], ";
mesh_str += "dim_names: [" + str_join(dim_names_) + "], ";
mesh_str += "\ndevices: [\n";
for (const auto &device : devices_) {
mesh_str += " " + device.second.to_string() + ",\n";
}
mesh_str.replace(mesh_str.size() - 2, 2, "],");
mesh_str += "\nlinks: [\n";
for (const auto &item : links_) {
mesh_str += " {";
mesh_str += "source_id: " + std::to_string(item.first) + ", neighbors: [";
for (const auto &link : item.second) {
mesh_str += link.second.to_string() + ", ";
}
mesh_str.replace(mesh_str.size() - 2, 2, "]},\n");
}
mesh_str.replace(mesh_str.size() - 4, 4, "]}");
return mesh_str;
}
DeviceMesh DeviceMesh::from_proto(const DeviceMeshProto &proto) {
DeviceMesh mesh;
mesh.name_ = proto.name();
mesh.shape_.resize(proto.shape_size());
for (int64_t i = 0; i < proto.shape_size(); ++i) {
mesh.shape_[i] = proto.shape(i);
}
mesh.device_ids_.resize(proto.device_ids_size());
for (int64_t i = 0; i < proto.device_ids_size(); ++i) {
mesh.device_ids_[i] = proto.device_ids(i);
}
mesh.dim_names_.resize(proto.dim_names_size());
for (int64_t i = 0; i < proto.dim_names_size(); ++i) {
mesh.dim_names_[i] = proto.dim_names(i);
}
for (int64_t i = 0; i < proto.devices_size(); ++i) {
mesh.add_device(Device::from_proto(proto.devices(i)));
}
for (int64_t i = 0; i < proto.links_size(); ++i) {
mesh.add_link(Link::from_proto(proto.links(i)));
}
return mesh;
}
DeviceMeshProto DeviceMesh::to_proto() const {
DeviceMeshProto proto;
proto.set_name(name_);
for (const auto &i : shape_) {
proto.add_shape(i);
}
for (const auto &i : device_ids_) {
proto.add_device_ids(i);
}
for (const auto &i : dim_names_) {
proto.add_dim_names(i);
}
for (const auto &device : devices_) {
proto.mutable_devices()->Add()->CopyFrom(device.second.to_proto());
}
for (const auto &neighbors : links_) {
for (const auto &link : neighbors.second) {
proto.mutable_links()->Add()->CopyFrom(link.second.to_proto());
}
}
return proto;
}
bool operator==(const DeviceMesh &lhs, const DeviceMesh &rhs) {
// Use the unique name for a fast comparison.
if (lhs.name() != rhs.name()) {
return false;
}
return true;
}
} // namespace auto_parallel
} // namespace distributed
} // namespace paddle
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <iterator>
#include <map>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/distributed/auto_parallel/auto_parallel.pb.h"
#include "paddle/fluid/distributed/auto_parallel/utils.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace distributed {
namespace auto_parallel {
struct DeviceCapability {
double single_precision_flops = 0.0;
double double_precision_flops = 0.0;
double memory_size_in_bytes = 0.0;
double clock_rate_in_ghz = 0.0;
// DeviceCapability from_string(const std::string& str);
std::string to_string() const;
static DeviceCapability from_proto(const DeviceCapabilityProto& proto);
DeviceCapabilityProto to_proto() const;
};
inline std::ostream& operator<<(std::ostream& os, const DeviceCapability& obj) {
os << obj.to_string();
return os;
}
class Device {
public:
Device() = default;
Device(int64_t global_id,
int64_t local_id,
int64_t machine_id,
const std::string& type)
: global_id_(global_id),
local_id_(local_id),
machine_id_(machine_id),
type_(type) {}
int64_t global_id() const { return global_id_; }
int64_t local_id() const { return local_id_; }
int64_t machine_id() const { return machine_id_; }
const std::string& type() const { return type_; }
const DeviceCapability& capability() const { return capability_; }
void set_capability(const DeviceCapability& capability) {
capability_ = capability;
}
// Device from_string(const std::string& mesh_str);
std::string to_string() const;
static Device from_proto(const DeviceProto& proto);
DeviceProto to_proto() const;
private:
int64_t global_id_;
int64_t local_id_;
int64_t machine_id_;
std::string type_;
DeviceCapability capability_;
};
inline std::ostream& operator<<(std::ostream& os, const Device& obj) {
os << obj.to_string();
return os;
}
bool operator==(const Device& lhs, const Device& rhs);
inline bool operator!=(const Device& lhs, const Device& rhs) {
return !operator==(lhs, rhs);
}
struct LinkCapability {
double bandwidth = 0.0; // Bytes/s
double latency = 0.0;
// LinkCapability from_string(const std::string& str);
std::string to_string() const;
static LinkCapability from_proto(const LinkCapabilityProto& proto);
LinkCapabilityProto to_proto() const;
};
inline std::ostream& operator<<(std::ostream& os, const LinkCapability& obj) {
os << obj.to_string();
return os;
}
class Link {
public:
Link() = default;
Link(int64_t source_id, int64_t target_id, const std::string& type)
: source_id_(source_id), target_id_(target_id), type_(type) {}
int64_t source_id() const { return source_id_; }
int64_t target_id() const { return target_id_; }
const std::string& type() const { return type_; }
const LinkCapability& capability() const { return capability_; }
void set_capability(const LinkCapability& capability) {
capability_ = capability;
}
// Link from_string(const std::string& str);
std::string to_string() const;
static Link from_proto(const LinkProto& proto);
LinkProto to_proto() const;
private:
int64_t source_id_;
int64_t target_id_;
std::string type_;
LinkCapability capability_;
};
inline std::ostream& operator<<(std::ostream& os, const Link& obj) {
os << obj.to_string();
return os;
}
bool operator==(const Link& lhs, const Link& rhs);
inline bool operator!=(const Link& lhs, const Link& rhs) {
return !operator==(lhs, rhs);
}
class Machine {
public:
Machine() = default;
explicit Machine(int64_t id) : id_(id) {}
int64_t id() const { return id_; }
void set_id(int64_t id) { id_ = id; }
const std::unordered_map<int64_t, const Device*>& devices() const {
return devices_;
}
const std::unordered_map<int64_t, std::unordered_map<int64_t, const Link*>>&
links() const {
return links_;
}
const Device& device(int64_t global_id) const {
return *devices_.at(global_id);
}
const Link& link(int64_t source_id, int64_t target_id) const {
return *links_.at(source_id).at(target_id);
}
bool contains(int64_t device_id) const;
void add_device(const Device& device);
void add_link(const Link& link);
// Machine from_string(const std::string& str);
std::string to_string() const;
private:
int64_t id_ = -1;
std::unordered_map<int64_t, const Device*> devices_;
std::unordered_map<int64_t, std::unordered_map<int64_t, const Link*>> links_;
};
class DeviceMesh {
public:
DeviceMesh() = default;
DeviceMesh(const std::string& name,
const std::vector<int64_t>& shape,
const std::vector<int64_t>& device_ids,
const std::vector<std::string>& dim_names);
const std::string& name() const { return name_; }
void set_name(const std::string& name) { name_ = name; }
const std::vector<int64_t>& shape() const { return shape_; }
const std::vector<int64_t>& device_ids() const { return device_ids_; }
const std::vector<std::string>& dim_names() const { return dim_names_; }
std::string device_type() const {
if (empty() || devices_.empty()) return "UNKNOWN";
return std::begin(devices_)->second.type();
}
const std::unordered_map<int64_t, Device>& devices() const {
return devices_;
}
const std::unordered_map<int64_t, std::unordered_map<int64_t, Link>>& links()
const {
return links_;
}
const std::unordered_map<int64_t, Machine>& machines() const {
return machines_;
}
const Device& device(int64_t global_id) const {
return devices_.at(global_id);
}
const Link& link(int64_t source_id, int64_t target_id) const {
return links_.at(source_id).at(target_id);
}
const Machine& machine(int64_t machine_id) const {
return machines_.at(machine_id);
}
int64_t size() const;
int64_t ndim() const { return shape_.size(); }
int64_t dim_size(int64_t dim) const {
int64_t cdim = canonical_dim(dim, shape_.size());
return shape_[cdim];
}
int64_t dim_size(const std::string& dim_name) const {
for (std::size_t i = 0; i < dim_names_.size(); ++i) {
if (dim_names_[i] == dim_name) {
return shape_[i];
}
}
PADDLE_THROW(platform::errors::InvalidArgument(
"Cannot find the dimension of %s in this device mesh.", dim_name));
}
bool empty() const { return (shape_.empty() || device_ids_.empty()); }
bool contains(int64_t device_id) const;
void add_device(const Device& device);
void add_link(const Link& link);
// DeviceMesh from_string(const std::string& mesh_str);
std::string to_string() const;
static DeviceMesh from_proto(const DeviceMeshProto& proto);
DeviceMeshProto to_proto() const;
private:
std::string name_;
std::vector<int64_t> shape_;
std::vector<int64_t> device_ids_;
std::vector<std::string> dim_names_;
std::unordered_map<int64_t, Device> devices_;
std::unordered_map<int64_t, std::unordered_map<int64_t, Link>> links_;
std::unordered_map<int64_t, Machine> machines_;
};
inline std::ostream& operator<<(std::ostream& os, const DeviceMesh& obj) {
os << obj.to_string();
return os;
}
bool operator==(const DeviceMesh& lhs, const DeviceMesh& rhs);
inline bool operator!=(const DeviceMesh& lhs, const DeviceMesh& rhs) {
return !operator==(lhs, rhs);
}
} // namespace auto_parallel
} // namespace distributed
} // namespace paddle
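// ---------------------------------------------------------------------------
// Illustrative usage sketch (not part of the original sources): it builds a
// minimal 1x2 mesh holding two GPUs on one machine and wires a pair of
// directed links between them. All names and values below are assumptions
// chosen for demonstration; the "NVL" link type string is arbitrary.
// ---------------------------------------------------------------------------
namespace auto_parallel_example {

using paddle::distributed::auto_parallel::Device;
using paddle::distributed::auto_parallel::DeviceMesh;
using paddle::distributed::auto_parallel::Link;

inline DeviceMesh BuildExampleMesh() {
  // A 1x2 mesh: two devices in total, with the dimensions named "x" and "y".
  DeviceMesh mesh("example_mesh", /*shape=*/{1, 2}, /*device_ids=*/{0, 1},
                  /*dim_names=*/{"x", "y"});
  // Both devices live on machine 0; add_device groups them by machine_id.
  mesh.add_device(Device(/*global_id=*/0, /*local_id=*/0, /*machine_id=*/0, "GPU"));
  mesh.add_device(Device(/*global_id=*/1, /*local_id=*/1, /*machine_id=*/0, "GPU"));
  // Links are directed, so connect the two devices in both directions.
  mesh.add_link(Link(/*source_id=*/0, /*target_id=*/1, "NVL"));
  mesh.add_link(Link(/*source_id=*/1, /*target_id=*/0, "NVL"));
  return mesh;
}

}  // namespace auto_parallel_example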
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <algorithm>
#include <iostream>
#include <iterator>
#include <unordered_map>
#include "paddle/fluid/distributed/auto_parallel/dist_attr.h"
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/op_desc.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/var_desc.h"
namespace paddle {
namespace distributed {
namespace auto_parallel {
std::vector<std::string> TensorDistAttr::fields_{
"process_mesh", "dims_mapping", "batch_dim", "dynamic_dims"};
TensorDistAttr::TensorDistAttr(const VarDesc& tensor)
: tensor_(&tensor), batch_dim_(0) {
set_default_dims_mapping();
std::vector<int64_t> tensor_shape = tensor_->GetShape();
for (std::size_t i = 0; i < tensor_shape.size(); ++i) {
dynamic_dims_.push_back(false);
}
}
TensorDistAttr::TensorDistAttr(const TensorDistAttr& dist_attr) {
if (tensor_ == nullptr) {
tensor_ = dist_attr.tensor();
}
set_process_mesh(dist_attr.process_mesh());
set_dims_mapping(dist_attr.dims_mapping());
set_batch_dim(dist_attr.batch_dim());
set_dynamic_dims(dist_attr.dynamic_dims());
set_annotated(dist_attr.annotated());
}
TensorDistAttr& TensorDistAttr::operator=(const TensorDistAttr& dist_attr) {
if (tensor_ == nullptr) {
tensor_ = dist_attr.tensor();
}
set_process_mesh(dist_attr.process_mesh());
set_dims_mapping(dist_attr.dims_mapping());
set_batch_dim(dist_attr.batch_dim());
set_dynamic_dims(dist_attr.dynamic_dims());
set_annotated(dist_attr.annotated());
return *this;
}
void TensorDistAttr::set_process_mesh(const ProcessMesh& process_mesh) {
PADDLE_ENFORCE_EQ(verify_process_mesh(process_mesh),
true,
platform::errors::InvalidArgument(
"Wrong process mesh %s.", process_mesh.to_string()));
process_mesh_ = process_mesh;
}
void TensorDistAttr::set_dims_mapping(
const std::vector<int64_t>& dims_mapping) {
PADDLE_ENFORCE_EQ(verify_dims_mapping(dims_mapping),
true,
platform::errors::InvalidArgument("Wrong dims_mapping %s.",
str_join(dims_mapping)));
dims_mapping_ = dims_mapping;
}
void TensorDistAttr::set_batch_dim(int64_t batch_dim) {
PADDLE_ENFORCE_EQ(
verify_batch_dim(batch_dim),
true,
platform::errors::InvalidArgument(
"Wrong batch_dim %d in this distributed attribute.", batch_dim));
if (tensor_ != nullptr) {
std::vector<int64_t> tensor_shape = tensor_->GetShape();
int64_t canonical_batch_dim = canonical_dim(batch_dim, tensor_shape.size());
batch_dim_ = canonical_batch_dim;
} else {
batch_dim_ = batch_dim;
}
}
void TensorDistAttr::set_dynamic_dims(const std::vector<bool>& dynamic_dims) {
PADDLE_ENFORCE_EQ(
verify_dynamic_dims(dynamic_dims),
true,
platform::errors::InvalidArgument("The dynamic_dims [%s] is wrong.",
str_join(dynamic_dims)));
dynamic_dims_ = dynamic_dims;
}
void TensorDistAttr::set_annotated(
const std::map<std::string, bool>& annotated) {
PADDLE_ENFORCE_EQ(verify_annotated(annotated),
true,
platform::errors::InvalidArgument(
"The annotated [%s] is wrong.", str_join(annotated)));
annotated_ = annotated;
}
void TensorDistAttr::set_default_dims_mapping() {
if (tensor_ != nullptr) {
std::vector<int64_t> tensor_shape = tensor_->GetShape();
dims_mapping_ = std::vector<int64_t>(tensor_shape.size(), -1);
}
}
void TensorDistAttr::annotate(const std::string& name) {
auto result = std::find(std::begin(fields_), std::end(fields_), name);
if (result != std::end(fields_)) {
annotated_[name] = true;
}
}
bool TensorDistAttr::verify_process_mesh(
const ProcessMesh& process_mesh) const {
if (!process_mesh_.empty()) {
for (int64_t dim_mapping : dims_mapping_) {
if (dim_mapping < -1 || dim_mapping >= process_mesh_.ndim()) {
return false;
}
}
}
return true;
}
bool TensorDistAttr::verify_dims_mapping(
const std::vector<int64_t>& dims_mapping) const {
if (tensor_ != nullptr) {
std::vector<int64_t> tensor_shape = tensor_->GetShape();
if (dims_mapping.size() != tensor_shape.size()) {
return false;
}
}
std::unordered_map<int64_t, int64_t> map;
if (!process_mesh_.empty()) {
for (int64_t i : dims_mapping) {
if (i < -1 || i >= process_mesh_.ndim()) {
return false;
}
++map[i];
if (i != -1 && map[i] > 1) {
return false;
}
}
} else {
for (int64_t i : dims_mapping) {
++map[i];
if (i != -1 && map[i] > 1) {
return false;
}
}
}
return true;
}
bool TensorDistAttr::verify_batch_dim(int64_t dim) const {
if (tensor_ != nullptr) {
std::vector<int64_t> tensor_shape = tensor_->GetShape();
int64_t ndim = tensor_shape.size();
if (dim < 0) {
dim = dim + ndim;
}
if (dim < 0 || dim >= ndim) {
return false;
}
}
return true;
}
bool TensorDistAttr::verify_dynamic_dims(
const std::vector<bool>& dynamic_dims) const {
if (tensor_ != nullptr) {
std::vector<int64_t> tensor_shape = tensor_->GetShape();
if (dynamic_dims.size() != tensor_shape.size()) {
return false;
}
}
return true;
}
bool TensorDistAttr::verify_annotated(
const std::map<std::string, bool>& annotated) const {
for (const auto& item : annotated) {
auto result = std::find(std::begin(fields_), std::end(fields_), item.first);
if (result == std::end(fields_)) {
return false;
}
}
return true;
}
bool TensorDistAttr::verify() const {
if (tensor_ == nullptr) {
return false;
}
if (!verify_process_mesh(process_mesh_)) {
return false;
}
if (!verify_dims_mapping(dims_mapping_)) {
return false;
}
if (!verify_batch_dim(batch_dim_)) {
return false;
}
if (!verify_dynamic_dims(dynamic_dims_)) {
return false;
}
if (!verify_annotated(annotated_)) {
return false;
}
return true;
}
std::string TensorDistAttr::to_string() const {
std::string dist_str;
if (tensor_ != nullptr) {
dist_str = "{tensor_name: " + tensor_->Name() + ", ";
} else {
dist_str = "{tensor_name: None, ";
}
dist_str += "process_mesh: " + process_mesh_.to_string() + ", ";
dist_str += "dims_mappings: [" + str_join(dims_mapping_) + "], ";
dist_str += "batch_dim: " + std::to_string(batch_dim_) + ", ";
dist_str += "dynamic_dims: [" + str_join(dynamic_dims_) + "], ";
dist_str += "annotated: [" + str_join(annotated_) + "]}";
return dist_str;
}
TensorDistAttr TensorDistAttr::from_proto(const TensorDistAttrProto& proto) {
TensorDistAttr dist_attr;
dist_attr.process_mesh_ = ProcessMesh::from_proto(proto.process_mesh());
dist_attr.dims_mapping_.resize(proto.dims_mapping_size());
for (int64_t i = 0; i < proto.dims_mapping_size(); ++i) {
dist_attr.dims_mapping_[i] = proto.dims_mapping(i);
}
dist_attr.batch_dim_ = proto.batch_dim();
dist_attr.dynamic_dims_.resize(proto.dynamic_dims_size());
for (int64_t i = 0; i < proto.dynamic_dims_size(); ++i) {
dist_attr.dynamic_dims_[i] = proto.dynamic_dims(i);
}
return dist_attr;
}
TensorDistAttrProto TensorDistAttr::to_proto() const {
TensorDistAttrProto proto;
proto.mutable_process_mesh()->CopyFrom(process_mesh_.to_proto());
for (const auto& i : dims_mapping_) {
proto.add_dims_mapping(i);
}
proto.set_batch_dim(batch_dim_);
for (const auto& i : dynamic_dims_) {
proto.add_dynamic_dims(i);
}
return proto;
}
bool operator==(const TensorDistAttr& lhs, const TensorDistAttr& rhs) {
if (lhs.process_mesh() != rhs.process_mesh()) {
return false;
}
if (lhs.dims_mapping() != rhs.dims_mapping()) {
return false;
}
if (lhs.batch_dim() != rhs.batch_dim()) {
return false;
}
if (lhs.dynamic_dims() != rhs.dynamic_dims()) {
return false;
}
return true;
}
std::vector<std::string> OperatorDistAttr::fields_{
"process_mesh", "impl_type", "impl_idx"};
OperatorDistAttr::OperatorDistAttr(const OpDesc& op) : op_(&op) {
for (std::string name : op_->InputArgumentNames()) {
VarDesc* input = op_->Block()->FindVarRecursive(name);
inputs_[name] = input;
input_dist_attrs_[name] = TensorDistAttr(*input);
}
for (std::string name : op_->OutputArgumentNames()) {
VarDesc* output = op_->Block()->FindVarRecursive(name);
outputs_[name] = output;
output_dist_attrs_[name] = TensorDistAttr(*output);
}
impl_type_ = "default";
impl_idx_ = 0;
}
OperatorDistAttr::OperatorDistAttr(const OperatorDistAttr& dist_attr) {
if (op_ == nullptr) {
op_ = dist_attr.op();
}
for (const auto& item : dist_attr.input_dist_attrs()) {
set_input_dist_attr(item.first, item.second);
}
for (const auto& item : dist_attr.output_dist_attrs()) {
set_output_dist_attr(item.first, item.second);
}
set_process_mesh(dist_attr.process_mesh());
set_impl_type(dist_attr.impl_type());
set_impl_idx(dist_attr.impl_idx());
set_annotated(dist_attr.annotated());
}
OperatorDistAttr& OperatorDistAttr::operator=(
const OperatorDistAttr& dist_attr) {
if (op_ == nullptr) {
op_ = dist_attr.op();
}
for (const auto& item : dist_attr.input_dist_attrs()) {
set_input_dist_attr(item.first, item.second);
}
for (const auto& item : dist_attr.output_dist_attrs()) {
set_output_dist_attr(item.first, item.second);
}
set_process_mesh(dist_attr.process_mesh());
set_impl_type(dist_attr.impl_type());
set_impl_idx(dist_attr.impl_idx());
set_annotated(dist_attr.annotated());
return *this;
}
void OperatorDistAttr::set_input_dist_attr(const std::string& name,
const TensorDistAttr& dist_attr) {
PADDLE_ENFORCE_EQ(
verify_input_dist_attr(name, dist_attr),
true,
platform::errors::InvalidArgument(
"Wrong dist_attr %s for %s.", dist_attr.to_string(), name));
input_dist_attrs_[name] = dist_attr;
// Make sure the process mesh of the input is the same as that of the op
input_dist_attrs_[name].set_process_mesh(process_mesh_);
}
void OperatorDistAttr::set_output_dist_attr(const std::string& name,
const TensorDistAttr& dist_attr) {
PADDLE_ENFORCE_EQ(
verify_output_dist_attr(name, dist_attr),
true,
platform::errors::InvalidArgument(
"Wrong dist_attr %s for %s.", dist_attr.to_string(), name));
output_dist_attrs_[name] = dist_attr;
// Make sure the process mesh of the output is the same as that of the op
output_dist_attrs_[name].set_process_mesh(process_mesh_);
}
void OperatorDistAttr::set_process_mesh(const ProcessMesh& process_mesh) {
for (auto& item : input_dist_attrs_) {
item.second.set_process_mesh(process_mesh);
}
for (auto& item : output_dist_attrs_) {
item.second.set_process_mesh(process_mesh);
}
process_mesh_ = process_mesh;
}
void OperatorDistAttr::annotate(const std::string& name) {
auto result = std::find(std::begin(fields_), std::end(fields_), name);
if (result != std::end(fields_)) {
annotated_[name] = true;
}
if (name == "process_mesh") {
for (auto& item : input_dist_attrs_) {
item.second.annotate(name);
}
for (auto& item : output_dist_attrs_) {
item.second.annotate(name);
}
}
}
void OperatorDistAttr::set_annotated(
const std::map<std::string, bool>& annotated) {
PADDLE_ENFORCE_EQ(verify_annotated(annotated),
true,
platform::errors::InvalidArgument(
"The annotated [%s] is wrong.", str_join(annotated)));
annotated_ = annotated;
}
bool OperatorDistAttr::verify_input_dist_attr(
const std::string& name, const TensorDistAttr& dist_attr) const {
if (!dist_attr.verify()) {
return false;
}
if (op_ != nullptr) {
if (dist_attr.tensor() != nullptr) {
if (name != dist_attr.tensor()->Name()) {
return false;
}
}
if (input_dist_attrs_.count(name) == 0) {
return false;
}
}
return true;
}
bool OperatorDistAttr::verify_output_dist_attr(
const std::string& name, const TensorDistAttr& dist_attr) const {
if (!dist_attr.verify()) {
return false;
}
if (op_ != nullptr) {
if (dist_attr.tensor() != nullptr) {
if (name != dist_attr.tensor()->Name()) {
return false;
}
}
if (output_dist_attrs_.count(name) == 0) {
return false;
}
}
return true;
}
bool OperatorDistAttr::verify_process_mesh(
const ProcessMesh& process_mesh) const {
if (process_mesh != process_mesh_) {
return false;
}
for (auto& item : input_dist_attrs_) {
if (item.second.process_mesh() != process_mesh) {
return false;
}
}
for (auto& item : output_dist_attrs_) {
if (item.second.process_mesh() != process_mesh) {
return false;
}
}
return true;
}
bool OperatorDistAttr::verify_annotated(
const std::map<std::string, bool>& annotated) const {
for (const auto& item : annotated) {
auto result = std::find(std::begin(fields_), std::end(fields_), item.first);
if (result == std::end(fields_)) {
return false;
}
}
for (auto& item : input_dist_attrs_) {
if (!item.second.verify_annotated(item.second.annotated())) {
return false;
}
}
for (auto& item : output_dist_attrs_) {
if (!item.second.verify_annotated(item.second.annotated())) {
return false;
}
}
return true;
}
bool OperatorDistAttr::verify() const {
if (op_ == nullptr) {
return false;
}
if (!verify_process_mesh(process_mesh_)) {
return false;
}
for (auto const& item : input_dist_attrs_) {
auto input_names = op_->InputArgumentNames();
auto found =
std::find(std::begin(input_names), std::end(input_names), item.first);
if (found == std::end(input_names)) {
return false;
}
if (!verify_input_dist_attr(item.first, item.second)) {
return false;
}
}
for (auto const& item : output_dist_attrs_) {
auto output_names = op_->OutputArgumentNames();
auto found =
std::find(std::begin(output_names), std::end(output_names), item.first);
if (found == std::end(output_names)) {
return false;
}
if (!verify_output_dist_attr(item.first, item.second)) {
return false;
}
}
return true;
}
std::string OperatorDistAttr::to_string() const {
std::string str;
if (op_ != nullptr) {
str += "{op_type: " + op_->Type() + ", ";
} else {
str += "{op_type: None, ";
}
str += "impl_type: " + impl_type_ + ", ";
str += "impl_idx: " + std::to_string(impl_idx_) + ", ";
str += "annotated: [" + str_join(annotated_) + "], ";
str += "\nprocess_mesh: " + process_mesh_.to_string() + ", ";
str += "\ninput_dist_attrs: [\n";
for (auto const& item : input_dist_attrs_) {
str += " " + item.second.to_string() + ",\n";
}
str.replace(str.size() - 2, 2, "]");
str += "\noutput_dist_attrs: [\n";
for (auto const& item : output_dist_attrs_) {
str += " " + item.second.to_string() + ",\n";
}
str.replace(str.size() - 2, 2, "]}");
return str;
}
OperatorDistAttr OperatorDistAttr::from_proto(
const OperatorDistAttrProto& proto) {
OperatorDistAttr dist_attr;
for (int64_t i = 0; i < proto.input_dist_attrs_size(); ++i) {
dist_attr.input_dist_attrs_[proto.input_dist_attrs(i).name()] =
TensorDistAttr::from_proto(
proto.input_dist_attrs(i).tensor_dist_attr());
}
for (int64_t i = 0; i < proto.output_dist_attrs_size(); ++i) {
dist_attr.output_dist_attrs_[proto.output_dist_attrs(i).name()] =
TensorDistAttr::from_proto(
proto.output_dist_attrs(i).tensor_dist_attr());
}
dist_attr.process_mesh_ = ProcessMesh::from_proto(proto.process_mesh());
dist_attr.impl_type_ = proto.impl_type();
dist_attr.impl_idx_ = proto.impl_idx();
return dist_attr;
}
OperatorDistAttrProto OperatorDistAttr::to_proto() const {
OperatorDistAttrProto proto;
for (const auto& item : input_dist_attrs_) {
auto proto_item = proto.mutable_input_dist_attrs()->Add();
proto_item->set_name(item.first);
proto_item->mutable_tensor_dist_attr()->CopyFrom(item.second.to_proto());
}
for (const auto& item : output_dist_attrs_) {
auto proto_item = proto.mutable_output_dist_attrs()->Add();
proto_item->set_name(item.first);
proto_item->mutable_tensor_dist_attr()->CopyFrom(item.second.to_proto());
}
proto.mutable_process_mesh()->CopyFrom(process_mesh_.to_proto());
proto.set_impl_type(impl_type_);
proto.set_impl_idx(impl_idx_);
return proto;
}
bool operator==(const OperatorDistAttr& lhs, const OperatorDistAttr& rhs) {
if (lhs.process_mesh() != rhs.process_mesh()) {
return false;
}
if (lhs.impl_type() != rhs.impl_type()) {
return false;
}
if (lhs.impl_idx() != rhs.impl_idx()) {
return false;
}
for (auto const& item : lhs.input_dist_attrs()) {
if (rhs.input_dist_attrs().count(item.first) != 1) {
return false;
}
if (rhs.input_dist_attrs().at(item.first) !=
lhs.input_dist_attrs().at(item.first)) {
return false;
}
}
for (auto const& item : lhs.output_dist_attrs()) {
if (rhs.output_dist_attrs().count(item.first) != 1) {
return false;
}
if (rhs.output_dist_attrs().at(item.first) !=
lhs.output_dist_attrs().at(item.first)) {
return false;
}
}
return true;
}
} // namespace auto_parallel
} // namespace distributed
} // namespace paddle
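// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original sources): the dims_mapping
// validity rule enforced by TensorDistAttr::verify_dims_mapping above,
// restated as a dependency-free helper for the case where a process mesh is
// set: every entry must lie in [-1, mesh_ndim) and no mesh dimension may be
// claimed twice, while -1 ("not sharded") may repeat freely.
// ---------------------------------------------------------------------------
#include <cstdint>
#include <unordered_map>
#include <vector>

namespace dist_attr_example {

inline bool IsValidDimsMapping(const std::vector<int64_t>& dims_mapping,
                               int64_t mesh_ndim) {
  std::unordered_map<int64_t, int64_t> used;
  for (int64_t d : dims_mapping) {
    if (d < -1 || d >= mesh_ndim) return false;  // outside the mesh's range
    if (d != -1 && ++used[d] > 1) return false;  // a mesh dim claimed twice
  }
  return true;
}

// For example, IsValidDimsMapping({-1, 1, 0}, 2) is true, while
// IsValidDimsMapping({0, 0}, 2) is false because mesh dim 0 is claimed twice.

}  // namespace dist_attr_example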
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include "paddle/fluid/distributed/auto_parallel/auto_parallel.pb.h"
#include "paddle/fluid/distributed/auto_parallel/process_mesh.h"
#include "paddle/fluid/distributed/auto_parallel/utils.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
// Forward Declaration
namespace framework {
class BlockDesc;
class OpDesc;
class ProgramDesc;
class VarDesc;
} // namespace framework
namespace distributed {
namespace auto_parallel {
using framework::BlockDesc;
using framework::OpDesc;
using framework::ProgramDesc;
using framework::VarDesc;
class TensorDistAttr {
public:
TensorDistAttr() = default;
explicit TensorDistAttr(const VarDesc& tensor);
TensorDistAttr(const TensorDistAttr& tensor);
TensorDistAttr& operator=(const TensorDistAttr& dist_attr);
const VarDesc* tensor() const { return tensor_; }
const ProcessMesh& process_mesh() const { return process_mesh_; }
void set_process_mesh(const ProcessMesh& process_mesh);
const std::vector<int64_t>& dims_mapping() const { return dims_mapping_; }
void set_dims_mapping(const std::vector<int64_t>& dims_mapping);
int64_t batch_dim() const { return batch_dim_; }
void set_batch_dim(int64_t batch_dim);
const std::vector<bool>& dynamic_dims() const { return dynamic_dims_; }
void set_dynamic_dims(const std::vector<bool>& dynamic_dims);
const std::map<std::string, bool>& annotated() const { return annotated_; }
void set_annotated(const std::map<std::string, bool>& annotated);
void set_default_dims_mapping();
bool is_annotated(const std::string& name) const {
return annotated_.count(name) == 1;
}
void annotate(const std::string& name);
bool verify_process_mesh(const ProcessMesh& process_mesh) const;
bool verify_dims_mapping(const std::vector<int64_t>& dims_mapping) const;
bool verify_batch_dim(int64_t dim) const;
bool verify_dynamic_dims(const std::vector<bool>& dynamic_dims) const;
bool verify_annotated(const std::map<std::string, bool>& annotated) const;
bool verify() const;
// TensorDistAttr from_string(const std::string& dist_str);
std::string to_string() const;
static TensorDistAttr from_proto(const TensorDistAttrProto& proto);
TensorDistAttrProto to_proto() const;
private:
static std::vector<std::string> fields_;
const VarDesc* tensor_{nullptr};
ProcessMesh process_mesh_;
std::vector<int64_t> dims_mapping_;
int64_t batch_dim_;
std::vector<bool> dynamic_dims_;
std::map<std::string, bool> annotated_;
};
inline std::ostream& operator<<(std::ostream& os, const TensorDistAttr& obj) {
os << obj.to_string();
return os;
}
bool operator==(const TensorDistAttr& lhs, const TensorDistAttr& rhs);
inline bool operator!=(const TensorDistAttr& lhs, const TensorDistAttr& rhs) {
return !operator==(lhs, rhs);
}
class OperatorDistAttr {
public:
OperatorDistAttr() = default;
explicit OperatorDistAttr(const OpDesc& op);
OperatorDistAttr(const OperatorDistAttr& dist_attr);
OperatorDistAttr& operator=(const OperatorDistAttr& dist_attr);
const OpDesc* op() const { return op_; }
const VarDesc& input(const std::string& name) const {
return *inputs_.at(name);
}
const VarDesc& output(const std::string& name) const {
return *outputs_.at(name);
}
const std::map<std::string, TensorDistAttr>& input_dist_attrs() const {
return input_dist_attrs_;
}
const std::map<std::string, TensorDistAttr>& output_dist_attrs() const {
return output_dist_attrs_;
}
const TensorDistAttr& input_dist_attr(const std::string& name) const {
return input_dist_attrs_.at(name);
}
TensorDistAttr& input_dist_attr(const std::string& name) {
return input_dist_attrs_.at(name);
}
void set_input_dist_attr(const std::string& name,
const TensorDistAttr& dist_attr);
const TensorDistAttr& output_dist_attr(const std::string& name) const {
return output_dist_attrs_.at(name);
}
TensorDistAttr& output_dist_attr(const std::string& name) {
return output_dist_attrs_.at(name);
}
void set_output_dist_attr(const std::string& name,
const TensorDistAttr& dist_attr);
const ProcessMesh& process_mesh() const { return process_mesh_; }
void set_process_mesh(const ProcessMesh& process_mesh);
const std::string& impl_type() const { return impl_type_; }
void set_impl_type(const std::string& impl_type) { impl_type_ = impl_type; }
int64_t impl_idx() const { return impl_idx_; }
void set_impl_idx(const int64_t& impl_idx) { impl_idx_ = impl_idx; }
const std::map<std::string, bool>& annotated() const { return annotated_; }
void set_annotated(const std::map<std::string, bool>& annotated);
bool is_annotated(const std::string& name) const {
return annotated_.count(name) == 1;
}
void annotate(const std::string& name);
bool verify_input_dist_attr(const std::string& name,
const TensorDistAttr& dist_attr) const;
bool verify_output_dist_attr(const std::string& name,
const TensorDistAttr& dist_attr) const;
bool verify_process_mesh(const ProcessMesh& process_mesh) const;
bool verify_annotated(const std::map<std::string, bool>& annotated) const;
bool verify() const;
// OperatorDistAttr from_string(const std::string& dist_str);
std::string to_string() const;
static OperatorDistAttr from_proto(const OperatorDistAttrProto& proto);
OperatorDistAttrProto to_proto() const;
private:
static std::vector<std::string> fields_;
const OpDesc* op_{nullptr};
std::map<std::string, VarDesc*> inputs_;
std::map<std::string, VarDesc*> outputs_;
std::map<std::string, TensorDistAttr> input_dist_attrs_;
std::map<std::string, TensorDistAttr> output_dist_attrs_;
ProcessMesh process_mesh_;
std::string impl_type_;
int64_t impl_idx_ = -1;
std::map<std::string, bool> annotated_;
};
inline std::ostream& operator<<(std::ostream& os, const OperatorDistAttr& obj) {
os << obj.to_string();
return os;
}
bool operator==(const OperatorDistAttr& lhs, const OperatorDistAttr& rhs);
inline bool operator!=(const OperatorDistAttr& lhs,
const OperatorDistAttr& rhs) {
return !operator==(lhs, rhs);
}
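// Illustrative usage sketch (not part of the original source): `op` below is
// a hypothetical OpDesc with an input slot named "X", and `mesh` a previously
// constructed ProcessMesh. Per-tensor attributes are edited in place through
// the non-const accessors.
//
//   OperatorDistAttr op_dist_attr(op);
//   op_dist_attr.set_process_mesh(mesh);
//   TensorDistAttr& x_attr = op_dist_attr.input_dist_attr("X");
//   x_attr.set_dims_mapping({0, -1});
//   assert(op_dist_attr.verify_input_dist_attr("X", x_attr));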
} // namespace auto_parallel
} // namespace distributed
} // namespace paddle
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <algorithm>
#include "paddle/fluid/distributed/auto_parallel/dist_mapper.h"
#include "paddle/fluid/distributed/auto_parallel/utils.h"
namespace paddle {
namespace distributed {
namespace auto_parallel {
void DistributedMapper::set_process_id_to_device_ids(
const std::map<int64_t, std::pair<std::string, std::vector<int64_t>>>&
process_id_to_device_ids) {
std::vector<std::string> device_mesh_names;
for (const auto& item : device_meshes_) {
device_mesh_names.push_back(item.first);
}
for (const auto& item : process_id_to_device_ids) {
PADDLE_ENFORCE_GE(
item.first,
0,
platform::errors::InvalidArgument(
"The process id %d must be greater than or equal to 0.",
item.first));
std::string device_mesh_name = item.second.first;
const std::vector<int64_t>& device_ids = item.second.second;
PADDLE_ENFORCE_EQ(
device_meshes_.count(device_mesh_name),
1,
platform::errors::InvalidArgument(
"Cannot find the device mesh %d in device_mesh ids [%s].",
device_mesh_name,
str_join(device_mesh_names)));
PADDLE_ENFORCE_EQ(
has_duplicates(device_ids),
false,
platform::errors::InvalidArgument(
"The mapped device ids [%s] of process_mesh %d must be unique.",
str_join(device_ids),
item.first));
    const DeviceMesh& device_mesh = device_meshes_.at(device_mesh_name);
    const std::vector<int64_t>& cur_device_ids = device_mesh.device_ids();
for (int64_t device_id : device_ids) {
bool found =
std::find(cur_device_ids.begin(), cur_device_ids.end(), device_id) !=
cur_device_ids.end();
PADDLE_ENFORCE_EQ(
found,
true,
platform::errors::InvalidArgument(
"The device id %d cannot be find in the device mesh [%s].",
device_id,
str_join(cur_device_ids)));
}
}
process_id_to_device_ids_ = process_id_to_device_ids;
}
DistributedMapper DistributedMapper::from_proto(
const DistributedMapperProto& proto) {
DistributedMapper dist_mapper;
for (int64_t i = 0; i < proto.device_meshes_size(); ++i) {
dist_mapper.device_meshes_[proto.device_meshes(i).name()] =
DeviceMesh::from_proto(proto.device_meshes(i));
}
for (int64_t i = 0; i < proto.process_id_to_device_ids_size(); ++i) {
int64_t process_id = proto.process_id_to_device_ids(i).process_id();
std::string device_mesh_name =
proto.process_id_to_device_ids(i).device_mesh_name();
std::vector<int64_t> device_ids;
int64_t num_devices = proto.process_id_to_device_ids(i).device_ids_size();
for (int64_t j = 0; j < num_devices; ++j) {
device_ids.push_back(proto.process_id_to_device_ids(i).device_ids(j));
}
dist_mapper.process_id_to_device_ids_[process_id].first = device_mesh_name;
dist_mapper.process_id_to_device_ids_[process_id].second = device_ids;
}
return dist_mapper;
}
DistributedMapperProto DistributedMapper::to_proto() const {
DistributedMapperProto proto;
for (const auto& item : device_meshes_) {
proto.mutable_device_meshes()->Add()->CopyFrom(item.second.to_proto());
}
for (const auto& outer : process_id_to_device_ids_) {
auto proto_item = proto.mutable_process_id_to_device_ids()->Add();
proto_item->set_process_id(outer.first);
proto_item->set_device_mesh_name(outer.second.first);
for (const auto& inner : outer.second.second) {
proto_item->add_device_ids(inner);
}
}
return proto;
}
std::string DistributedMapper::to_string() const {
std::string mapper_str = "{device_meshes: [";
for (const auto& item : device_meshes_) {
mapper_str += item.second.to_string() + ", ";
}
mapper_str.replace(mapper_str.size() - 2, 2, "]");
mapper_str += "\nprocess_id_to_device_ids: [";
for (const auto& item : process_id_to_device_ids_) {
mapper_str += "{";
mapper_str +=
"process_id: " + std::to_string(item.first) + ", device_ids: [";
for (const auto& device_id : item.second.second) {
mapper_str +=
"{" + item.second.first + ", " + std::to_string(device_id) + "}, ";
}
mapper_str.replace(mapper_str.size() - 2, 2, "]");
mapper_str += "}, ";
}
mapper_str.replace(mapper_str.size() - 2, 2, "]");
mapper_str += "}";
return mapper_str;
}
bool operator==(const DistributedMapper& lhs, const DistributedMapper& rhs) {
if (lhs.device_meshes() != rhs.device_meshes()) {
return false;
}
if (lhs.process_id_to_device_ids() != rhs.process_id_to_device_ids()) {
return false;
}
return true;
}
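// Serialization roundtrip sketch (illustrative, not part of the original
// source): to_proto/from_proto are intended to be lossless, so a mapper
// rebuilt from its proto should compare equal to the original.
//
//   DistributedMapperProto proto = mapper.to_proto();
//   DistributedMapper restored = DistributedMapper::from_proto(proto);
//   assert(restored == mapper);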
} // namespace auto_parallel
} // namespace distributed
} // namespace paddle
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <utility>
#include "paddle/fluid/distributed/auto_parallel/auto_parallel.pb.h"
#include "paddle/fluid/distributed/auto_parallel/device_mesh.h"
#include "paddle/fluid/distributed/auto_parallel/process_mesh.h"
namespace paddle {
namespace distributed {
namespace auto_parallel {
class DistributedMapper {
public:
DistributedMapper() = default;
const std::map<std::string, DeviceMesh>& device_meshes() const {
return device_meshes_;
}
const DeviceMesh& device_mesh(const std::string& name) const {
return device_meshes_.at(name);
}
void add_device_mesh(const DeviceMesh& device_mesh) {
device_meshes_[device_mesh.name()] = device_mesh;
}
const std::map<int64_t, std::pair<std::string, std::vector<int64_t>>>&
process_id_to_device_ids() const {
return process_id_to_device_ids_;
}
void set_process_id_to_device_ids(
const std::map<int64_t, std::pair<std::string, std::vector<int64_t>>>&
process_id_to_device_ids);
// DistributedMapper from_string(const std::string& mapper_str);
std::string to_string() const;
static DistributedMapper from_proto(const DistributedMapperProto& proto);
DistributedMapperProto to_proto() const;
private:
std::map<std::string, DeviceMesh> device_meshes_;
std::map<int64_t, std::pair<std::string, std::vector<int64_t>>>
process_id_to_device_ids_;
};
bool operator==(const DistributedMapper& lhs, const DistributedMapper& rhs);
inline std::ostream& operator<<(std::ostream& os,
const DistributedMapper& obj) {
os << obj.to_string();
return os;
}
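// Illustrative usage sketch (not part of the original source): `device_mesh`
// below is a hypothetical DeviceMesh named "mesh" with device ids
// {0, 1, 2, 3}; two processes each get two of its devices.
//
//   DistributedMapper mapper;
//   mapper.add_device_mesh(device_mesh);
//   mapper.set_process_id_to_device_ids(
//       {{0, {"mesh", {0, 1}}}, {1, {"mesh", {2, 3}}}});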
} // namespace auto_parallel
} // namespace distributed
} // namespace paddle
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <algorithm>
#include <iterator>
#include "paddle/fluid/distributed/auto_parallel/process_mesh.h"
#include "paddle/fluid/distributed/auto_parallel/utils.h"
namespace paddle {
namespace distributed {
namespace auto_parallel {
ProcessMesh::ProcessMesh(const std::vector<int64_t> &shape,
const std::vector<int64_t> &process_ids,
const std::vector<std::string> &dim_names) {
shape_ = shape;
int64_t size = this->size();
  PADDLE_ENFORCE_EQ(
      size,
      process_ids.size(),
      platform::errors::InvalidArgument(
          "The size %d of this process mesh must be "
          "equal to the size %d of its process ids.",
          size,
          process_ids.size()));
PADDLE_ENFORCE_EQ(
has_duplicates(process_ids),
false,
platform::errors::InvalidArgument("The process ids [%s] must be unique.",
                                        str_join(process_ids)));
process_ids_ = process_ids;
  PADDLE_ENFORCE_EQ(shape_.size(),
                    dim_names.size(),
                    platform::errors::InvalidArgument(
                        "The size %d of the mesh shape must be equal to the "
                        "size %d of the dimension names.",
                        shape_.size(),
                        dim_names.size()));
PADDLE_ENFORCE_EQ(has_duplicates(dim_names),
false,
platform::errors::InvalidArgument(
"The names [%s] of each dimension must be unique.",
str_join(dim_names)));
dim_names_ = dim_names;
}
int64_t ProcessMesh::size() const {
if (shape_.empty()) return 0;
int64_t size = 1;
for (const int64_t dim_size : shape_) size *= dim_size;
return size;
}
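// For example, a mesh of shape [2, 3] has size 2 * 3 = 6 processes, while an
// empty shape yields size 0.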
bool ProcessMesh::contains(int64_t process_id) const {
  return std::find(std::begin(process_ids_),
                   std::end(process_ids_),
                   process_id) != std::end(process_ids_);
}
std::string ProcessMesh::to_string() const {
std::string mesh_str = "{shape: [" + str_join(shape_) + "], ";
mesh_str += "process_ids: [" + str_join(process_ids_) + "], ";
mesh_str += "dim_names: [" + str_join(dim_names_) + "]}";
return mesh_str;
}
ProcessMesh ProcessMesh::from_proto(const ProcessMeshProto &proto) {
ProcessMesh mesh;
mesh.shape_.resize(proto.shape_size());
for (int64_t i = 0; i < proto.shape_size(); ++i) {
mesh.shape_[i] = proto.shape(i);
}
mesh.process_ids_.resize(proto.process_ids_size());
for (int64_t i = 0; i < proto.process_ids_size(); ++i) {
mesh.process_ids_[i] = proto.process_ids(i);
}
mesh.dim_names_.resize(proto.dim_names_size());
for (int64_t i = 0; i < proto.dim_names_size(); ++i) {
mesh.dim_names_[i] = proto.dim_names(i);
}
return mesh;
}
ProcessMeshProto ProcessMesh::to_proto() const {
ProcessMeshProto proto;
for (const auto &i : shape_) {
proto.add_shape(i);
}
for (const auto &i : process_ids_) {
proto.add_process_ids(i);
}
for (const auto &i : dim_names_) {
proto.add_dim_names(i);
}
return proto;
}
bool operator==(const ProcessMesh &lhs, const ProcessMesh &rhs) {
if (lhs.shape() != rhs.shape()) {
return false;
}
if (lhs.process_ids() != rhs.process_ids()) {
return false;
}
return true;
}
} // namespace auto_parallel
} // namespace distributed
} // namespace paddle
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>
#include "paddle/fluid/distributed/auto_parallel/auto_parallel.pb.h"
#include "paddle/fluid/distributed/auto_parallel/device_mesh.h"
#include "paddle/fluid/distributed/auto_parallel/utils.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace distributed {
namespace auto_parallel {
class ProcessMesh {
public:
ProcessMesh() = default;
ProcessMesh(const std::vector<int64_t>& shape,
const std::vector<int64_t>& process_ids,
const std::vector<std::string>& dim_names);
const std::vector<int64_t>& shape() const { return shape_; }
const std::vector<int64_t>& process_ids() const { return process_ids_; }
const std::vector<std::string>& dim_names() const { return dim_names_; }
int64_t size() const;
int64_t ndim() const { return shape_.size(); }
int64_t dim_size(int64_t dim) const {
int64_t cdim = canonical_dim(dim, shape_.size());
return shape_[cdim];
}
int64_t dim_size(const std::string& dim_name) const {
for (std::size_t i = 0; i < dim_names_.size(); ++i) {
if (dim_names_[i] == dim_name) {
return shape_[i];
}
}
PADDLE_THROW(platform::errors::InvalidArgument(
"Cannot find the dimension of %s in this process mesh.", dim_name));
}
bool empty() const { return (shape_.empty() || process_ids_.empty()); }
bool contains(int64_t process_id) const;
// ProcessMesh from_string(const std::string& mesh_str);
std::string to_string() const;
static ProcessMesh from_proto(const ProcessMeshProto& proto);
ProcessMeshProto to_proto() const;
private:
std::vector<int64_t> shape_;
std::vector<int64_t> process_ids_;
std::vector<std::string> dim_names_;
};
inline std::ostream& operator<<(std::ostream& os, const ProcessMesh& obj) {
os << obj.to_string();
return os;
}
bool operator==(const ProcessMesh& lhs, const ProcessMesh& rhs);
inline bool operator!=(const ProcessMesh& lhs, const ProcessMesh& rhs) {
return !operator==(lhs, rhs);
}
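// Illustrative usage sketch (not part of the original source):
//
//   ProcessMesh mesh({2, 3}, {0, 1, 2, 3, 4, 5}, {"x", "y"});
//   assert(mesh.size() == 6);
//   assert(mesh.dim_size("x") == 2);
//   assert(mesh.contains(5));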
} // namespace auto_parallel
} // namespace distributed
} // namespace paddle
cc_test(
device_mesh_test
SRCS device_mesh_test.cc
DEPS device_mesh)
cc_test(
process_mesh_test
SRCS process_mesh_test.cc
DEPS process_mesh)
cc_test(
dist_attr_test
SRCS dist_attr_test.cc
DEPS dist_attr)
cc_test(
dist_mapper_test
SRCS dist_mapper_test.cc
DEPS dist_mapper)
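# After building, a single suite can be run through ctest (illustrative,
# assuming a standard CMake build tree):
#   ctest -R process_mesh_test --output-on-failure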
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/distributed/auto_parallel/device_mesh.h"
#include <iostream>
#include <sstream>
#include "gtest/gtest.h"
namespace paddle {
namespace distributed {
namespace auto_parallel {
TEST(DeviceMesh, Ctor) {
std::vector<int64_t> shape = {2, 3};
std::vector<int64_t> device_ids = {0, 1, 2, 3, 4, 5};
std::vector<std::string> dim_names = {"x", "y"};
std::string device_type = "GPU";
int64_t size = shape[0] * shape[1];
DeviceMesh device_mesh("mesh", shape, device_ids, dim_names);
for (int64_t i = 0; i < shape[0]; ++i) {
for (int64_t j = 0; j < shape[1]; ++j) {
int64_t global_id = i * shape[1] + j;
int64_t local_id = j;
int64_t machine_id = i;
device_mesh.add_device(
Device(global_id, local_id, machine_id, device_type));
}
}
for (int64_t i = 0; i < size; ++i) {
for (int64_t j = 0; j < size; ++j) {
device_mesh.add_link(Link(i, j, "NVL"));
}
}
EXPECT_EQ(device_mesh.name(), "mesh");
EXPECT_EQ(device_mesh.shape(), shape);
EXPECT_EQ(device_mesh.device_ids(), device_ids);
EXPECT_EQ(device_mesh.dim_names()[0], "x");
EXPECT_EQ(device_mesh.dim_names()[1], "y");
EXPECT_EQ(device_mesh.device_type(), device_type);
EXPECT_EQ(device_mesh.size(), size);
EXPECT_EQ(device_mesh.ndim(), static_cast<int64_t>(shape.size()));
EXPECT_EQ(device_mesh.dim_size(0), shape[0]);
EXPECT_EQ(device_mesh.dim_size(-1), shape[1]);
EXPECT_EQ(device_mesh.dim_size("x"), shape[0]);
EXPECT_EQ(device_mesh.dim_size("y"), shape[1]);
EXPECT_EQ(device_mesh.empty(), false);
EXPECT_EQ(device_mesh.contains(0), true);
EXPECT_EQ(device_mesh.contains(6), false);
EXPECT_EQ(device_mesh.device(3).global_id(), 3);
EXPECT_EQ(device_mesh.device(3).local_id(), 0);
EXPECT_EQ(device_mesh.device(3).machine_id(), 1);
EXPECT_EQ(device_mesh.device(3).type(), "GPU");
EXPECT_EQ(device_mesh.link(3, 4).source_id(), 3);
EXPECT_EQ(device_mesh.link(3, 4).target_id(), 4);
EXPECT_EQ(device_mesh.link(3, 4).type(), "NVL");
for (int64_t i = 0; i < shape[0]; ++i) {
for (int64_t j = 0; j < shape[1]; ++j) {
int64_t global_id = i * shape[1] + j;
int64_t local_id = j;
int64_t machine_id = i;
auto device = device_mesh.devices().at(global_id);
EXPECT_EQ(device, Device(global_id, local_id, machine_id, device_type));
}
}
for (int64_t i = 0; i < size; ++i) {
for (int64_t j = 0; j < size; ++j) {
EXPECT_EQ(device_mesh.links().at(i).at(j), Link(i, j, "NVL"));
}
}
std::stringstream sstream;
sstream << device_mesh;
EXPECT_EQ(sstream.str(), device_mesh.to_string());
auto proto = device_mesh.to_proto();
DeviceMesh new_device_mesh = DeviceMesh::from_proto(proto);
EXPECT_EQ(device_mesh, new_device_mesh);
}
} // namespace auto_parallel
} // namespace distributed
} // namespace paddle