Commit de2e6515 authored by yuguo960516yuguo's avatar yuguo960516yuguo
Browse files

2.4.1-dtk-23.04

parent ad08b8ce
Pipeline #228 failed with stages
in 0 seconds
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <mct/hash-map.hpp>
#include <vector>
#include "gflags/gflags.h"
#include "paddle/fluid/distributed/common/chunk_allocator.h"
namespace paddle {
namespace distributed {
// Each shard's hash space is pre-split into 2^6 = 64 sub-buckets; the
// top CTR_SPARSE_SHARD_BUCKET_NUM_BITS bits of the key hash select the
// sub-bucket (see SparseTableShard::compute_bucket below).
static const int CTR_SPARSE_SHARD_BUCKET_NUM_BITS = 6;
static const size_t CTR_SPARSE_SHARD_BUCKET_NUM =
    static_cast<size_t>(1) << CTR_SPARSE_SHARD_BUCKET_NUM_BITS;
// A simple feature-value slot: a growable array of floats backed by a
// std::vector, exposing raw-pointer access for serialization paths.
class FixedFeatureValue {
 public:
  FixedFeatureValue() = default;
  ~FixedFeatureValue() = default;

  // Raw pointer to the first float (invalidated by resize()).
  float* data() { return values_.data(); }
  // Number of floats currently stored.
  size_t size() { return values_.size(); }
  // Grow or shrink the value to exactly `size` floats.
  void resize(size_t size) { values_.resize(size); }
  // Give surplus capacity back to the allocator.
  void shrink_to_fit() { values_.shrink_to_fit(); }

 private:
  std::vector<float> values_;
};
// One shard of a sparse parameter table: maps KEY -> VALUE*, where the
// VALUE objects live in a ChunkAllocator and the closed hash map stores
// them as type-erased mct::Pointer. The shard is internally split into
// CTR_SPARSE_SHARD_BUCKET_NUM sub-buckets selected by the high bits of
// the key hash; alignas(64) keeps shards on distinct cache lines to
// avoid false sharing between concurrently used shards.
template <class KEY, class VALUE>
struct alignas(64) SparseTableShard {
 public:
  typedef typename mct::closed_hash_map<KEY, mct::Pointer, std::hash<KEY>>
      map_type;
  // Iterator over the whole shard (all sub-buckets, in order).
  struct iterator {
    typename map_type::iterator it;
    size_t bucket;
    map_type* buckets;
    friend bool operator==(const iterator& a, const iterator& b) {
      return a.it == b.it;
    }
    friend bool operator!=(const iterator& a, const iterator& b) {
      return a.it != b.it;
    }
    const KEY& key() const { return it->first; }
    // The map stores a type-erased pointer; cast it back to VALUE.
    VALUE& value() const { return *(VALUE*)(void*)it->second; }     // NOLINT
    VALUE* value_ptr() const { return (VALUE*)(void*)it->second; }  // NOLINT
    iterator& operator++() {
      ++it;
      // Skip empty sub-buckets until an element is found or we land on
      // end() of the last sub-bucket (which is the shard's end()).
      while (it == buckets[bucket].end() &&
             bucket + 1 < CTR_SPARSE_SHARD_BUCKET_NUM) {
        it = buckets[++bucket].begin();
      }
      return *this;
    }
    iterator operator++(int) {
      iterator ret = *this;
      ++*this;
      return ret;
    }
  };
  // Iterator restricted to a single sub-bucket.
  struct local_iterator {
    typename map_type::iterator it;
    friend bool operator==(const local_iterator& a, const local_iterator& b) {
      return a.it == b.it;
    }
    friend bool operator!=(const local_iterator& a, const local_iterator& b) {
      return a.it != b.it;
    }
    const KEY& key() const { return it->first; }
    VALUE& value() const { return *(VALUE*)(void*)it->second; }  // NOLINT
    local_iterator& operator++() {
      ++it;
      return *this;
    }
    local_iterator operator++(int) { return {it++}; }
  };
  // Releases every VALUE back to the allocator.
  ~SparseTableShard() { clear(); }
  bool empty() { return _alloc.size() == 0; }
  // Element count across all sub-buckets (tracked by the allocator).
  size_t size() { return _alloc.size(); }
  void set_max_load_factor(float x) {
    for (size_t bucket = 0; bucket < CTR_SPARSE_SHARD_BUCKET_NUM; bucket++) {
      _buckets[bucket].max_load_factor(x);
    }
  }
  size_t bucket_count() { return CTR_SPARSE_SHARD_BUCKET_NUM; }
  size_t bucket_size(size_t bucket) { return _buckets[bucket].size(); }
  // Destroys every VALUE and empties all sub-buckets.
  void clear() {
    for (size_t bucket = 0; bucket < CTR_SPARSE_SHARD_BUCKET_NUM; bucket++) {
      map_type& data = _buckets[bucket];
      for (auto it = data.begin(); it != data.end(); ++it) {
        _alloc.release((VALUE*)(void*)it->second);  // NOLINT
      }
      data.clear();
    }
  }
  // First element of the shard; skips leading empty sub-buckets.
  iterator begin() {
    auto it = _buckets[0].begin();
    size_t bucket = 0;
    while (it == _buckets[bucket].end() &&
           bucket + 1 < CTR_SPARSE_SHARD_BUCKET_NUM) {
      it = _buckets[++bucket].begin();
    }
    return {it, bucket, _buckets};
  }
  // The shard's end() is end() of the last sub-bucket.
  iterator end() {
    return {_buckets[CTR_SPARSE_SHARD_BUCKET_NUM - 1].end(),
            CTR_SPARSE_SHARD_BUCKET_NUM - 1,
            _buckets};
  }
  local_iterator begin(size_t bucket) { return {_buckets[bucket].begin()}; }
  local_iterator end(size_t bucket) { return {_buckets[bucket].end()}; }
  // Hashes once and probes only the owning sub-bucket.
  iterator find(const KEY& key) {
    size_t hash = _hasher(key);
    size_t bucket = compute_bucket(hash);
    auto it = _buckets[bucket].find_with_hash(key, hash);
    if (it == _buckets[bucket].end()) {
      return end();
    }
    return {it, bucket, _buckets};
  }
  // Like std::map: inserts a default-constructed VALUE when absent.
  VALUE& operator[](const KEY& key) { return emplace(key).first.value(); }
  std::pair<iterator, bool> insert(const KEY& key, const VALUE& val) {
    return emplace(key, val);
  }
  std::pair<iterator, bool> insert(const KEY& key, VALUE&& val) {
    return emplace(key, std::move(val));
  }
  // Constructs the VALUE in the chunk allocator only when the key is
  // new; returns {iterator, inserted}.
  template <class... ARGS>
  std::pair<iterator, bool> emplace(const KEY& key, ARGS&&... args) {
    size_t hash = _hasher(key);
    size_t bucket = compute_bucket(hash);
    auto res = _buckets[bucket].insert_with_hash({key, NULL}, hash);
    if (res.second) {
      res.first->second = _alloc.acquire(std::forward<ARGS>(args)...);
    }
    return {{res.first, bucket, _buckets}, res.second};
  }
  // Erases and returns an iterator to the next element, which may live
  // in a later sub-bucket.
  iterator erase(iterator it) {
    _alloc.release((VALUE*)(void*)it.it->second);  // NOLINT
    size_t bucket = it.bucket;
    auto it2 = _buckets[bucket].erase(it.it);
    while (it2 == _buckets[bucket].end() &&
           bucket + 1 < CTR_SPARSE_SHARD_BUCKET_NUM) {
      it2 = _buckets[++bucket].begin();
    }
    return {it2, bucket, _buckets};
  }
  // Erase without computing a successor iterator (cheaper).
  void quick_erase(iterator it) {
    _alloc.release((VALUE*)(void*)it.it->second);  // NOLINT
    _buckets[it.bucket].quick_erase(it.it);
  }
  local_iterator erase(size_t bucket, local_iterator it) {
    _alloc.release((VALUE*)(void*)it.it->second);  // NOLINT
    return {_buckets[bucket].erase(it.it)};
  }
  void quick_erase(size_t bucket, local_iterator it) {
    _alloc.release((VALUE*)(void*)it.it->second);  // NOLINT
    _buckets[bucket].quick_erase(it.it);
  }
  // Erase by key; returns the number of elements removed (0 or 1).
  size_t erase(const KEY& key) {
    auto it = find(key);
    if (it == end()) {
      return 0;
    }
    quick_erase(it);
    return 1;
  }
  // Sub-bucket index = top CTR_SPARSE_SHARD_BUCKET_NUM_BITS bits of the
  // hash, so bucket selection and in-bucket probing use disjoint bits.
  size_t compute_bucket(size_t hash) {
    if (CTR_SPARSE_SHARD_BUCKET_NUM == 1) {
      return 0;
    } else {
      return hash >> (sizeof(size_t) * 8 - CTR_SPARSE_SHARD_BUCKET_NUM_BITS);
    }
  }

 private:
  map_type _buckets[CTR_SPARSE_SHARD_BUCKET_NUM];
  ChunkAllocator<VALUE> _alloc;
  std::hash<KEY> _hasher;
};
} // namespace distributed
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <ThreadPool.h>
#include <future> // NOLINT
#include <memory>
#include <unordered_set>
#include <vector>
namespace paddle {
namespace distributed {
// A set of row ids whose mutations are serialized through a dedicated
// single-threaded pool, so callers never race on `set_`.
class ConcurrentSet {
 public:
  ConcurrentSet() : pool_(new ::ThreadPool(1)) {}
  ~ConcurrentSet() {}

  // Asynchronously inserts `rows` into the set; the returned future
  // completes when the insertion has been applied.
  std::future<void> Update(const std::vector<uint64_t>& rows) {
    auto task = [this, rows] {
      for (auto row : rows) {
        set_.insert(row);
      }
    };
    return pool_->enqueue(std::move(task));
  }

  // Asynchronously copies the accumulated ids into `*result` and clears
  // the set. `result` must stay alive until the returned future is
  // ready.
  std::future<void> GetAndClear(std::vector<uint64_t>* result) {
    // Capture the pointer BY VALUE. The original captured `&result`, a
    // reference to this function's parameter; once GetAndClear returns,
    // a still-queued task would read a dangling stack slot.
    auto task = [this, result] {
      result->clear();
      for (auto& id : set_) {
        result->push_back(id);
      }
      set_.clear();
    };
    return pool_->enqueue(std::move(task));
  }

 private:
  std::unordered_set<uint64_t> set_;
  std::unique_ptr<::ThreadPool> pool_{nullptr};
};
class GeoRecorder {
public:
explicit GeoRecorder(int trainer_num) : trainer_num_(trainer_num) {
trainer_rows_.reserve(trainer_num);
for (auto i = 0; i < trainer_num; ++i) {
trainer_rows_.emplace_back(new ConcurrentSet());
}
}
~GeoRecorder() = default;
void Update(const std::vector<uint64_t>& update_rows) {
VLOG(3) << " row size: " << update_rows.size();
std::vector<std::future<void>> fs;
for (auto& set : trainer_rows_) {
fs.push_back(set->Update(update_rows));
}
for (auto& f : fs) {
f.wait();
}
}
void GetAndClear(uint32_t trainer_id, std::vector<uint64_t>* result) {
VLOG(3) << "GetAndClear for trainer: " << trainer_id;
trainer_rows_.at(trainer_id)->GetAndClear(result).wait();
}
private:
const int trainer_num_;
std::vector<std::unique_ptr<ConcurrentSet>> trainer_rows_;
};
} // namespace distributed
} // namespace paddle
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <functional>
#include <memory>
#include <random>
#include <string>
#include <utility>
#include <vector>
#include "gflags/gflags.h"
#include "paddle/fluid/framework/generator.h"
#include "paddle/fluid/operators/truncated_gaussian_random_op.h"
namespace paddle {
namespace distributed {
// Abstract base for parameter-value initializers. Concrete subclasses
// implement the scalar GetValue(); the bulk overloads default to
// calling it once per element.
class Initializer {
 public:
  Initializer() {}

  explicit Initializer(const std::vector<std::string> &attrs) {}

  // Produce one initialized value.
  virtual float GetValue() = 0;

  // Append `numel` generated values to `*values`.
  virtual void GetValue(std::vector<float> *values, int numel) {
    for (int i = 0; i < numel; ++i) {
      values->push_back(GetValue());
    }
  }

  // Fill value[0..numel) with generated values.
  virtual void GetValue(float *value, int numel) {
    for (int remaining = numel; remaining > 0; --remaining) {
      *value++ = GetValue();
    }
  }

  virtual ~Initializer() {}

 protected:
  std::string name_;
  unsigned int seed_;
};
class UniformInitializer : public Initializer {
public:
explicit UniformInitializer(const std::vector<std::string> &attrs) {
name_ = attrs[0];
seed_ = static_cast<unsigned int>(std::stoi(attrs[1]));
min_ = std::stof(attrs[2]);
max_ = std::stof(attrs[3]);
dist_ = std::uniform_real_distribution<float>(min_, max_);
random_engine_ = framework::GetCPURandomEngine(seed_);
}
float GetValue() override { return dist_(*random_engine_); }
void GetValue(float *value, int numel) {
for (int x = 0; x < numel; ++x) {
value[x] = dist_(*random_engine_);
}
}
private:
float min_;
float max_;
std::shared_ptr<std::mt19937_64> random_engine_;
std::uniform_real_distribution<float> dist_;
};
class GaussianInitializer : public Initializer {
public:
explicit GaussianInitializer(const std::vector<std::string> &attrs) {
name_ = attrs[0];
seed_ = static_cast<unsigned int>(std::stoi(attrs[1]));
mean_ = std::stof(attrs[2]);
std_ = std::stof(attrs[3]);
random_engine_ = framework::GetCPURandomEngine(seed_);
dist_ = std::normal_distribution<float>(mean_, std_);
}
float GetValue() override { return dist_(*random_engine_); }
void GetValue(float *value, int numel) {
for (int x = 0; x < numel; ++x) {
value[x] = dist_(*random_engine_);
}
}
private:
float std_;
float mean_;
std::shared_ptr<std::mt19937_64> random_engine_;
std::normal_distribution<float> dist_;
};
class TruncatedGaussianInitializer : public Initializer {
public:
explicit TruncatedGaussianInitializer(const std::vector<std::string> &attrs) {
name_ = attrs[0];
seed_ = static_cast<unsigned int>(std::stoi(attrs[1]));
mean_ = std::stof(attrs[2]);
std_ = std::stof(attrs[3]);
std::uniform_real_distribution<float> dist_(
std::numeric_limits<float>::min(), 1.0);
random_engine_ = framework::GetCPURandomEngine(seed_);
}
float GetValue() override {
paddle::operators::TruncatedNormal<float> truncated_normal(mean_, std_);
float value = truncated_normal(dist_(*random_engine_));
return value;
}
void GetValue(float *value, int numel) {
paddle::operators::TruncatedNormal<float> truncated_normal(mean_, std_);
for (int x = 0; x < numel; ++x) {
value[x] = truncated_normal(dist_(*random_engine_));
}
}
private:
float std_;
float mean_;
std::shared_ptr<std::mt19937_64> random_engine_;
std::uniform_real_distribution<float> dist_;
};
class FillConstantInitializer : public Initializer {
public:
explicit FillConstantInitializer(const std::vector<std::string> &attrs) {
name_ = attrs[0];
value_ = std::stof(attrs[1]);
}
float GetValue() override { return value_; }
void GetValue(float *value, int numel) { std::fill_n(value, numel, value_); }
private:
float value_;
};
} // namespace distributed
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <glog/logging.h>
#include <rocksdb/db.h>
#include <rocksdb/filter_policy.h>
#include <rocksdb/options.h>
#include <rocksdb/slice.h>
#include <rocksdb/table.h>
#include <rocksdb/write_batch.h>
#include <iostream>
#include <string>
namespace paddle {
namespace distributed {
// Process-wide singleton wrapping one RocksDB instance whose column
// families ("shard_0" .. "shard_{colnum-1}") back the SSD tier of the
// sparse table. All operations assert on failure instead of returning
// errors, so the int return codes are nominal (get() returns 1 only for
// a missing key).
class RocksDBHandler {
 public:
  RocksDBHandler() {}
  ~RocksDBHandler() {}

  // Meyers singleton; initialization is thread-safe since C++11.
  static RocksDBHandler* GetInstance() {
    static RocksDBHandler handler;
    return &handler;
  }

  // Wipes db_path (via `rm -rf`!) and (re)opens the DB with `colnum`
  // column families. Callers must pass a dedicated directory.
  int initialize(const std::string& db_path, const int colnum) {
    VLOG(3) << "db path: " << db_path << " colnum: " << colnum;
    rocksdb::Options options;
    rocksdb::BlockBasedTableOptions bbto;
    bbto.block_size = 4 * 1024;
    bbto.block_cache = rocksdb::NewLRUCache(64 * 1024 * 1024);
    bbto.block_cache_compressed = rocksdb::NewLRUCache(64 * 1024 * 1024);
    bbto.cache_index_and_filter_blocks = false;
    // 20 bits/key bloom filter + whole-key filtering: point lookups on
    // absent keys mostly avoid disk reads.
    bbto.filter_policy.reset(rocksdb::NewBloomFilterPolicy(20, false));
    bbto.whole_key_filtering = true;
    options.table_factory.reset(rocksdb::NewBlockBasedTableFactory(bbto));
    options.keep_log_file_num = 100;
    options.max_log_file_size = 50 * 1024 * 1024;  // 50MB
    options.create_if_missing = true;
    options.use_direct_reads = true;
    options.max_background_flushes = 5;
    options.max_background_compactions = 5;
    options.base_background_compactions = 10;
    options.write_buffer_size = 256 * 1024 * 1024;  // 256MB
    options.max_write_buffer_number = 8;
    options.max_bytes_for_level_base =
        options.max_write_buffer_number * options.write_buffer_size;
    options.min_write_buffer_number_to_merge = 1;
    options.target_file_size_base = 1024 * 1024 * 1024;  // 1024MB
    options.memtable_prefix_bloom_size_ratio = 0.02;
    options.num_levels = 4;
    options.max_open_files = -1;
    options.compression = rocksdb::kNoCompression;
    options.level0_file_num_compaction_trigger = 8;
    options.level0_slowdown_writes_trigger =
        1.8 * options.level0_file_num_compaction_trigger;
    options.level0_stop_writes_trigger =
        3.6 * options.level0_file_num_compaction_trigger;
    if (!db_path.empty()) {
      // NOTE(review): shells out and ignores system()'s return value;
      // a failed removal surfaces later as a DB::Open error.
      std::string rm_cmd = "rm -rf " + db_path;
      system(rm_cmd.c_str());
    }
    rocksdb::Status s = rocksdb::DB::Open(options, db_path, &_db);
    assert(s.ok());
    _handles.resize(colnum);
    for (int i = 0; i < colnum; i++) {
      s = _db->CreateColumnFamily(
          options, "shard_" + std::to_string(i), &_handles[i]);
      assert(s.ok());
    }
    LOG(INFO) << "DB initialize success, colnum:" << colnum;
    return 0;
  }

  // Writes one key/value into column family `id`. WAL is disabled:
  // durability is presumably handled by checkpointing above this layer.
  int put(
      int id, const char* key, int key_len, const char* value, int value_len) {
    rocksdb::WriteOptions options;
    options.disableWAL = true;
    rocksdb::Status s = _db->Put(options,
                                 _handles[id],
                                 rocksdb::Slice(key, key_len),
                                 rocksdb::Slice(value, value_len));
    assert(s.ok());
    return 0;
  }

  // Writes the first n key/value pairs atomically via a WriteBatch
  // (128 bytes of batch buffer reserved per entry).
  int put_batch(int id,
                std::vector<std::pair<char*, int>>& ssd_keys,
                std::vector<std::pair<char*, int>>& ssd_values,
                int n) {
    rocksdb::WriteOptions options;
    options.disableWAL = true;
    rocksdb::WriteBatch batch(n * 128);
    for (int i = 0; i < n; i++) {
      batch.Put(_handles[id],
                rocksdb::Slice(ssd_keys[i].first, ssd_keys[i].second),
                rocksdb::Slice(ssd_values[i].first, ssd_values[i].second));
    }
    rocksdb::Status s = _db->Write(options, &batch);
    assert(s.ok());
    return 0;
  }

  // Returns 0 and fills `value` on hit; returns 1 when the key is
  // absent.
  int get(int id, const char* key, int key_len, std::string& value) {
    rocksdb::Status s = _db->Get(rocksdb::ReadOptions(),
                                 _handles[id],
                                 rocksdb::Slice(key, key_len),
                                 &value);
    if (s.IsNotFound()) {
      return 1;
    }
    assert(s.ok());
    return 0;
  }

  // Deletes one key from column family `id` (no-op if absent).
  int del_data(int id, const char* key, int key_len) {
    rocksdb::WriteOptions options;
    options.disableWAL = true;
    rocksdb::Status s =
        _db->Delete(options, _handles[id], rocksdb::Slice(key, key_len));
    assert(s.ok());
    return 0;
  }

  // Forces the memtable of column family `id` to disk.
  int flush(int id) {
    rocksdb::Status s = _db->Flush(rocksdb::FlushOptions(), _handles[id]);
    assert(s.ok());
    return 0;
  }

  // Caller owns the returned iterator and must delete it.
  rocksdb::Iterator* get_iterator(int id) {
    return _db->NewIterator(rocksdb::ReadOptions(), _handles[id]);
  }

  // Approximate total key count across all column families.
  int get_estimate_key_num(uint64_t& num_keys) {
    _db->GetAggregatedIntProperty("rocksdb.estimate-num-keys", &num_keys);
    return 0;
  }

 private:
  std::vector<rocksdb::ColumnFamilyHandle*> _handles;
  rocksdb::DB* _db;
};
} // namespace distributed
} // namespace paddle
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <functional>
#include <memory>
#include <string>
#include <utility>
#include <vector>
namespace paddle {
namespace distributed {
// View over a batch of sparse keys pulled by a trainer. Does NOT own
// feasigns_/frequencies_: the backing storage (a caller's vectors or a
// serialized byte buffer) must outlive this object.
struct PullSparseValue {
  PullSparseValue() {}

  explicit PullSparseValue(int numel, int dim)
      : numel_(numel),
        dim_(dim),
        is_training_(true),
        feasigns_(nullptr),
        frequencies_(nullptr) {}

  explicit PullSparseValue(std::vector<uint64_t>& feasigns,     // NOLINT
                           std::vector<uint32_t>& frequencies,  // NOLINT
                           int dim) {
    numel_ = feasigns.size();
    dim_ = dim;
    is_training_ = true;
    feasigns_ = feasigns.data();
    frequencies_ = frequencies.data();
  }

  // Points this value at a serialized buffer laid out as:
  //   |---isTraining--------------|
  //   |---8*{num}B(keysData)------|
  //   |---4*{num}B(Frequencies)---|
  // numel_ must already be set to {num} before calling.
  void DeserializeFromBytes(void* bytes) {
    auto* begin = reinterpret_cast<char*>(bytes);
    is_training_ = reinterpret_cast<bool*>(begin)[0];
    feasigns_ = reinterpret_cast<uint64_t*>(begin + sizeof(bool));
    frequencies_ = reinterpret_cast<uint32_t*>(begin + sizeof(bool) +
                                               sizeof(uint64_t) * numel_);
  }

  // Collects into *offset_shard the indices of all keys that belong to
  // shard `shard_id` under modulo-`shard_num` partitioning.
  void Fission(const int shard_id,
               const int shard_num,
               std::vector<int>* offset_shard) const {
    offset_shard->reserve(numel_ / shard_num + 1);
    for (int x = 0; x < numel_; ++x) {
      // static_cast instead of the original C-style functional cast.
      if (static_cast<int>(feasigns_[x] % shard_num) == shard_id) {
        offset_shard->push_back(x);
      }
    }
  }

  int numel_;
  int dim_;
  bool is_training_;
  uint64_t* feasigns_;
  uint32_t* frequencies_;
};
} // namespace distributed
} // namespace paddle
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
// Helper macros for declaring a variadic list of friend classes.
// REGISTER_GRAPH_FRIEND_CLASS(n, C1, ..., Cn) expands to
// `friend class C1; ... friend class Cn;` for n in [1, 11]; the DECLARE_k
// macros peel off one class name at a time.
#define DECLARE_GRAPH_FRIEND_CLASS(a) friend class a;
#define DECLARE_1_FRIEND_CLASS(a, ...) DECLARE_GRAPH_FRIEND_CLASS(a)
#define DECLARE_2_FRIEND_CLASS(a, ...) \
  DECLARE_GRAPH_FRIEND_CLASS(a) DECLARE_1_FRIEND_CLASS(__VA_ARGS__)
#define DECLARE_3_FRIEND_CLASS(a, ...) \
  DECLARE_GRAPH_FRIEND_CLASS(a) DECLARE_2_FRIEND_CLASS(__VA_ARGS__)
#define DECLARE_4_FRIEND_CLASS(a, ...) \
  DECLARE_GRAPH_FRIEND_CLASS(a) DECLARE_3_FRIEND_CLASS(__VA_ARGS__)
#define DECLARE_5_FRIEND_CLASS(a, ...) \
  DECLARE_GRAPH_FRIEND_CLASS(a) DECLARE_4_FRIEND_CLASS(__VA_ARGS__)
#define DECLARE_6_FRIEND_CLASS(a, ...) \
  DECLARE_GRAPH_FRIEND_CLASS(a) DECLARE_5_FRIEND_CLASS(__VA_ARGS__)
#define DECLARE_7_FRIEND_CLASS(a, ...) \
  DECLARE_GRAPH_FRIEND_CLASS(a) DECLARE_6_FRIEND_CLASS(__VA_ARGS__)
#define DECLARE_8_FRIEND_CLASS(a, ...) \
  DECLARE_GRAPH_FRIEND_CLASS(a) DECLARE_7_FRIEND_CLASS(__VA_ARGS__)
#define DECLARE_9_FRIEND_CLASS(a, ...) \
  DECLARE_GRAPH_FRIEND_CLASS(a) DECLARE_8_FRIEND_CLASS(__VA_ARGS__)
#define DECLARE_10_FRIEND_CLASS(a, ...) \
  DECLARE_GRAPH_FRIEND_CLASS(a) DECLARE_9_FRIEND_CLASS(__VA_ARGS__)
#define DECLARE_11_FRIEND_CLASS(a, ...) \
  DECLARE_GRAPH_FRIEND_CLASS(a) DECLARE_10_FRIEND_CLASS(__VA_ARGS__)
#define REGISTER_GRAPH_FRIEND_CLASS(n, ...) \
  DECLARE_##n##_FRIEND_CLASS(__VA_ARGS__)
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/distributed/ps/table/graph/graph_edge.h"
#include <cstring>
namespace paddle {
namespace distributed {
// Unweighted blob: records only the neighbor id; `weight` is ignored
// (every edge implicitly weighs 1, see GraphEdgeBlob::get_weight).
// NOTE(review): the `= 1` default argument lives on this out-of-line
// definition, so it is visible only within this translation unit.
void GraphEdgeBlob::add_edge(int64_t id, float weight = 1) {
  id_arr.push_back(id);
}
// Weighted blob: records the neighbor id and its weight in parallel
// arrays (id_arr[i] pairs with weight_arr[i]).
void WeightedGraphEdgeBlob::add_edge(int64_t id, float weight = 1) {
  id_arr.push_back(id);
  weight_arr.push_back(weight);
}
} // namespace distributed
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cstddef>
#include <cstdint>
#include <vector>
namespace paddle {
namespace distributed {
// Adjacency list of one graph node: stores neighbor ids only. Edge
// weight is implicit (always 1); WeightedGraphEdgeBlob adds per-edge
// weights.
class GraphEdgeBlob {
 public:
  GraphEdgeBlob() {}
  virtual ~GraphEdgeBlob() {}
  // Number of outgoing edges.
  size_t size() { return id_arr.size(); }
  // Appends one edge; defined out-of-line in graph_edge.cc (the weight
  // is ignored by this unweighted base class).
  virtual void add_edge(int64_t id, float weight);
  // Neighbor id at position idx (no bounds check).
  int64_t get_id(int idx) { return id_arr[idx]; }
  // Unweighted: every edge has weight 1.
  virtual float get_weight(int idx) { return 1; }
  // Mutable access to the raw id array, used for bulk export.
  std::vector<int64_t>& export_id_array() { return id_arr; }

 protected:
  std::vector<int64_t> id_arr;
};
class WeightedGraphEdgeBlob : public GraphEdgeBlob {
public:
WeightedGraphEdgeBlob() {}
virtual ~WeightedGraphEdgeBlob() {}
virtual void add_edge(int64_t id, float weight);
virtual float get_weight(int idx) { return weight_arr[idx]; }
protected:
std::vector<float> weight_arr;
};
} // namespace distributed
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/distributed/ps/table/graph/graph_node.h"
#include <cstring>
namespace paddle {
namespace distributed {
// Releases the owned sampler and edge blob. Both are raw owning
// pointers allocated in build_sampler()/build_edges().
GraphNode::~GraphNode() {
  if (sampler != nullptr) {
    delete sampler;
    sampler = nullptr;
  }
  if (edges != nullptr) {
    delete edges;
    edges = nullptr;
  }
}
// Field byte widths used by the binary (de)serialization format below.
int Node::weight_size = sizeof(float);
int Node::id_size = sizeof(uint64_t);
int Node::int_size = sizeof(int);

// Serialized size of a plain Node: id followed by a feature count.
int Node::get_size(bool need_feature) { return id_size + int_size; }

// Layout: [id : id_size][feat_num : int_size]. A plain Node carries no
// features, so feat_num is always written as 0 and need_feature is
// ignored here.
void Node::to_buffer(char* buffer, bool need_feature) {
  memcpy(buffer, &id, id_size);
  buffer += id_size;
  int feat_num = 0;
  memcpy(buffer, &feat_num, sizeof(int));
}

// Reads back only the id; a plain Node has no feature payload.
void Node::recover_from_buffer(char* buffer) { memcpy(&id, buffer, id_size); }
// Serialized size: id + feat_num header, plus (when need_feature) one
// int length prefix per feature string and the string bytes themselves.
int FeatureNode::get_size(bool need_feature) {
  int size = id_size + int_size;  // id, feat_num
  if (need_feature) {
    size += feature.size() * int_size;
    for (const std::string& fea : feature) {
      size += fea.size();
    }
  }
  return size;
}
// Lazily allocates the adjacency blob. A second call is a no-op, so the
// weighted/unweighted choice is fixed by the first call.
void GraphNode::build_edges(bool is_weighted) {
  if (edges == nullptr) {
    if (is_weighted == true) {
      edges = new WeightedGraphEdgeBlob();
    } else {
      edges = new GraphEdgeBlob();
    }
  }
}
// Lazily creates the neighbor sampler ("random" or "weighted") and
// binds it to the edge blob. No-op when a sampler already exists or
// when sample_type is not recognized.
void GraphNode::build_sampler(std::string sample_type) {
  if (sampler != nullptr) {
    return;
  }
  if (sample_type == "random") {
    sampler = new RandomSampler();
  } else if (sample_type == "weighted") {
    sampler = new WeightedSampler();
  } else {
    // Unknown sample_type: previously execution fell through and
    // dereferenced the still-null sampler below. Treat it as "no
    // sampler built" instead of crashing.
    return;
  }
  sampler->build(edges);
}
// Layout: [id][feat_num][len_0][bytes_0]...[len_{n-1}][bytes_{n-1}].
// When need_feature is false only [id][feat_num=0] is written. The
// caller must size `buffer` via get_size(need_feature).
void FeatureNode::to_buffer(char* buffer, bool need_feature) {
  memcpy(buffer, &id, id_size);
  buffer += id_size;
  int feat_num = 0;
  int feat_len;
  if (need_feature) {
    feat_num += feature.size();
    memcpy(buffer, &feat_num, sizeof(int));
    buffer += sizeof(int);
    for (int i = 0; i < feat_num; ++i) {
      // Length prefix, then the raw (possibly binary) feature bytes.
      feat_len = feature[i].size();
      memcpy(buffer, &feat_len, sizeof(int));
      buffer += sizeof(int);
      memcpy(buffer, feature[i].c_str(), feature[i].size());
      buffer += feature[i].size();
    }
  } else {
    memcpy(buffer, &feat_num, sizeof(int));
  }
}
// Inverse of to_buffer: reads [id][feat_num][len_i][bytes_i]... and
// rebuilds the feature vector. `buffer` must hold a record produced by
// to_buffer(..., true) (or one with feat_num == 0).
void FeatureNode::recover_from_buffer(char* buffer) {
  int feat_num, feat_len;
  memcpy(&id, buffer, id_size);
  buffer += id_size;
  memcpy(&feat_num, buffer, sizeof(int));
  buffer += sizeof(int);
  feature.clear();
  for (int i = 0; i < feat_num; ++i) {
    memcpy(&feat_len, buffer, sizeof(int));
    buffer += sizeof(int);
    // Construct the string straight from (ptr, len). The original
    // copied through a non-standard VLA `char str[feat_len + 1]`
    // (unbounded stack allocation) and then built std::string(str),
    // which truncated at the first embedded 0x00 byte — corrupting
    // binary feature payloads such as packed uint64 ids.
    feature.emplace_back(buffer, feat_len);
    buffer += feat_len;
  }
}
} // namespace distributed
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cstring>
#include <iostream>
#include <memory>
#include <set>
#include <sstream>
#include <vector>
#include "glog/logging.h"
#include "paddle/fluid/distributed/ps/table/graph/graph_weighted_sampler.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/string/string_helper.h"
namespace paddle {
namespace distributed {
// Base class of the graph node hierarchy: holds only the node id and
// declares the virtual interface that GraphNode (edges + sampling) and
// FeatureNode (feature payload) specialize. The default implementations
// are no-ops or neutral values.
class Node {
 public:
  Node() {}
  Node(uint64_t id) : id(id) {}
  virtual ~Node() {}
  // Byte widths of the binary serialization fields; defined in
  // graph_node.cc.
  static int id_size, int_size, weight_size;
  uint64_t get_id() { return id; }
  // Signed view of the id (used by Python bindings).
  int64_t get_py_id() { return (int64_t)id; }
  void set_id(uint64_t id) { this->id = id; }
  virtual void build_edges(bool is_weighted) {}
  virtual void build_sampler(std::string sample_type) {}
  virtual void add_edge(uint64_t id, float weight) {}
  // Sample up to k neighbor indices; the base class has no neighbors.
  virtual std::vector<int> sample_k(
      int k, const std::shared_ptr<std::mt19937_64> rng) {
    return std::vector<int>();
  }
  virtual uint64_t get_neighbor_id(int idx) { return 0; }
  virtual float get_neighbor_weight(int idx) { return 1.; }
  // Binary (de)serialization; implemented in graph_node.cc.
  virtual int get_size(bool need_feature);
  virtual void to_buffer(char *buffer, bool need_feature);
  virtual void recover_from_buffer(char *buffer);
  virtual std::string get_feature(int idx) { return std::string(""); }
  virtual int get_feature_ids(std::vector<uint64_t> *res) const { return 0; }
  virtual int get_feature_ids(int slot_idx, std::vector<uint64_t> *res) const {
    return 0;
  }
  virtual void set_feature(int idx, const std::string &str) {}
  virtual void set_feature_size(int size) {}
  virtual int get_feature_size() { return 0; }
  virtual size_t get_neighbor_size() { return 0; }

 protected:
  uint64_t id;
  bool is_weighted;
};
// Node with adjacency (edges) and an optional neighbor sampler. Both
// members are owned raw pointers, released in the destructor
// (graph_node.cc).
class GraphNode : public Node {
 public:
  GraphNode() : Node(), sampler(nullptr), edges(nullptr) {}
  GraphNode(uint64_t id) : Node(id), sampler(nullptr), edges(nullptr) {}
  virtual ~GraphNode();
  virtual void build_edges(bool is_weighted);
  virtual void build_sampler(std::string sample_type);
  // Requires build_edges() to have been called first.
  virtual void add_edge(uint64_t id, float weight) {
    edges->add_edge(id, weight);
  }
  // Requires build_sampler() to have been called first.
  virtual std::vector<int> sample_k(
      int k, const std::shared_ptr<std::mt19937_64> rng) {
    return sampler->sample_k(k, rng);
  }
  virtual uint64_t get_neighbor_id(int idx) { return edges->get_id(idx); }
  virtual float get_neighbor_weight(int idx) { return edges->get_weight(idx); }
  virtual size_t get_neighbor_size() { return edges->size(); }

 protected:
  Sampler *sampler;
  GraphEdgeBlob *edges;
};
// Node that carries per-slot feature payloads. Each entry of `feature`
// is a raw byte string; for id-type slots it holds a packed array of
// uint64_t values (see get_feature_ids / parse_value_to_bytes).
class FeatureNode : public Node {
 public:
  FeatureNode() : Node() {}
  FeatureNode(uint64_t id) : Node(id) {}
  virtual ~FeatureNode() {}
  // Binary (de)serialization; implemented in graph_node.cc.
  virtual int get_size(bool need_feature);
  virtual void to_buffer(char *buffer, bool need_feature);
  virtual void recover_from_buffer(char *buffer);

  // Returns the raw bytes of slot `idx`, or "" when out of range.
  virtual std::string get_feature(int idx) {
    if (idx < static_cast<int>(this->feature.size())) {
      return this->feature[idx];
    } else {
      return std::string("");
    }
  }

  // Appends the uint64 ids packed in every slot to *res; each slot's
  // byte length must be a multiple of sizeof(uint64_t). Returns 0.
  virtual int get_feature_ids(std::vector<uint64_t> *res) const {
    PADDLE_ENFORCE_NOT_NULL(res,
                            paddle::platform::errors::InvalidArgument(
                                "get_feature_ids res should not be null"));
    errno = 0;
    for (auto &feature_item : feature) {
      const uint64_t *feas =
          reinterpret_cast<const uint64_t *>(feature_item.c_str());
      size_t num = feature_item.length() / sizeof(uint64_t);
      CHECK((feature_item.length() % sizeof(uint64_t)) == 0)
          << "bad feature_item: [" << feature_item << "]";
      size_t n = res->size();
      res->resize(n + num);
      for (size_t i = 0; i < num; ++i) {
        (*res)[n + i] = feas[i];
      }
    }
    PADDLE_ENFORCE_EQ(
        errno,
        0,
        paddle::platform::errors::InvalidArgument(
            "get_feature_ids get errno should be 0, but got %d.", errno));
    return 0;
  }

  // Same as above but for a single slot; clears *res first. An
  // out-of-range slot_idx yields an empty result.
  virtual int get_feature_ids(int slot_idx, std::vector<uint64_t> *res) const {
    PADDLE_ENFORCE_NOT_NULL(res,
                            paddle::platform::errors::InvalidArgument(
                                "get_feature_ids res should not be null"));
    res->clear();
    errno = 0;
    if (slot_idx < static_cast<int>(this->feature.size())) {
      const std::string &s = this->feature[slot_idx];
      const uint64_t *feas = reinterpret_cast<const uint64_t *>(s.c_str());
      size_t num = s.length() / sizeof(uint64_t);
      CHECK((s.length() % sizeof(uint64_t)) == 0)
          << "bad feature_item: [" << s << "]";
      res->resize(num);
      for (size_t i = 0; i < num; ++i) {
        (*res)[i] = feas[i];
      }
    }
    PADDLE_ENFORCE_EQ(
        errno,
        0,
        paddle::platform::errors::InvalidArgument(
            "get_feature_ids get errno should be 0, but got %d.", errno));
    return 0;
  }

  // Grows the slot table if needed and returns a mutable slot pointer.
  virtual std::string *mutable_feature(int idx) {
    if (idx >= static_cast<int>(this->feature.size())) {
      this->feature.resize(idx + 1);
    }
    return &(this->feature[idx]);
  }

  virtual void set_feature(int idx, const std::string &str) {
    if (idx >= static_cast<int>(this->feature.size())) {
      this->feature.resize(idx + 1);
    }
    this->feature[idx] = str;
  }
  virtual void set_feature_size(int size) { this->feature.resize(size); }
  virtual int get_feature_size() { return this->feature.size(); }

  // Parses each decimal string in feat_str as a T and returns the
  // concatenated raw little/native-endian bytes.
  template <typename T>
  static std::string parse_value_to_bytes(std::vector<std::string> feat_str) {
    T v;
    size_t Tsize = sizeof(T) * feat_str.size();
    // std::string as byte buffer: the original used a C VLA
    // (char buffer[Tsize]), which is non-standard C++ and an unbounded
    // stack allocation.
    std::string out(Tsize, '\0');
    for (size_t i = 0; i < feat_str.size(); i++) {
      std::stringstream ss(feat_str[i]);
      ss >> v;
      std::memcpy(&out[sizeof(T) * i], reinterpret_cast<char *>(&v),
                  sizeof(T));
    }
    return out;
  }

  // Iterator-range variant writing the packed bytes into *output
  // (replacing its previous contents).
  template <typename T>
  static void parse_value_to_bytes(
      std::vector<std::string>::iterator feat_str_begin,
      std::vector<std::string>::iterator feat_str_end,
      std::string *output) {
    T v;
    size_t feat_str_size = feat_str_end - feat_str_begin;
    size_t Tsize = sizeof(T) * feat_str_size;
    // Size the output explicitly. The original filled a stack VLA and
    // called output->assign(buffer), which (a) is non-standard and
    // (b) truncated at the first zero byte, corrupting any packed
    // value that contains 0x00.
    output->assign(Tsize, '\0');
    for (size_t i = 0; i < feat_str_size; i++) {
      std::stringstream ss(*(feat_str_begin + i));
      ss >> v;
      std::memcpy(&(*output)[sizeof(T) * i], reinterpret_cast<char *>(&v),
                  sizeof(T));
    }
  }

  // Inverse of parse_value_to_bytes: splits the byte string into Ts.
  // Trailing bytes smaller than sizeof(T) are read past-the-end by the
  // original logic too, so the input length must be a multiple of
  // sizeof(T).
  template <typename T>
  static std::vector<T> parse_bytes_to_array(std::string feat_str) {
    T v;
    std::vector<T> out;
    size_t start = 0;
    const char *buffer = feat_str.data();
    while (start < feat_str.size()) {
      std::memcpy(reinterpret_cast<char *>(&v), buffer + start, sizeof(T));
      start += sizeof(T);
      out.push_back(v);
    }
    return out;
  }

  // str_ptr variant: APPENDS the parsed Ts onto *output (does not
  // clear it), parsing with a reusable thread-local stream.
  template <typename T>
  static void parse_value_to_bytes(
      std::vector<paddle::string::str_ptr>::iterator feat_str_begin,
      std::vector<paddle::string::str_ptr>::iterator feat_str_end,
      std::string *output) {
    size_t feat_str_size = feat_str_end - feat_str_begin;
    size_t Tsize = sizeof(T) * feat_str_size;
    size_t num = output->length();
    output->resize(num + Tsize);
    T *fea_ptrs = reinterpret_cast<T *>(&(*output)[num]);
    thread_local paddle::string::str_ptr_stream ss;
    for (size_t i = 0; i < feat_str_size; i++) {
      ss.reset(*(feat_str_begin + i));
      ss >> fea_ptrs[i];
    }
  }

 protected:
  std::vector<std::string> feature;
};
} // namespace distributed
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/distributed/ps/table/graph/graph_weighted_sampler.h"
#include <iostream>
#include <memory>
#include <unordered_map>
#include "paddle/fluid/framework/generator.h"
namespace paddle {
namespace distributed {
// Stores the edge blob; uniform sampling needs no preprocessing.
void RandomSampler::build(GraphEdgeBlob *edges) { this->edges = edges; }
// Draws k distinct edge indices uniformly at random from [0, n) where
// n = edges->size(); if k >= n, every index is returned in order.
//
// Implemented as a "virtual" Fisher-Yates shuffle: instead of
// materializing an index array, replace_map records which index currently
// occupies each already-consumed position, so every draw costs O(1) plus
// hash lookups.
std::vector<int> RandomSampler::sample_k(
    int k, const std::shared_ptr<std::mt19937_64> rng) {
  int n = edges->size();
  if (k >= n) {
    k = n;
    std::vector<int> sample_result;
    for (int i = 0; i < k; i++) {
      sample_result.push_back(i);
    }
    return sample_result;
  }
  std::vector<int> sample_result;
  std::unordered_map<int, int> replace_map;
  while (k--) {
    // Pick a position in the still-active prefix [0, n).
    std::uniform_int_distribution<int> distrib(0, n - 1);
    int rand_int = distrib(*rng);
    // The value at that position is rand_int itself unless an earlier
    // draw relocated another index there.
    auto iter = replace_map.find(rand_int);
    if (iter == replace_map.end()) {
      sample_result.push_back(rand_int);
    } else {
      sample_result.push_back(iter->second);
    }
    // Move the value living at the last active position (n - 1) into the
    // slot just consumed, then shrink the active prefix by one.
    iter = replace_map.find(n - 1);
    if (iter == replace_map.end()) {
      replace_map[rand_int] = n - 1;
    } else {
      replace_map[rand_int] = iter->second;
    }
    --n;
  }
  return sample_result;
}
// Constructs an empty node.
//
// Fix: weight, count and idx were previously left uninitialized; sample_k
// reads `count` before build() is guaranteed to have run, so all scalar
// state is now zeroed.
WeightedSampler::WeightedSampler() {
  left = nullptr;
  right = nullptr;
  edges = nullptr;
  weight = 0;
  count = 0;
  idx = 0;
}
// Recursively releases the subtree: deleting a child invokes that child's
// own destructor, so the whole tree is torn down.
WeightedSampler::~WeightedSampler() {
  delete left;  // delete on nullptr is a no-op
  left = nullptr;
  delete right;
  right = nullptr;
}
// (Re)builds the weighted sampling tree over `edges`, discarding any
// previously built subtree first so build() can be called repeatedly.
void WeightedSampler::build(GraphEdgeBlob *edges) {
  delete left;  // safe on nullptr; drop any earlier tree
  left = nullptr;
  delete right;
  right = nullptr;
  build_one(static_cast<WeightedGraphEdgeBlob *>(edges), 0, edges->size());
}
// Recursively builds the segment tree over the edge range [start, end).
// A leaf holds a single edge's index and weight; an internal node caches
// the sum of its children's weights and edge counts.
void WeightedSampler::build_one(WeightedGraphEdgeBlob *edges,
                                int start,
                                int end) {
  this->edges = edges;
  if (start + 1 == end) {
    // Leaf: exactly one edge.
    left = right = nullptr;
    idx = start;
    count = 1;
    weight = edges->get_weight(idx);
  } else {
    // Internal node: split the range in half and aggregate the children.
    int mid = start + (end - start) / 2;
    left = new WeightedSampler();
    right = new WeightedSampler();
    left->build_one(edges, start, mid);
    right->build_one(edges, mid, end);
    weight = left->weight + right->weight;
    count = left->count + right->count;
  }
}
// Draws k edge indices without replacement, each with probability
// proportional to its weight; if k >= count, all indices are returned.
//
// Without-replacement sampling works by remembering, per tree node, how
// much weight and how many edges have already been consumed
// (subtract_weight_map / subtract_count_map) and renormalizing every draw
// to the weight that is still available.
//
// Fix: `subtract` was previously declared uninitialized; it is written by
// sample() before being read, but zero-initializing it removes the lint
// hazard at no cost.
std::vector<int> WeightedSampler::sample_k(
    int k, const std::shared_ptr<std::mt19937_64> rng) {
  if (k >= count) {
    k = count;
    std::vector<int> sample_result;
    for (int i = 0; i < k; i++) {
      sample_result.push_back(i);
    }
    return sample_result;
  }
  std::vector<int> sample_result;
  float subtract = 0;
  std::unordered_map<WeightedSampler *, float> subtract_weight_map;
  std::unordered_map<WeightedSampler *, int> subtract_count_map;
  std::uniform_real_distribution<float> distrib(0, 1.0);
  while (k--) {
    // Scale the uniform draw to the weight still available at the root.
    float query_weight = distrib(*rng);
    query_weight *= weight - subtract_weight_map[this];
    sample_result.push_back(sample(
        query_weight, subtract_weight_map, subtract_count_map, subtract));
  }
  return sample_result;
}
// Descends the tree to the edge selected by query_weight, then marks that
// leaf as consumed by propagating its weight (via `subtract`) and a count
// of one back up through the maps as the recursion unwinds.
int WeightedSampler::sample(
    float query_weight,
    std::unordered_map<WeightedSampler *, float> &subtract_weight_map,
    std::unordered_map<WeightedSampler *, int> &subtract_count_map,
    float &subtract) {
  if (left == nullptr) {
    // Leaf: consume this edge entirely and report its weight upward.
    subtract_weight_map[this] = weight;
    subtract = weight;
    subtract_count_map[this] = 1;
    return idx;
  }
  // operator[] default-inserts 0 for nodes not seen before, which is
  // exactly the "nothing consumed yet" value.
  int left_count = left->count - subtract_count_map[left];
  int right_count = right->count - subtract_count_map[right];
  float left_subtract = subtract_weight_map[left];
  int return_idx;
  // Go left when the right subtree is exhausted, or when the left subtree
  // still has edges and its remaining weight covers the query. (&& binds
  // tighter than ||, which is the intended grouping here.)
  if (right_count == 0 ||
      left_count > 0 && left->weight - left_subtract >= query_weight) {
    return_idx = left->sample(
        query_weight, subtract_weight_map, subtract_count_map, subtract);
  } else {
    // Shift the query into the right subtree's weight range.
    return_idx = right->sample(query_weight - (left->weight - left_subtract),
                               subtract_weight_map,
                               subtract_count_map,
                               subtract);
  }
  // Record the consumed weight/count at this node on the way back up.
  subtract_weight_map[this] += subtract;
  subtract_count_map[this]++;
  return return_idx;
}
} // namespace distributed
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <ctime>
#include <memory>
#include <random>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/distributed/ps/table/graph/graph_edge.h"
namespace paddle {
namespace distributed {
// Abstract interface for drawing up to k neighbor indices from an edge
// blob.
class Sampler {
 public:
  virtual ~Sampler() {}
  // Prepares internal state for sampling from `edges`; must be called
  // before sample_k.
  virtual void build(GraphEdgeBlob *edges) = 0;
  // Returns up to k edge indices drawn according to this sampler's
  // policy, using `rng` as the randomness source.
  virtual std::vector<int> sample_k(
      int k, const std::shared_ptr<std::mt19937_64> rng) = 0;
};
// Uniform sampler: every edge is equally likely; build() only stores the
// blob pointer.
class RandomSampler : public Sampler {
 public:
  virtual ~RandomSampler() {}
  virtual void build(GraphEdgeBlob *edges);
  virtual std::vector<int> sample_k(int k,
                                    const std::shared_ptr<std::mt19937_64> rng);
  // Not owned (never deleted here); set by build(). Must outlive the
  // sampler.
  GraphEdgeBlob *edges;
};
// Weight-proportional sampler backed by a binary segment tree over the
// edges; internal nodes cache subtree weight sums and counts so a single
// draw costs O(log n).
class WeightedSampler : public Sampler {
 public:
  WeightedSampler();
  virtual ~WeightedSampler();
  WeightedSampler *left, *right;  // owned children; nullptr at leaves
  float weight;                   // total edge weight of this subtree
  int count;                      // number of edges in this subtree
  int idx;                        // edge index (meaningful only at leaves)
  GraphEdgeBlob *edges;           // not owned
  virtual void build(GraphEdgeBlob *edges);
  // Builds the subtree covering the edge range [start, end).
  virtual void build_one(WeightedGraphEdgeBlob *edges, int start, int end);
  virtual std::vector<int> sample_k(int k,
                                    const std::shared_ptr<std::mt19937_64> rng);
 private:
  // One weighted draw; the maps track weight/count already consumed per
  // node so successive draws are without replacement.
  int sample(float query_weight,
             std::unordered_map<WeightedSampler *, float> &subtract_weight_map,
             std::unordered_map<WeightedSampler *, int> &subtract_count_map,
             float &subtract);
};
} // namespace distributed
} // namespace paddle
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment