Unverified Commit 619d735d authored by Hongzhi (Steve), Chen, committed by GitHub

[Misc] Replace \xxx with @XXX in structured comment. (#4822)



* param

* brief

* note

* return

* tparam

* brief2

* file

* return2

* return

* blabla

* all
Co-authored-by: Steve <ubuntu@ip-172-31-34-29.ap-northeast-1.compute.internal>
parent 96297fb8
/*!
* Copyright (c) 2021 by Contributors
* \file socket_pool.h
* \brief Socket pool of nonblocking sockets for DGL distributed training.
* @file socket_pool.h
* @brief Socket pool of nonblocking sockets for DGL distributed training.
*/
#ifndef DGL_RPC_NETWORK_SOCKET_POOL_H_
#define DGL_RPC_NETWORK_SOCKET_POOL_H_
......@@ -16,7 +16,7 @@ namespace network {
class TCPSocket;
/*!
* \brief SocketPool maintains a group of nonblocking sockets, and can provide
* @brief SocketPool maintains a group of nonblocking sockets, and can provide
* active sockets.
* Currently SocketPool is based on epoll, a scalable I/O event notification
* mechanism in the Linux operating system.
......@@ -24,69 +24,69 @@ class TCPSocket;
class SocketPool {
public:
/*!
* \brief socket mode read/receive
* @brief socket mode read/receive
*/
static const int READ = 1;
/*!
* \brief socket mode write/send
* @brief socket mode write/send
*/
static const int WRITE = 2;
/*!
* \brief SocketPool constructor
* @brief SocketPool constructor
*/
SocketPool();
/*!
* \brief Add a socket to SocketPool
* \param socket tcp socket to add
* \param socket_id receiver/sender id of the socket
* \param events READ, WRITE or READ + WRITE
* @brief Add a socket to SocketPool
* @param socket tcp socket to add
* @param socket_id receiver/sender id of the socket
* @param events READ, WRITE or READ + WRITE
*/
void AddSocket(
std::shared_ptr<TCPSocket> socket, int socket_id, int events = READ);
/*!
* \brief Remove socket from SocketPool
* \param socket tcp socket to remove
* \return number of remaining sockets in the pool
* @brief Remove socket from SocketPool
* @param socket tcp socket to remove
* @return number of remaining sockets in the pool
*/
size_t RemoveSocket(std::shared_ptr<TCPSocket> socket);
/*!
* \brief SocketPool destructor
* @brief SocketPool destructor
*/
~SocketPool();
/*!
* \brief Get current active socket. This is a blocking method
* \param socket_id output parameter of the socket_id of active socket
* \return active TCPSocket
* @brief Get current active socket. This is a blocking method
* @param socket_id output parameter of the socket_id of active socket
* @return active TCPSocket
*/
std::shared_ptr<TCPSocket> GetActiveSocket(int* socket_id);
private:
/*!
* \brief Wait for event notification
* @brief Wait for event notification
*/
void Wait();
/*!
* \brief map from fd to TCPSocket
* @brief map from fd to TCPSocket
*/
std::unordered_map<int, std::shared_ptr<TCPSocket>> tcp_sockets_;
/*!
* \brief map from fd to socket_id
* @brief map from fd to socket_id
*/
std::unordered_map<int, int> socket_ids_;
/*!
* \brief fd for epoll base
* @brief fd for epoll base
*/
int epfd_;
/*!
* \brief queue for current active fds
* @brief queue for current active fds
*/
std::queue<int> pending_fds_;
};
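For orientation, here is a minimal sketch of how a receive loop might drive this pool; the wiring (how the sockets were connected, the buffer size, the id scheme) is assumed for illustration and is not part of this change:

```cpp
// Sketch only: poll a SocketPool for readable sockets.
// Assumes the TCPSocket objects were connected elsewhere.
#include <memory>
#include <vector>

void ReceiveLoop(
    const std::vector<std::shared_ptr<dgl::network::TCPSocket>>& socks) {
  dgl::network::SocketPool pool;
  for (size_t i = 0; i < socks.size(); ++i) {
    // Register each socket for READ events; socket_id maps an active
    // socket back to its sender.
    pool.AddSocket(socks[i], static_cast<int>(i), dgl::network::SocketPool::READ);
  }
  char buffer[1024];
  while (true) {
    int socket_id = -1;
    // Blocks (via epoll) until some registered socket is active.
    std::shared_ptr<dgl::network::TCPSocket> active =
        pool.GetActiveSocket(&socket_id);
    if (active->Receive(buffer, sizeof(buffer)) <= 0) {
      // Peer closed or errored: drop it; stop when the pool is empty.
      if (pool.RemoveSocket(active) == 0) break;
    }
  }
}
```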
......
/*!
* Copyright (c) 2019 by Contributors
* \file tcp_socket.cc
* \brief TCP socket for DGL distributed training.
* @file tcp_socket.cc
* @brief TCP socket for DGL distributed training.
*/
#include "tcp_socket.h"
......
/*!
* Copyright (c) 2019 by Contributors
* \file tcp_socket.h
* \brief TCP socket for DGL distributed training.
* @file tcp_socket.h
* @brief TCP socket for DGL distributed training.
*/
#ifndef DGL_RPC_NETWORK_TCP_SOCKET_H_
#define DGL_RPC_NETWORK_TCP_SOCKET_H_
......@@ -20,107 +20,107 @@ namespace dgl {
namespace network {
/*!
* \brief TCPSocket is a simple wrapper around a socket.
* @brief TCPSocket is a simple wrapper around a socket.
* It supports only TCP connections.
*/
class TCPSocket {
public:
/*!
* \brief TCPSocket constructor
* @brief TCPSocket constructor
*/
TCPSocket();
/*!
* \brief TCPSocket destructor
* @brief TCPSocket destructor
*/
~TCPSocket();
/*!
* \brief Connect to a given server address
* \param ip ip address
* \param port end port
* \return true for success and false for failure
* @brief Connect to a given server address
* @param ip ip address
* @param port end port
* @return true for success and false for failure
*/
bool Connect(const char* ip, int port);
/*!
* \brief Bind on the given IP and PORT
* \param ip ip address
* \param port end port
* \return true for success and false for failure
* @brief Bind on the given IP and PORT
* @param ip ip address
* @param port end port
* @return true for success and false for failure
*/
bool Bind(const char* ip, int port);
/*!
* \brief listen for remote connection
* \param max_connection maximal connection
* \return true for success and false for failure
* @brief listen for remote connection
* @param max_connection maximal connection
* @return true for success and false for failure
*/
bool Listen(int max_connection);
/*!
* \brief wait for a new connection
* \param socket new SOCKET will be stored to socket
* \param ip_client new IP will be stored to ip_client
* \param port_client new PORT will be stored to port_client
* \return true for success and false for failure
* @brief wait for a new connection
* @param socket new SOCKET will be stored to socket
* @param ip_client new IP will be stored to ip_client
* @param port_client new PORT will be stored to port_client
* @return true for success and false for failure
*/
bool Accept(TCPSocket* socket, std::string* ip_client, int* port_client);
/*!
* \brief SetNonBlocking() is needed, referring to this example of epoll:
* @brief SetNonBlocking() is needed, referring to this example of epoll:
* http://www.kernel.org/doc/man-pages/online/pages/man4/epoll.4.html
* \param flag true for nonblocking, false for blocking
* \return true for success and false for failure
* @param flag true for nonblocking, false for blocking
* @return true for success and false for failure
*/
bool SetNonBlocking(bool flag);
/*!
* \brief Set timeout for socket
* \param timeout timeout in seconds
* @brief Set timeout for socket
* @param timeout timeout in seconds
*/
void SetTimeout(int timeout);
/*!
* \brief Shut down one or both halves of the connection.
* \param ways ways for shutdown
* @brief Shut down one or both halves of the connection.
* @param ways ways for shutdown
* If ways is SHUT_RD, further receives are disallowed.
* If ways is SHUT_WR, further sends are disallowed.
* If ways is SHUT_RDWR, further sends and receives are disallowed.
* \return true for success and false for failure
* @return true for success and false for failure
*/
bool ShutDown(int ways);
/*!
* \brief close socket.
* @brief close socket.
*/
void Close();
/*!
* \brief Send data.
* \param data data for sending
* \param len_data length of data
* \return return number of bytes sent if OK, -1 on error
* @brief Send data.
* @param data data for sending
* @param len_data length of data
* @return return number of bytes sent if OK, -1 on error
*/
int64_t Send(const char* data, int64_t len_data);
/*!
* \brief Receive data.
* \param buffer buffer for receiving
* \param size_buffer size of buffer
* \return return number of bytes received if OK, -1 on error
* @brief Receive data.
* @param buffer buffer for receiving
* @param size_buffer size of buffer
* @return return number of bytes received if OK, -1 on error
*/
int64_t Receive(char* buffer, int64_t size_buffer);
/*!
* \brief Get socket's file descriptor
* \return socket's file descriptor
* @brief Get socket's file descriptor
* @return socket's file descriptor
*/
int Socket() const;
private:
/*!
* \brief socket's file descriptor
* @brief socket's file descriptor
*/
int socket_;
};
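As a quick illustration of the wrapper above, a blocking client round-trip might look like the following sketch; the address, timeout, and message are placeholders and not part of this change:

```cpp
// Sketch only: connect, send a request, read a reply, shut down.
#include <sys/socket.h>  // SHUT_RDWR
#include <cstring>

bool SendHello(const char* ip, int port) {
  dgl::network::TCPSocket sock;
  if (!sock.Connect(ip, port)) return false;  // e.g. "127.0.0.1", 50051
  sock.SetTimeout(30);                        // timeout in seconds, per the doc above
  const char* msg = "hello";
  const int64_t sent = sock.Send(msg, std::strlen(msg));
  char reply[64] = {0};
  const int64_t got = sock.Receive(reply, sizeof(reply) - 1);
  sock.ShutDown(SHUT_RDWR);  // disallow further sends and receives
  sock.Close();
  return sent > 0 && got >= 0;
}
```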
......
/*!
* Copyright (c) 2020 by Contributors
* \file rpc/rpc.cc
* \brief Implementation of RPC utilities used by both server and client sides.
* @file rpc/rpc.cc
* @brief Implementation of RPC utilities used by both server and client sides.
*/
#if defined(__linux__)
#include "./rpc.h"
......@@ -383,8 +383,8 @@ DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCMessageGetTensors")
#if defined(__linux__)
/*!
* \brief The signal handler.
* \param s signal
* @brief The signal handler.
* @param s signal
*/
void SigHandler(int s) {
LOG(INFO) << "\nUser pressed Ctrl+C, Exiting";
......
/*!
* Copyright (c) 2020 by Contributors
* \file rpc/rpc.h
* \brief Common headers for remote process call (RPC).
* @file rpc/rpc.h
* @brief Common headers for remote process call (RPC).
*/
#ifndef DGL_RPC_RPC_H_
#define DGL_RPC_RPC_H_
......@@ -34,10 +34,10 @@ struct RPCContext;
// Communicator handler type
typedef void* CommunicatorHandle;
/*! \brief Context information for RPC communication */
/*! @brief Context information for RPC communication */
struct RPCContext {
/*!
* \brief Rank of this process.
* @brief Rank of this process.
*
* If the process is a client, this is equal to client ID. Otherwise, the
* process is a server and this is equal to server ID.
......@@ -45,57 +45,57 @@ struct RPCContext {
int32_t rank = -1;
/*!
* \brief Current machine ID
* @brief Current machine ID
*/
int32_t machine_id = -1;
/*!
* \brief Total number of machines.
* @brief Total number of machines.
*/
int32_t num_machines = 0;
/*!
* \brief Message sequence number.
* @brief Message sequence number.
*/
std::atomic<int64_t> msg_seq{0};
/*!
* \brief Total number of servers.
* @brief Total number of servers.
*/
int32_t num_servers = 0;
/*!
* \brief Total number of clients.
* @brief Total number of clients.
*/
int32_t num_clients = 0;
/*!
* \brief Current barrier count
* @brief Current barrier count
*/
std::unordered_map<int32_t, int32_t> barrier_count;
/*!
* \brief Total number of servers per machine.
* @brief Total number of servers per machine.
*/
int32_t num_servers_per_machine = 0;
/*!
* \brief Sender communicator.
* @brief Sender communicator.
*/
std::shared_ptr<RPCSender> sender;
/*!
* \brief Receiver communicator.
* @brief Receiver communicator.
*/
std::shared_ptr<RPCReceiver> receiver;
/*!
* \brief Tensorpipe global context
* @brief Tensorpipe global context
*/
std::shared_ptr<tensorpipe::Context> ctx;
/*!
* \brief Server state data.
* @brief Server state data.
*
* If the process is a server, this stores necessary
* server-side data. Otherwise, the process is a client and it stores a cache
......@@ -106,19 +106,19 @@ struct RPCContext {
std::shared_ptr<ServerState> server_state;
/*!
* \brief Current group ID
* @brief Current group ID
*/
int32_t group_id = -1;
int32_t curr_client_id = -1;
std::unordered_map<int32_t, std::unordered_map<int32_t, int32_t>> clients_;
/*! \brief Get the RPC context singleton */
/*! @brief Get the RPC context singleton */
static RPCContext* getInstance() {
static RPCContext ctx;
return &ctx;
}
/*! \brief Reset the RPC context */
/*! @brief Reset the RPC context */
static void Reset() {
auto* t = getInstance();
t->rank = -1;
......@@ -160,7 +160,7 @@ struct RPCContext {
};
/*!
* \brief Send out one RPC message.
* @brief Send out one RPC message.
*
* The operation is non-blocking -- it does not guarantee the payloads have
* reached the target or even have left the sender process. However,
......@@ -172,19 +172,19 @@ struct RPCContext {
* The underlying sending threads will hold references to the tensors until
* the contents have been transmitted.
*
* \param msg RPC message to send
* \return status flag
* @param msg RPC message to send
* @return status flag
*/
RPCStatus SendRPCMessage(const RPCMessage& msg);
/*!
* \brief Receive one RPC message.
* @brief Receive one RPC message.
*
* The operation is blocking -- it returns when it receives any message
*
* \param msg The received message
* \param timeout The timeout value in milliseconds. If zero, wait indefinitely.
* \return status flag
* @param msg The received message
* @param timeout The timeout value in milliseconds. If zero, wait indefinitely.
* @return status flag
*/
RPCStatus RecvRPCMessage(RPCMessage* msg, int32_t timeout = 0);
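A hedged sketch of how these two calls pair up on the client side; the service id, payload, and timeout below are invented for illustration:

```cpp
// Sketch only: non-blocking send followed by a blocking receive.
using namespace dgl::rpc;

RPCStatus PingServer(int32_t server_id) {
  RPCMessage req;
  req.service_id = 1;  // hypothetical service id
  req.client_id = RPCContext::getInstance()->rank;
  req.server_id = server_id;
  req.data = "ping";   // payload buffer
  RPCStatus status = SendRPCMessage(req);  // returns before delivery completes
  if (status != kRPCSuccess) return status;

  RPCMessage reply;
  // Blocks for up to 5000 ms; a timeout of 0 would wait indefinitely.
  return RecvRPCMessage(&reply, 5000);
}
```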
......
/*!
* Copyright (c) 2020 by Contributors
* \file rpc/rpc_msg.h
* \brief Common headers for remote process call (RPC).
* @file rpc/rpc_msg.h
* @brief Common headers for remote process call (RPC).
*/
#ifndef DGL_RPC_RPC_MSG_H_
#define DGL_RPC_RPC_MSG_H_
......@@ -16,31 +16,31 @@
namespace dgl {
namespace rpc {
/*! \brief RPC message data structure
/*! @brief RPC message data structure
*
* This structure is exposed to Python and can be used as argument or return
* value in C API.
*/
struct RPCMessage : public runtime::Object {
/*! \brief Service ID */
/*! @brief Service ID */
int32_t service_id;
/*! \brief Sequence number of this message. */
/*! @brief Sequence number of this message. */
int64_t msg_seq;
/*! \brief Client ID. */
/*! @brief Client ID. */
int32_t client_id;
/*! \brief Server ID. */
/*! @brief Server ID. */
int32_t server_id;
/*! \brief Payload buffer carried by this request.*/
/*! @brief Payload buffer carried by this request.*/
std::string data;
/*! \brief Extra payloads in the form of tensors.*/
/*! @brief Extra payloads in the form of tensors.*/
std::vector<runtime::NDArray> tensors;
/*! \brief Group ID. */
/*! @brief Group ID. */
int32_t group_id{0};
bool Load(dmlc::Stream* stream) {
......@@ -70,7 +70,7 @@ struct RPCMessage : public runtime::Object {
DGL_DEFINE_OBJECT_REF(RPCMessageRef, RPCMessage);
/*! \brief RPC status flag */
/*! @brief RPC status flag */
enum RPCStatus {
kRPCSuccess = 0,
kRPCTimeOut,
......
/*!
* Copyright (c) 2020 by Contributors
* \file rpc/server_state.h
* \brief Implementation of RPC utilities used by both server and client sides.
* @file rpc/server_state.h
* @brief Implementation of RPC utilities used by both server and client sides.
*/
#ifndef DGL_RPC_SERVER_STATE_H_
......@@ -18,7 +18,7 @@ namespace dgl {
namespace rpc {
/*!
* \brief Data stored in one DGL server.
* @brief Data stored in one DGL server.
*
* In a distributed setting, DGL partitions all data associated with the graph
* (e.g., node and edge features, graph structure, etc.) to multiple partitions,
......@@ -39,16 +39,16 @@ namespace rpc {
* shared memory.
*/
struct ServerState : public runtime::Object {
/*! \brief Key value store for NDArray data */
/*! @brief Key value store for NDArray data */
std::unordered_map<std::string, runtime::NDArray> kv_store;
/*! \brief Graph structure of one partition */
/*! @brief Graph structure of one partition */
HeteroGraphPtr graph;
/*! \brief Total number of nodes */
/*! @brief Total number of nodes */
int64_t total_num_nodes = 0;
/*! \brief Total number of edges */
/*! @brief Total number of edges */
int64_t total_num_edges = 0;
static constexpr const char* _type_key = "server_state.ServerState";
......
/*!
* Copyright (c) 2019 by Contributors
* \file tp_communicator.cc
* \brief Tensorpipe Communicator for DGL distributed training.
* @file tp_communicator.cc
* @brief Tensorpipe Communicator for DGL distributed training.
*/
#include "tp_communicator.h"
......
/*!
* Copyright (c) 2019 by Contributors
* \file tp_communicator.h
* \brief Tensorpipe Communicator for DGL distributed training.
* @file tp_communicator.h
* @brief Tensorpipe Communicator for DGL distributed training.
*/
#ifndef DGL_RPC_TENSORPIPE_TP_COMMUNICATOR_H_
#define DGL_RPC_TENSORPIPE_TP_COMMUNICATOR_H_
......@@ -26,15 +26,15 @@ namespace rpc {
typedef Queue<RPCMessage> RPCMessageQueue;
/*!
* \brief TPSender for DGL distributed training.
* @brief TPSender for DGL distributed training.
*
* TPSender is the communicator implemented by tcp socket.
*/
class TPSender : public RPCSender {
public:
/*!
* \brief Sender constructor
* \param ctx shared tensorpipe context
* @brief Sender constructor
* @param ctx shared tensorpipe context
*/
explicit TPSender(std::shared_ptr<tensorpipe::Context> ctx) {
CHECK(ctx) << "Context is not initialized";
......@@ -42,40 +42,40 @@ class TPSender : public RPCSender {
}
/*!
* \brief Sender destructor
* @brief Sender destructor
*/
~TPSender() { Finalize(); }
/*!
* \brief Connect to a receiver.
* @brief Connect to a receiver.
*
* When there are multiple receivers to be connected, application will call
* `ConnectReceiver` for each and then call `ConnectReceiverFinalize` to make
* sure that either all the connections are successfully established or some
* of them fail.
*
* \param addr Networking address, e.g., 'tcp://127.0.0.1:50091'
* \param recv_id receiver's ID
* \return True for success and False for fail
* @param addr Networking address, e.g., 'tcp://127.0.0.1:50091'
* @param recv_id receiver's ID
* @return True for success and False for fail
*
* The function is *not* thread-safe; only one thread can invoke this API.
*/
bool ConnectReceiver(const std::string& addr, int recv_id) override;
/*!
* \brief Send RPCMessage to specified Receiver.
* \param msg data message
* \param recv_id receiver's ID
* @brief Send RPCMessage to specified Receiver.
* @param msg data message
* @param recv_id receiver's ID
*/
void Send(const RPCMessage& msg, int recv_id) override;
/*!
* \brief Finalize TPSender
* @brief Finalize TPSender
*/
void Finalize() override;
/*!
* \brief Communicator type: 'tp'
* @brief Communicator type: 'tp'
*/
const std::string& NetType() const override {
static const std::string net_type = "tensorpipe";
......@@ -84,32 +84,32 @@ class TPSender : public RPCSender {
private:
/*!
* \brief global context of tensorpipe
* @brief global context of tensorpipe
*/
std::shared_ptr<tensorpipe::Context> context;
/*!
* \brief pipe for each connection of receiver
* @brief pipe for each connection of receiver
*/
std::unordered_map<int /* receiver ID */, std::shared_ptr<tensorpipe::Pipe>>
pipes_;
/*!
* \brief receivers' listening address
* @brief receivers' listening address
*/
std::unordered_map<int /* receiver ID */, std::string> receiver_addrs_;
};
/*!
* \brief TPReceiver for DGL distributed training.
* @brief TPReceiver for DGL distributed training.
*
* Tensorpipe Receiver is the communicator implemented by tcp socket.
*/
class TPReceiver : public RPCReceiver {
public:
/*!
* \brief Receiver constructor
* \param ctx shared tensorpipe context.
* @brief Receiver constructor
* @param ctx shared tensorpipe context.
*/
explicit TPReceiver(std::shared_ptr<tensorpipe::Context> ctx) {
CHECK(ctx) << "Context is not initialized";
......@@ -118,16 +118,16 @@ class TPReceiver : public RPCReceiver {
}
/*!
* \brief Receiver destructor
* @brief Receiver destructor
*/
~TPReceiver() { Finalize(); }
/*!
* \brief Wait for all the Senders to connect
* \param addr Networking address, e.g., 'tcp://127.0.0.1:50051'
* \param num_sender total number of Senders
* \param blocking whether to block while waiting
* \return True for success and False for fail
* @brief Wait for all the Senders to connect
* @param addr Networking address, e.g., 'tcp://127.0.0.1:50051'
* @param num_sender total number of Senders
* @param blocking whether to block while waiting
* @return True for success and False for fail
*
* Wait() is not thread-safe and only one thread can invoke this API.
*/
......@@ -135,23 +135,23 @@ class TPReceiver : public RPCReceiver {
const std::string& addr, int num_sender, bool blocking = true) override;
/*!
* \brief Recv RPCMessage from Sender. Actually removing data from queue.
* \param msg pointer of RPCmessage
* \param timeout The timeout value in milliseconds. If zero, wait
* @brief Recv RPCMessage from Sender. Actually removing data from queue.
* @param msg pointer of RPCmessage
* @param timeout The timeout value in milliseconds. If zero, wait
* indefinitely.
* \return RPCStatus: kRPCSuccess or kRPCTimeOut.
* @return RPCStatus: kRPCSuccess or kRPCTimeOut.
*/
RPCStatus Recv(RPCMessage* msg, int timeout) override;
/*!
* \brief Finalize SocketReceiver
* @brief Finalize SocketReceiver
*
* Finalize() is not thread-safe and only one thread can invoke this API.
*/
void Finalize() override;
/*!
* \brief Communicator type: 'tp' (tensorpipe)
* @brief Communicator type: 'tp' (tensorpipe)
*/
const std::string& NetType() const override {
static const std::string net_type = "tensorpipe";
......@@ -159,7 +159,7 @@ class TPReceiver : public RPCReceiver {
}
/*!
* \brief Issue a receive request on pipe, and push the result into queue
* @brief Issue a receive request on pipe, and push the result into queue
*/
static void ReceiveFromPipe(
std::shared_ptr<tensorpipe::Pipe> pipe,
......@@ -167,45 +167,45 @@ class TPReceiver : public RPCReceiver {
private:
/*!
* \brief Callback for new connection is accepted.
* @brief Callback for new connection is accepted.
*/
void OnAccepted(const tensorpipe::Error&, std::shared_ptr<tensorpipe::Pipe>);
private:
/*!
* \brief number of senders
* @brief number of senders
*/
int num_sender_;
/*!
* \brief listener to build pipe
* @brief listener to build pipe
*/
std::shared_ptr<tensorpipe::Listener> listener;
/*!
* \brief global context of tensorpipe
* @brief global context of tensorpipe
*/
std::shared_ptr<tensorpipe::Context> context;
/*!
* \brief pipe for each client connection
* @brief pipe for each client connection
*/
std::unordered_map<
int /* Sender (virtual) ID */, std::shared_ptr<tensorpipe::Pipe>>
pipes_;
/*!
* \brief RPCMessage queue
* @brief RPCMessage queue
*/
std::shared_ptr<RPCMessageQueue> queue_;
/*!
* \brief number of accepted connections
* @brief number of accepted connections
*/
std::atomic<int32_t> num_connected_{0};
/*!
* \brief listener
* @brief listener
*/
std::shared_ptr<tensorpipe::Listener> listener_{nullptr};
};
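To make the sender/receiver pairing concrete, a sketch of the two sides follows; they would normally run in different processes, and the address, ids, and payload are placeholders:

```cpp
// Sketch only: one receiver process and one sender process.
#include <memory>
#include <tensorpipe/tensorpipe.h>

void ReceiverSide(std::shared_ptr<tensorpipe::Context> ctx) {
  dgl::rpc::TPReceiver receiver(ctx);
  // Listen on the address and block until one sender has connected.
  receiver.Wait("tcp://127.0.0.1:50091", /*num_sender=*/1);
  dgl::rpc::RPCMessage msg;
  // Pop one message off the internal queue, waiting at most 1000 ms.
  if (receiver.Recv(&msg, /*timeout=*/1000) == dgl::rpc::kRPCSuccess) {
    // ... dispatch msg ...
  }
  receiver.Finalize();
}

void SenderSide(std::shared_ptr<tensorpipe::Context> ctx) {
  dgl::rpc::TPSender sender(ctx);
  sender.ConnectReceiver("tcp://127.0.0.1:50091", /*recv_id=*/0);
  dgl::rpc::RPCMessage msg;
  msg.data = "payload";
  sender.Send(msg, /*recv_id=*/0);
  sender.Finalize();
}
```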
......
/*!
* Copyright (c) 2016 by Contributors
* Implementation of C API (reference: tvm/src/api/c_api.cc)
* \file c_api.cc
* @file c_api.cc
*/
#include <dgl/packed_func_ext.h>
#include <dgl/runtime/c_object_api.h>
......@@ -17,19 +17,19 @@
#include "runtime_base.h"
/*! \brief entry to easily hold returning information */
/*! @brief entry to easily hold returning information */
struct DGLAPIThreadLocalEntry {
/*! \brief result holder for returning strings */
/*! @brief result holder for returning strings */
std::vector<std::string> ret_vec_str;
/*! \brief result holder for returning string pointers */
/*! @brief result holder for returning string pointers */
std::vector<const char*> ret_vec_charp;
/*! \brief result holder for returning string */
/*! @brief result holder for returning string */
std::string ret_str;
};
using namespace dgl::runtime;
/*! \brief Thread local store that can be used to hold return values. */
/*! @brief Thread local store that can be used to hold return values. */
typedef dmlc::ThreadLocalStore<DGLAPIThreadLocalEntry> DGLAPIThreadLocalStore;
using DGLAPIObject = std::shared_ptr<Object>;
......
/*!
* Copyright (c) 2016-2022 by Contributors
* \file c_runtime_api.cc
* \brief Runtime API implementation
* @file c_runtime_api.cc
* @brief Runtime API implementation
*/
#include <dgl/runtime/c_backend_api.h>
#include <dgl/runtime/c_runtime_api.h>
......@@ -23,8 +23,8 @@ namespace dgl {
namespace runtime {
/*!
* \brief The name of Device API factory.
* \param type The device type.
* @brief The name of Device API factory.
* @param type The device type.
*/
inline std::string DeviceName(int type) {
switch (type) {
......
/*!
* Copyright (c) 2019 by Contributors
* \file runtime/config.cc
* \brief DGL runtime config
* @file runtime/config.cc
* @brief DGL runtime config
*/
#include <dgl/runtime/config.h>
......
/*!
* Copyright (c) 2016-2022 by Contributors
* \file cpu_device_api.cc
* @file cpu_device_api.cc
*/
#include <dgl/runtime/device_api.h>
#include <dgl/runtime/registry.h>
......
/*!
* Copyright (c) 2017 by Contributors
* \file cuda_common.h
* \brief Common utilities for CUDA
* @file cuda_common.h
* @brief Common utilities for CUDA
*/
#ifndef DGL_RUNTIME_CUDA_CUDA_COMMON_H_
#define DGL_RUNTIME_CUDA_CUDA_COMMON_H_
......@@ -110,7 +110,7 @@ inline const char* curandGetErrorString(curandStatus_t error) {
}
/*
* \brief Cast data type to cudaDataType_t.
* @brief Cast data type to cudaDataType_t.
*/
template <typename T>
struct cuda_dtype {
......@@ -134,7 +134,7 @@ struct cuda_dtype<double> {
#if CUDART_VERSION >= 11000
/*
* \brief Cast index data type to cusparseIndexType_t.
* @brief Cast index data type to cusparseIndexType_t.
*/
template <typename T>
struct cusparse_idtype {
......@@ -152,24 +152,24 @@ struct cusparse_idtype<int64_t> {
};
#endif
/*! \brief Thread local workspace */
/*! @brief Thread local workspace */
class CUDAThreadEntry {
public:
/*! \brief The cusparse handler */
/*! @brief The cusparse handler */
cusparseHandle_t cusparse_handle{nullptr};
/*! \brief The cublas handler */
/*! @brief The cublas handler */
cublasHandle_t cublas_handle{nullptr};
/*! \brief The curand generator */
/*! @brief The curand generator */
curandGenerator_t curand_gen{nullptr};
/*! \brief thread local pool*/
/*! @brief thread local pool*/
WorkspacePool pool;
/*! \brief constructor */
/*! @brief constructor */
CUDAThreadEntry();
// get the threadlocal workspace
static CUDAThreadEntry* ThreadLocal();
};
/*! \brief Get the current CUDA stream */
/*! @brief Get the current CUDA stream */
cudaStream_t getCurrentCUDAStream();
} // namespace runtime
} // namespace dgl
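A small sketch of what a caller of getCurrentCUDAStream() might look like; the buffers are placeholders and error checking is omitted:

```cpp
// Sketch only: issue asynchronous work on DGL's current CUDA stream.
#include <cstddef>
#include <cuda_runtime.h>

void CopyOnCurrentStream(void* dst, const void* src, size_t bytes) {
  cudaStream_t stream = dgl::runtime::getCurrentCUDAStream();
  cudaMemcpyAsync(dst, src, bytes, cudaMemcpyDeviceToDevice, stream);
  // Synchronize only when the host needs to observe the result.
  cudaStreamSynchronize(stream);
}
```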
......
/*!
* Copyright (c) 2017-2022 by Contributors
* \file cuda_device_api.cc
* \brief GPU specific API
* @file cuda_device_api.cc
* @brief GPU specific API
*/
#include <cuda_runtime.h>
#include <dgl/runtime/device_api.h>
......
/*!
* Copyright (c) 2021 by Contributors
* \file runtime/cuda/cuda_device_common.cuh
* \brief Device level functions for use within cuda kernels.
* @file runtime/cuda/cuda_device_common.cuh
* @brief Device level functions for use within cuda kernels.
*/
#include <cassert>
......@@ -60,13 +60,13 @@ class MutableDeviceOrderedHashTable : public DeviceOrderedHashTable<IdType> {
}
/**
* \brief Attempt to insert into the hash table at a specific location.
* @brief Attempt to insert into the hash table at a specific location.
*
* \param pos The position to insert at.
* \param id The ID to insert into the hash table.
* \param index The original index of the item being inserted.
* @param pos The position to insert at.
* @param id The ID to insert into the hash table.
* @param index The original index of the item being inserted.
*
* \return True, if the insertion was successful.
* @return True, if the insertion was successful.
*/
inline __device__ bool AttemptInsertAt(
const size_t pos, const IdType id, const size_t index) {
......@@ -165,15 +165,15 @@ struct BlockPrefixCallbackOp {
} // namespace
/**
* \brief This generates a hash map where the keys are the global item numbers,
* @brief This generates a hash map where the keys are the global item numbers,
* and the values are indexes, and inputs may have duplicates.
*
* \tparam IdType The type of id.
* \tparam BLOCK_SIZE The size of the thread block.
* \tparam TILE_SIZE The number of entries each thread block will process.
* \param items The items to insert.
* \param num_items The number of items to insert.
* \param table The hash table.
* @tparam IdType The type of id.
* @tparam BLOCK_SIZE The size of the thread block.
* @tparam TILE_SIZE The number of entries each thread block will process.
* @param items The items to insert.
* @param num_items The number of items to insert.
* @param table The hash table.
*/
template <typename IdType, int BLOCK_SIZE, size_t TILE_SIZE>
__global__ void generate_hashmap_duplicates(
......@@ -194,15 +194,15 @@ __global__ void generate_hashmap_duplicates(
}
/**
* \brief This generates a hash map where the keys are the global item numbers,
* @brief This generates a hash map where the keys are the global item numbers,
* and the values are indexes, and all inputs are unique.
*
* \tparam IdType The type of id.
* \tparam BLOCK_SIZE The size of the thread block.
* \tparam TILE_SIZE The number of entries each thread block will process.
* \param items The unique items to insert.
* \param num_items The number of items to insert.
* \param table The hash table.
* @tparam IdType The type of id.
* @tparam BLOCK_SIZE The size of the thread block.
* @tparam TILE_SIZE The number of entries each thread block will process.
* @param items The unique items to insert.
* @param num_items The number of items to insert.
* @param table The hash table.
*/
template <typename IdType, int BLOCK_SIZE, size_t TILE_SIZE>
__global__ void generate_hashmap_unique(
......@@ -229,15 +229,15 @@ __global__ void generate_hashmap_unique(
}
/**
* \brief This counts the number of nodes inserted per thread block.
* @brief This counts the number of nodes inserted per thread block.
*
* \tparam IdType The type of id.
* \tparam BLOCK_SIZE The size of the thread block.
* \tparam TILE_SIZE The number of entries each thread block will process.
* \param input The nodes to insert.
* \param num_input The number of nodes to insert.
* \param table The hash table.
* \param num_unique The number of nodes inserted into the hash table per thread
* @tparam IdType The type of id.
* @tparam BLOCK_SIZE The size of the thread block.
* @tparam TILE_SIZE The number of entries each thread block will process.
* @param input The nodes to insert.
* @param num_input The number of nodes to insert.
* @param table The hash table.
* @param num_unique The number of nodes inserted into the hash table per thread
* block.
*/
template <typename IdType, int BLOCK_SIZE, size_t TILE_SIZE>
......@@ -278,18 +278,18 @@ __global__ void count_hashmap(
}
/**
* \brief Update the local numbering of elements in the hashmap.
* @brief Update the local numbering of elements in the hashmap.
*
* \tparam IdType The type of id.
* \tparam BLOCK_SIZE The size of the thread blocks.
* \tparam TILE_SIZE The number of elements each thread block works on.
* \param items The set of non-unique items to update from.
* \param num_items The number of non-unique items.
* \param table The hash table.
* \param num_items_prefix The number of unique items preceding each thread
* @tparam IdType The type of id.
* @tparam BLOCK_SIZE The size of the thread blocks.
* @tparam TILE_SIZE The number of elements each thread block works on.
* @param items The set of non-unique items to update from.
* @param num_items The number of non-unique items.
* @param table The hash table.
* @param num_items_prefix The number of unique items preceding each thread
* block.
* \param unique_items The set of unique items (output).
* \param num_unique_items The number of unique items (output).
* @param unique_items The set of unique items (output).
* @param num_unique_items The number of unique items (output).
*/
template <typename IdType, int BLOCK_SIZE, size_t TILE_SIZE>
__global__ void compact_hashmap(
......
/*!
* Copyright (c) 2021 by Contributors
* \file runtime/cuda/cuda_device_common.cuh
* \brief Device level functions for use within cuda kernels.
* @file runtime/cuda/cuda_device_common.cuh
* @brief Device level functions for use within cuda kernels.
*/
#ifndef DGL_RUNTIME_CUDA_CUDA_HASHTABLE_CUH_
......@@ -20,7 +20,7 @@ template <typename>
class OrderedHashTable;
/*!
* \brief A device-side handle for a GPU hashtable for mapping items to the
* @brief A device-side handle for a GPU hashtable for mapping items to the
* first index at which they appear in the provided data array.
*
* For any ID array A, one can view it as a mapping from the index `i`
......@@ -60,25 +60,25 @@ class OrderedHashTable;
* to get the remaped array:
* [0, 1, 2, 0, 3, 4, 1, 2, 5, 3]
*
* \tparam IdType The type of the IDs.
* @tparam IdType The type of the IDs.
*/
template <typename IdType>
class DeviceOrderedHashTable {
public:
/**
* \brief An entry in the hashtable.
* @brief An entry in the hashtable.
*/
struct Mapping {
/**
* \brief The ID of the item inserted.
* @brief The ID of the item inserted.
*/
IdType key;
/**
* \brief The index of the item in the unique list.
* @brief The index of the item in the unique list.
*/
IdType local;
/**
* \brief The index of the item when inserted into the hashtable (e.g.,
* @brief The index of the item when inserted into the hashtable (e.g.,
* the index within the array passed into FillWithDuplicates()).
*/
int64_t index;
......@@ -91,14 +91,14 @@ class DeviceOrderedHashTable {
default;
/**
* \brief Find the non-mutable mapping of a given key within the hash table.
* @brief Find the non-mutable mapping of a given key within the hash table.
*
* WARNING: The key must exist within the hashtable. Searching for a key not
* in the hashtable is undefined behavior.
*
* \param id The key to search for.
* @param id The key to search for.
*
* \return An iterator to the mapping.
* @return An iterator to the mapping.
*/
inline __device__ ConstIterator Search(const IdType id) const {
const IdType pos = SearchForPosition(id);
......@@ -107,11 +107,11 @@ class DeviceOrderedHashTable {
}
/**
* \brief Check whether a key exists within the hashtable.
* @brief Check whether a key exists within the hashtable.
*
* \param id The key to check for.
* @param id The key to check for.
*
* \return True if the key exists in the hashtable.
* @return True if the key exists in the hashtable.
*/
inline __device__ bool Contains(const IdType id) const {
IdType pos = Hash(id);
......@@ -135,22 +135,22 @@ class DeviceOrderedHashTable {
size_t size_;
/**
* \brief Create a new device-side handle to the hash table.
* @brief Create a new device-side handle to the hash table.
*
* \param table The table stored in GPU memory.
* \param size The size of the table.
* @param table The table stored in GPU memory.
* @param size The size of the table.
*/
explicit DeviceOrderedHashTable(const Mapping* table, size_t size);
/**
* \brief Search for an item in the hash table which is known to exist.
* @brief Search for an item in the hash table which is known to exist.
*
* WARNING: If the ID searched for does not exist within the hashtable, this
* function will never return.
*
* \param id The ID of the item to search for.
* @param id The ID of the item to search for.
*
* \return The position of the item in the hashtable.
* @return The position of the item in the hashtable.
*/
inline __device__ IdType SearchForPosition(const IdType id) const {
IdType pos = Hash(id);
......@@ -168,11 +168,11 @@ class DeviceOrderedHashTable {
}
/**
* \brief Hash an ID to a position in the hash table.
* @brief Hash an ID to a position in the hash table.
*
* \param id The ID to hash.
* @param id The ID to hash.
*
* \return The hash.
* @return The hash.
*/
inline __device__ size_t Hash(const IdType id) const { return id % size_; }
......@@ -180,7 +180,7 @@ class DeviceOrderedHashTable {
};
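As an illustration of how the device-side handle is typically consumed, a remapping kernel might look like the sketch below; the grid/block configuration and the guarantee that every input ID was already inserted are the caller's responsibility:

```cpp
// Sketch only: translate raw IDs into their compacted indices.
template <typename IdType>
__global__ void RemapIds(
    const IdType* in, IdType* out, const size_t n,
    DeviceOrderedHashTable<IdType> table) {
  const size_t i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) {
    // Search() assumes the key is present, i.e. FillWithDuplicates()/
    // FillWithUnique() has inserted every ID appearing in `in`.
    out[i] = table.Search(in[i])->local;
  }
}
```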
/*!
* \brief A host-side handle for a GPU hashtable for mapping items to the
* @brief A host-side handle for a GPU hashtable for mapping items to the
* first index at which they appear in the provided data array. This host-side
* handle is responsible for allocating and freeing the GPU memory of the
* hashtable.
......@@ -208,7 +208,7 @@ class DeviceOrderedHashTable {
* {key: 1, local: 5, index: 8}
* ]
*
* \tparam IdType The type of the IDs.
* @tparam IdType The type of the IDs.
*/
template <typename IdType>
class OrderedHashTable {
......@@ -218,21 +218,21 @@ class OrderedHashTable {
using Mapping = typename DeviceOrderedHashTable<IdType>::Mapping;
/**
* \brief Create a new ordered hash table. The amount of GPU memory
* @brief Create a new ordered hash table. The amount of GPU memory
* consumed by the resulting hashtable is O(`size` * 2^`scale`).
*
* \param size The number of items to insert into the hashtable.
* \param ctx The device context to store the hashtable on.
* \param scale The power of two times larger the number of buckets should
* @param size The number of items to insert into the hashtable.
* @param ctx The device context to store the hashtable on.
* @param scale The power of two times larger the number of buckets should
* be than the number of items.
* \param stream The stream to use for initializing the hashtable.
* @param stream The stream to use for initializing the hashtable.
*/
OrderedHashTable(
const size_t size, DGLContext ctx, cudaStream_t stream,
const int scale = kDefaultScale);
/**
* \brief Cleanup after the hashtable.
* @brief Cleanup after the hashtable.
*/
~OrderedHashTable();
......@@ -241,33 +241,33 @@ class OrderedHashTable {
OrderedHashTable& operator=(const OrderedHashTable& other) = delete;
/**
* \brief Fill the hashtable with the array containing possibly duplicate
* @brief Fill the hashtable with the array containing possibly duplicate
* IDs.
*
* \param input The array of IDs to insert.
* \param num_input The number of IDs to insert.
* \param unique The list of unique IDs inserted.
* \param num_unique The number of unique IDs inserted.
* \param stream The stream to perform operations on.
* @param input The array of IDs to insert.
* @param num_input The number of IDs to insert.
* @param unique The list of unique IDs inserted.
* @param num_unique The number of unique IDs inserted.
* @param stream The stream to perform operations on.
*/
void FillWithDuplicates(
const IdType* const input, const size_t num_input, IdType* const unique,
int64_t* const num_unique, cudaStream_t stream);
/**
* \brief Fill the hashtable with an array of unique keys.
* @brief Fill the hashtable with an array of unique keys.
*
* \param input The array of unique IDs.
* \param num_input The number of keys.
* \param stream The stream to perform operations on.
* @param input The array of unique IDs.
* @param num_input The number of keys.
* @param stream The stream to perform operations on.
*/
void FillWithUnique(
const IdType* const input, const size_t num_input, cudaStream_t stream);
/**
* \brief Get a version of the hashtable usable from device functions.
* @brief Get a version of the hashtable usable from device functions.
*
* \return This hashtable.
* @return This hashtable.
*/
DeviceOrderedHashTable<IdType> DeviceHandle() const;
......
......@@ -13,8 +13,8 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*
* \file nccl_api.cu
* \brief Implementation of wrapper around NCCL routines.
* @file nccl_api.cu
* @brief Implementation of wrapper around NCCL routines.
*/
#include <cuda_fp16.h>
......
......@@ -13,8 +13,8 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*
* \file nccl_api.h
* \brief Wrapper around NCCL routines.
* @file nccl_api.h
* @brief Wrapper around NCCL routines.
*/
#ifndef DGL_RUNTIME_CUDA_NCCL_API_H_
......
/*!
* Copyright (c) 2022 by Contributors
* \file src/runtime/dlpack_convert.cc
* \brief Conversion between NDArray and DLPack.
* @file src/runtime/dlpack_convert.cc
* @brief Conversion between NDArray and DLPack.
*/
#include <dgl/runtime/c_runtime_api.h>
#include <dgl/runtime/device_api.h>
......