#!/bin/bash
# docker-cluster-up.sh
# Starts one docker container on the current (master) node and, through pdsh,
# one on every worker node listed in CLUSTER_CONFIG, each running sshd.
set -e

# =====================================================================
# Node mapping table
# 1. The current node is the master node and mounts a working directory.
# 2. Column 1 is the physical machine's hostname or IP; column 2 is the
#    hostname used for its docker container.
# =====================================================================
CLUSTER_CONFIG="
$(hostname)    node01
node1          node02
node2          node03
node3          node04
"

# =====================================================================
# Defaults and command-line arguments
# =====================================================================
IMAGE_NAME=harbor.sourcefind.cn:5443/dcu/admin/base/vllm:0.11.0-ubuntu22.04-dtk26.04-0130-py3.10-20260204
CONTAINER_NAME=cluster-dtk26-20260204
SSH_PORT=3333     # make sure this port is available
WORKDIR="${PWD}"  # change to the master node's application directory (rocHPL/rocHPCG, etc.)
FORCE_RM=0
SKIP_PULL=0

# Print the command-line help on stdout and terminate the script with status 1.
# Reads IMAGE_NAME, CONTAINER_NAME and SSH_PORT to display the current defaults.
usage() {
  # Unquoted heredoc: $0 and the defaults expand, \$PWD stays literal.
  cat <<EOF
Usage: $0 [-f|--force] [-i|--image IMAGE] [-n|--name NAME] [-p|--port PORT] [-w|--workdir WORKDIR] [--no-pull] [-h|--help]
  -f, --force              Force remove existing container before starting
  -i, --image    IMAGE     Docker image name (default: ${IMAGE_NAME})
  -n, --name     NAME      Container name across all nodes (default: ${CONTAINER_NAME})
  -p, --port     PORT      SSH port inside container (default: ${SSH_PORT})
  -w, --workdir  WORKDIR   Workspace directory to mount for the master node (default: \$PWD)
  --no-pull                Skip pulling the image
  -h, --help               Show this help message
EOF
  exit 1
}

# Parse the command line. Long options require a getopt that supports --long;
# on a parse error the help text is shown via usage.
OPTS=$(getopt -o hfi:n:p:w: --long help,force,image:,name:,port:,workdir:,no-pull -n "$0" -- "$@") || usage
# getopt prints a shell-quoted argument list; eval re-parses it into "$@".
eval set -- "${OPTS}"

while true; do
  case "$1" in
    -h|--help)       usage ;;
    -f|--force)      FORCE_RM=1; shift ;;
    -i|--image)      IMAGE_NAME="$2"; shift 2 ;;
    -n|--name)       CONTAINER_NAME="$2"; shift 2 ;;
    -p|--port)
      # The port is later interpolated into shell command strings, so accept
      # only a decimal number in the valid TCP port range.
      if ! [[ "$2" =~ ^[0-9]+$ ]] || (( 10#$2 < 1 || 10#$2 > 65535 )); then
        echo "[Error] Invalid SSH port: '$2'" >&2
        exit 1
      fi
      SSH_PORT="$2"; shift 2 ;;
    -w|--workdir)    WORKDIR="$2"; shift 2 ;;
    --no-pull)       SKIP_PULL=1; shift ;;
    --)              shift; break ;;
    # usage terminates the script; the explicit exit guarantees this loop can
    # never spin on an unrecognised argument.
    *)               usage; exit 1 ;;
  esac
done

# =====================================================================
# Resolve hosts
# =====================================================================

# Resolve a hostname (or IP) to a single address and print it on stdout.
# Lookup order: `getent ahosts`, then the address shown on ping's PING line,
# then the hosts file. Prints an empty line when nothing resolves.
# Arguments:
#   $1 - hostname or IP to resolve
#   $2 - hosts file used for the last-resort lookup (optional,
#        default: /etc/hosts)
resolve_ip() {
  local target=$1
  local hosts_file=${2:-/etc/hosts}
  local ip=""

  # Try getent: the first column of the first answer line is the address.
  if command -v getent >/dev/null 2>&1; then
    ip=$(getent ahosts "$target" 2>/dev/null | awk 'NR == 1 {print $1; exit}' || true)
  fi

  # Try ping: the resolved address appears in parentheses on the PING line.
  # -W 1 bounds the wait for a reply so an unreachable host cannot stall us.
  if [[ -z "$ip" ]] && command -v ping >/dev/null 2>&1; then
    ip=$(ping -c 1 -n -W 1 "$target" 2>/dev/null | awk -F'[()]' '/PING/ {print $2; exit}' || true)
  fi

  # Fall back to the hosts file: match the canonical name or ANY alias
  # column, ignoring full-line and trailing comments.
  if [[ -z "$ip" && -r "$hosts_file" ]]; then
    ip=$(awk -v h="$target" '
      { sub(/#.*/, "") }
      { for (i = 2; i <= NF; i++) if ($i == h) { print $1; exit } }
    ' "$hosts_file" || true)
  fi

  printf '%s\n' "$ip"
}

# State derived from CLUSTER_CONFIG:
#   MASTER_NODE       physical hostname of the machine running this script
#   DOCKER_MASTER     container hostname mapped to the master node
#   MASTER_IP         resolved address of the master node
#   DOCKER_ADD_HOSTS  accumulated "--add-host name:ip" options for docker run
#   MAPPING_STR       comma-separated "phys_host:container_name" pairs
#   WORKER_NODE_ARR   physical hostnames of every non-master node
MASTER_NODE=$(hostname)
DOCKER_MASTER=""
MASTER_IP=""
DOCKER_ADD_HOSTS=""
MAPPING_STR=""
WORKER_NODE_ARR=()

# Read the hostname mapping table: one "<physical host> <container hostname>"
# row per line; blank lines and lines starting with '#' are skipped.
while read -r phys_host std_name _; do
  [[ -z "$phys_host" || "$phys_host" == \#* ]] && continue

  # A row without a second column would otherwise yield "--add-host :IP".
  if [[ -z "$std_name" ]]; then
    echo "[Error] No container hostname given for node '$phys_host' in CLUSTER_CONFIG!" >&2
    exit 1
  fi

  IP=$(resolve_ip "$phys_host")
  if [[ -z "$IP" ]]; then
    echo "[Error] Failed to resolve IP for node '$phys_host'!" >&2
    exit 1
  fi

  # Every node's host record must be added to every container.
  DOCKER_ADD_HOSTS="${DOCKER_ADD_HOSTS} --add-host ${std_name}:${IP}"

  if [[ -z "$MAPPING_STR" ]]; then
    MAPPING_STR="${phys_host}:${std_name}"
  else
    MAPPING_STR="${MAPPING_STR},${phys_host}:${std_name}"
  fi

  # Is this row the local physical machine (the master node)?
  if [[ "$phys_host" == "$MASTER_NODE" ]]; then
    DOCKER_MASTER=$std_name
    MASTER_IP=$IP
    echo "[INFO] Master node: ${phys_host} -> ${std_name} (${IP})"
  else
    WORKER_NODE_ARR+=("$phys_host")
    echo "[INFO] Worker node: ${phys_host} -> ${std_name} (${IP})"
  fi
done <<< "$CLUSTER_CONFIG"

if [[ -z "$DOCKER_MASTER" ]]; then
  echo "[Error] Current node '$MASTER_NODE' is not found in CLUSTER_CONFIG!" >&2
  exit 1
fi
# Comma-separated host lists, used as the pdsh -w targets further down.
WORKER_NODES=$(IFS=,; echo "${WORKER_NODE_ARR[*]}")

# ALL_NODES is the master alone when there are no workers.
ALL_NODES="${MASTER_NODE}"
if [[ -n "$WORKER_NODES" ]]; then
  ALL_NODES="${ALL_NODES},${WORKER_NODES}"
fi

# =====================================================================
# Start containers
# =====================================================================
if [[ "${SKIP_PULL}" == "1" ]]; then
  echo "[INFO] Image pulling skipped."
else
  echo "[INFO] Pulling the image on each node..."
  pdsh -w "${ALL_NODES}" -S "docker pull ${IMAGE_NAME} >/dev/null"
fi

if [[ "${FORCE_RM}" == "1" ]]; then
  echo "[INFO] Force removing existing containers..."
  # Best effort: the remote "|| true" keeps a failed removal on any node from
  # aborting the run.
  pdsh -w "${ALL_NODES}" -S "docker rm -f ${CONTAINER_NAME} 2>/dev/null || true"
fi

echo "[INFO] Starting docker containers..."
mkdir -p -- "${WORKDIR}"

# Options shared by the master and worker containers. Kept as one flat string
# because it is also interpolated into the remote pdsh command line below.
DOCKER_ARGS="--name=${CONTAINER_NAME} \
  -e NODE_MAPPING=${MAPPING_STR} \
  -v /opt/hyhal:/opt/hyhal:ro \
  -v /root/.ssh:/root/.ssh \
  -w /workspace \
  ${DOCKER_ADD_HOSTS} \
  --network=host \
  --ipc=host \
  --device=/dev/kfd \
  --device=/dev/mkfd \
  --device=/dev/dri \
  --shm-size=512G \
  --privileged \
  --group-add video \
  --cap-add=SYS_PTRACE \
  -u root \
  --security-opt seccomp=unconfined"

# Master container: additionally bind-mounts WORKDIR at /workspace and sets
# its hostname directly to DOCKER_MASTER.
# shellcheck disable=SC2086  # DOCKER_ARGS is an option list; word splitting is intended
docker run -itd \
  ${DOCKER_ARGS} \
  -v "${WORKDIR}:/workspace" \
  "${IMAGE_NAME}" \
  bash -c "hostname ${DOCKER_MASTER} && mkdir -p /run/sshd && /usr/sbin/sshd -p ${SSH_PORT}; sleep infinity"

# Worker containers: each one looks up its own hostname in NODE_MAPPING
# ("phys:container" pairs) and renames itself to the mapped container name.
# The \$ escapes defer those expansions to the shell inside the container.
if [[ -n "$WORKER_NODES" ]]; then
  pdsh -w "${WORKER_NODES}" -S "docker run -itd \
    ${DOCKER_ARGS} \
    ${IMAGE_NAME} \
    bash -c 'PHYS=\$(hostname); for m in \${NODE_MAPPING//,/ }; do [ \"\${m%%:*}\" = \"\$PHYS\" ] && hostname \${m##*:} && break; done; mkdir -p /run/sshd && /usr/sbin/sshd -p ${SSH_PORT}; sleep infinity'"
fi
  
# Tell the operator how to enter the container on this node.
echo "[INFO] All containers are ready!"
echo "[INFO] To access the container on the current node, run:"
echo
echo "       docker exec -it ${CONTAINER_NAME} bash"
echo