"git@developer.sourcefind.cn:change/sglang.git" did not exist on "41d71ca48834fa64c727f9b63c414dcaf3d01d80"
Commit 920cc934 authored by rusty1s's avatar rusty1s
Browse files

graclus cuda, cleanup old code

parent d2cc3162
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/THCGraclus.cu"
#else
// Weighted graclus matching on the GPU, instantiated once per `real` type.
//   self:     per-node color/cluster assignment (int64, updated in place)
//   row/col:  edge index in COO form
//   weight:   per-edge weights of type `real`
// Alternates a coloring pass with a propose/response round until
// THCudaLongTensor_color reports completion (returns true).
// NOTE(review): `prop`, `degree` and `cumDegree` used in the loop body are
// presumably set up by the THC_TENSOR_GRACLUS macro (defined elsewhere) —
// verify against its definition.
void THCTensor_(graclus)(THCState *state, THCudaLongTensor *self, THCudaLongTensor *row,
THCudaLongTensor *col, THCTensor *weight) {
THCAssertSameGPU(THCTensor_(checkGPU)(state, 4, self, row, col, weight));
THC_TENSOR_GRACLUS(state, self, row,
while(!THCudaLongTensor_color(state, self)) {
THCTensor_(propose)(state, self, prop, row, col, weight, degree, cumDegree);
THCTensor_(response)(state, self, prop, row, col, weight, degree, cumDegree);
}
)
}
#endif // THC_GENERIC_FILE
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/THCGraclus.h"
#else
// Prototype for the type-generic weighted graclus entry point implemented in
// generic/THCGraclus.cu; instantiated once per `real` type.
void THCTensor_(graclus)(THCState *state, THCudaLongTensor *self, THCudaLongTensor *row,
THCudaLongTensor *col, THCTensor *weight);
#endif // THC_GENERIC_FILE
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/THCGrid.cu"
#else
// Voxel-grid clustering on the GPU, instantiated once per `real` type.
// Writes one cluster id per node into `self` by launching gridKernel over
// all `self` elements; `pos` holds node positions, `size` the voxel extent,
// `count` auxiliary per-dimension counts consumed by the kernel.
void THCTensor_(grid)(THCState *state, THCudaLongTensor *self, THCTensor *pos, THCTensor *size,
                      THCudaLongTensor *count) {
  THCAssertSameGPU(THCTensor_(checkGPU)(state, 4, self, pos, size, count));
  // One kernel work item per entry of the output tensor.
  ptrdiff_t numNodes = THCudaLongTensor_nElement(state, self);
  int64_t *clusterData = THCudaLongTensor_data(state, self);
  int64_t *countData = THCudaLongTensor_data(state, count);
  real *sizeData = THCTensor_(data)(state, size);
  // Position tensor goes to the device as a TensorInfo descriptor so the
  // kernel can index it by size/stride.
  TensorInfo<real> posInfo = THCTensor_(getTensorInfo)(state, pos);
  KERNEL_REAL_RUN(gridKernel, numNodes, clusterData, posInfo, sizeData, countData);
}
#endif // THC_GENERIC_FILE
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/THCGrid.h"
#else
// Prototype for the type-generic voxel-grid clustering entry point implemented
// in generic/THCGrid.cu; instantiated once per `real` type.
void THCTensor_(grid)(THCState *state, THCudaLongTensor *self, THCTensor *pos, THCTensor *size,
THCudaLongTensor *count);
#endif // THC_GENERIC_FILE
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/THCPropose.cuh"
#else
// Propose step of one graclus matching round: launches weightedProposeKernel
// with one work item per entry of `color`, passing the raw data pointers of
// the color/proposal arrays, the COO edge index, the edge weights, and the
// per-node degree / cumulative-degree arrays.
// NOTE(review): the kernel itself is defined elsewhere; presumably each
// uncolored node proposes a neighbor here — verify against the kernel source.
void THCTensor_(propose)(THCState *state, THCudaLongTensor *color, THCudaLongTensor *prop,
THCudaLongTensor *row, THCudaLongTensor *col, THCTensor *weight,
THCudaLongTensor *degree, THCudaLongTensor *cumDegree) {
KERNEL_REAL_RUN(weightedProposeKernel, THCudaLongTensor_nElement(state, color),
THCudaLongTensor_data(state, color), THCudaLongTensor_data(state, prop),
THCudaLongTensor_data(state, row), THCudaLongTensor_data(state, col),
THCTensor_(data)(state, weight), THCudaLongTensor_data(state, degree),
THCudaLongTensor_data(state, cumDegree));
}
#endif // THC_GENERIC_FILE
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/THCResponse.cuh"
#else
// Response step of one graclus matching round: launches weightedResponseKernel
// with the same argument layout as the propose step (see THCPropose.cuh).
// NOTE(review): the kernel is defined elsewhere; presumably nodes accept one
// of the proposals recorded in `prop` here — verify against the kernel source.
void THCTensor_(response)(THCState *state, THCudaLongTensor *color, THCudaLongTensor *prop,
THCudaLongTensor *row, THCudaLongTensor *col, THCTensor *weight,
THCudaLongTensor *degree, THCudaLongTensor *cumDegree) {
KERNEL_REAL_RUN(weightedResponseKernel, THCudaLongTensor_nElement(state, color),
THCudaLongTensor_data(state, color), THCudaLongTensor_data(state, prop),
THCudaLongTensor_data(state, row), THCudaLongTensor_data(state, col),
THCTensor_(data)(state, weight), THCudaLongTensor_data(state, degree),
THCudaLongTensor_data(state, cumDegree));
}
#endif // THC_GENERIC_FILE
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/common.cuh"
#else
// Build a TensorInfo descriptor (raw data pointer, rank, and per-dimension
// size/stride) for `tensor`, suitable for passing to device kernels by value.
TensorInfo<real> THCTensor_(getTensorInfo)(THCState *state, THCTensor *tensor) {
  TensorInfo<real> info = TensorInfo<real>();
  info.data = THCTensor_(data)(state, tensor);
  info.dims = THCTensor_(nDimension)(state, tensor);
  // Copy the shape metadata dimension by dimension.
  for (ptrdiff_t dim = 0; dim < info.dims; ++dim) {
    info.size[dim] = THCTensor_(size)(state, tensor, dim);
    info.stride[dim] = THCTensor_(stride)(state, tensor, dim);
  }
  return info;
}
#endif // THC_GENERIC_FILE
#include <THC/THC.h>
#include "THC.h"
#define THCCTensor_(NAME) TH_CONCAT_4(THCC,Real,Tensor_,NAME)
extern THCState *state;
// Python-facing entry point for the unweighted graclus variant; forwards to
// the THC implementation using the module-global THCState.
// NOTE(review): calls `THCTensor_graclus` (no per-type prefix) without a
// weight argument — presumably a separate unweighted implementation defined
// elsewhere; verify it exists and matches this signature.
void THCCTensor_graclus(THCudaLongTensor *self, THCudaLongTensor *row, THCudaLongTensor *col) {
THCTensor_graclus(state, self, row, col);
}
#include "generic/THCCGraclus.c"
#include "THCGenerateAllTypes.h"
// Prototypes for the graclus wrappers: the unweighted entry point plus the
// per-scalar-type weighted variants generated from generic/THCCGraclus.c via
// THCGenerateAllTypes.h. Note: the float variant takes THCudaTensor, the
// default CUDA float tensor type.
void THCCTensor_graclus(THCudaLongTensor *self, THCudaLongTensor *row, THCudaLongTensor *col);
void THCCByteTensor_graclus(THCudaLongTensor *self, THCudaLongTensor *row, THCudaLongTensor *col, THCudaByteTensor *weight);
void THCCCharTensor_graclus(THCudaLongTensor *self, THCudaLongTensor *row, THCudaLongTensor *col, THCudaCharTensor *weight);
void THCCShortTensor_graclus(THCudaLongTensor *self, THCudaLongTensor *row, THCudaLongTensor *col, THCudaShortTensor *weight);
void THCCIntTensor_graclus(THCudaLongTensor *self, THCudaLongTensor *row, THCudaLongTensor *col, THCudaIntTensor *weight);
void THCCLongTensor_graclus(THCudaLongTensor *self, THCudaLongTensor *row, THCudaLongTensor *col, THCudaLongTensor *weight);
void THCCFloatTensor_graclus(THCudaLongTensor *self, THCudaLongTensor *row, THCudaLongTensor *col, THCudaTensor *weight);
void THCCDoubleTensor_graclus(THCudaLongTensor *self, THCudaLongTensor *row, THCudaLongTensor *col, THCudaDoubleTensor *weight);
#include <THC/THC.h>
#include "THC.h"
#define THCCTensor_(NAME) TH_CONCAT_4(THCC,Real,Tensor_,NAME)
extern THCState *state;
#include "generic/THCCGrid.c"
#include "THCGenerateAllTypes.h"
// Prototypes for the per-scalar-type voxel-grid wrappers generated from
// generic/THCCGrid.c via THCGenerateAllTypes.h. Note: the float variant takes
// THCudaTensor, the default CUDA float tensor type.
void THCCByteTensor_grid(THCudaLongTensor *self, THCudaByteTensor *pos, THCudaByteTensor *size, THCudaLongTensor *count);
void THCCCharTensor_grid(THCudaLongTensor *self, THCudaCharTensor *pos, THCudaCharTensor *size, THCudaLongTensor *count);
void THCCShortTensor_grid(THCudaLongTensor *self, THCudaShortTensor *pos, THCudaShortTensor *size, THCudaLongTensor *count);
void THCCIntTensor_grid(THCudaLongTensor *self, THCudaIntTensor *pos, THCudaIntTensor *size, THCudaLongTensor *count);
void THCCLongTensor_grid(THCudaLongTensor *self, THCudaLongTensor *pos, THCudaLongTensor *size, THCudaLongTensor *count);
void THCCFloatTensor_grid(THCudaLongTensor *self, THCudaTensor *pos, THCudaTensor *size, THCudaLongTensor *count);
void THCCDoubleTensor_grid(THCudaLongTensor *self, THCudaDoubleTensor *pos, THCudaDoubleTensor *size, THCudaLongTensor *count);
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/THCCGraclus.c"
#else
// Type-generic Python-facing wrapper for weighted graclus: forwards to the
// THC implementation, supplying the module-global THCState.
void THCCTensor_(graclus)(THCudaLongTensor *self, THCudaLongTensor *row, THCudaLongTensor *col,
THCTensor *weight) {
THCTensor_(graclus)(state, self, row, col, weight);
}
#endif // THC_GENERIC_FILE
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/THCCGrid.c"
#else
// Type-generic Python-facing wrapper for voxel-grid clustering: forwards to
// the THC implementation, supplying the module-global THCState.
void THCCTensor_(grid)(THCudaLongTensor *self, THCTensor *pos, THCTensor *size,
THCudaLongTensor *count) {
THCTensor_(grid)(state, self, pos, size, count);
}
#endif // THC_GENERIC_FILE
import torch
import cluster_cpu
import cluster_cuda
def grid(pos, size, start=None, end=None):
    """Compute voxel-grid cluster ids for the positions in `pos`.

    Dispatches to the CUDA or CPU backend based on where `pos` lives.
    `start` and `end` default to the per-dimension min/max of `pos`.
    """
    backend = cluster_cuda if pos.is_cuda else cluster_cpu
    if start is None:
        start = pos.t().min(dim=1)[0]
    if end is None:
        end = pos.t().max(dim=1)[0]
    return backend.grid(pos, size, start, end)
def graclus(row, col, num_nodes):
    """Cluster the graph given as COO edges (`row`, `col`) via graclus.

    Dispatches to the CUDA or CPU backend based on the device of `row`.
    """
    # Bug fix: the original tested `pos.is_cuda`, but no `pos` exists in this
    # scope, so every call raised NameError. Dispatch on `row` instead.
    lib = cluster_cuda if row.is_cuda else cluster_cpu
    return lib.graclus(row, col, num_nodes)
# --- Smoke test ------------------------------------------------------------
# Run both clustering ops on tiny hand-built CUDA inputs and print results.
device = torch.device('cuda')
# Four 2D points on the diagonal, clustered with a 2x2 voxel size.
pos = torch.tensor([[1, 1], [3, 3], [5, 5], [7, 7]], device=device)
size = torch.tensor([2, 2], device=device)
print('pos', pos.tolist())
print('size', size.tolist())
cluster = grid(pos, size)
print('result', cluster.tolist(), cluster.dtype, cluster.device)
print('-----------------')
# Small graph in COO form; each undirected edge appears in both directions.
row = torch.tensor([0, 0, 1, 1, 1, 2, 2, 2, 3, 3], device=device)
col = torch.tensor([1, 2, 0, 2, 3, 0, 1, 3, 1, 2], device=device)
print('row', row.tolist())
print('col', col.tolist())
cluster = graclus(row, col, 4)
print('result', cluster.tolist(), cluster.dtype, cluster.device)
// CPU extension module: expose the reference graclus and grid implementations
// to Python via pybind11.
// NOTE(review): the .cpp implementation files are #included directly rather
// than compiled as separate translation units — presumably an intentional
// single-TU extension build; verify against the build script.
#include <torch/torch.h>
#include "graclus.cpp"
#include "grid.cpp"
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("graclus", &graclus, "Graclus (CPU)");
m.def("grid", &grid, "Grid (CPU)");
}
#include <torch/torch.h>
#include "../include/degree.cpp"
#include "../include/loop.cpp"
#include "../include/perm.cpp"
// Greedy CPU graclus matching. After dropping self-loops and randomly
// permuting the edges, scan the edge list node by node: each still-unmatched
// node pairs with its first still-unmatched neighbor, and both receive
// cluster id min(node, neighbor); nodes left without a partner keep their own
// id. Returns one int64 cluster id per node.
// NOTE(review): the `e_idx += deg_data[r]` walk assumes the edge list is
// grouped by `row` after randperm — confirm randperm preserves that grouping.
at::Tensor graclus(at::Tensor row, at::Tensor col, int num_nodes) {
std::tie(row, col) = remove_self_loops(row, col);
std::tie(row, col) = randperm(row, col, num_nodes);
// Out-degree per node, used to jump over a node's whole edge block.
auto deg = degree(row, num_nodes, row.type().scalarType());
// -1 marks "not yet matched". (Old ATen signature: type, sizes, fill value.)
auto cluster = at::full(row.type(), {num_nodes}, -1);
auto *row_data = row.data<int64_t>();
auto *col_data = col.data<int64_t>();
auto *deg_data = deg.data<int64_t>();
auto *cluster_data = cluster.data<int64_t>();
int64_t e_idx = 0, d_idx, r, c;
while (e_idx < row.size(0)) {
r = row_data[e_idx];
if (cluster_data[r] < 0) {
// Tentatively make r a singleton; overwritten if a partner is found.
cluster_data[r] = r;
for (d_idx = 0; d_idx < deg_data[r]; d_idx++) {
c = col_data[e_idx + d_idx];
if (cluster_data[c] < 0) {
// Match r with its first unmatched neighbor c.
cluster_data[r] = std::min(r, c);
cluster_data[c] = std::min(r, c);
break;
}
}
}
// Advance past all of r's edges, matched or not.
e_idx += deg_data[r];
}
return cluster;
}
#include <torch/torch.h>
// Voxel-grid clustering on CPU: map each point in `pos` (N x D, per the
// view({1, -1}) broadcasts and sum over dim 1) to an integer id of the voxel
// of extent `size` it falls into within the bounding box [start, end].
at::Tensor grid(at::Tensor pos, at::Tensor size, at::Tensor start,
at::Tensor end) {
// Work in the dtype of `pos`.
size = size.toType(pos.type());
start = start.toType(pos.type());
end = end.toType(pos.type());
// Shift positions so the bounding box starts at the origin.
pos = pos - start.view({1, -1});
// Per-dimension voxel counts, turned into multipliers for flattening the
// D-dimensional voxel coordinate into a single id.
// NOTE(review): this uses cumsum rather than the usual cumprod of
// per-dimension extents — verify the resulting ids are unique as intended.
auto num_voxels = ((end - start) / size).toType(at::kLong);
num_voxels = (num_voxels + 1).cumsum(0);
num_voxels -= num_voxels.data<int64_t>()[0];
num_voxels.data<int64_t>()[0] = 1;
// Integer voxel coordinate per dimension, then flatten via the multipliers.
auto cluster = pos / size.view({1, -1});
cluster = cluster.toType(at::kLong);
cluster *= num_voxels.view({1, -1});
cluster = cluster.sum(1);
return cluster;
}
#include <torch/torch.h>
// Host-side prototypes for the CUDA clustering entry points, implemented in
// the corresponding .cu translation units and registered with pybind11 below.
at::Tensor grid(at::Tensor pos, at::Tensor size, at::Tensor start,
at::Tensor end);
at::Tensor graclus(at::Tensor row, at::Tensor col, int num_nodes);
at::Tensor weighted_graclus(at::Tensor row, at::Tensor col, at::Tensor weight,
int num_nodes);
// CUDA extension module: register the GPU clustering entry points with Python.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("grid", &grid, "Grid (CUDA)");
  m.def("graclus", &graclus, "Graclus (CUDA)");
  // Fixed user-visible docstring typo: "Weightes" -> "Weighted".
  m.def("weighted_graclus", &weighted_graclus, "Weighted Graclus (CUDA)");
}
#pragma once
#include <ATen/ATen.h>
#include "common.cuh"
#define BLUE_PROB 0.53406
// One grid-stride pass over the cluster array.
// NOTE(review): the loop body is empty — this kernel is a stub; the random
// blue/red coloring logic (cf. BLUE_PROB above) has not been implemented yet.
__global__ void color_kernel(int64_t *cluster, size_t num_nodes) {
const size_t index = blockIdx.x * blockDim.x + threadIdx.x;
const size_t stride = blockDim.x * gridDim.x;
for (ptrdiff_t i = index; i < num_nodes; i += stride) {
}
}
// Launch one coloring pass over `cluster` (one thread per entry).
// Bug fix: the original launched `color_kernel<scalar_t><<<...>>>`, but
// color_kernel is not a template and no `scalar_t` is in scope here, so the
// explicit template-argument list could not compile. The kernel takes the
// int64 cluster data directly, so no instantiation is needed.
// NOTE(review): always returns true and checks no launch error
// (cudaGetLastError); the graclus driver loop relies on the real completion
// signal coming from elsewhere — verify once the kernel is implemented.
inline bool color(at::Tensor cluster) {
  color_kernel<<<BLOCKS(cluster.size(0)), THREADS>>>(
      cluster.data<int64_t>(), cluster.size(0));
  return true;
}
#pragma once
#include <ATen/ATen.h>
#define THREADS 1024
#define BLOCKS(N) (N + THREADS - 1) / THREADS
// Out-degree of each of `num_nodes` nodes: scatter-add a one into the output
// for every occurrence of a node id in `index`. Result has the dtype of
// `index` and shape {num_nodes}.
inline at::Tensor degree(at::Tensor index, int num_nodes) {
  auto result = at::zeros(index.type(), {num_nodes});
  auto ones = at::ones(index.type(), {index.size(0)});
  return result.scatter_add_(0, index, ones);
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment