kmeans.py 4.73 KB
Newer Older
root's avatar
root committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import argparse
import contextlib
import time

import cupy
import matplotlib.pyplot as plt
import numpy


@contextlib.contextmanager
def timer(message):
    cupy.cuda.Stream.null.synchronize()
    start = time.time()
    yield
    cupy.cuda.Stream.null.synchronize()
    end = time.time()
    print('%s:  %f sec' % (message, end - start))


var_kernel = cupy.ElementwiseKernel(
    'T x0, T x1, T c0, T c1', 'T out',
    'out = (x0 - c0) * (x0 - c0) + (x1 - c1) * (x1 - c1)',
    'var_kernel'
)
sum_kernel = cupy.ReductionKernel(
    'T x, S mask', 'T out',
    'mask ? x : 0',
    'a + b', 'out = a', '0',
    'sum_kernel'
)
count_kernel = cupy.ReductionKernel(
    'T mask', 'float32 out',
    'mask ? 1.0 : 0.0',
    'a + b', 'out = a', '0.0',
    'count_kernel'
)


def fit_xp(X, n_clusters, max_iter):
    assert X.ndim == 2

    # Get NumPy or CuPy module from the supplied array.
    xp = cupy.get_array_module(X)

    n_samples = len(X)

    # Make an array to store the labels indicating which cluster each sample is
    # contained.
    pred = xp.zeros(n_samples)

    # Choose the initial centroid for each cluster.
    initial_indexes = xp.random.choice(n_samples, n_clusters, replace=False)
    centers = X[initial_indexes]

    for _ in range(max_iter):
        # Compute the new label for each sample.
        distances = xp.linalg.norm(X[:, None, :] - centers[None, :, :], axis=2)
        new_pred = xp.argmin(distances, axis=1)

        # If the label is not changed for each sample, we suppose the
        # algorithm has converged and exit from the loop.
        if xp.all(new_pred == pred):
            break
        pred = new_pred

        # Compute the new centroid for each cluster.
        i = xp.arange(n_clusters)
        mask = pred == i[:, None]
        sums = xp.where(mask[:, :, None], X, 0).sum(axis=1)
        counts = xp.count_nonzero(mask, axis=1).reshape((n_clusters, 1))
        centers = sums / counts

    return centers, pred


def fit_custom(X, n_clusters, max_iter):
    assert X.ndim == 2

    n_samples = len(X)

    pred = cupy.zeros(n_samples)

    initial_indexes = cupy.random.choice(n_samples, n_clusters, replace=False)
    centers = X[initial_indexes]

    for _ in range(max_iter):
        distances = var_kernel(X[:, None, 0], X[:, None, 1],
                               centers[None, :, 1], centers[None, :, 0])
        new_pred = cupy.argmin(distances, axis=1)
        if cupy.all(new_pred == pred):
            break
        pred = new_pred

        i = cupy.arange(n_clusters)
        mask = pred == i[:, None]
        sums = sum_kernel(X, mask[:, :, None], axis=1)
        counts = count_kernel(mask, axis=1).reshape((n_clusters, 1))
        centers = sums / counts

    return centers, pred


def draw(X, n_clusters, centers, pred, output):
    # Plot the samples and centroids of the fitted clusters into an image file.
    for i in range(n_clusters):
        labels = X[pred == i]
        plt.scatter(labels[:, 0], labels[:, 1], c=numpy.random.rand(3))
    plt.scatter(
        centers[:, 0], centers[:, 1], s=120, marker='s', facecolors='y',
        edgecolors='k')
    plt.savefig(output)


def run(gpuid, n_clusters, num, max_iter, use_custom_kernel, output):
    samples = numpy.random.randn(num, 2)
    X_train = numpy.r_[samples + 1, samples - 1]

    with timer(' CPU '):
        centers, pred = fit_xp(X_train, n_clusters, max_iter)

    with cupy.cuda.Device(gpuid):
        X_train = cupy.asarray(X_train)

        with timer(' GPU '):
            if use_custom_kernel:
                centers, pred = fit_custom(X_train, n_clusters, max_iter)
            else:
                centers, pred = fit_xp(X_train, n_clusters, max_iter)

        if output is not None:
            index = numpy.random.choice(10000000, 300, replace=False)
            draw(X_train[index].get(), n_clusters, centers.get(),
                 pred[index].get(), output)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu-id', '-g', default=0, type=int,
                        help='ID of GPU.')
    parser.add_argument('--n-clusters', '-n', default=2, type=int,
                        help='number of clusters')
    parser.add_argument('--num', default=5000000, type=int,
                        help='number of samples')
    parser.add_argument('--max-iter', '-m', default=10, type=int,
                        help='number of iterations')
    parser.add_argument('--use-custom-kernel', action='store_true',
                        default=False, help='use Elementwise kernel')
    parser.add_argument('--output-image', '-o', default=None, type=str,
                        help='output image file name')
    args = parser.parse_args()
    run(args.gpu_id, args.n_clusters, args.num, args.max_iter,
        args.use_custom_kernel, args.output_image)