// Copyright (C) 2015  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#ifndef DLIB_DNN_CuDA_H_
#define DLIB_DNN_CuDA_H_


#include "tensor.h"
8
#include "../geometry/rectangle.h"
9
10
11
12
13
14

namespace dlib
{
    namespace cuda 
    {

15
16
17
18
19
20
21
22
23
    // ----------------------------------------------------------------------------------------

        void set_device (
            int dev
        );

        int get_device (
        );

Davis King's avatar
Davis King committed
24
25
26
        int get_num_devices (
        );

27
28
29
30
31
32
33
        std::string get_device_name (
            int device
        );

        void set_current_device_blocking_sync(
        );

34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
        bool can_access_peer (int device_id, int peer_device_id);
        bool can_access_peer (const tensor& device, const tensor& peer_device);

        void device_synchronize (int dev);
        void device_synchronize (const tensor& dev);


        // Scoped device switch.  Each constructor stores the value returned by
        // get_device() and then calls set_device() with the requested device; the
        // destructor calls set_device() with the stored value.
        class raii_set_device
        {
        public:
            // Remember the device reported by get_device(), then select `dev`.
            raii_set_device(int dev) : prev_dev(get_device())
            {
                set_device(dev);
            }

            // Same as the int overload, selecting dev.device_id().
            raii_set_device(const tensor& dev) : prev_dev(get_device())
            {
                set_device(dev.device_id());
            }

            // A target device is mandatory and the object cannot be copied.
            raii_set_device() = delete;
            raii_set_device(const raii_set_device&) = delete;
            raii_set_device& operator=(const raii_set_device&) = delete;

            // Declared noexcept(false): an exception leaving set_device() is allowed
            // to propagate out of the destructor.
            ~raii_set_device() noexcept(false)
            {
                set_device(prev_dev);
            }

            // Select a different device.  prev_dev is not modified, so the destructor
            // still passes the value captured at construction to set_device().
            void operator() (int dev)
            {
                set_device(dev);
            }

            // Tensor flavour of the call operator; forwards dev.device_id().
            void operator() (const tensor& dev)
            {
                (*this)(dev.device_id());
            }

        private:
            // Value returned by get_device() when this object was constructed.
            int prev_dev;
        };


#ifdef DLIB_USE_CUDA

        // Helper object tied to a (device, peer device) pair.  Only the tensor-based
        // constructor has a body in this header; the int-based constructor and the
        // destructor are declared here and defined elsewhere.
        class enable_peer_access
        {
        public:

            // Construct from two device ids (definition not in this header).
            enable_peer_access(int device_id, int peer_device_id);

            // Convenience overload: delegates to the int-based constructor using the
            // device_id() of each tensor.
            enable_peer_access(const tensor& device, const tensor& peer_device)
                : enable_peer_access(device.device_id(), peer_device.device_id())
            {
            }

            // noexcept(false): the destructor is permitted to throw.
            ~enable_peer_access() noexcept(false);

            // Not default-constructible and not copyable.
            enable_peer_access() = delete;
            enable_peer_access(const enable_peer_access&) = delete;
            enable_peer_access& operator=(const enable_peer_access&) = delete;

        private:

            // NOTE(review): presumably records whether the destructor has to undo what
            // the constructor enabled -- confirm against the implementation file.
            bool call_disable;
            int device_id;
            int peer_device_id;
        };

110
    // -----------------------------------------------------------------------------------
Davis King's avatar
Davis King committed
111
112
113
114
115
116
117
118
119
120
121
122
123

        void inverse_norms (
            resizable_tensor& invnorms,
            const tensor& data,
            const double eps
        );

        void dot_prods (
            resizable_tensor& out,
            const tensor& lhs,
            const tensor& rhs
        );

124
125
126
127
128
129
        void scale_columns (
            tensor& out,
            const tensor& m,
            const tensor& v
        );

Davis King's avatar
Davis King committed
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
        void scale_rows (
            tensor& out,
            const tensor& m,
            const tensor& v
        );

        void scale_rows2 (
            float beta, 
            tensor& out,
            const tensor& m1,
            const tensor& m2,
            const tensor& v1,
            const tensor& v2
        );

145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
        void exp (
            tensor& dest,
            const tensor& src
        );

        void log (
            tensor& dest,
            const tensor& src
        );

        void log10 (
            tensor& dest,
            const tensor& src
        );

Davis King's avatar
Davis King committed
160
    // ------------------------------------------------------------------------------------
161
162
163
164
165
166
167
168
169
170
171
172

        void set_tensor (
            tensor& t,
            float value
        );

        void scale_tensor (
            tensor& t,
            float value
        );

    // ------------------------------------------------------------------------------------
173

174
        void multiply (
175
            bool add_to,
176
            tensor& dest,
177
178
179
180
            const tensor& src1,
            const tensor& src2
        );

181
        void multiply_conv (
182
            bool add_to,
183
184
185
186
187
            tensor& dest,
            const tensor& src1,
            const tensor& src2
        );

Davis King's avatar
Davis King committed
188
189
190
191
192
193
194
        void multiply_zero_padded (
            bool add_to,
            tensor& dest,
            const tensor& src1,
            const tensor& src2
        );

Davis King's avatar
Davis King committed
195
196
197
198
199
200
201
        void scale_channels (
            bool add_to,
            tensor& dest,
            const tensor& src,
            const tensor& scales
        );

202
203
        // Declaration only; the definition is not part of this header.
        // dest is the only non-const argument; src1 and src2 are read-only inputs.
        void add (
            tensor& dest,
            const tensor& src1,
            const tensor& src2
        );

208
209
210
    // -----------------------------------------------------------------------------------

        void affine_transform(
211
            tensor& dest,
212
213
214
215
216
            const tensor& src,
            const float A,
            const float B
        );

217
218
219
220
221
222
        void affine_transform(
            tensor& dest,
            const tensor& src,
            const float A
        );

223
224
225
226
227
228
229
230
231
        void affine_transform(
            tensor& dest,
            const tensor& src1,
            const tensor& src2,
            const float A,
            const float B,
            const float C
        );

232
233
234
235
236
237
238
239
        void affine_transform(
            tensor& dest,
            const tensor& src1,
            const tensor& src2,
            const float A,
            const float B
        );

240
241
242
243
244
245
246
247
248
249
250
        void affine_transform(
            tensor& dest,
            const tensor& src1,
            const tensor& src2,
            const tensor& src3,
            const float A,
            const float B,
            const float C,
            const float D
        );

251
252
253
254
255
256
257
258
259
260
261
262
        void affine_transform_range(
            size_t begin,
            size_t end,
            tensor& dest,
            const tensor& src1,
            const tensor& src2,
            const tensor& src3,
            const float A,
            const float B,
            const float C
        );

263
264
265
266
267
268
269
270
271
272
273
        void affine_transform(
            const rectangle& rect,
            tensor& dest, 
            const tensor& src1, 
            const tensor& src2, 
            const tensor& src3, 
            float A, 
            float B,
            float C
        );

274
275
276
277
278
279
280
        // Note that this function isn't in the tt:: namespace because add_scaled() is
        // called by cuda::add() so we don't need a tt:: version of add_scaled().  
        void add_scaled(
            tensor& dest,
            const float scale,
            const tensor& src
        );
281

Davis King's avatar
Davis King committed
282
283
284
285
286
287
288
        void add_cv_to_all_columns(
            float beta, 
            tensor& dest, 
            float alpha, 
            const tensor& src
        );

289
290
291
    // -----------------------------------------------------------------------------------

        void affine_transform(
292
            tensor& dest,
293
294
295
296
297
            const tensor& src,
            const tensor& A,
            const tensor& B
        );

298
299
300
301
302
303
304
305
306
    // -----------------------------------------------------------------------------------

        void affine_transform_conv(
            tensor& dest,
            const tensor& src,
            const tensor& A,
            const tensor& B
        );

Davis King's avatar
Davis King committed
307
308
309
    // ----------------------------------------------------------------------------------------

        void compute_adam_update (
310
311
            size_t begin,
            size_t end,
Davis King's avatar
Davis King committed
312
313
314
315
316
317
318
319
320
321
322
323
            tensor& s,
            tensor& m,
            tensor& v,
            const float t,
            const float learning_rate,
            const float weight_decay,
            const float momentum1,
            const float momentum2,
            const tensor& params,
            const tensor& params_grad
        );

324
325
    // -----------------------------------------------------------------------------------

326
        void assign_bias_gradient (
327
328
329
330
            tensor& grad,
            const tensor& gradient_input
        );

331
332
    // -----------------------------------------------------------------------------------

333
334
335
336
        void threshold (
            tensor& data,
            float thresh
        );
337

338
339
340
341
342
343
344
345
346
    // ----------------------------------------------------------------------------------------

        void dot (
            const tensor& a,
            const tensor& b,
            tensor& result,
            size_t idx
        );

Davis King's avatar
Davis King committed
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
    // ----------------------------------------------------------------------------------------

        void prelu (
            tensor& dest,
            const tensor& src,
            const tensor& param
        );

        void prelu_gradient (
            tensor& grad,
            const tensor& src,
            const tensor& gradient_input,
            const tensor& param,
            tensor& params_grad 
        );

363
364
365
366
367

    // ----------------------------------------------------------------------------------------

        // Declaration only; the definition is not part of this header.
        // Strided form: the caller supplies explicit row and channel strides for both
        // dest and src.
        // NOTE(review): strides are presumably measured in elements, not bytes --
        // confirm against the implementation.
        void resize_bilinear (
            tensor& dest,
            long dest_row_stride,
            long dest_channel_stride,
            const tensor& src,
            long src_row_stride,
            long src_channel_stride
        );

        // Declaration only; the definition is not part of this header.
        // Strided form: the caller supplies explicit row and channel strides for both
        // grad and gradient_input.
        // NOTE(review): strides are presumably measured in elements, not bytes --
        // confirm against the implementation.
        void resize_bilinear_gradient (
            tensor& grad,
            long grad_row_stride,
            long grad_channel_stride,
            const tensor& gradient_input,
            long gradient_input_row_stride,
            long gradient_input_channel_stride
        );

384
385
386
387
388
389
390
391
392
393
        // Convenience overload: calls the strided resize_bilinear() with strides taken
        // from the tensors themselves (row stride = nc(), channel stride = nr()*nc()).
        inline void resize_bilinear (
            tensor& dest,
            const tensor& src
        )
        {
            const long dst_row  = dest.nc();
            const long dst_chan = dest.nr()*dest.nc();
            const long src_row  = src.nc();
            const long src_chan = src.nr()*src.nc();
            resize_bilinear(dest, dst_row, dst_chan, src, src_row, src_chan);
        }

        // Convenience overload: calls the strided resize_bilinear_gradient() with
        // strides taken from the tensors themselves (row stride = nc(), channel
        // stride = nr()*nc()).
        inline void resize_bilinear_gradient (
            tensor& grad,
            const tensor& gradient_input
        )
        {
            const long grad_row  = grad.nc();
            const long grad_chan = grad.nr()*grad.nc();
            const long in_row    = gradient_input.nc();
            const long in_chan   = gradient_input.nr()*gradient_input.nc();
            resize_bilinear_gradient(grad, grad_row, grad_chan, gradient_input, in_row, in_chan);
        }

394
395
    // ----------------------------------------------------------------------------------------

Fm's avatar
Fm committed
396
        void copy_tensor(
397
            bool add_to,
398
399
400
401
402
            tensor& dest,
            size_t dest_k_offset,
            const tensor& src,
            size_t src_k_offset,
            size_t count_k
403
        );
404

405
    // ------------------------------------------------------------------------------------
406
407
408
409
410
411
412
    // ------------------------------------------------------------------------------------
    // ------------------------------------------------------------------------------------
    // ------------------------------------------------------------------------------------

#else // if DLIB_USE_CUDA NOT DEFINED

        inline void set_device (
Davis King's avatar
Davis King committed
413
414
415
416
417
            int id
        )
        {
            DLIB_CASSERT(id == 0, "dlib::cuda::set_device(id) called with an invalid device id.");
        }
418
419

        // Stub used when DLIB_USE_CUDA is not defined: always reports device 0.
        inline int get_device (
        )
        {
            return 0;
        }
421

Davis King's avatar
Davis King committed
422
        // Stub used when DLIB_USE_CUDA is not defined: always reports one device.
        inline int get_num_devices (
        )
        {
            return 1;
        }

425
426
427
428
        inline std::string get_device_name (
            int device
        ) 
        {
Davis King's avatar
Davis King committed
429
            DLIB_CASSERT(device == 0, "dlib::cuda::set_device(id) called with an invalid device id.");
430
431
432
433
434
435
436
            return "CUDA_DISABLED";
        }

        // Stub used when DLIB_USE_CUDA is not defined: intentionally does nothing.
        inline void set_current_device_blocking_sync()
        {
        }


Davis King's avatar
Davis King committed
437
        // Stub used when DLIB_USE_CUDA is not defined: both ids are ignored and the
        // answer is always false.
        inline bool can_access_peer (int , int )
        {
            return false;
        }
Davis King's avatar
Davis King committed
439
        // Stub used when DLIB_USE_CUDA is not defined: both tensors are ignored and
        // the answer is always false.
        inline bool can_access_peer (const tensor& , const tensor& )
        {
            return false;
        }

        // Stub used when DLIB_USE_CUDA is not defined: the id is ignored, nothing happens.
        inline void device_synchronize (int )
        {
        }
        // Stub used when DLIB_USE_CUDA is not defined: the tensor is ignored, nothing happens.
        inline void device_synchronize (const tensor& )
        {
        }

        // Stand-in for enable_peer_access used when DLIB_USE_CUDA is not defined.
        // Both constructors accept and ignore their arguments, and no destructor is
        // declared.
        class enable_peer_access
        {
        public:
            // Device-id form; arguments are unused.
            enable_peer_access(int, int)
            {
            }

            // Tensor form; arguments are unused.
            enable_peer_access(const tensor&, const tensor&)
            {
            }

            // Not default-constructible and not copyable.
            enable_peer_access() = delete;
            enable_peer_access(const enable_peer_access&) = delete;
            enable_peer_access& operator=(const enable_peer_access&) = delete;
        };

455
#endif // DLIB_USE_CUDA
456
457
458
459
460
461
462

    } 
}


#endif // DLIB_DNN_CuDA_H_