tensor_tools.cpp 29.1 KB
Newer Older
1
2
3
4
5
6
// Copyright (C) 2015  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#ifndef DLIB_TeNSOR_TOOLS_CPP_
#define DLIB_TeNSOR_TOOLS_CPP_

#include "tensor_tools.h"
Davis King's avatar
Davis King committed
7
#include "../string.h"
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#include <atomic>

namespace dlib
{
    namespace
    {
        // Process-wide switch: true means prefer the fastest DNN algorithms,
        // false means prefer the most memory-conservative ones.  Wrapped in a
        // function so initialization is lazy and thread-safe.
        std::atomic<bool>& prefer_fastest_flag (
        )
        {
            static std::atomic<bool> flag{true};
            return flag;
        }
    }

    // Returns true if the library should pick speed-optimized algorithms.
    bool dnn_prefer_fastest_algorithms (
    )
    {
        return prefer_fastest_flag().load();
    }

    // Request speed-optimized algorithm selection (the default).
    void set_dnn_prefer_fastest_algorithms(
    )
    {
        prefer_fastest_flag().store(true);
    }

    // Request memory-optimized algorithm selection.
    void set_dnn_prefer_smallest_algorithms(
    )
    {
        prefer_fastest_flag().store(false);
    }
}
40
41
42
43

namespace dlib { namespace tt
{

Davis King's avatar
Davis King committed
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
// ----------------------------------------------------------------------------------------

    // Computes, per sample (row of mat(data)), 1/sqrt(sum of squares + eps).
    // Fix: removed web-scrape artifacts (bare line numbers, commit metadata)
    // that were embedded in the function bodies and broke compilation.
    void inverse_norms (
        resizable_tensor& invnorms,
        const tensor& data,
        const double eps
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::inverse_norms(invnorms, data, eps);
#else
        invnorms = reciprocal(sqrt(sum_cols(squared(mat(data))) + eps));
#endif
    }

    // Row-wise dot products: out(i) = dot(row i of lhs, row i of rhs).
    void dot_prods (
        resizable_tensor& out,
        const tensor& lhs,
        const tensor& rhs
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::dot_prods(out, lhs, rhs);
#else
        out = sum_cols(pointwise_multiply(mat(lhs), mat(rhs)));
#endif
    }

    // Same as above, but accumulates into out when add_to is true.
    void dot_prods (
        bool add_to,
        tensor& out,
        const tensor& lhs,
        const tensor& rhs
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::dot_prods(add_to, out, lhs, rhs);
#else
        if (add_to)
            out += sum_cols(pointwise_multiply(mat(lhs), mat(rhs)));
        else
            out = sum_cols(pointwise_multiply(mat(lhs), mat(rhs)));
#endif
    }

89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
    // Scales each column of m by the corresponding element of the vector v,
    // writing the result to out (out must already have m's dimensions).
    void scale_columns (
        tensor& out,
        const tensor& m,
        const tensor& v
    )
    {
        DLIB_CASSERT(have_same_dimensions(out,m));
        DLIB_CASSERT(is_vector(v));
        // Empty inputs are a no-op.
        if (m.size() == 0 && v.size() == 0)
            return;
        DLIB_CASSERT(m.size() != 0);
        // v must have one entry per column of mat(m).
        DLIB_CASSERT(m.size()/m.num_samples() == v.size());

#ifdef DLIB_USE_CUDA
        cuda::scale_columns(out, m, v);
#else
        out = scale_columns(mat(m), mat(v));
#endif
    }

Davis King's avatar
Davis King committed
109
110
111
112
113
114
115
    // Scales each row (sample) of m by the corresponding element of the
    // vector v, writing the result to out.
    // Fix: removed embedded scrape artifacts that broke the function body.
    void scale_rows (
        tensor& out,
        const tensor& m,
        const tensor& v
    )
    {
        DLIB_CASSERT(have_same_dimensions(out,m));
        DLIB_CASSERT(is_vector(v));
        // Empty inputs are a no-op.
        if (m.size() == 0 && v.size() == 0)
            return;
        DLIB_CASSERT(m.size() != 0);
        // One scale factor per sample.
        DLIB_CASSERT(m.num_samples() == static_cast<long long>(v.size()));

#ifdef DLIB_USE_CUDA
        cuda::scale_rows(out, m, v);
#else
        out = scale_rows(mat(m), mat(v));
#endif
    }

    void scale_rows2 (
        float beta, 
        tensor& out,
        const tensor& m1,
        const tensor& m2,
        const tensor& v1,
        const tensor& v2
    )
    {
        DLIB_CASSERT(have_same_dimensions(out,m1));
        DLIB_CASSERT(have_same_dimensions(out,m2));
        DLIB_CASSERT(have_same_dimensions(v1,v2));
        DLIB_CASSERT(is_vector(mat(v1))); 
142
        DLIB_CASSERT(static_cast<long long>(v1.size()) == m1.num_samples());
Davis King's avatar
Davis King committed
143
144
145
146
147
148
149
150
151
152

#ifdef DLIB_USE_CUDA
        cuda::scale_rows2(beta, out, m1, m2, v1, v2);
#else
        if (beta == 0)
            out = scale_rows(mat(m1) - scale_rows(mat(m2),mat(v1)), mat(v2));
        else
            out = beta*mat(out) + scale_rows(mat(m1) - scale_rows(mat(m2),mat(v1)), mat(v2));
#endif
    }
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200

// ----------------------------------------------------------------------------------------

    // Element-wise exponential: dest = exp(src).  dest must already be the
    // same size as src.
    void exp (
        tensor& dest,
        const tensor& src
    )
    {
        DLIB_CASSERT(dest.size() == src.size());

#ifdef DLIB_USE_CUDA
        cuda::exp(dest,src);
#else
        dest = exp(mat(src));
#endif
    }

// ----------------------------------------------------------------------------------------

    // Element-wise natural logarithm: dest = log(src).
    void log (
        tensor& dest,
        const tensor& src
    )
    {
        DLIB_CASSERT(dest.size() == src.size());

#ifdef DLIB_USE_CUDA
        cuda::log(dest,src);
#else
        dest = log(mat(src));
#endif
    }

// ----------------------------------------------------------------------------------------

    // Element-wise base-10 logarithm: dest = log10(src).
    void log10 (
        tensor& dest,
        const tensor& src
    )
    {
        DLIB_CASSERT(dest.size() == src.size());

#ifdef DLIB_USE_CUDA
        cuda::log10(dest,src);
#else
        dest = log10(mat(src));
#endif
    }
Davis King's avatar
Davis King committed
201

202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
// ----------------------------------------------------------------------------------------

    void gemm (
        float beta,
        tensor& dest,
        float alpha,
        const tensor& lhs,
        bool trans_lhs,
        const tensor& rhs,
        bool trans_rhs
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::gemm(beta, dest, alpha, lhs, trans_lhs, rhs, trans_rhs);
#else
217
218
219
220
221
222
223
224
225
226
227
        if (beta != 0)
        {
            if (trans_lhs && trans_rhs)
                dest = alpha*trans(mat(lhs))*trans(mat(rhs)) + beta*mat(dest);
            else if (!trans_lhs && trans_rhs)
                dest = alpha*mat(lhs)*trans(mat(rhs)) + beta*mat(dest);
            else if (trans_lhs && !trans_rhs)
                dest = alpha*trans(mat(lhs))*mat(rhs) + beta*mat(dest);
            else
                dest = alpha*mat(lhs)*mat(rhs) + beta*mat(dest);
        }
228
        else
229
230
231
232
233
234
235
236
237
238
        {
            if (trans_lhs && trans_rhs)
                dest = alpha*trans(mat(lhs))*trans(mat(rhs));
            else if (!trans_lhs && trans_rhs)
                dest = alpha*mat(lhs)*trans(mat(rhs));
            else if (trans_lhs && !trans_rhs)
                dest = alpha*trans(mat(lhs))*mat(rhs);
            else
                dest = alpha*mat(lhs)*mat(rhs);
        }
239
240
241
242
243
244
245
246
247
#endif
    }

// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------

    tensor_rand::
    tensor_rand(
        unsigned long long seed
Davis King's avatar
Davis King committed
248
249
250
251
252
253
    ) 
#ifdef DLIB_USE_CUDA
    :rnd(seed){}
#else
    {rnd.set_seed(cast_to_string(seed)); }
#endif
254
255
256
257
258
259
260
261

    void tensor_rand::
    fill_gaussian (
        tensor& data,
        float mean,
        float stddev
    )
    {
262
        DLIB_CASSERT(data.size()%2 == 0);
263
#ifdef DLIB_USE_CUDA
Davis King's avatar
Davis King committed
264
        rnd.fill_gaussian(data, mean, stddev);
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
#else
        for (auto& x : data) 
            x = rnd.get_random_gaussian()*stddev + mean;
#endif
    }

    void tensor_rand::
    fill_uniform (
        tensor& data
    )
    {
#ifdef DLIB_USE_CUDA
        rnd.fill_uniform(data);
#else
        for (auto& x : data) 
            x = rnd.get_random_float();
#endif
    }

// ----------------------------------------------------------------------------------------
285
286
287
// ----------------------------------------------------------------------------------------

    void multiply (
288
        bool add_to,
289
290
291
292
293
        tensor& dest,
        const tensor& src1,
        const tensor& src2
    )
    {
294
295
        DLIB_CASSERT(dest.k() == src1.k() && src1.k() == src2.k() &&
            dest.nr() == src1.nr() && src1.nr() == src2.nr() &&
296
            dest.nc() == src1.nc() && src1.nc() == src2.nc() );
297
298
299
        const long MD = std::max(std::max(dest.num_samples(),src1.num_samples()),src2.num_samples());
        DLIB_CASSERT((dest.num_samples()==1 || dest.num_samples()==MD) &&
                    (src1.num_samples()==1 || src1.num_samples()==MD) &&
300
                    (src2.num_samples()==1 || src2.num_samples()==MD) );
301
#ifdef DLIB_USE_CUDA
302
        cuda::multiply(add_to, dest, src1, src2);
303
#else
304
        cpu::multiply(add_to, dest, src1, src2);
305
306
307
308
#endif

    }

Davis King's avatar
Davis King committed
309
310
311
312
313
314
315
316
317
318
319
320
321
322
    // Scales each channel of src by the corresponding entry of scales,
    // writing (or accumulating, when add_to) the result into dest.
    void scale_channels (
        bool add_to,
        tensor& dest,
        const tensor& src,
        const tensor& scales
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::scale_channels(add_to, dest, src, scales);
#else
        cpu::scale_channels(add_to, dest, src, scales);
#endif
    }

323
    void multiply_conv (
324
        bool add_to,
325
326
327
328
329
330
        tensor& dest,
        const tensor& src1,
        const tensor& src2
    )
    {
#ifdef DLIB_USE_CUDA
331
        cuda::multiply_conv(add_to, dest, src1, src2);
332
#else
333
        cpu::multiply_conv(add_to, dest, src1, src2);
334
335
336
#endif
    }

Davis King's avatar
Davis King committed
337
338
339
340
341
342
343
344
345
346
347
348
349
350
    // Element-wise multiply where tensors of differing sizes are treated as
    // zero-padded to a common size; accumulates into dest when add_to is true.
    void multiply_zero_padded (
        bool add_to,
        tensor& dest,
        const tensor& src1,
        const tensor& src2
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::multiply_zero_padded(add_to, dest, src1, src2);
#else
        cpu::multiply_zero_padded(add_to, dest, src1, src2);
#endif
    }

351
352
353
// ----------------------------------------------------------------------------------------

    void affine_transform(
354
        tensor& dest,
355
356
357
358
359
360
        const tensor& src,
        const float A,
        const float B
    )
    {
#ifdef DLIB_USE_CUDA
361
        cuda::affine_transform(dest,src,A,B);
362
363
364
365
366
#else
        cpu::affine_transform(dest,src,A,B);
#endif
    }

367
368
369
370
371
372
373
374
375
376
377
378
379
    // Element-wise scaling: dest = A*src.  The CPU path reuses the two-term
    // overload with B=0.
    void affine_transform(
        tensor& dest,
        const tensor& src,
        const float A
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::affine_transform(dest,src,A);
#else
        cpu::affine_transform(dest,src,A,0);
#endif
    }

380
381
382
383
384
385
386
387
388
389
    // Element-wise combination: dest = A*src1 + B*src2 + C.
    // Fix: removed embedded scrape artifacts that broke the function body.
    void affine_transform(
        tensor& dest,
        const tensor& src1,
        const tensor& src2,
        const float A,
        const float B,
        const float C
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::affine_transform(dest,src1,src2,A,B,C);
#else
        cpu::affine_transform(dest,src1,src2,A,B,C);
#endif
    }

396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
    // Element-wise combination: dest = A*src1 + B*src2.  The CPU path reuses
    // the three-term overload with C=0.
    void affine_transform(
        tensor& dest,
        const tensor& src1,
        const tensor& src2,
        const float A,
        const float B
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::affine_transform(dest,src1,src2,A,B);
#else
        cpu::affine_transform(dest,src1,src2,A,B,0);
#endif
    }

411
412
413
414
415
416
417
418
419
420
421
422
    // Element-wise combination: dest = A*src1 + B*src2 + C*src3 + D.
    // Fix: removed embedded scrape artifacts that broke the function body.
    void affine_transform(
        tensor& dest,
        const tensor& src1,
        const tensor& src2,
        const tensor& src3,
        const float A,
        const float B,
        const float C,
        const float D
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::affine_transform(dest,src1,src2,src3,A,B,C,D);
#else
        cpu::affine_transform(dest,src1,src2,src3,A,B,C,D);
#endif
    }

429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
    // Three-source affine transform restricted to elements [begin, end).
    // Fix: removed embedded scrape artifacts that broke the function body.
    void affine_transform_range(
        size_t begin,
        size_t end,
        tensor& dest,
        const tensor& src1,
        const tensor& src2,
        const tensor& src3,
        const float A,
        const float B,
        const float C
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::affine_transform_range(begin, end, dest,src1,src2,src3,A,B,C);
#else
        cpu::affine_transform_range(begin, end, dest,src1,src2,src3,A,B,C);
#endif
    }

    // Three-source affine transform applied only within the given rectangle.
    // Fix: removed embedded scrape artifacts that broke the function body.
    void affine_transform(
        const rectangle& rect,
        tensor& dest, 
        const tensor& src1, 
        const tensor& src2, 
        const tensor& src3, 
        float A, 
        float B,
        float C
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::affine_transform(rect, dest,src1,src2,src3,A,B,C);
#else
        cpu::affine_transform(rect, dest,src1,src2,src3,A,B,C);
#endif
    }

    // Three-source affine transform over the whole tensor; implemented by
    // delegating to the range version over [0, dest.size()).
    void affine_transform(
        tensor& dest,
        const tensor& src1,
        const tensor& src2,
        const tensor& src3,
        const float A,
        const float B,
        const float C
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::affine_transform_range(0,dest.size(),dest,src1,src2,src3,A,B,C);
#else
        cpu::affine_transform_range(0,dest.size(),dest,src1,src2,src3,A,B,C);
#endif
    }

483
484
485
// ----------------------------------------------------------------------------------------

    void affine_transform(
486
        tensor& dest,
487
488
489
490
491
492
        const tensor& src,
        const tensor& A,
        const tensor& B
    )
    {
#ifdef DLIB_USE_CUDA
493
        cuda::affine_transform(dest,src,A,B);
494
495
496
497
498
#else
        cpu::affine_transform(dest,src,A,B);
#endif
    }

499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
// ----------------------------------------------------------------------------------------

    // Channel-wise affine transform as used after convolutional batch norm:
    // A and B supply one coefficient per channel (see cpu/cuda impls).
    void affine_transform_conv(
        tensor& dest,
        const tensor& src,
        const tensor& A,
        const tensor& B
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::affine_transform_conv(dest,src,A,B);
#else
        cpu::affine_transform_conv(dest,src,A,B);
#endif
    }

Davis King's avatar
Davis King committed
515
516
517
// ----------------------------------------------------------------------------------------

    void compute_adam_update (
518
519
        size_t begin,
        size_t end,
Davis King's avatar
Davis King committed
520
521
522
523
524
525
526
527
528
529
530
531
532
        tensor& s,
        tensor& m,
        tensor& v,
        const float t,
        const float learning_rate,
        const float weight_decay,
        const float momentum1,
        const float momentum2,
        const tensor& params,
        const tensor& params_grad
    )
    {
#ifdef DLIB_USE_CUDA
533
        cuda::compute_adam_update(begin, end, s, m, v, t, learning_rate, weight_decay, momentum1,
Davis King's avatar
Davis King committed
534
535
            momentum2, params, params_grad);
#else
536
        cpu::compute_adam_update(begin, end, s, m, v, t, learning_rate, weight_decay, momentum1,
Davis King's avatar
Davis King committed
537
538
539
540
            momentum2, params, params_grad);
#endif
    }

541
542
// ----------------------------------------------------------------------------------------

543
    void batch_normalize_inference (
544
        const double eps,
545
546
547
548
549
        resizable_tensor& dest,
        const tensor& src,
        const tensor& gamma, 
        const tensor& beta,
        const tensor& running_means,
550
        const tensor& running_variances
551
552
553
    )
    {
#ifdef DLIB_USE_CUDA
554
        cuda::batch_normalize_inference(eps,dest,src,gamma,beta,running_means,running_variances);
555
#else
556
        cpu::batch_normalize_inference(eps,dest,src,gamma,beta,running_means,running_variances);
557
558
559
#endif
    }

560
    void batch_normalize (
561
        const double eps,
562
563
564
        resizable_tensor& dest,
        resizable_tensor& means,
        resizable_tensor& vars,
565
566
        const double averaging_factor,
        resizable_tensor& running_means,
567
        resizable_tensor& running_variances,
568
569
570
571
572
573
        const tensor& src,
        const tensor& gamma, 
        const tensor& beta 
    )
    {
#ifdef DLIB_USE_CUDA
574
        cuda::batch_normalize(eps,dest,means,vars,averaging_factor,running_means,running_variances,src,gamma,beta);
575
#else
576
        cpu::batch_normalize(eps,dest,means,vars,averaging_factor,running_means,running_variances,src,gamma,beta);
577
578
579
580
#endif
    }

    void batch_normalize_gradient (
581
        const double eps,
582
583
584
585
586
587
588
589
590
591
592
593
            const tensor& gradient_input,
            const tensor& means,
            const tensor& invstds,
            const tensor& src,
            const tensor& gamma,
            tensor& src_grad,
            tensor& gamma_grad, 
            tensor& beta_grad 
    )
    {
             
#ifdef DLIB_USE_CUDA
594
        cuda::batch_normalize_gradient(eps,gradient_input, means, invstds, src, gamma, src_grad, gamma_grad, beta_grad);
595
#else
596
        cpu::batch_normalize_gradient(eps,gradient_input, means, invstds, src, gamma, src_grad, gamma_grad, beta_grad);
597
598
599
600
601
#endif
    }

// ----------------------------------------------------------------------------------------

602
    void batch_normalize_conv_inference (
603
        const double eps,
604
605
606
607
608
        resizable_tensor& dest,
        const tensor& src,
        const tensor& gamma, 
        const tensor& beta,
        const tensor& running_means,
609
        const tensor& running_variances
610
611
612
    )
    {
#ifdef DLIB_USE_CUDA
613
        cuda::batch_normalize_conv_inference(eps,dest,src,gamma,beta,running_means,running_variances);
614
#else
615
        cpu::batch_normalize_conv_inference(eps,dest,src,gamma,beta,running_means,running_variances);
616
617
618
#endif
    }

619
    void batch_normalize_conv (
620
        const double eps,
621
622
623
        resizable_tensor& dest,
        resizable_tensor& means,
        resizable_tensor& vars,
624
625
        const double averaging_factor,
        resizable_tensor& running_means,
626
        resizable_tensor& running_variances,
627
628
629
630
631
632
        const tensor& src,
        const tensor& gamma, 
        const tensor& beta 
    )
    {
#ifdef DLIB_USE_CUDA
633
        cuda::batch_normalize_conv(eps,dest,means,vars,averaging_factor,running_means,running_variances,src,gamma,beta);
634
#else
635
        cpu::batch_normalize_conv(eps,dest,means,vars,averaging_factor,running_means,running_variances,src,gamma,beta);
636
637
638
639
#endif
    }

    void batch_normalize_conv_gradient (
640
641
642
643
644
645
646
647
648
        const double eps,
        const tensor& gradient_input,
        const tensor& means,
        const tensor& invstds,
        const tensor& src,
        const tensor& gamma,
        tensor& src_grad,
        tensor& gamma_grad, 
        tensor& beta_grad 
649
650
651
652
    )
    {
             
#ifdef DLIB_USE_CUDA
653
        cuda::batch_normalize_conv_gradient(eps,gradient_input, means, invstds, src, gamma, src_grad, gamma_grad, beta_grad);
654
#else
655
        cpu::batch_normalize_conv_gradient(eps,gradient_input, means, invstds, src, gamma, src_grad, gamma_grad, beta_grad);
656
657
658
#endif
    }

659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
// ----------------------------------------------------------------------------------------

    // Layer normalization: normalizes src (per sample), writing the result to
    // dest and the computed means/vars for use by the backward pass.
    void layer_normalize (
        const double eps,
        resizable_tensor& dest,
        resizable_tensor& means,
        resizable_tensor& vars,
        const tensor& src,
        const tensor& gamma,
        const tensor& beta
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::layer_normalize(eps, dest, means, vars, src, gamma, beta);
#else
        cpu::layer_normalize(eps, dest, means, vars, src, gamma, beta);
#endif
    }

    // Backward pass for layer_normalize().
    // NOTE(review): unlike every other function in this file, this one calls
    // the cpu:: implementation unconditionally -- there is no
    // #ifdef DLIB_USE_CUDA dispatch.  Confirm whether
    // cuda::layer_normalize_gradient exists in this version; if so, this
    // should dispatch to it like the forward pass above does.
    void layer_normalize_gradient (
        const double eps,
            const tensor& gradient_input,
            const tensor& means,
            const tensor& invstds,
            const tensor& src,
            const tensor& gamma,
            tensor& src_grad,
            tensor& gamma_grad,
            tensor& beta_grad
    )
    {
        cpu::layer_normalize_gradient(eps, gradient_input, means, invstds, src, gamma, src_grad, gamma_grad, beta_grad);
    }

693
694
695
696
697
698
699
700
// ----------------------------------------------------------------------------------------

    // In-place threshold of data against thresh (see cpu/cuda impls for the
    // exact output encoding).
    // Fix: removed embedded scrape artifacts that broke the function body.
    void threshold (
        tensor& data,
        float thresh
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::threshold(data,thresh);
#else
        cpu::threshold(data,thresh);
#endif
    }

707
708
709
710
711
712
713
714
715
716
717
718
719
720
    // Accumulates the dot product of a and b into result at element idx.
    void dot (
        const tensor& a,
        const tensor& b,
        tensor& result,
        size_t idx
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::dot(a,b,result,idx);
#else
        cpu::dot(a,b,result,idx);
#endif
    }

721
722
723
724
725
726
727
728
729
730
731
732
// ----------------------------------------------------------------------------------------

    void add(
        float beta,
        tensor& dest,
        float alpha,
        const tensor& src
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::add(beta,dest,alpha,src);
#else
733
        cpu::add(beta,dest,alpha,src);
734
735
736
#endif
    }

737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
// ----------------------------------------------------------------------------------------

    // Computes dest = src1 + src2 (see cpu/cuda impls for the broadcasting
    // rules applied to mismatched dimensions).
    void add (
        tensor& dest,
        const tensor& src1,
        const tensor& src2
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::add(dest, src1, src2);
#else
        cpu::add(dest, src1, src2);
#endif
    }

752
753
// ----------------------------------------------------------------------------------------

754
    void assign_conv_bias_gradient (
755
756
757
758
759
        tensor& grad,
        const tensor& gradient_input
    )
    {
#ifdef DLIB_USE_CUDA
760
        cuda::assign_conv_bias_gradient(grad,gradient_input);
761
#else
762
        cpu::assign_conv_bias_gradient(grad,gradient_input);
763
764
765
#endif
    }

766
767
// ----------------------------------------------------------------------------------------

768
    void assign_bias_gradient (
769
770
771
772
773
        tensor& grad,
        const tensor& gradient_input
    )
    {
#ifdef DLIB_USE_CUDA
774
        cuda::assign_bias_gradient(grad,gradient_input);
775
#else
776
        cpu::assign_bias_gradient(grad,gradient_input);
777
778
779
#endif
    }

780
781
782
783
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------

    void softmax (
784
        tensor& dest,
785
786
787
788
789
790
        const tensor& src
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::softmax(dest,src);
#else
791
        cpu::softmax(dest,src);
792
793
794
795
796
#endif
    }

    void softmax_gradient (
        tensor& grad,
797
        const tensor& dest,
798
799
800
801
        const tensor& gradient_input
    )
    {
#ifdef DLIB_USE_CUDA
802
        cuda::softmax_gradient(grad, dest, gradient_input);
803
#else
804
        cpu::softmax_gradient(grad, dest, gradient_input);
805
806
807
#endif
    }

Davis King's avatar
Davis King committed
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
// ----------------------------------------------------------------------------------------

    // Softmax computed over all elements of each sample (as opposed to the
    // per-location softmax above).
    void softmax_all (
        tensor& dest,
        const tensor& src
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::softmax_all(dest,src);
#else
        cpu::softmax_all(dest,src);
#endif
    }

    // Backward pass for softmax_all().
    void softmax_all_gradient (
        tensor& grad,
        const tensor& dest,
        const tensor& gradient_input
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::softmax_all_gradient(grad, dest, gradient_input);
#else
        cpu::softmax_all_gradient(grad, dest, gradient_input);
#endif
    }

835
836
837
// ----------------------------------------------------------------------------------------

    void sigmoid (
838
        tensor& dest,
839
840
841
842
843
844
        const tensor& src
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::sigmoid(dest,src);
#else
845
        cpu::sigmoid(dest,src);
846
847
848
849
850
851
852
853
854
855
#endif
    }

    void sigmoid_gradient (
        tensor& grad,
        const tensor& dest,
        const tensor& gradient_input
    )
    {
#ifdef DLIB_USE_CUDA
856
        cuda::sigmoid_gradient(grad, dest, gradient_input);
857
#else
858
        cpu::sigmoid_gradient(grad, dest, gradient_input);
859
860
861
#endif
    }

thebhatman's avatar
thebhatman committed
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
// ----------------------------------------------------------------------------------------

    // Mish activation forward pass: dest = mish(src).
    void mish (
        tensor& dest,
        const tensor& src
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::mish(dest,src);
#else
        cpu::mish(dest,src);
#endif
    }

    // Mish backward pass.  Note: takes the forward *input* src (not the
    // output), unlike sigmoid/tanh gradients in this file.
    void mish_gradient (
        tensor& grad,
        const tensor& src,
        const tensor& gradient_input
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::mish_gradient(grad, src, gradient_input);
#else
        cpu::mish_gradient(grad, src, gradient_input);
#endif
    }

889
890
891
// ----------------------------------------------------------------------------------------

    void relu (
892
        tensor& dest,
893
894
895
896
897
898
        const tensor& src
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::relu(dest,src);
#else
899
        cpu::relu(dest,src);
900
901
902
903
904
905
906
907
908
909
#endif
    }

    void relu_gradient (
        tensor& grad,
        const tensor& dest,
        const tensor& gradient_input
    )
    {
#ifdef DLIB_USE_CUDA
910
        cuda::relu_gradient(grad, dest, gradient_input);
911
#else
912
        cpu::relu_gradient(grad, dest, gradient_input);
913
914
915
#endif
    }

Davis King's avatar
Davis King committed
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
// ----------------------------------------------------------------------------------------

    // Parametric ReLU forward pass with learned slope parameter param.
    void prelu (
        tensor& dest,
        const tensor& src,
        const tensor& param
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::prelu(dest, src, param);
#else
        cpu::prelu(dest, src, param);
#endif
    }

    // PReLU backward pass: accumulates gradients for both the input (grad)
    // and the slope parameter (params_grad).
    void prelu_gradient (
        tensor& grad,
        const tensor& src,
        const tensor& gradient_input,
        const tensor& param,
        tensor& params_grad 
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::prelu_gradient(grad, src, gradient_input, param, params_grad);
#else
        cpu::prelu_gradient(grad, src, gradient_input, param, params_grad);
#endif
    }

946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
// ----------------------------------------------------------------------------------------

    // Leaky ReLU forward pass with fixed negative slope alpha.
    void leaky_relu (
        tensor& dest,
        const tensor& src,
        const float alpha
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::leaky_relu(dest, src, alpha);
#else
        cpu::leaky_relu(dest, src, alpha);
#endif
    }

    // Leaky ReLU backward pass, using the forward output dest.
    void leaky_relu_gradient (
        tensor& grad,
        const tensor& dest,
        const tensor& gradient_input,
        const float alpha
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::leaky_relu_gradient(grad, dest, gradient_input, alpha);
#else
        cpu::leaky_relu_gradient(grad, dest, gradient_input, alpha);
#endif
    }

975
976
977
// ----------------------------------------------------------------------------------------

    void tanh (
978
        tensor& dest,
979
980
981
982
983
984
        const tensor& src
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::tanh(dest,src);
#else
985
        cpu::tanh(dest,src);
986
987
988
989
990
991
992
993
994
995
#endif
    }

    void tanh_gradient (
        tensor& grad,
        const tensor& dest,
        const tensor& gradient_input
    )
    {
#ifdef DLIB_USE_CUDA
996
        cuda::tanh_gradient(grad, dest, gradient_input);
997
#else
998
        cpu::tanh_gradient(grad, dest, gradient_input);
999
1000
1001
#endif
    }

1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
// ----------------------------------------------------------------------------------------

    // Clipped ReLU forward pass: ReLU with outputs capped at ceiling.
    void clipped_relu (
        tensor& dest,
        const tensor& src,
        const float ceiling
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::clipped_relu(dest, src, ceiling);
#else
        cpu::clipped_relu(dest, src, ceiling);
#endif
    }

    // Clipped ReLU backward pass, using the forward output dest.
    void clipped_relu_gradient (
        tensor& grad,
        const tensor& dest,
        const tensor& gradient_input,
        const float ceiling
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::clipped_relu_gradient(grad, dest, gradient_input, ceiling);
#else
        cpu::clipped_relu_gradient(grad, dest, gradient_input, ceiling);
#endif
    }

// ----------------------------------------------------------------------------------------

    // ELU forward pass with scale parameter alpha for the negative branch.
    void elu (
        tensor& dest,
        const tensor& src,
        const float alpha
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::elu(dest, src, alpha);
#else
        cpu::elu(dest, src, alpha);
#endif
    }

    // ELU backward pass, using the forward output dest.
    void elu_gradient (
        tensor& grad,
        const tensor& dest,
        const tensor& gradient_input,
        const float alpha
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::elu_gradient(grad, dest, gradient_input, alpha);
#else
        cpu::elu_gradient(grad, dest, gradient_input, alpha);
#endif
    }

1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
// ----------------------------------------------------------------------------------------

    // GELU activation forward pass: dest = gelu(src).
    void gelu (
        tensor& dest,
        const tensor& src
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::gelu(dest,src);
#else
        cpu::gelu(dest,src);
#endif
    }

    // GELU backward pass.  Note: takes the forward *input* src, like
    // mish_gradient above.
    void gelu_gradient (
        tensor& grad,
        const tensor& src,
        const tensor& gradient_input
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::gelu_gradient(grad, src, gradient_input);
#else
        cpu::gelu_gradient(grad, src, gradient_input);
#endif
    }

Adrià Arrufat's avatar
Adrià Arrufat committed
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
// ----------------------------------------------------------------------------------------

    // SMELU (smooth ReLU) forward pass; beta controls the smoothing region
    // and must be strictly positive.
    void smelu (
        tensor& dest,
        const tensor& src,
        const float beta
    )
    {
        DLIB_CASSERT(beta > 0);
#ifdef DLIB_USE_CUDA
        cuda::smelu(dest, src, beta);
#else
        cpu::smelu(dest, src, beta);
#endif
    }

    // SMELU backward pass, using the forward output dest.
    void smelu_gradient (
        tensor& grad,
        const tensor& dest,
        const tensor& gradient_input,
        const float beta
    )
    {
        DLIB_CASSERT(beta > 0);
#ifdef DLIB_USE_CUDA
        cuda::smelu_gradient(grad, dest, gradient_input, beta);
#else
        cpu::smelu_gradient(grad, dest, gradient_input, beta);
#endif
    }
1117
1118
1119
1120
// ----------------------------------------------------------------------------------------

    // Bilinear resize of src into dest using the given explicit row/channel
    // strides for each tensor.
    // Fix (both functions): removed embedded scrape artifacts that broke the
    // function bodies.
    void resize_bilinear (
        tensor& dest,
        long dest_row_stride,
        long dest_channel_stride,
        const tensor& src,
        long src_row_stride,
        long src_channel_stride
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::resize_bilinear(dest,dest_row_stride,dest_channel_stride, src,src_row_stride,src_channel_stride);
#else
        cpu::resize_bilinear(dest,dest_row_stride,dest_channel_stride, src,src_row_stride,src_channel_stride);
#endif
    }

    // Backward pass for resize_bilinear(): distributes gradient_input back
    // into grad.
    void resize_bilinear_gradient (
        tensor& grad,
        long grad_row_stride,
        long grad_channel_stride,
        const tensor& gradient_input,
        long gradient_input_row_stride,
        long gradient_input_channel_stride
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::resize_bilinear_gradient(grad,grad_row_stride,grad_channel_stride,  gradient_input,gradient_input_row_stride,gradient_input_channel_stride);
#else
        cpu::resize_bilinear_gradient(grad,grad_row_stride,grad_channel_stride,  gradient_input,gradient_input_row_stride,gradient_input_channel_stride);
#endif
    }

Adrià Arrufat's avatar
Adrià Arrufat committed
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
// ------------------------------------------------------------------------------------

    // Space-to-depth reorganization of src into dest with the given row and
    // column strides (as used by YOLO-style reorg layers).
    void reorg (
        tensor& dest,
        const int row_stride,
        const int col_stride,
        const tensor& src
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::reorg(dest, row_stride, col_stride, src);
#else
        cpu::reorg(dest, row_stride, col_stride, src);
#endif
    }

    // Backward pass for reorg(): scatters gradient_input back into grad.
    void reorg_gradient (
        tensor& grad,
        const int row_stride,
        const int col_stride,
        const tensor& gradient_input
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::reorg_gradient(grad, row_stride, col_stride, gradient_input);
#else
        cpu::reorg_gradient(grad, row_stride, col_stride, gradient_input);
#endif
    }

Fm's avatar
Fm committed
1181
1182
// ------------------------------------------------------------------------------------

1183
    void copy_tensor(
1184
            bool add_to,
1185
1186
1187
1188
1189
1190
1191
            tensor& dest,
            size_t dest_k_offset,
            const tensor& src,
            size_t src_k_offset,
            size_t count_k
    )
    {
Fm's avatar
Fm committed
1192
#ifdef DLIB_USE_CUDA
1193
        cuda::copy_tensor(add_to, dest, dest_k_offset, src, src_k_offset, count_k);
Fm's avatar
Fm committed
1194
#else
1195
        cpu::copy_tensor(add_to, dest, dest_k_offset, src, src_k_offset, count_k);
Fm's avatar
Fm committed
1196
#endif
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
    }

// ----------------------------------------------------------------------------------------

    // Computes the matrix inverse of m, storing the result in out.  The CUDA
    // build uses the member finv helper; the CPU build falls back to
    // dlib::inv on the matrix view.
    // Fix: removed embedded scrape artifacts that broke the function body.
    void inv::
    operator() (
        const tensor& m,
        resizable_tensor& out
    )
    {
#ifdef DLIB_USE_CUDA
        finv(m,out);
#else
        out = dlib::inv(mat(m));
#endif
    }
Fm's avatar
Fm committed
1213

1214
1215
1216
1217
1218
// ----------------------------------------------------------------------------------------

}}

#endif // DLIB_TeNSOR_TOOLS_CPP_