Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dlib
Commits
ccb1c95f
Commit
ccb1c95f
authored
Jan 31, 2015
by
Davis King
Browse files
Made dlib's built in fft faster by tweaking a few things and adding a twiddle
cache.
parent
eab9604d
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
119 additions
and
62 deletions
+119
-62
dlib/matrix/matrix_fft.h
dlib/matrix/matrix_fft.h
+119
-62
No files found.
dlib/matrix/matrix_fft.h
View file @
ccb1c95f
...
@@ -95,27 +95,72 @@ namespace dlib
...
@@ -95,27 +95,72 @@ namespace dlib
// ------------------------------------------------------------------------------------
// ------------------------------------------------------------------------------------
template
<
typename
T
>
class
twiddles
{
/*!
The point of this object is to cache the twiddle values so we don't
recompute them over and over inside R8TX().
!*/
public:
twiddles
()
{
data
.
resize
(
64
);
}
const
std
::
complex
<
T
>*
get_twiddles
(
int
p
)
/*!
requires
- 0 <= p <= 64
ensures
- returns a pointer to the twiddle factors needed by R8TX if nxtlt == 2^p
!*/
{
// Compute the twiddle factors for this p value if we haven't done so
// already.
if
(
data
[
p
].
size
()
==
0
)
{
const
int
nxtlt
=
0x1
<<
p
;
data
[
p
].
reserve
(
nxtlt
*
7
);
const
T
twopi
=
6.2831853071795865
;
/* 2.0 * pi */
const
T
scale
=
twopi
/
(
nxtlt
*
8.0
);
std
::
complex
<
T
>
cs
[
7
];
for
(
int
j
=
0
;
j
<
nxtlt
;
++
j
)
{
const
T
arg
=
j
*
scale
;
cs
[
0
]
=
std
::
complex
<
T
>
(
std
::
cos
(
arg
),
std
::
sin
(
arg
));
cs
[
1
]
=
cs
[
0
]
*
cs
[
0
];
cs
[
2
]
=
cs
[
1
]
*
cs
[
0
];
cs
[
3
]
=
cs
[
1
]
*
cs
[
1
];
cs
[
4
]
=
cs
[
2
]
*
cs
[
1
];
cs
[
5
]
=
cs
[
2
]
*
cs
[
2
];
cs
[
6
]
=
cs
[
3
]
*
cs
[
2
];
data
[
p
].
insert
(
data
[
p
].
end
(),
cs
,
cs
+
7
);
}
}
return
&
data
[
p
][
0
];
}
private:
std
::
vector
<
std
::
vector
<
std
::
complex
<
T
>
>
>
data
;
};
// ----------------------------------------------------------------------------------------
/* Radix-8 iteration subroutine */
/* Radix-8 iteration subroutine */
template
<
typename
T
>
template
<
typename
T
>
void
R8TX
(
int
nxtlt
,
int
nthpo
,
int
length
,
void
R8TX
(
int
nxtlt
,
int
nthpo
,
int
length
,
const
std
::
complex
<
T
>*
cs
,
std
::
complex
<
T
>
*
cc0
,
std
::
complex
<
T
>
*
cc1
,
std
::
complex
<
T
>
*
cc2
,
std
::
complex
<
T
>
*
cc3
,
std
::
complex
<
T
>
*
cc0
,
std
::
complex
<
T
>
*
cc1
,
std
::
complex
<
T
>
*
cc2
,
std
::
complex
<
T
>
*
cc3
,
std
::
complex
<
T
>
*
cc4
,
std
::
complex
<
T
>
*
cc5
,
std
::
complex
<
T
>
*
cc6
,
std
::
complex
<
T
>
*
cc7
)
std
::
complex
<
T
>
*
cc4
,
std
::
complex
<
T
>
*
cc5
,
std
::
complex
<
T
>
*
cc6
,
std
::
complex
<
T
>
*
cc7
)
{
{
const
T
irt2
=
0.707106781186548
;
/* 1.0/sqrt(2.0) */
const
T
irt2
=
0.707106781186548
;
/* 1.0/sqrt(2.0) */
const
T
twopi
=
6.2831853071795865
;
/* 2.0 * pi */
const
T
scale
=
twopi
/
length
;
for
(
int
j
=
0
;
j
<
nxtlt
;
j
++
)
for
(
int
j
=
0
;
j
<
nxtlt
;
j
++
)
{
{
const
T
arg
=
j
*
scale
;
const
std
::
complex
<
T
>
cs1
(
std
::
cos
(
arg
),
std
::
sin
(
arg
));
const
std
::
complex
<
T
>
cs2
=
cs1
*
cs1
;
const
std
::
complex
<
T
>
cs3
=
cs2
*
cs1
;
const
std
::
complex
<
T
>
cs4
=
cs2
*
cs2
;
const
std
::
complex
<
T
>
cs5
=
cs3
*
cs2
;
const
std
::
complex
<
T
>
cs6
=
cs3
*
cs3
;
const
std
::
complex
<
T
>
cs7
=
cs4
*
cs3
;
for
(
int
k
=
j
;
k
<
nthpo
;
k
+=
length
)
for
(
int
k
=
j
;
k
<
nthpo
;
k
+=
length
)
{
{
std
::
complex
<
T
>
a0
,
a1
,
a2
,
a3
,
a4
,
a5
,
a6
,
a7
;
std
::
complex
<
T
>
a0
,
a1
,
a2
,
a3
,
a4
,
a5
,
a6
,
a7
;
...
@@ -144,18 +189,6 @@ namespace dlib
...
@@ -144,18 +189,6 @@ namespace dlib
const
std
::
complex
<
T
>
tmp2
(
-
irt2
*
(
b7
.
real
()
+
b7
.
imag
()),
irt2
*
(
b7
.
real
()
-
b7
.
imag
()));
const
std
::
complex
<
T
>
tmp2
(
-
irt2
*
(
b7
.
real
()
+
b7
.
imag
()),
irt2
*
(
b7
.
real
()
-
b7
.
imag
()));
cc0
[
k
]
=
b0
+
b1
;
cc0
[
k
]
=
b0
+
b1
;
if
(
j
>
0
)
{
cc1
[
k
]
=
cs4
*
(
b0
-
b1
);
cc2
[
k
]
=
cs2
*
(
b2
+
tmp0
);
cc3
[
k
]
=
cs6
*
(
b2
-
tmp0
);
cc4
[
k
]
=
cs1
*
(
b4
+
tmp1
);
cc5
[
k
]
=
cs5
*
(
b4
-
tmp1
);
cc6
[
k
]
=
cs3
*
(
b6
+
tmp2
);
cc7
[
k
]
=
cs7
*
(
b6
-
tmp2
);
}
else
{
cc1
[
k
]
=
b0
-
b1
;
cc1
[
k
]
=
b0
-
b1
;
cc2
[
k
]
=
b2
+
tmp0
;
cc2
[
k
]
=
b2
+
tmp0
;
cc3
[
k
]
=
b2
-
tmp0
;
cc3
[
k
]
=
b2
-
tmp0
;
...
@@ -163,15 +196,26 @@ namespace dlib
...
@@ -163,15 +196,26 @@ namespace dlib
cc5
[
k
]
=
b4
-
tmp1
;
cc5
[
k
]
=
b4
-
tmp1
;
cc6
[
k
]
=
b6
+
tmp2
;
cc6
[
k
]
=
b6
+
tmp2
;
cc7
[
k
]
=
b6
-
tmp2
;
cc7
[
k
]
=
b6
-
tmp2
;
if
(
j
>
0
)
{
cc1
[
k
]
*=
cs
[
3
];
cc2
[
k
]
*=
cs
[
1
];
cc3
[
k
]
*=
cs
[
5
];
cc4
[
k
]
*=
cs
[
0
];
cc5
[
k
]
*=
cs
[
4
];
cc6
[
k
]
*=
cs
[
2
];
cc7
[
k
]
*=
cs
[
6
];
}
}
}
}
cs
+=
7
;
}
}
}
}
// ------------------------------------------------------------------------------------
// ------------------------------------------------------------------------------------
template
<
typename
T
,
long
NR
,
long
NC
,
typename
MM
,
typename
layout
>
template
<
typename
T
,
long
NR
,
long
NC
,
typename
MM
,
typename
layout
>
void
fft1d_inplace
(
matrix
<
std
::
complex
<
T
>
,
NR
,
NC
,
MM
,
layout
>&
data
,
bool
do_backward_fft
)
void
fft1d_inplace
(
matrix
<
std
::
complex
<
T
>
,
NR
,
NC
,
MM
,
layout
>&
data
,
bool
do_backward_fft
,
twiddles
<
T
>&
cs
)
/*!
/*!
requires
requires
- is_vector(data) == true
- is_vector(data) == true
...
@@ -193,7 +237,7 @@ namespace dlib
...
@@ -193,7 +237,7 @@ namespace dlib
std
::
complex
<
T
>*
const
b
=
&
data
(
0
);
std
::
complex
<
T
>*
const
b
=
&
data
(
0
);
int
L
[
16
],
L1
,
L2
,
L3
,
L4
,
L5
,
L6
,
L7
,
L8
,
L9
,
L10
,
L11
,
L12
,
L13
,
L14
,
L15
;
int
L
[
16
],
L1
,
L2
,
L3
,
L4
,
L5
,
L6
,
L7
,
L8
,
L9
,
L10
,
L11
,
L12
,
L13
,
L14
,
L15
;
int
j1
,
j2
,
j3
,
j4
,
j5
,
j6
,
j7
,
j8
,
j9
,
j10
,
j11
,
j12
,
j13
,
j14
;
int
j1
,
j2
,
j3
,
j4
,
j5
,
j6
,
j7
,
j8
,
j9
,
j10
,
j11
,
j12
,
j13
,
j14
;
int
j
,
ij
,
ji
,
ij1
,
ji1
;
int
j
,
ij
,
ji
;
int
n2pow
,
n8pow
,
nthpo
,
ipass
,
nxtlt
,
length
;
int
n2pow
,
n8pow
,
nthpo
,
ipass
,
nxtlt
,
length
;
n2pow
=
fastlog2
(
data
.
size
());
n2pow
=
fastlog2
(
data
.
size
());
...
@@ -206,9 +250,10 @@ namespace dlib
...
@@ -206,9 +250,10 @@ namespace dlib
/* Radix 8 iterations */
/* Radix 8 iterations */
for
(
ipass
=
1
;
ipass
<=
n8pow
;
ipass
++
)
for
(
ipass
=
1
;
ipass
<=
n8pow
;
ipass
++
)
{
{
nxtlt
=
0x1
<<
(
n2pow
-
3
*
ipass
);
const
int
p
=
n2pow
-
3
*
ipass
;
nxtlt
=
0x1
<<
p
;
length
=
8
*
nxtlt
;
length
=
8
*
nxtlt
;
R8TX
(
nxtlt
,
nthpo
,
length
,
R8TX
(
nxtlt
,
nthpo
,
length
,
cs
.
get_twiddles
(
p
),
b
,
b
+
nxtlt
,
b
+
2
*
nxtlt
,
b
+
3
*
nxtlt
,
b
,
b
+
nxtlt
,
b
+
2
*
nxtlt
,
b
+
3
*
nxtlt
,
b
+
4
*
nxtlt
,
b
+
5
*
nxtlt
,
b
+
6
*
nxtlt
,
b
+
7
*
nxtlt
);
b
+
4
*
nxtlt
,
b
+
5
*
nxtlt
,
b
+
6
*
nxtlt
,
b
+
7
*
nxtlt
);
}
}
...
@@ -235,28 +280,26 @@ namespace dlib
...
@@ -235,28 +280,26 @@ namespace dlib
L15
=
L
[
1
];
L14
=
L
[
2
];
L13
=
L
[
3
];
L12
=
L
[
4
];
L11
=
L
[
5
];
L10
=
L
[
6
];
L9
=
L
[
7
];
L15
=
L
[
1
];
L14
=
L
[
2
];
L13
=
L
[
3
];
L12
=
L
[
4
];
L11
=
L
[
5
];
L10
=
L
[
6
];
L9
=
L
[
7
];
L8
=
L
[
8
];
L7
=
L
[
9
];
L6
=
L
[
10
];
L5
=
L
[
11
];
L4
=
L
[
12
];
L3
=
L
[
13
];
L2
=
L
[
14
];
L1
=
L
[
15
];
L8
=
L
[
8
];
L7
=
L
[
9
];
L6
=
L
[
10
];
L5
=
L
[
11
];
L4
=
L
[
12
];
L3
=
L
[
13
];
L2
=
L
[
14
];
L1
=
L
[
15
];
ij
=
1
;
ij
=
0
;
for
(
j1
=
1
;
j1
<=
L1
;
j1
++
)
for
(
j1
=
0
;
j1
<
L1
;
j1
++
)
for
(
j2
=
j1
;
j2
<=
L2
;
j2
+=
L1
)
for
(
j2
=
j1
;
j2
<
L2
;
j2
+=
L1
)
for
(
j3
=
j2
;
j3
<=
L3
;
j3
+=
L2
)
for
(
j3
=
j2
;
j3
<
L3
;
j3
+=
L2
)
for
(
j4
=
j3
;
j4
<=
L4
;
j4
+=
L3
)
for
(
j4
=
j3
;
j4
<
L4
;
j4
+=
L3
)
for
(
j5
=
j4
;
j5
<=
L5
;
j5
+=
L4
)
for
(
j5
=
j4
;
j5
<
L5
;
j5
+=
L4
)
for
(
j6
=
j5
;
j6
<=
L6
;
j6
+=
L5
)
for
(
j6
=
j5
;
j6
<
L6
;
j6
+=
L5
)
for
(
j7
=
j6
;
j7
<=
L7
;
j7
+=
L6
)
for
(
j7
=
j6
;
j7
<
L7
;
j7
+=
L6
)
for
(
j8
=
j7
;
j8
<=
L8
;
j8
+=
L7
)
for
(
j8
=
j7
;
j8
<
L8
;
j8
+=
L7
)
for
(
j9
=
j8
;
j9
<=
L9
;
j9
+=
L8
)
for
(
j9
=
j8
;
j9
<
L9
;
j9
+=
L8
)
for
(
j10
=
j9
;
j10
<=
L10
;
j10
+=
L9
)
for
(
j10
=
j9
;
j10
<
L10
;
j10
+=
L9
)
for
(
j11
=
j10
;
j11
<=
L11
;
j11
+=
L10
)
for
(
j11
=
j10
;
j11
<
L11
;
j11
+=
L10
)
for
(
j12
=
j11
;
j12
<=
L12
;
j12
+=
L11
)
for
(
j12
=
j11
;
j12
<
L12
;
j12
+=
L11
)
for
(
j13
=
j12
;
j13
<=
L13
;
j13
+=
L12
)
for
(
j13
=
j12
;
j13
<
L13
;
j13
+=
L12
)
for
(
j14
=
j13
;
j14
<=
L14
;
j14
+=
L13
)
for
(
j14
=
j13
;
j14
<
L14
;
j14
+=
L13
)
for
(
ji
=
j14
;
ji
<=
L15
;
ji
+=
L14
)
for
(
ji
=
j14
;
ji
<
L15
;
ji
+=
L14
)
{
{
ij1
=
ij
-
1
;
if
(
ij
<
ji
)
ji1
=
ji
-
1
;
swap
(
b
[
ij
],
b
[
ji
]);
if
(
ij
-
ji
<
0
)
swap
(
b
[
ij1
],
b
[
ji1
]);
ij
++
;
ij
++
;
}
}
...
@@ -283,12 +326,13 @@ namespace dlib
...
@@ -283,12 +326,13 @@ namespace dlib
return
;
return
;
matrix
<
std
::
complex
<
double
>
>
buff
;
matrix
<
std
::
complex
<
double
>
>
buff
;
twiddles
<
double
>
cs
;
// Compute transform row by row
// Compute transform row by row
for
(
long
r
=
0
;
r
<
data
.
nr
();
++
r
)
for
(
long
r
=
0
;
r
<
data
.
nr
();
++
r
)
{
{
buff
=
matrix_cast
<
std
::
complex
<
double
>
>
(
rowm
(
data
,
r
));
buff
=
matrix_cast
<
std
::
complex
<
double
>
>
(
rowm
(
data
,
r
));
fft1d_inplace
(
buff
,
do_backward_fft
);
fft1d_inplace
(
buff
,
do_backward_fft
,
cs
);
set_rowm
(
data
,
r
)
=
matrix_cast
<
std
::
complex
<
T
>
>
(
buff
);
set_rowm
(
data
,
r
)
=
matrix_cast
<
std
::
complex
<
T
>
>
(
buff
);
}
}
...
@@ -296,7 +340,7 @@ namespace dlib
...
@@ -296,7 +340,7 @@ namespace dlib
for
(
long
c
=
0
;
c
<
data
.
nc
();
++
c
)
for
(
long
c
=
0
;
c
<
data
.
nc
();
++
c
)
{
{
buff
=
matrix_cast
<
std
::
complex
<
double
>
>
(
colm
(
data
,
c
));
buff
=
matrix_cast
<
std
::
complex
<
double
>
>
(
colm
(
data
,
c
));
fft1d_inplace
(
buff
,
do_backward_fft
);
fft1d_inplace
(
buff
,
do_backward_fft
,
cs
);
set_colm
(
data
,
c
)
=
matrix_cast
<
std
::
complex
<
T
>
>
(
buff
);
set_colm
(
data
,
c
)
=
matrix_cast
<
std
::
complex
<
T
>
>
(
buff
);
}
}
}
}
...
@@ -328,12 +372,13 @@ namespace dlib
...
@@ -328,12 +372,13 @@ namespace dlib
matrix
<
std
::
complex
<
double
>
>
buff
;
matrix
<
std
::
complex
<
double
>
>
buff
;
data_out
.
set_size
(
data
.
nr
(),
data
.
nc
());
data_out
.
set_size
(
data
.
nr
(),
data
.
nc
());
twiddles
<
double
>
cs
;
// Compute transform row by row
// Compute transform row by row
for
(
long
r
=
0
;
r
<
data
.
nr
();
++
r
)
for
(
long
r
=
0
;
r
<
data
.
nr
();
++
r
)
{
{
buff
=
matrix_cast
<
std
::
complex
<
double
>
>
(
rowm
(
data
,
r
));
buff
=
matrix_cast
<
std
::
complex
<
double
>
>
(
rowm
(
data
,
r
));
fft1d_inplace
(
buff
,
do_backward_fft
);
fft1d_inplace
(
buff
,
do_backward_fft
,
cs
);
set_rowm
(
data_out
,
r
)
=
matrix_cast
<
std
::
complex
<
T
>
>
(
buff
);
set_rowm
(
data_out
,
r
)
=
matrix_cast
<
std
::
complex
<
T
>
>
(
buff
);
}
}
...
@@ -341,7 +386,7 @@ namespace dlib
...
@@ -341,7 +386,7 @@ namespace dlib
for
(
long
c
=
0
;
c
<
data_out
.
nc
();
++
c
)
for
(
long
c
=
0
;
c
<
data_out
.
nc
();
++
c
)
{
{
buff
=
matrix_cast
<
std
::
complex
<
double
>
>
(
colm
(
data_out
,
c
));
buff
=
matrix_cast
<
std
::
complex
<
double
>
>
(
colm
(
data_out
,
c
));
fft1d_inplace
(
buff
,
do_backward_fft
);
fft1d_inplace
(
buff
,
do_backward_fft
,
cs
);
set_colm
(
data_out
,
c
)
=
matrix_cast
<
std
::
complex
<
T
>
>
(
buff
);
set_colm
(
data_out
,
c
)
=
matrix_cast
<
std
::
complex
<
T
>
>
(
buff
);
}
}
}
}
...
@@ -370,7 +415,8 @@ namespace dlib
...
@@ -370,7 +415,8 @@ namespace dlib
if
(
data
.
nr
()
==
1
||
data
.
nc
()
==
1
)
if
(
data
.
nr
()
==
1
||
data
.
nc
()
==
1
)
{
{
matrix
<
typename
EXP
::
type
>
temp
(
data
);
matrix
<
typename
EXP
::
type
>
temp
(
data
);
impl
::
fft1d_inplace
(
temp
,
false
);
impl
::
twiddles
<
typename
EXP
::
type
::
value_type
>
cs
;
impl
::
fft1d_inplace
(
temp
,
false
,
cs
);
return
temp
;
return
temp
;
}
}
else
else
...
@@ -403,7 +449,8 @@ namespace dlib
...
@@ -403,7 +449,8 @@ namespace dlib
if
(
data
.
nr
()
==
1
||
data
.
nc
()
==
1
)
if
(
data
.
nr
()
==
1
||
data
.
nc
()
==
1
)
{
{
temp
=
data
;
temp
=
data
;
impl
::
fft1d_inplace
(
temp
,
true
);
impl
::
twiddles
<
typename
EXP
::
type
::
value_type
>
cs
;
impl
::
fft1d_inplace
(
temp
,
true
,
cs
);
}
}
else
else
{
{
...
@@ -430,10 +477,15 @@ namespace dlib
...
@@ -430,10 +477,15 @@ namespace dlib
);
);
if
(
data
.
nr
()
==
1
||
data
.
nc
()
==
1
)
if
(
data
.
nr
()
==
1
||
data
.
nc
()
==
1
)
impl
::
fft1d_inplace
(
data
,
false
);
{
impl
::
twiddles
<
T
>
cs
;
impl
::
fft1d_inplace
(
data
,
false
,
cs
);
}
else
else
{
impl
::
fft2d_inplace
(
data
,
false
);
impl
::
fft2d_inplace
(
data
,
false
);
}
}
}
template
<
typename
T
,
long
NR
,
long
NC
,
typename
MM
,
typename
L
>
template
<
typename
T
,
long
NR
,
long
NC
,
typename
MM
,
typename
L
>
void
ifft_inplace
(
matrix
<
std
::
complex
<
T
>
,
NR
,
NC
,
MM
,
L
>&
data
)
void
ifft_inplace
(
matrix
<
std
::
complex
<
T
>
,
NR
,
NC
,
MM
,
L
>&
data
)
...
@@ -449,10 +501,15 @@ namespace dlib
...
@@ -449,10 +501,15 @@ namespace dlib
);
);
if
(
data
.
nr
()
==
1
||
data
.
nc
()
==
1
)
if
(
data
.
nr
()
==
1
||
data
.
nc
()
==
1
)
impl
::
fft1d_inplace
(
data
,
true
);
{
impl
::
twiddles
<
T
>
cs
;
impl
::
fft1d_inplace
(
data
,
true
,
cs
);
}
else
else
{
impl
::
fft2d_inplace
(
data
,
true
);
impl
::
fft2d_inplace
(
data
,
true
);
}
}
}
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment