Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
vision
Commits
f2600c2e
Unverified
Commit
f2600c2e
authored
Jan 29, 2020
by
Francisco Massa
Committed by
GitHub
Jan 29, 2020
Browse files
Revert "Base decoder for video. (#1747) (#1793)" (#1833)
This reverts commit
28b7f8ae
.
parent
c8345212
Changes
29
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
0 additions
and
1004 deletions
+0
-1004
torchvision/csrc/cpu/decoder/sync_decoder_test.cpp
torchvision/csrc/cpu/decoder/sync_decoder_test.cpp
+0
-22
torchvision/csrc/cpu/decoder/time_keeper.cpp
torchvision/csrc/cpu/decoder/time_keeper.cpp
+0
-40
torchvision/csrc/cpu/decoder/time_keeper.h
torchvision/csrc/cpu/decoder/time_keeper.h
+0
-27
torchvision/csrc/cpu/decoder/util.cpp
torchvision/csrc/cpu/decoder/util.cpp
+0
-374
torchvision/csrc/cpu/decoder/util.h
torchvision/csrc/cpu/decoder/util.h
+0
-33
torchvision/csrc/cpu/decoder/video_sampler.cpp
torchvision/csrc/cpu/decoder/video_sampler.cpp
+0
-274
torchvision/csrc/cpu/decoder/video_sampler.h
torchvision/csrc/cpu/decoder/video_sampler.h
+0
-52
torchvision/csrc/cpu/decoder/video_stream.cpp
torchvision/csrc/cpu/decoder/video_stream.cpp
+0
-143
torchvision/csrc/cpu/decoder/video_stream.h
torchvision/csrc/cpu/decoder/video_stream.h
+0
-39
No files found.
torchvision/csrc/cpu/decoder/sync_decoder_test.cpp
deleted
100644 → 0
View file @
c8345212
// Copyright 2004-present Facebook. All Rights Reserved.
#include <c10/util/Logging.h>
#include <gtest/gtest.h>
#include "sync_decoder.h"
using
namespace
ffmpeg
;
// Smoke test: decode every frame of a known mp4 asset end-to-end and log
// each decoded frame's presentation timestamp.
TEST(SyncDecoder, Test) {
  SyncDecoder decoder;

  // Configure the decoder: generous timeout, start one second in, and
  // request all media format variants the API accepts.
  DecoderParameters params;
  params.timeoutMs = 10000;
  params.startOffsetMs = 1000;
  params.formats = {MediaFormat(), MediaFormat(0), MediaFormat('0')};
  params.uri = "pytorch/vision/test/assets/videos/R6llTwEh07w.mp4";

  CHECK(decoder.init(params, nullptr));

  // Pull frames until decode() stops returning success (0).
  DecoderOutputMessage out;
  while (0 == decoder.decode(&out, 100)) {
    LOG(INFO) << "Decoded frame, timestamp(us): " << out.header.pts;
  }
  decoder.shutdown();
}
torchvision/csrc/cpu/decoder/time_keeper.cpp
deleted
100644 → 0
View file @
c8345212
// Copyright 2004-present Facebook. All Rights Reserved.
#include "time_keeper.h"
extern
"C"
{
#include <libavutil/avutil.h>
}
namespace ffmpeg {

namespace {
// Maximum tolerated drift, in AV_TIME_BASE units, between the wall clock
// and the extrapolated stream clock before the stream base is re-anchored.
const ssize_t kMaxTimeBaseDifference = 10;
} // namespace

// Maps @decoderTimestamp (stream clock, us) onto the wall clock established
// at the first call, re-anchoring if the two clocks have drifted too far
// apart. Returns the advised sleep time (us, never negative) before the
// frame should be processed.
ssize_t TimeKeeper::adjust(ssize_t& decoderTimestamp) {
  const ssize_t now = std::chrono::duration_cast<std::chrono::microseconds>(
                          std::chrono::system_clock::now().time_since_epoch())
                          .count();

  // Lazily latch the reference points on first use.
  if (startTime_ == 0) {
    startTime_ = now;
  }
  if (streamTimestamp_ == 0) {
    streamTimestamp_ = decoderTimestamp;
  }

  // Wall-clock moment at which this frame is due.
  const auto runOut = startTime_ + decoderTimestamp - streamTimestamp_;

  // Re-synchronize if the schedule has drifted beyond the tolerance.
  if (std::labs((now - runOut) / AV_TIME_BASE) > kMaxTimeBaseDifference) {
    streamTimestamp_ = startTime_ - now + decoderTimestamp;
  }

  const auto sleepAdvised = runOut - now;

  // Rewrite the caller's timestamp into the wall-clock domain.
  decoderTimestamp += startTime_ - streamTimestamp_;

  return sleepAdvised > 0 ? sleepAdvised : 0;
}

} // namespace ffmpeg
torchvision/csrc/cpu/decoder/time_keeper.h
deleted
100644 → 0
View file @
c8345212
// Copyright 2004-present Facebook. All Rights Reserved.
#pragma once
#include <stdlib.h>
#include <chrono>
namespace ffmpeg {

/**
 * Tracks decoded timestamps (in us) for media streams, correlating the
 * stream clock with the wall clock.
 */
class TimeKeeper {
 public:
  TimeKeeper() = default;

  // Adjusts the provided @decoderTimestamp to the wall-clock-corrected value
  // and returns the advised sleep time (us) before processing the next frame.
  ssize_t adjust(ssize_t& decoderTimestamp);

 private:
  ssize_t startTime_{0};       // wall-clock anchor, set on first adjust()
  ssize_t streamTimestamp_{0}; // stream-clock anchor, set on first adjust()
};

} // namespace ffmpeg
torchvision/csrc/cpu/decoder/util.cpp
deleted
100644 → 0
View file @
c8345212
// Copyright 2004-present Facebook. All Rights Reserved.
#include "util.h"
#include <c10/util/Logging.h>
namespace
ffmpeg
{
namespace
Serializer
{
// Fixed-size (trivially copyable) types: byte-wise size and (de)serialization.
template <typename T>
inline size_t getSize(const T& x) {
  return sizeof(x);
}

// Copies @src into dest[pos..]; advances @pos. Returns false if the buffer
// of total length @len cannot hold the value.
template <typename T>
inline bool serializeItem(uint8_t* dest, size_t len, size_t& pos, const T& src) {
  VLOG(6) << "Generic serializeItem";
  const auto required = sizeof(src);
  if (len < pos + required) {
    return false;
  }
  memcpy(dest + pos, &src, required);
  pos += required;
  return true;
}

// Reads @dest out of src[pos..]; advances @pos. Returns false if fewer than
// sizeof(dest) bytes remain in the buffer of total length @len.
template <typename T>
inline bool deserializeItem(const uint8_t* src, size_t len, size_t& pos, T& dest) {
  const auto required = sizeof(dest);
  if (len < pos + required) {
    return false;
  }
  memcpy(&dest, src + pos, required);
  pos += required;
  return true;
}
// AVSubtitleRect specialization: fixed fields plus a type-dependent payload.
inline size_t getSize(const AVSubtitleRect& x) {
  // Bytes needed for the variable part of the rect.
  auto rectBytes = [](const AVSubtitleRect& y) -> size_t {
    size_t s = 0;
    switch (y.type) {
      case SUBTITLE_BITMAP:
        // One length prefix plus raw pixels per color plane.
        for (int i = 0; i < y.nb_colors; ++i) {
          s += sizeof(y.pict.linesize[i]);
          s += y.pict.linesize[i];
        }
        break;
      case SUBTITLE_TEXT:
        // Length prefix plus the (unterminated) text bytes.
        s += sizeof(size_t);
        s += strlen(y.text);
        break;
      case SUBTITLE_ASS:
        s += sizeof(size_t);
        s += strlen(y.ass);
        break;
      default:
        break;
    }
    return s;
  };
  return getSize(x.x) + getSize(x.y) + getSize(x.w) + getSize(x.h) +
      getSize(x.nb_colors) + getSize(x.type) + getSize(x.flags) +
      rectBytes(x);
}

// AVSubtitle specialization: fixed fields plus all owned rects.
inline size_t getSize(const AVSubtitle& x) {
  auto rectBytes = [](const AVSubtitle& y) -> size_t {
    size_t s = getSize(y.num_rects);
    for (unsigned i = 0; i < y.num_rects; ++i) {
      s += getSize(*y.rects[i]);
    }
    return s;
  };
  return getSize(x.format) + getSize(x.start_display_time) +
      getSize(x.end_display_time) + getSize(x.pts) + rectBytes(x);
}
// Serializes an AVSubtitleRect: seven fixed-size fields followed by a
// type-dependent payload (bitmap planes, text, or ASS text).
// Returns false if the destination buffer cannot hold the encoding.
inline bool serializeItem(
    uint8_t* dest,
    size_t len,
    size_t& pos,
    const AVSubtitleRect& src) {
  // NOTE: the lambda returns bool (was mistakenly declared `-> size_t`
  // while returning true/false; its result is only ever used as a bool,
  // so behavior is unchanged).
  auto rectSerialize =
      [](uint8_t* d, size_t l, size_t& p, const AVSubtitleRect& x) -> bool {
    switch (x.type) {
      case SUBTITLE_BITMAP:
        for (int i = 0; i < x.nb_colors; ++i) {
          // Length prefix, then the raw plane bytes.
          if (!serializeItem(d, l, p, x.pict.linesize[i])) {
            return false;
          }
          if (p + x.pict.linesize[i] > l) {
            return false;
          }
          memcpy(d + p, x.pict.data[i], x.pict.linesize[i]);
          p += x.pict.linesize[i];
        }
        return true;
      case SUBTITLE_TEXT: {
        const size_t s = strlen(x.text);
        if (!serializeItem(d, l, p, s)) {
          return false;
        }
        if (p + s > l) {
          return false;
        }
        memcpy(d + p, x.text, s);
        p += s;
        return true;
      }
      case SUBTITLE_ASS: {
        const size_t s = strlen(x.ass);
        if (!serializeItem(d, l, p, s)) {
          return false;
        }
        if (p + s > l) {
          return false;
        }
        memcpy(d + p, x.ass, s);
        p += s;
        return true;
      }
      default:
        return true;
    }
  };
  return serializeItem(dest, len, pos, src.x) &&
      serializeItem(dest, len, pos, src.y) &&
      serializeItem(dest, len, pos, src.w) &&
      serializeItem(dest, len, pos, src.h) &&
      serializeItem(dest, len, pos, src.nb_colors) &&
      serializeItem(dest, len, pos, src.type) &&
      serializeItem(dest, len, pos, src.flags) &&
      rectSerialize(dest, len, pos, src);
}

// Serializes an AVSubtitle: fixed-size header fields, the rect count, then
// each rect in order. Returns false on buffer overflow.
inline bool serializeItem(
    uint8_t* dest,
    size_t len,
    size_t& pos,
    const AVSubtitle& src) {
  auto rectSerialize =
      [](uint8_t* d, size_t l, size_t& p, const AVSubtitle& x) -> bool {
    bool res = serializeItem(d, l, p, x.num_rects);
    for (unsigned i = 0; res && i < x.num_rects; ++i) {
      res = serializeItem(d, l, p, *(x.rects[i]));
    }
    return res;
  };
  VLOG(6) << "AVSubtitle serializeItem";
  return serializeItem(dest, len, pos, src.format) &&
      serializeItem(dest, len, pos, src.start_display_time) &&
      serializeItem(dest, len, pos, src.end_display_time) &&
      serializeItem(dest, len, pos, src.pts) &&
      rectSerialize(dest, len, pos, src);
}
// Deserializes an AVSubtitleRect produced by the matching serializeItem.
// Allocates owned payload buffers with av_malloc; caller owns the result.
// NOTE(review): av_malloc results are used unchecked — presumably OOM is
// considered fatal here; confirm before reuse.
inline bool deserializeItem(
    const uint8_t* src,
    size_t len,
    size_t& pos,
    AVSubtitleRect& dest) {
  auto rectDeserialize =
      [](const uint8_t* y, size_t l, size_t& p, AVSubtitleRect& x) -> bool {
    switch (x.type) {
      case SUBTITLE_BITMAP:
        for (int i = 0; i < x.nb_colors; ++i) {
          if (!deserializeItem(y, l, p, x.pict.linesize[i])) {
            return false;
          }
          if (p + x.pict.linesize[i] > l) {
            return false;
          }
          x.pict.data[i] = (uint8_t*)av_malloc(x.pict.linesize[i]);
          memcpy(x.pict.data[i], y + p, x.pict.linesize[i]);
          p += x.pict.linesize[i];
        }
        return true;
      case SUBTITLE_TEXT: {
        size_t s = 0;
        if (!deserializeItem(y, l, p, s)) {
          return false;
        }
        if (p + s > l) {
          return false;
        }
        // Allocate s + 1 to append a NUL terminator.
        x.text = (char*)av_malloc(s + 1);
        memcpy(x.text, y + p, s);
        x.text[s] = 0;
        p += s;
        return true;
      }
      case SUBTITLE_ASS: {
        size_t s = 0;
        if (!deserializeItem(y, l, p, s)) {
          return false;
        }
        if (p + s > l) {
          return false;
        }
        x.ass = (char*)av_malloc(s + 1);
        memcpy(x.ass, y + p, s);
        x.ass[s] = 0;
        p += s;
        return true;
      }
      default:
        return true;
    }
  };
  return deserializeItem(src, len, pos, dest.x) &&
      deserializeItem(src, len, pos, dest.y) &&
      deserializeItem(src, len, pos, dest.w) &&
      deserializeItem(src, len, pos, dest.h) &&
      deserializeItem(src, len, pos, dest.nb_colors) &&
      deserializeItem(src, len, pos, dest.type) &&
      deserializeItem(src, len, pos, dest.flags) &&
      rectDeserialize(src, len, pos, dest);
}

// Deserializes an AVSubtitle: header fields, rect count, then each rect.
// Rect array and rects are av_malloc'ed and zero-initialized before fill.
inline bool deserializeItem(
    const uint8_t* src,
    size_t len,
    size_t& pos,
    AVSubtitle& dest) {
  auto rectDeserialize =
      [](const uint8_t* y, size_t l, size_t& p, AVSubtitle& x) -> bool {
    bool res = deserializeItem(y, l, p, x.num_rects);
    if (res && x.num_rects) {
      x.rects = (AVSubtitleRect**)av_malloc(
          x.num_rects * sizeof(AVSubtitleRect*));
    }
    for (unsigned i = 0; res && i < x.num_rects; ++i) {
      x.rects[i] = (AVSubtitleRect*)av_malloc(sizeof(AVSubtitleRect));
      memset(x.rects[i], 0, sizeof(AVSubtitleRect));
      res = deserializeItem(y, l, p, *x.rects[i]);
    }
    return res;
  };
  return deserializeItem(src, len, pos, dest.format) &&
      deserializeItem(src, len, pos, dest.start_display_time) &&
      deserializeItem(src, len, pos, dest.end_display_time) &&
      deserializeItem(src, len, pos, dest.pts) &&
      rectDeserialize(src, len, pos, dest);
}
}
// namespace Serializer
namespace
Util
{
// Returns a human-readable description for an FFMPEG error code, or a
// generic "Unknown error code: N" string if av_strerror cannot resolve it.
std::string generateErrorDesc(int errorCode) {
  std::array<char, 1024> buffer;
  if (av_strerror(errorCode, buffer.data(), buffer.size()) < 0) {
    return std::string("Unknown error code: ") + std::to_string(errorCode);
  }
  // Guarantee NUL termination before constructing the string.
  buffer.back() = 0;
  return std::string(buffer.data());
}
// Serializes @sub into the writable tail of @out.
// Returns the number of bytes written, or 0 on failure.
size_t serialize(const AVSubtitle& sub, ByteStorage* out) {
  const auto len = size(sub);
  CHECK_LE(len, out->tail());
  size_t pos = 0;
  if (!Serializer::serializeItem(out->writableTail(), len, pos, sub)) {
    return 0;
  }
  out->append(len);
  return len;
}

// Restores @sub from @buf. Returns false if the buffer is malformed/short.
bool deserialize(const ByteStorage& buf, AVSubtitle* sub) {
  size_t pos = 0;
  return Serializer::deserializeItem(buf.data(), buf.length(), pos, *sub);
}

// Number of bytes serialize() would produce for @sub.
size_t size(const AVSubtitle& sub) {
  return Serializer::getSize(sub);
}
// Checks that the requested output video format is one of the supported
// parameter combinations (see table below).
bool validateVideoFormat(const VideoFormat& f) {
  /*
  Valid parameters values for decoder
  ______________________________________________________________
  | W | H | minDimension | cropImage | algorithm            |
  |_____________________________________________________________|
  | 0 | 0 | 0            | N/A       | original             |
  |_____________________________________________________________|
  | >0 | 0 | N/A         | N/A       | scale keeping W      |
  |_____________________________________________________________|
  | 0 | >0 | N/A         | N/A       | scale keeping H      |
  |_____________________________________________________________|
  | >0 | >0 | N/A        | 0         | stretch/scale        |
  |_____________________________________________________________|
  | >0 | >0 | N/A        | >0        | scale/crop           |
  |_____________________________________________________________|
  | 0 | 0 | >0           | N/A       |scale to min dimension|
  |_____|_____|______________|___________|______________________|
  */
  const bool bothZero = // #1 and #6
      f.width == 0 && f.height == 0 && f.cropImage == 0;
  const bool bothSet = // #4 and #5
      f.width != 0 && f.height != 0 && f.minDimension == 0;
  const bool oneSet = // #2 and #3
      ((f.width != 0 && f.height == 0) ||
       (f.width == 0 && f.height != 0)) &&
      f.minDimension == 0 && f.cropImage == 0;
  return bothZero || bothSet || oneSet;
}
// Computes the output dimensions (destW, destH) for scaling a srcW x srcH
// frame according to the user's request:
//   - userW == userH == 0, minDimension == 0: keep source size;
//   - userW == userH == 0, minDimension > 0 : scale so the smaller side
//     equals minDimension, preserving aspect ratio;
//   - exactly one of userW/userH set        : scale preserving aspect ratio;
//   - both set, cropImage == 0              : stretch to userW x userH;
//   - both set, cropImage != 0              : scale so the image covers
//     userW x userH (the excess is cropped by the caller).
// Both outputs are clamped to be at least 1.
void setFormatDimensions(
    size_t& destW,
    size_t& destH,
    size_t userW,
    size_t userH,
    size_t srcW,
    size_t srcH,
    size_t minDimension,
    size_t cropImage) {
  // rounding rules
  // int -> double -> round up
  // if fraction is >= 0.5 or round down if fraction is < 0.5
  // int result = double(value) + 0.5
  // here we rounding double to int according to the above rule
  if (userW == 0 && userH == 0) {
    if (minDimension > 0) {
      if (srcW > srcH) {
        // landscape
        destH = minDimension;
        destW = round(double(srcW * minDimension) / srcH);
      } else {
        // portrait
        destW = minDimension;
        destH = round(double(srcH * minDimension) / srcW);
      }
    } else {
      destW = srcW;
      destH = srcH;
    }
  } else if (userW != 0 && userH == 0) {
    destW = userW;
    destH = round(double(srcH * userW) / srcW);
  } else if (userW == 0 && userH != 0) {
    destW = round(double(srcW * userH) / srcH);
    destH = userH;
  } else {
    // userW != 0 && userH != 0
    if (cropImage == 0) {
      destW = userW;
      destH = userH;
    } else {
      double userSlope = double(userH) / userW;
      double srcSlope = double(srcH) / srcW;
      if (srcSlope < userSlope) {
        destW = round(double(srcW * userH) / srcH);
        destH = userH;
      } else {
        destW = userW;
        destH = round(double(srcH * userW) / srcW);
      }
    }
  }
  // prevent zeros
  // FIX: was std::max(destW, 1UL) — 1UL is unsigned long, which is not
  // size_t on LLP64 platforms (e.g. 64-bit Windows), breaking std::max's
  // template-argument deduction. size_t{1} is portable.
  destW = std::max(destW, size_t{1});
  destH = std::max(destH, size_t{1});
}
}
// namespace Util
}
// namespace ffmpeg
torchvision/csrc/cpu/decoder/util.h
deleted
100644 → 0
View file @
c8345212
// Copyright 2004-present Facebook. All Rights Reserved.
#pragma once
#include "defs.h"
extern
"C"
{
#include <libavcodec/avcodec.h>
}
namespace
ffmpeg
{
/**
* FFMPEG library utility functions.
*/
namespace Util {

/// Human-readable description of an FFMPEG error code.
std::string generateErrorDesc(int errorCode);

/// Serializes @sub into @out; returns bytes written, or 0 on failure.
size_t serialize(const AVSubtitle& sub, ByteStorage* out);

/// Restores @sub from @buf; returns false if the buffer is malformed.
bool deserialize(const ByteStorage& buf, AVSubtitle* sub);

/// Number of bytes serialize() would produce for @sub.
size_t size(const AVSubtitle& sub);

/// Computes output dimensions for scaling/cropping a srcW x srcH frame;
/// results are written to @destW/@destH and clamped to at least 1.
void setFormatDimensions(
    size_t& destW,
    size_t& destH,
    size_t userW,
    size_t userH,
    size_t srcW,
    size_t srcH,
    size_t minDimension,
    size_t cropImage);

/// Checks the output format is a supported parameter combination.
bool validateVideoFormat(const VideoFormat& format);

} // namespace Util
}
// namespace ffmpeg
torchvision/csrc/cpu/decoder/video_sampler.cpp
deleted
100644 → 0
View file @
c8345212
// Copyright 2004-present Facebook. All Rights Reserved.
#include "video_sampler.h"
#include <c10/util/Logging.h>
#include "util.h"
extern
"C"
{
#include <libavutil/imgutils.h>
}
// www.ffmpeg.org/doxygen/0.5/swscale-example_8c-source.html
namespace
ffmpeg
{
namespace
{
int
preparePlanes
(
const
VideoFormat
&
fmt
,
const
uint8_t
*
buffer
,
uint8_t
**
planes
,
int
*
lineSize
)
{
int
result
;
if
((
result
=
av_image_fill_arrays
(
planes
,
lineSize
,
buffer
,
(
AVPixelFormat
)
fmt
.
format
,
fmt
.
width
,
fmt
.
height
,
1
))
<
0
)
{
LOG
(
ERROR
)
<<
"av_image_fill_arrays failed, err: "
<<
Util
::
generateErrorDesc
(
result
);
}
return
result
;
}
int
transformImage
(
SwsContext
*
context
,
const
uint8_t
*
const
srcSlice
[],
int
srcStride
[],
VideoFormat
inFormat
,
VideoFormat
outFormat
,
uint8_t
*
out
,
uint8_t
*
planes
[],
int
lines
[])
{
int
result
;
if
((
result
=
preparePlanes
(
outFormat
,
out
,
planes
,
lines
))
<
0
)
{
return
result
;
}
if
((
result
=
sws_scale
(
context
,
srcSlice
,
srcStride
,
0
,
inFormat
.
height
,
planes
,
lines
))
<
0
)
{
LOG
(
ERROR
)
<<
"sws_scale failed, err: "
<<
Util
::
generateErrorDesc
(
result
);
return
result
;
}
return
0
;
}
}
// namespace
VideoSampler::VideoSampler(int swsFlags, int64_t loggingUuid)
    : swsFlags_(swsFlags), loggingUuid_(loggingUuid) {}

VideoSampler::~VideoSampler() {
  cleanUp();
}

// Releases swscale contexts and scratch buffers; sampler can be re-init'ed.
void VideoSampler::shutdown() {
  cleanUp();
}
bool
VideoSampler
::
init
(
const
SamplerParameters
&
params
)
{
cleanUp
();
if
(
params
.
out
.
video
.
cropImage
!=
0
)
{
if
(
!
Util
::
validateVideoFormat
(
params
.
out
.
video
))
{
LOG
(
ERROR
)
<<
"Invalid video format"
<<
", width: "
<<
params
.
out
.
video
.
width
<<
", height: "
<<
params
.
out
.
video
.
height
<<
", format: "
<<
params
.
out
.
video
.
format
<<
", minDimension: "
<<
params
.
out
.
video
.
minDimension
<<
", crop: "
<<
params
.
out
.
video
.
cropImage
;
return
false
;
}
scaleFormat_
.
format
=
params
.
out
.
video
.
format
;
Util
::
setFormatDimensions
(
scaleFormat_
.
width
,
scaleFormat_
.
height
,
params
.
out
.
video
.
width
,
params
.
out
.
video
.
height
,
params
.
in
.
video
.
width
,
params
.
in
.
video
.
height
,
0
,
1
);
if
(
!
(
scaleFormat_
==
params_
.
out
.
video
))
{
// crop required
cropContext_
=
sws_getContext
(
params
.
out
.
video
.
width
,
params
.
out
.
video
.
height
,
(
AVPixelFormat
)
params_
.
out
.
video
.
format
,
params
.
out
.
video
.
width
,
params
.
out
.
video
.
height
,
(
AVPixelFormat
)
params
.
out
.
video
.
format
,
swsFlags_
,
nullptr
,
nullptr
,
nullptr
);
if
(
!
cropContext_
)
{
LOG
(
ERROR
)
<<
"sws_getContext failed for crop context"
;
return
false
;
}
const
auto
scaleImageSize
=
av_image_get_buffer_size
(
(
AVPixelFormat
)
scaleFormat_
.
format
,
scaleFormat_
.
width
,
scaleFormat_
.
height
,
1
);
scaleBuffer_
.
resize
(
scaleImageSize
);
}
}
else
{
scaleFormat_
=
params
.
out
.
video
;
}
VLOG
(
1
)
<<
"Input format #"
<<
loggingUuid_
<<
", width "
<<
params
.
in
.
video
.
width
<<
", height "
<<
params
.
in
.
video
.
height
<<
", format "
<<
params
.
in
.
video
.
format
<<
", minDimension "
<<
params
.
in
.
video
.
minDimension
<<
", cropImage "
<<
params
.
in
.
video
.
cropImage
;
VLOG
(
1
)
<<
"Scale format #"
<<
loggingUuid_
<<
", width "
<<
scaleFormat_
.
width
<<
", height "
<<
scaleFormat_
.
height
<<
", format "
<<
scaleFormat_
.
format
<<
", minDimension "
<<
scaleFormat_
.
minDimension
<<
", cropImage "
<<
scaleFormat_
.
cropImage
;
VLOG
(
1
)
<<
"Crop format #"
<<
loggingUuid_
<<
", width "
<<
params
.
out
.
video
.
width
<<
", height "
<<
params
.
out
.
video
.
height
<<
", format "
<<
params
.
out
.
video
.
format
<<
", minDimension "
<<
params
.
out
.
video
.
minDimension
<<
", cropImage "
<<
params
.
out
.
video
.
cropImage
;
scaleContext_
=
sws_getContext
(
params
.
in
.
video
.
width
,
params
.
in
.
video
.
height
,
(
AVPixelFormat
)
params
.
in
.
video
.
format
,
scaleFormat_
.
width
,
scaleFormat_
.
height
,
(
AVPixelFormat
)
scaleFormat_
.
format
,
swsFlags_
,
nullptr
,
nullptr
,
nullptr
);
// set output format
params_
=
params
;
return
scaleContext_
!=
nullptr
;
}
// Size in bytes of one output image in the configured output format.
int VideoSampler::getImageBytes() const {
  return av_image_get_buffer_size(
      (AVPixelFormat)params_.out.video.format,
      params_.out.video.width,
      params_.out.video.height,
      1);
}
// Scales (and, if configured, center-crops) the source planes into @out.
// When @allocateBuffer is set, @out is cleared and grown to fit the result.
// Returns the output image size in bytes, or a negative error code.
int VideoSampler::sample(
    const uint8_t* const srcSlice[],
    int srcStride[],
    ByteStorage* out,
    bool allocateBuffer) {
  int result;
  // scaled and cropped image
  const auto outImageSize = getImageBytes();
  if (allocateBuffer) {
    out->clear();
    out->ensure(outImageSize);
  }
  CHECK_LE(outImageSize, out->tail());

  uint8_t* scalePlanes[4] = {nullptr};
  int scaleLines[4] = {0};
  // perform scale first
  if ((result = transformImage(
           scaleContext_,
           srcSlice,
           srcStride,
           params_.in.video,
           scaleFormat_,
           // for crop use internal buffer
           cropContext_ ? scaleBuffer_.data() : out->writableTail(),
           scalePlanes,
           scaleLines))) {
    return result;
  }

  // is crop required?
  if (cropContext_) {
    uint8_t* cropPlanes[4] = {nullptr};
    int cropLines[4] = {0};

    if (params_.out.video.height < scaleFormat_.height) {
      // Destination image is wider of source image: cut top and bottom
      // by advancing each plane pointer past the top margin rows.
      for (size_t i = 0; i < 4 && scalePlanes[i] != nullptr; ++i) {
        scalePlanes[i] += scaleLines[i] *
            (scaleFormat_.height - params_.out.video.height) / 2;
      }
    } else {
      // Source image is wider of destination image: cut sides.
      // NOTE(review): lineSize/width approximates bytes-per-pixel here;
      // presumably only valid for packed formats — confirm before reuse.
      for (size_t i = 0; i < 4 && scalePlanes[i] != nullptr; ++i) {
        scalePlanes[i] += scaleLines[i] *
            (scaleFormat_.width - params_.out.video.width) / 2 /
            scaleFormat_.width;
      }
    }

    // crop image
    if ((result = transformImage(
             cropContext_,
             scalePlanes,
             scaleLines,
             params_.out.video,
             params_.out.video,
             out->writableTail(),
             cropPlanes,
             cropLines))) {
      return result;
    }
  }

  out->append(outImageSize);
  return outImageSize;
}
// Samples a decoded AVFrame into @out. A null frame is a flush request,
// which is a no-op for video.
int VideoSampler::sample(AVFrame* frame, ByteStorage* out) {
  if (!frame) {
    return 0; // no flush for videos
  }
  return sample(frame->data, frame->linesize, out, false);
}

// Samples a raw image held in @in (laid out per the configured input
// format) into @out. A null input is a flush request (no-op for video).
int VideoSampler::sample(const ByteStorage* in, ByteStorage* out) {
  if (!in) {
    return 0; // no flush for videos
  }
  int result;
  uint8_t* inPlanes[4] = {nullptr};
  int inLineSize[4] = {0};
  if ((result = preparePlanes(
           params_.in.video, in->data(), inPlanes, inLineSize)) < 0) {
    return result;
  }
  return sample(inPlanes, inLineSize, out, true);
}
// Frees both swscale contexts and drops the crop scratch buffer.
void VideoSampler::cleanUp() {
  if (scaleContext_) {
    sws_freeContext(scaleContext_);
    scaleContext_ = nullptr;
  }
  if (cropContext_) {
    sws_freeContext(cropContext_);
    cropContext_ = nullptr;
    scaleBuffer_.clear();
  }
}
}
// namespace ffmpeg
torchvision/csrc/cpu/decoder/video_sampler.h
deleted
100644 → 0
View file @
c8345212
// Copyright 2004-present Facebook. All Rights Reserved.
#pragma once
#include "defs.h"
extern
"C"
{
#include <libavformat/avformat.h>
#include "libswscale/swscale.h"
}
namespace
ffmpeg
{
/**
* Class transcode video frames from one format into another
*/
/**
 * Class transcode video frames from one format into another
 */
class VideoSampler : public MediaSampler {
 public:
  explicit VideoSampler(int swsFlags = SWS_AREA, int64_t loggingUuid = 0);

  ~VideoSampler() override;

  // MediaSampler overrides
  bool init(const SamplerParameters& params) override;
  int sample(const ByteStorage* in, ByteStorage* out) override;
  void shutdown() override;

  // returns number processed/scaling bytes
  int sample(AVFrame* frame, ByteStorage* out);

  // Size in bytes of one output image.
  int getImageBytes() const;

 private:
  // close resources
  void cleanUp();

  // helper functions for rescaling, cropping, etc.
  int sample(
      const uint8_t* const srcSlice[],
      int srcStride[],
      ByteStorage* out,
      bool allocateBuffer);

 private:
  VideoFormat scaleFormat_;                 // intermediate scale format
  SwsContext* scaleContext_{nullptr};       // input -> scale format
  SwsContext* cropContext_{nullptr};        // scale format -> output (crop)
  int swsFlags_{SWS_AREA};
  std::vector<uint8_t> scaleBuffer_;        // scratch for pre-crop image
  int64_t loggingUuid_{0};
};
}
// namespace ffmpeg
torchvision/csrc/cpu/decoder/video_stream.cpp
deleted
100644 → 0
View file @
c8345212
// Copyright 2004-present Facebook. All Rights Reserved.
#include "video_stream.h"
#include <c10/util/Logging.h>
#include "util.h"
namespace
ffmpeg
{
namespace {

// True when the frame's dimensions and pixel format match @x exactly.
bool operator==(const VideoFormat& x, const AVFrame& y) {
  return x.width == y.width && x.height == y.height && x.format == y.format;
}

// Copies the frame's dimensions and pixel format into @x; returns @x.
VideoFormat& toVideoFormat(VideoFormat& x, const AVFrame& y) {
  x.width = y.width;
  x.height = y.height;
  x.format = y.format;
  return x;
}

} // namespace
VideoStream::VideoStream(
    AVFormatContext* inputCtx,
    int index,
    bool convertPtsToWallTime,
    const VideoFormat& format,
    int64_t loggingUuid)
    : Stream(
          inputCtx,
          MediaFormat::makeMediaFormat(format, index),
          convertPtsToWallTime),
      loggingUuid_(loggingUuid) {}

VideoStream::~VideoStream() {
  // Shut the sampler down explicitly before releasing it.
  if (sampler_) {
    sampler_->shutdown();
    sampler_.reset();
  }
}
// Lazily creates the sampler on first use.
void VideoStream::ensureSampler() {
  if (!sampler_) {
    sampler_ = std::make_unique<VideoSampler>(SWS_AREA, loggingUuid_);
  }
}
int
VideoStream
::
initFormat
()
{
// set output format
if
(
!
Util
::
validateVideoFormat
(
format_
.
format
.
video
))
{
LOG
(
ERROR
)
<<
"Invalid video format"
<<
", width: "
<<
format_
.
format
.
video
.
width
<<
", height: "
<<
format_
.
format
.
video
.
height
<<
", format: "
<<
format_
.
format
.
video
.
format
<<
", minDimension: "
<<
format_
.
format
.
video
.
minDimension
<<
", crop: "
<<
format_
.
format
.
video
.
cropImage
;
return
-
1
;
}
// keep aspect ratio
Util
::
setFormatDimensions
(
format_
.
format
.
video
.
width
,
format_
.
format
.
video
.
height
,
format_
.
format
.
video
.
width
,
format_
.
format
.
video
.
height
,
codecCtx_
->
width
,
codecCtx_
->
height
,
format_
.
format
.
video
.
minDimension
,
0
);
if
(
format_
.
format
.
video
.
format
==
AV_PIX_FMT_NONE
)
{
format_
.
format
.
video
.
format
=
codecCtx_
->
pix_fmt
;
}
return
format_
.
format
.
video
.
width
!=
0
&&
format_
.
format
.
video
.
height
!=
0
&&
format_
.
format
.
video
.
format
!=
AV_PIX_FMT_NONE
?
0
:
-
1
;
}
// Returns the number of bytes one sampled output image requires.
// Re-initializes the sampler whenever the decoded frame's format no longer
// matches the sampler's input format (unless flushing).
int VideoStream::estimateBytes(bool flush) {
  ensureSampler();
  // check if input format gets changed
  if (!flush && !(sampler_->getInputFormat().video == *frame_)) {
    // - reinit sampler
    SamplerParameters params;
    params.type = format_.type;
    params.out = format_.format;
    toVideoFormat(params.in.video, *frame_);
    if (!sampler_->init(params)) {
      return -1;
    }

    VLOG(1) << "Set input video sampler format"
            << ", width: " << params.in.video.width
            << ", height: " << params.in.video.height
            << ", format: " << params.in.video.format
            << " : output video sampler format"
            << ", width: " << format_.format.video.width
            << ", height: " << format_.format.video.height
            << ", format: " << format_.format.video.format
            << ", minDimension: " << format_.format.video.minDimension
            << ", crop: " << format_.format.video.cropImage;
  }
  return sampler_->getImageBytes();
}
// Samples the current frame into @out; a flush passes a null frame through
// (a no-op for video sampling).
int VideoStream::copyFrameBytes(ByteStorage* out, bool flush) {
  ensureSampler();
  return sampler_->sample(flush ? nullptr : frame_, out);
}
// Populates the output header: sequence number, pts rescaled into
// AV_TIME_BASE_Q (optionally mapped to wall time), key-frame flag, fps,
// and the stream's media format.
// NOTE(review): av_frame_get_best_effort_timestamp is deprecated in newer
// FFMPEG releases in favor of frame->best_effort_timestamp — confirm the
// targeted FFMPEG version before modernizing.
void VideoStream::setHeader(DecoderHeader* header) {
  header->seqno = numGenerator_++;

  if (codecCtx_->time_base.num != 0) {
    header->pts = av_rescale_q(
        av_frame_get_best_effort_timestamp(frame_),
        codecCtx_->time_base,
        AV_TIME_BASE_Q);
  } else {
    // If the codec time_base is missing then we would've skipped the
    // rescalePackage step to rescale to codec time_base, so here we can
    // rescale straight from the stream time_base into AV_TIME_BASE_Q.
    header->pts = av_rescale_q(
        av_frame_get_best_effort_timestamp(frame_),
        inputCtx_->streams[format_.stream]->time_base,
        AV_TIME_BASE_Q);
  }

  if (convertPtsToWallTime_) {
    keeper_.adjust(header->pts);
  }

  header->keyFrame = frame_->key_frame;

  auto fpsRational = inputCtx_->streams[format_.stream]->avg_frame_rate;
  if (fpsRational.den) {
    header->fps = av_q2d(fpsRational);
  } else {
    // Unknown frame rate.
    header->fps = std::numeric_limits<double>::quiet_NaN();
  }

  header->format = format_;
}
}
// namespace ffmpeg
torchvision/csrc/cpu/decoder/video_stream.h
deleted
100644 → 0
View file @
c8345212
// Copyright 2004-present Facebook. All Rights Reserved.
#pragma once
#include "stream.h"
#include "time_keeper.h"
#include "video_sampler.h"
namespace
ffmpeg
{
/**
* Class uses FFMPEG library to decode one video stream.
*/
/**
 * Class uses FFMPEG library to decode one video stream.
 */
class VideoStream : public Stream {
 public:
  VideoStream(
      AVFormatContext* inputCtx,
      int index,
      bool convertPtsToWallTime,
      const VideoFormat& format,
      int64_t loggingUuid = 0);

  ~VideoStream() override;

 private:
  // Stream overrides
  int initFormat() override;
  int estimateBytes(bool flush) override;
  int copyFrameBytes(ByteStorage* out, bool flush) override;
  void setHeader(DecoderHeader* header) override;

  // Lazily creates the sampler on first use.
  void ensureSampler();

 private:
  std::unique_ptr<VideoSampler> sampler_; // scales/crops decoded frames
  TimeKeeper keeper_;                     // pts -> wall clock mapping
  int64_t loggingUuid_{0};
};
}
// namespace ffmpeg
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment