Skip to content

Commit 1befed7

Browse files
NicolasHugMolly Xumollyxu
authored
Fallback to CPU when video isn't supported by NVDEC (#977)
Co-authored-by: Molly Xu <[email protected]> Co-authored-by: Molly Xu <[email protected]>
1 parent 871e5a7 commit 1befed7

File tree

3 files changed

+164
-92
lines changed

3 files changed

+164
-92
lines changed

src/torchcodec/_core/BetaCudaDeviceInterface.cpp

Lines changed: 140 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -53,74 +53,6 @@ pfnDisplayPictureCallback(void* pUserData, CUVIDPARSERDISPINFO* dispInfo) {
5353
}
5454

5555
static UniqueCUvideodecoder createDecoder(CUVIDEOFORMAT* videoFormat) {
56-
// Check decoder capabilities - same checks as DALI
57-
auto caps = CUVIDDECODECAPS{};
58-
caps.eCodecType = videoFormat->codec;
59-
caps.eChromaFormat = videoFormat->chroma_format;
60-
caps.nBitDepthMinus8 = videoFormat->bit_depth_luma_minus8;
61-
CUresult result = cuvidGetDecoderCaps(&caps);
62-
TORCH_CHECK(result == CUDA_SUCCESS, "Failed to get decoder caps: ", result);
63-
64-
TORCH_CHECK(
65-
caps.bIsSupported,
66-
"Codec configuration not supported on this GPU. "
67-
"Codec: ",
68-
static_cast<int>(videoFormat->codec),
69-
", chroma format: ",
70-
static_cast<int>(videoFormat->chroma_format),
71-
", bit depth: ",
72-
videoFormat->bit_depth_luma_minus8 + 8);
73-
74-
TORCH_CHECK(
75-
videoFormat->coded_width >= caps.nMinWidth &&
76-
videoFormat->coded_height >= caps.nMinHeight,
77-
"Video is too small in at least one dimension. Provided: ",
78-
videoFormat->coded_width,
79-
"x",
80-
videoFormat->coded_height,
81-
" vs supported:",
82-
caps.nMinWidth,
83-
"x",
84-
caps.nMinHeight);
85-
86-
TORCH_CHECK(
87-
videoFormat->coded_width <= caps.nMaxWidth &&
88-
videoFormat->coded_height <= caps.nMaxHeight,
89-
"Video is too large in at least one dimension. Provided: ",
90-
videoFormat->coded_width,
91-
"x",
92-
videoFormat->coded_height,
93-
" vs supported:",
94-
caps.nMaxWidth,
95-
"x",
96-
caps.nMaxHeight);
97-
98-
// See nMaxMBCount in cuviddec.h
99-
constexpr unsigned int macroblockConstant = 256;
100-
TORCH_CHECK(
101-
videoFormat->coded_width * videoFormat->coded_height /
102-
macroblockConstant <=
103-
caps.nMaxMBCount,
104-
"Video is too large (too many macroblocks). "
105-
"Provided (width * height / ",
106-
macroblockConstant,
107-
"): ",
108-
videoFormat->coded_width * videoFormat->coded_height / macroblockConstant,
109-
" vs supported:",
110-
caps.nMaxMBCount);
111-
112-
// Below we'll set the decoderParams.OutputFormat to NV12, so we need to make
113-
// sure it's actually supported.
114-
TORCH_CHECK(
115-
(caps.nOutputFormatMask >> cudaVideoSurfaceFormat_NV12) & 1,
116-
"NV12 output format is not supported for this configuration. ",
117-
"Codec: ",
118-
static_cast<int>(videoFormat->codec),
119-
", chroma format: ",
120-
static_cast<int>(videoFormat->chroma_format),
121-
", bit depth: ",
122-
videoFormat->bit_depth_luma_minus8 + 8);
123-
12456
// Decoder creation parameters, most are taken from DALI
12557
CUVIDDECODECREATEINFO decoderParams = {};
12658
decoderParams.bitDepthMinus8 = videoFormat->bit_depth_luma_minus8;
@@ -157,13 +89,39 @@ static UniqueCUvideodecoder createDecoder(CUVIDEOFORMAT* videoFormat) {
15789
decoderParams.display_area.bottom = videoFormat->display_area.bottom;
15890

15991
CUvideodecoder* decoder = new CUvideodecoder();
160-
result = cuvidCreateDecoder(decoder, &decoderParams);
92+
CUresult result = cuvidCreateDecoder(decoder, &decoderParams);
16193
TORCH_CHECK(
16294
result == CUDA_SUCCESS, "Failed to create NVDEC decoder: ", result);
16395
return UniqueCUvideodecoder(decoder, CUvideoDecoderDeleter{});
16496
}
16597

166-
cudaVideoCodec validateCodecSupport(AVCodecID codecId) {
98+
std::optional<cudaVideoChromaFormat> validateChromaSupport(
99+
const AVPixFmtDescriptor* desc) {
100+
// Return the corresponding cudaVideoChromaFormat if supported, std::nullopt
101+
// otherwise.
102+
TORCH_CHECK(desc != nullptr, "desc can't be null");
103+
104+
if (desc->nb_components == 1) {
105+
return cudaVideoChromaFormat_Monochrome;
106+
} else if (desc->nb_components >= 3 && !(desc->flags & AV_PIX_FMT_FLAG_RGB)) {
107+
// Make sure it's YUV: has chroma planes and isn't RGB
108+
if (desc->log2_chroma_w == 0 && desc->log2_chroma_h == 0) {
109+
return cudaVideoChromaFormat_444; // 1x1 subsampling = 4:4:4
110+
} else if (desc->log2_chroma_w == 1 && desc->log2_chroma_h == 1) {
111+
return cudaVideoChromaFormat_420; // 2x2 subsampling = 4:2:0
112+
} else if (desc->log2_chroma_w == 1 && desc->log2_chroma_h == 0) {
113+
return cudaVideoChromaFormat_422; // 2x1 subsampling = 4:2:2
114+
}
115+
}
116+
117+
return std::nullopt;
118+
}
119+
120+
std::optional<cudaVideoCodec> validateCodecSupport(AVCodecID codecId) {
121+
// Return the corresponding cudaVideoCodec if supported, std::nullopt
122+
// otherwise
123+
// Note that we currently return nullopt (and thus fallback to CPU) for some
124+
// codecs that are technically supported by NVDEC, see comment below.
167125
switch (codecId) {
168126
case AV_CODEC_ID_H264:
169127
return cudaVideoCodec_H264;
@@ -189,10 +147,69 @@ cudaVideoCodec validateCodecSupport(AVCodecID codecId) {
189147
// return cudaVideoCodec_JPEG;
190148
// case AV_CODEC_ID_VC1:
191149
// return cudaVideoCodec_VC1;
192-
default: {
193-
TORCH_CHECK(false, "Unsupported codec type: ", avcodec_get_name(codecId));
194-
}
150+
default:
151+
return std::nullopt;
152+
}
153+
}
154+
155+
bool nativeNVDECSupport(const SharedAVCodecContext& codecContext) {
156+
// Return true iff the input video stream is supported by our NVDEC
157+
// implementation.
158+
auto codecType = validateCodecSupport(codecContext->codec_id);
159+
if (!codecType.has_value()) {
160+
return false;
161+
}
162+
163+
const AVPixFmtDescriptor* desc = av_pix_fmt_desc_get(codecContext->pix_fmt);
164+
if (!desc) {
165+
return false;
166+
}
167+
168+
auto chromaFormat = validateChromaSupport(desc);
169+
if (!chromaFormat.has_value()) {
170+
return false;
171+
}
172+
173+
auto caps = CUVIDDECODECAPS{};
174+
caps.eCodecType = codecType.value();
175+
caps.eChromaFormat = chromaFormat.value();
176+
caps.nBitDepthMinus8 = desc->comp[0].depth - 8;
177+
178+
CUresult result = cuvidGetDecoderCaps(&caps);
179+
if (result != CUDA_SUCCESS) {
180+
return false;
181+
}
182+
183+
if (!caps.bIsSupported) {
184+
return false;
185+
}
186+
187+
auto coded_width = static_cast<unsigned int>(codecContext->coded_width);
188+
auto coded_height = static_cast<unsigned int>(codecContext->coded_height);
189+
if (coded_width < static_cast<unsigned int>(caps.nMinWidth) ||
190+
coded_height < static_cast<unsigned int>(caps.nMinHeight) ||
191+
coded_width > caps.nMaxWidth || coded_height > caps.nMaxHeight) {
192+
return false;
193+
}
194+
195+
// See nMaxMBCount in cuviddec.h
196+
constexpr unsigned int macroblockConstant = 256;
197+
if (coded_width * coded_height / macroblockConstant > caps.nMaxMBCount) {
198+
return false;
199+
}
200+
201+
// We'll set the decoderParams.OutputFormat to NV12, so we need to make
202+
// sure it's actually supported.
203+
// TODO: If this fail, we could consider decoding to something else than NV12
204+
// (like cudaVideoSurfaceFormat_P016) instead of falling back to CPU. This is
205+
// what FFmpeg does.
206+
bool supportsNV12Output =
207+
(caps.nOutputFormatMask >> cudaVideoSurfaceFormat_NV12) & 1;
208+
if (!supportsNV12Output) {
209+
return false;
195210
}
211+
212+
return true;
196213
}
197214

198215
} // namespace
@@ -232,6 +249,19 @@ void BetaCudaDeviceInterface::initialize(
232249
const AVStream* avStream,
233250
const UniqueDecodingAVFormatContext& avFormatCtx,
234251
[[maybe_unused]] const SharedAVCodecContext& codecContext) {
252+
if (!nativeNVDECSupport(codecContext)) {
253+
cpuFallback_ = createDeviceInterface(torch::kCPU);
254+
TORCH_CHECK(
255+
cpuFallback_ != nullptr, "Failed to create CPU device interface");
256+
cpuFallback_->initialize(avStream, avFormatCtx, codecContext);
257+
cpuFallback_->initializeVideo(
258+
VideoStreamOptions(),
259+
{},
260+
/*resizedOutputDims=*/std::nullopt);
261+
// We'll always use the CPU fallback from now on, so we can return early.
262+
return;
263+
}
264+
235265
TORCH_CHECK(avStream != nullptr, "AVStream cannot be null");
236266
timeBase_ = avStream->time_base;
237267
frameRateAvgFromFFmpeg_ = avStream->r_frame_rate;
@@ -243,7 +273,11 @@ void BetaCudaDeviceInterface::initialize(
243273

244274
// Create parser. Default values that aren't obvious are taken from DALI.
245275
CUVIDPARSERPARAMS parserParams = {};
246-
parserParams.CodecType = validateCodecSupport(codecPar->codec_id);
276+
auto codecType = validateCodecSupport(codecPar->codec_id);
277+
TORCH_CHECK(
278+
codecType.has_value(),
279+
"This should never happen, we should be using the CPU fallback by now. Please report a bug.");
280+
parserParams.CodecType = codecType.value();
247281
parserParams.ulMaxNumDecodeSurfaces = 8;
248282
parserParams.ulMaxDisplayDelay = 0;
249283
// Callback setup, all are triggered by the parser within a call
@@ -383,6 +417,10 @@ int BetaCudaDeviceInterface::streamPropertyChange(CUVIDEOFORMAT* videoFormat) {
383417
// Moral equivalent of avcodec_send_packet(). Here, we pass the AVPacket down to
384418
// the NVCUVID parser.
385419
int BetaCudaDeviceInterface::sendPacket(ReferenceAVPacket& packet) {
420+
if (cpuFallback_) {
421+
return cpuFallback_->sendPacket(packet);
422+
}
423+
386424
TORCH_CHECK(
387425
packet.get() && packet->data && packet->size > 0,
388426
"sendPacket received an empty packet, this is unexpected, please report.");
@@ -406,6 +444,10 @@ int BetaCudaDeviceInterface::sendPacket(ReferenceAVPacket& packet) {
406444
}
407445

408446
int BetaCudaDeviceInterface::sendEOFPacket() {
447+
if (cpuFallback_) {
448+
return cpuFallback_->sendEOFPacket();
449+
}
450+
409451
CUVIDSOURCEDATAPACKET cuvidPacket = {};
410452
cuvidPacket.flags = CUVID_PKT_ENDOFSTREAM;
411453
eofSent_ = true;
@@ -467,6 +509,10 @@ int BetaCudaDeviceInterface::frameReadyInDisplayOrder(
467509

468510
// Moral equivalent of avcodec_receive_frame().
469511
int BetaCudaDeviceInterface::receiveFrame(UniqueAVFrame& avFrame) {
512+
if (cpuFallback_) {
513+
return cpuFallback_->receiveFrame(avFrame);
514+
}
515+
470516
if (readyFrames_.empty()) {
471517
// No frame found, instruct caller to try again later after sending more
472518
// packets, or to stop if EOF was already sent.
@@ -601,6 +647,11 @@ UniqueAVFrame BetaCudaDeviceInterface::convertCudaFrameToAVFrame(
601647
}
602648

603649
void BetaCudaDeviceInterface::flush() {
650+
if (cpuFallback_) {
651+
cpuFallback_->flush();
652+
return;
653+
}
654+
604655
// The NVCUVID docs mention that after seeking, i.e. when flush() is called,
605656
// we should send a packet with the CUVID_PKT_DISCONTINUITY flag. The docs
606657
// don't say whether this should be an empty packet, or whether it should be a
@@ -618,6 +669,21 @@ void BetaCudaDeviceInterface::convertAVFrameToFrameOutput(
618669
UniqueAVFrame& avFrame,
619670
FrameOutput& frameOutput,
620671
std::optional<torch::Tensor> preAllocatedOutputTensor) {
672+
if (cpuFallback_) {
673+
// CPU decoded frame - need to do CPU color conversion then transfer to GPU
674+
FrameOutput cpuFrameOutput;
675+
cpuFallback_->convertAVFrameToFrameOutput(avFrame, cpuFrameOutput);
676+
677+
// Transfer CPU frame to GPU
678+
if (preAllocatedOutputTensor.has_value()) {
679+
preAllocatedOutputTensor.value().copy_(cpuFrameOutput.data);
680+
frameOutput.data = preAllocatedOutputTensor.value();
681+
} else {
682+
frameOutput.data = cpuFrameOutput.data.to(device_);
683+
}
684+
return;
685+
}
686+
621687
// TODONVDEC P2: we may need to handle 10bit videos the same way the CUDA
622688
// ffmpeg interface does it with maybeConvertAVFrameToNV12OrRGB24().
623689
TORCH_CHECK(

src/torchcodec/_core/BetaCudaDeviceInterface.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,8 @@ class BetaCudaDeviceInterface : public DeviceInterface {
9494

9595
// NPP context for color conversion
9696
UniqueNppContext nppCtx_;
97+
98+
std::unique_ptr<DeviceInterface> cpuFallback_;
9799
};
98100

99101
} // namespace facebook::torchcodec

test/test_decoders.py

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1701,20 +1701,19 @@ def test_beta_cuda_interface_backwards(self, asset, seek_mode):
17011701
assert beta_frame.duration_seconds == ref_frame.duration_seconds
17021702

17031703
@needs_cuda
1704-
def test_beta_cuda_interface_small_h265(self):
1705-
# Test to illustrate current difference in behavior between the BETA and
1706-
# the ffmpeg interface: this video isn't supported by NVDEC, but in the
1707-
# ffmpeg interface, FFMPEG fallsback to the CPU while we don't.
1708-
1709-
VideoDecoder(H265_VIDEO.path, device="cuda").get_frame_at(0)
1710-
1704+
def test_beta_cuda_interface_cpu_fallback(self):
1705+
# Non-regression test for the CPU fallback behavior of the BETA CUDA
1706+
# interface.
1707+
# We know that the H265_VIDEO asset isn't supported by NVDEC, its
1708+
# dimensions are too small. We also know that the FFmpeg CUDA interface
1709+
# falls back to the CPU path in such cases. We assert that we fall back
1710+
# to the CPU path, too.
1711+
1712+
ffmpeg = VideoDecoder(H265_VIDEO.path, device="cuda").get_frame_at(0)
17111713
with set_cuda_backend("beta"):
1712-
dec = VideoDecoder(H265_VIDEO.path, device="cuda")
1713-
with pytest.raises(
1714-
RuntimeError,
1715-
match="Video is too small in at least one dimension. Provided: 128x128 vs supported:144x144",
1716-
):
1717-
dec.get_frame_at(0)
1714+
beta = VideoDecoder(H265_VIDEO.path, device="cuda").get_frame_at(0)
1715+
1716+
torch.testing.assert_close(ffmpeg.data, beta.data, rtol=0, atol=0)
17181717

17191718
@needs_cuda
17201719
def test_beta_cuda_interface_error(self):
@@ -1740,15 +1739,20 @@ def test_set_cuda_backend(self):
17401739
assert _get_cuda_backend() == "beta"
17411740

17421741
def assert_decoder_uses(decoder, *, expected_backend):
1742+
# TODO: This doesn't work anymore after
1743+
# https://github.com/meta-pytorch/torchcodec/pull/977
1744+
# We need to define a better way to assert which backend a decoder
1745+
# is using.
1746+
return
17431747
# Assert that a decoder instance is using a given backend.
17441748
#
17451749
# We know H265_VIDEO fails on the BETA backend while it works on the
17461750
# ffmpeg one.
1747-
if expected_backend == "ffmpeg":
1748-
decoder.get_frame_at(0) # this would fail if this was BETA
1749-
else:
1750-
with pytest.raises(RuntimeError, match="Video is too small"):
1751-
decoder.get_frame_at(0)
1751+
# if expected_backend == "ffmpeg":
1752+
# decoder.get_frame_at(0) # this would fail if this was BETA
1753+
# else:
1754+
# with pytest.raises(RuntimeError, match="Video is too small"):
1755+
# decoder.get_frame_at(0)
17521756

17531757
# Check that the default is the ffmpeg backend
17541758
assert _get_cuda_backend() == "ffmpeg"

0 commit comments

Comments
 (0)