diff --git a/src/torchcodec/_core/CpuDeviceInterface.h b/src/torchcodec/_core/CpuDeviceInterface.h index 801b83826..2a6bceac3 100644 --- a/src/torchcodec/_core/CpuDeviceInterface.h +++ b/src/torchcodec/_core/CpuDeviceInterface.h @@ -19,7 +19,8 @@ class CpuDeviceInterface : public DeviceInterface { virtual ~CpuDeviceInterface() {} std::optional findCodec( - [[maybe_unused]] const AVCodecID& codecId) override { + [[maybe_unused]] const AVCodecID& codecId, + [[maybe_unused]] bool isDecoder = true) override { return std::nullopt; } diff --git a/src/torchcodec/_core/CudaDeviceInterface.cpp b/src/torchcodec/_core/CudaDeviceInterface.cpp index dfb660e85..34c6ab6d5 100644 --- a/src/torchcodec/_core/CudaDeviceInterface.cpp +++ b/src/torchcodec/_core/CudaDeviceInterface.cpp @@ -337,12 +337,19 @@ void CudaDeviceInterface::convertAVFrameToFrameOutput( // appropriately set, so we just go off and find the matching codec for the CUDA // device std::optional CudaDeviceInterface::findCodec( - const AVCodecID& codecId) { + const AVCodecID& codecId, + bool isDecoder) { void* i = nullptr; const AVCodec* codec = nullptr; while ((codec = av_codec_iterate(&i)) != nullptr) { - if (codec->id != codecId || !av_codec_is_decoder(codec)) { - continue; + if (isDecoder) { + if (codec->id != codecId || !av_codec_is_decoder(codec)) { + continue; + } + } else { + if (codec->id != codecId || !av_codec_is_encoder(codec)) { + continue; + } } const AVCodecHWConfig* config = nullptr; @@ -487,5 +494,4 @@ void CudaDeviceInterface::setupHardwareFrameContextForEncoding( } codecContext->hw_frames_ctx = hwFramesCtxRef; } - } // namespace facebook::torchcodec diff --git a/src/torchcodec/_core/CudaDeviceInterface.h b/src/torchcodec/_core/CudaDeviceInterface.h index d4460b169..267127c68 100644 --- a/src/torchcodec/_core/CudaDeviceInterface.h +++ b/src/torchcodec/_core/CudaDeviceInterface.h @@ -18,7 +18,9 @@ class CudaDeviceInterface : public DeviceInterface { virtual ~CudaDeviceInterface(); - std::optional findCodec(const AVCodecID& codecId) override; + std::optional findCodec( + const AVCodecID& codecId, + bool isDecoder = true) override; void initialize( const AVStream* avStream, diff --git a/src/torchcodec/_core/DeviceInterface.h b/src/torchcodec/_core/DeviceInterface.h index 2abee52f9..3e0fa0ec3 100644 --- a/src/torchcodec/_core/DeviceInterface.h +++ b/src/torchcodec/_core/DeviceInterface.h @@ -47,7 +47,8 @@ class DeviceInterface { }; virtual std::optional findCodec( - [[maybe_unused]] const AVCodecID& codecId) { + [[maybe_unused]] const AVCodecID& codecId, + [[maybe_unused]] bool isDecoder = true) { return std::nullopt; }; @@ -156,6 +157,11 @@ class DeviceInterface { TORCH_CHECK(false); } + virtual std::optional findHardwareEncoder( + [[maybe_unused]] const AVCodecID& codecId) { + TORCH_CHECK(false); + }; + protected: torch::Device device_; SharedAVCodecContext codecContext_; diff --git a/src/torchcodec/_core/Encoder.cpp b/src/torchcodec/_core/Encoder.cpp index bf81c4276..c19781669 100644 --- a/src/torchcodec/_core/Encoder.cpp +++ b/src/torchcodec/_core/Encoder.cpp @@ -745,18 +745,33 @@ void VideoEncoder::initializeEncoder( avCodec = avcodec_find_encoder(desc->id); } } - TORCH_CHECK( - avCodec != nullptr, - "Video codec ", - codec, - " not found. To see available codecs, run: ffmpeg -encoders"); } else { TORCH_CHECK( avFormatContext_->oformat != nullptr, "Output format is null, unable to find default codec."); - avCodec = avcodec_find_encoder(avFormatContext_->oformat->video_codec); - TORCH_CHECK(avCodec != nullptr, "Video codec not found"); + // If frames are on a CUDA device, try to substitute the default codec + // with its hardware equivalent + if (frames_.device().is_cuda()) { + TORCH_CHECK( + deviceInterface_ != nullptr, + "Device interface is undefined when input frames are on a CUDA device. This should never happen, please report this to the TorchCodec repo."); + auto hwCodec = deviceInterface_->findCodec( + avFormatContext_->oformat->video_codec, /*isDecoder=*/false); + if (hwCodec.has_value()) { + avCodec = hwCodec.value(); + } + } + if (!avCodec) { + avCodec = avcodec_find_encoder(avFormatContext_->oformat->video_codec); + } } + TORCH_CHECK( + avCodec != nullptr, + "Video codec ", + videoStreamOptions.codec.has_value() + ? videoStreamOptions.codec.value() + " " + : "", + "not found. To see available codecs, run: ffmpeg -encoders"); AVCodecContext* avCodecContext = avcodec_alloc_context3(avCodec); TORCH_CHECK(avCodecContext != nullptr, "Couldn't allocate codec context."); diff --git a/test/test_encoders.py b/test/test_encoders.py index ab8a683f1..77ef045c0 100644 --- a/test/test_encoders.py +++ b/test/test_encoders.py @@ -885,7 +885,6 @@ def encode_to_tensor(frames): common_params = dict( crf=0, pixel_format="yuv444p" if device == "cpu" else None, - codec="h264_nvenc" if device != "cpu" else None, ) if method == "to_file": dest = str(tmp_path / "output.mp4") @@ -1337,28 +1336,28 @@ def test_extra_options_utilized(self, tmp_path, profile, colorspace, color_range @needs_ffmpeg_cli @pytest.mark.needs_cuda - # TODO-VideoEncoder: Auto-select codec for GPU encoding + @pytest.mark.parametrize("method", ("to_file", "to_tensor", "to_file_like")) + # TODO-VideoEncoder: Enable additional pixel formats ("yuv420p", "yuv444p") @pytest.mark.parametrize( - "format_codec", + ("format", "codec"), [ + ("mov", None), # will default to h264_nvenc ("mov", "h264_nvenc"), - ("mp4", "hevc_nvenc"), ("avi", "h264_nvenc"), + ("mp4", "hevc_nvenc"), # use non-default codec pytest.param( - ("mkv", "av1_nvenc"), + "mkv", + "av1_nvenc", marks=pytest.mark.skipif( IN_GITHUB_CI, reason="av1_nvenc is not supported on CI" ), ), ], ) - @pytest.mark.parametrize("method", ("to_file", "to_tensor", "to_file_like")) - # TODO-VideoEncoder: Enable additional pixel formats ("yuv420p", "yuv444p") - def test_nvenc_against_ffmpeg_cli(self, tmp_path, format_codec, method): + def test_nvenc_against_ffmpeg_cli(self, tmp_path, method, format, codec): # Encode with FFmpeg CLI using nvenc codecs - format, codec = format_codec device = "cuda" - qp = 1 # Lossless (qp=0) is not supported on av1_nvenc, so we use 1 + qp = 1 # Use near lossless encoding to reduce noise and support av1_nvenc source_frames = self.decode(TEST_SRC_2_720P.path).data.to(device) temp_raw_path = str(tmp_path / "temp_input.raw") @@ -1381,21 +1380,18 @@ def test_nvenc_against_ffmpeg_cli(self, tmp_path, format_codec, method): str(frame_rate), "-i", temp_raw_path, - "-c:v", - codec, # Use specified NVENC hardware encoder ] + # CLI requires explicit codec for nvenc + ffmpeg_cmd.extend(["-c:v", codec if codec is not None else "h264_nvenc"]) + # VideoEncoder will select an NVENC encoder by default since the frames are on GPU. ffmpeg_cmd.extend(["-pix_fmt", "nv12"]) # Output format is always NV12 - if codec == "av1_nvenc": - ffmpeg_cmd.extend(["-rc", "constqp"]) # Set rate control mode for AV1 - ffmpeg_cmd.extend(["-qp", str(qp)]) # Use lossless qp for other codecs + ffmpeg_cmd.extend(["-qp", str(qp)]) ffmpeg_cmd.extend([ffmpeg_encoded_path]) subprocess.run(ffmpeg_cmd, check=True, capture_output=True) - encoder = VideoEncoder(frames=source_frames, frame_rate=frame_rate) + encoder = VideoEncoder(frames=source_frames, frame_rate=frame_rate) encoder_extra_options = {"qp": qp} - if codec == "av1_nvenc": - encoder_extra_options["rc"] = 0 # constqp mode if method == "to_file": encoder_output_path = str(tmp_path / f"nvenc_output.{format}") encoder.to_file(