diff --git a/src/torchcodec/_core/CpuDeviceInterface.h b/src/torchcodec/_core/CpuDeviceInterface.h
index 801b83826..2a6bceac3 100644
--- a/src/torchcodec/_core/CpuDeviceInterface.h
+++ b/src/torchcodec/_core/CpuDeviceInterface.h
@@ -19,7 +19,8 @@ class CpuDeviceInterface : public DeviceInterface {
   virtual ~CpuDeviceInterface() {}
 
   std::optional<const AVCodec*> findCodec(
-      [[maybe_unused]] const AVCodecID& codecId) override {
+      [[maybe_unused]] const AVCodecID& codecId,
+      [[maybe_unused]] bool isDecoder = true) override {
     return std::nullopt;
   }
 
diff --git a/src/torchcodec/_core/CudaDeviceInterface.cpp b/src/torchcodec/_core/CudaDeviceInterface.cpp
index dfb660e85..34c6ab6d5 100644
--- a/src/torchcodec/_core/CudaDeviceInterface.cpp
+++ b/src/torchcodec/_core/CudaDeviceInterface.cpp
@@ -337,12 +337,19 @@ void CudaDeviceInterface::convertAVFrameToFrameOutput(
 // appropriately set, so we just go off and find the matching codec for the CUDA
 // device
 std::optional<const AVCodec*> CudaDeviceInterface::findCodec(
-    const AVCodecID& codecId) {
+    const AVCodecID& codecId,
+    bool isDecoder) {
   void* i = nullptr;
   const AVCodec* codec = nullptr;
   while ((codec = av_codec_iterate(&i)) != nullptr) {
-    if (codec->id != codecId || !av_codec_is_decoder(codec)) {
+    if (codec->id != codecId) {
+      continue;
+    }
+    // Keep only codecs of the requested kind (decoder vs. encoder).
+    const bool isRequestedKind =
+        isDecoder ? av_codec_is_decoder(codec) : av_codec_is_encoder(codec);
+    if (!isRequestedKind) {
       continue;
     }
 
     const AVCodecHWConfig* config = nullptr;
@@ -487,5 +494,4 @@ void CudaDeviceInterface::setupHardwareFrameContextForEncoding(
   }
   codecContext->hw_frames_ctx = hwFramesCtxRef;
 }
-
 } // namespace facebook::torchcodec
diff --git a/src/torchcodec/_core/CudaDeviceInterface.h b/src/torchcodec/_core/CudaDeviceInterface.h
index d4460b169..267127c68 100644
--- a/src/torchcodec/_core/CudaDeviceInterface.h
+++ b/src/torchcodec/_core/CudaDeviceInterface.h
@@ -18,7 +18,11 @@ class CudaDeviceInterface : public DeviceInterface {
 
   virtual ~CudaDeviceInterface();
 
-  std::optional<const AVCodec*> findCodec(const AVCodecID& codecId) override;
+  // Looks up a codec with the given id. isDecoder selects which kind is
+  // considered: decoders when true (the default), encoders when false.
+  std::optional<const AVCodec*> findCodec(
+      const AVCodecID& codecId,
+      bool isDecoder = true) override;
 
   void initialize(
       const AVStream* avStream,
diff --git a/src/torchcodec/_core/DeviceInterface.h b/src/torchcodec/_core/DeviceInterface.h
index 2abee52f9..3e0fa0ec3 100644
--- a/src/torchcodec/_core/DeviceInterface.h
+++ b/src/torchcodec/_core/DeviceInterface.h
@@ -47,7 +47,8 @@ class DeviceInterface {
   };
 
   virtual std::optional<const AVCodec*> findCodec(
-      [[maybe_unused]] const AVCodecID& codecId) {
+      [[maybe_unused]] const AVCodecID& codecId,
+      [[maybe_unused]] bool isDecoder = true) {
     return std::nullopt;
   };
 
@@ -156,6 +157,17 @@ class DeviceInterface {
     TORCH_CHECK(false);
   }
 
+  // Intended (per its name) to look up a hardware encoder for codecId. The
+  // base implementation always fails the check.
+  // NOTE(review): nothing in this patch overrides or calls this hook; the
+  // encoder lookup goes through findCodec(codecId, /*isDecoder=*/false).
+  // Confirm it is still needed.
+  virtual std::optional<const AVCodec*> findHardwareEncoder(
+      [[maybe_unused]] const AVCodecID& codecId) {
+    TORCH_CHECK(
+        false, "findHardwareEncoder is not implemented for this device.");
+  }
+
  protected:
   torch::Device device_;
   SharedAVCodecContext codecContext_;
diff --git a/src/torchcodec/_core/Encoder.cpp b/src/torchcodec/_core/Encoder.cpp
index bf81c4276..c19781669 100644
--- a/src/torchcodec/_core/Encoder.cpp
+++ b/src/torchcodec/_core/Encoder.cpp
@@ -745,18 +745,35 @@ void VideoEncoder::initializeEncoder(
         avCodec = avcodec_find_encoder(desc->id);
       }
     }
-    TORCH_CHECK(
-        avCodec != nullptr,
-        "Video codec ",
-        codec,
-        " not found. To see available codecs, run: ffmpeg -encoders");
   } else {
     TORCH_CHECK(
         avFormatContext_->oformat != nullptr,
         "Output format is null, unable to find default codec.");
-    avCodec = avcodec_find_encoder(avFormatContext_->oformat->video_codec);
-    TORCH_CHECK(avCodec != nullptr, "Video codec not found");
+    const AVCodecID defaultCodecId = avFormatContext_->oformat->video_codec;
+    // For CUDA frames, prefer the hardware encoder that matches the output
+    // format's default codec, when the device interface can provide one.
+    if (frames_.device().is_cuda()) {
+      TORCH_CHECK(
+          deviceInterface_ != nullptr,
+          "Device interface is undefined when input frames are on a CUDA device. This should never happen, please report this to the TorchCodec repo.");
+      const auto hwCodec =
+          deviceInterface_->findCodec(defaultCodecId, /*isDecoder=*/false);
+      if (hwCodec) {
+        avCodec = *hwCodec;
+      }
+    }
+    // No hardware encoder was selected: use FFmpeg's encoder for that id.
+    if (avCodec == nullptr) {
+      avCodec = avcodec_find_encoder(defaultCodecId);
+    }
   }
+  TORCH_CHECK(
+      avCodec != nullptr,
+      "Video codec ",
+      videoStreamOptions.codec.has_value()
+          ? videoStreamOptions.codec.value() + " "
+          : "",
+      "not found. To see available codecs, run: ffmpeg -encoders");
 
   AVCodecContext* avCodecContext = avcodec_alloc_context3(avCodec);
   TORCH_CHECK(avCodecContext != nullptr, "Couldn't allocate codec context.");
diff --git a/test/test_encoders.py b/test/test_encoders.py
index ab8a683f1..77ef045c0 100644
--- a/test/test_encoders.py
+++ b/test/test_encoders.py
@@ -885,7 +885,6 @@ def encode_to_tensor(frames):
             common_params = dict(
                 crf=0,
                 pixel_format="yuv444p" if device == "cpu" else None,
-                codec="h264_nvenc" if device != "cpu" else None,
             )
             if method == "to_file":
                 dest = str(tmp_path / "output.mp4")
@@ -1337,28 +1336,28 @@ def test_extra_options_utilized(self, tmp_path, profile, colorspace, color_range
 
     @needs_ffmpeg_cli
     @pytest.mark.needs_cuda
-    # TODO-VideoEncoder: Auto-select codec for GPU encoding
+    @pytest.mark.parametrize("method", ("to_file", "to_tensor", "to_file_like"))
+    # TODO-VideoEncoder: Enable additional pixel formats ("yuv420p", "yuv444p")
     @pytest.mark.parametrize(
-        "format_codec",
+        ("format", "codec"),
         [
+            ("mov", None),  # will default to h264_nvenc
             ("mov", "h264_nvenc"),
-            ("mp4", "hevc_nvenc"),
             ("avi", "h264_nvenc"),
+            ("mp4", "hevc_nvenc"),  # use non-default codec
             pytest.param(
-                ("mkv", "av1_nvenc"),
+                "mkv",
+                "av1_nvenc",
                 marks=pytest.mark.skipif(
                     IN_GITHUB_CI, reason="av1_nvenc is not supported on CI"
                 ),
             ),
         ],
     )
-    @pytest.mark.parametrize("method", ("to_file", "to_tensor", "to_file_like"))
-    # TODO-VideoEncoder: Enable additional pixel formats ("yuv420p", "yuv444p")
-    def test_nvenc_against_ffmpeg_cli(self, tmp_path, format_codec, method):
+    def test_nvenc_against_ffmpeg_cli(self, tmp_path, method, format, codec):
         # Encode with FFmpeg CLI using nvenc codecs
-        format, codec = format_codec
         device = "cuda"
-        qp = 1  # Lossless (qp=0) is not supported on av1_nvenc, so we use 1
+        qp = 1  # Use near lossless encoding to reduce noise and support av1_nvenc
         source_frames = self.decode(TEST_SRC_2_720P.path).data.to(device)
 
         temp_raw_path = str(tmp_path / "temp_input.raw")
@@ -1381,21 +1380,18 @@ def test_nvenc_against_ffmpeg_cli(self, tmp_path, format_codec, method):
             str(frame_rate),
             "-i",
             temp_raw_path,
-            "-c:v",
-            codec,  # Use specified NVENC hardware encoder
         ]
+        # CLI requires explicit codec for nvenc
+        ffmpeg_cmd.extend(["-c:v", codec if codec is not None else "h264_nvenc"])
+        # VideoEncoder will select an NVENC encoder by default since the frames are on GPU.
 
         ffmpeg_cmd.extend(["-pix_fmt", "nv12"])  # Output format is always NV12
-        if codec == "av1_nvenc":
-            ffmpeg_cmd.extend(["-rc", "constqp"])  # Set rate control mode for AV1
-        ffmpeg_cmd.extend(["-qp", str(qp)])  # Use lossless qp for other codecs
+        ffmpeg_cmd.extend(["-qp", str(qp)])
         ffmpeg_cmd.extend([ffmpeg_encoded_path])
         subprocess.run(ffmpeg_cmd, check=True, capture_output=True)
-        encoder = VideoEncoder(frames=source_frames, frame_rate=frame_rate)
 
+        encoder = VideoEncoder(frames=source_frames, frame_rate=frame_rate)
         encoder_extra_options = {"qp": qp}
-        if codec == "av1_nvenc":
-            encoder_extra_options["rc"] = 0  # constqp mode
         if method == "to_file":
             encoder_output_path = str(tmp_path / f"nvenc_output.{format}")
             encoder.to_file(