Skip to content

Commit 44ca168

Browse files
author
pytorchbot
committed
2025-12-11 nightly release (f6a8161)
1 parent 0233545 commit 44ca168

File tree

14 files changed

+509
-97
lines changed

14 files changed

+509
-97
lines changed

benchmarks/decoders/benchmark_transforms.py

Lines changed: 83 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,11 @@
55

66
import torch
77
from torch import Tensor
8-
from torchcodec._core import add_video_stream, create_from_file, get_frames_by_pts
98
from torchcodec.decoders import VideoDecoder
109
from torchvision.transforms import v2
1110

12-
DEFAULT_NUM_EXP = 20
1311

14-
15-
def bench(f, *args, num_exp=DEFAULT_NUM_EXP, warmup=1) -> Tensor:
12+
def bench(f, *args, num_exp, warmup=1) -> Tensor:
1613

1714
for _ in range(warmup):
1815
f(*args)
@@ -45,37 +42,55 @@ def report_stats(times: Tensor, unit: str = "ms", prefix: str = "") -> float:
4542

4643

4744
def torchvision_resize(
48-
path: Path, pts_seconds: list[float], dims: tuple[int, int]
49-
) -> None:
50-
decoder = create_from_file(str(path), seek_mode="approximate")
51-
add_video_stream(decoder)
52-
raw_frames, *_ = get_frames_by_pts(decoder, timestamps=pts_seconds)
53-
return v2.functional.resize(raw_frames, size=dims)
45+
path: Path, pts_seconds: list[float], dims: tuple[int, int], num_threads: int
46+
) -> Tensor:
47+
decoder = VideoDecoder(
48+
path, seek_mode="approximate", num_ffmpeg_threads=num_threads
49+
)
50+
raw_frames = decoder.get_frames_played_at(pts_seconds)
51+
transformed_frames = v2.Resize(size=dims)(raw_frames.data)
52+
assert len(transformed_frames) == len(pts_seconds)
53+
return transformed_frames
5454

5555

5656
def torchvision_crop(
57-
path: Path, pts_seconds: list[float], dims: tuple[int, int], x: int, y: int
58-
) -> None:
59-
decoder = create_from_file(str(path), seek_mode="approximate")
60-
add_video_stream(decoder)
61-
raw_frames, *_ = get_frames_by_pts(decoder, timestamps=pts_seconds)
62-
return v2.functional.crop(raw_frames, top=y, left=x, height=dims[0], width=dims[1])
63-
64-
65-
def decoder_native_resize(
66-
path: Path, pts_seconds: list[float], dims: tuple[int, int]
67-
) -> None:
68-
decoder = create_from_file(str(path), seek_mode="approximate")
69-
add_video_stream(decoder, transform_specs=f"resize, {dims[0]}, {dims[1]}")
70-
return get_frames_by_pts(decoder, timestamps=pts_seconds)[0]
71-
72-
73-
def decoder_native_crop(
74-
path: Path, pts_seconds: list[float], dims: tuple[int, int], x: int, y: int
75-
) -> None:
76-
decoder = create_from_file(str(path), seek_mode="approximate")
77-
add_video_stream(decoder, transform_specs=f"crop, {dims[0]}, {dims[1]}, {x}, {y}")
78-
return get_frames_by_pts(decoder, timestamps=pts_seconds)[0]
57+
path: Path, pts_seconds: list[float], dims: tuple[int, int], num_threads: int
58+
) -> Tensor:
59+
decoder = VideoDecoder(
60+
path, seek_mode="approximate", num_ffmpeg_threads=num_threads
61+
)
62+
raw_frames = decoder.get_frames_played_at(pts_seconds)
63+
transformed_frames = v2.CenterCrop(size=dims)(raw_frames.data)
64+
assert len(transformed_frames) == len(pts_seconds)
65+
return transformed_frames
66+
67+
68+
def decoder_resize(
69+
path: Path, pts_seconds: list[float], dims: tuple[int, int], num_threads: int
70+
) -> Tensor:
71+
decoder = VideoDecoder(
72+
path,
73+
transforms=[v2.Resize(size=dims)],
74+
seek_mode="approximate",
75+
num_ffmpeg_threads=num_threads,
76+
)
77+
transformed_frames = decoder.get_frames_played_at(pts_seconds).data
78+
assert len(transformed_frames) == len(pts_seconds)
79+
return transformed_frames.data
80+
81+
82+
def decoder_crop(
83+
path: Path, pts_seconds: list[float], dims: tuple[int, int], num_threads: int
84+
) -> Tensor:
85+
decoder = VideoDecoder(
86+
path,
87+
transforms=[v2.CenterCrop(size=dims)],
88+
seek_mode="approximate",
89+
num_ffmpeg_threads=num_threads,
90+
)
91+
transformed_frames = decoder.get_frames_played_at(pts_seconds).data
92+
assert len(transformed_frames) == len(pts_seconds)
93+
return transformed_frames
7994

8095

8196
def main():
@@ -84,9 +99,27 @@ def main():
8499
parser.add_argument(
85100
"--num-exp",
86101
type=int,
87-
default=DEFAULT_NUM_EXP,
102+
default=5,
88103
help="number of runs to average over",
89104
)
105+
parser.add_argument(
106+
"--num-threads",
107+
type=int,
108+
default=1,
109+
help="number of threads to use; 0 means FFmpeg decides",
110+
)
111+
parser.add_argument(
112+
"--total-frame-fractions",
113+
nargs="+",
114+
type=float,
115+
default=[0.005, 0.01, 0.05, 0.1],
116+
)
117+
parser.add_argument(
118+
"--input-dimension-fractions",
119+
nargs="+",
120+
type=float,
121+
default=[0.5, 0.25, 0.125],
122+
)
90123

91124
args = parser.parse_args()
92125
path = Path(args.path)
@@ -100,10 +133,7 @@ def main():
100133

101134
input_height = metadata.height
102135
input_width = metadata.width
103-
fraction_of_total_frames_to_sample = [0.005, 0.01, 0.05, 0.1]
104-
fraction_of_input_dimensions = [0.5, 0.25, 0.125]
105-
106-
for num_fraction in fraction_of_total_frames_to_sample:
136+
for num_fraction in args.total_frame_fractions:
107137
num_frames_to_sample = math.ceil(metadata.num_frames * num_fraction)
108138
print(
109139
f"Sampling {num_fraction * 100}%, {num_frames_to_sample}, of {metadata.num_frames} frames"
@@ -112,51 +142,49 @@ def main():
112142
i * duration / num_frames_to_sample for i in range(num_frames_to_sample)
113143
]
114144

115-
for dims_fraction in fraction_of_input_dimensions:
145+
for dims_fraction in args.input_dimension_fractions:
116146
dims = (int(input_height * dims_fraction), int(input_width * dims_fraction))
117147

118148
times = bench(
119-
torchvision_resize, path, uniform_timestamps, dims, num_exp=args.num_exp
149+
torchvision_resize,
150+
path,
151+
uniform_timestamps,
152+
dims,
153+
args.num_threads,
154+
num_exp=args.num_exp,
120155
)
121156
report_stats(times, prefix=f"torchvision_resize({dims})")
122157

123158
times = bench(
124-
decoder_native_resize,
159+
decoder_resize,
125160
path,
126161
uniform_timestamps,
127162
dims,
163+
args.num_threads,
128164
num_exp=args.num_exp,
129165
)
130-
report_stats(times, prefix=f"decoder_native_resize({dims})")
131-
print()
166+
report_stats(times, prefix=f"decoder_resize({dims})")
132167

133-
center_x = (input_height - dims[0]) // 2
134-
center_y = (input_width - dims[1]) // 2
135168
times = bench(
136169
torchvision_crop,
137170
path,
138171
uniform_timestamps,
139172
dims,
140-
center_x,
141-
center_y,
173+
args.num_threads,
142174
num_exp=args.num_exp,
143175
)
144-
report_stats(
145-
times, prefix=f"torchvision_crop({dims}, {center_x}, {center_y})"
146-
)
176+
report_stats(times, prefix=f"torchvision_crop({dims})")
147177

148178
times = bench(
149-
decoder_native_crop,
179+
decoder_crop,
150180
path,
151181
uniform_timestamps,
152182
dims,
153-
center_x,
154-
center_y,
183+
args.num_threads,
155184
num_exp=args.num_exp,
156185
)
157-
report_stats(
158-
times, prefix=f"decoder_native_crop({dims}, {center_x}, {center_y})"
159-
)
186+
report_stats(times, prefix=f"decoder_crop({dims})")
187+
160188
print()
161189

162190

docs/source/api_ref_decoders.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,4 @@ For an audio decoder tutorial, see: :ref:`sphx_glr_generated_examples_decoding_a
3333

3434
VideoStreamMetadata
3535
AudioStreamMetadata
36+
CpuFallbackStatus

docs/source/conf.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ def __call__(self, filename):
8282
"approximate_mode.py",
8383
"sampling.py",
8484
"parallel_decoding.py",
85+
"performance_tips.py",
8586
"custom_frame_mappings.py",
8687
]
8788
else:

docs/source/index.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,14 @@ Decoding
8484

8585
How to sample regular and random clips from a video
8686

87+
.. grid-item-card:: :octicon:`file-code;1em`
88+
Performance Tips
89+
:img-top: _static/img/card-background.svg
90+
:link: generated_examples/decoding/performance_tips.html
91+
:link-type: url
92+
93+
Tips for optimizing video decoding performance
94+
8795

8896
Encoding
8997
^^^^^^^^

examples/decoding/basic_cuda_example.py

Lines changed: 19 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -18,28 +18,6 @@
1818
running the transform steps. Encoded packets are often much smaller than decoded frames so
1919
CUDA decoding also uses less PCI-e bandwidth.
2020
21-
When to and when not to use CUDA Decoding
22-
-----------------------------------------
23-
24-
CUDA Decoding can offer speed-up over CPU Decoding in a few scenarios:
25-
26-
#. You are decoding a large resolution video
27-
#. You are decoding a large batch of videos that's saturating the CPU
28-
#. You want to do whole-image transforms like scaling or convolutions on the decoded tensors
29-
after decoding
30-
#. Your CPU is saturated and you want to free it up for other work
31-
32-
33-
Here are situations where CUDA Decoding may not make sense:
34-
35-
#. You want bit-exact results compared to CPU Decoding
36-
#. You have small resolution videos and the PCI-e transfer latency is large
37-
#. Your GPU is already busy and CPU is not
38-
39-
It's best to experiment with CUDA Decoding to see if it improves your use-case. With
40-
TorchCodec you can simply pass in a device parameter to the
41-
:class:`~torchcodec.decoders.VideoDecoder` class to use CUDA Decoding.
42-
4321
Installing TorchCodec with CUDA Enabled
4422
---------------------------------------
4523
@@ -113,6 +91,25 @@
11391
print(frame.data.device)
11492

11593

94+
# %%
95+
# Checking for CPU Fallback
96+
# -------------------------------------
97+
#
98+
# In some cases, CUDA decoding may fall back to CPU decoding. This can happen
99+
# when the video codec or format is not supported by the NVDEC hardware decoder, or when the NVCUVID library cannot be found.
100+
# TorchCodec provides the :class:`~torchcodec.decoders.CpuFallbackStatus` class
101+
# to help you detect when this fallback occurs.
102+
#
103+
# You can access the fallback status via the
104+
# :attr:`~torchcodec.decoders.VideoDecoder.cpu_fallback` attribute:
105+
106+
with set_cuda_backend("beta"):
107+
decoder = VideoDecoder(video_file, device="cuda")
108+
109+
# Check and print the CPU fallback status
110+
print(decoder.cpu_fallback)
111+
112+
116113
# %%
117114
# Visualizing Frames
118115
# -------------------------------------

0 commit comments

Comments
 (0)