
Commit 2c59958

New samplers benchmark (#248)
1 parent 21aef92 commit 2c59958

File tree

1 file changed (+46, -221 lines)

Lines changed: 46 additions & 221 deletions
@@ -1,227 +1,52 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
+from pathlib import Path
+from time import perf_counter_ns
 
-import abc
-import argparse
-import importlib
-import os
-
-import decord
-import numpy as np
 import torch
-
-import torch.utils.benchmark as benchmark
-from torchcodec.samplers import (
-    IndexBasedSamplerArgs,
-    TimeBasedSamplerArgs,
-    VideoArgs,
-    VideoClipSampler,
-)
-from torchmultimodal.fb.utils.video_utils import (
-    ClipSamplerType,
-    VideoClipSampler as tmm_vcs,
-)
-from torchvision.datasets.video_clip_sampler import (  # @manual=//pytorch/vision:internal_datasets
-    TVVideoClipDecoder,
-    UniformClipSamplingStrategy,
-    VideoClipSampler as ta_vcs,
-)
-
-
-class AbstractSampler:
-    def __init__(self):
-        pass
-
-    @abc.abstractmethod
-    def sample_frames_uniformly(self, video_file, clips_per_video):
-        pass
-
-
-class TorchCodecTimeBasedSampler(AbstractSampler):
-    def __init__(self):
-        pass
-
-    def sample_frames_uniformly(self, video_file, clips_per_video):
-        arr = np.fromfile(video_file, dtype=np.uint8)
-        video_tensor = torch.from_numpy(arr)
-        video_input = VideoArgs()
-        sampler_input = TimeBasedSamplerArgs(
-            sampler_type="uniform", clips_per_video=clips_per_video, frames_per_clip=1
-        )
-        sampler = VideoClipSampler(video_input, sampler_input)
-        return sampler(video_tensor)
-
-
-class TorchCodecIndexBasedSampler(AbstractSampler):
-    def __init__(self):
-        pass
-
-    def sample_frames_uniformly(self, video_file, clips_per_video):
-        arr = np.fromfile(video_file, dtype=np.uint8)
-        video_tensor = torch.from_numpy(arr)
-        video_input = VideoArgs()
-        sampler_input = IndexBasedSamplerArgs(
-            sampler_type="uniform", clips_per_video=clips_per_video, frames_per_clip=1
-        )
-        sampler = VideoClipSampler(video_input, sampler_input)
-        return sampler(video_tensor)
-
-
-class TorchCodecIndexBasedSamplerWithStackedOutput(AbstractSampler):
-    """
-    On large batch, torch stack has impact on performance, but it's not obvious locally.
-    """
-
-    def __init__(self):
-        pass
-
-    def sample_frames_uniformly(self, video_file, clips_per_video):
-        arr = np.fromfile(video_file, dtype=np.uint8)
-        video_tensor = torch.from_numpy(arr)
-        video_input = VideoArgs()
-        sampler_input = IndexBasedSamplerArgs(
-            sampler_type="uniform", clips_per_video=clips_per_video, frames_per_clip=1
-        )
-        sampler = VideoClipSampler(video_input, sampler_input)
-        clips = sampler(video_tensor)
-        return torch.stack([clip[0] for clip in clips])
-
-
-class DecordSampler(AbstractSampler):
-    def __init__(self):
-        pass
-
-    def sample_frames_uniformly(self, video_file, clips_per_video):
-        decord.bridge.set_bridge("torch")
-        av_reader = decord.VideoReader(video_file)
-        num_frames = len(av_reader)
-        frame_indices = np.linspace(0, num_frames - 1, clips_per_video, dtype=int)
-        frames = av_reader.get_batch(frame_indices)
-        return frames
-
-
-class TorchMMSamplerWithTorchVisionBackend(AbstractSampler):
-    """
-    Here we use TorchMultimodal sampler as it's updated version on top of torchvision decoder.
-    """
-
-    def __init__(self):
-        pass
-
-    def sample_frames_uniformly(self, video_file, clips_per_video):
-        arr = np.fromfile(video_file, dtype=np.uint8)
-        video_tensor = torch.from_numpy(arr)
-        sampler = tmm_vcs(
-            clip_sampler_type=ClipSamplerType("UNIFORM"),
-            clips_per_video=clips_per_video,
-            frames_per_clip=1,
-            frame_dilation=1,
-        )
-        return sampler(video_tensor)
-
-
-class TorchVisionNewSamplerWithTorchVisionBackend(AbstractSampler):
-    def __init__(self):
-        pass
-
-    def sample_frames_uniformly(self, video_file, clips_per_video):
-        clip_sampling_strategy = UniformClipSamplingStrategy(
-            clips_per_video=clips_per_video
-        )
-        decoder = TVVideoClipDecoder(clip_length_in_frames=1, read_audio_stream=False)
-        sampler = ta_vcs(clip_sampling_strategy, decoder)
-        return sampler(str(video_file))
-
-
-def main():
-    """Benchmarks the performance of different samplers"""
-
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--bm_small_video_speed",
-        help="Benchmark small video decoding speed",
-        default=True,
-        action=argparse.BooleanOptionalAction,
-    )
-    parser.add_argument(
-        "--bm_large_video_speed",
-        help="Benchmark large video decoding speed",
-        default=True,
-        action=argparse.BooleanOptionalAction,
+from torchcodec.decoders import VideoDecoder
+from torchcodec.samplers import clips_at_random_indices
+
+
+def bench(f, *args, num_exp=100, warmup=0, **kwargs):
+
+    for _ in range(warmup):
+        f(*args, **kwargs)
+
+    times = []
+    for _ in range(num_exp):
+        start = perf_counter_ns()
+        f(*args, **kwargs)
+        end = perf_counter_ns()
+        times.append(end - start)
+    return torch.tensor(times).float()
+
+
+def report_stats(times, unit="ms"):
+    mul = {
+        "ns": 1,
+        "µs": 1e-3,
+        "ms": 1e-6,
+        "s": 1e-9,
+    }[unit]
+    times = times * mul
+    std = times.std().item()
+    med = times.median().item()
+    print(f"{med = :.2f}{unit} +- {std:.2f}")
+    return med
+
+
+def sample(num_clips):
+    decoder = VideoDecoder(VIDEO_PATH)
+    clips_at_random_indices(
+        decoder,
+        num_clips=num_clips,
+        num_frames_per_clip=10,
+        num_indices_between_frames=2,
     )
-    parser.add_argument(
-        "--bm_video_speed_min_run_seconds",
-        help="Benchmark minimum run time, in seconds, to wait per datapoint",
-        type=float,
-        default=5.0,
-    )
-    args = parser.parse_args()
-
-    small_video_path = importlib.resources.path(__package__, "nasa_13013.mp4")
-    small_video_path = os.fspath(str(small_video_path))
-
-    large_video_path = importlib.resources.path(__package__, "853.mp4")
-    large_video_path = os.fspath(str(large_video_path))
-
-    clips_per_video = 8
-
-    sampler_dict = {}
-    sampler_dict["TorchCodecTimeBasedSampler"] = TorchCodecTimeBasedSampler()
-    sampler_dict["TorchCodecIndexBasedSampler"] = TorchCodecIndexBasedSampler()
-    sampler_dict["TorchCodecIndexBasedSamplerWithStackedOutput"] = (
-        TorchCodecIndexBasedSamplerWithStackedOutput()
-    )
-    sampler_dict["DecordSampler"] = DecordSampler()
-    sampler_dict["TorchMMSamplerWithTorchVisionBackend"] = (
-        TorchMMSamplerWithTorchVisionBackend()
-    )
-    sampler_dict["TorchVisionNewSamplerWithTorchVisionBackend"] = (
-        TorchVisionNewSamplerWithTorchVisionBackend()
-    )
-
-    results = []
 
-    for sampler_name, sampler in sampler_dict.items():
-        if args.bm_small_video_speed:
-            sampler_result = benchmark.Timer(
-                stmt="sampler.sample_frames_uniformly(video_file, clips_per_video)",
-                globals={
-                    "video_file": small_video_path,
-                    "clips_per_video": clips_per_video,
-                    "sampler": sampler,
-                },
-                label="uniform sampling latency for 700KB video",
-                sub_label=sampler_name,
-                description=f"uniform sampling {clips_per_video} frames",
-            )
-            results.append(
-                sampler_result.blocked_autorange(
-                    min_run_time=args.bm_video_speed_min_run_seconds
-                )
-            )
 
-        if args.bm_large_video_speed:
-            if sampler_name == "TorchMMSamplerWithTorchVisionBackend":
-                continue
-            sampler_result = benchmark.Timer(
-                stmt="sampler.sample_frames_uniformly(video_file, clips_per_video)",
-                globals={
-                    "video_file": large_video_path,
-                    "clips_per_video": clips_per_video,
-                    "sampler": sampler,
-                },
-                label="uniform sampling latency for 50MB video",
-                sub_label=sampler_name,
-                description=f"uniform sampling {clips_per_video} frames",
-            )
-            results.append(
-                sampler_result.blocked_autorange(
-                    min_run_time=args.bm_video_speed_min_run_seconds
-                )
-            )
+VIDEO_PATH = Path(__file__).parent / "../../test/resources/nasa_13013.mp4"
 
-    compare = benchmark.Compare(results)
-    compare.print()
+times = bench(sample, num_clips=1, num_exp=30, warmup=2)
+report_stats(times, unit="ms")
+times = bench(sample, num_clips=50, num_exp=30, warmup=2)
+report_stats(times, unit="ms")

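The new benchmark's helpers are plain functions, so extending the run to more clip counts is just a loop over bench and report_stats. The sketch below is illustrative only and not part of the commit: it assumes the definitions from the new file above (bench, report_stats, sample, VIDEO_PATH) are in scope, and the clip counts are arbitrary example values.

# Hypothetical sweep over several clip counts, reusing bench(), report_stats(),
# sample() and VIDEO_PATH exactly as defined in the new benchmark file above.
# The clip counts below are arbitrary example values, not part of this commit.
for num_clips in (1, 10, 50):
    print(f"num_clips = {num_clips}")
    times = bench(sample, num_clips=num_clips, num_exp=30, warmup=2)
    report_stats(times, unit="ms")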