Add Random time-based sampler (#255)

NicolasHug · web-flow · commit cff9492959a4 · 2024-10-11T15:55:00.000+01:00
diff --git a/benchmarks/samplers/benchmark_samplers.py b/benchmarks/samplers/benchmark_samplers.py
@@ -3,7 +3,12 @@
 
 import torch
 from torchcodec.decoders import VideoDecoder
-from torchcodec.samplers import clips_at_random_indices
+from torchcodec.samplers import (
+    clips_at_random_indices,
+    clips_at_random_timestamps,
+    clips_at_regular_indices,
+    clips_at_regular_timestamps,
+)
 
 
 def bench(f, *args, num_exp=100, warmup=0, **kwargs):
@@ -34,19 +39,51 @@ def report_stats(times, unit="ms"):
     return med
 
 
-def sample(num_clips):
+def sample(sampler, **kwargs):
     decoder = VideoDecoder(VIDEO_PATH)
-    clips_at_random_indices(
+    sampler(
         decoder,
-        num_clips=num_clips,
         num_frames_per_clip=10,
-        num_indices_between_frames=2,
+        **kwargs,
     )
 
 
 VIDEO_PATH = Path(__file__).parent / "../../test/resources/nasa_13013.mp4"
+NUM_EXP = 30
+
+for num_clips in (1, 50):
+    print("-" * 10)
+    print(f"{num_clips = }")
+
+    print("clips_at_random_indices     ", end="")
+    times = bench(
+        sample, clips_at_random_indices, num_clips=num_clips, num_exp=NUM_EXP, warmup=2
+    )
+    report_stats(times, unit="ms")
+
+    print("clips_at_regular_indices    ", end="")
+    times = bench(
+        sample, clips_at_regular_indices, num_clips=num_clips, num_exp=NUM_EXP, warmup=2
+    )
+    report_stats(times, unit="ms")
 
-times = bench(sample, num_clips=1, num_exp=30, warmup=2)
-report_stats(times, unit="ms")
-times = bench(sample, num_clips=50, num_exp=30, warmup=2)
-report_stats(times, unit="ms")
+    print("clips_at_random_timestamps  ", end="")
+    times = bench(
+        sample,
+        clips_at_random_timestamps,
+        num_clips=num_clips,
+        num_exp=NUM_EXP,
+        warmup=2,
+    )
+    report_stats(times, unit="ms")
+
+    print("clips_at_regular_timestamps ", end="")
+    seconds_between_clip_starts = 13 / num_clips  # approximate. video is 13s long
+    times = bench(
+        sample,
+        clips_at_regular_timestamps,
+        seconds_between_clip_starts=seconds_between_clip_starts,
+        num_exp=NUM_EXP,
+        warmup=2,
+    )
+    report_stats(times, unit="ms")
diff --git a/src/torchcodec/samplers/__init__.py b/src/torchcodec/samplers/__init__.py
@@ -1,5 +1,6 @@
 from ._implem import (
     clips_at_random_indices,
+    clips_at_random_timestamps,
     clips_at_regular_indices,
     clips_at_regular_timestamps,
 )
diff --git a/src/torchcodec/samplers/_implem.py b/src/torchcodec/samplers/_implem.py
@@ -78,7 +78,7 @@ def _validate_params(*, decoder, num_frames_per_clip, policy):
 
 def _validate_params_index_based(*, num_clips, num_indices_between_frames):
     if num_clips <= 0:
-        raise ValueError(f"num_clips ({num_clips}) must be strictly positive")
+        raise ValueError(f"num_clips ({num_clips}) must be > 0")
 
     if num_indices_between_frames <= 0:
         raise ValueError(
@@ -339,14 +339,24 @@ def clips_at_regular_indices(
 def _validate_params_time_based(
     *,
     decoder,
+    num_clips,
     seconds_between_clip_starts,
     seconds_between_frames,
 ):
-    if seconds_between_clip_starts <= 0:
+
+    if (num_clips is None and seconds_between_clip_starts is None) or (
+        num_clips is not None and seconds_between_clip_starts is not None
+    ):
+        raise ValueError("This is internal only and should never happen.")
+
+    if seconds_between_clip_starts is not None and seconds_between_clip_starts <= 0:
         raise ValueError(
             f"seconds_between_clip_starts ({seconds_between_clip_starts}) must be > 0"
         )
 
+    if num_clips is not None and num_clips <= 0:
+        raise ValueError(f"num_clips ({num_clips}) must be > 0")
+
     if decoder.metadata.average_fps is None:
         raise ValueError(
             "Could not infer average fps from video metadata. "
@@ -480,6 +490,13 @@ def _decode_all_clips_timestamps(
             and frame_pts_seconds == all_clips_timestamps_sorted[i - 1]
         ):
             # Avoid decoding the same frame twice.
+            # Unfortunately this is unlikely to lead to speed-up as-is: it's
+            # pretty unlikely that 2 pts will be the same since pts are float
+            # contiguous values. Theoretically the dedup can still happen, but
+            # it would be much more efficient to implement it at the frame index
+            # level. We should do that once we implement that in C++.
+            # See also https://github.com/pytorch/torchcodec/issues/256.
+            #
             # IMPORTANT: this is only correct because a copy of the frame will
             # happen within `_to_framebatch` when we call torch.stack.
             # If a copy isn't made, the same underlying memory will be used for
@@ -498,15 +515,17 @@ def _decode_all_clips_timestamps(
     return [_to_framebatch(clip) for clip in all_clips]
 
 
-def clips_at_regular_timestamps(
+def _generic_time_based_sampler(
+    kind: Literal["random", "regular"],
     decoder,
     *,
-    seconds_between_clip_starts: float,
-    num_frames_per_clip: int = 1,
-    seconds_between_frames: Optional[float] = None,
+    num_clips: Optional[int],  # mutually exclusive with seconds_between_clip_starts
+    seconds_between_clip_starts: Optional[float],
+    num_frames_per_clip: int,
+    seconds_between_frames: Optional[float],
     # None means "begining", which may not always be 0
-    sampling_range_start: Optional[float] = None,
-    sampling_range_end: Optional[float] = None,  # interval is [start, end).
+    sampling_range_start: Optional[float],
+    sampling_range_end: Optional[float],  # interval is [start, end).
     policy: str = "repeat_last",
 ) -> List[FrameBatch]:
     # Note: *everywhere*, sampling_range_end denotes the upper bound of where a
@@ -521,6 +540,7 @@ def clips_at_regular_timestamps(
 
     seconds_between_frames = _validate_params_time_based(
         decoder=decoder,
+        num_clips=num_clips,
         seconds_between_clip_starts=seconds_between_clip_starts,
         seconds_between_frames=seconds_between_frames,
     )
@@ -534,11 +554,21 @@ def clips_at_regular_timestamps(
         end_stream_seconds=decoder.metadata.end_stream_seconds,
     )
 
-    clip_start_seconds = torch.arange(
-        sampling_range_start,
-        sampling_range_end,  # excluded
-        seconds_between_clip_starts,
-    )
+    if kind == "random":
+        assert num_clips is not None  # appease type-checker
+        sampling_range_width = sampling_range_end - sampling_range_start
+        # torch.rand() returns in [0, 1)
+        # which ensures all clip starts are < sampling_range_end
+        clip_start_seconds = (
+            torch.rand(num_clips) * sampling_range_width + sampling_range_start
+        )
+    else:
+        assert seconds_between_clip_starts is not None  # appease type-checker
+        clip_start_seconds = torch.arange(
+            sampling_range_start,
+            sampling_range_end,  # excluded
+            seconds_between_clip_starts,
+        )
 
     all_clips_timestamps = _build_all_clips_timestamps(
         clip_start_seconds=clip_start_seconds,
@@ -553,3 +583,51 @@ def clips_at_regular_timestamps(
         all_clips_timestamps=all_clips_timestamps,
         num_frames_per_clip=num_frames_per_clip,
     )
+
+
+def clips_at_random_timestamps(
+    decoder,
+    *,
+    num_clips: int = 1,
+    num_frames_per_clip: int = 1,
+    seconds_between_frames: Optional[float] = None,
+    # None means "beginning", which may not always be 0
+    sampling_range_start: Optional[float] = None,
+    sampling_range_end: Optional[float] = None,  # interval is [start, end).
+    policy: str = "repeat_last",
+) -> List[FrameBatch]:
+    return _generic_time_based_sampler(
+        kind="random",
+        decoder=decoder,
+        num_clips=num_clips,
+        seconds_between_clip_starts=None,
+        num_frames_per_clip=num_frames_per_clip,
+        seconds_between_frames=seconds_between_frames,
+        sampling_range_start=sampling_range_start,
+        sampling_range_end=sampling_range_end,
+        policy=policy,
+    )
+
+
+def clips_at_regular_timestamps(
+    decoder,
+    *,
+    seconds_between_clip_starts: float,
+    num_frames_per_clip: int = 1,
+    seconds_between_frames: Optional[float] = None,
+    # None means "beginning", which may not always be 0
+    sampling_range_start: Optional[float] = None,
+    sampling_range_end: Optional[float] = None,  # interval is [start, end).
+    policy: str = "repeat_last",
+) -> List[FrameBatch]:
+    return _generic_time_based_sampler(
+        kind="regular",
+        decoder=decoder,
+        num_clips=None,
+        seconds_between_clip_starts=seconds_between_clip_starts,
+        num_frames_per_clip=num_frames_per_clip,
+        seconds_between_frames=seconds_between_frames,
+        sampling_range_start=sampling_range_start,
+        sampling_range_end=sampling_range_end,
+        policy=policy,
+    )
diff --git a/test/samplers/test_samplers.py b/test/samplers/test_samplers.py

Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,6 @@`
`1`	`1`	`from ._implem import (`
`2`	`2`	`clips_at_random_indices,`
	`3`	`+ clips_at_random_timestamps,`
`3`	`4`	`clips_at_regular_indices,`
`4`	`5`	`clips_at_regular_timestamps,`
`5`	`6`	`)`