@@ -1092,13 +1092,6 @@ bool SingleStreamDecoder::canWeAvoidSeeking() const {
10921092 // Returns true if we can avoid seeking in the AVFormatContext based on
10931093 // heuristics that rely on the target cursor_ and the last decoded frame.
10941094 // Seeking is expensive, so we try to avoid it when possible.
1095- // Note that this function itself isn't always that cheap to call: in
1096- // particular the calls to getKeyFrameIndexForPts below in approximate mode
1097- // are sometimes slow.
1098- // TODO we should understand why (is it because it reads the file?) and
1099- // potentially optimize it. E.g. we may not want to ever seek, or even *check*
1100- // if we need to seek in some cases, like if we're going to decode 80% of the
1101- // frames anyway.
11021095 const StreamInfo& streamInfo = streamInfos_.at (activeStreamIndex_);
11031096 if (streamInfo.avMediaType == AVMEDIA_TYPE_AUDIO) {
11041097 // For audio, we only need to seek if a backwards seek was requested
@@ -1145,10 +1138,10 @@ bool SingleStreamDecoder::canWeAvoidSeeking() const {
11451138 // I P P P I P P P I P P I P
11461139 // x j y
11471140 // (2) is only more efficient than (1) if there is an I frame between x and y.
1148- int lastKeyFrameIndex = getKeyFrameIndexForPts (lastDecodedAvFramePts_);
1149- int targetKeyFrameIndex = getKeyFrameIndexForPts (cursor_);
1150- return lastKeyFrameIndex >= 0 && targetKeyFrameIndex >= 0 &&
1151- lastKeyFrameIndex == targetKeyFrameIndex ;
1141+ int lastKeyFrame = getKeyFrameIdentifier (lastDecodedAvFramePts_);
1142+ int targetKeyFrame = getKeyFrameIdentifier (cursor_);
1143+ return lastKeyFrame >= 0 && targetKeyFrame >= 0 &&
1144+ lastKeyFrame == targetKeyFrame ;
11521145}
11531146
11541147// This method looks at currentPts and desiredPts and seeks in the
@@ -1365,7 +1358,19 @@ torch::Tensor SingleStreamDecoder::maybePermuteHWC2CHW(
13651358// PTS <-> INDEX CONVERSIONS
13661359// --------------------------------------------------------------------------
13671360
1368- int SingleStreamDecoder::getKeyFrameIndexForPts (int64_t pts) const {
1361+ int SingleStreamDecoder::getKeyFrameIdentifier (int64_t pts) const {
1362+ // This function "identifies" a key frame for a given pts value.
1363+ // We use the term "identifier" rather than "index" because the nature of the
1364+ // index that is returned depends on various factors:
1365+ // - If seek_mode is exact, we return the index of the key frame in the
1366+ // scanned key-frame vector (streamInfo.keyFrames). So the returned value is
1367+ // in [0, num_key_frames).
1368+ // - If seek_mode is approximate, we use av_index_search_timestamp() which
1369+ // may return a value in [0, num_key_frames) like for mkv, but also a value
1370+ // in [0, num_frames) like for mp4. It really depends on the container.
1371+ //
1372+ // The range of the "identifier" doesn't matter that much: for now, we only
1373+ // use it to uniquely identify a key frame in canWeAvoidSeeking().
13691374 const StreamInfo& streamInfo = streamInfos_.at (activeStreamIndex_);
13701375 if (streamInfo.keyFrames .empty ()) {
13711376 return av_index_search_timestamp (
0 commit comments