Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion trunk/configure
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,7 @@ if [[ $SRS_UTEST == YES ]]; then
"srs_utest_config" "srs_utest_rtmp" "srs_utest_http" "srs_utest_avc" "srs_utest_reload"
"srs_utest_mp4" "srs_utest_service" "srs_utest_app" "srs_utest_rtc" "srs_utest_config2"
"srs_utest_protocol" "srs_utest_protocol2" "srs_utest_kernel2" "srs_utest_protocol3"
"srs_utest_st" "srs_utest_rtc2")
"srs_utest_st" "srs_utest_rtc2" "srs_utest_rtc3")
if [[ $SRS_SRT == YES ]]; then
MODULE_FILES+=("srs_utest_srt")
fi
Expand Down
1 change: 1 addition & 0 deletions trunk/doc/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ The changelog for SRS.
<a name="v7-changes"></a>

## SRS 7.0 Changelog
* v7.0, 2025-07-16, Merge [#4295](https://github.com/ossrs/srs/pull/4295): RTC: audio packet jitter buffer. v7.0.48 (#4295)
* v7.0, 2025-07-11, Merge [#4333](https://github.com/ossrs/srs/pull/4333): NEW PROTOCOL: Support viewing stream over RTSP. v7.0.47 (#4333)
* v7.0, 2025-07-10, Merge [#4414](https://github.com/ossrs/srs/pull/4414): Fix H.264 B-frame detection logic to comply with specification. v7.0.46 (#4414)
* v7.0, 2025-07-04, Merge [#4412](https://github.com/ossrs/srs/pull/4412): Refine code and add tests for #4289. v7.0.45 (#4412)
Expand Down
143 changes: 140 additions & 3 deletions trunk/src/app/srs_app_rtc_source.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1006,7 +1006,7 @@ srs_error_t SrsRtcRtpBuilder::on_audio(SrsSharedPtrMessage* msg)
return err;
}

// ts support audio codec: aac/mp3
// support audio codec: aac/mp3
SrsAudioCodecId acodec = format->acodec->id;
if (acodec != SrsAudioCodecIdAAC && acodec != SrsAudioCodecIdMP3) {
return err;
Expand Down Expand Up @@ -1205,7 +1205,7 @@ srs_error_t SrsRtcRtpBuilder::on_video(SrsSharedPtrMessage* msg)

// If merge Nalus, we package all NALUs(samples) as one NALU, in a RTP or FUA packet.
vector<SrsRtpPacket*> pkts;
// auto free when exit
// TODO: FIXME: Should rename to pkts_disposer.
SrsUniquePtr<vector<SrsRtpPacket*>> pkts_ptr(&pkts, free_packets);

if (merge_nalus && nn_samples > 1) {
Expand Down Expand Up @@ -1593,12 +1593,122 @@ bool SrsRtcFrameBuilderVideoFrameDetector::is_lost_sn(uint16_t received)
return lost_sn_ == received;
}

// Create an empty, not yet initialized cache. The sequence number and the process time
// are seeded by the first packet given to process_packet().
SrsRtcFrameBuilderAudioPacketCache::SrsRtcFrameBuilderAudioPacketCache()
    : last_audio_seq_num_(0),
      last_audio_process_time_(0),
      initialized_(false),
      // The default wait time, converted from milliseconds to srs_utime_t.
      timeout_(MAX_AUDIO_WAIT_MS * SRS_UTIME_MILLISECONDS)
{
}

// Release every packet which is still cached when the cache is destroyed.
SrsRtcFrameBuilderAudioPacketCache::~SrsRtcFrameBuilderAudioPacketCache()
{
    this->clear_all();
}

// Override how long process_packet() waits for a missing packet before it
// forces the buffered packets out.
// @param timeout The new wait time, in srs_utime_t.
void SrsRtcFrameBuilderAudioPacketCache::set_timeout(srs_utime_t timeout)
{
    this->timeout_ = timeout;
}

// Push one audio RTP packet into the jitter buffer and collect the packets which can be
// delivered now, in RTP sequence order.
// @param src The incoming packet. The cache stores the result of src->copy(), never src itself.
// @param ready_packets Output. Released packets are appended in delivery order. They are erased
//      from the cache, so the cache will not free them anymore.
// @return srs_success, no step of this function produces an error.
// @remark A packet at or behind the last delivered sequence number is discarded.
srs_error_t SrsRtcFrameBuilderAudioPacketCache::process_packet(SrsRtpPacket* src, std::vector<SrsRtpPacket*>& ready_packets)
{
    srs_error_t err = srs_success;

    uint16_t seq = src->header.get_sequence();
    srs_utime_t now = srs_update_system_time();

    // Seed the state from the very first packet, so that it becomes the next expected one.
    if (!initialized_) {
        last_audio_seq_num_ = seq - 1;
        last_audio_process_time_ = now;
        initialized_ = true;
    }

    // Discard the packet if it is behind the last delivered one. A distance of zero is a
    // duplicate of the last delivered packet, which must not be delivered twice. This also
    // guarantees that every buffered packet is strictly ahead of last_audio_seq_num_.
    if (srs_rtp_seq_distance(last_audio_seq_num_, seq) <= 0) {
        srs_warn("Discard late audio packet, seq=%u, last_seq=%u", seq, last_audio_seq_num_);
        return err;
    }

    // Store a copy in the jitter buffer, a buffered duplicate with the same seq is replaced.
    if (true) {
        std::map<uint16_t, SrsRtpPacket*>::iterator it = audio_buffer_.find(seq);
        if (it != audio_buffer_.end()) {
            SrsRtpPacket* pkt = it->second;
            srs_freep(pkt);
        }
        audio_buffer_[seq] = src->copy();
    }

    // Drain everything if the buffer is full, or if we have waited too long since last delivery.
    bool force_process = audio_buffer_.size() >= AUDIO_JITTER_BUFFER_SIZE ||
        (now - last_audio_process_time_) > timeout_;
    uint16_t window_end = last_audio_seq_num_ + SLIDING_WINDOW_SIZE;

    while (!audio_buffer_.empty()) {
        // The map sorts the keys numerically, which is not the RTP order when the buffered
        // packets straddle the 65535 -> 0 wrap. So pick the first key at or after the expected
        // sequence number, and wrap to the smallest key if there is none.
        uint16_t expected_seq = last_audio_seq_num_ + 1;
        std::map<uint16_t, SrsRtpPacket*>::iterator it = audio_buffer_.lower_bound(expected_seq);
        if (it == audio_buffer_.end()) {
            it = audio_buffer_.begin();
        }
        uint16_t next_seq = it->first;

        // Check if the packet is within our sliding window
        if (!force_process) {
            if (srs_rtp_seq_distance(last_audio_seq_num_, next_seq) < 0) {
                // Before the window start, should not happen. Deliver it anyway as it is late.
                srs_warn("Late audio packet, seq=%u, expected>=%u", next_seq, last_audio_seq_num_);
            } else if (srs_rtp_seq_distance(next_seq, window_end) < 0) {
                // Beyond the window end, keep it buffered and stop.
                srs_warn("Audio packet beyond window end, seq=%u, window_end=%u", next_seq, window_end);
                break;
            } else if (srs_rtp_seq_distance(last_audio_seq_num_, next_seq) > 1) {
                // There is a gap, wait for the missing packets until the wait time is exceeded.
                if ((now - last_audio_process_time_) <= timeout_) {
                    break;
                }
                // Print the wrapped uint16_t value, not the promoted int.
                srs_warn("Audio packet loss, expected=%u, got=%u", expected_seq, next_seq);
            }
        }

        // Take the packet out of the buffer, the ownership moves to ready_packets.
        SrsRtpPacket* pkt = it->second;
        audio_buffer_.erase(it);

        // Advance the delivery state.
        last_audio_seq_num_ = next_seq;
        last_audio_process_time_ = now;

        ready_packets.push_back(pkt);

        // Move the window along with the last delivered packet.
        window_end = last_audio_seq_num_ + SLIDING_WINDOW_SIZE;
    }

    // Only warn when the buffer is nearly full, the forced drain happens at the full size above.
    if (audio_buffer_.size() >= AUDIO_JITTER_BUFFER_SIZE * 0.8) {
        srs_warn("Audio jitter buffer nearly full, size=%zu", audio_buffer_.size());
    }

    return err;
}

void SrsRtcFrameBuilderAudioPacketCache::clear_all()
{
std::map<uint16_t, SrsRtpPacket*>::iterator it;
for (it = audio_buffer_.begin(); it != audio_buffer_.end(); ++it) {
SrsRtpPacket* pkt = it->second;
srs_freep(pkt);
}

audio_buffer_.clear();
}

SrsRtcFrameBuilder::SrsRtcFrameBuilder(ISrsStreamBridge* bridge)
{
bridge_ = bridge;
is_first_audio_ = true;
audio_transcoder_ = NULL;
video_codec_ = SrsVideoCodecIdAVC;
audio_cache_ = new SrsRtcFrameBuilderAudioPacketCache();
video_cache_ = new SrsRtcFrameBuilderVideoPacketCache();
frame_detector_ = new SrsRtcFrameBuilderVideoFrameDetector(video_cache_);
sync_state_ = -1;
Expand All @@ -1608,6 +1718,7 @@ SrsRtcFrameBuilder::SrsRtcFrameBuilder(ISrsStreamBridge* bridge)
SrsRtcFrameBuilder::~SrsRtcFrameBuilder()
{
srs_freep(audio_transcoder_);
srs_freep(audio_cache_);
srs_freep(video_cache_);
srs_freep(frame_detector_);
srs_freep(obs_whip_vps_);
Expand Down Expand Up @@ -1648,6 +1759,7 @@ srs_error_t SrsRtcFrameBuilder::on_publish()

void SrsRtcFrameBuilder::on_unpublish()
{
audio_cache_->clear_all();
}

srs_error_t SrsRtcFrameBuilder::on_rtp(SrsRtpPacket *pkt)
Expand Down Expand Up @@ -1675,14 +1787,38 @@ srs_error_t SrsRtcFrameBuilder::on_rtp(SrsRtpPacket *pkt)
}

if (pkt->is_audio()) {
err = transcode_audio(pkt);
err = packet_audio(pkt);
} else {
err = packet_video(pkt);
}

return err;
}

// Feed an audio RTP packet to the jitter buffer, then transcode the packets it releases.
// @param src The incoming audio packet, which is only handed to the audio cache.
// @return The wrapped error of the cache or of the transcoder, the first failure stops the loop.
srs_error_t SrsRtcFrameBuilder::packet_audio(SrsRtpPacket* src)
{
    srs_error_t err = srs_success;

    // The released packets are disposed by free_packets when leaving this function.
    std::vector<SrsRtpPacket*> ordered;
    SrsUniquePtr<vector<SrsRtpPacket*>> ordered_disposer(&ordered, free_packets);

    // Let the jitter buffer decide which packets are ready.
    err = audio_cache_->process_packet(src, ordered);
    if (err != srs_success) {
        return srs_error_wrap(err, "audio cache process");
    }

    // Transcode the ready packets one by one, in the order given by the cache.
    std::vector<SrsRtpPacket*>::iterator it;
    for (it = ordered.begin(); it != ordered.end(); ++it) {
        err = transcode_audio(*it);
        if (err != srs_success) {
            return srs_error_wrap(err, "transcode audio");
        }
    }

    return err;
}

srs_error_t SrsRtcFrameBuilder::transcode_audio(SrsRtpPacket *pkt)
{
srs_error_t err = srs_success;
Expand All @@ -1709,6 +1845,7 @@ srs_error_t SrsRtcFrameBuilder::transcode_audio(SrsRtpPacket *pkt)
is_first_audio_ = false;
}

// TODO: FIXME: Should use SrsUniquePtr to dispose it automatically.
std::vector<SrsAudioFrame*> out_pkts;
SrsRtpRawPayload *payload = dynamic_cast<SrsRtpRawPayload*>(pkt->payload());

Expand Down
37 changes: 36 additions & 1 deletion trunk/src/app/srs_app_rtc_source.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,13 @@ const int kVideoPayloadType = 102;
// Chrome HEVC defaults as 49.
const int KVideoPayloadTypeHevc = 49;

// Audio jitter buffer size (in packets). When the number of buffered packets reaches it,
// the audio packet cache is forced to deliver the buffered packets; a warning is logged
// at 80% of this size.
const int AUDIO_JITTER_BUFFER_SIZE = 100;
// Sliding window size for continuous processing (in sequence numbers). The window ends at
// the last delivered sequence number plus this size, a packet beyond it stays buffered
// unless the delivery is forced.
const int SLIDING_WINDOW_SIZE = 10;
// Maximum waiting time for out-of-order packets (in ms). It is the default timeout of the
// audio packet cache, which converts it by SRS_UTIME_MILLISECONDS.
const int MAX_AUDIO_WAIT_MS = 100;

class SrsNtp
{
public:
Expand Down Expand Up @@ -378,6 +385,33 @@ class SrsRtcFrameBuilderVideoFrameDetector
bool is_lost_sn(uint16_t received);
};

// Audio packet cache, a jitter buffer which reorders the audio RTP packets by sequence
// number before they are transcoded.
// @remark The cache owns the packets it buffers and frees them in clear_all() and in the
//      destructor, so it is not copyable.
class SrsRtcFrameBuilderAudioPacketCache
{
private:
    // Audio jitter buffer, map sequence number to the cached packet, owned by this cache.
    std::map<uint16_t, SrsRtpPacket*> audio_buffer_;
    // Sequence number of the last packet which is delivered to the caller.
    uint16_t last_audio_seq_num_;
    // The time when we delivered a packet from the jitter buffer last time.
    srs_utime_t last_audio_process_time_;
    // Whether the first packet has seeded the sequence number and the process time.
    bool initialized_;
    // Timeout for waiting out-of-order packets (in microseconds)
    srs_utime_t timeout_;
public:
    SrsRtcFrameBuilderAudioPacketCache();
    virtual ~SrsRtcFrameBuilderAudioPacketCache();
private:
    // Disable copy, declared but never defined. A copy would share the raw packet pointers
    // and both objects would free them.
    SrsRtcFrameBuilderAudioPacketCache(const SrsRtcFrameBuilderAudioPacketCache&);
    SrsRtcFrameBuilderAudioPacketCache& operator=(const SrsRtcFrameBuilderAudioPacketCache&);
public:
    // Set timeout for waiting out-of-order packets (in microseconds)
    void set_timeout(srs_utime_t timeout);
    // Process audio packet through jitter buffer.
    // @param src The incoming packet, the cache stores a copy of it.
    // @param ready_packets Output, the packets ready for transcoding, in order. They are
    //      removed from the cache and the cache does not free them anymore.
    srs_error_t process_packet(SrsRtpPacket* src, std::vector<SrsRtpPacket*>& ready_packets);
    // Free and remove all cached packets.
    void clear_all();
};

// Collect and build WebRTC RTP packets to AV frames.
class SrsRtcFrameBuilder
{
Expand All @@ -386,9 +420,9 @@ class SrsRtcFrameBuilder
private:
bool is_first_audio_;
SrsAudioTranscoder *audio_transcoder_;

SrsVideoCodecId video_codec_;
private:
SrsRtcFrameBuilderAudioPacketCache* audio_cache_;
SrsRtcFrameBuilderVideoPacketCache* video_cache_;
SrsRtcFrameBuilderVideoFrameDetector* frame_detector_;
private:
Expand All @@ -408,6 +442,7 @@ class SrsRtcFrameBuilder
virtual void on_unpublish();
virtual srs_error_t on_rtp(SrsRtpPacket *pkt);
private:
srs_error_t packet_audio(SrsRtpPacket* pkt);
srs_error_t transcode_audio(SrsRtpPacket *pkt);
void packet_aac(SrsCommonMessage* audio, char* data, int len, uint32_t pts, bool is_header);
private:
Expand Down
6 changes: 6 additions & 0 deletions trunk/src/core/srs_core_time.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,5 +42,11 @@ srs_utime_t srs_duration(srs_utime_t start, srs_utime_t end);
// Never timeout.
#define SRS_UTIME_NO_TIMEOUT ((srs_utime_t) -1LL)

// Get current system time in srs_utime_t, use cache to avoid performance problem
extern srs_utime_t srs_get_system_time();
extern srs_utime_t srs_get_system_startup_time();
// A daemon st-thread updates it.
extern srs_utime_t srs_update_system_time();

#endif

2 changes: 1 addition & 1 deletion trunk/src/core/srs_core_version7.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,6 @@

#define VERSION_MAJOR 7
#define VERSION_MINOR 0
#define VERSION_REVISION 47
#define VERSION_REVISION 48

#endif
Loading
Loading