From 2d9ae141f57562d8a34e3e708d4b183bc76b6c36 Mon Sep 17 00:00:00 2001 From: yyforyongyu Date: Fri, 4 Jul 2025 03:33:43 +0800 Subject: [PATCH 1/6] peer: use swap instead of addition when checking `p.started` Otherwise it will keep increasing. --- peer/brontide.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/peer/brontide.go b/peer/brontide.go index 5b0d84f2f3a..7bbe7ecb6fa 100644 --- a/peer/brontide.go +++ b/peer/brontide.go @@ -775,7 +775,9 @@ func NewBrontide(cfg Config) *Brontide { // Start starts all helper goroutines the peer needs for normal operations. In // the case this peer has already been started, then this function is a noop. func (p *Brontide) Start() error { - if atomic.AddInt32(&p.started, 1) != 1 { + if !atomic.CompareAndSwapInt32(&p.started, 0, 1) { + p.log.Warn("already started") + return nil } From 987e6f8cf5b246285b25c3a339794a98d02c4a16 Mon Sep 17 00:00:00 2001 From: yyforyongyu Date: Fri, 4 Jul 2025 03:36:11 +0800 Subject: [PATCH 2/6] peer: add more logging We add more detailed logging so it's eaiser to observe the overall flow. --- peer/brontide.go | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/peer/brontide.go b/peer/brontide.go index 7bbe7ecb6fa..c36f27c70cf 100644 --- a/peer/brontide.go +++ b/peer/brontide.go @@ -1581,17 +1581,27 @@ func (p *Brontide) maybeSendChannelUpdates() { // calling Disconnect will signal the quit channel and the method will not // block, since no goroutines were spawned. func (p *Brontide) WaitForDisconnect(ready chan struct{}) { + p.log.Trace("waiting for disconnect") + defer p.log.Trace("peer disconnected") + // Before we try to call the `Wait` goroutine, we'll make sure the main // set of goroutines are already active. select { case <-p.startReady: + p.log.Trace("startReady received, waiting for signal ready") + case <-p.cg.Done(): + p.log.Trace("peer quit, exit waiting for signal startReady") + return } select { case <-ready: + p.log.Trace("ready received, waiting goroutines to finish") + case <-p.cg.Done(): + p.log.Trace("peer quit, exit waiting for signal ready") } p.cg.WgWait() @@ -1606,6 +1616,9 @@ func (p *Brontide) WaitForDisconnect(ready chan struct{}) { // the peer has finished starting up before calling this method. func (p *Brontide) Disconnect(reason error) { if !atomic.CompareAndSwapInt32(&p.disconnect, 0, 1) { + p.log.Warnf("got disconnect reason [%v], but peer already "+ + "disconnected", reason) + return } @@ -1616,11 +1629,13 @@ func (p *Brontide) Disconnect(reason error) { // started, otherwise we will skip reading it as this chan won't be // closed, hence blocks forever. if atomic.LoadInt32(&p.started) == 1 { - p.log.Debugf("Peer hasn't finished starting up yet, waiting " + - "on startReady signal before closing connection") + p.log.Debug("waiting on startReady signal before closing " + + "connection") select { case <-p.startReady: + p.log.Debug("startReady received") + case <-p.cg.Done(): return } @@ -2062,7 +2077,8 @@ out: } } if err != nil { - p.log.Infof("unable to read message from peer: %v", err) + p.log.Debugf("unable to read message from peer: %v", + err) // If we could not read our peer's message due to an // unknown type or invalid alias, we continue processing @@ -2184,8 +2200,7 @@ out: err := p.resendChanSyncMsg(targetChan) if err != nil { // TODO(halseth): send error to peer? - p.log.Errorf("resend failed: %v", - err) + p.log.Errorf("resend failed: %v", err) } } From b4efe3fc102f764f2fe5ca1a6dfd95b759a7db29 Mon Sep 17 00:00:00 2001 From: yyforyongyu Date: Fri, 4 Jul 2025 03:37:39 +0800 Subject: [PATCH 3/6] peer: wait for goroutines when peer is quitting in `WaitForDisconnect` We now make sure when we are waiting for the signal `startReady`, if the peer quits, we also wait for the goroutines to finish before exit. --- peer/brontide.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/peer/brontide.go b/peer/brontide.go index c36f27c70cf..5dad03e0db3 100644 --- a/peer/brontide.go +++ b/peer/brontide.go @@ -1593,6 +1593,9 @@ func (p *Brontide) WaitForDisconnect(ready chan struct{}) { case <-p.cg.Done(): p.log.Trace("peer quit, exit waiting for signal startReady") + // Wait for goroutines to finish before exit. + p.cg.WgWait() + return } From b2c398bcc81c4a84168958c73a966f17a7be9f37 Mon Sep 17 00:00:00 2001 From: yyforyongyu Date: Fri, 4 Jul 2025 03:39:03 +0800 Subject: [PATCH 4/6] peer: skip Disconnect in read and write handlers There's no need to disconnect if the peer has already been disconnected. --- peer/brontide.go | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/peer/brontide.go b/peer/brontide.go index 5dad03e0db3..5f2ddb5ae64 100644 --- a/peer/brontide.go +++ b/peer/brontide.go @@ -2067,6 +2067,9 @@ func (p *Brontide) readHandler() { // gossiper? p.initGossipSync() + // exitErr is the error to be used when disconnect the peer. + var exitErr error + discStream := newDiscMsgStream(p) discStream.Start() defer discStream.Stop() @@ -2119,6 +2122,7 @@ out: // didn't recognize, then we'll stop all processing as // this is a fatal error. default: + exitErr = err break out } } @@ -2262,9 +2266,13 @@ out: idleTimer.Reset(idleTimeout) } - p.Disconnect(errors.New("read handler closed")) + // Disconnect the peer on exitErr, but only if the peer hasn't been + // disconnected before. + if atomic.LoadInt32(&p.disconnect) == 0 { + p.Disconnect(exitErr) + } - p.log.Trace("readHandler for peer done") + p.log.Debugf("peer quit, exit readHandler") } // handleCustomMessage handles the given custom message if a handler is @@ -2749,7 +2757,7 @@ out: } case <-p.cg.Done(): - exitErr = lnpeer.ErrPeerExiting + p.log.Debug("peer quit, exit writeHandler") break out } } @@ -2758,7 +2766,9 @@ out: // disconnect. p.cg.WgDone() - p.Disconnect(exitErr) + if exitErr != nil { + p.Disconnect(exitErr) + } p.log.Trace("writeHandler for peer done") } From cfdafb0c0071cfb96c42f91169ccaf320b106861 Mon Sep 17 00:00:00 2001 From: yyforyongyu Date: Fri, 4 Jul 2025 03:40:00 +0800 Subject: [PATCH 5/6] lnd: make sure we track goroutine and wait for it to be finished --- server.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/server.go b/server.go index 7eb8c583da2..93d02bb6485 100644 --- a/server.go +++ b/server.go @@ -5209,7 +5209,12 @@ func (s *server) DisconnectPeer(pubKey *btcec.PublicKey) error { // // NOTE: We call it in a goroutine to avoid blocking the main server // goroutine because we might hold the server's mutex. - go peer.Disconnect(fmt.Errorf("server: DisconnectPeer called")) + s.wg.Add(1) + go func() { + defer s.wg.Done() + + peer.Disconnect(fmt.Errorf("server: DisconnectPeer called")) + }() return nil } From c1ce7d54da62c675affd0e266f4392849668942f Mon Sep 17 00:00:00 2001 From: yyforyongyu Date: Tue, 15 Jul 2025 20:53:13 +0800 Subject: [PATCH 6/6] docs: update release docs --- docs/release-notes/release-notes-0.20.0.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/release-notes/release-notes-0.20.0.md b/docs/release-notes/release-notes-0.20.0.md index e41243ca98e..58be6ccb522 100644 --- a/docs/release-notes/release-notes-0.20.0.md +++ b/docs/release-notes/release-notes-0.20.0.md @@ -140,6 +140,10 @@ reader of a payment request. - [Refactored](https://github.com/lightningnetwork/lnd/pull/10018) `channelLink` to improve readability and maintainability of the code. +- Remove unnecessary + [disconnect](https://github.com/lightningnetwork/lnd/pull/10031) and properly + wait for goroutine to finish when shutting down. + ## Breaking Changes ## Performance Improvements