Skip to content

Commit af1e5f2

Browse files
committed
runc create/run/exec: show fatal errors from init
In case early stage of runc init (nsenter) fails for some reason, it logs error(s) with FATAL log level, via bail(). The runc init log is read by a parent (runc create/run/exec) and is logged via normal logrus mechanism, which is all fine and dandy, except when `runc init` fails, we return the error from the parent (which is usually not too helpful, for example): runc run failed: unable to start container process: can't get final child's PID from pipe: EOF Now, the actual underlying error is from runc init and it was logged earlier; here's how full runc output looks like: FATA[0000] nsexec-1[3247792]: failed to unshare remaining namespaces: No space left on device FATA[0000] nsexec-0[3247790]: failed to sync with stage-1: next state: Success ERRO[0000] runc run failed: unable to start container process: can't get final child's PID from pipe: EOF The problem is, upper level runtimes tend to ignore everything except the last line from runc, and thus error reported by e.g. docker is not very helpful. This patch tries to improve the situation by collecting FATAL errors from runc init and appending those to the error returned (instead of logging). With it, the above error will look like this: ERRO[0000] runc run failed: unable to start container process: can't get final child's PID from pipe: EOF; runc init error(s): nsexec-1[141549]: failed to unshare remaining namespaces: No space left on device; nsexec-0[141547]: failed to sync with stage-1: next state: Success Yes, it is long and ugly, but at least the upper level runtime will report it. Signed-off-by: Kir Kolyshkin <[email protected]>
1 parent 2001273 commit af1e5f2

File tree

2 files changed

+36
-16
lines changed

2 files changed

+36
-16
lines changed

libcontainer/container_linux.go

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -338,8 +338,6 @@ func (c *Container) start(process *Process) (retErr error) {
338338
// We do not need the cloned binaries once the process is spawned.
339339
defer process.closeClonedExes()
340340

341-
logsDone := parent.forwardChildLogs()
342-
343341
// Before starting "runc init", mark all non-stdio open files as O_CLOEXEC
344342
// to make sure we don't leak any files into "runc init". Any files to be
345343
// passed to "runc init" through ExtraFiles will get dup2'd by the Go
@@ -349,21 +347,24 @@ func (c *Container) start(process *Process) (retErr error) {
349347
if err := utils.CloseExecFrom(3); err != nil {
350348
return fmt.Errorf("unable to mark non-stdio fds as cloexec: %w", err)
351349
}
352-
if err := parent.start(); err != nil {
353-
return fmt.Errorf("unable to start container process: %w", err)
354-
}
355350

356-
if logsDone != nil {
351+
if logsDone := parent.forwardChildLogs(); logsDone != nil {
357352
defer func() {
358353
// Wait for log forwarder to finish. This depends on
359354
// runc init closing the _LIBCONTAINER_LOGPIPE log fd.
360355
err := <-logsDone
361-
if err != nil && retErr == nil {
362-
retErr = fmt.Errorf("unable to forward init logs: %w", err)
356+
// An error from child logs is nsexec FATAL messages.
357+
// Always append those to the original error.
358+
if err != nil {
359+
retErr = fmt.Errorf("%w; runc init error(s): %w ", retErr, err)
363360
}
364361
}()
365362
}
366363

364+
if err := parent.start(); err != nil {
365+
return fmt.Errorf("unable to start container process: %w", err)
366+
}
367+
367368
if process.Init {
368369
c.fifo.Close()
369370
if c.config.HasHook(configs.Poststart) {

libcontainer/logs/logs.go

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,16 @@ package logs
44

55
import (
66
"bufio"
7+
"bytes"
78
"encoding/json"
9+
"errors"
810
"io"
911

1012
"github.com/sirupsen/logrus"
1113
)
1214

15+
var fatalsSep = []byte("; ")
16+
1317
func ForwardLogs(logPipe io.ReadCloser) chan error {
1418
done := make(chan error, 1)
1519
s := bufio.NewScanner(logPipe)
@@ -25,24 +29,33 @@ func ForwardLogs(logPipe io.ReadCloser) chan error {
2529
}
2630

2731
go func() {
32+
fatals := []byte{}
2833
for s.Scan() {
29-
processEntry(s.Bytes(), logger)
34+
fatals = processEntry(s.Bytes(), logger, fatals)
35+
}
36+
if err := s.Err(); err != nil {
37+
logrus.Errorf("error reading from log source: %v", err)
3038
}
3139
if err := logPipe.Close(); err != nil {
3240
logrus.Errorf("error closing log source: %v", err)
3341
}
34-
// The only error we want to return is when reading from
35-
// logPipe has failed.
36-
done <- s.Err()
42+
// The only error we return is fatal messages from runc init.
43+
var err error
44+
if len(fatals) > 0 {
45+
err = errors.New(string(bytes.TrimSuffix(fatals, fatalsSep)))
46+
}
47+
done <- err
3748
close(done)
3849
}()
3950

4051
return done
4152
}
4253

43-
func processEntry(text []byte, logger *logrus.Logger) {
54+
// processEntry parses the error and either logs it via the standard logger or,
55+
// if this is a fatal error, appends its text to fatals.
56+
func processEntry(text []byte, logger *logrus.Logger, fatals []byte) []byte {
4457
if len(text) == 0 {
45-
return
58+
return fatals
4659
}
4760

4861
var jl struct {
@@ -51,8 +64,14 @@ func processEntry(text []byte, logger *logrus.Logger) {
5164
}
5265
if err := json.Unmarshal(text, &jl); err != nil {
5366
logrus.Errorf("failed to decode %q to json: %v", text, err)
54-
return
67+
return fatals
5568
}
5669

57-
logger.Log(jl.Level, jl.Msg)
70+
if jl.Level == logrus.FatalLevel {
71+
fatals = append(fatals, jl.Msg...)
72+
fatals = append(fatals, fatalsSep...)
73+
} else {
74+
logger.Log(jl.Level, jl.Msg)
75+
}
76+
return fatals
5877
}

0 commit comments

Comments
 (0)