Commit 8fb0eea

copy pgBackRest logs to local machine rather than stream (CrunchyData#129)
This feature copies the pgBackRest logs to a user's local machine rather than streaming them. It mimics the behavior we use for the Postgres logs. We have found situations where the pgBackRest logs are large and the previous behavior would time out.
1 parent 83d4554 commit 8fb0eea
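
The change leans on a streamFileFromPod helper that the hunks below call but whose body is not part of this diff: each remote log is copied to a file under localDirectory, and that local copy is then added to the support-export tarball. The following is a minimal sketch of that idea, assuming a plain `cat` over client-go's SPDY exec and a package name inferred from the internal/cmd/export.go path; the actual helper in export.go may differ.

// Hypothetical sketch; not the implementation from this commit.
package cmd

import (
	"archive/tar"
	"io"
	"os"
	"path/filepath"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/kubernetes/scheme"
	"k8s.io/client-go/rest"
	"k8s.io/client-go/tools/remotecommand"
)

// streamFileFromPod copies one remote file to localDirectory by exec'ing `cat`
// in the target container, then appends the local copy to the tar archive
// under <clusterName>/pods/<podName>/<remotePath>.
func streamFileFromPod(config *rest.Config, tw *tar.Writer,
	localDirectory, clusterName, namespace, podName, containerName, remotePath string,
	fileSize int64,
) error {
	// Create the local file that receives the stream.
	localPath := filepath.Join(localDirectory, remotePath)
	if err := os.MkdirAll(filepath.Dir(localPath), 0o750); err != nil {
		return err
	}
	outFile, err := os.Create(localPath)
	if err != nil {
		return err
	}
	defer outFile.Close()

	// Exec `cat <remotePath>` in the container, pointing stdout at the local file.
	clientset, err := kubernetes.NewForConfig(config)
	if err != nil {
		return err
	}
	req := clientset.CoreV1().RESTClient().Post().
		Resource("pods").Namespace(namespace).Name(podName).SubResource("exec").
		VersionedParams(&corev1.PodExecOptions{
			Container: containerName,
			Command:   []string{"cat", remotePath},
			Stdout:    true,
			Stderr:    true,
		}, scheme.ParameterCodec)
	executor, err := remotecommand.NewSPDYExecutor(config, "POST", req.URL())
	if err != nil {
		return err
	}
	if err := executor.Stream(remotecommand.StreamOptions{Stdout: outFile, Stderr: os.Stderr}); err != nil {
		return err
	}

	// Add the local copy to the tar. The on-disk size is used for the header;
	// fileSize (from getRemoteFileSize) is what the caller reports to the user.
	info, err := os.Stat(localPath)
	if err != nil {
		return err
	}
	hdr := &tar.Header{
		Name: filepath.Join(clusterName, "pods", podName, remotePath),
		Mode: 0o644,
		Size: info.Size(),
	}
	if err := tw.WriteHeader(hdr); err != nil {
		return err
	}
	src, err := os.Open(localPath)
	if err != nil {
		return err
	}
	defer src.Close()
	_, err = io.Copy(tw, src)
	return err
}

Copying to disk first means a failed or interrupted transfer leaves a partial file the user can inspect or re-fetch with kubectl cp, instead of an in-memory stream that simply times out.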

File tree

1 file changed: +95 -36 lines

internal/cmd/export.go

Lines changed: 95 additions & 36 deletions
@@ -469,7 +469,7 @@ Collecting PGO CLI logs...
     }
 
     // All pgBackRest Logs on the Postgres Instances
-    err = gatherDbBackrestLogs(ctx, clientset, restConfig, namespace, clusterName, tw, cmd)
+    err = gatherDbBackrestLogs(ctx, clientset, restConfig, namespace, clusterName, outputDir, outputFile, tw, cmd)
     if err != nil {
         writeInfo(cmd, fmt.Sprintf("Error gathering pgBackRest DB Hosts Logs: %s", err))
     }
@@ -481,7 +481,7 @@ Collecting PGO CLI logs...
     }
 
     // All pgBackRest Logs on the Repo Host
-    err = gatherRepoHostLogs(ctx, clientset, restConfig, namespace, clusterName, tw, cmd)
+    err = gatherRepoHostLogs(ctx, clientset, restConfig, namespace, clusterName, outputDir, outputFile, tw, cmd)
     if err != nil {
         writeInfo(cmd, fmt.Sprintf("Error gathering pgBackRest Repo Host Logs: %s", err))
     }
@@ -1223,6 +1223,8 @@ func gatherDbBackrestLogs(ctx context.Context,
     config *rest.Config,
     namespace string,
     clusterName string,
+    outputDir string,
+    outputFile string,
     tw *tar.Writer,
     cmd *cobra.Command,
 ) error {
@@ -1291,30 +1293,58 @@ func gatherDbBackrestLogs(ctx context.Context,
         }
 
         logFiles := strings.Split(strings.TrimSpace(stdout), "\n")
+
+        // localDirectory is created to save data on disk
+        // e.g. outputDir/crunchy_k8s_support_export_2022-08-08-115726-0400/remotePath
+        localDirectory := filepath.Join(outputDir, strings.ReplaceAll(outputFile, ".tar.gz", ""))
+
+        // flag to determine whether or not to remove localDirectory after the loop
+        // When an error happens, this flag will switch to false
+        // It's nice to have the extra data around when errors have happened
+        doCleanup := true
+
         for _, logFile := range logFiles {
             writeDebug(cmd, fmt.Sprintf("LOG FILE: %s\n", logFile))
-            var buf bytes.Buffer
-
-            stdout, stderr, err := Executor(exec).catFile(logFile)
+            // get the file size to stream
+            fileSize, err := getRemoteFileSize(config, namespace, pod.Name, util.ContainerDatabase, logFile)
             if err != nil {
-                if apierrors.IsForbidden(err) {
-                    writeInfo(cmd, err.Error())
-                    // Continue and output errors for each log file
-                    // Allow the user to see and address all issues at once
-                    continue
-                }
-                return err
+                writeDebug(cmd, fmt.Sprintf("could not get file size for %s: %v\n", logFile, err))
+                continue
             }
 
-            buf.Write([]byte(stdout))
-            if stderr != "" {
-                str := fmt.Sprintf("\nError returned: %s\n", stderr)
-                buf.Write([]byte(str))
+            // fileSpecSrc is namespace/podname:path/to/file
+            // fileSpecDest is the local destination of the file
+            // These are used to help the user grab the file manually when necessary
+            // e.g. postgres-operator/hippo-instance1-vp9k-0:pgdata/pgbackrest/log/db-stanza-create.log
+            fileSpecSrc := fmt.Sprintf("%s/%s:%s", namespace, pod.Name, logFile)
+            fileSpecDest := filepath.Join(localDirectory, logFile)
+            writeInfo(cmd, fmt.Sprintf("\tSize of %-85s %v", fileSpecSrc, convertBytes(fileSize)))
+
+            // Stream the file to disk and write the local file to the tar
+            err = streamFileFromPod(config, tw,
+                localDirectory, clusterName, namespace, pod.Name, util.ContainerDatabase, logFile, fileSize)
+
+            if err != nil {
+                doCleanup = false // prevent the deletion of localDirectory so a user can examine contents
+                writeInfo(cmd, fmt.Sprintf("\tError streaming file %s: %v", logFile, err))
+                writeInfo(cmd, fmt.Sprintf("\tCollect manually with kubectl cp -c %s %s %s",
+                    util.ContainerDatabase, fileSpecSrc, fileSpecDest))
+                writeInfo(cmd, fmt.Sprintf("\tRemove %s manually after gathering necessary information", localDirectory))
+                continue
             }
 
-            path := clusterName + fmt.Sprintf("/pods/%s/", pod.Name) + logFile
-            if err := writeTar(tw, buf.Bytes(), path, cmd); err != nil {
-                return err
+        }
+
+        // doCleanup is true when there are no errors above.
+        if doCleanup {
+            // Remove the local directory created to hold the data
+            // Errors in removing localDirectory should instruct the user to remove manually.
+            // This happens often on Windows
+            err = os.RemoveAll(localDirectory)
+            if err != nil {
+                writeInfo(cmd, fmt.Sprintf("\tError removing %s: %v", localDirectory, err))
+                writeInfo(cmd, fmt.Sprintf("\tYou may need to remove %s manually", localDirectory))
+                continue
             }
         }
 
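The loop above also calls getRemoteFileSize and convertBytes, neither of which appears in this diff. As a rough sketch only (an assumption, not the code from export.go), the size lookup could exec a `wc -c` in the target container and parse the byte count:

// Hypothetical sketch of a size lookup; the real getRemoteFileSize may differ.
// Imports: the client-go packages from the earlier sketch, plus bytes, fmt, strconv, strings.
func getRemoteFileSize(config *rest.Config,
	namespace, podName, containerName, remotePath string,
) (int64, error) {
	clientset, err := kubernetes.NewForConfig(config)
	if err != nil {
		return 0, err
	}
	req := clientset.CoreV1().RESTClient().Post().
		Resource("pods").Namespace(namespace).Name(podName).SubResource("exec").
		VersionedParams(&corev1.PodExecOptions{
			Container: containerName,
			// `wc -c <` prints only the byte count, so parsing stays trivial.
			Command: []string{"bash", "-c", "wc -c < " + remotePath},
			Stdout:  true,
			Stderr:  true,
		}, scheme.ParameterCodec)
	executor, err := remotecommand.NewSPDYExecutor(config, "POST", req.URL())
	if err != nil {
		return 0, err
	}
	var stdout, stderr bytes.Buffer
	if err := executor.Stream(remotecommand.StreamOptions{Stdout: &stdout, Stderr: &stderr}); err != nil {
		return 0, fmt.Errorf("%v: %s", err, stderr.String())
	}
	return strconv.ParseInt(strings.TrimSpace(stdout.String()), 10, 64)
}
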
@@ -1436,6 +1466,8 @@ func gatherRepoHostLogs(ctx context.Context,
     config *rest.Config,
     namespace string,
     clusterName string,
+    outputDir string,
+    outputFile string,
     tw *tar.Writer,
     cmd *cobra.Command,
 ) error {
@@ -1503,30 +1535,57 @@ func gatherRepoHostLogs(ctx context.Context,
         }
 
         logFiles := strings.Split(strings.TrimSpace(stdout), "\n")
+
+        // localDirectory is created to save data on disk
+        // e.g. outputDir/crunchy_k8s_support_export_2022-08-08-115726-0400/remotePath
+        localDirectory := filepath.Join(outputDir, strings.ReplaceAll(outputFile, ".tar.gz", ""))
+
+        // flag to determine whether or not to remove localDirectory after the loop
+        // When an error happens, this flag will switch to false
+        // It's nice to have the extra data around when errors have happened
+        doCleanup := true
+
         for _, logFile := range logFiles {
             writeDebug(cmd, fmt.Sprintf("LOG FILE: %s\n", logFile))
-            var buf bytes.Buffer
-
-            stdout, stderr, err := Executor(exec).catFile(logFile)
+            // get the file size to stream
+            fileSize, err := getRemoteFileSize(config, namespace, pod.Name, util.ContainerPGBackrest, logFile)
             if err != nil {
-                if apierrors.IsForbidden(err) {
-                    writeInfo(cmd, err.Error())
-                    // Continue and output errors for each log file
-                    // Allow the user to see and address all issues at once
-                    continue
-                }
-                return err
+                writeDebug(cmd, fmt.Sprintf("could not get file size for %s: %v\n", logFile, err))
+                continue
             }
 
-            buf.Write([]byte(stdout))
-            if stderr != "" {
-                str := fmt.Sprintf("\nError returned: %s\n", stderr)
-                buf.Write([]byte(str))
+            // fileSpecSrc is namespace/podname:path/to/file
+            // fileSpecDest is the local destination of the file
+            // These are used to help the user grab the file manually when necessary
+            // e.g. postgres-operator/hippo-repo-host-0:pgbackrest/repo1/log/db-backup.log
+            fileSpecSrc := fmt.Sprintf("%s/%s:%s", namespace, pod.Name, logFile)
+            fileSpecDest := filepath.Join(localDirectory, logFile)
+            writeInfo(cmd, fmt.Sprintf("\tSize of %-85s %v", fileSpecSrc, convertBytes(fileSize)))
+
+            // Stream the file to disk and write the local file to the tar
+            err = streamFileFromPod(config, tw,
+                localDirectory, clusterName, namespace, pod.Name, util.ContainerPGBackrest, logFile, fileSize)
+
+            if err != nil {
+                doCleanup = false // prevent the deletion of localDirectory so a user can examine contents
+                writeInfo(cmd, fmt.Sprintf("\tError streaming file %s: %v", logFile, err))
+                writeInfo(cmd, fmt.Sprintf("\tCollect manually with kubectl cp -c %s %s %s",
+                    util.ContainerPGBackrest, fileSpecSrc, fileSpecDest))
+                writeInfo(cmd, fmt.Sprintf("\tRemove %s manually after gathering necessary information", localDirectory))
+                continue
             }
+        }
 
-            path := clusterName + fmt.Sprintf("/pods/%s/", pod.Name) + logFile
-            if err := writeTar(tw, buf.Bytes(), path, cmd); err != nil {
-                return err
+        // doCleanup is true when there are no errors above.
+        if doCleanup {
+            // Remove the local directory created to hold the data
+            // Errors in removing localDirectory should instruct the user to remove manually.
+            // This happens often on Windows
+            err = os.RemoveAll(localDirectory)
+            if err != nil {
+                writeInfo(cmd, fmt.Sprintf("\tError removing %s: %v", localDirectory, err))
+                writeInfo(cmd, fmt.Sprintf("\tYou may need to remove %s manually", localDirectory))
+                continue
             }
         }
 
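convertBytes, used in both gather functions to print the size lines, is likewise not shown in this diff. A plausible human-readable formatter, offered only as an assumption about its behavior, could look like:

// Hypothetical sketch; the helper in export.go may format sizes differently.
// convertBytes renders a byte count as B/KiB/MiB/GiB for the "Size of" lines above.
func convertBytes(size int64) string {
	const unit = 1024
	if size < unit {
		return fmt.Sprintf("%d B", size)
	}
	div, exp := int64(unit), 0
	for n := size / unit; n >= unit; n /= unit {
		div *= unit
		exp++
	}
	return fmt.Sprintf("%.1f %ciB", float64(size)/float64(div), "KMGTPE"[exp])
}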