diff --git a/cmd/nginx-supportpkg.go b/cmd/nginx-supportpkg.go
index 7427573..fb03994 100644
--- a/cmd/nginx-supportpkg.go
+++ b/cmd/nginx-supportpkg.go
@@ -31,9 +31,9 @@ import (
 
 func Execute() {
 
-    var namespaces []string
     var product string
     var jobList []jobs.Job
+    collector := data_collector.DataCollector{}
 
     var rootCmd = &cobra.Command{
         Use:   "nginx-supportpkg",
@@ -41,7 +41,7 @@ func Execute() {
         Long:  `nginx-supportpkg - a tool to create Ingress Controller diagnostics package`,
         Run: func(cmd *cobra.Command, args []string) {
 
-            collector, err := data_collector.NewDataCollector(namespaces...)
+            err := data_collector.NewDataCollector(&collector)
             if err != nil {
                 fmt.Println(fmt.Errorf("unable to start data collector: %s", err))
                 os.Exit(1)
@@ -57,8 +57,10 @@ func Execute() {
                 jobList = slices.Concat(jobs.CommonJobList(), jobs.NGFJobList())
             case "ngx":
                 jobList = slices.Concat(jobs.CommonJobList(), jobs.NGXJobList())
+            case "nim":
+                jobList = slices.Concat(jobs.CommonJobList(), jobs.NIMJobList())
             default:
-                fmt.Printf("Error: product must be in the following list: [nic, ngf, ngx]\n")
+                fmt.Printf("Error: product must be in the following list: [nic, ngf, ngx, nim]\n")
                 os.Exit(1)
             }
@@ -66,12 +68,14 @@ func Execute() {
             failedJobs := 0
             for _, job := range jobList {
                 fmt.Printf("Running job %s...", job.Name)
-                err = job.Collect(collector)
-                if err != nil {
-                    fmt.Printf(" Error: %s\n", err)
+                err, Skipped := job.Collect(&collector)
+                if Skipped {
+                    fmt.Print(" SKIPPED\n")
+                } else if err != nil {
+                    fmt.Printf(" FAILED: %s\n", err)
                     failedJobs++
                 } else {
-                    fmt.Print(" OK\n")
+                    fmt.Print(" COMPLETED\n")
                 }
             }
@@ -94,7 +98,7 @@ func Execute() {
         },
     }
 
-    rootCmd.Flags().StringSliceVarP(&namespaces, "namespace", "n", []string{}, "list of namespaces to collect information from")
+    rootCmd.Flags().StringSliceVarP(&collector.Namespaces, "namespace", "n", []string{}, "list of namespaces to collect information from")
     if err := rootCmd.MarkFlagRequired("namespace"); err != nil {
         fmt.Println(err)
         os.Exit(1)
     }
@@ -106,6 +110,9 @@ func Execute() {
         os.Exit(1)
     }
 
+    rootCmd.Flags().BoolVarP(&collector.ExcludeDBData, "exclude-db-data", "d", false, "exclude DB data collection")
+    rootCmd.Flags().BoolVarP(&collector.ExcludeTimeSeriesData, "exclude-time-series-data", "t", false, "exclude time series data collection")
+
     versionStr := "nginx-supportpkg - version: " + version.Version + " - build: " + version.Build + "\n"
     rootCmd.SetVersionTemplate(versionStr)
     rootCmd.Version = versionStr
@@ -115,8 +122,9 @@ func Execute() {
         "Usage:" +
         "\n nginx-supportpkg -h|--help" +
         "\n nginx-supportpkg -v|--version" +
-        "\n nginx-supportpkg [-n|--namespace] ns1 [-n|--namespace] ns2 [-p|--product] [nic,ngf,ngx]" +
-        "\n nginx-supportpkg [-n|--namespace] ns1,ns2 [-p|--product] [nic,ngf,ngx] \n")
+        "\n nginx-supportpkg [-n|--namespace] ns1 [-n|--namespace] ns2 [-p|--product] [nic,ngf,ngx,nim]" +
+        "\n nginx-supportpkg [-n|--namespace] ns1,ns2 [-p|--product] [nic,ngf,ngx,nim]" +
+        "\n nginx-supportpkg [-n|--namespace] ns1 [-n|--namespace] ns2 [-p|--product] [nim] [-d|--exclude-db-data] [-t|--exclude-time-series-data] \n")
 
     if err := rootCmd.Execute(); err != nil {
         fmt.Println(err)
diff --git a/pkg/data_collector/data_collector.go b/pkg/data_collector/data_collector.go
index cedbda0..0df57fa 100644
--- a/pkg/data_collector/data_collector.go
+++ b/pkg/data_collector/data_collector.go
@@ -24,9 +24,15 @@ import (
     "compress/gzip"
     "context"
     "fmt"
+    "io"
+    "log"
+    "os"
+    "path/filepath"
+    "strconv"
+    "time"
+
     helmClient "github.com/mittwald/go-helm-client"
"github.com/nginxinc/nginx-k8s-supportpkg/pkg/crds" - "io" corev1 "k8s.io/api/core/v1" crdClient "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -38,35 +44,32 @@ import ( "k8s.io/client-go/tools/remotecommand" "k8s.io/client-go/util/homedir" metricsClient "k8s.io/metrics/pkg/client/clientset/versioned" - "log" - "os" - "path/filepath" - "strconv" - "time" ) type DataCollector struct { - BaseDir string - Namespaces []string - Logger *log.Logger - LogFile *os.File - K8sRestConfig *rest.Config - K8sCoreClientSet *kubernetes.Clientset - K8sCrdClientSet *crdClient.Clientset - K8sMetricsClientSet *metricsClient.Clientset - K8sHelmClientSet map[string]helmClient.Client + BaseDir string + Namespaces []string + Logger *log.Logger + LogFile *os.File + K8sRestConfig *rest.Config + K8sCoreClientSet *kubernetes.Clientset + K8sCrdClientSet *crdClient.Clientset + K8sMetricsClientSet *metricsClient.Clientset + K8sHelmClientSet map[string]helmClient.Client + ExcludeDBData bool + ExcludeTimeSeriesData bool } -func NewDataCollector(namespaces ...string) (*DataCollector, error) { +func NewDataCollector(collector *DataCollector) error { tmpDir, err := os.MkdirTemp("", "-pkg-diag") if err != nil { - return nil, fmt.Errorf("unable to create temp directory: %s", err) + return fmt.Errorf("unable to create temp directory: %s", err) } logFile, err := os.OpenFile(filepath.Join(tmpDir, "supportpkg.log"), os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) if err != nil { - return nil, fmt.Errorf("unable to create log file: %s", err) + return fmt.Errorf("unable to create log file: %s", err) } // Find config @@ -77,30 +80,27 @@ func NewDataCollector(namespaces ...string) (*DataCollector, error) { config, err := clientcmd.BuildConfigFromFlags("", kubeConfig) if err != nil { - return nil, fmt.Errorf("unable to connect to k8s using file %s: %s", kubeConfig, err) - } - - dc := DataCollector{ - BaseDir: tmpDir, - Namespaces: namespaces, - LogFile: logFile, - Logger: log.New(logFile, "", log.LstdFlags|log.LUTC|log.Lmicroseconds|log.Lshortfile), - K8sHelmClientSet: make(map[string]helmClient.Client), + return fmt.Errorf("unable to connect to k8s using file %s: %s", kubeConfig, err) } + // Set up the DataCollector options + collector.BaseDir = tmpDir + collector.LogFile = logFile + collector.Logger = log.New(logFile, "", log.LstdFlags|log.LUTC|log.Lmicroseconds|log.Lshortfile) + collector.K8sHelmClientSet = make(map[string]helmClient.Client) //Initialize clients - dc.K8sRestConfig = config - dc.K8sCoreClientSet, _ = kubernetes.NewForConfig(config) - dc.K8sCrdClientSet, _ = crdClient.NewForConfig(config) - dc.K8sMetricsClientSet, _ = metricsClient.NewForConfig(config) - for _, namespace := range dc.Namespaces { - dc.K8sHelmClientSet[namespace], _ = helmClient.NewClientFromRestConf(&helmClient.RestConfClientOptions{ + collector.K8sRestConfig = config + collector.K8sCoreClientSet, _ = kubernetes.NewForConfig(config) + collector.K8sCrdClientSet, _ = crdClient.NewForConfig(config) + collector.K8sMetricsClientSet, _ = metricsClient.NewForConfig(config) + for _, namespace := range collector.Namespaces { + collector.K8sHelmClientSet[namespace], _ = helmClient.NewClientFromRestConf(&helmClient.RestConfClientOptions{ Options: &helmClient.Options{Namespace: namespace}, RestConfig: config, }) } - return &dc, nil + return nil } func (c *DataCollector) WrapUp(product string) (string, error) { diff --git a/pkg/jobs/common_job_list.go b/pkg/jobs/common_job_list.go index 61d9701..f2e0aa5 
100644 --- a/pkg/jobs/common_job_list.go +++ b/pkg/jobs/common_job_list.go @@ -23,12 +23,13 @@ import ( "context" "encoding/json" "fmt" - "github.com/nginxinc/nginx-k8s-supportpkg/pkg/data_collector" "io" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "path/filepath" "time" + + "github.com/nginxinc/nginx-k8s-supportpkg/pkg/data_collector" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) func CommonJobList() []Job { @@ -88,6 +89,91 @@ func CommonJobList() []Job { ch <- jobResult }, }, + { + Name: "pv-list", + Timeout: time.Second * 10, + Execute: func(dc *data_collector.DataCollector, ctx context.Context, ch chan JobResult) { + jobResult := JobResult{Files: make(map[string][]byte), Error: nil} + for _, namespace := range dc.Namespaces { + result, err := dc.K8sCoreClientSet.CoreV1().PersistentVolumes().List(ctx, metav1.ListOptions{}) + if err != nil { + dc.Logger.Printf("\tCould not retrieve persistent volumes list %s: %v\n", namespace, err) + } else { + jsonResult, _ := json.MarshalIndent(result, "", " ") + jobResult.Files[filepath.Join(dc.BaseDir, "resources", namespace, "persistentvolumes.json")] = jsonResult + } + } + ch <- jobResult + }, + }, + { + Name: "pvc-list", + Timeout: time.Second * 10, + Execute: func(dc *data_collector.DataCollector, ctx context.Context, ch chan JobResult) { + jobResult := JobResult{Files: make(map[string][]byte), Error: nil} + for _, namespace := range dc.Namespaces { + result, err := dc.K8sCoreClientSet.CoreV1().PersistentVolumeClaims(namespace).List(ctx, metav1.ListOptions{}) + if err != nil { + dc.Logger.Printf("\tCould not retrieve persistent volume claims list %s: %v\n", namespace, err) + } else { + jsonResult, _ := json.MarshalIndent(result, "", " ") + jobResult.Files[filepath.Join(dc.BaseDir, "resources", namespace, "persistentvolumeclaims.json")] = jsonResult + } + } + ch <- jobResult + }, + }, + { + Name: "sc-list", + Timeout: time.Second * 10, + Execute: func(dc *data_collector.DataCollector, ctx context.Context, ch chan JobResult) { + jobResult := JobResult{Files: make(map[string][]byte), Error: nil} + for _, namespace := range dc.Namespaces { + result, err := dc.K8sCoreClientSet.StorageV1().StorageClasses().List(ctx, metav1.ListOptions{}) + if err != nil { + dc.Logger.Printf("\tCould not retrieve storage classes list %s: %v\n", namespace, err) + } else { + jsonResult, _ := json.MarshalIndent(result, "", " ") + jobResult.Files[filepath.Join(dc.BaseDir, "resources", namespace, "storageclasses.json")] = jsonResult + } + } + ch <- jobResult + }, + }, + { + Name: "apiresources-list", + Timeout: time.Second * 10, + Execute: func(dc *data_collector.DataCollector, ctx context.Context, ch chan JobResult) { + jobResult := JobResult{Files: make(map[string][]byte), Error: nil} + for _, namespace := range dc.Namespaces { + result, err := dc.K8sCoreClientSet.DiscoveryClient.ServerPreferredResources() + if err != nil { + dc.Logger.Printf("\tCould not retrieve API resources list %s: %v\n", namespace, err) + } else { + jsonResult, _ := json.MarshalIndent(result, "", " ") + jobResult.Files[filepath.Join(dc.BaseDir, "resources", namespace, "apiresources.json")] = jsonResult + } + } + ch <- jobResult + }, + }, + { + Name: "apiversions-list", + Timeout: time.Second * 10, + Execute: func(dc *data_collector.DataCollector, ctx context.Context, ch chan JobResult) { + jobResult := JobResult{Files: make(map[string][]byte), Error: nil} + for _, namespace := range dc.Namespaces { + result, err := 
dc.K8sCoreClientSet.DiscoveryClient.ServerGroups() + if err != nil { + dc.Logger.Printf("\tCould not retrieve API versions list %s: %v\n", namespace, err) + } else { + jsonResult, _ := json.MarshalIndent(result, "", " ") + jobResult.Files[filepath.Join(dc.BaseDir, "resources", namespace, "apiversions.json")] = jsonResult + } + } + ch <- jobResult + }, + }, { Name: "events-list", Timeout: time.Second * 10, diff --git a/pkg/jobs/job.go b/pkg/jobs/job.go index 634acd3..3a0fe25 100644 --- a/pkg/jobs/job.go +++ b/pkg/jobs/job.go @@ -20,12 +20,12 @@ package jobs import ( "context" - "errors" "fmt" - "github.com/nginxinc/nginx-k8s-supportpkg/pkg/data_collector" "os" "path/filepath" "time" + + "github.com/nginxinc/nginx-k8s-supportpkg/pkg/data_collector" ) type Job struct { @@ -35,11 +35,12 @@ type Job struct { } type JobResult struct { - Files map[string][]byte - Error error + Files map[string][]byte + Error error + Skipped bool } -func (j Job) Collect(dc *data_collector.DataCollector) error { +func (j Job) Collect(dc *data_collector.DataCollector) (error, bool) { ch := make(chan JobResult, 1) ctx, cancel := context.WithTimeout(context.Background(), j.Timeout) @@ -51,28 +52,32 @@ func (j Job) Collect(dc *data_collector.DataCollector) error { select { case <-ctx.Done(): dc.Logger.Printf("\tJob %s has timed out: %s\n---\n", j.Name, ctx.Err()) - return errors.New(fmt.Sprintf("Context cancelled: %v", ctx.Err())) + return fmt.Errorf("Context cancelled: %v", ctx.Err()), false case jobResults := <-ch: + if jobResults.Skipped { + dc.Logger.Printf("\tJob %s has been skipped\n---\n", j.Name) + return nil, true + } if jobResults.Error != nil { dc.Logger.Printf("\tJob %s has failed: %s\n", j.Name, jobResults.Error) - return jobResults.Error + return jobResults.Error, false } for fileName, fileValue := range jobResults.Files { err := os.MkdirAll(filepath.Dir(fileName), os.ModePerm) if err != nil { - return fmt.Errorf("MkdirAll failed: %v", err) + return fmt.Errorf("MkdirAll failed: %v", err), jobResults.Skipped } file, _ := os.Create(fileName) _, err = file.Write(fileValue) if err != nil { - return fmt.Errorf("Write failed: %v", err) + return fmt.Errorf("Write failed: %v", err), jobResults.Skipped } _ = file.Close() dc.Logger.Printf("\tJob %s wrote %d bytes to %s\n", j.Name, len(fileValue), fileName) } dc.Logger.Printf("\tJob %s completed successfully\n---\n", j.Name) - return nil + return nil, jobResults.Skipped } } diff --git a/pkg/jobs/nim_job_list.go b/pkg/jobs/nim_job_list.go new file mode 100644 index 0000000..5230750 --- /dev/null +++ b/pkg/jobs/nim_job_list.go @@ -0,0 +1,277 @@ +/** + +Copyright 2024 F5, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+
+**/
+
+package jobs
+
+import (
+    "context"
+    "os"
+    "path/filepath"
+    "strings"
+    "time"
+
+    "github.com/nginxinc/nginx-k8s-supportpkg/pkg/data_collector"
+    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+var dqliteBackupPath = "/etc/nms/scripts/dqlite-backup"
+var nmsConfigPath = "/etc/nms/nms.conf"
+
+var clickhouseCommands = map[string]string{
+    "events.csv": "SELECT * FROM nms.events WHERE creation_time > subtractHours(now(),${default_hrs}) ORDER BY creation_time DESC LIMIT ${max_log_limit} FORMAT CSVWithNames",
+    "metrics.csv": "SELECT * FROM nms.metrics WHERE timestamp > subtractHours(now(),${default_hrs}) AND date > toDate(subtractDays(now(),${max_num_days})) ORDER BY timestamp DESC LIMIT ${max_log_limit} FORMAT CSVWithNames",
+    "metrics_1day.csv": "SELECT * FROM nms.metrics_1day WHERE timestamp > subtractHours(now(),${default_hrs}) AND date > toDate(subtractDays(now(),${max_num_days})) ORDER BY timestamp DESC LIMIT ${max_log_limit} FORMAT CSVWithNames",
+    "metrics_1hour.csv": "SELECT * FROM nms.metrics_1hour WHERE timestamp > subtractHours(now(),${default_hrs}) AND date > toDate(subtractDays(now(),${max_num_days})) ORDER BY timestamp DESC LIMIT ${max_log_limit} FORMAT CSVWithNames",
+    "metrics_5min.csv": "SELECT * FROM nms.metrics_5min WHERE timestamp > subtractHours(now(),${default_hrs}) AND date > toDate(subtractDays(now(),${max_num_days})) ORDER BY timestamp DESC LIMIT ${max_log_limit} FORMAT CSVWithNames",
+    "metrics-row-counts.csv": "SELECT count(*), name FROM nms.metrics GROUP BY name FORMAT CSVWithNames",
+    "events-row-counts.csv": "SELECT count(*), category FROM nms.events GROUP BY category FORMAT CSVWithNames",
+    "events.sql": "SHOW CREATE TABLE nms.events",
+    "metrics.sql": "SHOW CREATE TABLE nms.metrics",
+    "metrics_1day.sql": "SHOW CREATE TABLE nms.metrics_1day",
+    "metrics_1hour.sql": "SHOW CREATE TABLE nms.metrics_1hour",
+    "metrics_5min.sql": "SHOW CREATE TABLE nms.metrics_5min",
+    "table-sizes.stat": "SELECT table, formatReadableSize(sum(bytes)) as size, min(min_date) as min_date, max(max_date) as max_date FROM system.parts WHERE active GROUP BY table ORDER BY table FORMAT CSVWithNames",
+    "system-asynchronous-metrics.stat": "SELECT * FROM system.asynchronous_metrics FORMAT CSVWithNames",
+    "system-tables.stat": "SELECT * FROM system.tables FORMAT CSVWithNames",
+    "system-parts.stat": "SELECT * FROM system.parts ORDER BY table ASC, name DESC, modification_time DESC FORMAT CSVWithNames",
+    "system-metrics.stat": "SELECT * FROM system.metrics FORMAT CSVWithNames",
+    "system-settings.stat": "SELECT * FROM system.settings FORMAT CSVWithNames",
+    "system-query-log.csv": "SELECT * FROM system.query_log WHERE event_time > subtractHours(now(),${default_hrs}) AND event_date > toDate(subtractDays(now(),${max_num_days})) ORDER BY event_time DESC LIMIT ${max_log_limit} FORMAT CSVWithNames",
+    "system-events.stat": "SELECT * FROM system.events ORDER BY value DESC LIMIT ${max_log_limit} FORMAT CSVWithNames",
+    "metrics-profile-15-min.csv": "SELECT hex(SHA256(instance)) inst, name, toStartOfInterval(timestamp, INTERVAL 15 minute) tmstp, count(*) cnt FROM metrics where timestamp > date_add(month, -1, timestamp) GROUP BY tmstp, inst, name ORDER BY inst, name, tmstp asc LIMIT 1000000;",
+    "metrics-profile-1-hour.csv": "SELECT hex(SHA256(instance)) inst, name, toStartOfInterval(timestamp, INTERVAL 1 hour) tmstp, count(*) cnt FROM metrics where timestamp > date_add(month, -1, timestamp) GROUP BY tmstp, inst, name ORDER BY inst, name, tmstp asc LIMIT 1000000;",
+    "metrics-profile-1-day.csv": "SELECT hex(SHA256(instance)) inst, name, toStartOfInterval(timestamp, INTERVAL 1 day) tmstp, count(*) cnt FROM metrics where timestamp > date_add(month, -1, timestamp) GROUP BY tmstp, inst, name ORDER BY inst, name, tmstp asc LIMIT 1000000;",
+    "metrics_5min-profile-1-hour.csv": "SELECT hex(SHA256(instance)) inst, name, toStartOfInterval(timestamp, INTERVAL 1 hour) tmstp, count(*) cnt FROM metrics_5min where timestamp > date_add(month, -1, timestamp) GROUP BY tmstp, inst, name ORDER BY inst, name, tmstp asc LIMIT 1000000;",
+    "metrics_5min-profile-1-day.csv": "SELECT hex(SHA256(instance)) inst, name, toStartOfInterval(timestamp, INTERVAL 1 day) tmstp, count(*) cnt FROM metrics_5min where timestamp > date_add(month, -1, timestamp) GROUP BY tmstp, inst, name ORDER BY inst, name, tmstp asc LIMIT 1000000;",
+    "metrics_1hour-profile-1-day.csv": "SELECT hex(SHA256(instance)) inst, name, toStartOfInterval(timestamp, INTERVAL 1 day) tmstp, count(*) cnt FROM metrics_1hour where timestamp > date_add(month, -1, timestamp) GROUP BY tmstp, inst, name ORDER BY inst, name, tmstp asc LIMIT 1000000;",
+    "metrics_1hour-profile-1-month.csv": "SELECT hex(SHA256(instance)) inst, name, toStartOfInterval(timestamp, INTERVAL 1 month) tmstp, count(*) cnt FROM metrics_1hour where timestamp > date_add(month, -1, timestamp) GROUP BY tmstp, inst, name ORDER BY inst, name, tmstp asc LIMIT 1000000;",
+    "metrics_1day-profile-1-day.csv": "SELECT hex(SHA256(instance)) inst, name, toStartOfInterval(timestamp, INTERVAL 1 day) tmstp, count(*) cnt FROM metrics_1day where timestamp > date_add(month, -1, timestamp) GROUP BY tmstp, inst, name ORDER BY inst, name, tmstp asc LIMIT 1000000;",
+    "metrics_1day-profile-1-month.csv": "SELECT hex(SHA256(instance)) inst, name, toStartOfInterval(timestamp, INTERVAL 1 month) tmstp, count(*) cnt FROM metrics_1day where timestamp > date_add(month, -1, timestamp) GROUP BY tmstp, inst, name ORDER BY inst, name, tmstp asc LIMIT 1000000;",
+}
+
+func NIMJobList() []Job {
+    jobList := []Job{
+        {
+            Name:    "exec-apigw-nginx-t",
+            Timeout: time.Second * 10,
+            Execute: func(dc *data_collector.DataCollector, ctx context.Context, ch chan JobResult) {
+                jobResult := JobResult{Files: make(map[string][]byte), Error: nil}
+                command := []string{"/usr/sbin/nginx", "-T"}
+                for _, namespace := range dc.Namespaces {
+                    pods, err := dc.K8sCoreClientSet.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{})
+                    if err != nil {
+                        dc.Logger.Printf("\tCould not retrieve pod list for namespace %s: %v\n", namespace, err)
+                    } else {
+                        for _, pod := range pods.Items {
+                            if strings.Contains(pod.Name, "apigw") {
+                                res, err := dc.PodExecutor(namespace, pod.Name, "apigw", command, ctx)
+                                if err != nil {
+                                    jobResult.Error = err
+                                    dc.Logger.Printf("\tCommand execution %s failed for pod %s in namespace %s: %v\n", command, pod.Name, namespace, err)
+                                } else {
+                                    jobResult.Files[filepath.Join(dc.BaseDir, "exec", namespace, pod.Name+"__nginx-t.txt")] = res
+                                }
+                            }
+                        }
+                    }
+                }
+                ch <- jobResult
+            },
+        },
+        {
+            Name:    "exec-apigw-nginx-version",
+            Timeout: time.Second * 10,
+            Execute: func(dc *data_collector.DataCollector, ctx context.Context, ch chan JobResult) {
+                jobResult := JobResult{Files: make(map[string][]byte), Error: nil}
+                command := []string{"/usr/sbin/nginx", "-v"}
+                for _, namespace := range dc.Namespaces {
+                    pods, err := dc.K8sCoreClientSet.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{})
+                    if err != nil {
+                        dc.Logger.Printf("\tCould not retrieve pod list for namespace %s: %v\n", namespace, err)
+                    } else {
+                        for _, pod := range pods.Items {
+                            if strings.Contains(pod.Name, "apigw") {
+                                res, err := dc.PodExecutor(namespace, pod.Name, "apigw", command, ctx)
+                                if err != nil {
+                                    jobResult.Error = err
+                                    dc.Logger.Printf("\tCommand execution %s failed for pod %s in namespace %s: %v\n", command, pod.Name, namespace, err)
+                                } else {
+                                    jobResult.Files[filepath.Join(dc.BaseDir, "exec", namespace, pod.Name+"__nginx-version.txt")] = res
+                                }
+                            }
+                        }
+                    }
+                }
+                ch <- jobResult
+            },
+        },
+        {
+            Name:    "exec-clickhouse-version",
+            Timeout: time.Second * 10,
+            Execute: func(dc *data_collector.DataCollector, ctx context.Context, ch chan JobResult) {
+                jobResult := JobResult{Files: make(map[string][]byte), Error: nil}
+                command := []string{"clickhouse-server", "--version"}
+                for _, namespace := range dc.Namespaces {
+                    pods, err := dc.K8sCoreClientSet.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{})
+                    if err != nil {
+                        dc.Logger.Printf("\tCould not retrieve pod list for namespace %s: %v\n", namespace, err)
+                    } else {
+                        for _, pod := range pods.Items {
+                            if strings.Contains(pod.Name, "clickhouse") {
+                                res, err := dc.PodExecutor(namespace, pod.Name, "clickhouse-server", command, ctx)
+                                if err != nil {
+                                    jobResult.Error = err
+                                    dc.Logger.Printf("\tCommand execution %s failed for pod %s in namespace %s: %v\n", command, pod.Name, namespace, err)
+                                } else {
+                                    jobResult.Files[filepath.Join(dc.BaseDir, "exec", namespace, pod.Name+"__clickhouse-server-version.txt")] = res
+                                }
+                            }
+                        }
+                    }
+                }
+                ch <- jobResult
+            },
+        },
+        {
+            Name:    "exec-clickhouse-data",
+            Timeout: time.Second * 600,
+            Execute: func(dc *data_collector.DataCollector, ctx context.Context, ch chan JobResult) {
+                jobResult := JobResult{Files: make(map[string][]byte), Error: nil}
+
+                if dc.ExcludeTimeSeriesData {
+                    dc.Logger.Printf("\tSkipping clickhouse data dump as ExcludeTimeSeriesData is set to true\n")
+                    jobResult.Skipped = true
+                    ch <- jobResult
+                    return
+                }
+
+                for _, namespace := range dc.Namespaces {
+                    pods, err := dc.K8sCoreClientSet.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{})
+                    if err != nil {
+                        dc.Logger.Printf("\tCould not retrieve pod list for namespace %s: %v\n", namespace, err)
+                    } else {
+                        for _, pod := range pods.Items {
+                            if strings.Contains(pod.Name, "clickhouse") {
+                                for fileName, query := range clickhouseCommands {
+                                    // Replace placeholders in the query
+                                    query = strings.ReplaceAll(query, "${default_hrs}", "24")
+                                    query = strings.ReplaceAll(query, "${max_num_days}", "30")
+                                    query = strings.ReplaceAll(query, "${max_log_limit}", "1000")
+                                    command := []string{"clickhouse-client", "--database", "nms", "-q", query}
+                                    if fileName == "events.csv" || fileName == "metrics.csv" {
+                                        command = append(command, "--format_csv_delimiter=,")
+                                    }
+
+                                    // Execute the command
+                                    res, err := dc.PodExecutor(namespace, pod.Name, "clickhouse-server", command, ctx)
+                                    if err != nil {
+                                        jobResult.Error = err
+                                        dc.Logger.Printf("\tCommand execution %s failed for pod %s in namespace %s: %v\n", command, pod.Name, namespace, err)
+                                    } else {
+                                        jobResult.Files[filepath.Join(dc.BaseDir, "exec", namespace, pod.Name+"__"+fileName)] = res
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+                ch <- jobResult
+            },
+        },
+        {
+            Name:    "exec-dqlite-dump",
+            Timeout: time.Second * 600,
+            Execute: func(dc *data_collector.DataCollector, ctx context.Context, ch chan JobResult) {
+                jobResult := JobResult{Files: make(map[string][]byte), Error: nil}
+
+                if dc.ExcludeDBData {
+                    dc.Logger.Printf("\tSkipping dqlite dump as ExcludeDBData is set to true\n")
+                    jobResult.Skipped = true
+                    ch <- jobResult
+                    return
+                }
+                // dqlite databases to back up: database name, pod/container
+                // name, output file in the pod, and dqlite listen address.
+                dbConfigs := []struct {
+                    dbName        string
+                    containerName string
+                    outputFile    string
+                    dbAddr        string
+                }{
+                    {"core", "core", "/tmp/core.sql", "0.0.0.0:7891"},
+                    {"dpm", "dpm", "/tmp/dpm.sql", "0.0.0.0:7890"},
+                    {"integrations", "integrations", "/tmp/integrations.sql", "0.0.0.0:7892"},
+                    {"license", "integrations", "/tmp/license.sql", "0.0.0.0:7893"},
+                    // Add more containers as needed
+                }
+
+                // /etc/nms/scripts/dqlite-backup -n core -c /etc/nms/nms.conf -a 0.0.0.0:7891 -o /tmp/core.sql -k
+
+                for _, namespace := range dc.Namespaces {
+                    pods, err := dc.K8sCoreClientSet.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{})
+                    if err != nil {
+                        dc.Logger.Printf("\tCould not retrieve pod list for namespace %s: %v\n", namespace, err)
+                    } else {
+                        for _, config := range dbConfigs {
+                            command := []string{dqliteBackupPath, "-n", config.dbName, "-c", nmsConfigPath, "-a", config.dbAddr, "-o", config.outputFile, "-k"}
+                            for _, pod := range pods.Items {
+                                if strings.Contains(pod.Name, config.containerName) {
+                                    res, err := dc.PodExecutor(namespace, pod.Name, config.containerName, command, ctx)
+                                    if err != nil {
+                                        jobResult.Error = err
+                                        dc.Logger.Printf("\tCommand execution %s failed for pod %s in namespace %s: %v\n", command, pod.Name, namespace, err)
+                                    } else {
+                                        jobResult.Files[filepath.Join(dc.BaseDir, "exec", namespace, pod.Name+"__dqlite-dump-"+config.dbName+".txt")] = res
+
+                                        // Copy the dumped file from the pod to the host
+                                        destPathFilename := filepath.Join(dc.BaseDir, "exec", namespace, pod.Name+"__dqlite-dump-"+filepath.Base(config.outputFile))
+                                        copyCmd := []string{"cat", config.outputFile}
+                                        fileContent, err := dc.PodExecutor(namespace, pod.Name, config.containerName, copyCmd, ctx)
+                                        if err != nil {
+                                            jobResult.Error = err
+                                            dc.Logger.Printf("\tFailed to copy dumped file %s from pod %s in namespace %s to %s: %v\n", config.outputFile, pod.Name, namespace, destPathFilename, err)
+                                        } else {
+                                            err = os.WriteFile(destPathFilename, fileContent, 0644)
+                                            if err != nil {
+                                                jobResult.Error = err
+                                                dc.Logger.Printf("\tFailed to write file to %s: %v\n", destPathFilename, err)
+                                            } else {
+                                                dc.Logger.Printf("\tSuccessfully copied dumped file %s from pod %s in namespace %s to %s\n", config.outputFile, pod.Name, namespace, destPathFilename)
+                                            }
+                                        }
+
+                                        // Remove/delete the dumped file from the pod
+                                        _, err = dc.PodExecutor(namespace, pod.Name, config.containerName, []string{"rm", "-f", config.outputFile}, ctx)
+                                        if err != nil {
+                                            jobResult.Error = err
+                                            dc.Logger.Printf("\tFailed to remove dumped file %s from pod %s in namespace %s: %v\n", config.outputFile, pod.Name, namespace, err)
+                                        } else {
+                                            dc.Logger.Printf("\tSuccessfully removed dumped file %s from pod %s in namespace %s\n", config.outputFile, pod.Name, namespace)
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+                ch <- jobResult
+            },
+        },
+    }
+    return jobList
+}