Skip to content

Commit b459c54

Browse files
authored
Add otel logging core to clean-nfs-cache job (#1284)
1 parent b72091f commit b459c54

File tree

4 files changed

+56
-39
lines changed

4 files changed

+56
-39
lines changed

iac/provider-gcp/nomad/clean-nfs-cache.tf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,5 +24,6 @@ resource "nomad_job" "clean_nfs_cache" {
2424
dry_run = var.filestore_cache_cleanup_dry_run
2525
deletions_per_loop = var.filestore_cache_cleanup_deletions_per_loop
2626
files_per_loop = var.filestore_cache_cleanup_files_per_loop
27+
otel_collector_endpoint = data.google_secret_manager_secret_version.grafana_logs_url.secret_data
2728
})
2829
}

iac/provider-gcp/nomad/jobs/clean-nfs-cache.hcl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@ job "filestore-cleanup" {
2626
"--disk-usage-target-percent=${max_disk_usage_target}",
2727
"--files-per-loop=${files_per_loop}",
2828
"--deletions-per-loop=${deletions_per_loop}",
29+
%{ if otel_collector_endpoint != "" }
30+
"--otel-collector-endpoint=${otel_collector_endpoint}",
31+
%{ endif }
2932
"${nfs_cache_mount_path}",
3033
]
3134
}

packages/orchestrator/cmd/clean-nfs-cache/main.go

Lines changed: 51 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,22 @@ import (
1111
"sort"
1212
"time"
1313

14+
"github.com/google/uuid"
15+
"go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc"
1416
"go.uber.org/zap"
17+
"go.uber.org/zap/zapcore"
1518

1619
"github.com/e2b-dev/infra/packages/orchestrator/cmd/clean-nfs-cache/pkg"
1720
"github.com/e2b-dev/infra/packages/shared/pkg/env"
1821
"github.com/e2b-dev/infra/packages/shared/pkg/logger"
22+
"github.com/e2b-dev/infra/packages/shared/pkg/telemetry"
1923
)
2024

21-
const serviceName = "clean-nfs-cache"
25+
const (
26+
serviceName = "clean-nfs-cache"
27+
commitSHA = ""
28+
serviceVersion = "0.1.0"
29+
)
2230

2331
func main() {
2432
ctx := context.Background()
@@ -29,11 +37,25 @@ func main() {
2937
}
3038

3139
func cleanNFSCache(ctx context.Context) error {
40+
path, opts, err := parseArgs()
41+
if err != nil {
42+
return fmt.Errorf("invalid arguments: %w", err)
43+
}
44+
45+
var cores []zapcore.Core
46+
if opts.otelCollectorEndpoint != "" {
47+
otelCore, err := newOtelCore(ctx, opts)
48+
if err != nil {
49+
return fmt.Errorf("failed to create otel logger: %w", err)
50+
}
51+
cores = append(cores, otelCore)
52+
}
53+
3254
globalLogger := zap.Must(logger.NewLogger(ctx, logger.LoggerConfig{
3355
ServiceName: serviceName,
3456
IsInternal: true,
3557
IsDebug: env.IsDebug(),
36-
Cores: nil,
58+
Cores: cores,
3759
EnableConsole: true,
3860
}))
3961
defer func(l *zap.Logger) {
@@ -44,11 +66,6 @@ func cleanNFSCache(ctx context.Context) error {
4466
}(globalLogger)
4567
zap.ReplaceGlobals(globalLogger)
4668

47-
path, opts, err := parseArgs()
48-
if err != nil {
49-
return fmt.Errorf("invalid arguments: %w", err)
50-
}
51-
5269
// get free space information for path
5370
zap.L().Info("starting",
5471
zap.Bool("dry_run", opts.dryRun),
@@ -64,10 +81,6 @@ func cleanNFSCache(ctx context.Context) error {
6481
}
6582
targetDiskUsage := int64(float64(opts.targetDiskUsagePercent) / 100 * float64(diskInfo.Total))
6683
areWeDone := func() bool {
67-
currentUsedPercentage := (float64(diskInfo.Used) / float64(diskInfo.Total)) * 100
68-
zap.L().Info("current usage",
69-
zap.Float64("percent", currentUsedPercentage),
70-
zap.String("size", formatBytes(diskInfo.Used)))
7184
return diskInfo.Used < targetDiskUsage
7285
}
7386

@@ -102,7 +115,7 @@ func cleanNFSCache(ctx context.Context) error {
102115
zap.Int64("count", results.deletedFiles),
103116
zap.Int64("bytes", results.deletedBytes))
104117
})
105-
allResults = allResults.union(results)
118+
allResults = allResults.sum(results)
106119
if err != nil {
107120
return fmt.Errorf("failed to delete files: %w", err)
108121
}
@@ -111,6 +124,27 @@ func cleanNFSCache(ctx context.Context) error {
111124
return nil
112125
}
113126

127+
func newOtelCore(ctx context.Context, opts opts) (zapcore.Core, error) {
128+
nodeID := env.GetNodeID()
129+
serviceInstanceID := uuid.NewString()
130+
131+
resource, err := telemetry.GetResource(ctx, nodeID, serviceName, commitSHA, serviceVersion, serviceInstanceID)
132+
if err != nil {
133+
return nil, fmt.Errorf("failed to create resource: %w", err)
134+
}
135+
136+
logsExporter, err := telemetry.NewLogExporter(ctx,
137+
otlploggrpc.WithEndpoint(opts.otelCollectorEndpoint),
138+
)
139+
if err != nil {
140+
return nil, fmt.Errorf("failed to create logs exporter: %w", err)
141+
}
142+
143+
loggerProvider := telemetry.NewLogProvider(ctx, logsExporter, resource)
144+
otelCore := logger.GetOTELCore(loggerProvider, serviceName)
145+
return otelCore, nil
146+
}
147+
114148
func printSummary(r results, opts opts) {
115149
if r.deletedFiles == 0 {
116150
zap.L().Info("no files deleted")
@@ -185,7 +219,7 @@ type results struct {
185219
createdDurations []time.Duration
186220
}
187221

188-
func (r results) union(other results) results {
222+
func (r results) sum(other results) results {
189223
return results{
190224
deletedFiles: r.deletedFiles + other.deletedFiles,
191225
deletedBytes: r.deletedBytes + other.deletedBytes,
@@ -197,17 +231,8 @@ func (r results) union(other results) results {
197231
func deleteOldestFiles(cache *pkg.ListingCache, files []pkg.File, opts opts, diskInfo *pkg.DiskInfo, areWeDone func() bool, deleteCount int64) (results, error) {
198232
now := time.Now()
199233
var results results
200-
for index, file := range files {
201-
if opts.dryRun {
202-
zap.L().Debug("would delete",
203-
zap.String("path", file.Path),
204-
zap.Int64("bytes", file.Size),
205-
zap.Duration("last_access", time.Since(file.ATime).Round(time.Minute)))
206-
} else {
207-
zap.L().Debug("deleting",
208-
zap.Int("index", index+1),
209-
zap.String("path", file.Path),
210-
zap.Int64("bytes", file.Size))
234+
for _, file := range files {
235+
if !opts.dryRun {
211236
if err := os.Remove(file.Path); err != nil {
212237
zap.L().Error("failed to delete",
213238
zap.String("path", file.Path),
@@ -291,6 +316,7 @@ type opts struct {
291316
dryRun bool
292317
filesPerLoop int
293318
filesToDeletePerLoop int64
319+
otelCollectorEndpoint string
294320
}
295321

296322
var (
@@ -306,6 +332,7 @@ func parseArgs() (string, opts, error) {
306332
flags.BoolVar(&opts.dryRun, "dry-run", true, "dry run")
307333
flags.IntVar(&opts.filesPerLoop, "files-per-loop", 10000, "number of files to gather metadata for per loop")
308334
flags.Int64Var(&opts.filesToDeletePerLoop, "deletions-per-loop", 100, "maximum number of files to delete per loop")
335+
flags.StringVar(&opts.otelCollectorEndpoint, "otel-collector-endpoint", "", "endpoint of the otel collector")
309336

310337
args := os.Args[1:] // skip the command name
311338
if err := flags.Parse(args); err != nil {
@@ -327,17 +354,3 @@ func timeit(message string, fn func()) {
327354

328355
zap.L().Debug(message, zap.Duration("duration", done))
329356
}
330-
331-
func formatBytes(b int64) string {
332-
const unit = 1024
333-
if b < unit {
334-
return fmt.Sprintf("%d B", b)
335-
}
336-
div, exp := int64(unit), 0
337-
for n := b / unit; n >= unit; n /= unit {
338-
div *= unit
339-
exp++
340-
}
341-
return fmt.Sprintf("%.1f %ciB",
342-
float64(b)/float64(div), "KMGTPE"[exp])
343-
}

packages/orchestrator/go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ require (
4848
github.com/vishvananda/netns v0.0.5
4949
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0
5050
go.opentelemetry.io/otel v1.38.0
51+
go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.14.0
5152
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.38.0
5253
go.opentelemetry.io/otel/metric v1.38.0
5354
go.opentelemetry.io/otel/sdk/metric v1.38.0
@@ -232,7 +233,6 @@ require (
232233
go.opentelemetry.io/contrib/bridges/otelzap v0.13.0 // indirect
233234
go.opentelemetry.io/contrib/detectors/gcp v1.38.0 // indirect
234235
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 // indirect
235-
go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.14.0 // indirect
236236
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 // indirect
237237
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0 // indirect
238238
go.opentelemetry.io/otel/log v0.14.0 // indirect

0 commit comments

Comments
 (0)