@@ -7,17 +7,45 @@ import (
77 "sync"
88
99 "github.com/caarlos0/env/v11"
10+ "go.opentelemetry.io/otel"
11+ "go.opentelemetry.io/otel/attribute"
1012 "go.opentelemetry.io/otel/metric"
1113 "go.uber.org/zap"
1214
1315 "github.com/e2b-dev/infra/packages/shared/pkg/telemetry"
16+ "github.com/e2b-dev/infra/packages/shared/pkg/utils"
1417)
1518
1619const (
1720 NewSlotsPoolSize = 32
1821 ReusedSlotsPoolSize = 100
1922)
2023
24+ var (
25+ meter = otel .Meter ("github.com/e2b-dev/infra/packages/orchestrator/internal/sandbox/network" )
26+
27+ newSlotsAvailableCounter = utils .Must (meter .Int64UpDownCounter ("orchestrator.network.slots_pool.new" ,
28+ metric .WithDescription ("Number of new network slots ready to be used." ),
29+ metric .WithUnit ("{slot}" ),
30+ ))
31+ reusableSlotsAvailableCounter = utils .Must (meter .Int64UpDownCounter ("orchestrator.network.slots_pool.reused" ,
32+ metric .WithDescription ("Number of reused network slots ready to be used." ),
33+ metric .WithUnit ("{slot}" ),
34+ ))
35+ acquiredSlots = utils .Must (meter .Int64Counter ("orchestrator.network.slots_pool.acquired" ,
36+ metric .WithDescription ("Number of network slots acquired." ),
37+ metric .WithUnit ("{slot}" ),
38+ ))
39+ returnedSlotCounter = utils .Must (meter .Int64Counter ("orchestrator.network.slots_pool.returned" ,
40+ metric .WithDescription ("Number of network slots returned." ),
41+ metric .WithUnit ("{slot}" ),
42+ ))
43+ releasedSlotCounter = utils .Must (meter .Int64Counter ("orchestrator.network.slots_pool.released" ,
44+ metric .WithDescription ("Number of network slots released." ),
45+ metric .WithUnit ("{slot}" ),
46+ ))
47+ )
48+
2149type Config struct {
2250 // Using a reserved IPv4 range that is set aside for experiments and documentation
2351 // https://en.wikipedia.org/wiki/Reserved_IP_addresses
@@ -36,45 +64,29 @@ type Pool struct {
3664 done chan struct {}
3765 doneOnce sync.Once
3866
39- newSlots chan * Slot
40- reusedSlots chan * Slot
41- newSlotCounter metric.Int64UpDownCounter
42- reusedSlotCounter metric.Int64UpDownCounter
67+ newSlots chan * Slot
68+ reusedSlots chan * Slot
4369
4470 slotStorage Storage
4571}
4672
4773var ErrClosed = errors .New ("cannot read from a closed pool" )
4874
49- func NewPool (meterProvider metric. MeterProvider , newSlotsPoolSize , reusedSlotsPoolSize int , nodeID string , config Config ) (* Pool , error ) {
75+ func NewPool (newSlotsPoolSize , reusedSlotsPoolSize int , nodeID string , config Config ) (* Pool , error ) {
5076 newSlots := make (chan * Slot , newSlotsPoolSize - 1 )
5177 reusedSlots := make (chan * Slot , reusedSlotsPoolSize )
5278
53- meter := meterProvider .Meter ("orchestrator.network.pool" )
54-
55- newSlotCounter , err := telemetry .GetUpDownCounter (meter , telemetry .NewNetworkSlotSPoolCounterMeterName )
56- if err != nil {
57- return nil , fmt .Errorf ("failed to create new slot counter: %w" , err )
58- }
59-
60- reusedSlotsCounter , err := telemetry .GetUpDownCounter (meter , telemetry .ReusedNetworkSlotSPoolCounterMeterName )
61- if err != nil {
62- return nil , fmt .Errorf ("failed to create reused slot counter: %w" , err )
63- }
64-
6579 slotStorage , err := NewStorage (vrtSlotsSize , nodeID , config )
6680 if err != nil {
6781 return nil , fmt .Errorf ("failed to create slot storage: %w" , err )
6882 }
6983
7084 pool := & Pool {
71- config : config ,
72- done : make (chan struct {}),
73- newSlots : newSlots ,
74- reusedSlots : reusedSlots ,
75- newSlotCounter : newSlotCounter ,
76- reusedSlotCounter : reusedSlotsCounter ,
77- slotStorage : slotStorage ,
85+ config : config ,
86+ done : make (chan struct {}),
87+ newSlots : newSlots ,
88+ reusedSlots : reusedSlots ,
89+ slotStorage : slotStorage ,
7890 }
7991
8092 return pool , nil
@@ -114,7 +126,7 @@ func (p *Pool) Populate(ctx context.Context) {
114126 continue
115127 }
116128
117- p . newSlotCounter .Add (ctx , 1 )
129+ newSlotsAvailableCounter .Add (ctx , 1 )
118130 p .newSlots <- slot
119131 }
120132 }
@@ -127,7 +139,8 @@ func (p *Pool) Get(ctx context.Context, allowInternet bool) (*Slot, error) {
127139 case <- p .done :
128140 return nil , ErrClosed
129141 case s := <- p .reusedSlots :
130- p .reusedSlotCounter .Add (ctx , - 1 )
142+ reusableSlotsAvailableCounter .Add (ctx , - 1 )
143+ acquiredSlots .Add (ctx , 1 , metric .WithAttributes (attribute .String ("pool" , "reused" )))
131144 telemetry .ReportEvent (ctx , "reused network slot" )
132145
133146 slot = s
@@ -138,7 +151,8 @@ func (p *Pool) Get(ctx context.Context, allowInternet bool) (*Slot, error) {
138151 case <- ctx .Done ():
139152 return nil , ctx .Err ()
140153 case s := <- p .newSlots :
141- p .newSlotCounter .Add (ctx , - 1 )
154+ newSlotsAvailableCounter .Add (ctx , - 1 )
155+ acquiredSlots .Add (ctx , 1 , metric .WithAttributes (attribute .String ("pool" , "new" )))
142156 telemetry .ReportEvent (ctx , "new network slot" )
143157
144158 slot = s
@@ -165,7 +179,7 @@ func (p *Pool) Return(ctx context.Context, slot *Slot) error {
165179 err := slot .ResetInternet (ctx )
166180 if err != nil {
167181 // Cleanup the slot if resetting internet fails
168- if cerr := p .cleanup (slot ); cerr != nil {
182+ if cerr := p .cleanup (ctx , slot ); cerr != nil {
169183 return fmt .Errorf ("reset internet: %w; cleanup: %w" , err , cerr )
170184 }
171185
@@ -178,9 +192,10 @@ func (p *Pool) Return(ctx context.Context, slot *Slot) error {
178192 case <- p .done :
179193 return ErrClosed
180194 case p .reusedSlots <- slot :
181- p .reusedSlotCounter .Add (ctx , 1 )
195+ returnedSlotCounter .Add (ctx , 1 )
196+ reusableSlotsAvailableCounter .Add (ctx , 1 )
182197 default :
183- err := p .cleanup (slot )
198+ err := p .cleanup (ctx , slot )
184199 if err != nil {
185200 return fmt .Errorf ("failed to return slot '%d': %w" , slot .Idx , err )
186201 }
@@ -189,7 +204,7 @@ func (p *Pool) Return(ctx context.Context, slot *Slot) error {
189204 return nil
190205}
191206
192- func (p * Pool ) cleanup (slot * Slot ) error {
207+ func (p * Pool ) cleanup (ctx context. Context , slot * Slot ) error {
193208 var errs []error
194209
195210 err := slot .RemoveNetwork ()
@@ -202,10 +217,12 @@ func (p *Pool) cleanup(slot *Slot) error {
202217 errs = append (errs , fmt .Errorf ("failed to release slot '%d': %w" , slot .Idx , err ))
203218 }
204219
220+ releasedSlotCounter .Add (ctx , 1 )
221+
205222 return errors .Join (errs ... )
206223}
207224
208- func (p * Pool ) Close (_ context.Context ) error {
225+ func (p * Pool ) Close (ctx context.Context ) error {
209226 zap .L ().Info ("Closing network pool" )
210227
211228 p .doneOnce .Do (func () {
@@ -215,7 +232,7 @@ func (p *Pool) Close(_ context.Context) error {
215232 var errs []error
216233
217234 for slot := range p .newSlots {
218- err := p .cleanup (slot )
235+ err := p .cleanup (ctx , slot )
219236 if err != nil {
220237 errs = append (errs , fmt .Errorf ("failed to cleanup slot '%d': %w" , slot .Idx , err ))
221238 }
@@ -224,7 +241,7 @@ func (p *Pool) Close(_ context.Context) error {
224241 close (p .reusedSlots )
225242
226243 for slot := range p .reusedSlots {
227- err := p .cleanup (slot )
244+ err := p .cleanup (ctx , slot )
228245 if err != nil {
229246 errs = append (errs , fmt .Errorf ("failed to cleanup slot '%d': %w" , slot .Idx , err ))
230247 }
0 commit comments