@@ -33,6 +33,15 @@ import (
33
33
"sigs.k8s.io/controller-runtime/pkg/client"
34
34
)
35
35
36
+ // tflops add all samples, like cpu in vpa
37
+ // Consider gpu allocator, check if enough tflops or vram to allocate
38
+ // cron scheduler strategy
39
+ // Add AutoSetResources to schedulingconfigtemplate and make it more configurable
40
+ // refactor main, database setup may not belong in the leader election runnable group
41
+ // scale to zero when query data if no usage, need carl to support
42
+ // add recommendation to workload
43
+ // resolve conversation on github, thanks for reviews
44
+
36
45
var _ = Describe ("Autoscaler" , func () {
37
46
Context ("when creating an autoscaler" , func () {
38
47
It ("should return an error if there is no client" , func () {
@@ -49,8 +58,8 @@ var _ = Describe("Autoscaler", func() {
49
58
scaler .LoadHistoryMetrics (ctx )
50
59
metrics := scaler .MetricsProvider .GetHistoryMetrics ()
51
60
for _ , m := range metrics {
52
- Expect (scaler .WorkloadStates ).To (HaveKey (m .Workload ))
53
- Expect (scaler .WorkerStates ).To (HaveKey (m .Worker ))
61
+ Expect (scaler .WorkloadStates ).To (HaveKey (m .WorkloadName ))
62
+ Expect (scaler .WorkerStates ).To (HaveKey (m .WorkerName ))
54
63
}
55
64
})
56
65
})
@@ -101,7 +110,7 @@ var _ = Describe("Autoscaler", func() {
101
110
})
102
111
103
112
Context ("when loading real time metrics" , func () {
104
- It ("should update the state of workloads and workers" , func () {
113
+ FIt ("should update the state of workloads and workers" , func () {
105
114
tfEnv := NewTensorFusionEnvBuilder ().
106
115
AddPoolWithNodeCount (1 ).SetGpuCountPerNode (1 ).
107
116
Build ()
@@ -117,17 +126,16 @@ var _ = Describe("Autoscaler", func() {
117
126
scaler .LoadWorkloads (ctx )
118
127
ws := scaler .WorkloadStates [workload .Name ]
119
128
metrics := & WorkerMetrics {
120
- Workload : workload .Name ,
121
- Worker : worker ,
122
- TflopsUsage : ResourceAmount (12.0 ),
123
- VramUsage : 9000 ,
124
- Timestamp : time .Now (),
129
+ WorkloadName : workload .Name ,
130
+ WorkerName : worker ,
131
+ TflopsUsage : ResourceAmount (12.0 ),
132
+ VramUsage : 9000 ,
133
+ Timestamp : time .Now (),
125
134
}
126
135
127
136
scaler .MetricsProvider = & FakeMetricsProvider {[]* WorkerMetrics {metrics }}
128
137
scaler .LoadRealTimeMetrics (ctx )
129
138
130
- Expect (scaler .WorkerStates [worker ].TflopsPeak ).To (Equal (metrics .TflopsUsage ))
131
139
Expect (scaler .WorkerStates [worker ].LastTflopsSampleTime ).To (Equal (metrics .Timestamp ))
132
140
Expect (ws .TflopsHistogram .IsEmpty ()).To (BeFalse ())
133
141
Expect (scaler .WorkerStates [worker ].VramPeak ).To (Equal (metrics .VramUsage ))
@@ -302,11 +310,11 @@ func (f *FakeMetricsProvider) GetHistoryMetrics() []*WorkerMetrics {
302
310
for hour := 0 ; hour < 24 ; hour ++ {
303
311
idx := day * 24 + hour
304
312
metrics = append (metrics , & WorkerMetrics {
305
- Workload : "workload-0" ,
306
- Worker : fmt .Sprintf ("worker-%d" , idx ),
307
- TflopsUsage : ResourceAmount (10.0 + float64 (idx % 10 )),
308
- VramUsage : 1 * 1024 * 1024 * 1024 ,
309
- Timestamp : startTime .Add (time .Duration (day * 24 + hour ) * time .Hour ),
313
+ WorkloadName : "workload-0" ,
314
+ WorkerName : fmt .Sprintf ("worker-%d" , idx ),
315
+ TflopsUsage : ResourceAmount (10.0 + float64 (idx % 10 )),
316
+ VramUsage : 1 * 1024 * 1024 * 1024 ,
317
+ Timestamp : startTime .Add (time .Duration (day * 24 + hour ) * time .Hour ),
310
318
})
311
319
}
312
320
}
0 commit comments