NexusGPU
diff --git a/.vscode/settings.json b/.vscode/settings.json
Lines changed: 2 additions & 0 deletions
diff --git a/api/v1/gpupool_types.go b/api/v1/gpupool_types.go
Lines changed: 6 additions & 0 deletions
diff --git a/api/v1/gpuresourcequota_types.go b/api/v1/gpuresourcequota_types.go
Lines changed: 2 additions & 0 deletions
diff --git a/charts/tensor-fusion/Chart.yaml b/charts/tensor-fusion/Chart.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/charts/tensor-fusion/crds/tensor-fusion.ai_gpupools.yaml b/charts/tensor-fusion/crds/tensor-fusion.ai_gpupools.yaml
Lines changed: 6 additions & 0 deletions
diff --git a/charts/tensor-fusion/crds/tensor-fusion.ai_tensorfusionclusters.yaml b/charts/tensor-fusion/crds/tensor-fusion.ai_tensorfusionclusters.yaml
Lines changed: 6 additions & 0 deletions
diff --git a/charts/tensor-fusion/templates/controller-deployment.yaml b/charts/tensor-fusion/templates/controller-deployment.yaml
Lines changed: 1 addition & 0 deletions
diff --git a/charts/tensor-fusion/templates/gpu-public-gpu-info.yaml b/charts/tensor-fusion/templates/gpu-public-gpu-info.yaml
Lines changed: 15 additions & 3 deletions
diff --git a/charts/tensor-fusion/templates/priorityclass.yaml b/charts/tensor-fusion/templates/priorityclass.yaml
Lines changed: 23 additions & 0 deletions
diff --git a/charts/tensor-fusion/values.yaml b/charts/tensor-fusion/values.yaml
Lines changed: 2 additions & 2 deletions
@@ -47,6 +47,7 @@
         "envtest",
         "essd",
         "Eventf",
+        "evictable",
         "featuregate",
         "finalizer",
         "Finalizers",
@@ -133,6 +134,7 @@
         "schedulingconfigtemplate",
         "schedulingconfigtemplates",
         "schedulingcorev",
+        "schedv",
         "serviceaccount",
         "shirou",
         "shortuuid",
 
@@ -238,6 +238,12 @@ type QosConfig struct {
 	Definitions []QosDefinition `json:"definitions,omitempty"`
 	DefaultQoS  QoSLevel        `json:"defaultQoS,omitempty"`
 	Pricing     []QosPricing    `json:"pricing,omitempty"`
+
+	// Eviction protection price ratio applied to cost calculation during protection period
+	// This multiplier increases pricing for protected workloads to discourage preemption
+	// +optional
+	// +kubebuilder:default="1.2"
+	EvictionProtectionPriceRatio string `json:"evictionProtectionPriceRatio,omitempty"`
 }
 
 type QosDefinition struct {
 
@@ -186,6 +186,8 @@ type AllocRequest struct {
 
 	// record the pod meta for quota check
 	PodMeta metav1.ObjectMeta
+
+	QoS QoSLevel
 }
 
 func (p *AllocRequest) Clone() fwk.StateData {
 
@@ -15,7 +15,7 @@ type: application
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
 # Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 1.5.8
+version: 1.5.9
 
 # This is the version number of the application being deployed. This version number should be
 # incremented each time you make changes to the application. Versions are not expected to
 
@@ -562,6 +562,12 @@ spec:
                           type: integer
                       type: object
                     type: array
+                  evictionProtectionPriceRatio:
+                    default: "1.2"
+                    description: |-
+                      Eviction protection price ratio applied to cost calculation during protection period
+                      This multiplier increases pricing for protected workloads to discourage preemption
+                    type: string
                   pricing:
                     items:
                       properties:
 
@@ -629,6 +629,12 @@ spec:
                                     type: integer
                                 type: object
                               type: array
+                            evictionProtectionPriceRatio:
+                              default: "1.2"
+                              description: |-
+                                Eviction protection price ratio applied to cost calculation during protection period
+                                This multiplier increases pricing for protected workloads to discourage preemption
+                              type: string
                             pricing:
                               items:
                                 properties:
 
@@ -32,6 +32,7 @@ spec:
       {{- end }}
       serviceAccountName: {{ include "tensor-fusion.serviceAccountName" . }}
       enableServiceLinks: false
+      priorityClassName: "system-cluster-critical"
       containers:
         - name: controller
           image: "{{ .Values.controller.image.repository }}:{{ .Values.controller.image.tag | default .Chart.AppVersion }}"
 
@@ -45,6 +45,18 @@ data:
       costPerHour: 1.64
       fp16TFlops: 312
 
+    - model: A100_PCIe_40GB
+      fullModelName: "NVIDIA A100-PCIE-40GB"
+      vendor: NVIDIA
+      costPerHour: 1.64
+      fp16TFlops: 312
+
+    - model: A100_PCIe_80GB
+      fullModelName: "NVIDIA A100-PCIE-80GB"
+      vendor: NVIDIA
+      costPerHour: 1.64
+      fp16TFlops: 312
+
     - model: A100_SXM_40G
       fullModelName: "NVIDIA A100-SXM4-40GB"
       vendor: NVIDIA
@@ -70,13 +82,13 @@ data:
       fp16TFlops: 312
     
     - model: A800_PCIe_80G
-      fullModelName: "NVIDIA A800 80GB PCIe"
+      fullModelName: "NVIDIA A800-PCIE-80GB"
       vendor: NVIDIA
       costPerHour: 1.64
       fp16TFlops: 312
 
     - model: A800_PCIe_40G
-      fullModelName: "NVIDIA A800 40GB PCIe"
+      fullModelName: "NVIDIA A800-PCIE-40GB"
       vendor: NVIDIA
       costPerHour: 1.64
       fp16TFlops: 312  
@@ -95,7 +107,7 @@ data:
       fp16TFlops: 125
 
     - model: A40
-      fullModelName: "NVIDIA A40 48GB PCIe"
+      fullModelName: "NVIDIA A40-PCIE-48GB"
       vendor: NVIDIA
       costPerHour: 0.4
       fp16TFlops: 149.7
 
@@ -0,0 +1,23 @@
+apiVersion: scheduling.k8s.io/v1
+kind: PriorityClass
+metadata:
+  name: tensor-fusion-critical
+value: 100000
+globalDefault: false
+description: "TensorFusion critical priority"
+---
+apiVersion: scheduling.k8s.io/v1
+kind: PriorityClass
+metadata:
+  name: tensor-fusion-high
+value: 10000
+globalDefault: false
+description: "TensorFusion high priority"
+---
+apiVersion: scheduling.k8s.io/v1
+kind: PriorityClass
+metadata:
+  name: tensor-fusion-medium
+value: 0
+globalDefault: false
+description: "TensorFusion medium priority"
@@ -169,8 +169,8 @@ schedulerConfig:
   kind: KubeSchedulerConfiguration
   clientConnection:
     kubeconfig: ""
-    qps: 50
-    burst: 100
+    qps: 1000
+    burst: 2000
   profiles:
   # Refer: https://kubernetes.io/docs/reference/scheduling/config/
   - schedulerName: tensor-fusion-scheduler
Original file line number	Diff line number	Diff line change
`@@ -186,6 +186,8 @@ type AllocRequest struct {`
`186`	`186`
`187`	`187`	`// record the pod meta for quota check`
`188`	`188`	`PodMeta metav1.ObjectMeta`
	`189`	`+`
	`190`	`+ QoS QoSLevel`
`189`	`191`	`}`
`190`	`192`
`191`	`193`	`func (p *AllocRequest) Clone() fwk.StateData {`