From 4644ab6a3ae553772ae436f62bfbbd4ee7cc90a5 Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Wed, 1 Oct 2025 07:42:18 -0400 Subject: [PATCH 01/44] values files Signed-off-by: Michael Kalantar --- .../inference-scheduling/gaie-values.yaml | 150 +++++++++++ .../inference-scheduling/gateway-values.yaml | 8 + .../inference-scheduling/ms-values.yaml | 237 ++++++++++++++++++ 3 files changed, 395 insertions(+) create mode 100644 tekton-poc/examples/inference-scheduling/gaie-values.yaml create mode 100644 tekton-poc/examples/inference-scheduling/gateway-values.yaml create mode 100644 tekton-poc/examples/inference-scheduling/ms-values.yaml diff --git a/tekton-poc/examples/inference-scheduling/gaie-values.yaml b/tekton-poc/examples/inference-scheduling/gaie-values.yaml new file mode 100644 index 00000000..2d84f723 --- /dev/null +++ b/tekton-poc/examples/inference-scheduling/gaie-values.yaml @@ -0,0 +1,150 @@ +inferenceExtension: + replicas: 1 + image: + # Either image will work, you just need to bring the correct plugins per image. In this example we will bring the upstream default plugin + ################### + name: llm-d-inference-scheduler + hub: ghcr.io/llm-d + tag: v0.2.1 + pullPolicy: Always + extProcPort: 9002 + extraContainerPorts: + - name: zmq + containerPort: 5557 + protocol: TCP + extraServicePorts: + - name: zmq + port: 5557 + targetPort: 5557 + protocol: TCP + env: + - name: HF_TOKEN + valueFrom: + secretKeyRef: + name: hf-secret + key: HF_TOKEN + pluginsConfigFile: "inf-sche-none.yaml" + pluginsCustomConfig: + inf-sche-none.yaml: | + # Sample EPP configuration for running without P/D with no scorers + # https://raw.githubusercontent.com/llm-d/llm-d-benchmark/refs/heads/main/setup/presets/gaie/inf-sche-none.yaml + apiVersion: inference.networking.x-k8s.io/v1alpha1 + kind: EndpointPickerConfig + plugins: + - type: prefix-cache-scorer + - type: decode-filter + - type: max-score-picker + - type: single-profile-handler + schedulingProfiles: + - name: default + plugins: + - pluginRef: decode-filter + - pluginRef: max-score-picker + - pluginRef: prefix-cache-scorer + weight: 0 + inf-sche-prefix-kv-queue.yaml: | + # Sample EPP configuration for running without P/D with prefix, kv, and queue scorers + # https://raw.githubusercontent.com/llm-d/llm-d-benchmark/refs/heads/main/setup/presets/gaie/inf-sche-prefix-kv-queue.yaml + apiVersion: inference.networking.x-k8s.io/v1alpha1 + kind: EndpointPickerConfig + plugins: + - type: prefix-cache-scorer + - type: decode-filter + - type: max-score-picker + - type: single-profile-handler + - type: kv-cache-scorer + - type: queue-cache-scorer + schedulingProfiles: + - name: default + plugins: + - pluginRef: decode-filter + - pluginRef: max-score-picker + - pluginRef: prefix-cache-scorer + weight: 1 + - pluginRef: kv-cache-scorer + weight: 1 + - pluginRef: queue-scorer + weight: 1 + inf-sche-prefix-kv.yaml: | + # Sample EPP configuration for running without P/D with prefix and kv scorers + # https://raw.githubusercontent.com/llm-d/llm-d-benchmark/refs/heads/main/setup/presets/gaie/inf-sche-prefix-kv.yaml + apiVersion: inference.networking.x-k8s.io/v1alpha1 + kind: EndpointPickerConfig + plugins: + - type: prefix-cache-scorer + - type: decode-filter + - type: max-score-picker + - type: single-profile-handler + - type: kv-cache-scorer + schedulingProfiles: + - name: default + plugins: + - pluginRef: decode-filter + - pluginRef: max-score-picker + - pluginRef: prefix-cache-scorer + weight: 1 + - pluginRef: kv-cache-scorer + weight: 1 + - pluginRef: 
queue-scorer + weight: 1 + inf-sche-prefix.yaml: | + # Sample EPP configuration for running without P/D with prefix scorer with weight of 1 + # https://raw.githubusercontent.com/llm-d/llm-d-benchmark/refs/heads/main/setup/presets/gaie/inf-sche-prefix.yaml + apiVersion: inference.networking.x-k8s.io/v1alpha1 + kind: EndpointPickerConfig + plugins: + - type: prefix-cache-scorer + - type: decode-filter + - type: max-score-picker + - type: single-profile-handler + schedulingProfiles: + - name: default + plugins: + - pluginRef: decode-filter + - pluginRef: max-score-picker + - pluginRef: prefix-cache-scorer + weight: 1 + inf-sche-queue.yaml: | + # Sample EPP configuration for running without P/D with no scorers + # https://raw.githubusercontent.com/llm-d/llm-d-benchmark/refs/heads/main/setup/presets/gaie/inf-sche-queue.yaml + apiVersion: inference.networking.x-k8s.io/v1alpha1 + kind: EndpointPickerConfig + plugins: + - type: queue-scorer + - type: decode-filter + - type: max-score-picker + - type: single-profile-handler + schedulingProfiles: + - name: default + plugins: + - pluginRef: decode-filter + - pluginRef: max-score-picker + - pluginRef: queue-scorer + weight: 1 + inf-sche-kv.yaml: | + # Sample EPP configuration for running without P/D with no scorers + # https://raw.githubusercontent.com/llm-d/llm-d-benchmark/refs/heads/main/setup/presets/gaie/inf-sche-kv.yaml + apiVersion: inference.networking.x-k8s.io/v1alpha1 + kind: EndpointPickerConfig + plugins: + - type: kv-cache-scorer + - type: decode-filter + - type: max-score-picker + - type: single-profile-handler + schedulingProfiles: + - name: default + plugins: + - pluginRef: decode-filter + - pluginRef: max-score-picker + - pluginRef: kv-cache-scorer + weight: 1 +inferencePool: + targetPortNumber: 8000 + modelServerType: vllm + apiVersion: "inference.networking.x-k8s.io/v1alpha2" + modelServers: + matchLabels: + llm-d.ai/inferenceServing: "true" + llm-d.ai/model: qwen-qwe-1ea37573-en3-0-6b +provider: + name: none diff --git a/tekton-poc/examples/inference-scheduling/gateway-values.yaml b/tekton-poc/examples/inference-scheduling/gateway-values.yaml new file mode 100644 index 00000000..b22f8140 --- /dev/null +++ b/tekton-poc/examples/inference-scheduling/gateway-values.yaml @@ -0,0 +1,8 @@ +gateway: + gatewayClassName: kgateway + service: + type: NodePort + destinationRule: + host: gaie-inference-scheduling-epp.kalantar-is.svc.cluster.local + gatewayParameters: + enabled: true diff --git a/tekton-poc/examples/inference-scheduling/ms-values.yaml b/tekton-poc/examples/inference-scheduling/ms-values.yaml new file mode 100644 index 00000000..b8ad2d45 --- /dev/null +++ b/tekton-poc/examples/inference-scheduling/ms-values.yaml @@ -0,0 +1,237 @@ +fullnameOverride: qwen-qwe-1ea37573-en3-0-6b +multinode: false + +modelArtifacts: + uri: pvc://model-pvc/models/Qwen/Qwen3-0.6B + size: 300Gi + authSecretName: "hf-secret" + name: Qwen/Qwen3-0.6B + +routing: + servicePort: 8000 + parentRefs: + - group: gateway.networking.k8s.io + kind: Gateway + name: infra-llmdbench-inference-gateway + proxy: + image: "ghcr.io/llm-d/llm-d-routing-sidecar:v0.3.0" + secure: false + connector: nixlv2 + debugLevel: 3 + inferenceModel: + create: true + inferencePool: + create: false + name: mk-qwen-qwe-1ea37573-en3-0-6b-gaie + httpRoute: + create: true + rules: + - backendRefs: + - group: inference.networking.x-k8s.io + kind: InferencePool + name: mk-qwen-qwe-1ea37573-en3-0-6b-gaie + port: 8000 + weight: 1 + timeouts: + backendRequest: 0s + request: 0s + matches: + - path: + 
type: PathPrefix + value: /qwen-qwen3-0-6b/ + filters: + - type: URLRewrite + urlRewrite: + path: + type: ReplacePrefixMatch + replacePrefixMatch: / + - backendRefs: + - group: inference.networking.x-k8s.io + kind: InferencePool + name: mk-qwen-qwe-1ea37573-en3-0-6b-gaie + port: 8000 + weight: 1 + timeouts: + backendRequest: 0s + request: 0s + + epp: + create: false + +decode: + create: true + replicas: 2 + acceleratorTypes: + labelKey: nvidia.com/gpu.product + labelValues: + - NVIDIA-H100-80GB-HBM3 + parallelism: + data: 1 + tensor: 1 + annotations: + deployed-by: jchen + modelservice: llm-d-benchmark + podAnnotations: + deployed-by: jchen + modelservice: llm-d-benchmark + #no____config + containers: + - name: "vllm" + mountModelVolume: true + image: "ghcr.io/llm-d/llm-d:v0.2.0" + modelCommand: vllmServe + + args: + - "--enforce-eager" + - "--block-size" + - "64" + - "--kv-transfer-config" + - '{"kv_connector":"NixlConnector","kv_role":"kv_both"}' + - "--tensor-parallel-size" + - "1" + - "--disable-log-requests" + - "--disable-uvicorn-access-log" + - "--max-model-len" + - "16000" + env: + - name: UCX_TLS + value: "cuda_ipc,cuda_copy,tcp" + - name: VLLM_NIXL_SIDE_CHANNEL_PORT + value: "5557" + - name: VLLM_NIXL_SIDE_CHANNEL_HOST + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: VLLM_LOGGING_LEVEL + value: DEBUG + - name: VLLM_ALLOW_LONG_MAX_MODEL_LEN + value: "1" + resources: + limits: + memory: 64Gi + cpu: "16" + + nvidia.com/gpu: "1" + + requests: + memory: 64Gi + cpu: "16" + + nvidia.com/gpu: "1" + + extraConfig: + startupProbe: + httpGet: + path: /health + port: 8200 + failureThreshold: 60 + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 5 + livenessProbe: + tcpSocket: + port: 8200 + failureThreshold: 3 + periodSeconds: 5 + readinessProbe: + httpGet: + path: /health + port: 8200 + failureThreshold: 3 + periodSeconds: 5 + + ports: + - containerPort: 5557 + protocol: TCP + - containerPort: 8200 + name: metrics + protocol: TCP + volumeMounts: [] + volumes: [] + +prefill: + create: false + replicas: 0 + acceleratorTypes: + labelKey: nvidia.com/gpu.product + labelValues: + - NVIDIA-H100-80GB-HBM3 + parallelism: + data: 1 + tensor: 1 + annotations: + deployed-by: jchen + modelservice: llm-d-benchmark + podAnnotations: + deployed-by: jchen + modelservice: llm-d-benchmark + #no____config + containers: + - name: "vllm" + mountModelVolume: true + image: "ghcr.io/llm-d/llm-d:v0.2.0" + modelCommand: vllmServe + + args: + - "--disable-log-requests" + - "--max-model-len" + - "16000" + - "--tensor-parallel-size" + - "1" + env: + - name: VLLM_IS_PREFILL + value: "1" + - name: UCX_TLS + value: "cuda_ipc,cuda_copy,tcp" + - name: VLLM_NIXL_SIDE_CHANNEL_PORT + value: "5557" + - name: VLLM_NIXL_SIDE_CHANNEL_HOST + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: VLLM_LOGGING_LEVEL + value: DEBUG + - name: VLLM_ALLOW_LONG_MAX_MODEL_LEN + value: "1" + resources: + limits: + memory: 40Gi + cpu: "4" + + nvidia.com/gpu: "0" + + requests: + memory: 40Gi + cpu: "4" + + nvidia.com/gpu: "0" + + extraConfig: + startupProbe: + httpGet: + path: /health + port: 8000 + failureThreshold: 60 + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 5 + livenessProbe: + tcpSocket: + port: 8000 + failureThreshold: 3 + periodSeconds: 5 + readinessProbe: + httpGet: + path: /health + port: 8000 + failureThreshold: 3 + periodSeconds: 5 + + ports: + - containerPort: 5557 + protocol: TCP + - containerPort: 8200 + name: metrics + protocol: TCP + volumeMounts: [] + volumes: [] \ No newline 
at end of file From 40bf8cdf20e471bab470abac63af4ef8bf39c049 Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Wed, 1 Oct 2025 09:11:08 -0400 Subject: [PATCH 02/44] update gateway Signed-off-by: Michael Kalantar --- tekton-poc/examples/inference-scheduling/ms-values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tekton-poc/examples/inference-scheduling/ms-values.yaml b/tekton-poc/examples/inference-scheduling/ms-values.yaml index b8ad2d45..80649823 100644 --- a/tekton-poc/examples/inference-scheduling/ms-values.yaml +++ b/tekton-poc/examples/inference-scheduling/ms-values.yaml @@ -12,7 +12,7 @@ routing: parentRefs: - group: gateway.networking.k8s.io kind: Gateway - name: infra-llmdbench-inference-gateway + name: experiment-gateway-inference-gateway proxy: image: "ghcr.io/llm-d/llm-d-routing-sidecar:v0.3.0" secure: false From 9041ccc3029e825c3c738019912f494981c48e19 Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Wed, 1 Oct 2025 12:29:10 -0400 Subject: [PATCH 03/44] change model label Signed-off-by: Michael Kalantar --- tekton-poc/examples/inference-scheduling/gaie-values.yaml | 2 +- tekton-poc/examples/inference-scheduling/ms-values.yaml | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tekton-poc/examples/inference-scheduling/gaie-values.yaml b/tekton-poc/examples/inference-scheduling/gaie-values.yaml index 2d84f723..08d28bc6 100644 --- a/tekton-poc/examples/inference-scheduling/gaie-values.yaml +++ b/tekton-poc/examples/inference-scheduling/gaie-values.yaml @@ -145,6 +145,6 @@ inferencePool: modelServers: matchLabels: llm-d.ai/inferenceServing: "true" - llm-d.ai/model: qwen-qwe-1ea37573-en3-0-6b + llm-d.ai/model: qwen-qwen3-0-6b provider: name: none diff --git a/tekton-poc/examples/inference-scheduling/ms-values.yaml b/tekton-poc/examples/inference-scheduling/ms-values.yaml index 80649823..a02e894b 100644 --- a/tekton-poc/examples/inference-scheduling/ms-values.yaml +++ b/tekton-poc/examples/inference-scheduling/ms-values.yaml @@ -1,4 +1,4 @@ -fullnameOverride: qwen-qwe-1ea37573-en3-0-6b +fullnameOverride: qwen-qwen3-0-6b multinode: false modelArtifacts: @@ -22,14 +22,14 @@ routing: create: true inferencePool: create: false - name: mk-qwen-qwe-1ea37573-en3-0-6b-gaie + name: experiment-gaie httpRoute: create: true rules: - backendRefs: - group: inference.networking.x-k8s.io kind: InferencePool - name: mk-qwen-qwe-1ea37573-en3-0-6b-gaie + name: experiment-gaie port: 8000 weight: 1 timeouts: @@ -48,7 +48,7 @@ routing: - backendRefs: - group: inference.networking.x-k8s.io kind: InferencePool - name: mk-qwen-qwe-1ea37573-en3-0-6b-gaie + name: experiment-gaie port: 8000 weight: 1 timeouts: From 698d774c922b19ff3cde7a2f0856f54960ea2af6 Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Wed, 1 Oct 2025 14:32:54 -0400 Subject: [PATCH 04/44] harness launcher chart Signed-off-by: Michael Kalantar --- charts/harness/.helmignore | 24 ++ charts/harness/Chart.yaml | 40 +++ charts/harness/templates/_helpers.tpl | 31 +++ charts/harness/templates/harness-pod.yaml | 79 ++++++ charts/harness/templates/harness-role.yaml | 19 ++ .../templates/harness-rolebinding.yaml | 27 ++ charts/harness/templates/harness-sa.yaml | 6 + .../templates/inference-perf-profiles.yaml | 235 ++++++++++++++++++ charts/harness/values.yaml | 38 +++ 9 files changed, 499 insertions(+) create mode 100644 charts/harness/.helmignore create mode 100644 charts/harness/Chart.yaml create mode 100644 charts/harness/templates/_helpers.tpl create mode 100644 
charts/harness/templates/harness-pod.yaml create mode 100644 charts/harness/templates/harness-role.yaml create mode 100644 charts/harness/templates/harness-rolebinding.yaml create mode 100644 charts/harness/templates/harness-sa.yaml create mode 100644 charts/harness/templates/inference-perf-profiles.yaml create mode 100644 charts/harness/values.yaml diff --git a/charts/harness/.helmignore b/charts/harness/.helmignore new file mode 100644 index 00000000..898df488 --- /dev/null +++ b/charts/harness/.helmignore @@ -0,0 +1,24 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ + diff --git a/charts/harness/Chart.yaml b/charts/harness/Chart.yaml new file mode 100644 index 00000000..701fc7e4 --- /dev/null +++ b/charts/harness/Chart.yaml @@ -0,0 +1,40 @@ +apiVersion: v2 +name: llm-d-benchark +description: A Helm chart for the experiment harness in llm-d-benchmark + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: "v0.0.1" + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. +appVersion: "v0.3.0" + +maintainers: + - name: "Michael Kalantar" + email: "kalantar@us.ibm.com" + url: "https://github.com/kalantar" + +sources: + - https://github.com/llm-d/llm-d-benchmark + +# dependencies: +# - name: common +# repository: https://charts.bitnami.com/bitnami +# tags: +# - bitnami-common +# version: "2.27.0" + diff --git a/charts/harness/templates/_helpers.tpl b/charts/harness/templates/_helpers.tpl new file mode 100644 index 00000000..aa63cc97 --- /dev/null +++ b/charts/harness/templates/_helpers.tpl @@ -0,0 +1,31 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "harness.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + + +{{/* +Create chart name and version as used by the chart label. +Truncated to 63 characrters because Kubernetes label values are limited to this +*/}} +{{- define "harness.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create common labels for the resources managed by this chart. +*/}} +{{- define "harness.labels" -}} +helm.sh/chart: {{ include "harness.chart" . 
}} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{- define "harness.sanitizeString" -}} +{{- $input := . | lower | replace "." "-" | replace "/" "-" -}} +{{- $input -}} +{{- end -}} \ No newline at end of file diff --git a/charts/harness/templates/harness-pod.yaml b/charts/harness/templates/harness-pod.yaml new file mode 100644 index 00000000..56ce99d4 --- /dev/null +++ b/charts/harness/templates/harness-pod.yaml @@ -0,0 +1,79 @@ +apiVersion: v1 +kind: Pod +metadata: + name: {{ .Values.harness.type }}-launcher + labels: + app: {{ .Values.harness.type }}-launcher +spec: + serviceAccountName: {{ include "harness.name" . }}-runner + containers: + - name: harness + image: "{{ .Values.harness.image.repository }}:{{ .Values.harness.image.tag }}" + imagePullPolicy: {{ .Values.harness.image.pullPolicy }} + securityContext: + runAsUser: 0 + command: ["sh", "-c"] + args: + {{- toYaml .Values.harness.args | nindent 4 }} + env: + - name: LLMDBENCH_RUN_EXPERIMENT_LAUNCHER + value: "1" + - name: LLMDBENCH_RUN_EXPERIMENT_ANALYZE_LOCALLY + value: "0" + - name: LLMDBENCH_RUN_EXPERIMENT_HARNESS + value: "{{ .Values.harness.type }}-llm-d-benchmark.sh" + - name: LLMDBENCH_RUN_EXPERIMENT_ANALYZER + value: "{{ .Values.harness.type }}-analyze_results.sh" + - name: LLMDBENCH_RUN_EXPERIMENT_HARNESS_WORKLOAD_NAME + value: "{{ .Values.experiment.profile.name }}" + - name: LLMDBENCH_RUN_EXPERIMENT_ID + value: "{{ .Values.experiment.identifier }}" + - name: LLMDBENCH_HARNESS_NAME + value: "{{ .Values.harness.type }}" + - name: LLMDBENCH_RUN_EXPERIMENT_RESULTS_DIR + value: "/requests/{{ .Values.harness.type }}_{{ .Values.experiment.identifier }}_{{ .Values.stack.name }}" + - name: LLMDBENCH_CONTROL_WORK_DIR + value: "/requests/{{ .Values.harness.type }}_{{ .Values.experiment.identifier }}_{{ .Values.stack.name }}" + - name: LLMDBENCH_HARNESS_NAMESPACE + value: "{{ .Release.Namespace }}" + - name: LLMDBENCH_HARNESS_STACK_TYPE + value: "{{ .Values.stack.type }}" + - name: LLMDBENCH_HARNESS_STACK_ENDPOINT_URL + value: "{{ .Values.stack.endpointUrl }}" + - name: LLMDBENCH_HARNESS_STACK_NAME + value: {{ include "harness.sanitizeString" .Values.stack.model | quote }} + - name: LLMDBENCH_DEPLOY_METHODS + value: "{{ .Values.stack.deployMethod }}" + - name: LLMDBENCH_MAGIC_ENVAR + value: "harness_pod" + {{- with .Values.harness.extraEnv }} + - name: {{ .name }} + value: "{{ .value }}" + {{- end }} + + # TBD add_env_vars_to_pod $LLMDBENCH_CONTROL_ENV_VAR_LIST_TO_POD + - name: LLMDBENCH_DEPLOY_CURRENT_MODELID + value: "{{ .Values.stack.model }}" + + - name: HF_TOKEN_SECRET + value: "hf-secret" + - name: HUGGING_FACE_HUB_TOKEN + valueFrom: + secretKeyRef: + name: hf-secret + key: HF_TOKEN + resources: + {{- toYaml .Values.harness.resources | nindent 6 }} + volumeMounts: + - name: results + mountPath: /requests + - name: {{ .Values.harness.type }}-profiles + mountPath: /workspace/profiles/{{ .Values.harness.type }} + volumes: + - name: results + persistentVolumeClaim: + claimName: {{ .Values.harness.resultsPVC }} + - name: {{ .Values.harness.type }}-profiles + configMap: + name: {{ .Values.harness.type }}-profiles + restartPolicy: Never \ No newline at end of file diff --git a/charts/harness/templates/harness-role.yaml b/charts/harness/templates/harness-role.yaml new file mode 100644 index 00000000..9ae95bb6 --- /dev/null +++ b/charts/harness/templates/harness-role.yaml @@ -0,0 +1,19 @@ +apiVersion: 
rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ include "harness.name" . }}-job-creator + labels: + {{- include "harness.labels" . | nindent 4 }} +rules: + - apiGroups: ["batch"] + resources: ["jobs"] + verbs: ["create", "get", "list", "watch", "delete", "patch", "update"] + - apiGroups: [""] + resources: ["serviceaccounts"] + verbs: ["get"] + - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list", "watch"] + - apiGroups: [""] + resources: ["pods/log"] + verbs: ["get"] diff --git a/charts/harness/templates/harness-rolebinding.yaml b/charts/harness/templates/harness-rolebinding.yaml new file mode 100644 index 00000000..202ab4ff --- /dev/null +++ b/charts/harness/templates/harness-rolebinding.yaml @@ -0,0 +1,27 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ include "harness.name" . }}-job-creator-binding + labels: + {{- include "harness.labels" . | nindent 4 }} +subjects: + - kind: ServiceAccount + name: {{ include "harness.name" . }}-runner +roleRef: + kind: Role + name: {{ include "harness.name" . }}-job-creator + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ include "harness.name" . }}-restricted-scc + labels: + {{- include "harness.labels" . | nindent 4 }} +subjects: + - kind: ServiceAccount + name: {{ include "harness.name" . }}-runner +roleRef: + kind: ClusterRole + name: system:openshift:scc:restricted + apiGroup: rbac.authorization.k8s.io diff --git a/charts/harness/templates/harness-sa.yaml b/charts/harness/templates/harness-sa.yaml new file mode 100644 index 00000000..f6a4a83f --- /dev/null +++ b/charts/harness/templates/harness-sa.yaml @@ -0,0 +1,6 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "harness.name" . }}-runner + labels: + {{- include "harness.labels" . 
| nindent 4 }} diff --git a/charts/harness/templates/inference-perf-profiles.yaml b/charts/harness/templates/inference-perf-profiles.yaml new file mode 100644 index 00000000..285107c6 --- /dev/null +++ b/charts/harness/templates/inference-perf-profiles.yaml @@ -0,0 +1,235 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: inference-perf-profiles +data: + chatbot_sharegpt.yaml: "load:\n type: constant\n stages:\n - rate: 1\n duration: + 120\n - rate: 2\n duration: 120\n - rate: 4\n duration: 120\n - rate: + 8\n duration: 120\napi:\n type: completion\n streaming: true\nserver:\n type: + vllm\n model_name: {{ .Values.stack.model }}\n base_url: {{ .Values.stack.endpointUrl }}\n + \ ignore_eos: true\ntokenizer:\n pretrained_model_name_or_path: {{ .Values.stack.model }}\ndata:\n + \ type: shareGPT\n input_distribution:\n min: 10 # min length + of the synthetic prompts\n max: 1024 # max length of the synthetic + prompts\n output_distribution:\n min: 10 # min length of the output + to be generated\n max: 1024 # max length of the output to be generated + \nreport:\n request_lifecycle:\n summary: true\n per_stage: true\n per_request: + true\nstorage:\n local_storage:\n path: /workspace" + chatbot_synthetic.yaml: |- + load: + type: constant + stages: + - rate: 1 + duration: 120 + - rate: 2 + duration: 120 + - rate: 4 + duration: 120 + - rate: 8 + duration: 120 + api: + type: completion + streaming: true + server: + type: vllm + model_name: {{ .Values.stack.model }} + base_url: {{ .Values.stack.endpointUrl }} + ignore_eos: true + tokenizer: + pretrained_model_name_or_path: {{ .Values.stack.model }} + data: + type: random + input_distribution: + min: 10 # min length of the synthetic prompts + max: 8192 # max length of the synthetic prompts + mean: 4096 # mean length of the synthetic prompts + std: 2048 # standard deviation of the length of the synthetic prompts + total_count: 1000 # total number of prompts to generate to fit the above mentioned distribution constraints + output_distribution: + min: 10 # min length of the output to be generated + max: 2048 # max length of the output to be generated + mean: 1024 # mean length of the output to be generated + std: 512 # standard deviation of the length of the output to be generated + total_count: 1000 # total number of output lengths to generate to fit the above mentioned distribution constraints + report: + request_lifecycle: + summary: true + per_stage: true + per_request: true + storage: + local_storage: + path: /workspace + code_completion_synthetic.yaml: |- + load: + type: constant + stages: + - rate: 1 + duration: 120 + - rate: 2 + duration: 120 + - rate: 4 + duration: 120 + - rate: 8 + duration: 120 + api: + type: completion + streaming: true + server: + type: vllm + model_name: {{ .Values.stack.model }} + base_url: {{ .Values.stack.endpointUrl }} + ignore_eos: true + tokenizer: + pretrained_model_name_or_path: {{ .Values.stack.model }} + data: + type: random + input_distribution: + min: 10 # min length of the synthetic prompts + max: 4096 # max length of the synthetic prompts + mean: 2048 # mean length of the synthetic prompts + std: 1024 # standard deviation of the length of the synthetic prompts + total_count: 1000 # total number of prompts to generate to fit the above mentioned distribution constraints + output_distribution: + min: 10 # min length of the output to be generated + max: 256 # max length of the output to be generated + mean: 128 # mean length of the output to be generated + std: 64 # standard deviation of the length of the 
output to be generated + total_count: 1000 # total number of output lengths to generate to fit the above mentioned distribution constraints + report: + request_lifecycle: + summary: true + per_stage: true + per_request: true + storage: + local_storage: + path: /workspace + sanity_random.yaml: |- + load: + type: constant + stages: + - rate: 1 + duration: 30 + api: + type: completion + streaming: true + server: + type: vllm + model_name: {{ .Values.stack.model }} + base_url: {{ .Values.stack.endpointUrl }} + ignore_eos: true + tokenizer: + pretrained_model_name_or_path: {{ .Values.stack.model }} + data: + type: random + input_distribution: + min: 10 # min length of the synthetic prompts + max: 100 # max length of the synthetic prompts + mean: 50 # mean length of the synthetic prompts + std: 10 # standard deviation of the length of the synthetic prompts + total_count: 100 # total number of prompts to generate to fit the above mentioned distribution constraints + output_distribution: + min: 10 # min length of the output to be generated + max: 100 # max length of the output to be generated + mean: 50 # mean length of the output to be generated + std: 10 # standard deviation of the length of the output to be generated + total_count: 100 # total number of output lengths to generate to fit the above mentioned distribution constraints + report: + request_lifecycle: + summary: true + per_stage: true + per_request: true + storage: + local_storage: + path: /workspace + shared_prefix_synthetic.yaml: | + load: + type: constant + stages: + - rate: 2 + duration: 50 + - rate: 5 + duration: 50 + # - rate: 8 + # duration: 50 + # - rate: 10 + # duration: 50 + # - rate: 12 + # duration: 50 + # - rate: 15 + # duration: 50 + # - rate: 20 + # duration: 50 + api: + type: completion + streaming: true + server: + type: vllm + model_name: {{ .Values.stack.model }} + base_url: {{ .Values.stack.endpointUrl }} + ignore_eos: true + tokenizer: + pretrained_model_name_or_path: {{ .Values.stack.model }} + data: + type: shared_prefix + shared_prefix: + # Number of distinct shared prefixes + num_groups: {{ .Values.experiment.profile.shared_prefix.num_groups }} + # Number of unique questions per shared prefix + num_prompts_per_group: {{ .Values.experiment.profile.shared_prefix.num_prompts_per_group }} + # Length of the shared prefix (in tokens) + system_prompt_len: {{ .Values.experiment.profile.shared_prefix.system_prompt_len }} + # Length of the unique question part (in tokens) + question_len: {{ .Values.experiment.profile.shared_prefix.question_len }} + # Target length for the model's generated output (in tokens) + output_len: {{ .Values.experiment.profile.shared_prefix.output_len }} + report: + request_lifecycle: + summary: true + per_stage: true + per_request: true + storage: + local_storage: + path: /workspace + summarization_synthetic.yaml: |- + load: + type: constant + stages: + - rate: 1 + duration: 120 + - rate: 2 + duration: 120 + - rate: 4 + duration: 120 + - rate: 8 + duration: 120 + api: + type: completion + streaming: true + server: + type: vllm + model_name: {{ .Values.stack.model }} + base_url: {{ .Values.stack.endpointUrl }} + ignore_eos: true + tokenizer: + pretrained_model_name_or_path: {{ .Values.stack.model }} + data: + type: random + input_distribution: + min: 10 # min length of the synthetic prompts + max: 4096 # max length of the synthetic prompts + mean: 2048 # mean length of the synthetic prompts + std: 1024 # standard deviation of the length of the synthetic prompts + total_count: 1000 # total number 
of prompts to generate to fit the above mentioned distribution constraints + output_distribution: + min: 10 # min length of the output to be generated + max: 512 # max length of the output to be generated + mean: 256 # mean length of the output to be generated + std: 128 # standard deviation of the length of the output to be generated + total_count: 1000 # total number of output lengths to generate to fit the above mentioned distribution constraints + report: + request_lifecycle: + summary: true + per_stage: true + per_request: true + storage: + local_storage: + path: /workspace diff --git a/charts/harness/values.yaml b/charts/harness/values.yaml new file mode 100644 index 00000000..4c2fd1ac --- /dev/null +++ b/charts/harness/values.yaml @@ -0,0 +1,38 @@ +harness: + type: inference-perf + resultsPVC: workspace-pvc + image: + repository: ghcr.io/llm-d/llm-d-benchmark + tag: v0.3.0rc2 + pullPolicy: Always + extraEnv: [] + args: ["llm-d-benchmark.sh"] + resources: + limits: + cpu: 16 + memory: 32Gi + requests: + cpu: 16 + memory: 32Gi + +stack: + type: "llm-d" + # model: + deployMethod: modelservice + # name + # endpointUrl + +experiment: + # identifier: + profile: + name: sanity_random.yaml + shared_prefix: + num_groups: 32 + num_prompts_per_group: 32 + system_prompt_len: 2048 + question_len: 256 + output_len: 256 + +nameOverride: "" +fullnameOverride: "" + From 0516f2bf56d6ed8fbf2fef2e35aedd9f963a7852 Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Thu, 2 Oct 2025 10:53:40 -0400 Subject: [PATCH 05/44] model-download chart Signed-off-by: Michael Kalantar --- charts/model-download/.helmignore | 24 ++++++++++++ charts/model-download/Chart.yaml | 40 +++++++++++++++++++ charts/model-download/templates/_helpers.tpl | 31 +++++++++++++++ charts/model-download/templates/job.yaml | 41 ++++++++++++++++++++ charts/model-download/templates/pvc.yaml | 14 +++++++ charts/model-download/values.yaml | 8 ++++ 6 files changed, 158 insertions(+) create mode 100644 charts/model-download/.helmignore create mode 100644 charts/model-download/Chart.yaml create mode 100644 charts/model-download/templates/_helpers.tpl create mode 100644 charts/model-download/templates/job.yaml create mode 100644 charts/model-download/templates/pvc.yaml create mode 100644 charts/model-download/values.yaml diff --git a/charts/model-download/.helmignore b/charts/model-download/.helmignore new file mode 100644 index 00000000..898df488 --- /dev/null +++ b/charts/model-download/.helmignore @@ -0,0 +1,24 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ + diff --git a/charts/model-download/Chart.yaml b/charts/model-download/Chart.yaml new file mode 100644 index 00000000..04da0386 --- /dev/null +++ b/charts/model-download/Chart.yaml @@ -0,0 +1,40 @@ +apiVersion: v2 +name: llm-d-benchark +description: A Helm chart for model download + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. 
They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: "v0.0.1" + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. +appVersion: "v0.3.0" + +maintainers: + - name: "Michael Kalantar" + email: "kalantar@us.ibm.com" + url: "https://github.com/kalantar" + +sources: + - https://github.com/llm-d/llm-d-benchmark + +# dependencies: +# - name: common +# repository: https://charts.bitnami.com/bitnami +# tags: +# - bitnami-common +# version: "2.27.0" + diff --git a/charts/model-download/templates/_helpers.tpl b/charts/model-download/templates/_helpers.tpl new file mode 100644 index 00000000..2d518662 --- /dev/null +++ b/charts/model-download/templates/_helpers.tpl @@ -0,0 +1,31 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "download.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + + +{{/* +Create chart name and version as used by the chart label. +Truncated to 63 characrters because Kubernetes label values are limited to this +*/}} +{{- define "download.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create common labels for the resources managed by this chart. +*/}} +{{- define "dowload.labels" -}} +helm.sh/chart: {{ include "download.chart" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{- define "download.sanitizeString" -}} +{{- $input := . | lower | replace "." "-" | replace "/" "-" -}} +{{- $input -}} +{{- end -}} \ No newline at end of file diff --git a/charts/model-download/templates/job.yaml b/charts/model-download/templates/job.yaml new file mode 100644 index 00000000..590bb3a2 --- /dev/null +++ b/charts/model-download/templates/job.yaml @@ -0,0 +1,41 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ include "download.name" . 
}}-job +spec: + template: + spec: + containers: + - name: downloader + image: python:3.10 + command: ["/bin/sh", "-c"] + args: + - mkdir -p "\${MOUNT_PATH}/\${MODEL_PATH}" && \ + pip install huggingface_hub && \ + export PATH="\${PATH}:\${HOME}/.local/bin" && \ + hf auth login --token "${HF_TOKEN}" && \ + hf download "\${HF_MODEL_ID}" --local-dir "/cache/\${MODEL_PATH}" + env: + - name: MODEL_PATH + value: models/{{ required "ERROR .Values.hf_model must be set" .Values.hf_model }} + - name: HF_MODEL_ID + value: {{ .Values.hf_model }} + - name: HF_TOKEN + valueFrom: + secretKeyRef: + name: {{ .Values.hf_secret }} + key: HF_TOKEN + - name: HF_HOME + value: /tmp/huggingface + - name: HOME + value: /tmp + - name: MOUNT_PATH + value: /cache + volumeMounts: + - name: model-cache + mountPath: /cache + restartPolicy: OnFailure + volumes: + - name: model-cache + persistentVolumeClaim: + claimName: {{ .Values.pvc.name }} \ No newline at end of file diff --git a/charts/model-download/templates/pvc.yaml b/charts/model-download/templates/pvc.yaml new file mode 100644 index 00000000..3c367832 --- /dev/null +++ b/charts/model-download/templates/pvc.yaml @@ -0,0 +1,14 @@ +{{- if .Values.pvc.create }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ .Values.pvc.name }} +spec: + accessModes: + - ReadWriteMany + resources: + requests: + storage: {{ .Values.pvc.size }} + storageClassName: {{ .Values.pvc.storageClass }} + volumeMode: Filesystem +{{- end }} \ No newline at end of file diff --git a/charts/model-download/values.yaml b/charts/model-download/values.yaml new file mode 100644 index 00000000..f4ca639d --- /dev/null +++ b/charts/model-download/values.yaml @@ -0,0 +1,8 @@ +# hf_model: # required +hf_secret: hf-secret + +pvc: + name: model-pvc + create: false + size: 5Gi + storageClass: default From ae36f889941a9bf0994c70fa32eb36d1b208897e Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Thu, 2 Oct 2025 11:36:21 -0400 Subject: [PATCH 06/44] try without backslash Signed-off-by: Michael Kalantar --- charts/model-download/templates/job.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/charts/model-download/templates/job.yaml b/charts/model-download/templates/job.yaml index 590bb3a2..2b0edb76 100644 --- a/charts/model-download/templates/job.yaml +++ b/charts/model-download/templates/job.yaml @@ -10,11 +10,11 @@ spec: image: python:3.10 command: ["/bin/sh", "-c"] args: - - mkdir -p "\${MOUNT_PATH}/\${MODEL_PATH}" && \ + - mkdir -p "${MOUNT_PATH}/${MODEL_PATH}" && \ pip install huggingface_hub && \ - export PATH="\${PATH}:\${HOME}/.local/bin" && \ + export PATH="${PATH}:${HOME}/.local/bin" && \ hf auth login --token "${HF_TOKEN}" && \ - hf download "\${HF_MODEL_ID}" --local-dir "/cache/\${MODEL_PATH}" + hf download "${HF_MODEL_ID}" --local-dir "/cache/${MODEL_PATH}" env: - name: MODEL_PATH value: models/{{ required "ERROR .Values.hf_model must be set" .Values.hf_model }} From aeec0007790dbef4c4bc328c52dd601a80e866d8 Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Thu, 2 Oct 2025 11:42:20 -0400 Subject: [PATCH 07/44] try without backslash Signed-off-by: Michael Kalantar --- charts/model-download/templates/job.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/charts/model-download/templates/job.yaml b/charts/model-download/templates/job.yaml index 2b0edb76..e25b924b 100644 --- a/charts/model-download/templates/job.yaml +++ b/charts/model-download/templates/job.yaml @@ -10,11 +10,11 @@ spec: image: python:3.10 command: 
["/bin/sh", "-c"] args: - - mkdir -p "${MOUNT_PATH}/${MODEL_PATH}" && \ + - mkdir -p "\${MOUNT_PATH}/\${MODEL_PATH}" && \ pip install huggingface_hub && \ - export PATH="${PATH}:${HOME}/.local/bin" && \ - hf auth login --token "${HF_TOKEN}" && \ - hf download "${HF_MODEL_ID}" --local-dir "/cache/${MODEL_PATH}" + export PATH="\${PATH}:\${HOME}/.local/bin" && \ + hf auth login --token "\${HF_TOKEN}" && \ + hf download "\${HF_MODEL_ID}" --local-dir "/cache/\${MODEL_PATH}" env: - name: MODEL_PATH value: models/{{ required "ERROR .Values.hf_model must be set" .Values.hf_model }} From 90473a594f54c0ffa712bdb1c548a2aec0731824 Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Thu, 2 Oct 2025 13:15:08 -0400 Subject: [PATCH 08/44] restructure args Signed-off-by: Michael Kalantar --- charts/model-download/templates/job.yaml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/charts/model-download/templates/job.yaml b/charts/model-download/templates/job.yaml index e25b924b..1ad41655 100644 --- a/charts/model-download/templates/job.yaml +++ b/charts/model-download/templates/job.yaml @@ -10,11 +10,12 @@ spec: image: python:3.10 command: ["/bin/sh", "-c"] args: - - mkdir -p "\${MOUNT_PATH}/\${MODEL_PATH}" && \ - pip install huggingface_hub && \ - export PATH="\${PATH}:\${HOME}/.local/bin" && \ - hf auth login --token "\${HF_TOKEN}" && \ - hf download "\${HF_MODEL_ID}" --local-dir "/cache/\${MODEL_PATH}" + - > + export PATH="${PATH}:${HOME}/.local/bin"; + mkdir -p "${MOUNT_PATH}/${MODEL_PATH}"; + python -m pip install huggingface_hub; + hf auth login --token "${HF_TOKEN}"; + hf download "${HF_MODEL_ID}" --local-dir "/cache/${MODEL_PATH}" env: - name: MODEL_PATH value: models/{{ required "ERROR .Values.hf_model must be set" .Values.hf_model }} From b68c2f7af9376ea6f8b457d9d8a5182ac022cb5e Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Thu, 2 Oct 2025 15:09:30 -0400 Subject: [PATCH 09/44] hack Signed-off-by: Michael Kalantar --- charts/harness/templates/harness-pod.yaml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/charts/harness/templates/harness-pod.yaml b/charts/harness/templates/harness-pod.yaml index 56ce99d4..decfa9cb 100644 --- a/charts/harness/templates/harness-pod.yaml +++ b/charts/harness/templates/harness-pod.yaml @@ -46,11 +46,31 @@ spec: value: "{{ .Values.stack.deployMethod }}" - name: LLMDBENCH_MAGIC_ENVAR value: "harness_pod" + + - name: LLMDBENCH_LLMD_IMAGE_REGISTRY + value: ghcr.io + - name: LLMDBENCH_LLMD_IMAGE_REPO + value: llm-d + - name: LLMDBENCH_LLMD_IMAGE_NAME + value: llm-d-benchmark + - name: LLMDBENCH_LLMD_IMAGE_TAG + value: {{ .Values.harness.image.tag }} {{- with .Values.harness.extraEnv }} - name: {{ .name }} value: "{{ .value }}" {{- end }} + - name: LLMDBENCH_VLLM_MODELSERVICE_PREFILL_REPLICAS + value: 0 + - name: LLMDBENCH_VLLM_MODELSERVICE_DECODE_REPLICAS + value: 2 + - name: LLMDBENCH_VLLM_COMMON_AFFINITY + value: "nvidia.com/gpu.product:NVIDIA-H100-80GB-HBM3" + - name: LLMDBENCH_VLLM_MODELSERVICE_DECODE_TENSOR_PARALLELISM + value: 4 + - name: LLMDBENCH_VLLM_MODELSERVICE_DECODE_DATA_PARALLELISM + value: 1 + # TBD add_env_vars_to_pod $LLMDBENCH_CONTROL_ENV_VAR_LIST_TO_POD - name: LLMDBENCH_DEPLOY_CURRENT_MODELID value: "{{ .Values.stack.model }}" From 37893a1d6b2672742c258cbaba0e8878a41b270c Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Thu, 2 Oct 2025 15:13:14 -0400 Subject: [PATCH 10/44] hack Signed-off-by: Michael Kalantar --- charts/harness/templates/harness-pod.yaml | 8 ++++---- 1 file changed, 4 
insertions(+), 4 deletions(-) diff --git a/charts/harness/templates/harness-pod.yaml b/charts/harness/templates/harness-pod.yaml index decfa9cb..1eb938f6 100644 --- a/charts/harness/templates/harness-pod.yaml +++ b/charts/harness/templates/harness-pod.yaml @@ -61,15 +61,15 @@ spec: {{- end }} - name: LLMDBENCH_VLLM_MODELSERVICE_PREFILL_REPLICAS - value: 0 + value: "0" - name: LLMDBENCH_VLLM_MODELSERVICE_DECODE_REPLICAS - value: 2 + value: "2" - name: LLMDBENCH_VLLM_COMMON_AFFINITY value: "nvidia.com/gpu.product:NVIDIA-H100-80GB-HBM3" - name: LLMDBENCH_VLLM_MODELSERVICE_DECODE_TENSOR_PARALLELISM - value: 4 + value: "4" - name: LLMDBENCH_VLLM_MODELSERVICE_DECODE_DATA_PARALLELISM - value: 1 + value: "1" # TBD add_env_vars_to_pod $LLMDBENCH_CONTROL_ENV_VAR_LIST_TO_POD - name: LLMDBENCH_DEPLOY_CURRENT_MODELID From f9e1372ef9d9634590ca2ae98d33f96bda684c3b Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Thu, 2 Oct 2025 15:55:33 -0400 Subject: [PATCH 11/44] extend hack Signed-off-by: Michael Kalantar --- charts/harness/templates/harness-pod.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/charts/harness/templates/harness-pod.yaml b/charts/harness/templates/harness-pod.yaml index 1eb938f6..2c87e4d9 100644 --- a/charts/harness/templates/harness-pod.yaml +++ b/charts/harness/templates/harness-pod.yaml @@ -70,6 +70,10 @@ spec: value: "4" - name: LLMDBENCH_VLLM_MODELSERVICE_DECODE_DATA_PARALLELISM value: "1" + - name: LLMDBENCH_VLLM_MODELSERVICE_PREFILL_TENSOR_PARALLELISM + value: "1" + - name: LLMDBENCH_VLLM_MODELSERVICE_PREFILL_DATA_PARALLELISM + value: "1" # TBD add_env_vars_to_pod $LLMDBENCH_CONTROL_ENV_VAR_LIST_TO_POD - name: LLMDBENCH_DEPLOY_CURRENT_MODELID From 91c6c2f54665379d34b99f1c287a8b16f2aebb00 Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Thu, 2 Oct 2025 16:35:30 -0400 Subject: [PATCH 12/44] more image configurability Signed-off-by: Michael Kalantar --- charts/harness/templates/harness-pod.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/charts/harness/templates/harness-pod.yaml b/charts/harness/templates/harness-pod.yaml index 2c87e4d9..91456252 100644 --- a/charts/harness/templates/harness-pod.yaml +++ b/charts/harness/templates/harness-pod.yaml @@ -8,7 +8,7 @@ spec: serviceAccountName: {{ include "harness.name" . 
}}-runner containers: - name: harness - image: "{{ .Values.harness.image.repository }}:{{ .Values.harness.image.tag }}" + image: "{{ .Values.harness.image.registry }}/{{ .Values.harness.image.repository }}/{{ .Values.harness.image.name }}:{{ .Values.harness.image.tag }}" imagePullPolicy: {{ .Values.harness.image.pullPolicy }} securityContext: runAsUser: 0 @@ -48,11 +48,11 @@ spec: value: "harness_pod" - name: LLMDBENCH_LLMD_IMAGE_REGISTRY - value: ghcr.io + value: {{ .Values.harness.image.registry }} - name: LLMDBENCH_LLMD_IMAGE_REPO - value: llm-d + value: {{ .Values.harness.image.repository }} - name: LLMDBENCH_LLMD_IMAGE_NAME - value: llm-d-benchmark + value: {{ .Values.harness.image.name }} - name: LLMDBENCH_LLMD_IMAGE_TAG value: {{ .Values.harness.image.tag }} {{- with .Values.harness.extraEnv }} From cde15493826f96d7038e671115ef5642e8f493f3 Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Thu, 2 Oct 2025 17:42:12 -0400 Subject: [PATCH 13/44] quote Signed-off-by: Michael Kalantar --- charts/harness/templates/harness-pod.yaml | 2 +- charts/harness/values.yaml | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/charts/harness/templates/harness-pod.yaml b/charts/harness/templates/harness-pod.yaml index 91456252..4d96b73a 100644 --- a/charts/harness/templates/harness-pod.yaml +++ b/charts/harness/templates/harness-pod.yaml @@ -54,7 +54,7 @@ spec: - name: LLMDBENCH_LLMD_IMAGE_NAME value: {{ .Values.harness.image.name }} - name: LLMDBENCH_LLMD_IMAGE_TAG - value: {{ .Values.harness.image.tag }} + value: {{ .Values.harness.image.tag | quote }} {{- with .Values.harness.extraEnv }} - name: {{ .name }} value: "{{ .value }}" diff --git a/charts/harness/values.yaml b/charts/harness/values.yaml index 4c2fd1ac..faae6341 100644 --- a/charts/harness/values.yaml +++ b/charts/harness/values.yaml @@ -2,7 +2,9 @@ harness: type: inference-perf resultsPVC: workspace-pvc image: - repository: ghcr.io/llm-d/llm-d-benchmark + registry: ghcr.io + repository: llm-d + name: llm-d-benchmark tag: v0.3.0rc2 pullPolicy: Always extraEnv: [] From 9792fa5b21aaff5e8ce763bca0575d46beb72e61 Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Thu, 2 Oct 2025 18:14:46 -0400 Subject: [PATCH 14/44] MODELID Signed-off-by: Michael Kalantar --- charts/harness/templates/harness-pod.yaml | 9 +++++---- charts/harness/templates/harness-role.yaml | 2 +- charts/harness/templates/harness-rolebinding.yaml | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/charts/harness/templates/harness-pod.yaml b/charts/harness/templates/harness-pod.yaml index 4d96b73a..014c265c 100644 --- a/charts/harness/templates/harness-pod.yaml +++ b/charts/harness/templates/harness-pod.yaml @@ -60,6 +60,11 @@ spec: value: "{{ .value }}" {{- end }} + # TBD add_env_vars_to_pod $LLMDBENCH_CONTROL_ENV_VAR_LIST_TO_POD + - name: LLMDBENCH_DEPLOY_CURRENT_MODEL + value: "{{ .Values.stack.model }}" + - name: LLMDBENCH_DEPLOY_CURRENT_MODELID + value: {{ include "harness.sanitizeString" .Values.stack.model | quote }} - name: LLMDBENCH_VLLM_MODELSERVICE_PREFILL_REPLICAS value: "0" - name: LLMDBENCH_VLLM_MODELSERVICE_DECODE_REPLICAS @@ -75,10 +80,6 @@ spec: - name: LLMDBENCH_VLLM_MODELSERVICE_PREFILL_DATA_PARALLELISM value: "1" - # TBD add_env_vars_to_pod $LLMDBENCH_CONTROL_ENV_VAR_LIST_TO_POD - - name: LLMDBENCH_DEPLOY_CURRENT_MODELID - value: "{{ .Values.stack.model }}" - - name: HF_TOKEN_SECRET value: "hf-secret" - name: HUGGING_FACE_HUB_TOKEN diff --git a/charts/harness/templates/harness-role.yaml 
b/charts/harness/templates/harness-role.yaml index 9ae95bb6..7aebcaa4 100644 --- a/charts/harness/templates/harness-role.yaml +++ b/charts/harness/templates/harness-role.yaml @@ -16,4 +16,4 @@ rules: verbs: ["get", "list", "watch"] - apiGroups: [""] resources: ["pods/log"] - verbs: ["get"] + verbs: ["get"] \ No newline at end of file diff --git a/charts/harness/templates/harness-rolebinding.yaml b/charts/harness/templates/harness-rolebinding.yaml index 202ab4ff..ec657601 100644 --- a/charts/harness/templates/harness-rolebinding.yaml +++ b/charts/harness/templates/harness-rolebinding.yaml @@ -24,4 +24,4 @@ subjects: roleRef: kind: ClusterRole name: system:openshift:scc:restricted - apiGroup: rbac.authorization.k8s.io + apiGroup: rbac.authorization.k8s.io \ No newline at end of file From 24c8fe1e19906e64f736bc2f7825f44a1ced75f1 Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Fri, 3 Oct 2025 13:51:50 -0400 Subject: [PATCH 15/44] inital pipeline Signed-off-by: Michael Kalantar --- tekton-poc/README.md | 89 ++++ tekton-poc/pipeline/experiment-task.yaml | 429 ++++++++++++++++++++ tekton-poc/pipeline/experiment-taskrun.yaml | 25 ++ tekton-poc/pipeline/pipelinerun-matrix.yaml | 33 ++ tekton-poc/pipeline/roles.yaml | 122 ++++++ tekton-poc/pipeline/stepactions.yaml | 317 +++++++++++++++ 6 files changed, 1015 insertions(+) create mode 100644 tekton-poc/README.md create mode 100644 tekton-poc/pipeline/experiment-task.yaml create mode 100644 tekton-poc/pipeline/experiment-taskrun.yaml create mode 100644 tekton-poc/pipeline/pipelinerun-matrix.yaml create mode 100644 tekton-poc/pipeline/roles.yaml create mode 100644 tekton-poc/pipeline/stepactions.yaml diff --git a/tekton-poc/README.md b/tekton-poc/README.md new file mode 100644 index 00000000..a75d6b62 --- /dev/null +++ b/tekton-poc/README.md @@ -0,0 +1,89 @@ +# Benchmarking with Tekton + +This folder contains a proof of concept + +## Tekton Basics +A **Pipeline** is set of **Tasks**. Tasks run in parallel. The execution flow can be controlled implicitly (via one task consume a result of another) or explcitly with mechanisms like `runAfter`, `when` and `finally`. +A **Task** is a sequence of **Steps**. Steps run sequentially. The step can programmatically determine to execute or skip. + +To execute a **Pipeline** create a **PipelineRun**, +an object that identifies: + - the Pipeline to execute and + - the values of any parameters + +Tekton creates a **TaskRun** for each Task in the Pipeline. +A TaskRun is an object that identifies: + - the Task and + - the values of any parameters (passed from the PipelineRun) + +The TaskRun is implemented by a Pod +Each Step is implemented by a Container in the Pod. + +## Supported Benchmarking Use Cases + +Given a matrix of factors and values, measure performance of a model over a matrix of factors/values +Factors may be model deployment related, such as: model, endpoint picker configuration, parallelism, ... +Factors may also be workload related, for example: question_len, output_len,workload_profile, ... + +This proof of concept currently implements a variation of the inference-scheduling [scenairo](https://github.com/llm-d/llm-d-benchmark/blob/main/scenarios/guides/inference-scheduling.sh)/[experiment](https://github.com/llm-d/llm-d-benchmark/blob/main/experiments/inference-scheduling.yaml). + +## Approach + +A single Task measures performance over a single set of values from the factor/values matrix. This task implements steps: + +1. Create/prepare an experiment namespace +2. Deploy a Gateway +3. 
Configure GAIE +4. Download the model from HuggingFace to a PVC +5. Deploy the model +6. Run the workload for a single set of parameters +7. Upload the results to external storage (not yet implemented)\ +8. Delete the experiment namespace + +A PipelineRun is created that embeds a Pipeline containing one Task with a matrix of values for a set of factors. An example is `pipelinerun-matrix.yaml`. + +## Use + +1. Create a namespace, for example: $NAMESPACE and set to current context: + ```shell + kubectl create ns $NAMESPACE + kubectl config set-context --current --namespace $NAMESPACE + ``` +2. Deploy a secret `hf-secret` containing your HuggingFace token in the namespace. + ```shell + kubectl create secret generic hf-secret \ + --namespace ${NAMESPACE} \ + --from-literal="HF_TOKEN=${HF_TOKEN}" \ + --dry-run=client -o yaml | kubectl apply -f - + ``` +3. Give the task needed permissions + ```shell + kubectl apply -f pipeline/roles.yaml + ``` +4. Deploy the steps and tasks: + ```shell + kubectl apply -f pipeline/stepactions.yaml + kubectl apply -f pipeline/experiment-task.yaml + ``` +5. Run experiments (set the parameter `namespace` to $NAMESPACE): + ```shell + kubectl apply -f pipeline/pipelinerun-matrix.yaml + ``` + +See the TaskRun objects created: + +```shell +tkn tr list +``` + +See the logs for a TaskRun: + +```shell +tkn tr logs -f +``` + +## Cautions + +- be sure to set the namespace parameter in the pipeline run; this is where the pipeline runs and is the base of the name for each experiment +- the upload of data is not yet implemented +- there are hardcoded assumptions/values about the use case in several places; these will be removed as more use cases are explored diff --git a/tekton-poc/pipeline/experiment-task.yaml b/tekton-poc/pipeline/experiment-task.yaml new file mode 100644 index 00000000..64a311c3 --- /dev/null +++ b/tekton-poc/pipeline/experiment-task.yaml @@ -0,0 +1,429 @@ +apiVersion: tekton.dev/v1 +kind: Task +metadata: + name: experiment +spec: + description: > + Runs an llm-d-benchmark experiment. 
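+  # Illustrative sketch only: the params below are the experiment "factors".
+  # A PipelineRun (see pipelinerun-matrix.yaml) can sweep them with a Tekton
+  # matrix; the factor values shown here are assumptions, not shipped defaults:
+  #
+  #   matrix:
+  #     params:
+  #       - name: question_len
+  #         value: ["256", "1024"]
+  #       - name: output_len
+  #         value: ["128", "512"]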
+ + params: + + - name: question_len + type: string + - name: output_len + type: string + + - name: namespace + type: string + default: kalantar-llmd + description: Target namespace + + - name: model-id + type: string + default: "meta-llama/Llama-3.2-1B-Instruct" + - name: inferencePort + default: 8000 + + - name: experimentBaseUrl + type: string + - name: experimentName + type: string + default: "experiment" + + - name: workspace-pvc-name + type: string + default: workspace-pvc + - name: workspace-pvc-size + type: string + default: 20Gi + - name: workspace-storage-class + type: string + default: ocs-storagecluster-cephfs + + - name: model-pvc-name + type: string + default: model-pvc + - name: model-pvc-size + type: string + default: 300Gi + - name: model-storage-class + type: string + default: ocs-storagecluster-cephfs + + - name: download-job-name + type: string + default: download-job + + - default: llm-d-infra + description: Name of the Helm repository for the Gateway + name: gatewayRepoName + type: string + - default: https://llm-d-incubation.github.io/llm-d-infra/ + description: URL of the Helm repository for the Gateway + name: gatewayRepoUrl + type: string + - name: gatewayChartVersion + type: string + default: "" + description: Optional gateway chart version (used with --version) + + - name: gatewayExtraArgs + type: string + default: "" + description: Optional extra args for the gateway (to append to 'helm upgrade --install') + + - name: gaieChartVersion + type: string + default: "v0.5.1" + description: Optional GAIE chart version (used with --version) + + - name: gaieExtraArgs + type: string + default: "" + description: Optional extra args for GAIE (to append to 'helm upgrade --install') + + - default: llm-d-modelservice + description: Name of the Helm repository for the model engine + name: msRepoName + type: string + - default: https://llm-d-incubation.github.io/llm-d-modelservice/ + description: URL of the Helm repository for the model engine + name: msRepoUrl + type: string + - name: msChartVersion + type: string + default: "" + description: Optional modelservice chart version (used with --version) + + - name: msExtraArgs + type: string + default: "" + description: Optional extra args for the model engine (to append to 'helm upgrade --install') + + - name: modelWaitTimeout + type: string + default: 900 + + - name: harnessName + type: string + default: inference-perf + - name: harnessProfile + type: string + default: sanity_random.yaml + - name: stackType + type: string + default: lld-d + - name: experimentIDBase + type: string + default: experiment + + - name: dry-run + type: string + default: "false" + + steps: + - name: log-start + image: alpine:3.20 + script: | + #!/bin/sh + echo "🔄 Starting sweep step ..." 
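+    # The next step provisions a per-experiment namespace named
+    # "$(params.namespace)-$(context.taskRun.name)", copies the hf-secret into
+    # it, and (on OpenShift) grants the anyuid SCC to the helm-installer
+    # service account, so each matrix combination runs in isolation and can be
+    # removed by deleting its namespace.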
+ + - name: prepare-namespace + image: quay.io/openshift/origin-cli:latest + script: | + #!/bin/sh + + NAMESPACE="$(params.namespace)-$(context.taskRun.name)" + DRY_RUN="$(params.dry-run)" + + if [ "${DRY_RUN}" = "true" ]; then + echo ">> skipping" + exit 0 + fi + + kubectl create namespace ${NAMESPACE} \ + --dry-run=client -o yaml | kubectl apply -f - + + # HF_TOKEN=$( + HF_TOKEN=$( + kubectl get secret hf-secret \ + --namespace "$(context.taskRun.namespace)" \ + -o jsonpath='{.data.HF_TOKEN}' \ + | tr -d '\n' \ + | base64 -d + ) + # kubectl --namespace $(context.taskRun.namespace) get secret hf-secret -o jsonpath='{.data.HF_TOKEN}' | tr -d '\n' | base64 -d) + kubectl create secret generic hf-secret \ + --namespace ${NAMESPACE} \ + --from-literal="HF_TOKEN=${HF_TOKEN}" \ + --dry-run=client -o yaml | kubectl apply -f - + + # TBD only if OpenShift + oc adm policy add-scc-to-user anyuid -z helm-installer -n ${NAMESPACE} + # oc adm policy add-scc-to-user privileged -z helm-installer -n ${NAMESPACE} + + - name: model-download + ref: + name: helm-upgrade-install + params: + # Location of helm chart + - name: git_url + value: "https://github.com/kalantar/llm-d-benchmark" + - name: git_revision + value: "tekton-poc" + - name: checkout_dir + value: "/tmp/llm-d-benchmark" + + # Helm arguments + - name: releaseName + value: $(params.experimentName)-download + - name: chart + value: /tmp/llm-d-benchmark/charts/model-download + - name: namespace + value: $(params.namespace)-$(context.taskRun.name) + - name: timeout + value: 15m + # - name: valuesYamlUrl + # value: "/tmp/llm-d-benchmark/charts/model-download/values.yaml" + - name: extraArgs + value: > + --set hf_model=$(params.model-id) + --set pvc.create=true + --set pvc.name=$(params.model-pvc-name) + --set pvc.size=$(params.model-pvc-size) + --set pvc.storageClass=$(params.model-storage-class) + + - name: dry-run + value: $(params.dry-run) + + - name: wait-for-download + image: alpine:3.20 + script : | + #!/bin/sh + echo "âŗ TBD: Wait for download job to complete" + + # TBD use tekton notion of workspace ?? 
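+    # One possible shape for that (a sketch, not wired in here): declare
+    #   workspaces:
+    #     - name: data
+    # on this Task, bind it to a PersistentVolumeClaim in the PipelineRun, and
+    # have steps reference $(workspaces.data.path) instead of this ad-hoc PVC.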
+ - name: create-workspace-pvc + ref: + name: create-rwx-pvc + params: + - name: name + value: $(params.workspace-pvc-name) + - name: namespace + value: $(params.namespace)-$(context.taskRun.name) + - name: size + value: $(params.workspace-pvc-size) + - name: storage-class + value: $(params.workspace-storage-class) + - name: dry-run + value: $(params.dry-run) + + - name: gateway + ref: + name: helm-upgrade-install + params: + - name: releaseName + value: $(params.experimentName)-gateway + - name: chart + value: llm-d-infra/llm-d-infra + - name: repoName + value: llm-d-infra + - name: repoUrl + value: https://llm-d-incubation.github.io/llm-d-infra/ + + - name: namespace + value: $(params.namespace)-$(context.taskRun.name) + - name: timeout + value: 15m + - name: valuesYamlUrl + value: "$(params.experimentBaseUrl)/gateway-values.yaml" + + - name: dry-run + value: $(params.dry-run) + + - name: gaie + ref: + name: helm-upgrade-install + params: + - name: releaseName + value: $(params.experimentName)-gaie + - name: chart + value: oci://registry.k8s.io/gateway-api-inference-extension/charts/inferencepool + - name: version + value: $(params.gaieChartVersion) + + - name: namespace + value: $(params.namespace)-$(context.taskRun.name) + - name: timeout + value: 15m + - name: valuesYamlUrl + value: "$(params.experimentBaseUrl)/gaie-values.yaml" + - name: extraArgs + value: "--set inferenceExtension.pluginsConfigFile=$(params.gaiePluginConfig)" + + - name: dry-run + value: $(params.dry-run) + + - name: model-engine + ref: + name: helm-upgrade-install + params: + - name: releaseName + value: $(params.experimentName)-ms + - name: chart + value: llm-d-modelservice/llm-d-modelservice + - name: repoName + value: llm-d-modelservice + - name: repoUrl + value: https://llm-d-incubation.github.io/llm-d-modelservice/ + + - name: namespace + value: $(params.namespace)-$(context.taskRun.name) + - name: timeout + value: 15m + - name: valuesYamlUrl + value: "$(params.experimentBaseUrl)/ms-values.yaml" + + - name: dry-run + value: $(params.dry-run) + + - name: wait-for-model + image: alpine/kubectl:1.34.1 + script: | + #!/bin/sh + + if [ "$(params.dry-run)" = "true" ]; then + echo ">> skipping" + exit 0 + fi + NAMESPACE="$(params.namespace)-$(context.taskRun.name)" + MODEL_ID="$(params.model-id)" + MODEL_LABEL=$(echo "$MODEL_ID" | tr '[:upper:]' '[:lower:]' | sed 's/[./]/-/g') + MODEL_START_TIMEOUT="$(params.modelWaitTimeout)" + + echo "âŗ Waiting for pods serving model ${MODEL_ID} to be 'Running'" + echo "Model label = ${MODEL_LABEL}" + + kubectl --namespace ${NAMESPACE} \ + wait pod \ + -l llm-d.ai/model=${MODEL_LABEL},llm-d.ai/role=decode \ + --for=create \ + --timeout=${MODEL_START_TIMEOUT}s + echo "✅ (decode) pods serving model ${MODEL_ID} created" + + # kubectl --namespace ${NAMESPACE} \ + # wait pod \ + # -l llm-d.ai/model=${MODEL_LABEL},llm-d.ai/role=prefill \ + # --for=create \ + # --timeout=${MODEL_START_TIMEOUT}s + # echo "✅ prefill pods serving model ${MODEL_ID} created" + + kubectl --namespace ${NAMESPACE} \ + wait pod \ + -l llm-d.ai/model=${MODEL_LABEL},llm-d.ai/role=decode \ + --for=condition=Ready=True \ + --timeout=${MODEL_START_TIMEOUT}s + echo "✅ (decode) pods serving model ${MODEL_ID} ready" + + # kubectl --namespace ${NAMESPACE} \ + # wait pod \ + # -l llm-d.ai/model=${MODEL_LABEL},llm-d.ai/role=prefill \ + # --for=condition=Ready=True \ + # --timeout=${MODEL_START_TIMEOUT}s + # echo "✅ prefill pods serving model ${MODEL_ID} ready" + + - name: workload + ref: + name: helm-upgrade-install + 
params: + # Location of helm chart + - name: git_url + value: "https://github.com/kalantar/llm-d-benchmark" + - name: git_revision + value: "tekton-poc" + - name: checkout_dir + value: "/tmp/llm-d-benchmark" + + # Helm arguments + - name: releaseName + value: $(params.experimentName)-harness + - name: chart + value: /tmp/llm-d-benchmark/charts/harness + - name: namespace + value: $(params.namespace)-$(context.taskRun.name) + - name: timeout + value: 15m + # - name: valuesYamlUrl + # value: "/tmp/llm-d-benchmark/charts/harness/values.yaml" + - name: extraArgs + value: > + --set harness.image.registry=quay.io + --set harness.image.repository=namasluk + --set harness.image.name=llm-d-benchmark + --set harness.image.tag=251002.1 + --set experiment.profile.name=$(params.harnessProfile) + --set experiment.profile.shared_prefix.question_len=$(params.question_len) + --set experiment.profile.shared_prefix.output_len=$(params.output_len) + --set experiment.identifier=experiment-DATE + --set stack.model=$(params.model-id) + --set stack.name=$(context.taskRun.name) + --set stack.endpointUrl='http://experiment-gateway-inference-gateway:80' + + - name: dry-run + value: $(params.dry-run) + + - name: wait-for-workload + image: alpine/kubectl:1.34.1 + script : | + #!/bin/sh + + if [ "$(params.dry-run)" = "true" ]; then + echo ">> skipping" + exit 0 + fi + + NAMESPACE="$(params.namespace)-$(context.taskRun.name)" + HARNESS_NAME="$(params.harnessName)" + + echo "âŗ Waiting for pod ${HARNESS_NAME}-launcher to complete..." + + while true; do + STATUS=$(kubectl --namespace ${NAMESPACE} get pod ${HARNESS_NAME}-launcher -o jsonpath='{.status.phase}') + if [ "$STATUS" = "Succeeded" ] || [ "$STATUS" = "Failed" ]; then + echo "Pod completed with status: $STATUS" + break + fi + echo "âŗ Still waiting for pod to complete..." + sleep 5 + done + + echo "✅ workload completed" + + - name: upload-results + image: alpine:3.20 + script : | + #!/bin/sh + echo "🚚 TBD: Upload results" + + - name: delete-namespace + image: alpine/helm:3.14.0 + script : | + #!/bin/sh + + if [ "$(params.dry-run)" = "true" ]; then + echo ">> skipping" + exit 0 + fi + + NAMESPACE="$(params.namespace)-$(context.taskRun.name)" + + # helm delete --namespace ${NAMESPACE} $(params.experimentName)-harness + # kubectl delete namespace ${NAMESPACE} + + echo "✅ workload pod deleted" + + - name: log-completion + image: alpine:3.20 + script: | + #!/bin/sh + echo "✅ Sweep step complete." 
diff --git a/tekton-poc/pipeline/experiment-taskrun.yaml b/tekton-poc/pipeline/experiment-taskrun.yaml new file mode 100644 index 00000000..86fca127 --- /dev/null +++ b/tekton-poc/pipeline/experiment-taskrun.yaml @@ -0,0 +1,25 @@ +apiVersion: tekton.dev/v1 +kind: TaskRun +metadata: + name: experiment-run +spec: + serviceAccountName: helm-installer + taskRef: + name: experiment + params: + - name: namespace + value: kalantar + - name: model-id + value: "Qwen/Qwen3-0.6B" + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + + - name: gaiePluginConfig + value: "inf-sche-queue.yaml" + - name: question_len + value: 100 + - name: output_len + value: 300 + diff --git a/tekton-poc/pipeline/pipelinerun-matrix.yaml b/tekton-poc/pipeline/pipelinerun-matrix.yaml new file mode 100644 index 00000000..cd64e491 --- /dev/null +++ b/tekton-poc/pipeline/pipelinerun-matrix.yaml @@ -0,0 +1,33 @@ +apiVersion: tekton.dev/v1 +kind: PipelineRun +metadata: + name: experiment-matrix-run +spec: + taskRunTemplate: + serviceAccountName: helm-installer + pipelineSpec: + tasks: + - name: run-experiment + taskRef: + name: experiment + params: + - name: namespace + value: kalantar + - name: model-id + value: "Qwen/Qwen3-0.6B" + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + matrix: + params: + - name: gaiePluginConfig + value: + - "inf-sche-queue.yaml" + - name: question_len + value: + - "100" + - "300" + - name: output_len + value: + - "300" diff --git a/tekton-poc/pipeline/roles.yaml b/tekton-poc/pipeline/roles.yaml new file mode 100644 index 00000000..68a8aa2d --- /dev/null +++ b/tekton-poc/pipeline/roles.yaml @@ -0,0 +1,122 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: helm-installer + namespace: kalantar +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: helm-installer-clusterrole +rules: +- apiGroups: [""] + resources: ["pods", "services", "namespaces", "persistentvolumeclaims", "secrets", "configmaps", "serviceaccounts"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] +- apiGroups: [""] + resources: ["pods/log"] + verbs: ["get"] +- apiGroups: ["apps"] + resources: ["deployments", "replicasets"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] +- apiGroups: ["rbac.authorization.k8s.io"] + resources: ["roles", "rolebindings", "clusterroles", "clusterrolebindings"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] +- apiGroups: ["gateway.networking.k8s.io"] + resources: ["gateways", "httproutes"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] +- apiGroups: ["gateway.kgateway.dev"] + resources: ["gatewayparameters"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] +- apiGroups: ["inference.networking.x-k8s.io"] + resources: ["inferencepools", "inferencemodels"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] +- apiGroups: ["authentication.k8s.io"] + resources: ["tokenreviews"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] +- apiGroups: ["authorization.k8s.io"] + resources: ["subjectaccessreviews"] + verbs: ["create"] +- apiGroups: ["route.openshift.io"] + 
resources: ["routes"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] +- apiGroups: ["security.openshift.io"] + resources: ["securitycontextconstraints"] + resourceNames: ["anyuid", "restricted", "privileged"] + verbs: ["use"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: helm-installer-clusterrolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: helm-installer-clusterrole +subjects: +- kind: ServiceAccount + name: helm-installer + namespace: kalantar +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: helm-installer-restricted-scc +subjects: + - kind: ServiceAccount + name: helm-installer +roleRef: + kind: ClusterRole + name: system:openshift:scc:restricted + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: helm-access + namespace: kalantar +rules: +- apiGroups: [""] + resources: ["secrets", "configmaps", "services", "pods", "namespaces", "serviceaccounts", "persistentvolumeclaims"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] +- apiGroups: [""] + resources: ["pods/log"] + verbs: ["get"] +- apiGroups: ["batch"] + resources: ["jobs"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] +- apiGroups: ["apps"] + resources: ["deployments", "replicasets"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] +- apiGroups: ["gateway.networking.k8s.io"] + resources: ["gateways", "httproutes"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] +- apiGroups: ["gateway.kgateway.dev"] + resources: ["gatewayparameters"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] +- apiGroups: ["inference.networking.x-k8s.io"] + resources: ["inferencepools", "inferencemodels"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] +- apiGroups: ["authentication.k8s.io"] + resources: ["tokenreviews", "subjectaccessreviews"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] +- apiGroups: ["route.openshift.io"] + resources: ["routes"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] +- apiGroups: ["security.openshift.io"] + resources: ["securitycontextconstraints"] + resourceNames: ["restricted"] + verbs: ["use"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: helm-access-binding + namespace: kalantar +subjects: +- kind: ServiceAccount + name: helm-installer + namespace: kalantar +roleRef: + kind: Role + name: helm-access + apiGroup: rbac.authorization.k8s.io \ No newline at end of file diff --git a/tekton-poc/pipeline/stepactions.yaml b/tekton-poc/pipeline/stepactions.yaml new file mode 100644 index 00000000..7a135024 --- /dev/null +++ b/tekton-poc/pipeline/stepactions.yaml @@ -0,0 +1,317 @@ +apiVersion: tekton.dev/v1beta1 +kind: StepAction +metadata: + name: create-rwx-pvc +spec: + params: + - name: name + type: string + - name: namespace + type: string + - name: size + type: string + default: "1Gi" + - name: storage-class + type: string + default: "default" + + - name: dry-run + type: string + default: "false" + env: + - name: NAME + value: $(params.name) + # - name: TARGET_NAMESPACE_RESULT + # value: $(results.targetNamespace.path) + - name: NAMESPACE + value: $(params.namespace) + - name: SIZE + value: $(params.size) + - name: STORAGE_CLASS + value: $(params.storage-class) + - name: DRY_RUN + value: 
$(params.dry-run) + image: alpine/kubectl:1.34.1 + script: | + #!/bin/sh + if [ "${DRY_RUN}" = "true" ]; then + echo ">> skipping" + exit 0 + fi + + # NAMESPACE=$(cat $TARGET_NAMESPACE_RESULT) + + cat <- + Chart ref or name. Examples: + - "nginx" (used with repoName to form repoName/nginx) + - "bitnami/nginx" + - "oci://registry.example.com/myorg/mychart" + - name: version + type: string + default: "" + description: Optional chart version + + # Repo management (add/update) + - name: repoName + type: string + default: "" + description: If set with repoUrl, the action will 'helm repo add' and 'helm repo update' + - name: repoUrl + type: string + default: "" + description: Chart repository URL + - name: updateRepo + type: string + default: "true" + description: '"true" to run helm repo update' + + # Repo auth/TLS (optional) + - name: repoUsername + type: string + default: "" + - name: repoPassword + type: string + default: "" + - name: repoPassCredentials + type: string + default: "false" + description: '"true" to pass credentials to all domains' + - name: repoInsecureSkipTLSVerify + type: string + default: "false" + - name: repoCAFile + type: string + default: "" + - name: repoCertFile + type: string + default: "" + - name: repoKeyFile + type: string + default: "" + + # Install/upgrade behavior + - name: namespace + type: string + default: "default" + - name: createNamespace + type: string + default: "true" + - name: wait + type: string + default: "true" + - name: timeout + type: string + default: "10m0s" + + # Values and extra args + - name: valuesYaml + type: string + default: "" + - name: valuesYamlUrl + type: string + default: "" + - name: extraArgs + type: string + default: "" + + - name: dry-run + type: string + default: "false" + # ---------- Params -> env (StepActions don't interpolate $(params.*) directly in script) ---------- + env: + - name: GIT_URL + value: $(params.git_url) + - name: GIT_REVISION + value: $(params.git_revision) + - name: GIT_DEPTH + value: $(params.depth) + - name: CHECKOUT_DIR + value: $(params.checkout_dir) + + - name: HELM_RELEASE + value: "$(params.releaseName)" + - name: HELM_CHART + value: "$(params.chart)" + - name: HELM_VERSION + value: "$(params.version)" + + - name: HELM_REPO_NAME + value: "$(params.repoName)" + - name: HELM_REPO_URL + value: "$(params.repoUrl)" + - name: HELM_REPO_UPDATE + value: "$(params.updateRepo)" + + - name: HELM_REPO_USERNAME + value: "$(params.repoUsername)" + - name: HELM_REPO_PASSWORD + value: "$(params.repoPassword)" + - name: HELM_REPO_PASS_CREDS + value: "$(params.repoPassCredentials)" + - name: HELM_REPO_INSECURE + value: "$(params.repoInsecureSkipTLSVerify)" + - name: HELM_REPO_CA_FILE + value: "$(params.repoCAFile)" + - name: HELM_REPO_CERT_FILE + value: "$(params.repoCertFile)" + - name: HELM_REPO_KEY_FILE + value: "$(params.repoKeyFile)" + + - name: HELM_NAMESPACE + value: "$(params.namespace)" + - name: HELM_CREATE_NAMESPACE + value: "$(params.createNamespace)" + - name: HELM_WAIT + value: "$(params.wait)" + - name: HELM_TIMEOUT + value: "$(params.timeout)" + - name: HELM_VALUES_YAML + value: "$(params.valuesYaml)" + - name: HELM_VALUES_YAML_URL + value: "$(params.valuesYamlUrl)" + - name: HELM_EXTRA_ARGS + value: "$(params.extraArgs)" + + - name: DRY_RUN + value: $(params.dry-run) + + script: | + #!/usr/bin/env sh + set -eu + + if [ "${DRY_RUN}" = "true" ]; then + echo ">> skipping" + exit 0 + fi + + # if a GIT_URL is defined, clone the project; we will use helm chart from this + if [ -n "${GIT_URL:-}" ]; then + 
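+        # Shallow init+fetch of a single revision rather than 'git clone -b':
+        # GIT_REVISION may be a branch or tag (or, where the server allows it,
+        # a commit SHA), and only GIT_DEPTH commits are fetched.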
mkdir -p "$CHECKOUT_DIR" + rm -rf "$CHECKOUT_DIR/.git" || true + echo "Cloning $GIT_URL @ $GIT_REVISION into $CHECKOUT_DIR" + git init "$CHECKOUT_DIR" + git -C "$CHECKOUT_DIR" remote add origin "$GIT_URL" + git -C "$CHECKOUT_DIR" fetch --depth "$GIT_DEPTH" origin "$GIT_REVISION" + git -C "$CHECKOUT_DIR" checkout FETCH_HEAD + COMMIT=$(git -C "$CHECKOUT_DIR" rev-parse HEAD) + echo "Checked out commit: $COMMIT" + fi + + # Construct optional values file; values overrides url + VALUES_FLAG="" + if [ -n "${HELM_VALUES_YAML_URL:-}" ]; then + VALUES_FLAG="-f ${HELM_VALUES_YAML_URL}" + fi + + if [ -n "${HELM_VALUES_YAML:-}" ]; then + printf "%s" "${HELM_VALUES_YAML}" > /tmp/${HELM_RELEASE}-values.yaml + VALUES_FLAG="-f /tmp/${HELM_RELEASE}-values.yaml" + fi + + # Optional repo add (idempotent via --force-update) + if [ -n "${HELM_REPO_NAME:-}" ] && [ -n "${HELM_REPO_URL:-}" ]; then + REPO_ADD_FLAGS="--force-update" + [ -n "${HELM_REPO_USERNAME:-}" ] && REPO_ADD_FLAGS="$REPO_ADD_FLAGS --username ${HELM_REPO_USERNAME}" + [ -n "${HELM_REPO_PASSWORD:-}" ] && REPO_ADD_FLAGS="$REPO_ADD_FLAGS --password ${HELM_REPO_PASSWORD}" + [ "${HELM_REPO_PASS_CREDS:-false}" = "true" ] && REPO_ADD_FLAGS="$REPO_ADD_FLAGS --pass-credentials" + [ "${HELM_REPO_INSECURE:-false}" = "true" ] && REPO_ADD_FLAGS="$REPO_ADD_FLAGS --insecure-skip-tls-verify" + [ -n "${HELM_REPO_CA_FILE:-}" ] && REPO_ADD_FLAGS="$REPO_ADD_FLAGS --ca-file ${HELM_REPO_CA_FILE}" + [ -n "${HELM_REPO_CERT_FILE:-}" ] && REPO_ADD_FLAGS="$REPO_ADD_FLAGS --cert-file ${HELM_REPO_CERT_FILE}" + [ -n "${HELM_REPO_KEY_FILE:-}" ] && REPO_ADD_FLAGS="$REPO_ADD_FLAGS --key-file ${HELM_REPO_KEY_FILE}" + + echo "==> Adding/refreshing repo ${HELM_REPO_NAME} -> ${HELM_REPO_URL}" + # shellcheck disable=SC2086 + helm repo add bitnami https://charts.bitnami.com/bitnami + helm repo add "${HELM_REPO_NAME}" "${HELM_REPO_URL}" ${REPO_ADD_FLAGS} + # (helm repo add flags documented by Helm) # docs: https://helm.sh/docs/helm/helm_repo_add/ + + if [ "${HELM_REPO_UPDATE:-true}" = "true" ]; then + echo "==> Updating Helm repo cache" + # Update all repos for portability across Helm versions + helm repo update + fi + fi + + # Build common flags + CREATE_NS_FLAG=""; [ "${HELM_CREATE_NAMESPACE:-true}" = "true" ] && CREATE_NS_FLAG="--create-namespace" + WAIT_FLAG=""; [ "${HELM_WAIT:-true}" = "true" ] && WAIT_FLAG="--wait" + VERSION_FLAG=""; [ -n "${HELM_VERSION:-}" ] && VERSION_FLAG="--version ${HELM_VERSION}" + TIMEOUT_FLAG=""; [ -n "${HELM_TIMEOUT:-}" ] && TIMEOUT_FLAG="--timeout ${HELM_TIMEOUT}" + + # Decide final chart reference: + # - If user passed repoName and a bare chart, use repoName/chart. + # - If user passed repo/chart or oci://..., use as-is. 
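+      #   e.g. chart="nginx" with repoName="bitnami"       -> "bitnami/nginx"
+      #        chart="bitnami/nginx" or chart="oci://..."  -> used unchanged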
+ CHART_REF="${HELM_CHART}" + case "${HELM_CHART}" in + */*|oci://*) : ;; + *) if [ -n "${HELM_REPO_NAME:-}" ]; then CHART_REF="${HELM_REPO_NAME}/${HELM_CHART}"; fi ;; + esac + + if [ -n "${HELM_EXTRA_ARGS:-}" ]; then + HELM_EXTRA_ARGS=$(echo "$HELM_EXTRA_ARGS" | sed "s/DATE/$(date +%s)/g") + fi + + echo "==> helm upgrade --install ${HELM_RELEASE} ${CHART_REF} --namespace ${HELM_NAMESPACE} ${VERSION_FLAG} ${CREATE_NS_FLAG} ${WAIT_FLAG} ${TIMEOUT_FLAG} ${VALUES_FLAG} ${HELM_EXTRA_ARGS}" + # shellcheck disable=SC2086 + helm template \ + "${HELM_RELEASE}" "${CHART_REF}" \ + --namespace "${HELM_NAMESPACE}" \ + ${VERSION_FLAG} ${CREATE_NS_FLAG} ${WAIT_FLAG} ${TIMEOUT_FLAG} ${VALUES_FLAG} ${HELM_EXTRA_ARGS} \ + | kubectl --namespace "${HELM_NAMESPACE}" apply -f - + # helm upgrade --install \ + # "${HELM_RELEASE}" "${CHART_REF}" \ + # --namespace "${HELM_NAMESPACE}" \ + # ${VERSION_FLAG} ${CREATE_NS_FLAG} ${WAIT_FLAG} ${TIMEOUT_FLAG} ${VALUES_FLAG} ${HELM_EXTRA_ARGS} + From 5c6600770a0056536a80246ab33ab4931ef16fb3 Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Fri, 3 Oct 2025 13:54:37 -0400 Subject: [PATCH 16/44] inital pipeline Signed-off-by: Michael Kalantar --- tekton-poc/pipeline/pipelinerun-matrix.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tekton-poc/pipeline/pipelinerun-matrix.yaml b/tekton-poc/pipeline/pipelinerun-matrix.yaml index cd64e491..9fa11494 100644 --- a/tekton-poc/pipeline/pipelinerun-matrix.yaml +++ b/tekton-poc/pipeline/pipelinerun-matrix.yaml @@ -12,7 +12,7 @@ spec: name: experiment params: - name: namespace - value: kalantar + value: CHANGE_ME - name: model-id value: "Qwen/Qwen3-0.6B" - name: experimentBaseUrl From abc04193e1b158e67fcc09dd6c50db7c97387a80 Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Mon, 6 Oct 2025 10:53:29 -0400 Subject: [PATCH 17/44] utility to manage parallelism Signed-off-by: Michael Kalantar --- tekton-poc/README.md | 28 +++ tekton-poc/pipeline/pipelinerun-matrix.yaml | 6 + tekton-poc/utility/transform-pr-parallel.py | 256 ++++++++++++++++++++ 3 files changed, 290 insertions(+) create mode 100644 tekton-poc/utility/transform-pr-parallel.py diff --git a/tekton-poc/README.md b/tekton-poc/README.md index a75d6b62..6bacc50d 100644 --- a/tekton-poc/README.md +++ b/tekton-poc/README.md @@ -82,6 +82,34 @@ See the logs for a TaskRun: tkn tr logs -f ``` +## Managing Parallelism + +The sample `PipelineRun` (`pipeline/pipelinerun-matrix.yaml`) executes all the tasks in parallel. Depdending on the size of the matrix, this may require a large number of resources. +A _matrix_ based `Task` can be unrolled into multiple tasks to reduce the parallelism. +The utility script `utility/transform-pr-parallel.py` does this as follows: + +1. Unroll a single parameter into one `Task` per value. Each resulting Task defines a matrix over the remaining parameters. + + ```shell + python transform-pr.py pipelinerun-matrix.yaml --unroll gaiePluginConfig -o pr-unrolled.yaml + ``` + +2. Unroll multiple parameters into [their Cartesian product] Tasks. Each resulting Task defines a matrix over the remaining parameters. + + ```shell + python transform-pr.py pipelinerun-matrix.yaml --unroll gaiePluginConfig,question_len -o pr-unrolled-2.yaml + ``` + +3. Unroll all the parameters into [their Cartian product] Tasks. Allow _n_ to run at once. 
This can be done using a _barrier_ strategy or a _sliding_window_ strategy + + ```shell + # Barrier (default) + python transform-pr.py pipelinerun-matrix.yaml -n 3 -o pr-expanded-barrier.yaml + + # Sliding window + python transform-pr.py pipelinerun-matrix.yaml -n 3 --sliding-window -o pr-expanded-sliding.yaml + ``` + ## Cautions - be sure to set the namespace parameter in the pipeline run; this is where the pipeline runs and is the base of the name for each experiment diff --git a/tekton-poc/pipeline/pipelinerun-matrix.yaml b/tekton-poc/pipeline/pipelinerun-matrix.yaml index 9fa11494..739e6d30 100644 --- a/tekton-poc/pipeline/pipelinerun-matrix.yaml +++ b/tekton-poc/pipeline/pipelinerun-matrix.yaml @@ -23,11 +23,17 @@ spec: params: - name: gaiePluginConfig value: + - "inf-sche-none.yaml" + - "inf-sche-prefix.yaml" + - "inf-sche-kv.yaml" - "inf-sche-queue.yaml" - name: question_len value: - "100" - "300" + - "1000" - name: output_len value: + - "100" - "300" + - "1000" diff --git a/tekton-poc/utility/transform-pr-parallel.py b/tekton-poc/utility/transform-pr-parallel.py new file mode 100644 index 00000000..426e0a58 --- /dev/null +++ b/tekton-poc/utility/transform-pr-parallel.py @@ -0,0 +1,256 @@ +#!/usr/bin/env python3 +import sys +import yaml +import itertools +import argparse +from copy import deepcopy + +def load_yaml_from_path_or_stdin(path: str): + """Load YAML from a file path or stdin if path == '-'.""" + if path == "-": + try: + return yaml.safe_load(sys.stdin) + except Exception as e: + raise ValueError(f"Failed to read YAML from stdin: {e}") + else: + try: + with open(path, "r") as f: + return yaml.safe_load(f) + except FileNotFoundError: + raise ValueError(f"Input file not found: {path}") + except Exception as e: + raise ValueError(f"Failed to read YAML from '{path}': {e}") + +def dump_yaml_to_path_or_stdout(data, path: str | None, announce_to_stderr: str | None = None): + """ + Write YAML to the given path. If path is None or '-', write to stdout with no extra noise. + If path is a real file, write there and optionally announce to stderr. + """ + if path is None or path == "-": + yaml.safe_dump(data, sys.stdout, sort_keys=False) + else: + with open(path, "w") as f: + yaml.safe_dump(data, f, sort_keys=False) + if announce_to_stderr: + print(announce_to_stderr, file=sys.stderr) + +# -------------------- EXPANSION (existing behavior) -------------------- # +def transform_matrix_to_batched_dict(original_yaml: dict, max_parallel: int, sliding_window: bool): + """ + Expand the matrix task into concrete tasks with runAfter enforcing either: + - barrier batching (default), or + - sliding-window (--sliding-window). 
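+
+    For example, with 4 matrix combinations and max_parallel=2, barrier mode
+    gives tasks 2 and 3 runAfter [task 0, task 1], while sliding-window mode
+    gives task 2 runAfter [task 0] and task 3 runAfter [task 1].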
+ """ + if max_parallel < 1: + raise ValueError("max_parallel must be >= 1") + + try: + pipeline_spec = original_yaml["spec"]["pipelineSpec"] + tasks = pipeline_spec["tasks"] + except Exception: + raise ValueError("Input YAML must contain spec.pipelineSpec.tasks") + + if not isinstance(tasks, list) or len(tasks) == 0: + raise ValueError("spec.pipelineSpec.tasks must be a non-empty list") + + base_task = deepcopy(tasks[0]) + base_name = base_task.get("name", "task") + + # matrix params + matrix_params = {} + for p in base_task.get("matrix", {}).get("params", []): + vals = p.get("value", []) + if not isinstance(vals, list): + vals = [vals] + matrix_params[p["name"]] = vals + + combos = list(itertools.product(*matrix_params.values())) if matrix_params else [tuple()] + total = len(combos) + + new_tasks = [] + for i, combo in enumerate(combos): + t = deepcopy(base_task) + t.pop("matrix", None) + t["name"] = f"{base_name}-{i}" + + t["params"] = deepcopy(base_task.get("params", [])) + [ + {"name": name, "value": value} + for name, value in zip(matrix_params.keys(), combo) + ] + + if sliding_window: + if i >= max_parallel: + t["runAfter"] = [f"{base_name}-{i - max_parallel}"] + else: + t.pop("runAfter", None) + else: + batch_index = i // max_parallel + if batch_index > 0: + prev_start = (batch_index - 1) * max_parallel + prev_end = min(batch_index * max_parallel, total) + t["runAfter"] = [f"{base_name}-{j}" for j in range(prev_start, prev_end)] + else: + t.pop("runAfter", None) + + new_tasks.append(t) + + new_pr = deepcopy(original_yaml) + new_pipeline_spec = deepcopy(pipeline_spec) + new_pipeline_spec["tasks"] = new_tasks + new_pr["spec"]["pipelineSpec"] = new_pipeline_spec + return new_pr + +# -------------------- UNROLLING (new behavior) -------------------- # +def transform_unroll_params_dict(original_yaml: dict, unroll_params: list[str]): + """ + Unroll (hoist) one or more matrix parameters into separate tasks. + + For given unroll_params (subset of the matrix param names): + - Create one task for each Cartesian product of the chosen params' values. + - In each task: + * Set the chosen params as fixed task 'params' (not matrix). + * Keep a 'matrix' of the remaining matrix params (if any). + - Do not add runAfter constraints (preserve original no-dependency behavior). + """ + if not unroll_params: + raise ValueError("unroll_params must be a non-empty list of parameter names") + + try: + pipeline_spec = original_yaml["spec"]["pipelineSpec"] + tasks = pipeline_spec["tasks"] + except Exception: + raise ValueError("Input YAML must contain spec.pipelineSpec.tasks") + + if not isinstance(tasks, list) or len(tasks) == 0: + raise ValueError("spec.pipelineSpec.tasks must be a non-empty list") + + base_task = deepcopy(tasks[0]) + base_name = base_task.get("name", "task") + + # Load matrix params preserving order as a list of (name, values) + matrix_params_list = [] + for p in base_task.get("matrix", {}).get("params", []): + vals = p.get("value", []) + if not isinstance(vals, list): + vals = [vals] + matrix_params_list.append((p["name"], vals)) + + if not matrix_params_list: + raise ValueError("Base task has no matrix to unroll") + + # Validate unroll params are present in matrix + matrix_names = [name for name, _ in matrix_params_list] + unknown = [n for n in unroll_params if n not in matrix_names] + if unknown: + raise ValueError(f"Unroll params not found in matrix: {unknown}. 
Available: {matrix_names}") + + # Split into "chosen" vs "remaining" + chosen = [(name, vals) for name, vals in matrix_params_list if name in unroll_params] + remaining = [(name, vals) for name, vals in matrix_params_list if name not in unroll_params] + + # Cartesian product over chosen + chosen_names = [name for name, _ in chosen] + chosen_values_lists = [vals for _, vals in chosen] + chosen_combos = list(itertools.product(*chosen_values_lists)) if chosen else [tuple()] + + new_tasks = [] + for i, combo in enumerate(chosen_combos): + t = deepcopy(base_task) + t["name"] = f"{base_name}-{i}" + + # Remove matrix entirely; we will rebuild it only with remaining params + t.pop("matrix", None) + + # Merge original params plus fixed chosen params for this task + t["params"] = deepcopy(base_task.get("params", [])) + [ + {"name": name, "value": value} + for name, value in zip(chosen_names, combo) + ] + + # Rebuild matrix from the remaining params (if any) + if remaining: + t["matrix"] = { + "params": [{"name": name, "value": vals} for name, vals in remaining] + } + else: + # Nothing remains; ensure no stray runAfter or matrix fields + t.pop("matrix", None) + + # Preserve lack of dependencies (no runAfter) unless the base had them explicitly + if "runAfter" in t: + # Typically matrix tasks don't carry runAfter; remove to keep parallelism by default + t.pop("runAfter", None) + + new_tasks.append(t) + + # Replace tasks with our new set + new_pr = deepcopy(original_yaml) + new_pipeline_spec = deepcopy(pipeline_spec) + new_pipeline_spec["tasks"] = new_tasks + new_pr["spec"]["pipelineSpec"] = new_pipeline_spec + return new_pr + +def main(): + parser = argparse.ArgumentParser( + description=( + "Tekton PipelineRun matrix transformer.\n" + "Default: expand the matrix to concrete tasks with barrier batching or sliding-window.\n" + "Use --unroll to split specified matrix params into separate tasks while keeping a reduced matrix." + ) + ) + parser.add_argument("input", help="Input PipelineRun YAML file or '-' for stdin") + + # Mutually exclusive: either unroll OR expand + mode_group = parser.add_mutually_exclusive_group() + mode_group.add_argument( + "--unroll", metavar="PARAMS", + help="Comma-separated matrix parameter names to hoist into tasks (e.g., 'gaiePluginConfig' or 'p1,p2')." + ) + mode_group.add_argument( + "--sliding-window", action="store_true", + help="(Expand mode) Use sliding-window scheduling (each task i depends on i-n). Default is barrier batching." + ) + + # Expansion options (used only if NOT --unroll) + parser.add_argument( + "-n", "--max-parallel", type=int, default=1, + help="(Expand mode) Maximum number of tasks to run in parallel. Default: 1" + ) + + parser.add_argument( + "-o", "--output", default=None, + help="Output file path. Use '-' or omit to write to stdout." 
+ ) + + args = parser.parse_args() + + try: + original = load_yaml_from_path_or_stdin(args.input) + + if args.unroll: + unroll_params = [s.strip() for s in args.unroll.split(",") if s.strip()] + transformed = transform_unroll_params_dict(original_yaml=original, unroll_params=unroll_params) + mode_desc = f"unroll={unroll_params}" + else: + transformed = transform_matrix_to_batched_dict( + original_yaml=original, + max_parallel=args.max_parallel, + sliding_window=args.sliding_window, + ) + mode_desc = "sliding-window" if args.sliding_window else "barrier" + + except Exception as e: + print(f"Error: {e}", file=sys.stderr) + sys.exit(1) + + announce = None + if args.output not in (None, "-"): + if args.unroll: + announce = f"✅ Transformed PipelineRun saved to '{args.output}' ({mode_desc})" + else: + announce = f"✅ Transformed PipelineRun saved to '{args.output}' (mode={mode_desc}, max_parallel={args.max_parallel})" + + dump_yaml_to_path_or_stdout(transformed, args.output, announce_to_stderr=announce) + +if __name__ == "__main__": + main() \ No newline at end of file From 46d469fe030313f8dfc128966ce729992dd6976b Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Wed, 8 Oct 2025 14:45:42 -0400 Subject: [PATCH 18/44] change workload steps Signed-off-by: Michael Kalantar --- tekton-poc/pipeline/experiment-task.yaml | 291 ++++-- tekton-poc/pipeline/experiment-taskrun.yaml | 25 - .../pipeline/pipelinerun-matrix-subset.yaml | 81 ++ tekton-poc/pipeline/pipelinerun-matrix.yaml | 13 +- .../pipeline/pipelinerun-sequential-1.yaml | 841 ++++++++++++++++ .../pipelinerun-sequential-4-sliding.yaml | 835 ++++++++++++++++ .../pipeline/pipelinerun-sequential-4.yaml | 931 ++++++++++++++++++ ...un-sequential-unroll-gaiePluginConfig.yaml | 119 +++ tekton-poc/pipeline/stepactions.yaml | 6 +- 9 files changed, 3022 insertions(+), 120 deletions(-) delete mode 100644 tekton-poc/pipeline/experiment-taskrun.yaml create mode 100644 tekton-poc/pipeline/pipelinerun-matrix-subset.yaml create mode 100644 tekton-poc/pipeline/pipelinerun-sequential-1.yaml create mode 100644 tekton-poc/pipeline/pipelinerun-sequential-4-sliding.yaml create mode 100644 tekton-poc/pipeline/pipelinerun-sequential-4.yaml create mode 100644 tekton-poc/pipeline/pipelinerun-sequential-unroll-gaiePluginConfig.yaml diff --git a/tekton-poc/pipeline/experiment-task.yaml b/tekton-poc/pipeline/experiment-task.yaml index 64a311c3..2f651b89 100644 --- a/tekton-poc/pipeline/experiment-task.yaml +++ b/tekton-poc/pipeline/experiment-task.yaml @@ -6,8 +6,10 @@ spec: description: > Runs an llm-d-benchmark experiment. 
- params: + workspaces: + - name: data + params: - name: question_len type: string - name: output_len @@ -30,16 +32,6 @@ spec: type: string default: "experiment" - - name: workspace-pvc-name - type: string - default: workspace-pvc - - name: workspace-pvc-size - type: string - default: 20Gi - - name: workspace-storage-class - type: string - default: ocs-storagecluster-cephfs - - name: model-pvc-name type: string default: model-pvc @@ -104,6 +96,15 @@ spec: type: string default: 900 + - name: llmdbenchImageRegistry + default: "quay.io" + - name: llmdbenchImageRepo + default: "namasluk" + - name: llmdbenchImageName + default: "llm-d-benchmark" + - name: llmdbenchImageTag + default: "251002.1" + - name: harnessName type: string default: inference-perf @@ -113,10 +114,20 @@ spec: - name: stackType type: string default: lld-d - - name: experimentIDBase + - name: pipelineUID type: string default: experiment + - name: bucket + type: string + default: "cloud-object-storage-cos-standard-ere" + - name: prefix + type: string + default: "results" + - name: endpoint + type: string + default: "https://s3.us-east.cloud-object-storage.appdomain.cloud" + - name: dry-run type: string default: "false" @@ -202,22 +213,6 @@ spec: #!/bin/sh echo "âŗ TBD: Wait for download job to complete" - # TBD use tekton notion of workspace ?? - - name: create-workspace-pvc - ref: - name: create-rwx-pvc - params: - - name: name - value: $(params.workspace-pvc-name) - - name: namespace - value: $(params.namespace)-$(context.taskRun.name) - - name: size - value: $(params.workspace-pvc-size) - - name: storage-class - value: $(params.workspace-storage-class) - - name: dry-run - value: $(params.dry-run) - - name: gateway ref: name: helm-upgrade-install @@ -246,7 +241,7 @@ spec: name: helm-upgrade-install params: - name: releaseName - value: $(params.experimentName)-gaie + value: $(params.experimentName)-gaie-NAMESPACE_HASH - name: chart value: oci://registry.k8s.io/gateway-api-inference-extension/charts/inferencepool - name: version @@ -283,6 +278,11 @@ spec: value: 15m - name: valuesYamlUrl value: "$(params.experimentBaseUrl)/ms-values.yaml" + - name: extraArgs + value: > + --set routing.inferencePool.name=$(params.experimentName)-gaie-NAMESPACE_HASH + --set routing.httpRoute.rules[0].backendRefs[0].name=$(params.experimentName)-gaie-NAMESPACE_HASH + --set routing.httpRoute.rules[1].backendRefs[0].name=$(params.experimentName)-gaie-NAMESPACE_HASH - name: dry-run value: $(params.dry-run) @@ -333,94 +333,199 @@ spec: # echo "✅ prefill pods serving model ${MODEL_ID} ready" - name: workload - ref: - name: helm-upgrade-install - params: - # Location of helm chart - - name: git_url - value: "https://github.com/kalantar/llm-d-benchmark" - - name: git_revision - value: "tekton-poc" - - name: checkout_dir - value: "/tmp/llm-d-benchmark" - - # Helm arguments - - name: releaseName - value: $(params.experimentName)-harness - - name: chart - value: /tmp/llm-d-benchmark/charts/harness - - name: namespace - value: $(params.namespace)-$(context.taskRun.name) - - name: timeout - value: 15m - # - name: valuesYamlUrl - # value: "/tmp/llm-d-benchmark/charts/harness/values.yaml" - - name: extraArgs - value: > - --set harness.image.registry=quay.io - --set harness.image.repository=namasluk - --set harness.image.name=llm-d-benchmark - --set harness.image.tag=251002.1 - --set experiment.profile.name=$(params.harnessProfile) - --set experiment.profile.shared_prefix.question_len=$(params.question_len) - --set 
experiment.profile.shared_prefix.output_len=$(params.output_len) - --set experiment.identifier=experiment-DATE - --set stack.model=$(params.model-id) - --set stack.name=$(context.taskRun.name) - --set stack.endpointUrl='http://experiment-gateway-inference-gateway:80' - - - name: dry-run - value: $(params.dry-run) + image: $(params.llmdbenchImageRegistry)/$(params.llmdbenchImageRepo)/$(params.llmdbenchImageName):$(params.llmdbenchImageTag) + env: + - name: LLMDBENCH_RUN_EXPERIMENT_LAUNCHER + value: "1" + - name: LLMDBENCH_RUN_EXPERIMENT_ANALYZE_LOCALLY + value: "0" + - name: LLMDBENCH_RUN_EXPERIMENT_HARNESS + value: "$(params.harnessName)-llm-d-benchmark.sh" + - name: LLMDBENCH_RUN_EXPERIMENT_ANALYZER + value: "$(params.harnessName)-analyze_results.sh" + - name: LLMDBENCH_RUN_EXPERIMENT_HARNESS_WORKLOAD_NAME + value: "$(params.harnessProfile)" + - name: LLMDBENCH_HARNESS_NAME + value: "$(params.harnessName)" + - name: LLMDBENCH_HARNESS_NAMESPACE + value: "$(params.namespace)-$(context.taskRun.name)" + - name: LLMDBENCH_HARNESS_STACK_TYPE + value: "llm-d" + - name: LLMDBENCH_HARNESS_STACK_ENDPOINT_URL + value: "http://experiment-gateway-inference-gateway.$(params.namespace)-$(context.taskRun.name).svc.cluster.local:80" + - name: LLMDBENCH_DEPLOY_METHODS + value: "modelservice" + - name: LLMDBENCH_MAGIC_ENVAR + value: "harness_pod" + + - name: LLMDBENCH_LLMD_IMAGE_REGISTRY + value: "$(params.llmdbenchImageRegistry)" + - name: LLMDBENCH_LLMD_IMAGE_REPO + value: "$(params.llmdbenchImageRepo)" + - name: LLMDBENCH_LLMD_IMAGE_NAME + value: "$(params.llmdbenchImageName)" + - name: LLMDBENCH_LLMD_IMAGE_TAG + value: "$(params.llmdbenchImageTag)" + + # TBD add_env_vars_to_pod $LLMDBENCH_CONTROL_ENV_VAR_LIST_TO_POD + - name: LLMDBENCH_DEPLOY_CURRENT_MODEL + value: "$(params.model-id)" + - name: LLMDBENCH_VLLM_MODELSERVICE_PREFILL_REPLICAS + value: "0" + - name: LLMDBENCH_VLLM_MODELSERVICE_DECODE_REPLICAS + value: "2" + - name: LLMDBENCH_VLLM_COMMON_AFFINITY + value: "nvidia.com/gpu.product:NVIDIA-H100-80GB-HBM3" + - name: LLMDBENCH_VLLM_MODELSERVICE_DECODE_TENSOR_PARALLELISM + value: "4" + - name: LLMDBENCH_VLLM_MODELSERVICE_DECODE_DATA_PARALLELISM + value: "1" + - name: LLMDBENCH_VLLM_MODELSERVICE_PREFILL_TENSOR_PARALLELISM + value: "1" + - name: LLMDBENCH_VLLM_MODELSERVICE_PREFILL_DATA_PARALLELISM + value: "1" + + - name: HF_TOKEN_SECRET + value: "hf-secret" + - name: HUGGING_FACE_HUB_TOKEN + valueFrom: + secretKeyRef: + name: hf-secret + key: HF_TOKEN + + computeResources: + requests: + memory: "32Gi" + cpu: "16" + limits: + memory: "32Gi" + cpu: "16" - - name: wait-for-workload - image: alpine/kubectl:1.34.1 - script : | - #!/bin/sh + script: | + #!/bin/bash + + export EXPERIMENT_ID="experiment-$(echo -n $(params.pipelineUID) | cut -c1-8)" + export LLMDBENCH_RUN_EXPERIMENT_ID="${EXPERIMENT_ID}" + export LLMDBENCH_RUN_EXPERIMENT_RESULTS_DIR="$(workspaces.data.path)/$(params.harnessName)_${EXPERIMENT_ID}_$(context.taskRun.name)" + export LLMDBENCH_CONTROL_WORK_DIR="$(workspaces.data.path)/$(params.harnessName)_${EXPERIMENT_ID}_$(context.taskRun.name)" + export LLMDBENCH_HARNESS_STACK_NAME=$(echo "$(params.model-id)" | tr '[:upper:]' '[:lower:]' | sed 's/[./]/-/g') + export LLMDBENCH_DEPLOY_CURRENT_MODELID="${LLMDBENCH_HARNESS_STACK_NAME}" + export LLMDBENCH_DEPLOY_CURRENT_TOKENIZER="$(params.model-id)" + + export QUESTION_LEN=$(params.question_len) + export OUTPUT_LEN=$(params.output_len) + + get_profiles() { + git init llm-d-benchmark + cd llm-d-benchmark + git remote add origin 
https://github.com/llm-d/llm-d-benchmark.git + git config core.sparseCheckout true + echo "workload/profiles/" >> .git/info/sparse-checkout + git pull origin main + } if [ "$(params.dry-run)" = "true" ]; then echo ">> skipping" exit 0 fi - NAMESPACE="$(params.namespace)-$(context.taskRun.name)" - HARNESS_NAME="$(params.harnessName)" - - echo "âŗ Waiting for pod ${HARNESS_NAME}-launcher to complete..." - - while true; do - STATUS=$(kubectl --namespace ${NAMESPACE} get pod ${HARNESS_NAME}-launcher -o jsonpath='{.status.phase}') - if [ "$STATUS" = "Succeeded" ] || [ "$STATUS" = "Failed" ]; then - echo "Pod completed with status: $STATUS" - break - fi - echo "âŗ Still waiting for pod to complete..." - sleep 5 + get_profiles + + echo "creating CONTROL directories" + mkdir -p ${LLMDBENCH_CONTROL_WORK_DIR}/setup + rm -f ${LLMDBENCH_CONTROL_WORK_DIR}/setup/sed-commands + touch ${LLMDBENCH_CONTROL_WORK_DIR}/setup/sed-commands + + workload=$(echo $(params.harnessProfile) | sed 's^\.yaml^^g' ) + echo "workload = $workload" + workload_template_list=$(find workload/profiles/ -name "${workload}.yaml.in") + echo "workload_template_list = $workload_template_list" + + for workload_template_full_path in $workload_template_list; do + echo "PROCESSING $workload_template_full_path" + workload_template_type=$(echo ${workload_template_full_path} | rev | cut -d '/' -f 2 | rev) + echo "workload_template_type = $workload_template_type" + workload_template_file_name=$(echo ${workload_template_full_path} | rev | cut -d '/' -f 1 | rev | sed -e "s^\.yaml.in$^^g") + echo "workload_template_file_name = $workload_template_file_name" + ## + workload_output_file=${LLMDBENCH_CONTROL_WORK_DIR}/shared_prefix_synthetic_short.yaml + # workload_output_file=${LLMDBENCH_CONTROL_WORK_DIR}/$workload_template_type/$workload_template_file_name + echo "workload_output_file = $workload_output_file" + ## + mkdir -p ${LLMDBENCH_CONTROL_WORK_DIR}/$workload_template_type + + echo "s^REPLACE_ENV_LLMDBENCH_DEPLOY_CURRENT_MODEL^${LLMDBENCH_DEPLOY_CURRENT_MODEL}^g" >> ${LLMDBENCH_CONTROL_WORK_DIR}/setup/sed-commands + echo "s^REPLACE_ENV_LLMDBENCH_HARNESS_STACK_ENDPOINT_URL^${LLMDBENCH_HARNESS_STACK_ENDPOINT_URL}^g" >> ${LLMDBENCH_CONTROL_WORK_DIR}/setup/sed-commands + echo "s^REPLACE_ENV_LLMDBENCH_DEPLOY_CURRENT_TOKENIZER^${LLMDBENCH_DEPLOY_CURRENT_TOKENIZER}^g" >> ${LLMDBENCH_CONTROL_WORK_DIR}/setup/sed-commands + echo "s^question_len: .*^question_len: ${QUESTION_LEN}^g" >> ${LLMDBENCH_CONTROL_WORK_DIR}/setup/sed-commands + echo "s^output_len: .*^output_len: ${OUTPUT_LEN}^g" >> ${LLMDBENCH_CONTROL_WORK_DIR}/setup/sed-commands + echo "s^ path: .*^ path: ${LLMDBENCH_RUN_EXPERIMENT_RESULTS_DIR}^g" >> ${LLMDBENCH_CONTROL_WORK_DIR}/setup/sed-commands + + echo "------" + cat ${LLMDBENCH_CONTROL_WORK_DIR}/setup/sed-commands + echo "------" + echo "workload_output_file=$workload_output_file" + sed -f ${LLMDBENCH_CONTROL_WORK_DIR}/setup/sed-commands $workload_template_full_path > $workload_output_file + + cat $workload_output_file done - echo "✅ workload completed" + llm-d-benchmark.sh - name: upload-results - image: alpine:3.20 - script : | - #!/bin/sh - echo "🚚 TBD: Upload results" + image: amazon/aws-cli:2.31.9 + workingDir: $(workspaces.data.path) + env: + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: ibm-cos-secret + key: AWS_ACCESS_KEY_ID + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: ibm-cos-secret + key: AWS_SECRET_ACCESS_KEY + - name: AWS_EC2_METADATA_DISABLED + value: "true" + script: | + 
#!/usr/bin/env sh + set -euo pipefail + + dnf install tar gzip -y + + EXPERIMENT_ID="experiment-$(echo -n $(params.pipelineUID) | cut -c1-8)" + EXPERIMENT_RESULTS_FOLDER="$(params.harnessName)_${EXPERIMENT_ID}_$(context.taskRun.name)" + ARCHIVE_NAME="${EXPERIMENT_RESULTS_FOLDER}.tar.gz" + + tar -czf ${ARCHIVE_NAME} -C "$(workspaces.data.path)" ${EXPERIMENT_RESULTS_FOLDER} + + aws s3 cp ${ARCHIVE_NAME} "s3://$(params.bucket)/${ARCHIVE_NAME}" \ + --endpoint-url "$(params.endpoint)" \ + --content-type "application/x-tar" \ + --content-encoding "gzip" \ + --no-progress + # --recursive \ + + rm -rf ${ARCHIVE_NAME} + + echo "✅ Uploaded results to ${ARCHIVE_NAME}" - name: delete-namespace image: alpine/helm:3.14.0 script : | #!/bin/sh + NAMESPACE="$(params.namespace)-$(context.taskRun.name)" + if [ "$(params.dry-run)" = "true" ]; then echo ">> skipping" exit 0 fi - NAMESPACE="$(params.namespace)-$(context.taskRun.name)" - - # helm delete --namespace ${NAMESPACE} $(params.experimentName)-harness # kubectl delete namespace ${NAMESPACE} - echo "✅ workload pod deleted" + echo "✅ workload namespace deleted" - name: log-completion image: alpine:3.20 diff --git a/tekton-poc/pipeline/experiment-taskrun.yaml b/tekton-poc/pipeline/experiment-taskrun.yaml deleted file mode 100644 index 86fca127..00000000 --- a/tekton-poc/pipeline/experiment-taskrun.yaml +++ /dev/null @@ -1,25 +0,0 @@ -apiVersion: tekton.dev/v1 -kind: TaskRun -metadata: - name: experiment-run -spec: - serviceAccountName: helm-installer - taskRef: - name: experiment - params: - - name: namespace - value: kalantar - - name: model-id - value: "Qwen/Qwen3-0.6B" - - name: experimentBaseUrl - value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - - name: harnessProfile - value: shared_prefix_synthetic.yaml - - - name: gaiePluginConfig - value: "inf-sche-queue.yaml" - - name: question_len - value: 100 - - name: output_len - value: 300 - diff --git a/tekton-poc/pipeline/pipelinerun-matrix-subset.yaml b/tekton-poc/pipeline/pipelinerun-matrix-subset.yaml new file mode 100644 index 00000000..244d8c72 --- /dev/null +++ b/tekton-poc/pipeline/pipelinerun-matrix-subset.yaml @@ -0,0 +1,81 @@ +apiVersion: tekton.dev/v1 +kind: PipelineRun +metadata: + name: experiment-matrix-run +spec: + taskRunTemplate: + serviceAccountName: helm-installer + workspaces: + - name: data + persistentVolumeClaim: + claimName: workspace-pvc + pipelineSpec: + workspaces: + - name: data + tasks: + - name: run-experiment + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: "Qwen/Qwen3-0.6B" + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + matrix: + params: + - name: gaiePluginConfig + value: + - "inf-sche-none.yaml" + - "inf-sche-prefix.yaml" + - "inf-sche-kv.yaml" + - "inf-sche-queue.yaml" + - name: question_len + value: + - "100" + - "300" + - "1000" + - name: output_len + value: + - "100" + - "300" + - "1000" + include: + - name: combo-1 + params: + - name: gaiePluginConfig + value: "inf-sche-none.yaml" + - name: question_len + value: "100" + - name: output_len + value: "100" + - name: combo-2 + params: + - name: gaiePluginConfig + value: "inf-sche-prefix.yaml" + - name: question_len + value: "300" + - name: output_len + value: "300" 
+ - name: combo-3 + params: + - name: gaiePluginConfig + value: "inf-sche-kv.yaml" + - name: question_len + value: "1000" + - name: output_len + value: "100" + - name: combo-4 + params: + - name: gaiePluginConfig + value: "inf-sche-queue.yaml" + - name: question_len + value: "300" + - name: output_len + value: "1000" \ No newline at end of file diff --git a/tekton-poc/pipeline/pipelinerun-matrix.yaml b/tekton-poc/pipeline/pipelinerun-matrix.yaml index 739e6d30..73c6ef5f 100644 --- a/tekton-poc/pipeline/pipelinerun-matrix.yaml +++ b/tekton-poc/pipeline/pipelinerun-matrix.yaml @@ -5,11 +5,20 @@ metadata: spec: taskRunTemplate: serviceAccountName: helm-installer + workspaces: + - name: data + persistentVolumeClaim: + claimName: workspace-pvc pipelineSpec: + workspaces: + - name: data tasks: - name: run-experiment taskRef: name: experiment + workspaces: + - name: data + workspace: data params: - name: namespace value: CHANGE_ME @@ -18,7 +27,9 @@ spec: - name: experimentBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile - value: shared_prefix_synthetic.yaml + value: shared_prefix_synthetic_short.yaml + - name: pipelineUID + value: "$(context.pipelineRun.uid)" matrix: params: - name: gaiePluginConfig diff --git a/tekton-poc/pipeline/pipelinerun-sequential-1.yaml b/tekton-poc/pipeline/pipelinerun-sequential-1.yaml new file mode 100644 index 00000000..a4b77783 --- /dev/null +++ b/tekton-poc/pipeline/pipelinerun-sequential-1.yaml @@ -0,0 +1,841 @@ +apiVersion: tekton.dev/v1 +kind: PipelineRun +metadata: + name: experiment-matrix-run +spec: + taskRunTemplate: + serviceAccountName: helm-installer + workspaces: + - name: data + persistentVolumeClaim: + claimName: workspace-pvc + pipelineSpec: + workspaces: + - name: data + tasks: + - name: run-experiment-0 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-none.yaml + - name: question_len + value: '100' + - name: output_len + value: '100' + - name: run-experiment-1 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-none.yaml + - name: question_len + value: '100' + - name: output_len + value: '300' + runAfter: + - run-experiment-0 + - name: run-experiment-2 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-none.yaml + - name: question_len + value: '100' + - name: output_len + value: '1000' + runAfter: + - 
run-experiment-1 + - name: run-experiment-3 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-none.yaml + - name: question_len + value: '300' + - name: output_len + value: '100' + runAfter: + - run-experiment-2 + - name: run-experiment-4 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-none.yaml + - name: question_len + value: '300' + - name: output_len + value: '300' + runAfter: + - run-experiment-3 + - name: run-experiment-5 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-none.yaml + - name: question_len + value: '300' + - name: output_len + value: '1000' + runAfter: + - run-experiment-4 + - name: run-experiment-6 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-none.yaml + - name: question_len + value: '1000' + - name: output_len + value: '100' + runAfter: + - run-experiment-5 + - name: run-experiment-7 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-none.yaml + - name: question_len + value: '1000' + - name: output_len + value: '300' + runAfter: + - run-experiment-6 + - name: run-experiment-8 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-none.yaml + - name: question_len + value: '1000' + - name: output_len + value: '1000' + runAfter: + - run-experiment-7 + - name: run-experiment-9 + taskRef: + name: 
experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-prefix.yaml + - name: question_len + value: '100' + - name: output_len + value: '100' + runAfter: + - run-experiment-8 + - name: run-experiment-10 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-prefix.yaml + - name: question_len + value: '100' + - name: output_len + value: '300' + runAfter: + - run-experiment-9 + - name: run-experiment-11 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-prefix.yaml + - name: question_len + value: '100' + - name: output_len + value: '1000' + runAfter: + - run-experiment-10 + - name: run-experiment-12 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-prefix.yaml + - name: question_len + value: '300' + - name: output_len + value: '100' + runAfter: + - run-experiment-11 + - name: run-experiment-13 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-prefix.yaml + - name: question_len + value: '300' + - name: output_len + value: '300' + runAfter: + - run-experiment-12 + - name: run-experiment-14 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-prefix.yaml + - name: question_len + value: '300' + - name: output_len + value: '1000' + runAfter: + - run-experiment-13 + - name: run-experiment-15 + taskRef: + name: experiment + workspaces: + - name: data + 
workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-prefix.yaml + - name: question_len + value: '1000' + - name: output_len + value: '100' + runAfter: + - run-experiment-14 + - name: run-experiment-16 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-prefix.yaml + - name: question_len + value: '1000' + - name: output_len + value: '300' + runAfter: + - run-experiment-15 + - name: run-experiment-17 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-prefix.yaml + - name: question_len + value: '1000' + - name: output_len + value: '1000' + runAfter: + - run-experiment-16 + - name: run-experiment-18 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-kv.yaml + - name: question_len + value: '100' + - name: output_len + value: '100' + runAfter: + - run-experiment-17 + - name: run-experiment-19 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-kv.yaml + - name: question_len + value: '100' + - name: output_len + value: '300' + runAfter: + - run-experiment-18 + - name: run-experiment-20 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-kv.yaml + - name: question_len + value: '100' + - name: output_len + value: '1000' + runAfter: + - run-experiment-19 + - name: run-experiment-21 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + 
value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-kv.yaml + - name: question_len + value: '300' + - name: output_len + value: '100' + runAfter: + - run-experiment-20 + - name: run-experiment-22 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-kv.yaml + - name: question_len + value: '300' + - name: output_len + value: '300' + runAfter: + - run-experiment-21 + - name: run-experiment-23 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-kv.yaml + - name: question_len + value: '300' + - name: output_len + value: '1000' + runAfter: + - run-experiment-22 + - name: run-experiment-24 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-kv.yaml + - name: question_len + value: '1000' + - name: output_len + value: '100' + runAfter: + - run-experiment-23 + - name: run-experiment-25 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-kv.yaml + - name: question_len + value: '1000' + - name: output_len + value: '300' + runAfter: + - run-experiment-24 + - name: run-experiment-26 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-kv.yaml + - name: question_len + value: '1000' + - name: output_len + value: '1000' + runAfter: + - run-experiment-25 + - name: run-experiment-27 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - 
name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-queue.yaml + - name: question_len + value: '100' + - name: output_len + value: '100' + runAfter: + - run-experiment-26 + - name: run-experiment-28 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-queue.yaml + - name: question_len + value: '100' + - name: output_len + value: '300' + runAfter: + - run-experiment-27 + - name: run-experiment-29 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-queue.yaml + - name: question_len + value: '100' + - name: output_len + value: '1000' + runAfter: + - run-experiment-28 + - name: run-experiment-30 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-queue.yaml + - name: question_len + value: '300' + - name: output_len + value: '100' + runAfter: + - run-experiment-29 + - name: run-experiment-31 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-queue.yaml + - name: question_len + value: '300' + - name: output_len + value: '300' + runAfter: + - run-experiment-30 + - name: run-experiment-32 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-queue.yaml + - name: question_len + value: '300' + - name: output_len + value: '1000' + runAfter: + - run-experiment-31 + - name: run-experiment-33 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: 
https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-queue.yaml + - name: question_len + value: '1000' + - name: output_len + value: '100' + runAfter: + - run-experiment-32 + - name: run-experiment-34 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-queue.yaml + - name: question_len + value: '1000' + - name: output_len + value: '300' + runAfter: + - run-experiment-33 + - name: run-experiment-35 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-queue.yaml + - name: question_len + value: '1000' + - name: output_len + value: '1000' + runAfter: + - run-experiment-34 diff --git a/tekton-poc/pipeline/pipelinerun-sequential-4-sliding.yaml b/tekton-poc/pipeline/pipelinerun-sequential-4-sliding.yaml new file mode 100644 index 00000000..76f815b6 --- /dev/null +++ b/tekton-poc/pipeline/pipelinerun-sequential-4-sliding.yaml @@ -0,0 +1,835 @@ +apiVersion: tekton.dev/v1 +kind: PipelineRun +metadata: + name: experiment-matrix-run +spec: + taskRunTemplate: + serviceAccountName: helm-installer + workspaces: + - name: data + persistentVolumeClaim: + claimName: workspace-pvc + pipelineSpec: + workspaces: + - name: data + tasks: + - name: run-experiment-0 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-none.yaml + - name: question_len + value: '100' + - name: output_len + value: '100' + - name: run-experiment-1 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-none.yaml + - name: question_len + value: '100' + - name: output_len + value: '300' + - name: run-experiment-2 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: 
harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-none.yaml + - name: question_len + value: '100' + - name: output_len + value: '1000' + - name: run-experiment-3 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-none.yaml + - name: question_len + value: '300' + - name: output_len + value: '100' + - name: run-experiment-4 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-none.yaml + - name: question_len + value: '300' + - name: output_len + value: '300' + runAfter: + - run-experiment-0 + - name: run-experiment-5 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-none.yaml + - name: question_len + value: '300' + - name: output_len + value: '1000' + runAfter: + - run-experiment-1 + - name: run-experiment-6 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-none.yaml + - name: question_len + value: '1000' + - name: output_len + value: '100' + runAfter: + - run-experiment-2 + - name: run-experiment-7 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-none.yaml + - name: question_len + value: '1000' + - name: output_len + value: '300' + runAfter: + - run-experiment-3 + - name: run-experiment-8 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-none.yaml + - name: question_len 
+ value: '1000' + - name: output_len + value: '1000' + runAfter: + - run-experiment-4 + - name: run-experiment-9 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-prefix.yaml + - name: question_len + value: '100' + - name: output_len + value: '100' + runAfter: + - run-experiment-5 + - name: run-experiment-10 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-prefix.yaml + - name: question_len + value: '100' + - name: output_len + value: '300' + runAfter: + - run-experiment-6 + - name: run-experiment-11 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-prefix.yaml + - name: question_len + value: '100' + - name: output_len + value: '1000' + runAfter: + - run-experiment-7 + - name: run-experiment-12 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-prefix.yaml + - name: question_len + value: '300' + - name: output_len + value: '100' + runAfter: + - run-experiment-8 + - name: run-experiment-13 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-prefix.yaml + - name: question_len + value: '300' + - name: output_len + value: '300' + runAfter: + - run-experiment-9 + - name: run-experiment-14 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-prefix.yaml + - name: question_len + value: '300' + - name: output_len + value: 
'1000' + runAfter: + - run-experiment-10 + - name: run-experiment-15 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-prefix.yaml + - name: question_len + value: '1000' + - name: output_len + value: '100' + runAfter: + - run-experiment-11 + - name: run-experiment-16 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-prefix.yaml + - name: question_len + value: '1000' + - name: output_len + value: '300' + runAfter: + - run-experiment-12 + - name: run-experiment-17 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-prefix.yaml + - name: question_len + value: '1000' + - name: output_len + value: '1000' + runAfter: + - run-experiment-13 + - name: run-experiment-18 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-kv.yaml + - name: question_len + value: '100' + - name: output_len + value: '100' + runAfter: + - run-experiment-14 + - name: run-experiment-19 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-kv.yaml + - name: question_len + value: '100' + - name: output_len + value: '300' + runAfter: + - run-experiment-15 + - name: run-experiment-20 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-kv.yaml + - name: question_len + value: '100' + - name: output_len + value: '1000' + runAfter: + - run-experiment-16 + - name: 
run-experiment-21 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-kv.yaml + - name: question_len + value: '300' + - name: output_len + value: '100' + runAfter: + - run-experiment-17 + - name: run-experiment-22 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-kv.yaml + - name: question_len + value: '300' + - name: output_len + value: '300' + runAfter: + - run-experiment-18 + - name: run-experiment-23 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-kv.yaml + - name: question_len + value: '300' + - name: output_len + value: '1000' + runAfter: + - run-experiment-19 + - name: run-experiment-24 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-kv.yaml + - name: question_len + value: '1000' + - name: output_len + value: '100' + runAfter: + - run-experiment-20 + - name: run-experiment-25 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-kv.yaml + - name: question_len + value: '1000' + - name: output_len + value: '300' + runAfter: + - run-experiment-21 + - name: run-experiment-26 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-kv.yaml + - name: question_len + value: '1000' + - name: output_len + value: '1000' + runAfter: + - run-experiment-22 + - name: run-experiment-27 + taskRef: + name: experiment + workspaces: + 
- name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-queue.yaml + - name: question_len + value: '100' + - name: output_len + value: '100' + runAfter: + - run-experiment-23 + - name: run-experiment-28 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-queue.yaml + - name: question_len + value: '100' + - name: output_len + value: '300' + runAfter: + - run-experiment-24 + - name: run-experiment-29 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-queue.yaml + - name: question_len + value: '100' + - name: output_len + value: '1000' + runAfter: + - run-experiment-25 + - name: run-experiment-30 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-queue.yaml + - name: question_len + value: '300' + - name: output_len + value: '100' + runAfter: + - run-experiment-26 + - name: run-experiment-31 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-queue.yaml + - name: question_len + value: '300' + - name: output_len + value: '300' + runAfter: + - run-experiment-27 + - name: run-experiment-32 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-queue.yaml + - name: question_len + value: '300' + - name: output_len + value: '1000' + runAfter: + - run-experiment-28 + - name: run-experiment-33 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - 
name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-queue.yaml + - name: question_len + value: '1000' + - name: output_len + value: '100' + runAfter: + - run-experiment-29 + - name: run-experiment-34 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-queue.yaml + - name: question_len + value: '1000' + - name: output_len + value: '300' + runAfter: + - run-experiment-30 + - name: run-experiment-35 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-queue.yaml + - name: question_len + value: '1000' + - name: output_len + value: '1000' + runAfter: + - run-experiment-31 diff --git a/tekton-poc/pipeline/pipelinerun-sequential-4.yaml b/tekton-poc/pipeline/pipelinerun-sequential-4.yaml new file mode 100644 index 00000000..988117a1 --- /dev/null +++ b/tekton-poc/pipeline/pipelinerun-sequential-4.yaml @@ -0,0 +1,931 @@ +apiVersion: tekton.dev/v1 +kind: PipelineRun +metadata: + name: experiment-matrix-run +spec: + taskRunTemplate: + serviceAccountName: helm-installer + workspaces: + - name: data + persistentVolumeClaim: + claimName: workspace-pvc + pipelineSpec: + workspaces: + - name: data + tasks: + - name: run-experiment-0 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-none.yaml + - name: question_len + value: '100' + - name: output_len + value: '100' + - name: run-experiment-1 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-none.yaml + - name: question_len + value: '100' + - name: output_len + value: '300' + - name: run-experiment-2 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: 
https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-none.yaml + - name: question_len + value: '100' + - name: output_len + value: '1000' + - name: run-experiment-3 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-none.yaml + - name: question_len + value: '300' + - name: output_len + value: '100' + - name: run-experiment-4 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-none.yaml + - name: question_len + value: '300' + - name: output_len + value: '300' + runAfter: + - run-experiment-0 + - run-experiment-1 + - run-experiment-2 + - run-experiment-3 + - name: run-experiment-5 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-none.yaml + - name: question_len + value: '300' + - name: output_len + value: '1000' + runAfter: + - run-experiment-0 + - run-experiment-1 + - run-experiment-2 + - run-experiment-3 + - name: run-experiment-6 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-none.yaml + - name: question_len + value: '1000' + - name: output_len + value: '100' + runAfter: + - run-experiment-0 + - run-experiment-1 + - run-experiment-2 + - run-experiment-3 + - name: run-experiment-7 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-none.yaml + - name: question_len + value: '1000' + - name: output_len + value: '300' + runAfter: + - run-experiment-0 + - run-experiment-1 + - run-experiment-2 + - run-experiment-3 + - name: run-experiment-8 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + 
params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-none.yaml + - name: question_len + value: '1000' + - name: output_len + value: '1000' + runAfter: + - run-experiment-4 + - run-experiment-5 + - run-experiment-6 + - run-experiment-7 + - name: run-experiment-9 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-prefix.yaml + - name: question_len + value: '100' + - name: output_len + value: '100' + runAfter: + - run-experiment-4 + - run-experiment-5 + - run-experiment-6 + - run-experiment-7 + - name: run-experiment-10 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-prefix.yaml + - name: question_len + value: '100' + - name: output_len + value: '300' + runAfter: + - run-experiment-4 + - run-experiment-5 + - run-experiment-6 + - run-experiment-7 + - name: run-experiment-11 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-prefix.yaml + - name: question_len + value: '100' + - name: output_len + value: '1000' + runAfter: + - run-experiment-4 + - run-experiment-5 + - run-experiment-6 + - run-experiment-7 + - name: run-experiment-12 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-prefix.yaml + - name: question_len + value: '300' + - name: output_len + value: '100' + runAfter: + - run-experiment-8 + - run-experiment-9 + - run-experiment-10 + - run-experiment-11 + - name: run-experiment-13 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - 
name: gaiePluginConfig + value: inf-sche-prefix.yaml + - name: question_len + value: '300' + - name: output_len + value: '300' + runAfter: + - run-experiment-8 + - run-experiment-9 + - run-experiment-10 + - run-experiment-11 + - name: run-experiment-14 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-prefix.yaml + - name: question_len + value: '300' + - name: output_len + value: '1000' + runAfter: + - run-experiment-8 + - run-experiment-9 + - run-experiment-10 + - run-experiment-11 + - name: run-experiment-15 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-prefix.yaml + - name: question_len + value: '1000' + - name: output_len + value: '100' + runAfter: + - run-experiment-8 + - run-experiment-9 + - run-experiment-10 + - run-experiment-11 + - name: run-experiment-16 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-prefix.yaml + - name: question_len + value: '1000' + - name: output_len + value: '300' + runAfter: + - run-experiment-12 + - run-experiment-13 + - run-experiment-14 + - run-experiment-15 + - name: run-experiment-17 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-prefix.yaml + - name: question_len + value: '1000' + - name: output_len + value: '1000' + runAfter: + - run-experiment-12 + - run-experiment-13 + - run-experiment-14 + - run-experiment-15 + - name: run-experiment-18 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-kv.yaml + - name: question_len + value: '100' + - name: output_len + value: '100' + runAfter: + - run-experiment-12 + - run-experiment-13 + - run-experiment-14 + - run-experiment-15 + - name: run-experiment-19 + taskRef: + name: experiment + workspaces: + - 
name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-kv.yaml + - name: question_len + value: '100' + - name: output_len + value: '300' + runAfter: + - run-experiment-12 + - run-experiment-13 + - run-experiment-14 + - run-experiment-15 + - name: run-experiment-20 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-kv.yaml + - name: question_len + value: '100' + - name: output_len + value: '1000' + runAfter: + - run-experiment-16 + - run-experiment-17 + - run-experiment-18 + - run-experiment-19 + - name: run-experiment-21 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-kv.yaml + - name: question_len + value: '300' + - name: output_len + value: '100' + runAfter: + - run-experiment-16 + - run-experiment-17 + - run-experiment-18 + - run-experiment-19 + - name: run-experiment-22 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-kv.yaml + - name: question_len + value: '300' + - name: output_len + value: '300' + runAfter: + - run-experiment-16 + - run-experiment-17 + - run-experiment-18 + - run-experiment-19 + - name: run-experiment-23 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-kv.yaml + - name: question_len + value: '300' + - name: output_len + value: '1000' + runAfter: + - run-experiment-16 + - run-experiment-17 + - run-experiment-18 + - run-experiment-19 + - name: run-experiment-24 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: 
shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-kv.yaml + - name: question_len + value: '1000' + - name: output_len + value: '100' + runAfter: + - run-experiment-20 + - run-experiment-21 + - run-experiment-22 + - run-experiment-23 + - name: run-experiment-25 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-kv.yaml + - name: question_len + value: '1000' + - name: output_len + value: '300' + runAfter: + - run-experiment-20 + - run-experiment-21 + - run-experiment-22 + - run-experiment-23 + - name: run-experiment-26 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-kv.yaml + - name: question_len + value: '1000' + - name: output_len + value: '1000' + runAfter: + - run-experiment-20 + - run-experiment-21 + - run-experiment-22 + - run-experiment-23 + - name: run-experiment-27 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-queue.yaml + - name: question_len + value: '100' + - name: output_len + value: '100' + runAfter: + - run-experiment-20 + - run-experiment-21 + - run-experiment-22 + - run-experiment-23 + - name: run-experiment-28 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-queue.yaml + - name: question_len + value: '100' + - name: output_len + value: '300' + runAfter: + - run-experiment-24 + - run-experiment-25 + - run-experiment-26 + - run-experiment-27 + - name: run-experiment-29 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-queue.yaml + - name: question_len + value: '100' + - name: output_len + value: '1000' + runAfter: + - run-experiment-24 + - run-experiment-25 + - run-experiment-26 + - run-experiment-27 + - name: run-experiment-30 + taskRef: + name: 
experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-queue.yaml + - name: question_len + value: '300' + - name: output_len + value: '100' + runAfter: + - run-experiment-24 + - run-experiment-25 + - run-experiment-26 + - run-experiment-27 + - name: run-experiment-31 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-queue.yaml + - name: question_len + value: '300' + - name: output_len + value: '300' + runAfter: + - run-experiment-24 + - run-experiment-25 + - run-experiment-26 + - run-experiment-27 + - name: run-experiment-32 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-queue.yaml + - name: question_len + value: '300' + - name: output_len + value: '1000' + runAfter: + - run-experiment-28 + - run-experiment-29 + - run-experiment-30 + - run-experiment-31 + - name: run-experiment-33 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-queue.yaml + - name: question_len + value: '1000' + - name: output_len + value: '100' + runAfter: + - run-experiment-28 + - run-experiment-29 + - run-experiment-30 + - run-experiment-31 + - name: run-experiment-34 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-queue.yaml + - name: question_len + value: '1000' + - name: output_len + value: '300' + runAfter: + - run-experiment-28 + - run-experiment-29 + - run-experiment-30 + - run-experiment-31 + - name: run-experiment-35 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: 
https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-queue.yaml + - name: question_len + value: '1000' + - name: output_len + value: '1000' + runAfter: + - run-experiment-28 + - run-experiment-29 + - run-experiment-30 + - run-experiment-31 diff --git a/tekton-poc/pipeline/pipelinerun-sequential-unroll-gaiePluginConfig.yaml b/tekton-poc/pipeline/pipelinerun-sequential-unroll-gaiePluginConfig.yaml new file mode 100644 index 00000000..5c36a680 --- /dev/null +++ b/tekton-poc/pipeline/pipelinerun-sequential-unroll-gaiePluginConfig.yaml @@ -0,0 +1,119 @@ +apiVersion: tekton.dev/v1 +kind: PipelineRun +metadata: + name: experiment-matrix-run +spec: + taskRunTemplate: + serviceAccountName: helm-installer + workspaces: + - name: data + persistentVolumeClaim: + claimName: workspace-pvc + pipelineSpec: + workspaces: + - name: data + tasks: + - name: run-experiment-0 + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-none.yaml + matrix: + params: + - name: question_len + value: &id001 + - '100' + - '300' + - '1000' + - name: output_len + value: &id002 + - '100' + - '300' + - '1000' + - name: run-experiment-1 + taskRef: + name: experiment + runAfter: + - run-experiment-0 + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-prefix.yaml + matrix: + params: + - name: question_len + value: *id001 + - name: output_len + value: *id002 + - name: run-experiment-2 + taskRef: + name: experiment + runAfter: + - run-experiment-1 + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-kv.yaml + matrix: + params: + - name: question_len + value: *id001 + - name: output_len + value: *id002 + - name: run-experiment-3 + taskRef: + name: experiment + runAfter: + - run-experiment-2 + workspaces: + - name: data + workspace: data + params: + - name: namespace + value: kalantar + - name: model-id + value: Qwen/Qwen3-0.6B + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + - name: harnessProfile + value: shared_prefix_synthetic.yaml + - name: gaiePluginConfig + value: inf-sche-queue.yaml + matrix: + params: + - name: question_len + value: *id001 + - name: output_len + value: *id002 diff --git a/tekton-poc/pipeline/stepactions.yaml b/tekton-poc/pipeline/stepactions.yaml index 
7a135024..60d28385 100644 --- a/tekton-poc/pipeline/stepactions.yaml +++ b/tekton-poc/pipeline/stepactions.yaml @@ -236,6 +236,10 @@ spec: exit 0 fi + SHA256CMD=$(type -p gsha256sum || type -p sha256sum) + NAMESPACE_HASH=$(echo -n "$HELM_NAMESPACE" | $SHA256CMD | awk '{print $1}' | cut -c1-8) + HELM_RELEASE=$(echo "$HELM_RELEASE" | sed "s/NAMESPACE_HASH/$NAMESPACE_HASH/g") + # if a GIT_URL is defined, clone the project; we will use helm chart from this if [ -n "${GIT_URL:-}" ]; then mkdir -p "$CHECKOUT_DIR" @@ -300,7 +304,7 @@ spec: esac if [ -n "${HELM_EXTRA_ARGS:-}" ]; then - HELM_EXTRA_ARGS=$(echo "$HELM_EXTRA_ARGS" | sed "s/DATE/$(date +%s)/g") + HELM_EXTRA_ARGS=$(echo "$HELM_EXTRA_ARGS" | sed "s/NAMESPACE_HASH/$NAMESPACE_HASH/g") fi echo "==> helm upgrade --install ${HELM_RELEASE} ${CHART_REF} --namespace ${HELM_NAMESPACE} ${VERSION_FLAG} ${CREATE_NS_FLAG} ${WAIT_FLAG} ${TIMEOUT_FLAG} ${VALUES_FLAG} ${HELM_EXTRA_ARGS}" From 59b329aac2e35c8f0b77024fee0d77bb164e0fd1 Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Wed, 8 Oct 2025 14:57:03 -0400 Subject: [PATCH 19/44] update readme Signed-off-by: Michael Kalantar --- tekton-poc/README.md | 25 +++++++++++++------ ... => pipelinerun-sequential-4-barrier.yaml} | 0 2 files changed, 18 insertions(+), 7 deletions(-) rename tekton-poc/pipeline/{pipelinerun-sequential-4.yaml => pipelinerun-sequential-4-barrier.yaml} (100%) diff --git a/tekton-poc/README.md b/tekton-poc/README.md index 6bacc50d..677fa3af 100644 --- a/tekton-poc/README.md +++ b/tekton-poc/README.md @@ -37,8 +37,8 @@ A single Task measures performance over a single set of values from the factor/v 4. Download the model from HuggingFace to a PVC 5. Deploy the model 6. Run the workload for a single set of parameters -7. Upload the results to external storage (not yet implemented)\ -8. Delete the experiment namespace +7. Upload the results to external storage (s3) +8. Delete the experiment namespace (not yet implemented) A PipelineRun is created that embeds a Pipeline containing one Task with a matrix of values for a set of factors. An example is `pipelinerun-matrix.yaml`. @@ -60,12 +60,15 @@ A PipelineRun is created that embeds a Pipeline containing one Task with a matri ```shell kubectl apply -f pipeline/roles.yaml ``` -4. Deploy the steps and tasks: + +4. Create a RWX PVC `workspace-pvc` for storing execution results. This PVC is shared between all tasks. + +5. Deploy the steps and tasks: ```shell kubectl apply -f pipeline/stepactions.yaml kubectl apply -f pipeline/experiment-task.yaml ``` -5. Run experiments (set the parameter `namespace` to $NAMESPACE): +6. Run experiments (set the parameter `namespace` to $NAMESPACE): ```shell kubectl apply -f pipeline/pipelinerun-matrix.yaml ``` @@ -84,9 +87,17 @@ tkn tr logs -f ## Managing Parallelism -The sample `PipelineRun` (`pipeline/pipelinerun-matrix.yaml`) executes all the tasks in parallel. Depdending on the size of the matrix, this may require a large number of resources. -A _matrix_ based `Task` can be unrolled into multiple tasks to reduce the parallelism. -The utility script `utility/transform-pr-parallel.py` does this as follows: +The default PipelineSpec (in `pipeline/pipelinerun-matrix.yaml`) executes all the tasks in parallel. It can be modified in a number of ways to reduce the amount of parallel execution (at the expense of time). + +Some examples are provided: + +- `pipeline/pipelinerun-matrix-subset.yaml`: Uses `matrix.include` to list an explicit set of combinations to execute. 
+- `pipeline/pipelinerun-sequential-1.yaml`: Executes 1 task at a time. Each task depends on the previous one. +- `pipeline/pipelinerun-sequential-4-barrier.yaml`: Executes 4 tasks at a time. When all 4 complete, the next 4 start. +- `pipeline/pipelinerun-sequential-4-sliding.yaml`: Executes 4 tasks at a time. When one task completes another starts. +- `pipeline/pipelinerun-sequential-unroll-gaiePluginConfig.yaml`: Creates one task for each value of one dimention of the matrix. Each is executed in sequence. However, for other dimensions, parallel execution takes place. + +The utility script `utility/transform-pr-parallel.py` can be used to transform a default `PipelineRun` into alternatives as follows: 1. Unroll a single parameter into one `Task` per value. Each resulting Task defines a matrix over the remaining parameters. diff --git a/tekton-poc/pipeline/pipelinerun-sequential-4.yaml b/tekton-poc/pipeline/pipelinerun-sequential-4-barrier.yaml similarity index 100% rename from tekton-poc/pipeline/pipelinerun-sequential-4.yaml rename to tekton-poc/pipeline/pipelinerun-sequential-4-barrier.yaml From 8dfadd87e9b034bfbd4a9095b03fdec401172917 Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Wed, 8 Oct 2025 15:44:06 -0400 Subject: [PATCH 20/44] pin image version Signed-off-by: Michael Kalantar --- tekton-poc/pipeline/experiment-task.yaml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tekton-poc/pipeline/experiment-task.yaml b/tekton-poc/pipeline/experiment-task.yaml index 2f651b89..8108f7a5 100644 --- a/tekton-poc/pipeline/experiment-task.yaml +++ b/tekton-poc/pipeline/experiment-task.yaml @@ -140,7 +140,7 @@ spec: echo "🔄 Starting sweep step ..." - name: prepare-namespace - image: quay.io/openshift/origin-cli:latest + image: quay.io/openshift/origin-cli:4.21 script: | #!/bin/sh @@ -155,7 +155,6 @@ spec: kubectl create namespace ${NAMESPACE} \ --dry-run=client -o yaml | kubectl apply -f - - # HF_TOKEN=$( HF_TOKEN=$( kubectl get secret hf-secret \ --namespace "$(context.taskRun.namespace)" \ @@ -163,7 +162,7 @@ spec: | tr -d '\n' \ | base64 -d ) - # kubectl --namespace $(context.taskRun.namespace) get secret hf-secret -o jsonpath='{.data.HF_TOKEN}' | tr -d '\n' | base64 -d) + kubectl create secret generic hf-secret \ --namespace ${NAMESPACE} \ --from-literal="HF_TOKEN=${HF_TOKEN}" \ @@ -311,6 +310,7 @@ spec: --timeout=${MODEL_START_TIMEOUT}s echo "✅ (decode) pods serving model ${MODEL_ID} created" + # TBD check if any prefill pods and wait if so # kubectl --namespace ${NAMESPACE} \ # wait pod \ # -l llm-d.ai/model=${MODEL_LABEL},llm-d.ai/role=prefill \ @@ -325,6 +325,7 @@ spec: --timeout=${MODEL_START_TIMEOUT}s echo "✅ (decode) pods serving model ${MODEL_ID} ready" + # TBD check if any prefill pods and wait if so # kubectl --namespace ${NAMESPACE} \ # wait pod \ # -l llm-d.ai/model=${MODEL_LABEL},llm-d.ai/role=prefill \ From ed015a8bf21b78e72a86b7d8ec6e09761ff26fa3 Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Thu, 9 Oct 2025 09:45:05 -0400 Subject: [PATCH 21/44] update roles.yaml Signed-off-by: Michael Kalantar --- tekton-poc/README.md | 2 +- tekton-poc/pipeline/roles.yaml | 7 ++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/tekton-poc/README.md b/tekton-poc/README.md index 677fa3af..dc5c58df 100644 --- a/tekton-poc/README.md +++ b/tekton-poc/README.md @@ -56,7 +56,7 @@ A PipelineRun is created that embeds a Pipeline containing one Task with a matri --from-literal="HF_TOKEN=${HF_TOKEN}" \ --dry-run=client -o yaml | kubectl apply -f - 
``` -3. Give the task needed permissions +3. Give the task needed permissions (edit to set namespace) ```shell kubectl apply -f pipeline/roles.yaml ``` diff --git a/tekton-poc/pipeline/roles.yaml b/tekton-poc/pipeline/roles.yaml index 68a8aa2d..5bf06b1a 100644 --- a/tekton-poc/pipeline/roles.yaml +++ b/tekton-poc/pipeline/roles.yaml @@ -2,7 +2,6 @@ apiVersion: v1 kind: ServiceAccount metadata: name: helm-installer - namespace: kalantar --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole @@ -55,7 +54,7 @@ roleRef: subjects: - kind: ServiceAccount name: helm-installer - namespace: kalantar + namespace: CHANGE_ME --- apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding @@ -73,7 +72,6 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: name: helm-access - namespace: kalantar rules: - apiGroups: [""] resources: ["secrets", "configmaps", "services", "pods", "namespaces", "serviceaccounts", "persistentvolumeclaims"] @@ -111,11 +109,10 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: name: helm-access-binding - namespace: kalantar subjects: - kind: ServiceAccount name: helm-installer - namespace: kalantar + namespace: CHANGE_ME roleRef: kind: Role name: helm-access From 0fb4271ea3cd39b3e77220df4b40aba24f78c80b Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Thu, 9 Oct 2025 10:30:20 -0400 Subject: [PATCH 22/44] update readme Signed-off-by: Michael Kalantar --- tekton-poc/README.md | 68 ++++++++++++++++++++++++++++++---- tekton-poc/pipeline/roles.yaml | 8 +++- 2 files changed, 66 insertions(+), 10 deletions(-) diff --git a/tekton-poc/README.md b/tekton-poc/README.md index dc5c58df..769c03f5 100644 --- a/tekton-poc/README.md +++ b/tekton-poc/README.md @@ -42,13 +42,20 @@ A single Task measures performance over a single set of values from the factor/v A PipelineRun is created that embeds a Pipeline containing one Task with a matrix of values for a set of factors. An example is `pipelinerun-matrix.yaml`. -## Use +## Usage -1. Create a namespace, for example: $NAMESPACE and set to current context: +### Setup + +1. Create a namespace where the Tekton pipeline will execute. ```shell + export $NAMESPACE=your_namespace kubectl create ns $NAMESPACE + ``` + For convenience, set the current context: + ```shell kubectl config set-context --current --namespace $NAMESPACE ``` + 2. Deploy a secret `hf-secret` containing your HuggingFace token in the namespace. ```shell kubectl create secret generic hf-secret \ @@ -56,35 +63,80 @@ A PipelineRun is created that embeds a Pipeline containing one Task with a matri --from-literal="HF_TOKEN=${HF_TOKEN}" \ --dry-run=client -o yaml | kubectl apply -f - ``` + 3. Give the task needed permissions (edit to set namespace) ```shell - kubectl apply -f pipeline/roles.yaml + envsubst '$NAMESPACE' < pipeline/roles.yaml | kubectl apply -f - + ``` + +4. Create a RWX PVC `workspace-pvc` for storing execution results. This PVC is shared between all tasks. For example: + ```shell + cat < -f ``` +Describe a `TaskRun`: + +```shell +tkn tr describe +``` + +### Cleanup + +Delete the `PipelineRun`: + +```shell +tkn pr delete -f +``` + +**Note**: The current implementation does not remove the namespaces created by each sweep step. Manually delete them to release all their resources. If you leave them, subsequent executions of the pipeline will attempt to reuse the resources. + ## Managing Parallelism The default PipelineSpec (in `pipeline/pipelinerun-matrix.yaml`) executes all the tasks in parallel. 
It can be modified in a number of ways to reduce the amount of parallel execution (at the expense of time). diff --git a/tekton-poc/pipeline/roles.yaml b/tekton-poc/pipeline/roles.yaml index 5bf06b1a..b49148df 100644 --- a/tekton-poc/pipeline/roles.yaml +++ b/tekton-poc/pipeline/roles.yaml @@ -2,6 +2,7 @@ apiVersion: v1 kind: ServiceAccount metadata: name: helm-installer + namespace: ${NAMESPACE} --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole @@ -54,12 +55,13 @@ roleRef: subjects: - kind: ServiceAccount name: helm-installer - namespace: CHANGE_ME + namespace: ${NAMESPACE} --- apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: name: helm-installer-restricted-scc + namespace: ${NAMESPACE} subjects: - kind: ServiceAccount name: helm-installer @@ -72,6 +74,7 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: name: helm-access + namespace: ${NAMESPACE} rules: - apiGroups: [""] resources: ["secrets", "configmaps", "services", "pods", "namespaces", "serviceaccounts", "persistentvolumeclaims"] @@ -109,10 +112,11 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: name: helm-access-binding + namespace: ${NAMESPACE} subjects: - kind: ServiceAccount name: helm-installer - namespace: CHANGE_ME + namespace: ${NAMESPACE} roleRef: kind: Role name: helm-access From 6535c56366efe44a8a26932bcf5f29becb077dc3 Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Thu, 9 Oct 2025 10:32:48 -0400 Subject: [PATCH 23/44] update readme Signed-off-by: Michael Kalantar --- tekton-poc/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tekton-poc/README.md b/tekton-poc/README.md index 769c03f5..8d4464c7 100644 --- a/tekton-poc/README.md +++ b/tekton-poc/README.md @@ -64,7 +64,7 @@ A PipelineRun is created that embeds a Pipeline containing one Task with a matri --dry-run=client -o yaml | kubectl apply -f - ``` -3. Give the task needed permissions (edit to set namespace) +3. 
Give the task needed permissions ```shell envsubst '$NAMESPACE' < pipeline/roles.yaml | kubectl apply -f - ``` From d349a0177670213af76d782422d0d4ea4199227c Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Thu, 9 Oct 2025 13:29:59 -0400 Subject: [PATCH 24/44] remove hardcoded param Signed-off-by: Michael Kalantar --- tekton-poc/pipeline/experiment-task.yaml | 2 +- tekton-poc/pipeline/roles.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tekton-poc/pipeline/experiment-task.yaml b/tekton-poc/pipeline/experiment-task.yaml index 8108f7a5..3257f4a9 100644 --- a/tekton-poc/pipeline/experiment-task.yaml +++ b/tekton-poc/pipeline/experiment-task.yaml @@ -449,7 +449,7 @@ spec: workload_template_file_name=$(echo ${workload_template_full_path} | rev | cut -d '/' -f 1 | rev | sed -e "s^\.yaml.in$^^g") echo "workload_template_file_name = $workload_template_file_name" ## - workload_output_file=${LLMDBENCH_CONTROL_WORK_DIR}/shared_prefix_synthetic_short.yaml + workload_output_file=${LLMDBENCH_CONTROL_WORK_DIR}/${workload_template_file_name}.yaml # workload_output_file=${LLMDBENCH_CONTROL_WORK_DIR}/$workload_template_type/$workload_template_file_name echo "workload_output_file = $workload_output_file" ## diff --git a/tekton-poc/pipeline/roles.yaml b/tekton-poc/pipeline/roles.yaml index b49148df..5f447233 100644 --- a/tekton-poc/pipeline/roles.yaml +++ b/tekton-poc/pipeline/roles.yaml @@ -47,7 +47,7 @@ rules: apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: - name: helm-installer-clusterrolebinding + name: helm-installer-crb-${NAMESPACE} roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole From 28fabf124176d1e95bfe5acd6160edb263939efb Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Thu, 9 Oct 2025 15:45:36 -0400 Subject: [PATCH 25/44] expose s3 config Signed-off-by: Michael Kalantar --- tekton-poc/README.md | 5 ++ tekton-poc/pipeline/experiment-task.yaml | 28 ++++---- .../pipeline/pipelinerun-matrix-subset.yaml | 68 +++++++++---------- tekton-poc/pipeline/pipelinerun-matrix.yaml | 45 +++++++++--- 4 files changed, 89 insertions(+), 57 deletions(-) diff --git a/tekton-poc/README.md b/tekton-poc/README.md index 8d4464c7..a449fa2b 100644 --- a/tekton-poc/README.md +++ b/tekton-poc/README.md @@ -178,3 +178,8 @@ The utility script `utility/transform-pr-parallel.py` can be used to transform a - be sure to set the namespace parameter in the pipeline run; this is where the pipeline runs and is the base of the name for each experiment - the upload of data is not yet implemented - there are hardcoded assumptions/values about the use case in several places; these will be removed as more use cases are explored + + +# Issues + +- document set up s3 keys diff --git a/tekton-poc/pipeline/experiment-task.yaml b/tekton-poc/pipeline/experiment-task.yaml index 3257f4a9..df3cbf4c 100644 --- a/tekton-poc/pipeline/experiment-task.yaml +++ b/tekton-poc/pipeline/experiment-task.yaml @@ -14,6 +14,8 @@ spec: type: string - name: output_len type: string + - name: gaiePluginConfig + type: string - name: namespace type: string @@ -118,15 +120,13 @@ spec: type: string default: experiment - - name: bucket + - name: s3-keys type: string - default: "cloud-object-storage-cos-standard-ere" - - name: prefix + default: "s3-keys" + - name: s3-bucket type: string - default: "results" - - name: endpoint + - name: s3-endpoint type: string - default: "https://s3.us-east.cloud-object-storage.appdomain.cloud" - name: dry-run type: string @@ -137,7 +137,10 @@ spec: image: 
alpine:3.20 script: | #!/bin/sh - echo "🔄 Starting sweep step ..." + echo "🔄 Starting sweep step for ..." + echo " gaiePluginConfig = $(params.gaiePluginConfig)" + echo " question_len = $(params.question_len)" + echo " output_len = $(params.output_len)" - name: prepare-namespace image: quay.io/openshift/origin-cli:4.21 @@ -480,12 +483,12 @@ spec: - name: AWS_ACCESS_KEY_ID valueFrom: secretKeyRef: - name: ibm-cos-secret + name: $(params.s3-keys) key: AWS_ACCESS_KEY_ID - name: AWS_SECRET_ACCESS_KEY valueFrom: secretKeyRef: - name: ibm-cos-secret + name: $(params.s3-keys) key: AWS_SECRET_ACCESS_KEY - name: AWS_EC2_METADATA_DISABLED value: "true" @@ -499,10 +502,11 @@ spec: EXPERIMENT_RESULTS_FOLDER="$(params.harnessName)_${EXPERIMENT_ID}_$(context.taskRun.name)" ARCHIVE_NAME="${EXPERIMENT_RESULTS_FOLDER}.tar.gz" - tar -czf ${ARCHIVE_NAME} -C "$(workspaces.data.path)" ${EXPERIMENT_RESULTS_FOLDER} + tar -czf ${ARCHIVE_NAME} \ + -C "$(workspaces.data.path)" ${EXPERIMENT_RESULTS_FOLDER} - aws s3 cp ${ARCHIVE_NAME} "s3://$(params.bucket)/${ARCHIVE_NAME}" \ - --endpoint-url "$(params.endpoint)" \ + aws s3 cp ${ARCHIVE_NAME} "s3://$(params.s3-bucket)/${ARCHIVE_NAME}" \ + --endpoint-url "$(params.s3-endpoint)" \ --content-type "application/x-tar" \ --content-encoding "gzip" \ --no-progress diff --git a/tekton-poc/pipeline/pipelinerun-matrix-subset.yaml b/tekton-poc/pipeline/pipelinerun-matrix-subset.yaml index 244d8c72..3a10d432 100644 --- a/tekton-poc/pipeline/pipelinerun-matrix-subset.yaml +++ b/tekton-poc/pipeline/pipelinerun-matrix-subset.yaml @@ -9,6 +9,24 @@ spec: - name: data persistentVolumeClaim: claimName: workspace-pvc + params: + - name: namespace + value: kalantar + - name: model-id + value: "Qwen/Qwen3-0.6B" + + # Harness / Workload + - name: harnessProfile + value: shared_prefix_synthetic_short.yaml + + # Output Location + - name: s3-keys + value: ibm-cos-secret + - name: s3-bucket + value: "cloud-object-storage-cos-standard-ere" + - name: s3-endpoint + value: "https://s3.us-east.cloud-object-storage.appdomain.cloud" + pipelineSpec: workspaces: - name: data @@ -21,31 +39,25 @@ spec: workspace: data params: - name: namespace - value: kalantar + value: $(params.namespace) - name: model-id - value: "Qwen/Qwen3-0.6B" + value: $(params.model-id) - name: experimentBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + + - name: s3-keys + value: $(params.s3-keys) + - name: s3-bucket + value: $(params.s3-bucket) + - name: s3-endpoint + value: $(params.s3-endpoint) + - name: harnessProfile - value: shared_prefix_synthetic.yaml + value: $(params.harnessProfile) + + - name: pipelineUID + value: "$(context.pipelineRun.uid)" matrix: - params: - - name: gaiePluginConfig - value: - - "inf-sche-none.yaml" - - "inf-sche-prefix.yaml" - - "inf-sche-kv.yaml" - - "inf-sche-queue.yaml" - - name: question_len - value: - - "100" - - "300" - - "1000" - - name: output_len - value: - - "100" - - "300" - - "1000" include: - name: combo-1 params: @@ -63,19 +75,3 @@ spec: value: "300" - name: output_len value: "300" - - name: combo-3 - params: - - name: gaiePluginConfig - value: "inf-sche-kv.yaml" - - name: question_len - value: "1000" - - name: output_len - value: "100" - - name: combo-4 - params: - - name: gaiePluginConfig - value: "inf-sche-queue.yaml" - - name: question_len - value: "300" - - name: output_len - value: "1000" \ No newline at end of file diff --git a/tekton-poc/pipeline/pipelinerun-matrix.yaml 
b/tekton-poc/pipeline/pipelinerun-matrix.yaml index 73c6ef5f..34320b82 100644 --- a/tekton-poc/pipeline/pipelinerun-matrix.yaml +++ b/tekton-poc/pipeline/pipelinerun-matrix.yaml @@ -9,6 +9,24 @@ spec: - name: data persistentVolumeClaim: claimName: workspace-pvc + params: + - name: namespace + value: kalantar + - name: model-id + value: "Qwen/Qwen3-0.6B" + + # Harness / Workload + - name: harnessProfile + value: shared_prefix_synthetic_short.yaml + + # Output Location + - name: s3-keys + value: ibm-cos-secret + - name: s3-bucket + value: "cloud-object-storage-cos-standard-ere" + - name: s3-endpoint + value: "https://s3.us-east.cloud-object-storage.appdomain.cloud" + pipelineSpec: workspaces: - name: data @@ -21,30 +39,39 @@ spec: workspace: data params: - name: namespace - value: CHANGE_ME + value: $(params.namespace) - name: model-id - value: "Qwen/Qwen3-0.6B" + value: $(params.model-id) - name: experimentBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ + + - name: s3-keys + value: $(params.s3-keys) + - name: s3-bucket + value: $(params.s3-bucket) + - name: s3-endpoint + value: $(params.s3-endpoint) + - name: harnessProfile - value: shared_prefix_synthetic_short.yaml + value: $(params.harnessProfile) + - name: pipelineUID value: "$(context.pipelineRun.uid)" matrix: params: - name: gaiePluginConfig value: - - "inf-sche-none.yaml" - - "inf-sche-prefix.yaml" - - "inf-sche-kv.yaml" + # - "inf-sche-none.yaml" + # - "inf-sche-prefix.yaml" + # - "inf-sche-kv.yaml" - "inf-sche-queue.yaml" - name: question_len value: - - "100" - - "300" + # - "100" + # - "300" - "1000" - name: output_len value: - - "100" + # - "100" - "300" - "1000" From ec1a648a2cfda33f0ff29c22d29d8417d8085b7c Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Thu, 9 Oct 2025 15:56:31 -0400 Subject: [PATCH 26/44] document s3 Signed-off-by: Michael Kalantar --- tekton-poc/README.md | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/tekton-poc/README.md b/tekton-poc/README.md index a449fa2b..1f8ed491 100644 --- a/tekton-poc/README.md +++ b/tekton-poc/README.md @@ -44,6 +44,12 @@ A PipelineRun is created that embeds a Pipeline containing one Task with a matri ## Usage +### Requirements + +1. HF token +2. s3 bucket and necessary keys +3. + ### Setup 1. Create a namespace where the Tekton pipeline will execute. @@ -56,7 +62,7 @@ A PipelineRun is created that embeds a Pipeline containing one Task with a matri kubectl config set-context --current --namespace $NAMESPACE ``` -2. Deploy a secret `hf-secret` containing your HuggingFace token in the namespace. +2. Create a secret `hf-secret` containing your HuggingFace token in the namespace. ```shell kubectl create secret generic hf-secret \ --namespace ${NAMESPACE} \ @@ -64,12 +70,14 @@ A PipelineRun is created that embeds a Pipeline containing one Task with a matri --dry-run=client -o yaml | kubectl apply -f - ``` -3. Give the task needed permissions +3. Create a secret containing your s3 credentials `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`. + +4. Give the task needed permissions ```shell envsubst '$NAMESPACE' < pipeline/roles.yaml | kubectl apply -f - ``` -4. Create a RWX PVC `workspace-pvc` for storing execution results. This PVC is shared between all tasks. For example: +5. Create a RWX PVC `workspace-pvc` for storing execution results. This PVC is shared between all tasks. 
For example: ```shell cat < Date: Fri, 10 Oct 2025 08:39:47 -0400 Subject: [PATCH 27/44] clarify use of namespace Signed-off-by: Michael Kalantar --- tekton-poc/pipeline/experiment-task.yaml | 25 +++++++++---------- .../pipeline/pipelinerun-matrix-subset.yaml | 6 ++--- tekton-poc/pipeline/pipelinerun-matrix.yaml | 6 ++--- 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/tekton-poc/pipeline/experiment-task.yaml b/tekton-poc/pipeline/experiment-task.yaml index df3cbf4c..d79ce3da 100644 --- a/tekton-poc/pipeline/experiment-task.yaml +++ b/tekton-poc/pipeline/experiment-task.yaml @@ -17,10 +17,9 @@ spec: - name: gaiePluginConfig type: string - - name: namespace + - name: targetNamespacePrefix type: string - default: kalantar-llmd - description: Target namespace + default: llmdbench - name: model-id type: string @@ -147,7 +146,7 @@ spec: script: | #!/bin/sh - NAMESPACE="$(params.namespace)-$(context.taskRun.name)" + NAMESPACE="$(params.targetNamespacePrefix)-$(context.taskRun.name)" DRY_RUN="$(params.dry-run)" if [ "${DRY_RUN}" = "true" ]; then @@ -193,7 +192,7 @@ spec: - name: chart value: /tmp/llm-d-benchmark/charts/model-download - name: namespace - value: $(params.namespace)-$(context.taskRun.name) + value: $(params.targetNamespacePrefix)-$(context.taskRun.name) - name: timeout value: 15m # - name: valuesYamlUrl @@ -229,7 +228,7 @@ spec: value: https://llm-d-incubation.github.io/llm-d-infra/ - name: namespace - value: $(params.namespace)-$(context.taskRun.name) + value: $(params.targetNamespacePrefix)-$(context.taskRun.name) - name: timeout value: 15m - name: valuesYamlUrl @@ -250,7 +249,7 @@ spec: value: $(params.gaieChartVersion) - name: namespace - value: $(params.namespace)-$(context.taskRun.name) + value: $(params.targetNamespacePrefix)-$(context.taskRun.name) - name: timeout value: 15m - name: valuesYamlUrl @@ -275,7 +274,7 @@ spec: value: https://llm-d-incubation.github.io/llm-d-modelservice/ - name: namespace - value: $(params.namespace)-$(context.taskRun.name) + value: $(params.targetNamespacePrefix)-$(context.taskRun.name) - name: timeout value: 15m - name: valuesYamlUrl @@ -298,7 +297,7 @@ spec: echo ">> skipping" exit 0 fi - NAMESPACE="$(params.namespace)-$(context.taskRun.name)" + NAMESPACE="$(params.targetNamespacePrefix)-$(context.taskRun.name)" MODEL_ID="$(params.model-id)" MODEL_LABEL=$(echo "$MODEL_ID" | tr '[:upper:]' '[:lower:]' | sed 's/[./]/-/g') MODEL_START_TIMEOUT="$(params.modelWaitTimeout)" @@ -352,11 +351,11 @@ spec: - name: LLMDBENCH_HARNESS_NAME value: "$(params.harnessName)" - name: LLMDBENCH_HARNESS_NAMESPACE - value: "$(params.namespace)-$(context.taskRun.name)" + value: "$(params.targetNamespacePrefix)-$(context.taskRun.name)" - name: LLMDBENCH_HARNESS_STACK_TYPE value: "llm-d" - name: LLMDBENCH_HARNESS_STACK_ENDPOINT_URL - value: "http://experiment-gateway-inference-gateway.$(params.namespace)-$(context.taskRun.name).svc.cluster.local:80" + value: "http://experiment-gateway-inference-gateway.$(params.targetNamespacePrefix)-$(context.taskRun.name).svc.cluster.local:80" - name: LLMDBENCH_DEPLOY_METHODS value: "modelservice" - name: LLMDBENCH_MAGIC_ENVAR @@ -521,7 +520,7 @@ spec: script : | #!/bin/sh - NAMESPACE="$(params.namespace)-$(context.taskRun.name)" + NAMESPACE="$(params.targetNamespacePrefix)-$(context.taskRun.name)" if [ "$(params.dry-run)" = "true" ]; then echo ">> skipping" @@ -530,7 +529,7 @@ spec: # kubectl delete namespace ${NAMESPACE} - echo "✅ workload namespace deleted" + echo "✅ workload namespace ${NAMESPACE} deleted" - name: 
log-completion image: alpine:3.20 diff --git a/tekton-poc/pipeline/pipelinerun-matrix-subset.yaml b/tekton-poc/pipeline/pipelinerun-matrix-subset.yaml index 3a10d432..8700a029 100644 --- a/tekton-poc/pipeline/pipelinerun-matrix-subset.yaml +++ b/tekton-poc/pipeline/pipelinerun-matrix-subset.yaml @@ -10,7 +10,7 @@ spec: persistentVolumeClaim: claimName: workspace-pvc params: - - name: namespace + - name: targetNamespacePrefix value: kalantar - name: model-id value: "Qwen/Qwen3-0.6B" @@ -38,8 +38,8 @@ spec: - name: data workspace: data params: - - name: namespace - value: $(params.namespace) + - name: targetNamespacePrefix + value: $(params.targetNamespacePrefix) - name: model-id value: $(params.model-id) - name: experimentBaseUrl diff --git a/tekton-poc/pipeline/pipelinerun-matrix.yaml b/tekton-poc/pipeline/pipelinerun-matrix.yaml index 34320b82..290d688e 100644 --- a/tekton-poc/pipeline/pipelinerun-matrix.yaml +++ b/tekton-poc/pipeline/pipelinerun-matrix.yaml @@ -10,7 +10,7 @@ spec: persistentVolumeClaim: claimName: workspace-pvc params: - - name: namespace + - name: targetNamespacePrefix value: kalantar - name: model-id value: "Qwen/Qwen3-0.6B" @@ -38,8 +38,8 @@ spec: - name: data workspace: data params: - - name: namespace - value: $(params.namespace) + - name: targetNamespacePrefix + value: $(params.targetNamespacePrefix) - name: model-id value: $(params.model-id) - name: experimentBaseUrl From afb5655226f33d1882a67f9da8fb539509dfbe6c Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Fri, 10 Oct 2025 11:53:12 -0400 Subject: [PATCH 28/44] change image for s3 upload Signed-off-by: Michael Kalantar --- tekton-poc/pipeline/experiment-task.yaml | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/tekton-poc/pipeline/experiment-task.yaml b/tekton-poc/pipeline/experiment-task.yaml index d79ce3da..b6720374 100644 --- a/tekton-poc/pipeline/experiment-task.yaml +++ b/tekton-poc/pipeline/experiment-task.yaml @@ -476,7 +476,12 @@ spec: llm-d-benchmark.sh - name: upload-results - image: amazon/aws-cli:2.31.9 + image: ubuntu:24.04 + # Tried amazon/aws-cli:2.31.9 but latest tar available via dnf install tar -u is 1.34. + # Had errors "file changed as we read it". It may be caused by the way tar identifes + # file changes in v 1.34 (ctime). Recommended solution to move to 1.35. See https://stackoverflow.com/a/77765876. + # and tar release notes https://lists.gnu.org/archive/html/info-gnu/2023-07/msg00005.html) + # A smaller image is probably desirable. A restriction is that AWS CLI v2 requires glibc. 
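+      # (Untested alternative, noted for reference: GNU tar accepts --warning=no-file-changed to
+      # silence that warning, but tar 1.34 may still exit non-zero when a file changes mid-read,
+      # so an image with tar >= 1.35 remains the safer fix.)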
workingDir: $(workspaces.data.path) env: - name: AWS_ACCESS_KEY_ID @@ -493,14 +498,24 @@ spec: value: "true" script: | #!/usr/bin/env sh - set -euo pipefail - dnf install tar gzip -y + apt-get update && \ + apt-get install -y --no-install-recommends ca-certificates curl unzip tar gzip && \ + rm -rf /var/lib/apt/lists/* + + curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o /tmp/awscliv2.zip && \ + unzip /tmp/awscliv2.zip -d /tmp && \ + /tmp/aws/install && \ + rm -rf /tmp/aws /tmp/awscliv2.zip + + tar --version && gzip --version && aws --version EXPERIMENT_ID="experiment-$(echo -n $(params.pipelineUID) | cut -c1-8)" EXPERIMENT_RESULTS_FOLDER="$(params.harnessName)_${EXPERIMENT_ID}_$(context.taskRun.name)" ARCHIVE_NAME="${EXPERIMENT_RESULTS_FOLDER}.tar.gz" + tar --version && gzip --version && aws --version + tar -czf ${ARCHIVE_NAME} \ -C "$(workspaces.data.path)" ${EXPERIMENT_RESULTS_FOLDER} From 78de7de8b68e5536c0f3260781ca2397879583a7 Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Fri, 10 Oct 2025 12:14:01 -0400 Subject: [PATCH 29/44] prevent using kalantar ns Signed-off-by: Michael Kalantar --- tekton-poc/pipeline/pipelinerun-matrix-subset.yaml | 3 ++- tekton-poc/pipeline/pipelinerun-matrix.yaml | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tekton-poc/pipeline/pipelinerun-matrix-subset.yaml b/tekton-poc/pipeline/pipelinerun-matrix-subset.yaml index 8700a029..9d102b9e 100644 --- a/tekton-poc/pipeline/pipelinerun-matrix-subset.yaml +++ b/tekton-poc/pipeline/pipelinerun-matrix-subset.yaml @@ -11,7 +11,8 @@ spec: claimName: workspace-pvc params: - name: targetNamespacePrefix - value: kalantar + # This can be anything. + value: $(params.targetNamespacePrefix) - name: model-id value: "Qwen/Qwen3-0.6B" diff --git a/tekton-poc/pipeline/pipelinerun-matrix.yaml b/tekton-poc/pipeline/pipelinerun-matrix.yaml index 290d688e..0e3af8b2 100644 --- a/tekton-poc/pipeline/pipelinerun-matrix.yaml +++ b/tekton-poc/pipeline/pipelinerun-matrix.yaml @@ -11,7 +11,8 @@ spec: claimName: workspace-pvc params: - name: targetNamespacePrefix - value: kalantar + # This can be anything. + value: $(params.targetNamespacePrefix) - name: model-id value: "Qwen/Qwen3-0.6B" From 17bee4b6c5bf0062e28ad2bc11b4f20e5f8c3cf9 Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Fri, 10 Oct 2025 12:16:32 -0400 Subject: [PATCH 30/44] prevent using kalantar ns Signed-off-by: Michael Kalantar --- tekton-poc/pipeline/pipelinerun-matrix-subset.yaml | 2 +- tekton-poc/pipeline/pipelinerun-matrix.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tekton-poc/pipeline/pipelinerun-matrix-subset.yaml b/tekton-poc/pipeline/pipelinerun-matrix-subset.yaml index 9d102b9e..aa6e715c 100644 --- a/tekton-poc/pipeline/pipelinerun-matrix-subset.yaml +++ b/tekton-poc/pipeline/pipelinerun-matrix-subset.yaml @@ -12,7 +12,7 @@ spec: params: - name: targetNamespacePrefix # This can be anything. - value: $(params.targetNamespacePrefix) + value: $(context.pipelineRun.namespace) - name: model-id value: "Qwen/Qwen3-0.6B" diff --git a/tekton-poc/pipeline/pipelinerun-matrix.yaml b/tekton-poc/pipeline/pipelinerun-matrix.yaml index 0e3af8b2..6db84cf4 100644 --- a/tekton-poc/pipeline/pipelinerun-matrix.yaml +++ b/tekton-poc/pipeline/pipelinerun-matrix.yaml @@ -12,7 +12,7 @@ spec: params: - name: targetNamespacePrefix # This can be anything. 
- value: $(params.targetNamespacePrefix) + value: $(context.pipelineRun.namespace) - name: model-id value: "Qwen/Qwen3-0.6B" From f918279b46f956efa2d6983569e013342300ab19 Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Fri, 10 Oct 2025 17:10:54 -0400 Subject: [PATCH 31/44] delete experiment namespaces Signed-off-by: Michael Kalantar --- tekton-poc/pipeline/experiment-task.yaml | 22 +++++++++++-------- .../pipeline/pipelinerun-matrix-subset.yaml | 6 +++++ tekton-poc/pipeline/pipelinerun-matrix.yaml | 6 +++++ 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/tekton-poc/pipeline/experiment-task.yaml b/tekton-poc/pipeline/experiment-task.yaml index b6720374..e16bf6d1 100644 --- a/tekton-poc/pipeline/experiment-task.yaml +++ b/tekton-poc/pipeline/experiment-task.yaml @@ -127,6 +127,9 @@ spec: - name: s3-endpoint type: string + - name: debug + type: string + default: "false" - name: dry-run type: string default: "false" @@ -477,10 +480,10 @@ spec: - name: upload-results image: ubuntu:24.04 - # Tried amazon/aws-cli:2.31.9 but latest tar available via dnf install tar -u is 1.34. - # Had errors "file changed as we read it". It may be caused by the way tar identifes - # file changes in v 1.34 (ctime). Recommended solution to move to 1.35. See https://stackoverflow.com/a/77765876. - # and tar release notes https://lists.gnu.org/archive/html/info-gnu/2023-07/msg00005.html) + # Tried amazon/aws-cli:2.31.9 but latest tar available via `dnf install tar -y` is 1.34. + # There were sporadic errors "file changed as we read it". It may be caused by the way + # tar identifes file changes in v 1.34 (via ctime). A recommended solution to move to 1.35. + # See https://stackoverflow.com/a/77765876 and tar release notes https://lists.gnu.org/archive/html/info-gnu/2023-07/msg00005.html) # A smaller image is probably desirable. A restriction is that AWS CLI v2 requires glibc. 
workingDir: $(workspaces.data.path) env: @@ -531,19 +534,20 @@ spec: echo "✅ Uploaded results to ${ARCHIVE_NAME}" - name: delete-namespace - image: alpine/helm:3.14.0 + image: alpine/kubectl:1.34.1 script : | #!/bin/sh NAMESPACE="$(params.targetNamespacePrefix)-$(context.taskRun.name)" + DEBUG="$(params.debug)" - if [ "$(params.dry-run)" = "true" ]; then - echo ">> skipping" + if [ "$(params.debug)" = "true" ]; then + echo "âš ī¸ DEBUG=true; leaving namespace ${NAMESPACE} for inspection" + echo "âš ī¸ Manually clean up resources with \"kubectl delete namespace ${NAMESPACE}\"" exit 0 fi - # kubectl delete namespace ${NAMESPACE} - + kubectl delete namespace ${NAMESPACE} echo "✅ workload namespace ${NAMESPACE} deleted" - name: log-completion diff --git a/tekton-poc/pipeline/pipelinerun-matrix-subset.yaml b/tekton-poc/pipeline/pipelinerun-matrix-subset.yaml index aa6e715c..d374a269 100644 --- a/tekton-poc/pipeline/pipelinerun-matrix-subset.yaml +++ b/tekton-poc/pipeline/pipelinerun-matrix-subset.yaml @@ -28,6 +28,10 @@ spec: - name: s3-endpoint value: "https://s3.us-east.cloud-object-storage.appdomain.cloud" + # Control + - name: debug + value: false + pipelineSpec: workspaces: - name: data @@ -56,6 +60,8 @@ spec: - name: harnessProfile value: $(params.harnessProfile) + - name: debug + value: "$(params.debug)" - name: pipelineUID value: "$(context.pipelineRun.uid)" matrix: diff --git a/tekton-poc/pipeline/pipelinerun-matrix.yaml b/tekton-poc/pipeline/pipelinerun-matrix.yaml index 6db84cf4..64aa21ad 100644 --- a/tekton-poc/pipeline/pipelinerun-matrix.yaml +++ b/tekton-poc/pipeline/pipelinerun-matrix.yaml @@ -28,6 +28,10 @@ spec: - name: s3-endpoint value: "https://s3.us-east.cloud-object-storage.appdomain.cloud" + # Control + - name: debug + value: false + pipelineSpec: workspaces: - name: data @@ -56,6 +60,8 @@ spec: - name: harnessProfile value: $(params.harnessProfile) + - name: debug + value: "$(params.debug)" - name: pipelineUID value: "$(context.pipelineRun.uid)" matrix: From c5a5e3845d16a7e5670bd8ea6c31c927cec11ec6 Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Tue, 14 Oct 2025 13:59:03 -0400 Subject: [PATCH 32/44] pd yaml Signed-off-by: Michael Kalantar --- .../pd-disaggregation/gaie-values.yaml | 36 +++ .../pd-disaggregation/gateway-values.yaml | 8 + .../examples/pd-disaggregation/ms-values.yaml | 264 ++++++++++++++++++ 3 files changed, 308 insertions(+) create mode 100644 tekton-poc/examples/pd-disaggregation/gaie-values.yaml create mode 100644 tekton-poc/examples/pd-disaggregation/gateway-values.yaml create mode 100644 tekton-poc/examples/pd-disaggregation/ms-values.yaml diff --git a/tekton-poc/examples/pd-disaggregation/gaie-values.yaml b/tekton-poc/examples/pd-disaggregation/gaie-values.yaml new file mode 100644 index 00000000..2b039b76 --- /dev/null +++ b/tekton-poc/examples/pd-disaggregation/gaie-values.yaml @@ -0,0 +1,36 @@ +inferenceExtension: + replicas: 1 + image: + name: llm-d-inference-scheduler + hub: ghcr.io/llm-d + tag: v0.2.1 + pullPolicy: Always + extProcPort: 9002 + extraContainerPorts: + - name: zmq + containerPort: 5557 + protocol: TCP + extraServicePorts: + - name: zmq + port: 5557 + targetPort: 5557 + protocol: TCP + env: + - name: HF_TOKEN + valueFrom: + secretKeyRef: + name: hf-token + key: HF_TOKEN + pluginsConfigFile: "plugins-v2.yaml" + +inferencePool: + targetPortNumber: 8000 + modelServerType: vllm + apiVersion: "inference.networking.x-k8s.io/v1alpha2" + modelServers: + matchLabels: + llm-d.ai/inferenceServing: "true" + llm-d.ai/model: 
meta-lla-1b4505f6-instruct +provider: + name: none + diff --git a/tekton-poc/examples/pd-disaggregation/gateway-values.yaml b/tekton-poc/examples/pd-disaggregation/gateway-values.yaml new file mode 100644 index 00000000..b22f8140 --- /dev/null +++ b/tekton-poc/examples/pd-disaggregation/gateway-values.yaml @@ -0,0 +1,8 @@ +gateway: + gatewayClassName: kgateway + service: + type: NodePort + destinationRule: + host: gaie-inference-scheduling-epp.kalantar-is.svc.cluster.local + gatewayParameters: + enabled: true diff --git a/tekton-poc/examples/pd-disaggregation/ms-values.yaml b/tekton-poc/examples/pd-disaggregation/ms-values.yaml new file mode 100644 index 00000000..cb81e79a --- /dev/null +++ b/tekton-poc/examples/pd-disaggregation/ms-values.yaml @@ -0,0 +1,264 @@ +fullnameOverride: meta-lla-1b4505f6-instruct +multinode: false + +modelArtifacts: + uri: pvc://model-pvc/models/meta-llama/Llama-3.1-8B-Instruct + size: 300Gi + authSecretName: "hf-secret" + name: meta-llama/Llama-3.1-8B-Instruct + +routing: + servicePort: 8000 + parentRefs: + - group: gateway.networking.k8s.io + kind: Gateway + name: infra-nam-release-inference-gateway + proxy: + image: "ghcr.io/llm-d/llm-d-routing-sidecar:v0.3.0" + secure: false + connector: nixlv2 + debugLevel: 3 + inferenceModel: + create: true + inferencePool: + create: false + name: meta-lla-1b4505f6-instruct-gaie + httpRoute: + create: true + rules: + - backendRefs: + - group: inference.networking.x-k8s.io + kind: InferencePool + name: meta-lla-1b4505f6-instruct-gaie + port: 8000 + weight: 1 + timeouts: + backendRequest: 0s + request: 0s + matches: + - path: + type: PathPrefix + value: /meta-llama-llama-3-1-8b-instruct/ + filters: + - type: URLRewrite + urlRewrite: + path: + type: ReplacePrefixMatch + replacePrefixMatch: / + - backendRefs: + - group: inference.networking.x-k8s.io + kind: InferencePool + name: meta-lla-1b4505f6-instruct-gaie + port: 8000 + weight: 1 + timeouts: + backendRequest: 0s + request: 0s + + epp: + create: false + +decode: + create: true + replicas: 3 + acceleratorTypes: + labelKey: nvidia.com/gpu.product + labelValues: + - NVIDIA-H100-80GB-HBM3 + parallelism: + data: 1 + tensor: 4 + annotations: + deployed-by: nick + modelservice: llm-d-benchmark + podAnnotations: + deployed-by: nick + modelservice: llm-d-benchmark + k8s.v1.cni.cncf.io/networks: multi-nic-compute + #no____config + containers: + - name: "vllm" + mountModelVolume: true + image: "ghcr.io/llm-d/llm-d:v0.2.0" + modelCommand: vllmServe + + args: + - "--block-size" + - "128" + - "--kv-transfer-config" + - '{"kv_connector":"NixlConnector","kv_role":"kv_both"}' + - "--disable-log-requests" + - "--disable-uvicorn-access-log" + - "--max-model-len" + - "16000" + - "--tensor-parallel-size" + - "4" + env: + - name: VLLM_NIXL_SIDE_CHANNEL_HOST + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: UCX_TLS + value: "rc,sm,cuda_ipc,cuda_copy,tcp" + - name: UCX_SOCKADDR_TLS_PRIORITY + value: "tcp" + - name: UCX_NET_DEVICES + value: mlx5_1:1 + - name: NCCL_IB_HCA + value: mlx5_1 + - name: VLLM_NIXL_SIDE_CHANNEL_PORT + value: "5557" + - name: VLLM_NIXL_SIDE_CHANNEL_HOST + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: VLLM_LOGGING_LEVEL + value: DEBUG + - name: VLLM_ALLOW_LONG_MAX_MODEL_LEN + value: "1" + resources: + limits: + memory: 128Gi + cpu: "32" + + nvidia.com/gpu: "4" + rdma/roce_gdr: "1" + requests: + memory: 128Gi + cpu: "32" + + nvidia.com/gpu: "4" + rdma/roce_gdr: "1" + extraConfig: + startupProbe: + httpGet: + path: /health + port: 8200 + 
failureThreshold: 60 + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 5 + livenessProbe: + tcpSocket: + port: 8200 + failureThreshold: 3 + periodSeconds: 5 + readinessProbe: + httpGet: + path: /health + port: 8200 + failureThreshold: 3 + periodSeconds: 5 + #no____config + volumeMounts: + - name: dshm + mountPath: /dev/shm + volumes: + - name: dshm + emptyDir: + medium: Memory + sizeLimit: 16Gi + +prefill: + create: true + replicas: 1 + acceleratorTypes: + labelKey: nvidia.com/gpu.product + labelValues: + - NVIDIA-H100-80GB-HBM3 + parallelism: + data: 1 + tensor: 4 + annotations: + deployed-by: nick + modelservice: llm-d-benchmark + podAnnotations: + deployed-by: nick + modelservice: llm-d-benchmark + k8s.v1.cni.cncf.io/networks: multi-nic-compute + #no____config + containers: + - name: "vllm" + mountModelVolume: true + image: "ghcr.io/llm-d/llm-d:v0.2.0" + modelCommand: vllmServe + + args: + - "--block-size" + - "128" + - "--kv-transfer-config" + - '{"kv_connector":"NixlConnector","kv_role":"kv_both"}' + - "--disable-log-requests" + - "--disable-uvicorn-access-log" + - "--max-model-len" + - "16000" + - "--tensor-parallel-size" + - "4" + env: + - name: VLLM_IS_PREFILL + value: "1" + - name: VLLM_NIXL_SIDE_CHANNEL_HOST + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: UCX_TLS + value: "rc,sm,cuda_ipc,cuda_copy,tcp" + - name: UCX_SOCKADDR_TLS_PRIORITY + value: "tcp" + - name: UCX_NET_DEVICES + value: mlx5_1:1 + - name: NCCL_IB_HCA + value: mlx5_1 + - name: VLLM_NIXL_SIDE_CHANNEL_PORT + value: "5557" + - name: VLLM_NIXL_SIDE_CHANNEL_HOST + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: VLLM_LOGGING_LEVEL + value: DEBUG + - name: VLLM_ALLOW_LONG_MAX_MODEL_LEN + value: "1" + resources: + limits: + memory: 128Gi + cpu: "32" + + nvidia.com/gpu: "4" + rdma/roce_gdr: "1" + requests: + memory: 128Gi + cpu: "32" + + nvidia.com/gpu: "4" + rdma/roce_gdr: "1" + extraConfig: + startupProbe: + httpGet: + path: /health + port: 8000 + failureThreshold: 60 + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 5 + livenessProbe: + tcpSocket: + port: 8000 + failureThreshold: 3 + periodSeconds: 5 + readinessProbe: + httpGet: + path: /health + port: 8000 + failureThreshold: 3 + periodSeconds: 5 + #no____config + volumeMounts: + - name: dshm + mountPath: /dev/shm + volumes: + - name: dshm + emptyDir: + medium: Memory + sizeLimit: 16Gi + From 5f4e42bb5070fef65cbf9460bc3189ecfc0d42eb Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Tue, 14 Oct 2025 14:26:50 -0400 Subject: [PATCH 33/44] update secret name Signed-off-by: Michael Kalantar --- tekton-poc/examples/pd-disaggregation/gaie-values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tekton-poc/examples/pd-disaggregation/gaie-values.yaml b/tekton-poc/examples/pd-disaggregation/gaie-values.yaml index 2b039b76..fe75e584 100644 --- a/tekton-poc/examples/pd-disaggregation/gaie-values.yaml +++ b/tekton-poc/examples/pd-disaggregation/gaie-values.yaml @@ -19,7 +19,7 @@ inferenceExtension: - name: HF_TOKEN valueFrom: secretKeyRef: - name: hf-token + name: hf-secret key: HF_TOKEN pluginsConfigFile: "plugins-v2.yaml" From c3754c7ce20d5a4070cc41e250ff266afc17eca9 Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Tue, 14 Oct 2025 14:41:19 -0400 Subject: [PATCH 34/44] update fullnameoverride Signed-off-by: Michael Kalantar --- tekton-poc/examples/pd-disaggregation/ms-values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tekton-poc/examples/pd-disaggregation/ms-values.yaml 
b/tekton-poc/examples/pd-disaggregation/ms-values.yaml index cb81e79a..c5e725ff 100644 --- a/tekton-poc/examples/pd-disaggregation/ms-values.yaml +++ b/tekton-poc/examples/pd-disaggregation/ms-values.yaml @@ -1,4 +1,4 @@ -fullnameOverride: meta-lla-1b4505f6-instruct +fullnameOverride: meta-llama-llama-3-1-8b-instruct multinode: false modelArtifacts: From 3fcbb206e6d150329ed78d1abcc80b27cfd40851 Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Tue, 14 Oct 2025 16:13:20 -0400 Subject: [PATCH 35/44] fix tensor Signed-off-by: Michael Kalantar --- tekton-poc/examples/pd-disaggregation/ms-values.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tekton-poc/examples/pd-disaggregation/ms-values.yaml b/tekton-poc/examples/pd-disaggregation/ms-values.yaml index c5e725ff..387f7cc6 100644 --- a/tekton-poc/examples/pd-disaggregation/ms-values.yaml +++ b/tekton-poc/examples/pd-disaggregation/ms-values.yaml @@ -92,7 +92,7 @@ decode: - "--max-model-len" - "16000" - "--tensor-parallel-size" - - "4" + - "1" env: - name: VLLM_NIXL_SIDE_CHANNEL_HOST valueFrom: @@ -193,7 +193,7 @@ prefill: - "--max-model-len" - "16000" - "--tensor-parallel-size" - - "4" + - "1" env: - name: VLLM_IS_PREFILL value: "1" From b09aa28aa3c89be1d4b69070735f239fe079eb46 Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Tue, 14 Oct 2025 16:42:33 -0400 Subject: [PATCH 36/44] gateway name Signed-off-by: Michael Kalantar --- tekton-poc/examples/pd-disaggregation/ms-values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tekton-poc/examples/pd-disaggregation/ms-values.yaml b/tekton-poc/examples/pd-disaggregation/ms-values.yaml index 387f7cc6..6f7cdce1 100644 --- a/tekton-poc/examples/pd-disaggregation/ms-values.yaml +++ b/tekton-poc/examples/pd-disaggregation/ms-values.yaml @@ -12,7 +12,7 @@ routing: parentRefs: - group: gateway.networking.k8s.io kind: Gateway - name: infra-nam-release-inference-gateway + name: experiment-gateway-inference-gateway proxy: image: "ghcr.io/llm-d/llm-d-routing-sidecar:v0.3.0" secure: false From b60f4472abaf1e8f098ea33c9f18d9364ceb2399 Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Tue, 14 Oct 2025 17:15:04 -0400 Subject: [PATCH 37/44] desitation name Signed-off-by: Michael Kalantar --- tekton-poc/examples/pd-disaggregation/gateway-values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tekton-poc/examples/pd-disaggregation/gateway-values.yaml b/tekton-poc/examples/pd-disaggregation/gateway-values.yaml index b22f8140..3f836050 100644 --- a/tekton-poc/examples/pd-disaggregation/gateway-values.yaml +++ b/tekton-poc/examples/pd-disaggregation/gateway-values.yaml @@ -3,6 +3,6 @@ gateway: service: type: NodePort destinationRule: - host: gaie-inference-scheduling-epp.kalantar-is.svc.cluster.local + host: experiment-gaie-685a862b-epp.kalantar-is.svc.cluster.local gatewayParameters: enabled: true From a3407aed99bb1ef3a1b8d55e36386cb30ab38ac9 Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Tue, 14 Oct 2025 17:29:49 -0400 Subject: [PATCH 38/44] label Signed-off-by: Michael Kalantar --- tekton-poc/examples/pd-disaggregation/gaie-values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tekton-poc/examples/pd-disaggregation/gaie-values.yaml b/tekton-poc/examples/pd-disaggregation/gaie-values.yaml index fe75e584..b860acef 100644 --- a/tekton-poc/examples/pd-disaggregation/gaie-values.yaml +++ b/tekton-poc/examples/pd-disaggregation/gaie-values.yaml @@ -30,7 +30,7 @@ inferencePool: modelServers: matchLabels: 
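+      # The llm-d.ai/model value below must match the label carried by the serving pods;
+      # in these examples it tracks the fullnameOverride set in ms-values.yaml.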
llm-d.ai/inferenceServing: "true" - llm-d.ai/model: meta-lla-1b4505f6-instruct + llm-d.ai/model: meta-llama-llama-3-1-8b-instruct provider: name: none From 2af0868d5f04d407ebdd4de4f35e36eac1b9b59a Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Tue, 14 Oct 2025 18:07:58 -0400 Subject: [PATCH 39/44] progress towards pd scenario Signed-off-by: Michael Kalantar --- tekton-poc/README.md | 21 +- tekton-poc/pipeline/experiment-task.yaml | 338 +++++++-------- tekton-poc/pipeline/pd-disaggregation-pr.yaml | 150 +++++++ .../pipeline/pipelinerun-matrix-subset.yaml | 62 ++- tekton-poc/pipeline/steps/inference-perf.yaml | 366 ++++++++++++++++ .../pipeline/{ => steps}/stepactions.yaml | 15 + tekton-poc/pipeline/steps/treatment.yaml | 117 ++++++ tekton-poc/pipeline/steps/vllm-benchmark.yaml | 392 ++++++++++++++++++ 8 files changed, 1253 insertions(+), 208 deletions(-) create mode 100644 tekton-poc/pipeline/pd-disaggregation-pr.yaml create mode 100644 tekton-poc/pipeline/steps/inference-perf.yaml rename tekton-poc/pipeline/{ => steps}/stepactions.yaml (96%) create mode 100644 tekton-poc/pipeline/steps/treatment.yaml create mode 100644 tekton-poc/pipeline/steps/vllm-benchmark.yaml diff --git a/tekton-poc/README.md b/tekton-poc/README.md index 1f8ed491..a1f5b8fc 100644 --- a/tekton-poc/README.md +++ b/tekton-poc/README.md @@ -155,7 +155,7 @@ tkn pr delete -f The default PipelineSpec (in `pipeline/pipelinerun-matrix.yaml`) executes all the tasks in parallel. It can be modified in a number of ways to reduce the amount of parallel execution (at the expense of time). -Some examples are provided: +Some examples are provided (**Note** examples need to be updated): - `pipeline/pipelinerun-matrix-subset.yaml`: Uses `matrix.include` to list an explicit set of combinations to execute. - `pipeline/pipelinerun-sequential-1.yaml`: Executes 1 task at a time. Each task depends on the previous one. @@ -194,6 +194,23 @@ The utility script `utility/transform-pr-parallel.py` can be used to transform a - there are hardcoded assumptions/values about the use case in several places; these will be removed as more use cases are explored -# Issues +# To Do +- modify script to handle unroll better +- modify script to handle unroll and n together +- single experiment namespace (possibly different from tekton ns) +- use more stepActions +- incorporate memory planner (Jing) +- PD example (Nick) + - [IN PROGRESS] deployment of the pd scenario + - [DONE] enabling multiple harnesses (inference-perf and vllm-benchmark) + - [DONE] making factors/treatments general (they are hardcoded) + - [NOT STARTED] use capacity planner to determine whether or not to continue + - [IN PROGRESS] move step implementations to stepactions + - [NOT STARTED] move from multiple namespaces to single namespace + +- can we have just one prepare-profile now that we have treatments? +- should we have a convert step independent of the analysis step? +- eventually one for analysis based on analysis of converted results +- need to wait for model download diff --git a/tekton-poc/pipeline/experiment-task.yaml b/tekton-poc/pipeline/experiment-task.yaml index e16bf6d1..0bd8ea84 100644 --- a/tekton-poc/pipeline/experiment-task.yaml +++ b/tekton-poc/pipeline/experiment-task.yaml @@ -10,12 +10,15 @@ spec: - name: data params: - - name: question_len + - name: factorMapping type: string - - name: output_len - type: string - - name: gaiePluginConfig + description: | + JSON string mapping factor to path in source yaml file sorted by purpose. 
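+        Illustrative shape only (the exact schema is whatever the analyze-treatment step expects), e.g.:
+          {"gaie": {"gaiePluginConfig": "inferenceExtension.pluginsConfigFile"},
+           "workload": {"question_len": "question_len"}}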
+ - name: treatment type: string + description: | + JSON string of factors and values for one treatment. + Includes both infrastructure and workload factors. - name: targetNamespacePrefix type: string @@ -134,15 +137,73 @@ spec: type: string default: "false" + results: + - name: treatmentAnalysisModelservice + value: $(steps.analyze-modelservice-factors.results.treatmentAnalysis) + - name: treatmentAnalysisGaie + value: $(steps.analyze-gaie-factors.results.treatmentAnalysis) + - name: treatmentAnalysisWorkload + value: $(steps.analyze-workload-factors.results.treatmentAnalysis) + steps: - name: log-start image: alpine:3.20 script: | #!/bin/sh echo "🔄 Starting sweep step for ..." - echo " gaiePluginConfig = $(params.gaiePluginConfig)" - echo " question_len = $(params.question_len)" - echo " output_len = $(params.output_len)" + printf "%s" "$(params.treatment)" + + - name: analyze-modelservice-factors + ref: + name: analyze-treatment + params: + - name: factorType + value: modelservice + - name: factorMapping + value: $(params.factorMapping) + - name: treatment + value: $(params.treatment) + + - name: analyze-gaie-factors + ref: + name: analyze-treatment + params: + - name: factorType + value: gaie + - name: factorMapping + value: $(params.factorMapping) + - name: treatment + value: $(params.treatment) + + - name: analyze-workload-factors + ref: + name: analyze-treatment + params: + - name: factorType + value: workload + - name: factorMapping + value: $(params.factorMapping) + - name: treatment + value: $(params.treatment) + + - name: display-treatment-analysis + image: alpine:3.20 + env: + - name: MODELSERVICE_SET_ARGS + value: "$(steps.analyze-modelservice-factors.results.treatmentAnalysis)" + - name: GAIE_SET_ARGS + value: "$(steps.analyze-gaie-factors.results.treatmentAnalysis)" + - name: WORKLOAD_SET_ARGS + value: "$(steps.analyze-workload-factors.results.treatmentAnalysis)" + + script: | + #!/bin/sh + apk add --no-cache jq yq-go >/dev/null + jq --version + + echo "helm upgrade --install ... $(echo ${MODELSERVICE_SET_ARGS} | jq '.setArgs')" + echo "helm upgrade --install ... 
$(echo ${GAIE_SET_ARGS} | jq '.setArgs')" + echo "$(echo ${WORKLOAD_SET_ARGS} | jq '.updates')" - name: prepare-namespace image: quay.io/openshift/origin-cli:4.21 @@ -257,8 +318,8 @@ spec: value: 15m - name: valuesYamlUrl value: "$(params.experimentBaseUrl)/gaie-values.yaml" - - name: extraArgs - value: "--set inferenceExtension.pluginsConfigFile=$(params.gaiePluginConfig)" + - name: treatmentAnalysis + value: "$(steps.analyze-gaie-factors.results.treatmentAnalysis)" - name: dry-run value: $(params.dry-run) @@ -287,6 +348,8 @@ spec: --set routing.inferencePool.name=$(params.experimentName)-gaie-NAMESPACE_HASH --set routing.httpRoute.rules[0].backendRefs[0].name=$(params.experimentName)-gaie-NAMESPACE_HASH --set routing.httpRoute.rules[1].backendRefs[0].name=$(params.experimentName)-gaie-NAMESPACE_HASH + - name: treatmentAnalysis + value: "$(steps.analyze-modelservice-factors.results.treatmentAnalysis)" - name: dry-run value: $(params.dry-run) @@ -338,67 +401,33 @@ spec: # --timeout=${MODEL_START_TIMEOUT}s # echo "✅ prefill pods serving model ${MODEL_ID} ready" - - name: workload - image: $(params.llmdbenchImageRegistry)/$(params.llmdbenchImageRepo)/$(params.llmdbenchImageName):$(params.llmdbenchImageTag) - env: - - name: LLMDBENCH_RUN_EXPERIMENT_LAUNCHER - value: "1" - - name: LLMDBENCH_RUN_EXPERIMENT_ANALYZE_LOCALLY - value: "0" - - name: LLMDBENCH_RUN_EXPERIMENT_HARNESS - value: "$(params.harnessName)-llm-d-benchmark.sh" - - name: LLMDBENCH_RUN_EXPERIMENT_ANALYZER - value: "$(params.harnessName)-analyze_results.sh" - - name: LLMDBENCH_RUN_EXPERIMENT_HARNESS_WORKLOAD_NAME - value: "$(params.harnessProfile)" - - name: LLMDBENCH_HARNESS_NAME - value: "$(params.harnessName)" - - name: LLMDBENCH_HARNESS_NAMESPACE - value: "$(params.targetNamespacePrefix)-$(context.taskRun.name)" - - name: LLMDBENCH_HARNESS_STACK_TYPE - value: "llm-d" - - name: LLMDBENCH_HARNESS_STACK_ENDPOINT_URL - value: "http://experiment-gateway-inference-gateway.$(params.targetNamespacePrefix)-$(context.taskRun.name).svc.cluster.local:80" - - name: LLMDBENCH_DEPLOY_METHODS - value: "modelservice" - - name: LLMDBENCH_MAGIC_ENVAR - value: "harness_pod" - - - name: LLMDBENCH_LLMD_IMAGE_REGISTRY - value: "$(params.llmdbenchImageRegistry)" - - name: LLMDBENCH_LLMD_IMAGE_REPO - value: "$(params.llmdbenchImageRepo)" - - name: LLMDBENCH_LLMD_IMAGE_NAME - value: "$(params.llmdbenchImageName)" - - name: LLMDBENCH_LLMD_IMAGE_TAG - value: "$(params.llmdbenchImageTag)" - - # TBD add_env_vars_to_pod $LLMDBENCH_CONTROL_ENV_VAR_LIST_TO_POD - - name: LLMDBENCH_DEPLOY_CURRENT_MODEL - value: "$(params.model-id)" - - name: LLMDBENCH_VLLM_MODELSERVICE_PREFILL_REPLICAS - value: "0" - - name: LLMDBENCH_VLLM_MODELSERVICE_DECODE_REPLICAS - value: "2" - - name: LLMDBENCH_VLLM_COMMON_AFFINITY - value: "nvidia.com/gpu.product:NVIDIA-H100-80GB-HBM3" - - name: LLMDBENCH_VLLM_MODELSERVICE_DECODE_TENSOR_PARALLELISM - value: "4" - - name: LLMDBENCH_VLLM_MODELSERVICE_DECODE_DATA_PARALLELISM - value: "1" - - name: LLMDBENCH_VLLM_MODELSERVICE_PREFILL_TENSOR_PARALLELISM - value: "1" - - name: LLMDBENCH_VLLM_MODELSERVICE_PREFILL_DATA_PARALLELISM - value: "1" - - - name: HF_TOKEN_SECRET - value: "hf-secret" - - name: HUGGING_FACE_HUB_TOKEN - valueFrom: - secretKeyRef: - name: hf-secret - key: HF_TOKEN + - name: inference-perf-prepare-profile + ref: + name: inference-perf-prepare-profile + params: + - name: harnessName + value: $(params.harnessName) + - name: harnessProfile + value: $(params.harnessProfile) + - name: treatmentAnalysis + value: 
$(steps.analyze-workload-factors.results.treatmentAnalysis) + - name: model-id + value: $(params.model-id) + - name: namespace + value: $(params.targetNamespacePrefix)-$(context.taskRun.name) + - name: pipelineUID + value: $(params.pipelineUID) + - name: inference-perf-run + ref: + name: inference-perf-run + params: + - name: harnessName + value: $(params.harnessName) + - name: harnessProfile + value: $(params.harnessProfile) + - name: pipelineUID + value: $(params.pipelineUID) computeResources: requests: memory: "32Gi" @@ -407,131 +436,58 @@ spec: memory: "32Gi" cpu: "16" - script: | - #!/bin/bash - - export EXPERIMENT_ID="experiment-$(echo -n $(params.pipelineUID) | cut -c1-8)" - export LLMDBENCH_RUN_EXPERIMENT_ID="${EXPERIMENT_ID}" - export LLMDBENCH_RUN_EXPERIMENT_RESULTS_DIR="$(workspaces.data.path)/$(params.harnessName)_${EXPERIMENT_ID}_$(context.taskRun.name)" - export LLMDBENCH_CONTROL_WORK_DIR="$(workspaces.data.path)/$(params.harnessName)_${EXPERIMENT_ID}_$(context.taskRun.name)" - export LLMDBENCH_HARNESS_STACK_NAME=$(echo "$(params.model-id)" | tr '[:upper:]' '[:lower:]' | sed 's/[./]/-/g') - export LLMDBENCH_DEPLOY_CURRENT_MODELID="${LLMDBENCH_HARNESS_STACK_NAME}" - export LLMDBENCH_DEPLOY_CURRENT_TOKENIZER="$(params.model-id)" - - export QUESTION_LEN=$(params.question_len) - export OUTPUT_LEN=$(params.output_len) - - get_profiles() { - git init llm-d-benchmark - cd llm-d-benchmark - git remote add origin https://github.com/llm-d/llm-d-benchmark.git - git config core.sparseCheckout true - echo "workload/profiles/" >> .git/info/sparse-checkout - git pull origin main - } - - if [ "$(params.dry-run)" = "true" ]; then - echo ">> skipping" - exit 0 - fi - - get_profiles - - echo "creating CONTROL directories" - mkdir -p ${LLMDBENCH_CONTROL_WORK_DIR}/setup - rm -f ${LLMDBENCH_CONTROL_WORK_DIR}/setup/sed-commands - touch ${LLMDBENCH_CONTROL_WORK_DIR}/setup/sed-commands - - workload=$(echo $(params.harnessProfile) | sed 's^\.yaml^^g' ) - echo "workload = $workload" - workload_template_list=$(find workload/profiles/ -name "${workload}.yaml.in") - echo "workload_template_list = $workload_template_list" - - for workload_template_full_path in $workload_template_list; do - echo "PROCESSING $workload_template_full_path" - workload_template_type=$(echo ${workload_template_full_path} | rev | cut -d '/' -f 2 | rev) - echo "workload_template_type = $workload_template_type" - workload_template_file_name=$(echo ${workload_template_full_path} | rev | cut -d '/' -f 1 | rev | sed -e "s^\.yaml.in$^^g") - echo "workload_template_file_name = $workload_template_file_name" - ## - workload_output_file=${LLMDBENCH_CONTROL_WORK_DIR}/${workload_template_file_name}.yaml - # workload_output_file=${LLMDBENCH_CONTROL_WORK_DIR}/$workload_template_type/$workload_template_file_name - echo "workload_output_file = $workload_output_file" - ## - mkdir -p ${LLMDBENCH_CONTROL_WORK_DIR}/$workload_template_type - - echo "s^REPLACE_ENV_LLMDBENCH_DEPLOY_CURRENT_MODEL^${LLMDBENCH_DEPLOY_CURRENT_MODEL}^g" >> ${LLMDBENCH_CONTROL_WORK_DIR}/setup/sed-commands - echo "s^REPLACE_ENV_LLMDBENCH_HARNESS_STACK_ENDPOINT_URL^${LLMDBENCH_HARNESS_STACK_ENDPOINT_URL}^g" >> ${LLMDBENCH_CONTROL_WORK_DIR}/setup/sed-commands - echo "s^REPLACE_ENV_LLMDBENCH_DEPLOY_CURRENT_TOKENIZER^${LLMDBENCH_DEPLOY_CURRENT_TOKENIZER}^g" >> ${LLMDBENCH_CONTROL_WORK_DIR}/setup/sed-commands - echo "s^question_len: .*^question_len: ${QUESTION_LEN}^g" >> ${LLMDBENCH_CONTROL_WORK_DIR}/setup/sed-commands - echo "s^output_len: .*^output_len: ${OUTPUT_LEN}^g" >> 
${LLMDBENCH_CONTROL_WORK_DIR}/setup/sed-commands - echo "s^ path: .*^ path: ${LLMDBENCH_RUN_EXPERIMENT_RESULTS_DIR}^g" >> ${LLMDBENCH_CONTROL_WORK_DIR}/setup/sed-commands - - echo "------" - cat ${LLMDBENCH_CONTROL_WORK_DIR}/setup/sed-commands - echo "------" - echo "workload_output_file=$workload_output_file" - sed -f ${LLMDBENCH_CONTROL_WORK_DIR}/setup/sed-commands $workload_template_full_path > $workload_output_file - - cat $workload_output_file - done - - llm-d-benchmark.sh - - - name: upload-results - image: ubuntu:24.04 - # Tried amazon/aws-cli:2.31.9 but latest tar available via `dnf install tar -y` is 1.34. - # There were sporadic errors "file changed as we read it". It may be caused by the way - # tar identifes file changes in v 1.34 (via ctime). A recommended solution to move to 1.35. - # See https://stackoverflow.com/a/77765876 and tar release notes https://lists.gnu.org/archive/html/info-gnu/2023-07/msg00005.html) - # A smaller image is probably desirable. A restriction is that AWS CLI v2 requires glibc. - workingDir: $(workspaces.data.path) - env: - - name: AWS_ACCESS_KEY_ID - valueFrom: - secretKeyRef: - name: $(params.s3-keys) - key: AWS_ACCESS_KEY_ID - - name: AWS_SECRET_ACCESS_KEY - valueFrom: - secretKeyRef: - name: $(params.s3-keys) - key: AWS_SECRET_ACCESS_KEY - - name: AWS_EC2_METADATA_DISABLED - value: "true" - script: | - #!/usr/bin/env sh - - apt-get update && \ - apt-get install -y --no-install-recommends ca-certificates curl unzip tar gzip && \ - rm -rf /var/lib/apt/lists/* - - curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o /tmp/awscliv2.zip && \ - unzip /tmp/awscliv2.zip -d /tmp && \ - /tmp/aws/install && \ - rm -rf /tmp/aws /tmp/awscliv2.zip - - tar --version && gzip --version && aws --version - - EXPERIMENT_ID="experiment-$(echo -n $(params.pipelineUID) | cut -c1-8)" - EXPERIMENT_RESULTS_FOLDER="$(params.harnessName)_${EXPERIMENT_ID}_$(context.taskRun.name)" - ARCHIVE_NAME="${EXPERIMENT_RESULTS_FOLDER}.tar.gz" - - tar --version && gzip --version && aws --version + - name: inference-perf-analyze-results + ref: + name: inference-perf-analyze-results + params: + - name: harnessName + value: $(params.harnessName) + - name: pipelineUID + value: $(params.pipelineUID) - tar -czf ${ARCHIVE_NAME} \ - -C "$(workspaces.data.path)" ${EXPERIMENT_RESULTS_FOLDER} + - name: vllm-benchmark-prepare-profile + ref: + name: vllm-benchmark-prepare-profile + params: + - name: harnessName + value: $(params.harnessName) + - name: harnessProfile + value: $(params.harnessProfile) + - name: treatmentAnalysis + value: $(steps.analyze-workload-factors.results.treatmentAnalysis) + - name: model-id + value: $(params.model-id) + - name: namespace + value: $(params.targetNamespacePrefix)-$(context.taskRun.name) + - name: pipelineUID + value: $(params.pipelineUID) - aws s3 cp ${ARCHIVE_NAME} "s3://$(params.s3-bucket)/${ARCHIVE_NAME}" \ - --endpoint-url "$(params.s3-endpoint)" \ - --content-type "application/x-tar" \ - --content-encoding "gzip" \ - --no-progress - # --recursive \ + - name: vllm-benchmark-run + ref: + name: vllm-benchmark-run + params: + - name: harnessName + value: $(params.harnessName) + - name: harnessProfile + value: $(params.harnessProfile) + - name: pipelineUID + value: $(params.pipelineUID) + computeResources: + requests: + memory: "32Gi" + cpu: "16" + limits: + memory: "32Gi" + cpu: "16" - rm -rf ${ARCHIVE_NAME} - - echo "✅ Uploaded results to ${ARCHIVE_NAME}" + - name: vllm-benchmark-analyze-results + ref: + name: vllm-benchmark-analyze-results + 
params: + - name: harnessName + value: $(params.harnessName) + - name: pipelineUID + value: $(params.pipelineUID) - name: delete-namespace image: alpine/kubectl:1.34.1 diff --git a/tekton-poc/pipeline/pd-disaggregation-pr.yaml b/tekton-poc/pipeline/pd-disaggregation-pr.yaml new file mode 100644 index 00000000..cc694459 --- /dev/null +++ b/tekton-poc/pipeline/pd-disaggregation-pr.yaml @@ -0,0 +1,150 @@ +apiVersion: tekton.dev/v1 +kind: PipelineRun +metadata: + name: pd +spec: + taskRunTemplate: + serviceAccountName: helm-installer + workspaces: + - name: data + persistentVolumeClaim: + claimName: workspace-pvc + params: + - name: targetNamespacePrefix + # This can be anything. + value: $(context.pipelineRun.namespace) + - name: model-id + value: "meta-llama/Llama-3.1-8B-Instruct" + + # Harness / Workload + - name: harnessName + value: vllm-benchmark + - name: harnessProfile + value: random_concurrent.yaml + + # Output Location + - name: s3-keys + value: ibm-cos-secret + - name: s3-bucket + value: "cloud-object-storage-cos-standard-ere" + - name: s3-endpoint + value: "https://s3.us-east.cloud-object-storage.appdomain.cloud" + + # Control + - name: debug + value: true + + pipelineSpec: + workspaces: + - name: data + tasks: + - name: run-experiment + taskRef: + name: experiment + workspaces: + - name: data + workspace: data + params: + - name: targetNamespacePrefix + value: $(params.targetNamespacePrefix) + - name: model-id + value: $(params.model-id) + - name: experimentBaseUrl + value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/pd-disaggregation/ + + - name: s3-keys + value: $(params.s3-keys) + - name: s3-bucket + value: $(params.s3-bucket) + - name: s3-endpoint + value: $(params.s3-endpoint) + + - name: harnessName + value: $(params.harnessName) + - name: harnessProfile + value: $(params.harnessProfile) + + - name: factorMapping + value: | + { + "modelservice": { + "prefillReplicas": "prefill.replicas", + "prefillTensorParallelism": "prefill.parallelism.tensor", + "decodeReplicas": "decode.replicas", + "decodeTensorParallelism": "decode.parallelism.tensor" + }, + "gaie": { + "gaiePluginConfig": "inferenceExtension.pluginsConfigFile" + }, + "workload": { + "max-concurrency": "max-concurrency", + "num_prompts": "num-prompts", + "question_len": "data.shared_prefix.question_len", + "output_len": "data.shared_prefix.output_len" + } + } + + - name: debug + value: "$(params.debug)" + - name: pipelineUID + value: "$(context.pipelineRun.uid)" + + matrix: + include: + - name: combo-0 + params: + - name: treatment + value: | + { + "prefillReplicas": 1, + "prefillTensorParallelism": 1, + "decodeReplicas": 1, + "decodeTensorParallelism": 1, + "max-concurrency": 1, + "num-prompts": 10 + } + # - name: combo-1 + # params: + # - name: treatment + # value: | + # { + # "prefillReplicas": 1, + # "prefillTensorParallelism": 2, + # "decodeReplicas": 1, + # "decodeTensorParallelism": 1, + # "max-concurrency": 1, + # "num-prompts": 10 + # } + + # params: + # - name: max-concurrency + # value: + # - "1" + # # - "8" + # # - "32" + # # - "64" + # # - "128" + # # - "256" + # - name: num-prompts + # value: + # - "10" + # # - "80" + # # - "320" + # # - "640" + # # - "1280" + # # - "2560" + +# LLMDBENCH_VLLM_COMMON_REPLICAS: "2,4" +# decode.replicas +# LLMDBENCH_VLLM_COMMON_TENSOR_PARALLELISM: "8" +# decode.parallelism.tensor + +# LLMDBENCH_VLLM_MODELSERVICE_PREFILL_REPLICAS: "2,4,6,8" +# prefill.replicas +# LLMDBENCH_VLLM_MODELSERVICE_PREFILL_TENSOR_PARALLELISM: 
"1,2" +# prefill.parallelism.tensor +# LLMDBENCH_VLLM_MODELSERVICE_DECODE_REPLICAS: "1,2,4" +# decode.replicas +# LLMDBENCH_VLLM_MODELSERVICE_DECODE_TENSOR_PARALLELISM: "2,4,8" +# decodeTensorParallelism +# decode.parallelism.tensor \ No newline at end of file diff --git a/tekton-poc/pipeline/pipelinerun-matrix-subset.yaml b/tekton-poc/pipeline/pipelinerun-matrix-subset.yaml index d374a269..11c73b85 100644 --- a/tekton-poc/pipeline/pipelinerun-matrix-subset.yaml +++ b/tekton-poc/pipeline/pipelinerun-matrix-subset.yaml @@ -17,6 +17,8 @@ spec: value: "Qwen/Qwen3-0.6B" # Harness / Workload + - name: harnessName + value: inference-perf - name: harnessProfile value: shared_prefix_synthetic_short.yaml @@ -30,7 +32,7 @@ spec: # Control - name: debug - value: false + value: true pipelineSpec: workspaces: @@ -57,28 +59,58 @@ spec: - name: s3-endpoint value: $(params.s3-endpoint) + - name: harnessName + value: $(params.harnessName) - name: harnessProfile value: $(params.harnessProfile) + - name: factorMapping + value: | + { + "modelservice": { + "prefillReplicas": "prefill.replicas", + "prefillTensorParallelism": "prefill.parallelism.tensor", + "decodeReplicas": "decode.replicas", + "decodeTensorParallelism": "decode.parallelism.tensor" + }, + "gaie": { + "gaiePluginConfig": "inferenceExtension.pluginsConfigFile" + }, + "workload": { + "max-concurrency": "max-concurrency", + "num_prompts": "num-prompts", + "question_len": "data.shared_prefix.question_len", + "output_len": "data.shared_prefix.output_len" + } + } + + - name: max-concurrency + value: "1" + - name: num-prompts + value: "10" + - name: debug value: "$(params.debug)" - name: pipelineUID value: "$(context.pipelineRun.uid)" matrix: include: - - name: combo-1 + - name: combo-0 params: - - name: gaiePluginConfig - value: "inf-sche-none.yaml" - - name: question_len - value: "100" - - name: output_len - value: "100" - - name: combo-2 + - name: treatment + value: | + { + "gaiePluginConfig": "inf-sche-queue.yaml", + "question_len": 100, + "output_len": 100 + } + - name: combo-1 params: - - name: gaiePluginConfig - value: "inf-sche-prefix.yaml" - - name: question_len - value: "300" - - name: output_len - value: "300" + - name: treatment + value: | + { + "gaiePluginConfig": "inf-sche-prefix.yaml", + "question_len": 300, + "output_len": 300 + } + diff --git a/tekton-poc/pipeline/steps/inference-perf.yaml b/tekton-poc/pipeline/steps/inference-perf.yaml new file mode 100644 index 00000000..50f91936 --- /dev/null +++ b/tekton-poc/pipeline/steps/inference-perf.yaml @@ -0,0 +1,366 @@ +apiVersion: tekton.dev/v1beta1 +kind: StepAction +metadata: + name: inference-perf-prepare-profile +spec: + params: + - name: harnessName + - name: harnessProfile + - name: model-id + - name: namespace + - name: treatmentAnalysis + - name: pipelineUID + env: + - name: REQUESTED_HARNESS_NAME + value: "$(params.harnessName)" + - name: MY_HARNESS_NAME + value: "inference-perf" + - name: HARNESS_PROFILE + value: "$(params.harnessProfile)" + + - name: TREATMENT_ANALYSIS + value: "$(params.treatmentAnalysis)" + + - name: LLMDBENCH_DEPLOY_CURRENT_MODEL + value: "$(params.model-id)" + - name: LLMDBENCH_HARNESS_STACK_ENDPOINT_URL + value: "http://experiment-gateway-inference-gateway.$(params.namespace).svc.cluster.local:80" + + - name: DATA_ROOT_DIR + value: $(workspaces.data.path) + - name: MY_TASK_NAME + value: $(context.taskRun.name) + - name: MY_PIPELINE_UID + value: $(params.pipelineUID) + + # https://github.com/llm-d/llm-d-benchmark/blob/main/build/Dockerfile#L1C6-L1C33 + 
image: python:3.12.9-slim-bookworm + script: | + #!/bin/bash + + if [ "${REQUESTED_HARNESS_NAME}" != "${MY_HARNESS_NAME}" ]; then + echo "Requested harness not ${MY_HARNESS_NAME}, skipping" + exit 0 + fi + + # TBD is this necessary or is it already there? + apt-get update + apt-get install -y --no-install-recommends curl ca-certificates jq + curl -fsSL https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 \ + -o /usr/local/bin/yq + chmod +x /usr/local/bin/yq + jq --version + yq --version + + # https://github.com/llm-d/llm-d-benchmark/blob/main/build/Dockerfile#L48-L54 + # https://github.com/llm-d/llm-d-benchmark/blob/main/setup/run.sh + + EXPERIMENT_ID="experiment-$(echo -n ${MY_PIPELINE_UID} | cut -c1-8)" + RESULTS_DIR="${DATA_ROOT_DIR}/${MY_HARNESS_NAME}_${EXPERIMENT_ID}_${MY_TASK_NAME}" + CONTROL_DIR="${DATA_ROOT_DIR}/${MY_HARNESS_NAME}_${EXPERIMENT_ID}_${MY_TASK_NAME}" + RUN_DIR=$(pwd) + + echo "🔄 Installing required tools" + apt-get update + apt-get install -y \ + wget \ + && apt-get clean && rm -rf /var/cache/apt + + # Ensure all folders created + mkdir -p $RESULTS_DIR + mkdir -p $CONTROL_DIR/setup + rm -rf $CONTROL_DIR/setup/sed-commands + touch $CONTROL_DIR/setup/sed-commands + mkdir -p ${CONTROL_DIR}/workload/profiles/${MY_HARNESS_NAME}/templates + + cd ${RUN_DIR}/vllm-benchmark/ + + # Define constants: input profile template name and location; final profile name and location + workload=$(echo ${HARNESS_PROFILE} | sed 's^\.yaml^^g' ) + workload_template=${workload}.yaml.in + workload_template_path=${CONTROL_DIR}/workload/profiles/${MY_HARNESS_NAME}/templates/${workload_template} + workload_profile=${workload}.yaml + workload_profile_path=${CONTROL_DIR}/workload/profiles/${MY_HARNESS_NAME}/${workload_profile} + + echo "🔄 Prepare workload profile" + # Fetch profile template from llmd-benchmark + wget -O ${workload_template_path} \ + --quiet \ + https://raw.githubusercontent.com/llm-d/llm-d-benchmark/refs/heads/main/workload/profiles/${MY_HARNESS_NAME}/${workload_template} + + # Apply treatment to profile template to produce final profile + echo "s^REPLACE_ENV_LLMDBENCH_DEPLOY_CURRENT_MODEL^${LLMDBENCH_DEPLOY_CURRENT_MODEL}^g" >> ${CONTROL_DIR}/setup/sed-commands + echo "s^REPLACE_ENV_LLMDBENCH_HARNESS_STACK_ENDPOINT_URL^${LLMDBENCH_HARNESS_STACK_ENDPOINT_URL}^g" >> ${CONTROL_DIR}/setup/sed-commands + echo "s^REPLACE_ENV_LLMDBENCH_DEPLOY_CURRENT_TOKENIZER^${LLMDBENCH_DEPLOY_CURRENT_TOKENIZER}^g" >> ${CONTROL_DIR}/setup/sed-commands + echo "---------- sed-commands" + cat ${CONTROL_DIR}/setup/sed-commands + echo "----------" + sed -f ${CONTROL_DIR}/setup/sed-commands ${workload_template_path} > ${workload_profile_path} + + # TBD eliminate the TARGET_FILE env variable + TARGET_FILE=${workload_profile_path} + echo "${TREATMENT_ANALYSIS}" | jq '.updates' > /tmp/updates.json + echo ">>> /tmp/updates.json" + cat /tmp/updates.json + + if [ ! -f "$TARGET_FILE" ]; then + echo "ERROR: File not found: $TARGET_FILE" >&2 + exit 1 + fi + + # Apply updates to JSON or YAML + if [ "$(jq 'length' /tmp/updates.json)" -gt 0 ]; then + ext="${TARGET_FILE##*.}" + tmp="${TARGET_FILE}.tmp" + + # TBD eliminate the json path (copilot generated this); profiles are yaml files + if [ "$ext" = "json" ]; then + jq --slurpfile upds /tmp/updates.json ' + reduce $upds[0][] as $u (. ; setpath($u.path; $u.value)) + ' "$TARGET_FILE" > "$tmp" + mv "$tmp" "$TARGET_FILE" + else + # YAML path: YAML → JSON → apply → YAML + yq -o=json '.' 
"$TARGET_FILE" \ + | jq --slurpfile upds /tmp/updates.json ' + reduce $upds[0][] as $u (. ; setpath($u.path; $u.value)) + ' \ + | yq -P > "$tmp" + mv "$tmp" "$TARGET_FILE" + fi + fi + + echo "---------- workload profile" + cat ${workload_profile_path} + echo "----------" + echo "✅ workload profile ready" +--- +apiVersion: tekton.dev/v1beta1 +kind: StepAction +metadata: + name: inference-perf-run +spec: + params: + - name: harnessName + - name: harnessProfile + - name: pipelineUID + env: + - name: REQUESTED_HARNESS_NAME + value: "$(params.harnessName)" + - name: MY_HARNESS_NAME + value: "inference-perf" + - name: HARNESS_PROFILE + value: "$(params.harnessProfile)" + + - name: GIT_REPO_URL + value: "https://github.com/kubernetes-sigs/inference-perf.git" + - name: GIT_REVISION + value: "main" + - name: GIT_COMMIT + value: "1ccc48b6bb9c9abb61558b719041fb000b265e59" + + - name: DATA_ROOT_DIR + value: $(workspaces.data.path) + - name: MY_PIPELINE_UID + value: $(params.pipelineUID) + - name: MY_TASK_NAME + value: $(context.taskRun.name) + + # https://github.com/llm-d/llm-d-benchmark/blob/main/build/Dockerfile#L1C6-L1C33 + image: python:3.12.9-slim-bookworm + script: | + #!/usr/bin/env bash + + # https://github.com/llm-d/llm-d-benchmark/blob/main/workload/harnesses/inference-perf-llm-d-benchmark.sh + + if [ "${REQUESTED_HARNESS_NAME}" != "${MY_HARNESS_NAME}" ]; then + echo "Requested harness not ${MY_HARNESS_NAME}, skipping" + exit 0 + fi + + # https://github.com/llm-d/llm-d-benchmark/blob/main/build/Dockerfile#L56-L62 + # https://github.com/llm-d/llm-d-benchmark/blob/main/setup/run.sh + + EXPERIMENT_ID="experiment-$(echo -n ${MY_PIPELINE_UID} | cut -c1-8)" + RESULTS_DIR="${DATA_ROOT_DIR}/${MY_HARNESS_NAME}_${EXPERIMENT_ID}_${MY_TASK_NAME}" + CONTROL_DIR="${DATA_ROOT_DIR}/${MY_HARNESS_NAME}_${EXPERIMENT_ID}_${MY_TASK_NAME}" + RUN_DIR=$(pwd) + + # TODO figure out which are actually needed for each step + echo "🔄 Installing required tools" + apt-get update + apt-get install -y \ + git \ + pip \ + yq \ + && apt-get clean && rm -rf /var/cache/apt + + echo "🔄 Cloning and installing harness: ${MY_HARNESS_NAME}" + git clone --branch ${GIT_REVISION} ${GIT_REPO_URL} + cd inference-perf + git checkout ${GIT_COMMIT} + pip install . + + # profile name and location + workload=$(echo ${HARNESS_PROFILE} | sed 's^\.yaml^^g' ) + workload_profile=${workload}.yaml + workload_profile_path=${CONTROL_DIR}/workload/profiles/${MY_HARNESS_NAME}/${workload_profile} + + # update .storage.local_storage.path in profile + pushd "$RESULTS_DIR" + yq '.storage["local_storage"]["path"] = '\"${RESULTS_DIR}\" <"${workload_profile_path}" -y >${workload_profile} + + # run inference-perf + inference-perf --config_file "$(realpath ./${workload_profile})" > >(tee -a ${RESULTS_DIR}/stdout.log) 2> >(tee -a ${RESULTS_DIR}/stderr.log >&2) + export LLMDBENCH_RUN_EXPERIMENT_HARNESS_RC=$? + + # If benchmark harness returned with an error, exit here + if [[ $LLMDBENCH_RUN_EXPERIMENT_HARNESS_RC -ne 0 ]]; then + echo "❌ Harness returned with error $LLMDBENCH_RUN_EXPERIMENT_HARNESS_RC" + exit $LLMDBENCH_RUN_EXPERIMENT_HARNESS_RC + fi + echo "✅ Harness completed successfully." 
+--- +apiVersion: tekton.dev/v1beta1 +kind: StepAction +metadata: + name: inference-perf-analyze-results +spec: + params: + - name: harnessName + - name: pipelineUID + env: + - name: REQUESTED_HARNESS_NAME + value: "$(params.harnessName)" + - name: MY_HARNESS_NAME + value: "inference-perf" + + - name: GIT_REPO_URL + value: "https://github.com/kubernetes-sigs/inference-perf.git" + - name: GIT_REVISION + value: "main" + - name: GIT_COMMIT + value: "1ccc48b6bb9c9abb61558b719041fb000b265e59" + + - name: DATA_ROOT_DIR + value: $(workspaces.data.path) + - name: MY_PIPELINE_UID + value: $(params.pipelineUID) + - name: MY_TASK_NAME + value: $(context.taskRun.name) + +# https://github.com/llm-d/llm-d-benchmark/blob/main/build/Dockerfile#L1C6-L1C33 + image: python:3.12.9-slim-bookworm + script: | + #!/usr/bin/env bash + + EXPERIMENT_ID="experiment-$(echo -n ${MY_PIPELINE_UID} | cut -c1-8)" + RESULTS_DIR="${DATA_ROOT_DIR}/${MY_HARNESS_NAME}_${EXPERIMENT_ID}_${MY_TASK_NAME}" + + if [ "${REQUESTED_HARNESS_NAME}" != "${MY_HARNESS_NAME}" ]; then + echo "Requested harness not ${MY_HARNESS_NAME}, skipping" + exit 0 + fi + + echo "🔄 Installing requirements" + apt-get update + apt-get install -y \ + git \ + pip \ + && apt-get clean && rm -rf /var/cache/apt + + git clone --branch ${GIT_REVISION} ${GIT_REPO_URL} + cd inference-perf + git checkout ${GIT_COMMIT} + pip install . + + cat < requirements-analysis.txt + matplotlib>=3.7.0 + numpy>=2.3.1 + seaborn>=0.12.0 + pandas>=2.2.3 + pydantic>=2.11.7 + PyYAML>=6.0.2 + scipy>=1.16.0 + requests>=2.32.5 + EOF + + cat requirements-analysis.txt + pip --version + pip install --no-cache-dir \ + --disable-pip-version-check \ + --upgrade \ + -r ./requirements-analysis.txt \ + --root-user-action=ignore + pip list + + # Download covert python from llm-d-benchmark + # TBD: should the python be embedded in the step? A separate step perhaps. + export ROOT_DIR=workload/report + export BRANCH=main + + cat < >(tee -a $RESULTS_DIR/stderr.log >&2) + # Report errors but don't quit + export RUN_EXPERIMENT_CONVERT_RC=$? + if [[ $RUN_EXPERIMENT_CONVERT_RC -ne 0 ]]; then + echo "./convert.py returned with error $RUN_EXPERIMENT_CONVERT_RC converting: $result" + fi + done + + # Define function to call analysis so can call multiple times + # https://github.com/llm-d/llm-d-benchmark/blob/main/analysis/inference-perf-analyze_results.sh + analyze_results () { + mkdir -p $RESULTS_DIR/analysis + sleep 60 + tm=$(date) + inference-perf --analyze "$RESULTS_DIR" + ec=$? + find $RESULTS_DIR -type f -newermt "${tm}" -exec mv -t "$RESULTS_DIR"/analysis {} + + return $ec + } + + # https://github.com/llm-d/llm-d-benchmark/blob/main/build/llm-d-benchmark.sh#L63-L74 + echo "🔄 Running analysis" + # Try to run analysis twice then give up + analyze_results + ec=$? 
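+      # NOTE: ec keeps the exit code of the first attempt; the retry below is
+      # best-effort and does not update it (see the exit at the end of this script).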
+ if [[ $ec -ne 0 ]]; then + echo "execution of analyzer failed, wating 120 seconds and trying again" + sleep 120 + set -x + analyze_results + fi + # Return with error code of first iteration of experiment analyzer + # TBD modify this message depending on success + echo "✅ Results analyzed and reports generated" + exit $ec diff --git a/tekton-poc/pipeline/stepactions.yaml b/tekton-poc/pipeline/steps/stepactions.yaml similarity index 96% rename from tekton-poc/pipeline/stepactions.yaml rename to tekton-poc/pipeline/steps/stepactions.yaml index 60d28385..282bfbff 100644 --- a/tekton-poc/pipeline/stepactions.yaml +++ b/tekton-poc/pipeline/steps/stepactions.yaml @@ -165,6 +165,9 @@ spec: - name: extraArgs type: string default: "" + - name: treatmentAnalysis + type: string + default: "" - name: dry-run type: string @@ -224,6 +227,9 @@ spec: - name: HELM_EXTRA_ARGS value: "$(params.extraArgs)" + - name: TREATMENT_ANALYSIS + value: "$(params.treatmentAnalysis)" + - name: DRY_RUN value: $(params.dry-run) @@ -236,6 +242,11 @@ spec: exit 0 fi + apk add --no-cache jq >/dev/null + + echo ">>> helm step: treatment" + printf "%s" "${TREATMENT_ANALYSIS}" + SHA256CMD=$(type -p gsha256sum || type -p sha256sum) NAMESPACE_HASH=$(echo -n "$HELM_NAMESPACE" | $SHA256CMD | awk '{print $1}' | cut -c1-8) HELM_RELEASE=$(echo "$HELM_RELEASE" | sed "s/NAMESPACE_HASH/$NAMESPACE_HASH/g") @@ -303,6 +314,10 @@ spec: *) if [ -n "${HELM_REPO_NAME:-}" ]; then CHART_REF="${HELM_REPO_NAME}/${HELM_CHART}"; fi ;; esac + if [ -n "${TREATMENT_ANALYSIS:-}" ]; then + HELM_EXTRA_ARGS="${HELM_EXTRA_ARGS} $(echo ${TREATMENT_ANALYSIS} | jq -r '.setArgs')" + fi + if [ -n "${HELM_EXTRA_ARGS:-}" ]; then HELM_EXTRA_ARGS=$(echo "$HELM_EXTRA_ARGS" | sed "s/NAMESPACE_HASH/$NAMESPACE_HASH/g") fi diff --git a/tekton-poc/pipeline/steps/treatment.yaml b/tekton-poc/pipeline/steps/treatment.yaml new file mode 100644 index 00000000..b371836f --- /dev/null +++ b/tekton-poc/pipeline/steps/treatment.yaml @@ -0,0 +1,117 @@ +apiVersion: tekton.dev/v1beta1 +kind: StepAction +metadata: + name: analyze-treatment +spec: + description: | + Produce '--set/--set-string path=value' flags for factorType and + apply values into a JSON/YAML file. Works with flat or nested treatment. + image: alpine:3.20 + # Pass params via env (StepAction scripts cannot use $(params.*) directly). + # We'll read these envs inside the script. 
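+  # Illustrative example (values mirror the sample PipelineRuns; not authoritative):
+  #   factorType    = gaie
+  #   factorMapping = {"gaie": {"gaiePluginConfig": "inferenceExtension.pluginsConfigFile"}}
+  #   treatment     = {"gaiePluginConfig": "inf-sche-queue.yaml", "question_len": 100}
+  # would produce a treatmentAnalysis result roughly like:
+  #   {"updates": [{"path": ["inferenceExtension", "pluginsConfigFile"], "value": "inf-sche-queue.yaml"}],
+  #    "setArgs": "--set-string inferenceExtension.pluginsConfigFile=inf-sche-queue.yaml"}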
+ params: + - name: factorType + type: string + - name: factorMapping + type: string + description: JSON mapping + - name: treatment + type: string + description: JSON values (flat or nested by key) + # - name: file + # type: string + # description: Target file path (relative to workdir or absolute) + # - name: workdir + # type: string + # description: Working directory (usually the bound workspace path) + # default: /workspace + results: + - name: treatmentAnalysis + description: Space-separated '--set/--set-string path=value' tokens + # workingDir: $(params.workdir) + env: + - name: SELECTOR + value: $(params.factorType) + - name: MAP_JSON + value: $(params.factorMapping) + - name: VAL_JSON + value: $(params.treatment) + # - name: TARGET_FILE + # value: $(params.file) + script: | + #!/bin/sh + set -eu + apk add --no-cache jq yq >/dev/null + # jq --version + # yq --version + + # echo "$SELECTOR" + # echo "$MAP_JSON" + # echo "$VAL_JSON" + + # Build updates + flags (uses $val for type checks — fixed version) + jq -r -n \ + --arg root "$SELECTOR" \ + --argjson map "$MAP_JSON" \ + --argjson vals "$VAL_JSON" ' + ($map[$root] // {}) as $m + | if ($m | type) != "object" then + error("Key not found in mapping: " + $root) + else + (if ($vals[$root] | type) == "object" then $vals[$root] else $vals end) as $v + | { + updates: [ + $m | to_entries[] + | select($v[.key] != null) + | { path: (.value | split(".")), value: $v[.key] } + ], + setArgs: ( + [ $m | to_entries[] + | select($v[.key] != null) + | ( $v[.key] ) as $val + | if ( ($val | type) == "string" ) then + "--set-string \(.value)=\($val)" + else + "--set \(.value)=\( if ( ($val|type)=="object" or ($val|type)=="array") then ($val|tojson) else ($val|tostring) end )" + end + ] | join(" ") + ) + } + end + ' > /tmp/out.json + + # FLAGS=$(jq -r '.setArgs' /tmp/out.json) + # jq '.updates' /tmp/out.json > /tmp/updates.json + + # if [ ! -f "$TARGET_FILE" ]; then + # echo "ERROR: File not found: $TARGET_FILE" >&2 + # # still write empty result + # printf "" > "$(step.results.treatmentAnalysis.path)" + # exit 1 + # fi + + # # Apply updates to JSON or YAML + # if [ "$(jq 'length' /tmp/updates.json)" -gt 0 ]; then + # ext="${TARGET_FILE##*.}" + # tmp="${TARGET_FILE}.tmp" + + # if [ "$ext" = "json" ]; then + # jq --slurpfile upds /tmp/updates.json ' + # reduce $upds[0][] as $u (. ; setpath($u.path; $u.value)) + # ' "$TARGET_FILE" > "$tmp" + # mv "$tmp" "$TARGET_FILE" + # else + # # YAML path: YAML → JSON → apply → YAML + # yq -o=json '.' "$TARGET_FILE" \ + # | jq --slurpfile upds /tmp/updates.json ' + # reduce $upds[0][] as $u (. 
; setpath($u.path; $u.value)) + # ' \ + # | yq -P > "$tmp" + # mv "$tmp" "$TARGET_FILE" + # fi + # fi + + # printf "%s" "$(cat /tmp/out.json)" + # Emit flags as a step-scoped result + # printf "%s" "$FLAGS" > "$(step.results.treatmentAnalysis.path)" + printf "%s" "$(cat /tmp/out.json)" > "$(step.results.treatmentAnalysis.path)" diff --git a/tekton-poc/pipeline/steps/vllm-benchmark.yaml b/tekton-poc/pipeline/steps/vllm-benchmark.yaml new file mode 100644 index 00000000..8daa7aeb --- /dev/null +++ b/tekton-poc/pipeline/steps/vllm-benchmark.yaml @@ -0,0 +1,392 @@ +apiVersion: tekton.dev/v1beta1 +kind: StepAction +metadata: + name: vllm-benchmark-prepare-profile +spec: + params: + - name: harnessName + - name: harnessProfile + - name: model-id + - name: namespace + - name: treatmentAnalysis + - name: pipelineUID + env: + - name: REQUESTED_HARNESS_NAME + value: "$(params.harnessName)" + - name: MY_HARNESS_NAME + value: "vllm-benchmark" + - name: HARNESS_PROFILE + value: "$(params.harnessProfile)" + + - name: TREATMENT_ANALYSIS + value: "$(params.treatmentAnalysis)" + + - name: LLMDBENCH_DEPLOY_CURRENT_MODEL + value: "$(params.model-id)" + - name: LLMDBENCH_HARNESS_STACK_ENDPOINT_URL + value: "http://experiment-gateway-inference-gateway.$(params.namespace).svc.cluster.local:80" + + - name: DATA_ROOT_DIR + value: $(workspaces.data.path) + - name: MY_TASK_NAME + value: $(context.taskRun.name) + - name: MY_PIPELINE_UID + value: $(params.pipelineUID) + + # https://github.com/llm-d/llm-d-benchmark/blob/main/build/Dockerfile#L1C6-L1C33 + image: python:3.12.9-slim-bookworm + script: | + #!/bin/bash + + if [ "${REQUESTED_HARNESS_NAME}" != "${MY_HARNESS_NAME}" ]; then + echo "Requested harness not ${MY_HARNESS_NAME}, skipping" + exit 0 + fi + + # TBD is this necessary or is it already there? 
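+      # (The python:*-slim-bookworm base image does not appear to ship curl, jq, or yq,
+      # so the explicit install below is still needed.)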
+ apt-get update + apt-get install -y --no-install-recommends curl ca-certificates jq + curl -fsSL https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 \ + -o /usr/local/bin/yq + chmod +x /usr/local/bin/yq + jq --version + yq --version + + # https://github.com/llm-d/llm-d-benchmark/blob/main/build/Dockerfile#L56-L62 + # https://github.com/llm-d/llm-d-benchmark/blob/main/setup/run.sh + + EXPERIMENT_ID="experiment-$(echo -n ${MY_PIPELINE_UID} | cut -c1-8)" + RESULTS_DIR="${DATA_ROOT_DIR}/${MY_HARNESS_NAME}_${EXPERIMENT_ID}_${MY_TASK_NAME}" + CONTROL_DIR="${DATA_ROOT_DIR}/${MY_HARNESS_NAME}_${EXPERIMENT_ID}_${MY_TASK_NAME}" + RUN_DIR=$(pwd) + + echo "🔄 Installing required tools" + apt-get update + apt-get install -y \ + wget \ + && apt-get clean && rm -rf /var/cache/apt + + # Ensure all folders created + mkdir -p $RESULTS_DIR + mkdir -p $CONTROL_DIR/setup + rm -rf $CONTROL_DIR/setup/sed-commands + touch $CONTROL_DIR/setup/sed-commands + mkdir -p ${CONTROL_DIR}/workload/profiles/${MY_HARNESS_NAME}/templates + + cd ${RUN_DIR}/vllm-benchmark/ + + # Define constants: input profile template name and location; final profile name and location + workload=$(echo ${HARNESS_PROFILE} | sed 's^\.yaml^^g' ) + workload_template=${workload}.yaml.in + workload_template_path=${CONTROL_DIR}/workload/profiles/${MY_HARNESS_NAME}/templates/${workload_template} + workload_profile=${workload}.yaml + workload_profile_path=${CONTROL_DIR}/workload/profiles/${MY_HARNESS_NAME}/${workload_profile} + + echo "🔄 Prepare workload profile" + # Fetch profile template from llmd-benchmark + wget -O ${workload_template_path} \ + --quiet \ + https://raw.githubusercontent.com/llm-d/llm-d-benchmark/refs/heads/main/workload/profiles/${MY_HARNESS_NAME}/${workload_template} + + # Apply treatment to profile template to produce final profile + echo "s^REPLACE_ENV_LLMDBENCH_DEPLOY_CURRENT_MODEL^${LLMDBENCH_DEPLOY_CURRENT_MODEL}^g" >> ${CONTROL_DIR}/setup/sed-commands + echo "s^REPLACE_ENV_LLMDBENCH_HARNESS_STACK_ENDPOINT_URL^${LLMDBENCH_HARNESS_STACK_ENDPOINT_URL}^g" >> ${CONTROL_DIR}/setup/sed-commands + echo "s^REPLACE_ENV_LLMDBENCH_DEPLOY_CURRENT_TOKENIZER^${LLMDBENCH_DEPLOY_CURRENT_TOKENIZER}^g" >> ${CONTROL_DIR}/setup/sed-commands + echo "---------- sed-commands" + cat ${CONTROL_DIR}/setup/sed-commands + echo "----------" + sed -f ${CONTROL_DIR}/setup/sed-commands ${workload_template_path} > ${workload_profile_path} + + # TBD eliminate the TARGET_FILE env variable + TARGET_FILE=${workload_profile_path} + echo "${TREATMENT_ANALYSIS}" | jq '.updates' > /tmp/updates.json + echo ">>> /tmp/updates.json" + cat /tmp/updates.json + + if [ ! -f "$TARGET_FILE" ]; then + echo "ERROR: File not found: $TARGET_FILE" >&2 + exit 1 + fi + + # Apply updates to JSON or YAML + if [ "$(jq 'length' /tmp/updates.json)" -gt 0 ]; then + ext="${TARGET_FILE##*.}" + tmp="${TARGET_FILE}.tmp" + + # TBD eliminate the json path (copilot generated this); profiles are yaml files + if [ "$ext" = "json" ]; then + jq --slurpfile upds /tmp/updates.json ' + reduce $upds[0][] as $u (. ; setpath($u.path; $u.value)) + ' "$TARGET_FILE" > "$tmp" + mv "$tmp" "$TARGET_FILE" + else + # YAML path: YAML → JSON → apply → YAML + yq -o=json '.' "$TARGET_FILE" \ + | jq --slurpfile upds /tmp/updates.json ' + reduce $upds[0][] as $u (. 
; setpath($u.path; $u.value)) + ' \ + | yq -P > "$tmp" + mv "$tmp" "$TARGET_FILE" + fi + fi + + echo "---------- workload profile" + cat ${workload_profile_path} + echo "----------" + echo "✅ workload profile ready" +--- +apiVersion: tekton.dev/v1beta1 +kind: StepAction +metadata: + name: vllm-benchmark-run +spec: + params: + - name: harnessName + - name: harnessProfile + - name: pipelineUID + env: + - name: REQUESTED_HARNESS_NAME + value: "$(params.harnessName)" + - name: MY_HARNESS_NAME + value: "vllm-benchmark" + - name: HARNESS_PROFILE + value: "$(params.harnessProfile)" + + - name: GIT_REPO_URL + value: "https://github.com/vllm-project/vllm.git" + - name: GIT_REVISION + value: "main" + - name: GIT_COMMIT + value: "b6381ced9c52271f799a8348fcc98c5f40528cdf" + + - name: DATA_ROOT_DIR + value: $(workspaces.data.path) + - name: MY_PIPELINE_UID + value: $(params.pipelineUID) + - name: MY_TASK_NAME + value: $(context.taskRun.name) + + - name: HF_TOKEN + valueFrom: + secretKeyRef: + name: hf-secret + key: HF_TOKEN + + # https://github.com/llm-d/llm-d-benchmark/blob/main/build/Dockerfile#L1C6-L1C33 + image: python:3.12.9-slim-bookworm + script: | + #!/bin/bash + + # https://github.com/llm-d/llm-d-benchmark/blob/main/workload/harnesses/vllm-benchmark-llm-d-benchmark.sh + + if [ "${REQUESTED_HARNESS_NAME}" != "${MY_HARNESS_NAME}" ]; then + echo "Requested harness not ${MY_HARNESS_NAME}, skipping" + exit 0 + fi + + # https://github.com/llm-d/llm-d-benchmark/blob/main/build/Dockerfile#L56-L62 + # https://github.com/llm-d/llm-d-benchmark/blob/main/setup/run.sh + + EXPERIMENT_ID="experiment-$(echo -n ${MY_PIPELINE_UID} | cut -c1-8)" + RESULTS_DIR="${DATA_ROOT_DIR}/${MY_HARNESS_NAME}_${EXPERIMENT_ID}_${MY_TASK_NAME}" + CONTROL_DIR="${DATA_ROOT_DIR}/${MY_HARNESS_NAME}_${EXPERIMENT_ID}_${MY_TASK_NAME}" + RUN_DIR=$(pwd) + + # TODO figure out which are actually needed for each step + echo "🔄 Installing required tools" + apt-get update + apt-get install -y \ + git \ + gpg \ + pip \ + yq \ + && apt-get clean && rm -rf /var/cache/apt + + echo "🔄 Cloning and installing harness: ${MY_HARNESS_NAME}" + git clone --branch ${GIT_REVISION} ${GIT_REPO_URL} + cd vllm + git checkout ${GIT_COMMIT} + cd .. + mv -f vllm vllm-benchmark + + # TBD pin versions + cat < requirements-vllm-benchmark.txt + aiohttp + datasets + numpy + pandas + pillow + tqdm + transformers + EOF + + cat requirements-vllm-benchmark.txt + pip --version + pip install --no-cache-dir \ + --disable-pip-version-check \ + --upgrade \ + -r ./requirements-vllm-benchmark.txt \ + --root-user-action=ignore + pip list + + # profile name and location + workload=$(echo ${HARNESS_PROFILE} | sed 's^\.yaml^^g' ) + workload_profile=${workload}.yaml + workload_profile_path=${CONTROL_DIR}/workload/profiles/${MY_HARNESS_NAME}/${workload_profile} + + # run vllm-benchmark + cp ${workload_profile_path} ${workload_profile} + en=$(cat ${workload_profile} | yq -r .executable) + + echo "pwd = $(pwd)" + echo "RUN_DIR=$RUN_DIR" + echo "running - ${RUN_DIR}/vllm-benchmark/benchmarks/${en}" + ls -l ${RUN_DIR}/vllm-benchmark/benchmarks + python ${RUN_DIR}/vllm-benchmark/benchmarks/${en} --$(cat ${workload_profile} | grep -v "^executable" | yq -r 'to_entries | map("\(.key)=\(.value)") | join(" --")' | sed -e 's^=none ^^g' -e 's^=none$^^g') --seed $(date +%s) --save-result > >(tee -a $RESULTS_DIR/stdout.log) 2> >(tee -a $RESULTS_DIR/stderr.log >&2) + export LLMDBENCH_RUN_EXPERIMENT_HARNESS_RC=$? 
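+      # Illustrative expansion of the invocation above (profile values are examples only):
+      #   a profile like {executable: benchmark_serving.py, model: <model-id>, num-prompts: 10}
+      #   becomes: python .../benchmarks/benchmark_serving.py --model=<model-id> --num-prompts=10 --seed <epoch> --save-result
+      #   (keys whose value is "none" are passed as bare flags by the sed substitution above)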
+ find ${RUN_DIR}/vllm-benchmark -maxdepth 1 -mindepth 1 -name '*.json' -exec mv -t "$RESULTS_DIR"/ {} + + + # If benchmark harness returned with an error, exit here + if [[ $LLMDBENCH_RUN_EXPERIMENT_HARNESS_RC -ne 0 ]]; then + echo "❌ Harness returned with error $LLMDBENCH_RUN_EXPERIMENT_HARNESS_RC" + exit $LLMDBENCH_RUN_EXPERIMENT_HARNESS_RC + fi + echo "✅ Harness completed successfully." +--- +apiVersion: tekton.dev/v1beta1 +kind: StepAction +metadata: + name: vllm-benchmark-analyze-results +spec: + params: + - name: harnessName + - name: pipelineUID + env: + - name: REQUESTED_HARNESS_NAME + value: "$(params.harnessName)" + - name: MY_HARNESS_NAME + value: "vllm-benchmark" + + - name: GIT_REPO_URL + value: "https://github.com/kubernetes-sigs/inference-perf.git" + - name: GIT_REVISION + value: "main" + - name: GIT_COMMIT + value: "1ccc48b6bb9c9abb61558b719041fb000b265e59" + + - name: DATA_ROOT_DIR + value: $(workspaces.data.path) + - name: MY_PIPELINE_UID + value: $(params.pipelineUID) + - name: MY_TASK_NAME + value: $(context.taskRun.name) + +# https://github.com/llm-d/llm-d-benchmark/blob/main/build/Dockerfile#L1C6-L1C33 + image: python:3.12.9-slim-bookworm + script: | + #!/usr/bin/env bash + + EXPERIMENT_ID="experiment-$(echo -n ${MY_PIPELINE_UID} | cut -c1-8)" + RESULTS_DIR="${DATA_ROOT_DIR}/${MY_HARNESS_NAME}_${EXPERIMENT_ID}_${MY_TASK_NAME}" + + if [ "${REQUESTED_HARNESS_NAME}" != "${MY_HARNESS_NAME}" ]; then + echo "Requested harness not ${MY_HARNESS_NAME}, skipping" + exit 0 + fi + + echo "🔄 Installing requirements" + # apt-get update + # apt-get install -y \ + # git \ + # pip \ + # && apt-get clean && rm -rf /var/cache/apt + + cat < requirements-analysis.txt + matplotlib>=3.7.0 + numpy>=2.3.1 + seaborn>=0.12.0 + pandas>=2.2.3 + pydantic>=2.11.7 + PyYAML>=6.0.2 + scipy>=1.16.0 + requests>=2.32.5 + EOF + + cat requirements-analysis.txt + pip --version + pip install --no-cache-dir \ + --disable-pip-version-check \ + --upgrade \ + -r ./requirements-analysis.txt \ + --root-user-action=ignore + pip list + + # Download covert python from llm-d-benchmark + # TBD: should the python be embedded in the step? A separate step perhaps. + export ROOT_DIR=workload/report + export BRANCH=main + + cat < >(tee -a $RESULTS_DIR/stderr.log >&2) + # Report errors but don't quit + export RUN_EXPERIMENT_CONVERT_RC=$? + if [[ $RUN_EXPERIMENT_CONVERT_RC -ne 0 ]]; then + echo "./convert.py returned with error $RUN_EXPERIMENT_CONVERT_RC converting: $result" + fi + done + + # Define function to call analysis so can call multiple times + # https://github.com/llm-d/llm-d-benchmark/blob/main/analysis/vllm-benchmark-analyze_results.sh + analyze_results () { + mkdir -p $RESULTS_DIR/analysis + result_start=$(grep -nr "Result ==" $RESULTS_DIR/stdout.log | cut -d ':' -f 1) + total_file_lenght=$(cat $RESULTS_DIR/stdout.log | wc -l) + cat $RESULTS_DIR/stdout.log | sed "$result_start,$total_file_lenght!d" > $RESULTS_DIR/analysis/summary.txt + return $? + } + + # https://github.com/llm-d/llm-d-benchmark/blob/main/build/llm-d-benchmark.sh#L63-L74 + echo "🔄 Running analysis" + # Try to run analysis twice then give up + analyze_results + ec=$? 
+ if [[ $ec -ne 0 ]]; then + echo "execution of analyzer failed, wating 120 seconds and trying again" + sleep 120 + set -x + analyze_results + fi + # Return with error code of first iteration of experiment analyzer + # TBD modify this message depending on success + echo "✅ Results analyzed and reports generated" + exit $ec + From e7301928f93d4c9beb08952889870b2bf7d3ff7e Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Wed, 15 Oct 2025 13:33:20 -0400 Subject: [PATCH 40/44] remove --tensor-parallel-size Signed-off-by: Michael Kalantar --- tekton-poc/examples/pd-disaggregation/ms-values.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tekton-poc/examples/pd-disaggregation/ms-values.yaml b/tekton-poc/examples/pd-disaggregation/ms-values.yaml index 6f7cdce1..7a5be879 100644 --- a/tekton-poc/examples/pd-disaggregation/ms-values.yaml +++ b/tekton-poc/examples/pd-disaggregation/ms-values.yaml @@ -91,8 +91,6 @@ decode: - "--disable-uvicorn-access-log" - "--max-model-len" - "16000" - - "--tensor-parallel-size" - - "1" env: - name: VLLM_NIXL_SIDE_CHANNEL_HOST valueFrom: @@ -192,8 +190,6 @@ prefill: - "--disable-uvicorn-access-log" - "--max-model-len" - "16000" - - "--tensor-parallel-size" - - "1" env: - name: VLLM_IS_PREFILL value: "1" From 3bacf14c9cc7ed02b674635b7fb3b2a466861351 Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Fri, 17 Oct 2025 10:26:56 -0400 Subject: [PATCH 41/44] capacity planner Signed-off-by: Michael Kalantar --- tekton-poc/README.md | 12 +- tekton-poc/pipeline/experiment-task.yaml | 425 +++++++++++++++--- tekton-poc/pipeline/pd-disaggregation-pr.yaml | 110 ++++- .../pipeline/pipelinerun-matrix-subset.yaml | 4 + tekton-poc/pipeline/pipelinerun-matrix.yaml | 17 +- tekton-poc/pipeline/steps/inference-perf.yaml | 137 ------ tekton-poc/pipeline/steps/treatment.yaml | 54 +-- tekton-poc/pipeline/steps/vllm-benchmark.yaml | 137 ------ 8 files changed, 473 insertions(+), 423 deletions(-) diff --git a/tekton-poc/README.md b/tekton-poc/README.md index a1f5b8fc..44616393 100644 --- a/tekton-poc/README.md +++ b/tekton-poc/README.md @@ -204,13 +204,19 @@ The utility script `utility/transform-pr-parallel.py` can be used to transform a - incorporate memory planner (Jing) - PD example (Nick) - [IN PROGRESS] deployment of the pd scenario + - [DONE] wait for model download + - [NOT STARTED] move from helm chart (job) to step - depends on ns change + - [NOT STARTED] debug --tensor-parallel-size argument - [DONE] enabling multiple harnesses (inference-perf and vllm-benchmark) - [DONE] making factors/treatments general (they are hardcoded) - - [NOT STARTED] use capacity planner to determine whether or not to continue + - [DONE] use capacity planner to determine whether or not to continue - [IN PROGRESS] move step implementations to stepactions - [NOT STARTED] move from multiple namespaces to single namespace -- can we have just one prepare-profile now that we have treatments? - should we have a convert step independent of the analysis step? - eventually one for analysis based on analysis of converted results -- need to wait for model download + +- wrapper to generate pipelineRun +- generate task? 
+ +- missing steps: validate accelerator configuartion (wrt to cluster) \ No newline at end of file diff --git a/tekton-poc/pipeline/experiment-task.yaml b/tekton-poc/pipeline/experiment-task.yaml index 0bd8ea84..a2b4840f 100644 --- a/tekton-poc/pipeline/experiment-task.yaml +++ b/tekton-poc/pipeline/experiment-task.yaml @@ -10,16 +10,17 @@ spec: - name: data params: - - name: factorMapping - type: string - description: | - JSON string mapping factor to path in source yaml file sorted by purpose. - name: treatment type: string description: | JSON string of factors and values for one treatment. Includes both infrastructure and workload factors. + - name: factorMapping + type: string + description: | + JSON string mapping factor to path in source yaml file sorted by purpose. + - name: targetNamespacePrefix type: string default: llmdbench @@ -30,6 +31,27 @@ spec: - name: inferencePort default: 8000 + # Properties needed to evaluate stack capacity (will it be able to host the model)? + - name: validateCapacity + default: "true" + - name: behaviorOnValidationFailure + default: "terminate" + + - name: maxModelLength + + - name: decodeReplicas + - name: decodeTensorParallelism + - name: decodeDataParallelism + - name: decodeNumGpus + + - name: prefillReplicas + - name: prefillTensorParallelism + - name: prefillDataParallelism + - name: prefillNumGpus + + - name: gpuType + - name: gpuMemory + - name: experimentBaseUrl type: string - name: experimentName @@ -133,6 +155,9 @@ spec: - name: debug type: string default: "false" + - name: step-upload-results + type: string + default: "true" - name: dry-run type: string default: "false" @@ -186,24 +211,192 @@ spec: - name: treatment value: $(params.treatment) - - name: display-treatment-analysis - image: alpine:3.20 - env: - - name: MODELSERVICE_SET_ARGS - value: "$(steps.analyze-modelservice-factors.results.treatmentAnalysis)" - - name: GAIE_SET_ARGS - value: "$(steps.analyze-gaie-factors.results.treatmentAnalysis)" - - name: WORKLOAD_SET_ARGS - value: "$(steps.analyze-workload-factors.results.treatmentAnalysis)" + # - name: display-treatment-analysis + # image: alpine:3.20 + # env: + # - name: MODELSERVICE_SET_ARGS + # value: "$(steps.analyze-modelservice-factors.results.treatmentAnalysis)" + # - name: GAIE_SET_ARGS + # value: "$(steps.analyze-gaie-factors.results.treatmentAnalysis)" + # - name: WORKLOAD_SET_ARGS + # value: "$(steps.analyze-workload-factors.results.treatmentAnalysis)" - script: | - #!/bin/sh - apk add --no-cache jq yq-go >/dev/null - jq --version + # script: | + # #!/bin/sh + # apk add --no-cache jq yq-go >/dev/null + # jq --version - echo "helm upgrade --install ... $(echo ${MODELSERVICE_SET_ARGS} | jq '.setArgs')" - echo "helm upgrade --install ... $(echo ${GAIE_SET_ARGS} | jq '.setArgs')" - echo "$(echo ${WORKLOAD_SET_ARGS} | jq '.updates')" + # echo "helm upgrade --install ... $(echo ${MODELSERVICE_SET_ARGS} | jq '.setArgs')" + # echo "helm upgrade --install ... 
$(echo ${GAIE_SET_ARGS} | jq '.setArgs')" + # echo "$(echo ${WORKLOAD_SET_ARGS} | jq '.updates')" + + # printf "%s" "$MODELSERVICE_SET_ARGS" + + - name: compute-decode-dp + ref: + name: + compute-value + params: + - name: name + value: "decodeDataParallelism" + - name: value + value: $(params.decodeDataParallelism) + - name: defaultValue + value: 1 + + - name: compute-decode-tp + ref: + name: + compute-value + params: + - name: name + value: "decodeTensorParallelism" + - name: value + value: $(params.decodeTensorParallelism) + - name: defaultValue + value: 1 + + - name: compute-decode-replicas + ref: + name: + compute-value + params: + - name: name + value: "decodeReplicas" + - name: value + value: $(params.decodeReplicas) + - name: defaultValue + value: 1 + + - name: compute-decode-num-gpus + ref: + name: + compute-num-gpus + params: + - name: name + value: "decodeNumGpus" + - name: value + value: $(params.decodeNumGpus) + - name: dp + value: $(steps.compute-decode-dp.results.value) + - name: tp + value: $(steps.compute-decode-tp.results.value) + + # - name: display-decode-values + # image: alpine:3.20 + # env: + # - name: REPLICAS + # value: "$(steps.compute-decode-replicas.results.value)" + # - name: TP + # value: "$(steps.compute-decode-tp.results.value)" + # - name: DP + # value: "$(steps.compute-decode-dp.results.value)" + # - name: NUM_GPUS + # value: "$(steps.compute-decode-num-gpus.results.value)" + + # script: | + # #!/bin/sh + + # echo "decodeReplicas = ${REPLICAS}" + # echo "decodeTensorParallelism = ${TP}" + # echo "decodeDataParallelism = ${DP}" + # echo "decodeNumGpus = ${NUM_GPUS}" + + - name: compute-prefill-dp + ref: + name: + compute-value + params: + - name: name + value: "prefillDataParallelism" + - name: value + value: $(params.prefillDataParallelism) + - name: defaultValue + value: 1 + + - name: compute-prefill-tp + ref: + name: + compute-value + params: + - name: name + value: "prefillTensorParallelism" + - name: value + value: $(params.prefillTensorParallelism) + - name: defaultValue + value: 1 + + - name: compute-prefill-replicas + ref: + name: + compute-value + params: + - name: name + value: "prefillReplicas" + - name: value + value: $(params.prefillReplicas) + - name: defaultValue + value: 1 + + - name: compute-prefill-num-gpus + ref: + name: + compute-num-gpus + params: + - name: name + value: "prefillNumGpus" + - name: value + value: $(params.prefillNumGpus) + - name: dp + value: $(steps.compute-prefill-dp.results.value) + - name: tp + value: $(steps.compute-prefill-tp.results.value) + + - name: check-decode-capacity + ref: + name: check-capacity + params: + - name: validateCapacity + value: $(params.validateCapacity) + - name: behaviorOnValidationFailure + value: $(params.behaviorOnValidationFailure) + - name: model + value: $(params.model-id) + - name: max_model_len + value: $(params.maxModelLength) + - name: replicas + value: $(steps.compute-decode-replicas.results.value) + - name: tp + value: $(steps.compute-decode-tp.results.value) + - name: dp + value: $(steps.compute-decode-dp.results.value) + - name: gpu_memory + value: $(params.gpuMemory) + - name: user_requested_gpu_count + value: $(steps.compute-decode-num-gpus.results.value) + + - name: check-prefill-capacity + ref: + name: check-capacity + params: + - name: validateCapacity + value: $(params.validateCapacity) + - name: behaviorOnValidationFailure + value: $(params.behaviorOnValidationFailure) + - name: model + value: $(params.model-id) + - name: max_model_len + value: $(params.maxModelLength) + - 
name: replicas + value: $(steps.compute-prefill-replicas.results.value) + - name: tp + value: $(steps.compute-prefill-tp.results.value) + - name: dp + value: $(steps.compute-prefill-dp.results.value) + - name: gpu_memory + value: $(params.gpuMemory) + - name: user_requested_gpu_count + value: $(steps.compute-prefill-num-gpus.results.value) - name: prepare-namespace image: quay.io/openshift/origin-cli:4.21 @@ -238,6 +431,9 @@ spec: oc adm policy add-scc-to-user anyuid -z helm-installer -n ${NAMESPACE} # oc adm policy add-scc-to-user privileged -z helm-installer -n ${NAMESPACE} + # TBD when move from multiple NS to single NS then can move to + # step implementation instead of kubernetes job (replacing the next 2 steps) + # Can't do yet because step executes in a different NS from target. - name: model-download ref: name: helm-upgrade-install @@ -273,10 +469,45 @@ spec: value: $(params.dry-run) - name: wait-for-download - image: alpine:3.20 + image: alpine/kubectl:1.34.1 + env: + - name: JOB_NAME + value: "llm-d-benchark-job" + - name: NAMESPACE + value: $(params.targetNamespacePrefix)-$(context.taskRun.name) + - name: TIMEOUT + value: "300" # seconds + - name: SLEEP_INTERVAL + value: "5" # seconds script : | - #!/bin/sh - echo "âŗ TBD: Wait for download job to complete" + #!/usr/bin/env sh + + echo "âŗ Wait for model to download" + + elapsed=0 + + while [ "$elapsed" -lt "${TIMEOUT}" ]; do + status=$(kubectl get job "${JOB_NAME}" -n "${NAMESPACE}" -o jsonpath='{.status.conditions[?(@.type=="Complete")].status}') + if [ "$status" = "True" ]; then + echo "✅ Job succeeded" + kubectl delete job "${JOB_NAME}" -n "${NAMESPACE}" --ignore-not-found + exit 0 + fi + + status=$(kubectl get job "${JOB_NAME}" -n "${NAMESPACE}" -o jsonpath='{.status.conditions[?(@.type=="Failed")].status}') + if [ "$status" = "True" ]; then + echo "❌ Job failed" + kubectl delete job "${JOB_NAME}" -n "${NAMESPACE}" --ignore-not-found + exit 1 + fi + + sleep "${SLEEP_INTERVAL}" + elapsed=$((elapsed + SLEEP_INTERVAL)) + done + + echo "❌ Timed out waiting for job to complete or fail" + kubectl delete job "${JOB_NAME}" -n "${NAMESPACE}" --ignore-not-found + exit 2 - name: gateway ref: @@ -355,6 +586,11 @@ spec: value: $(params.dry-run) - name: wait-for-model + env: + - name: DECODE_REPLICAS + value: $(steps.compute-decode-replicas.results.value) + - name: PREFILL_REPLICAS + value: $(steps.compute-prefill-replicas.results.value) image: alpine/kubectl:1.34.1 script: | #!/bin/sh @@ -371,39 +607,45 @@ spec: echo "âŗ Waiting for pods serving model ${MODEL_ID} to be 'Running'" echo "Model label = ${MODEL_LABEL}" - kubectl --namespace ${NAMESPACE} \ - wait pod \ - -l llm-d.ai/model=${MODEL_LABEL},llm-d.ai/role=decode \ - --for=create \ - --timeout=${MODEL_START_TIMEOUT}s - echo "✅ (decode) pods serving model ${MODEL_ID} created" + if [ ${DECODE_REPLICAS} -gt 0 ]; then + kubectl --namespace ${NAMESPACE} \ + wait pod \ + -l llm-d.ai/model=${MODEL_LABEL},llm-d.ai/role=decode \ + --for=create \ + --timeout=${MODEL_START_TIMEOUT}s + echo "✅ (decode) pods serving model ${MODEL_ID} created" + fi - # TBD check if any prefill pods and wait if so - # kubectl --namespace ${NAMESPACE} \ - # wait pod \ - # -l llm-d.ai/model=${MODEL_LABEL},llm-d.ai/role=prefill \ - # --for=create \ - # --timeout=${MODEL_START_TIMEOUT}s - # echo "✅ prefill pods serving model ${MODEL_ID} created" - - kubectl --namespace ${NAMESPACE} \ + if [ ${PREFILL_REPLICAS} -gt 0 ]; then + kubectl --namespace ${NAMESPACE} \ + wait pod \ + -l 
llm-d.ai/model=${MODEL_LABEL},llm-d.ai/role=prefill \ + --for=create \ + --timeout=${MODEL_START_TIMEOUT}s + echo "✅ prefill pods serving model ${MODEL_ID} created" + fi + + if [ ${DECODE_REPLICAS} -gt 0 ]; then + kubectl --namespace ${NAMESPACE} \ + wait pod \ + -l llm-d.ai/model=${MODEL_LABEL},llm-d.ai/role=decode \ + --for=condition=Ready=True \ + --timeout=${MODEL_START_TIMEOUT}s + echo "✅ (decode) pods serving model ${MODEL_ID} ready" + fi + + if [ ${PREFILL_REPLICAS} -gt 0 ]; then + kubectl --namespace ${NAMESPACE} \ wait pod \ - -l llm-d.ai/model=${MODEL_LABEL},llm-d.ai/role=decode \ - --for=condition=Ready=True \ - --timeout=${MODEL_START_TIMEOUT}s - echo "✅ (decode) pods serving model ${MODEL_ID} ready" - - # TBD check if any prefill pods and wait if so - # kubectl --namespace ${NAMESPACE} \ - # wait pod \ - # -l llm-d.ai/model=${MODEL_LABEL},llm-d.ai/role=prefill \ - # --for=condition=Ready=True \ - # --timeout=${MODEL_START_TIMEOUT}s - # echo "✅ prefill pods serving model ${MODEL_ID} ready" - - - name: inference-perf-prepare-profile + -l llm-d.ai/model=${MODEL_LABEL},llm-d.ai/role=prefill \ + --for=condition=Ready=True \ + --timeout=${MODEL_START_TIMEOUT}s + echo "✅ prefill pods serving model ${MODEL_ID} ready" + fi + + - name: prepare-workload-profile ref: - name: inference-perf-prepare-profile + name: prepare-workload-profile params: - name: harnessName value: $(params.harnessName) @@ -445,23 +687,6 @@ spec: - name: pipelineUID value: $(params.pipelineUID) - - name: vllm-benchmark-prepare-profile - ref: - name: vllm-benchmark-prepare-profile - params: - - name: harnessName - value: $(params.harnessName) - - name: harnessProfile - value: $(params.harnessProfile) - - name: treatmentAnalysis - value: $(steps.analyze-workload-factors.results.treatmentAnalysis) - - name: model-id - value: $(params.model-id) - - name: namespace - value: $(params.targetNamespacePrefix)-$(context.taskRun.name) - - name: pipelineUID - value: $(params.pipelineUID) - - name: vllm-benchmark-run ref: name: vllm-benchmark-run @@ -489,6 +714,66 @@ spec: - name: pipelineUID value: $(params.pipelineUID) + - name: upload-results + image: ubuntu:24.04 + # Tried amazon/aws-cli:2.31.9 but latest tar available via `dnf install tar -y` is 1.34. + # There were sporadic errors "file changed as we read it". It may be caused by the way + # tar identifes file changes in v 1.34 (via ctime). A recommended solution to move to 1.35. + # See https://stackoverflow.com/a/77765876 and tar release notes https://lists.gnu.org/archive/html/info-gnu/2023-07/msg00005.html) + # A smaller image is probably desirable. A restriction is that AWS CLI v2 requires glibc. + workingDir: $(workspaces.data.path) + env: + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: $(params.s3-keys) + key: AWS_ACCESS_KEY_ID + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: $(params.s3-keys) + key: AWS_SECRET_ACCESS_KEY + - name: AWS_EC2_METADATA_DISABLED + value: "true" + script: | + #!/usr/bin/env sh + + if [ "$(params.step-upload-results)" = "false" ]; then + echo "Upload disabled ... skipping." 
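+            # When step-upload-results is "false", exiting 0 marks this step successful,
+            # so the upload is simply skipped without failing the rest of the task.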
+ exit 0 + fi + + apt-get update && \ + apt-get install -y --no-install-recommends ca-certificates curl unzip tar gzip && \ + rm -rf /var/lib/apt/lists/* + + curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o /tmp/awscliv2.zip && \ + unzip /tmp/awscliv2.zip -d /tmp && \ + /tmp/aws/install && \ + rm -rf /tmp/aws /tmp/awscliv2.zip + + tar --version && gzip --version && aws --version + + EXPERIMENT_ID="experiment-$(echo -n $(params.pipelineUID) | cut -c1-8)" + EXPERIMENT_RESULTS_FOLDER="$(params.harnessName)_${EXPERIMENT_ID}_$(context.taskRun.name)" + ARCHIVE_NAME="${EXPERIMENT_RESULTS_FOLDER}.tar.gz" + + tar --version && gzip --version && aws --version + + tar -czf ${ARCHIVE_NAME} \ + -C "$(workspaces.data.path)" ${EXPERIMENT_RESULTS_FOLDER} + + aws s3 cp ${ARCHIVE_NAME} "s3://$(params.s3-bucket)/${ARCHIVE_NAME}" \ + --endpoint-url "$(params.s3-endpoint)" \ + --content-type "application/x-tar" \ + --content-encoding "gzip" \ + --no-progress + # --recursive \ + + rm -rf ${ARCHIVE_NAME} + + echo "✅ Uploaded results to ${ARCHIVE_NAME}" + - name: delete-namespace image: alpine/kubectl:1.34.1 script : | diff --git a/tekton-poc/pipeline/pd-disaggregation-pr.yaml b/tekton-poc/pipeline/pd-disaggregation-pr.yaml index cc694459..b78c5891 100644 --- a/tekton-poc/pipeline/pd-disaggregation-pr.yaml +++ b/tekton-poc/pipeline/pd-disaggregation-pr.yaml @@ -15,6 +15,42 @@ spec: value: $(context.pipelineRun.namespace) - name: model-id value: "meta-llama/Llama-3.1-8B-Instruct" + # value: "meta-llama/Llama-3.1-70B-Instruct" + + # Properties needed to evaluate stack capacity (will it be able to host the model)? + - name: validateCapacity + value: true + - name: behaviorOnValidationFailure + value: terminate + - name: maxModelLength + value: 16000 + # will be set via treatment below + # - name: decodeReplicas + # - name: decodeTensorParallelism + - name: decodeDataParallelism + value: 1 + # If not set, will be set to decodeTensorParallelism * decodeDataParallelism + # - name: decodeNumGpus + + # will be set via treatment below + # - name: prefillReplicas + # - name: prefillTensorParallelism + - name: prefillDataParallelism + value: 1 + # If not set, will be set to prefillTensorParallelism * prefillDataParallelism + # - name: prefillNumGpus + + # Rely on default value + # Assume the same for prefill and decode + # - name: targetGpuMemoryUtilization + + # Required + # Assume the same for prefill and decode + # TBD - attempt to read from the cluster + - name: gpuType + value: "NVIDIA-H100-80GB-HBM3" + - name: gpuMemory + value: 80 #GB # Harness / Workload - name: harnessName @@ -37,6 +73,26 @@ spec: pipelineSpec: workspaces: - name: data + params: + - name: maxModelLength + default: "" + - name: decodeReplicas + default: "" + - name: decodeTensorParallelism + default: "" + - name: decodeDataParallelism + default: "" + - name: decodeNumGpus + default: "" + - name: prefillReplicas + default: "" + - name: prefillTensorParallelism + default: "" + - name: prefillDataParallelism + default: "" + - name: prefillNumGpus + default: "" + tasks: - name: run-experiment taskRef: @@ -45,10 +101,43 @@ spec: - name: data workspace: data params: - - name: targetNamespacePrefix - value: $(params.targetNamespacePrefix) - name: model-id value: $(params.model-id) + + # Properties needed to evaluate stack capacity (will it be able to host the model)? 
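+      # These mirror the pipeline-level capacity parameters declared above; values left
+      # empty here are resolved downstream by the compute-values / compute-num-gpus steps
+      # (treatment value if present, otherwise a default of 1, or TP x DP for GPU counts).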
+ - name: validateCapacity + value: $(params.validateCapacity) + - name: behaviorOnValidationFailure + value: $(params.behaviorOnValidationFailure) + + - name: maxModelLength + value: $(params.maxModelLength) + + - name: decodeReplicas + value: $(params.decodeReplicas) + - name: decodeTensorParallelism + value: $(params.decodeTensorParallelism) + - name: decodeDataParallelism + value: $(params.decodeDataParallelism) + - name: decodeNumGpus + value: $(params.decodeNumGpus) + + - name: prefillReplicas + value: $(params.prefillReplicas) + - name: prefillTensorParallelism + value: $(params.prefillTensorParallelism) + - name: prefillDataParallelism + value: $(params.prefillDataParallelism) + - name: prefillNumGpus + value: $(params.prefillNumGpus) + + - name: gpuType + value: $(params.gpuType) + - name: gpuMemory + value: $(params.gpuMemory) + + - name: targetNamespacePrefix + value: $(params.targetNamespacePrefix) - name: experimentBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/pd-disaggregation/ @@ -86,6 +175,8 @@ spec: - name: debug value: "$(params.debug)" + - name: step-upload-results + value: false - name: pipelineUID value: "$(context.pipelineRun.uid)" @@ -133,18 +224,3 @@ spec: # # - "640" # # - "1280" # # - "2560" - -# LLMDBENCH_VLLM_COMMON_REPLICAS: "2,4" -# decode.replicas -# LLMDBENCH_VLLM_COMMON_TENSOR_PARALLELISM: "8" -# decode.parallelism.tensor - -# LLMDBENCH_VLLM_MODELSERVICE_PREFILL_REPLICAS: "2,4,6,8" -# prefill.replicas -# LLMDBENCH_VLLM_MODELSERVICE_PREFILL_TENSOR_PARALLELISM: "1,2" -# prefill.parallelism.tensor -# LLMDBENCH_VLLM_MODELSERVICE_DECODE_REPLICAS: "1,2,4" -# decode.replicas -# LLMDBENCH_VLLM_MODELSERVICE_DECODE_TENSOR_PARALLELISM: "2,4,8" -# decodeTensorParallelism -# decode.parallelism.tensor \ No newline at end of file diff --git a/tekton-poc/pipeline/pipelinerun-matrix-subset.yaml b/tekton-poc/pipeline/pipelinerun-matrix-subset.yaml index 11c73b85..822fea78 100644 --- a/tekton-poc/pipeline/pipelinerun-matrix-subset.yaml +++ b/tekton-poc/pipeline/pipelinerun-matrix-subset.yaml @@ -33,6 +33,8 @@ spec: # Control - name: debug value: true + - name: step-upload-results + value: false pipelineSpec: workspaces: @@ -91,6 +93,8 @@ spec: - name: debug value: "$(params.debug)" + - name: step-upload-results + value: "$(params.step-upload-results)" - name: pipelineUID value: "$(context.pipelineRun.uid)" matrix: diff --git a/tekton-poc/pipeline/pipelinerun-matrix.yaml b/tekton-poc/pipeline/pipelinerun-matrix.yaml index 64aa21ad..5cc1661b 100644 --- a/tekton-poc/pipeline/pipelinerun-matrix.yaml +++ b/tekton-poc/pipeline/pipelinerun-matrix.yaml @@ -1,3 +1,8 @@ +##### +# This is an example of how the matrix specification works. +# It is currently out of date. +# To test, use pipelinerun-matrix-subset.yaml instead. 
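+# With every value below uncommented, the matrix fans out over the cross-product of
+# gaiePluginConfig x question_len x output_len, i.e. 4 x 3 x 3 = 36 treatment runs.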
+##### apiVersion: tekton.dev/v1 kind: PipelineRun metadata: @@ -68,17 +73,17 @@ spec: params: - name: gaiePluginConfig value: - # - "inf-sche-none.yaml" - # - "inf-sche-prefix.yaml" - # - "inf-sche-kv.yaml" + - "inf-sche-none.yaml" + - "inf-sche-prefix.yaml" + - "inf-sche-kv.yaml" - "inf-sche-queue.yaml" - name: question_len value: - # - "100" - # - "300" + - "100" + - "300" - "1000" - name: output_len value: - # - "100" + - "100" - "300" - "1000" diff --git a/tekton-poc/pipeline/steps/inference-perf.yaml b/tekton-poc/pipeline/steps/inference-perf.yaml index 50f91936..b0d82c64 100644 --- a/tekton-poc/pipeline/steps/inference-perf.yaml +++ b/tekton-poc/pipeline/steps/inference-perf.yaml @@ -1,142 +1,5 @@ apiVersion: tekton.dev/v1beta1 kind: StepAction -metadata: - name: inference-perf-prepare-profile -spec: - params: - - name: harnessName - - name: harnessProfile - - name: model-id - - name: namespace - - name: treatmentAnalysis - - name: pipelineUID - env: - - name: REQUESTED_HARNESS_NAME - value: "$(params.harnessName)" - - name: MY_HARNESS_NAME - value: "inference-perf" - - name: HARNESS_PROFILE - value: "$(params.harnessProfile)" - - - name: TREATMENT_ANALYSIS - value: "$(params.treatmentAnalysis)" - - - name: LLMDBENCH_DEPLOY_CURRENT_MODEL - value: "$(params.model-id)" - - name: LLMDBENCH_HARNESS_STACK_ENDPOINT_URL - value: "http://experiment-gateway-inference-gateway.$(params.namespace).svc.cluster.local:80" - - - name: DATA_ROOT_DIR - value: $(workspaces.data.path) - - name: MY_TASK_NAME - value: $(context.taskRun.name) - - name: MY_PIPELINE_UID - value: $(params.pipelineUID) - - # https://github.com/llm-d/llm-d-benchmark/blob/main/build/Dockerfile#L1C6-L1C33 - image: python:3.12.9-slim-bookworm - script: | - #!/bin/bash - - if [ "${REQUESTED_HARNESS_NAME}" != "${MY_HARNESS_NAME}" ]; then - echo "Requested harness not ${MY_HARNESS_NAME}, skipping" - exit 0 - fi - - # TBD is this necessary or is it already there? 
- apt-get update - apt-get install -y --no-install-recommends curl ca-certificates jq - curl -fsSL https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 \ - -o /usr/local/bin/yq - chmod +x /usr/local/bin/yq - jq --version - yq --version - - # https://github.com/llm-d/llm-d-benchmark/blob/main/build/Dockerfile#L48-L54 - # https://github.com/llm-d/llm-d-benchmark/blob/main/setup/run.sh - - EXPERIMENT_ID="experiment-$(echo -n ${MY_PIPELINE_UID} | cut -c1-8)" - RESULTS_DIR="${DATA_ROOT_DIR}/${MY_HARNESS_NAME}_${EXPERIMENT_ID}_${MY_TASK_NAME}" - CONTROL_DIR="${DATA_ROOT_DIR}/${MY_HARNESS_NAME}_${EXPERIMENT_ID}_${MY_TASK_NAME}" - RUN_DIR=$(pwd) - - echo "🔄 Installing required tools" - apt-get update - apt-get install -y \ - wget \ - && apt-get clean && rm -rf /var/cache/apt - - # Ensure all folders created - mkdir -p $RESULTS_DIR - mkdir -p $CONTROL_DIR/setup - rm -rf $CONTROL_DIR/setup/sed-commands - touch $CONTROL_DIR/setup/sed-commands - mkdir -p ${CONTROL_DIR}/workload/profiles/${MY_HARNESS_NAME}/templates - - cd ${RUN_DIR}/vllm-benchmark/ - - # Define constants: input profile template name and location; final profile name and location - workload=$(echo ${HARNESS_PROFILE} | sed 's^\.yaml^^g' ) - workload_template=${workload}.yaml.in - workload_template_path=${CONTROL_DIR}/workload/profiles/${MY_HARNESS_NAME}/templates/${workload_template} - workload_profile=${workload}.yaml - workload_profile_path=${CONTROL_DIR}/workload/profiles/${MY_HARNESS_NAME}/${workload_profile} - - echo "🔄 Prepare workload profile" - # Fetch profile template from llmd-benchmark - wget -O ${workload_template_path} \ - --quiet \ - https://raw.githubusercontent.com/llm-d/llm-d-benchmark/refs/heads/main/workload/profiles/${MY_HARNESS_NAME}/${workload_template} - - # Apply treatment to profile template to produce final profile - echo "s^REPLACE_ENV_LLMDBENCH_DEPLOY_CURRENT_MODEL^${LLMDBENCH_DEPLOY_CURRENT_MODEL}^g" >> ${CONTROL_DIR}/setup/sed-commands - echo "s^REPLACE_ENV_LLMDBENCH_HARNESS_STACK_ENDPOINT_URL^${LLMDBENCH_HARNESS_STACK_ENDPOINT_URL}^g" >> ${CONTROL_DIR}/setup/sed-commands - echo "s^REPLACE_ENV_LLMDBENCH_DEPLOY_CURRENT_TOKENIZER^${LLMDBENCH_DEPLOY_CURRENT_TOKENIZER}^g" >> ${CONTROL_DIR}/setup/sed-commands - echo "---------- sed-commands" - cat ${CONTROL_DIR}/setup/sed-commands - echo "----------" - sed -f ${CONTROL_DIR}/setup/sed-commands ${workload_template_path} > ${workload_profile_path} - - # TBD eliminate the TARGET_FILE env variable - TARGET_FILE=${workload_profile_path} - echo "${TREATMENT_ANALYSIS}" | jq '.updates' > /tmp/updates.json - echo ">>> /tmp/updates.json" - cat /tmp/updates.json - - if [ ! -f "$TARGET_FILE" ]; then - echo "ERROR: File not found: $TARGET_FILE" >&2 - exit 1 - fi - - # Apply updates to JSON or YAML - if [ "$(jq 'length' /tmp/updates.json)" -gt 0 ]; then - ext="${TARGET_FILE##*.}" - tmp="${TARGET_FILE}.tmp" - - # TBD eliminate the json path (copilot generated this); profiles are yaml files - if [ "$ext" = "json" ]; then - jq --slurpfile upds /tmp/updates.json ' - reduce $upds[0][] as $u (. ; setpath($u.path; $u.value)) - ' "$TARGET_FILE" > "$tmp" - mv "$tmp" "$TARGET_FILE" - else - # YAML path: YAML → JSON → apply → YAML - yq -o=json '.' "$TARGET_FILE" \ - | jq --slurpfile upds /tmp/updates.json ' - reduce $upds[0][] as $u (. 
; setpath($u.path; $u.value)) - ' \ - | yq -P > "$tmp" - mv "$tmp" "$TARGET_FILE" - fi - fi - - echo "---------- workload profile" - cat ${workload_profile_path} - echo "----------" - echo "✅ workload profile ready" ---- -apiVersion: tekton.dev/v1beta1 -kind: StepAction metadata: name: inference-perf-run spec: diff --git a/tekton-poc/pipeline/steps/treatment.yaml b/tekton-poc/pipeline/steps/treatment.yaml index b371836f..33888574 100644 --- a/tekton-poc/pipeline/steps/treatment.yaml +++ b/tekton-poc/pipeline/steps/treatment.yaml @@ -7,8 +7,6 @@ spec: Produce '--set/--set-string path=value' flags for factorType and apply values into a JSON/YAML file. Works with flat or nested treatment. image: alpine:3.20 - # Pass params via env (StepAction scripts cannot use $(params.*) directly). - # We'll read these envs inside the script. params: - name: factorType type: string @@ -18,17 +16,9 @@ spec: - name: treatment type: string description: JSON values (flat or nested by key) - # - name: file - # type: string - # description: Target file path (relative to workdir or absolute) - # - name: workdir - # type: string - # description: Working directory (usually the bound workspace path) - # default: /workspace results: - name: treatmentAnalysis description: Space-separated '--set/--set-string path=value' tokens - # workingDir: $(params.workdir) env: - name: SELECTOR value: $(params.factorType) @@ -36,18 +26,10 @@ spec: value: $(params.factorMapping) - name: VAL_JSON value: $(params.treatment) - # - name: TARGET_FILE - # value: $(params.file) script: | #!/bin/sh set -eu apk add --no-cache jq yq >/dev/null - # jq --version - # yq --version - - # echo "$SELECTOR" - # echo "$MAP_JSON" - # echo "$VAL_JSON" # Build updates + flags (uses $val for type checks — fixed version) jq -r -n \ @@ -63,7 +45,7 @@ spec: updates: [ $m | to_entries[] | select($v[.key] != null) - | { path: (.value | split(".")), value: $v[.key] } + | { name: .key, path: (.value | split(".")), value: $v[.key] } ], setArgs: ( [ $m | to_entries[] @@ -80,38 +62,4 @@ spec: end ' > /tmp/out.json - # FLAGS=$(jq -r '.setArgs' /tmp/out.json) - # jq '.updates' /tmp/out.json > /tmp/updates.json - - # if [ ! -f "$TARGET_FILE" ]; then - # echo "ERROR: File not found: $TARGET_FILE" >&2 - # # still write empty result - # printf "" > "$(step.results.treatmentAnalysis.path)" - # exit 1 - # fi - - # # Apply updates to JSON or YAML - # if [ "$(jq 'length' /tmp/updates.json)" -gt 0 ]; then - # ext="${TARGET_FILE##*.}" - # tmp="${TARGET_FILE}.tmp" - - # if [ "$ext" = "json" ]; then - # jq --slurpfile upds /tmp/updates.json ' - # reduce $upds[0][] as $u (. ; setpath($u.path; $u.value)) - # ' "$TARGET_FILE" > "$tmp" - # mv "$tmp" "$TARGET_FILE" - # else - # # YAML path: YAML → JSON → apply → YAML - # yq -o=json '.' "$TARGET_FILE" \ - # | jq --slurpfile upds /tmp/updates.json ' - # reduce $upds[0][] as $u (. 
; setpath($u.path; $u.value)) - # ' \ - # | yq -P > "$tmp" - # mv "$tmp" "$TARGET_FILE" - # fi - # fi - - # printf "%s" "$(cat /tmp/out.json)" - # Emit flags as a step-scoped result - # printf "%s" "$FLAGS" > "$(step.results.treatmentAnalysis.path)" printf "%s" "$(cat /tmp/out.json)" > "$(step.results.treatmentAnalysis.path)" diff --git a/tekton-poc/pipeline/steps/vllm-benchmark.yaml b/tekton-poc/pipeline/steps/vllm-benchmark.yaml index 8daa7aeb..aead6b96 100644 --- a/tekton-poc/pipeline/steps/vllm-benchmark.yaml +++ b/tekton-poc/pipeline/steps/vllm-benchmark.yaml @@ -1,142 +1,5 @@ apiVersion: tekton.dev/v1beta1 kind: StepAction -metadata: - name: vllm-benchmark-prepare-profile -spec: - params: - - name: harnessName - - name: harnessProfile - - name: model-id - - name: namespace - - name: treatmentAnalysis - - name: pipelineUID - env: - - name: REQUESTED_HARNESS_NAME - value: "$(params.harnessName)" - - name: MY_HARNESS_NAME - value: "vllm-benchmark" - - name: HARNESS_PROFILE - value: "$(params.harnessProfile)" - - - name: TREATMENT_ANALYSIS - value: "$(params.treatmentAnalysis)" - - - name: LLMDBENCH_DEPLOY_CURRENT_MODEL - value: "$(params.model-id)" - - name: LLMDBENCH_HARNESS_STACK_ENDPOINT_URL - value: "http://experiment-gateway-inference-gateway.$(params.namespace).svc.cluster.local:80" - - - name: DATA_ROOT_DIR - value: $(workspaces.data.path) - - name: MY_TASK_NAME - value: $(context.taskRun.name) - - name: MY_PIPELINE_UID - value: $(params.pipelineUID) - - # https://github.com/llm-d/llm-d-benchmark/blob/main/build/Dockerfile#L1C6-L1C33 - image: python:3.12.9-slim-bookworm - script: | - #!/bin/bash - - if [ "${REQUESTED_HARNESS_NAME}" != "${MY_HARNESS_NAME}" ]; then - echo "Requested harness not ${MY_HARNESS_NAME}, skipping" - exit 0 - fi - - # TBD is this necessary or is it already there? 
- apt-get update - apt-get install -y --no-install-recommends curl ca-certificates jq - curl -fsSL https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 \ - -o /usr/local/bin/yq - chmod +x /usr/local/bin/yq - jq --version - yq --version - - # https://github.com/llm-d/llm-d-benchmark/blob/main/build/Dockerfile#L56-L62 - # https://github.com/llm-d/llm-d-benchmark/blob/main/setup/run.sh - - EXPERIMENT_ID="experiment-$(echo -n ${MY_PIPELINE_UID} | cut -c1-8)" - RESULTS_DIR="${DATA_ROOT_DIR}/${MY_HARNESS_NAME}_${EXPERIMENT_ID}_${MY_TASK_NAME}" - CONTROL_DIR="${DATA_ROOT_DIR}/${MY_HARNESS_NAME}_${EXPERIMENT_ID}_${MY_TASK_NAME}" - RUN_DIR=$(pwd) - - echo "🔄 Installing required tools" - apt-get update - apt-get install -y \ - wget \ - && apt-get clean && rm -rf /var/cache/apt - - # Ensure all folders created - mkdir -p $RESULTS_DIR - mkdir -p $CONTROL_DIR/setup - rm -rf $CONTROL_DIR/setup/sed-commands - touch $CONTROL_DIR/setup/sed-commands - mkdir -p ${CONTROL_DIR}/workload/profiles/${MY_HARNESS_NAME}/templates - - cd ${RUN_DIR}/vllm-benchmark/ - - # Define constants: input profile template name and location; final profile name and location - workload=$(echo ${HARNESS_PROFILE} | sed 's^\.yaml^^g' ) - workload_template=${workload}.yaml.in - workload_template_path=${CONTROL_DIR}/workload/profiles/${MY_HARNESS_NAME}/templates/${workload_template} - workload_profile=${workload}.yaml - workload_profile_path=${CONTROL_DIR}/workload/profiles/${MY_HARNESS_NAME}/${workload_profile} - - echo "🔄 Prepare workload profile" - # Fetch profile template from llmd-benchmark - wget -O ${workload_template_path} \ - --quiet \ - https://raw.githubusercontent.com/llm-d/llm-d-benchmark/refs/heads/main/workload/profiles/${MY_HARNESS_NAME}/${workload_template} - - # Apply treatment to profile template to produce final profile - echo "s^REPLACE_ENV_LLMDBENCH_DEPLOY_CURRENT_MODEL^${LLMDBENCH_DEPLOY_CURRENT_MODEL}^g" >> ${CONTROL_DIR}/setup/sed-commands - echo "s^REPLACE_ENV_LLMDBENCH_HARNESS_STACK_ENDPOINT_URL^${LLMDBENCH_HARNESS_STACK_ENDPOINT_URL}^g" >> ${CONTROL_DIR}/setup/sed-commands - echo "s^REPLACE_ENV_LLMDBENCH_DEPLOY_CURRENT_TOKENIZER^${LLMDBENCH_DEPLOY_CURRENT_TOKENIZER}^g" >> ${CONTROL_DIR}/setup/sed-commands - echo "---------- sed-commands" - cat ${CONTROL_DIR}/setup/sed-commands - echo "----------" - sed -f ${CONTROL_DIR}/setup/sed-commands ${workload_template_path} > ${workload_profile_path} - - # TBD eliminate the TARGET_FILE env variable - TARGET_FILE=${workload_profile_path} - echo "${TREATMENT_ANALYSIS}" | jq '.updates' > /tmp/updates.json - echo ">>> /tmp/updates.json" - cat /tmp/updates.json - - if [ ! -f "$TARGET_FILE" ]; then - echo "ERROR: File not found: $TARGET_FILE" >&2 - exit 1 - fi - - # Apply updates to JSON or YAML - if [ "$(jq 'length' /tmp/updates.json)" -gt 0 ]; then - ext="${TARGET_FILE##*.}" - tmp="${TARGET_FILE}.tmp" - - # TBD eliminate the json path (copilot generated this); profiles are yaml files - if [ "$ext" = "json" ]; then - jq --slurpfile upds /tmp/updates.json ' - reduce $upds[0][] as $u (. ; setpath($u.path; $u.value)) - ' "$TARGET_FILE" > "$tmp" - mv "$tmp" "$TARGET_FILE" - else - # YAML path: YAML → JSON → apply → YAML - yq -o=json '.' "$TARGET_FILE" \ - | jq --slurpfile upds /tmp/updates.json ' - reduce $upds[0][] as $u (. 
; setpath($u.path; $u.value)) - ' \ - | yq -P > "$tmp" - mv "$tmp" "$TARGET_FILE" - fi - fi - - echo "---------- workload profile" - cat ${workload_profile_path} - echo "----------" - echo "✅ workload profile ready" ---- -apiVersion: tekton.dev/v1beta1 -kind: StepAction metadata: name: vllm-benchmark-run spec: From 62fc6c458b0de298b99f8a79a4017320c202867f Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Mon, 20 Oct 2025 10:08:17 -0400 Subject: [PATCH 42/44] reduce task pod requirements Signed-off-by: Michael Kalantar --- tekton-poc/pipeline/experiment-task.yaml | 179 +++---- tekton-poc/pipeline/pd-disaggregation-pr.yaml | 15 +- .../pipeline/steps/capacity-planner.yaml | 439 ++++++++++++++++++ .../pipeline/steps/workload-profile.yaml | 131 ++++++ 4 files changed, 652 insertions(+), 112 deletions(-) create mode 100644 tekton-poc/pipeline/steps/capacity-planner.yaml create mode 100644 tekton-poc/pipeline/steps/workload-profile.yaml diff --git a/tekton-poc/pipeline/experiment-task.yaml b/tekton-poc/pipeline/experiment-task.yaml index a2b4840f..8bcf868a 100644 --- a/tekton-poc/pipeline/experiment-task.yaml +++ b/tekton-poc/pipeline/experiment-task.yaml @@ -232,42 +232,26 @@ spec: # printf "%s" "$MODELSERVICE_SET_ARGS" - - name: compute-decode-dp + # TBD split into individual steps to compute each value? + - name: compute-capacity-validation-values ref: name: - compute-value + compute-values params: - - name: name - value: "decodeDataParallelism" - - name: value + - name: decodeDataParallelism value: $(params.decodeDataParallelism) - - name: defaultValue - value: 1 - - - name: compute-decode-tp - ref: - name: - compute-value - params: - - name: name - value: "decodeTensorParallelism" - - name: value + - name: decodeTensorParallelism value: $(params.decodeTensorParallelism) - - name: defaultValue - value: 1 - - - name: compute-decode-replicas - ref: - name: - compute-value - params: - - name: name - value: "decodeReplicas" - - name: value + - name: decodeReplicas value: $(params.decodeReplicas) - - name: defaultValue - value: 1 + - name: prefillDataParallelism + value: $(params.prefillDataParallelism) + - name: prefillTensorParallelism + value: $(params.prefillTensorParallelism) + - name: prefillReplicas + value: $(params.prefillReplicas) + # TBD fold into compute-capacity-validation-values ? 
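+  # compute-num-gpus resolves decodeNumGpus in order: explicit parameter, then the
+  # treatment analysis, then TP x DP (e.g. decodeTensorParallelism=2 x
+  # decodeDataParallelism=1 gives 2 GPUs per decode replica).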
- name: compute-decode-num-gpus ref: name: @@ -278,66 +262,13 @@ spec: - name: value value: $(params.decodeNumGpus) - name: dp - value: $(steps.compute-decode-dp.results.value) + # value: $(steps.compute-decode-dp.results.value) + value: $(steps.compute-capacity-validation-values.results.decodeDataParallelism) - name: tp - value: $(steps.compute-decode-tp.results.value) - - # - name: display-decode-values - # image: alpine:3.20 - # env: - # - name: REPLICAS - # value: "$(steps.compute-decode-replicas.results.value)" - # - name: TP - # value: "$(steps.compute-decode-tp.results.value)" - # - name: DP - # value: "$(steps.compute-decode-dp.results.value)" - # - name: NUM_GPUS - # value: "$(steps.compute-decode-num-gpus.results.value)" - - # script: | - # #!/bin/sh - - # echo "decodeReplicas = ${REPLICAS}" - # echo "decodeTensorParallelism = ${TP}" - # echo "decodeDataParallelism = ${DP}" - # echo "decodeNumGpus = ${NUM_GPUS}" - - - name: compute-prefill-dp - ref: - name: - compute-value - params: - - name: name - value: "prefillDataParallelism" - - name: value - value: $(params.prefillDataParallelism) - - name: defaultValue - value: 1 - - - name: compute-prefill-tp - ref: - name: - compute-value - params: - - name: name - value: "prefillTensorParallelism" - - name: value - value: $(params.prefillTensorParallelism) - - name: defaultValue - value: 1 - - - name: compute-prefill-replicas - ref: - name: - compute-value - params: - - name: name - value: "prefillReplicas" - - name: value - value: $(params.prefillReplicas) - - name: defaultValue - value: 1 + # value: $(steps.compute-decode-tp.results.value) + value: $(steps.compute-capacity-validation-values.results.decodeTensorParallelism) + # TBD fold into compute-capacity-validation-values ? - name: compute-prefill-num-gpus ref: name: @@ -348,9 +279,11 @@ spec: - name: value value: $(params.prefillNumGpus) - name: dp - value: $(steps.compute-prefill-dp.results.value) + # value: $(steps.compute-prefill-dp.results.value) + value: $(steps.compute-capacity-validation-values.results.prefillDataParallelism) - name: tp - value: $(steps.compute-prefill-tp.results.value) + # value: $(steps.compute-prefill-tp.results.value) + value: $(steps.compute-capacity-validation-values.results.prefillDataParallelism) - name: check-decode-capacity ref: @@ -365,15 +298,19 @@ spec: - name: max_model_len value: $(params.maxModelLength) - name: replicas - value: $(steps.compute-decode-replicas.results.value) + value: $(steps.compute-capacity-validation-values.results.decodeReplicas) - name: tp - value: $(steps.compute-decode-tp.results.value) + value: $(steps.compute-capacity-validation-values.results.decodeTensorParallelism) - name: dp - value: $(steps.compute-decode-dp.results.value) + value: $(steps.compute-capacity-validation-values.results.decodeDataParallelism) - name: gpu_memory value: $(params.gpuMemory) - name: user_requested_gpu_count value: $(steps.compute-decode-num-gpus.results.value) + when: + - input: $(params.validateCapacity) + operator: in + values: [ "true" ] - name: check-prefill-capacity ref: @@ -388,15 +325,19 @@ spec: - name: max_model_len value: $(params.maxModelLength) - name: replicas - value: $(steps.compute-prefill-replicas.results.value) + value: $(steps.compute-capacity-validation-values.results.prefillReplicas) - name: tp - value: $(steps.compute-prefill-tp.results.value) + value: $(steps.compute-capacity-validation-values.results.prefillTensorParallelism) - name: dp - value: $(steps.compute-prefill-dp.results.value) + value: 
$(steps.compute-capacity-validation-values.results.prefillDataParallelism) - name: gpu_memory value: $(params.gpuMemory) - name: user_requested_gpu_count value: $(steps.compute-prefill-num-gpus.results.value) + when: + - input: $(params.validateCapacity) + operator: in + values: [ "true" ] - name: prepare-namespace image: quay.io/openshift/origin-cli:4.21 @@ -588,9 +529,9 @@ spec: - name: wait-for-model env: - name: DECODE_REPLICAS - value: $(steps.compute-decode-replicas.results.value) + value: $(steps.compute-capacity-validation-values.results.decodeReplicas) - name: PREFILL_REPLICAS - value: $(steps.compute-prefill-replicas.results.value) + value: $(steps.compute-capacity-validation-values.results.prefillReplicas) image: alpine/kubectl:1.34.1 script: | #!/bin/sh @@ -670,13 +611,17 @@ spec: value: $(params.harnessProfile) - name: pipelineUID value: $(params.pipelineUID) - computeResources: - requests: - memory: "32Gi" - cpu: "16" - limits: - memory: "32Gi" - cpu: "16" + when: + - input: $(params.harnessName) + operator: in + values: [ "inference-perf" ] + # computeResources: + # requests: + # memory: "32Gi" + # cpu: "16" + # limits: + # memory: "32Gi" + # cpu: "16" - name: inference-perf-analyze-results ref: @@ -686,6 +631,10 @@ spec: value: $(params.harnessName) - name: pipelineUID value: $(params.pipelineUID) + when: + - input: $(params.harnessName) + operator: in + values: [ "inference-perf" ] - name: vllm-benchmark-run ref: @@ -697,13 +646,17 @@ spec: value: $(params.harnessProfile) - name: pipelineUID value: $(params.pipelineUID) - computeResources: - requests: - memory: "32Gi" - cpu: "16" - limits: - memory: "32Gi" - cpu: "16" + when: + - input: $(params.harnessName) + operator: in + values: [ "vllm-benchmark" ] + # computeResources: + # requests: + # memory: "32Gi" + # cpu: "16" + # limits: + # memory: "32Gi" + # cpu: "16" - name: vllm-benchmark-analyze-results ref: @@ -713,6 +666,10 @@ spec: value: $(params.harnessName) - name: pipelineUID value: $(params.pipelineUID) + when: + - input: $(params.harnessName) + operator: in + values: [ "vllm-benchmark" ] - name: upload-results image: ubuntu:24.04 diff --git a/tekton-poc/pipeline/pd-disaggregation-pr.yaml b/tekton-poc/pipeline/pd-disaggregation-pr.yaml index b78c5891..d97212e0 100644 --- a/tekton-poc/pipeline/pd-disaggregation-pr.yaml +++ b/tekton-poc/pipeline/pd-disaggregation-pr.yaml @@ -5,6 +5,19 @@ metadata: spec: taskRunTemplate: serviceAccountName: helm-installer + taskRunSpecs: + - pipelineTaskName: run-experiment + computeResources: + requests: + memory: "16Gi" + cpu: "8" + # memory: "32Gi" + # cpu: "16" + limits: + memory: "16Gi" + cpu: "8" + # memory: "32Gi" + # cpu: "16" workspaces: - name: data persistentVolumeClaim: @@ -190,7 +203,7 @@ spec: "prefillReplicas": 1, "prefillTensorParallelism": 1, "decodeReplicas": 1, - "decodeTensorParallelism": 1, + "decodeTensorParallelism": 2, "max-concurrency": 1, "num-prompts": 10 } diff --git a/tekton-poc/pipeline/steps/capacity-planner.yaml b/tekton-poc/pipeline/steps/capacity-planner.yaml new file mode 100644 index 00000000..879bec20 --- /dev/null +++ b/tekton-poc/pipeline/steps/capacity-planner.yaml @@ -0,0 +1,439 @@ +# apiVersion: tekton.dev/v1beta1 +# kind: StepAction +# metadata: +# name: compute-value +# spec: +# results: +# - name: value +# params: +# - name: name +# - name: value +# - name: defaultValue +# env: +# - name: PARAMETER_NAME +# value: "$(params.name)" +# - name: PARAMETER_VALUE +# value: $(params.value) +# - name: DEFAULT_VALUE +# value: $(params.defaultValue) 
+# - name: TREATMENT_ANALYSIS +# value: "$(steps.analyze-modelservice-factors.results.treatmentAnalysis)" +# image: alpine:3.20 +# script: | +# #!/usr/bin/env sh + +# apk add --no-cache jq yq >/dev/null + +# echo "PARAMETER_NAME = ${PARAMETER_NAME}" +# echo "PARAMETER_VALUE = ${PARAMETER_VALUE}" +# echo "DEFAULT_VALUE = ${DEFAULT_VALUE}" + +# if [ -n "${PARAMETER_VALUE}" ]; then +# value="${PARAMETER_VALUE}" +# echo ">>> Using value from parameter: ${value}" +# else +# value=$( +# echo ${TREATMENT_ANALYSIS} \ +# | jq -r ".updates[] | select(.name == \"${PARAMETER_NAME}\") | .value" +# ) +# echo ">>> value from treatment: ${value}" +# if [ -z $value ]; then +# value=${DEFAULT_VALUE} +# echo ">>> Using default value: ${value}" +# fi +# fi + +# echo -n "${value}" > "$(step.results.value.path)" +# --- +apiVersion: tekton.dev/v1beta1 +kind: StepAction +metadata: + name: compute-values +spec: + results: + - name: decodeDataParallelism + - name: decodeTensorParallelism + - name: decodeReplicas + - name: prefillDataParallelism + - name: prefillTensorParallelism + - name: prefillReplicas + params: + - name: decodeDataParallelism + - name: decodeTensorParallelism + - name: decodeReplicas + - name: prefillDataParallelism + - name: prefillTensorParallelism + - name: prefillReplicas + env: + - name: DECODE_DP + value: "$(params.decodeDataParallelism)" + - name: DECODE_TP + value: "$(params.decodeTensorParallelism)" + - name: DECODE_REPLICAS + value: "$(params.decodeReplicas)" + - name: PREFILL_DP + value: "$(params.prefillDataParallelism)" + - name: PREFILL_TP + value: "$(params.prefillTensorParallelism)" + - name: PREFILL_REPLICAS + value: "$(params.prefillReplicas)" + - name: TREATMENT_ANALYSIS + value: "$(steps.analyze-modelservice-factors.results.treatmentAnalysis)" + image: alpine:3.20 + script: | + #!/usr/bin/env sh + + apk add --no-cache jq yq >/dev/null + + compute_value() { + _name="$1" + _value="$2" + _default="$3" + + if [ -n "${_value}" ]; then + _result="${_value}" + else + _result=$( + # echo "from treatment" + echo "${TREATMENT_ANALYSIS}" \ + | jq -r ".updates[] | select(.name == \"${_name}\") | .value" + ) + if [ -z $_result ]; then + _result="${_default}" + fi + fi + echo "${_result}" + } + + echo "input DECODE_DP = ${DECODE_DP}" + value=$(compute_value "decodeDataParallelism" "${DECODE_DP}" 1) + echo "output decodeDataParallelism = $value" + echo -n "${value}" > "$(step.results.decodeDataParallelism.path)" + + echo "input DECODE_TP = ${DECODE_TP}" + value=$(compute_value "decodeTensorParallelism" "${DECODE_TP}" 1) + echo "output decodeTensorParallelism = $value" + echo -n "${value}" > "$(step.results.decodeTensorParallelism.path)" + + echo "input DECODE_REPLICAS = ${DECODE_REPLICAS}" + value=$(compute_value "decodeReplicas" "${DECODE_REPLICAS}" 1) + echo "output decodeReplicas = $value" + echo -n "${value}" > "$(step.results.decodeReplicas.path)" + + echo "input PREFILL_DP = ${PREFILL_DP}" + value=$(compute_value "prefillDataParallelism" "${PREFILL_DP}" 1) + echo "output prefillDataParallelism = $value" + echo -n "${value}" > "$(step.results.prefillDataParallelism.path)" + + echo "input PREFILL_TP = ${PREFILL_TP}" + value=$(compute_value "prefillTensorParallelism" "${PREFILL_TP}" 1) + echo "output prefillTensorParallelism = $value" + echo -n "${value}" > "$(step.results.prefillTensorParallelism.path)" + + echo "input PREFILL_REPLICAS = ${PREFILL_REPLICAS}" + value=$(compute_value "prefillReplicas" "${PREFILL_REPLICAS}" 1) + echo "output prefillReplicas = $value" + echo -n "${value}" > 
"$(step.results.prefillReplicas.path)" +--- +apiVersion: tekton.dev/v1beta1 +kind: StepAction +metadata: + name: compute-num-gpus +spec: + results: + - name: value + params: + - name: name + - name: value + - name: dp + - name: tp + env: + - name: PARAMETER_NAME + value: "$(params.name)" + - name: PARAMETER_VALUE + value: $(params.value) + - name: DP + value: $(params.dp) + - name: TP + value: $(params.tp) + - name: TREATMENT_ANALYSIS + value: "$(steps.analyze-modelservice-factors.results.treatmentAnalysis)" + image: alpine:3.20 + script: | + #!/usr/bin/env sh + + apk add --no-cache jq yq >/dev/null + + echo "PARAMETER_NAME = ${PARAMETER_NAME}" + echo "PARAMETER_VALUE = ${PARAMETER_VALUE}" + echo "DP = ${DP}" + echo "TP = ${TP}" + + if [ -n "${PARAMETER_VALUE}" ]; then + value=${PARAMETER_VALUE} + echo ">>> Using value from parameter: ${value}" + else + value=$( + echo ${TREATMENT_ANALYSIS} \ + | jq -r ".updates[] | select(.name == \"${PARAMETER_NAME}\") | .value" + ) + echo ">>> value from treatment: ${value}" + if [ -z $value ]; then + value=$(( $TP * $DP )) + echo ">>> Using value from computation: $TP * $DP = ${value}" + fi + fi + + echo -n "${value}" > "$(step.results.value.path)" +--- +apiVersion: tekton.dev/v1beta1 +kind: StepAction +metadata: + name: check-capacity +spec: + params: + - name: validateCapacity + default: "true" + - name: behaviorOnValidationFailure + default: terminate # ignore + + - name: model + - name: max_model_len + - name: replicas + - name: tp + - name: dp + - name: gpu_memory + - name: user_requested_gpu_count + - name: gpu_memory_util + default: "0.95" + + - name: py + default: | + import os + import sys + from typing import Tuple + from config_explorer.capacity_planner import * + + def log_failed(msg: str, ignore_if_failed = True): + print(f'❌ {msg}') + if not ignore_if_failed: + sys.exit(1) + + def log_warning(msg): + print(f'âš ī¸ {msg}') + + def log_info(msg): + print(f'â„šī¸ {msg}') + + def get_model_info(model_name: str, hf_token: str, ignore_if_failed: bool) -> ModelInfo | None: + """ + Obtains model info from HF + """ + + try: + return get_model_info_from_hf(model_name, hf_token) + + except GatedRepoError: + log_failed("Model is gated and provided token does not, work. Please double check.", ignore_if_failed) + except HfHubHTTPError as hf_exp: + log_failed(f"Error reaching Hugging Face API: {hf_exp}", ignore_if_failed) + except Exception as e: + log_failed(f"Cannot retrieve ModelInfo: {e}", ignore_if_failed) + + return None + + def get_model_config_and_text_config(model_name: str, hf_token: str, ignore_if_failed: bool) -> Tuple[AutoConfig | None, AutoConfig | None]: + """ + Obtains model config and text config from HF + """ + + try: + config = get_model_config_from_hf(model_name, hf_token) + return config, get_text_config(config) + + except GatedRepoError: + log_failed("Model is gated and provided token does not, work. 
Please double check.", ignore_if_failed) + except HfHubHTTPError as hf_exp: + log_failed(f"Error reaching Hugging Face API: {hf_exp}", ignore_if_failed) + except Exception as e: + log_failed(f"Cannot retrieve model config: {e}", ignore_if_failed) + + return None, None + + def validate_vllm_params(): + print ("validate_vllm_params() called") + + replicas = int(os.getenv("REPLICAS")) + user_requested_gpu_count = int(os.getenv("USER_REQUESTED_GPU_COUNT")) + tp = int(os.getenv("TP")) + dp = int(os.getenv("DP")) + model = os.getenv("MODEL") + gpu_memory = int(os.getenv("GPU_MEMORY")) + max_model_len = int(os.getenv("MAX_MODEL_LEN")) + gpu_memory_util = float(os.getenv("GPU_MEMORY_UTIL")) + hf_token = os.getenv("HF_TOKEN") + ignore_if_failed = os.getenv("BEHAVIOR_ON_FAILURE") != 'terminate' + + print(f"model = {model}") + print(f"replicas = {replicas}") + print(f"user_requested_gpu_count = {user_requested_gpu_count}") + print(f"tp = {tp}") + print(f"dp = {dp}") + print(f"gpu_memory = {gpu_memory}") + print(f"max_model_len = {max_model_len}") + print(f"gpu_memory_util = {gpu_memory_util}") + print(f"ignore_if_failed = {ignore_if_failed}") + + # Sanity check on user inputs. If GPU memory cannot be determined, return False indicating that the sanity check is incomplete + skip_gpu_tests = False + if gpu_memory is None or gpu_memory == 0: + log_failure("Cannot determine accelerator memory. Please set LLMDBENCH_VLLM_COMMON_ACCELERATOR_MEMORY to enable Capacity Planner. Skipping GPU memory required checks, especially KV cache estimation.", ignore_if_failed) + skip_gpu_tests = True + + per_replica_requirement = gpus_required(tp=tp, dp=dp) + if replicas == 0: + per_replica_requirement = 0 + total_gpu_requirement = per_replica_requirement + + if total_gpu_requirement > user_requested_gpu_count: + log_failed(f"Requested {user_requested_gpu_count} GPUs but it is too low. It must be greater than TP x DP ({tp} x {dp} = {total_gpu_requirement})") + + if total_gpu_requirement < user_requested_gpu_count: + log_warning(f"For each replica, model requires {total_gpu_requirement}, but you requested {user_requested_gpu_count} for the deployment. Some GPUs will be idle.") + + model_info = get_model_info(model, hf_token, ignore_if_failed) + model_config, text_config = get_model_config_and_text_config(model, hf_token, ignore_if_failed) + if model_config is not None: + # Check if parallelism selections are valid + try: + valid_tp_values = find_possible_tp(text_config) + log_info(f"valid tp values are: {valid_tp_values}") + if tp not in valid_tp_values: + log_failed(f"TP={tp} is invalid. Please select from these options ({valid_tp_values}) for {model}.", ignore_if_failed) + else: + log_info(f"TP={tp} is valid.") + except AttributeError: + # Error: config['num_attention_heads'] not in config + log_failed(f"Cannot obtain data on the number of attention heads, cannot find valid tp values: {e}", ignore_if_failed) + + # Check if model context length is valid + valid_max_context_len = 0 + try: + # Error: config['max_positional_embeddings'] not in config + valid_max_context_len = max_context_len(model_config) + log_info(f"The max context length is {valid_max_context_len}") + except AttributeError as e: + log_failed(f"Cannot obtain data on the max context length for model: {e}", ignore_if_failed) + + if max_model_len > valid_max_context_len: + log_failed(f"Max model length = {max_model_len} exceeds the acceptable for {model}. 
Set LLMDBENCH_VLLM_COMMON_MAX_MODEL_LEN to a value below or equal to {valid_max_context_len}", ignore_if_failed) + else: + log_failed("Model config on parameter shape is not available.", ignore_if_failed) + + # Display memory info + if not skip_gpu_tests: + log_info("👉 Collecting GPU information....") + avail_gpu_memory = available_gpu_memory(gpu_memory, gpu_memory_util) + log_info(f"{gpu_memory} GB of memory per GPU, with {gpu_memory} GB x {gpu_memory_util} (gpu_memory_utilization) = {avail_gpu_memory} GB available to use.") + log_info(f"Each model replica requires {per_replica_requirement} GPUs, total available GPU memory = {avail_gpu_memory * per_replica_requirement} GB.") + + # Calculate model memory requirement + log_info("👉 Collecting model information....") + if model_info is not None: + try: + model_params = model_total_params(model_info) + log_info(f"{model} has a total of {model_params} parameters") + + model_mem_req = model_memory_req(model_info, model_config) + log_info(f"{model} requires {model_mem_req} GB of memory") + + # Estimate KV cache memory and max number of requests that can be served in worst case scenario + if not skip_gpu_tests: + log_info("👉 Estimating available KV cache....") + available_kv_cache = allocatable_kv_cache_memory( + model_info, model_config, + gpu_memory, gpu_memory_util, + tp=tp, dp=dp, + ) + log_info(f"Allocatable memory for KV cache {available_kv_cache} GB") + + if available_kv_cache < 0: + log_failed(f"There is not enough GPU memory to stand up model. Exceeds by {abs(available_kv_cache)} GB.", ignore_if_failed) + else: + kv_details = KVCacheDetail(model_info, model_config, max_model_len, batch_size=1) + log_info(f"KV cache memory for a request taking --max-model-len={max_model_len} requires {kv_details.per_request_kv_cache_gb} GB of memory") + + total_concurrent_reqs = max_concurrent_requests( + model_info, model_config, max_model_len, + gpu_memory, gpu_memory_util, + tp=tp, dp=dp, + ) + log_info(f"The vLLM server can process up to {total_concurrent_reqs} number of requests at the same time, assuming the worst case scenario that each request takes --max-model-len") + + except AttributeError as e: + # Model might not have safetensors data on parameters + log_failed(f"Does not have enough information about model to estimate model memory or KV cache: {e}", ignore_if_failed) + else: + log_failed(f"Model info on model's architecture is not available.", ignore_if_failed) + + def main(): + """Main function""" + print("main() called") + validate_vllm_params() + print("main() exiting") + + if __name__ == "__main__": + sys.exit(main()) + env: + - name: VALIDATE_CAPACITY + value: $(params.validateCapacity) + - name: BEHAVIOR_ON_FAILURE + value: $(params.behaviorOnValidationFailure) + + - name: MODEL + value: $(params.model) + + - name: REPLICAS + value: $(params.replicas) + - name: TP + value: $(params.tp) + - name: DP + value: $(params.dp) + - name: GPU_MEMORY + value: $(params.gpu_memory) + - name: USER_REQUESTED_GPU_COUNT + value: $(params.user_requested_gpu_count) + - name: MAX_MODEL_LEN + value: $(params.max_model_len) + - name: GPU_MEMORY_UTIL + value: $(params.gpu_memory_util) + + - name: HF_TOKEN + valueFrom: + secretKeyRef: + name: hf-secret + key: HF_TOKEN + + - name: PY_BIN + value: "$(params.py)" + + # https://github.com/llm-d/llm-d-benchmark/blob/main/build/Dockerfile#L1C6-L1C33 + image: python:3.12.9-slim-bookworm + script: | + #!/usr/bin/env bash + + if [ "${VALIDATE_CAPACITY}" != "true" ]; then + echo "â„šī¸ Skipping capacity validation" + 
exit 0 + fi + + # Install git so can install capacity explorer + apt-get update \ + && apt-get install -y git \ + && rm -rf /var/lib/apt/lists/* + python -m pip install --no-cache "config_explorer @ git+https://github.com/llm-d/llm-d-benchmark.git/#subdirectory=config_explorer" + + # run capacity explorer + printf "%s\n" "${PY_BIN}" | python - + + diff --git a/tekton-poc/pipeline/steps/workload-profile.yaml b/tekton-poc/pipeline/steps/workload-profile.yaml new file mode 100644 index 00000000..bc4810fe --- /dev/null +++ b/tekton-poc/pipeline/steps/workload-profile.yaml @@ -0,0 +1,131 @@ +apiVersion: tekton.dev/v1beta1 +kind: StepAction +metadata: + name: prepare-workload-profile +spec: + params: + - name: harnessName + - name: harnessProfile + - name: model-id + - name: namespace + - name: treatmentAnalysis + - name: pipelineUID + env: + - name: HARNESS_NAME + value: "$(params.harnessName)" + - name: HARNESS_PROFILE + value: "$(params.harnessProfile)" + + - name: TREATMENT_ANALYSIS + value: "$(params.treatmentAnalysis)" + + - name: LLMDBENCH_DEPLOY_CURRENT_MODEL + value: "$(params.model-id)" + - name: LLMDBENCH_HARNESS_STACK_ENDPOINT_URL + value: "http://experiment-gateway-inference-gateway.$(params.namespace).svc.cluster.local:80" + + - name: DATA_ROOT_DIR + value: $(workspaces.data.path) + - name: MY_TASK_NAME + value: $(context.taskRun.name) + - name: MY_PIPELINE_UID + value: $(params.pipelineUID) + + # https://github.com/llm-d/llm-d-benchmark/blob/main/build/Dockerfile#L1C6-L1C33 + image: python:3.12.9-slim-bookworm + script: | + #!/bin/bash + + echo "🔄 Preparing workload profile ${HARNESS_PROFILE} for ${HARNESS_NAME}" + + # TBD is this necessary or is it already there? + apt-get update + apt-get install -y --no-install-recommends curl ca-certificates jq + curl -fsSL https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 \ + -o /usr/local/bin/yq + chmod +x /usr/local/bin/yq + jq --version + yq --version + + # https://github.com/llm-d/llm-d-benchmark/blob/main/setup/run.sh + + EXPERIMENT_ID="experiment-$(echo -n ${MY_PIPELINE_UID} | cut -c1-8)" + RESULTS_DIR="${DATA_ROOT_DIR}/${HARNESS_NAME}_${EXPERIMENT_ID}_${MY_TASK_NAME}" + CONTROL_DIR="${DATA_ROOT_DIR}/${HARNESS_NAME}_${EXPERIMENT_ID}_${MY_TASK_NAME}" + RUN_DIR=$(pwd) + + echo "🔄 Installing required tools" + apt-get update + apt-get install -y \ + wget \ + && apt-get clean && rm -rf /var/cache/apt + + # Ensure all folders created + mkdir -p $RESULTS_DIR + mkdir -p $CONTROL_DIR/setup + rm -rf $CONTROL_DIR/setup/sed-commands + touch $CONTROL_DIR/setup/sed-commands + mkdir -p ${CONTROL_DIR}/workload/profiles/${HARNESS_NAME}/templates + + cd ${RUN_DIR}/vllm-benchmark/ + + # Define constants: input profile template name and location; final profile name and location + workload=$(echo ${HARNESS_PROFILE} | sed 's^\.yaml^^g' ) + workload_template=${workload}.yaml.in + workload_template_path=${CONTROL_DIR}/workload/profiles/${HARNESS_NAME}/templates/${workload_template} + workload_profile=${workload}.yaml + workload_profile_path=${CONTROL_DIR}/workload/profiles/${HARNESS_NAME}/${workload_profile} + + echo "🔄 Prepare workload profile" + # Fetch profile template from llmd-benchmark + wget -O ${workload_template_path} \ + --quiet \ + https://raw.githubusercontent.com/llm-d/llm-d-benchmark/refs/heads/main/workload/profiles/${HARNESS_NAME}/${workload_template} + + # Apply treatment to profile template to produce final profile + echo "s^REPLACE_ENV_LLMDBENCH_DEPLOY_CURRENT_MODEL^${LLMDBENCH_DEPLOY_CURRENT_MODEL}^g" >> 
${CONTROL_DIR}/setup/sed-commands + echo "s^REPLACE_ENV_LLMDBENCH_HARNESS_STACK_ENDPOINT_URL^${LLMDBENCH_HARNESS_STACK_ENDPOINT_URL}^g" >> ${CONTROL_DIR}/setup/sed-commands + echo "s^REPLACE_ENV_LLMDBENCH_DEPLOY_CURRENT_TOKENIZER^${LLMDBENCH_DEPLOY_CURRENT_TOKENIZER}^g" >> ${CONTROL_DIR}/setup/sed-commands + echo "---------- sed-commands" + cat ${CONTROL_DIR}/setup/sed-commands + echo "----------" + sed -f ${CONTROL_DIR}/setup/sed-commands ${workload_template_path} > ${workload_profile_path} + + # TBD eliminate the TARGET_FILE env variable + TARGET_FILE=${workload_profile_path} + echo "${TREATMENT_ANALYSIS}" | jq '.updates' > /tmp/updates.json + echo ">>> /tmp/updates.json" + cat /tmp/updates.json + + if [ ! -f "$TARGET_FILE" ]; then + echo "ERROR: File not found: $TARGET_FILE" >&2 + exit 1 + fi + + # Apply updates to JSON or YAML + if [ "$(jq 'length' /tmp/updates.json)" -gt 0 ]; then + ext="${TARGET_FILE##*.}" + tmp="${TARGET_FILE}.tmp" + + # TBD eliminate the json path (copilot generated this); profiles are yaml files + if [ "$ext" = "json" ]; then + jq --slurpfile upds /tmp/updates.json ' + reduce $upds[0][] as $u (. ; setpath($u.path; $u.value)) + ' "$TARGET_FILE" > "$tmp" + mv "$tmp" "$TARGET_FILE" + else + # YAML path: YAML → JSON → apply → YAML + yq -o=json '.' "$TARGET_FILE" \ + | jq --slurpfile upds /tmp/updates.json ' + reduce $upds[0][] as $u (. ; setpath($u.path; $u.value)) + ' \ + | yq -P > "$tmp" + mv "$tmp" "$TARGET_FILE" + fi + fi + + echo "---------- workload profile" + cat ${workload_profile_path} + echo "----------" + + echo "✅ workload profile ready" From 218833faa15a4a6b0f55b0872115547929926e73 Mon Sep 17 00:00:00 2001 From: Michael Kalantar Date: Mon, 20 Oct 2025 11:38:21 -0400 Subject: [PATCH 43/44] rename a few things Signed-off-by: Michael Kalantar --- tekton-poc/pipeline/pd-disaggregation-pr.yaml | 8 +-- .../pipeline/pipelinerun-matrix-subset.yaml | 2 +- tekton-poc/pipeline/pipelinerun-matrix.yaml | 2 +- .../pipeline/pipelinerun-sequential-1.yaml | 72 +++++++++---------- .../pipelinerun-sequential-4-barrier.yaml | 72 +++++++++---------- .../pipelinerun-sequential-4-sliding.yaml | 72 +++++++++---------- ...un-sequential-unroll-gaiePluginConfig.yaml | 8 +-- .../treatment.yaml} | 10 +-- 8 files changed, 123 insertions(+), 123 deletions(-) rename tekton-poc/pipeline/{experiment-task.yaml => tasks/treatment.yaml} (99%) diff --git a/tekton-poc/pipeline/pd-disaggregation-pr.yaml b/tekton-poc/pipeline/pd-disaggregation-pr.yaml index d97212e0..8be4b769 100644 --- a/tekton-poc/pipeline/pd-disaggregation-pr.yaml +++ b/tekton-poc/pipeline/pd-disaggregation-pr.yaml @@ -6,7 +6,7 @@ spec: taskRunTemplate: serviceAccountName: helm-installer taskRunSpecs: - - pipelineTaskName: run-experiment + - pipelineTaskName: treatment computeResources: requests: memory: "16Gi" @@ -107,9 +107,9 @@ spec: default: "" tasks: - - name: run-experiment + - name: treatment taskRef: - name: experiment + name: treatment workspaces: - name: data workspace: data @@ -151,7 +151,7 @@ spec: - name: targetNamespacePrefix value: $(params.targetNamespacePrefix) - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/pd-disaggregation/ - name: s3-keys diff --git a/tekton-poc/pipeline/pipelinerun-matrix-subset.yaml b/tekton-poc/pipeline/pipelinerun-matrix-subset.yaml index 822fea78..f805f9ec 100644 --- a/tekton-poc/pipeline/pipelinerun-matrix-subset.yaml +++ b/tekton-poc/pipeline/pipelinerun-matrix-subset.yaml 
@@ -51,7 +51,7 @@ spec: value: $(params.targetNamespacePrefix) - name: model-id value: $(params.model-id) - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: s3-keys diff --git a/tekton-poc/pipeline/pipelinerun-matrix.yaml b/tekton-poc/pipeline/pipelinerun-matrix.yaml index 5cc1661b..529ec2b9 100644 --- a/tekton-poc/pipeline/pipelinerun-matrix.yaml +++ b/tekton-poc/pipeline/pipelinerun-matrix.yaml @@ -52,7 +52,7 @@ spec: value: $(params.targetNamespacePrefix) - name: model-id value: $(params.model-id) - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: s3-keys diff --git a/tekton-poc/pipeline/pipelinerun-sequential-1.yaml b/tekton-poc/pipeline/pipelinerun-sequential-1.yaml index a4b77783..4a2f0089 100644 --- a/tekton-poc/pipeline/pipelinerun-sequential-1.yaml +++ b/tekton-poc/pipeline/pipelinerun-sequential-1.yaml @@ -24,7 +24,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -45,7 +45,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -68,7 +68,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -91,7 +91,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -114,7 +114,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -137,7 +137,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -160,7 +160,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -183,7 +183,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: 
https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -206,7 +206,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -229,7 +229,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -252,7 +252,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -275,7 +275,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -298,7 +298,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -321,7 +321,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -344,7 +344,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -367,7 +367,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -390,7 +390,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -413,7 +413,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -436,7 +436,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: 
https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -459,7 +459,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -482,7 +482,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -505,7 +505,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -528,7 +528,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -551,7 +551,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -574,7 +574,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -597,7 +597,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -620,7 +620,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -643,7 +643,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -666,7 +666,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -689,7 +689,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: 
https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -712,7 +712,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -735,7 +735,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -758,7 +758,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -781,7 +781,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -804,7 +804,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -827,7 +827,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml diff --git a/tekton-poc/pipeline/pipelinerun-sequential-4-barrier.yaml b/tekton-poc/pipeline/pipelinerun-sequential-4-barrier.yaml index 988117a1..1dc4f388 100644 --- a/tekton-poc/pipeline/pipelinerun-sequential-4-barrier.yaml +++ b/tekton-poc/pipeline/pipelinerun-sequential-4-barrier.yaml @@ -24,7 +24,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -45,7 +45,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -66,7 +66,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -87,7 +87,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: 
https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -108,7 +108,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -134,7 +134,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -160,7 +160,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -186,7 +186,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -212,7 +212,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -238,7 +238,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -264,7 +264,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -290,7 +290,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -316,7 +316,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -342,7 +342,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -368,7 +368,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: 
https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -394,7 +394,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -420,7 +420,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -446,7 +446,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -472,7 +472,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -498,7 +498,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -524,7 +524,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -550,7 +550,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -576,7 +576,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -602,7 +602,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -628,7 +628,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -654,7 +654,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: 
https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -680,7 +680,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -706,7 +706,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -732,7 +732,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -758,7 +758,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -784,7 +784,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -810,7 +810,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -836,7 +836,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -862,7 +862,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -888,7 +888,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -914,7 +914,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml diff --git a/tekton-poc/pipeline/pipelinerun-sequential-4-sliding.yaml b/tekton-poc/pipeline/pipelinerun-sequential-4-sliding.yaml index 
76f815b6..9a750925 100644 --- a/tekton-poc/pipeline/pipelinerun-sequential-4-sliding.yaml +++ b/tekton-poc/pipeline/pipelinerun-sequential-4-sliding.yaml @@ -24,7 +24,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -45,7 +45,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -66,7 +66,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -87,7 +87,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -108,7 +108,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -131,7 +131,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -154,7 +154,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -177,7 +177,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -200,7 +200,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -223,7 +223,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -246,7 +246,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: 
https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -269,7 +269,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -292,7 +292,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -315,7 +315,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -338,7 +338,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -361,7 +361,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -384,7 +384,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -407,7 +407,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -430,7 +430,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -453,7 +453,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -476,7 +476,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -499,7 +499,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: 
https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -522,7 +522,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -545,7 +545,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -568,7 +568,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -591,7 +591,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -614,7 +614,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -637,7 +637,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -660,7 +660,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -683,7 +683,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -706,7 +706,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -729,7 +729,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/ - name: harnessProfile value: shared_prefix_synthetic.yaml @@ -752,7 +752,7 @@ spec: value: kalantar - name: model-id value: Qwen/Qwen3-0.6B - - name: experimentBaseUrl + - name: stackBaseUrl value: 
https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/
     - name: harnessProfile
       value: shared_prefix_synthetic.yaml
@@ -775,7 +775,7 @@ spec:
       value: kalantar
     - name: model-id
       value: Qwen/Qwen3-0.6B
-    - name: experimentBaseUrl
+    - name: stackBaseUrl
       value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/
     - name: harnessProfile
       value: shared_prefix_synthetic.yaml
@@ -798,7 +798,7 @@ spec:
       value: kalantar
     - name: model-id
       value: Qwen/Qwen3-0.6B
-    - name: experimentBaseUrl
+    - name: stackBaseUrl
       value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/
     - name: harnessProfile
       value: shared_prefix_synthetic.yaml
@@ -821,7 +821,7 @@ spec:
       value: kalantar
     - name: model-id
       value: Qwen/Qwen3-0.6B
-    - name: experimentBaseUrl
+    - name: stackBaseUrl
       value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/
     - name: harnessProfile
       value: shared_prefix_synthetic.yaml
diff --git a/tekton-poc/pipeline/pipelinerun-sequential-unroll-gaiePluginConfig.yaml b/tekton-poc/pipeline/pipelinerun-sequential-unroll-gaiePluginConfig.yaml
index 5c36a680..eae7a3f5 100644
--- a/tekton-poc/pipeline/pipelinerun-sequential-unroll-gaiePluginConfig.yaml
+++ b/tekton-poc/pipeline/pipelinerun-sequential-unroll-gaiePluginConfig.yaml
@@ -24,7 +24,7 @@ spec:
       value: kalantar
     - name: model-id
       value: Qwen/Qwen3-0.6B
-    - name: experimentBaseUrl
+    - name: stackBaseUrl
       value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/
     - name: harnessProfile
       value: shared_prefix_synthetic.yaml
@@ -55,7 +55,7 @@ spec:
       value: kalantar
     - name: model-id
       value: Qwen/Qwen3-0.6B
-    - name: experimentBaseUrl
+    - name: stackBaseUrl
       value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/
     - name: harnessProfile
       value: shared_prefix_synthetic.yaml
@@ -80,7 +80,7 @@ spec:
       value: kalantar
     - name: model-id
       value: Qwen/Qwen3-0.6B
-    - name: experimentBaseUrl
+    - name: stackBaseUrl
       value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/
     - name: harnessProfile
       value: shared_prefix_synthetic.yaml
@@ -105,7 +105,7 @@ spec:
       value: kalantar
     - name: model-id
       value: Qwen/Qwen3-0.6B
-    - name: experimentBaseUrl
+    - name: stackBaseUrl
       value: https://raw.githubusercontent.com/kalantar/llm-d-benchmark/refs/heads/tekton-poc/tekton-poc/examples/inference-scheduling/
     - name: harnessProfile
       value: shared_prefix_synthetic.yaml
diff --git a/tekton-poc/pipeline/experiment-task.yaml b/tekton-poc/pipeline/tasks/treatment.yaml
similarity index 99%
rename from tekton-poc/pipeline/experiment-task.yaml
rename to tekton-poc/pipeline/tasks/treatment.yaml
index 8bcf868a..17844190 100644
--- a/tekton-poc/pipeline/experiment-task.yaml
+++ b/tekton-poc/pipeline/tasks/treatment.yaml
@@ -1,7 +1,7 @@
 apiVersion: tekton.dev/v1
 kind: Task
 metadata:
-  name: experiment
+  name: treatment
 spec:
   description: >
     Runs an llm-d-benchmark experiment.
@@ -52,7 +52,7 @@ spec:
     - name: gpuType
     - name: gpuMemory
-    - name: experimentBaseUrl
+    - name: stackBaseUrl
       type: string
     - name: experimentName
       type: string
@@ -468,7 +468,7 @@ spec:
     - name: timeout
       value: 15m
     - name: valuesYamlUrl
-      value: "$(params.experimentBaseUrl)/gateway-values.yaml"
+      value: "$(params.stackBaseUrl)/gateway-values.yaml"
     - name: dry-run
       value: $(params.dry-run)
@@ -489,7 +489,7 @@ spec:
     - name: timeout
       value: 15m
     - name: valuesYamlUrl
-      value: "$(params.experimentBaseUrl)/gaie-values.yaml"
+      value: "$(params.stackBaseUrl)/gaie-values.yaml"
     - name: treatmentAnalysis
       value: "$(steps.analyze-gaie-factors.results.treatmentAnalysis)"
@@ -514,7 +514,7 @@ spec:
     - name: timeout
       value: 15m
     - name: valuesYamlUrl
-      value: "$(params.experimentBaseUrl)/ms-values.yaml"
+      value: "$(params.stackBaseUrl)/ms-values.yaml"
     - name: extraArgs
       value: >
         --set routing.inferencePool.name=$(params.experimentName)-gaie-NAMESPACE_HASH

From 168739d8e7a1d42448c06fd5b0abde593e829aea Mon Sep 17 00:00:00 2001
From: Michael Kalantar
Date: Thu, 30 Oct 2025 12:50:48 -0400
Subject: [PATCH 44/44] update roles

Signed-off-by: Michael Kalantar
---
 tekton-poc/pipeline/roles.yaml             |  4 ++--
 tekton-poc/pipeline/steps/stepactions.yaml | 18 ++++++++++++++++++
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/tekton-poc/pipeline/roles.yaml b/tekton-poc/pipeline/roles.yaml
index 5f447233..b1ae2e54 100644
--- a/tekton-poc/pipeline/roles.yaml
+++ b/tekton-poc/pipeline/roles.yaml
@@ -28,7 +28,7 @@ rules:
     resources: ["gatewayparameters"]
     verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
   - apiGroups: ["inference.networking.x-k8s.io"]
-    resources: ["inferencepools", "inferencemodels"]
+    resources: ["inferencepools", "inferencemodels", "inferenceobjectives"]
     verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
   - apiGroups: ["authentication.k8s.io"]
     resources: ["tokenreviews"]
@@ -95,7 +95,7 @@ rules:
     resources: ["gatewayparameters"]
     verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
   - apiGroups: ["inference.networking.x-k8s.io"]
-    resources: ["inferencepools", "inferencemodels"]
+    resources: ["inferencepools", "inferencemodels", "inferenceobjectives"]
     verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
   - apiGroups: ["authentication.k8s.io"]
     resources: ["tokenreviews", "subjectaccessreviews"]
diff --git a/tekton-poc/pipeline/steps/stepactions.yaml b/tekton-poc/pipeline/steps/stepactions.yaml
index 282bfbff..fd1d07ee 100644
--- a/tekton-poc/pipeline/steps/stepactions.yaml
+++ b/tekton-poc/pipeline/steps/stepactions.yaml
@@ -165,6 +165,9 @@ spec:
     - name: extraArgs
       type: string
       default: ""
+    - name: extraValues
+      type: string
+      default: ""
     - name: treatmentAnalysis
       type: string
       default: ""
@@ -226,6 +229,8 @@ spec:
       value: "$(params.valuesYamlUrl)"
     - name: HELM_EXTRA_ARGS
       value: "$(params.extraArgs)"
+    - name: HELM_EXTRA_VALUES
+      value: "$(params.extraValues)"
     - name: TREATMENT_ANALYSIS
       value: "$(params.treatmentAnalysis)"
@@ -275,6 +280,14 @@ spec:
        VALUES_FLAG="-f /tmp/${HELM_RELEASE}-values.yaml"
      fi
+
+     if [ -n "${HELM_EXTRA_VALUES:-}" ]; then
+       echo ">>> HELM_EXTRA_VALUES"
+       printf "%s" "${HELM_EXTRA_VALUES}"
+       printf "%s" "${HELM_EXTRA_VALUES}" > /tmp/${HELM_RELEASE}-extra-values.yaml
+       VALUES_FLAG="${VALUES_FLAG} -f /tmp/${HELM_RELEASE}-extra-values.yaml"
+     fi
+
      # Optional repo add (idempotent via --force-update)
      if [ -n "${HELM_REPO_NAME:-}" ] && [ -n "${HELM_REPO_URL:-}" ]; then
        REPO_ADD_FLAGS="--force-update"
@@ -324,6 +337,11 @@ spec:
      echo "==> helm upgrade --install ${HELM_RELEASE} ${CHART_REF} --namespace ${HELM_NAMESPACE} ${VERSION_FLAG} ${CREATE_NS_FLAG} ${WAIT_FLAG} ${TIMEOUT_FLAG} ${VALUES_FLAG} ${HELM_EXTRA_ARGS}"
      # shellcheck disable=SC2086
      helm template \
        "${HELM_RELEASE}" "${CHART_REF}" \
        --namespace "${HELM_NAMESPACE}" \
        ${VERSION_FLAG} ${CREATE_NS_FLAG} ${WAIT_FLAG} ${TIMEOUT_FLAG} ${VALUES_FLAG} ${HELM_EXTRA_ARGS}
      # shellcheck disable=SC2086
      helm template \
        "${HELM_RELEASE}" "${CHART_REF}" \
        --namespace "${HELM_NAMESPACE}" \