From 33ecafb655152605fb53e0bfb8a03370dc16fc42 Mon Sep 17 00:00:00 2001
From: Wen Zhou
Date: Mon, 7 Jul 2025 18:20:11 +0200
Subject: [PATCH 1/4] docs: update examples to use the "starter" distro instead of "ollama"

- update the examples and create one without using a user ConfigMap
- set a new env var to enable ollama
- use the same llama model as in llama-stack
- remove deprecated distro images from distributions.json

Signed-off-by: Wen Zhou
---
 README.md                                    |  9 ++++---
 config/manager/manager.yaml                  |  2 +-
 config/samples/_v1alpha1_llamastackdistribution.yaml | 6 +++--
 config/samples/example-with-configmap.yaml   | 26 ++++++++++++-------
 config/samples/example-withoutconfigmpa.yaml | 22 ++++++++++++++++
 distributions.json                           | 11 +++-----
 6 files changed, 52 insertions(+), 24 deletions(-)
 create mode 100644 config/samples/example-withoutconfigmpa.yaml

diff --git a/README.md b/README.md
index 90efac15..99145197 100644
--- a/README.md
+++ b/README.md
@@ -80,21 +80,22 @@ spec:
   replicas: 1
   server:
     distribution:
-      name: ollama
+      name: starter
     containerSpec:
-      port: 8321
       env:
         - name: INFERENCE_MODEL
-          value: "llama3.2:1b"
+          value: "llama3.2:3b"
         - name: OLLAMA_URL
           value: "http://ollama-server-service.ollama-dist.svc.cluster.local:11434"
+        - name: ENABLE_OLLAMA
+          value: ollama
     storage:
       size: "20Gi"
       mountPath: "/home/lls/.lls"
 ```
 3. Verify the server pod is running in the user defined namespace.

-### Using a ConfigMap for run.yaml configuration
+### Using a ConfigMap to override the default run.yaml configuration from the distribution

 A ConfigMap can be used to store run.yaml configuration for each LlamaStackDistribution. Updates to the ConfigMap will restart the Pod to load the new data.

diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml
index f8202ac6..7c0a9afd 100644
--- a/config/manager/manager.yaml
+++ b/config/manager/manager.yaml
@@ -48,7 +48,7 @@ spec:
         allowPrivilegeEscalation: false
         capabilities:
           drop:
-            - "ALL"
+          - "ALL"
       livenessProbe:
         httpGet:
           path: /healthz

diff --git a/config/samples/_v1alpha1_llamastackdistribution.yaml b/config/samples/_v1alpha1_llamastackdistribution.yaml
index 7091485c..737000ae 100644
--- a/config/samples/_v1alpha1_llamastackdistribution.yaml
+++ b/config/samples/_v1alpha1_llamastackdistribution.yaml
@@ -8,12 +8,14 @@ spec:
     containerSpec:
       env:
         - name: INFERENCE_MODEL
-          value: 'llama3.2:1b'
+          value: 'llama3.2:3b'
         - name: OLLAMA_URL
           value: 'http://ollama-server-service.ollama-dist.svc.cluster.local:11434'
+        - name: ENABLE_OLLAMA
+          value: ollama
       name: llama-stack
     distribution:
-      name: ollama
+      name: starter
     # Uncomment the storage section to use persistent storage
     # storage: {} # Will use default size of 10Gi and default mount path of /.llama
     # Or specify custom values:

diff --git a/config/samples/example-with-configmap.yaml b/config/samples/example-with-configmap.yaml
index b4600ee3..439fbcfd 100644
--- a/config/samples/example-with-configmap.yaml
+++ b/config/samples/example-with-configmap.yaml
@@ -6,38 +6,44 @@ data:
   run.yaml: |
     # Llama Stack Configuration
     version: '2'
-    image_name: ollama
+    image_name: starter
     apis:
     - inference
     providers:
       inference:
-      - provider_id: ollama
+      - provider_id: ${env.ENABLE_OLLAMA:=__disabled__}
         provider_type: "remote::ollama"
         config:
           url: "http://ollama-server-service.ollama-dist.svc.cluster.local:11434"
     models:
-    - model_id: "llama3.2:1b"
+    - model_id: "ollama/llama3.2:3b"
       provider_id: ollama
       model_type: llm
+      provider_model_id: llama3.2:3b
+    - embedding_dimension: 384
+      model_id: ${env.ENABLE_OLLAMA:=__disabled__}/${env.OLLAMA_EMBEDDING_MODEL:=__disabled__}
+      provider_id: ${env.ENABLE_OLLAMA:=__disabled__}
+      provider_model_id: ${env.OLLAMA_EMBEDDING_MODEL:=__disabled__}
+      model_type: embedding
     server:
       port: 8321
 ---
 apiVersion: llamastack.io/v1alpha1
 kind: LlamaStackDistribution
 metadata:
-  name: llamastack-with-config
+  name: llamastack-with-userconfig
 spec:
   replicas: 1
   server:
     distribution:
-      name: ollama
+      name: starter
     containerSpec:
       port: 8321
       env:
-        - name: INFERENCE_MODEL
-          value: "llama3.2:1b"
-        - name: OLLAMA_URL
-          value: "http://ollama-server-service.ollama-dist.svc.cluster.local:11434"
+        - name: ENABLE_OLLAMA
+          value: ollama
+        - name: OLLAMA_EMBEDDING_MODEL
+          value: all-minilm:l6-v2
     userConfig:
-      configMapName: llama-stack-config
+      configMapName: llama-stack-config # use ConfigMap's data.run.yaml
       # configMapNamespace: "" # Optional - defaults to the same namespace as the CR

diff --git a/config/samples/example-withoutconfigmpa.yaml b/config/samples/example-withoutconfigmpa.yaml
new file mode 100644
index 00000000..66960f6e
--- /dev/null
+++ b/config/samples/example-withoutconfigmpa.yaml
@@ -0,0 +1,22 @@
+---
+apiVersion: llamastack.io/v1alpha1
+kind: LlamaStackDistribution
+metadata:
+  name: llamastack-without-userconfig
+spec:
+  replicas: 1
+  server:
+    distribution:
+      name: starter
+    containerSpec:
+      port: 8321
+      env:
+        - name: OLLAMA_INFERENCE_MODEL
+          value: "llama3.2:3b"
+        - name: OLLAMA_URL
+          value: "http://ollama-server-service.ollama-dist.svc.cluster.local:11434"
+        - name: ENABLE_OLLAMA
+          value: ollama
+    storage:
+      size: "10Gi" # Optional - defaults to 10Gi
+      mountPath: "/home/lls/.lls" # Optional - defaults to /.llama

diff --git a/distributions.json b/distributions.json
index 63664c73..a16495d5 100644
--- a/distributions.json
+++ b/distributions.json
@@ -1,9 +1,6 @@
 {
-"starter": "docker.io/llamastack/distribution-starter:latest",
-"ollama": "docker.io/llamastack/distribution-ollama:latest",
-"bedrock": "docker.io/llamastack/distribution-bedrock:latest",
-"remote-vllm": "docker.io/llamastack/distribution-remote-vllm:latest",
-"tgi": "docker.io/llamastack/distribution-tgi:latest",
-"together": "docker.io/llamastack/distribution-together:latest",
-"vllm-gpu": "docker.io/llamastack/distribution-vllm-gpu:latest"
+  "starter": "docker.io/llamastack/distribution-starter:0.2.15",
+  "remote-vllm": "docker.io/llamastack/distribution-remote-vllm:0.2.15",
+  "meta-reference-gpu": "docker.io/llamastack/distribution-meta-reference-gpu:0.2.15",
+  "postgres-demo": "docker.io/llamastack/distribution-postgres-demo:0.2.15"
 }

From 54c517df1655e8d4bd121222cb15b1137e66ba1d Mon Sep 17 00:00:00 2001
From: Wen Zhou
Date: Thu, 28 Aug 2025 11:46:59 +0200
Subject: [PATCH 2/4] update: address review comments

- revert back to llama3.2:1b
- remove unnecessary/unrelated comments/changes
- rename INFERENCE_MODEL to OLLAMA_INFERENCE_MODEL
- remove ENABLE_OLLAMA
- set images to use the "latest" tag instead of 0.2.15

Signed-off-by: Wen Zhou
---
 README.md                                    |  6 ++----
 config/manager/manager.yaml                  |  2 +-
 config/samples/_v1alpha1_llamastackdistribution.yaml | 6 ++----
 config/samples/example-with-configmap.yaml   | 15 ++++-----------
 config/samples/example-withoutconfigmpa.yaml |  4 +---
 distributions.json                           |  8 ++++----
 6 files changed, 14 insertions(+), 27 deletions(-)

diff --git a/README.md b/README.md
index 99145197..36250935 100644
--- a/README.md
+++ b/README.md
@@ -84,18 +84,16 @@ spec:
     containerSpec:
       env:
         - name: INFERENCE_MODEL
-          value: "llama3.2:3b"
+          value: "llama3.2:1b"
         - name: OLLAMA_URL
           value: "http://ollama-server-service.ollama-dist.svc.cluster.local:11434"
-        - name: ENABLE_OLLAMA
-          value: ollama
     storage:
       size: "20Gi"
       mountPath: "/home/lls/.lls"
 ```
 3. Verify the server pod is running in the user defined namespace.

-### Using a ConfigMap to override the default run.yaml configuration from the distribution
+### Using a ConfigMap for run.yaml configuration

 A ConfigMap can be used to store run.yaml configuration for each LlamaStackDistribution. Updates to the ConfigMap will restart the Pod to load the new data.

diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml
index 7c0a9afd..f8202ac6 100644
--- a/config/manager/manager.yaml
+++ b/config/manager/manager.yaml
@@ -48,7 +48,7 @@ spec:
         allowPrivilegeEscalation: false
         capabilities:
           drop:
-          - "ALL"
+            - "ALL"
       livenessProbe:
         httpGet:
           path: /healthz

diff --git a/config/samples/_v1alpha1_llamastackdistribution.yaml b/config/samples/_v1alpha1_llamastackdistribution.yaml
index 737000ae..e2b8063f 100644
--- a/config/samples/_v1alpha1_llamastackdistribution.yaml
+++ b/config/samples/_v1alpha1_llamastackdistribution.yaml
@@ -7,12 +7,10 @@ spec:
   server:
     containerSpec:
      env:
-        - name: INFERENCE_MODEL
-          value: 'llama3.2:3b'
+        - name: OLLAMA_INFERENCE_MODEL
+          value: 'llama3.2:1b'
         - name: OLLAMA_URL
           value: 'http://ollama-server-service.ollama-dist.svc.cluster.local:11434'
-        - name: ENABLE_OLLAMA
-          value: ollama
       name: llama-stack
     distribution:
       name: starter

diff --git a/config/samples/example-with-configmap.yaml b/config/samples/example-with-configmap.yaml
index 439fbcfd..3f70a2b1 100644
--- a/config/samples/example-with-configmap.yaml
+++ b/config/samples/example-with-configmap.yaml
@@ -11,20 +11,15 @@ data:
     - inference
     providers:
       inference:
-      - provider_id: ${env.ENABLE_OLLAMA:=__disabled__}
+      - provider_id: ollama
         provider_type: "remote::ollama"
         config:
           url: "http://ollama-server-service.ollama-dist.svc.cluster.local:11434"
     models:
-    - model_id: "ollama/llama3.2:3b"
+    - model_id: "llama3.2:1b"
       provider_id: ollama
       model_type: llm
-      provider_model_id: llama3.2:3b
-    - embedding_dimension: 384
-      model_id: ${env.ENABLE_OLLAMA:=__disabled__}/${env.OLLAMA_EMBEDDING_MODEL:=__disabled__}
-      provider_id: ${env.ENABLE_OLLAMA:=__disabled__}
-      provider_model_id: ${env.OLLAMA_EMBEDDING_MODEL:=__disabled__}
-      model_type: embedding
+      provider_model_id: llama3.2:1b
     server:
       port: 8321
 ---
@@ -40,10 +35,8 @@ spec:
     containerSpec:
       port: 8321
       env:
-        - name: ENABLE_OLLAMA
-          value: ollama
         - name: OLLAMA_EMBEDDING_MODEL
           value: all-minilm:l6-v2
     userConfig:
-      configMapName: llama-stack-config # use ConfigMap's data.run.yaml
+      configMapName: llama-stack-config
       # configMapNamespace: "" # Optional - defaults to the same namespace as the CR

diff --git a/config/samples/example-withoutconfigmpa.yaml b/config/samples/example-withoutconfigmpa.yaml
index 66960f6e..4d1abbd4 100644
--- a/config/samples/example-withoutconfigmpa.yaml
+++ b/config/samples/example-withoutconfigmpa.yaml
@@ -12,11 +12,9 @@ spec:
       port: 8321
       env:
         - name: OLLAMA_INFERENCE_MODEL
-          value: "llama3.2:3b"
+          value: "llama3.2:1b"
         - name: OLLAMA_URL
           value: "http://ollama-server-service.ollama-dist.svc.cluster.local:11434"
-        - name: ENABLE_OLLAMA
-          value: ollama
     storage:
       size: "10Gi" # Optional - defaults to 10Gi
       mountPath: "/home/lls/.lls" # Optional - defaults to /.llama

diff --git a/distributions.json b/distributions.json
index a16495d5..80dd7d7b 100644
--- a/distributions.json
+++ b/distributions.json
@@ -1,6 +1,6 @@
 {
-  "starter": "docker.io/llamastack/distribution-starter:0.2.15",
-  "remote-vllm": "docker.io/llamastack/distribution-remote-vllm:0.2.15",
-  "meta-reference-gpu": "docker.io/llamastack/distribution-meta-reference-gpu:0.2.15",
-  "postgres-demo": "docker.io/llamastack/distribution-postgres-demo:0.2.15"
+  "starter": "docker.io/llamastack/distribution-starter:latest",
+  "remote-vllm": "docker.io/llamastack/distribution-remote-vllm:latest",
+  "meta-reference-gpu": "docker.io/llamastack/distribution-meta-reference-gpu:latest",
+  "postgres-demo": "docker.io/llamastack/distribution-postgres-demo:latest"
 }

From 097297af766ace25212ab053891c727f80c82593 Mon Sep 17 00:00:00 2001
From: Wen Zhou
Date: Thu, 28 Aug 2025 12:36:57 +0200
Subject: [PATCH 3/4] update: address code review

- remove default port 8321 in sample

Signed-off-by: Wen Zhou
---
 config/samples/example-withoutconfigmpa.yaml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/config/samples/example-withoutconfigmpa.yaml b/config/samples/example-withoutconfigmpa.yaml
index 4d1abbd4..ef723cee 100644
--- a/config/samples/example-withoutconfigmpa.yaml
+++ b/config/samples/example-withoutconfigmpa.yaml
@@ -9,7 +9,6 @@ spec:
     distribution:
       name: starter
     containerSpec:
-      port: 8321
       env:
         - name: OLLAMA_INFERENCE_MODEL
           value: "llama3.2:1b"

From 23d0be30a685e31492d3d6428af5599345da8e0d Mon Sep 17 00:00:00 2001
From: Wen Zhou
Date: Tue, 2 Sep 2025 10:23:23 +0200
Subject: [PATCH 4/4] fix: address code review comments

Signed-off-by: Wen Zhou
---
 README.md                                  | 2 +-
 config/samples/example-with-configmap.yaml | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 36250935..5770722b 100644
--- a/README.md
+++ b/README.md
@@ -83,7 +83,7 @@ spec:
       name: starter
     containerSpec:
       env:
-        - name: INFERENCE_MODEL
+        - name: OLLAMA_INFERENCE_MODEL
           value: "llama3.2:1b"
         - name: OLLAMA_URL
           value: "http://ollama-server-service.ollama-dist.svc.cluster.local:11434"

diff --git a/config/samples/example-with-configmap.yaml b/config/samples/example-with-configmap.yaml
index 3f70a2b1..e158c884 100644
--- a/config/samples/example-with-configmap.yaml
+++ b/config/samples/example-with-configmap.yaml
@@ -19,7 +19,6 @@ data:
     - model_id: "llama3.2:1b"
       provider_id: ollama
       model_type: llm
-      provider_model_id: llama3.2:1b
     server:
       port: 8321
 ---