From 33ecafb655152605fb53e0bfb8a03370dc16fc42 Mon Sep 17 00:00:00 2001
From: Wen Zhou
Date: Mon, 7 Jul 2025 18:20:11 +0200
Subject: [PATCH 1/4] docs: update examples to use the "starter" distro instead of "ollama"

- update the examples and create one without using a user ConfigMap
- set a new env var to enable ollama
- use the same llama model as in llama-stack
- remove deprecated distro images from distributions.json

Signed-off-by: Wen Zhou
---
 README.md                                    |  9 ++++---
 config/manager/manager.yaml                  |  2 +-
 config/samples/_v1alpha1_llamastackdistribution.yaml | 6 +++--
 config/samples/example-with-configmap.yaml   | 26 ++++++++++++-------
 config/samples/example-withoutconfigmpa.yaml | 22 ++++++++++++++++
 distributions.json                           | 11 +++-----
 6 files changed, 52 insertions(+), 24 deletions(-)
 create mode 100644 config/samples/example-withoutconfigmpa.yaml

diff --git a/README.md b/README.md
index 90efac15..99145197 100644
--- a/README.md
+++ b/README.md
@@ -80,21 +80,22 @@ spec:
   replicas: 1
   server:
     distribution:
-      name: ollama
+      name: starter
     containerSpec:
-      port: 8321
       env:
         - name: INFERENCE_MODEL
-          value: "llama3.2:1b"
+          value: "llama3.2:3b"
         - name: OLLAMA_URL
           value: "http://ollama-server-service.ollama-dist.svc.cluster.local:11434"
+        - name: ENABLE_OLLAMA
+          value: ollama
     storage:
       size: "20Gi"
       mountPath: "/home/lls/.lls"
 ```
 3. Verify the server pod is running in the user defined namespace.

-### Using a ConfigMap for run.yaml configuration
+### Using a ConfigMap to override the default run.yaml configuration from the distribution

 A ConfigMap can be used to store run.yaml configuration for each LlamaStackDistribution. Updates to the ConfigMap will restart the Pod to load the new data.

diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml
index f8202ac6..7c0a9afd 100644
--- a/config/manager/manager.yaml
+++ b/config/manager/manager.yaml
@@ -48,7 +48,7 @@ spec:
         allowPrivilegeEscalation: false
         capabilities:
           drop:
-            - "ALL"
+          - "ALL"
       livenessProbe:
         httpGet:
           path: /healthz

diff --git a/config/samples/_v1alpha1_llamastackdistribution.yaml b/config/samples/_v1alpha1_llamastackdistribution.yaml
index 7091485c..737000ae 100644
--- a/config/samples/_v1alpha1_llamastackdistribution.yaml
+++ b/config/samples/_v1alpha1_llamastackdistribution.yaml
@@ -8,12 +8,14 @@ spec:
     containerSpec:
       env:
         - name: INFERENCE_MODEL
-          value: 'llama3.2:1b'
+          value: 'llama3.2:3b'
         - name: OLLAMA_URL
           value: 'http://ollama-server-service.ollama-dist.svc.cluster.local:11434'
+        - name: ENABLE_OLLAMA
+          value: ollama
       name: llama-stack
     distribution:
-      name: ollama
+      name: starter
     # Uncomment the storage section to use persistent storage
     # storage: {} # Will use default size of 10Gi and default mount path of /.llama
     # Or specify custom values:

diff --git a/config/samples/example-with-configmap.yaml b/config/samples/example-with-configmap.yaml
index b4600ee3..439fbcfd 100644
--- a/config/samples/example-with-configmap.yaml
+++ b/config/samples/example-with-configmap.yaml
@@ -6,38 +6,44 @@ data:
   run.yaml: |
     # Llama Stack Configuration
     version: '2'
-    image_name: ollama
+    image_name: starter
     apis:
     - inference
     providers:
       inference:
-      - provider_id: ollama
+      - provider_id: ${env.ENABLE_OLLAMA:=__disabled__}
         provider_type: "remote::ollama"
         config:
           url: "http://ollama-server-service.ollama-dist.svc.cluster.local:11434"
     models:
-    - model_id: "llama3.2:1b"
+    - model_id: "ollama/llama3.2:3b"
       provider_id: ollama
       model_type: llm
+      provider_model_id: llama3.2:3b
+    - embedding_dimension: 384
+      model_id: ${env.ENABLE_OLLAMA:=__disabled__}/${env.OLLAMA_EMBEDDING_MODEL:=__disabled__}
+      provider_id: ${env.ENABLE_OLLAMA:=__disabled__}
+      provider_model_id: ${env.OLLAMA_EMBEDDING_MODEL:=__disabled__}
+      model_type: embedding
     server:
       port: 8321
 ---
 apiVersion: llamastack.io/v1alpha1
 kind: LlamaStackDistribution
 metadata:
-  name: llamastack-with-config
+  name: llamastack-with-userconfig
 spec:
   replicas: 1
   server:
     distribution:
-      name: ollama
+      name: starter
     containerSpec:
       port: 8321
       env:
-        - name: INFERENCE_MODEL
-          value: "llama3.2:1b"
-        - name: OLLAMA_URL
-          value: "http://ollama-server-service.ollama-dist.svc.cluster.local:11434"
+        - name: ENABLE_OLLAMA
+          value: ollama
+        - name: OLLAMA_EMBEDDING_MODEL
+          value: all-minilm:l6-v2
     userConfig:
-      configMapName: llama-stack-config
+      configMapName: llama-stack-config # use ConfigMap's data.run.yaml
       # configMapNamespace: "" # Optional - defaults to the same namespace as the CR

diff --git a/config/samples/example-withoutconfigmpa.yaml b/config/samples/example-withoutconfigmpa.yaml
new file mode 100644
index 00000000..66960f6e
--- /dev/null
+++ b/config/samples/example-withoutconfigmpa.yaml
@@ -0,0 +1,22 @@
+---
+apiVersion: llamastack.io/v1alpha1
+kind: LlamaStackDistribution
+metadata:
+  name: llamastack-without-userconfig
+spec:
+  replicas: 1
+  server:
+    distribution:
+      name: starter
+    containerSpec:
+      port: 8321
+      env:
+        - name: OLLAMA_INFERENCE_MODEL
+          value: "llama3.2:3b"
+        - name: OLLAMA_URL
+          value: "http://ollama-server-service.ollama-dist.svc.cluster.local:11434"
+        - name: ENABLE_OLLAMA
+          value: ollama
+    storage:
+      size: "10Gi" # Optional - defaults to 10Gi
+      mountPath: "/home/lls/.lls" # Optional - defaults to /.llama

diff --git a/distributions.json b/distributions.json
index 63664c73..a16495d5 100644
--- a/distributions.json
+++ b/distributions.json
@@ -1,9 +1,6 @@
 {
-"starter": "docker.io/llamastack/distribution-starter:latest",
-"ollama": "docker.io/llamastack/distribution-ollama:latest",
-"bedrock": "docker.io/llamastack/distribution-bedrock:latest",
-"remote-vllm": "docker.io/llamastack/distribution-remote-vllm:latest",
-"tgi": "docker.io/llamastack/distribution-tgi:latest",
-"together": "docker.io/llamastack/distribution-together:latest",
-"vllm-gpu": "docker.io/llamastack/distribution-vllm-gpu:latest"
+  "starter": "docker.io/llamastack/distribution-starter:0.2.15",
+  "remote-vllm": "docker.io/llamastack/distribution-remote-vllm:0.2.15",
+  "meta-reference-gpu": "docker.io/llamastack/distribution-meta-reference-gpu:0.2.15",
+  "postgres-demo": "docker.io/llamastack/distribution-postgres-demo:0.2.15"
 }

From 54c517df1655e8d4bd121222cb15b1137e66ba1d Mon Sep 17 00:00:00 2001
From: Wen Zhou
Date: Thu, 28 Aug 2025 11:46:59 +0200
Subject: [PATCH 2/4] update: address review comments

- revert back to llama3.2:1b
- remove unnecessary/unrelated comments/changes
- rename INFERENCE_MODEL to OLLAMA_INFERENCE_MODEL
- remove ENABLE_OLLAMA
- set images to use the "latest" tag instead of 0.2.15

Signed-off-by: Wen Zhou
---
 README.md                                    |  6 ++----
 config/manager/manager.yaml                  |  2 +-
 config/samples/_v1alpha1_llamastackdistribution.yaml | 6 ++----
 config/samples/example-with-configmap.yaml   | 15 ++++-----------
 config/samples/example-withoutconfigmpa.yaml |  4 +---
 distributions.json                           |  8 ++++----
 6 files changed, 14 insertions(+), 27 deletions(-)

diff --git a/README.md b/README.md
index 99145197..36250935 100644
--- a/README.md
+++ b/README.md
@@ -84,18 +84,16 @@ spec:
     containerSpec:
       env:
         - name: INFERENCE_MODEL
-          value: "llama3.2:3b"
+          value: "llama3.2:1b"
         - name: OLLAMA_URL
           value: "http://ollama-server-service.ollama-dist.svc.cluster.local:11434"
-        - name: ENABLE_OLLAMA
-          value: ollama
     storage:
       size: "20Gi"
       mountPath: "/home/lls/.lls"
 ```
 3. Verify the server pod is running in the user defined namespace.

-### Using a ConfigMap to override the default run.yaml configuration from the distribution
+### Using a ConfigMap for run.yaml configuration

 A ConfigMap can be used to store run.yaml configuration for each LlamaStackDistribution. Updates to the ConfigMap will restart the Pod to load the new data.

diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml
index 7c0a9afd..f8202ac6 100644
--- a/config/manager/manager.yaml
+++ b/config/manager/manager.yaml
@@ -48,7 +48,7 @@ spec:
         allowPrivilegeEscalation: false
         capabilities:
           drop:
-          - "ALL"
+            - "ALL"
       livenessProbe:
         httpGet:
           path: /healthz

diff --git a/config/samples/_v1alpha1_llamastackdistribution.yaml b/config/samples/_v1alpha1_llamastackdistribution.yaml
index 737000ae..e2b8063f 100644
--- a/config/samples/_v1alpha1_llamastackdistribution.yaml
+++ b/config/samples/_v1alpha1_llamastackdistribution.yaml
@@ -7,12 +7,10 @@ spec:
   server:
     containerSpec:
      env:
-        - name: INFERENCE_MODEL
-          value: 'llama3.2:3b'
+        - name: OLLAMA_INFERENCE_MODEL
+          value: 'llama3.2:1b'
         - name: OLLAMA_URL
           value: 'http://ollama-server-service.ollama-dist.svc.cluster.local:11434'
-        - name: ENABLE_OLLAMA
-          value: ollama
       name: llama-stack
     distribution:
       name: starter

diff --git a/config/samples/example-with-configmap.yaml b/config/samples/example-with-configmap.yaml
index 439fbcfd..3f70a2b1 100644
--- a/config/samples/example-with-configmap.yaml
+++ b/config/samples/example-with-configmap.yaml
@@ -11,20 +11,15 @@ data:
     - inference
     providers:
       inference:
-      - provider_id: ${env.ENABLE_OLLAMA:=__disabled__}
+      - provider_id: ollama
         provider_type: "remote::ollama"
         config:
           url: "http://ollama-server-service.ollama-dist.svc.cluster.local:11434"
     models:
-    - model_id: "ollama/llama3.2:3b"
+    - model_id: "llama3.2:1b"
       provider_id: ollama
       model_type: llm
-      provider_model_id: llama3.2:3b
-    - embedding_dimension: 384
-      model_id: ${env.ENABLE_OLLAMA:=__disabled__}/${env.OLLAMA_EMBEDDING_MODEL:=__disabled__}
-      provider_id: ${env.ENABLE_OLLAMA:=__disabled__}
-      provider_model_id: ${env.OLLAMA_EMBEDDING_MODEL:=__disabled__}
-      model_type: embedding
+      provider_model_id: llama3.2:1b
     server:
       port: 8321
 ---
@@ -40,10 +35,8 @@ spec:
     containerSpec:
       port: 8321
       env:
-        - name: ENABLE_OLLAMA
-          value: ollama
         - name: OLLAMA_EMBEDDING_MODEL
           value: all-minilm:l6-v2
     userConfig:
-      configMapName: llama-stack-config # use ConfigMap's data.run.yaml
+      configMapName: llama-stack-config
       # configMapNamespace: "" # Optional - defaults to the same namespace as the CR

diff --git a/config/samples/example-withoutconfigmpa.yaml b/config/samples/example-withoutconfigmpa.yaml
index 66960f6e..4d1abbd4 100644
--- a/config/samples/example-withoutconfigmpa.yaml
+++ b/config/samples/example-withoutconfigmpa.yaml
@@ -12,11 +12,9 @@ spec:
       port: 8321
       env:
         - name: OLLAMA_INFERENCE_MODEL
-          value: "llama3.2:3b"
+          value: "llama3.2:1b"
         - name: OLLAMA_URL
           value: "http://ollama-server-service.ollama-dist.svc.cluster.local:11434"
-        - name: ENABLE_OLLAMA
-          value: ollama
     storage:
       size: "10Gi" # Optional - defaults to 10Gi
       mountPath: "/home/lls/.lls" # Optional - defaults to /.llama

diff --git a/distributions.json b/distributions.json
index a16495d5..80dd7d7b 100644
--- a/distributions.json
+++ b/distributions.json
@@ -1,6 +1,6 @@
 {
-  "starter": "docker.io/llamastack/distribution-starter:0.2.15",
-  "remote-vllm": "docker.io/llamastack/distribution-remote-vllm:0.2.15",
-  "meta-reference-gpu": "docker.io/llamastack/distribution-meta-reference-gpu:0.2.15",
-  "postgres-demo": "docker.io/llamastack/distribution-postgres-demo:0.2.15"
+  "starter": "docker.io/llamastack/distribution-starter:latest",
+  "remote-vllm": "docker.io/llamastack/distribution-remote-vllm:latest",
+  "meta-reference-gpu": "docker.io/llamastack/distribution-meta-reference-gpu:latest",
+  "postgres-demo": "docker.io/llamastack/distribution-postgres-demo:latest"
 }

From 097297af766ace25212ab053891c727f80c82593 Mon Sep 17 00:00:00 2001
From: Wen Zhou
Date: Thu, 28 Aug 2025 12:36:57 +0200
Subject: [PATCH 3/4] update: address code review

- remove default port 8321 in sample

Signed-off-by: Wen Zhou
---
 config/samples/example-withoutconfigmpa.yaml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/config/samples/example-withoutconfigmpa.yaml b/config/samples/example-withoutconfigmpa.yaml
index 4d1abbd4..ef723cee 100644
--- a/config/samples/example-withoutconfigmpa.yaml
+++ b/config/samples/example-withoutconfigmpa.yaml
@@ -9,7 +9,6 @@ spec:
     distribution:
       name: starter
     containerSpec:
-      port: 8321
       env:
         - name: OLLAMA_INFERENCE_MODEL
           value: "llama3.2:1b"

From 23d0be30a685e31492d3d6428af5599345da8e0d Mon Sep 17 00:00:00 2001
From: Wen Zhou
Date: Tue, 2 Sep 2025 10:23:23 +0200
Subject: [PATCH 4/4] fix: address code review comments

Signed-off-by: Wen Zhou
---
 README.md                                  | 2 +-
 config/samples/example-with-configmap.yaml | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 36250935..5770722b 100644
--- a/README.md
+++ b/README.md
@@ -83,7 +83,7 @@ spec:
       name: starter
     containerSpec:
       env:
-        - name: INFERENCE_MODEL
+        - name: OLLAMA_INFERENCE_MODEL
           value: "llama3.2:1b"
         - name: OLLAMA_URL
           value: "http://ollama-server-service.ollama-dist.svc.cluster.local:11434"

diff --git a/config/samples/example-with-configmap.yaml b/config/samples/example-with-configmap.yaml
index 3f70a2b1..e158c884 100644
--- a/config/samples/example-with-configmap.yaml
+++ b/config/samples/example-with-configmap.yaml
@@ -19,7 +19,6 @@ data:
     - model_id: "llama3.2:1b"
       provider_id: ollama
       model_type: llm
-      provider_model_id: llama3.2:1b
     server:
       port: 8321
 ---