Skip to content

Commit 78677a7

Browse files
committed
Use Vulkan GPU inside container to run AI models
Signed-off-by: Ansuman Sahoo <[email protected]>
1 parent 5ba59c5 commit 78677a7

File tree

6 files changed

+145
-96
lines changed

6 files changed

+145
-96
lines changed
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
#!/bin/bash

# SPDX-FileCopyrightText: Copyright The Lima Authors
# SPDX-License-Identifier: Apache-2.0

# Add the Lima guest user to the "render" and "video" groups so it can open
# the DRM render/card nodes needed for GPU (Vulkan) access.
#
# This script runs as root during provisioning: groupadd requires root and is
# invoked directly, so no sudo is needed anywhere (the original mixed bare
# groupadd with sudo'd usermod, which was inconsistent).

set -eux -o pipefail

u="${LIMA_CIDATA_USER:-$USER}"

# groupadd -f is idempotent: it exits 0 when the group already exists, so no
# separate getent existence check is required.
groupadd -f render
groupadd -f video

# Best effort (|| true): keep provisioning going even if the user is already
# a member or usermod is unavailable on this image.
usermod -aG render "$u" || true
usermod -aG video "$u" || true
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
#!/bin/bash

# SPDX-FileCopyrightText: Copyright The Lima Authors
# SPDX-License-Identifier: Apache-2.0

# Make DRM render/card nodes (/dev/dri/renderD*, /dev/dri/card*) world-
# accessible so unprivileged processes and containers can use the GPU.
# Runs during provisioning; presumably as root — TODO confirm (install/udevadm
# and writing under /etc require it).

set -eux -o pipefail

# Make DRM render/card nodes world-accessible.
# Install a udev rule so nodes created in the future also get mode 0666.
install -d -m 0755 /etc/udev/rules.d
cat >/etc/udev/rules.d/70-lima-drm.rules <<'EOF'
KERNEL=="renderD*", SUBSYSTEM=="drm", MODE="0666"
KERNEL=="card*", SUBSYSTEM=="drm", MODE="0666"
EOF

# Apply to existing nodes now and future ones via udev.
# "|| true" keeps this best-effort on systems where udev is absent or not
# running (e.g. minimal images).
udevadm control --reload || true
udevadm trigger --subsystem-match=drm || true

# Fallback: chmod any nodes that already exist, in case the udev trigger did
# not reach them. Globs may not match; errors are deliberately ignored.
if [ -d /dev/dri ]; then
  chmod 0666 /dev/dri/renderD* 2>/dev/null || true
  chmod 0666 /dev/dri/card* 2>/dev/null || true
fi

pkg/driver/krunkit/hack/install-vulkan-gpu.sh

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# SPDX-FileCopyrightText: Copyright The Lima Authors
44
# SPDX-License-Identifier: Apache-2.0
55

6-
set -eux -o pipefail
6+
set -eu -o pipefail
77

88
# Install required packages
99
dnf install -y dnf-plugins-core dnf-plugin-versionlock llvm18-libs
@@ -30,4 +30,22 @@ dnf versionlock add mesa-vulkan-drivers
3030
# Clean up
3131
dnf clean all
3232

33-
echo "Krunkit GPU(Venus) setup complete. Verify Vulkan installation by running 'vulkaninfo --summary'."
33+
read -r -p "$(printf '\033[32mDo you want to download and build llama.cpp (for Vulkan) and install required packages? This may take a while. Proceed? [y/N]: \033[0m')" REPLY
34+
case "$REPLY" in
35+
[yY][eE][sS]|[yY]) ;;
36+
*) echo "Aborted."; exit 0 ;;
37+
esac
38+
39+
echo "Installing llama.cpp with Vulkan support..."
40+
# Build and install llama.cpp with Vulkan support
41+
dnf install -y git cmake clang curl-devel glslc vulkan-devel virglrenderer
42+
git clone https://github.com/ggml-org/llama.cpp
43+
cd llama.cpp
44+
git reset --hard 97340b4c9924be86704dbf155e97c8319849ee19
45+
cmake -B build -DGGML_VULKAN=ON -DGGML_CCACHE=OFF -DCMAKE_INSTALL_PREFIX=/usr
46+
cmake --build build --config Release -j8
47+
cmake --install build
48+
cd ..
49+
rm -fr llama.cpp
50+
51+
echo "Successfully installed llama.cpp. Use 'llama-cli' app with .gguf models."

pkg/driver/krunkit/krunkit_driver_darwin_arm64.go

Lines changed: 31 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ package krunkit
55

66
import (
77
"context"
8-
_ "embed"
8+
"embed"
99
"errors"
1010
"fmt"
1111
"net"
@@ -39,19 +39,6 @@ type LimaKrunkitDriver struct {
3939
krunkitWaitCh chan error
4040
}
4141

42-
type KrunkitOpts struct {
43-
GPUAccel *bool `yaml:"gpuAccel,omitempty"`
44-
}
45-
46-
func NewKrunkitOpts(cfg *limatype.LimaYAML) (*KrunkitOpts, error) {
47-
var krunkitOpts KrunkitOpts
48-
if err := limayaml.Convert(cfg.VMOpts[vmType], &krunkitOpts, "vmOpts.krunkit"); err != nil {
49-
return nil, err
50-
}
51-
52-
return &krunkitOpts, nil
53-
}
54-
5542
var (
5643
_ driver.Driver = (*LimaKrunkitDriver)(nil)
5744
vmType limatype.VMType = "krunkit"
@@ -188,17 +175,6 @@ func validateConfig(cfg *limatype.LimaYAML) error {
188175
return fmt.Errorf("field `mountType` must be %q or %q for krunkit driver, got %q", limatype.VIRTIOFS, limatype.REVSSHFS, *cfg.MountType)
189176
}
190177

191-
// If GPU acceleration is requested, ensure Fedora image/template is used
192-
krunkitOpts, err := NewKrunkitOpts(cfg)
193-
if err != nil {
194-
return err
195-
}
196-
if krunkitOpts.GPUAccel != nil && *krunkitOpts.GPUAccel {
197-
if !isFedoraConfigured(cfg) {
198-
logrus.Warn("gpuAccel: true requires a Fedora image (use a Fedora base template or image)")
199-
}
200-
}
201-
202178
return nil
203179
}
204180

@@ -231,16 +207,7 @@ func (l *LimaKrunkitDriver) FillConfig(_ context.Context, cfg *limatype.LimaYAML
231207

232208
cfg.VMType = ptr.Of(vmType)
233209

234-
krunkitOpts, err := NewKrunkitOpts(cfg)
235-
if err != nil {
236-
return err
237-
}
238-
239-
if krunkitOpts.GPUAccel == nil {
240-
krunkitOpts.GPUAccel = ptr.Of(false)
241-
}
242-
243-
if *krunkitOpts.GPUAccel {
210+
if isFedoraConfigured(cfg) {
244211
gpuInstallScript := limatype.Provision{
245212
Mode: limatype.ProvisionModeData,
246213
Script: ptr.Of(gpuProvisionScript),
@@ -252,30 +219,46 @@ func (l *LimaKrunkitDriver) FillConfig(_ context.Context, cfg *limatype.LimaYAML
252219
Owner: cfg.User.Name,
253220
},
254221
}
222+
255223
cfg.Provision = append(cfg.Provision, gpuInstallScript)
256-
cfg.Message = "To enable GPU support for krunkit, run the following command inside the VM:\n\033[32msudo install-vulkan-gpu.sh\033[0m\n"
224+
cfg.Message = `To enable GPU support (Vulkan) for Krunkit to use AI models without containers, run the following command inside the VM:
225+
` + "\x1b[32m" + `sudo install-vulkan-gpu.sh` + "\x1b[0m" + `
226+
` + "\x1b[31m" + `Ignore this if already done` + "\x1b[0m" + "\n"
257227
}
258228

259229
return validateConfig(cfg)
260230
}
261231

232+
//go:embed boot/*.sh
233+
var bootFS embed.FS
234+
262235
func (l *LimaKrunkitDriver) BootScripts() (map[string][]byte, error) {
263-
// Override default reboot-if-required with a no-op because Fedora does not support this well and
264-
// takes a long time to start up.
265-
krunkitOpts, err := NewKrunkitOpts(l.Instance.Config)
266-
if err != nil {
267-
return nil, err
268-
}
269-
if krunkitOpts.GPUAccel == nil || !*krunkitOpts.GPUAccel {
270-
return nil, nil
236+
scripts := make(map[string][]byte)
237+
238+
entries, err := bootFS.ReadDir("boot")
239+
if err == nil && !isFedoraConfigured(l.Instance.Config) {
240+
for _, entry := range entries {
241+
if entry.IsDir() {
242+
continue
243+
}
244+
245+
content, err := bootFS.ReadFile("boot/" + entry.Name())
246+
if err != nil {
247+
return nil, err
248+
}
249+
250+
scripts[entry.Name()] = content
251+
}
271252
}
272-
scripts := map[string][]byte{
273-
"00-reboot-if-required.sh": []byte(`#!/bin/sh
253+
254+
// Disabled by krunkit driver for Fedora to make boot time faster
255+
if isFedoraConfigured(l.Instance.Config) {
256+
scripts["00-reboot-if-required.sh"] = []byte(`#!/bin/sh
274257
set -eu
275-
# Disabled by krunkit driver
276258
exit 0
277-
`),
259+
`)
278260
}
261+
279262
return scripts, nil
280263
}
281264

templates/experimental/krunkit.yaml

Lines changed: 0 additions & 21 deletions
This file was deleted.

website/content/en/docs/config/vmtype/krunkit.md

Lines changed: 60 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -31,59 +31,94 @@ go build -o <PREFIX>/libexec/lima/lima-driver-krunkit ./cmd/lima-driver-krunkit/
3131
limactl info # "vmTypes" should include "krunkit"
3232
```
3333

34-
3534
## Quick start
3635

37-
- Non‑GPU (general workloads)
36+
You can run AI models either:
37+
- With containers (fast to get started; any distro works), or
38+
- Without containers (choose Fedora and ensure a writable mount; build `llama.cpp` from source).
39+
40+
Before running, download a small model on the host so the examples can run quickly. We’ll use `Qwen3‑1.7B GGUF`:
41+
42+
```bash
43+
mkdir -p models
44+
curl -LO --output-dir models 'https://huggingface.co/Qwen/Qwen3-1.7B-GGUF/resolve/main/Qwen3-1.7B-Q8_0.gguf'
45+
```
46+
47+
### 1) Run models using containers (fastest)
48+
49+
Start a krunkit VM with the default Lima template:
50+
51+
{{< tabpane text=true >}}
52+
{{% tab header="CLI" %}}
53+
```bash
54+
limactl start --vm-type=krunkit
55+
limactl shell default
56+
```
57+
{{% /tab %}}
58+
{{< /tabpane >}}
59+
60+
Then inside the VM:
61+
3862
```bash
39-
limactl start default --vm-type=krunkit
63+
nerdctl run --rm -ti \
64+
--device /dev/dri \
65+
-v $(pwd)/models:/models \
66+
quay.io/slopezpa/fedora-vgpu-llama
4067
```
4168

42-
- GPU (Vulkan via Venus)
43-
- Recommended distro: Fedora 40+ (smoothest Mesa/Vulkan setup; uses COPR “slp/mesa-krunkit” for patched mesa-vulkan-drivers).
44-
- Start from the krunkit template and follow the logs to complete GPU setup.
69+
Once inside the container:
70+
71+
```bash
72+
llama-cli -m /models/Qwen3-1.7B-Q8_0.gguf -b 512 -ngl 99 -p "Introduce yourself"
73+
```
74+
75+
You can now chat with the model.
76+
77+
### 2) Run models without containers (slow)
78+
79+
This path builds and installs dependencies, which can take some time. For faster builds, allocate more CPUs and memory to the VM (see [`options`](../../reference/limactl_start/#options)). Use Fedora and make sure your home mount is writable.
4580

4681
{{< tabpane text=true >}}
4782
{{% tab header="CLI" %}}
4883
```bash
49-
# GPU (Vulkan via Venus on Fedora)
50-
limactl start template:experimental/krunkit
84+
limactl start --vm-type=krunkit template://_images/fedora --mount=~:w
85+
limactl shell fedora
5186
```
5287
{{% /tab %}}
5388
{{% tab header="YAML" %}}
5489
```yaml
5590
vmType: krunkit
5691

57-
# For AI workloads, at least 4GiB memory and 4 CPUs are recommended.
58-
memory: 4GiB
59-
cpus: 4
60-
arch: aarch64
61-
62-
# Fedora 40+ is preferred for Mesa & Vulkan (Venus) support
6392
base:
6493
- template://_images/fedora
94+
- template://_default/mounts
6595

6696
mounts:
67-
- location: "~"
68-
writable: true
69-
97+
- writable: true
7098
mountType: virtiofs
71-
72-
vmOpts:
73-
krunkit:
74-
gpuAccel: true
7599
```
76100
{{% /tab %}}
77101
{{< /tabpane >}}
78102
79-
After the VM is READY, inside the VM:
103+
Once inside the VM, install GPU/Vulkan support:
104+
80105
```bash
81106
sudo install-vulkan-gpu.sh
82107
```
83108

109+
The script will ask for confirmation before building and installing `llama.cpp` (with Vulkan/Venus support) from source.
110+
111+
After installation, run:
112+
113+
```bash
114+
llama-cli -m /models/Qwen3-1.7B-Q8_0.gguf -b 512 -ngl 99 -p "Introduce yourself"
115+
```
116+
117+
and enjoy chatting with the AI model.
118+
84119
## Notes and caveats
85120
- macOS Ventura or later on Apple Silicon is required.
86-
- GPU mode requires a Fedora image/template; Fedora 40+ recommended for Mesa/Vulkan (Venus).
87-
- To verify GPU/Vulkan in the guest, use tools like `vulkaninfo` after running the install script.
88-
- `Libkrun` and [`Ramalama`](https://github.com/containers/ramalama)(a tool that simplifies running AI models locally) use CPU inferencing as of **July 2, 2025** and are actively working to support GPU inferencing. [More info](https://developers.redhat.com/articles/2025/07/02/supercharging-ai-isolation-microvms-ramalama-libkrun#current_limitations_and_future_directions__gpu_enablement).
121+
- To verify GPU/Vulkan in the guest container or VM, use tools like `vulkaninfo --summary`.
122+
- Running AI models in containers works on any Linux distribution; running them without containers requires Fedora.
123+
- For more information about `llama-cli` usage, see the [llama.cpp](https://github.com/ggml-org/llama.cpp?tab=readme-ov-file#llamacpp) docs.
89124
- Driver architecture details: see [Virtual Machine Drivers](../../dev/drivers).

0 commit comments

Comments
 (0)