Skip to content

Commit 78677a7

Browse files
committed
Use Vulkan GPU inside container to run AI models
Signed-off-by: Ansuman Sahoo <[email protected]>
1 parent 5ba59c5 commit 78677a7

File tree

6 files changed

+145
-96
lines changed

6 files changed

+145
-96
lines changed
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
#!/bin/bash

# SPDX-FileCopyrightText: Copyright The Lima Authors
# SPDX-License-Identifier: Apache-2.0

# Add the Lima guest user to the "render" and "video" groups so it can open
# the DRM render/card nodes needed for GPU (Vulkan) access.
#
# This script runs as root during provisioning: groupadd requires root and is
# invoked directly, so no sudo is needed anywhere (the original mixed bare
# groupadd with sudo'd usermod, which was inconsistent).

set -eux -o pipefail

u="${LIMA_CIDATA_USER:-$USER}"

# groupadd -f is idempotent: it exits 0 when the group already exists, so no
# separate getent existence check is required.
groupadd -f render
groupadd -f video

# Best effort (|| true): keep provisioning going even if the user is already
# a member or usermod is unavailable on this image.
usermod -aG render "$u" || true
usermod -aG video "$u" || true
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
#!/bin/bash

# SPDX-FileCopyrightText: Copyright The Lima Authors
# SPDX-License-Identifier: Apache-2.0

# Make DRM render/card nodes (/dev/dri/renderD*, /dev/dri/card*) world-
# accessible so unprivileged processes and containers can use the GPU.
# Runs during provisioning; presumably as root — TODO confirm (install/udevadm
# and writing under /etc require it).

set -eux -o pipefail

# Make DRM render/card nodes world-accessible.
# Install a udev rule so nodes created in the future also get mode 0666.
install -d -m 0755 /etc/udev/rules.d
cat >/etc/udev/rules.d/70-lima-drm.rules <<'EOF'
KERNEL=="renderD*", SUBSYSTEM=="drm", MODE="0666"
KERNEL=="card*", SUBSYSTEM=="drm", MODE="0666"
EOF

# Apply to existing nodes now and future ones via udev.
# "|| true" keeps this best-effort on systems where udev is absent or not
# running (e.g. minimal images).
udevadm control --reload || true
udevadm trigger --subsystem-match=drm || true

# Fallback: chmod any nodes that already exist, in case the udev trigger did
# not reach them. Globs may not match; errors are deliberately ignored.
if [ -d /dev/dri ]; then
  chmod 0666 /dev/dri/renderD* 2>/dev/null || true
  chmod 0666 /dev/dri/card* 2>/dev/null || true
fi

pkg/driver/krunkit/hack/install-vulkan-gpu.sh

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# SPDX-FileCopyrightText: Copyright The Lima Authors
44
# SPDX-License-Identifier: Apache-2.0
55

6-
set -eux -o pipefail
6+
set -eu -o pipefail
77

88
# Install required packages
99
dnf install -y dnf-plugins-core dnf-plugin-versionlock llvm18-libs
@@ -30,4 +30,22 @@ dnf versionlock add mesa-vulkan-drivers
3030
# Clean up
3131
dnf clean all
3232

33-
echo "Krunkit GPU(Venus) setup complete. Verify Vulkan installation by running 'vulkaninfo --summary'."
33+
read -r -p "$(printf '\033[32mDo you want to download and build llama.cpp (for Vulkan) and install required packages? This may take a while. Proceed? [y/N]: \033[0m')" REPLY
34+
case "$REPLY" in
35+
[yY][eE][sS]|[yY]) ;;
36+
*) echo "Aborted."; exit 0 ;;
37+
esac
38+
39+
echo "Installing llama.cpp with Vulkan support..."
40+
# Build and install llama.cpp with Vulkan support
41+
dnf install -y git cmake clang curl-devel glslc vulkan-devel virglrenderer
42+
git clone https://github.com/ggml-org/llama.cpp
43+
cd llama.cpp
44+
git reset --hard 97340b4c9924be86704dbf155e97c8319849ee19
45+
cmake -B build -DGGML_VULKAN=ON -DGGML_CCACHE=OFF -DCMAKE_INSTALL_PREFIX=/usr
46+
cmake --build build --config Release -j8
47+
cmake --install build
48+
cd ..
49+
rm -fr llama.cpp
50+
51+
echo "Successfully installed llama.cpp. Use 'llama-cli' app with .gguf models."

pkg/driver/krunkit/krunkit_driver_darwin_arm64.go

Lines changed: 31 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ package krunkit
55

66
import (
77
"context"
8-
_ "embed"
8+
"embed"
99
"errors"
1010
"fmt"
1111
"net"
@@ -39,19 +39,6 @@ type LimaKrunkitDriver struct {
3939
krunkitWaitCh chan error
4040
}
4141

42-
type KrunkitOpts struct {
43-
GPUAccel *bool `yaml:"gpuAccel,omitempty"`
44-
}
45-
46-
func NewKrunkitOpts(cfg *limatype.LimaYAML) (*KrunkitOpts, error) {
47-
var krunkitOpts KrunkitOpts
48-
if err := limayaml.Convert(cfg.VMOpts[vmType], &krunkitOpts, "vmOpts.krunkit"); err != nil {
49-
return nil, err
50-
}
51-
52-
return &krunkitOpts, nil
53-
}
54-
5542
var (
5643
_ driver.Driver = (*LimaKrunkitDriver)(nil)
5744
vmType limatype.VMType = "krunkit"
@@ -188,17 +175,6 @@ func validateConfig(cfg *limatype.LimaYAML) error {
188175
return fmt.Errorf("field `mountType` must be %q or %q for krunkit driver, got %q", limatype.VIRTIOFS, limatype.REVSSHFS, *cfg.MountType)
189176
}
190177

191-
// If GPU acceleration is requested, ensure Fedora image/template is used
192-
krunkitOpts, err := NewKrunkitOpts(cfg)
193-
if err != nil {
194-
return err
195-
}
196-
if krunkitOpts.GPUAccel != nil && *krunkitOpts.GPUAccel {
197-
if !isFedoraConfigured(cfg) {
198-
logrus.Warn("gpuAccel: true requires a Fedora image (use a Fedora base template or image)")
199-
}
200-
}
201-
202178
return nil
203179
}
204180

@@ -231,16 +207,7 @@ func (l *LimaKrunkitDriver) FillConfig(_ context.Context, cfg *limatype.LimaYAML
231207

232208
cfg.VMType = ptr.Of(vmType)
233209

234-
krunkitOpts, err := NewKrunkitOpts(cfg)
235-
if err != nil {
236-
return err
237-
}
238-
239-
if krunkitOpts.GPUAccel == nil {
240-
krunkitOpts.GPUAccel = ptr.Of(false)
241-
}
242-
243-
if *krunkitOpts.GPUAccel {
210+
if isFedoraConfigured(cfg) {
244211
gpuInstallScript := limatype.Provision{
245212
Mode: limatype.ProvisionModeData,
246213
Script: ptr.Of(gpuProvisionScript),
@@ -252,30 +219,46 @@ func (l *LimaKrunkitDriver) FillConfig(_ context.Context, cfg *limatype.LimaYAML
252219
Owner: cfg.User.Name,
253220
},
254221
}
222+
255223
cfg.Provision = append(cfg.Provision, gpuInstallScript)
256-
cfg.Message = "To enable GPU support for krunkit, run the following command inside the VM:\n\033[32msudo install-vulkan-gpu.sh\033[0m\n"
224+
cfg.Message = `To enable GPU support (Vulkan) for Krunkit to use AI models without containers, run the following command inside the VM:
225+
` + "\x1b[32m" + `sudo install-vulkan-gpu.sh` + "\x1b[0m" + `
226+
` + "\x1b[31m" + `Ignore this if already done` + "\x1b[0m" + "\n"
257227
}
258228

259229
return validateConfig(cfg)
260230
}
261231

232+
//go:embed boot/*.sh
233+
var bootFS embed.FS
234+
262235
func (l *LimaKrunkitDriver) BootScripts() (map[string][]byte, error) {
263-
// Override default reboot-if-required with a no-op because Fedora does not support this well and
264-
// takes a long time to start up.
265-
krunkitOpts, err := NewKrunkitOpts(l.Instance.Config)
266-
if err != nil {
267-
return nil, err
268-
}
269-
if krunkitOpts.GPUAccel == nil || !*krunkitOpts.GPUAccel {
270-
return nil, nil
236+
scripts := make(map[string][]byte)
237+
238+
entries, err := bootFS.ReadDir("boot")
239+
if err == nil && !isFedoraConfigured(l.Instance.Config) {
240+
for _, entry := range entries {
241+
if entry.IsDir() {
242+
continue
243+
}
244+
245+
content, err := bootFS.ReadFile("boot/" + entry.Name())
246+
if err != nil {
247+
return nil, err
248+
}
249+
250+
scripts[entry.Name()] = content
251+
}
271252
}
272-
scripts := map[string][]byte{
273-
"00-reboot-if-required.sh": []byte(`#!/bin/sh
253+
254+
// Disabled by krunkit driver for Fedora to make boot time faster
255+
if isFedoraConfigured(l.Instance.Config) {
256+
scripts["00-reboot-if-required.sh"] = []byte(`#!/bin/sh
274257
set -eu
275-
# Disabled by krunkit driver
276258
exit 0
277-
`),
259+
`)
278260
}
261+
279262
return scripts, nil
280263
}
281264

templates/experimental/krunkit.yaml

Lines changed: 0 additions & 21 deletions
This file was deleted.

website/content/en/docs/config/vmtype/krunkit.md

Lines changed: 60 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -31,59 +31,94 @@ go build -o <PREFIX>/libexec/lima/lima-driver-krunkit ./cmd/lima-driver-krunkit/
3131
limactl info # "vmTypes" should include "krunkit"
3232
```
3333

34-
3534
## Quick start
3635

37-
- Non‑GPU (general workloads)
36+
You can run AI models either:
37+
- With containers (fast to get started; any distro works), or
38+
- Without containers (choose Fedora and ensure a writable mount; build `llama.cpp` from source).
39+
40+
Before running, download a small model on the host so the examples can run quickly. We’ll use `Qwen3‑1.7B GGUF`:
41+
42+
```bash
43+
mkdir -p models
44+
curl -LO --output-dir models 'https://huggingface.co/Qwen/Qwen3-1.7B-GGUF/resolve/main/Qwen3-1.7B-Q8_0.gguf'
45+
```
46+
47+
### 1) Run models using containers (fastest)
48+
49+
Start a krunkit VM with the default Lima template:
50+
51+
{{< tabpane text=true >}}
52+
{{% tab header="CLI" %}}
53+
```bash
54+
limactl start --vm-type=krunkit
55+
limactl shell default
56+
```
57+
{{% /tab %}}
58+
{{< /tabpane >}}
59+
60+
Then inside the VM:
61+
3862
```bash
39-
limactl start default --vm-type=krunkit
63+
nerdctl run --rm -ti \
64+
--device /dev/dri \
65+
-v $(pwd)/models:/models \
66+
quay.io/slopezpa/fedora-vgpu-llama
4067
```
4168

42-
- GPU (Vulkan via Venus)
43-
- Recommended distro: Fedora 40+ (smoothest Mesa/Vulkan setup; uses COPR “slp/mesa-krunkit” for patched mesa-vulkan-drivers).
44-
- Start from the krunkit template and follow the logs to complete GPU setup.
69+
Once inside the container:
70+
71+
```bash
72+
llama-cli -m /models/Qwen3-1.7B-Q8_0.gguf -b 512 -ngl 99 -p "Introduce yourself"
73+
```
74+
75+
You can now chat with the model.
76+
77+
### 2) Run models without containers (slow)
78+
79+
This path builds and installs dependencies, which can take some time. For faster builds, allocate more CPUs and memory to the VM (see [`options`](../../reference/limactl_start/#options)). Use Fedora and make sure your home mount is writable.
4580

4681
{{< tabpane text=true >}}
4782
{{% tab header="CLI" %}}
4883
```bash
49-
# GPU (Vulkan via Venus on Fedora)
50-
limactl start template:experimental/krunkit
84+
limactl start --vm-type=krunkit template://_images/fedora --mount=~:w
85+
limactl shell fedora
5186
```
5287
{{% /tab %}}
5388
{{% tab header="YAML" %}}
5489
```yaml
5590
vmType: krunkit
5691

57-
# For AI workloads, at least 4GiB memory and 4 CPUs are recommended.
58-
memory: 4GiB
59-
cpus: 4
60-
arch: aarch64
61-
62-
# Fedora 40+ is preferred for Mesa & Vulkan (Venus) support
6392
base:
6493
- template://_images/fedora
94+
- template://_default/mounts
6595

6696
mounts:
67-
- location: "~"
68-
writable: true
69-
97+
- writable: true
7098
mountType: virtiofs
71-
72-
vmOpts:
73-
krunkit:
74-
gpuAccel: true
7599
```
76100
{{% /tab %}}
77101
{{< /tabpane >}}
78102
79-
After the VM is READY, inside the VM:
103+
Once inside the VM, install GPU/Vulkan support:
104+
80105
```bash
81106
sudo install-vulkan-gpu.sh
82107
```
83108

109+
The script will ask for confirmation before building and installing `llama.cpp` (with Vulkan/Venus support) from source.
110+
111+
After installation, run:
112+
113+
```bash
114+
llama-cli -m /models/Qwen3-1.7B-Q8_0.gguf -b 512 -ngl 99 -p "Introduce yourself"
115+
```
116+
117+
and enjoy chatting with the AI model.
118+
84119
## Notes and caveats
85120
- macOS Ventura or later on Apple Silicon is required.
86-
- GPU mode requires a Fedora image/template; Fedora 40+ recommended for Mesa/Vulkan (Venus).
87-
- To verify GPU/Vulkan in the guest, use tools like `vulkaninfo` after running the install script.
88-
- `Libkrun` and [`Ramalama`](https://github.com/containers/ramalama)(a tool that simplifies running AI models locally) use CPU inferencing as of **July 2, 2025** and are actively working to support GPU inferencing. [More info](https://developers.redhat.com/articles/2025/07/02/supercharging-ai-isolation-microvms-ramalama-libkrun#current_limitations_and_future_directions__gpu_enablement).
121+
- To verify GPU/Vulkan in the guest container or VM, use tools like `vulkaninfo --summary`.
122+
- Running AI models in containers works on any Linux distribution; running them without containers requires Fedora.
123+
- For more information about `llama-cli` usage, see the [llama.cpp](https://github.com/ggml-org/llama.cpp?tab=readme-ov-file#llamacpp) docs.
89124
- Driver architecture details: see [Virtual Machine Drivers](../../dev/drivers).

0 commit comments

Comments
 (0)