Skip to content

Commit c9ae05c

Browse files
committed
feat(actors): expose container system metrics
1 parent cca32b6 commit c9ae05c

File tree

75 files changed

+2413
-50
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

75 files changed

+2413
-50
lines changed

docker/dev-full/docker-compose.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,8 @@ services:
233233
# We only reserve 100 ports instead of the default 22,000. See
234234
# rivet-guard for explanation.
235235
- "7600-7699:7600-7699"
236+
# cAdvisor metrics endpoint
237+
- "7780:7780"
236238
networks:
237239
- rivet-network
238240

docker/dev-full/otel-collector/config.yaml

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,14 @@ receivers:
55
endpoint: 0.0.0.0:4317
66
http:
77
endpoint: 0.0.0.0:4318
8+
prometheus:
9+
config:
10+
scrape_configs:
11+
- job_name: 'cadvisor'
12+
static_configs:
13+
- targets: ['rivet-client:7780']
14+
metrics_path: /metrics
15+
scrape_interval: 30s
816

917
processors:
1018
batch:
@@ -52,7 +60,7 @@ service:
5260
processors: [batch]
5361
exporters: [clickhouse]
5462
metrics:
55-
receivers: [otlp]
63+
receivers: [otlp, prometheus]
5664
processors: [batch]
5765
exporters: [clickhouse]
5866

docker/universal/Dockerfile

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,8 +120,8 @@ RUN apt-get update -y && \
120120
apt-get install -y ca-certificates openssl curl tini curl && \
121121
curl -Lf -o /lib/libfdb_c.so "https://github.com/apple/foundationdb/releases/download/7.1.60/libfdb_c.x86_64.so"
122122

123-
# MARK: Runner (Full)
124-
FROM --platform=linux/amd64 base-runner AS client-full
123+
# MARK: Runner (Slim)
124+
FROM --platform=linux/amd64 base-runner AS client-slim
125125
ARG CNI_PLUGINS_VERSION=1.3.0
126126
RUN apt-get install -y skopeo iproute2 runc dnsutils && \
127127
echo "Downloading lz4" && \
@@ -142,6 +142,21 @@ COPY ./docker/dev-full/rivet-client/rivet-actor.conflist /opt/cni/config/rivet-a
142142
COPY --from=builder /app/dist/rivet-client /app/dist/rivet-container-runner /usr/local/bin/
143143
ENTRYPOINT ["/usr/bin/tini", "--", "entrypoint.sh"]
144144

145+
# MARK: Runner (Full)
146+
FROM client-slim AS client-full
147+
ARG CADVISOR_VERSION=v0.52.0
148+
RUN apt-get update -y && \
149+
apt-get install -y wget && \
150+
wget -O /usr/local/bin/cadvisor "https://github.com/google/cadvisor/releases/download/${CADVISOR_VERSION}/cadvisor-${CADVISOR_VERSION}-linux-amd64" && \
151+
chmod +x /usr/local/bin/cadvisor && \
152+
apt-get clean && \
153+
rm -rf /var/lib/apt/lists/*
154+
155+
COPY docker/universal/client-full-entrypoint.sh /usr/local/bin/client-full-entrypoint.sh
156+
RUN chmod +x /usr/local/bin/client-full-entrypoint.sh
157+
158+
ENTRYPOINT ["/usr/bin/tini", "--", "/usr/local/bin/client-full-entrypoint.sh"]
159+
145160
# MARK: Monolith
146161
FROM --platform=linux/amd64 debian:12.9-slim AS monolith
147162
ENV DEBIAN_FRONTEND=noninteractive
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#!/bin/bash
2+
set -e
3+
4+
# Start cadvisor in the background
5+
cadvisor \
6+
--port=7780 \
7+
--listen_ip=0.0.0.0 \
8+
--prometheus_endpoint="/metrics" \
9+
--enable_metrics=cpu,cpuLoad,memory,network,disk,diskIO,oom_event,process,tcp,udp \
10+
--docker_only=false \
11+
--disable_root_cgroup_stats=false &
12+
13+
# TODO: evaluate passing the following flag to cadvisor (not set in the command above):
14+
# --raw_cgroup_prefix_whitelist="" \
15+
16+
# Start rivet-client with all passed arguments
17+
exec rivet-client "$@"

frontend/apps/hub/src/domains/project/components/actors/actors-provider.tsx

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import {
1212
actorBuildsAtom,
1313
createActorAtom,
1414
type Logs,
15+
type Metrics,
1516
actorsQueryAtom,
1617
actorsInternalFilterAtom,
1718
type Actor,
@@ -30,6 +31,7 @@ import {
3031
createActorEndpoint,
3132
destroyActorMutationOptions,
3233
actorLogsQueryOptions,
34+
actorMetricsQueryOptions,
3335
actorRegionsQueryOptions,
3436
actorBuildsQueryOptions,
3537
} from "../../queries";
@@ -257,9 +259,63 @@ export function ActorsProvider({
257259
};
258260
};
259261

262+
const metrics = atom({
263+
metrics: { cpu: null, memory: null } as Metrics,
264+
status: "pending",
265+
});
266+
metrics.onMount = (set) => {
267+
const metricsObserver = new QueryObserver(
268+
queryClient,
269+
actorMetricsQueryOptions({
270+
projectNameId,
271+
environmentNameId,
272+
actorId: actor.id,
273+
}, { refetchInterval: 5000 }),
274+
);
275+
276+
type MetricsQuery = {
277+
status: string;
278+
data?: Awaited<
279+
ReturnType<
280+
Exclude<
281+
ReturnType<
282+
typeof actorMetricsQueryOptions
283+
>["queryFn"],
284+
undefined
285+
>
286+
>
287+
>;
288+
};
289+
290+
function updateMetrics(query: MetricsQuery) {
291+
const data = query.data;
292+
set((prev) => ({
293+
...prev,
294+
...data,
295+
status: query.status,
296+
}));
297+
}
298+
299+
const subMetrics = metricsObserver.subscribe(
300+
(query) => {
301+
updateMetrics(query);
302+
},
303+
);
304+
305+
updateMetrics(
306+
metricsObserver.getCurrentQuery().state,
307+
);
308+
309+
return () => {
310+
metricsObserver.destroy();
311+
subMetrics();
312+
};
313+
};
314+
260315
return {
261316
...actor,
262317
logs,
318+
metrics,
263319
destroy,
264320
status: getActorStatus(actor),
265321
};

frontend/apps/hub/src/domains/project/queries/actors/query-options.ts

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,73 @@ export const actorLogsQueryOptions = (
255255
});
256256
};
257257

258+
export const actorMetricsQueryOptions = (
259+
{
260+
projectNameId,
261+
environmentNameId,
262+
actorId,
263+
}: {
264+
projectNameId: string;
265+
environmentNameId: string;
266+
actorId: string;
267+
},
268+
opts: { refetchInterval?: number } = {},
269+
) => {
270+
return queryOptions({
271+
...opts,
272+
queryKey: [
273+
"project",
274+
projectNameId,
275+
"environment",
276+
environmentNameId,
277+
"actor",
278+
actorId,
279+
"metrics",
280+
] as const,
281+
queryFn: async ({
282+
signal: abortSignal,
283+
queryKey: [, project, , environment, , actorId],
284+
}) => {
285+
const now = Date.now();
286+
const start = now - 60000; // Last minute
287+
const end = now;
288+
289+
const response = await rivetClient.actors.metrics.get(
290+
{
291+
project,
292+
environment,
293+
start,
294+
end,
295+
interval: 10000, // 10 second intervals
296+
actorIdsJson: JSON.stringify([actorId]),
297+
metricsJson: JSON.stringify([
298+
"container_cpu_usage_seconds_total",
299+
"container_memory_usage_bytes"
300+
]),
301+
},
302+
{ abortSignal },
303+
);
304+
305+
// Extract the latest metrics values
306+
let cpu: number | null = null;
307+
let memory: number | null = null;
308+
309+
if (response.metrics && response.metrics.length >= 2) {
310+
const cpuData = response.metrics[0];
311+
const memoryData = response.metrics[1];
312+
313+
// Get the last data point for each metric
314+
cpu = cpuData && cpuData.length > 0 ? cpuData[cpuData.length - 1] : null;
315+
memory = memoryData && memoryData.length > 0 ? memoryData[memoryData.length - 1] : null;
316+
}
317+
318+
return {
319+
metrics: { cpu, memory },
320+
};
321+
},
322+
});
323+
};
324+
258325
export const actorBuildsQueryOptions = ({
259326
projectNameId,
260327
environmentNameId,

frontend/packages/components/src/actors/actor-config-tab.tsx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { Button, DocsSheet, ScrollArea } from "@rivet-gg/components";
22
import { Icon, faBooks } from "@rivet-gg/icons";
3+
import { ActorMetrics } from "./actor-metrics";
34
import { ActorGeneral } from "./actor-general";
45
import { ActorNetwork } from "./actor-network";
56
import { ActorRuntime } from "./actor-runtime";
@@ -23,6 +24,7 @@ export function ActorConfigTab(props: ActorConfigTabProps) {
2324
</Button>
2425
</DocsSheet>
2526
</div>
27+
<ActorMetrics {...props} />
2628
<ActorGeneral {...props} />
2729
<ActorNetwork {...props} />
2830
<ActorRuntime {...props} />

frontend/packages/components/src/actors/actor-context.tsx

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ export type Actor = Omit<
2525
lifecycle?: Rivet.actor.Lifecycle;
2626
endpoint?: string;
2727
logs: LogsAtom;
28+
metrics: MetricsAtom;
2829
network?: Rivet.actor.Network | null;
2930
resources?: Rivet.actor.Resources | null;
3031
runtime?: Rivet.actor.Runtime | null;
@@ -43,6 +44,11 @@ export type Logs = {
4344
properties: Record<string, unknown>;
4445
}[];
4546

47+
export type Metrics = {
48+
cpu: number | null;
49+
memory: number | null;
50+
};
51+
4652
export type Build = Rivet.actor.Build;
4753
export type DestroyActor = {
4854
isDestroying: boolean;
@@ -55,6 +61,11 @@ export type LogsAtom = Atom<{
5561
// query status
5662
status: string;
5763
}>;
64+
export type MetricsAtom = Atom<{
65+
metrics: Metrics;
66+
// query status
67+
status: string;
68+
}>;
5869
export type BuildAtom = Atom<Build>;
5970
export type DestroyActorAtom = Atom<DestroyActor>;
6071

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
import { useAtomValue } from "jotai";
2+
import { selectAtom } from "jotai/utils";
3+
import equal from "fast-deep-equal";
4+
import { Dd, Dl, Dt, Flex } from "@rivet-gg/components";
5+
import type { Actor, ActorAtom } from "./actor-context";
6+
7+
const selector = (a: Actor) => ({
8+
metrics: a.metrics,
9+
});
10+
11+
export interface ActorMetricsProps {
12+
actor: ActorAtom;
13+
}
14+
15+
export function ActorMetrics({ actor }: ActorMetricsProps) {
16+
const { metrics } = useAtomValue(selectAtom(actor, selector, equal));
17+
const metricsData = useAtomValue(metrics);
18+
19+
const formatCpuUsage = (cpu: number | null) => {
20+
if (cpu === null) return "n/a";
21+
return `${(cpu * 100).toFixed(2)}%`;
22+
};
23+
24+
const formatMemoryUsage = (memory: number | null) => {
25+
if (memory === null) return "n/a";
26+
return `${(memory / 1024 / 1024).toFixed(1)} MB`;
27+
};
28+
29+
const isLoading = metricsData.status === "pending";
30+
const hasError = metricsData.status === "error";
31+
32+
return (
33+
<div className="px-4 my-8">
34+
<h3 className="mb-2 font-semibold">Metrics</h3>
35+
<Flex gap="2" direction="col" className="text-xs">
36+
<Dl>
37+
<Dt>CPU Usage</Dt>
38+
<Dd className={hasError ? "text-destructive" : ""}>
39+
{isLoading ? "Loading..." : hasError ? "Error" : formatCpuUsage(metricsData.metrics.cpu)}
40+
</Dd>
41+
<Dt>Memory Usage</Dt>
42+
<Dd className={hasError ? "text-destructive" : ""}>
43+
{isLoading ? "Loading..." : hasError ? "Error" : formatMemoryUsage(metricsData.metrics.memory)}
44+
</Dd>
45+
</Dl>
46+
</Flex>
47+
</div>
48+
);
49+
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
---
2+
name = "ACTOR_METRICS_INVALID_METRICS"
3+
description = "Invalid metrics format."
4+
http_status = 400
5+
---
6+
7+
# Invalid Metrics
8+
9+
The provided list of metrics is not valid JSON. Provide a JSON array of metric names, for example `["container_cpu_usage_seconds_total", "container_memory_usage_bytes"]`.

0 commit comments

Comments
 (0)