Skip to content

Commit 2caa54a

Browse files
committed
feat(actors): expose container system metrics
1 parent 5c794f0 commit 2caa54a

File tree

66 files changed

+2225
-77
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

66 files changed

+2225
-77
lines changed

docker/dev-full/docker-compose.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,8 @@ services:
233233
# We only reserve 100 ports instead of the default 22,000. See
234234
# rivet-guard for explanation.
235235
- "7600-7699:7600-7699"
236+
# cAdvisor metrics endpoint
237+
- "7780:7780"
236238
networks:
237239
- rivet-network
238240

docker/dev-full/otel-collector/config.yaml

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,14 @@ receivers:
55
endpoint: 0.0.0.0:4317
66
http:
77
endpoint: 0.0.0.0:4318
8+
prometheus:
9+
config:
10+
scrape_configs:
11+
- job_name: 'cadvisor'
12+
static_configs:
13+
- targets: ['rivet-client:7780']
14+
metrics_path: /metrics
15+
scrape_interval: 30s
816

917
processors:
1018
batch:
@@ -52,7 +60,7 @@ service:
5260
processors: [batch]
5361
exporters: [clickhouse]
5462
metrics:
55-
receivers: [otlp]
63+
receivers: [otlp, prometheus]
5664
processors: [batch]
5765
exporters: [clickhouse]
5866

docker/universal/Dockerfile

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,8 +120,8 @@ RUN apt-get update -y && \
120120
apt-get install -y ca-certificates openssl curl tini curl && \
121121
curl -Lf -o /lib/libfdb_c.so "https://github.com/apple/foundationdb/releases/download/7.1.60/libfdb_c.x86_64.so"
122122

123-
# MARK: Runner (Full)
124-
FROM --platform=linux/amd64 base-runner AS client-full
123+
# MARK: Runner (Slim)
124+
FROM --platform=linux/amd64 base-runner AS client-slim
125125
ARG CNI_PLUGINS_VERSION=1.3.0
126126
RUN apt-get install -y skopeo iproute2 runc dnsutils && \
127127
echo "Downloading lz4" && \
@@ -142,6 +142,21 @@ COPY ./docker/dev-full/rivet-client/rivet-actor.conflist /opt/cni/config/rivet-a
142142
COPY --from=builder /app/dist/rivet-client /app/dist/rivet-container-runner /usr/local/bin/
143143
ENTRYPOINT ["/usr/bin/tini", "--", "entrypoint.sh"]
144144

145+
# MARK: Runner (Full)
146+
FROM client-slim AS client-full
147+
ARG CADVISOR_VERSION=v0.52.0
148+
RUN apt-get update -y && \
149+
apt-get install -y wget && \
150+
wget -O /usr/local/bin/cadvisor "https://github.com/google/cadvisor/releases/download/${CADVISOR_VERSION}/cadvisor-${CADVISOR_VERSION}-linux-amd64" && \
151+
chmod +x /usr/local/bin/cadvisor && \
152+
apt-get clean && \
153+
rm -rf /var/lib/apt/lists/*
154+
155+
COPY docker/universal/client-full-entrypoint.sh /usr/local/bin/client-full-entrypoint.sh
156+
RUN chmod +x /usr/local/bin/client-full-entrypoint.sh
157+
158+
ENTRYPOINT ["/usr/bin/tini", "--", "/usr/local/bin/client-full-entrypoint.sh"]
159+
145160
# MARK: Monolith
146161
FROM --platform=linux/amd64 debian:12.9-slim AS monolith
147162
ENV DEBIAN_FRONTEND=noninteractive
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#!/bin/bash
2+
set -e
3+
4+
# Start cadvisor in the background
5+
cadvisor \
6+
--port=7780 \
7+
--listen_ip=0.0.0.0 \
8+
--prometheus_endpoint="/metrics" \
9+
--enable_metrics=cpu,cpuLoad,memory,network,disk,diskIO,oom_event,process,tcp,udp \
10+
--docker_only=false \
11+
--disable_root_cgroup_stats=false &
12+
13+
# TODO:
14+
# --raw_cgroup_prefix_whitelist="" \
15+
16+
# Start rivet-client with all passed arguments
17+
exec rivet-client "$@"

examples/system-test-actor/src/shared/server.ts

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,14 @@ export function createAndStartServer(
77
getUpgradeWebSocket: GetUpgradeWebSocketFn,
88
): { app: Hono; port: number } {
99
// Auto-exit timer (currently disabled; the code below is commented out)
10-
setTimeout(() => {
11-
console.error(
12-
"Actor should've been destroyed by now. Automatically exiting.",
13-
);
14-
15-
if (typeof Deno !== "undefined") Deno.exit(1);
16-
else process.exit(1);
17-
}, 60 * 1000);
10+
// setTimeout(() => {
11+
// console.error(
12+
// "Actor should've been destroyed by now. Automatically exiting.",
13+
// );
14+
//
15+
// if (typeof Deno !== "undefined") Deno.exit(1);
16+
// else process.exit(1);
17+
// }, 60 * 1000);
1818

1919
let tickIndex = 0;
2020
setInterval(() => {

frontend/apps/hub/src/domains/project/components/actors/actors-provider.tsx

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import {
1212
actorBuildsAtom,
1313
createActorAtom,
1414
type Logs,
15+
type Metrics,
1516
actorsQueryAtom,
1617
actorsInternalFilterAtom,
1718
type Actor,
@@ -30,6 +31,7 @@ import {
3031
createActorEndpoint,
3132
destroyActorMutationOptions,
3233
actorLogsQueryOptions,
34+
actorMetricsQueryOptions,
3335
actorRegionsQueryOptions,
3436
actorBuildsQueryOptions,
3537
} from "../../queries";
@@ -257,9 +259,63 @@ export function ActorsProvider({
257259
};
258260
};
259261

262+
const metrics = atom({
263+
metrics: { cpu: null, memory: null } as Metrics,
264+
status: "pending",
265+
});
266+
metrics.onMount = (set) => {
267+
const metricsObserver = new QueryObserver(
268+
queryClient,
269+
actorMetricsQueryOptions({
270+
projectNameId,
271+
environmentNameId,
272+
actorId: actor.id,
273+
}, { refetchInterval: 5000 }),
274+
);
275+
276+
type MetricsQuery = {
277+
status: string;
278+
data?: Awaited<
279+
ReturnType<
280+
Exclude<
281+
ReturnType<
282+
typeof actorMetricsQueryOptions
283+
>["queryFn"],
284+
undefined
285+
>
286+
>
287+
>;
288+
};
289+
290+
function updateMetrics(query: MetricsQuery) {
291+
const data = query.data;
292+
set((prev) => ({
293+
...prev,
294+
...data,
295+
status: query.status,
296+
}));
297+
}
298+
299+
const subMetrics = metricsObserver.subscribe(
300+
(query) => {
301+
updateMetrics(query);
302+
},
303+
);
304+
305+
updateMetrics(
306+
metricsObserver.getCurrentQuery().state,
307+
);
308+
309+
return () => {
310+
metricsObserver.destroy();
311+
subMetrics();
312+
};
313+
};
314+
260315
return {
261316
...actor,
262317
logs,
318+
metrics,
263319
destroy,
264320
status: getActorStatus(actor),
265321
};

frontend/apps/hub/src/domains/project/queries/actors/query-options.ts

Lines changed: 128 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import {
1010
keepPreviousData,
1111
queryOptions,
1212
} from "@tanstack/react-query";
13-
import stripAnsi from 'strip-ansi';
13+
import stripAnsi from "strip-ansi";
1414

1515
export const projectActorsQueryOptions = ({
1616
projectNameId,
@@ -243,18 +243,142 @@ export const actorLogsQueryOptions = (
243243
line: raw,
244244
message: "",
245245
properties: {},
246-
} as const
246+
} as const;
247247
});
248248

249-
250-
return {...response, logs };
249+
return { ...response, logs };
251250
},
252251
meta: {
253252
watch: mergeWatchStreams,
254253
},
255254
});
256255
};
257256

257+
export const actorMetricsQueryOptions = (
258+
{
259+
projectNameId,
260+
environmentNameId,
261+
actorId,
262+
}: {
263+
projectNameId: string;
264+
environmentNameId: string;
265+
actorId: string;
266+
},
267+
opts: { refetchInterval?: number } = {},
268+
) => {
269+
return queryOptions({
270+
...opts,
271+
queryKey: [
272+
"project",
273+
projectNameId,
274+
"environment",
275+
environmentNameId,
276+
"actor",
277+
actorId,
278+
"metrics",
279+
] as const,
280+
queryFn: async ({
281+
signal: abortSignal,
282+
queryKey: [, project, , environment, , actorId],
283+
}) => {
284+
const pollOffset = 5_000;
285+
const pollInterval = 15_000;
286+
287+
const now = Date.now();
288+
const start = now - pollInterval * 2 - pollOffset; // Window covers two poll intervals (30 s), ending pollOffset before now
289+
const end = now - pollOffset; // Metrics have a minimum latency of 5 seconds based on the poll interval
290+
291+
const response = await rivetClient.actors.metrics.get(
292+
actorId,
293+
{
294+
project,
295+
environment,
296+
start,
297+
end,
298+
interval: pollInterval,
299+
},
300+
{ abortSignal },
301+
);
302+
303+
// Process the new response format
304+
const metrics: Record<string, number | null> = {};
305+
const rawData: Record<string, number[]> = {};
306+
307+
if (
308+
response.metricNames &&
309+
response.metricValues &&
310+
response.metricAttributes &&
311+
response.metricNames.length > 0
312+
) {
313+
response.metricNames.forEach((metricName, index) => {
314+
const metricValues = response.metricValues[index];
315+
const attributes = response.metricAttributes[index] || {};
316+
317+
// Create the metric key based on the metric name and attributes
318+
let metricKey = metricName;
319+
320+
// Handle specific attribute mappings to match UI expectations
321+
if (attributes.failure_type && attributes.scope) {
322+
metricKey = `memory_failures_${attributes.failure_type}_${attributes.scope}`;
323+
} else if (attributes.tcp_state) {
324+
if (metricName.includes('tcp6')) {
325+
metricKey = `network_tcp6_usage_${attributes.tcp_state}`;
326+
} else {
327+
metricKey = `network_tcp_usage_${attributes.tcp_state}`;
328+
}
329+
} else if (attributes.udp_state) {
330+
if (metricName.includes('udp6')) {
331+
metricKey = `network_udp6_usage_${attributes.udp_state}`;
332+
} else {
333+
metricKey = `network_udp_usage_${attributes.udp_state}`;
334+
}
335+
} else if (attributes.state) {
336+
metricKey = `tasks_state_${attributes.state}`;
337+
} else if (attributes.interface) {
338+
// Handle network interface attributes
339+
const baseMetric = metricName.replace(/^container_/, '');
340+
metricKey = `${baseMetric}_${attributes.interface}`;
341+
} else if (attributes.device) {
342+
// Handle filesystem device attributes
343+
const baseMetric = metricName.replace(/^container_/, '');
344+
metricKey = `${baseMetric}_${attributes.device}`;
345+
} else {
346+
// Remove "container_" prefix to match UI expectations
347+
metricKey = metricName.replace(/^container_/, '');
348+
}
349+
350+
// Store raw time series data for rate calculations
351+
rawData[metricKey] = metricValues || [];
352+
353+
if (metricValues && metricValues.length > 0) {
354+
// Get the latest non-zero value (last value is often 0)
355+
let value = null;
356+
for (let i = metricValues.length - 1; i >= 0; i--) {
357+
if (metricValues[i] !== 0) {
358+
value = metricValues[i];
359+
break;
360+
}
361+
}
362+
// If all values are 0, use the last value anyway
363+
if (value === null && metricValues.length > 0) {
364+
value = metricValues[metricValues.length - 1];
365+
}
366+
metrics[metricKey] = value;
367+
} else {
368+
metrics[metricKey] = null;
369+
}
370+
});
371+
}
372+
373+
return {
374+
metrics,
375+
rawData,
376+
interval: pollInterval,
377+
};
378+
},
379+
});
380+
};
381+
258382
export const actorBuildsQueryOptions = ({
259383
projectNameId,
260384
environmentNameId,

frontend/packages/components/src/actors/actor-context.tsx

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ export enum ActorFeature {
1313
State = "state",
1414
Console = "console",
1515
Runtime = "runtime",
16+
Metrics = "metrics",
1617
InspectReconnectNotification = "inspect_reconnect_notification",
1718
}
1819

@@ -25,6 +26,7 @@ export type Actor = Omit<
2526
lifecycle?: Rivet.actor.Lifecycle;
2627
endpoint?: string;
2728
logs: LogsAtom;
29+
metrics: MetricsAtom;
2830
network?: Rivet.actor.Network | null;
2931
resources?: Rivet.actor.Resources | null;
3032
runtime?: Rivet.actor.Runtime | null;
@@ -43,6 +45,8 @@ export type Logs = {
4345
properties: Record<string, unknown>;
4446
}[];
4547

48+
export type Metrics = Record<string, number | null>;
49+
4650
export type Build = Rivet.actor.Build;
4751
export type DestroyActor = {
4852
isDestroying: boolean;
@@ -55,6 +59,11 @@ export type LogsAtom = Atom<{
5559
// query status
5660
status: string;
5761
}>;
62+
export type MetricsAtom = Atom<{
63+
metrics: Metrics;
64+
// query status
65+
status: string;
66+
}>;
5867
export type BuildAtom = Atom<Build>;
5968
export type DestroyActorAtom = Atom<DestroyActor>;
6069

@@ -378,6 +387,7 @@ const commonActorFeatures = [
378387
ActorFeature.Logs,
379388
ActorFeature.Config,
380389
ActorFeature.Runtime,
390+
ActorFeature.Metrics,
381391
ActorFeature.InspectReconnectNotification,
382392
];
383393

0 commit comments

Comments
 (0)