From 560842eca6ded058942acbdb6252d8ac56a5cec1 Mon Sep 17 00:00:00 2001 From: Josh Date: Wed, 3 Sep 2025 15:29:47 -0400 Subject: [PATCH] metrics: handle varying ncpu within a query. The metrics dashboards assume that queries return a consistent number of series over time. For example, if a query for cpu utilization gets four timeseries at one timepoint, it shouldhave four timeseries at all timepoints. However, this assumption isn't met when the cardinality of the given resource changes over time, such as when the user scales the number of vcpus for an instance. This assumption leads to two bugs: * If the number of vcpus changes within a query, we normalize by the largest observed vcpu count. In other words, increasing the number vcpus makes utilization appear lower at timepoints prior to the scaling event. * If a user scales up the number of vcpus such that the 0th vcpu timeseries has fewer timepoints than subsequent timeseries, we truncate series after the 0th. This patch attempts to fix both bugs. First, we count the number of non-null values per timeseries to normalize cpu use. Second, we consider all timestamps from all returned series so that we don't accidentally truncate the results. --- app/components/oxql-metrics/OxqlMetric.tsx | 9 +- app/components/oxql-metrics/util.spec.ts | 135 ++++++++++++++++++++- app/components/oxql-metrics/util.ts | 48 +++++--- 3 files changed, 170 insertions(+), 22 deletions(-) diff --git a/app/components/oxql-metrics/OxqlMetric.tsx b/app/components/oxql-metrics/OxqlMetric.tsx index db8c09799..accfc7334 100644 --- a/app/components/oxql-metrics/OxqlMetric.tsx +++ b/app/components/oxql-metrics/OxqlMetric.tsx @@ -72,17 +72,16 @@ export function OxqlMetric({ title, description, unit, ...queryObj }: OxqlMetric const hasError = !!error && !errorMeansEmpty const { startTime, endTime } = queryObj - const { chartData, timeseriesCount } = useMemo( - () => - errorMeansEmpty ? { chartData: [], timeseriesCount: 0 } : composeOxqlData(metrics), + const { chartData, valueCounts } = useMemo( + () => (errorMeansEmpty ? { chartData: [], valueCounts: [] } : composeOxqlData(metrics)), [metrics, errorMeansEmpty] ) const { data, label, unitForSet, yAxisTickFormatter } = useMemo(() => { if (unit === 'Bytes') return getBytesChartProps(chartData) if (unit === 'Count') return getCountChartProps(chartData) - return getUtilizationChartProps(chartData, timeseriesCount) - }, [unit, chartData, timeseriesCount]) + return getUtilizationChartProps(chartData, valueCounts) + }, [unit, chartData, valueCounts]) const [modalOpen, setModalOpen] = useState(false) diff --git a/app/components/oxql-metrics/util.spec.ts b/app/components/oxql-metrics/util.spec.ts index 9d2f2121b..ac5ca915e 100644 --- a/app/components/oxql-metrics/util.spec.ts +++ b/app/components/oxql-metrics/util.spec.ts @@ -5,6 +5,7 @@ * * Copyright Oxide Computer Company */ +import * as R from 'remeda' import { describe, expect, test } from 'vitest' import type { OxqlQueryResult } from '~/api' @@ -164,6 +165,70 @@ const utilizationQueryResult1: OxqlQueryResult = { const timeseries1 = utilizationQueryResult1.tables[0].timeseries[0] +const utilizationQueryResult2: OxqlQueryResult = { + tables: [ + { + name: 'virtual_machine:vcpu_usage', + timeseries: [ + { + fields: { + vcpuId: { + type: 'u32', + value: 0, + }, + }, + points: { + timestamps: [ + new Date('2025-02-21T19:28:43Z'), + new Date('2025-02-21T19:29:43Z'), + new Date('2025-02-21T19:30:43Z'), + new Date('2025-02-21T19:31:43Z'), + new Date('2025-02-21T19:32:43Z'), + new Date('2025-02-21T19:33:43Z'), + new Date('2025-02-21T19:34:43Z'), + new Date('2025-02-21T19:35:43Z'), + ], + values: [ + { + values: { + type: 'double', + values: R.range(1, 9).map((value) => value * 1000000), + }, + metricType: 'gauge', + }, + ], + }, + }, + { + fields: { + vcpuId: { + type: 'u32', + value: 1, + }, + }, + points: { + timestamps: [ + new Date('2025-02-21T19:32:43Z'), + new Date('2025-02-21T19:33:43Z'), + new Date('2025-02-21T19:34:43Z'), + new Date('2025-02-21T19:35:43Z'), + ], + values: [ + { + values: { + type: 'double', + values: R.range(1, 5).map((value) => value * 1000000), + }, + metricType: 'gauge', + }, + ], + }, + }, + ], + }, + ], +} + test('sumValues', () => { expect(sumValues([], 0)).toEqual([]) expect(sumValues([timeseries1], 4)).toEqual([ @@ -205,7 +270,41 @@ const composedUtilizationData = { value: null, }, ], - timeseriesCount: 1, + valueCounts: [1, 1, 1, 0], +} + +const composedUtilizationData2 = { + chartData: [ + { + timestamp: 1740166183000, + value: 4000000, + }, + { + timestamp: 1740166243000, + value: 6000000, + }, + { + timestamp: 1740166303000, + value: 8000000, + }, + { + timestamp: 1740166363000, + value: 5000000, + }, + { + timestamp: 1740166423000, + value: 6000000, + }, + { + timestamp: 1740166483000, + value: 7000000, + }, + { + timestamp: 1740166543000, + value: 8000000, + }, + ], + valueCounts: [2, 2, 2, 2, 1, 1, 1, 1], } // As above, we've removed the first value from the original data @@ -241,15 +340,45 @@ const utilizationChartData4 = [ }, ] +// These are the exepcted values if the vcpu count changed mid-query. +// As above, we've discarded the first value from the original data +const utilizationChartData5 = [ + { + timestamp: 1740166183000, + value: 100.04612223059189, + }, + { + timestamp: 1740166243000, + value: 25.028739852939403, + }, + { + timestamp: 1740166303000, + value: null, + }, +] + test('get utilization chart data and process it for chart display', () => { const composedData = composeOxqlData(utilizationQueryResult1) expect(composedData).toEqual(composedUtilizationData) - const { data: chartData } = getUtilizationChartProps(composedUtilizationData.chartData, 1) + + const composedData2 = composeOxqlData(utilizationQueryResult2) + expect(composedData2).toEqual(composedUtilizationData2) + + const { data: chartData } = getUtilizationChartProps( + composedUtilizationData.chartData, + [1, 1, 1, 0] + ) expect(chartData).toEqual(utilizationChartData) // Testing the same data, but for a 4-vcpu instance const { data: chartData4 } = getUtilizationChartProps( composedUtilizationData.chartData, - 4 + [4, 4, 4, 4] ) expect(chartData4).toEqual(utilizationChartData4) + // Testing the same data, but where the cpu count changed mid-results + const { data: chartData5 } = getUtilizationChartProps( + composedUtilizationData.chartData, + [1, 4, 4, 4] + ) + expect(chartData5).toEqual(utilizationChartData5) }) diff --git a/app/components/oxql-metrics/util.ts b/app/components/oxql-metrics/util.ts index 266dfec6c..2aabc280d 100644 --- a/app/components/oxql-metrics/util.ts +++ b/app/components/oxql-metrics/util.ts @@ -122,21 +122,40 @@ export const sumValues = (timeseries: Timeseries[], arrLen: number): (number | n ) ) +export const countValues = (timeseries: Timeseries[], arrLen: number): number[] => + Array.from({ length: arrLen }).map((_, i) => + R.pipe( + timeseries, + // get point at that index for each timeseries + R.map((ts) => ts.points.values.at(0)?.values.values?.[i]), + // filter out nulls (undefined shouldn't happen) + R.filter((p) => typeof p === 'number'), + // count non-null elements per series + (points) => points.length + ) + ) + // Take the OxQL Query Result and return the data in a format that the chart can use // We'll do this by creating two arrays: one for the timestamps and one for the values // We'll then combine these into an array of objects, each with a timestamp and a value // Note that this data will need to be processed further, based on the kind of chart we're creating export const composeOxqlData = (data: OxqlQueryResult | undefined) => { let timeseriesCount = 0 - if (!data) return { chartData: [], timeseriesCount } + if (!data) return { chartData: [], valueCounts: [] } const timeseriesData = data.tables[0].timeseries timeseriesCount = timeseriesData.length - if (!timeseriesCount) return { chartData: [], timeseriesCount } - // Extract timestamps (all series should have the same timestamps) - const timestamps = - timeseriesData[0]?.points.timestamps.map((t) => new Date(t).getTime()) || [] - // Sum up the values across all time series + if (!timeseriesCount) return { chartData: [], valueCounts: [] } + // Extract timestamps. Series may have different timestamps, e.g. when new + // resources are created within the query interval + const timestamps = R.pipe( + timeseriesData, + R.map((series) => series.points.timestamps.map((t) => new Date(t).getTime())), + R.flat(), + R.unique(), + R.sort((a, b) => a - b) + ) const summedValues = sumValues(timeseriesData, timestamps.length) + const valueCounts = countValues(timeseriesData, timestamps.length) const chartData = timestamps .map((timestamp, idx) => ({ timestamp, value: summedValues[idx] })) // Drop the first datapoint, which — for delta metric types — is the cumulative sum of all previous @@ -144,7 +163,7 @@ export const composeOxqlData = (data: OxqlQueryResult | undefined) => { // We could use a more elegant approach to this down the road .slice(1) - return { chartData, timeseriesCount } + return { chartData, valueCounts } } // What each function will return @@ -204,16 +223,17 @@ export const getCountChartProps = (chartData: ChartDatum[]): OxqlMetricChartProp export const getUtilizationChartProps = ( chartData: ChartDatum[], - nCPUs: number + valueCounts: number[] ): OxqlMetricChartProps => { - // The divisor is the oximeter logging interval for CPU data (5 seconds) * 1,000,000,000 (nanoseconds) * nCPUs - const divisor = VCPU_KSTAT_INTERVAL_SEC * 1000 * 1000 * 1000 * nCPUs + // The divisor is the oximeter logging interval for CPU data (5 seconds) * 1,000,000,000 (nanoseconds) + const baseDivisor = VCPU_KSTAT_INTERVAL_SEC * 1000 * 1000 * 1000 + const hasData = R.sum(valueCounts) > 0 const data = - // dividing by 0 would blow it up, so on the off chance that timeSeriesCount is 0, data should be an empty array - divisor > 0 - ? chartData.map(({ timestamp, value }) => ({ + // dividing by 0 would blow it up, so on the off chance that all series are empty, data should be an empty array + hasData + ? chartData.map(({ timestamp, value }, idx) => ({ timestamp, - value: value !== null ? (value * 100) / divisor : null, + value: value !== null ? (value * 100) / baseDivisor / valueCounts[idx] : null, })) : [] return { data, label: '(%)', unitForSet: '%', yAxisTickFormatter: (n: number) => `${n}%` }